ml-sfa 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml_sfa-0.1.0/.env.example +10 -0
- ml_sfa-0.1.0/.githooks/pre-commit +18 -0
- ml_sfa-0.1.0/.github/workflows/auto-release.yml +55 -0
- ml_sfa-0.1.0/.github/workflows/ci.yml +51 -0
- ml_sfa-0.1.0/.github/workflows/release.yml +52 -0
- ml_sfa-0.1.0/.gitignore +66 -0
- ml_sfa-0.1.0/BLUEPRINT.md +361 -0
- ml_sfa-0.1.0/CHANGELOG.md +45 -0
- ml_sfa-0.1.0/Makefile +32 -0
- ml_sfa-0.1.0/PKG-INFO +52 -0
- ml_sfa-0.1.0/README.md +26 -0
- ml_sfa-0.1.0/docs/design/bart_sfm.md +394 -0
- ml_sfa-0.1.0/docs/design/joint_nn_sfa.md +404 -0
- ml_sfa-0.1.0/docs/research/ml_sfa_survey.md +421 -0
- ml_sfa-0.1.0/docs/research/model_selection.md +490 -0
- ml_sfa-0.1.0/notebooks/model_showcase.ipynb +655 -0
- ml_sfa-0.1.0/pyproject.toml +91 -0
- ml_sfa-0.1.0/scripts/release.py +173 -0
- ml_sfa-0.1.0/src/ml_sfa/__init__.py +5 -0
- ml_sfa-0.1.0/src/ml_sfa/_types.py +17 -0
- ml_sfa-0.1.0/src/ml_sfa/_version.py +34 -0
- ml_sfa-0.1.0/src/ml_sfa/data/__init__.py +5 -0
- ml_sfa-0.1.0/src/ml_sfa/data/simulator.py +330 -0
- ml_sfa-0.1.0/src/ml_sfa/evaluation/__init__.py +29 -0
- ml_sfa-0.1.0/src/ml_sfa/evaluation/comparison.py +208 -0
- ml_sfa-0.1.0/src/ml_sfa/evaluation/metrics.py +190 -0
- ml_sfa-0.1.0/src/ml_sfa/models/__init__.py +28 -0
- ml_sfa-0.1.0/src/ml_sfa/models/_sfa_loss.py +165 -0
- ml_sfa-0.1.0/src/ml_sfa/models/_types.py +5 -0
- ml_sfa-0.1.0/src/ml_sfa/models/bart_frontier.py +435 -0
- ml_sfa-0.1.0/src/ml_sfa/models/base.py +247 -0
- ml_sfa-0.1.0/src/ml_sfa/models/kernel_frontier.py +297 -0
- ml_sfa-0.1.0/src/ml_sfa/models/nn_frontier.py +500 -0
- ml_sfa-0.1.0/src/ml_sfa/models/parametric.py +637 -0
- ml_sfa-0.1.0/src/ml_sfa/py.typed +0 -0
- ml_sfa-0.1.0/src/ml_sfa/utils/__init__.py +26 -0
- ml_sfa-0.1.0/src/ml_sfa/utils/constraints.py +106 -0
- ml_sfa-0.1.0/src/ml_sfa/utils/distributions.py +401 -0
- ml_sfa-0.1.0/tests/__init__.py +0 -0
- ml_sfa-0.1.0/tests/conftest.py +10 -0
- ml_sfa-0.1.0/tests/integration/__init__.py +0 -0
- ml_sfa-0.1.0/tests/integration/test_bart_frontier_pipeline.py +140 -0
- ml_sfa-0.1.0/tests/integration/test_model_comparison.py +96 -0
- ml_sfa-0.1.0/tests/integration/test_nn_frontier_pipeline.py +235 -0
- ml_sfa-0.1.0/tests/integration/test_parametric_pipeline.py +194 -0
- ml_sfa-0.1.0/tests/unit/__init__.py +0 -0
- ml_sfa-0.1.0/tests/unit/test_bart_frontier.py +352 -0
- ml_sfa-0.1.0/tests/unit/test_base.py +178 -0
- ml_sfa-0.1.0/tests/unit/test_comparison.py +168 -0
- ml_sfa-0.1.0/tests/unit/test_constraints.py +134 -0
- ml_sfa-0.1.0/tests/unit/test_distributions.py +332 -0
- ml_sfa-0.1.0/tests/unit/test_kernel_frontier.py +358 -0
- ml_sfa-0.1.0/tests/unit/test_metrics.py +194 -0
- ml_sfa-0.1.0/tests/unit/test_nn_frontier.py +356 -0
- ml_sfa-0.1.0/tests/unit/test_package.py +20 -0
- ml_sfa-0.1.0/tests/unit/test_parametric.py +371 -0
- ml_sfa-0.1.0/tests/unit/test_sfa_loss.py +254 -0
- ml_sfa-0.1.0/tests/unit/test_simulator.py +185 -0
- ml_sfa-0.1.0/uv.lock +2613 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Pre-commit hook: runs `ruff check` and `ruff format --check` on staged .py
# files. Nothing is rewritten; the commit is rejected if either check fails.
# Activate: git config core.hooksPath .githooks

set -e

# Collect staged (Added/Copied/Modified) Python files. Paths are read
# NUL-delimited (-z) into an array so that names containing spaces or glob
# characters are passed to ruff as single, unmodified arguments.
STAGED=()
while IFS= read -r -d '' path; do
    STAGED+=("$path")
done < <(git diff --cached --name-only -z --diff-filter=ACM -- '*.py')

# Nothing to lint: allow the commit.
if [ "${#STAGED[@]}" -eq 0 ]; then
    exit 0
fi

echo "Running ruff check on staged files..."
uv run ruff check "${STAGED[@]}"
echo "Running ruff format check on staged files..."
uv run ruff format --check "${STAGED[@]}"

echo "Pre-commit checks passed."
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Tags the merge commit and creates a GitHub Release when a pull request whose
# title starts with "release:" is merged into main. The version tag (vX.Y.Z)
# is taken from the PR title; the release notes come from CHANGELOG.md.
#
# Values that originate from the pull request or from files in the repository
# are handed to the shell through `env:` instead of being expanded with
# ${{ }} inside `run:` scripts. Inline expansion would let a crafted PR title
# inject shell commands, and would make bash execute the backtick-quoted
# identifiers that appear in CHANGELOG.md as command substitutions.
name: Auto Release

on:
  pull_request:
    types: [closed]
    branches: [main]

jobs:
  tag-and-release:
    if: github.event.pull_request.merged == true && startsWith(github.event.pull_request.title, 'release:')
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Extract version from PR title
        id: version
        env:
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: |
          # Take the first vX.Y.Z occurrence so the output stays single-line.
          VERSION=$(printf '%s\n' "$PR_TITLE" | grep -oP 'v\d+\.\d+\.\d+' | head -n 1 || true)
          if [ -z "$VERSION" ]; then
            echo "::error::No vX.Y.Z version found in the pull request title"
            exit 1
          fi
          echo "tag=$VERSION" >> "$GITHUB_OUTPUT"
          echo "Extracted version: $VERSION"

      - name: Validate tag does not exist
        env:
          TAG: ${{ steps.version.outputs.tag }}
        run: |
          if git rev-parse "$TAG" >/dev/null 2>&1; then
            echo "::error::Tag $TAG already exists"
            exit 1
          fi

      - name: Extract release notes from CHANGELOG
        id: notes
        env:
          TAG: ${{ steps.version.outputs.tag }}
        run: |
          VER="${TAG#v}"
          # Print the lines between the "## [VER]" heading and the next "## [" heading.
          NOTES=$(awk "/^## \[${VER}\]/{flag=1; next} /^## \[/{flag=0} flag" CHANGELOG.md)
          echo "notes<<EOFNOTES" >> "$GITHUB_OUTPUT"
          echo "$NOTES" >> "$GITHUB_OUTPUT"
          echo "EOFNOTES" >> "$GITHUB_OUTPUT"

      # NOTE(review): a tag pushed with the default GITHUB_TOKEN does not start
      # other workflows (for example one triggered by `push: tags`) - confirm
      # how the publishing workflow is expected to be triggered.
      - name: Create tag
        env:
          TAG: ${{ steps.version.outputs.tag }}
        run: |
          git tag "$TAG"
          git push origin "$TAG"

      - name: Create GitHub Release
        env:
          GH_TOKEN: ${{ github.token }}
          TAG: ${{ steps.version.outputs.tag }}
          NOTES: ${{ steps.notes.outputs.notes }}
        run: |
          gh release create "$TAG" \
            --title "$TAG" \
            --notes "$NOTES"
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Continuous integration: lint, format check, type check and tests on a
# Python version matrix, plus a build / metadata check / import smoke test
# of the distribution.
name: CI

on:
  pull_request:
    branches:
      - main
      - develop
  push:
    branches:
      - main

jobs:
  quality:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.11"
          - "3.12"
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history; presumably needed so the VCS-derived package
          # version can be computed from tags - confirm.
          fetch-depth: 0
      - uses: astral-sh/setup-uv@v5
        with:
          version: "latest"
      - run: uv python install ${{ matrix.python-version }}
      - run: uv sync --frozen --dev
      - run: uv run ruff check .
      - run: uv run ruff format --check .
      - run: uv run mypy src/ml_sfa/
      - name: Run tests
        # For pull requests into main and pushes to main, pytest is invoked
        # with an empty marker expression (-m ""); presumably this overrides
        # a default marker filter from the project configuration - confirm.
        run: |
          if [ "${{ github.base_ref }}" = "main" ] || [ "${{ github.ref }}" = "refs/heads/main" ]; then
            uv run pytest -m "" --cov=ml_sfa --cov-fail-under=80 -q
          else
            uv run pytest --cov=ml_sfa --cov-fail-under=80 -q
          fi

  distribution:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: astral-sh/setup-uv@v5
        with:
          version: "latest"
      - run: uv python install 3.11
      - run: uv sync --frozen --dev
      - run: uv build
      - run: uv run twine check dist/*
      - name: Smoke test
        # Install the freshly built wheel and make sure the package imports.
        run: |
          pip install dist/*.whl
          python -c "from ml_sfa import __version__; print('import ok:', __version__)"
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Release pipeline, triggered by pushing a tag that matches "v*":
# build the distributions once, publish them to TestPyPI, then to PyPI.
name: Release

on:
  push:
    tags:
      - "v*"

jobs:
  # Build sdist and wheel, validate their metadata, and share them with the
  # publishing jobs as the "dist" artifact.
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: astral-sh/setup-uv@v5
        with:
          version: "latest"
      - run: uv python install 3.11
      - run: uv sync --frozen --dev
      - run: uv build
      - run: uv run twine check dist/*
      - uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  # Upload to TestPyPI first; `id-token: write` lets the job request an OIDC
  # token, so no API token secret is referenced here.
  publish-testpypi:
    needs: build
    runs-on: ubuntu-latest
    environment: testpypi
    permissions:
      id-token: write
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/
      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/

  # Upload to PyPI only after the TestPyPI job has succeeded.
  publish-pypi:
    needs: publish-testpypi
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      id-token: write
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/
      - uses: pypa/gh-action-pypi-publish@release/v1
|
ml_sfa-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
*.egg
|
|
9
|
+
|
|
10
|
+
# Auto-generated by hatch-vcs (never edit manually)
|
|
11
|
+
src/ml_sfa/_version.py
|
|
12
|
+
|
|
13
|
+
# Virtual environments
|
|
14
|
+
.venv/
|
|
15
|
+
venv/
|
|
16
|
+
env/
|
|
17
|
+
|
|
18
|
+
# IDE
|
|
19
|
+
.vscode/
|
|
20
|
+
.idea/
|
|
21
|
+
*.swp
|
|
22
|
+
*.swo
|
|
23
|
+
|
|
24
|
+
# Jupyter
|
|
25
|
+
.ipynb_checkpoints/
|
|
26
|
+
|
|
27
|
+
# Testing
|
|
28
|
+
.coverage
|
|
29
|
+
htmlcov/
|
|
30
|
+
.pytest_cache/
|
|
31
|
+
|
|
32
|
+
# mypy
|
|
33
|
+
.mypy_cache/
|
|
34
|
+
|
|
35
|
+
# ruff
|
|
36
|
+
.ruff_cache/
|
|
37
|
+
|
|
38
|
+
# Environment
|
|
39
|
+
.env
|
|
40
|
+
|
|
41
|
+
# OS
|
|
42
|
+
.DS_Store
|
|
43
|
+
Thumbs.db
|
|
44
|
+
|
|
45
|
+
# Data (large files)
|
|
46
|
+
data/raw/
|
|
47
|
+
data/processed/
|
|
48
|
+
*.csv
|
|
49
|
+
*.parquet
|
|
50
|
+
!configs/*.csv
|
|
51
|
+
|
|
52
|
+
# Models
|
|
53
|
+
*.pt
|
|
54
|
+
*.pth
|
|
55
|
+
*.pkl
|
|
56
|
+
|
|
57
|
+
# AI
|
|
58
|
+
.ai_settings/
|
|
59
|
+
.agent/
|
|
60
|
+
.agents/
|
|
61
|
+
.claude/
|
|
62
|
+
.codex/
|
|
63
|
+
.gemini/
|
|
64
|
+
AGENTS.md
|
|
65
|
+
CLAUDE.md
|
|
66
|
+
GEMINI.md
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
# ML-SFA 設計図 (Blueprint)
|
|
2
|
+
|
|
3
|
+
## 1. プロジェクト概要
|
|
4
|
+
|
|
5
|
+
### 目的
|
|
6
|
+
|
|
7
|
+
ML手法を用いた確率的フロンティア分析 (Stochastic Frontier Analysis) のPythonライブラリを構築する。
|
|
8
|
+
従来のパラメトリックSFAの制約(関数形依存・分布仮定への感度)をML技術で緩和し、
|
|
9
|
+
scikit-learn互換APIで提供する。
|
|
10
|
+
|
|
11
|
+
### 背景: 従来のSFA
|
|
12
|
+
|
|
13
|
+
SFAは Aigner, Lovell, Schmidt (1977) および Meeusen, van den Broeck (1977) が提案した手法で、
|
|
14
|
+
生産フロンティアからの乖離を **ノイズ (v)** と **非効率性 (u)** に分解する:
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
y_i = f(x_i; β) + v_i - u_i
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
- `f(x_i; β)`: 決定論的フロンティア(Cobb-Douglas, translog等)
|
|
21
|
+
- `v_i ~ N(0, σ_v²)`: 対称ノイズ(測定誤差、確率的ショック)
|
|
22
|
+
- `u_i ≥ 0`: 片側非効率性(half-normal, truncated normal, exponential等)
|
|
23
|
+
- 技術的効率性: `TE_i = exp(-u_i)`
|
|
24
|
+
|
|
25
|
+
**従来手法の限界:**
|
|
26
|
+
1. フロンティア関数形への依存(Cobb-Douglas vs translog で結果が変わる)
|
|
27
|
+
2. 非効率性の分布仮定への感度
|
|
28
|
+
3. 複雑な非線形投入・産出関係を捉えられない
|
|
29
|
+
4. v と u の分離が分布仮定に完全依存(脆弱な識別)
|
|
30
|
+
|
|
31
|
+
### 既存実装の空白
|
|
32
|
+
|
|
33
|
+
| パッケージ | 特徴 | 欠落 |
|
|
34
|
+
|-----------|------|------|
|
|
35
|
+
| pySFA | 基本的MLE (half-normal のみ) | ML拡張なし、パネルなし |
|
|
36
|
+
| FronPy | 6分布サポート | 線形フロンティアのみ |
|
|
37
|
+
| SFMA | B-spline + shape制約 | メタ分析特化 |
|
|
38
|
+
| pyStoNED | CNLS + SFA分解 | ML手法なし |
|
|
39
|
+
|
|
40
|
+
**→ ML-SFA が埋めるギャップ:**
|
|
41
|
+
- scikit-learn互換APIのSFAライブラリ
|
|
42
|
+
- NN / GP / BART 等によるノンパラメトリックフロンティア
|
|
43
|
+
- 経済学的shape制約(単調性・凹性)
|
|
44
|
+
- パネルデータ対応
|
|
45
|
+
- 不確実性定量化
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## 2. アーキテクチャ
|
|
50
|
+
|
|
51
|
+
### 2.1 全体構成
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
ml_sfa/
|
|
55
|
+
├── models/
|
|
56
|
+
│ ├── base.py # 基底クラス (BaseSFAEstimator)
|
|
57
|
+
│ ├── parametric.py # 伝統的パラメトリックSFA
|
|
58
|
+
│ ├── nn_frontier.py # ニューラルネットワークSFA
|
|
59
|
+
│ ├── gp_frontier.py # ガウス過程SFA
|
|
60
|
+
│ └── kernel_frontier.py # カーネルSFA
|
|
61
|
+
├── data/
|
|
62
|
+
│ ├── loader.py # データ読み込み・検証
|
|
63
|
+
│ ├── preprocessor.py # 前処理パイプライン
|
|
64
|
+
│ └── simulator.py # シミュレーションデータ生成
|
|
65
|
+
├── evaluation/
|
|
66
|
+
│ ├── metrics.py # 効率性・モデル評価指標
|
|
67
|
+
│ └── comparison.py # モデル比較フレームワーク
|
|
68
|
+
└── utils/
|
|
69
|
+
├── distributions.py # 非効率性分布 (half-normal, truncated normal, etc.)
|
|
70
|
+
└── constraints.py # Shape制約 (単調性, 凹性)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### 2.2 クラス階層
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
sklearn.base.BaseEstimator
|
|
77
|
+
└── BaseSFAEstimator (ABC)
|
|
78
|
+
├── fit(X, y) → self
|
|
79
|
+
├── predict(X) → y_hat (フロンティア予測)
|
|
80
|
+
├── efficiency(X, y) → TE (技術的効率性)
|
|
81
|
+
├── get_inefficiency(X, y) → u_hat
|
|
82
|
+
├── get_noise(X, y) → v_hat
|
|
83
|
+
├── log_likelihood() → float
|
|
84
|
+
├── summary() → SFASummary
|
|
85
|
+
│
|
|
86
|
+
├── ParametricSFA
|
|
87
|
+
│ ├── frontier_type: "cobb-douglas" | "translog"
|
|
88
|
+
│ └── inefficiency_dist: "half-normal" | "truncated-normal" | "exponential"
|
|
89
|
+
│
|
|
90
|
+
├── NNFrontierSFA
|
|
91
|
+
│ ├── フロンティア: MLP / RBF ネットワーク
|
|
92
|
+
│ └── 誤差分解: MLE (2段階) or 同時推定
|
|
93
|
+
│
|
|
94
|
+
├── GPFrontierSFA
|
|
95
|
+
│ ├── フロンティア: ガウス過程回帰 (GPyTorch)
|
|
96
|
+
│ └── 単調性: virtual derivative observations
|
|
97
|
+
│
|
|
98
|
+
└── KernelSFA
|
|
99
|
+
├── フロンティア: Nadaraya-Watson / 局所多項式
|
|
100
|
+
└── 誤差分解: 局所MLE or 2段階
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### 2.3 データフロー
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
入力データ (X, y)
|
|
107
|
+
│
|
|
108
|
+
▼
|
|
109
|
+
[前処理] → 標準化, 対数変換, 欠損値処理
|
|
110
|
+
│
|
|
111
|
+
▼
|
|
112
|
+
[フロンティア推定] → f̂(X) をML手法で推定
|
|
113
|
+
│ (shape制約: 単調増加, 凹性)
|
|
114
|
+
▼
|
|
115
|
+
[残差計算] → ε̂_i = y_i - f̂(x_i)
|
|
116
|
+
│
|
|
117
|
+
▼
|
|
118
|
+
[誤差分解] → ε̂ = v̂ - û
|
|
119
|
+
│ (分布仮定 or ノンパラメトリック)
|
|
120
|
+
▼
|
|
121
|
+
[効率性推定] → TE_i = exp(-û_i)
|
|
122
|
+
│ E[u_i | ε_i] (Jondrow et al. 1982)
|
|
123
|
+
▼
|
|
124
|
+
出力: TE, û, v̂, f̂(X), モデル診断
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## 3. モデル設計
|
|
130
|
+
|
|
131
|
+
### Phase 1: 基盤 (ParametricSFA)
|
|
132
|
+
|
|
133
|
+
従来のパラメトリックSFAをscikit-learn APIで実装する。
|
|
134
|
+
これが全MLモデルのベンチマーク基準となる。
|
|
135
|
+
|
|
136
|
+
**仕様:**
|
|
137
|
+
- フロンティア関数: Cobb-Douglas, Translog
|
|
138
|
+
- 非効率性分布: Half-normal, Truncated normal, Exponential
|
|
139
|
+
- 推定: MLE (scipy.optimize + 解析的勾配)
|
|
140
|
+
- 効率性推定: Jondrow et al. (1982) 条件付き期待値
|
|
141
|
+
- パネルデータ: 固定効果, ランダム効果 (将来)
|
|
142
|
+
|
|
143
|
+
**核心となる対数尤度 (half-normal ケース):**
|
|
144
|
+
```
|
|
145
|
+
ln L = const - N ln σ + Σ[ ln Φ(-ε_i λ/σ) - ε_i²/(2σ²) ]
|
|
146
|
+
|
|
147
|
+
σ² = σ_v² + σ_u², λ = σ_u / σ_v
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Phase 2: ニューラルネットワークSFA (NNFrontierSFA)
|
|
151
|
+
|
|
152
|
+
**アプローチ A: 2段階NN-SFA** (Kutlu 2024 に基づく)
|
|
153
|
+
1. Stage 1: MLPでフロンティア `f̂(x)` を推定(単調性制約付き)
|
|
154
|
+
2. Stage 2: 残差に対してパラメトリック誤差分解
|
|
155
|
+
|
|
156
|
+
**アプローチ B: 同時推定** (Tsionas et al. 2023 に基づく)
|
|
157
|
+
- フロンティアNNと誤差分解を同時最適化
|
|
158
|
+
- カスタム損失関数にSFA対数尤度を埋め込む
|
|
159
|
+
- PyTorch autograd で勾配計算
|
|
160
|
+
|
|
161
|
+
**アプローチ C: 構造化NN** (xNN-SF, Zhao 2024 に基づく)
|
|
162
|
+
- SFA分解構造をネットワークアーキテクチャに反映
|
|
163
|
+
- フロンティア・非効率性・ノイズの3サブネット
|
|
164
|
+
- 単調性制約をアーキテクチャレベルで強制
|
|
165
|
+
|
|
166
|
+
**単調性制約の実装方法:**
|
|
167
|
+
- 重み非負制約 + 活性化関数の制約
|
|
168
|
+
- ペナルティ項: `λ * max(0, -∂f/∂x_j)²`
|
|
169
|
+
- Input convex neural networks (ICNN) の応用
|
|
170
|
+
|
|
171
|
+
### Phase 3: ガウス過程SFA (GPFrontierSFA)
|
|
172
|
+
|
|
173
|
+
**アプローチ:** GPyTorch を活用した確率的フロンティア推定
|
|
174
|
+
- カーネル: Matérn 5/2 or RBF(フロンティアの滑らかさに応じて選択)
|
|
175
|
+
- 単調性: 仮想微分観測 (virtual derivative observations)
|
|
176
|
+
- 利点: 自然な不確実性定量化、小〜中規模データに最適
|
|
177
|
+
- 誤差分解: カスタム尤度関数(GPyTorchのカスタムLikelihood)
|
|
178
|
+
|
|
179
|
+
### Phase 4: カーネルSFA (KernelSFA)
|
|
180
|
+
|
|
181
|
+
**アプローチ:** Fan, Li, Weersink (1996) / Kumbhakar et al. (2007)
|
|
182
|
+
- Nadaraya-Watson推定量 or 局所多項式回帰
|
|
183
|
+
- 局所最尤推定: 各点でカーネル重み付きSFA対数尤度を最大化
|
|
184
|
+
- バンド幅選択: leave-one-out クロスバリデーション
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## 4. 共通コンポーネント
|
|
189
|
+
|
|
190
|
+
### 4.1 非効率性分布 (`distributions.py`)
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
class InefficiencyDistribution(Protocol):
|
|
194
|
+
def log_pdf(self, u: NDArray) -> NDArray: ...
|
|
195
|
+
def cdf(self, u: NDArray) -> NDArray: ...
|
|
196
|
+
def conditional_mean(self, epsilon: NDArray, sigma_v: float, sigma_u: float) -> NDArray: ...
|
|
197
|
+
def conditional_mode(self, epsilon: NDArray, sigma_v: float, sigma_u: float) -> NDArray: ...
|
|
198
|
+
|
|
199
|
+
# 実装: HalfNormal, TruncatedNormal, Exponential
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### 4.2 Shape制約 (`constraints.py`)
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
class MonotonicityConstraint:
|
|
206
|
+
"""∂f/∂x_j ≥ 0 を強制(生産関数の単調増加性)"""
|
|
207
|
+
|
|
208
|
+
class ConcavityConstraint:
|
|
209
|
+
"""∂²f/∂x_j² ≤ 0 を強制(限界生産力逓減)"""
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### 4.3 シミュレーションデータ (`simulator.py`)
|
|
213
|
+
|
|
214
|
+
モデル検証用のDGP(データ生成過程):
|
|
215
|
+
- Cobb-Douglas DGP(ベースライン)
|
|
216
|
+
- Translog DGP
|
|
217
|
+
- 非線形フロンティアDGP(MLの優位性を示す)
|
|
218
|
+
- パネルデータDGP
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
def simulate_sfa(
|
|
222
|
+
n_obs: int,
|
|
223
|
+
n_inputs: int,
|
|
224
|
+
frontier_type: str, # "cobb-douglas" | "translog" | "nonlinear"
|
|
225
|
+
inefficiency_dist: str, # "half-normal" | "truncated-normal"
|
|
226
|
+
sigma_v: float,
|
|
227
|
+
sigma_u: float,
|
|
228
|
+
seed: int,
|
|
229
|
+
) -> SFADataset:
|
|
230
|
+
"""シミュレーションデータを生成"""
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### 4.4 評価指標 (`metrics.py`)
|
|
234
|
+
|
|
235
|
+
| 指標 | 説明 |
|
|
236
|
+
|------|------|
|
|
237
|
+
| RMSE(TE) | 真のTE vs 推定TEのRMSE |
|
|
238
|
+
| Rank correlation | 効率性ランキングのSpearman相関 |
|
|
239
|
+
| Log-likelihood | 対数尤度 |
|
|
240
|
+
| AIC / BIC | モデル選択基準 |
|
|
241
|
+
| Coverage | 信頼区間のカバー率 |
|
|
242
|
+
| Frontier MSE | フロンティア関数の推定精度 |
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## 5. 実装ロードマップ
|
|
247
|
+
|
|
248
|
+
### Phase 1: ParametricSFA + 基盤 (MVP)
|
|
249
|
+
- [ ] `BaseSFAEstimator` 基底クラス
|
|
250
|
+
- [ ] `InefficiencyDistribution` (half-normal, truncated-normal, exponential)
|
|
251
|
+
- [ ] `ParametricSFA` (Cobb-Douglas + half-normal)
|
|
252
|
+
- [ ] `simulator.py` (Cobb-Douglas DGP)
|
|
253
|
+
- [ ] 評価指標 (RMSE, rank correlation)
|
|
254
|
+
- [ ] ベンチマーク: pySFA / statsmodels との比較
|
|
255
|
+
|
|
256
|
+
### Phase 2: NNFrontierSFA
|
|
257
|
+
- [ ] 2段階NN-SFA (MLP frontier + parametric decomposition)
|
|
258
|
+
- [ ] 単調性制約 (penalty-based)
|
|
259
|
+
- [ ] 同時推定NN-SFA (custom loss)
|
|
260
|
+
- [ ] 非線形DGPでのベンチマーク
|
|
261
|
+
|
|
262
|
+
### Phase 3: GPFrontierSFA
|
|
263
|
+
- [ ] GPyTorchベースのフロンティア推定
|
|
264
|
+
- [ ] カスタムSFA尤度
|
|
265
|
+
- [ ] 効率性の事後分布
|
|
266
|
+
|
|
267
|
+
### Phase 4: KernelSFA + 比較
|
|
268
|
+
- [ ] 局所多項式SFA
|
|
269
|
+
- [ ] 全モデルの体系的比較
|
|
270
|
+
- [ ] 論文用の実験設計
|
|
271
|
+
|
|
272
|
+
---
|
|
273
|
+
|
|
274
|
+
## 6. 技術的課題と対策
|
|
275
|
+
|
|
276
|
+
### 6.1 誤差分解の識別問題
|
|
277
|
+
|
|
278
|
+
**問題:** MLフロンティアが柔軟すぎると、非効率性 u を吸収してしまう。
|
|
279
|
+
|
|
280
|
+
**対策:**
|
|
281
|
+
- Shape制約(単調性・凹性)でフロンティアの過適合を防ぐ
|
|
282
|
+
- 正則化パラメータのCV選択
|
|
283
|
+
- シミュレーション実験で分解精度を検証
|
|
284
|
+
- パネルデータの時間構造を活用
|
|
285
|
+
|
|
286
|
+
### 6.2 計算コスト
|
|
287
|
+
|
|
288
|
+
**問題:** GP (O(N³)), MCMC, 大規模NNの計算量。
|
|
289
|
+
|
|
290
|
+
**対策:**
|
|
291
|
+
- GP: 変分推論, inducing points (GPyTorchのScalableGP)
|
|
292
|
+
- NN: ミニバッチ学習, GPU活用 (PyTorch)
|
|
293
|
+
- 中規模データ (N < 10,000) をまず対象
|
|
294
|
+
|
|
295
|
+
### 6.3 経済学的解釈性の保持
|
|
296
|
+
|
|
297
|
+
**問題:** MLモデルはブラックボックスになりがち。
|
|
298
|
+
|
|
299
|
+
**対策:**
|
|
300
|
+
- 部分依存プロット (PDP)
|
|
301
|
+
- 弾力性推定: `∂ln f / ∂ln x_j` の計算
|
|
302
|
+
- 規模の経済性: 弾力性の和
|
|
303
|
+
- 入力ごとの限界効果の可視化
|
|
304
|
+
|
|
305
|
+
---
|
|
306
|
+
|
|
307
|
+
## 7. API設計例
|
|
308
|
+
|
|
309
|
+
```python
|
|
310
|
+
from ml_sfa.models import ParametricSFA, NNFrontierSFA
|
|
311
|
+
from ml_sfa.data import simulate_sfa
|
|
312
|
+
|
|
313
|
+
# シミュレーションデータ
|
|
314
|
+
data = simulate_sfa(n_obs=500, n_inputs=3, frontier_type="cobb-douglas",
|
|
315
|
+
inefficiency_dist="half-normal", sigma_v=0.1, sigma_u=0.2, seed=42)
|
|
316
|
+
|
|
317
|
+
# 伝統的SFA
|
|
318
|
+
model_param = ParametricSFA(frontier="cobb-douglas", inefficiency="half-normal")
|
|
319
|
+
model_param.fit(data.X, data.y)
|
|
320
|
+
te_param = model_param.efficiency(data.X, data.y)
|
|
321
|
+
|
|
322
|
+
# NN-SFA
|
|
323
|
+
model_nn = NNFrontierSFA(hidden_layers=[64, 32], monotonic=True,
|
|
324
|
+
inefficiency="half-normal", epochs=200)
|
|
325
|
+
model_nn.fit(data.X, data.y)
|
|
326
|
+
te_nn = model_nn.efficiency(data.X, data.y)
|
|
327
|
+
|
|
328
|
+
# モデル比較
|
|
329
|
+
from ml_sfa.evaluation import compare_models
|
|
330
|
+
results = compare_models(
|
|
331
|
+
models={"Parametric": model_param, "NN": model_nn},
|
|
332
|
+
X=data.X, y=data.y, true_te=data.te,
|
|
333
|
+
)
|
|
334
|
+
print(results.summary())
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
---
|
|
338
|
+
|
|
339
|
+
## 8. 参考文献
|
|
340
|
+
|
|
341
|
+
### 基礎
|
|
342
|
+
- Aigner, Lovell, Schmidt (1977). Formulation and Estimation of Stochastic Frontier Production Function Models. *J. Econometrics*
|
|
343
|
+
- Meeusen, van den Broeck (1977). Efficiency Estimation from Cobb-Douglas Production Functions with Composed Error. *Int. Econ. Rev.*
|
|
344
|
+
- Jondrow et al. (1982). On the Estimation of Technical Inefficiency in the SFA Model. *J. Econometrics*
|
|
345
|
+
|
|
346
|
+
### NN-SFA
|
|
347
|
+
- Pendharkar (2023). RBF Neural Network for Stochastic Frontier Analyses. *Neural Processing Letters*
|
|
348
|
+
- Kutlu (2024). A Machine Learning Approach to Stochastic Frontier Modeling. *Research Square*
|
|
349
|
+
- Tsionas, Parmeter, Zelenyuk (2023). Bayesian ANN for Frontier Efficiency Analysis. *J. Econometrics*
|
|
350
|
+
- Zhao (2024). xNN-SF: Explainable Neural Network Inspired by SFA. *ICIC 2024*
|
|
351
|
+
|
|
352
|
+
### ノンパラメトリック
|
|
353
|
+
- Fan, Li, Weersink (1996). Semiparametric Estimation of Stochastic Production Frontier Models. *JBES*
|
|
354
|
+
- Kumbhakar, Park, Simar, Tsionas (2007). Nonparametric Stochastic Frontiers: Local MLE. *J. Econometrics*
|
|
355
|
+
- Kuosmanen, Kortelainen (2012). StoNED. *J. Productivity Analysis*
|
|
356
|
+
|
|
357
|
+
### ツリー・その他
|
|
358
|
+
- Ferrara, Vidoli (2024). BART for SFA. *ECOSTA*
|
|
359
|
+
- Esteve et al. (2020). Efficiency Analysis Trees. *Expert Systems with Applications*
|
|
360
|
+
- Zheng et al. (2024). Robust Nonparametric SFA. *arXiv:2404.04301*
|
|
361
|
+
- GeMA (2026). Learning Latent Manifold Frontiers. *arXiv:2603.16729*
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-03-22
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
#### Phase 1: Parametric SFA
|
|
15
|
+
- `BaseSFAEstimator` abstract base class with sklearn-compatible API
|
|
16
|
+
- `ParametricSFA` estimator with MLE (Cobb-Douglas / Translog frontiers)
|
|
17
|
+
- Inefficiency distributions: HalfNormal, TruncatedNormal, Exponential with JLMS formulas
|
|
18
|
+
- SFA data simulator (`simulate_sfa`) with configurable DGPs
|
|
19
|
+
- Evaluation metrics: `rmse_efficiency`, `rank_correlation`, `aic`, `bic`, `frontier_mse`, `coverage_rate`
|
|
20
|
+
|
|
21
|
+
#### Phase 2: Neural Network SFA
|
|
22
|
+
- `NNFrontierSFA` estimator with joint NN frontier + SFA error decomposition (PyTorch)
|
|
23
|
+
- Two-phase optimization: MSE pretraining → SFA NLL fine-tuning (L-BFGS)
|
|
24
|
+
- `MonotonicMLP` for architectural monotonicity constraints
|
|
25
|
+
- PyTorch SFA loss functions for all 3 distributions
|
|
26
|
+
|
|
27
|
+
#### Phase 3: BART SFA
|
|
28
|
+
- `BARTFrontierSFA` estimator with PyMC-BART and data-augmented MCMC
|
|
29
|
+
- Bayesian TE credible intervals via `credible_interval()`
|
|
30
|
+
- Support for half-normal, exponential, truncated-normal inefficiency
|
|
31
|
+
|
|
32
|
+
#### Phase 4: Kernel SFA + Model Comparison
|
|
33
|
+
- `KernelSFA` estimator with local polynomial MLE (Gaussian kernel)
|
|
34
|
+
- Nonlinear DGP (`frontier_type="nonlinear"`) for ML method validation
|
|
35
|
+
- Model comparison framework: `compare_models()`, `run_benchmark()`
|
|
36
|
+
- Interactive showcase notebook (`notebooks/model_showcase.ipynb`)
|
|
37
|
+
|
|
38
|
+
#### Infrastructure
|
|
39
|
+
- Project structure with src layout and scikit-learn compatible API design
|
|
40
|
+
- Research survey: ML approaches to Stochastic Frontier Analysis
|
|
41
|
+
- Model selection analysis (9 models × 7 challenges)
|
|
42
|
+
- Detailed design documents for Joint NN-SFA and BART-SFM
|
|
43
|
+
- CI/CD pipeline (ci.yml, release.yml, auto-release.yml)
|
|
44
|
+
- PyPI publishing via OIDC trusted publisher
|
|
45
|
+
- 220 tests, 93% coverage, mypy strict, ruff clean
|
ml_sfa-0.1.0/Makefile
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Developer task runner; every tool is invoked through `uv run`.
PACKAGE := ml_sfa

.PHONY: install test lint format format-check typecheck ci build clean

# Install the locked dependency set, including development tools.
install:
	uv sync --frozen --dev

# Run the test suite; fails when coverage drops below 80%.
test:
	uv run pytest --cov=$(PACKAGE) --cov-fail-under=80 -q

# Static lint checks.
lint:
	uv run ruff check .

# Rewrite files in place with the formatter.
format:
	uv run ruff format .

# Verify formatting without modifying files.
format-check:
	uv run ruff format --check .

# Type-check the package sources.
typecheck:
	uv run mypy src/$(PACKAGE)/

# Aggregate target: runs its prerequisites, has no recipe of its own.
ci: lint format-check typecheck test

# Build sdist and wheel, then validate their metadata.
build:
	uv build
	uv run twine check dist/*

# Remove build outputs, bytecode caches and tool caches.
clean:
	rm -rf dist/ build/ *.egg-info
	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
	rm -rf .pytest_cache .mypy_cache .ruff_cache
	rm -rf .coverage htmlcov/
|