matching-pmh 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. matching_pmh-0.6.0/.gitignore +15 -0
  2. matching_pmh-0.6.0/CHANGELOG.md +30 -0
  3. matching_pmh-0.6.0/CITATION.cff +25 -0
  4. matching_pmh-0.6.0/GITHUB_SETUP.md +61 -0
  5. matching_pmh-0.6.0/LICENSE +21 -0
  6. matching_pmh-0.6.0/PKG-INFO +170 -0
  7. matching_pmh-0.6.0/PUBLISHING.md +143 -0
  8. matching_pmh-0.6.0/README.md +112 -0
  9. matching_pmh-0.6.0/SEPARATION.md +24 -0
  10. matching_pmh-0.6.0/docs/cli.md +31 -0
  11. matching_pmh-0.6.0/docs/datasets.md +20 -0
  12. matching_pmh-0.6.0/docs/estimators/d1.md +23 -0
  13. matching_pmh-0.6.0/docs/estimators/d2.md +10 -0
  14. matching_pmh-0.6.0/docs/estimators/d3.md +11 -0
  15. matching_pmh-0.6.0/docs/estimators/d4.md +17 -0
  16. matching_pmh-0.6.0/docs/estimators/d5.md +8 -0
  17. matching_pmh-0.6.0/docs/estimators/d6.md +8 -0
  18. matching_pmh-0.6.0/docs/estimators/d7.md +23 -0
  19. matching_pmh-0.6.0/docs/estimators/index.md +33 -0
  20. matching_pmh-0.6.0/docs/getting-started.md +39 -0
  21. matching_pmh-0.6.0/docs/index.md +28 -0
  22. matching_pmh-0.6.0/docs/integrations-hf-trainer.md +34 -0
  23. matching_pmh-0.6.0/docs/integrations-hf.md +40 -0
  24. matching_pmh-0.6.0/docs/integrations-lightning.md +27 -0
  25. matching_pmh-0.6.0/docs/integrations.md +20 -0
  26. matching_pmh-0.6.0/docs/nuisance_types.md +60 -0
  27. matching_pmh-0.6.0/docs/sklearn.md +32 -0
  28. matching_pmh-0.6.0/docs/training.md +21 -0
  29. matching_pmh-0.6.0/examples/01_domain_shift_d4.py +58 -0
  30. matching_pmh-0.6.0/examples/02_save_load_artifact.py +31 -0
  31. matching_pmh-0.6.0/examples/03_compositional_d5.py +22 -0
  32. matching_pmh-0.6.0/examples/04_falsification_controls.py +27 -0
  33. matching_pmh-0.6.0/examples/05_yaml_config.py +29 -0
  34. matching_pmh-0.6.0/examples/06_office31_sklearn.py +141 -0
  35. matching_pmh-0.6.0/examples/07_vision_multilayer.py +77 -0
  36. matching_pmh-0.6.0/examples/08_hf_style_d7.py +129 -0
  37. matching_pmh-0.6.0/examples/09_lightning_module.py +70 -0
  38. matching_pmh-0.6.0/examples/10_hf_trainer.py +82 -0
  39. matching_pmh-0.6.0/examples/11_dpo_lora_style_pmh.py +241 -0
  40. matching_pmh-0.6.0/examples/configs/d4_estimate.json +8 -0
  41. matching_pmh-0.6.0/examples/configs/d7_style_estimate.json +11 -0
  42. matching_pmh-0.6.0/examples/configs/dpo_train_job.json +18 -0
  43. matching_pmh-0.6.0/examples/data/preference_pairs_sample.jsonl +2 -0
  44. matching_pmh-0.6.0/examples/data/style_pairs_sample.jsonl +2 -0
  45. matching_pmh-0.6.0/examples/minimal_loop.py +41 -0
  46. matching_pmh-0.6.0/mkdocs.yml +39 -0
  47. matching_pmh-0.6.0/pyproject.toml +66 -0
  48. matching_pmh-0.6.0/scripts/preflight_release.ps1 +10 -0
  49. matching_pmh-0.6.0/scripts/upload_pypi.ps1 +69 -0
  50. matching_pmh-0.6.0/src/pmh/__init__.py +55 -0
  51. matching_pmh-0.6.0/src/pmh/_tensor.py +37 -0
  52. matching_pmh-0.6.0/src/pmh/artifact.py +99 -0
  53. matching_pmh-0.6.0/src/pmh/baselines/__init__.py +5 -0
  54. matching_pmh-0.6.0/src/pmh/baselines/coral.py +37 -0
  55. matching_pmh-0.6.0/src/pmh/catalog.py +130 -0
  56. matching_pmh-0.6.0/src/pmh/cli/__init__.py +5 -0
  57. matching_pmh-0.6.0/src/pmh/cli/main.py +231 -0
  58. matching_pmh-0.6.0/src/pmh/config.py +145 -0
  59. matching_pmh-0.6.0/src/pmh/controls.py +35 -0
  60. matching_pmh-0.6.0/src/pmh/datasets/__init__.py +5 -0
  61. matching_pmh-0.6.0/src/pmh/datasets/office31.py +103 -0
  62. matching_pmh-0.6.0/src/pmh/diagnostics.py +21 -0
  63. matching_pmh-0.6.0/src/pmh/estimate.py +156 -0
  64. matching_pmh-0.6.0/src/pmh/estimators/__init__.py +19 -0
  65. matching_pmh-0.6.0/src/pmh/estimators/d1_subspace.py +44 -0
  66. matching_pmh-0.6.0/src/pmh/estimators/d2_isotropic.py +22 -0
  67. matching_pmh-0.6.0/src/pmh/estimators/d3_augmentation.py +37 -0
  68. matching_pmh-0.6.0/src/pmh/estimators/d4_domain.py +40 -0
  69. matching_pmh-0.6.0/src/pmh/estimators/d5_compositional.py +28 -0
  70. matching_pmh-0.6.0/src/pmh/estimators/d6_temporal.py +36 -0
  71. matching_pmh-0.6.0/src/pmh/estimators/d7_alignment.py +66 -0
  72. matching_pmh-0.6.0/src/pmh/features.py +59 -0
  73. matching_pmh-0.6.0/src/pmh/integrations/__init__.py +31 -0
  74. matching_pmh-0.6.0/src/pmh/integrations/hf_trainer.py +170 -0
  75. matching_pmh-0.6.0/src/pmh/integrations/huggingface.py +304 -0
  76. matching_pmh-0.6.0/src/pmh/integrations/lightning.py +90 -0
  77. matching_pmh-0.6.0/src/pmh/integrations/torch.py +166 -0
  78. matching_pmh-0.6.0/src/pmh/numpy_api.py +152 -0
  79. matching_pmh-0.6.0/src/pmh/penalty.py +138 -0
  80. matching_pmh-0.6.0/src/pmh/preflight.py +35 -0
  81. matching_pmh-0.6.0/src/pmh/sklearn_match.py +80 -0
  82. matching_pmh-0.6.0/src/pmh/training.py +92 -0
  83. matching_pmh-0.6.0/src/pmh/vision/__init__.py +5 -0
  84. matching_pmh-0.6.0/src/pmh/vision/multilayer.py +136 -0
  85. matching_pmh-0.6.0/tests/test_cli.py +87 -0
  86. matching_pmh-0.6.0/tests/test_config_io.py +48 -0
  87. matching_pmh-0.6.0/tests/test_estimate.py +41 -0
  88. matching_pmh-0.6.0/tests/test_penalty.py +45 -0
  89. matching_pmh-0.6.0/tests/test_training.py +28 -0
  90. matching_pmh-0.6.0/tests/test_v03.py +62 -0
  91. matching_pmh-0.6.0/tests/test_v04.py +112 -0
  92. matching_pmh-0.6.0/tests/test_v05.py +85 -0
@@ -0,0 +1,15 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ .pytest_cache/
4
+ .ruff_cache/
5
+ dist/
6
+ *.egg-info/
7
+ .venv/
8
+ .venv-pypi-test/
9
+ .venv-pypi-verify/
10
+ venv/
11
+ artifacts/
12
+ tmp_trainer/
13
+ tmp_pmh_trainer/
14
+ *.pt
15
+ .DS_Store
@@ -0,0 +1,30 @@
1
+ # Changelog
2
+
3
+ ## 0.6.0
4
+
5
+ - **`pmh-train` CLI**: `list-methods`, `estimate --config job.json`, `preflight`, `run --config job.json`.
6
+ - **`pmh.catalog`**: D1–D7 input requirements and job validation.
7
+ - **Example 11**: Qwen/T7A JSONL (`style_pairs` + `preference_pairs`), optional LoRA + DPO+PMH demo.
8
+ - **Bundled samples**: `examples/data/*.jsonl`, `examples/configs/*.json`.
9
+ - **HF**: `load_preference_pairs_jsonl` for DPO schema.
10
+ - **Publishing**: TestPyPI workflow (`publish-testpypi.yml`), updated `PUBLISHING.md`.
11
+
12
+ ## 0.5.0
13
+
14
+ - HF `PMHTrainer`, CORAL baseline, CI matrix, Office-31 example.
15
+
16
+ ## 0.4.0
17
+
18
+ - Hugging Face D7, Lightning callback, Office-31 loader.
19
+
20
+ ## 0.3.0
21
+
22
+ - Torch/sklearn/vision integrations, MkDocs.
23
+
24
+ ## 0.2.0
25
+
26
+ - Artifacts, `PMHLoss`, configs.
27
+
28
+ ## 0.1.0
29
+
30
+ - Core estimators D1–D7 and penalties.
@@ -0,0 +1,25 @@
1
+ cff-version: 1.2.0
2
+ title: "matching-pmh: reference library for the matching principle"
3
+ message: "If you use this software, please cite the matching-principle paper."
4
+ type: software
5
+ authors:
6
+ - family-names: Rajput
7
+ given-names: Vishal
8
+ repository-code: "https://github.com/matching-pmh/matching-pmh"
9
+ url: "https://github.com/matching-pmh/matching-pmh"
10
+ license: MIT
11
+ version: 0.6.0
12
+ keywords:
13
+ - robustness
14
+ - domain-adaptation
15
+ - representation-learning
16
+ - jacobian-regularization
17
+ - sigma-task
18
+ preferred-citation:
19
+ type: article
20
+ title: "The Matching Principle: A Geometric Theory of Loss Functions for Nuisance-Robust Representation Learning"
21
+ authors:
22
+ - family-names: Rajput
23
+ given-names: Vishal
24
+ year: 2026
25
+ notes: "JMLR submission; update journal fields when accepted."
@@ -0,0 +1,61 @@
1
+ # Publish `matching-pmh` on GitHub
2
+
3
+ The library lives only in this folder (`Desktop/matching-pmh`), not inside the paper repo.
4
+
5
+ ## 1. Create the remote repository
6
+
7
+ On GitHub: **New repository** → name `matching-pmh` → public → **no** README (this tree has one).
8
+
9
+ Or install [GitHub CLI](https://cli.github.com/) and run:
10
+
11
+ ```powershell
12
+ cd C:\Users\Eigenaar\Desktop\matching-pmh
13
+ gh auth login
14
+ gh repo create matching-pmh --public --source=. --remote=origin
15
+ ```
16
+
17
+ ## 2. Publish on PyPI (full package)
18
+
19
+ See **`PUBLISHING.md`** for the complete guide. Summary:
20
+
21
+ 1. Build and check: `python -m build`, then `twine check dist/*`
22
+ 2. Upload: `twine upload dist/*` (or tag `v0.6.0` + trusted publishing on GitHub)
23
+ 3. Users install: `pip install matching-pmh`
24
+
25
+ The name `matching-pmh` is **not on PyPI** until you upload it once.
26
+
27
+ ## 3. First commit and push
28
+
29
+ ```powershell
30
+ cd C:\Users\Eigenaar\Desktop\matching-pmh
31
+ git init
32
+ git add .
33
+ git commit -m "Initial release: estimate_sigma_task (D1-D7) and PMH penalties (v0.6.0)"
34
+ git branch -M main
35
+ git remote add origin https://github.com/YOUR_USERNAME/matching-pmh.git
36
+ git push -u origin main
37
+ ```
38
+
39
+ Replace `YOUR_USERNAME` with your GitHub account. If the URL differs, update `repository-code` and `url` in `CITATION.cff` and `[project.urls]` in `pyproject.toml`.
40
+
41
+ ## 4. Paper cross-link
42
+
43
+ After the repo exists, set the URL in the manuscript macro (once):
44
+
45
+ `submission_grand_unification/macros.tex` → `\MatchingPmhRepoUrl{https://github.com/YOUR_USERNAME/matching-pmh}`
46
+
47
+ Then rebuild the PDF (`pdflatex` ×2 in `submission_grand_unification/`).
48
+
49
+ ## 5. Remove duplicate from paper tree
50
+
51
+ Delete the old copy under `Paper2/pmh/` (only a stub README should remain there).
52
+
53
+ ## 6. Optional: PyPI
54
+
55
+ When ready for `pip install matching-pmh`:
56
+
57
+ ```powershell
58
+ pip install build twine
59
+ python -m build
60
+ twine upload dist/*
61
+ ```
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vishal Rajput
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,170 @@
1
+ Metadata-Version: 2.4
2
+ Name: matching-pmh
3
+ Version: 0.6.0
4
+ Summary: Reference library for the matching principle: estimate Sigma_task (D1-D7) and matched PMH penalties
5
+ Project-URL: Homepage, https://github.com/matching-pmh/matching-pmh
6
+ Project-URL: Documentation, https://github.com/matching-pmh/matching-pmh#readme
7
+ Project-URL: Repository, https://github.com/matching-pmh/matching-pmh
8
+ Project-URL: Issues, https://github.com/matching-pmh/matching-pmh/issues
9
+ Project-URL: Changelog, https://github.com/matching-pmh/matching-pmh/blob/main/CHANGELOG.md
10
+ Author: Vishal Rajput
11
+ License-Expression: MIT
12
+ License-File: LICENSE
13
+ Keywords: domain-adaptation,jacobian,pmh,representation-learning,robustness
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: numpy>=1.24
24
+ Requires-Dist: torch>=2.1
25
+ Provides-Extra: all
26
+ Requires-Dist: accelerate>=0.25; extra == 'all'
27
+ Requires-Dist: datasets>=2.14; extra == 'all'
28
+ Requires-Dist: lightning>=2.1; extra == 'all'
29
+ Requires-Dist: mkdocs-material>=9.0; extra == 'all'
30
+ Requires-Dist: mkdocs>=1.5; extra == 'all'
31
+ Requires-Dist: peft>=0.7; extra == 'all'
32
+ Requires-Dist: pytest>=8.0; extra == 'all'
33
+ Requires-Dist: ruff>=0.4; extra == 'all'
34
+ Requires-Dist: scikit-learn>=1.3; extra == 'all'
35
+ Requires-Dist: torchvision>=0.16; extra == 'all'
36
+ Requires-Dist: transformers>=4.36; extra == 'all'
37
+ Provides-Extra: dev
38
+ Requires-Dist: pytest>=8.0; extra == 'dev'
39
+ Requires-Dist: ruff>=0.4; extra == 'dev'
40
+ Provides-Extra: docs
41
+ Requires-Dist: mkdocs-material>=9.0; extra == 'docs'
42
+ Requires-Dist: mkdocs>=1.5; extra == 'docs'
43
+ Provides-Extra: hf
44
+ Requires-Dist: accelerate>=0.25; extra == 'hf'
45
+ Requires-Dist: transformers>=4.36; extra == 'hf'
46
+ Provides-Extra: hf-lora
47
+ Requires-Dist: accelerate>=0.25; extra == 'hf-lora'
48
+ Requires-Dist: datasets>=2.14; extra == 'hf-lora'
49
+ Requires-Dist: peft>=0.7; extra == 'hf-lora'
50
+ Requires-Dist: transformers>=4.36; extra == 'hf-lora'
51
+ Provides-Extra: lightning
52
+ Requires-Dist: lightning>=2.1; extra == 'lightning'
53
+ Provides-Extra: sklearn
54
+ Requires-Dist: scikit-learn>=1.3; extra == 'sklearn'
55
+ Provides-Extra: vision
56
+ Requires-Dist: torchvision>=0.16; extra == 'vision'
57
+ Description-Content-Type: text/markdown
58
+
59
+ # matching-pmh
60
+
61
+ **Independent library** for the *matching principle*: estimate $\Sigma_{\mathrm{task}}$ (Lemmas D1–D7), run matched PMH penalties, save/load artifacts, and wire into training loops.
62
+
63
+ Paper: *The Matching Principle* (separate repository). **v0.2** adds typed configs, artifacts, pre-flight eigengap, `PMHLoss`, and `collect_features`.
64
+
65
+ ## Install
66
+
67
+ ```bash
68
+ cd matching-pmh
69
+ pip install -e ".[dev]"
70
+ pytest
71
+ ```
72
+
73
+ ## Quick start (v0.2)
74
+
75
+ ```python
76
+ import torch
77
+ from pmh import (
78
+ SigmaTaskConfig,
79
+ PMHConfig,
80
+ PMHLoss,
81
+ estimate_from_config,
82
+ collect_features,
83
+ )
84
+
85
+ # 1) Estimate + diagnostics
86
+ cfg = SigmaTaskConfig.for_domain(rank=64)
87
+ artifact = estimate_from_config(cfg, source_feats, target_feats)
88
+ print(artifact.preflight, artifact.eigengap) # pass | marginal | fail
89
+
90
+ # 2) Save for another job / machine
91
+ artifact.save("checkpoints/style_sigma")
92
+
93
+ # 3) Train
94
+ pmh = PMHLoss(artifact, PMHConfig(weight=0.3, cap_ratio=0.3, warmup_epochs=2))
95
+ h = backbone(x)
96
+ task_loss = ...
97
+ total, pmh_term = pmh.capped_total(task_loss, h)
98
+ ```
99
+
100
+ ### Legacy one-liner (still supported)
101
+
102
+ ```python
103
+ from pmh import estimate_sigma_task, pmh_penalty_on_rep
104
+
105
+ sigma = estimate_sigma_task(src, tgt, method="D4", rank=64)
106
+ pen = pmh_penalty_on_rep(h, sigma)
107
+ ```
108
+
109
+ ### Load a saved estimate
110
+
111
+ ```python
112
+ from pmh import SigmaTaskEstimate, PMHLoss
113
+
114
+ artifact = SigmaTaskEstimate.load("checkpoints/style_sigma.pt")
115
+ pmh = PMHLoss(artifact)
116
+ ```
117
+
118
+ ## Examples
119
+
120
+ | Script | What it shows |
121
+ |--------|----------------|
122
+ | `examples/01_domain_shift_d4.py` | `collect_features` + D4 + `PMHLoss` training |
123
+ | `examples/02_save_load_artifact.py` | `.pt` + `.json` artifact I/O |
124
+ | `examples/03_compositional_d5.py` | D5 coordinate-block $\Sigma$ |
125
+ | `examples/04_falsification_controls.py` | matched / wrong-W / isotropic modes |
126
+ | `examples/05_yaml_config.py` | JSON job dict → configs |
127
+ | `examples/minimal_loop.py` | Short end-to-end loop |
128
+ | `examples/06_office31_sklearn.py` | D1 + `MatchedSubspaceProjector` + logistic |
129
+ | `examples/07_vision_multilayer.py` | `MultiLayerPMHLoss` + per-layer Gram noise |
130
+
131
+ ## API map
132
+
133
+ | Goal | API |
134
+ |------|-----|
135
+ | Pick estimator | `SigmaTaskConfig.for_domain()`, `.for_isotropic()`, … |
136
+ | Estimate | `estimate_from_config(cfg, ...)` → `SigmaTaskEstimate` |
137
+ | Pre-flight | `artifact.preflight`, `preflight_eigengap(cov, rank)` |
138
+ | Train | `PMHLoss(artifact, PMHConfig(...))` |
139
+ | Controls | `PMHLoss(..., mode="wrong_w")`, `signal_W_projector` |
140
+ | Data hook | `collect_features(encoder, loader)` |
141
+
142
+ ## Estimators (`method=`)
143
+
144
+ | Method | Lemma | Config helper |
145
+ |--------|-------|----------------|
146
+ | D1 | Subspace SVD | `for_subspace(rank=)` |
147
+ | D2 | Isotropic | `for_isotropic(dim, noise_level)` |
148
+ | D3 | Aug modes | `for_augmentation()` |
149
+ | D4 | Domain Gram | `for_domain(rank=)` |
150
+ | D5 | Compositional | `for_compositional(indices)` |
151
+ | D6 | Temporal | `for_temporal()` |
152
+ | D7 | Style / alignment | `for_alignment(rank=)` |
153
+
154
+ ## Status
155
+
156
+ **0.5.0** — `PMHTrainer` (HF), CORAL baseline, GitHub Actions CI + PyPI publish guide.
157
+
158
+ | Extra | Install | Example |
159
+ |-------|---------|---------|
160
+ | HF Trainer | `pip install "matching-pmh[hf]"` | `examples/10_hf_trainer.py` |
161
+ | CORAL baseline | `pmh.baselines.coral` | `examples/06_office31_sklearn.py` |
162
+ | CI / PyPI | see `PUBLISHING.md` | tag `v0.5.0` |
163
+
164
+ **0.4.0** — Hugging Face D7 (`estimate_style_sigma`), Lightning (`add_pmh_to_loss`), Office-31 features (`--office31-root`).
165
+
166
+ | Extra | Install | Example |
167
+ |-------|---------|---------|
168
+ | HF | `pip install "matching-pmh[hf]"` | `examples/08_hf_style_d7.py` |
169
+ | Lightning | `pip install "matching-pmh[lightning]"` | `examples/09_lightning_module.py` |
170
+ | Vision / Office-31 | `pip install "matching-pmh[vision]"` | `examples/06_office31_sklearn.py --office31-root ...` |
@@ -0,0 +1,143 @@
1
+ # Publishing `matching-pmh` on PyPI (production)
2
+
3
+ **PyPI name:** `matching-pmh` (available — not taken yet)
4
+ **Import:** `import pmh`
5
+ **CLI:** `pmh-train`
6
+
7
+ After a successful release, anyone can install with:
8
+
9
+ ```bash
10
+ pip install matching-pmh
11
+ pip install "matching-pmh[hf]" # Hugging Face D7 / Trainer
12
+ pip install "matching-pmh[hf-lora]" # + PEFT for example 11
13
+ ```
14
+
15
+ ---
16
+
17
+ ## One-time setup (≈15 minutes)
18
+
19
+ ### 1. Accounts
20
+
21
+ 1. [pypi.org](https://pypi.org/account/register/) — production
22
+ 2. [test.pypi.org](https://test.pypi.org/account/register/) — optional dry run
23
+ 3. [github.com](https://github.com) — source + trusted publishing
24
+
25
+ ### 2. Reserve the project name on PyPI
26
+
27
+ PyPI has no separate reservation step: the **first successful upload** of `matching-pmh` claims the name, after which the project appears under [pypi.org/manage/projects/](https://pypi.org/manage/projects/). Upload once manually or via CI (below).
28
+
29
+ ### 3. Trusted publishing (recommended — no long-lived API token in GitHub)
30
+
31
+ **On PyPI** → Account settings → **Publishing** → Add pending publisher:
32
+
33
+ | Field | Value |
34
+ |--------|--------|
35
+ | PyPI project name | `matching-pmh` |
36
+ | Owner | your GitHub user or org |
37
+ | Repository | `matching-pmh` |
38
+ | Workflow name | `ci.yml` |
39
+ | Environment name | *(leave empty unless you use one)* |
40
+
41
+ Repeat for **TestPyPI** if you use `publish-testpypi.yml` (workflow name `publish-testpypi.yml`).
42
+
43
+ ### 4. GitHub repository
44
+
45
+ ```powershell
46
+ cd C:\Users\Eigenaar\Desktop\matching-pmh
47
+ gh auth login
48
+ gh repo create matching-pmh --public --source=. --remote=origin
49
+ git add .
50
+ git commit -m "Release matching-pmh 0.6.0: D1-D7 estimators, PMH, pmh-train CLI"
51
+ git branch -M main
52
+ git push -u origin main
53
+ ```
54
+
55
+ If your URL is not `github.com/matching-pmh/matching-pmh`, update `[project.urls]` in `pyproject.toml` and `repository-code` / `url` in `CITATION.cff`.
56
+
57
+ ---
58
+
59
+ ## Path A — Publish from your machine (first time, fastest)
60
+
61
+ ```powershell
62
+ cd C:\Users\Eigenaar\Desktop\matching-pmh
63
+ pip install build twine
64
+ python -m build
65
+ twine check dist/*
66
+
67
+ # Optional dry run on TestPyPI first:
68
+ # twine upload --repository testpypi dist/*
69
+ # pip install -i https://test.pypi.org/simple/ matching-pmh==0.6.0
70
+
71
+ # Production PyPI (you will be prompted for API token; use scope "Entire account" or project-scoped):
72
+ twine upload dist/*
73
+ ```
74
+
75
+ Create an API token on your [account settings](https://pypi.org/manage/account/) page → **API tokens** → **Add API token** (scope: entire account for first upload, or project **matching-pmh** after it exists).
76
+
77
+ Verify:
78
+
79
+ ```powershell
80
+ pip install matching-pmh==0.6.0
81
+ pmh-train list-methods
82
+ python -c "import pmh; print(pmh.__version__)"
83
+ ```
84
+
85
+ ---
86
+
87
+ ## Path B — Publish via GitHub tag (after trusted publishing is configured)
88
+
89
+ ```powershell
90
+ git tag v0.6.0
91
+ git push origin v0.6.0
92
+ ```
93
+
94
+ Workflow `.github/workflows/ci.yml` runs tests, builds, and publishes to **pypi.org** on tags `v*`.
95
+
96
+ ---
97
+
98
+ ## Version bump checklist (every release)
99
+
100
+ 1. `pyproject.toml` → `version`
101
+ 2. `src/pmh/__init__.py` → `__version__`
102
+ 3. `CITATION.cff` → `version`
103
+ 4. `CHANGELOG.md` → new section
104
+ 5. `git tag vX.Y.Z && git push origin vX.Y.Z`
105
+
106
+ ---
107
+
108
+ ## Fallback: API token in GitHub Actions
109
+
110
+ If trusted publishing is not set up, add repo secret `PYPI_API_TOKEN` and the publish step still works with `pypa/gh-action-pypi-publish`.
111
+
112
+ ---
113
+
114
+ ## Paper cross-link
115
+
116
+ Set in `submission_grand_unification/macros.tex`:
117
+
118
+ ```latex
119
+ \MatchingPmhRepoUrl{https://github.com/YOUR_USER/matching-pmh}
120
+ ```
121
+
122
+ Add to reproducibility text: `pip install matching-pmh` (PyPI).
123
+
124
+ ---
125
+
126
+ ## Local preflight (before any upload)
127
+
128
+ ```powershell
129
+ pip install build twine
130
+ python -m build
131
+ twine check dist/*
132
+ pip install dist\matching_pmh-0.6.0-py3-none-any.whl
133
+ pmh-train list-methods
134
+ pytest -q
135
+ ```
136
+
137
+ ---
138
+
139
+ ## Notes
140
+
141
+ - **Torch** is a core dependency, and its wheels are large, so `pip install matching-pmh` triggers a big download — that is normal for ML libraries on PyPI.
142
+ - The name `pmh` on PyPI may be taken by unrelated projects; this package is **`matching-pmh`** only.
143
+ - Do not commit `dist/`, `artifacts/`, or API tokens.
@@ -0,0 +1,112 @@
1
+ # matching-pmh
2
+
3
+ **Independent library** for the *matching principle*: estimate $\Sigma_{\mathrm{task}}$ (Lemmas D1–D7), run matched PMH penalties, save/load artifacts, and wire into training loops.
4
+
5
+ Paper: *The Matching Principle* (separate repository). **v0.2** adds typed configs, artifacts, pre-flight eigengap, `PMHLoss`, and `collect_features`.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ cd matching-pmh
11
+ pip install -e ".[dev]"
12
+ pytest
13
+ ```
14
+
15
+ ## Quick start (v0.2)
16
+
17
+ ```python
18
+ import torch
19
+ from pmh import (
20
+ SigmaTaskConfig,
21
+ PMHConfig,
22
+ PMHLoss,
23
+ estimate_from_config,
24
+ collect_features,
25
+ )
26
+
27
+ # 1) Estimate + diagnostics
28
+ cfg = SigmaTaskConfig.for_domain(rank=64)
29
+ artifact = estimate_from_config(cfg, source_feats, target_feats)
30
+ print(artifact.preflight, artifact.eigengap) # pass | marginal | fail
31
+
32
+ # 2) Save for another job / machine
33
+ artifact.save("checkpoints/style_sigma")
34
+
35
+ # 3) Train
36
+ pmh = PMHLoss(artifact, PMHConfig(weight=0.3, cap_ratio=0.3, warmup_epochs=2))
37
+ h = backbone(x)
38
+ task_loss = ...
39
+ total, pmh_term = pmh.capped_total(task_loss, h)
40
+ ```
41
+
42
+ ### Legacy one-liner (still supported)
43
+
44
+ ```python
45
+ from pmh import estimate_sigma_task, pmh_penalty_on_rep
46
+
47
+ sigma = estimate_sigma_task(src, tgt, method="D4", rank=64)
48
+ pen = pmh_penalty_on_rep(h, sigma)
49
+ ```
50
+
51
+ ### Load a saved estimate
52
+
53
+ ```python
54
+ from pmh import SigmaTaskEstimate, PMHLoss
55
+
56
+ artifact = SigmaTaskEstimate.load("checkpoints/style_sigma.pt")
57
+ pmh = PMHLoss(artifact)
58
+ ```
59
+
60
+ ## Examples
61
+
62
+ | Script | What it shows |
63
+ |--------|----------------|
64
+ | `examples/01_domain_shift_d4.py` | `collect_features` + D4 + `PMHLoss` training |
65
+ | `examples/02_save_load_artifact.py` | `.pt` + `.json` artifact I/O |
66
+ | `examples/03_compositional_d5.py` | D5 coordinate-block $\Sigma$ |
67
+ | `examples/04_falsification_controls.py` | matched / wrong-W / isotropic modes |
68
+ | `examples/05_yaml_config.py` | JSON job dict → configs |
69
+ | `examples/minimal_loop.py` | Short end-to-end loop |
70
+ | `examples/06_office31_sklearn.py` | D1 + `MatchedSubspaceProjector` + logistic |
71
+ | `examples/07_vision_multilayer.py` | `MultiLayerPMHLoss` + per-layer Gram noise |
72
+
73
+ ## API map
74
+
75
+ | Goal | API |
76
+ |------|-----|
77
+ | Pick estimator | `SigmaTaskConfig.for_domain()`, `.for_isotropic()`, … |
78
+ | Estimate | `estimate_from_config(cfg, ...)` → `SigmaTaskEstimate` |
79
+ | Pre-flight | `artifact.preflight`, `preflight_eigengap(cov, rank)` |
80
+ | Train | `PMHLoss(artifact, PMHConfig(...))` |
81
+ | Controls | `PMHLoss(..., mode="wrong_w")`, `signal_W_projector` |
82
+ | Data hook | `collect_features(encoder, loader)` |
83
+
84
+ ## Estimators (`method=`)
85
+
86
+ | Method | Lemma | Config helper |
87
+ |--------|-------|----------------|
88
+ | D1 | Subspace SVD | `for_subspace(rank=)` |
89
+ | D2 | Isotropic | `for_isotropic(dim, noise_level)` |
90
+ | D3 | Aug modes | `for_augmentation()` |
91
+ | D4 | Domain Gram | `for_domain(rank=)` |
92
+ | D5 | Compositional | `for_compositional(indices)` |
93
+ | D6 | Temporal | `for_temporal()` |
94
+ | D7 | Style / alignment | `for_alignment(rank=)` |
95
+
96
+ ## Status
97
+
98
+ **0.5.0** — `PMHTrainer` (HF), CORAL baseline, GitHub Actions CI + PyPI publish guide.
99
+
100
+ | Extra | Install | Example |
101
+ |-------|---------|---------|
102
+ | HF Trainer | `pip install "matching-pmh[hf]"` | `examples/10_hf_trainer.py` |
103
+ | CORAL baseline | `pmh.baselines.coral` | `examples/06_office31_sklearn.py` |
104
+ | CI / PyPI | see `PUBLISHING.md` | tag `v0.5.0` |
105
+
106
+ **0.4.0** — Hugging Face D7 (`estimate_style_sigma`), Lightning (`add_pmh_to_loss`), Office-31 features (`--office31-root`).
107
+
108
+ | Extra | Install | Example |
109
+ |-------|---------|---------|
110
+ | HF | `pip install "matching-pmh[hf]"` | `examples/08_hf_style_d7.py` |
111
+ | Lightning | `pip install "matching-pmh[lightning]"` | `examples/09_lightning_module.py` |
112
+ | Vision / Office-31 | `pip install "matching-pmh[vision]"` | `examples/06_office31_sklearn.py --office31-root ...` |
@@ -0,0 +1,24 @@
1
+ # Repository separation
2
+
3
+ This project **must** live in its own git repository, separate from the paper LaTeX / experiment codebase.
4
+
5
+ ## Do
6
+
7
+ - Version, release, and license **here** only.
8
+ - Keep dependencies minimal (`torch`, `numpy`).
9
+ - Add integrations (Hugging Face, Lightning) in this repo or optional extras.
10
+
11
+ ## Do not
12
+
13
+ - Import from `Paper2/T1`, `T7`, or `submission_grand_unification/`.
14
+ - Copy frozen task JSON or appendix tables into this tree.
15
+ - Block paper submission on library polish.
16
+
17
+ ## Paper ↔ library link
18
+
19
+ In the manuscript: one sentence in the reproducibility paragraph, e.g.
20
+ *“Reference implementation: [matching-pmh](https://github.com/…).”*
21
+
22
+ In this repository's `README.md`: paper title + “cite when using the package.”
23
+
24
+ No shared CI, no monorepo requirement.
@@ -0,0 +1,31 @@
1
+ # CLI: `pmh-train`
2
+
3
+ Installed with the package:
4
+
5
+ ```bash
6
+ pip install matching-pmh
7
+ pmh-train list-methods
8
+ ```
9
+
10
+ ## Commands
11
+
12
+ | Command | Purpose |
13
+ |---------|---------|
14
+ | `list-methods` | Table of D1–D7 inputs |
15
+ | `estimate --config job.json` | Run estimator, write `output.pt` + `.json` |
16
+ | `preflight ARTIFACT.pt` | Eigengap $\gamma_r$ diagnostics |
17
+ | `run --config job.json` | Validate training job (artifact + PMH weights) |
18
+
19
+ ## Example jobs
20
+
21
+ - `examples/configs/d4_estimate.json` — domain Gram (numpy paths)
22
+ - `examples/configs/d7_style_estimate.json` — HF style JSONL
23
+ - `examples/configs/dpo_train_job.json` — training recipe after D7 estimate
24
+
25
+ ```bash
26
+ pmh-train estimate --config examples/configs/d7_style_estimate.json
27
+ pmh-train preflight artifacts/d7_style.pt
28
+ pmh-train run --config examples/configs/dpo_train_job.json
29
+ ```
30
+
31
+ Python equivalent: `python -m pmh.cli.main estimate --config ...`
@@ -0,0 +1,20 @@
1
+ # Datasets
2
+
3
+ ## Office-31 features
4
+
5
+ Install: `pip install "matching-pmh[vision]"` (torchvision).
6
+
7
+ ```python
8
+ from pmh.datasets.office31 import extract_office31_features, list_office31_domains
9
+
10
+ x_a, y_a = extract_office31_features("/data/office31", "amazon", max_samples=2000)
11
+ x_d, y_d = extract_office31_features("/data/office31", "dslr", max_samples=2000)
12
+ ```
13
+
14
+ CLI example:
15
+
16
+ ```bash
17
+ python examples/06_office31_sklearn.py --office31-root /data/office31 --source amazon --target dslr
18
+ ```
19
+
20
+ Expected layout: `root/amazon/<class>/...` (ImageFolder).
@@ -0,0 +1,23 @@
1
+ # D1 — Subspace nuisance (Lemma D1)
2
+
3
+ **When:** a low-rank subspace $W$ explains cross-domain variation (digits, domains with class alignment).
4
+
5
+ **Config:** `SigmaTaskConfig.for_subspace(rank=r)`
6
+
7
+ **PyTorch:**
8
+
9
+ ```python
10
+ artifact = estimate_from_config(cfg, h_src, h_tgt) # [N, d] features
11
+ ```
12
+
13
+ **NumPy / sklearn:**
14
+
15
+ ```python
16
+ from pmh.sklearn_match import MatchedSubspaceProjector
17
+ proj = MatchedSubspaceProjector(rank=16).fit(x_a, y_a, x_d, y_d)
18
+ x_matched = proj.transform(x_a)
19
+ ```
20
+
21
+ **Pre-flight:** check `artifact.preflight` — small eigengap $\Rightarrow$ Office-31-style failure.
22
+
23
+ **Example:** `examples/06_office31_sklearn.py`