matching-pmh 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matching_pmh-0.6.0/.gitignore +15 -0
- matching_pmh-0.6.0/CHANGELOG.md +30 -0
- matching_pmh-0.6.0/CITATION.cff +25 -0
- matching_pmh-0.6.0/GITHUB_SETUP.md +61 -0
- matching_pmh-0.6.0/LICENSE +21 -0
- matching_pmh-0.6.0/PKG-INFO +170 -0
- matching_pmh-0.6.0/PUBLISHING.md +143 -0
- matching_pmh-0.6.0/README.md +112 -0
- matching_pmh-0.6.0/SEPARATION.md +24 -0
- matching_pmh-0.6.0/docs/cli.md +31 -0
- matching_pmh-0.6.0/docs/datasets.md +20 -0
- matching_pmh-0.6.0/docs/estimators/d1.md +23 -0
- matching_pmh-0.6.0/docs/estimators/d2.md +10 -0
- matching_pmh-0.6.0/docs/estimators/d3.md +11 -0
- matching_pmh-0.6.0/docs/estimators/d4.md +17 -0
- matching_pmh-0.6.0/docs/estimators/d5.md +8 -0
- matching_pmh-0.6.0/docs/estimators/d6.md +8 -0
- matching_pmh-0.6.0/docs/estimators/d7.md +23 -0
- matching_pmh-0.6.0/docs/estimators/index.md +33 -0
- matching_pmh-0.6.0/docs/getting-started.md +39 -0
- matching_pmh-0.6.0/docs/index.md +28 -0
- matching_pmh-0.6.0/docs/integrations-hf-trainer.md +34 -0
- matching_pmh-0.6.0/docs/integrations-hf.md +40 -0
- matching_pmh-0.6.0/docs/integrations-lightning.md +27 -0
- matching_pmh-0.6.0/docs/integrations.md +20 -0
- matching_pmh-0.6.0/docs/nuisance_types.md +60 -0
- matching_pmh-0.6.0/docs/sklearn.md +32 -0
- matching_pmh-0.6.0/docs/training.md +21 -0
- matching_pmh-0.6.0/examples/01_domain_shift_d4.py +58 -0
- matching_pmh-0.6.0/examples/02_save_load_artifact.py +31 -0
- matching_pmh-0.6.0/examples/03_compositional_d5.py +22 -0
- matching_pmh-0.6.0/examples/04_falsification_controls.py +27 -0
- matching_pmh-0.6.0/examples/05_yaml_config.py +29 -0
- matching_pmh-0.6.0/examples/06_office31_sklearn.py +141 -0
- matching_pmh-0.6.0/examples/07_vision_multilayer.py +77 -0
- matching_pmh-0.6.0/examples/08_hf_style_d7.py +129 -0
- matching_pmh-0.6.0/examples/09_lightning_module.py +70 -0
- matching_pmh-0.6.0/examples/10_hf_trainer.py +82 -0
- matching_pmh-0.6.0/examples/11_dpo_lora_style_pmh.py +241 -0
- matching_pmh-0.6.0/examples/configs/d4_estimate.json +8 -0
- matching_pmh-0.6.0/examples/configs/d7_style_estimate.json +11 -0
- matching_pmh-0.6.0/examples/configs/dpo_train_job.json +18 -0
- matching_pmh-0.6.0/examples/data/preference_pairs_sample.jsonl +2 -0
- matching_pmh-0.6.0/examples/data/style_pairs_sample.jsonl +2 -0
- matching_pmh-0.6.0/examples/minimal_loop.py +41 -0
- matching_pmh-0.6.0/mkdocs.yml +39 -0
- matching_pmh-0.6.0/pyproject.toml +66 -0
- matching_pmh-0.6.0/scripts/preflight_release.ps1 +10 -0
- matching_pmh-0.6.0/scripts/upload_pypi.ps1 +69 -0
- matching_pmh-0.6.0/src/pmh/__init__.py +55 -0
- matching_pmh-0.6.0/src/pmh/_tensor.py +37 -0
- matching_pmh-0.6.0/src/pmh/artifact.py +99 -0
- matching_pmh-0.6.0/src/pmh/baselines/__init__.py +5 -0
- matching_pmh-0.6.0/src/pmh/baselines/coral.py +37 -0
- matching_pmh-0.6.0/src/pmh/catalog.py +130 -0
- matching_pmh-0.6.0/src/pmh/cli/__init__.py +5 -0
- matching_pmh-0.6.0/src/pmh/cli/main.py +231 -0
- matching_pmh-0.6.0/src/pmh/config.py +145 -0
- matching_pmh-0.6.0/src/pmh/controls.py +35 -0
- matching_pmh-0.6.0/src/pmh/datasets/__init__.py +5 -0
- matching_pmh-0.6.0/src/pmh/datasets/office31.py +103 -0
- matching_pmh-0.6.0/src/pmh/diagnostics.py +21 -0
- matching_pmh-0.6.0/src/pmh/estimate.py +156 -0
- matching_pmh-0.6.0/src/pmh/estimators/__init__.py +19 -0
- matching_pmh-0.6.0/src/pmh/estimators/d1_subspace.py +44 -0
- matching_pmh-0.6.0/src/pmh/estimators/d2_isotropic.py +22 -0
- matching_pmh-0.6.0/src/pmh/estimators/d3_augmentation.py +37 -0
- matching_pmh-0.6.0/src/pmh/estimators/d4_domain.py +40 -0
- matching_pmh-0.6.0/src/pmh/estimators/d5_compositional.py +28 -0
- matching_pmh-0.6.0/src/pmh/estimators/d6_temporal.py +36 -0
- matching_pmh-0.6.0/src/pmh/estimators/d7_alignment.py +66 -0
- matching_pmh-0.6.0/src/pmh/features.py +59 -0
- matching_pmh-0.6.0/src/pmh/integrations/__init__.py +31 -0
- matching_pmh-0.6.0/src/pmh/integrations/hf_trainer.py +170 -0
- matching_pmh-0.6.0/src/pmh/integrations/huggingface.py +304 -0
- matching_pmh-0.6.0/src/pmh/integrations/lightning.py +90 -0
- matching_pmh-0.6.0/src/pmh/integrations/torch.py +166 -0
- matching_pmh-0.6.0/src/pmh/numpy_api.py +152 -0
- matching_pmh-0.6.0/src/pmh/penalty.py +138 -0
- matching_pmh-0.6.0/src/pmh/preflight.py +35 -0
- matching_pmh-0.6.0/src/pmh/sklearn_match.py +80 -0
- matching_pmh-0.6.0/src/pmh/training.py +92 -0
- matching_pmh-0.6.0/src/pmh/vision/__init__.py +5 -0
- matching_pmh-0.6.0/src/pmh/vision/multilayer.py +136 -0
- matching_pmh-0.6.0/tests/test_cli.py +87 -0
- matching_pmh-0.6.0/tests/test_config_io.py +48 -0
- matching_pmh-0.6.0/tests/test_estimate.py +41 -0
- matching_pmh-0.6.0/tests/test_penalty.py +45 -0
- matching_pmh-0.6.0/tests/test_training.py +28 -0
- matching_pmh-0.6.0/tests/test_v03.py +62 -0
- matching_pmh-0.6.0/tests/test_v04.py +112 -0
- matching_pmh-0.6.0/tests/test_v05.py +85 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.6.0
|
|
4
|
+
|
|
5
|
+
- **`pmh-train` CLI**: `list-methods`, `estimate --config job.json`, `preflight`, `run --config job.json`.
|
|
6
|
+
- **`pmh.catalog`**: D1–D7 input requirements and job validation.
|
|
7
|
+
- **Example 11**: Qwen/T7A JSONL (`style_pairs` + `preference_pairs`), optional LoRA + DPO+PMH demo.
|
|
8
|
+
- **Bundled samples**: `examples/data/*.jsonl`, `examples/configs/*.json`.
|
|
9
|
+
- **HF**: `load_preference_pairs_jsonl` for DPO schema.
|
|
10
|
+
- **Publishing**: TestPyPI workflow (`publish-testpypi.yml`), updated `PUBLISHING.md`.
|
|
11
|
+
|
|
12
|
+
## 0.5.0
|
|
13
|
+
|
|
14
|
+
- HF `PMHTrainer`, CORAL baseline, CI matrix, Office-31 example.
|
|
15
|
+
|
|
16
|
+
## 0.4.0
|
|
17
|
+
|
|
18
|
+
- Hugging Face D7, Lightning callback, Office-31 loader.
|
|
19
|
+
|
|
20
|
+
## 0.3.0
|
|
21
|
+
|
|
22
|
+
- Torch/sklearn/vision integrations, MkDocs.
|
|
23
|
+
|
|
24
|
+
## 0.2.0
|
|
25
|
+
|
|
26
|
+
- Artifacts, `PMHLoss`, configs.
|
|
27
|
+
|
|
28
|
+
## 0.1.0
|
|
29
|
+
|
|
30
|
+
- Core estimators D1–D7 and penalties.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
title: "matching-pmh: reference library for the matching principle"
|
|
3
|
+
message: "If you use this software, please cite the matching-principle paper."
|
|
4
|
+
type: software
|
|
5
|
+
authors:
|
|
6
|
+
- family-names: Rajput
|
|
7
|
+
given-names: Vishal
|
|
8
|
+
repository-code: "https://github.com/matching-pmh/matching-pmh"
|
|
9
|
+
url: "https://github.com/matching-pmh/matching-pmh"
|
|
10
|
+
license: MIT
|
|
11
|
+
version: 0.6.0
|
|
12
|
+
keywords:
|
|
13
|
+
- robustness
|
|
14
|
+
- domain-adaptation
|
|
15
|
+
- representation-learning
|
|
16
|
+
- jacobian-regularization
|
|
17
|
+
- sigma-task
|
|
18
|
+
preferred-citation:
|
|
19
|
+
type: article
|
|
20
|
+
title: "The Matching Principle: A Geometric Theory of Loss Functions for Nuisance-Robust Representation Learning"
|
|
21
|
+
authors:
|
|
22
|
+
- family-names: Rajput
|
|
23
|
+
given-names: Vishal
|
|
24
|
+
year: 2026
|
|
25
|
+
notes: "JMLR submission; update journal fields when accepted."
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Publish `matching-pmh` on GitHub
|
|
2
|
+
|
|
3
|
+
The library lives only in this folder (`Desktop/matching-pmh`), not inside the paper repo.
|
|
4
|
+
|
|
5
|
+
## 1. Create the remote repository
|
|
6
|
+
|
|
7
|
+
On GitHub: **New repository** → name `matching-pmh` → public → **no** README (this tree has one).
|
|
8
|
+
|
|
9
|
+
Or install [GitHub CLI](https://cli.github.com/) and run:
|
|
10
|
+
|
|
11
|
+
```powershell
|
|
12
|
+
cd C:\Users\Eigenaar\Desktop\matching-pmh
|
|
13
|
+
gh auth login
|
|
14
|
+
gh repo create matching-pmh --public --source=. --remote=origin
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## 2. Publish on PyPI (full package)
|
|
18
|
+
|
|
19
|
+
See **`PUBLISHING.md`** for the complete guide. Summary:
|
|
20
|
+
|
|
21
|
+
1. Build: `python -m build`, then `twine check dist/*`
|
|
22
|
+
2. Upload: `twine upload dist/*` (or tag `v0.6.0` + trusted publishing on GitHub)
|
|
23
|
+
3. Users install: `pip install matching-pmh`
|
|
24
|
+
|
|
25
|
+
The name `matching-pmh` is **not on PyPI** until the first successful upload.
|
|
26
|
+
|
|
27
|
+
## 3. First commit and push
|
|
28
|
+
|
|
29
|
+
```powershell
|
|
30
|
+
cd C:\Users\Eigenaar\Desktop\matching-pmh
|
|
31
|
+
git init
|
|
32
|
+
git add .
|
|
33
|
+
git commit -m "Initial release: estimate_sigma_task (D1-D7) and PMH penalties (v0.6.0)"
|
|
34
|
+
git branch -M main
|
|
35
|
+
git remote add origin https://github.com/YOUR_USERNAME/matching-pmh.git
|
|
36
|
+
git push -u origin main
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Replace `YOUR_USERNAME` with your GitHub account. If the URL differs, update `repository-code` / `url` in `CITATION.cff` and `[project.urls]` in `pyproject.toml`.
|
|
40
|
+
|
|
41
|
+
## 4. Paper cross-link
|
|
42
|
+
|
|
43
|
+
After the repo exists, set the URL in the manuscript macro (once):
|
|
44
|
+
|
|
45
|
+
`submission_grand_unification/macros.tex` → `\MatchingPmhRepoUrl{https://github.com/YOUR_USERNAME/matching-pmh}`
|
|
46
|
+
|
|
47
|
+
Then rebuild the PDF (`pdflatex` ×2 in `submission_grand_unification/`).
|
|
48
|
+
|
|
49
|
+
## 5. Remove duplicate from paper tree
|
|
50
|
+
|
|
51
|
+
Delete the old copy under `Paper2/pmh/` (only a stub README should remain there).
|
|
52
|
+
|
|
53
|
+
## 6. Optional: PyPI
|
|
54
|
+
|
|
55
|
+
When ready for `pip install matching-pmh`:
|
|
56
|
+
|
|
57
|
+
```powershell
|
|
58
|
+
pip install build twine
|
|
59
|
+
python -m build
|
|
60
|
+
twine upload dist/*
|
|
61
|
+
```
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vishal Rajput
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: matching-pmh
|
|
3
|
+
Version: 0.6.0
|
|
4
|
+
Summary: Reference library for the matching principle: estimate Sigma_task (D1-D7) and matched PMH penalties
|
|
5
|
+
Project-URL: Homepage, https://github.com/matching-pmh/matching-pmh
|
|
6
|
+
Project-URL: Documentation, https://github.com/matching-pmh/matching-pmh#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/matching-pmh/matching-pmh
|
|
8
|
+
Project-URL: Issues, https://github.com/matching-pmh/matching-pmh/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/matching-pmh/matching-pmh/blob/main/CHANGELOG.md
|
|
10
|
+
Author: Vishal Rajput
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: domain-adaptation,jacobian,pmh,representation-learning,robustness
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Requires-Dist: torch>=2.1
|
|
25
|
+
Provides-Extra: all
|
|
26
|
+
Requires-Dist: accelerate>=0.25; extra == 'all'
|
|
27
|
+
Requires-Dist: datasets>=2.14; extra == 'all'
|
|
28
|
+
Requires-Dist: lightning>=2.1; extra == 'all'
|
|
29
|
+
Requires-Dist: mkdocs-material>=9.0; extra == 'all'
|
|
30
|
+
Requires-Dist: mkdocs>=1.5; extra == 'all'
|
|
31
|
+
Requires-Dist: peft>=0.7; extra == 'all'
|
|
32
|
+
Requires-Dist: pytest>=8.0; extra == 'all'
|
|
33
|
+
Requires-Dist: ruff>=0.4; extra == 'all'
|
|
34
|
+
Requires-Dist: scikit-learn>=1.3; extra == 'all'
|
|
35
|
+
Requires-Dist: torchvision>=0.16; extra == 'all'
|
|
36
|
+
Requires-Dist: transformers>=4.36; extra == 'all'
|
|
37
|
+
Provides-Extra: dev
|
|
38
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
40
|
+
Provides-Extra: docs
|
|
41
|
+
Requires-Dist: mkdocs-material>=9.0; extra == 'docs'
|
|
42
|
+
Requires-Dist: mkdocs>=1.5; extra == 'docs'
|
|
43
|
+
Provides-Extra: hf
|
|
44
|
+
Requires-Dist: accelerate>=0.25; extra == 'hf'
|
|
45
|
+
Requires-Dist: transformers>=4.36; extra == 'hf'
|
|
46
|
+
Provides-Extra: hf-lora
|
|
47
|
+
Requires-Dist: accelerate>=0.25; extra == 'hf-lora'
|
|
48
|
+
Requires-Dist: datasets>=2.14; extra == 'hf-lora'
|
|
49
|
+
Requires-Dist: peft>=0.7; extra == 'hf-lora'
|
|
50
|
+
Requires-Dist: transformers>=4.36; extra == 'hf-lora'
|
|
51
|
+
Provides-Extra: lightning
|
|
52
|
+
Requires-Dist: lightning>=2.1; extra == 'lightning'
|
|
53
|
+
Provides-Extra: sklearn
|
|
54
|
+
Requires-Dist: scikit-learn>=1.3; extra == 'sklearn'
|
|
55
|
+
Provides-Extra: vision
|
|
56
|
+
Requires-Dist: torchvision>=0.16; extra == 'vision'
|
|
57
|
+
Description-Content-Type: text/markdown
|
|
58
|
+
|
|
59
|
+
# matching-pmh
|
|
60
|
+
|
|
61
|
+
**Independent library** for the *matching principle*: estimate $\Sigma_{\mathrm{task}}$ (Lemmas D1–D7), run matched PMH penalties, save/load artifacts, and wire into training loops.
|
|
62
|
+
|
|
63
|
+
Paper: *The Matching Principle* (separate repository). **v0.2** adds typed configs, artifacts, pre-flight eigengap, `PMHLoss`, and `collect_features`.
|
|
64
|
+
|
|
65
|
+
## Install
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
cd matching-pmh
|
|
69
|
+
pip install -e ".[dev]"
|
|
70
|
+
pytest
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Quick start (v0.2)
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
import torch
|
|
77
|
+
from pmh import (
|
|
78
|
+
SigmaTaskConfig,
|
|
79
|
+
PMHConfig,
|
|
80
|
+
PMHLoss,
|
|
81
|
+
estimate_from_config,
|
|
82
|
+
collect_features,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# 1) Estimate + diagnostics
|
|
86
|
+
cfg = SigmaTaskConfig.for_domain(rank=64)
|
|
87
|
+
artifact = estimate_from_config(cfg, source_feats, target_feats)
|
|
88
|
+
print(artifact.preflight, artifact.eigengap) # pass | marginal | fail
|
|
89
|
+
|
|
90
|
+
# 2) Save for another job / machine
|
|
91
|
+
artifact.save("checkpoints/style_sigma")
|
|
92
|
+
|
|
93
|
+
# 3) Train
|
|
94
|
+
pmh = PMHLoss(artifact, PMHConfig(weight=0.3, cap_ratio=0.3, warmup_epochs=2))
|
|
95
|
+
h = backbone(x)
|
|
96
|
+
task_loss = ...
|
|
97
|
+
total, pmh_term = pmh.capped_total(task_loss, h)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Legacy one-liner (still supported)
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from pmh import estimate_sigma_task, pmh_penalty_on_rep
|
|
104
|
+
|
|
105
|
+
sigma = estimate_sigma_task(src, tgt, method="D4", rank=64)
|
|
106
|
+
pen = pmh_penalty_on_rep(h, sigma)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Load a saved estimate
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from pmh import SigmaTaskEstimate, PMHLoss
|
|
113
|
+
|
|
114
|
+
artifact = SigmaTaskEstimate.load("checkpoints/style_sigma.pt")
|
|
115
|
+
pmh = PMHLoss(artifact)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Examples
|
|
119
|
+
|
|
120
|
+
| Script | What it shows |
|
|
121
|
+
|--------|----------------|
|
|
122
|
+
| `examples/01_domain_shift_d4.py` | `collect_features` + D4 + `PMHLoss` training |
|
|
123
|
+
| `examples/02_save_load_artifact.py` | `.pt` + `.json` artifact I/O |
|
|
124
|
+
| `examples/03_compositional_d5.py` | D5 coordinate-block $\Sigma$ |
|
|
125
|
+
| `examples/04_falsification_controls.py` | matched / wrong-W / isotropic modes |
|
|
126
|
+
| `examples/05_yaml_config.py` | JSON job dict → configs |
|
|
127
|
+
| `examples/minimal_loop.py` | Short end-to-end loop |
|
|
128
|
+
| `examples/06_office31_sklearn.py` | D1 + `MatchedSubspaceProjector` + logistic |
|
|
129
|
+
| `examples/07_vision_multilayer.py` | `MultiLayerPMHLoss` + per-layer Gram noise |
|
|
130
|
+
|
|
131
|
+
## API map
|
|
132
|
+
|
|
133
|
+
| Goal | API |
|
|
134
|
+
|------|-----|
|
|
135
|
+
| Pick estimator | `SigmaTaskConfig.for_domain()`, `.for_isotropic()`, … |
|
|
136
|
+
| Estimate | `estimate_from_config(cfg, ...)` → `SigmaTaskEstimate` |
|
|
137
|
+
| Pre-flight | `artifact.preflight`, `preflight_eigengap(cov, rank)` |
|
|
138
|
+
| Train | `PMHLoss(artifact, PMHConfig(...))` |
|
|
139
|
+
| Controls | `PMHLoss(..., mode="wrong_w")`, `signal_W_projector` |
|
|
140
|
+
| Data hook | `collect_features(encoder, loader)` |
|
|
141
|
+
|
|
142
|
+
## Estimators (`method=`)
|
|
143
|
+
|
|
144
|
+
| Method | Lemma | Config helper |
|
|
145
|
+
|--------|-------|----------------|
|
|
146
|
+
| D1 | Subspace SVD | `for_subspace(rank=)` |
|
|
147
|
+
| D2 | Isotropic | `for_isotropic(dim, noise_level)` |
|
|
148
|
+
| D3 | Aug modes | `for_augmentation()` |
|
|
149
|
+
| D4 | Domain Gram | `for_domain(rank=)` |
|
|
150
|
+
| D5 | Compositional | `for_compositional(indices)` |
|
|
151
|
+
| D6 | Temporal | `for_temporal()` |
|
|
152
|
+
| D7 | Style / alignment | `for_alignment(rank=)` |
|
|
153
|
+
|
|
154
|
+
## Status
|
|
155
|
+
|
|
156
|
+
**0.5.0** — `PMHTrainer` (HF), CORAL baseline, GitHub Actions CI + PyPI publish guide.
|
|
157
|
+
|
|
158
|
+
| Extra | Install | Example |
|
|
159
|
+
|-------|---------|---------|
|
|
160
|
+
| HF Trainer | `pip install "matching-pmh[hf]"` | `examples/10_hf_trainer.py` |
|
|
161
|
+
| CORAL baseline | `pmh.baselines.coral` | `examples/06_office31_sklearn.py` |
|
|
162
|
+
| CI / PyPI | see `PUBLISHING.md` | tag `v0.5.0` |
|
|
163
|
+
|
|
164
|
+
**0.4.0** — Hugging Face D7 (`estimate_style_sigma`), Lightning (`add_pmh_to_loss`), Office-31 features (`--office31-root`).
|
|
165
|
+
|
|
166
|
+
| Extra | Install | Example |
|
|
167
|
+
|-------|---------|---------|
|
|
168
|
+
| HF | `pip install "matching-pmh[hf]"` | `examples/08_hf_style_d7.py` |
|
|
169
|
+
| Lightning | `pip install "matching-pmh[lightning]"` | `examples/09_lightning_module.py` |
|
|
170
|
+
| Vision / Office-31 | `pip install "matching-pmh[vision]"` | `examples/06_office31_sklearn.py --office31-root ...` |
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# Publishing `matching-pmh` on PyPI (production)
|
|
2
|
+
|
|
3
|
+
**PyPI name:** `matching-pmh` (available — not taken yet)
|
|
4
|
+
**Import:** `import pmh`
|
|
5
|
+
**CLI:** `pmh-train`
|
|
6
|
+
|
|
7
|
+
After a successful release, anyone can install with:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install matching-pmh
|
|
11
|
+
pip install "matching-pmh[hf]" # Hugging Face D7 / Trainer
|
|
12
|
+
pip install "matching-pmh[hf-lora]" # + PEFT for example 11
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## One-time setup (≈15 minutes)
|
|
18
|
+
|
|
19
|
+
### 1. Accounts
|
|
20
|
+
|
|
21
|
+
1. [pypi.org](https://pypi.org/account/register/) — production
|
|
22
|
+
2. [test.pypi.org](https://test.pypi.org/account/register/) — optional dry run
|
|
23
|
+
3. [github.com](https://github.com) — source + trusted publishing
|
|
24
|
+
|
|
25
|
+
### 2. Reserve the project name on PyPI
|
|
26
|
+
|
|
27
|
+
On [pypi.org/manage/projects/](https://pypi.org/manage/projects/), the **first successful upload** of `matching-pmh` claims the name. Upload once manually or via CI (below).
|
|
28
|
+
|
|
29
|
+
### 3. Trusted publishing (recommended — no long-lived API token in GitHub)
|
|
30
|
+
|
|
31
|
+
**On PyPI** → Account settings → **Publishing** → Add pending publisher:
|
|
32
|
+
|
|
33
|
+
| Field | Value |
|
|
34
|
+
|--------|--------|
|
|
35
|
+
| PyPI project name | `matching-pmh` |
|
|
36
|
+
| Owner | your GitHub user or org |
|
|
37
|
+
| Repository | `matching-pmh` |
|
|
38
|
+
| Workflow name | `ci.yml` |
|
|
39
|
+
| Environment name | *(leave empty unless you use one)* |
|
|
40
|
+
|
|
41
|
+
Repeat for **TestPyPI** if you use `publish-testpypi.yml` (workflow name `publish-testpypi.yml`).
|
|
42
|
+
|
|
43
|
+
### 4. GitHub repository
|
|
44
|
+
|
|
45
|
+
```powershell
|
|
46
|
+
cd C:\Users\Eigenaar\Desktop\matching-pmh
|
|
47
|
+
gh auth login
|
|
48
|
+
gh repo create matching-pmh --public --source=. --remote=origin
|
|
49
|
+
git add .
|
|
50
|
+
git commit -m "Release matching-pmh 0.6.0: D1-D7 estimators, PMH, pmh-train CLI"
|
|
51
|
+
git branch -M main
|
|
52
|
+
git push -u origin main
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Update `[project.urls]` in `pyproject.toml` and `repository-code` / `url` in `CITATION.cff` if your URL is not `github.com/matching-pmh/matching-pmh`.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Path A — Publish from your machine (first time, fastest)
|
|
60
|
+
|
|
61
|
+
```powershell
|
|
62
|
+
cd C:\Users\Eigenaar\Desktop\matching-pmh
|
|
63
|
+
pip install build twine
|
|
64
|
+
python -m build
|
|
65
|
+
twine check dist/*
|
|
66
|
+
|
|
67
|
+
# Optional dry run on TestPyPI first:
|
|
68
|
+
# twine upload --repository testpypi dist/*
|
|
69
|
+
# pip install -i https://test.pypi.org/simple/ matching-pmh==0.6.0
|
|
70
|
+
|
|
71
|
+
# Production PyPI (you will be prompted for API token; use scope "Entire account" or project-scoped):
|
|
72
|
+
twine upload dist/*
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Create an API token on your [account settings](https://pypi.org/manage/account/) page → **API tokens** → **Add API token** (scope: entire account for first upload, or project **matching-pmh** after it exists).
|
|
76
|
+
|
|
77
|
+
Verify:
|
|
78
|
+
|
|
79
|
+
```powershell
|
|
80
|
+
pip install matching-pmh==0.6.0
|
|
81
|
+
pmh-train list-methods
|
|
82
|
+
python -c "import pmh; print(pmh.__version__)"
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Path B — Publish via GitHub tag (after trusted publishing is configured)
|
|
88
|
+
|
|
89
|
+
```powershell
|
|
90
|
+
git tag v0.6.0
|
|
91
|
+
git push origin v0.6.0
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Workflow `.github/workflows/ci.yml` runs tests, builds, and publishes to **pypi.org** on tags `v*`.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Version bump checklist (every release)
|
|
99
|
+
|
|
100
|
+
1. `pyproject.toml` → `version`
|
|
101
|
+
2. `src/pmh/__init__.py` → `__version__`
|
|
102
|
+
3. `CITATION.cff` → `version`
|
|
103
|
+
4. `CHANGELOG.md` → new section
|
|
104
|
+
5. `git tag vX.Y.Z && git push origin vX.Y.Z`
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## Fallback: API token in GitHub Actions
|
|
109
|
+
|
|
110
|
+
If trusted publishing is not set up, add repo secret `PYPI_API_TOKEN` and the publish step still works with `pypa/gh-action-pypi-publish`.
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Paper cross-link
|
|
115
|
+
|
|
116
|
+
Set in `submission_grand_unification/macros.tex`:
|
|
117
|
+
|
|
118
|
+
```latex
|
|
119
|
+
\MatchingPmhRepoUrl{https://github.com/YOUR_USER/matching-pmh}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Add to reproducibility text: `pip install matching-pmh` (PyPI).
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Local preflight (before any upload)
|
|
127
|
+
|
|
128
|
+
```powershell
|
|
129
|
+
pip install build twine
|
|
130
|
+
python -m build
|
|
131
|
+
twine check dist/*
|
|
132
|
+
pip install dist\matching_pmh-0.6.0-py3-none-any.whl
|
|
133
|
+
pmh-train list-methods
|
|
134
|
+
pytest -q
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Notes
|
|
140
|
+
|
|
141
|
+
- **Torch** is a core dependency; wheels are large — that is normal for ML libraries on PyPI.
|
|
142
|
+
- The name `pmh` on PyPI may be taken by unrelated projects; this package is **`matching-pmh`** only.
|
|
143
|
+
- Do not commit `dist/`, `artifacts/`, or API tokens.
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# matching-pmh
|
|
2
|
+
|
|
3
|
+
**Independent library** for the *matching principle*: estimate $\Sigma_{\mathrm{task}}$ (Lemmas D1–D7), run matched PMH penalties, save/load artifacts, and wire into training loops.
|
|
4
|
+
|
|
5
|
+
Paper: *The Matching Principle* (separate repository). **v0.2** adds typed configs, artifacts, pre-flight eigengap, `PMHLoss`, and `collect_features`.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
cd matching-pmh
|
|
11
|
+
pip install -e ".[dev]"
|
|
12
|
+
pytest
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Quick start (v0.2)
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
import torch
|
|
19
|
+
from pmh import (
|
|
20
|
+
SigmaTaskConfig,
|
|
21
|
+
PMHConfig,
|
|
22
|
+
PMHLoss,
|
|
23
|
+
estimate_from_config,
|
|
24
|
+
collect_features,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# 1) Estimate + diagnostics
|
|
28
|
+
cfg = SigmaTaskConfig.for_domain(rank=64)
|
|
29
|
+
artifact = estimate_from_config(cfg, source_feats, target_feats)
|
|
30
|
+
print(artifact.preflight, artifact.eigengap) # pass | marginal | fail
|
|
31
|
+
|
|
32
|
+
# 2) Save for another job / machine
|
|
33
|
+
artifact.save("checkpoints/style_sigma")
|
|
34
|
+
|
|
35
|
+
# 3) Train
|
|
36
|
+
pmh = PMHLoss(artifact, PMHConfig(weight=0.3, cap_ratio=0.3, warmup_epochs=2))
|
|
37
|
+
h = backbone(x)
|
|
38
|
+
task_loss = ...
|
|
39
|
+
total, pmh_term = pmh.capped_total(task_loss, h)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Legacy one-liner (still supported)
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from pmh import estimate_sigma_task, pmh_penalty_on_rep
|
|
46
|
+
|
|
47
|
+
sigma = estimate_sigma_task(src, tgt, method="D4", rank=64)
|
|
48
|
+
pen = pmh_penalty_on_rep(h, sigma)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Load a saved estimate
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from pmh import SigmaTaskEstimate, PMHLoss
|
|
55
|
+
|
|
56
|
+
artifact = SigmaTaskEstimate.load("checkpoints/style_sigma.pt")
|
|
57
|
+
pmh = PMHLoss(artifact)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Examples
|
|
61
|
+
|
|
62
|
+
| Script | What it shows |
|
|
63
|
+
|--------|----------------|
|
|
64
|
+
| `examples/01_domain_shift_d4.py` | `collect_features` + D4 + `PMHLoss` training |
|
|
65
|
+
| `examples/02_save_load_artifact.py` | `.pt` + `.json` artifact I/O |
|
|
66
|
+
| `examples/03_compositional_d5.py` | D5 coordinate-block $\Sigma$ |
|
|
67
|
+
| `examples/04_falsification_controls.py` | matched / wrong-W / isotropic modes |
|
|
68
|
+
| `examples/05_yaml_config.py` | JSON job dict → configs |
|
|
69
|
+
| `examples/minimal_loop.py` | Short end-to-end loop |
|
|
70
|
+
| `examples/06_office31_sklearn.py` | D1 + `MatchedSubspaceProjector` + logistic |
|
|
71
|
+
| `examples/07_vision_multilayer.py` | `MultiLayerPMHLoss` + per-layer Gram noise |
|
|
72
|
+
|
|
73
|
+
## API map
|
|
74
|
+
|
|
75
|
+
| Goal | API |
|
|
76
|
+
|------|-----|
|
|
77
|
+
| Pick estimator | `SigmaTaskConfig.for_domain()`, `.for_isotropic()`, … |
|
|
78
|
+
| Estimate | `estimate_from_config(cfg, ...)` → `SigmaTaskEstimate` |
|
|
79
|
+
| Pre-flight | `artifact.preflight`, `preflight_eigengap(cov, rank)` |
|
|
80
|
+
| Train | `PMHLoss(artifact, PMHConfig(...))` |
|
|
81
|
+
| Controls | `PMHLoss(..., mode="wrong_w")`, `signal_W_projector` |
|
|
82
|
+
| Data hook | `collect_features(encoder, loader)` |
|
|
83
|
+
|
|
84
|
+
## Estimators (`method=`)
|
|
85
|
+
|
|
86
|
+
| Method | Lemma | Config helper |
|
|
87
|
+
|--------|-------|----------------|
|
|
88
|
+
| D1 | Subspace SVD | `for_subspace(rank=)` |
|
|
89
|
+
| D2 | Isotropic | `for_isotropic(dim, noise_level)` |
|
|
90
|
+
| D3 | Aug modes | `for_augmentation()` |
|
|
91
|
+
| D4 | Domain Gram | `for_domain(rank=)` |
|
|
92
|
+
| D5 | Compositional | `for_compositional(indices)` |
|
|
93
|
+
| D6 | Temporal | `for_temporal()` |
|
|
94
|
+
| D7 | Style / alignment | `for_alignment(rank=)` |
|
|
95
|
+
|
|
96
|
+
## Status
|
|
97
|
+
|
|
98
|
+
**0.5.0** — `PMHTrainer` (HF), CORAL baseline, GitHub Actions CI + PyPI publish guide.
|
|
99
|
+
|
|
100
|
+
| Extra | Install | Example |
|
|
101
|
+
|-------|---------|---------|
|
|
102
|
+
| HF Trainer | `pip install "matching-pmh[hf]"` | `examples/10_hf_trainer.py` |
|
|
103
|
+
| CORAL baseline | `pmh.baselines.coral` | `examples/06_office31_sklearn.py` |
|
|
104
|
+
| CI / PyPI | see `PUBLISHING.md` | tag `v0.5.0` |
|
|
105
|
+
|
|
106
|
+
**0.4.0** — Hugging Face D7 (`estimate_style_sigma`), Lightning (`add_pmh_to_loss`), Office-31 features (`--office31-root`).
|
|
107
|
+
|
|
108
|
+
| Extra | Install | Example |
|
|
109
|
+
|-------|---------|---------|
|
|
110
|
+
| HF | `pip install "matching-pmh[hf]"` | `examples/08_hf_style_d7.py` |
|
|
111
|
+
| Lightning | `pip install "matching-pmh[lightning]"` | `examples/09_lightning_module.py` |
|
|
112
|
+
| Vision / Office-31 | `pip install "matching-pmh[vision]"` | `examples/06_office31_sklearn.py --office31-root ...` |
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Repository separation
|
|
2
|
+
|
|
3
|
+
This project **must** live in its own git repository, separate from the paper LaTeX / experiment codebase.
|
|
4
|
+
|
|
5
|
+
## Do
|
|
6
|
+
|
|
7
|
+
- Version, release, and license **here** only.
|
|
8
|
+
- Keep dependencies minimal (`torch`, `numpy`).
|
|
9
|
+
- Add integrations (Hugging Face, Lightning) in this repo or optional extras.
|
|
10
|
+
|
|
11
|
+
## Do not
|
|
12
|
+
|
|
13
|
+
- Import from `Paper2/T1`, `T7`, or `submission_grand_unification/`.
|
|
14
|
+
- Copy frozen task JSON or appendix tables into this tree.
|
|
15
|
+
- Block paper submission on library polish.
|
|
16
|
+
|
|
17
|
+
## Paper ↔ library link
|
|
18
|
+
|
|
19
|
+
In the manuscript: one sentence in the reproducibility paragraph, e.g.
|
|
20
|
+
*“Reference implementation: [matching-pmh](https://github.com/…).”*
|
|
21
|
+
|
|
22
|
+
In this repository's README: paper title + “cite when using the package.”
|
|
23
|
+
|
|
24
|
+
No shared CI, no monorepo requirement.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# CLI: `pmh-train`
|
|
2
|
+
|
|
3
|
+
Installed with the package:
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install matching-pmh
|
|
7
|
+
pmh-train list-methods
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Commands
|
|
11
|
+
|
|
12
|
+
| Command | Purpose |
|
|
13
|
+
|---------|---------|
|
|
14
|
+
| `list-methods` | Table of D1–D7 inputs |
|
|
15
|
+
| `estimate --config job.json` | Run estimator, write `output.pt` + `.json` |
|
|
16
|
+
| `preflight ARTIFACT.pt` | Eigengap $\gamma_r$ diagnostics |
|
|
17
|
+
| `run --config job.json` | Validate training job (artifact + PMH weights) |
|
|
18
|
+
|
|
19
|
+
## Example jobs
|
|
20
|
+
|
|
21
|
+
- `examples/configs/d4_estimate.json` — domain Gram (numpy paths)
|
|
22
|
+
- `examples/configs/d7_style_estimate.json` — HF style JSONL
|
|
23
|
+
- `examples/configs/dpo_train_job.json` — training recipe after D7 estimate
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pmh-train estimate --config examples/configs/d7_style_estimate.json
|
|
27
|
+
pmh-train preflight artifacts/d7_style.pt
|
|
28
|
+
pmh-train run --config examples/configs/dpo_train_job.json
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Python equivalent: `python -m pmh.cli.main estimate --config ...`
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Datasets
|
|
2
|
+
|
|
3
|
+
## Office-31 features
|
|
4
|
+
|
|
5
|
+
Install: `pip install "matching-pmh[vision]"` (torchvision).
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from pmh.datasets.office31 import extract_office31_features, list_office31_domains
|
|
9
|
+
|
|
10
|
+
x_a, y_a = extract_office31_features("/data/office31", "amazon", max_samples=2000)
|
|
11
|
+
x_d, y_d = extract_office31_features("/data/office31", "dslr", max_samples=2000)
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
CLI example:
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
python examples/06_office31_sklearn.py --office31-root /data/office31 --source amazon --target dslr
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Expected layout: `root/amazon/<class>/...` (ImageFolder).
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# D1 — Subspace nuisance (Lemma D1)
|
|
2
|
+
|
|
3
|
+
**When:** a low-rank subspace $W$ explains cross-domain variation (digits, domains with class alignment).
|
|
4
|
+
|
|
5
|
+
**Config:** `SigmaTaskConfig.for_subspace(rank=r)`
|
|
6
|
+
|
|
7
|
+
**PyTorch:**
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
artifact = estimate_from_config(cfg, h_src, h_tgt) # [N, d] features
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
**NumPy / sklearn:**
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from pmh.sklearn_match import MatchedSubspaceProjector
|
|
17
|
+
proj = MatchedSubspaceProjector(rank=16).fit(x_a, y_a, x_d, y_d)
|
|
18
|
+
x_matched = proj.transform(x_a)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
**Pre-flight:** check `artifact.preflight` — small eigengap $\Rightarrow$ Office-31-style failure.
|
|
22
|
+
|
|
23
|
+
**Example:** `examples/06_office31_sklearn.py`
|