pfnstudio 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pfnstudio-0.7.0/.gitignore +68 -0
- pfnstudio-0.7.0/PKG-INFO +99 -0
- pfnstudio-0.7.0/README.md +49 -0
- pfnstudio-0.7.0/pfnstudio/__init__.py +1 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/.gitignore +4 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/schemas/eval.schema.json +53 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/schemas/initiative.schema.json +37 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/schemas/model.schema.json +43 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/schemas/prior.schema.json +86 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/schemas/run.schema.json +67 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/README.md +39 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/ROADMAP.md +25 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/evals/example_sachs.yaml +37 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/initiatives/0001-define-base-prior.md +32 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/literature/references.bib +37 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/literature/summaries/mueller2022pfn.md +27 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/models/example_transformer.yaml +30 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/priors/example_linear_scm/prior.md +19 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/priors/example_linear_scm/prior.py +54 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/priors/example_linear_scm/prior.yaml +51 -0
- pfnstudio-0.7.0/pfnstudio/_bundled/templates/fm-project/runs/example_run.yaml +32 -0
- pfnstudio-0.7.0/pfnstudio/_paths.py +37 -0
- pfnstudio-0.7.0/pfnstudio/cli.py +809 -0
- pfnstudio-0.7.0/pfnstudio/compute/__init__.py +27 -0
- pfnstudio-0.7.0/pfnstudio/compute/base.py +17 -0
- pfnstudio-0.7.0/pfnstudio/compute/hf_spaces.py +43 -0
- pfnstudio-0.7.0/pfnstudio/compute/local.py +214 -0
- pfnstudio-0.7.0/pfnstudio/compute/modal.py +36 -0
- pfnstudio-0.7.0/pfnstudio/compute/remote.py +422 -0
- pfnstudio-0.7.0/pfnstudio/compute/runpod.py +38 -0
- pfnstudio-0.7.0/pfnstudio/compute/vast.py +1039 -0
- pfnstudio-0.7.0/pfnstudio/lint.py +94 -0
- pfnstudio-0.7.0/pfnstudio/scaffold.py +32 -0
- pfnstudio-0.7.0/pfnstudio/tracking/__init__.py +25 -0
- pfnstudio-0.7.0/pfnstudio/tracking/base.py +20 -0
- pfnstudio-0.7.0/pfnstudio/tracking/local.py +32 -0
- pfnstudio-0.7.0/pfnstudio/tracking/mlflow.py +57 -0
- pfnstudio-0.7.0/pfnstudio/tracking/wandb.py +35 -0
- pfnstudio-0.7.0/pfnstudio/validate.py +83 -0
- pfnstudio-0.7.0/pyproject.toml +66 -0
- pfnstudio-0.7.0/scripts/sync-bundled.sh +30 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
share/python-wheels/
|
|
20
|
+
*.egg-info/
|
|
21
|
+
.installed.cfg
|
|
22
|
+
*.egg
|
|
23
|
+
MANIFEST
|
|
24
|
+
|
|
25
|
+
# Virtual environments
|
|
26
|
+
.venv/
|
|
27
|
+
.deps-venv/
|
|
28
|
+
venv/
|
|
29
|
+
ENV/
|
|
30
|
+
env/
|
|
31
|
+
|
|
32
|
+
# Editable-install metadata
|
|
33
|
+
_editable_impl_*.pth
|
|
34
|
+
|
|
35
|
+
# IDE / editor
|
|
36
|
+
.vscode/
|
|
37
|
+
.idea/
|
|
38
|
+
*.swp
|
|
39
|
+
*.swo
|
|
40
|
+
.DS_Store
|
|
41
|
+
|
|
42
|
+
# pytest / coverage / mypy
|
|
43
|
+
.pytest_cache/
|
|
44
|
+
.mypy_cache/
|
|
45
|
+
.ruff_cache/
|
|
46
|
+
.coverage
|
|
47
|
+
.coverage.*
|
|
48
|
+
htmlcov/
|
|
49
|
+
.tox/
|
|
50
|
+
.nox/
|
|
51
|
+
|
|
52
|
+
# Jupyter
|
|
53
|
+
.ipynb_checkpoints/
|
|
54
|
+
|
|
55
|
+
# Build artifacts
|
|
56
|
+
checkpoint/
|
|
57
|
+
runs/*/checkpoint/
|
|
58
|
+
runs/*/results.json
|
|
59
|
+
runs/*/metrics.jsonl
|
|
60
|
+
*.pt
|
|
61
|
+
*.pth
|
|
62
|
+
|
|
63
|
+
# Dataset cache (downloaded benchmark datasets)
|
|
64
|
+
.priorstudio/datasets/
|
|
65
|
+
|
|
66
|
+
# Logs
|
|
67
|
+
logs/
|
|
68
|
+
*.log
|
pfnstudio-0.7.0/PKG-INFO
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pfnstudio
|
|
3
|
+
Version: 0.7.0
|
|
4
|
+
Summary: CLI for the PFN Studio framework — scaffold, validate, lint, and run prior-fitted foundation model projects.
|
|
5
|
+
Project-URL: Homepage, https://pfnstudio.com
|
|
6
|
+
Project-URL: Repository, https://github.com/profitopsai/pfnstudio
|
|
7
|
+
Project-URL: Issues, https://github.com/profitopsai/pfnstudio/issues
|
|
8
|
+
Project-URL: Documentation, https://github.com/profitopsai/pfnstudio/tree/main/docs
|
|
9
|
+
Author: PFN Studio contributors
|
|
10
|
+
License: Apache-2.0
|
|
11
|
+
Keywords: causal-inference,foundation-models,ml,pfn,prior-fitted-networks
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: jsonschema>=4.21
|
|
18
|
+
Requires-Dist: pfnstudio-core>=0.4.0
|
|
19
|
+
Requires-Dist: pyyaml>=6.0
|
|
20
|
+
Requires-Dist: requests>=2.31
|
|
21
|
+
Requires-Dist: rich>=13.7
|
|
22
|
+
Requires-Dist: typer>=0.12
|
|
23
|
+
Provides-Extra: all
|
|
24
|
+
Requires-Dist: huggingface-hub>=0.20; extra == 'all'
|
|
25
|
+
Requires-Dist: mlflow>=2.10; extra == 'all'
|
|
26
|
+
Requires-Dist: modal>=0.62; extra == 'all'
|
|
27
|
+
Requires-Dist: pfnstudio-core[torch]; extra == 'all'
|
|
28
|
+
Requires-Dist: pfnstudio-studio>=0.4.0; extra == 'all'
|
|
29
|
+
Requires-Dist: runpod>=1.6; extra == 'all'
|
|
30
|
+
Requires-Dist: wandb>=0.16; extra == 'all'
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
35
|
+
Provides-Extra: hf
|
|
36
|
+
Requires-Dist: huggingface-hub>=0.20; extra == 'hf'
|
|
37
|
+
Provides-Extra: mlflow
|
|
38
|
+
Requires-Dist: mlflow>=2.10; extra == 'mlflow'
|
|
39
|
+
Provides-Extra: modal
|
|
40
|
+
Requires-Dist: modal>=0.62; extra == 'modal'
|
|
41
|
+
Provides-Extra: runpod
|
|
42
|
+
Requires-Dist: runpod>=1.6; extra == 'runpod'
|
|
43
|
+
Provides-Extra: studio
|
|
44
|
+
Requires-Dist: pfnstudio-studio>=0.4.0; extra == 'studio'
|
|
45
|
+
Provides-Extra: torch
|
|
46
|
+
Requires-Dist: pfnstudio-core[torch]; extra == 'torch'
|
|
47
|
+
Provides-Extra: wandb
|
|
48
|
+
Requires-Dist: wandb>=0.16; extra == 'wandb'
|
|
49
|
+
Description-Content-Type: text/markdown
|
|
50
|
+
|
|
51
|
+
# priorstudio — CLI
|
|
52
|
+
|
|
53
|
+
The command-line interface for [PFN Studio](https://github.com/profitopsai/pfnstudio),
|
|
54
|
+
the toolkit for training [prior-fitted foundation models](https://arxiv.org/abs/2112.10510).
|
|
55
|
+
|
|
56
|
+
## Install
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install pfnstudio
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
For training (requires PyTorch):
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install "pfnstudio[torch]"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Commands
|
|
69
|
+
|
|
70
|
+
```text
|
|
71
|
+
priorstudio init <dir> # scaffold a new FM project
|
|
72
|
+
priorstudio validate <path> # check artifacts against JSON Schema
|
|
73
|
+
priorstudio lint <project> # cross-reference + style checks
|
|
74
|
+
priorstudio sample <prior.yaml> # draw N tasks from a prior
|
|
75
|
+
priorstudio run <run.yaml> # execute a training run end-to-end
|
|
76
|
+
priorstudio predict <run-dir> # inference against a trained checkpoint
|
|
77
|
+
priorstudio export <project> # tar-gzipped project archive
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Run `priorstudio --help` for the full list and `<cmd> --help` for each
|
|
81
|
+
subcommand's flags.
|
|
82
|
+
|
|
83
|
+
## What this CLI is for
|
|
84
|
+
|
|
85
|
+
PFN Studio organises every PFN project around five first-class
|
|
86
|
+
artifacts: **priors** (synthetic data generators), **models** (block
|
|
87
|
+
compositions), **evals** (benchmarks + metrics), **runs** (training
|
|
88
|
+
manifests), and **initiatives** (research workstreams). This CLI
|
|
89
|
+
operates on the file layout those artifacts produce — scaffolding new
|
|
90
|
+
projects, validating them, running training, and exporting them for
|
|
91
|
+
sharing.
|
|
92
|
+
|
|
93
|
+
The full story (concepts, architecture, examples, marketplace catalog)
|
|
94
|
+
lives at the main repo:
|
|
95
|
+
**[github.com/profitopsai/pfnstudio](https://github.com/profitopsai/pfnstudio)**
|
|
96
|
+
|
|
97
|
+
## License
|
|
98
|
+
|
|
99
|
+
Apache-2.0.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# priorstudio — CLI
|
|
2
|
+
|
|
3
|
+
The command-line interface for [PFN Studio](https://github.com/profitopsai/pfnstudio),
|
|
4
|
+
the toolkit for training [prior-fitted foundation models](https://arxiv.org/abs/2112.10510).
|
|
5
|
+
|
|
6
|
+
## Install
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
pip install pfnstudio
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
For training (requires PyTorch):
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install "pfnstudio[torch]"
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Commands
|
|
19
|
+
|
|
20
|
+
```text
|
|
21
|
+
priorstudio init <dir> # scaffold a new FM project
|
|
22
|
+
priorstudio validate <path> # check artifacts against JSON Schema
|
|
23
|
+
priorstudio lint <project> # cross-reference + style checks
|
|
24
|
+
priorstudio sample <prior.yaml> # draw N tasks from a prior
|
|
25
|
+
priorstudio run <run.yaml> # execute a training run end-to-end
|
|
26
|
+
priorstudio predict <run-dir> # inference against a trained checkpoint
|
|
27
|
+
priorstudio export <project> # tar-gzipped project archive
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Run `priorstudio --help` for the full list and `<cmd> --help` for each
|
|
31
|
+
subcommand's flags.
|
|
32
|
+
|
|
33
|
+
## What this CLI is for
|
|
34
|
+
|
|
35
|
+
PFN Studio organises every PFN project around five first-class
|
|
36
|
+
artifacts: **priors** (synthetic data generators), **models** (block
|
|
37
|
+
compositions), **evals** (benchmarks + metrics), **runs** (training
|
|
38
|
+
manifests), and **initiatives** (research workstreams). This CLI
|
|
39
|
+
operates on the file layout those artifacts produce — scaffolding new
|
|
40
|
+
projects, validating them, running training, and exporting them for
|
|
41
|
+
sharing.
|
|
42
|
+
|
|
43
|
+
The full story (concepts, architecture, examples, marketplace catalog)
|
|
44
|
+
lives at the main repo:
|
|
45
|
+
**[github.com/profitopsai/pfnstudio](https://github.com/profitopsai/pfnstudio)**
|
|
46
|
+
|
|
47
|
+
## License
|
|
48
|
+
|
|
49
|
+
Apache-2.0.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Runtime-visible package version. Must match the distribution version
# published in the package metadata (PKG-INFO "Version" field).
__version__ = "0.7.0"
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://priorstudio.dev/schemas/eval.schema.json",
|
|
4
|
+
"title": "Eval",
|
|
5
|
+
"description": "A benchmark configuration. Reproducible across runs.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["id", "name", "version", "task", "dataset", "metrics"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"id": { "type": "string", "pattern": "^[a-z0-9_-]+$" },
|
|
10
|
+
"name": { "type": "string" },
|
|
11
|
+
"version": { "type": "string", "pattern": "^\\d+\\.\\d+\\.\\d+$" },
|
|
12
|
+
"task": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"enum": ["discovery", "estimation", "rca", "forecast", "classification", "custom"]
|
|
15
|
+
},
|
|
16
|
+
"dataset": {
|
|
17
|
+
"type": "object",
|
|
18
|
+
"required": ["name"],
|
|
19
|
+
"properties": {
|
|
20
|
+
"name": { "type": "string" },
|
|
21
|
+
"source": { "type": "string", "description": "URL or local path." },
|
|
22
|
+
"version": { "type": "string" },
|
|
23
|
+
"split": { "type": "string", "enum": ["train", "val", "test", "full"] }
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
"metrics": {
|
|
27
|
+
"type": "array",
|
|
28
|
+
"minItems": 1,
|
|
29
|
+
"items": {
|
|
30
|
+
"type": "object",
|
|
31
|
+
"required": ["name"],
|
|
32
|
+
"properties": {
|
|
33
|
+
"name": { "type": "string", "description": "e.g. 'shd', 'pehe', 'auroc'." },
|
|
34
|
+
"higher_is_better": { "type": "boolean" },
|
|
35
|
+
"description": { "type": "string" }
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
"baselines": {
|
|
40
|
+
"type": "array",
|
|
41
|
+
"items": {
|
|
42
|
+
"type": "object",
|
|
43
|
+
"required": ["name"],
|
|
44
|
+
"properties": {
|
|
45
|
+
"name": { "type": "string" },
|
|
46
|
+
"score": { "type": "number" },
|
|
47
|
+
"source": { "type": "string" }
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"citations": { "type": "array", "items": { "type": "string" } }
|
|
52
|
+
}
|
|
53
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://priorstudio.dev/schemas/initiative.schema.json",
|
|
4
|
+
"title": "Initiative",
|
|
5
|
+
"description": "A research workstream. Stored as a markdown file with this frontmatter.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["id", "title", "status", "version_target"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"id": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"pattern": "^[0-9]{4}-[a-z0-9-]+$",
|
|
12
|
+
"description": "Numeric prefix + slug, e.g. '0001-long-range-lags'."
|
|
13
|
+
},
|
|
14
|
+
"title": { "type": "string" },
|
|
15
|
+
"status": {
|
|
16
|
+
"type": "string",
|
|
17
|
+
"enum": ["proposed", "in_progress", "blocked", "done", "abandoned"]
|
|
18
|
+
},
|
|
19
|
+
"version_target": {
|
|
20
|
+
"type": "string",
|
|
21
|
+
"pattern": "^v\\d+\\.\\d+$",
|
|
22
|
+
"description": "Which roadmap version this lands in."
|
|
23
|
+
},
|
|
24
|
+
"owner": { "type": "string" },
|
|
25
|
+
"depends_on": {
|
|
26
|
+
"type": "array",
|
|
27
|
+
"items": { "type": "string" },
|
|
28
|
+
"description": "IDs of other initiatives."
|
|
29
|
+
},
|
|
30
|
+
"tags": {
|
|
31
|
+
"type": "array",
|
|
32
|
+
"items": { "type": "string" }
|
|
33
|
+
},
|
|
34
|
+
"created": { "type": "string", "format": "date" },
|
|
35
|
+
"updated": { "type": "string", "format": "date" }
|
|
36
|
+
}
|
|
37
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://priorstudio.dev/schemas/model.schema.json",
|
|
4
|
+
"title": "Model",
|
|
5
|
+
"description": "An architecture config. Composable from blocks in core/.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["id", "name", "version", "blocks"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"id": { "type": "string", "pattern": "^[a-z0-9_-]+$" },
|
|
10
|
+
"name": { "type": "string" },
|
|
11
|
+
"version": { "type": "string", "pattern": "^\\d+\\.\\d+\\.\\d+$" },
|
|
12
|
+
"description": { "type": "string" },
|
|
13
|
+
"blocks": {
|
|
14
|
+
"type": "array",
|
|
15
|
+
"description": "Ordered architecture blocks. Each names a registered block type and its config.",
|
|
16
|
+
"items": {
|
|
17
|
+
"type": "object",
|
|
18
|
+
"required": ["type"],
|
|
19
|
+
"properties": {
|
|
20
|
+
"type": { "type": "string", "description": "Block type, e.g. 'temporal_encoder', 'causal_attention', 'treatment_head'." },
|
|
21
|
+
"name": { "type": "string", "description": "Optional alias for referencing this block elsewhere." },
|
|
22
|
+
"config": { "type": "object", "additionalProperties": true }
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
"input_shape": { "type": "string", "description": "Symbolic, e.g. '(B, T, D)'." },
|
|
27
|
+
"output_heads": {
|
|
28
|
+
"type": "array",
|
|
29
|
+
"items": {
|
|
30
|
+
"type": "object",
|
|
31
|
+
"required": ["name", "task"],
|
|
32
|
+
"properties": {
|
|
33
|
+
"name": { "type": "string" },
|
|
34
|
+
"task": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"enum": ["discovery", "estimation", "rca", "forecast", "classification", "custom"]
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
},
|
|
41
|
+
"citations": { "type": "array", "items": { "type": "string" } }
|
|
42
|
+
}
|
|
43
|
+
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://priorstudio.dev/schemas/prior.schema.json",
|
|
4
|
+
"title": "Prior",
|
|
5
|
+
"description": "A synthetic data generator for a prior-fitted foundation model. Each prior has this YAML spec plus a Python implementation in the same directory.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["id", "name", "version", "kind", "parameters", "outputs"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"id": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"pattern": "^[a-z0-9_-]+$",
|
|
12
|
+
"description": "Stable identifier, kebab/snake-case. Must match directory name."
|
|
13
|
+
},
|
|
14
|
+
"name": { "type": "string" },
|
|
15
|
+
"version": {
|
|
16
|
+
"type": "string",
|
|
17
|
+
"pattern": "^\\d+\\.\\d+\\.\\d+$",
|
|
18
|
+
"description": "Semver. Bump major when sampling distribution changes."
|
|
19
|
+
},
|
|
20
|
+
"kind": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"enum": ["scm", "tabular", "temporal", "temporal_causal", "graph", "custom"],
|
|
23
|
+
"description": "What family of structure the prior generates."
|
|
24
|
+
},
|
|
25
|
+
"description": { "type": "string" },
|
|
26
|
+
"parameters": {
|
|
27
|
+
"type": "object",
|
|
28
|
+
"description": "Hyperparameters of the prior. Keys are parameter names; values describe sampling.",
|
|
29
|
+
"additionalProperties": {
|
|
30
|
+
"type": "object",
|
|
31
|
+
"required": ["type"],
|
|
32
|
+
"properties": {
|
|
33
|
+
"type": {
|
|
34
|
+
"type": "string",
|
|
35
|
+
"enum": ["int", "float", "categorical", "bool"]
|
|
36
|
+
},
|
|
37
|
+
"range": {
|
|
38
|
+
"type": "array",
|
|
39
|
+
"minItems": 2,
|
|
40
|
+
"maxItems": 2,
|
|
41
|
+
"description": "[min, max] for int/float."
|
|
42
|
+
},
|
|
43
|
+
"choices": {
|
|
44
|
+
"type": "array",
|
|
45
|
+
"description": "Values for categorical."
|
|
46
|
+
},
|
|
47
|
+
"default": {},
|
|
48
|
+
"description": { "type": "string" }
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
},
|
|
52
|
+
"outputs": {
|
|
53
|
+
"type": "object",
|
|
54
|
+
"description": "Shape of one sample from the prior.",
|
|
55
|
+
"required": ["variables"],
|
|
56
|
+
"properties": {
|
|
57
|
+
"variables": {
|
|
58
|
+
"type": "array",
|
|
59
|
+
"items": {
|
|
60
|
+
"type": "object",
|
|
61
|
+
"required": ["name", "type"],
|
|
62
|
+
"properties": {
|
|
63
|
+
"name": { "type": "string" },
|
|
64
|
+
"type": {
|
|
65
|
+
"type": "string",
|
|
66
|
+
"enum": ["scalar", "tensor", "graph", "matrix"]
|
|
67
|
+
},
|
|
68
|
+
"shape": { "type": "string", "description": "Symbolic shape, e.g. '(T, D)'." },
|
|
69
|
+
"description": { "type": "string" }
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
"citations": {
|
|
76
|
+
"type": "array",
|
|
77
|
+
"items": { "type": "string" },
|
|
78
|
+
"description": "BibTeX keys from literature/references.bib."
|
|
79
|
+
},
|
|
80
|
+
"implementation": {
|
|
81
|
+
"type": "string",
|
|
82
|
+
"description": "Path to Python module (relative to prior directory).",
|
|
83
|
+
"default": "prior.py"
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://priorstudio.dev/schemas/run.schema.json",
|
|
4
|
+
"title": "Run",
|
|
5
|
+
"description": "An experiment manifest. Pins a prior, model, eval, hyperparams, and compute target.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["id", "prior", "model", "evals", "hyperparams"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"id": { "type": "string", "pattern": "^[a-z0-9_-]+$" },
|
|
10
|
+
"description": { "type": "string" },
|
|
11
|
+
"prior": {
|
|
12
|
+
"type": "object",
|
|
13
|
+
"required": ["id", "version"],
|
|
14
|
+
"properties": {
|
|
15
|
+
"id": { "type": "string" },
|
|
16
|
+
"version": { "type": "string" },
|
|
17
|
+
"overrides": { "type": "object", "description": "Parameter overrides for this run." }
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"model": {
|
|
21
|
+
"type": "object",
|
|
22
|
+
"required": ["id", "version"],
|
|
23
|
+
"properties": {
|
|
24
|
+
"id": { "type": "string" },
|
|
25
|
+
"version": { "type": "string" }
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"evals": {
|
|
29
|
+
"type": "array",
|
|
30
|
+
"minItems": 1,
|
|
31
|
+
"items": {
|
|
32
|
+
"type": "object",
|
|
33
|
+
"required": ["id", "version"],
|
|
34
|
+
"properties": {
|
|
35
|
+
"id": { "type": "string" },
|
|
36
|
+
"version": { "type": "string" }
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
"hyperparams": {
|
|
41
|
+
"type": "object",
|
|
42
|
+
"additionalProperties": true,
|
|
43
|
+
"description": "Free-form, but conventionally: lr, batch_size, steps, seed, optimizer."
|
|
44
|
+
},
|
|
45
|
+
"compute": {
|
|
46
|
+
"type": "object",
|
|
47
|
+
"properties": {
|
|
48
|
+
"target": { "type": "string", "enum": ["local", "vast", "modal", "runpod", "hf_spaces", "custom"] },
|
|
49
|
+
"gpu": { "type": "string", "description": "e.g. 'A100-80GB', 'H100'." },
|
|
50
|
+
"num_gpus": { "type": "integer", "minimum": 1 }
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
"tracking": {
|
|
54
|
+
"type": "object",
|
|
55
|
+
"properties": {
|
|
56
|
+
"wandb_project": { "type": "string" },
|
|
57
|
+
"wandb_run_id": { "type": "string" },
|
|
58
|
+
"hf_repo": { "type": "string" }
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
"results": {
|
|
62
|
+
"type": "object",
|
|
63
|
+
"additionalProperties": true,
|
|
64
|
+
"description": "Filled in after the run completes."
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# {{project_name}}
|
|
2
|
+
|
|
3
|
+
> {{one_line_description}}
|
|
4
|
+
|
|
5
|
+
A prior-fitted foundation model project, scaffolded with [PFN Studio](https://github.com/{{org}}/priorstudio).
|
|
6
|
+
|
|
7
|
+
## Layout
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
.
|
|
11
|
+
├── ROADMAP.md versioned plan (v0.1 → v1.0)
|
|
12
|
+
├── initiatives/ one .md per workstream
|
|
13
|
+
├── priors/ YAML spec + Python impl per prior
|
|
14
|
+
├── models/ architecture configs
|
|
15
|
+
├── evals/ benchmark configs
|
|
16
|
+
├── literature/ BibTeX + per-paper summaries
|
|
17
|
+
└── runs/ experiment manifests
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Workflow
|
|
21
|
+
|
|
22
|
+
1. **Add a prior.** Each prior is a directory under `priors/` with a `prior.yaml` (spec) and `prior.py` (implementation). See `priors/example_linear_scm/`.
|
|
23
|
+
2. **Add a model.** YAML config under `models/` describing the block composition.
|
|
24
|
+
3. **Add an eval.** YAML config under `evals/` pinning a dataset, metrics, and baselines.
|
|
25
|
+
4. **Define a run.** YAML manifest under `runs/` linking a prior + model + eval + hyperparams + compute target.
|
|
26
|
+
5. **Track it as an initiative.** Add a markdown file under `initiatives/` and link from `ROADMAP.md`.
|
|
27
|
+
|
|
28
|
+
## Validate
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
priorstudio validate
|
|
32
|
+
priorstudio lint # cross-reference checks
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Run
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
priorstudio run runs/example_run.yaml
|
|
39
|
+
```
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# {{project_name}} Roadmap
|
|
2
|
+
|
|
3
|
+
## v0.1 — Bootstrap
|
|
4
|
+
|
|
5
|
+
**Goal:** first prior, first model, first eval round-trip end-to-end.
|
|
6
|
+
|
|
7
|
+
- [ ] [0001-define-base-prior](initiatives/0001-define-base-prior.md)
|
|
8
|
+
- [ ] [0002-baseline-architecture](initiatives/0002-baseline-architecture.md)
|
|
9
|
+
- [ ] [0003-baseline-eval](initiatives/0003-baseline-eval.md)
|
|
10
|
+
|
|
11
|
+
## v0.2 — Iteration
|
|
12
|
+
|
|
13
|
+
**Goal:** prior families and architecture variants, comparable on a fixed eval suite.
|
|
14
|
+
|
|
15
|
+
- [ ] Prior variants
|
|
16
|
+
- [ ] Architecture variants
|
|
17
|
+
- [ ] Benchmark suite expansion
|
|
18
|
+
|
|
19
|
+
## v1.0 — Release
|
|
20
|
+
|
|
21
|
+
**Goal:** publishable model checkpoint + paper-ready results.
|
|
22
|
+
|
|
23
|
+
- [ ] Model card
|
|
24
|
+
- [ ] Reproducibility audit
|
|
25
|
+
- [ ] HuggingFace release
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
id: example_sachs
|
|
2
|
+
name: Sachs protein-signaling benchmark
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
task: discovery
|
|
5
|
+
description: |
|
|
6
|
+
Causal discovery on the Sachs et al. (2005) protein-signaling dataset.
|
|
7
|
+
Standard benchmark — 11 nodes, ~17 ground-truth edges depending on accepted
|
|
8
|
+
consensus graph.
|
|
9
|
+
|
|
10
|
+
dataset:
|
|
11
|
+
name: sachs
|
|
12
|
+
source: https://www.science.org/doi/10.1126/science.1105809
|
|
13
|
+
version: "2005"
|
|
14
|
+
split: full
|
|
15
|
+
|
|
16
|
+
metrics:
|
|
17
|
+
- name: shd
|
|
18
|
+
higher_is_better: false
|
|
19
|
+
description: Structural Hamming Distance to ground-truth DAG.
|
|
20
|
+
- name: f1
|
|
21
|
+
higher_is_better: true
|
|
22
|
+
description: F1 over edge presence.
|
|
23
|
+
|
|
24
|
+
baselines:
|
|
25
|
+
- name: PC
|
|
26
|
+
score: 17
|
|
27
|
+
source: tsamardinos2019
|
|
28
|
+
- name: GES
|
|
29
|
+
score: 18
|
|
30
|
+
source: tsamardinos2019
|
|
31
|
+
- name: NOTEARS
|
|
32
|
+
score: 14
|
|
33
|
+
source: zheng2018notears
|
|
34
|
+
|
|
35
|
+
citations:
|
|
36
|
+
- sachs2005causal
|
|
37
|
+
- zheng2018notears
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: 0001-define-base-prior
|
|
3
|
+
title: Define base prior
|
|
4
|
+
status: in_progress
|
|
5
|
+
version_target: v0.1
|
|
6
|
+
owner: tbd
|
|
7
|
+
tags: [prior]
|
|
8
|
+
created: 2026-05-09
|
|
9
|
+
updated: 2026-05-09
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# Define base prior
|
|
13
|
+
|
|
14
|
+
## Motivation
|
|
15
|
+
|
|
16
|
+
Why this prior exists. What real-world phenomenon it's a synthetic stand-in for. What identifiability properties matter.
|
|
17
|
+
|
|
18
|
+
## Acceptance criteria
|
|
19
|
+
|
|
20
|
+
- [ ] `prior.yaml` and `prior.py` in `priors/<id>/`, validated against schema
|
|
21
|
+
- [ ] Sampling reproducible from fixed seed
|
|
22
|
+
- [ ] Documented in `priors/<id>/prior.md` (the *why*)
|
|
23
|
+
|
|
24
|
+
## Open questions
|
|
25
|
+
|
|
26
|
+
- Parameter ranges?
|
|
27
|
+
- Identifiability?
|
|
28
|
+
- Computational cost per sample?
|
|
29
|
+
|
|
30
|
+
## References
|
|
31
|
+
|
|
32
|
+
- See `literature/references.bib`
|