pfnstudio 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. pfnstudio/__init__.py +1 -0
  2. pfnstudio/_bundled/.gitignore +4 -0
  3. pfnstudio/_bundled/schemas/eval.schema.json +53 -0
  4. pfnstudio/_bundled/schemas/initiative.schema.json +37 -0
  5. pfnstudio/_bundled/schemas/model.schema.json +43 -0
  6. pfnstudio/_bundled/schemas/prior.schema.json +86 -0
  7. pfnstudio/_bundled/schemas/run.schema.json +67 -0
  8. pfnstudio/_bundled/templates/fm-project/README.md +39 -0
  9. pfnstudio/_bundled/templates/fm-project/ROADMAP.md +25 -0
  10. pfnstudio/_bundled/templates/fm-project/evals/example_sachs.yaml +37 -0
  11. pfnstudio/_bundled/templates/fm-project/initiatives/0001-define-base-prior.md +32 -0
  12. pfnstudio/_bundled/templates/fm-project/literature/references.bib +37 -0
  13. pfnstudio/_bundled/templates/fm-project/literature/summaries/mueller2022pfn.md +27 -0
  14. pfnstudio/_bundled/templates/fm-project/models/example_transformer.yaml +30 -0
  15. pfnstudio/_bundled/templates/fm-project/priors/example_linear_scm/prior.md +19 -0
  16. pfnstudio/_bundled/templates/fm-project/priors/example_linear_scm/prior.py +54 -0
  17. pfnstudio/_bundled/templates/fm-project/priors/example_linear_scm/prior.yaml +51 -0
  18. pfnstudio/_bundled/templates/fm-project/runs/example_run.yaml +32 -0
  19. pfnstudio/_paths.py +37 -0
  20. pfnstudio/cli.py +809 -0
  21. pfnstudio/compute/__init__.py +27 -0
  22. pfnstudio/compute/base.py +17 -0
  23. pfnstudio/compute/hf_spaces.py +43 -0
  24. pfnstudio/compute/local.py +214 -0
  25. pfnstudio/compute/modal.py +36 -0
  26. pfnstudio/compute/remote.py +422 -0
  27. pfnstudio/compute/runpod.py +38 -0
  28. pfnstudio/compute/vast.py +1039 -0
  29. pfnstudio/lint.py +94 -0
  30. pfnstudio/scaffold.py +32 -0
  31. pfnstudio/tracking/__init__.py +25 -0
  32. pfnstudio/tracking/base.py +20 -0
  33. pfnstudio/tracking/local.py +32 -0
  34. pfnstudio/tracking/mlflow.py +57 -0
  35. pfnstudio/tracking/wandb.py +35 -0
  36. pfnstudio/validate.py +83 -0
  37. pfnstudio-0.7.0.dist-info/METADATA +99 -0
  38. pfnstudio-0.7.0.dist-info/RECORD +40 -0
  39. pfnstudio-0.7.0.dist-info/WHEEL +4 -0
  40. pfnstudio-0.7.0.dist-info/entry_points.txt +2 -0
pfnstudio/__init__.py ADDED
@@ -0,0 +1 @@
1
# Package version string. It must match the version in the distribution
# metadata (this wheel and its dist-info are 0.7.0).
__version__ = "0.7.0"
@@ -0,0 +1,4 @@
1
+ # Populated at build time by ../../scripts/sync-bundled.sh.
2
+ # The canonical sources live at the repo root (schemas/, templates/).
3
+ *
4
+ !.gitignore
@@ -0,0 +1,53 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://priorstudio.dev/schemas/eval.schema.json",
4
+ "title": "Eval",
5
+ "description": "A benchmark configuration. Reproducible across runs.",
6
+ "type": "object",
7
+ "required": ["id", "name", "version", "task", "dataset", "metrics"],
8
+ "properties": {
9
+ "id": { "type": "string", "pattern": "^[a-z0-9_-]+$" },
10
+ "name": { "type": "string" },
11
+ "version": { "type": "string", "pattern": "^\\d+\\.\\d+\\.\\d+$" },
12
+ "task": {
13
+ "type": "string",
14
+ "enum": ["discovery", "estimation", "rca", "forecast", "classification", "custom"]
15
+ },
16
+ "dataset": {
17
+ "type": "object",
18
+ "required": ["name"],
19
+ "properties": {
20
+ "name": { "type": "string" },
21
+ "source": { "type": "string", "description": "URL or local path." },
22
+ "version": { "type": "string" },
23
+ "split": { "type": "string", "enum": ["train", "val", "test", "full"] }
24
+ }
25
+ },
26
+ "metrics": {
27
+ "type": "array",
28
+ "minItems": 1,
29
+ "items": {
30
+ "type": "object",
31
+ "required": ["name"],
32
+ "properties": {
33
+ "name": { "type": "string", "description": "e.g. 'shd', 'pehe', 'auroc'." },
34
+ "higher_is_better": { "type": "boolean" },
35
+ "description": { "type": "string" }
36
+ }
37
+ }
38
+ },
39
+ "baselines": {
40
+ "type": "array",
41
+ "items": {
42
+ "type": "object",
43
+ "required": ["name"],
44
+ "properties": {
45
+ "name": { "type": "string" },
46
+ "score": { "type": "number" },
47
+ "source": { "type": "string" }
48
+ }
49
+ }
50
+ },
51
+ "citations": { "type": "array", "items": { "type": "string" } }
52
+ }
53
+ }
@@ -0,0 +1,37 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://priorstudio.dev/schemas/initiative.schema.json",
4
+ "title": "Initiative",
5
+ "description": "A research workstream. Stored as a markdown file with this frontmatter.",
6
+ "type": "object",
7
+ "required": ["id", "title", "status", "version_target"],
8
+ "properties": {
9
+ "id": {
10
+ "type": "string",
11
+ "pattern": "^[0-9]{4}-[a-z0-9-]+$",
12
+ "description": "Numeric prefix + slug, e.g. '0001-long-range-lags'."
13
+ },
14
+ "title": { "type": "string" },
15
+ "status": {
16
+ "type": "string",
17
+ "enum": ["proposed", "in_progress", "blocked", "done", "abandoned"]
18
+ },
19
+ "version_target": {
20
+ "type": "string",
21
+ "pattern": "^v\\d+\\.\\d+$",
22
+ "description": "Which roadmap version this lands in."
23
+ },
24
+ "owner": { "type": "string" },
25
+ "depends_on": {
26
+ "type": "array",
27
+ "items": { "type": "string" },
28
+ "description": "IDs of other initiatives."
29
+ },
30
+ "tags": {
31
+ "type": "array",
32
+ "items": { "type": "string" }
33
+ },
34
+ "created": { "type": "string", "format": "date" },
35
+ "updated": { "type": "string", "format": "date" }
36
+ }
37
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://priorstudio.dev/schemas/model.schema.json",
4
+ "title": "Model",
5
+ "description": "An architecture config. Composable from blocks in core/.",
6
+ "type": "object",
7
+ "required": ["id", "name", "version", "blocks"],
8
+ "properties": {
9
+ "id": { "type": "string", "pattern": "^[a-z0-9_-]+$" },
10
+ "name": { "type": "string" },
11
+ "version": { "type": "string", "pattern": "^\\d+\\.\\d+\\.\\d+$" },
12
+ "description": { "type": "string" },
13
+ "blocks": {
14
+ "type": "array",
15
+ "description": "Ordered architecture blocks. Each names a registered block type and its config.",
16
+ "items": {
17
+ "type": "object",
18
+ "required": ["type"],
19
+ "properties": {
20
+ "type": { "type": "string", "description": "Block type, e.g. 'temporal_encoder', 'causal_attention', 'treatment_head'." },
21
+ "name": { "type": "string", "description": "Optional alias for referencing this block elsewhere." },
22
+ "config": { "type": "object", "additionalProperties": true }
23
+ }
24
+ }
25
+ },
26
+ "input_shape": { "type": "string", "description": "Symbolic, e.g. '(B, T, D)'." },
27
+ "output_heads": {
28
+ "type": "array",
29
+ "items": {
30
+ "type": "object",
31
+ "required": ["name", "task"],
32
+ "properties": {
33
+ "name": { "type": "string" },
34
+ "task": {
35
+ "type": "string",
36
+ "enum": ["discovery", "estimation", "rca", "forecast", "classification", "custom"]
37
+ }
38
+ }
39
+ }
40
+ },
41
+ "citations": { "type": "array", "items": { "type": "string" } }
42
+ }
43
+ }
@@ -0,0 +1,86 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://priorstudio.dev/schemas/prior.schema.json",
4
+ "title": "Prior",
5
+ "description": "A synthetic data generator for a prior-fitted foundation model. Each prior has this YAML spec plus a Python implementation in the same directory.",
6
+ "type": "object",
7
+ "required": ["id", "name", "version", "kind", "parameters", "outputs"],
8
+ "properties": {
9
+ "id": {
10
+ "type": "string",
11
+ "pattern": "^[a-z0-9_-]+$",
12
+ "description": "Stable identifier, kebab/snake-case. Must match directory name."
13
+ },
14
+ "name": { "type": "string" },
15
+ "version": {
16
+ "type": "string",
17
+ "pattern": "^\\d+\\.\\d+\\.\\d+$",
18
+ "description": "Semver. Bump major when sampling distribution changes."
19
+ },
20
+ "kind": {
21
+ "type": "string",
22
+ "enum": ["scm", "tabular", "temporal", "temporal_causal", "graph", "custom"],
23
+ "description": "What family of structure the prior generates."
24
+ },
25
+ "description": { "type": "string" },
26
+ "parameters": {
27
+ "type": "object",
28
+ "description": "Hyperparameters of the prior. Keys are parameter names; values describe sampling.",
29
+ "additionalProperties": {
30
+ "type": "object",
31
+ "required": ["type"],
32
+ "properties": {
33
+ "type": {
34
+ "type": "string",
35
+ "enum": ["int", "float", "categorical", "bool"]
36
+ },
37
+ "range": {
38
+ "type": "array",
39
+ "minItems": 2,
40
+ "maxItems": 2,
41
+ "description": "[min, max] for int/float."
42
+ },
43
+ "choices": {
44
+ "type": "array",
45
+ "description": "Values for categorical."
46
+ },
47
+ "default": {},
48
+ "description": { "type": "string" }
49
+ }
50
+ }
51
+ },
52
+ "outputs": {
53
+ "type": "object",
54
+ "description": "Shape of one sample from the prior.",
55
+ "required": ["variables"],
56
+ "properties": {
57
+ "variables": {
58
+ "type": "array",
59
+ "items": {
60
+ "type": "object",
61
+ "required": ["name", "type"],
62
+ "properties": {
63
+ "name": { "type": "string" },
64
+ "type": {
65
+ "type": "string",
66
+ "enum": ["scalar", "tensor", "graph", "matrix"]
67
+ },
68
+ "shape": { "type": "string", "description": "Symbolic shape, e.g. '(T, D)'." },
69
+ "description": { "type": "string" }
70
+ }
71
+ }
72
+ }
73
+ }
74
+ },
75
+ "citations": {
76
+ "type": "array",
77
+ "items": { "type": "string" },
78
+ "description": "BibTeX keys from literature/references.bib."
79
+ },
80
+ "implementation": {
81
+ "type": "string",
82
+ "description": "Path to Python module (relative to prior directory).",
83
+ "default": "prior.py"
84
+ }
85
+ }
86
+ }
@@ -0,0 +1,67 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://priorstudio.dev/schemas/run.schema.json",
4
+ "title": "Run",
5
+ "description": "An experiment manifest. Pins a prior, model, eval, hyperparams, and compute target.",
6
+ "type": "object",
7
+ "required": ["id", "prior", "model", "evals", "hyperparams"],
8
+ "properties": {
9
+ "id": { "type": "string", "pattern": "^[a-z0-9_-]+$" },
10
+ "description": { "type": "string" },
11
+ "prior": {
12
+ "type": "object",
13
+ "required": ["id", "version"],
14
+ "properties": {
15
+ "id": { "type": "string" },
16
+ "version": { "type": "string" },
17
+ "overrides": { "type": "object", "description": "Parameter overrides for this run." }
18
+ }
19
+ },
20
+ "model": {
21
+ "type": "object",
22
+ "required": ["id", "version"],
23
+ "properties": {
24
+ "id": { "type": "string" },
25
+ "version": { "type": "string" }
26
+ }
27
+ },
28
+ "evals": {
29
+ "type": "array",
30
+ "minItems": 1,
31
+ "items": {
32
+ "type": "object",
33
+ "required": ["id", "version"],
34
+ "properties": {
35
+ "id": { "type": "string" },
36
+ "version": { "type": "string" }
37
+ }
38
+ }
39
+ },
40
+ "hyperparams": {
41
+ "type": "object",
42
+ "additionalProperties": true,
43
+ "description": "Free-form, but conventionally: lr, batch_size, steps, seed, optimizer."
44
+ },
45
+ "compute": {
46
+ "type": "object",
47
+ "properties": {
48
+ "target": { "type": "string", "enum": ["local", "vast", "modal", "runpod", "hf_spaces", "custom"] },
49
+ "gpu": { "type": "string", "description": "e.g. 'A100-80GB', 'H100'." },
50
+ "num_gpus": { "type": "integer", "minimum": 1 }
51
+ }
52
+ },
53
+ "tracking": {
54
+ "type": "object",
55
+ "properties": {
56
+ "wandb_project": { "type": "string" },
57
+ "wandb_run_id": { "type": "string" },
58
+ "hf_repo": { "type": "string" }
59
+ }
60
+ },
61
+ "results": {
62
+ "type": "object",
63
+ "additionalProperties": true,
64
+ "description": "Filled in after the run completes."
65
+ }
66
+ }
67
+ }
@@ -0,0 +1,39 @@
1
+ # {{project_name}}
2
+
3
+ > {{one_line_description}}
4
+
5
+ A prior-fitted foundation model project, scaffolded with [PFN Studio](https://github.com/{{org}}/priorstudio).
6
+
7
+ ## Layout
8
+
9
+ ```
10
+ .
11
+ ├── ROADMAP.md versioned plan (v0.1 → v1.0)
12
+ ├── initiatives/ one .md per workstream
13
+ ├── priors/ YAML spec + Python impl per prior
14
+ ├── models/ architecture configs
15
+ ├── evals/ benchmark configs
16
+ ├── literature/ BibTeX + per-paper summaries
17
+ └── runs/ experiment manifests
18
+ ```
19
+
20
+ ## Workflow
21
+
22
+ 1. **Add a prior.** Each prior is a directory under `priors/` with a `prior.yaml` (spec) and `prior.py` (implementation). See `priors/example_linear_scm/`.
23
+ 2. **Add a model.** YAML config under `models/` describing the block composition.
24
+ 3. **Add an eval.** YAML config under `evals/` pinning a dataset, metrics, and baselines.
25
+ 4. **Define a run.** YAML manifest under `runs/` linking a prior + model + eval + hyperparams + compute target.
26
+ 5. **Track it as an initiative.** Add a markdown file under `initiatives/` and link from `ROADMAP.md`.
27
+
28
+ ## Validate
29
+
30
+ ```bash
31
+ priorstudio validate
32
+ priorstudio lint # cross-reference checks
33
+ ```
34
+
35
+ ## Run
36
+
37
+ ```bash
38
+ priorstudio run runs/example_run.yaml
39
+ ```
@@ -0,0 +1,25 @@
1
+ # {{project_name}} Roadmap
2
+
3
+ ## v0.1 — Bootstrap
4
+
5
+ **Goal:** first prior, first model, first eval round-trip end-to-end.
6
+
7
+ - [ ] [0001-define-base-prior](initiatives/0001-define-base-prior.md)
8
+ - [ ] [0002-baseline-architecture](initiatives/0002-baseline-architecture.md)
9
+ - [ ] [0003-baseline-eval](initiatives/0003-baseline-eval.md)
10
+
11
+ ## v0.2 — Iteration
12
+
13
+ **Goal:** prior families and architecture variants, comparable on a fixed eval suite.
14
+
15
+ - [ ] Prior variants
16
+ - [ ] Architecture variants
17
+ - [ ] Benchmark suite expansion
18
+
19
+ ## v1.0 — Release
20
+
21
+ **Goal:** publishable model checkpoint + paper-ready results.
22
+
23
+ - [ ] Model card
24
+ - [ ] Reproducibility audit
25
+ - [ ] HuggingFace release
@@ -0,0 +1,37 @@
1
+ id: example_sachs
2
+ name: Sachs protein-signaling benchmark
3
+ version: 0.1.0
4
+ task: discovery
5
+ description: |
6
+ Causal discovery on the Sachs et al. (2005) protein-signaling dataset.
7
+ Standard benchmark — 11 nodes, ~17 ground-truth edges depending on accepted
8
+ consensus graph.
9
+
10
+ dataset:
11
+ name: sachs
12
+ source: https://www.science.org/doi/10.1126/science.1105809
13
+ version: "2005"
14
+ split: full
15
+
16
+ metrics:
17
+ - name: shd
18
+ higher_is_better: false
19
+ description: Structural Hamming Distance to ground-truth DAG.
20
+ - name: f1
21
+ higher_is_better: true
22
+ description: F1 over edge presence.
23
+
24
+ baselines:
25
+ - name: PC
26
+ score: 17
27
+ source: tsamardinos2019
28
+ - name: GES
29
+ score: 18
30
+ source: tsamardinos2019
31
+ - name: NOTEARS
32
+ score: 14
33
+ source: zheng2018notears
34
+
35
+ citations:
36
+ - sachs2005causal
37
+ - zheng2018notears
@@ -0,0 +1,32 @@
1
+ ---
2
+ id: 0001-define-base-prior
3
+ title: Define base prior
4
+ status: in_progress
5
+ version_target: v0.1
6
+ owner: tbd
7
+ tags: [prior]
8
+ created: 2026-05-09
9
+ updated: 2026-05-09
10
+ ---
11
+
12
+ # Define base prior
13
+
14
+ ## Motivation
15
+
16
+ Why this prior exists. What real-world phenomenon it's a synthetic stand-in for. What identifiability properties matter.
17
+
18
+ ## Acceptance criteria
19
+
20
+ - [ ] `prior.yaml` and `prior.py` in `priors/<id>/`, validated against schema
21
+ - [ ] Sampling reproducible from fixed seed
22
+ - [ ] Documented in `priors/<id>/prior.md` (the *why*)
23
+
24
+ ## Open questions
25
+
26
+ - Parameter ranges?
27
+ - Identifiability?
28
+ - Computational cost per sample?
29
+
30
+ ## References
31
+
32
+ - See `literature/references.bib`
@@ -0,0 +1,37 @@
1
+ @article{mueller2022pfn,
2
+ title={Transformers Can Do Bayesian Inference},
3
+ author={M{\"u}ller, Samuel and Hollmann, Noah and Arango, Sebastian Pineda and Grabocka, Josif and Hutter, Frank},
4
+ journal={ICLR},
5
+ year={2022}
6
+ }
7
+
8
+ @book{peters2017elements,
9
+ title={Elements of Causal Inference: Foundations and Learning Algorithms},
10
+ author={Peters, Jonas and Janzing, Dominik and Sch{\"o}lkopf, Bernhard},
11
+ year={2017},
12
+ publisher={MIT Press}
13
+ }
14
+
15
+ @article{sachs2005causal,
16
+ title={Causal protein-signaling networks derived from multiparameter single-cell data},
17
+ author={Sachs, Karen and Perez, Omar and Pe'er, Dana and Lauffenburger, Douglas A and Nolan, Garry P},
18
+ journal={Science},
19
+ volume={308},
20
+ number={5721},
21
+ pages={523--529},
22
+ year={2005}
23
+ }
24
+
25
+ @article{zheng2018notears,
26
+ title={DAGs with NO TEARS: Continuous Optimization for Structure Learning},
27
+ author={Zheng, Xun and Aragam, Bryon and Ravikumar, Pradeep and Xing, Eric P},
28
+ journal={NeurIPS},
29
+ year={2018}
30
+ }
31
+
32
+ @article{tsamardinos2019,
33
+ title={A greedy feature selection algorithm for Big Data of high dimensionality},
34
+ author={Tsamardinos, Ioannis and others},
35
+ journal={Machine Learning},
36
+ year={2019}
37
+ }
@@ -0,0 +1,27 @@
1
+ ---
2
+ bibkey: mueller2022pfn
3
+ title: Transformers Can Do Bayesian Inference
4
+ year: 2022
5
+ relevance: foundational
6
+ ---
7
+
8
+ # Transformers Can Do Bayesian Inference (Müller et al., 2022)
9
+
10
+ ## TL;DR
11
+
12
+ Train a transformer on samples from a prior `p(D, y)`. At inference, the model approximates the posterior predictive `p(y* | x*, D)` for arbitrary held-out `(x*, y*)`. No fine-tuning needed.
13
+
14
+ ## Why it matters here
15
+
16
+ This is the foundational PFN paper. It sets up the training-on-priors paradigm that PFN Studio is built around. Read it before contributing priors.
17
+
18
+ ## Key ideas to carry forward
19
+
20
+ - **Synthetic prior is the dataset.** Real data only appears at inference.
21
+ - **Reproducibility via the prior spec.** Different prior = different model, by definition.
22
+ - **In-context learning.** Model conditions on `(D, x*)` and predicts `y*` directly.
23
+
24
+ ## Open questions / where this project diverges
25
+
26
+ - Original work is tabular; we generalize to temporal/causal structure.
27
+ - Original priors are simple GP/BNN families; project priors are domain-specific.
@@ -0,0 +1,30 @@
1
+ id: example_transformer
2
+ name: Example Transformer
3
+ version: 0.1.0
4
+ description: Minimal transformer for in-context inference on a tabular prior.
5
+
6
+ input_shape: "(B, N, D)"
7
+
8
+ blocks:
9
+ - type: tabular_embedder
10
+ name: embed
11
+ config:
12
+ d_model: 256
13
+ - type: transformer_encoder
14
+ name: encoder
15
+ config:
16
+ d_model: 256
17
+ n_heads: 8
18
+ n_layers: 6
19
+ dropout: 0.1
20
+ - type: causal_attention_pool
21
+ name: pool
22
+
23
+ output_heads:
24
+ - name: structure_head
25
+ task: discovery
26
+ - name: effect_head
27
+ task: estimation
28
+
29
+ citations:
30
+ - mueller2022pfn
@@ -0,0 +1,19 @@
1
+ # example_linear_scm
2
+
3
+ ## Why this prior
4
+
5
+ A minimal, well-understood prior for smoke-testing the training pipeline. From observational data alone, linear Gaussian SCMs are identifiable only up to their Markov equivalence class under faithfulness (and fully identifiable when all noise variances are equal), so a model trained on this prior should at least learn to recover that structure.
6
+
7
+ Replace this with the real prior for your project — typically more complex and tailored to the domain (temporal, non-linear, latent confounders, etc.).
8
+
9
+ ## Identifiability notes
10
+
11
+ - DAGs are sampled as upper-triangular adjacency, so topological order is fixed.
12
+ - Coefficients bounded away from zero (|w| ≥ 0.1) to ensure faithful edges.
13
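+ - Noise is additive Gaussian drawn with the single `noise_scale` parameter, so every variable shares the same noise standard deviation (`prior.py` does not sample per-variable scales).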
14
+
15
+ ## Known limitations
16
+
17
+ - No latent confounders.
18
+ - Only linear relationships.
19
+ - Observational only (no interventions).
@@ -0,0 +1,54 @@
1
+ """Example linear SCM prior — replace with your project's real prior.
2
+
3
+ This file shows the registration pattern. The CLI's `discover_in_project` walks
4
+ priors/, evals/, models/ and imports every .py file, which triggers these
5
+ decorators and populates the runtime registry.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from typing import Any
12
+
13
+ import numpy as np
14
+
15
+ from pfnstudio_core import Prior, register_prior
16
+
17
+
18
@dataclass
class LinearSCMSample:
    """Typed container with one array field each for ``X``, ``A`` and ``W``.

    NOTE(review): the field names match the keys of the dict returned by
    ``ExampleLinearSCMPrior.sample``, but nothing in the visible code
    constructs this class -- confirm whether it is still needed.
    """

    X: np.ndarray  # presumably the sampled data matrix -- TODO confirm
    A: np.ndarray  # presumably the binary adjacency matrix -- TODO confirm
    W: np.ndarray  # presumably the edge-weight matrix -- TODO confirm
23
+
24
+
25
@register_prior("example_linear_scm")
class ExampleLinearSCMPrior(Prior):
    """Example prior: a random linear SCM with additive Gaussian noise.

    Registered under the id ``example_linear_scm`` via ``register_prior``.
    """

    def sample(
        self,
        *,
        seed: int,
        num_variables: int = 11,
        edge_density: float = 0.3,
        coefficient_range: float = 1.0,
        noise_scale: float = 0.5,
        num_samples: int = 1000,
        **_: Any,
    ) -> dict[str, Any]:
        """Draw one random linear SCM and a data matrix generated from it.

        Args:
            seed: Seed for ``np.random.default_rng``; the same seed and
                arguments reproduce the same draw.
            num_variables: Number of variables ``d`` in the SCM.
            edge_density: Probability that each ordered pair ``i < j`` gets
                an edge ``i -> j``.
            coefficient_range: Edge weights are drawn uniformly from
                ``[-coefficient_range, coefficient_range)``.
            noise_scale: Standard deviation of the Gaussian noise, shared by
                every variable.
            num_samples: Number of rows in the generated data matrix.
            **_: Extra keyword arguments are accepted and ignored.

        Returns:
            A dict with float32 arrays:
            ``"X"`` of shape ``(num_samples, d)``, the generated data;
            ``"A"`` of shape ``(d, d)``, the 0/1 adjacency matrix where
            ``A[i, j] == 1`` means ``i`` is a parent of ``j``;
            ``"W"`` of shape ``(d, d)``, the edge weights (zero where
            ``A`` is zero).
        """
        rng = np.random.default_rng(seed)
        d = int(num_variables)

        # Keeping only the strict upper triangle guarantees acyclicity and
        # makes the index order 0..d-1 a topological order.
        A = (rng.uniform(size=(d, d)) < edge_density).astype(np.float32)
        A = np.triu(A, k=1)

        W = A * rng.uniform(-coefficient_range, coefficient_range, size=(d, d)).astype(np.float32)

        # Keep every existing edge at |w| >= floor. Clamp the magnitude but
        # preserve the sign, so small negative weights are not silently
        # flipped to positive.
        floor = 0.1
        small = (np.abs(W) < floor) & (A == 1)
        W[small] = np.where(W[small] < 0, -floor, floor)

        noise = rng.normal(0.0, noise_scale, size=(int(num_samples), d)).astype(np.float32)
        X = np.zeros_like(noise)
        # Fill columns in topological order: W[:, j] is non-zero only for
        # rows i < j, whose columns of X are already final.
        for j in range(d):
            X[:, j] = noise[:, j] + X @ W[:, j]

        return {"X": X, "A": A, "W": W}
@@ -0,0 +1,51 @@
1
+ id: example_linear_scm
2
+ name: Example Linear SCM Prior
3
+ version: 0.1.0
4
+ kind: scm
5
+ description: |
6
+ Sample a random linear structural causal model (SCM) over D variables.
7
+ For each sample: draw a random DAG, draw linear coefficients, draw noise scales,
8
+ generate observational data. Useful as a smoke-test prior — replace with the
9
+ real one for your project.
10
+
11
+ parameters:
12
+ num_variables:
13
+ type: int
14
+ range: [3, 20]
15
+ description: Number of nodes in the SCM.
16
+ edge_density:
17
+ type: float
18
+ range: [0.1, 0.5]
19
+ description: Probability of an edge between any two ordered nodes.
20
+ coefficient_range:
21
+ type: float
22
+ range: [0.5, 2.0]
23
+ description: Magnitude range for linear coefficients.
24
+ noise_scale:
25
+ type: float
26
+ range: [0.1, 1.0]
27
+ description: Std-dev of additive Gaussian noise on each variable.
28
+ num_samples:
29
+ type: int
30
+ range: [100, 5000]
31
+ description: Number of observational samples per task.
32
+
33
+ outputs:
34
+ variables:
35
+ - name: X
36
+ type: tensor
37
+ shape: "(num_samples, num_variables)"
38
+ description: Observational data.
39
+ - name: A
40
+ type: matrix
41
+ shape: "(num_variables, num_variables)"
42
+ description: Ground-truth adjacency matrix (binary).
43
+ - name: W
44
+ type: matrix
45
+ shape: "(num_variables, num_variables)"
46
+ description: Ground-truth weight matrix (real-valued).
47
+
48
+ citations:
49
+ - peters2017elements
50
+
51
+ implementation: prior.py