datadoom 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. datadoom/__init__.py +23 -0
  2. datadoom/adapters/__init__.py +29 -0
  3. datadoom/adapters/frameworks.py +94 -0
  4. datadoom/adapters/loaders.py +72 -0
  5. datadoom/api/__init__.py +11 -0
  6. datadoom/api/app.py +109 -0
  7. datadoom/api/deps.py +30 -0
  8. datadoom/api/errors.py +89 -0
  9. datadoom/api/estimate.py +82 -0
  10. datadoom/api/routes/__init__.py +7 -0
  11. datadoom/api/routes/artifacts.py +147 -0
  12. datadoom/api/routes/datasets.py +180 -0
  13. datadoom/api/routes/meta.py +45 -0
  14. datadoom/api/routes/plugins.py +22 -0
  15. datadoom/api/routes/runs.py +144 -0
  16. datadoom/api/routes/specs.py +73 -0
  17. datadoom/api/routes/templates.py +30 -0
  18. datadoom/api/schemas.py +230 -0
  19. datadoom/api/serializers.py +143 -0
  20. datadoom/api/state.py +24 -0
  21. datadoom/api/store_helpers.py +56 -0
  22. datadoom/api/ws.py +72 -0
  23. datadoom/cli/__init__.py +1 -0
  24. datadoom/cli/main.py +313 -0
  25. datadoom/config.py +108 -0
  26. datadoom/engine/__init__.py +38 -0
  27. datadoom/engine/advice.py +289 -0
  28. datadoom/engine/audit.py +290 -0
  29. datadoom/engine/causal/__init__.py +15 -0
  30. datadoom/engine/causal/execute.py +116 -0
  31. datadoom/engine/causal/functions.py +116 -0
  32. datadoom/engine/causal/graph.py +54 -0
  33. datadoom/engine/difficulty/__init__.py +36 -0
  34. datadoom/engine/difficulty/calibrate.py +235 -0
  35. datadoom/engine/difficulty/knobs.py +171 -0
  36. datadoom/engine/difficulty/probes.py +181 -0
  37. datadoom/engine/dist/__init__.py +35 -0
  38. datadoom/engine/dist/base.py +46 -0
  39. datadoom/engine/dist/builtins.py +172 -0
  40. datadoom/engine/dist/compliance.py +344 -0
  41. datadoom/engine/dist/providers.py +117 -0
  42. datadoom/engine/errors.py +32 -0
  43. datadoom/engine/export/__init__.py +27 -0
  44. datadoom/engine/export/base.py +49 -0
  45. datadoom/engine/export/checksums.py +18 -0
  46. datadoom/engine/export/csv_exporter.py +34 -0
  47. datadoom/engine/export/json_exporter.py +67 -0
  48. datadoom/engine/export/metadata.py +58 -0
  49. datadoom/engine/export/parquet_exporter.py +45 -0
  50. datadoom/engine/failure/__init__.py +18 -0
  51. datadoom/engine/failure/apply.py +37 -0
  52. datadoom/engine/failure/base.py +116 -0
  53. datadoom/engine/failure/modes.py +442 -0
  54. datadoom/engine/pipeline.py +418 -0
  55. datadoom/engine/profile.py +327 -0
  56. datadoom/engine/progress.py +14 -0
  57. datadoom/engine/reference.py +338 -0
  58. datadoom/engine/reports.py +206 -0
  59. datadoom/engine/rng.py +79 -0
  60. datadoom/engine/spec/__init__.py +45 -0
  61. datadoom/engine/spec/hashing.py +57 -0
  62. datadoom/engine/spec/models.py +238 -0
  63. datadoom/engine/spec/validate.py +345 -0
  64. datadoom/engine/timeseries.py +88 -0
  65. datadoom/jobs/__init__.py +14 -0
  66. datadoom/jobs/progress.py +155 -0
  67. datadoom/jobs/worker.py +162 -0
  68. datadoom/plugin.py +35 -0
  69. datadoom/plugins/__init__.py +47 -0
  70. datadoom/plugins/contracts.py +72 -0
  71. datadoom/plugins/loader.py +125 -0
  72. datadoom/plugins/registry.py +214 -0
  73. datadoom/plugins/scaffold.py +434 -0
  74. datadoom/store/__init__.py +47 -0
  75. datadoom/store/artifacts.py +67 -0
  76. datadoom/store/db.py +104 -0
  77. datadoom/store/migrations/__init__.py +0 -0
  78. datadoom/store/migrations/env.py +53 -0
  79. datadoom/store/migrations/script.py.mako +24 -0
  80. datadoom/store/migrations/versions/0001_init.py +149 -0
  81. datadoom/store/migrations/versions/0002_report_mutual_information.py +23 -0
  82. datadoom/store/migrations/versions/0003_run_name.py +23 -0
  83. datadoom/store/migrations/versions/0004_report_profile.py +24 -0
  84. datadoom/store/models.py +170 -0
  85. datadoom/store/repositories.py +279 -0
  86. datadoom/templates/__init__.py +239 -0
  87. datadoom/templates/ab_test.datadoom.yaml +46 -0
  88. datadoom/templates/clinical_deterioration.datadoom.yaml +124 -0
  89. datadoom/templates/credit_default_challenge.datadoom.yaml +147 -0
  90. datadoom/templates/customer_churn.datadoom.yaml +60 -0
  91. datadoom/templates/ecommerce_orders.datadoom.yaml +46 -0
  92. datadoom/templates/fraud_detection.datadoom.yaml +57 -0
  93. datadoom/templates/hospital_readmission.datadoom.yaml +61 -0
  94. datadoom/templates/insurance_claims.datadoom.yaml +43 -0
  95. datadoom/templates/iot_sensors.datadoom.yaml +44 -0
  96. datadoom/templates/people_directory.datadoom.yaml +56 -0
  97. datadoom/templates/predictive_maintenance.datadoom.yaml +107 -0
  98. datadoom/templates/telecom_churn_challenge.datadoom.yaml +125 -0
  99. datadoom/version.py +3 -0
  100. datadoom/webdist/assets/index-V8VAuTJG.js +445 -0
  101. datadoom/webdist/assets/index-doRjyG5s.css +1 -0
  102. datadoom/webdist/assets/inter-cyrillic-ext-wght-normal-BOeWTOD4.woff2 +0 -0
  103. datadoom/webdist/assets/inter-cyrillic-wght-normal-DqGufNeO.woff2 +0 -0
  104. datadoom/webdist/assets/inter-greek-ext-wght-normal-DlzME5K_.woff2 +0 -0
  105. datadoom/webdist/assets/inter-greek-wght-normal-CkhJZR-_.woff2 +0 -0
  106. datadoom/webdist/assets/inter-latin-ext-wght-normal-DO1Apj_S.woff2 +0 -0
  107. datadoom/webdist/assets/inter-latin-wght-normal-Dx4kXJAl.woff2 +0 -0
  108. datadoom/webdist/assets/inter-vietnamese-wght-normal-CBcvBZtf.woff2 +0 -0
  109. datadoom/webdist/assets/jetbrains-mono-cyrillic-wght-normal-D73BlboJ.woff2 +0 -0
  110. datadoom/webdist/assets/jetbrains-mono-greek-wght-normal-Bw9x6K1M.woff2 +0 -0
  111. datadoom/webdist/assets/jetbrains-mono-latin-ext-wght-normal-DBQx-q_a.woff2 +0 -0
  112. datadoom/webdist/assets/jetbrains-mono-latin-wght-normal-B9CIFXIH.woff2 +0 -0
  113. datadoom/webdist/assets/jetbrains-mono-vietnamese-wght-normal-Bt-aOZkq.woff2 +0 -0
  114. datadoom/webdist/assets/space-grotesk-latin-ext-wght-normal-D9tNdqV9.woff2 +0 -0
  115. datadoom/webdist/assets/space-grotesk-latin-wght-normal-BhU9QXUp.woff2 +0 -0
  116. datadoom/webdist/assets/space-grotesk-vietnamese-wght-normal-D0rl6rjA.woff2 +0 -0
  117. datadoom/webdist/index.html +15 -0
  118. datadoom-0.1.0.dev0.dist-info/METADATA +143 -0
  119. datadoom-0.1.0.dev0.dist-info/RECORD +122 -0
  120. datadoom-0.1.0.dev0.dist-info/WHEEL +4 -0
  121. datadoom-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  122. datadoom-0.1.0.dev0.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,162 @@
1
+ """In-process generation worker (03 §3.3, 17 step 8).
2
+
3
+ A run is submitted to a thread pool; the worker loads the immutable spec snapshot
4
+ from ``store``, drives the single ``engine.pipeline`` entry point, streams
5
+ progress to the :class:`EventHub`, persists artifacts + report, and flips the
6
+ ``GenerationRun`` (and its dataset) status. Cancellation is cooperative — checked
7
+ at each pipeline stage boundary by :class:`HubProgressEmitter`.
8
+
9
+ This is the only code path that turns a queued run into artifacts; the CLI and
10
+ ``datadoom.generate()`` share the same engine underneath, never a fork of it.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import traceback
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ from pathlib import Path
18
+
19
+ from datadoom.engine import generate, parse_spec
20
+ from datadoom.store import (
21
+ ArtifactRepository,
22
+ ArtifactStore,
23
+ Database,
24
+ DatasetRepository,
25
+ ReportRepository,
26
+ RunRepository,
27
+ SpecRow,
28
+ utcnow_iso,
29
+ )
30
+
31
+ from .progress import EventHub, HubProgressEmitter, RunCancelled
32
+
33
+
34
class WorkerPool:
    """Execute queued generation runs on a background thread pool.

    For every submitted run the worker loads the run and its spec snapshot from
    the store, calls ``generate`` with a :class:`HubProgressEmitter`, persists the
    resulting artifacts and report, and flips the run (and its dataset) to a
    terminal status. Each terminal state is also published to the
    :class:`EventHub`.
    """

    def __init__(
        self,
        db: Database,
        artifacts: ArtifactStore,
        hub: EventHub,
        package_version: str,
        max_workers: int = 2,
    ) -> None:
        """Keep the collaborators and create the executor.

        Args:
            db: Database whose ``session()`` context manager is used for all reads/writes.
            artifacts: Artifact store used to resolve run directories and storage URIs.
            hub: Event hub used for cancellation checks and event publishing.
            package_version: Stored on the instance as ``package_version``.
            max_workers: Size of the underlying thread pool.
        """
        self.db = db
        self.artifacts = artifacts
        self.hub = hub
        self.package_version = package_version
        self._pool = ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix="dd-run")

    def submit(self, run_id: str) -> None:
        """Schedule a queued run for execution (returns immediately)."""
        self._pool.submit(self._execute, run_id)

    def shutdown(self) -> None:
        """Shut the executor down without waiting; futures not yet started are cancelled."""
        self._pool.shutdown(wait=False, cancel_futures=True)

    # --- execution ----------------------------------------------------------------
    def _execute(self, run_id: str) -> None:
        """Run one pipeline and translate its outcome into a persisted terminal state."""
        try:
            self._run_pipeline(run_id)
        except RunCancelled:
            self._mark_cancelled(run_id)
        except Exception as exc:  # noqa: BLE001 — persist any failure, never crash the pool
            self._mark_failed(run_id, exc)

    def _run_pipeline(self, run_id: str) -> None:
        """Load the run, generate its dataset, then persist artifacts, report and status.

        Returns silently when the run does not exist.

        Raises:
            RunCancelled: If the hub reports the run as cancelled before it starts.
            RuntimeError: If the spec snapshot is missing, or the run row is gone by
                the time results are persisted.
        """
        # Load run + spec; flip to running.
        with self.db.session() as s:
            run = RunRepository(s).get(run_id)
            if run is None:
                return
            if self.hub.is_cancelled(run_id):
                raise RunCancelled("queued")
            spec_row = s.get(SpecRow, run.spec_id)
            # Explicit raise instead of ``assert``: asserts vanish under ``python -O``
            # and would surface as an opaque AttributeError on ``None``.
            if spec_row is None:
                raise RuntimeError(f"run {run_id!r} references missing spec {run.spec_id!r}")
            spec_body = dict(spec_row.body)
            dataset_id = run.dataset_id
            seed = run.seed
            run.status = "running"
            run.stage = "intake"
            run.started_at = utcnow_iso()
            ds = DatasetRepository(s).get(dataset_id)
            if ds is not None:
                ds.status = "running"
                ds.updated_at = utcnow_iso()

        spec = parse_spec(spec_body)
        out_dir: Path = self.artifacts.run_dir(dataset_id, run_id)
        emitter = HubProgressEmitter(self.hub, run_id)

        result = generate(spec, seed=seed, out_dir=out_dir, progress=emitter)
        emitter.finish()

        # Persist artifacts + report + final status (one transaction).
        with self.db.session() as s:
            arts = ArtifactRepository(s)
            for art in result.artifacts:
                abs_path = out_dir / art.path
                arts.add(
                    run_id=run_id,
                    version=art.version,
                    fmt=art.format,
                    storage_uri=self.artifacts.to_uri(abs_path),
                    checksum_sha256=art.checksum_sha256,
                    size_bytes=art.size_bytes,
                    split="full",
                )
            report = ReportRepository(s).upsert(run_id, result.report.to_dict())
            run = RunRepository(s).get(run_id)
            if run is None:
                raise RuntimeError(
                    f"run {run_id!r} disappeared before its results could be persisted"
                )
            run.status = "completed"
            run.stage = "packaging"
            run.progress_pct = 100
            run.finished_at = utcnow_iso()
            run.metrics = {"compliance_score": result.compliance.score}
            ds = DatasetRepository(s).get(dataset_id)
            if ds is not None:
                ds.status = "completed"
                ds.latest_run_id = run_id
                ds.updated_at = utcnow_iso()
            # Read the id inside the session so it is available after the block exits.
            report_id = report.report_id

        self.hub.publish(
            run_id,
            {
                "type": "completed",
                "run_id": run_id,
                "compliance_score": result.compliance.score,
                "report_id": report_id,
            },
        )

    # --- terminal states ----------------------------------------------------------
    def _mark_cancelled(self, run_id: str) -> None:
        """Persist the ``cancelled`` status and publish a ``cancelled`` event.

        A dataset still marked ``running`` is put back to ``draft``.
        """
        with self.db.session() as s:
            run = RunRepository(s).get(run_id)
            if run is not None:
                run.status = "cancelled"
                run.finished_at = utcnow_iso()
                ds = DatasetRepository(s).get(run.dataset_id)
                if ds is not None and ds.status == "running":
                    ds.status = "draft"
                    ds.updated_at = utcnow_iso()
        self.hub.publish(run_id, {"type": "cancelled", "run_id": run_id})

    def _mark_failed(self, run_id: str, exc: Exception) -> None:
        """Persist the ``failed`` status with error details and publish a ``failed`` event.

        The traceback is rendered from ``exc`` itself, so the result does not depend
        on this method being called from inside the ``except`` block.
        """
        stage = "unknown"
        tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
        with self.db.session() as s:
            run = RunRepository(s).get(run_id)
            if run is not None:
                stage = run.stage or "unknown"
                run.status = "failed"
                run.error = {"message": str(exc), "traceback": tb, "stage": stage}
                run.finished_at = utcnow_iso()
                ds = DatasetRepository(s).get(run.dataset_id)
                if ds is not None:
                    ds.status = "failed"
                    ds.updated_at = utcnow_iso()
        self.hub.publish(
            run_id,
            {"type": "failed", "stage": stage, "message": str(exc), "traceback": tb},
        )
datadoom/plugin.py ADDED
@@ -0,0 +1,35 @@
1
+ """Public, stable contract surface for plugin authors (09 §4).
2
+
3
+ Import the base classes and the ``schema`` helper from here::
4
+
5
+ from datadoom.plugin import Distribution, schema
6
+
7
+ class WeibullDistribution(Distribution):
8
+ name = "weibull"
9
+ param_schema = schema({"k": {"type": "number", "minimum": 0}})
10
+ def sample(self, rng, n, params): ...
11
+ def cdf(self, x, params): ...
12
+
13
+ These names are re-exported from ``datadoom.plugins.contracts`` and stay stable
14
+ even as the engine's internal layout evolves.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from datadoom.plugins.contracts import (
20
+ Distribution,
21
+ Exporter,
22
+ FailureMode,
23
+ ProbeModel,
24
+ StructuralFn,
25
+ schema,
26
+ )
27
+
28
+ __all__ = [
29
+ "Distribution",
30
+ "StructuralFn",
31
+ "FailureMode",
32
+ "Exporter",
33
+ "ProbeModel",
34
+ "schema",
35
+ ]
@@ -0,0 +1,47 @@
1
+ """DataDoom plugin system — registry + loader + scaffolder (09, 17 step 17).
2
+
3
+ Plugins extend DataDoom *without forking core*: a small class implementing one of
4
+ the engine ABCs (re-exported as ``datadoom.plugin``) is discovered at startup and
5
+ inserted into the engine's lookup tables, so it works in the CLI, the API, and the
6
+ web UI with no core change. This package depends only on the engine (10 §4).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from .contracts import (
12
+ Distribution,
13
+ Exporter,
14
+ FailureMode,
15
+ ProbeModel,
16
+ StructuralFn,
17
+ schema,
18
+ )
19
+ from .loader import default_plugins_dir, load_plugins
20
+ from .registry import (
21
+ PluginConflictError,
22
+ PluginError,
23
+ PluginRecord,
24
+ PluginRegistry,
25
+ get_registry,
26
+ )
27
+ from .scaffold import ObjectCheck, check_object, check_plugin, scaffold_plugin
28
+
29
+ __all__ = [
30
+ "Distribution",
31
+ "StructuralFn",
32
+ "FailureMode",
33
+ "Exporter",
34
+ "ProbeModel",
35
+ "schema",
36
+ "load_plugins",
37
+ "default_plugins_dir",
38
+ "get_registry",
39
+ "PluginRegistry",
40
+ "PluginRecord",
41
+ "PluginError",
42
+ "PluginConflictError",
43
+ "scaffold_plugin",
44
+ "check_plugin",
45
+ "check_object",
46
+ "ObjectCheck",
47
+ ]
@@ -0,0 +1,72 @@
1
+ """Plugin author contract surface — re-exported as ``datadoom.plugin`` (09 §4).
2
+
3
+ A plugin is a small class implementing one of the engine's typed base classes.
4
+ This module re-exports those ABCs (so authors import them from a stable name that
5
+ does not change as the engine's internal layout evolves) and ships the tiny
6
+ ``schema`` helper for declaring a plugin's ``param_schema`` JSON-schema fragment.
7
+
8
+ The canonical ABCs live in ``datadoom.engine`` — the engine *consumes* registered
9
+ plugin instances through the registry (it never imports ``datadoom.plugins``); the
10
+ registry mutates the engine's lookup tables in place (10 §4). Plugins therefore
11
+ depend only on these ABCs.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Mapping, Sequence
17
+ from typing import Any
18
+
19
+ from datadoom.engine.causal.functions import StructuralFn
20
+ from datadoom.engine.difficulty.probes import ProbeModel
21
+ from datadoom.engine.dist.base import Distribution
22
+ from datadoom.engine.export.base import Exporter
23
+ from datadoom.engine.failure.base import FailureMode
24
+
25
+ __all__ = [
26
+ "Distribution",
27
+ "StructuralFn",
28
+ "FailureMode",
29
+ "Exporter",
30
+ "ProbeModel",
31
+ "schema",
32
+ "PLUGIN_BASES",
33
+ "KEY_ATTR",
34
+ ]
35
+
36
+ # Plugin kind -> the base class an instance must subclass. The order is the
37
+ # precedence used when resolving an object's kind (kinds are disjoint in practice).
38
+ PLUGIN_BASES: dict[str, type] = {
39
+ "distribution": Distribution,
40
+ "structural_fn": StructuralFn,
41
+ "failure_mode": FailureMode,
42
+ "exporter": Exporter,
43
+ "probe_model": ProbeModel,
44
+ }
45
+
46
+ # The attribute that names each kind inside its registry. Exporters key on
47
+ # ``format`` (it matches ``export.formats[]``); everything else keys on ``name``.
48
+ KEY_ATTR: dict[str, str] = {
49
+ "distribution": "name",
50
+ "structural_fn": "name",
51
+ "failure_mode": "name",
52
+ "exporter": "format",
53
+ "probe_model": "name",
54
+ }
55
+
56
+
57
def schema(
    properties: Mapping[str, Mapping[str, Any]],
    *,
    required: Sequence[str] | None = None,
) -> dict[str, Any]:
    """Build a JSON-schema ``object`` fragment from a map of property specs.

    Example: ``schema({"k": {"type": "number", "minimum": 0}})`` yields
    ``{"type": "object", "properties": {"k": {...}}}``.

    Args:
        properties: Property name -> JSON-schema spec for that property. The
            outer mapping is shallow-copied into a plain ``dict``.
        required: Names of mandatory properties. The ``"required"`` key is only
            emitted when this is non-empty.

    Returns:
        A new dict with ``"type"``, ``"properties"`` and, optionally, ``"required"``.
    """
    fragment: dict[str, Any] = {"type": "object"}
    fragment["properties"] = dict(properties)
    if not required:
        return fragment
    fragment["required"] = list(required)
    return fragment
@@ -0,0 +1,125 @@
1
+ """Plugin discovery + loading (09 §3, 17 step 17).
2
+
3
+ Two mechanisms resolve into the in-memory :class:`PluginRegistry` at server/CLI
4
+ startup:
5
+
6
+ 1. **Python entry points** under the ``datadoom.plugins`` group (preferred for
7
+ published ``datadoom-plugin-*`` packages).
8
+ 2. **A local plugins directory** (``$DATADOOM_HOME/plugins/*.py``) auto-imported
9
+ for prototyping and per-project plugins.
10
+
11
+ Each discovered object is registered (validated + inserted into the engine
12
+ tables). Conflicts and broken plugins fail loudly with a clear message rather
13
+ than silently degrading a run.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import importlib.util
19
+ import inspect
20
+ import os
21
+ import sys
22
+ from importlib.metadata import entry_points
23
+ from pathlib import Path
24
+ from types import ModuleType
25
+
26
+ from .registry import PluginError, PluginRegistry, get_registry, resolve_kind_class
27
+
28
+ ENTRY_POINT_GROUP = "datadoom.plugins"
29
+
30
+
31
def default_plugins_dir() -> Path:
    """Return the local plugins directory: ``$DATADOOM_HOME/plugins``.

    Falls back to ``~/.datadoom/plugins`` when ``DATADOOM_HOME`` is unset or
    empty. The environment is read directly here, so this module does not need
    to import ``datadoom.config``.
    """
    configured = os.environ.get("DATADOOM_HOME")
    if configured:
        base = Path(configured).expanduser()
    else:
        base = Path.home() / ".datadoom"
    return base / "plugins"
40
+
41
+
42
+ def _instantiate(target: object, *, where: str) -> object:
43
+ """Coerce an entry-point/discovered target to a plugin *instance*."""
44
+ if inspect.isclass(target):
45
+ try:
46
+ return target()
47
+ except Exception as exc: # noqa: BLE001 - surface as a clean plugin error
48
+ raise PluginError(f"could not instantiate {where}: {exc}") from exc
49
+ return target
50
+
51
+
52
def load_entry_points(registry: PluginRegistry) -> list[str]:
    """Register every plugin advertised under the ``datadoom.plugins`` group.

    Args:
        registry: Registry that receives each loaded plugin.

    Returns:
        One ``"<kind>:<name>"`` label per registered plugin, in discovery order.

    Raises:
        PluginError: If an entry point cannot be imported (instantiation and
            registration errors propagate from the helpers called here).
    """
    registered: list[str] = []
    for entry in entry_points(group=ENTRY_POINT_GROUP):
        try:
            loaded_target = entry.load()
        except Exception as exc:  # noqa: BLE001
            raise PluginError(f"entry point {entry.name!r} failed to import: {exc}") from exc
        plugin = _instantiate(loaded_target, where=f"entry point {entry.name!r}")
        dist_version = _dist_version(entry)
        rec = registry.register(plugin, source="entrypoint", version=dist_version)
        registered.append(f"{rec.kind}:{rec.name}")
    return registered
65
+
66
+
67
def load_local_dir(registry: PluginRegistry, directory: Path) -> list[str]:
    """Import every ``*.py`` in ``directory`` and register the plugin classes it defines.

    Files whose name starts with ``_`` are skipped, and files are processed in
    sorted order. Only classes defined in the imported module itself that
    resolve to a plugin kind are instantiated and registered.

    Args:
        registry: Registry that receives each plugin instance.
        directory: Directory to scan; a missing directory yields an empty list.

    Returns:
        One ``"<kind>:<name>"`` label per registered plugin.

    Raises:
        PluginError: If a file fails to import or a plugin class cannot be
            instantiated (registration errors propagate from the registry).
    """
    loaded: list[str] = []
    if not directory.is_dir():
        return loaded
    for path in sorted(directory.glob("*.py")):
        if path.name.startswith("_"):
            continue
        module = _import_path(path)
        for _, member in inspect.getmembers(module, inspect.isclass):
            # Only classes *defined here* (skip the imported ABCs themselves).
            if member.__module__ != module.__name__:
                continue
            if resolve_kind_class(member) is None:
                continue
            # Go through _instantiate, like the entry-point path, so a failing
            # constructor surfaces as a PluginError naming the offending class.
            plugin = _instantiate(member, where=f"local plugin {path.name}:{member.__name__}")
            record = registry.register(plugin, source="local", module=str(path))
            loaded.append(f"{record.kind}:{record.name}")
    return loaded
85
+
86
+
87
+ def _import_path(path: Path) -> ModuleType:
88
+ mod_name = f"datadoom_local_plugin_{path.stem}"
89
+ spec = importlib.util.spec_from_file_location(mod_name, path)
90
+ if spec is None or spec.loader is None:
91
+ raise PluginError(f"could not load local plugin {path}")
92
+ module = importlib.util.module_from_spec(spec)
93
+ sys.modules[mod_name] = module
94
+ try:
95
+ spec.loader.exec_module(module)
96
+ except Exception as exc: # noqa: BLE001
97
+ sys.modules.pop(mod_name, None)
98
+ raise PluginError(f"local plugin {path.name} failed to import: {exc}") from exc
99
+ return module
100
+
101
+
102
+ def _dist_version(ep: object) -> str | None:
103
+ dist = getattr(ep, "dist", None)
104
+ return getattr(dist, "version", None) if dist is not None else None
105
+
106
+
107
def load_plugins(
    registry: PluginRegistry | None = None,
    *,
    use_entry_points: bool = True,
    local_dir: Path | None = None,
    use_local: bool = True,
) -> PluginRegistry:
    """Seed built-ins, then discover entry-point and local-directory plugins.

    Args:
        registry: Registry to populate; the process-wide one is used when omitted.
        use_entry_points: Whether to load plugins from entry points.
        local_dir: Directory of local plugin files; defaults to
            :func:`default_plugins_dir` when ``use_local`` is set.
        use_local: Whether to load plugins from the local directory.

    Returns:
        The registry that was populated.
    """
    target = registry or get_registry()
    target.seed_builtins()
    if use_entry_points:
        load_entry_points(target)
    if not use_local:
        return target
    plugins_dir = local_dir or default_plugins_dir()
    load_local_dir(target, plugins_dir)
    return target
@@ -0,0 +1,214 @@
1
+ """In-memory plugin registry (09 §3, 17 step 17).
2
+
3
+ The registry is the single place that knows *every* capability available to a
4
+ run — both core built-ins and discovered plugins. Registering a plugin inserts
5
+ its instance into the matching **engine lookup table** (the same dict the
6
+ pipeline reads by name), so a registered distribution/fn/failure/exporter/probe
7
+ is picked up with no engine change. The engine never imports this module; the
8
+ dependency points the other way (``engine ← plugins``, 10 §4).
9
+
10
+ Conflicts (a plugin reusing a built-in or another plugin's name) fail loudly so
11
+ an install never silently shadows a capability.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import MutableMapping
17
+ from dataclasses import dataclass
18
+ from typing import Any
19
+
20
+ from .contracts import KEY_ATTR, PLUGIN_BASES
21
+
22
+
23
class PluginError(Exception):
    """Base error for plugin problems, e.g. a plugin that fails validation.

    Validation failures include a wrong base class, a bad ``param_schema`` or a
    missing name.
    """
25
+
26
+
27
class PluginConflictError(PluginError):
    """Raised when two capabilities of the same kind claim the same name."""
29
+
30
+
31
+ _ALLOWED_SCHEMA_TYPES = {"object", "string", "number", "integer", "boolean", "array", "null"}
32
+
33
+
34
@dataclass(frozen=True)
class PluginRecord:
    """Immutable description of one registered capability (built-in or plugin)."""

    name: str
    kind: str
    source: str  # "builtin" | "entrypoint" | "local"
    module: str | None = None
    version: str | None = None
    schema: dict[str, Any] | None = None

    @property
    def builtin(self) -> bool:
        """Whether this record's ``source`` is ``"builtin"``."""
        return self.source == "builtin"

    def to_info(self) -> dict[str, Any]:
        """Return the dict shape consumed by ``GET /api/plugins`` (08 §10).

        ``enabled`` is always reported as ``True``.
        """
        return dict(
            name=self.name,
            kind=self.kind,
            version=self.version,
            schema=self.schema,
            source=self.source,
            builtin=self.builtin,
            enabled=True,
        )
60
+
61
+
62
def resolve_kind(obj: object) -> str | None:
    """Return the plugin kind that the instance ``obj`` implements, or ``None``.

    Kinds are tried in ``PLUGIN_BASES`` order; the first base class that
    ``obj`` is an instance of wins.
    """
    matching = (kind for kind, base in PLUGIN_BASES.items() if isinstance(obj, base))
    return next(matching, None)
68
+
69
+
70
def resolve_kind_class(cls: object) -> str | None:
    """Return the plugin kind that a *class* implements, or ``None``.

    Non-class arguments and the base classes themselves yield ``None``; only
    proper subclasses of an entry in ``PLUGIN_BASES`` match.
    """
    if not isinstance(cls, type):
        return None
    matching = (
        kind
        for kind, base in PLUGIN_BASES.items()
        if cls is not base and issubclass(cls, base)
    )
    return next(matching, None)
78
+
79
+
80
def validate_param_schema(schema: object, *, where: str = "param_schema") -> None:
    """Lightweight sanity check that ``schema`` is a renderable JSON-schema fragment.

    Checks that ``schema`` is a dict, that its optional ``type`` is one of the
    supported type names, that an ``object`` schema carries a ``properties``
    dict, and that every property spec is a dict with a supported optional
    ``type``.

    Args:
        schema: The candidate ``param_schema`` value.
        where: Label that prefixes every error message.

    Raises:
        PluginError: On the first violation. This includes a ``type`` that is
            not a plain string (for example a list of type names), which would
            otherwise fail the set lookup with a raw ``TypeError``.
    """

    def _supported(type_name: object) -> bool:
        # Guard with isinstance first: an unhashable value (list/dict) cannot be
        # tested for membership in a set without raising TypeError.
        return isinstance(type_name, str) and type_name in _ALLOWED_SCHEMA_TYPES

    if not isinstance(schema, dict):
        raise PluginError(f"{where} must be a dict (use the schema() helper)")
    stype = schema.get("type")
    if stype is not None and not _supported(stype):
        raise PluginError(f"{where} has unsupported type {stype!r}")
    props = schema.get("properties")
    if stype == "object" and not isinstance(props, dict):
        raise PluginError(f"{where} of type 'object' needs a 'properties' dict")
    if isinstance(props, dict):
        for pname, pspec in props.items():
            if not isinstance(pspec, dict):
                raise PluginError(f"{where}.properties[{pname!r}] must be a dict")
            ptype = pspec.get("type")
            if ptype is not None and not _supported(ptype):
                raise PluginError(f"{where}.properties[{pname!r}] has unsupported type {ptype!r}")
97
+
98
+
99
def _engine_registries() -> dict[str, MutableMapping[str, Any]]:
    """Return the five live engine lookup tables, keyed by plugin kind.

    The engine modules are imported inside the function so that importing this
    module does not pull in the whole engine. The returned mapping holds the
    engine's own table objects, not copies, so mutating a table in place is
    visible to everything that imported it from the engine.
    """
    from datadoom.engine.causal.functions import STRUCTURAL_FNS
    from datadoom.engine.difficulty.probes import PROBES
    from datadoom.engine.dist.builtins import REGISTRY
    from datadoom.engine.export import EXPORTERS
    from datadoom.engine.failure.modes import FAILURE_MODES

    tables: dict[str, MutableMapping[str, Any]] = {}
    tables["distribution"] = REGISTRY
    tables["structural_fn"] = STRUCTURAL_FNS
    tables["failure_mode"] = FAILURE_MODES
    tables["exporter"] = EXPORTERS
    tables["probe_model"] = PROBES
    return tables
119
+
120
+
121
class PluginRegistry:
    """Tracks records and keeps the engine lookup tables in sync.

    Records are keyed by ``(kind, name)``. Registering a plugin writes the
    instance into the engine table for its kind and stores a matching
    :class:`PluginRecord`.
    """

    def __init__(self) -> None:
        self._records: dict[tuple[str, str], PluginRecord] = {}
        self._builtins_seeded = False

    def seed_builtins(self) -> None:
        """Record the entries already present in the engine tables (idempotent).

        Existing records are left untouched, so a plugin registered before
        seeding keeps its real source instead of being relabelled ``builtin``.
        """
        if self._builtins_seeded:
            return
        for kind, reg in _engine_registries().items():
            for key, obj in reg.items():
                if (kind, key) in self._records:
                    continue
                self._records[(kind, key)] = PluginRecord(
                    name=key,
                    kind=kind,
                    source="builtin",
                    module=type(obj).__module__,
                    schema=_schema_of(obj),
                )
        self._builtins_seeded = True

    def register(
        self,
        obj: object,
        *,
        source: str,
        module: str | None = None,
        version: str | None = None,
    ) -> PluginRecord:
        """Validate ``obj`` and insert it into its engine table.

        Args:
            obj: Plugin instance; must subclass one of the plugin base classes.
            source: Origin label stored on the record (e.g. ``"entrypoint"``, ``"local"``).
            module: Module/path recorded for the plugin; defaults to the class's module.
            version: Optional version string stored on the record.

        Returns:
            The record created for the plugin.

        Raises:
            PluginError: If ``obj`` is not a plugin, has no usable key
                attribute, or declares an invalid ``param_schema``.
            PluginConflictError: If the name is already recorded for this kind
                or already present in the engine table for this kind.
        """
        kind = resolve_kind(obj)
        if kind is None:
            bases = ", ".join(PLUGIN_BASES)
            raise PluginError(
                f"{type(obj).__name__} is not a plugin: it must subclass one of [{bases}]"
            )
        key_attr = KEY_ATTR[kind]
        key = getattr(obj, key_attr, None)
        if not isinstance(key, str) or not key:
            raise PluginError(
                f"{kind} plugin {type(obj).__name__} must set a non-empty '{key_attr}'"
            )
        table = _engine_registries()[kind]
        existing = self._records.get((kind, key))
        # Also consult the live engine table: before seed_builtins() has run the
        # records are empty, and a plugin would otherwise silently shadow an
        # entry that is already there.
        if existing is not None or key in table:
            owner = existing.source if existing is not None else "engine table"
            raise PluginConflictError(
                f"{kind} {key!r} is already registered (source={owner}); "
                "capability names must be unique within an install"
            )
        schema = _schema_of(obj)
        if schema is not None:
            validate_param_schema(schema, where=f"{kind} {key!r} param_schema")

        table[key] = obj
        record = PluginRecord(
            name=key,
            kind=kind,
            source=source,
            module=module or type(obj).__module__,
            version=version,
            schema=schema,
        )
        self._records[(kind, key)] = record
        return record

    def records(self) -> list[PluginRecord]:
        """Return all records sorted by ``(kind, name)``."""
        return sorted(self._records.values(), key=lambda r: (r.kind, r.name))

    def to_info(self) -> list[dict[str, Any]]:
        """Return ``to_info()`` of every record, in :meth:`records` order."""
        return [r.to_info() for r in self.records()]

    def get(self, kind: str, name: str) -> PluginRecord | None:
        """Return the record for ``(kind, name)``, or ``None`` if unknown."""
        return self._records.get((kind, name))

    def reset(self) -> None:
        """Remove every non-built-in from the engine tables and records (test aid)."""
        regs = _engine_registries()
        # Iterate over a snapshot because entries are deleted while looping.
        for (kind, key), rec in list(self._records.items()):
            if rec.source != "builtin":
                regs[kind].pop(key, None)
                del self._records[(kind, key)]
202
+
203
+
204
+ def _schema_of(obj: object) -> dict[str, Any] | None:
205
+ schema = getattr(obj, "param_schema", None)
206
+ return dict(schema) if isinstance(schema, dict) else None
207
+
208
+
209
+ _DEFAULT = PluginRegistry()
210
+
211
+
212
def get_registry() -> PluginRegistry:
    """Return the module-level default registry, the same instance on every call."""
    return _DEFAULT