crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
crca_core/core/api.py ADDED
@@ -0,0 +1,120 @@
1
+ """Public API functions for the H1 `crca_core`.
2
+
3
+ These functions provide the stable, refusal-first entry points that other
4
+ layers (including LLM tooling) should call.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any, Dict, Optional
10
+
11
+ from crca_core.core.estimate import EstimatorConfig, estimate_effect_dowhy
12
+ from crca_core.identify import identify_effect
13
+ from crca_core.core.intervention_design import (
14
+ FeasibilityConstraints,
15
+ TargetQuery,
16
+ design_intervention,
17
+ )
18
+ from crca_core.models.provenance import ProvenanceManifest
19
+ from crca_core.models.refusal import RefusalChecklistItem, RefusalReasonCode, RefusalResult
20
+ from crca_core.models.result import CounterfactualResult
21
+ from crca_core.scm import LinearGaussianSCM
22
+ from crca_core.models.spec import DraftSpec, LockedSpec
23
+ from crca_core.timeseries.pcmci import PCMCIConfig, discover_timeseries_pcmci
24
+ from crca_core.discovery.tabular import TabularDiscoveryConfig, discover_tabular
25
+ from utils.canonical import stable_hash
26
+
27
+
28
def simulate_counterfactual(
    *,
    locked_spec: LockedSpec,
    factual_observation: Dict[str, float],
    intervention: Dict[str, float],
    allow_partial_observation: bool = False,
) -> CounterfactualResult | RefusalResult:
    """Answer a counterfactual query using the SCM attached to a locked spec.

    The steps are: build a ``LinearGaussianSCM`` from ``locked_spec.scm``,
    abduce the noise terms from ``factual_observation``, then predict under
    ``intervention`` with those noise terms held fixed.

    Args:
        locked_spec: Spec whose ``scm`` attribute supplies the structural model.
        factual_observation: Observed variable values used for abduction.
        intervention: Variable values passed to ``predict`` as interventions.
        allow_partial_observation: Forwarded to ``abduce_noise`` as
            ``allow_partial``.

    Returns:
        A ``RefusalResult`` when ``locked_spec.scm`` is ``None`` or when
        ``abduce_noise`` raises ``ValueError``; otherwise a
        ``CounterfactualResult`` holding the factual input, the intervention
        and the predicted values.
    """
    scm_spec = locked_spec.scm
    if scm_spec is None:
        missing_scm = RefusalChecklistItem(
            item="Provide SCMSpec",
            rationale="A DAG alone does not define counterfactual semantics; SCM is required.",
        )
        return RefusalResult(
            message="Counterfactuals require an explicit SCMSpec (structural equations + noise model).",
            reason_codes=[RefusalReasonCode.NO_SCM_FOR_COUNTERFACTUAL],
            checklist=[missing_scm],
            suggested_next_steps=[
                "Attach a SCMSpec (e.g., linear_gaussian) to the spec, then re-lock and retry."
            ],
        )

    model = LinearGaussianSCM.from_spec(scm_spec)

    # Abduction: only a ValueError from this step is converted into a refusal.
    try:
        noise = model.abduce_noise(factual_observation, allow_partial=allow_partial_observation)
    except ValueError as exc:
        incomplete_observation = RefusalChecklistItem(
            item="Provide complete factual observation",
            rationale="Counterfactuals require abduction for all endogenous variables in v1.0 unless partial mode is enabled.",
        )
        return RefusalResult(
            message=str(exc),
            reason_codes=[RefusalReasonCode.INPUT_INVALID],
            checklist=[incomplete_observation],
            suggested_next_steps=[
                "Provide all endogenous variables or set allow_partial_observation=True (partial mode)."
            ],
        )

    # Action + prediction under the requested intervention.
    predicted = model.predict(noise, interventions=intervention)

    # Only the observation's keys (not its values) enter the provenance hash.
    hash_payload = {
        "spec_hash": locked_spec.spec_hash,
        "module": "simulate_counterfactual",
        "intervention": intervention,
        "factual_keys": sorted(factual_observation),
    }
    manifest = ProvenanceManifest.minimal(spec_hash=stable_hash(hash_payload))

    # NOTE(review): the texts below mix "v1.0" and "v0.1", and the first
    # limitation does not mention partial mode -- confirm the intended wording.
    assumptions = [
        "SCM structure and parameters are correct (strong assumption).",
        "Factual observation includes all endogenous variables for abduction in v1.0 unless partial mode is enabled.",
    ]
    limitations = [
        "v0.1 counterfactuals require a fully observed system (no missing variables).",
        "Only linear-Gaussian SCMs are supported in v0.1.",
    ]
    payload = {
        "factual": dict(factual_observation),
        "do": dict(intervention),
        "result": predicted,
    }
    return CounterfactualResult(
        provenance=manifest,
        assumptions=assumptions,
        limitations=limitations,
        counterfactual=payload,
    )
97
+
98
+
99
# Public names re-exported by this module, one category per line.
__all__ = [
    "DraftSpec", "LockedSpec",  # core lifecycle
    "identify_effect",  # identification
    "TabularDiscoveryConfig", "discover_tabular",  # tabular discovery
    "PCMCIConfig", "discover_timeseries_pcmci",  # time-series discovery
    "TargetQuery", "FeasibilityConstraints", "design_intervention",  # design
    "simulate_counterfactual",  # counterfactuals
    "EstimatorConfig", "estimate_effect_dowhy",  # estimation
]
120
+
crca_core/core/estimate.py ADDED
@@ -0,0 +1,208 @@
1
+ """Identification → estimation → refutation wrapper (DoWhy).
2
+
3
+ This is supporting infrastructure for causal R&D. It is gated behind:
4
+ - LockedSpec
5
+ - explicit treatment/outcome
6
+ - DoWhy identification success
7
+
8
+ Refuters do not prove causality; they are diagnostics and must be surfaced.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import Any, Dict, List, Optional
14
+
15
+ from pydantic import BaseModel, Field
16
+
17
+ from crca_core.models.provenance import ProvenanceManifest
18
+ from crca_core.models.refusal import RefusalChecklistItem, RefusalReasonCode, RefusalResult
19
+ from crca_core.models.result import EstimateResult, IdentificationResult
20
+ from crca_core.models.spec import LockedSpec
21
+ from utils.canonical import stable_hash
22
+
23
+
24
class EstimatorConfig(BaseModel):
    # Options read by estimate_effect_dowhy and recorded in its provenance.

    # Passed as `method_name` to the DoWhy estimate call.
    method_name: str = "backdoor.linear_regression"
    # Passed through to the estimate call under the same keyword names.
    test_significance: bool = True
    confidence_intervals: bool = True
    # Refuter method names, run one by one after estimation.
    refuters: List[str] = Field(
        default_factory=lambda: [
            "placebo_treatment_refuter",
            "random_common_cause",
            "subset_refuter",
        ]
    )
31
+
32
+
33
+ def _ensure_networkx_compat() -> None:
34
+ """Patch NetworkX API differences required by DoWhy.
35
+
36
+ DoWhy versions in the wild have referenced `networkx.algorithms.d_separated`,
37
+ while NetworkX >=3.6 provides `networkx.algorithms.d_separation.is_d_separator`.
38
+ """
39
+ try:
40
+ import networkx as nx # type: ignore
41
+ import networkx.algorithms.d_separation as ds # type: ignore
42
+
43
+ if not hasattr(nx.algorithms, "d_separated") and hasattr(ds, "is_d_separator"):
44
+ setattr(nx.algorithms, "d_separated", ds.is_d_separator)
45
+ except Exception:
46
+ # If networkx isn't available, DoWhy will fail later anyway.
47
+ return
48
+
49
+
50
+ def _graph_to_dot(spec: LockedSpec) -> str:
51
+ # Minimal DOT string compatible with DoWhy's graph parser.
52
+ edges = spec.graph.edges
53
+ lines = ["digraph {"]
54
+ for n in spec.graph.nodes:
55
+ lines.append(f' "{n.name}";')
56
+ for e in edges:
57
+ lines.append(f' "{e.source}" -> "{e.target}";')
58
+ lines.append("}")
59
+ return "\n".join(lines)
60
+
61
+
62
def estimate_effect_dowhy(
    *,
    data: Any,
    locked_spec: LockedSpec,
    treatment: str,
    outcome: str,
    identification_result: IdentificationResult | None = None,
    config: Optional[EstimatorConfig] = None,
) -> EstimateResult | RefusalResult:
    """Run DoWhy identify -> estimate -> refute and return a structured result.

    Args:
        data: Must be a ``pandas.DataFrame``; anything else is refused.
        locked_spec: Spec whose graph is rendered to DOT for DoWhy and whose
            ``spec_hash`` is folded into the provenance hash.
        treatment: Treatment variable name (must be non-empty).
        outcome: Outcome variable name (must be non-empty).
        identification_result: Result of a prior identification step. Required;
            a result whose ``method`` is ``"not_identifiable"`` is refused.
        config: Estimator settings; defaults to ``EstimatorConfig()``.

    Returns:
        ``RefusalResult`` when an input gate fails, pandas or DoWhy cannot be
        imported, DoWhy returns no estimand, or the estimate carries no value.
        Otherwise an ``EstimateResult`` with the estimate, the stringified
        refuter outputs (or an ``{"error": ...}`` entry per failed refuter) and
        the dumped identification result.

    Note:
        Exceptions raised by DoWhy's model construction, identification or
        estimation calls are not caught here and propagate to the caller.
    """
    cfg = config or EstimatorConfig()

    # --- Input gates -------------------------------------------------------
    if not treatment or not outcome:
        return RefusalResult(
            message="Treatment and outcome must be provided.",
            reason_codes=[RefusalReasonCode.INPUT_INVALID],
            checklist=[
                RefusalChecklistItem(item="Provide treatment", rationale="Required to define the estimand."),
                RefusalChecklistItem(item="Provide outcome", rationale="Required to define the estimand."),
            ],
            suggested_next_steps=["Pass treatment='X', outcome='Y'."],
        )

    if identification_result is None:
        return RefusalResult(
            message="Estimation requires an IdentificationResult.",
            reason_codes=[RefusalReasonCode.INPUT_INVALID],
            checklist=[
                RefusalChecklistItem(
                    item="Run identify_effect() first",
                    rationale="Estimation is gated behind validated identifiability.",
                )
            ],
            suggested_next_steps=["Call identify_effect() and pass its result here."],
        )

    if identification_result.method == "not_identifiable":
        return RefusalResult(
            message="Cannot estimate: identification failed.",
            reason_codes=[RefusalReasonCode.NOT_IDENTIFIABLE],
            checklist=[
                RefusalChecklistItem(
                    item="Revise causal model or collect additional data/interventions",
                    rationale="Identification result indicates non-identifiability.",
                )
            ],
            suggested_next_steps=["Use design_intervention() to propose identifying experiments."],
        )

    # --- Provenance --------------------------------------------------------
    dot = _graph_to_dot(locked_spec)
    cfg_dump = cfg.model_dump()
    prov = ProvenanceManifest.minimal(
        spec_hash=stable_hash(
            {
                "spec_hash": locked_spec.spec_hash,
                "treatment": treatment,
                "outcome": outcome,
                "method": cfg_dump,
                "graph": dot,
                "module": "dowhy_pipeline",
            }
        ),
        algorithm_config=cfg.model_dump(),
    )

    # --- Optional dependencies ----------------------------------------------
    # Only the import is guarded; the type check below cannot fail for
    # dependency reasons and is kept outside the try body.
    try:
        import pandas as pd  # type: ignore
    except Exception as e:  # pragma: no cover
        return RefusalResult(
            message=f"pandas is required for estimation: {e}",
            reason_codes=[RefusalReasonCode.UNSUPPORTED_OPERATION],
            checklist=[RefusalChecklistItem(item="Install pandas", rationale="Required dependency for tabular estimation.")],
            suggested_next_steps=["pip install pandas"],
        )

    if not isinstance(data, pd.DataFrame):
        return RefusalResult(
            message="DoWhy estimation requires a pandas DataFrame.",
            reason_codes=[RefusalReasonCode.INPUT_INVALID],
            checklist=[RefusalChecklistItem(item="Provide pandas DataFrame", rationale="DoWhy expects tabular data.")],
            suggested_next_steps=["Convert your data to pandas.DataFrame and retry."],
        )

    try:
        _ensure_networkx_compat()
        from dowhy import CausalModel  # type: ignore
    except Exception as e:
        return RefusalResult(
            message=f"DoWhy not available: {e}",
            reason_codes=[RefusalReasonCode.UNSUPPORTED_OPERATION],
            checklist=[RefusalChecklistItem(item="Install dowhy", rationale="Required for this estimation pipeline.")],
            suggested_next_steps=["pip install dowhy"],
        )

    # --- Identify ------------------------------------------------------------
    model = CausalModel(data=data, treatment=treatment, outcome=outcome, graph=dot)
    identified_estimand = model.identify_effect()

    # Only a missing estimand object is gated here. DoWhy may still hand back
    # an estimand object when nothing usable was identified; that case is
    # caught after estimation via the missing effect value.
    if identified_estimand is None:
        return RefusalResult(
            message="Causal effect not identifiable under the provided graph/assumptions.",
            reason_codes=[RefusalReasonCode.NOT_IDENTIFIABLE],
            checklist=[
                RefusalChecklistItem(
                    item="Revise the causal model or collect additional data/interventions",
                    rationale="Effect identification failed.",
                )
            ],
            suggested_next_steps=["Use design_intervention() to propose identifying experiments."],
        )

    # --- Estimate ------------------------------------------------------------
    estimate = model.estimate_effect(
        identified_estimand,
        method_name=cfg.method_name,
        test_significance=cfg.test_significance,
        confidence_intervals=cfg.confidence_intervals,
    )

    # A missing attribute keeps the previous NaN fallback, but an explicit
    # ``None`` value would make ``float()`` raise TypeError, so refuse instead
    # of crashing (and skip refuters, which have nothing to refute).
    raw_value = getattr(estimate, "value", float("nan"))
    if raw_value is None:
        return RefusalResult(
            message="Cannot estimate: DoWhy returned no effect value for the identified estimand.",
            reason_codes=[RefusalReasonCode.NOT_IDENTIFIABLE],
            checklist=[
                RefusalChecklistItem(
                    item="Revise the causal model or choose an estimation method that matches the estimand",
                    rationale="The estimator produced no numeric effect.",
                )
            ],
            suggested_next_steps=["Use design_intervention() to propose identifying experiments."],
        )

    # --- Refute --------------------------------------------------------------
    # Each refuter is isolated so one failing diagnostic does not hide the rest.
    refutations: Dict[str, Any] = {}
    for refuter in cfg.refuters:
        try:
            ref = model.refute_estimate(identified_estimand, estimate, method_name=refuter)
            refutations[refuter] = str(ref)
        except Exception as e:
            refutations[refuter] = {"error": str(e)}

    return EstimateResult(
        provenance=prov,
        assumptions=[
            "Causal graph is correctly specified (strong assumption).",
            "Estimator assumptions depend on the chosen method (see DoWhy).",
        ],
        limitations=[
            "Refutation tests are diagnostics; passing does not prove causality.",
            "Estimation quality depends on overlap/positivity and measurement quality.",
        ],
        estimate={
            "value": float(raw_value),
            "method_name": cfg.method_name,
            "estimand": str(identified_estimand),
            "raw_estimate": str(estimate),
        },
        refutations=refutations,
        artifacts={"identification_result": identification_result.model_dump()},
    )
208
+
crca_core/core/godclass.py ADDED
@@ -0,0 +1,72 @@
1
+ """CausalCore GodClass: consolidated core API facade."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from crca_core.core.api import (
8
+ EstimatorConfig,
9
+ FeasibilityConstraints,
10
+ PCMCIConfig,
11
+ TabularDiscoveryConfig,
12
+ TargetQuery,
13
+ design_intervention,
14
+ discover_tabular,
15
+ discover_timeseries_pcmci,
16
+ identify_effect,
17
+ estimate_effect_dowhy,
18
+ simulate_counterfactual,
19
+ )
20
+ from crca_core.models.result import AnyResult, IdentificationResult
21
+ from crca_core.models.spec import LockedSpec
22
+
23
+
24
class CausalCoreGod:
    """Facade exposing the core causal operations as methods of one object.

    The only state is ``last_identification``: the most recent
    ``IdentificationResult`` returned by :meth:`identify`, which
    :meth:`estimate` forwards to the estimator.

    Each method that previously hard-coded a default configuration now accepts
    an optional keyword-only override; omitting it keeps the old behaviour.
    """

    def __init__(self) -> None:
        self.last_identification: Optional[IdentificationResult] = None

    def identify(self, *, locked_spec: LockedSpec, treatment: str, outcome: str) -> AnyResult:
        """Run ``identify_effect`` and remember the result if it succeeded.

        Only an ``IdentificationResult`` is cached; any other return value
        leaves ``last_identification`` unchanged.
        """
        res = identify_effect(locked_spec=locked_spec, treatment=treatment, outcome=outcome)
        if isinstance(res, IdentificationResult):
            self.last_identification = res
        return res

    def estimate(
        self,
        *,
        data: Any,
        locked_spec: LockedSpec,
        treatment: str,
        outcome: str,
        config: Optional[EstimatorConfig] = None,
    ) -> AnyResult:
        """Run ``estimate_effect_dowhy`` using the cached identification result.

        Args:
            config: Estimator settings; a fresh ``EstimatorConfig()`` is used
                when omitted.
        """
        # NOTE(review): the cached result is forwarded as-is; nothing here
        # checks that it was produced for this spec/treatment/outcome.
        return estimate_effect_dowhy(
            data=data,
            locked_spec=locked_spec,
            treatment=treatment,
            outcome=outcome,
            identification_result=self.last_identification,
            config=config if config is not None else EstimatorConfig(),
        )

    def counterfactual(
        self,
        *,
        locked_spec: LockedSpec,
        factual_observation: Dict[str, float],
        intervention: Dict[str, float],
        allow_partial_observation: bool = False,
    ) -> AnyResult:
        """Delegate to ``simulate_counterfactual`` with the same arguments."""
        return simulate_counterfactual(
            locked_spec=locked_spec,
            factual_observation=factual_observation,
            intervention=intervention,
            allow_partial_observation=allow_partial_observation,
        )

    def design_intervention(
        self,
        *,
        locked_spec: LockedSpec,
        target_query: TargetQuery,
        constraints: Optional[FeasibilityConstraints] = None,
    ) -> AnyResult:
        """Delegate to the module-level ``design_intervention``.

        Args:
            constraints: Feasibility constraints; a fresh
                ``FeasibilityConstraints()`` is used when omitted.
        """
        # Inside a method body this name resolves to the imported function,
        # not to this method.
        return design_intervention(
            locked_spec=locked_spec,
            target_query=target_query,
            constraints=constraints if constraints is not None else FeasibilityConstraints(),
        )

    def discover_tabular(self, *, data: Any, config: Optional[TabularDiscoveryConfig] = None) -> AnyResult:
        """Delegate to the module-level ``discover_tabular``.

        Args:
            config: Discovery settings; a fresh ``TabularDiscoveryConfig()``
                is used when omitted.
        """
        return discover_tabular(data, config if config is not None else TabularDiscoveryConfig())

    def discover_timeseries(self, *, data: Any, config: Optional[PCMCIConfig] = None) -> AnyResult:
        """Delegate to ``discover_timeseries_pcmci``.

        Args:
            config: PCMCI settings; a fresh ``PCMCIConfig()`` is used when
                omitted.
        """
        return discover_timeseries_pcmci(data, config if config is not None else PCMCIConfig())
@@ -0,0 +1,174 @@
1
+ """Intervention/experiment design (v0.1: graphical, non-probabilistic).
2
+
3
+ This module is intentionally conservative:
4
+ - It does not invent numeric information gain.
5
+ - It produces structured candidate designs with explicit prerequisites and rationale.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any, Dict, List, Literal, Optional
11
+
12
+ from pydantic import BaseModel, Field
13
+
14
+ from crca_core.models.provenance import ProvenanceManifest
15
+ from crca_core.models.result import InterventionDesignResult
16
+ from utils.canonical import stable_hash
17
+
18
+
19
class TargetQuery(BaseModel):
    # Describes what the caller wants an intervention design for.
    # design_intervention reads treatment/outcome for "identify_effect" and
    # edge_source/edge_target for "reduce_uncertainty_edge".

    # Kind of question being asked; defaults to effect identification.
    query_type: Literal["identify_effect", "reduce_uncertainty_edge"] = "identify_effect"

    # Variable pair used by the "identify_effect" query type.
    treatment: Optional[str] = None
    outcome: Optional[str] = None

    # Edge endpoints used by the "reduce_uncertainty_edge" query type.
    edge_source: Optional[str] = None
    edge_target: Optional[str] = None
25
+
26
+
27
class FeasibilityConstraints(BaseModel):
    # Caller-supplied feasibility information for intervention design.
    # Every field is optional and defaults to "nothing declared".

    # Variables the caller is able to intervene on; design_intervention only
    # proposes randomization/perturbation for variables listed here.
    manipulable_variables: List[str] = Field(default_factory=list)

    # Variables the caller is able to observe/measure.
    observable_variables: List[str] = Field(default_factory=list)

    # Free-form cost information keyed by name (structure not enforced here).
    costs: Dict[str, Any] = Field(default_factory=dict)

    # Optional free-text ethics remarks.
    ethics_notes: Optional[str] = None
32
+
33
+
34
class DesignCandidate(BaseModel):
    # One proposed intervention or measurement design. design_intervention
    # serializes instances with model_dump() before returning them.

    # Short machine-readable label, e.g. "randomize_treatment".
    design_type: str

    # Human-readable explanation of how the design is meant to help.
    mechanism: str

    # Assumptions/conditions that must hold for the design to be useful.
    prerequisites: List[str] = Field(default_factory=list)

    # Names of inputs still required to judge whether the design is feasible.
    feasibility_inputs_needed: List[str] = Field(default_factory=list)

    # Optional extra caveats.
    notes: Optional[str] = None
40
+
41
+
42
def _parents_of(graph_edges, node: str) -> List[str]:
    """Return the ``source`` of every edge whose ``target`` equals *node*.

    Edge order is preserved and duplicates are kept, so the result mirrors the
    order in which matching edges appear in *graph_edges*.
    """
    return [edge.source for edge in graph_edges if edge.target == node]
48
+
49
+
50
def _identify_effect_candidates(
    locked_spec: Any,
    X: str,
    Y: str,
    constraints: FeasibilityConstraints,
) -> List[DesignCandidate]:
    """Build the candidate designs for an ``identify_effect`` query on X -> Y."""
    found: List[DesignCandidate] = []

    # Randomization is only suggested when the caller declared the treatment manipulable.
    if X in constraints.manipulable_variables:
        found.append(
            DesignCandidate(
                design_type="randomize_treatment",
                mechanism=f"Randomize {X} to break backdoor confounding when estimating effect on {Y}.",
                prerequisites=[
                    "Well-defined intervention on treatment (consistency/SUTVA).",
                    "No interference between units (SUTVA).",
                    "Feasible randomization protocol and compliance monitoring.",
                ],
                feasibility_inputs_needed=["sample_size", "randomization_unit", "ethical_constraints"],
            )
        )

    # Direct causes of the treatment in the spec's graph are offered as confounder candidates.
    parents_x = _parents_of(locked_spec.graph.edges, X)
    if parents_x:
        found.append(
            DesignCandidate(
                design_type="measure_confounder_candidates",
                mechanism=(
                    f"Measure candidate confounders {parents_x} because they are modeled as direct causes of {X}; "
                    f"conditioning/adjusting may help estimate effect of {X} on {Y} under exchangeability."
                ),
                prerequisites=[
                    "Candidate confounders are measured without severe error or are modeled as proxies.",
                    "Exchangeability holds conditional on measured covariates (assumption).",
                    "Positivity/overlap holds in the collected data.",
                ],
                feasibility_inputs_needed=["measurement_instrument_quality", "data_collection_costs"],
                notes="This does not guarantee identifiability; it is a measurement suggestion grounded in the current graph hypothesis.",
            )
        )

    # An instrument design is only suggested when the spec's roles list instruments.
    declared_instruments = getattr(locked_spec.roles, "instruments", [])
    if declared_instruments:
        Zs = list(declared_instruments)
        found.append(
            DesignCandidate(
                design_type="instrument_design",
                mechanism=f"Collect/create instrument(s) {Zs} to identify effect of {X} on {Y} under IV assumptions.",
                prerequisites=[
                    "Relevance: Z affects X.",
                    "Exclusion: Z affects Y only through X.",
                    "Independence: Z is independent of unmeasured causes of Y.",
                ],
                feasibility_inputs_needed=["instrument_source", "exclusion_justification"],
            )
        )

    return found


def _edge_uncertainty_candidates(
    s: str,
    t: str,
    constraints: FeasibilityConstraints,
) -> List[DesignCandidate]:
    """Build the candidate designs for a ``reduce_uncertainty_edge`` query on s -> t."""
    # Without the ability to manipulate the source there is nothing to propose.
    if s not in constraints.manipulable_variables:
        return []
    return [
        DesignCandidate(
            design_type="perturb_source",
            mechanism=f"Intervene on {s} (do({s}=...)) and observe downstream changes in {t} to test the edge hypothesis {s}→{t}.",
            prerequisites=[
                "Well-defined intervention on source variable.",
                "No simultaneous changes to other upstream causes (or they are measured/controlled).",
            ],
            feasibility_inputs_needed=["intervention_range", "measurement_frequency", "time_horizon"],
        )
    ]


def design_intervention(
    *,
    locked_spec: Any,
    target_query: TargetQuery,
    constraints: Optional[FeasibilityConstraints] = None,
) -> InterventionDesignResult:
    """Suggest interventions or measurements for the given target query.

    The output is purely structural: a list of serialized ``DesignCandidate``
    dicts with prerequisites and rationale. No identifiability claim and no
    numeric information gain is produced.

    Args:
        locked_spec: Spec object; ``graph.edges`` and ``roles`` are read for
            ``identify_effect`` queries, and ``spec_hash`` is read if present.
        target_query: What the design should support.
        constraints: Feasibility inputs; a default (empty) instance is used
            when omitted.

    Returns:
        An ``InterventionDesignResult``. When the query lacks the variable
        names its type requires, the result has no designs and a single
        limitation explaining what is missing.
    """
    if not constraints:
        constraints = FeasibilityConstraints()

    # Provenance covers only the spec hash, the query and the constraints (no raw data).
    fingerprint = {
        "spec_hash": getattr(locked_spec, "spec_hash", "unknown"),
        "target_query": target_query.model_dump(),
        "constraints": constraints.model_dump(),
        "module": "intervention_design_v0.1",
    }
    prov = ProvenanceManifest.minimal(spec_hash=stable_hash(fingerprint))

    def _finish(limitations: List[str], candidates: List[DesignCandidate]) -> InterventionDesignResult:
        # Single place where the result object is assembled and candidates are serialized.
        serialized: List[Dict[str, Any]] = [candidate.model_dump() for candidate in candidates]
        return InterventionDesignResult(
            provenance=prov,
            assumptions=[],
            limitations=limitations,
            designs=serialized,
        )

    candidates: List[DesignCandidate] = []
    kind = target_query.query_type

    if kind == "identify_effect":
        X = target_query.treatment
        Y = target_query.outcome
        if not (X and Y):
            return _finish(["Missing treatment/outcome in target_query."], [])
        candidates = _identify_effect_candidates(locked_spec, X, Y, constraints)
    elif kind == "reduce_uncertainty_edge":
        s = target_query.edge_source
        t = target_query.edge_target
        if not (s and t):
            return _finish(["Missing edge_source/edge_target in target_query."], [])
        candidates = _edge_uncertainty_candidates(s, t, constraints)

    return _finish(
        [
            "v0.1 design is graphical and non-probabilistic; it does not compute numeric information gain.",
            "Suggestions depend on the correctness of the locked causal graph/spec assumptions.",
        ],
        candidates,
    )
174
+
@@ -0,0 +1,48 @@
1
+ """Spec lifecycle: DraftSpec → LockedSpec.
2
+
3
+ The LockedSpec is a scientific boundary: only LockedSpec may be used for numeric
4
+ causal outputs. This module enforces that boundary.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime, timezone
10
+ from typing import List
11
+
12
+ from crca_core.models.spec import DraftSpec, LockedSpec
13
+ from utils.canonical import stable_hash
14
+
15
+
16
def lock_spec(draft: DraftSpec, approvals: List[str]) -> LockedSpec:
    """Turn a draft spec into a LockedSpec stamped with a content hash and approvals.

    Args:
        draft: The draft specification to freeze.
        approvals: Approval entries to record on the locked spec. Must be non-empty.

    Returns:
        A LockedSpec carrying the draft's data, graph, roles, assumptions and
        scm, plus the hash of the draft's dumped content and a UTC timestamp.

    Raises:
        ValueError: If approvals are empty.
    """
    if not approvals:
        raise ValueError("approvals must be non-empty to lock a spec")

    # The hash is taken over the draft's full model_dump payload.
    content_digest = stable_hash(draft.model_dump())
    timestamp = datetime.now(timezone.utc).isoformat()

    # Sections copied verbatim from the draft onto the locked spec.
    carried_over = {
        section: getattr(draft, section)
        for section in ("data", "graph", "roles", "assumptions", "scm")
    }

    return LockedSpec(
        spec_hash=content_digest,
        approvals=[*approvals],  # copy so later mutation by the caller is not shared
        locked_at_utc=timestamp,
        **carried_over,
    )
48
+
@@ -0,0 +1,9 @@
1
+ """Causal discovery (hypothesis generation) modules.
2
+
3
+ Discovery outputs are hypotheses under explicit assumptions; they are not truth.
4
+ """
5
+
6
+ from crca_core.discovery.tabular import TabularDiscoveryConfig, discover_tabular
7
+
8
+ __all__ = ["TabularDiscoveryConfig", "discover_tabular"]
9
+