crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,193 @@
1
+ """Tabular causal discovery (hypothesis generation).
2
+
3
+ Design goals:
4
+ - Wrap established implementations when available (preferred).
5
+ - If required backends are missing, return a structured Refusal (never ad-hoc heuristics).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any, Dict, List, Literal, Optional
11
+
12
+ from pydantic import BaseModel, Field
13
+
14
+ from crca_core.models.provenance import ProvenanceManifest
15
+ from crca_core.models.refusal import RefusalChecklistItem, RefusalReasonCode, RefusalResult
16
+ from crca_core.models.result import DiscoveryHypothesisResult
17
+ from utils.canonical import stable_hash
18
+
19
+
20
class TabularDiscoveryConfig(BaseModel):
    """Settings accepted by :func:`discover_tabular`.

    Every field has a default, so ``TabularDiscoveryConfig()`` is a valid
    configuration.
    """

    # Which discovery routine to run.
    algorithm: Literal["pc", "fci", "ges"] = Field(default="pc")
    # Significance level handed to the PC / FCI routines; must lie strictly in (0, 1).
    alpha: float = Field(default=0.05, gt=0.0, lt=1.0)
    # Number of bootstrap resamples; 0 disables the stability analysis.
    bootstrap_samples: int = Field(default=0, ge=0)
    # Conditional-independence test name handed to the PC / FCI routines.
    ci_test: Literal["fisherz", "gsq", "chisq"] = Field(default="fisherz")
    # Forwarded as the `stable` argument of the PC routine.
    stable: bool = Field(default=True)
    # Minimum number of rows required before discovery is attempted.
    min_samples: int = Field(default=200, ge=20)
    # Free-form annotation; not read by the discovery code in this module.
    notes: Optional[str] = Field(default=None)
28
+
29
+
30
def _backend_available() -> bool:
    """Report whether the ``causallearn`` module can be located for import.

    Returns:
        ``True`` when an import spec for ``causallearn`` is found, ``False``
        when it is not found or when the lookup itself raises.
    """
    try:
        from importlib.util import find_spec

        # The causal-learn distribution is imported under the name `causallearn`.
        located = find_spec("causallearn")
    except Exception:
        return False
    return located is not None
38
+
39
+
40
def discover_tabular(
    data: Any,
    discovery_config: Optional[TabularDiscoveryConfig] = None,
    assumptions: Optional[List[str]] = None,
) -> DiscoveryHypothesisResult | RefusalResult:
    """Run tabular causal discovery and return a hypothesis object.

    Notes:
    - This is hypothesis generation only.
    - If `causal-learn` is not installed, we refuse and provide an actionable checklist.

    Args:
        data: Input table. Anything other than a ``pandas.DataFrame`` is refused.
        discovery_config: Algorithm settings; defaults to ``TabularDiscoveryConfig()``.
        assumptions: Caller-declared assumptions, echoed back in the result.

    Returns:
        A ``DiscoveryHypothesisResult`` on success. A ``RefusalResult`` when the
        backend is missing or fails to import, when ``data`` is not a DataFrame,
        when it cannot be converted to a float matrix, or when it has fewer
        than ``min_samples`` rows.
    """

    cfg = discovery_config or TabularDiscoveryConfig()
    assumptions = assumptions or []

    def _refuse(
        message: str,
        code: Any,
        item: str,
        rationale: str,
        next_step: str,
    ) -> RefusalResult:
        # Every refusal in this function has one reason code, one checklist
        # item and one suggested step; build them in a single place.
        return RefusalResult(
            message=message,
            reason_codes=[code],
            checklist=[RefusalChecklistItem(item=item, rationale=rationale)],
            suggested_next_steps=[next_step],
        )

    # Compute a lightweight data hash for provenance (schema-level only).
    # We intentionally do not hash raw data values here.
    schema_sig: Dict[str, Any] = {}
    try:
        import pandas as pd  # type: ignore

        if isinstance(data, pd.DataFrame):
            schema_sig = {c: str(t) for c, t in data.dtypes.items()}
        else:
            schema_sig = {"type": str(type(data))}
    except Exception:
        # Best effort: fall back to the Python type when pandas is unavailable
        # or the schema cannot be read.
        schema_sig = {"type": str(type(data))}

    spec_hash = stable_hash({"discovery": "tabular", "config": cfg.model_dump(), "schema": schema_sig})
    prov = ProvenanceManifest.minimal(
        spec_hash=spec_hash,
        data_hash=stable_hash(schema_sig),
        algorithm_config=cfg.model_dump(),
    )

    if not _backend_available():
        return _refuse(
            "Tabular causal discovery backend not available.",
            RefusalReasonCode.UNSUPPORTED_OPERATION,
            "Install causal-learn",
            "Tabular discovery is wrap-first; we refuse rather than run unvalidated heuristics.",
            "pip install causal-learn",
        )

    try:
        import numpy as np  # type: ignore
        import pandas as pd  # type: ignore
        from causallearn.search.ConstraintBased.PC import pc  # type: ignore
        from causallearn.search.ConstraintBased.FCI import fci  # type: ignore
        from causallearn.search.ScoreBased.GES import ges  # type: ignore
    except Exception as e:
        return _refuse(
            f"Failed to import causal-learn: {e}",
            RefusalReasonCode.UNSUPPORTED_OPERATION,
            "Install causal-learn",
            "Tabular discovery requires causal-learn backend.",
            "pip install causal-learn",
        )

    if not isinstance(data, pd.DataFrame):
        return _refuse(
            "Tabular discovery requires pandas DataFrame input.",
            RefusalReasonCode.INPUT_INVALID,
            "Provide pandas DataFrame",
            "Causal-learn expects tabular numpy/pandas data.",
            "Convert your data to pandas.DataFrame and retry.",
        )

    columns = list(data.columns)
    try:
        values = data.to_numpy(dtype=float)
    except (TypeError, ValueError) as e:
        # Non-numeric columns cannot be cast to float; refuse in a structured
        # way rather than letting the conversion error escape to the caller.
        return _refuse(
            f"Tabular discovery requires numeric columns: {e}",
            RefusalReasonCode.INPUT_INVALID,
            "Provide numeric data",
            "The discovery backend is called on a float matrix.",
            "Encode or drop non-numeric columns and retry.",
        )

    if values.shape[0] < cfg.min_samples:
        return _refuse(
            "Insufficient samples for reliable discovery.",
            RefusalReasonCode.INPUT_INVALID,
            "Increase sample size",
            f"Need at least {cfg.min_samples} rows for stable discovery.",
            "Collect more samples or lower min_samples (not recommended).",
        )

    def _learn_graph(sample: Any) -> Any:
        """Run the configured algorithm on ``sample`` and return its graph object."""
        if cfg.algorithm == "pc":
            cg = pc(sample, alpha=cfg.alpha, indep_test=cfg.ci_test, stable=cfg.stable)
            return getattr(cg, "G", cg)
        if cfg.algorithm == "fci":
            res = fci(sample, alpha=cfg.alpha, indep_test=cfg.ci_test)
            # Accept either a bare graph or a sequence whose first item is the graph.
            first = res[0] if isinstance(res, (list, tuple)) else res
            return getattr(first, "G", first)
        # NOTE: alpha / ci_test / stable are not passed to GES.
        res = ges(sample)
        return res.get("G") if isinstance(res, dict) else res

    graph_obj = _learn_graph(values)
    mat = getattr(graph_obj, "graph", None)
    graph_hypothesis: Dict[str, Any]
    if mat is None:
        graph_hypothesis = {"graph_type": "unknown", "raw": str(graph_obj)}
    else:
        graph_hypothesis = {
            "graph_type": "causal_learn_matrix",
            "adjacency": np.asarray(mat).tolist(),
            "columns": columns,
        }

    stability_report: Dict[str, Any] = {"bootstrap_samples": cfg.bootstrap_samples}

    if cfg.bootstrap_samples > 0:
        n_rows = values.shape[0]
        edge_counts = None
        successful_runs = 0
        for _ in range(cfg.bootstrap_samples):
            # Resample rows with replacement. The global NumPy RNG is used, so
            # callers can make this reproducible with np.random.seed(...).
            idx = np.random.randint(0, n_rows, size=n_rows)
            boot_mat = getattr(_learn_graph(values[idx]), "graph", None)
            if boot_mat is None:
                continue
            boot_mat = np.asarray(boot_mat)
            if edge_counts is None:
                edge_counts = np.zeros_like(boot_mat, dtype=float)
            edge_counts += (boot_mat != 0).astype(float)
            successful_runs += 1
        stability_report["bootstrap_successful_runs"] = successful_runs
        if edge_counts is not None:
            # Normalise by the resamples that actually produced a matrix;
            # dividing by the requested count would bias frequencies downward
            # whenever a resample was skipped.
            stability_report["edge_frequency"] = (edge_counts / float(successful_runs)).tolist()

    return DiscoveryHypothesisResult(
        provenance=prov,
        assumptions=assumptions,
        limitations=[
            "Discovery outputs are hypotheses under assumptions (e.g., faithfulness, causal sufficiency/latent handling).",
            "Returned graph structure depends on CI test assumptions and sample size.",
        ],
        graph_hypothesis=graph_hypothesis,
        stability_report=stability_report,
    )
193
+
@@ -0,0 +1,171 @@
1
+ """Identification entry points for crca_core (in-house)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List, Optional
6
+
7
+ from crca_core.identify.backdoor import find_backdoor_adjustment_set
8
+ from crca_core.identify.frontdoor import find_frontdoor_mediator
9
+ from crca_core.identify.graph import CausalGraph
10
+ from crca_core.identify.id_algorithm import id_algorithm
11
+ from crca_core.identify.iv import find_instrument
12
+ from crca_core.models.provenance import ProvenanceManifest
13
+ from crca_core.models.refusal import RefusalChecklistItem, RefusalReasonCode, RefusalResult
14
+ from crca_core.models.result import IdentificationResult
15
+ from crca_core.models.spec import LockedSpec
16
+ from utils.canonical import stable_hash
17
+
18
+
19
def identify_effect(
    *,
    locked_spec: LockedSpec,
    treatment: str,
    outcome: str,
) -> IdentificationResult | RefusalResult:
    """Identify an effect using in-house methods (backdoor/frontdoor/IV/ID).

    Strategies are tried in a fixed order: backdoor, frontdoor, instrumental
    variable, then the in-house ID routine. The first one that succeeds is
    returned.

    Args:
        locked_spec: Locked specification supplying the graph, the spec hash
            and the declared mediator / instrument roles.
        treatment: Name of the treatment variable.
        outcome: Name of the outcome variable.

    Returns:
        An ``IdentificationResult`` for the first strategy that succeeds. A
        ``RefusalResult`` when the inputs are empty, identical, or absent from
        the graph, or when no strategy identifies the effect.
    """
    if not treatment or not outcome:
        return RefusalResult(
            message="Treatment and outcome must be provided.",
            reason_codes=[RefusalReasonCode.INPUT_INVALID],
            checklist=[
                RefusalChecklistItem(item="Provide treatment", rationale="Required to define the estimand."),
                RefusalChecklistItem(item="Provide outcome", rationale="Required to define the estimand."),
            ],
            suggested_next_steps=["Pass treatment='X', outcome='Y'."],
        )

    if treatment == outcome:
        # The separation queries below are posed between treatment and
        # outcome; a variable paired with itself is not a meaningful query.
        return RefusalResult(
            message="Treatment and outcome must be distinct variables.",
            reason_codes=[RefusalReasonCode.INPUT_INVALID],
            checklist=[
                RefusalChecklistItem(
                    item="Provide distinct treatment and outcome",
                    rationale="An effect of a variable on itself is not a valid estimand.",
                )
            ],
            suggested_next_steps=["Pass treatment='X', outcome='Y'."],
        )

    graph = CausalGraph.from_spec(locked_spec.graph)

    # Unknown variables would otherwise surface as networkx exceptions from
    # the ancestor/descendant and path queries used by the strategies below.
    missing = [v for v in (treatment, outcome) if v not in graph.directed]
    if missing:
        return RefusalResult(
            message=f"Variables not present in causal graph: {missing}",
            reason_codes=[RefusalReasonCode.INPUT_INVALID],
            checklist=[
                RefusalChecklistItem(
                    item="Use variables declared in the locked spec graph",
                    rationale="Identification is defined relative to the graph's nodes.",
                )
            ],
            suggested_next_steps=["Add the variable to the causal graph or correct its name."],
        )

    prov = ProvenanceManifest.minimal(
        spec_hash=stable_hash(
            {
                "spec_hash": locked_spec.spec_hash,
                "treatment": treatment,
                "outcome": outcome,
                "module": "identify_effect",
            }
        )
    )

    # 1) Backdoor
    z = find_backdoor_adjustment_set(graph, treatment, outcome)
    if z is not None:
        expr = f"sum_{{z}} P({outcome}|{treatment},z) P(z)"
        return IdentificationResult(
            provenance=prov,
            method="backdoor",
            scope="partial",
            confidence="medium",
            estimand_expression=expr,
            assumptions_used=[
                "Backdoor criterion holds with the returned adjustment set.",
                "No unmeasured confounding conditional on Z.",
                "Positivity/overlap for adjustment set.",
            ],
            witnesses={"adjustment_set": sorted(z)},
            proof={
                "type": "do-calculus",
                "steps": [
                    "In G_{X̄}, Z d-separates X and Y (backdoor).",
                    "Apply Rule 2 to replace do(X) with observe(X) given Z.",
                ],
            },
            limitations=["Identification assumes all confounding is captured by Z."],
        )

    # 2) Frontdoor
    mediator = find_frontdoor_mediator(
        graph, treatment, outcome, mediators=locked_spec.roles.mediators
    )
    if mediator is not None:
        expr = (
            f"sum_m P(m|{treatment}) sum_{treatment} P({outcome}|m,{treatment}) P({treatment})"
        )
        return IdentificationResult(
            provenance=prov,
            method="frontdoor",
            scope="partial",
            confidence="medium",
            estimand_expression=expr,
            assumptions_used=[
                "Frontdoor criterion holds for mediator M.",
                "No unmeasured confounding between X and M.",
                "All backdoor paths from M to Y are blocked by X.",
            ],
            witnesses={"mediator": mediator},
            proof={
                "type": "do-calculus",
                "steps": [
                    "Use Rule 3 to exchange do(X) with observe(X) for M→Y component.",
                    "Use Rule 2 to exchange do(X) with observe(X) for X→M component.",
                ],
            },
            limitations=["Frontdoor validity depends on strong mediator assumptions."],
        )

    # 3) Instrumental variable
    instrument = find_instrument(
        graph, treatment, outcome, instruments=locked_spec.roles.instruments
    )
    if instrument is not None:
        expr = "IV estimand (see instrument assumptions)"
        return IdentificationResult(
            provenance=prov,
            method="iv",
            scope="partial",
            confidence="low",
            estimand_expression=expr,
            assumptions_used=[
                "Relevance: Z affects X.",
                "Exclusion: Z affects Y only through X.",
                "Independence: Z independent of unmeasured causes of Y.",
            ],
            witnesses={"instrument": instrument},
            proof={
                "type": "linear-IV",
                "steps": [
                    "Assume linear SCM with exclusion and independence.",
                    "Derive β = Cov(Z,Y)/Cov(Z,X).",
                ],
            },
            limitations=["IV estimand expression is left symbolic; estimator must implement IV."],
        )

    # 4) In-house ID algorithm (conservative)
    id_expr = id_algorithm(graph, treatment, outcome)
    if id_expr is not None:
        method, expr = id_expr
        return IdentificationResult(
            provenance=prov,
            method=method,
            scope="conservative",
            confidence="low",
            estimand_expression=expr,
            assumptions_used=["Causal graph is correct; no latent confounding beyond declared."],
            witnesses={},
            proof={
                "type": "id-algorithm",
                "steps": [
                    "Apply ID recursion on C-components.",
                    "Return g-formula when no bidirected edges.",
                ],
            },
            limitations=[
                "ID algorithm is conservative: may return non-identifiable for some identifiable cases with latent confounding."
            ],
        )

    return RefusalResult(
        message="Effect not identifiable under current graph/assumptions.",
        reason_codes=[RefusalReasonCode.NOT_IDENTIFIABLE],
        checklist=[
            RefusalChecklistItem(
                item="Revise causal model or add interventions/measurements",
                rationale="Identification failed with backdoor/frontdoor/IV/ID checks.",
            )
        ],
        suggested_next_steps=["Use design_intervention() to propose identifying experiments."],
    )
168
+
169
+
170
+ __all__ = ["identify_effect"]
171
+
@@ -0,0 +1,39 @@
1
+ """Backdoor identification helper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import itertools
6
+ from typing import List, Optional, Sequence, Set
7
+
8
+ from crca_core.identify.graph import CausalGraph
9
+
10
+
11
def find_backdoor_adjustment_set(
    graph: CausalGraph,
    treatment: str,
    outcome: str,
    *,
    max_candidates: int = 12,
    max_set_size: int = 6,
) -> Optional[Set[str]]:
    """Search for a set that d-separates treatment and outcome in the pruned graph.

    Candidates are the observed variables other than treatment, outcome and
    whatever ``graph.descendants([treatment])`` returns. Subsets are tried in
    increasing size, so the first hit is a smallest separating set.

    Args:
        graph: Causal graph to query.
        treatment: Treatment variable name.
        outcome: Outcome variable name.
        max_candidates: Give up (return ``None``) when the candidate pool is
            larger than this.
        max_set_size: Largest subset size to try.

    Returns:
        The first separating subset found (possibly empty), or ``None`` when
        the pool is too large or no subset within the size bound separates.
    """
    excluded = {treatment, outcome}
    pool = sorted(set(graph.observed) - excluded - graph.descendants([treatment]))

    # Bounded search: refuse to enumerate subsets of an oversized pool.
    if len(pool) > max_candidates:
        return None

    # The separation test is run with the treatment's outgoing edges removed.
    pruned = graph.remove_outgoing([treatment])
    largest = min(max_set_size, len(pool))
    by_size = (itertools.combinations(pool, size) for size in range(largest + 1))
    for subset in itertools.chain.from_iterable(by_size):
        chosen = set(subset)
        if pruned.d_separated([treatment], [outcome], list(chosen)):
            return chosen
    return None
39
+
@@ -0,0 +1,48 @@
1
+ """Frontdoor identification helper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Iterable, Optional, Sequence
6
+
7
+ import networkx as nx
8
+
9
+ from crca_core.identify.graph import CausalGraph
10
+
11
+
12
def _directed_paths_through(
    graph: CausalGraph, treatment: str, outcome: str, mediator: str
) -> bool:
    """Return True if all directed paths from treatment to outcome go through mediator.

    Returns ``False`` when there is no directed path at all, or when either
    endpoint is missing from the directed graph.
    """
    found_any = False
    try:
        # Stream the paths rather than materialising them: the first path that
        # avoids the mediator settles the answer.
        for path in nx.all_simple_paths(graph.directed, treatment, outcome):
            if mediator not in path:
                return False
            found_any = True
    except (nx.NodeNotFound, nx.NetworkXNoPath):
        # all_simple_paths reports a missing endpoint as NodeNotFound;
        # NetworkXNoPath is kept in case a networkx version raises it.
        return False
    return found_any
23
+
24
+
25
def find_frontdoor_mediator(
    graph: CausalGraph,
    treatment: str,
    outcome: str,
    mediators: Sequence[str],
) -> Optional[str]:
    """Pick the first declared mediator that passes a conservative frontdoor check.

    A candidate passes when (1) it is neither the treatment nor the outcome,
    (2) every directed treatment-to-outcome path contains it, (3) treatment
    and candidate are d-separated by the empty set once the treatment's
    outgoing edges are removed, and (4) candidate and outcome are d-separated
    by the treatment once the candidate's outgoing edges are removed.

    Returns:
        The first passing mediator in the order given, or ``None``.
    """

    def _qualifies(candidate: str) -> bool:
        if candidate == treatment or candidate == outcome:
            return False
        if not _directed_paths_through(graph, treatment, outcome, candidate):
            return False

        # No backdoor from X to M: empty conditioning set, X's outgoing edges cut.
        without_x_out = graph.remove_outgoing([treatment])
        if not without_x_out.d_separated([treatment], [candidate], []):
            return False

        # Backdoor from M to Y must be blocked by X, with M's outgoing edges cut.
        without_m_out = graph.remove_outgoing([candidate])
        return bool(without_m_out.d_separated([candidate], [outcome], [treatment]))

    return next((m for m in mediators if _qualifies(m)), None)
48
+
@@ -0,0 +1,106 @@
1
+ """Graph utilities for identification.
2
+
3
+ Supports directed edges plus bidirected (latent confounding) edges.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple
10
+
11
+ import networkx as nx
12
+
13
+ from crca_core.models.spec import CausalGraphSpec
14
+
15
+
16
def _parse_latent_confounders(latent: Sequence[str]) -> Set[Tuple[str, str]]:
    """Turn latent-confounder strings into a set of sorted name pairs.

    Accepted spellings are ``"A<->B"`` and ``"A,B"``; the arrow form takes
    precedence when both separators appear. Each entry is split on the first
    occurrence of its separator only. Entries with neither separator, with an
    empty side, or naming the same variable twice are dropped.

    Returns:
        A set of ``(first, second)`` tuples with the two names in sorted order.
    """
    parsed: Set[Tuple[str, str]] = set()
    for entry in latent:
        separator = next((sep for sep in ("<->", ",") if sep in entry), None)
        if separator is None:
            # Unrecognised spelling: skip rather than guess.
            continue
        head, _, tail = entry.partition(separator)
        left, right = head.strip(), tail.strip()
        if not left or not right or left == right:
            continue
        parsed.add(tuple(sorted((left, right))))
    return parsed
35
+
36
+
37
@dataclass(frozen=True)
class CausalGraph:
    """Directed causal graph plus bidirected (latent-confounding) pairs.

    Attributes are documented inline. Transformation methods return new
    ``CausalGraph`` instances built from copies of the directed graph.
    """

    # Node names in declaration order.
    nodes: Tuple[str, ...]
    # Names of the nodes flagged as observed.
    observed: Set[str]
    # Directed edges between nodes.
    directed: nx.DiGraph
    # Sorted name pairs that share a latent confounder.
    bidirected: Set[Tuple[str, str]]

    @classmethod
    def from_spec(cls, spec: CausalGraphSpec) -> "CausalGraph":
        """Build a graph from the spec's nodes, edges and latent_confounders."""
        nodes = tuple(n.name for n in spec.nodes)
        observed = {n.name for n in spec.nodes if n.observed}
        g = nx.DiGraph()
        g.add_nodes_from(nodes)
        for e in spec.edges:
            g.add_edge(e.source, e.target)
        bidirected = _parse_latent_confounders(spec.latent_confounders)
        return cls(nodes=nodes, observed=observed, directed=g, bidirected=bidirected)

    def ancestors(self, nodes: Iterable[str]) -> Set[str]:
        """Return the given nodes together with all their directed ancestors."""
        # Materialise once so a one-shot iterable is not exhausted by the loop
        # before the seeds themselves are added.
        seeds = list(nodes)
        anc: Set[str] = set(seeds)
        for n in seeds:
            anc |= nx.ancestors(self.directed, n)
        return anc

    def descendants(self, nodes: Iterable[str]) -> Set[str]:
        """Return the given nodes together with all their directed descendants."""
        seeds = list(nodes)
        desc: Set[str] = set(seeds)
        for n in seeds:
            desc |= nx.descendants(self.directed, n)
        return desc

    def induced_subgraph(self, nodes: Set[str]) -> "CausalGraph":
        """Return the subgraph restricted to ``nodes``.

        Names in ``nodes`` that are not nodes of this graph are ignored.
        """
        g = self.directed.subgraph(nodes).copy()
        bidirected = {p for p in self.bidirected if p[0] in nodes and p[1] in nodes}
        observed = {n for n in self.observed if n in nodes}
        # Keep declaration order so the result does not depend on set iteration order.
        ordered = tuple(n for n in self.nodes if n in nodes)
        return CausalGraph(nodes=ordered, observed=observed, directed=g, bidirected=bidirected)

    def remove_outgoing(self, x: Sequence[str]) -> "CausalGraph":
        """Return a copy with every directed edge leaving the nodes in ``x`` removed.

        Bidirected pairs are kept: a latent confounder points *into* its two
        endpoints, so it is not an outgoing edge of either.
        """
        g = self.directed.copy()
        for node in x:
            g.remove_edges_from(list(g.out_edges(node)))
        return CausalGraph(nodes=self.nodes, observed=set(self.observed), directed=g, bidirected=set(self.bidirected))

    def remove_incoming(self, x: Sequence[str]) -> "CausalGraph":
        """Return a copy with every edge pointing into the nodes in ``x`` removed.

        This includes bidirected pairs touching ``x``, because a latent
        confounder contributes an incoming arrow at each endpoint.
        """
        targets = set(x)
        g = self.directed.copy()
        for node in targets:
            g.remove_edges_from(list(g.in_edges(node)))
        kept = {p for p in self.bidirected if p[0] not in targets and p[1] not in targets}
        return CausalGraph(nodes=self.nodes, observed=set(self.observed), directed=g, bidirected=kept)

    def c_components(self, nodes: Optional[Set[str]] = None) -> List[Set[str]]:
        """Compute c-components (bidirected connected components).

        Args:
            nodes: Restrict the computation to these nodes. ``None`` means all
                nodes; an explicit empty set yields no components.
        """
        if nodes is None:
            nodes = set(self.nodes)
        # Build undirected graph of bidirected connections
        undirected = nx.Graph()
        undirected.add_nodes_from(nodes)
        for a, b in self.bidirected:
            if a in nodes and b in nodes:
                undirected.add_edge(a, b)
        return [set(c) for c in nx.connected_components(undirected)]

    def d_separated(self, x: Sequence[str], y: Sequence[str], z: Sequence[str]) -> bool:
        """Return True when ``z`` d-separates ``x`` from ``y``.

        Each bidirected pair is expanded into an explicit latent parent of
        both endpoints before the check, so declared latent confounding is
        treated as an open path that no conditioning set can block.
        """
        dag = self.directed
        if self.bidirected:
            dag = self.directed.copy()
            for a, b in self.bidirected:
                if a in dag and b in dag:
                    # A tuple key cannot collide with string node names.
                    latent = ("__latent__", a, b)
                    dag.add_edge(latent, a)
                    dag.add_edge(latent, b)

        # `is_d_separator` is the current networkx name; older releases only
        # provide `d_separated` with the same argument order.
        try:
            from networkx.algorithms.d_separation import is_d_separator as _check
        except ImportError:
            from networkx.algorithms.d_separation import d_separated as _check

        return bool(_check(dag, set(x), set(y), set(z)))
106
+
@@ -0,0 +1,43 @@
1
+ """In-house identification (ID) algorithm scaffold.
2
+
3
+ This module implements a conservative identification strategy:
4
+ - If no bidirected edges (no latent confounding), return g-formula.
5
+ - If latent confounding is present, we currently return not identifiable.
6
+
7
+ This is intentionally strict and avoids over-claiming identifiability.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Optional, Sequence, Tuple
13
+
14
+ from crca_core.identify.graph import CausalGraph
15
+
16
+
17
def identify_g_formula(
    graph: CausalGraph, treatment: str, outcome: str
) -> Tuple[str, str]:
    """Return the g-formula expression for a DAG without latent confounding.

    Args:
        graph: Graph whose ``nodes`` are enumerated (in sorted order).
        treatment: Name of the intervened variable.
        outcome: Name of the outcome variable.

    Returns:
        The pair ``("id_g_formula", expression)``. The expression is a
        textual product of conditionals under ``do(treatment)``, prefixed
        by a summation over every node other than the treatment and the
        outcome when such nodes exist.
    """
    endpoints = (treatment, outcome)
    marginalized = [name for name in sorted(graph.nodes) if name not in endpoints]

    expr = f"Π_v P(v | Pa(v)) with do({treatment})"
    if marginalized:
        # Only emit the summation prefix when there is something to sum over.
        expr = "sum_{" + ",".join(marginalized) + "} " + expr
    return "id_g_formula", expr
30
+
31
+
32
def id_algorithm(
    graph: CausalGraph,
    treatment: str,
    outcome: str,
) -> Optional[Tuple[str, str]]:
    """Return an identification expression if possible, otherwise ``None``.

    Args:
        graph: Causal graph; its ``bidirected`` collection marks latent confounding.
        treatment: Name of the intervened variable.
        outcome: Name of the outcome variable.

    Returns:
        The result of ``identify_g_formula`` when the graph has no
        bidirected edges, otherwise ``None``.
    """
    if graph.bidirected:
        # Conservative: refuse when latent confounding is present (no over-claiming).
        return None

    # Without bidirected edges the g-formula identifies the interventional distribution.
    return identify_g_formula(graph, treatment, outcome)
@@ -0,0 +1,48 @@
1
+ """Instrumental variable identification helper (conservative)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional, Sequence
6
+
7
+ import networkx as nx
8
+
9
+ from crca_core.identify.graph import CausalGraph
10
+
11
+
12
def find_instrument(
    graph: CausalGraph,
    treatment: str,
    outcome: str,
    instruments: Sequence[str],
) -> Optional[str]:
    """Return a candidate instrument satisfying conservative IV checks.

    Candidates are examined in the order given and the first one passing
    every check is returned:

    * Relevance: a directed path leads from the candidate to the treatment.
    * Exclusion: no directed path from the candidate to the outcome avoids
      the treatment.
    * Independence (conservative): the candidate and the outcome are
      d-separated given the treatment in the graph with the treatment's
      outgoing edges removed.

    Args:
        graph: Causal graph to inspect.
        treatment: Name of the treatment variable (X).
        outcome: Name of the outcome variable (Y).
        instruments: Candidate instrument names (Z), in priority order.

    Returns:
        The first qualifying candidate, or ``None`` when there is none.
        ``None`` is also returned when the treatment and outcome coincide
        or when either is absent from the directed graph. Candidates that
        are not nodes of the directed graph are skipped.
    """
    directed = graph.directed

    # networkx raises NodeNotFound (not a NetworkXError) for unknown nodes,
    # so membership is checked up front instead of relying on except clauses.
    if treatment == outcome or treatment not in directed or outcome not in directed:
        return None

    # Y is reachable from Z in this view exactly when some directed Z -> Y
    # path avoids X; a reachability test replaces enumerating every simple
    # path, which can be exponential in the size of the graph.
    without_treatment = directed.subgraph([n for n in directed if n != treatment])

    # Loop-invariant: the graph used by the independence check.
    cut_graph = graph.remove_outgoing([treatment])

    for z in instruments:
        if z in (treatment, outcome) or z not in directed:
            continue

        # Relevance: Z causes X
        if not nx.has_path(directed, z, treatment):
            continue

        # Exclusion: no directed path from Z to Y that avoids X
        if nx.has_path(without_treatment, z, outcome):
            continue

        # Independence (conservative): Z and Y d-separated given X in graph
        # removing X's outgoing edges
        if not cut_graph.d_separated([z], [outcome], [treatment]):
            continue

        return z
    return None