crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,163 @@
1
+ """Run `crca_core` synthetic benchmarks and emit structured JSON.
2
+
3
+ This is a technical harness (not marketing). It should be runnable in CI/CD or
4
+ locally and produce machine-parseable outputs with provenance.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import Any, Dict
13
+
14
+ REPO_ROOT = Path(__file__).resolve().parents[1]
15
+ if str(REPO_ROOT) not in sys.path:
16
+ sys.path.insert(0, str(REPO_ROOT))
17
+
18
+ from crca_core.benchmarks.synthetic_scm import (
19
+ generate_latent_confounder_graph,
20
+ generate_lagged_timeseries,
21
+ generate_linear_gaussian_chain,
22
+ )
23
+ from crca_core.discovery import TabularDiscoveryConfig, discover_tabular
24
+ from crca_core.identify import identify_effect
25
+ from crca_core.core.lifecycle import lock_spec
26
+ from crca_core.models.spec import CausalGraphSpec, DraftSpec, EdgeSpec, NodeSpec, RoleSpec
27
+ from crca_core.timeseries import PCMCIConfig, discover_timeseries_pcmci
28
+ from crca_core.models.provenance import ProvenanceManifest
29
+ from crca_core.scm import LinearGaussianSCM
30
+ from utils.canonical import stable_hash
31
+
32
+
33
def run_linear_gaussian_chain() -> Dict[str, Any]:
    """Run the linear-Gaussian chain benchmark and return a result record.

    A chain is generated with fixed parameters, an SCM is built from its spec,
    exogenous noise is abduced from the factual sample and compared against the
    noise returned by the generator, and one counterfactual is computed with
    ``X0`` shifted by +1.0.

    Returns:
        A ``BenchmarkResult``-shaped dict holding provenance, the maximum
        absolute abduction error, and the factual/counterfactual artifacts.
    """
    benchmark_name = "linear_gaussian_chain"
    # Single source for the generator parameters so the provenance hash always
    # describes exactly what was passed to the generator.
    generator_params = {"n_vars": 4, "beta": 0.9, "seed": 1}

    spec, factual, noise = generate_linear_gaussian_chain(**generator_params)
    scm = LinearGaussianSCM.from_spec(spec)

    # Noise recovery: compare abduced noise with the generator's noise per key.
    abduced = scm.abduce_noise(factual)
    abs_errors = [abs(abduced[key] - noise[key]) for key in noise.keys()]
    max_abs_err = max(abs_errors)

    shifted_x0 = factual["X0"] + 1.0
    cf = scm.counterfactual(factual, interventions={"X0": shifted_x0})

    hash_payload = {
        "benchmark": benchmark_name,
        "generator": generator_params,
        "scm_type": "linear_gaussian",
    }
    prov = ProvenanceManifest.minimal(
        spec_hash=stable_hash(hash_payload),
        algorithm_config={"benchmark": benchmark_name},
        random_seeds={"numpy": 1},
    )

    notes = [
        "This benchmark checks abduction correctness (noise recovery) under full observability.",
        "Counterfactual uses abduction–action–prediction with fixed exogenous noise.",
    ]
    return {
        "result_type": "BenchmarkResult",
        "benchmark": benchmark_name,
        "provenance": prov.model_dump(),
        "metrics": {"abduction_max_abs_error": float(max_abs_err)},
        "artifacts": {
            "factual": factual,
            "counterfactual": cf,
            "notes": notes,
        },
    }
68
+
69
+
70
def run_identification_benchmarks() -> Dict[str, Any]:
    """Run identification on two fixed graphs and return a result record.

    The first graph is the two-node chain ``X -> Y``; the second comes from
    ``generate_latent_confounder_graph``. Both are drafted with ``X`` as the
    treatment and ``Y`` as the outcome, locked with a ``"human"`` approval, and
    passed to ``identify_effect``.

    Returns:
        A ``BenchmarkResult``-shaped dict whose artifacts contain the dumped
        identification outcome for each case.
    """

    def _identify_x_on_y(graph: Any) -> Any:
        # Draft -> lock -> identify the effect of X on Y for the given graph.
        draft = DraftSpec(
            graph=graph,
            roles=RoleSpec(treatments=["X"], outcomes=["Y"]),
        )
        locked = lock_spec(draft, approvals=["human"])
        return identify_effect(locked_spec=locked, treatment="X", outcome="Y")

    def _dump(obj: Any) -> Dict[str, Any]:
        # Objects exposing model_dump are dumped; anything else is stringified.
        if hasattr(obj, "model_dump"):
            return obj.model_dump()
        return {"value": str(obj)}

    # Identifiable (simple chain)
    chain_graph = CausalGraphSpec(
        nodes=[NodeSpec(name="X"), NodeSpec(name="Y")],
        edges=[EdgeSpec(source="X", target="Y")],
    )
    ident_simple = _identify_x_on_y(chain_graph)

    # Latent confounder (noted by the original author as non-identifiable in
    # conservative ID)
    ident_latent = _identify_x_on_y(generate_latent_confounder_graph())

    provenance = ProvenanceManifest.minimal(
        spec_hash=stable_hash({"benchmark": "identification"})
    )
    return {
        "result_type": "BenchmarkResult",
        "benchmark": "identification",
        "provenance": provenance.model_dump(),
        "metrics": {},
        "artifacts": {
            "identifiable_case": _dump(ident_simple),
            "latent_confounder_case": _dump(ident_latent),
        },
    }
106
+
107
+
108
def run_discovery_benchmarks() -> Dict[str, Any]:
    """Run tabular and time-series discovery on synthetic data.

    The tabular case uses 200 samples of ``Y = 2*X + noise`` (seed 0) with the
    ``"pc"`` algorithm at ``alpha=0.05``. The time-series case uses
    ``generate_lagged_timeseries(n_steps=200, seed=1)`` with PCMCI at
    ``max_lag=3`` and ``alpha=0.05``.

    Returns:
        A ``BenchmarkResult``-shaped dict whose artifacts contain the dumped
        outcome of each discovery call.
    """
    # Imported lazily, as in the original, so importing this module does not
    # require numpy/pandas.
    import numpy as np
    import pandas as pd

    def _dump(obj: Any) -> Dict[str, Any]:
        # Objects exposing model_dump are dumped; anything else is stringified.
        if hasattr(obj, "model_dump"):
            return obj.model_dump()
        return {"value": str(obj)}

    # Tabular discovery (draw order matters for reproducibility: X first, then noise)
    rng = np.random.default_rng(0)
    sample_size = 200
    x = rng.normal(size=sample_size)
    y_noise = rng.normal(size=sample_size)
    y = 2.0 * x + y_noise
    tabular_frame = pd.DataFrame({"X": x, "Y": y})
    tabular_config = TabularDiscoveryConfig(algorithm="pc", alpha=0.05)
    tabular = discover_tabular(tabular_frame, tabular_config)

    # Time-series discovery
    series, column_names = generate_lagged_timeseries(n_steps=200, seed=1)
    series_frame = pd.DataFrame(series, columns=column_names)
    pcmci_config = PCMCIConfig(max_lag=3, alpha=0.05, assume_sorted=True)
    ts_res = discover_timeseries_pcmci(series_frame, pcmci_config)

    provenance = ProvenanceManifest.minimal(
        spec_hash=stable_hash({"benchmark": "discovery"})
    )
    return {
        "result_type": "BenchmarkResult",
        "benchmark": "discovery",
        "provenance": provenance.model_dump(),
        "metrics": {},
        "artifacts": {
            "tabular": _dump(tabular),
            "timeseries": _dump(ts_res),
        },
    }
142
+
143
+
144
def main() -> None:
    """Run every benchmark, write the combined JSON report, and print its path.

    The report is written to ``<REPO_ROOT>/benchmark_results/crca_core_benchmarks.json``
    (the directory is created when missing).
    """
    # Runners are invoked in this order; the report preserves it.
    runners = (
        run_linear_gaussian_chain,
        run_identification_benchmarks,
        run_discovery_benchmarks,
    )
    report = {"benchmarks": [runner() for runner in runners]}

    results_dir = REPO_ROOT / "benchmark_results"
    results_dir.mkdir(parents=True, exist_ok=True)

    report_path = results_dir / "crca_core_benchmarks.json"
    serialized = json.dumps(report, indent=2)
    report_path.write_text(serialized, encoding="utf-8")

    print(str(report_path))


if __name__ == "__main__":
    main()
163
+
@@ -0,0 +1,198 @@
1
+ """Run full finetune for Qwen2.5 models with CRCA-optimized configurations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ REPO_ROOT = Path(__file__).resolve().parents[1]
11
+ if str(REPO_ROOT) not in sys.path:
12
+ sys.path.insert(0, str(REPO_ROOT))
13
+
14
+ from training.finetune import (
15
+ FinetuneConfig,
16
+ full_finetune_qwen25_0_5b_config,
17
+ full_finetune_qwen25_0_5b_config_cloud,
18
+ full_finetune_qwen25_1_5b_config,
19
+ full_finetune_qwen25_7b_config,
20
+ full_finetune_qwen25_14b_config,
21
+ run_finetune,
22
+ )
23
+
24
+
25
def _infer_model_key(base_model: str) -> str:
    """Map a base-model identifier to one of the supported size keys.

    The identifier is matched case-insensitively against the size tokens
    ``14b``, ``7b``, ``1.5b`` and ``0.5b`` (checked in that priority order).
    A token only counts when it is not immediately preceded by a digit or a
    dot, so larger or fractional sizes that merely *end* in a supported token
    (for example ``27b`` or ``2.7b``) are not mistaken for it.

    Args:
        base_model: Model name or hub ID, e.g. ``"Qwen/Qwen2.5-7B-Instruct"``.

    Returns:
        ``"14b"``, ``"7b"``, ``"1.5b"`` or ``"0.5b"`` when a size token is
        found, otherwise ``"unknown"``.
    """
    import re  # local import keeps this helper self-contained

    model = base_model.lower()
    for key in ("14b", "7b", "1.5b", "0.5b"):
        # (?<![\d.]) rejects matches that are the tail of another number.
        if re.search(r"(?<![\d.])" + re.escape(key), model):
            return key
    return "unknown"
36
+
37
+
38
def _apply_auto_config(cfg: FinetuneConfig, args: argparse.Namespace) -> None:
    """Tune ``cfg`` in place from the properties of the first CUDA device.

    Does nothing when ``args.no_auto_config`` is set, when ``torch`` cannot be
    imported, or when CUDA is unavailable. Otherwise it:

    * switches to bf16 (and disables fp16) when the GPU supports bf16;
    * picks a ZeRO-3 DeepSpeed config for multi-GPU runs unless the user passed
      ``--deepspeed-config``;
    * on GPUs with at least 60 GB, applies per-model batch size, gradient
      accumulation (only when ``--grad-accum`` was not given) and sequence
      length presets;
    * prints a one-line summary.

    Args:
        cfg: Finetune configuration to mutate.
        args: Parsed CLI arguments; reads ``no_auto_config``,
            ``deepspeed_config`` and ``grad_accum``.
    """
    if getattr(args, "no_auto_config", False):
        return
    try:
        import torch
    except Exception:
        # Best effort: without a usable torch there is nothing to inspect.
        return

    cuda = torch.cuda
    if not cuda.is_available():
        return

    device_count = cuda.device_count()
    device_name = cuda.get_device_name(0)
    total_mem_gb = cuda.get_device_properties(0).total_memory / (1024**3)
    high_memory = total_mem_gb >= 60

    if cuda.is_bf16_supported():
        cfg.bf16 = True
        cfg.fp16 = False

    # DeepSpeed config selection for multi-GPU NVIDIA setups
    if device_count > 1 and args.deepspeed_config is None:
        if high_memory:
            ds_file = "deepspeed_zero3_h100_3gpu.json"
        else:
            ds_file = "deepspeed_zero3_offload.json"
        cfg.deepspeed_config = str((REPO_ROOT / "training" / ds_file).resolve())

    # Batch/grad/seq tuning for high-memory NVIDIA GPUs.
    # Values: (per-device batch size, gradient accumulation steps, max sequence length)
    presets = {
        "1.5b": (16, 8, 8192),
        "7b": (8, 16, 4096),
        "14b": (4, 32, 2048),
        "0.5b": (32, 4, 4096),
    }
    model_key = _infer_model_key(cfg.base_model)
    preset = presets.get(model_key) if high_memory else None
    if preset is not None:
        batch_size, accumulation_steps, seq_length = preset
        cfg.per_device_batch_size = batch_size
        if args.grad_accum is None:
            cfg.gradient_accumulation_steps = accumulation_steps
        cfg.max_seq_length = seq_length

    print(
        f"Auto-config: gpu={device_name}, count={device_count}, mem={total_mem_gb:.0f}GB, "
        f"bf16={'on' if cfg.bf16 else 'off'}, ds={'on' if cfg.deepspeed_config else 'off'}"
    )
93
+
94
+
95
def main() -> None:
    """Parse CLI arguments, assemble the finetune configuration, and train.

    Steps:
        1. Pick the preset config for ``--model-size`` (``--cloud`` selects the
           cloud variant for 0.5B only).
        2. Apply explicit CLI overrides (model ID, files, epochs, grad accum).
        3. Run GPU-based auto-configuration (unless ``--no-auto-config``).
        4. Resolve the DeepSpeed config path (relative paths are resolved
           against the repository root) and align bf16/fp16 with it.
        5. Call ``run_finetune``.

    Raises:
        ValueError: If the model size has no preset config.
        FileNotFoundError: If the eval file or the DeepSpeed config is missing.
    """
    parser = argparse.ArgumentParser(
        description="Run full finetune for Qwen2.5 models (0.5B, 1.5B, 7B, 14B) with CRCA-optimized configurations."
    )
    parser.add_argument("--train-file", type=str, required=True, help="Path to training JSONL.")
    parser.add_argument("--eval-file", type=str, default=None, help="Optional eval JSONL.")
    parser.add_argument("--output-dir", type=str, default=None, help="Output directory.")
    parser.add_argument("--epochs", type=int, default=None, help="Override num_train_epochs.")
    parser.add_argument("--grad-accum", type=int, default=None, help="Override gradient accumulation.")
    parser.add_argument(
        "--model-size",
        type=str,
        choices=["0.5b", "1.5b", "7b", "14b"],
        default="0.5b",
        help="Model size to finetune: 0.5b, 1.5b, 7b, or 14b. Default: 0.5b",
    )
    parser.add_argument(
        "--cloud",
        action="store_true",
        help="Use cloud-optimized config (only for 0.5B model). For other sizes, configs are already cloud-optimized.",
    )
    parser.add_argument(
        "--model-id",
        type=str,
        default=None,
        help="Override base model ID (e.g. google/switch-base-8 for MoE Seq2Seq training).",
    )
    parser.add_argument(
        "--deepspeed-config",
        type=str,
        default=None,
        help="Override DeepSpeed config path (e.g. training/deepspeed_zero3_h100_3gpu.json for 3x H100 ZeRO-3).",
    )
    # Accepted so the DeepSpeed launcher's injected flag does not break parsing.
    parser.add_argument(
        "--local-rank",
        "--local_rank",
        type=int,
        default=-1,
        dest="local_rank",
        help="Local rank for distributed training (set by DeepSpeed launcher).",
    )
    parser.add_argument(
        "--no-auto-config",
        action="store_true",
        help="Disable automatic GPU-based configuration tuning.",
    )
    args = parser.parse_args()

    # Map model sizes to config functions
    config_map = {
        "0.5b": full_finetune_qwen25_0_5b_config_cloud if args.cloud else full_finetune_qwen25_0_5b_config,
        "1.5b": full_finetune_qwen25_1_5b_config,
        "7b": full_finetune_qwen25_7b_config,
        "14b": full_finetune_qwen25_14b_config,
    }

    # Get the appropriate config function (argparse `choices` already restricts
    # the value; the check is kept as a defensive guard).
    config_func = config_map.get(args.model_size.lower())
    if config_func is None:
        raise ValueError(f"Invalid model size: {args.model_size}. Choose from: 0.5b, 1.5b, 7b, 14b")

    cfg = config_func()
    if args.model_id:
        cfg.base_model = args.model_id
    cfg.train_file = args.train_file
    # Only set eval_file if explicitly provided and file exists
    if args.eval_file:
        if not Path(args.eval_file).exists():
            raise FileNotFoundError(f"Eval file not found: {args.eval_file}")
        cfg.eval_file = args.eval_file
    if args.output_dir:
        cfg.output_dir = args.output_dir
    if args.epochs is not None:
        cfg.num_train_epochs = args.epochs
    if args.grad_accum is not None:
        cfg.gradient_accumulation_steps = args.grad_accum
    _apply_auto_config(cfg, args)

    # A config chosen by the preset or by auto-config goes through the same
    # validation path as one passed on the command line.
    if args.deepspeed_config is None and cfg.deepspeed_config:
        args.deepspeed_config = cfg.deepspeed_config
    if args.deepspeed_config:
        ds_path = Path(args.deepspeed_config)
        if not ds_path.is_absolute():
            ds_path = REPO_ROOT / ds_path
        if not ds_path.exists():
            raise FileNotFoundError(f"DeepSpeed config not found: {args.deepspeed_config}")
        cfg.deepspeed_config = str(ds_path.resolve())
        # Best effort: mirror the DeepSpeed precision settings in the trainer
        # config. Failure to read/parse is reported instead of silently ignored,
        # but it does not abort the run.
        try:
            ds_config = json.loads(Path(cfg.deepspeed_config).read_text(encoding="utf-8"))
            bf16_enabled = bool(ds_config.get("bf16", {}).get("enabled", False))
            fp16_enabled = bool(ds_config.get("fp16", {}).get("enabled", False))
        except (OSError, ValueError, AttributeError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # AttributeError covers JSON whose sections are not objects.
            print(
                f"Warning: could not read precision settings from {cfg.deepspeed_config}: {exc}",
                file=sys.stderr,
            )
        else:
            if bf16_enabled and not fp16_enabled:
                cfg.bf16 = True
                cfg.fp16 = False

    if cfg.deepspeed_config and not Path(cfg.deepspeed_config).exists():
        raise FileNotFoundError(f"Missing deepspeed config: {cfg.deepspeed_config}")

    run_finetune(cfg)


if __name__ == "__main__":
    main()
@@ -0,0 +1,31 @@
1
+ """Run LRM evaluation on trace files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ REPO_ROOT = Path(__file__).resolve().parents[1]
10
+ if str(REPO_ROOT) not in sys.path:
11
+ sys.path.insert(0, str(REPO_ROOT))
12
+
13
+ from training.datasets import load_plan_results
14
+ from training.eval import EvalConfig, run_eval
15
+
16
+
17
def main() -> None:
    """Evaluate the plans loaded from one or more ``--trace`` files.

    ``--trace`` may be repeated; ``--output`` defaults to
    ``eval_results/lrm_eval.json`` and is passed to ``EvalConfig``.

    Raises:
        ValueError: If no ``--trace`` argument was supplied.
    """
    cli = argparse.ArgumentParser(description="Run LRM eval on trace JSON/JSONL.")
    cli.add_argument("--trace", action="append", default=[], help="Path to trace JSON or JSONL.")
    cli.add_argument("--output", type=str, default="eval_results/lrm_eval.json")
    options = cli.parse_args()

    trace_paths = [Path(raw) for raw in options.trace]
    if not trace_paths:
        raise ValueError("At least one --trace file is required.")

    run_eval(load_plan_results(trace_paths), EvalConfig(output_path=options.output))


if __name__ == "__main__":
    main()
@@ -439,4 +439,33 @@ class GraphManager:
439
439
  """
440
440
  for u, v in edges:
441
441
  self.add_relationship(u, v, **default_metadata)
442
+
443
def temporal_topological_sort(self) -> List[str]:
    """
    Return the nodes in topological order.

    Placeholder for a temporally-aware ordering: at present this simply
    delegates to ``topological_sort`` and does not inspect temporal edge
    types (BEFORE, AFTER, DELAYED).

    Returns:
        List of nodes as produced by ``topological_sort``
    """
    # Standard ordering for now; temporal edge types may be honoured later.
    ordering = self.topological_sort()
    return ordering
455
+
456
def get_temporal_edges(self) -> List[Tuple[str, str, Dict[str, Any]]]:
    """
    Collect the edges whose metadata marks them as temporal.

    An edge qualifies when its metadata is a dict with a truthy
    ``"temporal_type"`` other than ``"immediate"``.

    Returns:
        List of (source, target, metadata) tuples, in graph iteration order
    """

    def _is_temporal(meta: Any) -> bool:
        # Non-dict metadata cannot carry a temporal type.
        if not isinstance(meta, dict):
            return False
        kind = meta.get("temporal_type")
        return bool(kind) and kind != "immediate"

    return [
        (source, target, meta)
        for source, targets in self.graph.items()
        for target, meta in targets.items()
        if _is_temporal(meta)
    ]
442
471
 
tests/conftest.py ADDED
@@ -0,0 +1,9 @@
1
+ import sys
2
+ from pathlib import Path
3
+
4
+
5
+ # Ensure repository root is importable when running pytest from any cwd.
6
+ REPO_ROOT = Path(__file__).resolve().parents[1]
7
+ if str(REPO_ROOT) not in sys.path:
8
+ sys.path.insert(0, str(REPO_ROOT))
9
+
tests/test_core.py CHANGED
@@ -4,9 +4,8 @@ import importlib.util
4
4
 
5
5
  def load_crca_module():
6
6
  repo_root = os.path.dirname(os.path.dirname(__file__))
7
- # CRCA.py lives inside the ceca_lite package directory
8
- # In this repository layout the implementation file is `CRCA.py` under CR-CA
9
- target = os.path.join(repo_root, "CR-CA", "CRCA.py")
7
+ # In this repository layout the implementation file is `CRCA.py` at repo root.
8
+ target = os.path.join(repo_root, "CRCA.py")
10
9
  spec = importlib.util.spec_from_file_location("crca_module", target)
11
10
  mod = importlib.util.module_from_spec(spec)
12
11
  spec.loader.exec_module(mod)
@@ -0,0 +1,15 @@
1
+ import pandas as pd
2
+
3
+ from crca_core.discovery import TabularDiscoveryConfig, discover_tabular
4
+ from crca_core.models.refusal import RefusalResult, RefusalReasonCode
5
+ from crca_core.models.result import DiscoveryHypothesisResult
6
+
7
+
8
def test_discover_tabular_refuses_when_backend_missing() -> None:
    """PC discovery either refuses as unsupported or yields a hypothesis result."""
    frame = pd.DataFrame({"x": [1, 2, 3], "y": [0, 1, 0]})
    outcome = discover_tabular(frame, TabularDiscoveryConfig(algorithm="pc"))

    if not isinstance(outcome, RefusalResult):
        # Non-refusal outcomes must be a discovery hypothesis result.
        assert isinstance(outcome, DiscoveryHypothesisResult)
        return

    # A refusal must carry the unsupported-operation reason code.
    assert RefusalReasonCode.UNSUPPORTED_OPERATION in outcome.reason_codes
15
+
@@ -0,0 +1,36 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from crca_core import DraftSpec, EstimatorConfig, estimate_effect_dowhy, lock_spec
5
+ from crca_core.identify import identify_effect
6
+ from crca_core.models.spec import CausalGraphSpec, EdgeSpec, NodeSpec, RoleSpec
7
+ from crca_core.models.result import EstimateResult
8
+
9
+
10
def test_estimate_effect_dowhy_runs_on_simple_linear_model() -> None:
    """Estimating X -> Y on linear synthetic data yields a finite ``value``."""
    # Synthetic data: Y = 3*X + noise (X is drawn before the noise term).
    rng = np.random.default_rng(0)
    sample_size = 200
    x_values = rng.normal(0, 1, size=sample_size)
    y_noise = rng.normal(0, 1, size=sample_size)
    frame = pd.DataFrame({"X": x_values, "Y": 3.0 * x_values + y_noise})

    graph = CausalGraphSpec(
        nodes=[NodeSpec(name="X"), NodeSpec(name="Y")],
        edges=[EdgeSpec(source="X", target="Y")],
    )
    draft = DraftSpec(graph=graph, roles=RoleSpec(treatments=["X"], outcomes=["Y"]))
    locked = lock_spec(draft, approvals=["human"])

    ident = identify_effect(locked_spec=locked, treatment="X", outcome="Y")
    estimator_config = EstimatorConfig(method_name="backdoor.linear_regression")
    res = estimate_effect_dowhy(
        data=frame,
        locked_spec=locked,
        treatment="X",
        outcome="Y",
        identification_result=ident,
        config=estimator_config,
    )

    assert isinstance(res, EstimateResult)
    assert "value" in res.estimate
    assert np.isfinite(res.estimate["value"])
36
+
@@ -0,0 +1,18 @@
1
+ from crca_core.identify import identify_effect
2
+ from crca_core.core.lifecycle import lock_spec
3
+ from crca_core.models.result import IdentificationResult
4
+ from crca_core.models.spec import CausalGraphSpec, DraftSpec, EdgeSpec, NodeSpec, RoleSpec
5
+
6
+
7
def test_identify_backdoor_empty_set_for_simple_chain() -> None:
    """Identify the effect of X on Y in the two-node graph X -> Y.

    Asserts that the call yields an ``IdentificationResult`` whose ``method``
    is ``"backdoor"``.
    """
    nodes = [NodeSpec(name="X"), NodeSpec(name="Y")]
    edges = [EdgeSpec(source="X", target="Y")]
    graph = CausalGraphSpec(nodes=nodes, edges=edges)
    roles = RoleSpec(treatments=["X"], outcomes=["Y"])

    locked = lock_spec(DraftSpec(graph=graph, roles=roles), approvals=["human"])
    outcome = identify_effect(locked_spec=locked, treatment="X", outcome="Y")

    assert isinstance(outcome, IdentificationResult)
    assert outcome.method == "backdoor"
@@ -0,0 +1,36 @@
1
+ from crca_core.core.intervention_design import FeasibilityConstraints, TargetQuery, design_intervention
2
+ from crca_core.core.lifecycle import lock_spec
3
+ from crca_core.models.spec import DraftSpec, CausalGraphSpec, EdgeSpec, NodeSpec, RoleSpec
4
+
5
+
6
def test_design_intervention_randomize_when_manipulable() -> None:
    """Check that a manipulable treatment produces a randomization design.

    Uses the graph X -> Y with X listed as manipulable and asserts that the
    result is tagged ``"InterventionDesign"`` and that at least one design has
    ``design_type == "randomize_treatment"``.
    """
    graph = CausalGraphSpec(
        nodes=[NodeSpec(name="X"), NodeSpec(name="Y")],
        edges=[EdgeSpec(source="X", target="Y")],
    )
    roles = RoleSpec(treatments=["X"], outcomes=["Y"])
    locked = lock_spec(DraftSpec(graph=graph, roles=roles), approvals=["human"])

    query = TargetQuery(query_type="identify_effect", treatment="X", outcome="Y")
    feasibility = FeasibilityConstraints(manipulable_variables=["X"])
    outcome = design_intervention(
        locked_spec=locked,
        target_query=query,
        constraints=feasibility,
    )

    assert outcome.result_type == "InterventionDesign"
    assert any(
        design["design_type"] == "randomize_treatment" for design in outcome.designs
    )
19
+
20
+
21
def test_design_intervention_measure_parents_of_treatment() -> None:
    """Check the design proposed when no variable is manipulable.

    Uses the chain Z -> X -> Y with an empty ``manipulable_variables`` list and
    asserts that at least one design has
    ``design_type == "measure_confounder_candidates"``.
    """
    nodes = [NodeSpec(name="Z"), NodeSpec(name="X"), NodeSpec(name="Y")]
    edges = [EdgeSpec(source="Z", target="X"), EdgeSpec(source="X", target="Y")]
    graph = CausalGraphSpec(nodes=nodes, edges=edges)
    roles = RoleSpec(treatments=["X"], outcomes=["Y"])
    locked = lock_spec(DraftSpec(graph=graph, roles=roles), approvals=["human"])

    query = TargetQuery(query_type="identify_effect", treatment="X", outcome="Y")
    feasibility = FeasibilityConstraints(manipulable_variables=[])
    outcome = design_intervention(
        locked_spec=locked,
        target_query=query,
        constraints=feasibility,
    )

    assert any(
        design["design_type"] == "measure_confounder_candidates"
        for design in outcome.designs
    )
36
+
@@ -0,0 +1,69 @@
1
+ import numpy as np
2
+
3
+ from crca_core.scm import LinearGaussianSCM
4
+ from crca_core.models.spec import NoiseSpec, SCMSpec, StructuralEquationSpec
5
+
6
+
7
def test_linear_gaussian_scm_counterfactual_simple_chain() -> None:
    """Counterfactual on the chain X := Ux, Y := 2*X + Uy.

    The factual point (X=1, Y=3) implies Uy = 1, so setting X to 2 is expected
    to give Y = 2*2 + 1 = 5.
    """

    def unit_gaussian() -> NoiseSpec:
        # Build a fresh spec per equation, as the two equations each own one.
        return NoiseSpec(distribution="gaussian", params={"mean": 0.0, "std": 1.0})

    eq_x = StructuralEquationSpec(
        variable="X",
        parents=[],
        coefficients={},
        intercept=0.0,
        noise=unit_gaussian(),
    )
    eq_y = StructuralEquationSpec(
        variable="Y",
        parents=["X"],
        coefficients={"X": 2.0},
        intercept=0.0,
        noise=unit_gaussian(),
    )
    model = LinearGaussianSCM.from_spec(
        SCMSpec(scm_type="linear_gaussian", equations=[eq_x, eq_y])
    )

    observed = {"X": 1.0, "Y": 3.0}  # implies Uy = 1
    counterfactual = model.counterfactual(observed, interventions={"X": 2.0})

    assert counterfactual["X"] == 2.0
    assert counterfactual["Y"] == 5.0
35
+
36
+
37
def test_linear_gaussian_scm_abduction_action_prediction_matches_manual() -> None:
    """Compare ``counterfactual`` on the chain A -> B -> C with a manual computation.

    One factual sample is generated from known noise terms, A is shifted by
    1.0, and the downstream values are recomputed by hand with the same noise
    terms; the SCM's counterfactual B and C must be close to those values.
    """
    rng = np.random.default_rng(0)
    beta_ab = 0.7
    beta_bc = -1.3
    intercept_a, intercept_b, intercept_c = 0.2, -0.1, 0.0

    equations = [
        StructuralEquationSpec(
            variable="A", parents=[], coefficients={}, intercept=0.2
        ),
        StructuralEquationSpec(
            variable="B", parents=["A"], coefficients={"A": beta_ab}, intercept=-0.1
        ),
        StructuralEquationSpec(
            variable="C", parents=["B"], coefficients={"B": beta_bc}, intercept=0.0
        ),
    ]
    model = LinearGaussianSCM.from_spec(
        SCMSpec(scm_type="linear_gaussian", equations=equations)
    )

    # One factual realization generated from explicit noise draws.
    noise_a, noise_b, noise_c = rng.normal(0, 1, size=3)
    a_fact = intercept_a + noise_a
    b_fact = intercept_b + beta_ab * a_fact + noise_b
    c_fact = intercept_c + beta_bc * b_fact + noise_c
    observed = {"A": float(a_fact), "B": float(b_fact), "C": float(c_fact)}

    # Intervene on A, shifting it one unit above its factual value.
    a_do = float(a_fact + 1.0)
    counterfactual = model.counterfactual(observed, interventions={"A": a_do})

    # Manual abduction-action-prediction reusing the same noise terms.
    b_expected = intercept_b + beta_ab * a_do + noise_b
    c_expected = intercept_c + beta_bc * b_expected + noise_c

    assert np.isclose(counterfactual["B"], b_expected)
    assert np.isclose(counterfactual["C"], c_expected)
69
+
@@ -0,0 +1,25 @@
1
+ from crca_core import DraftSpec, lock_spec, validate_spec
2
+
3
+
4
def test_draft_spec_validates() -> None:
    """A default-constructed ``DraftSpec`` must validate with no errors."""
    validation = validate_spec(DraftSpec())

    assert validation.ok is True
    assert validation.errors == []
9
+
10
+
11
def test_lock_spec_requires_approvals() -> None:
    """Locking a spec with an empty approvals list must raise ``ValueError``."""
    draft = DraftSpec()
    try:
        lock_spec(draft, approvals=[])
    except ValueError:
        # Expected path: the lock was rejected.
        return
    # Raise explicitly rather than `assert False`: assert statements are
    # removed under `python -O`, which would let this test pass silently when
    # no ValueError is raised.
    raise AssertionError("Expected ValueError")
18
+
19
+
20
def test_lock_spec_produces_hash() -> None:
    """A locked spec must expose a truthy ``spec_hash`` and a ``"locked"`` status."""
    locked_spec = lock_spec(DraftSpec(), approvals=["human"])

    assert locked_spec.spec_hash
    assert locked_spec.status.value == "locked"
25
+
@@ -0,0 +1,15 @@
1
+ import pandas as pd
2
+
3
+ from crca_core.models.refusal import RefusalReasonCode, RefusalResult
4
+ from crca_core.models.result import DiscoveryHypothesisResult
5
+ from crca_core.timeseries import PCMCIConfig, discover_timeseries_pcmci
6
+
7
+
8
def test_discover_timeseries_pcmci_refuses_when_backend_missing() -> None:
    """Run PCMCI discovery on a tiny frame and accept either allowed outcome.

    If the call comes back as a ``RefusalResult`` it must carry
    ``UNSUPPORTED_OPERATION``; any other return value has to be a
    ``DiscoveryHypothesisResult``.
    """
    frame = pd.DataFrame({"x": [1, 2, 3], "y": [0, 1, 0]})
    config = PCMCIConfig(max_lag=2)
    outcome = discover_timeseries_pcmci(frame, config)

    # Non-refusal path first: the only other accepted type is a hypothesis result.
    if not isinstance(outcome, RefusalResult):
        assert isinstance(outcome, DiscoveryHypothesisResult)
        return
    assert RefusalReasonCode.UNSUPPORTED_OPERATION in outcome.reason_codes
15
+