crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
training/datasets.py ADDED
@@ -0,0 +1,140 @@
1
+ """Dataset assembly for ReAct training traces."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Dict, Iterable, List, Optional, Sequence
9
+
10
+ from crca_reasoning.types import LRMPlanResult
11
+
12
+
13
@dataclass
class ReActExample:
    """One prompt/response record used when assembling ReAct training data."""

    # Prompt text of the example.
    prompt: str
    # Response text paired with the prompt.
    response: str
    # String-to-string labels attached to the example (e.g. a "type" key).
    tags: Dict[str, str]
    # Refusal marker for the example; False unless set explicitly.
    refusal: bool = False
19
+
20
+
21
def plan_result_to_examples(plan: LRMPlanResult) -> List[ReActExample]:
    """Turn a single plan result into a list of ReActExample records.

    One example is produced per entry of ``plan.cycle_traces``: the prompt is
    the cycle's reasoning, and the response lists the cycle's actions (tool
    name and payload) followed by its critique, each only when present. The
    example is marked as a refusal when any observation of the cycle carries
    a non-None ``refusal``.

    When the plan has a rationale trace with steps, one more example is
    appended whose prompt is the literal "RationaleTrace" and whose response
    is the steps joined by newlines.
    """
    collected: List[ReActExample] = []

    for trace in plan.cycle_traces:
        prompt_text = trace.reasoning

        # Assemble the response from fragments and join once at the end.
        fragments: List[str] = []
        if trace.actions:
            fragments.append("Actions:\n")
            fragments.extend(
                f"- {step.tool_name}: {step.payload}\n" for step in trace.actions
            )
        if trace.critique:
            fragments.append(f"Critique: {trace.critique}\n")

        saw_refusal = any(
            observation.refusal is not None for observation in trace.observations
        )
        cycle_example = ReActExample(
            prompt=prompt_text,
            response="".join(fragments),
            tags={"type": "react_cycle"},
            refusal=saw_refusal,
        )
        collected.append(cycle_example)

    # Optional trailing example that carries the rationale steps.
    if plan.rationale_trace and plan.rationale_trace.steps:
        rationale_example = ReActExample(
            prompt="RationaleTrace",
            response="\n".join(plan.rationale_trace.steps),
            tags={"type": "rationale_trace"},
            refusal=False,
        )
        collected.append(rationale_example)

    return collected
50
+
51
+
52
def load_plan_results(paths: Sequence[Path]) -> List[LRMPlanResult]:
    """Read LRMPlanResult objects from JSON or JSONL trace files.

    A file whose suffix is ``.jsonl`` (case-insensitive) is read line by
    line, skipping blank lines, with one object per line. Any other file is
    parsed as a single JSON document that is either one object or a list of
    objects. Every parsed payload goes through
    ``LRMPlanResult.model_validate``.

    Raises:
        FileNotFoundError: if any of the given paths does not exist.
    """
    loaded: List[LRMPlanResult] = []

    for trace_path in paths:
        if not trace_path.exists():
            raise FileNotFoundError(f"Trace file not found: {trace_path}")

        if trace_path.suffix.lower() != ".jsonl":
            # Whole-file JSON: treat a lone object as a one-element list.
            document = json.loads(trace_path.read_text(encoding="utf-8"))
            entries = document if isinstance(document, list) else [document]
            loaded.extend(LRMPlanResult.model_validate(entry) for entry in entries)
            continue

        with trace_path.open("r", encoding="utf-8") as handle:
            for raw_line in handle:
                record = raw_line.strip()
                if record:
                    loaded.append(LRMPlanResult.model_validate(json.loads(record)))

    return loaded
73
+
74
+
75
def examples_from_traces(paths: Sequence[Path]) -> List[ReActExample]:
    """Load plan results from the trace files and flatten them into examples.

    All files are loaded first via ``load_plan_results``; each resulting plan
    is then expanded with ``plan_result_to_examples``, preserving plan order.
    """
    loaded_plans = load_plan_results(paths)
    return [
        example
        for loaded_plan in loaded_plans
        for example in plan_result_to_examples(loaded_plan)
    ]
82
+
83
+
84
def normalize_text(text: str) -> str:
    """Collapse every whitespace run to one space and trim both ends."""
    # split() with no separator already discards leading/trailing whitespace.
    words = text.split()
    return " ".join(words)
86
+
87
+
88
def filter_examples(
    examples: Iterable[ReActExample],
    *,
    min_response_len: int = 1,
    max_prompt_len: Optional[int] = None,
    max_response_len: Optional[int] = None,
) -> List[ReActExample]:
    """Normalize examples and drop those outside the length limits.

    Prompt and response are passed through ``normalize_text`` before their
    lengths (in characters) are checked. An example is dropped when its
    response is shorter than ``min_response_len``, or when a limit is given
    and its prompt exceeds ``max_prompt_len`` or its response exceeds
    ``max_response_len``. Survivors are returned as new ReActExample objects
    holding the normalized text, a copy of the tags and the original
    refusal flag.
    """
    kept: List[ReActExample] = []

    for item in examples:
        clean_prompt = normalize_text(item.prompt)
        clean_response = normalize_text(item.response)

        response_too_short = len(clean_response) < min_response_len
        prompt_too_long = (
            max_prompt_len is not None and len(clean_prompt) > max_prompt_len
        )
        response_too_long = (
            max_response_len is not None and len(clean_response) > max_response_len
        )
        if response_too_short or prompt_too_long or response_too_long:
            continue

        kept.append(
            ReActExample(
                prompt=clean_prompt,
                response=clean_response,
                tags=dict(item.tags),
                refusal=item.refusal,
            )
        )

    return kept
114
+
115
+
116
def merge_examples(
    *,
    internal_examples: Iterable[ReActExample],
    public_examples: Iterable[ReActExample],
    max_internal: Optional[int] = None,
    max_public: Optional[int] = None,
) -> List[ReActExample]:
    """Concatenate internal and public examples, optionally capping each.

    Internal examples come first, then public ones. Each source is fully
    materialized into a list and, when its cap is not None, sliced with
    ``[:cap]`` before being appended.
    """
    combined: List[ReActExample] = []
    sources = (
        (internal_examples, max_internal),
        (public_examples, max_public),
    )
    for pool, cap in sources:
        materialized = list(pool)
        combined.extend(materialized if cap is None else materialized[:cap])
    return combined
133
+
134
+
135
def save_jsonl(examples: Iterable[ReActExample], path: Path) -> None:
    """Write the examples to ``path`` as UTF-8 JSON Lines.

    Missing parent directories are created. Each example is serialized from
    its instance ``__dict__`` with non-ASCII characters kept as-is, one JSON
    object per line. An existing file at ``path`` is overwritten.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as sink:
        for record in examples:
            encoded = json.dumps(record.__dict__, ensure_ascii=False)
            sink.write(encoded + "\n")
140
+
training/deepspeed_zero2_0_5b.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "fp16": {
3
+ "enabled": true
4
+ },
5
+ "bf16": {
6
+ "enabled": false
7
+ },
8
+ "zero_optimization": {
9
+ "stage": 2,
10
+ "offload_optimizer": {
11
+ "device": "cpu",
12
+ "pin_memory": false
13
+ },
14
+ "overlap_comm": true,
15
+ "contiguous_gradients": true,
16
+ "reduce_bucket_size": 15000000
17
+ },
18
+ "train_micro_batch_size_per_gpu": "auto",
19
+ "gradient_accumulation_steps": "auto",
20
+ "gradient_clipping": 1.0,
21
+ "zero_allow_untested_optimizer": true
22
+ }
training/deepspeed_zero2_1_5b.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "fp16": {
3
+ "enabled": true
4
+ },
5
+ "bf16": {
6
+ "enabled": false
7
+ },
8
+ "zero_optimization": {
9
+ "stage": 2,
10
+ "offload_optimizer": {
11
+ "device": "cpu",
12
+ "pin_memory": false
13
+ },
14
+ "overlap_comm": true,
15
+ "contiguous_gradients": true,
16
+ "reduce_bucket_size": 20000000
17
+ },
18
+ "train_micro_batch_size_per_gpu": "auto",
19
+ "gradient_accumulation_steps": "auto",
20
+ "gradient_clipping": 1.0,
21
+ "zero_allow_untested_optimizer": true
22
+ }
training/deepspeed_zero3_0_5b.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "fp16": {
3
+ "enabled": true
4
+ },
5
+ "bf16": {
6
+ "enabled": false
7
+ },
8
+ "zero_optimization": {
9
+ "stage": 3,
10
+ "offload_param": {
11
+ "device": "cpu",
12
+ "pin_memory": false
13
+ },
14
+ "offload_optimizer": {
15
+ "device": "cpu",
16
+ "pin_memory": false
17
+ },
18
+ "overlap_comm": true,
19
+ "contiguous_gradients": true,
20
+ "reduce_bucket_size": 15000000,
21
+ "stage3_prefetch_bucket_size": 15000000,
22
+ "stage3_param_persistence_threshold": 500000
23
+ },
24
+ "train_micro_batch_size_per_gpu": "auto",
25
+ "gradient_accumulation_steps": "auto",
26
+ "gradient_clipping": 1.0,
27
+ "zero_allow_untested_optimizer": true
28
+ }
training/deepspeed_zero3_14b.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "fp16": {
3
+ "enabled": false
4
+ },
5
+ "bf16": {
6
+ "enabled": true
7
+ },
8
+ "zero_optimization": {
9
+ "stage": 3,
10
+ "offload_param": {
11
+ "device": "cpu",
12
+ "pin_memory": false
13
+ },
14
+ "offload_optimizer": {
15
+ "device": "cpu",
16
+ "pin_memory": false
17
+ },
18
+ "overlap_comm": true,
19
+ "contiguous_gradients": true,
20
+ "reduce_bucket_size": 100000000,
21
+ "stage3_prefetch_bucket_size": 100000000,
22
+ "stage3_param_persistence_threshold": 2000000
23
+ },
24
+ "train_micro_batch_size_per_gpu": "auto",
25
+ "gradient_accumulation_steps": "auto",
26
+ "gradient_clipping": 1.0,
27
+ "zero_allow_untested_optimizer": true
28
+ }
training/deepspeed_zero3_h100_3gpu.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "fp16": {
3
+ "enabled": false
4
+ },
5
+ "bf16": {
6
+ "enabled": true
7
+ },
8
+ "zero_optimization": {
9
+ "stage": 3,
10
+ "overlap_comm": true,
11
+ "contiguous_gradients": true,
12
+ "reduce_bucket_size": 50000000,
13
+ "stage3_prefetch_bucket_size": 50000000,
14
+ "stage3_param_persistence_threshold": 1000000
15
+ },
16
+ "train_micro_batch_size_per_gpu": "auto",
17
+ "gradient_accumulation_steps": "auto",
18
+ "gradient_clipping": 1.0,
19
+ "zero_allow_untested_optimizer": true
20
+ }
training/deepspeed_zero3_offload.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "fp16": {
3
+ "enabled": false
4
+ },
5
+ "bf16": {
6
+ "enabled": true
7
+ },
8
+ "zero_optimization": {
9
+ "stage": 3,
10
+ "offload_param": {
11
+ "device": "cpu",
12
+ "pin_memory": false
13
+ },
14
+ "offload_optimizer": {
15
+ "device": "cpu",
16
+ "pin_memory": false
17
+ },
18
+ "overlap_comm": true,
19
+ "contiguous_gradients": true,
20
+ "reduce_bucket_size": 50000000,
21
+ "stage3_prefetch_bucket_size": 50000000,
22
+ "stage3_param_persistence_threshold": 1000000
23
+ },
24
+ "train_micro_batch_size_per_gpu": "auto",
25
+ "gradient_accumulation_steps": "auto",
26
+ "gradient_clipping": 1.0,
27
+ "zero_allow_untested_optimizer": true
28
+ }
training/eval.py ADDED
@@ -0,0 +1,92 @@
1
+ """Mixed evaluation harness for LRM."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Dict, List
9
+
10
+ from crca_core.benchmarks.synthetic_scm import generate_latent_confounder_graph
11
+ from crca_core.identify import identify_effect
12
+ from crca_core.core.lifecycle import lock_spec
13
+ from crca_core.models.spec import CausalGraphSpec, DraftSpec, EdgeSpec, NodeSpec, RoleSpec
14
+ from crca_reasoning.types import LRMPlanResult
15
+
16
+
17
@dataclass
class EvalConfig:
    """Settings for an evaluation run."""

    # File path the evaluation results JSON is written to.
    output_path: str = "eval_results/lrm_eval.json"
20
+
21
+
22
def eval_react_metrics(plans: List[LRMPlanResult]) -> Dict[str, float]:
    """Compute aggregate ReAct metrics over a list of plan results.

    Returned keys:
        cycle_convergence: share of plans that have exactly one cycle trace.
        refusal_rate: total number of refusals divided by the number of plans.
        refusal_structured_rate: share of refusals that have both truthy
            ``reason_codes`` and a truthy ``message`` (0.0 when there are no
            refusals).
        tool_call_coverage: share of actions whose ``tool_name`` matches the
            ``tool_name`` of at least one observation in the same cycle
            (0.0 when there are no actions).

    All four values are 0.0 when ``plans`` is empty.
    """
    if not plans:
        return {
            "cycle_convergence": 0.0,
            "refusal_rate": 0.0,
            "refusal_structured_rate": 0.0,
            "tool_call_coverage": 0.0,
        }

    plan_total = len(plans)
    single_cycle_plans = sum(1 for plan in plans if len(plan.cycle_traces) == 1)

    refusal_total = sum(len(plan.refusals) for plan in plans)
    structured_total = sum(
        1
        for plan in plans
        for refusal in plan.refusals
        if refusal.reason_codes and refusal.message
    )

    total_actions = 0
    matched_actions = 0
    for plan in plans:
        for cycle in plan.cycle_traces:
            total_actions += len(cycle.actions)
            for action in cycle.actions:
                # An action counts as observed when some observation in the
                # same cycle reports the same tool name.
                if any(
                    observation.tool_name == action.tool_name
                    for observation in cycle.observations
                ):
                    matched_actions += 1

    if total_actions:
        coverage = matched_actions / float(total_actions)
    else:
        coverage = 0.0

    return {
        "cycle_convergence": single_cycle_plans / float(plan_total),
        "refusal_rate": refusal_total / float(plan_total),
        # max(1, ...) guards the division when no refusal was recorded.
        "refusal_structured_rate": structured_total / float(max(1, refusal_total)),
        "tool_call_coverage": coverage,
    }
55
+
56
+
57
def eval_causal_identification() -> Dict[str, str]:
    """Run ``identify_effect`` on two fixed specs and report the result types.

    The first spec is a two-node graph with the single edge X -> Y. The
    second uses the graph returned by ``generate_latent_confounder_graph``.
    Both declare X as treatment and Y as outcome and are locked with a
    "human" approval before identification.

    Returns:
        A dict with keys "ident_chain" and "latent_case" holding the
        ``result_type`` of the respective identification result.
    """
    # Identifiable chain
    chain_graph = CausalGraphSpec(
        nodes=[NodeSpec(name="X"), NodeSpec(name="Y")],
        edges=[EdgeSpec(source="X", target="Y")],
    )
    chain_draft = DraftSpec(
        graph=chain_graph,
        roles=RoleSpec(treatments=["X"], outcomes=["Y"]),
    )
    chain_locked = lock_spec(chain_draft, approvals=["human"])
    chain_outcome = identify_effect(
        locked_spec=chain_locked, treatment="X", outcome="Y"
    )

    # Latent confounding case
    confounded_graph = generate_latent_confounder_graph()
    confounded_draft = DraftSpec(
        graph=confounded_graph,
        roles=RoleSpec(treatments=["X"], outcomes=["Y"]),
    )
    confounded_locked = lock_spec(confounded_draft, approvals=["human"])
    confounded_outcome = identify_effect(
        locked_spec=confounded_locked, treatment="X", outcome="Y"
    )

    summary: Dict[str, str] = {}
    summary["ident_chain"] = chain_outcome.result_type
    summary["latent_case"] = confounded_outcome.result_type
    return summary
82
+
83
+
84
def run_eval(plans: List[LRMPlanResult], cfg: EvalConfig) -> None:
    """Run both evaluations and write the combined results as JSON.

    The output holds the ReAct metrics for ``plans`` under "react_metrics"
    and the causal identification results under "causal_identification".
    It is written, indented by two spaces and UTF-8 encoded, to
    ``cfg.output_path``; missing parent directories are created.
    """
    report = {
        "react_metrics": eval_react_metrics(plans),
        "causal_identification": eval_causal_identification(),
    }
    serialized = json.dumps(report, indent=2)

    destination = Path(cfg.output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(serialized, encoding="utf-8")
92
+