crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,90 @@
1
+ {
2
+ "$defs": {
3
+ "RefusalChecklistItem": {
4
+ "description": "A single required input/action needed to proceed.",
5
+ "properties": {
6
+ "item": {
7
+ "minLength": 1,
8
+ "title": "Item",
9
+ "type": "string"
10
+ },
11
+ "rationale": {
12
+ "minLength": 1,
13
+ "title": "Rationale",
14
+ "type": "string"
15
+ }
16
+ },
17
+ "required": [
18
+ "item",
19
+ "rationale"
20
+ ],
21
+ "title": "RefusalChecklistItem",
22
+ "type": "object"
23
+ },
24
+ "RefusalReasonCode": {
25
+ "description": "Stable reason codes for refusal-first behavior.",
26
+ "enum": [
27
+ "SPEC_NOT_LOCKED",
28
+ "NO_SCM_FOR_COUNTERFACTUAL",
29
+ "NOT_IDENTIFIABLE",
30
+ "TIME_INDEX_INVALID",
31
+ "ASSUMPTIONS_UNDECLARED",
32
+ "INPUT_INVALID",
33
+ "UNSUPPORTED_OPERATION"
34
+ ],
35
+ "title": "RefusalReasonCode",
36
+ "type": "string"
37
+ }
38
+ },
39
+ "description": "Structured refusal (no numeric causal output).",
40
+ "properties": {
41
+ "result_type": {
42
+ "default": "Refusal",
43
+ "title": "Result Type",
44
+ "type": "string"
45
+ },
46
+ "reason_codes": {
47
+ "items": {
48
+ "$ref": "#/$defs/RefusalReasonCode"
49
+ },
50
+ "title": "Reason Codes",
51
+ "type": "array"
52
+ },
53
+ "message": {
54
+ "minLength": 1,
55
+ "title": "Message",
56
+ "type": "string"
57
+ },
58
+ "checklist": {
59
+ "items": {
60
+ "$ref": "#/$defs/RefusalChecklistItem"
61
+ },
62
+ "title": "Checklist",
63
+ "type": "array"
64
+ },
65
+ "suggested_next_steps": {
66
+ "items": {
67
+ "type": "string"
68
+ },
69
+ "title": "Suggested Next Steps",
70
+ "type": "array"
71
+ },
72
+ "details": {
73
+ "anyOf": [
74
+ {
75
+ "type": "string"
76
+ },
77
+ {
78
+ "type": "null"
79
+ }
80
+ ],
81
+ "default": null,
82
+ "title": "Details"
83
+ }
84
+ },
85
+ "required": [
86
+ "message"
87
+ ],
88
+ "title": "RefusalResult",
89
+ "type": "object"
90
+ }
@@ -0,0 +1,62 @@
1
+ {
2
+ "$defs": {
3
+ "ValidationIssue": {
4
+ "properties": {
5
+ "code": {
6
+ "minLength": 1,
7
+ "title": "Code",
8
+ "type": "string"
9
+ },
10
+ "message": {
11
+ "minLength": 1,
12
+ "title": "Message",
13
+ "type": "string"
14
+ },
15
+ "path": {
16
+ "anyOf": [
17
+ {
18
+ "type": "string"
19
+ },
20
+ {
21
+ "type": "null"
22
+ }
23
+ ],
24
+ "default": null,
25
+ "title": "Path"
26
+ }
27
+ },
28
+ "required": [
29
+ "code",
30
+ "message"
31
+ ],
32
+ "title": "ValidationIssue",
33
+ "type": "object"
34
+ }
35
+ },
36
+ "description": "Returned by `validate_spec`.",
37
+ "properties": {
38
+ "ok": {
39
+ "title": "Ok",
40
+ "type": "boolean"
41
+ },
42
+ "errors": {
43
+ "items": {
44
+ "$ref": "#/$defs/ValidationIssue"
45
+ },
46
+ "title": "Errors",
47
+ "type": "array"
48
+ },
49
+ "warnings": {
50
+ "items": {
51
+ "$ref": "#/$defs/ValidationIssue"
52
+ },
53
+ "title": "Warnings",
54
+ "type": "array"
55
+ }
56
+ },
57
+ "required": [
58
+ "ok"
59
+ ],
60
+ "title": "ValidationReport",
61
+ "type": "object"
62
+ }
@@ -0,0 +1,80 @@
1
+ """Build a hybrid LRM dataset from internal traces and public datasets."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+ from typing import List
10
+
11
+ REPO_ROOT = Path(__file__).resolve().parents[1]
12
+ if str(REPO_ROOT) not in sys.path:
13
+ sys.path.insert(0, str(REPO_ROOT))
14
+
15
+ from training.datasets import (
16
+ ReActExample,
17
+ examples_from_traces,
18
+ filter_examples,
19
+ merge_examples,
20
+ save_jsonl,
21
+ )
22
+ from training.public_datasets import PublicDatasetConfig, default_public_configs, load_public_examples
23
+
24
+
25
def _load_public_config(path: Path) -> List[PublicDatasetConfig]:
    """Build dataset configs from a JSON file.

    The file at ``path`` is read as UTF-8 and decoded as JSON. Every element
    produced by iterating the decoded payload is expanded as keyword
    arguments to ``PublicDatasetConfig``.

    Args:
        path: Location of the JSON config file.

    Returns:
        One ``PublicDatasetConfig`` per payload element, in payload order.
    """
    raw_text = path.read_text(encoding="utf-8")
    entries = json.loads(raw_text)
    return [PublicDatasetConfig(**entry) for entry in entries]
31
+
32
+
33
def main() -> None:
    """Command-line entry point: build the merged dataset and save it as JSONL.

    Internal examples come from the ``--trace-jsonl`` files (none when the
    flag is absent). Public examples come from the configs in
    ``--public-config`` or, when that flag is empty, from
    ``default_public_configs()``. Both sets pass through ``filter_examples``
    with the same length limits, are combined by ``merge_examples`` and the
    result is written to ``--output``.
    """
    parser = argparse.ArgumentParser(description="Build hybrid LRM dataset JSONL.")
    parser.add_argument("--trace-jsonl", action="append", default=[], help="Path to LRM plan trace JSONL.")
    parser.add_argument("--public-config", type=str, default="", help="Path to public dataset config JSON.")
    parser.add_argument("--output", type=str, required=True, help="Output JSONL path.")
    parser.add_argument("--max-internal", type=int, default=None, help="Max internal examples to include.")
    parser.add_argument("--max-public", type=int, default=None, help="Max public examples to include.")
    parser.add_argument("--min-response-len", type=int, default=1)
    parser.add_argument("--max-prompt-len", type=int, default=None)
    parser.add_argument("--max-response-len", type=int, default=None)
    args = parser.parse_args()

    # The same length limits are applied to internal and public examples.
    length_limits = {
        "min_response_len": args.min_response_len,
        "max_prompt_len": args.max_prompt_len,
        "max_response_len": args.max_response_len,
    }

    internal_examples: List[ReActExample] = (
        examples_from_traces([Path(raw) for raw in args.trace_jsonl])
        if args.trace_jsonl
        else []
    )

    public_configs = (
        _load_public_config(Path(args.public_config))
        if args.public_config
        else default_public_configs()
    )
    public_examples = load_public_examples(public_configs)

    merged = merge_examples(
        internal_examples=filter_examples(internal_examples, **length_limits),
        public_examples=filter_examples(public_examples, **length_limits),
        max_internal=args.max_internal,
        max_public=args.max_public,
    )

    save_jsonl(merged, Path(args.output))
77
+
78
+
79
+ if __name__ == "__main__":
80
+ main()
@@ -0,0 +1,54 @@
1
+ """Export `crca_core` Pydantic JSON schemas for downstream tooling.
2
+
3
+ This supports the "structured object only" requirement: downstream systems can
4
+ validate inputs/outputs against stable schemas.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ REPO_ROOT = Path(__file__).resolve().parents[1]
14
+ if str(REPO_ROOT) not in sys.path:
15
+ sys.path.insert(0, str(REPO_ROOT))
16
+
17
+ from crca_core.models.refusal import RefusalResult
18
+ from crca_core.models.result import (
19
+ CounterfactualResult,
20
+ DiscoveryHypothesisResult,
21
+ EstimateResult,
22
+ IdentificationResult,
23
+ InterventionDesignResult,
24
+ ValidationReport,
25
+ )
26
+ from crca_core.models.spec import DraftSpec, LockedSpec
27
+
28
+
29
def main() -> None:
    """Write one ``<Name>.schema.json`` file per exported ``crca_core`` model.

    Files go to ``schemas_export/crca_core`` under the directory one level
    above this script's directory; the folder is created if needed. Each file
    holds the model's ``model_json_schema()`` output serialised with a
    two-space indent. A summary line is printed at the end.
    """
    repo_root = Path(__file__).resolve().parents[1]
    out_dir = repo_root / "schemas_export" / "crca_core"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Insertion order is the order in which the files are written.
    models = {
        "DraftSpec": DraftSpec,
        "LockedSpec": LockedSpec,
        "ValidationReport": ValidationReport,
        "RefusalResult": RefusalResult,
        "DiscoveryHypothesisResult": DiscoveryHypothesisResult,
        "InterventionDesignResult": InterventionDesignResult,
        "CounterfactualResult": CounterfactualResult,
        "IdentificationResult": IdentificationResult,
        "EstimateResult": EstimateResult,
    }

    for name, model in models.items():
        target = out_dir / f"{name}.schema.json"
        serialized = json.dumps(model.model_json_schema(), indent=2)
        target.write_text(serialized, encoding="utf-8")

    print(f"Wrote {len(models)} schemas to {out_dir}")
50
+
51
+
52
+ if __name__ == "__main__":
53
+ main()
54
+
@@ -0,0 +1,37 @@
1
+ """Export finetuned LRM model for HuggingFace."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import shutil
7
+ from pathlib import Path
8
+
9
+
10
def main() -> None:
    """Re-save a finetuned checkpoint into an upload directory.

    The tokenizer and causal-LM model are loaded from ``--checkpoint-dir`` and
    saved into ``--output-dir`` (created if missing); the model is saved with
    ``safe_serialization=True``. If the ``--model-card`` file exists it is
    copied into the output directory as ``README.md``.

    Raises:
        RuntimeError: If importing ``transformers`` fails.
    """
    parser = argparse.ArgumentParser(description="Export finetuned LRM model to HF format.")
    parser.add_argument("--checkpoint-dir", type=str, required=True, help="Path to finetuned checkpoint.")
    parser.add_argument("--output-dir", type=str, required=True, help="Output directory for HF upload.")
    parser.add_argument("--model-card", type=str, default="MODEL_CARD.md", help="Model card path.")
    args = parser.parse_args()

    # Imported after argument parsing, so `--help` does not need transformers.
    try:
        from transformers import AutoModelForCausalLM, AutoTokenizer  # type: ignore
    except Exception as exc:
        raise RuntimeError("transformers is required to export model.") from exc

    source_dir = Path(args.checkpoint_dir)
    target_dir = Path(args.output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Load both artefacts before writing anything to the output directory.
    tokenizer = AutoTokenizer.from_pretrained(source_dir)
    model = AutoModelForCausalLM.from_pretrained(source_dir)
    tokenizer.save_pretrained(target_dir)
    model.save_pretrained(target_dir, safe_serialization=True)

    model_card = Path(args.model_card)
    if not model_card.exists():
        return
    shutil.copy(model_card, target_dir / "README.md")
34
+
35
+
36
+ if __name__ == "__main__":
37
+ main()
@@ -0,0 +1,45 @@
1
+ """Export a finetuned HF model to GGUF for Ollama."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import os
7
+ import subprocess
8
+ from pathlib import Path
9
+
10
+
11
def main() -> None:
    """Convert a HuggingFace checkpoint to GGUF using llama.cpp's converter.

    The llama.cpp checkout is taken from ``--llama-cpp-dir`` or, when that is
    empty, from the ``LLAMA_CPP_DIR`` environment variable. The converter
    script is looked up under both its legacy hyphenated name and the
    underscore name, and is run with the interpreter that is executing this
    script. The parent directory of ``--output`` is created if needed.

    Raises:
        RuntimeError: If no llama.cpp directory was provided.
        FileNotFoundError: If no converter script exists in that directory.
        subprocess.CalledProcessError: If the converter exits non-zero.
    """
    # Local import keeps this block self-contained; the module does not
    # import `sys` at file level.
    import sys

    parser = argparse.ArgumentParser(description="Convert HF model to GGUF via llama.cpp.")
    parser.add_argument("--checkpoint-dir", type=str, required=True, help="HF checkpoint directory.")
    parser.add_argument("--output", type=str, required=True, help="Output GGUF path.")
    parser.add_argument("--llama-cpp-dir", type=str, default="", help="Path to llama.cpp repo.")
    args = parser.parse_args()

    # The command-line flag wins; the environment variable is the fallback.
    llama_cpp_location = args.llama_cpp_dir or os.environ.get("LLAMA_CPP_DIR")
    if not llama_cpp_location:
        raise RuntimeError("Provide --llama-cpp-dir or set LLAMA_CPP_DIR.")
    llama_cpp_dir = Path(llama_cpp_location)

    # The hyphenated name is tried first so checkouts that worked before keep
    # resolving to the same script; the underscore name covers checkouts that
    # ship the renamed converter.
    candidates = [
        llama_cpp_dir / "convert-hf-to-gguf.py",
        llama_cpp_dir / "convert_hf_to_gguf.py",
    ]
    converter = next((path for path in candidates if path.exists()), None)
    if converter is None:
        tried = ", ".join(str(path) for path in candidates)
        raise FileNotFoundError(f"Missing convert script: {tried}")

    checkpoint_dir = Path(args.checkpoint_dir)
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    cmd = [
        # Run the converter in the current environment; a bare "python" may
        # be absent from PATH or point at a different interpreter.
        sys.executable or "python",
        str(converter),
        str(checkpoint_dir),
        "--outfile",
        str(output_path),
    ]
    subprocess.check_call(cmd)
42
+
43
+
44
+ if __name__ == "__main__":
45
+ main()
@@ -0,0 +1,157 @@
1
+ """
2
+ Generate changelog from git history and pyproject.toml.
3
+
4
+ This script generates a changelog in Keep a Changelog format from:
5
+ - Git tags (version numbers)
6
+ - Conventional commit messages
7
+ - README.md changelog section
8
+ - pyproject.toml version information
9
+ """
10
+
11
+ import re
12
+ import subprocess
13
+ from datetime import datetime
14
+ from pathlib import Path
15
+ from typing import Dict, List, Optional, Tuple
16
+ import toml
17
+
18
+ try:
19
+ import git
20
+ GITPYTHON_AVAILABLE = True
21
+ except ImportError:
22
+ GITPYTHON_AVAILABLE = False
23
+
24
+
25
def get_version_from_pyproject() -> Optional[str]:
    """Return the ``[project].version`` value from ``pyproject.toml``.

    The file is looked up two directory levels above this script's path.

    Returns:
        The version value, or ``None`` when the file is missing, the key is
        absent, or anything goes wrong while locating or parsing the file.
    """
    try:
        pyproject_path = Path(__file__).parent.parent / "pyproject.toml"
        if not pyproject_path.exists():
            return None
        project_table = toml.load(pyproject_path).get("project", {})
        return project_table.get("version")
    except Exception:
        # Best effort: any failure is reported as "no version".
        return None
35
+
36
+
37
def get_git_tags() -> List[Tuple[str, str]]:
    """List repository tags together with their commit dates.

    The repository is opened two directory levels above this script's path.
    Each tag is paired with the committed date of the commit it resolves to,
    formatted as ``YYYY-MM-DD`` via ``datetime.fromtimestamp`` (local time).

    Returns:
        ``(tag_name, date)`` pairs sorted by date string, newest first. An
        empty list when GitPython is unavailable or the repository cannot be
        read; tags that fail to resolve are left out.
    """
    if not GITPYTHON_AVAILABLE:
        return []

    try:
        repo = git.Repo(Path(__file__).parent.parent)
        dated_tags = []
        for tag in repo.tags:
            try:
                committed_at = datetime.fromtimestamp(repo.commit(tag).committed_date)
                dated_tags.append((tag.name, committed_at.strftime("%Y-%m-%d")))
            except Exception:
                # Skip tags that cannot be resolved to a commit date.
                continue
        dated_tags.sort(key=lambda entry: entry[1], reverse=True)
        return dated_tags
    except Exception:
        return []
55
+
56
+
57
def parse_conventional_commits(commits: List[str]) -> Dict[str, List[str]]:
    """Group Conventional Commit subjects under Keep a Changelog headings.

    Each commit string is matched against ``type(scope)!: message``, where the
    scope and the ``!`` breaking-change marker are optional. Mapping:

    - ``feat``              -> "Added"
    - ``fix``               -> "Fixed"
    - ``refactor``/``perf`` -> "Changed"
    - ``docs``              -> "Changed", prefixed with "Documentation: "

    ``style``, ``test`` and ``chore`` commits are recognised but not listed.
    Strings that do not match the pattern are ignored.

    Args:
        commits: Commit subject lines.

    Returns:
        A dict with the keys "Added", "Changed", "Deprecated", "Removed",
        "Fixed" and "Security", each mapping to a list of messages in input
        order.
    """
    categories: Dict[str, List[str]] = {
        "Added": [],
        "Changed": [],
        "Deprecated": [],
        "Removed": [],
        "Fixed": [],
        "Security": [],
    }

    # Commit type -> (changelog section, prefix added to the message).
    routing = {
        "feat": ("Added", ""),
        "fix": ("Fixed", ""),
        "refactor": ("Changed", ""),
        "perf": ("Changed", ""),
        "docs": ("Changed", "Documentation: "),
    }

    # The scope is limited to a single parenthesised group so that a later
    # "(...):" inside the message is not mistaken for the end of the scope.
    # An optional "!" before the colon marks a breaking change.
    pattern = re.compile(
        r'^(feat|fix|docs|style|refactor|perf|test|chore)(\([^()]+\))?!?:\s*(.+)$'
    )

    for commit in commits:
        match = pattern.match(commit)
        if match is None:
            continue
        target = routing.get(match.group(1))
        if target is None:
            continue
        section, prefix = target
        categories[section].append(f"{prefix}{match.group(3)}")

    return categories
85
+
86
+
87
def extract_changelog_from_readme() -> Dict[str, str]:
    """Placeholder for pulling changelog entries out of ``README.md``.

    The README two directory levels above this script's path is read when it
    exists, but no parsing is implemented yet, so the result is always an
    empty dict.
    """
    readme_path = Path(__file__).parent.parent / "README.md"
    if readme_path.exists():
        try:
            # Extract changelog section (simplified)
            # This would need more sophisticated parsing
            _ = readme_path.read_text(encoding="utf-8")
        except Exception:
            return {}
    return {}
100
+
101
+
102
def generate_changelog() -> str:
    """Render the changelog as Markdown text.

    The output starts with a fixed preamble, an "Unreleased" section and a
    section for the version from ``pyproject.toml`` ("Unknown" when none is
    found) dated today. It is followed by one stub section for each of the
    first ten entries returned by ``get_git_tags()``.
    """
    version = get_version_from_pyproject() or "Unknown"
    tags = get_git_tags()
    today = datetime.now().strftime("%Y-%m-%d")

    header = f"""# Changelog

All notable changes to CR-CA will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added
- New features in development

### Changed
- Changes in development

## [{version}] - {today}

### Current Version

Current version: {version}

"""

    # One placeholder section per tag, limited to the first ten entries.
    tag_sections = [
        f"\n## [{tag_name}] - {tag_date}\n\n"
        "### Changes\n\n"
        "- See git history for details\n\n"
        for tag_name, tag_date in tags[:10]
    ]

    return header + "".join(tag_sections)
137
+
138
+
139
def main():
    """Generate the changelog and write it to both output locations.

    The text is written to ``CHANGELOG.md`` and to
    ``docs/changelog/index.md`` (the latter's folder is created if needed),
    both under the directory two levels above this script's path. Each path
    is then printed.
    """
    repo_root = Path(__file__).parent.parent
    changelog = generate_changelog()

    changelog_path = repo_root / "CHANGELOG.md"
    changelog_path.write_text(changelog, encoding="utf-8")

    docs_changelog_path = repo_root / "docs" / "changelog" / "index.md"
    docs_changelog_path.parent.mkdir(parents=True, exist_ok=True)
    docs_changelog_path.write_text(changelog, encoding="utf-8")

    print(f"Changelog generated: {changelog_path}")
    print(f"Docs changelog updated: {docs_changelog_path}")
154
+
155
+
156
+ if __name__ == "__main__":
157
+ main()
@@ -0,0 +1,86 @@
1
+ """Generate lightweight Markdown docs from exported JSON schemas.
2
+
3
+ This intentionally documents *structured contracts* (schemas), not narratives.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import sys
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Tuple
12
+
13
+ REPO_ROOT = Path(__file__).resolve().parents[1]
14
+
15
+
16
def _load_schema(path: Path) -> Dict[str, Any]:
    """Decode the UTF-8 JSON file at ``path`` and return the parsed value."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
18
+
19
+
20
def _schema_title(schema: Dict[str, Any], fallback: str) -> str:
    """Return the schema's ``title``, or ``fallback`` when it is missing or falsy."""
    title = schema.get("title")
    return title if title else fallback
22
+
23
+
24
def _describe_type(prop: Dict[str, Any]) -> str:
    """Return a short type label for one property sub-schema.

    A ``type`` or ``$ref`` value is used as-is. Otherwise the members of the
    first non-empty ``anyOf``/``oneOf``/``allOf`` list are described
    recursively and joined ("or" for anyOf/oneOf, "and" for allOf). Words are
    used rather than ``|`` so the label is safe inside a Markdown table cell.
    """
    label = prop.get("type") or prop.get("$ref")
    if label:
        return str(label)
    for keyword, joiner in (("anyOf", " or "), ("oneOf", " or "), ("allOf", " and ")):
        members = [m for m in (prop.get(keyword) or []) if isinstance(m, dict)]
        if members:
            return joiner.join(_describe_type(member) for member in members)
    return "unknown"


def _collect_properties(schema: Dict[str, Any]) -> List[Tuple[str, str, str]]:
    """Summarise the top-level properties of a JSON schema.

    Args:
        schema: A decoded JSON schema object.

    Returns:
        One ``(name, type, notes)`` tuple per entry in ``properties``, in
        schema order. ``type`` is the property's ``type`` or ``$ref``, a
        joined label for ``anyOf``/``oneOf``/``allOf`` properties (for example
        ``"string or null"``), or ``"unknown"``. ``notes`` starts with
        "required." or "optional." followed by the single-line description,
        if any.
    """
    props = schema.get("properties") or {}
    required = set(schema.get("required") or [])
    rows: List[Tuple[str, str, str]] = []
    for name, prop in props.items():
        req = "required" if name in required else "optional"
        # Collapse newlines so the description stays on one table row.
        desc = (prop.get("description") or "").replace("\n", " ").strip()
        rows.append((name, _describe_type(prop), f"{req}. {desc}".strip()))
    return rows
34
+
35
+
36
def _render_md(name: str, schema: Dict[str, Any]) -> str:
    """Render one schema as a Markdown page.

    The page has a level-two heading with the schema title (``name`` is the
    fallback) and a fixed "generated" notice. It then has either a
    three-column table of the top-level properties or a note that none were
    found. The text ends with a trailing newline.
    """
    title = _schema_title(schema, name)
    rows = _collect_properties(schema)

    heading = [
        f"## `{title}`",
        "",
        "This page is generated from the JSON schema (contract-first).",
        "",
    ]

    if rows:
        body = ["| Field | Type | Notes |", "|---|---|---|"]
        body.extend(f"| `{field}` | `{typ}` | {notes} |" for field, typ, notes in rows)
    else:
        body = ["_No top-level properties found in schema._"]

    # The trailing empty item makes the joined text end with a newline.
    return "\n".join(heading + body + [""])
56
+
57
+
58
def main() -> None:
    """Generate one Markdown page per exported schema, plus an index page.

    Every ``*.schema.json`` file in ``schemas_export/crca_core`` is read in
    sorted order and rendered to ``docs_generated/crca_core/<name>.md``. An
    ``index.md`` listing all pages is written last and the output directory
    is printed.

    Raises:
        SystemExit: If the schema export directory does not exist.
    """
    schema_dir = REPO_ROOT / "schemas_export" / "crca_core"
    if not schema_dir.exists():
        raise SystemExit(f"Missing {schema_dir}. Run scripts/export_crca_core_schemas.py first.")

    out_dir = REPO_ROOT / "docs_generated" / "crca_core"
    out_dir.mkdir(parents=True, exist_ok=True)

    index_entries = []
    for schema_path in sorted(schema_dir.glob("*.schema.json")):
        name = schema_path.name.replace(".schema.json", "")
        page = _render_md(name, _load_schema(schema_path))
        (out_dir / f"{name}.md").write_text(page, encoding="utf-8")
        index_entries.append(f"- `{name}`: `{name}.md`")

    index_preamble = [
        "# crca_core schema contracts (generated)",
        "",
        "These documents are generated from exported Pydantic JSON schemas.",
        "",
    ]
    index_text = "\n".join(index_preamble + index_entries) + "\n"
    (out_dir / "index.md").write_text(index_text, encoding="utf-8")
    print(str(out_dir))
82
+
83
+
84
+ if __name__ == "__main__":
85
+ main()
86
+