crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CRCA.py +172 -7
- MODEL_CARD.md +53 -0
- PKG-INFO +8 -2
- RELEASE_NOTES.md +17 -0
- STABILITY.md +19 -0
- architecture/hybrid/consistency_engine.py +362 -0
- architecture/hybrid/conversation_manager.py +421 -0
- architecture/hybrid/explanation_generator.py +452 -0
- architecture/hybrid/few_shot_learner.py +533 -0
- architecture/hybrid/graph_compressor.py +286 -0
- architecture/hybrid/hybrid_agent.py +4398 -0
- architecture/hybrid/language_compiler.py +623 -0
- architecture/hybrid/main,py +0 -0
- architecture/hybrid/reasoning_tracker.py +322 -0
- architecture/hybrid/self_verifier.py +524 -0
- architecture/hybrid/task_decomposer.py +567 -0
- architecture/hybrid/text_corrector.py +341 -0
- benchmark_results/crca_core_benchmarks.json +178 -0
- branches/crca_sd/crca_sd_realtime.py +6 -2
- branches/general_agent/__init__.py +102 -0
- branches/general_agent/general_agent.py +1400 -0
- branches/general_agent/personality.py +169 -0
- branches/general_agent/utils/__init__.py +19 -0
- branches/general_agent/utils/prompt_builder.py +170 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
- crca_core/__init__.py +35 -0
- crca_core/benchmarks/__init__.py +14 -0
- crca_core/benchmarks/synthetic_scm.py +103 -0
- crca_core/core/__init__.py +23 -0
- crca_core/core/api.py +120 -0
- crca_core/core/estimate.py +208 -0
- crca_core/core/godclass.py +72 -0
- crca_core/core/intervention_design.py +174 -0
- crca_core/core/lifecycle.py +48 -0
- crca_core/discovery/__init__.py +9 -0
- crca_core/discovery/tabular.py +193 -0
- crca_core/identify/__init__.py +171 -0
- crca_core/identify/backdoor.py +39 -0
- crca_core/identify/frontdoor.py +48 -0
- crca_core/identify/graph.py +106 -0
- crca_core/identify/id_algorithm.py +43 -0
- crca_core/identify/iv.py +48 -0
- crca_core/models/__init__.py +67 -0
- crca_core/models/provenance.py +56 -0
- crca_core/models/refusal.py +39 -0
- crca_core/models/result.py +83 -0
- crca_core/models/spec.py +151 -0
- crca_core/models/validation.py +68 -0
- crca_core/scm/__init__.py +9 -0
- crca_core/scm/linear_gaussian.py +198 -0
- crca_core/timeseries/__init__.py +6 -0
- crca_core/timeseries/pcmci.py +181 -0
- crca_llm/__init__.py +12 -0
- crca_llm/client.py +85 -0
- crca_llm/coauthor.py +118 -0
- crca_llm/orchestrator.py +289 -0
- crca_llm/types.py +21 -0
- crca_reasoning/__init__.py +16 -0
- crca_reasoning/critique.py +54 -0
- crca_reasoning/godclass.py +206 -0
- crca_reasoning/memory.py +24 -0
- crca_reasoning/rationale.py +10 -0
- crca_reasoning/react_controller.py +81 -0
- crca_reasoning/tool_router.py +97 -0
- crca_reasoning/types.py +40 -0
- crca_sd/__init__.py +15 -0
- crca_sd/crca_sd_core.py +2 -0
- crca_sd/crca_sd_governance.py +2 -0
- crca_sd/crca_sd_mpc.py +2 -0
- crca_sd/crca_sd_realtime.py +2 -0
- crca_sd/crca_sd_tui.py +2 -0
- cuda-keyring_1.1-1_all.deb +0 -0
- cuda-keyring_1.1-1_all.deb.1 +0 -0
- docs/IMAGE_ANNOTATION_USAGE.md +539 -0
- docs/INSTALL_DEEPSPEED.md +125 -0
- docs/api/branches/crca-cg.md +19 -0
- docs/api/branches/crca-q.md +27 -0
- docs/api/branches/crca-sd.md +37 -0
- docs/api/branches/general-agent.md +24 -0
- docs/api/branches/overview.md +19 -0
- docs/api/crca/agent-methods.md +62 -0
- docs/api/crca/operations.md +79 -0
- docs/api/crca/overview.md +32 -0
- docs/api/image-annotation/engine.md +52 -0
- docs/api/image-annotation/overview.md +17 -0
- docs/api/schemas/annotation.md +34 -0
- docs/api/schemas/core-schemas.md +82 -0
- docs/api/schemas/overview.md +32 -0
- docs/api/schemas/policy.md +30 -0
- docs/api/utils/conversation.md +22 -0
- docs/api/utils/graph-reasoner.md +32 -0
- docs/api/utils/overview.md +21 -0
- docs/api/utils/router.md +19 -0
- docs/api/utils/utilities.md +97 -0
- docs/architecture/causal-graphs.md +41 -0
- docs/architecture/data-flow.md +29 -0
- docs/architecture/design-principles.md +33 -0
- docs/architecture/hybrid-agent/components.md +38 -0
- docs/architecture/hybrid-agent/consistency.md +26 -0
- docs/architecture/hybrid-agent/overview.md +44 -0
- docs/architecture/hybrid-agent/reasoning.md +22 -0
- docs/architecture/llm-integration.md +26 -0
- docs/architecture/modular-structure.md +37 -0
- docs/architecture/overview.md +69 -0
- docs/architecture/policy-engine-arch.md +29 -0
- docs/branches/crca-cg/corposwarm.md +39 -0
- docs/branches/crca-cg/esg-scoring.md +30 -0
- docs/branches/crca-cg/multi-agent.md +35 -0
- docs/branches/crca-cg/overview.md +40 -0
- docs/branches/crca-q/alternative-data.md +55 -0
- docs/branches/crca-q/architecture.md +71 -0
- docs/branches/crca-q/backtesting.md +45 -0
- docs/branches/crca-q/causal-engine.md +33 -0
- docs/branches/crca-q/execution.md +39 -0
- docs/branches/crca-q/market-data.md +60 -0
- docs/branches/crca-q/overview.md +58 -0
- docs/branches/crca-q/philosophy.md +60 -0
- docs/branches/crca-q/portfolio-optimization.md +66 -0
- docs/branches/crca-q/risk-management.md +102 -0
- docs/branches/crca-q/setup.md +65 -0
- docs/branches/crca-q/signal-generation.md +61 -0
- docs/branches/crca-q/signal-validation.md +43 -0
- docs/branches/crca-sd/core.md +84 -0
- docs/branches/crca-sd/governance.md +53 -0
- docs/branches/crca-sd/mpc-solver.md +65 -0
- docs/branches/crca-sd/overview.md +59 -0
- docs/branches/crca-sd/realtime.md +28 -0
- docs/branches/crca-sd/tui.md +20 -0
- docs/branches/general-agent/overview.md +37 -0
- docs/branches/general-agent/personality.md +36 -0
- docs/branches/general-agent/prompt-builder.md +30 -0
- docs/changelog/index.md +79 -0
- docs/contributing/code-style.md +69 -0
- docs/contributing/documentation.md +43 -0
- docs/contributing/overview.md +29 -0
- docs/contributing/testing.md +29 -0
- docs/core/crcagent/async-operations.md +65 -0
- docs/core/crcagent/automatic-extraction.md +107 -0
- docs/core/crcagent/batch-prediction.md +80 -0
- docs/core/crcagent/bayesian-inference.md +60 -0
- docs/core/crcagent/causal-graph.md +92 -0
- docs/core/crcagent/counterfactuals.md +96 -0
- docs/core/crcagent/deterministic-simulation.md +78 -0
- docs/core/crcagent/dual-mode-operation.md +82 -0
- docs/core/crcagent/initialization.md +88 -0
- docs/core/crcagent/optimization.md +65 -0
- docs/core/crcagent/overview.md +63 -0
- docs/core/crcagent/time-series.md +57 -0
- docs/core/schemas/annotation.md +30 -0
- docs/core/schemas/core-schemas.md +82 -0
- docs/core/schemas/overview.md +30 -0
- docs/core/schemas/policy.md +41 -0
- docs/core/templates/base-agent.md +31 -0
- docs/core/templates/feature-mixins.md +31 -0
- docs/core/templates/overview.md +29 -0
- docs/core/templates/templates-guide.md +75 -0
- docs/core/tools/mcp-client.md +34 -0
- docs/core/tools/overview.md +24 -0
- docs/core/utils/conversation.md +27 -0
- docs/core/utils/graph-reasoner.md +29 -0
- docs/core/utils/overview.md +27 -0
- docs/core/utils/router.md +27 -0
- docs/core/utils/utilities.md +97 -0
- docs/css/custom.css +84 -0
- docs/examples/basic-usage.md +57 -0
- docs/examples/general-agent/general-agent-examples.md +50 -0
- docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
- docs/examples/image-annotation/image-annotation-examples.md +54 -0
- docs/examples/integration/integration-examples.md +58 -0
- docs/examples/overview.md +37 -0
- docs/examples/trading/trading-examples.md +46 -0
- docs/features/causal-reasoning/advanced-topics.md +101 -0
- docs/features/causal-reasoning/counterfactuals.md +43 -0
- docs/features/causal-reasoning/do-calculus.md +50 -0
- docs/features/causal-reasoning/overview.md +47 -0
- docs/features/causal-reasoning/structural-models.md +52 -0
- docs/features/hybrid-agent/advanced-components.md +55 -0
- docs/features/hybrid-agent/core-components.md +64 -0
- docs/features/hybrid-agent/overview.md +34 -0
- docs/features/image-annotation/engine.md +82 -0
- docs/features/image-annotation/features.md +113 -0
- docs/features/image-annotation/integration.md +75 -0
- docs/features/image-annotation/overview.md +53 -0
- docs/features/image-annotation/quickstart.md +73 -0
- docs/features/policy-engine/doctrine-ledger.md +105 -0
- docs/features/policy-engine/monitoring.md +44 -0
- docs/features/policy-engine/mpc-control.md +89 -0
- docs/features/policy-engine/overview.md +46 -0
- docs/getting-started/configuration.md +225 -0
- docs/getting-started/first-agent.md +164 -0
- docs/getting-started/installation.md +144 -0
- docs/getting-started/quickstart.md +137 -0
- docs/index.md +118 -0
- docs/js/mathjax.js +13 -0
- docs/lrm/discovery_proof_notes.md +25 -0
- docs/lrm/finetune_full.md +83 -0
- docs/lrm/math_appendix.md +120 -0
- docs/lrm/overview.md +32 -0
- docs/mkdocs.yml +238 -0
- docs/stylesheets/extra.css +21 -0
- docs_generated/crca_core/CounterfactualResult.md +12 -0
- docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
- docs_generated/crca_core/DraftSpec.md +13 -0
- docs_generated/crca_core/EstimateResult.md +13 -0
- docs_generated/crca_core/IdentificationResult.md +17 -0
- docs_generated/crca_core/InterventionDesignResult.md +12 -0
- docs_generated/crca_core/LockedSpec.md +15 -0
- docs_generated/crca_core/RefusalResult.md +12 -0
- docs_generated/crca_core/ValidationReport.md +9 -0
- docs_generated/crca_core/index.md +13 -0
- examples/general_agent_example.py +277 -0
- examples/general_agent_quickstart.py +202 -0
- examples/general_agent_simple.py +92 -0
- examples/hybrid_agent_auto_extraction.py +84 -0
- examples/hybrid_agent_dictionary_demo.py +104 -0
- examples/hybrid_agent_enhanced.py +179 -0
- examples/hybrid_agent_general_knowledge.py +107 -0
- examples/image_annotation_quickstart.py +328 -0
- examples/test_hybrid_fixes.py +77 -0
- image_annotation/__init__.py +27 -0
- image_annotation/annotation_engine.py +2593 -0
- install_cuda_wsl2.sh +59 -0
- install_deepspeed.sh +56 -0
- install_deepspeed_simple.sh +87 -0
- mkdocs.yml +252 -0
- ollama/Modelfile +8 -0
- prompts/__init__.py +2 -1
- prompts/default_crca.py +9 -1
- prompts/general_agent.py +227 -0
- prompts/image_annotation.py +56 -0
- pyproject.toml +17 -2
- requirements-docs.txt +10 -0
- requirements.txt +21 -2
- schemas/__init__.py +26 -1
- schemas/annotation.py +222 -0
- schemas/conversation.py +193 -0
- schemas/hybrid.py +211 -0
- schemas/reasoning.py +276 -0
- schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
- schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
- schemas_export/crca_core/DraftSpec.schema.json +635 -0
- schemas_export/crca_core/EstimateResult.schema.json +113 -0
- schemas_export/crca_core/IdentificationResult.schema.json +145 -0
- schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
- schemas_export/crca_core/LockedSpec.schema.json +646 -0
- schemas_export/crca_core/RefusalResult.schema.json +90 -0
- schemas_export/crca_core/ValidationReport.schema.json +62 -0
- scripts/build_lrm_dataset.py +80 -0
- scripts/export_crca_core_schemas.py +54 -0
- scripts/export_hf_lrm.py +37 -0
- scripts/export_ollama_gguf.py +45 -0
- scripts/generate_changelog.py +157 -0
- scripts/generate_crca_core_docs_from_schemas.py +86 -0
- scripts/run_crca_core_benchmarks.py +163 -0
- scripts/run_full_finetune.py +198 -0
- scripts/run_lrm_eval.py +31 -0
- templates/graph_management.py +29 -0
- tests/conftest.py +9 -0
- tests/test_core.py +2 -3
- tests/test_crca_core_discovery_tabular.py +15 -0
- tests/test_crca_core_estimate_dowhy.py +36 -0
- tests/test_crca_core_identify.py +18 -0
- tests/test_crca_core_intervention_design.py +36 -0
- tests/test_crca_core_linear_gaussian_scm.py +69 -0
- tests/test_crca_core_spec.py +25 -0
- tests/test_crca_core_timeseries_pcmci.py +15 -0
- tests/test_crca_llm_coauthor.py +12 -0
- tests/test_crca_llm_orchestrator.py +80 -0
- tests/test_hybrid_agent_llm_enhanced.py +556 -0
- tests/test_image_annotation_demo.py +376 -0
- tests/test_image_annotation_operational.py +408 -0
- tests/test_image_annotation_unit.py +551 -0
- tests/test_training_moe.py +13 -0
- training/__init__.py +42 -0
- training/datasets.py +140 -0
- training/deepspeed_zero2_0_5b.json +22 -0
- training/deepspeed_zero2_1_5b.json +22 -0
- training/deepspeed_zero3_0_5b.json +28 -0
- training/deepspeed_zero3_14b.json +28 -0
- training/deepspeed_zero3_h100_3gpu.json +20 -0
- training/deepspeed_zero3_offload.json +28 -0
- training/eval.py +92 -0
- training/finetune.py +516 -0
- training/public_datasets.py +89 -0
- training_data/react_train.jsonl +7473 -0
- utils/agent_discovery.py +311 -0
- utils/batch_processor.py +317 -0
- utils/conversation.py +78 -0
- utils/edit_distance.py +118 -0
- utils/formatter.py +33 -0
- utils/graph_reasoner.py +530 -0
- utils/rate_limiter.py +283 -0
- utils/router.py +2 -2
- utils/tool_discovery.py +307 -0
- webui/__init__.py +10 -0
- webui/app.py +229 -0
- webui/config.py +104 -0
- webui/static/css/style.css +332 -0
- webui/static/js/main.js +284 -0
- webui/templates/index.html +42 -0
- tests/test_crca_excel.py +0 -166
- tests/test_data_broker.py +0 -424
- tests/test_palantir.py +0 -349
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$defs": {
|
|
3
|
+
"RefusalChecklistItem": {
|
|
4
|
+
"description": "A single required input/action needed to proceed.",
|
|
5
|
+
"properties": {
|
|
6
|
+
"item": {
|
|
7
|
+
"minLength": 1,
|
|
8
|
+
"title": "Item",
|
|
9
|
+
"type": "string"
|
|
10
|
+
},
|
|
11
|
+
"rationale": {
|
|
12
|
+
"minLength": 1,
|
|
13
|
+
"title": "Rationale",
|
|
14
|
+
"type": "string"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"required": [
|
|
18
|
+
"item",
|
|
19
|
+
"rationale"
|
|
20
|
+
],
|
|
21
|
+
"title": "RefusalChecklistItem",
|
|
22
|
+
"type": "object"
|
|
23
|
+
},
|
|
24
|
+
"RefusalReasonCode": {
|
|
25
|
+
"description": "Stable reason codes for refusal-first behavior.",
|
|
26
|
+
"enum": [
|
|
27
|
+
"SPEC_NOT_LOCKED",
|
|
28
|
+
"NO_SCM_FOR_COUNTERFACTUAL",
|
|
29
|
+
"NOT_IDENTIFIABLE",
|
|
30
|
+
"TIME_INDEX_INVALID",
|
|
31
|
+
"ASSUMPTIONS_UNDECLARED",
|
|
32
|
+
"INPUT_INVALID",
|
|
33
|
+
"UNSUPPORTED_OPERATION"
|
|
34
|
+
],
|
|
35
|
+
"title": "RefusalReasonCode",
|
|
36
|
+
"type": "string"
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
"description": "Structured refusal (no numeric causal output).",
|
|
40
|
+
"properties": {
|
|
41
|
+
"result_type": {
|
|
42
|
+
"default": "Refusal",
|
|
43
|
+
"title": "Result Type",
|
|
44
|
+
"type": "string"
|
|
45
|
+
},
|
|
46
|
+
"reason_codes": {
|
|
47
|
+
"items": {
|
|
48
|
+
"$ref": "#/$defs/RefusalReasonCode"
|
|
49
|
+
},
|
|
50
|
+
"title": "Reason Codes",
|
|
51
|
+
"type": "array"
|
|
52
|
+
},
|
|
53
|
+
"message": {
|
|
54
|
+
"minLength": 1,
|
|
55
|
+
"title": "Message",
|
|
56
|
+
"type": "string"
|
|
57
|
+
},
|
|
58
|
+
"checklist": {
|
|
59
|
+
"items": {
|
|
60
|
+
"$ref": "#/$defs/RefusalChecklistItem"
|
|
61
|
+
},
|
|
62
|
+
"title": "Checklist",
|
|
63
|
+
"type": "array"
|
|
64
|
+
},
|
|
65
|
+
"suggested_next_steps": {
|
|
66
|
+
"items": {
|
|
67
|
+
"type": "string"
|
|
68
|
+
},
|
|
69
|
+
"title": "Suggested Next Steps",
|
|
70
|
+
"type": "array"
|
|
71
|
+
},
|
|
72
|
+
"details": {
|
|
73
|
+
"anyOf": [
|
|
74
|
+
{
|
|
75
|
+
"type": "string"
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"type": "null"
|
|
79
|
+
}
|
|
80
|
+
],
|
|
81
|
+
"default": null,
|
|
82
|
+
"title": "Details"
|
|
83
|
+
}
|
|
84
|
+
},
|
|
85
|
+
"required": [
|
|
86
|
+
"message"
|
|
87
|
+
],
|
|
88
|
+
"title": "RefusalResult",
|
|
89
|
+
"type": "object"
|
|
90
|
+
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$defs": {
|
|
3
|
+
"ValidationIssue": {
|
|
4
|
+
"properties": {
|
|
5
|
+
"code": {
|
|
6
|
+
"minLength": 1,
|
|
7
|
+
"title": "Code",
|
|
8
|
+
"type": "string"
|
|
9
|
+
},
|
|
10
|
+
"message": {
|
|
11
|
+
"minLength": 1,
|
|
12
|
+
"title": "Message",
|
|
13
|
+
"type": "string"
|
|
14
|
+
},
|
|
15
|
+
"path": {
|
|
16
|
+
"anyOf": [
|
|
17
|
+
{
|
|
18
|
+
"type": "string"
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"type": "null"
|
|
22
|
+
}
|
|
23
|
+
],
|
|
24
|
+
"default": null,
|
|
25
|
+
"title": "Path"
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"required": [
|
|
29
|
+
"code",
|
|
30
|
+
"message"
|
|
31
|
+
],
|
|
32
|
+
"title": "ValidationIssue",
|
|
33
|
+
"type": "object"
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
"description": "Returned by `validate_spec`.",
|
|
37
|
+
"properties": {
|
|
38
|
+
"ok": {
|
|
39
|
+
"title": "Ok",
|
|
40
|
+
"type": "boolean"
|
|
41
|
+
},
|
|
42
|
+
"errors": {
|
|
43
|
+
"items": {
|
|
44
|
+
"$ref": "#/$defs/ValidationIssue"
|
|
45
|
+
},
|
|
46
|
+
"title": "Errors",
|
|
47
|
+
"type": "array"
|
|
48
|
+
},
|
|
49
|
+
"warnings": {
|
|
50
|
+
"items": {
|
|
51
|
+
"$ref": "#/$defs/ValidationIssue"
|
|
52
|
+
},
|
|
53
|
+
"title": "Warnings",
|
|
54
|
+
"type": "array"
|
|
55
|
+
}
|
|
56
|
+
},
|
|
57
|
+
"required": [
|
|
58
|
+
"ok"
|
|
59
|
+
],
|
|
60
|
+
"title": "ValidationReport",
|
|
61
|
+
"type": "object"
|
|
62
|
+
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Build a hybrid LRM dataset from internal traces and public datasets."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List
|
|
10
|
+
|
|
11
|
+
REPO_ROOT = Path(__file__).resolve().parents[1]
|
|
12
|
+
if str(REPO_ROOT) not in sys.path:
|
|
13
|
+
sys.path.insert(0, str(REPO_ROOT))
|
|
14
|
+
|
|
15
|
+
from training.datasets import (
|
|
16
|
+
ReActExample,
|
|
17
|
+
examples_from_traces,
|
|
18
|
+
filter_examples,
|
|
19
|
+
merge_examples,
|
|
20
|
+
save_jsonl,
|
|
21
|
+
)
|
|
22
|
+
from training.public_datasets import PublicDatasetConfig, default_public_configs, load_public_examples
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _load_public_config(path: Path) -> List[PublicDatasetConfig]:
    """Read a JSON file of dataset settings and build one config per entry.

    Args:
        path: Location of a UTF-8 encoded JSON document. Every element
            produced by iterating the decoded document is unpacked as
            keyword arguments to ``PublicDatasetConfig``.

    Returns:
        The constructed ``PublicDatasetConfig`` objects, in document order.
    """
    raw_text = path.read_text(encoding="utf-8")
    entries = json.loads(raw_text)
    return [PublicDatasetConfig(**entry) for entry in entries]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def main() -> None:
    """Command-line entry point: assemble and write the hybrid LRM dataset.

    Parses the CLI options, builds internal examples from any ``--trace-jsonl``
    files, loads public examples (from ``--public-config`` when given,
    otherwise from the default configs), applies the same length filters to
    both sets, merges them under the optional caps and saves the result to
    ``--output``.
    """
    cli = argparse.ArgumentParser(description="Build hybrid LRM dataset JSONL.")
    cli.add_argument("--trace-jsonl", action="append", default=[], help="Path to LRM plan trace JSONL.")
    cli.add_argument("--public-config", type=str, default="", help="Path to public dataset config JSON.")
    cli.add_argument("--output", type=str, required=True, help="Output JSONL path.")
    cli.add_argument("--max-internal", type=int, default=None, help="Max internal examples to include.")
    cli.add_argument("--max-public", type=int, default=None, help="Max public examples to include.")
    cli.add_argument("--min-response-len", type=int, default=1)
    cli.add_argument("--max-prompt-len", type=int, default=None)
    cli.add_argument("--max-response-len", type=int, default=None)
    opts = cli.parse_args()

    # Internal examples exist only when at least one trace file was passed.
    internal_examples: List[ReActExample] = []
    if opts.trace_jsonl:
        internal_examples = examples_from_traces([Path(entry) for entry in opts.trace_jsonl])

    # An empty --public-config means "use the built-in defaults".
    public_configs = (
        _load_public_config(Path(opts.public_config))
        if opts.public_config
        else default_public_configs()
    )
    public_examples = load_public_examples(public_configs)

    # Both sources go through identical length filtering.
    length_limits = {
        "min_response_len": opts.min_response_len,
        "max_prompt_len": opts.max_prompt_len,
        "max_response_len": opts.max_response_len,
    }
    internal_examples = filter_examples(internal_examples, **length_limits)
    public_examples = filter_examples(public_examples, **length_limits)

    combined = merge_examples(
        internal_examples=internal_examples,
        public_examples=public_examples,
        max_internal=opts.max_internal,
        max_public=opts.max_public,
    )
    save_jsonl(combined, Path(opts.output))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Export `crca_core` Pydantic JSON schemas for downstream tooling.
|
|
2
|
+
|
|
3
|
+
This supports the "structured object only" requirement: downstream systems can
|
|
4
|
+
validate inputs/outputs against stable schemas.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import sys
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
REPO_ROOT = Path(__file__).resolve().parents[1]
|
|
14
|
+
if str(REPO_ROOT) not in sys.path:
|
|
15
|
+
sys.path.insert(0, str(REPO_ROOT))
|
|
16
|
+
|
|
17
|
+
from crca_core.models.refusal import RefusalResult
|
|
18
|
+
from crca_core.models.result import (
|
|
19
|
+
CounterfactualResult,
|
|
20
|
+
DiscoveryHypothesisResult,
|
|
21
|
+
EstimateResult,
|
|
22
|
+
IdentificationResult,
|
|
23
|
+
InterventionDesignResult,
|
|
24
|
+
ValidationReport,
|
|
25
|
+
)
|
|
26
|
+
from crca_core.models.spec import DraftSpec, LockedSpec
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def main() -> None:
    """Write the JSON schema of each exported ``crca_core`` model to disk.

    Each schema is stored as ``<Name>.schema.json`` (2-space indented, UTF-8)
    in ``schemas_export/crca_core`` under the directory two levels above this
    file; the output directory is created when missing. A one-line summary is
    printed once all files are written.
    """
    out_dir = Path(__file__).resolve().parents[1] / "schemas_export" / "crca_core"
    out_dir.mkdir(parents=True, exist_ok=True)

    # (file stem, model class) pairs; the stem determines the output file name.
    exported_models = (
        ("DraftSpec", DraftSpec),
        ("LockedSpec", LockedSpec),
        ("ValidationReport", ValidationReport),
        ("RefusalResult", RefusalResult),
        ("DiscoveryHypothesisResult", DiscoveryHypothesisResult),
        ("InterventionDesignResult", InterventionDesignResult),
        ("CounterfactualResult", CounterfactualResult),
        ("IdentificationResult", IdentificationResult),
        ("EstimateResult", EstimateResult),
    )

    for stem, model_cls in exported_models:
        serialized = json.dumps(model_cls.model_json_schema(), indent=2)
        target = out_dir / f"{stem}.schema.json"
        target.write_text(serialized, encoding="utf-8")

    print(f"Wrote {len(exported_models)} schemas to {out_dir}")


if __name__ == "__main__":
    main()
|
|
54
|
+
|
scripts/export_hf_lrm.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Export finetuned LRM model for HuggingFace."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import shutil
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def main() -> None:
    """Command-line entry point: re-save a finetuned checkpoint for HF upload.

    Loads the tokenizer and causal-LM weights from ``--checkpoint-dir``, saves
    both into ``--output-dir`` (weights with ``safe_serialization=True``) and,
    when the ``--model-card`` file exists, copies it there as ``README.md``.

    Raises:
        RuntimeError: If importing ``transformers`` fails.
    """
    cli = argparse.ArgumentParser(description="Export finetuned LRM model to HF format.")
    cli.add_argument("--checkpoint-dir", type=str, required=True, help="Path to finetuned checkpoint.")
    cli.add_argument("--output-dir", type=str, required=True, help="Output directory for HF upload.")
    cli.add_argument("--model-card", type=str, default="MODEL_CARD.md", help="Model card path.")
    opts = cli.parse_args()

    # Imported only after argument parsing, so --help works without transformers.
    try:
        from transformers import AutoModelForCausalLM, AutoTokenizer  # type: ignore
    except Exception as exc:
        raise RuntimeError("transformers is required to export model.") from exc

    source = Path(opts.checkpoint_dir)
    destination = Path(opts.output_dir)
    destination.mkdir(parents=True, exist_ok=True)

    # Load both artifacts before writing anything, then save them.
    tokenizer = AutoTokenizer.from_pretrained(source)
    model = AutoModelForCausalLM.from_pretrained(source)
    tokenizer.save_pretrained(destination)
    model.save_pretrained(destination, safe_serialization=True)

    # The model card is optional: a missing file is skipped without error.
    card = Path(opts.model_card)
    if card.exists():
        shutil.copy(card, destination / "README.md")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Export a finetuned HF model to GGUF for Ollama."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import os
|
|
7
|
+
import subprocess
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def main() -> None:
    """Convert a HuggingFace checkpoint to GGUF using llama.cpp's converter.

    The llama.cpp checkout is taken from ``--llama-cpp-dir`` or, when that is
    empty, from the ``LLAMA_CPP_DIR`` environment variable. The converter is
    run with the interpreter that is executing this script, so it sees the
    same environment and installed packages.

    Raises:
        RuntimeError: If no llama.cpp directory is supplied.
        FileNotFoundError: If no converter script exists in that directory.
        subprocess.CalledProcessError: If the converter exits non-zero.
    """
    import sys  # local import: this script does not import sys at file level

    parser = argparse.ArgumentParser(description="Convert HF model to GGUF via llama.cpp.")
    parser.add_argument("--checkpoint-dir", type=str, required=True, help="HF checkpoint directory.")
    parser.add_argument("--output", type=str, required=True, help="Output GGUF path.")
    parser.add_argument("--llama-cpp-dir", type=str, default="", help="Path to llama.cpp repo.")
    args = parser.parse_args()

    # The CLI flag wins; the environment variable is the fallback.
    llama_cpp_location = args.llama_cpp_dir or os.environ.get("LLAMA_CPP_DIR")
    if not llama_cpp_location:
        raise RuntimeError("Provide --llama-cpp-dir or set LLAMA_CPP_DIR.")
    llama_cpp_dir = Path(llama_cpp_location)

    # llama.cpp renamed the script from dashes to underscores; accept either
    # so both older and current checkouts work.
    candidates = [
        llama_cpp_dir / "convert-hf-to-gguf.py",
        llama_cpp_dir / "convert_hf_to_gguf.py",
    ]
    converter = next((path for path in candidates if path.exists()), None)
    if converter is None:
        tried = ", ".join(str(path) for path in candidates)
        raise FileNotFoundError(f"Missing convert script: {tried}")

    checkpoint_dir = Path(args.checkpoint_dir)
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    cmd = [
        # Use the running interpreter instead of whatever "python" is on PATH.
        sys.executable or "python",
        str(converter),
        str(checkpoint_dir),
        "--outfile",
        str(output_path),
    ]
    subprocess.check_call(cmd)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Generate changelog from git history and pyproject.toml.
|
|
3
|
+
|
|
4
|
+
This script generates a changelog in Keep a Changelog format from:
|
|
5
|
+
- Git tags (version numbers)
|
|
6
|
+
- Conventional commit messages
|
|
7
|
+
- README.md changelog section
|
|
8
|
+
- pyproject.toml version information
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
import subprocess
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Dict, List, Optional, Tuple
|
|
16
|
+
import toml
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
import git
|
|
20
|
+
GITPYTHON_AVAILABLE = True
|
|
21
|
+
except ImportError:
|
|
22
|
+
GITPYTHON_AVAILABLE = False
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_version_from_pyproject() -> Optional[str]:
    """Return the ``[project].version`` value from ``pyproject.toml``.

    The file is looked up two directory levels above this script. ``None`` is
    returned when the file does not exist, when the key is absent, or when
    anything goes wrong while locating or parsing it (best-effort lookup).
    """
    try:
        candidate = Path(__file__).parent.parent / "pyproject.toml"
        if not candidate.exists():
            return None
        project_table = toml.load(candidate).get("project", {})
        return project_table.get("version")
    except Exception:
        # Deliberately best-effort: callers fall back to a placeholder version.
        return None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_git_tags() -> List[Tuple[str, str]]:
    """Collect ``(tag name, YYYY-MM-DD)`` pairs for the repository's tags.

    The repository is opened two directory levels above this script. The date
    is derived from the tagged commit's ``committed_date`` timestamp via
    ``datetime.fromtimestamp``. Tags that cannot be resolved are skipped.

    Returns:
        Pairs ordered by date string, newest first. An empty list is returned
        when GitPython is unavailable or the repository cannot be read.
    """
    if not GITPYTHON_AVAILABLE:
        return []

    try:
        repo = git.Repo(Path(__file__).parent.parent)
        dated_tags = []
        for tag in repo.tags:
            try:
                stamp = repo.commit(tag).committed_date
                day = datetime.fromtimestamp(stamp).strftime("%Y-%m-%d")
                dated_tags.append((tag.name, day))
            except Exception:
                # A single unreadable tag must not abort the whole listing.
                continue
        dated_tags.sort(key=lambda pair: pair[1], reverse=True)
        return dated_tags
    except Exception:
        return []
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def parse_conventional_commits(commits: List[str]) -> Dict[str, List[str]]:
    """Sort conventional-commit subject lines into Keep a Changelog sections.

    A recognised subject has the form ``type(scope)!: message``; the
    ``(scope)`` part and the ``!`` breaking-change marker are both optional.
    ``feat`` goes to "Added", ``fix`` to "Fixed", ``refactor``/``perf`` to
    "Changed", and ``docs`` to "Changed" with a ``Documentation: `` prefix.
    Other recognised types (``style``, ``test``, ``chore``) and lines that do
    not match are ignored.

    Args:
        commits: Commit subject lines, one string per commit.

    Returns:
        A dict with the keys "Added", "Changed", "Deprecated", "Removed",
        "Fixed" and "Security", each mapping to a list of messages in input
        order. Sections with no entries map to empty lists.
    """
    categories: Dict[str, List[str]] = {
        "Added": [],
        "Changed": [],
        "Deprecated": [],
        "Removed": [],
        "Fixed": [],
        "Security": [],
    }

    # The scope group is non-greedy so that parentheses inside the message
    # (e.g. "fix(a): b (c): d") are not absorbed into the scope, and "!?"
    # accepts the breaking-change marker instead of dropping such commits.
    subject_re = re.compile(
        r'^(feat|fix|docs|style|refactor|perf|test|chore)(\(.+?\))?!?:\s*(.+)$'
    )

    for commit in commits:
        match = subject_re.match(commit)
        if not match:
            continue

        commit_type = match.group(1)
        message = match.group(3)

        if commit_type == "feat":
            categories["Added"].append(message)
        elif commit_type == "fix":
            categories["Fixed"].append(message)
        elif commit_type in ("refactor", "perf"):
            categories["Changed"].append(message)
        elif commit_type == "docs":
            categories["Changed"].append(f"Documentation: {message}")

    return categories
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def extract_changelog_from_readme() -> Dict[str, str]:
    """Placeholder for extracting changelog entries from ``README.md``.

    The README two directory levels above this script is read when it exists,
    but no parsing is implemented yet, so the result is always an empty dict.
    """
    readme = Path(__file__).parent.parent / "README.md"
    if readme.exists():
        try:
            # The text is read but not parsed yet; real extraction would
            # need more sophisticated parsing.
            readme.read_text(encoding="utf-8")
        except Exception:
            return {}
    return {}
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def generate_changelog() -> str:
    """Build the changelog document as a Markdown string.

    The document starts with a fixed Keep a Changelog header, an
    "Unreleased" section and a section for the version reported by
    ``get_version_from_pyproject`` ("Unknown" when none is found), dated
    today. It is followed by one stub section for each of the first ten
    entries returned by ``get_git_tags``.
    """
    version = get_version_from_pyproject() or "Unknown"
    tags = get_git_tags()

    header = f"""# Changelog

All notable changes to CR-CA will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added
- New features in development

### Changed
- Changes in development

## [{version}] - {datetime.now().strftime("%Y-%m-%d")}

### Current Version

Current version: {version}

"""

    # One stub section per tag, limited to the first ten entries.
    tag_sections = [
        f"\n## [{tag_name}] - {tag_date}\n\n"
        "### Changes\n\n"
        "- See git history for details\n\n"
        for tag_name, tag_date in tags[:10]
    ]
    return header + "".join(tag_sections)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def main():
    """Generate the changelog and write it to both output locations.

    The same text is written to ``CHANGELOG.md`` and to
    ``docs/changelog/index.md`` (creating that directory when needed), both
    relative to the directory two levels above this script. The two paths
    are printed afterwards.
    """
    text = generate_changelog()
    repo_root = Path(__file__).parent.parent

    # Top-level copy.
    root_copy = repo_root / "CHANGELOG.md"
    root_copy.write_text(text, encoding="utf-8")

    # Copy under the docs tree.
    docs_copy = repo_root / "docs" / "changelog" / "index.md"
    docs_copy.parent.mkdir(parents=True, exist_ok=True)
    docs_copy.write_text(text, encoding="utf-8")

    print(f"Changelog generated: {root_copy}")
    print(f"Docs changelog updated: {docs_copy}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Generate lightweight Markdown docs from exported JSON schemas.
|
|
2
|
+
|
|
3
|
+
This intentionally documents *structured contracts* (schemas), not narratives.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Dict, List, Tuple
|
|
12
|
+
|
|
13
|
+
REPO_ROOT = Path(__file__).resolve().parents[1]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _load_schema(path: Path) -> Dict[str, Any]:
|
|
17
|
+
return json.loads(path.read_text(encoding="utf-8"))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _schema_title(schema: Dict[str, Any], fallback: str) -> str:
|
|
21
|
+
return schema.get("title") or fallback
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _collect_properties(schema: Dict[str, Any]) -> List[Tuple[str, str, str]]:
|
|
25
|
+
props = schema.get("properties") or {}
|
|
26
|
+
required = set(schema.get("required") or [])
|
|
27
|
+
rows: List[Tuple[str, str, str]] = []
|
|
28
|
+
for name, p in props.items():
|
|
29
|
+
typ = p.get("type") or p.get("$ref") or "unknown"
|
|
30
|
+
req = "required" if name in required else "optional"
|
|
31
|
+
desc = (p.get("description") or "").replace("\n", " ").strip()
|
|
32
|
+
rows.append((name, str(typ), f"{req}. {desc}".strip()))
|
|
33
|
+
return rows
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _render_md(name: str, schema: Dict[str, Any]) -> str:
    """Render one schema as a small Markdown page.

    The page has a level-2 heading with the schema title (falling back to
    *name*), a fixed "generated" notice, and then either a three-column
    table of the top-level properties or a note that none were found.
    """
    heading = _schema_title(schema, name)
    rows = _collect_properties(schema)

    page: List[str] = [
        f"## `{heading}`",
        "",
        "This page is generated from the JSON schema (contract-first).",
        "",
    ]
    if rows:
        page.extend(["| Field | Type | Notes |", "|---|---|---|"])
        page.extend(f"| `{field}` | `{typ}` | {notes} |" for field, typ, notes in rows)
    else:
        page.append("_No top-level properties found in schema._")
    # The trailing empty entry makes the joined text end with a newline.
    page.append("")
    return "\n".join(page)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def main() -> None:
    """Generate one Markdown page per exported schema, plus an index.

    Reads every ``*.schema.json`` file in ``schemas_export/crca_core`` (in
    sorted order), writes the rendered page to ``docs_generated/crca_core``
    under the same base name, then writes ``index.md`` listing all pages and
    prints the output directory.

    Raises:
        SystemExit: If the schema export directory does not exist.
    """
    schema_dir = REPO_ROOT / "schemas_export" / "crca_core"
    if not schema_dir.exists():
        raise SystemExit(f"Missing {schema_dir}. Run scripts/export_crca_core_schemas.py first.")

    out_dir = REPO_ROOT / "docs_generated" / "crca_core"
    out_dir.mkdir(parents=True, exist_ok=True)

    index_lines = [
        "# crca_core schema contracts (generated)",
        "",
        "These documents are generated from exported Pydantic JSON schemas.",
        "",
    ]

    for schema_path in sorted(schema_dir.glob("*.schema.json")):
        # "Foo.schema.json" -> "Foo"
        name = schema_path.name.replace(".schema.json", "")
        page = _render_md(name, _load_schema(schema_path))
        page_path = out_dir / f"{name}.md"
        page_path.write_text(page, encoding="utf-8")
        index_lines.append(f"- `{name}`: `{name}.md`")

    index_text = "\n".join(index_lines) + "\n"
    (out_dir / "index.md").write_text(index_text, encoding="utf-8")
    print(str(out_dir))


if __name__ == "__main__":
    main()
|
|
86
|
+
|