crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CRCA.py +172 -7
- MODEL_CARD.md +53 -0
- PKG-INFO +8 -2
- RELEASE_NOTES.md +17 -0
- STABILITY.md +19 -0
- architecture/hybrid/consistency_engine.py +362 -0
- architecture/hybrid/conversation_manager.py +421 -0
- architecture/hybrid/explanation_generator.py +452 -0
- architecture/hybrid/few_shot_learner.py +533 -0
- architecture/hybrid/graph_compressor.py +286 -0
- architecture/hybrid/hybrid_agent.py +4398 -0
- architecture/hybrid/language_compiler.py +623 -0
- architecture/hybrid/main,py +0 -0
- architecture/hybrid/reasoning_tracker.py +322 -0
- architecture/hybrid/self_verifier.py +524 -0
- architecture/hybrid/task_decomposer.py +567 -0
- architecture/hybrid/text_corrector.py +341 -0
- benchmark_results/crca_core_benchmarks.json +178 -0
- branches/crca_sd/crca_sd_realtime.py +6 -2
- branches/general_agent/__init__.py +102 -0
- branches/general_agent/general_agent.py +1400 -0
- branches/general_agent/personality.py +169 -0
- branches/general_agent/utils/__init__.py +19 -0
- branches/general_agent/utils/prompt_builder.py +170 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
- crca_core/__init__.py +35 -0
- crca_core/benchmarks/__init__.py +14 -0
- crca_core/benchmarks/synthetic_scm.py +103 -0
- crca_core/core/__init__.py +23 -0
- crca_core/core/api.py +120 -0
- crca_core/core/estimate.py +208 -0
- crca_core/core/godclass.py +72 -0
- crca_core/core/intervention_design.py +174 -0
- crca_core/core/lifecycle.py +48 -0
- crca_core/discovery/__init__.py +9 -0
- crca_core/discovery/tabular.py +193 -0
- crca_core/identify/__init__.py +171 -0
- crca_core/identify/backdoor.py +39 -0
- crca_core/identify/frontdoor.py +48 -0
- crca_core/identify/graph.py +106 -0
- crca_core/identify/id_algorithm.py +43 -0
- crca_core/identify/iv.py +48 -0
- crca_core/models/__init__.py +67 -0
- crca_core/models/provenance.py +56 -0
- crca_core/models/refusal.py +39 -0
- crca_core/models/result.py +83 -0
- crca_core/models/spec.py +151 -0
- crca_core/models/validation.py +68 -0
- crca_core/scm/__init__.py +9 -0
- crca_core/scm/linear_gaussian.py +198 -0
- crca_core/timeseries/__init__.py +6 -0
- crca_core/timeseries/pcmci.py +181 -0
- crca_llm/__init__.py +12 -0
- crca_llm/client.py +85 -0
- crca_llm/coauthor.py +118 -0
- crca_llm/orchestrator.py +289 -0
- crca_llm/types.py +21 -0
- crca_reasoning/__init__.py +16 -0
- crca_reasoning/critique.py +54 -0
- crca_reasoning/godclass.py +206 -0
- crca_reasoning/memory.py +24 -0
- crca_reasoning/rationale.py +10 -0
- crca_reasoning/react_controller.py +81 -0
- crca_reasoning/tool_router.py +97 -0
- crca_reasoning/types.py +40 -0
- crca_sd/__init__.py +15 -0
- crca_sd/crca_sd_core.py +2 -0
- crca_sd/crca_sd_governance.py +2 -0
- crca_sd/crca_sd_mpc.py +2 -0
- crca_sd/crca_sd_realtime.py +2 -0
- crca_sd/crca_sd_tui.py +2 -0
- cuda-keyring_1.1-1_all.deb +0 -0
- cuda-keyring_1.1-1_all.deb.1 +0 -0
- docs/IMAGE_ANNOTATION_USAGE.md +539 -0
- docs/INSTALL_DEEPSPEED.md +125 -0
- docs/api/branches/crca-cg.md +19 -0
- docs/api/branches/crca-q.md +27 -0
- docs/api/branches/crca-sd.md +37 -0
- docs/api/branches/general-agent.md +24 -0
- docs/api/branches/overview.md +19 -0
- docs/api/crca/agent-methods.md +62 -0
- docs/api/crca/operations.md +79 -0
- docs/api/crca/overview.md +32 -0
- docs/api/image-annotation/engine.md +52 -0
- docs/api/image-annotation/overview.md +17 -0
- docs/api/schemas/annotation.md +34 -0
- docs/api/schemas/core-schemas.md +82 -0
- docs/api/schemas/overview.md +32 -0
- docs/api/schemas/policy.md +30 -0
- docs/api/utils/conversation.md +22 -0
- docs/api/utils/graph-reasoner.md +32 -0
- docs/api/utils/overview.md +21 -0
- docs/api/utils/router.md +19 -0
- docs/api/utils/utilities.md +97 -0
- docs/architecture/causal-graphs.md +41 -0
- docs/architecture/data-flow.md +29 -0
- docs/architecture/design-principles.md +33 -0
- docs/architecture/hybrid-agent/components.md +38 -0
- docs/architecture/hybrid-agent/consistency.md +26 -0
- docs/architecture/hybrid-agent/overview.md +44 -0
- docs/architecture/hybrid-agent/reasoning.md +22 -0
- docs/architecture/llm-integration.md +26 -0
- docs/architecture/modular-structure.md +37 -0
- docs/architecture/overview.md +69 -0
- docs/architecture/policy-engine-arch.md +29 -0
- docs/branches/crca-cg/corposwarm.md +39 -0
- docs/branches/crca-cg/esg-scoring.md +30 -0
- docs/branches/crca-cg/multi-agent.md +35 -0
- docs/branches/crca-cg/overview.md +40 -0
- docs/branches/crca-q/alternative-data.md +55 -0
- docs/branches/crca-q/architecture.md +71 -0
- docs/branches/crca-q/backtesting.md +45 -0
- docs/branches/crca-q/causal-engine.md +33 -0
- docs/branches/crca-q/execution.md +39 -0
- docs/branches/crca-q/market-data.md +60 -0
- docs/branches/crca-q/overview.md +58 -0
- docs/branches/crca-q/philosophy.md +60 -0
- docs/branches/crca-q/portfolio-optimization.md +66 -0
- docs/branches/crca-q/risk-management.md +102 -0
- docs/branches/crca-q/setup.md +65 -0
- docs/branches/crca-q/signal-generation.md +61 -0
- docs/branches/crca-q/signal-validation.md +43 -0
- docs/branches/crca-sd/core.md +84 -0
- docs/branches/crca-sd/governance.md +53 -0
- docs/branches/crca-sd/mpc-solver.md +65 -0
- docs/branches/crca-sd/overview.md +59 -0
- docs/branches/crca-sd/realtime.md +28 -0
- docs/branches/crca-sd/tui.md +20 -0
- docs/branches/general-agent/overview.md +37 -0
- docs/branches/general-agent/personality.md +36 -0
- docs/branches/general-agent/prompt-builder.md +30 -0
- docs/changelog/index.md +79 -0
- docs/contributing/code-style.md +69 -0
- docs/contributing/documentation.md +43 -0
- docs/contributing/overview.md +29 -0
- docs/contributing/testing.md +29 -0
- docs/core/crcagent/async-operations.md +65 -0
- docs/core/crcagent/automatic-extraction.md +107 -0
- docs/core/crcagent/batch-prediction.md +80 -0
- docs/core/crcagent/bayesian-inference.md +60 -0
- docs/core/crcagent/causal-graph.md +92 -0
- docs/core/crcagent/counterfactuals.md +96 -0
- docs/core/crcagent/deterministic-simulation.md +78 -0
- docs/core/crcagent/dual-mode-operation.md +82 -0
- docs/core/crcagent/initialization.md +88 -0
- docs/core/crcagent/optimization.md +65 -0
- docs/core/crcagent/overview.md +63 -0
- docs/core/crcagent/time-series.md +57 -0
- docs/core/schemas/annotation.md +30 -0
- docs/core/schemas/core-schemas.md +82 -0
- docs/core/schemas/overview.md +30 -0
- docs/core/schemas/policy.md +41 -0
- docs/core/templates/base-agent.md +31 -0
- docs/core/templates/feature-mixins.md +31 -0
- docs/core/templates/overview.md +29 -0
- docs/core/templates/templates-guide.md +75 -0
- docs/core/tools/mcp-client.md +34 -0
- docs/core/tools/overview.md +24 -0
- docs/core/utils/conversation.md +27 -0
- docs/core/utils/graph-reasoner.md +29 -0
- docs/core/utils/overview.md +27 -0
- docs/core/utils/router.md +27 -0
- docs/core/utils/utilities.md +97 -0
- docs/css/custom.css +84 -0
- docs/examples/basic-usage.md +57 -0
- docs/examples/general-agent/general-agent-examples.md +50 -0
- docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
- docs/examples/image-annotation/image-annotation-examples.md +54 -0
- docs/examples/integration/integration-examples.md +58 -0
- docs/examples/overview.md +37 -0
- docs/examples/trading/trading-examples.md +46 -0
- docs/features/causal-reasoning/advanced-topics.md +101 -0
- docs/features/causal-reasoning/counterfactuals.md +43 -0
- docs/features/causal-reasoning/do-calculus.md +50 -0
- docs/features/causal-reasoning/overview.md +47 -0
- docs/features/causal-reasoning/structural-models.md +52 -0
- docs/features/hybrid-agent/advanced-components.md +55 -0
- docs/features/hybrid-agent/core-components.md +64 -0
- docs/features/hybrid-agent/overview.md +34 -0
- docs/features/image-annotation/engine.md +82 -0
- docs/features/image-annotation/features.md +113 -0
- docs/features/image-annotation/integration.md +75 -0
- docs/features/image-annotation/overview.md +53 -0
- docs/features/image-annotation/quickstart.md +73 -0
- docs/features/policy-engine/doctrine-ledger.md +105 -0
- docs/features/policy-engine/monitoring.md +44 -0
- docs/features/policy-engine/mpc-control.md +89 -0
- docs/features/policy-engine/overview.md +46 -0
- docs/getting-started/configuration.md +225 -0
- docs/getting-started/first-agent.md +164 -0
- docs/getting-started/installation.md +144 -0
- docs/getting-started/quickstart.md +137 -0
- docs/index.md +118 -0
- docs/js/mathjax.js +13 -0
- docs/lrm/discovery_proof_notes.md +25 -0
- docs/lrm/finetune_full.md +83 -0
- docs/lrm/math_appendix.md +120 -0
- docs/lrm/overview.md +32 -0
- docs/mkdocs.yml +238 -0
- docs/stylesheets/extra.css +21 -0
- docs_generated/crca_core/CounterfactualResult.md +12 -0
- docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
- docs_generated/crca_core/DraftSpec.md +13 -0
- docs_generated/crca_core/EstimateResult.md +13 -0
- docs_generated/crca_core/IdentificationResult.md +17 -0
- docs_generated/crca_core/InterventionDesignResult.md +12 -0
- docs_generated/crca_core/LockedSpec.md +15 -0
- docs_generated/crca_core/RefusalResult.md +12 -0
- docs_generated/crca_core/ValidationReport.md +9 -0
- docs_generated/crca_core/index.md +13 -0
- examples/general_agent_example.py +277 -0
- examples/general_agent_quickstart.py +202 -0
- examples/general_agent_simple.py +92 -0
- examples/hybrid_agent_auto_extraction.py +84 -0
- examples/hybrid_agent_dictionary_demo.py +104 -0
- examples/hybrid_agent_enhanced.py +179 -0
- examples/hybrid_agent_general_knowledge.py +107 -0
- examples/image_annotation_quickstart.py +328 -0
- examples/test_hybrid_fixes.py +77 -0
- image_annotation/__init__.py +27 -0
- image_annotation/annotation_engine.py +2593 -0
- install_cuda_wsl2.sh +59 -0
- install_deepspeed.sh +56 -0
- install_deepspeed_simple.sh +87 -0
- mkdocs.yml +252 -0
- ollama/Modelfile +8 -0
- prompts/__init__.py +2 -1
- prompts/default_crca.py +9 -1
- prompts/general_agent.py +227 -0
- prompts/image_annotation.py +56 -0
- pyproject.toml +17 -2
- requirements-docs.txt +10 -0
- requirements.txt +21 -2
- schemas/__init__.py +26 -1
- schemas/annotation.py +222 -0
- schemas/conversation.py +193 -0
- schemas/hybrid.py +211 -0
- schemas/reasoning.py +276 -0
- schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
- schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
- schemas_export/crca_core/DraftSpec.schema.json +635 -0
- schemas_export/crca_core/EstimateResult.schema.json +113 -0
- schemas_export/crca_core/IdentificationResult.schema.json +145 -0
- schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
- schemas_export/crca_core/LockedSpec.schema.json +646 -0
- schemas_export/crca_core/RefusalResult.schema.json +90 -0
- schemas_export/crca_core/ValidationReport.schema.json +62 -0
- scripts/build_lrm_dataset.py +80 -0
- scripts/export_crca_core_schemas.py +54 -0
- scripts/export_hf_lrm.py +37 -0
- scripts/export_ollama_gguf.py +45 -0
- scripts/generate_changelog.py +157 -0
- scripts/generate_crca_core_docs_from_schemas.py +86 -0
- scripts/run_crca_core_benchmarks.py +163 -0
- scripts/run_full_finetune.py +198 -0
- scripts/run_lrm_eval.py +31 -0
- templates/graph_management.py +29 -0
- tests/conftest.py +9 -0
- tests/test_core.py +2 -3
- tests/test_crca_core_discovery_tabular.py +15 -0
- tests/test_crca_core_estimate_dowhy.py +36 -0
- tests/test_crca_core_identify.py +18 -0
- tests/test_crca_core_intervention_design.py +36 -0
- tests/test_crca_core_linear_gaussian_scm.py +69 -0
- tests/test_crca_core_spec.py +25 -0
- tests/test_crca_core_timeseries_pcmci.py +15 -0
- tests/test_crca_llm_coauthor.py +12 -0
- tests/test_crca_llm_orchestrator.py +80 -0
- tests/test_hybrid_agent_llm_enhanced.py +556 -0
- tests/test_image_annotation_demo.py +376 -0
- tests/test_image_annotation_operational.py +408 -0
- tests/test_image_annotation_unit.py +551 -0
- tests/test_training_moe.py +13 -0
- training/__init__.py +42 -0
- training/datasets.py +140 -0
- training/deepspeed_zero2_0_5b.json +22 -0
- training/deepspeed_zero2_1_5b.json +22 -0
- training/deepspeed_zero3_0_5b.json +28 -0
- training/deepspeed_zero3_14b.json +28 -0
- training/deepspeed_zero3_h100_3gpu.json +20 -0
- training/deepspeed_zero3_offload.json +28 -0
- training/eval.py +92 -0
- training/finetune.py +516 -0
- training/public_datasets.py +89 -0
- training_data/react_train.jsonl +7473 -0
- utils/agent_discovery.py +311 -0
- utils/batch_processor.py +317 -0
- utils/conversation.py +78 -0
- utils/edit_distance.py +118 -0
- utils/formatter.py +33 -0
- utils/graph_reasoner.py +530 -0
- utils/rate_limiter.py +283 -0
- utils/router.py +2 -2
- utils/tool_discovery.py +307 -0
- webui/__init__.py +10 -0
- webui/app.py +229 -0
- webui/config.py +104 -0
- webui/static/css/style.css +332 -0
- webui/static/js/main.js +284 -0
- webui/templates/index.html +42 -0
- tests/test_crca_excel.py +0 -166
- tests/test_data_broker.py +0 -424
- tests/test_palantir.py +0 -349
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
training/datasets.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""Dataset assembly for ReAct training traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Dict, Iterable, List, Optional, Sequence
|
|
9
|
+
|
|
10
|
+
from crca_reasoning.types import LRMPlanResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class ReActExample:
    """A single prompt/response record for the ReAct training set.

    Records are created from plan traces by ``plan_result_to_examples`` and
    written one JSON object per line by ``save_jsonl``.
    """

    # Prompt text; for cycle examples this is the cycle's reasoning.
    prompt: str
    # Response text paired with the prompt.
    response: str
    # String labels attached to the record; this module sets a "type" key.
    tags: Dict[str, str]
    # True when any observation of the source cycle carried a refusal.
    refusal: bool = False
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def plan_result_to_examples(plan: LRMPlanResult) -> List[ReActExample]:
    """Convert one plan result into training examples.

    Every entry of ``plan.cycle_traces`` yields one example tagged
    ``"react_cycle"``. Its prompt is the cycle's reasoning; its response is
    an ``Actions:`` header followed by one ``- tool_name: payload`` line per
    action (when the cycle has actions), then a ``Critique: ...`` line (when
    the cycle has a critique). A cycle with neither produces an example with
    an empty response.

    When the plan has a rationale trace with at least one step, one more
    example tagged ``"rationale_trace"`` is appended, with the fixed prompt
    ``"RationaleTrace"`` and the steps joined by newlines.

    Args:
        plan: Plan result whose cycle traces and rationale trace are read.

    Returns:
        Examples in cycle order, with the rationale example (if any) last.
    """
    examples: List[ReActExample] = []
    for cycle in plan.cycle_traces:
        # Collect the fragments and join once rather than growing a string
        # with "+=" on every action.
        fragments: List[str] = []
        if cycle.actions:
            fragments.append("Actions:\n")
            fragments.extend(
                f"- {act.tool_name}: {act.payload}\n" for act in cycle.actions
            )
        if cycle.critique:
            fragments.append(f"Critique: {cycle.critique}\n")
        examples.append(
            ReActExample(
                prompt=cycle.reasoning,
                response="".join(fragments),
                tags={"type": "react_cycle"},
                # One refusing observation is enough to flag the whole cycle.
                refusal=any(obs.refusal is not None for obs in cycle.observations),
            )
        )
    if plan.rationale_trace and plan.rationale_trace.steps:
        examples.append(
            ReActExample(
                prompt="RationaleTrace",
                response="\n".join(plan.rationale_trace.steps),
                tags={"type": "rationale_trace"},
                refusal=False,
            )
        )
    return examples
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def load_plan_results(paths: Sequence[Path]) -> List[LRMPlanResult]:
    """Load LRMPlanResult objects from JSON or JSONL files.

    Files whose suffix is ``.jsonl`` (case-insensitive) are read line by
    line: blank lines are skipped and every other line is parsed as one JSON
    document. Any other file is parsed as a single JSON document that is
    either one plan object or a list of them. Each parsed object is passed
    through ``LRMPlanResult.model_validate``.

    Args:
        paths: Trace files to read, in order. Plain string paths are accepted
            as well as ``Path`` objects.

    Returns:
        All loaded plan results, in file order and then in-file order.

    Raises:
        FileNotFoundError: If one of the paths does not exist.
    """
    results: List[LRMPlanResult] = []
    for raw_path in paths:
        # Coerce so callers may pass str paths; a Path passes through unchanged.
        path = Path(raw_path)
        if not path.exists():
            raise FileNotFoundError(f"Trace file not found: {path}")
        if path.suffix.lower() == ".jsonl":
            with path.open("r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    results.append(LRMPlanResult.model_validate(json.loads(line)))
        else:
            payload = json.loads(path.read_text(encoding="utf-8"))
            # Treat a lone object as a one-element list so both shapes share a path.
            items = payload if isinstance(payload, list) else [payload]
            results.extend(LRMPlanResult.model_validate(item) for item in items)
    return results
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def examples_from_traces(paths: Sequence[Path]) -> List[ReActExample]:
    """Read the given trace files and flatten every plan into ReActExamples.

    All plans are loaded first via ``load_plan_results``; each is then
    expanded with ``plan_result_to_examples`` and the results concatenated
    in plan order.
    """
    return [
        example
        for plan in load_plan_results(paths)
        for example in plan_result_to_examples(plan)
    ]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def normalize_text(text: str) -> str:
    """Trim *text* and collapse every run of whitespace into one space."""
    # split() with no separator already ignores leading/trailing whitespace
    # and treats any whitespace run as a single delimiter.
    words = text.split()
    return " ".join(words)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def filter_examples(
    examples: Iterable[ReActExample],
    *,
    min_response_len: int = 1,
    max_prompt_len: Optional[int] = None,
    max_response_len: Optional[int] = None,
) -> List[ReActExample]:
    """Normalize examples and drop those outside the length limits.

    Prompt and response are whitespace-normalized with ``normalize_text``;
    all limits are compared against the character length of the normalized
    text.

    Args:
        examples: Candidate examples.
        min_response_len: Minimum normalized response length to keep.
        max_prompt_len: Maximum normalized prompt length, or None for no cap.
        max_response_len: Maximum normalized response length, or None for no cap.

    Returns:
        New ReActExample objects holding the normalized text, a copy of the
        tags, and the original refusal flag, in input order.
    """
    kept: List[ReActExample] = []
    for example in examples:
        clean_prompt = normalize_text(example.prompt)
        clean_response = normalize_text(example.response)

        response_too_short = len(clean_response) < min_response_len
        prompt_too_long = (
            max_prompt_len is not None and len(clean_prompt) > max_prompt_len
        )
        response_too_long = (
            max_response_len is not None and len(clean_response) > max_response_len
        )
        if response_too_short or prompt_too_long or response_too_long:
            continue

        # Copy the tags so the filtered example does not share the dict.
        kept.append(
            ReActExample(
                prompt=clean_prompt,
                response=clean_response,
                tags=dict(example.tags),
                refusal=example.refusal,
            )
        )
    return kept
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def merge_examples(
    *,
    internal_examples: Iterable[ReActExample],
    public_examples: Iterable[ReActExample],
    max_internal: Optional[int] = None,
    max_public: Optional[int] = None,
) -> List[ReActExample]:
    """Concatenate internal and public examples, optionally capping each.

    Args:
        internal_examples: Examples placed first in the result.
        public_examples: Examples placed after the internal ones.
        max_internal: Slice bound applied to the internal examples, or None
            to keep all of them.
        max_public: Slice bound applied to the public examples, or None to
            keep all of them.

    Returns:
        A new list: the (possibly truncated) internal examples followed by
        the (possibly truncated) public examples.
    """
    # A slice bound of None keeps the whole list, so one expression covers
    # both the capped and the uncapped case.
    internal_part = list(internal_examples)[:max_internal]
    public_part = list(public_examples)[:max_public]
    return internal_part + public_part
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def save_jsonl(examples: Iterable[ReActExample], path: Path) -> None:
    """Write *examples* to *path* as JSON Lines, one object per line.

    Missing parent directories are created. The file is opened in write
    mode as UTF-8, and non-ASCII characters are written unescaped.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as sink:
        # Each example is serialized from its instance attribute dict.
        sink.writelines(
            json.dumps(vars(example), ensure_ascii=False) + "\n"
            for example in examples
        )
|
|
140
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fp16": {
|
|
3
|
+
"enabled": true
|
|
4
|
+
},
|
|
5
|
+
"bf16": {
|
|
6
|
+
"enabled": false
|
|
7
|
+
},
|
|
8
|
+
"zero_optimization": {
|
|
9
|
+
"stage": 2,
|
|
10
|
+
"offload_optimizer": {
|
|
11
|
+
"device": "cpu",
|
|
12
|
+
"pin_memory": false
|
|
13
|
+
},
|
|
14
|
+
"overlap_comm": true,
|
|
15
|
+
"contiguous_gradients": true,
|
|
16
|
+
"reduce_bucket_size": 15000000
|
|
17
|
+
},
|
|
18
|
+
"train_micro_batch_size_per_gpu": "auto",
|
|
19
|
+
"gradient_accumulation_steps": "auto",
|
|
20
|
+
"gradient_clipping": 1.0,
|
|
21
|
+
"zero_allow_untested_optimizer": true
|
|
22
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fp16": {
|
|
3
|
+
"enabled": true
|
|
4
|
+
},
|
|
5
|
+
"bf16": {
|
|
6
|
+
"enabled": false
|
|
7
|
+
},
|
|
8
|
+
"zero_optimization": {
|
|
9
|
+
"stage": 2,
|
|
10
|
+
"offload_optimizer": {
|
|
11
|
+
"device": "cpu",
|
|
12
|
+
"pin_memory": false
|
|
13
|
+
},
|
|
14
|
+
"overlap_comm": true,
|
|
15
|
+
"contiguous_gradients": true,
|
|
16
|
+
"reduce_bucket_size": 20000000
|
|
17
|
+
},
|
|
18
|
+
"train_micro_batch_size_per_gpu": "auto",
|
|
19
|
+
"gradient_accumulation_steps": "auto",
|
|
20
|
+
"gradient_clipping": 1.0,
|
|
21
|
+
"zero_allow_untested_optimizer": true
|
|
22
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fp16": {
|
|
3
|
+
"enabled": true
|
|
4
|
+
},
|
|
5
|
+
"bf16": {
|
|
6
|
+
"enabled": false
|
|
7
|
+
},
|
|
8
|
+
"zero_optimization": {
|
|
9
|
+
"stage": 3,
|
|
10
|
+
"offload_param": {
|
|
11
|
+
"device": "cpu",
|
|
12
|
+
"pin_memory": false
|
|
13
|
+
},
|
|
14
|
+
"offload_optimizer": {
|
|
15
|
+
"device": "cpu",
|
|
16
|
+
"pin_memory": false
|
|
17
|
+
},
|
|
18
|
+
"overlap_comm": true,
|
|
19
|
+
"contiguous_gradients": true,
|
|
20
|
+
"reduce_bucket_size": 15000000,
|
|
21
|
+
"stage3_prefetch_bucket_size": 15000000,
|
|
22
|
+
"stage3_param_persistence_threshold": 500000
|
|
23
|
+
},
|
|
24
|
+
"train_micro_batch_size_per_gpu": "auto",
|
|
25
|
+
"gradient_accumulation_steps": "auto",
|
|
26
|
+
"gradient_clipping": 1.0,
|
|
27
|
+
"zero_allow_untested_optimizer": true
|
|
28
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fp16": {
|
|
3
|
+
"enabled": false
|
|
4
|
+
},
|
|
5
|
+
"bf16": {
|
|
6
|
+
"enabled": true
|
|
7
|
+
},
|
|
8
|
+
"zero_optimization": {
|
|
9
|
+
"stage": 3,
|
|
10
|
+
"offload_param": {
|
|
11
|
+
"device": "cpu",
|
|
12
|
+
"pin_memory": false
|
|
13
|
+
},
|
|
14
|
+
"offload_optimizer": {
|
|
15
|
+
"device": "cpu",
|
|
16
|
+
"pin_memory": false
|
|
17
|
+
},
|
|
18
|
+
"overlap_comm": true,
|
|
19
|
+
"contiguous_gradients": true,
|
|
20
|
+
"reduce_bucket_size": 100000000,
|
|
21
|
+
"stage3_prefetch_bucket_size": 100000000,
|
|
22
|
+
"stage3_param_persistence_threshold": 2000000
|
|
23
|
+
},
|
|
24
|
+
"train_micro_batch_size_per_gpu": "auto",
|
|
25
|
+
"gradient_accumulation_steps": "auto",
|
|
26
|
+
"gradient_clipping": 1.0,
|
|
27
|
+
"zero_allow_untested_optimizer": true
|
|
28
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fp16": {
|
|
3
|
+
"enabled": false
|
|
4
|
+
},
|
|
5
|
+
"bf16": {
|
|
6
|
+
"enabled": true
|
|
7
|
+
},
|
|
8
|
+
"zero_optimization": {
|
|
9
|
+
"stage": 3,
|
|
10
|
+
"overlap_comm": true,
|
|
11
|
+
"contiguous_gradients": true,
|
|
12
|
+
"reduce_bucket_size": 50000000,
|
|
13
|
+
"stage3_prefetch_bucket_size": 50000000,
|
|
14
|
+
"stage3_param_persistence_threshold": 1000000
|
|
15
|
+
},
|
|
16
|
+
"train_micro_batch_size_per_gpu": "auto",
|
|
17
|
+
"gradient_accumulation_steps": "auto",
|
|
18
|
+
"gradient_clipping": 1.0,
|
|
19
|
+
"zero_allow_untested_optimizer": true
|
|
20
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fp16": {
|
|
3
|
+
"enabled": false
|
|
4
|
+
},
|
|
5
|
+
"bf16": {
|
|
6
|
+
"enabled": true
|
|
7
|
+
},
|
|
8
|
+
"zero_optimization": {
|
|
9
|
+
"stage": 3,
|
|
10
|
+
"offload_param": {
|
|
11
|
+
"device": "cpu",
|
|
12
|
+
"pin_memory": false
|
|
13
|
+
},
|
|
14
|
+
"offload_optimizer": {
|
|
15
|
+
"device": "cpu",
|
|
16
|
+
"pin_memory": false
|
|
17
|
+
},
|
|
18
|
+
"overlap_comm": true,
|
|
19
|
+
"contiguous_gradients": true,
|
|
20
|
+
"reduce_bucket_size": 50000000,
|
|
21
|
+
"stage3_prefetch_bucket_size": 50000000,
|
|
22
|
+
"stage3_param_persistence_threshold": 1000000
|
|
23
|
+
},
|
|
24
|
+
"train_micro_batch_size_per_gpu": "auto",
|
|
25
|
+
"gradient_accumulation_steps": "auto",
|
|
26
|
+
"gradient_clipping": 1.0,
|
|
27
|
+
"zero_allow_untested_optimizer": true
|
|
28
|
+
}
|
training/eval.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Mixed evaluation harness for LRM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Dict, List
|
|
9
|
+
|
|
10
|
+
from crca_core.benchmarks.synthetic_scm import generate_latent_confounder_graph
|
|
11
|
+
from crca_core.identify import identify_effect
|
|
12
|
+
from crca_core.core.lifecycle import lock_spec
|
|
13
|
+
from crca_core.models.spec import CausalGraphSpec, DraftSpec, EdgeSpec, NodeSpec, RoleSpec
|
|
14
|
+
from crca_reasoning.types import LRMPlanResult
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class EvalConfig:
    """Settings for an evaluation run."""

    # File that ``run_eval`` writes its JSON results to.
    output_path: str = "eval_results/lrm_eval.json"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def eval_react_metrics(plans: List[LRMPlanResult]) -> Dict[str, float]:
    """Compute aggregate ReAct metrics over a collection of plan results.

    Args:
        plans: Plan results to score. Any iterable is accepted; it is
            materialized once because it is measured and traversed several
            times.

    Returns:
        A dict with four float entries (all ``0.0`` when there are no plans):

        - ``cycle_convergence``: fraction of plans with exactly one cycle trace.
        - ``refusal_rate``: total number of refusals divided by the number of
          plans (may exceed 1.0 when plans carry several refusals).
        - ``refusal_structured_rate``: fraction of refusals that have both
          truthy ``reason_codes`` and a truthy ``message``; ``0.0`` when
          there are no refusals.
        - ``tool_call_coverage``: fraction of actions whose ``tool_name``
          matches an observation in the same cycle; ``0.0`` when there are
          no actions.
    """
    # A generator would pass the emptiness check, be exhausted by the first
    # traversal and then fail on len(); a list behaves correctly throughout.
    plans = list(plans)
    if not plans:
        return {
            "cycle_convergence": 0.0,
            "refusal_rate": 0.0,
            "refusal_structured_rate": 0.0,
            "tool_call_coverage": 0.0,
        }

    plan_total = float(len(plans))
    single_cycle_plans = sum(1 for p in plans if len(p.cycle_traces) == 1)

    refusals = [r for p in plans for r in p.refusals]
    refusal_count = len(refusals)
    refusal_structured = sum(1 for r in refusals if r.reason_codes and r.message)

    action_count = 0
    observed_actions = 0
    for plan in plans:
        for cycle in plan.cycle_traces:
            action_count += len(cycle.actions)
            # An action counts as observed when some observation in the same
            # cycle reports the same tool name.
            observed_actions += sum(
                1
                for act in cycle.actions
                if any(obs.tool_name == act.tool_name for obs in cycle.observations)
            )
    tool_call_coverage = (observed_actions / float(action_count)) if action_count else 0.0

    return {
        "cycle_convergence": single_cycle_plans / plan_total,
        "refusal_rate": refusal_count / plan_total,
        # max(1, ...) avoids dividing by zero when nothing was refused.
        "refusal_structured_rate": refusal_structured / float(max(1, refusal_count)),
        "tool_call_coverage": tool_call_coverage,
    }
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def eval_causal_identification() -> Dict[str, str]:
    """Run effect identification on two fixed graphs and report the outcomes.

    The first graph is the single edge X -> Y (the identifiable chain); the
    second comes from ``generate_latent_confounder_graph``. In both cases X
    is the treatment and Y the outcome, and the spec is locked with a
    ``"human"`` approval before identification.

    Returns:
        ``{"ident_chain": ..., "latent_case": ...}`` holding the
        ``result_type`` of each identification result.
    """

    def _identify_x_on_y(graph):
        # Draft -> lock -> identify, shared by both scenarios.
        draft = DraftSpec(
            graph=graph,
            roles=RoleSpec(treatments=["X"], outcomes=["Y"]),
        )
        locked = lock_spec(draft, approvals=["human"])
        return identify_effect(locked_spec=locked, treatment="X", outcome="Y")

    # Identifiable chain
    chain_graph = CausalGraphSpec(
        nodes=[NodeSpec(name="X"), NodeSpec(name="Y")],
        edges=[EdgeSpec(source="X", target="Y")],
    )
    chain_outcome = _identify_x_on_y(chain_graph)

    # Latent confounding case
    latent_outcome = _identify_x_on_y(generate_latent_confounder_graph())

    return {
        "ident_chain": chain_outcome.result_type,
        "latent_case": latent_outcome.result_type,
    }
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def run_eval(plans: List[LRMPlanResult], cfg: EvalConfig) -> None:
    """Run both evaluations and write the combined results as JSON.

    The output has two top-level keys, ``"react_metrics"`` and
    ``"causal_identification"``. It is written (UTF-8, 2-space indent) to
    ``cfg.output_path``, creating parent directories as needed.
    """
    report = {
        "react_metrics": eval_react_metrics(plans),
        "causal_identification": eval_causal_identification(),
    }
    destination = Path(cfg.output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, indent=2)
    destination.write_text(serialized, encoding="utf-8")
|
|
92
|
+
|