crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CRCA.py +172 -7
- MODEL_CARD.md +53 -0
- PKG-INFO +8 -2
- RELEASE_NOTES.md +17 -0
- STABILITY.md +19 -0
- architecture/hybrid/consistency_engine.py +362 -0
- architecture/hybrid/conversation_manager.py +421 -0
- architecture/hybrid/explanation_generator.py +452 -0
- architecture/hybrid/few_shot_learner.py +533 -0
- architecture/hybrid/graph_compressor.py +286 -0
- architecture/hybrid/hybrid_agent.py +4398 -0
- architecture/hybrid/language_compiler.py +623 -0
- architecture/hybrid/main,py +0 -0
- architecture/hybrid/reasoning_tracker.py +322 -0
- architecture/hybrid/self_verifier.py +524 -0
- architecture/hybrid/task_decomposer.py +567 -0
- architecture/hybrid/text_corrector.py +341 -0
- benchmark_results/crca_core_benchmarks.json +178 -0
- branches/crca_sd/crca_sd_realtime.py +6 -2
- branches/general_agent/__init__.py +102 -0
- branches/general_agent/general_agent.py +1400 -0
- branches/general_agent/personality.py +169 -0
- branches/general_agent/utils/__init__.py +19 -0
- branches/general_agent/utils/prompt_builder.py +170 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
- crca_core/__init__.py +35 -0
- crca_core/benchmarks/__init__.py +14 -0
- crca_core/benchmarks/synthetic_scm.py +103 -0
- crca_core/core/__init__.py +23 -0
- crca_core/core/api.py +120 -0
- crca_core/core/estimate.py +208 -0
- crca_core/core/godclass.py +72 -0
- crca_core/core/intervention_design.py +174 -0
- crca_core/core/lifecycle.py +48 -0
- crca_core/discovery/__init__.py +9 -0
- crca_core/discovery/tabular.py +193 -0
- crca_core/identify/__init__.py +171 -0
- crca_core/identify/backdoor.py +39 -0
- crca_core/identify/frontdoor.py +48 -0
- crca_core/identify/graph.py +106 -0
- crca_core/identify/id_algorithm.py +43 -0
- crca_core/identify/iv.py +48 -0
- crca_core/models/__init__.py +67 -0
- crca_core/models/provenance.py +56 -0
- crca_core/models/refusal.py +39 -0
- crca_core/models/result.py +83 -0
- crca_core/models/spec.py +151 -0
- crca_core/models/validation.py +68 -0
- crca_core/scm/__init__.py +9 -0
- crca_core/scm/linear_gaussian.py +198 -0
- crca_core/timeseries/__init__.py +6 -0
- crca_core/timeseries/pcmci.py +181 -0
- crca_llm/__init__.py +12 -0
- crca_llm/client.py +85 -0
- crca_llm/coauthor.py +118 -0
- crca_llm/orchestrator.py +289 -0
- crca_llm/types.py +21 -0
- crca_reasoning/__init__.py +16 -0
- crca_reasoning/critique.py +54 -0
- crca_reasoning/godclass.py +206 -0
- crca_reasoning/memory.py +24 -0
- crca_reasoning/rationale.py +10 -0
- crca_reasoning/react_controller.py +81 -0
- crca_reasoning/tool_router.py +97 -0
- crca_reasoning/types.py +40 -0
- crca_sd/__init__.py +15 -0
- crca_sd/crca_sd_core.py +2 -0
- crca_sd/crca_sd_governance.py +2 -0
- crca_sd/crca_sd_mpc.py +2 -0
- crca_sd/crca_sd_realtime.py +2 -0
- crca_sd/crca_sd_tui.py +2 -0
- cuda-keyring_1.1-1_all.deb +0 -0
- cuda-keyring_1.1-1_all.deb.1 +0 -0
- docs/IMAGE_ANNOTATION_USAGE.md +539 -0
- docs/INSTALL_DEEPSPEED.md +125 -0
- docs/api/branches/crca-cg.md +19 -0
- docs/api/branches/crca-q.md +27 -0
- docs/api/branches/crca-sd.md +37 -0
- docs/api/branches/general-agent.md +24 -0
- docs/api/branches/overview.md +19 -0
- docs/api/crca/agent-methods.md +62 -0
- docs/api/crca/operations.md +79 -0
- docs/api/crca/overview.md +32 -0
- docs/api/image-annotation/engine.md +52 -0
- docs/api/image-annotation/overview.md +17 -0
- docs/api/schemas/annotation.md +34 -0
- docs/api/schemas/core-schemas.md +82 -0
- docs/api/schemas/overview.md +32 -0
- docs/api/schemas/policy.md +30 -0
- docs/api/utils/conversation.md +22 -0
- docs/api/utils/graph-reasoner.md +32 -0
- docs/api/utils/overview.md +21 -0
- docs/api/utils/router.md +19 -0
- docs/api/utils/utilities.md +97 -0
- docs/architecture/causal-graphs.md +41 -0
- docs/architecture/data-flow.md +29 -0
- docs/architecture/design-principles.md +33 -0
- docs/architecture/hybrid-agent/components.md +38 -0
- docs/architecture/hybrid-agent/consistency.md +26 -0
- docs/architecture/hybrid-agent/overview.md +44 -0
- docs/architecture/hybrid-agent/reasoning.md +22 -0
- docs/architecture/llm-integration.md +26 -0
- docs/architecture/modular-structure.md +37 -0
- docs/architecture/overview.md +69 -0
- docs/architecture/policy-engine-arch.md +29 -0
- docs/branches/crca-cg/corposwarm.md +39 -0
- docs/branches/crca-cg/esg-scoring.md +30 -0
- docs/branches/crca-cg/multi-agent.md +35 -0
- docs/branches/crca-cg/overview.md +40 -0
- docs/branches/crca-q/alternative-data.md +55 -0
- docs/branches/crca-q/architecture.md +71 -0
- docs/branches/crca-q/backtesting.md +45 -0
- docs/branches/crca-q/causal-engine.md +33 -0
- docs/branches/crca-q/execution.md +39 -0
- docs/branches/crca-q/market-data.md +60 -0
- docs/branches/crca-q/overview.md +58 -0
- docs/branches/crca-q/philosophy.md +60 -0
- docs/branches/crca-q/portfolio-optimization.md +66 -0
- docs/branches/crca-q/risk-management.md +102 -0
- docs/branches/crca-q/setup.md +65 -0
- docs/branches/crca-q/signal-generation.md +61 -0
- docs/branches/crca-q/signal-validation.md +43 -0
- docs/branches/crca-sd/core.md +84 -0
- docs/branches/crca-sd/governance.md +53 -0
- docs/branches/crca-sd/mpc-solver.md +65 -0
- docs/branches/crca-sd/overview.md +59 -0
- docs/branches/crca-sd/realtime.md +28 -0
- docs/branches/crca-sd/tui.md +20 -0
- docs/branches/general-agent/overview.md +37 -0
- docs/branches/general-agent/personality.md +36 -0
- docs/branches/general-agent/prompt-builder.md +30 -0
- docs/changelog/index.md +79 -0
- docs/contributing/code-style.md +69 -0
- docs/contributing/documentation.md +43 -0
- docs/contributing/overview.md +29 -0
- docs/contributing/testing.md +29 -0
- docs/core/crcagent/async-operations.md +65 -0
- docs/core/crcagent/automatic-extraction.md +107 -0
- docs/core/crcagent/batch-prediction.md +80 -0
- docs/core/crcagent/bayesian-inference.md +60 -0
- docs/core/crcagent/causal-graph.md +92 -0
- docs/core/crcagent/counterfactuals.md +96 -0
- docs/core/crcagent/deterministic-simulation.md +78 -0
- docs/core/crcagent/dual-mode-operation.md +82 -0
- docs/core/crcagent/initialization.md +88 -0
- docs/core/crcagent/optimization.md +65 -0
- docs/core/crcagent/overview.md +63 -0
- docs/core/crcagent/time-series.md +57 -0
- docs/core/schemas/annotation.md +30 -0
- docs/core/schemas/core-schemas.md +82 -0
- docs/core/schemas/overview.md +30 -0
- docs/core/schemas/policy.md +41 -0
- docs/core/templates/base-agent.md +31 -0
- docs/core/templates/feature-mixins.md +31 -0
- docs/core/templates/overview.md +29 -0
- docs/core/templates/templates-guide.md +75 -0
- docs/core/tools/mcp-client.md +34 -0
- docs/core/tools/overview.md +24 -0
- docs/core/utils/conversation.md +27 -0
- docs/core/utils/graph-reasoner.md +29 -0
- docs/core/utils/overview.md +27 -0
- docs/core/utils/router.md +27 -0
- docs/core/utils/utilities.md +97 -0
- docs/css/custom.css +84 -0
- docs/examples/basic-usage.md +57 -0
- docs/examples/general-agent/general-agent-examples.md +50 -0
- docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
- docs/examples/image-annotation/image-annotation-examples.md +54 -0
- docs/examples/integration/integration-examples.md +58 -0
- docs/examples/overview.md +37 -0
- docs/examples/trading/trading-examples.md +46 -0
- docs/features/causal-reasoning/advanced-topics.md +101 -0
- docs/features/causal-reasoning/counterfactuals.md +43 -0
- docs/features/causal-reasoning/do-calculus.md +50 -0
- docs/features/causal-reasoning/overview.md +47 -0
- docs/features/causal-reasoning/structural-models.md +52 -0
- docs/features/hybrid-agent/advanced-components.md +55 -0
- docs/features/hybrid-agent/core-components.md +64 -0
- docs/features/hybrid-agent/overview.md +34 -0
- docs/features/image-annotation/engine.md +82 -0
- docs/features/image-annotation/features.md +113 -0
- docs/features/image-annotation/integration.md +75 -0
- docs/features/image-annotation/overview.md +53 -0
- docs/features/image-annotation/quickstart.md +73 -0
- docs/features/policy-engine/doctrine-ledger.md +105 -0
- docs/features/policy-engine/monitoring.md +44 -0
- docs/features/policy-engine/mpc-control.md +89 -0
- docs/features/policy-engine/overview.md +46 -0
- docs/getting-started/configuration.md +225 -0
- docs/getting-started/first-agent.md +164 -0
- docs/getting-started/installation.md +144 -0
- docs/getting-started/quickstart.md +137 -0
- docs/index.md +118 -0
- docs/js/mathjax.js +13 -0
- docs/lrm/discovery_proof_notes.md +25 -0
- docs/lrm/finetune_full.md +83 -0
- docs/lrm/math_appendix.md +120 -0
- docs/lrm/overview.md +32 -0
- docs/mkdocs.yml +238 -0
- docs/stylesheets/extra.css +21 -0
- docs_generated/crca_core/CounterfactualResult.md +12 -0
- docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
- docs_generated/crca_core/DraftSpec.md +13 -0
- docs_generated/crca_core/EstimateResult.md +13 -0
- docs_generated/crca_core/IdentificationResult.md +17 -0
- docs_generated/crca_core/InterventionDesignResult.md +12 -0
- docs_generated/crca_core/LockedSpec.md +15 -0
- docs_generated/crca_core/RefusalResult.md +12 -0
- docs_generated/crca_core/ValidationReport.md +9 -0
- docs_generated/crca_core/index.md +13 -0
- examples/general_agent_example.py +277 -0
- examples/general_agent_quickstart.py +202 -0
- examples/general_agent_simple.py +92 -0
- examples/hybrid_agent_auto_extraction.py +84 -0
- examples/hybrid_agent_dictionary_demo.py +104 -0
- examples/hybrid_agent_enhanced.py +179 -0
- examples/hybrid_agent_general_knowledge.py +107 -0
- examples/image_annotation_quickstart.py +328 -0
- examples/test_hybrid_fixes.py +77 -0
- image_annotation/__init__.py +27 -0
- image_annotation/annotation_engine.py +2593 -0
- install_cuda_wsl2.sh +59 -0
- install_deepspeed.sh +56 -0
- install_deepspeed_simple.sh +87 -0
- mkdocs.yml +252 -0
- ollama/Modelfile +8 -0
- prompts/__init__.py +2 -1
- prompts/default_crca.py +9 -1
- prompts/general_agent.py +227 -0
- prompts/image_annotation.py +56 -0
- pyproject.toml +17 -2
- requirements-docs.txt +10 -0
- requirements.txt +21 -2
- schemas/__init__.py +26 -1
- schemas/annotation.py +222 -0
- schemas/conversation.py +193 -0
- schemas/hybrid.py +211 -0
- schemas/reasoning.py +276 -0
- schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
- schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
- schemas_export/crca_core/DraftSpec.schema.json +635 -0
- schemas_export/crca_core/EstimateResult.schema.json +113 -0
- schemas_export/crca_core/IdentificationResult.schema.json +145 -0
- schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
- schemas_export/crca_core/LockedSpec.schema.json +646 -0
- schemas_export/crca_core/RefusalResult.schema.json +90 -0
- schemas_export/crca_core/ValidationReport.schema.json +62 -0
- scripts/build_lrm_dataset.py +80 -0
- scripts/export_crca_core_schemas.py +54 -0
- scripts/export_hf_lrm.py +37 -0
- scripts/export_ollama_gguf.py +45 -0
- scripts/generate_changelog.py +157 -0
- scripts/generate_crca_core_docs_from_schemas.py +86 -0
- scripts/run_crca_core_benchmarks.py +163 -0
- scripts/run_full_finetune.py +198 -0
- scripts/run_lrm_eval.py +31 -0
- templates/graph_management.py +29 -0
- tests/conftest.py +9 -0
- tests/test_core.py +2 -3
- tests/test_crca_core_discovery_tabular.py +15 -0
- tests/test_crca_core_estimate_dowhy.py +36 -0
- tests/test_crca_core_identify.py +18 -0
- tests/test_crca_core_intervention_design.py +36 -0
- tests/test_crca_core_linear_gaussian_scm.py +69 -0
- tests/test_crca_core_spec.py +25 -0
- tests/test_crca_core_timeseries_pcmci.py +15 -0
- tests/test_crca_llm_coauthor.py +12 -0
- tests/test_crca_llm_orchestrator.py +80 -0
- tests/test_hybrid_agent_llm_enhanced.py +556 -0
- tests/test_image_annotation_demo.py +376 -0
- tests/test_image_annotation_operational.py +408 -0
- tests/test_image_annotation_unit.py +551 -0
- tests/test_training_moe.py +13 -0
- training/__init__.py +42 -0
- training/datasets.py +140 -0
- training/deepspeed_zero2_0_5b.json +22 -0
- training/deepspeed_zero2_1_5b.json +22 -0
- training/deepspeed_zero3_0_5b.json +28 -0
- training/deepspeed_zero3_14b.json +28 -0
- training/deepspeed_zero3_h100_3gpu.json +20 -0
- training/deepspeed_zero3_offload.json +28 -0
- training/eval.py +92 -0
- training/finetune.py +516 -0
- training/public_datasets.py +89 -0
- training_data/react_train.jsonl +7473 -0
- utils/agent_discovery.py +311 -0
- utils/batch_processor.py +317 -0
- utils/conversation.py +78 -0
- utils/edit_distance.py +118 -0
- utils/formatter.py +33 -0
- utils/graph_reasoner.py +530 -0
- utils/rate_limiter.py +283 -0
- utils/router.py +2 -2
- utils/tool_discovery.py +307 -0
- webui/__init__.py +10 -0
- webui/app.py +229 -0
- webui/config.py +104 -0
- webui/static/css/style.css +332 -0
- webui/static/js/main.js +284 -0
- webui/templates/index.html +42 -0
- tests/test_crca_excel.py +0 -166
- tests/test_data_broker.py +0 -424
- tests/test_palantir.py +0 -349
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Async Operations
|
|
2
|
+
|
|
3
|
+
CRCAAgent supports asynchronous operations for concurrent causal analysis.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Async operations allow non-blocking causal inference, enabling concurrent processing of multiple tasks.
|
|
8
|
+
|
|
9
|
+
## Basic Usage
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
import asyncio
|
|
13
|
+
from CRCA import CRCAAgent
|
|
14
|
+
|
|
15
|
+
agent = CRCAAgent(model_name="gpt-4o-mini")
|
|
16
|
+
|
|
17
|
+
async def analyze():
|
|
18
|
+
    result = await agent.run_async("Analyze X -> Y")
|
|
19
|
+
    return result
|
|
20
|
+
|
|
21
|
+
# Run async
|
|
22
|
+
result = asyncio.run(analyze())
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Concurrent Analysis
|
|
26
|
+
|
|
27
|
+
Process multiple tasks concurrently:
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
async def analyze_multiple():
|
|
31
|
+
    tasks = [
|
|
32
|
+
        agent.run_async("Analyze education -> income"),
|
|
33
|
+
        agent.run_async("Analyze experience -> income"),
|
|
34
|
+
        agent.run_async("Analyze location -> income")
|
|
35
|
+
    ]
|
|
36
|
+
|
|
37
|
+
    results = await asyncio.gather(*tasks)
|
|
38
|
+
    return results
|
|
39
|
+
|
|
40
|
+
results = asyncio.run(analyze_multiple())
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Async Batch Processing
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
async def batch_async():
|
|
47
|
+
    scenarios = [...]
|
|
48
|
+
    results = await agent.batch_predict_async(
|
|
49
|
+
        scenarios=scenarios,
|
|
50
|
+
        target="income"
|
|
51
|
+
    )
|
|
52
|
+
    return results
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Performance Benefits
|
|
56
|
+
|
|
57
|
+
Async operations improve throughput:
|
|
58
|
+
|
|
59
|
+
- **Sequential**: Total time = $\sum_{i=1}^n t_i$
|
|
60
|
+
- **Async**: Total time $\approx \max_{1 \le i \le n} t_i$ (for I/O-bound operations that overlap fully; scheduling overhead and any CPU-bound work add to this)
|
|
61
|
+
|
|
62
|
+
## Next Steps
|
|
63
|
+
|
|
64
|
+
- [Batch Prediction](batch-prediction.md) - Batch processing
|
|
65
|
+
- [Optimization](optimization.md) - Optimization methods
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# Automatic Variable Extraction
|
|
2
|
+
|
|
3
|
+
CRCAAgent can automatically extract variables and causal relationships from natural language tasks.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The automatic extraction system uses LLM-based analysis to identify:
|
|
8
|
+
- Variables of interest
|
|
9
|
+
- Causal relationships between variables
|
|
10
|
+
- Confounders and mediators
|
|
11
|
+
- Structural equations
|
|
12
|
+
|
|
13
|
+
## Mathematical Foundation
|
|
14
|
+
|
|
15
|
+
Given a natural language task $T$, the extraction process identifies:
|
|
16
|
+
|
|
17
|
+
1. **Variable Set**: $V = \{V_1, V_2, \ldots, V_n\}$
|
|
18
|
+
2. **Causal Graph**: $G = (V, E)$ where $E$ are causal edges
|
|
19
|
+
3. **Structural Equations**: $F = \{f_1, f_2, \ldots, f_n\}$
|
|
20
|
+
|
|
21
|
+
For each variable $V_i$, the system extracts:
|
|
22
|
+
|
|
23
|
+
$$V_i = f_i(Pa(V_i), U_i)$$
|
|
24
|
+
|
|
25
|
+
Where $Pa(V_i)$ are the parents identified from the text.
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
### Basic Extraction
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from CRCA import CRCAAgent
|
|
33
|
+
|
|
34
|
+
agent = CRCAAgent(
|
|
35
|
+
model_name="gpt-4o-mini",
|
|
36
|
+
enable_automatic_extraction=True
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
task = """
|
|
40
|
+
Analyze how increasing the minimum wage affects employment rates.
|
|
41
|
+
Consider inflation, business costs, and consumer spending.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
result = agent.run(task)
|
|
45
|
+
# Variables and relationships are automatically extracted
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### View Extracted Variables
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
# After running a task
|
|
52
|
+
graph = agent.causal_graph
|
|
53
|
+
variables = graph.get_variables()
|
|
54
|
+
print("Extracted variables:", variables)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### View Causal Relationships
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
relationships = graph.get_relationships()
|
|
61
|
+
for parent, child, strength in relationships:
|
|
62
|
+
    print(f"{parent} -> {child}: {strength:.2f}")
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Extraction Process
|
|
66
|
+
|
|
67
|
+
The extraction process follows these steps:
|
|
68
|
+
|
|
69
|
+
1. **Text Analysis**: Parse natural language to identify entities
|
|
70
|
+
2. **Variable Identification**: Extract variable names and types
|
|
71
|
+
3. **Relationship Detection**: Identify causal relationships
|
|
72
|
+
4. **Graph Construction**: Build causal DAG
|
|
73
|
+
5. **Strength Estimation**: Estimate relationship strengths
|
|
74
|
+
|
|
75
|
+
## Example
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
task = """
|
|
79
|
+
Study the effect of education on income.
|
|
80
|
+
Education affects experience, which in turn affects income.
|
|
81
|
+
Geographic location also influences income.
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
result = agent.run(task)
|
|
85
|
+
|
|
86
|
+
# Extracted structure:
|
|
87
|
+
# education -> experience -> income
|
|
88
|
+
# location -> income
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Configuration
|
|
92
|
+
|
|
93
|
+
Control extraction behavior:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
agent = CRCAAgent(
|
|
97
|
+
model_name="gpt-4o-mini",
|
|
98
|
+
enable_automatic_extraction=True,
|
|
99
|
+
extraction_confidence_threshold=0.5,
|
|
100
|
+
max_extracted_variables=20
|
|
101
|
+
)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Next Steps
|
|
105
|
+
|
|
106
|
+
- [Causal Graph](causal-graph.md) - Work with extracted graphs
|
|
107
|
+
- [Dual-Mode Operation](dual-mode-operation.md) - Combine with deterministic mode
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Batch Prediction
|
|
2
|
+
|
|
3
|
+
CRCAAgent supports batch processing for multiple scenarios simultaneously.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Batch prediction allows processing multiple causal queries in parallel, improving efficiency for large-scale analyses.
|
|
8
|
+
|
|
9
|
+
## Mathematical Foundation
|
|
10
|
+
|
|
11
|
+
For a batch of $n$ scenarios, we compute:
|
|
12
|
+
|
|
13
|
+
$$\{E[Y_i | do(X_i=x_i)]\}_{i=1}^n$$
|
|
14
|
+
|
|
15
|
+
Where each scenario $i$ has intervention $X_i=x_i$ and target $Y_i$.
|
|
16
|
+
|
|
17
|
+
## Basic Usage
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
# Batch of scenarios
|
|
21
|
+
scenarios = [
|
|
22
|
+
{"education": 12, "experience": 2},
|
|
23
|
+
{"education": 16, "experience": 5},
|
|
24
|
+
{"education": 20, "experience": 10}
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
results = agent.batch_predict(
|
|
28
|
+
scenarios=scenarios,
|
|
29
|
+
target="income"
|
|
30
|
+
)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Vectorized Operations
|
|
34
|
+
|
|
35
|
+
The agent uses vectorized operations for efficiency:
|
|
36
|
+
|
|
37
|
+
$$\mathbf{Y} = f(\mathbf{X}, \mathbf{Z}, \mathbf{U})$$
|
|
38
|
+
|
|
39
|
+
Where bold symbols represent vectors/matrices.
|
|
40
|
+
|
|
41
|
+
## Parallel Processing
|
|
42
|
+
|
|
43
|
+
Enable parallel processing:
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
agent = CRCAAgent(
|
|
47
|
+
model_name="gpt-4o-mini",
|
|
48
|
+
parallel_workers=4
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
results = agent.batch_predict(scenarios, target="income")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Performance
|
|
55
|
+
|
|
56
|
+
Batch processing is significantly faster than sequential processing:
|
|
57
|
+
|
|
58
|
+
- **Sequential**: $O(n \cdot t)$ where $t$ is time per scenario
|
|
59
|
+
- **Batch**: $O(n \cdot t / p)$ where $p$ is the number of parallel workers (ideal case, ignoring coordination overhead)
|
|
60
|
+
|
|
61
|
+
## Example
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
# Generate batch of counterfactuals
|
|
65
|
+
interventions = [
|
|
66
|
+
{"education": "college"},
|
|
67
|
+
{"education": "masters"},
|
|
68
|
+
{"education": "phd"}
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
counterfactuals = agent.batch_generate_counterfactuals(
|
|
72
|
+
interventions=interventions,
|
|
73
|
+
outcome="income"
|
|
74
|
+
)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Next Steps
|
|
78
|
+
|
|
79
|
+
- [Async Operations](async-operations.md) - Asynchronous batch processing
|
|
80
|
+
- [Optimization](optimization.md) - Optimize batch operations
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Bayesian Inference
|
|
2
|
+
|
|
3
|
+
CRCAAgent supports Bayesian inference for causal effects with uncertainty quantification.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Bayesian inference provides posterior distributions over causal effects, enabling uncertainty quantification through credible intervals.
|
|
8
|
+
|
|
9
|
+
## Mathematical Foundation
|
|
10
|
+
|
|
11
|
+
### Posterior Distribution
|
|
12
|
+
|
|
13
|
+
Given data $D$ and prior $P(\theta)$, the posterior is:
|
|
14
|
+
|
|
15
|
+
$$P(\theta | D) = \frac{P(D | \theta) P(\theta)}{P(D)}$$
|
|
16
|
+
|
|
17
|
+
### Causal Effect Posterior
|
|
18
|
+
|
|
19
|
+
For causal effect $\tau = E[Y | do(X=1)] - E[Y | do(X=0)]$:
|
|
20
|
+
|
|
21
|
+
$$P(\tau | D) = \int P(\tau | \theta) P(\theta | D) d\theta$$
|
|
22
|
+
|
|
23
|
+
### Credible Intervals
|
|
24
|
+
|
|
25
|
+
A $(1-\alpha)$ credible interval $[a, b]$ satisfies:
|
|
26
|
+
|
|
27
|
+
$$P(a \leq \tau \leq b | D) = 1 - \alpha$$
|
|
28
|
+
|
|
29
|
+
## Usage
|
|
30
|
+
|
|
31
|
+
### Bayesian Causal Inference
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
# Compute posterior distribution
|
|
35
|
+
posterior = agent.bayesian_causal_inference(
|
|
36
|
+
treatment="X",
|
|
37
|
+
outcome="Y",
|
|
38
|
+
data=dataframe
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Get credible interval
|
|
42
|
+
ci = posterior.credible_interval(alpha=0.05)
|
|
43
|
+
print(f"95% CI: [{ci.lower}, {ci.upper}]")
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Bootstrap Sampling
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
# Bootstrap for uncertainty
|
|
50
|
+
samples = agent.bootstrap_causal_effect(
|
|
51
|
+
treatment="X",
|
|
52
|
+
outcome="Y",
|
|
53
|
+
n_samples=1000
|
|
54
|
+
)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Next Steps
|
|
58
|
+
|
|
59
|
+
- [CRCAAgent Overview](overview.md) - Return to the CRCAAgent overview
|
|
60
|
+
- [Utils](../utils/overview.md) - Utility functions
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Causal Graph Management
|
|
2
|
+
|
|
3
|
+
CRCAAgent manages causal relationships through directed acyclic graphs (DAGs).
|
|
4
|
+
|
|
5
|
+
## Graph Structure
|
|
6
|
+
|
|
7
|
+
A causal graph $G = (V, E)$ consists of:
|
|
8
|
+
- **Vertices $V$**: Variables in the system
|
|
9
|
+
- **Edges $E$**: Causal relationships $(X \to Y)$
|
|
10
|
+
|
|
11
|
+
The graph must be acyclic: there is no directed path from any variable back to itself.
|
|
12
|
+
|
|
13
|
+
## Mathematical Foundation
|
|
14
|
+
|
|
15
|
+
For a causal graph, each variable $V_i$ has structural equation:
|
|
16
|
+
|
|
17
|
+
$$V_i = f_i(Pa(V_i), U_i)$$
|
|
18
|
+
|
|
19
|
+
Where:
|
|
20
|
+
- $Pa(V_i)$: Parents of $V_i$ in the graph
|
|
21
|
+
- $U_i$: Exogenous variable for $V_i$
|
|
22
|
+
|
|
23
|
+
## Building Graphs
|
|
24
|
+
|
|
25
|
+
### Automatic Construction
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
agent = CRCAAgent(model_name="gpt-4o-mini")
|
|
29
|
+
result = agent.run("Analyze X -> Y -> Z")
|
|
30
|
+
# Graph is automatically constructed
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Manual Construction
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
# Add variables
|
|
37
|
+
agent.add_variable("education")
|
|
38
|
+
agent.add_variable("income")
|
|
39
|
+
agent.add_variable("experience")
|
|
40
|
+
|
|
41
|
+
# Add causal relationships
|
|
42
|
+
agent.add_causal_relationship("education", "income", strength=0.6)
|
|
43
|
+
agent.add_causal_relationship("education", "experience", strength=0.4)
|
|
44
|
+
agent.add_causal_relationship("experience", "income", strength=0.5)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Graph Operations
|
|
48
|
+
|
|
49
|
+
### Get Variables
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
variables = agent.causal_graph.get_variables()
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Get Relationships
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
relationships = agent.causal_graph.get_relationships()
|
|
59
|
+
for parent, child, strength in relationships:
|
|
60
|
+
    print(f"{parent} -> {child}: {strength}")
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Check Paths
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
# Check if path exists
|
|
67
|
+
has_path = agent.causal_graph.has_path("X", "Y")
|
|
68
|
+
|
|
69
|
+
# Get all paths
|
|
70
|
+
paths = agent.causal_graph.get_paths("X", "Y")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Causal Identification
|
|
74
|
+
|
|
75
|
+
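The agent can identify causal effects using do-calculus rules. For example, when a set of covariates $Z$ blocks every backdoor path from $X$ to $Y$, the backdoor adjustment formula applies: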
|
|
76
|
+
|
|
77
|
+
$$P(Y | do(X=x)) = \sum_{z} P(Y | X=x, Z=z) P(Z=z)$$
|
|
78
|
+
|
|
79
|
+
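Where $Z$ is a valid adjustment set, i.e. a set of variables that contains no descendant of $X$ and blocks every backdoor path from $X$ to $Y$.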
|
|
80
|
+
|
|
81
|
+
## Graph Visualization
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
# Export graph for visualization
|
|
85
|
+
graph_data = agent.causal_graph.export()
|
|
86
|
+
# Use with graph visualization libraries
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Next Steps
|
|
90
|
+
|
|
91
|
+
- [Deterministic Simulation](deterministic-simulation.md) - Use graphs for simulation
|
|
92
|
+
- [Counterfactuals](counterfactuals.md) - Generate counterfactuals from graphs
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Counterfactual Generation
|
|
2
|
+
|
|
3
|
+
CRCAAgent generates counterfactual scenarios to answer "what-if" questions.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Counterfactuals answer questions of the form: "What would have happened if $X$ had been $x'$ instead of $x$, given that we observed $X=x$ and $Y=y$?"
|
|
8
|
+
|
|
9
|
+
## Mathematical Foundation
|
|
10
|
+
|
|
11
|
+
A counterfactual query is:
|
|
12
|
+
|
|
13
|
+
$$P(Y_{x'} | X=x, Y=y)$$
|
|
14
|
+
|
|
15
|
+
Where:
|
|
16
|
+
- $Y_{x'}$: Potential outcome under intervention $do(X=x')$
|
|
17
|
+
- $X=x, Y=y$: Observed values
|
|
18
|
+
|
|
19
|
+
This is computed using a three-step process (abduction, action, prediction):
|
|
20
|
+
|
|
21
|
+
1. **Abduction**: Infer exogenous variables from observations
|
|
22
|
+
2. **Action**: Set $X = x'$
|
|
23
|
+
3. **Prediction**: Compute $Y$ using structural equations
|
|
24
|
+
|
|
25
|
+
## Basic Usage
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
# Generate counterfactuals
|
|
29
|
+
counterfactuals = agent.generate_counterfactuals(
|
|
30
|
+
intervention={"education": "college"},
|
|
31
|
+
outcome="income",
|
|
32
|
+
observed={"education": "high_school", "income": 30000}
|
|
33
|
+
)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Counterfactual Types
|
|
37
|
+
|
|
38
|
+
### Simple Counterfactuals
|
|
39
|
+
|
|
40
|
+
Single intervention:
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
cf = agent.generate_counterfactuals(
|
|
44
|
+
intervention={"X": x_new},
|
|
45
|
+
outcome="Y"
|
|
46
|
+
)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Multiple Interventions
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
cf = agent.generate_counterfactuals(
|
|
53
|
+
intervention={"X1": x1_new, "X2": x2_new},
|
|
54
|
+
outcome="Y"
|
|
55
|
+
)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Conditional Counterfactuals
|
|
59
|
+
|
|
60
|
+
Given observed values:
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
cf = agent.generate_counterfactuals(
|
|
64
|
+
intervention={"X": x_new},
|
|
65
|
+
outcome="Y",
|
|
66
|
+
observed={"X": x_old, "Y": y_old}
|
|
67
|
+
)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Counterfactual Computation
|
|
71
|
+
|
|
72
|
+
For a structural equation $Y = f(X, Z, U_Y)$:
|
|
73
|
+
|
|
74
|
+
1. **Abduction**: $U_Y = f^{-1}(X=x, Z=z, Y=y)$, i.e. solve $y = f(x, z, U_Y)$ for $U_Y$ (this assumes $f$ is invertible in $U_Y$)
|
|
75
|
+
2. **Action**: Set $X = x'$
|
|
76
|
+
3. **Prediction**: $Y_{x'} = f(X=x', Z=z, U_Y)$
|
|
77
|
+
|
|
78
|
+
## Example
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
# What if education was college instead of high school?
|
|
82
|
+
counterfactuals = agent.generate_counterfactuals(
|
|
83
|
+
intervention={"education": "college"},
|
|
84
|
+
outcome="income",
|
|
85
|
+
observed={"education": "high_school", "income": 30000}
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
for cf in counterfactuals:
|
|
89
|
+
    print(f"Expected income: ${cf.expected_outcome:,.2f}")
|
|
90
|
+
    print(f"Confidence: {cf.confidence:.2f}")
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Next Steps
|
|
94
|
+
|
|
95
|
+
- [Deterministic Simulation](deterministic-simulation.md) - Understand simulation
|
|
96
|
+
- [Batch Prediction](batch-prediction.md) - Process multiple scenarios
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Deterministic Simulation
|
|
2
|
+
|
|
3
|
+
CRCAAgent performs deterministic causal simulations using structural equations.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Deterministic simulation uses known or estimated structural equations to compute causal effects mathematically, without relying on LLM inference.
|
|
8
|
+
|
|
9
|
+
## Mathematical Foundation
|
|
10
|
+
|
|
11
|
+
Given structural equations:
|
|
12
|
+
|
|
13
|
+
$$V_i = f_i(Pa(V_i), U_i)$$
|
|
14
|
+
|
|
15
|
+
For $i = 1, \ldots, n$, we can simulate the system by:
|
|
16
|
+
|
|
17
|
+
1. Setting exogenous variables $U_i$
|
|
18
|
+
2. Computing variables in topological order
|
|
19
|
+
3. Propagating values through the graph
|
|
20
|
+
|
|
21
|
+
## Basic Simulation
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
# Simulate with specific variable values
|
|
25
|
+
result = agent.simulate(
|
|
26
|
+
variables={
|
|
27
|
+
"education": 16,
|
|
28
|
+
"experience": 5
|
|
29
|
+
},
|
|
30
|
+
target="income"
|
|
31
|
+
)
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Simulation Methods
|
|
35
|
+
|
|
36
|
+
### Euler Method
|
|
37
|
+
|
|
38
|
+
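For differential equation systems, where $f_i$ gives the rate of change $dV_i/dt = f_i(Pa(V_i), U_i)$ rather than the value of $V_i$ itself: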
|
|
39
|
+
|
|
40
|
+
$$V_i(t+\Delta t) = V_i(t) + f_i(Pa(V_i)(t), U_i) \cdot \Delta t$$
|
|
41
|
+
|
|
42
|
+
### Runge-Kutta 4th Order
|
|
43
|
+
|
|
44
|
+
More accurate for complex systems:
|
|
45
|
+
|
|
46
|
+
$$k_1 = f(t, V(t))$$
|
|
47
|
+
$$k_2 = f(t + \Delta t/2, V(t) + k_1 \Delta t/2)$$
|
|
48
|
+
$$k_3 = f(t + \Delta t/2, V(t) + k_2 \Delta t/2)$$
|
|
49
|
+
$$k_4 = f(t + \Delta t, V(t) + k_3 \Delta t)$$
|
|
50
|
+
$$V(t+\Delta t) = V(t) + \frac{\Delta t}{6}(k_1 + 2k_2 + 2k_3 + k_4)$$
|
|
51
|
+
|
|
52
|
+
## Configuration
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
agent = CRCAAgent(
|
|
56
|
+
model_name="gpt-4o-mini",
|
|
57
|
+
simulation_method="rk4", # or "euler"
|
|
58
|
+
simulation_steps=100,
|
|
59
|
+
simulation_dt=0.01
|
|
60
|
+
)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Time-Series Simulation
|
|
64
|
+
|
|
65
|
+
Simulate over time:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
time_series = agent.simulate_time_series(
|
|
69
|
+
initial_conditions={"X": 0, "Y": 0},
|
|
70
|
+
time_steps=100,
|
|
71
|
+
dt=0.1
|
|
72
|
+
)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Next Steps
|
|
76
|
+
|
|
77
|
+
- [Counterfactuals](counterfactuals.md) - Generate counterfactual scenarios
|
|
78
|
+
- [Optimization](optimization.md) - Optimize interventions
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Dual-Mode Operation
|
|
2
|
+
|
|
3
|
+
CRCAAgent supports two operational modes: LLM-based analysis and deterministic simulation.
|
|
4
|
+
|
|
5
|
+
## Modes
|
|
6
|
+
|
|
7
|
+
### LLM Mode
|
|
8
|
+
|
|
9
|
+
Uses large language models for causal reasoning and analysis. Suitable for:
|
|
10
|
+
- Natural language understanding
|
|
11
|
+
- Complex causal reasoning
|
|
12
|
+
- Variable extraction
|
|
13
|
+
- Qualitative analysis
|
|
14
|
+
|
|
15
|
+
### Deterministic Mode
|
|
16
|
+
|
|
17
|
+
Uses mathematical simulation for precise causal inference. Suitable for:
|
|
18
|
+
- Quantitative analysis
|
|
19
|
+
- Numerical simulations
|
|
20
|
+
- Optimization
|
|
21
|
+
- Batch processing
|
|
22
|
+
|
|
23
|
+
## Mathematical Foundation
|
|
24
|
+
|
|
25
|
+
### LLM Mode
|
|
26
|
+
|
|
27
|
+
In LLM mode, the agent uses the LLM to infer causal relationships:
|
|
28
|
+
|
|
29
|
+
$$P(Y | do(X=x), Z=z) \approx \text{LLM}(T, X, Y, Z)$$
|
|
30
|
+
|
|
31
|
+
Where $T$ is the task description, and the LLM approximates the causal effect.
|
|
32
|
+
|
|
33
|
+
### Deterministic Mode
|
|
34
|
+
|
|
35
|
+
In deterministic mode, the agent uses structural equations:
|
|
36
|
+
|
|
37
|
+
$$Y = f(X, Z, U_Y)$$
|
|
38
|
+
|
|
39
|
+
Where $f$ is a known or estimated function, and $U_Y$ is an exogenous variable.
|
|
40
|
+
|
|
41
|
+
## Mode Selection
|
|
42
|
+
|
|
43
|
+
### Automatic Mode Selection
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
agent = CRCAAgent(
|
|
47
|
+
model_name="gpt-4o-mini",
|
|
48
|
+
auto_mode_selection=True # Automatically choose mode
|
|
49
|
+
)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Manual Mode Selection
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
# LLM mode
|
|
56
|
+
result_llm = agent.run(task, mode="llm")
|
|
57
|
+
|
|
58
|
+
# Deterministic mode
|
|
59
|
+
result_det = agent.run(task, mode="deterministic")
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Hybrid Approach
|
|
63
|
+
|
|
64
|
+
Combine both modes:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
# Use LLM for extraction
|
|
68
|
+
agent.run(task, mode="llm")
|
|
69
|
+
|
|
70
|
+
# Switch to deterministic for simulation
|
|
71
|
+
simulation = agent.simulate(variables, mode="deterministic")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Performance Considerations
|
|
75
|
+
|
|
76
|
+
- **LLM Mode**: Slower but more flexible, handles complex reasoning
|
|
77
|
+
- **Deterministic Mode**: Faster, precise, requires known structural equations
|
|
78
|
+
|
|
79
|
+
## Next Steps
|
|
80
|
+
|
|
81
|
+
- [Deterministic Simulation](deterministic-simulation.md) - Learn about simulations
|
|
82
|
+
- [Causal Graph](causal-graph.md) - Understand graph operations
|