PyPI - crca - Versions diffs - 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

crca 1.4.0py3-none-any.whl → 1.5.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (306) hide show

CRCA.py +172 -7
MODEL_CARD.md +53 -0
PKG-INFO +8 -2
RELEASE_NOTES.md +17 -0
STABILITY.md +19 -0
architecture/hybrid/consistency_engine.py +362 -0
architecture/hybrid/conversation_manager.py +421 -0
architecture/hybrid/explanation_generator.py +452 -0
architecture/hybrid/few_shot_learner.py +533 -0
architecture/hybrid/graph_compressor.py +286 -0
architecture/hybrid/hybrid_agent.py +4398 -0
architecture/hybrid/language_compiler.py +623 -0
architecture/hybrid/main,py +0 -0
architecture/hybrid/reasoning_tracker.py +322 -0
architecture/hybrid/self_verifier.py +524 -0
architecture/hybrid/task_decomposer.py +567 -0
architecture/hybrid/text_corrector.py +341 -0
benchmark_results/crca_core_benchmarks.json +178 -0
branches/crca_sd/crca_sd_realtime.py +6 -2
branches/general_agent/__init__.py +102 -0
branches/general_agent/general_agent.py +1400 -0
branches/general_agent/personality.py +169 -0
branches/general_agent/utils/__init__.py +19 -0
branches/general_agent/utils/prompt_builder.py +170 -0
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
crca_core/__init__.py +35 -0
crca_core/benchmarks/__init__.py +14 -0
crca_core/benchmarks/synthetic_scm.py +103 -0
crca_core/core/__init__.py +23 -0
crca_core/core/api.py +120 -0
crca_core/core/estimate.py +208 -0
crca_core/core/godclass.py +72 -0
crca_core/core/intervention_design.py +174 -0
crca_core/core/lifecycle.py +48 -0
crca_core/discovery/__init__.py +9 -0
crca_core/discovery/tabular.py +193 -0
crca_core/identify/__init__.py +171 -0
crca_core/identify/backdoor.py +39 -0
crca_core/identify/frontdoor.py +48 -0
crca_core/identify/graph.py +106 -0
crca_core/identify/id_algorithm.py +43 -0
crca_core/identify/iv.py +48 -0
crca_core/models/__init__.py +67 -0
crca_core/models/provenance.py +56 -0
crca_core/models/refusal.py +39 -0
crca_core/models/result.py +83 -0
crca_core/models/spec.py +151 -0
crca_core/models/validation.py +68 -0
crca_core/scm/__init__.py +9 -0
crca_core/scm/linear_gaussian.py +198 -0
crca_core/timeseries/__init__.py +6 -0
crca_core/timeseries/pcmci.py +181 -0
crca_llm/__init__.py +12 -0
crca_llm/client.py +85 -0
crca_llm/coauthor.py +118 -0
crca_llm/orchestrator.py +289 -0
crca_llm/types.py +21 -0
crca_reasoning/__init__.py +16 -0
crca_reasoning/critique.py +54 -0
crca_reasoning/godclass.py +206 -0
crca_reasoning/memory.py +24 -0
crca_reasoning/rationale.py +10 -0
crca_reasoning/react_controller.py +81 -0
crca_reasoning/tool_router.py +97 -0
crca_reasoning/types.py +40 -0
crca_sd/__init__.py +15 -0
crca_sd/crca_sd_core.py +2 -0
crca_sd/crca_sd_governance.py +2 -0
crca_sd/crca_sd_mpc.py +2 -0
crca_sd/crca_sd_realtime.py +2 -0
crca_sd/crca_sd_tui.py +2 -0
cuda-keyring_1.1-1_all.deb +0 -0
cuda-keyring_1.1-1_all.deb.1 +0 -0
docs/IMAGE_ANNOTATION_USAGE.md +539 -0
docs/INSTALL_DEEPSPEED.md +125 -0
docs/api/branches/crca-cg.md +19 -0
docs/api/branches/crca-q.md +27 -0
docs/api/branches/crca-sd.md +37 -0
docs/api/branches/general-agent.md +24 -0
docs/api/branches/overview.md +19 -0
docs/api/crca/agent-methods.md +62 -0
docs/api/crca/operations.md +79 -0
docs/api/crca/overview.md +32 -0
docs/api/image-annotation/engine.md +52 -0
docs/api/image-annotation/overview.md +17 -0
docs/api/schemas/annotation.md +34 -0
docs/api/schemas/core-schemas.md +82 -0
docs/api/schemas/overview.md +32 -0
docs/api/schemas/policy.md +30 -0
docs/api/utils/conversation.md +22 -0
docs/api/utils/graph-reasoner.md +32 -0
docs/api/utils/overview.md +21 -0
docs/api/utils/router.md +19 -0
docs/api/utils/utilities.md +97 -0
docs/architecture/causal-graphs.md +41 -0
docs/architecture/data-flow.md +29 -0
docs/architecture/design-principles.md +33 -0
docs/architecture/hybrid-agent/components.md +38 -0
docs/architecture/hybrid-agent/consistency.md +26 -0
docs/architecture/hybrid-agent/overview.md +44 -0
docs/architecture/hybrid-agent/reasoning.md +22 -0
docs/architecture/llm-integration.md +26 -0
docs/architecture/modular-structure.md +37 -0
docs/architecture/overview.md +69 -0
docs/architecture/policy-engine-arch.md +29 -0
docs/branches/crca-cg/corposwarm.md +39 -0
docs/branches/crca-cg/esg-scoring.md +30 -0
docs/branches/crca-cg/multi-agent.md +35 -0
docs/branches/crca-cg/overview.md +40 -0
docs/branches/crca-q/alternative-data.md +55 -0
docs/branches/crca-q/architecture.md +71 -0
docs/branches/crca-q/backtesting.md +45 -0
docs/branches/crca-q/causal-engine.md +33 -0
docs/branches/crca-q/execution.md +39 -0
docs/branches/crca-q/market-data.md +60 -0
docs/branches/crca-q/overview.md +58 -0
docs/branches/crca-q/philosophy.md +60 -0
docs/branches/crca-q/portfolio-optimization.md +66 -0
docs/branches/crca-q/risk-management.md +102 -0
docs/branches/crca-q/setup.md +65 -0
docs/branches/crca-q/signal-generation.md +61 -0
docs/branches/crca-q/signal-validation.md +43 -0
docs/branches/crca-sd/core.md +84 -0
docs/branches/crca-sd/governance.md +53 -0
docs/branches/crca-sd/mpc-solver.md +65 -0
docs/branches/crca-sd/overview.md +59 -0
docs/branches/crca-sd/realtime.md +28 -0
docs/branches/crca-sd/tui.md +20 -0
docs/branches/general-agent/overview.md +37 -0
docs/branches/general-agent/personality.md +36 -0
docs/branches/general-agent/prompt-builder.md +30 -0
docs/changelog/index.md +79 -0
docs/contributing/code-style.md +69 -0
docs/contributing/documentation.md +43 -0
docs/contributing/overview.md +29 -0
docs/contributing/testing.md +29 -0
docs/core/crcagent/async-operations.md +65 -0
docs/core/crcagent/automatic-extraction.md +107 -0
docs/core/crcagent/batch-prediction.md +80 -0
docs/core/crcagent/bayesian-inference.md +60 -0
docs/core/crcagent/causal-graph.md +92 -0
docs/core/crcagent/counterfactuals.md +96 -0
docs/core/crcagent/deterministic-simulation.md +78 -0
docs/core/crcagent/dual-mode-operation.md +82 -0
docs/core/crcagent/initialization.md +88 -0
docs/core/crcagent/optimization.md +65 -0
docs/core/crcagent/overview.md +63 -0
docs/core/crcagent/time-series.md +57 -0
docs/core/schemas/annotation.md +30 -0
docs/core/schemas/core-schemas.md +82 -0
docs/core/schemas/overview.md +30 -0
docs/core/schemas/policy.md +41 -0
docs/core/templates/base-agent.md +31 -0
docs/core/templates/feature-mixins.md +31 -0
docs/core/templates/overview.md +29 -0
docs/core/templates/templates-guide.md +75 -0
docs/core/tools/mcp-client.md +34 -0
docs/core/tools/overview.md +24 -0
docs/core/utils/conversation.md +27 -0
docs/core/utils/graph-reasoner.md +29 -0
docs/core/utils/overview.md +27 -0
docs/core/utils/router.md +27 -0
docs/core/utils/utilities.md +97 -0
docs/css/custom.css +84 -0
docs/examples/basic-usage.md +57 -0
docs/examples/general-agent/general-agent-examples.md +50 -0
docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
docs/examples/image-annotation/image-annotation-examples.md +54 -0
docs/examples/integration/integration-examples.md +58 -0
docs/examples/overview.md +37 -0
docs/examples/trading/trading-examples.md +46 -0
docs/features/causal-reasoning/advanced-topics.md +101 -0
docs/features/causal-reasoning/counterfactuals.md +43 -0
docs/features/causal-reasoning/do-calculus.md +50 -0
docs/features/causal-reasoning/overview.md +47 -0
docs/features/causal-reasoning/structural-models.md +52 -0
docs/features/hybrid-agent/advanced-components.md +55 -0
docs/features/hybrid-agent/core-components.md +64 -0
docs/features/hybrid-agent/overview.md +34 -0
docs/features/image-annotation/engine.md +82 -0
docs/features/image-annotation/features.md +113 -0
docs/features/image-annotation/integration.md +75 -0
docs/features/image-annotation/overview.md +53 -0
docs/features/image-annotation/quickstart.md +73 -0
docs/features/policy-engine/doctrine-ledger.md +105 -0
docs/features/policy-engine/monitoring.md +44 -0
docs/features/policy-engine/mpc-control.md +89 -0
docs/features/policy-engine/overview.md +46 -0
docs/getting-started/configuration.md +225 -0
docs/getting-started/first-agent.md +164 -0
docs/getting-started/installation.md +144 -0
docs/getting-started/quickstart.md +137 -0
docs/index.md +118 -0
docs/js/mathjax.js +13 -0
docs/lrm/discovery_proof_notes.md +25 -0
docs/lrm/finetune_full.md +83 -0
docs/lrm/math_appendix.md +120 -0
docs/lrm/overview.md +32 -0
docs/mkdocs.yml +238 -0
docs/stylesheets/extra.css +21 -0
docs_generated/crca_core/CounterfactualResult.md +12 -0
docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
docs_generated/crca_core/DraftSpec.md +13 -0
docs_generated/crca_core/EstimateResult.md +13 -0
docs_generated/crca_core/IdentificationResult.md +17 -0
docs_generated/crca_core/InterventionDesignResult.md +12 -0
docs_generated/crca_core/LockedSpec.md +15 -0
docs_generated/crca_core/RefusalResult.md +12 -0
docs_generated/crca_core/ValidationReport.md +9 -0
docs_generated/crca_core/index.md +13 -0
examples/general_agent_example.py +277 -0
examples/general_agent_quickstart.py +202 -0
examples/general_agent_simple.py +92 -0
examples/hybrid_agent_auto_extraction.py +84 -0
examples/hybrid_agent_dictionary_demo.py +104 -0
examples/hybrid_agent_enhanced.py +179 -0
examples/hybrid_agent_general_knowledge.py +107 -0
examples/image_annotation_quickstart.py +328 -0
examples/test_hybrid_fixes.py +77 -0
image_annotation/__init__.py +27 -0
image_annotation/annotation_engine.py +2593 -0
install_cuda_wsl2.sh +59 -0
install_deepspeed.sh +56 -0
install_deepspeed_simple.sh +87 -0
mkdocs.yml +252 -0
ollama/Modelfile +8 -0
prompts/__init__.py +2 -1
prompts/default_crca.py +9 -1
prompts/general_agent.py +227 -0
prompts/image_annotation.py +56 -0
pyproject.toml +17 -2
requirements-docs.txt +10 -0
requirements.txt +21 -2
schemas/__init__.py +26 -1
schemas/annotation.py +222 -0
schemas/conversation.py +193 -0
schemas/hybrid.py +211 -0
schemas/reasoning.py +276 -0
schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
schemas_export/crca_core/DraftSpec.schema.json +635 -0
schemas_export/crca_core/EstimateResult.schema.json +113 -0
schemas_export/crca_core/IdentificationResult.schema.json +145 -0
schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
schemas_export/crca_core/LockedSpec.schema.json +646 -0
schemas_export/crca_core/RefusalResult.schema.json +90 -0
schemas_export/crca_core/ValidationReport.schema.json +62 -0
scripts/build_lrm_dataset.py +80 -0
scripts/export_crca_core_schemas.py +54 -0
scripts/export_hf_lrm.py +37 -0
scripts/export_ollama_gguf.py +45 -0
scripts/generate_changelog.py +157 -0
scripts/generate_crca_core_docs_from_schemas.py +86 -0
scripts/run_crca_core_benchmarks.py +163 -0
scripts/run_full_finetune.py +198 -0
scripts/run_lrm_eval.py +31 -0
templates/graph_management.py +29 -0
tests/conftest.py +9 -0
tests/test_core.py +2 -3
tests/test_crca_core_discovery_tabular.py +15 -0
tests/test_crca_core_estimate_dowhy.py +36 -0
tests/test_crca_core_identify.py +18 -0
tests/test_crca_core_intervention_design.py +36 -0
tests/test_crca_core_linear_gaussian_scm.py +69 -0
tests/test_crca_core_spec.py +25 -0
tests/test_crca_core_timeseries_pcmci.py +15 -0
tests/test_crca_llm_coauthor.py +12 -0
tests/test_crca_llm_orchestrator.py +80 -0
tests/test_hybrid_agent_llm_enhanced.py +556 -0
tests/test_image_annotation_demo.py +376 -0
tests/test_image_annotation_operational.py +408 -0
tests/test_image_annotation_unit.py +551 -0
tests/test_training_moe.py +13 -0
training/__init__.py +42 -0
training/datasets.py +140 -0
training/deepspeed_zero2_0_5b.json +22 -0
training/deepspeed_zero2_1_5b.json +22 -0
training/deepspeed_zero3_0_5b.json +28 -0
training/deepspeed_zero3_14b.json +28 -0
training/deepspeed_zero3_h100_3gpu.json +20 -0
training/deepspeed_zero3_offload.json +28 -0
training/eval.py +92 -0
training/finetune.py +516 -0
training/public_datasets.py +89 -0
training_data/react_train.jsonl +7473 -0
utils/agent_discovery.py +311 -0
utils/batch_processor.py +317 -0
utils/conversation.py +78 -0
utils/edit_distance.py +118 -0
utils/formatter.py +33 -0
utils/graph_reasoner.py +530 -0
utils/rate_limiter.py +283 -0
utils/router.py +2 -2
utils/tool_discovery.py +307 -0
webui/__init__.py +10 -0
webui/app.py +229 -0
webui/config.py +104 -0
webui/static/css/style.css +332 -0
webui/static/js/main.js +284 -0
webui/templates/index.html +42 -0
tests/test_crca_excel.py +0 -166
tests/test_data_broker.py +0 -424
tests/test_palantir.py +0 -349
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0

crca_core/core/api.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Public API functions for the H1 `crca_core`.
+These functions provide the stable, refusal-first entry points that other
+layers (including LLM tooling) should call.
+"""
+from __future__ import annotations
+from typing import Any, Dict, Optional
+from crca_core.core.estimate import EstimatorConfig, estimate_effect_dowhy
+from crca_core.identify import identify_effect
+from crca_core.core.intervention_design import (
+    FeasibilityConstraints,
+    TargetQuery,
+    design_intervention,
+)
+from crca_core.models.provenance import ProvenanceManifest
+from crca_core.models.refusal import RefusalChecklistItem, RefusalReasonCode, RefusalResult
+from crca_core.models.result import CounterfactualResult
+from crca_core.scm import LinearGaussianSCM
+from crca_core.models.spec import DraftSpec, LockedSpec
+from crca_core.timeseries.pcmci import PCMCIConfig, discover_timeseries_pcmci
+from crca_core.discovery.tabular import TabularDiscoveryConfig, discover_tabular
+from utils.canonical import stable_hash
+def simulate_counterfactual(
+    *,
+    locked_spec: LockedSpec,
+    factual_observation: Dict[str, float],
+    intervention: Dict[str, float],
+    allow_partial_observation: bool = False,
+) -> CounterfactualResult | RefusalResult:
+    """Simulate a counterfactual under an explicit SCM (required).
+    Refuses if `locked_spec.scm` is missing.
+    """
+    if locked_spec.scm is None:
+        return RefusalResult(
+            message="Counterfactuals require an explicit SCMSpec (structural equations + noise model).",
+            reason_codes=[RefusalReasonCode.NO_SCM_FOR_COUNTERFACTUAL],
+            checklist=[
+                RefusalChecklistItem(
+                    item="Provide SCMSpec",
+                    rationale="A DAG alone does not define counterfactual semantics; SCM is required.",
+                )
+            ],
+            suggested_next_steps=[
+                "Attach a SCMSpec (e.g., linear_gaussian) to the spec, then re-lock and retry."
+            ],
+        )
+    scm = LinearGaussianSCM.from_spec(locked_spec.scm)
+    try:
+        u = scm.abduce_noise(factual_observation, allow_partial=allow_partial_observation)
+    except ValueError as exc:
+        return RefusalResult(
+            message=str(exc),
+            reason_codes=[RefusalReasonCode.INPUT_INVALID],
+            checklist=[
+                RefusalChecklistItem(
+                    item="Provide complete factual observation",
+                    rationale="Counterfactuals require abduction for all endogenous variables in v1.0 unless partial mode is enabled.",
+                )
+            ],
+            suggested_next_steps=[
+                "Provide all endogenous variables or set allow_partial_observation=True (partial mode)."
+            ],
+        )
+    cf = scm.predict(u, interventions=intervention)
+    prov = ProvenanceManifest.minimal(
+        spec_hash=stable_hash(
+            {
+                "spec_hash": locked_spec.spec_hash,
+                "module": "simulate_counterfactual",
+                "intervention": intervention,
+                "factual_keys": sorted(list(factual_observation.keys())),
+            }
+        )
+    )
+    return CounterfactualResult(
+        provenance=prov,
+        assumptions=[
+            "SCM structure and parameters are correct (strong assumption).",
+            "Factual observation includes all endogenous variables for abduction in v1.0 unless partial mode is enabled.",
+        ],
+        limitations=[
+            "v0.1 counterfactuals require a fully observed system (no missing variables).",
+            "Only linear-Gaussian SCMs are supported in v0.1.",
+        ],
+        counterfactual={"factual": dict(factual_observation), "do": dict(intervention), "result": cf},
+    )
+# Names exported by this module. Apart from `simulate_counterfactual`, which is
+# defined in this file, every entry is imported at the top of the file and
+# re-exported here.
+__all__ = [
+    # Core lifecycle
+    "DraftSpec",
+    "LockedSpec",
+    # Identification
+    "identify_effect",
+    # Discovery
+    "TabularDiscoveryConfig",
+    "discover_tabular",
+    "PCMCIConfig",
+    "discover_timeseries_pcmci",
+    # Design
+    "TargetQuery",
+    "FeasibilityConstraints",
+    "design_intervention",
+    # Counterfactuals
+    "simulate_counterfactual",
+    # Estimation
+    "EstimatorConfig",
+    "estimate_effect_dowhy",
+]

crca_core/core/estimate.py ADDED Viewed

@@ -0,0 +1,208 @@
+"""Identification → estimation → refutation wrapper (DoWhy).
+This is supporting infrastructure for causal R&D. It is gated behind:
+- LockedSpec
+- explicit treatment/outcome
+- DoWhy identification success
+Refuters do not prove causality; they are diagnostics and must be surfaced.
+"""
+from __future__ import annotations
+from typing import Any, Dict, List, Optional
+from pydantic import BaseModel, Field
+from crca_core.models.provenance import ProvenanceManifest
+from crca_core.models.refusal import RefusalChecklistItem, RefusalReasonCode, RefusalResult
+from crca_core.models.result import EstimateResult, IdentificationResult
+from crca_core.models.spec import LockedSpec
+from utils.canonical import stable_hash
+# Options for the DoWhy estimation pipeline in `estimate_effect_dowhy`.
+# (Documented with comments rather than a class docstring so the model's
+# generated schema is left untouched.)
+class EstimatorConfig(BaseModel):
+    # Passed as `method_name` to `CausalModel.estimate_effect`.
+    method_name: str = Field(default="backdoor.linear_regression")
+    # Forwarded unchanged to `CausalModel.estimate_effect`.
+    test_significance: bool = True
+    # Forwarded unchanged to `CausalModel.estimate_effect`.
+    confidence_intervals: bool = True
+    # Each entry is used as `method_name` for one `CausalModel.refute_estimate`
+    # call; `default_factory` gives every instance its own list.
+    refuters: List[str] = Field(
+        default_factory=lambda: ["placebo_treatment_refuter", "random_common_cause", "subset_refuter"]
+    )
+def _ensure_networkx_compat() -> None:
+    """Patch NetworkX API differences required by DoWhy.
+    DoWhy versions in the wild have referenced `networkx.algorithms.d_separated`,
+    while NetworkX >=3.6 provides `networkx.algorithms.d_separation.is_d_separator`.
+    """
+    try:
+        import networkx as nx  # type: ignore
+        import networkx.algorithms.d_separation as ds  # type: ignore
+        if not hasattr(nx.algorithms, "d_separated") and hasattr(ds, "is_d_separator"):
+            setattr(nx.algorithms, "d_separated", ds.is_d_separator)
+    except Exception:
+        # If networkx isn't available, DoWhy will fail later anyway.
+        return
+def _graph_to_dot(spec: LockedSpec) -> str:
+    # Minimal DOT string compatible with DoWhy's graph parser.
+    edges = spec.graph.edges
+    lines = ["digraph {"]
+    for n in spec.graph.nodes:
+        lines.append(f'  "{n.name}";')
+    for e in edges:
+        lines.append(f'  "{e.source}" -> "{e.target}";')
+    lines.append("}")
+    return "\n".join(lines)
+def estimate_effect_dowhy(
+    *,
+    data: Any,
+    locked_spec: LockedSpec,
+    treatment: str,
+    outcome: str,
+    identification_result: IdentificationResult | None = None,
+    config: Optional[EstimatorConfig] = None,
+) -> EstimateResult | RefusalResult:
+    """Run DoWhy identify→estimate→refute and return a structured result."""
+    cfg = config or EstimatorConfig()
+    # Basic gating
+    if not treatment or not outcome:
+        return RefusalResult(
+            message="Treatment and outcome must be provided.",
+            reason_codes=[RefusalReasonCode.INPUT_INVALID],
+            checklist=[
+                RefusalChecklistItem(item="Provide treatment", rationale="Required to define the estimand."),
+                RefusalChecklistItem(item="Provide outcome", rationale="Required to define the estimand."),
+            ],
+            suggested_next_steps=["Pass treatment='X', outcome='Y'."],
+        )
+    if identification_result is None:
+        return RefusalResult(
+            message="Estimation requires an IdentificationResult.",
+            reason_codes=[RefusalReasonCode.INPUT_INVALID],
+            checklist=[
+                RefusalChecklistItem(
+                    item="Run identify_effect() first",
+                    rationale="Estimation is gated behind validated identifiability.",
+                )
+            ],
+            suggested_next_steps=["Call identify_effect() and pass its result here."],
+        )
+    if identification_result.method == "not_identifiable":
+        return RefusalResult(
+            message="Cannot estimate: identification failed.",
+            reason_codes=[RefusalReasonCode.NOT_IDENTIFIABLE],
+            checklist=[
+                RefusalChecklistItem(
+                    item="Revise causal model or collect additional data/interventions",
+                    rationale="Identification result indicates non-identifiability.",
+                )
+            ],
+            suggested_next_steps=["Use design_intervention() to propose identifying experiments."],
+        )
+    dot = _graph_to_dot(locked_spec)
+    prov = ProvenanceManifest.minimal(
+        spec_hash=stable_hash(
+            {
+                "spec_hash": locked_spec.spec_hash,
+                "treatment": treatment,
+                "outcome": outcome,
+                "method": cfg.model_dump(),
+                "graph": dot,
+                "module": "dowhy_pipeline",
+            }
+        ),
+        algorithm_config=cfg.model_dump(),
+    )
+    try:
+        import pandas as pd  # type: ignore
+        if not isinstance(data, pd.DataFrame):
+            return RefusalResult(
+                message="DoWhy estimation requires a pandas DataFrame.",
+                reason_codes=[RefusalReasonCode.INPUT_INVALID],
+                checklist=[RefusalChecklistItem(item="Provide pandas DataFrame", rationale="DoWhy expects tabular data.")],
+                suggested_next_steps=["Convert your data to pandas.DataFrame and retry."],
+            )
+    except Exception as e:  # pragma: no cover
+        return RefusalResult(
+            message=f"pandas is required for estimation: {e}",
+            reason_codes=[RefusalReasonCode.UNSUPPORTED_OPERATION],
+            checklist=[RefusalChecklistItem(item="Install pandas", rationale="Required dependency for tabular estimation.")],
+            suggested_next_steps=["pip install pandas"],
+        )
+    try:
+        _ensure_networkx_compat()
+        from dowhy import CausalModel  # type: ignore
+    except Exception as e:
+        return RefusalResult(
+            message=f"DoWhy not available: {e}",
+            reason_codes=[RefusalReasonCode.UNSUPPORTED_OPERATION],
+            checklist=[RefusalChecklistItem(item="Install dowhy", rationale="Required for this estimation pipeline.")],
+            suggested_next_steps=["pip install dowhy"],
+        )
+    model = CausalModel(data=data, treatment=treatment, outcome=outcome, graph=dot)
+    identified_estimand = model.identify_effect()
+    # If identification fails, DoWhy usually still returns an object; we gate on its string.
+    if identified_estimand is None:
+        return RefusalResult(
+            message="Causal effect not identifiable under the provided graph/assumptions.",
+            reason_codes=[RefusalReasonCode.NOT_IDENTIFIABLE],
+            checklist=[
+                RefusalChecklistItem(
+                    item="Revise the causal model or collect additional data/interventions",
+                    rationale="Effect identification failed.",
+                )
+            ],
+            suggested_next_steps=["Use design_intervention() to propose identifying experiments."],
+        )
+    estimate = model.estimate_effect(
+        identified_estimand,
+        method_name=cfg.method_name,
+        test_significance=cfg.test_significance,
+        confidence_intervals=cfg.confidence_intervals,
+    )
+    refutations: Dict[str, Any] = {}
+    for refuter in cfg.refuters:
+        try:
+            ref = model.refute_estimate(identified_estimand, estimate, method_name=refuter)
+            refutations[refuter] = str(ref)
+        except Exception as e:
+            refutations[refuter] = {"error": str(e)}
+    return EstimateResult(
+        provenance=prov,
+        assumptions=[
+            "Causal graph is correctly specified (strong assumption).",
+            "Estimator assumptions depend on the chosen method (see DoWhy).",
+        ],
+        limitations=[
+            "Refutation tests are diagnostics; passing does not prove causality.",
+            "Estimation quality depends on overlap/positivity and measurement quality.",
+        ],
+        estimate={
+            "value": float(getattr(estimate, "value", float("nan"))),
+            "method_name": cfg.method_name,
+            "estimand": str(identified_estimand),
+            "raw_estimate": str(estimate),
+        },
+        refutations=refutations,
+        artifacts={"identification_result": identification_result.model_dump()},
+    )

crca_core/core/godclass.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""CausalCore GodClass: consolidated core API facade."""
+from __future__ import annotations
+from typing import Any, Dict, Optional
+from crca_core.core.api import (
+    EstimatorConfig,
+    FeasibilityConstraints,
+    PCMCIConfig,
+    TabularDiscoveryConfig,
+    TargetQuery,
+    design_intervention,
+    discover_tabular,
+    discover_timeseries_pcmci,
+    identify_effect,
+    estimate_effect_dowhy,
+    simulate_counterfactual,
+)
+from crca_core.models.result import AnyResult, IdentificationResult
+from crca_core.models.spec import LockedSpec
+class CausalCoreGod:
+    """Single class exposing all core causal operations."""
+    def __init__(self) -> None:
+        self.last_identification: Optional[IdentificationResult] = None
+    def identify(self, *, locked_spec: LockedSpec, treatment: str, outcome: str) -> AnyResult:
+        res = identify_effect(locked_spec=locked_spec, treatment=treatment, outcome=outcome)
+        if isinstance(res, IdentificationResult):
+            self.last_identification = res
+        return res
+    def estimate(self, *, data: Any, locked_spec: LockedSpec, treatment: str, outcome: str) -> AnyResult:
+        return estimate_effect_dowhy(
+            data=data,
+            locked_spec=locked_spec,
+            treatment=treatment,
+            outcome=outcome,
+            identification_result=self.last_identification,
+            config=EstimatorConfig(),
+        )
+    def counterfactual(
+        self,
+        *,
+        locked_spec: LockedSpec,
+        factual_observation: Dict[str, float],
+        intervention: Dict[str, float],
+        allow_partial_observation: bool = False,
+    ) -> AnyResult:
+        return simulate_counterfactual(
+            locked_spec=locked_spec,
+            factual_observation=factual_observation,
+            intervention=intervention,
+            allow_partial_observation=allow_partial_observation,
+        )
+    def design_intervention(self, *, locked_spec: LockedSpec, target_query: TargetQuery) -> AnyResult:
+        return design_intervention(
+            locked_spec=locked_spec,
+            target_query=target_query,
+            constraints=FeasibilityConstraints(),
+        )
+    def discover_tabular(self, *, data: Any) -> AnyResult:
+        return discover_tabular(data, TabularDiscoveryConfig())
+    def discover_timeseries(self, *, data: Any) -> AnyResult:
+        return discover_timeseries_pcmci(data, PCMCIConfig())

crca_core/core/intervention_design.py ADDED Viewed

@@ -0,0 +1,174 @@
+"""Intervention/experiment design (v0.1: graphical, non-probabilistic).
+This module is intentionally conservative:
+- It does not invent numeric information gain.
+- It produces structured candidate designs with explicit prerequisites and rationale.
+"""
+from __future__ import annotations
+from typing import Any, Dict, List, Literal, Optional
+from pydantic import BaseModel, Field
+from crca_core.models.provenance import ProvenanceManifest
+from crca_core.models.result import InterventionDesignResult
+from utils.canonical import stable_hash
+# Describes what `design_intervention` should design for.
+class TargetQuery(BaseModel):
+    # Selects which branch of `design_intervention` runs.
+    query_type: Literal["identify_effect", "reduce_uncertainty_edge"] = "identify_effect"
+    # Read only when query_type == "identify_effect"; both must be set.
+    treatment: Optional[str] = None
+    outcome: Optional[str] = None
+    # Read only when query_type == "reduce_uncertainty_edge"; both must be set.
+    edge_source: Optional[str] = None
+    edge_target: Optional[str] = None
+# Caller-supplied feasibility information for `design_intervention`.
+class FeasibilityConstraints(BaseModel):
+    # Variables the caller can intervene on; gates the randomization and
+    # source-perturbation designs.
+    manipulable_variables: List[str] = Field(default_factory=list)
+    # The three fields below are not read by the design logic in this module;
+    # they only enter the provenance hash via `model_dump()`.
+    observable_variables: List[str] = Field(default_factory=list)
+    costs: Dict[str, Any] = Field(default_factory=dict)
+    ethics_notes: Optional[str] = None
+# One proposed design; `design_intervention` stores these as plain dicts
+# (via `model_dump()`) in the result's `designs` list.
+class DesignCandidate(BaseModel):
+    # Short machine-readable label, e.g. "randomize_treatment".
+    design_type: str
+    # Human-readable explanation of how the design is meant to help.
+    mechanism: str
+    # Assumptions/conditions that must hold for the design to be useful.
+    prerequisites: List[str] = Field(default_factory=list)
+    # Names of inputs the caller still has to supply to assess feasibility.
+    feasibility_inputs_needed: List[str] = Field(default_factory=list)
+    # Optional free-text caveat.
+    notes: Optional[str] = None
+def _parents_of(graph_edges, node: str) -> List[str]:
+    res: List[str] = []
+    for e in graph_edges:
+        if e.target == node:
+            res.append(e.source)
+    return res
+def design_intervention(
+    *,
+    locked_spec: Any,
+    target_query: TargetQuery,
+    constraints: Optional[FeasibilityConstraints] = None,
+) -> InterventionDesignResult:
+    """Propose interventions/measurements to support identifiability or hypothesis discrimination.
+    v0.1 is graphical and deliberately non-numeric. It does not claim identifiability;
+    it produces designs and checklists that *could* help under explicit assumptions.
+    """
+    constraints = constraints or FeasibilityConstraints()
+    # Provenance: we hash only the spec hash + query + constraints (no raw data).
+    spec_hash = getattr(locked_spec, "spec_hash", "unknown")
+    prov = ProvenanceManifest.minimal(
+        spec_hash=stable_hash(
+            {
+                "spec_hash": spec_hash,
+                "target_query": target_query.model_dump(),
+                "constraints": constraints.model_dump(),
+                "module": "intervention_design_v0.1",
+            }
+        )
+    )
+    designs: List[Dict[str, Any]] = []
+    if target_query.query_type == "identify_effect":
+        X = target_query.treatment
+        Y = target_query.outcome
+        if not X or not Y:
+            return InterventionDesignResult(
+                provenance=prov,
+                assumptions=[],
+                limitations=["Missing treatment/outcome in target_query."],
+                designs=[],
+            )
+        # 1) Randomize treatment if feasible
+        if X in constraints.manipulable_variables:
+            designs.append(
+                DesignCandidate(
+                    design_type="randomize_treatment",
+                    mechanism=f"Randomize {X} to break backdoor confounding when estimating effect on {Y}.",
+                    prerequisites=[
+                        "Well-defined intervention on treatment (consistency/SUTVA).",
+                        "No interference between units (SUTVA).",
+                        "Feasible randomization protocol and compliance monitoring.",
+                    ],
+                    feasibility_inputs_needed=["sample_size", "randomization_unit", "ethical_constraints"],
+                ).model_dump()
+            )
+        # 2) Measure candidate confounders (parents of treatment in the current draft DAG)
+        parents_x = _parents_of(locked_spec.graph.edges, X)
+        if parents_x:
+            designs.append(
+                DesignCandidate(
+                    design_type="measure_confounder_candidates",
+                    mechanism=(
+                        f"Measure candidate confounders {parents_x} because they are modeled as direct causes of {X}; "
+                        f"conditioning/adjusting may help estimate effect of {X} on {Y} under exchangeability."
+                    ),
+                    prerequisites=[
+                        "Candidate confounders are measured without severe error or are modeled as proxies.",
+                        "Exchangeability holds conditional on measured covariates (assumption).",
+                        "Positivity/overlap holds in the collected data.",
+                    ],
+                    feasibility_inputs_needed=["measurement_instrument_quality", "data_collection_costs"],
+                    notes="This does not guarantee identifiability; it is a measurement suggestion grounded in the current graph hypothesis.",
+                ).model_dump()
+            )
+        # 3) Instrument design if user provided candidate instruments
+        if getattr(locked_spec.roles, "instruments", []):
+            Zs = list(getattr(locked_spec.roles, "instruments", []))
+            designs.append(
+                DesignCandidate(
+                    design_type="instrument_design",
+                    mechanism=f"Collect/create instrument(s) {Zs} to identify effect of {X} on {Y} under IV assumptions.",
+                    prerequisites=[
+                        "Relevance: Z affects X.",
+                        "Exclusion: Z affects Y only through X.",
+                        "Independence: Z is independent of unmeasured causes of Y.",
+                    ],
+                    feasibility_inputs_needed=["instrument_source", "exclusion_justification"],
+                ).model_dump()
+            )
+    elif target_query.query_type == "reduce_uncertainty_edge":
+        s = target_query.edge_source
+        t = target_query.edge_target
+        if not s or not t:
+            return InterventionDesignResult(
+                provenance=prov,
+                assumptions=[],
+                limitations=["Missing edge_source/edge_target in target_query."],
+                designs=[],
+            )
+        if s in constraints.manipulable_variables:
+            designs.append(
+                DesignCandidate(
+                    design_type="perturb_source",
+                    mechanism=f"Intervene on {s} (do({s}=...)) and observe downstream changes in {t} to test the edge hypothesis {s}→{t}.",
+                    prerequisites=[
+                        "Well-defined intervention on source variable.",
+                        "No simultaneous changes to other upstream causes (or they are measured/controlled).",
+                    ],
+                    feasibility_inputs_needed=["intervention_range", "measurement_frequency", "time_horizon"],
+                ).model_dump()
+            )
+    return InterventionDesignResult(
+        provenance=prov,
+        assumptions=[],
+        limitations=[
+            "v0.1 design is graphical and non-probabilistic; it does not compute numeric information gain.",
+            "Suggestions depend on the correctness of the locked causal graph/spec assumptions.",
+        ],
+        designs=designs,
+    )

crca_core/core/lifecycle.py ADDED Viewed

@@ -0,0 +1,48 @@
+"""Spec lifecycle: DraftSpec → LockedSpec.
+The LockedSpec is a scientific boundary: only LockedSpec may be used for numeric
+causal outputs. This module enforces that boundary.
+"""
+from __future__ import annotations
+from datetime import datetime, timezone
+from typing import List
+from crca_core.models.spec import DraftSpec, LockedSpec
+from utils.canonical import stable_hash
+def lock_spec(draft: DraftSpec, approvals: List[str]) -> LockedSpec:
+    """Lock a draft spec by hashing its canonical content and recording approvals.
+    Args:
+        draft: The draft specification (possibly LLM-generated).
+        approvals: Human (or explicit programmatic) approvals. Must be non-empty.
+    Returns:
+        LockedSpec
+    Raises:
+        ValueError: If approvals are empty.
+    """
+    if not approvals:
+        raise ValueError("approvals must be non-empty to lock a spec")
+    # Canonicalize via stable_hash over model_dump
+    draft_payload = draft.model_dump()
+    spec_hash = stable_hash(draft_payload)
+    locked_at = datetime.now(timezone.utc).isoformat()
+    return LockedSpec(
+        spec_hash=spec_hash,
+        approvals=list(approvals),
+        locked_at_utc=locked_at,
+        data=draft.data,
+        graph=draft.graph,
+        roles=draft.roles,
+        assumptions=draft.assumptions,
+        scm=draft.scm,
+    )

crca_core/discovery/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Causal discovery (hypothesis generation) modules.
+Discovery outputs are hypotheses under explicit assumptions; they are not truth.
+"""
+from crca_core.discovery.tabular import TabularDiscoveryConfig, discover_tabular
+# Re-export the tabular discovery entry points imported above.
+__all__ = ["TabularDiscoveryConfig", "discover_tabular"]

crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

crca 1.4.0py3-none-any.whl → 1.5.0py3-none-any.whl