PyPI - crca - Versions diffs - 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

crca 1.4.0 (py3-none-any.whl) → 1.5.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (306) hide show

CRCA.py +172 -7
MODEL_CARD.md +53 -0
PKG-INFO +8 -2
RELEASE_NOTES.md +17 -0
STABILITY.md +19 -0
architecture/hybrid/consistency_engine.py +362 -0
architecture/hybrid/conversation_manager.py +421 -0
architecture/hybrid/explanation_generator.py +452 -0
architecture/hybrid/few_shot_learner.py +533 -0
architecture/hybrid/graph_compressor.py +286 -0
architecture/hybrid/hybrid_agent.py +4398 -0
architecture/hybrid/language_compiler.py +623 -0
architecture/hybrid/main,py +0 -0
architecture/hybrid/reasoning_tracker.py +322 -0
architecture/hybrid/self_verifier.py +524 -0
architecture/hybrid/task_decomposer.py +567 -0
architecture/hybrid/text_corrector.py +341 -0
benchmark_results/crca_core_benchmarks.json +178 -0
branches/crca_sd/crca_sd_realtime.py +6 -2
branches/general_agent/__init__.py +102 -0
branches/general_agent/general_agent.py +1400 -0
branches/general_agent/personality.py +169 -0
branches/general_agent/utils/__init__.py +19 -0
branches/general_agent/utils/prompt_builder.py +170 -0
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
crca_core/__init__.py +35 -0
crca_core/benchmarks/__init__.py +14 -0
crca_core/benchmarks/synthetic_scm.py +103 -0
crca_core/core/__init__.py +23 -0
crca_core/core/api.py +120 -0
crca_core/core/estimate.py +208 -0
crca_core/core/godclass.py +72 -0
crca_core/core/intervention_design.py +174 -0
crca_core/core/lifecycle.py +48 -0
crca_core/discovery/__init__.py +9 -0
crca_core/discovery/tabular.py +193 -0
crca_core/identify/__init__.py +171 -0
crca_core/identify/backdoor.py +39 -0
crca_core/identify/frontdoor.py +48 -0
crca_core/identify/graph.py +106 -0
crca_core/identify/id_algorithm.py +43 -0
crca_core/identify/iv.py +48 -0
crca_core/models/__init__.py +67 -0
crca_core/models/provenance.py +56 -0
crca_core/models/refusal.py +39 -0
crca_core/models/result.py +83 -0
crca_core/models/spec.py +151 -0
crca_core/models/validation.py +68 -0
crca_core/scm/__init__.py +9 -0
crca_core/scm/linear_gaussian.py +198 -0
crca_core/timeseries/__init__.py +6 -0
crca_core/timeseries/pcmci.py +181 -0
crca_llm/__init__.py +12 -0
crca_llm/client.py +85 -0
crca_llm/coauthor.py +118 -0
crca_llm/orchestrator.py +289 -0
crca_llm/types.py +21 -0
crca_reasoning/__init__.py +16 -0
crca_reasoning/critique.py +54 -0
crca_reasoning/godclass.py +206 -0
crca_reasoning/memory.py +24 -0
crca_reasoning/rationale.py +10 -0
crca_reasoning/react_controller.py +81 -0
crca_reasoning/tool_router.py +97 -0
crca_reasoning/types.py +40 -0
crca_sd/__init__.py +15 -0
crca_sd/crca_sd_core.py +2 -0
crca_sd/crca_sd_governance.py +2 -0
crca_sd/crca_sd_mpc.py +2 -0
crca_sd/crca_sd_realtime.py +2 -0
crca_sd/crca_sd_tui.py +2 -0
cuda-keyring_1.1-1_all.deb +0 -0
cuda-keyring_1.1-1_all.deb.1 +0 -0
docs/IMAGE_ANNOTATION_USAGE.md +539 -0
docs/INSTALL_DEEPSPEED.md +125 -0
docs/api/branches/crca-cg.md +19 -0
docs/api/branches/crca-q.md +27 -0
docs/api/branches/crca-sd.md +37 -0
docs/api/branches/general-agent.md +24 -0
docs/api/branches/overview.md +19 -0
docs/api/crca/agent-methods.md +62 -0
docs/api/crca/operations.md +79 -0
docs/api/crca/overview.md +32 -0
docs/api/image-annotation/engine.md +52 -0
docs/api/image-annotation/overview.md +17 -0
docs/api/schemas/annotation.md +34 -0
docs/api/schemas/core-schemas.md +82 -0
docs/api/schemas/overview.md +32 -0
docs/api/schemas/policy.md +30 -0
docs/api/utils/conversation.md +22 -0
docs/api/utils/graph-reasoner.md +32 -0
docs/api/utils/overview.md +21 -0
docs/api/utils/router.md +19 -0
docs/api/utils/utilities.md +97 -0
docs/architecture/causal-graphs.md +41 -0
docs/architecture/data-flow.md +29 -0
docs/architecture/design-principles.md +33 -0
docs/architecture/hybrid-agent/components.md +38 -0
docs/architecture/hybrid-agent/consistency.md +26 -0
docs/architecture/hybrid-agent/overview.md +44 -0
docs/architecture/hybrid-agent/reasoning.md +22 -0
docs/architecture/llm-integration.md +26 -0
docs/architecture/modular-structure.md +37 -0
docs/architecture/overview.md +69 -0
docs/architecture/policy-engine-arch.md +29 -0
docs/branches/crca-cg/corposwarm.md +39 -0
docs/branches/crca-cg/esg-scoring.md +30 -0
docs/branches/crca-cg/multi-agent.md +35 -0
docs/branches/crca-cg/overview.md +40 -0
docs/branches/crca-q/alternative-data.md +55 -0
docs/branches/crca-q/architecture.md +71 -0
docs/branches/crca-q/backtesting.md +45 -0
docs/branches/crca-q/causal-engine.md +33 -0
docs/branches/crca-q/execution.md +39 -0
docs/branches/crca-q/market-data.md +60 -0
docs/branches/crca-q/overview.md +58 -0
docs/branches/crca-q/philosophy.md +60 -0
docs/branches/crca-q/portfolio-optimization.md +66 -0
docs/branches/crca-q/risk-management.md +102 -0
docs/branches/crca-q/setup.md +65 -0
docs/branches/crca-q/signal-generation.md +61 -0
docs/branches/crca-q/signal-validation.md +43 -0
docs/branches/crca-sd/core.md +84 -0
docs/branches/crca-sd/governance.md +53 -0
docs/branches/crca-sd/mpc-solver.md +65 -0
docs/branches/crca-sd/overview.md +59 -0
docs/branches/crca-sd/realtime.md +28 -0
docs/branches/crca-sd/tui.md +20 -0
docs/branches/general-agent/overview.md +37 -0
docs/branches/general-agent/personality.md +36 -0
docs/branches/general-agent/prompt-builder.md +30 -0
docs/changelog/index.md +79 -0
docs/contributing/code-style.md +69 -0
docs/contributing/documentation.md +43 -0
docs/contributing/overview.md +29 -0
docs/contributing/testing.md +29 -0
docs/core/crcagent/async-operations.md +65 -0
docs/core/crcagent/automatic-extraction.md +107 -0
docs/core/crcagent/batch-prediction.md +80 -0
docs/core/crcagent/bayesian-inference.md +60 -0
docs/core/crcagent/causal-graph.md +92 -0
docs/core/crcagent/counterfactuals.md +96 -0
docs/core/crcagent/deterministic-simulation.md +78 -0
docs/core/crcagent/dual-mode-operation.md +82 -0
docs/core/crcagent/initialization.md +88 -0
docs/core/crcagent/optimization.md +65 -0
docs/core/crcagent/overview.md +63 -0
docs/core/crcagent/time-series.md +57 -0
docs/core/schemas/annotation.md +30 -0
docs/core/schemas/core-schemas.md +82 -0
docs/core/schemas/overview.md +30 -0
docs/core/schemas/policy.md +41 -0
docs/core/templates/base-agent.md +31 -0
docs/core/templates/feature-mixins.md +31 -0
docs/core/templates/overview.md +29 -0
docs/core/templates/templates-guide.md +75 -0
docs/core/tools/mcp-client.md +34 -0
docs/core/tools/overview.md +24 -0
docs/core/utils/conversation.md +27 -0
docs/core/utils/graph-reasoner.md +29 -0
docs/core/utils/overview.md +27 -0
docs/core/utils/router.md +27 -0
docs/core/utils/utilities.md +97 -0
docs/css/custom.css +84 -0
docs/examples/basic-usage.md +57 -0
docs/examples/general-agent/general-agent-examples.md +50 -0
docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
docs/examples/image-annotation/image-annotation-examples.md +54 -0
docs/examples/integration/integration-examples.md +58 -0
docs/examples/overview.md +37 -0
docs/examples/trading/trading-examples.md +46 -0
docs/features/causal-reasoning/advanced-topics.md +101 -0
docs/features/causal-reasoning/counterfactuals.md +43 -0
docs/features/causal-reasoning/do-calculus.md +50 -0
docs/features/causal-reasoning/overview.md +47 -0
docs/features/causal-reasoning/structural-models.md +52 -0
docs/features/hybrid-agent/advanced-components.md +55 -0
docs/features/hybrid-agent/core-components.md +64 -0
docs/features/hybrid-agent/overview.md +34 -0
docs/features/image-annotation/engine.md +82 -0
docs/features/image-annotation/features.md +113 -0
docs/features/image-annotation/integration.md +75 -0
docs/features/image-annotation/overview.md +53 -0
docs/features/image-annotation/quickstart.md +73 -0
docs/features/policy-engine/doctrine-ledger.md +105 -0
docs/features/policy-engine/monitoring.md +44 -0
docs/features/policy-engine/mpc-control.md +89 -0
docs/features/policy-engine/overview.md +46 -0
docs/getting-started/configuration.md +225 -0
docs/getting-started/first-agent.md +164 -0
docs/getting-started/installation.md +144 -0
docs/getting-started/quickstart.md +137 -0
docs/index.md +118 -0
docs/js/mathjax.js +13 -0
docs/lrm/discovery_proof_notes.md +25 -0
docs/lrm/finetune_full.md +83 -0
docs/lrm/math_appendix.md +120 -0
docs/lrm/overview.md +32 -0
docs/mkdocs.yml +238 -0
docs/stylesheets/extra.css +21 -0
docs_generated/crca_core/CounterfactualResult.md +12 -0
docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
docs_generated/crca_core/DraftSpec.md +13 -0
docs_generated/crca_core/EstimateResult.md +13 -0
docs_generated/crca_core/IdentificationResult.md +17 -0
docs_generated/crca_core/InterventionDesignResult.md +12 -0
docs_generated/crca_core/LockedSpec.md +15 -0
docs_generated/crca_core/RefusalResult.md +12 -0
docs_generated/crca_core/ValidationReport.md +9 -0
docs_generated/crca_core/index.md +13 -0
examples/general_agent_example.py +277 -0
examples/general_agent_quickstart.py +202 -0
examples/general_agent_simple.py +92 -0
examples/hybrid_agent_auto_extraction.py +84 -0
examples/hybrid_agent_dictionary_demo.py +104 -0
examples/hybrid_agent_enhanced.py +179 -0
examples/hybrid_agent_general_knowledge.py +107 -0
examples/image_annotation_quickstart.py +328 -0
examples/test_hybrid_fixes.py +77 -0
image_annotation/__init__.py +27 -0
image_annotation/annotation_engine.py +2593 -0
install_cuda_wsl2.sh +59 -0
install_deepspeed.sh +56 -0
install_deepspeed_simple.sh +87 -0
mkdocs.yml +252 -0
ollama/Modelfile +8 -0
prompts/__init__.py +2 -1
prompts/default_crca.py +9 -1
prompts/general_agent.py +227 -0
prompts/image_annotation.py +56 -0
pyproject.toml +17 -2
requirements-docs.txt +10 -0
requirements.txt +21 -2
schemas/__init__.py +26 -1
schemas/annotation.py +222 -0
schemas/conversation.py +193 -0
schemas/hybrid.py +211 -0
schemas/reasoning.py +276 -0
schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
schemas_export/crca_core/DraftSpec.schema.json +635 -0
schemas_export/crca_core/EstimateResult.schema.json +113 -0
schemas_export/crca_core/IdentificationResult.schema.json +145 -0
schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
schemas_export/crca_core/LockedSpec.schema.json +646 -0
schemas_export/crca_core/RefusalResult.schema.json +90 -0
schemas_export/crca_core/ValidationReport.schema.json +62 -0
scripts/build_lrm_dataset.py +80 -0
scripts/export_crca_core_schemas.py +54 -0
scripts/export_hf_lrm.py +37 -0
scripts/export_ollama_gguf.py +45 -0
scripts/generate_changelog.py +157 -0
scripts/generate_crca_core_docs_from_schemas.py +86 -0
scripts/run_crca_core_benchmarks.py +163 -0
scripts/run_full_finetune.py +198 -0
scripts/run_lrm_eval.py +31 -0
templates/graph_management.py +29 -0
tests/conftest.py +9 -0
tests/test_core.py +2 -3
tests/test_crca_core_discovery_tabular.py +15 -0
tests/test_crca_core_estimate_dowhy.py +36 -0
tests/test_crca_core_identify.py +18 -0
tests/test_crca_core_intervention_design.py +36 -0
tests/test_crca_core_linear_gaussian_scm.py +69 -0
tests/test_crca_core_spec.py +25 -0
tests/test_crca_core_timeseries_pcmci.py +15 -0
tests/test_crca_llm_coauthor.py +12 -0
tests/test_crca_llm_orchestrator.py +80 -0
tests/test_hybrid_agent_llm_enhanced.py +556 -0
tests/test_image_annotation_demo.py +376 -0
tests/test_image_annotation_operational.py +408 -0
tests/test_image_annotation_unit.py +551 -0
tests/test_training_moe.py +13 -0
training/__init__.py +42 -0
training/datasets.py +140 -0
training/deepspeed_zero2_0_5b.json +22 -0
training/deepspeed_zero2_1_5b.json +22 -0
training/deepspeed_zero3_0_5b.json +28 -0
training/deepspeed_zero3_14b.json +28 -0
training/deepspeed_zero3_h100_3gpu.json +20 -0
training/deepspeed_zero3_offload.json +28 -0
training/eval.py +92 -0
training/finetune.py +516 -0
training/public_datasets.py +89 -0
training_data/react_train.jsonl +7473 -0
utils/agent_discovery.py +311 -0
utils/batch_processor.py +317 -0
utils/conversation.py +78 -0
utils/edit_distance.py +118 -0
utils/formatter.py +33 -0
utils/graph_reasoner.py +530 -0
utils/rate_limiter.py +283 -0
utils/router.py +2 -2
utils/tool_discovery.py +307 -0
webui/__init__.py +10 -0
webui/app.py +229 -0
webui/config.py +104 -0
webui/static/css/style.css +332 -0
webui/static/js/main.js +284 -0
webui/templates/index.html +42 -0
tests/test_crca_excel.py +0 -166
tests/test_data_broker.py +0 -424
tests/test_palantir.py +0 -349
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0

crca_core/discovery/tabular.py ADDED Viewed

@@ -0,0 +1,193 @@
+"""Tabular causal discovery (hypothesis generation).
+Design goals:
+- Wrap established implementations when available (preferred).
+- If required backends are missing, return a structured Refusal (never ad-hoc heuristics).
+"""
+from __future__ import annotations
+from typing import Any, Dict, List, Literal, Optional
+from pydantic import BaseModel, Field
+from crca_core.models.provenance import ProvenanceManifest
+from crca_core.models.refusal import RefusalChecklistItem, RefusalReasonCode, RefusalResult
+from crca_core.models.result import DiscoveryHypothesisResult
+from utils.canonical import stable_hash
+class TabularDiscoveryConfig(BaseModel):
+    # Settings consumed by discover_tabular(); the dumped model also feeds the provenance hash.
+    # Which causal-learn search to run: PC, FCI, or GES.
+    algorithm: Literal["pc", "fci", "ges"] = "pc"
+    # Passed as `alpha` to pc/fci; not forwarded to ges.
+    alpha: float = Field(default=0.05, gt=0.0, lt=1.0)
+    # Number of row-resampled re-runs used for the stability report; 0 disables bootstrapping.
+    bootstrap_samples: int = Field(default=0, ge=0)
+    # Passed as `indep_test` to pc/fci; not forwarded to ges.
+    ci_test: Literal["fisherz", "gsq", "chisq"] = "fisherz"
+    # Passed as `stable` to pc only.
+    stable: bool = True
+    # discover_tabular() refuses when the data has fewer rows than this.
+    min_samples: int = Field(default=200, ge=20)
+    # Free-form annotation; not read by discover_tabular() itself.
+    notes: Optional[str] = None
+def _backend_available() -> bool:
+    """Report whether the `causallearn` package can be located.
+    Only the import spec is looked up; the package itself is not imported.
+    Any failure during the lookup is reported as "not available".
+    """
+    try:
+        from importlib.util import find_spec
+        # The causal-learn distribution is importable under the name `causallearn`.
+        located = find_spec("causallearn")
+    except Exception:
+        return False
+    return located is not None
+def _single_item_refusal(
+    message: str,
+    reason: RefusalReasonCode,
+    item: str,
+    rationale: str,
+    next_step: str,
+) -> RefusalResult:
+    """Build a refusal carrying one reason code, one checklist item and one next step."""
+    return RefusalResult(
+        message=message,
+        reason_codes=[reason],
+        checklist=[RefusalChecklistItem(item=item, rationale=rationale)],
+        suggested_next_steps=[next_step],
+    )
+def discover_tabular(
+    data: Any,
+    discovery_config: Optional[TabularDiscoveryConfig] = None,
+    assumptions: Optional[List[str]] = None,
+) -> DiscoveryHypothesisResult | RefusalResult:
+    """Run tabular causal discovery and return a hypothesis object.
+    Notes:
+    - This is hypothesis generation only.
+    - If `causal-learn` is not installed, we refuse and provide an actionable checklist.
+    - We also refuse when `data` is not a pandas DataFrame, when it cannot be
+      converted to a float matrix, or when it has fewer rows than `min_samples`.
+    Args:
+        data: Input table; must be a pandas DataFrame with numeric columns.
+        discovery_config: Algorithm settings; defaults to `TabularDiscoveryConfig()`.
+        assumptions: Caller-declared assumptions, copied onto the result as-is.
+    Returns:
+        A `DiscoveryHypothesisResult` holding the graph hypothesis and a
+        stability report, or a `RefusalResult` explaining why discovery was not run.
+    """
+    cfg = discovery_config or TabularDiscoveryConfig()
+    assumptions = assumptions or []
+    # Compute a lightweight data hash for provenance (schema-level only).
+    # We intentionally do not hash raw data values here.
+    schema_sig: Dict[str, Any] = {"type": str(type(data))}
+    try:
+        import pandas as pd  # type: ignore
+        if isinstance(data, pd.DataFrame):
+            schema_sig = {c: str(t) for c, t in data.dtypes.items()}
+    except Exception:
+        schema_sig = {"type": str(type(data))}
+    spec_hash = stable_hash({"discovery": "tabular", "config": cfg.model_dump(), "schema": schema_sig})
+    prov = ProvenanceManifest.minimal(
+        spec_hash=spec_hash,
+        data_hash=stable_hash(schema_sig),
+        algorithm_config=cfg.model_dump(),
+    )
+    if not _backend_available():
+        return _single_item_refusal(
+            "Tabular causal discovery backend not available.",
+            RefusalReasonCode.UNSUPPORTED_OPERATION,
+            "Install causal-learn",
+            "Tabular discovery is wrap-first; we refuse rather than run unvalidated heuristics.",
+            "pip install causal-learn",
+        )
+    try:
+        import numpy as np  # type: ignore
+        import pandas as pd  # type: ignore
+        from causallearn.search.ConstraintBased.PC import pc  # type: ignore
+        from causallearn.search.ConstraintBased.FCI import fci  # type: ignore
+        from causallearn.search.ScoreBased.GES import ges  # type: ignore
+    except Exception as e:
+        return _single_item_refusal(
+            f"Failed to import causal-learn: {e}",
+            RefusalReasonCode.UNSUPPORTED_OPERATION,
+            "Install causal-learn",
+            "Tabular discovery requires causal-learn backend.",
+            "pip install causal-learn",
+        )
+    if not isinstance(data, pd.DataFrame):
+        return _single_item_refusal(
+            "Tabular discovery requires pandas DataFrame input.",
+            RefusalReasonCode.INPUT_INVALID,
+            "Provide pandas DataFrame",
+            "Causal-learn expects tabular numpy/pandas data.",
+            "Convert your data to pandas.DataFrame and retry.",
+        )
+    columns = list(data.columns)
+    try:
+        values = data.to_numpy(dtype=float)
+    except (TypeError, ValueError) as e:
+        # Non-numeric columns cannot be cast; refuse instead of leaking a raw exception.
+        return _single_item_refusal(
+            f"Tabular discovery requires numeric columns: {e}",
+            RefusalReasonCode.INPUT_INVALID,
+            "Provide numeric columns only",
+            "The data is converted to a float matrix before being passed to causal-learn.",
+            "Encode or drop non-numeric columns and retry.",
+        )
+    if values.shape[0] < cfg.min_samples:
+        return _single_item_refusal(
+            "Insufficient samples for reliable discovery.",
+            RefusalReasonCode.INPUT_INVALID,
+            "Increase sample size",
+            f"Need at least {cfg.min_samples} rows for stable discovery.",
+            "Collect more samples or lower min_samples (not recommended).",
+        )
+    def _fit(sample: Any) -> Any:
+        """Run the configured algorithm on `sample` and return the backend's graph object."""
+        if cfg.algorithm == "pc":
+            cg = pc(sample, alpha=cfg.alpha, indep_test=cfg.ci_test, stable=cfg.stable)
+            return getattr(cg, "G", cg)
+        if cfg.algorithm == "fci":
+            res = fci(sample, alpha=cfg.alpha, indep_test=cfg.ci_test)
+            first = res[0] if isinstance(res, (list, tuple)) else res
+            return getattr(first, "G", first)
+        res = ges(sample)
+        return res.get("G") if isinstance(res, dict) else res
+    graph_obj = _fit(values)
+    mat = getattr(graph_obj, "graph", None)
+    graph_hypothesis: Dict[str, Any]
+    if mat is None:
+        graph_hypothesis = {"graph_type": "unknown", "raw": str(graph_obj)}
+    else:
+        graph_hypothesis = {
+            "graph_type": "causal_learn_matrix",
+            "adjacency": np.asarray(mat).tolist(),
+            "columns": columns,
+        }
+    stability_report: Dict[str, Any] = {"bootstrap_samples": cfg.bootstrap_samples}
+    if cfg.bootstrap_samples > 0:
+        n_rows = values.shape[0]
+        edge_counts = None
+        completed = 0
+        for _ in range(cfg.bootstrap_samples):
+            # Resample rows with replacement and re-run the same algorithm.
+            idx = np.random.randint(0, n_rows, size=n_rows)
+            boot_mat = getattr(_fit(values[idx]), "graph", None)
+            if boot_mat is None:
+                continue
+            present = (np.asarray(boot_mat) != 0).astype(float)
+            edge_counts = present if edge_counts is None else edge_counts + present
+            completed += 1
+        # Runs that produced no matrix are excluded from the denominator so they
+        # do not bias the reported frequencies toward zero.
+        stability_report["bootstrap_completed"] = completed
+        if edge_counts is not None:
+            stability_report["edge_frequency"] = (edge_counts / float(completed)).tolist()
+    return DiscoveryHypothesisResult(
+        provenance=prov,
+        assumptions=assumptions,
+        limitations=[
+            "Discovery outputs are hypotheses under assumptions (e.g., faithfulness, causal sufficiency/latent handling).",
+            "Returned graph structure depends on CI test assumptions and sample size.",
+        ],
+        graph_hypothesis=graph_hypothesis,
+        stability_report=stability_report,
+    )

crca_core/identify/__init__.py ADDED Viewed

@@ -0,0 +1,171 @@
+"""Identification entry points for crca_core (in-house)."""
+from __future__ import annotations
+from typing import Dict, List, Optional
+from crca_core.identify.backdoor import find_backdoor_adjustment_set
+from crca_core.identify.frontdoor import find_frontdoor_mediator
+from crca_core.identify.graph import CausalGraph
+from crca_core.identify.id_algorithm import id_algorithm
+from crca_core.identify.iv import find_instrument
+from crca_core.models.provenance import ProvenanceManifest
+from crca_core.models.refusal import RefusalChecklistItem, RefusalReasonCode, RefusalResult
+from crca_core.models.result import IdentificationResult
+from crca_core.models.spec import LockedSpec
+from utils.canonical import stable_hash
+def identify_effect(
+    *,
+    locked_spec: LockedSpec,
+    treatment: str,
+    outcome: str,
+) -> IdentificationResult | RefusalResult:
+    """Identify an effect using in-house methods (backdoor/frontdoor/IV/ID).
+    The strategies are tried in a fixed order (backdoor, frontdoor,
+    instrumental variable, conservative ID) and the first success is returned.
+    Args:
+        locked_spec: Locked specification providing the graph, roles and spec hash.
+        treatment: Name of the treatment variable.
+        outcome: Name of the outcome variable.
+    Returns:
+        An `IdentificationResult` for the first strategy that succeeds, or a
+        `RefusalResult` when inputs are missing, name a variable that is not in
+        the graph, or no strategy identifies the effect.
+    """
+    if not treatment or not outcome:
+        return RefusalResult(
+            message="Treatment and outcome must be provided.",
+            reason_codes=[RefusalReasonCode.INPUT_INVALID],
+            checklist=[
+                RefusalChecklistItem(item="Provide treatment", rationale="Required to define the estimand."),
+                RefusalChecklistItem(item="Provide outcome", rationale="Required to define the estimand."),
+            ],
+            suggested_next_steps=["Pass treatment='X', outcome='Y'."],
+        )
+    graph = CausalGraph.from_spec(locked_spec.graph)
+    # Graph queries below raise inside networkx for unknown nodes; refuse up front instead.
+    missing = sorted({name for name in (treatment, outcome) if name not in graph.directed})
+    if missing:
+        return RefusalResult(
+            message=f"Variable(s) not present in causal graph: {', '.join(missing)}",
+            reason_codes=[RefusalReasonCode.INPUT_INVALID],
+            checklist=[
+                RefusalChecklistItem(
+                    item="Use variables declared in the locked spec graph",
+                    rationale="Identification is defined only for nodes of the causal graph.",
+                )
+            ],
+            suggested_next_steps=["Check treatment/outcome names against the nodes of locked_spec.graph."],
+        )
+    prov = ProvenanceManifest.minimal(
+        spec_hash=stable_hash(
+            {
+                "spec_hash": locked_spec.spec_hash,
+                "treatment": treatment,
+                "outcome": outcome,
+                "module": "identify_effect",
+            }
+        )
+    )
+    # 1) Backdoor
+    z = find_backdoor_adjustment_set(graph, treatment, outcome)
+    if z is not None:
+        expr = f"sum_{{z}} P({outcome}|{treatment},z) P(z)"
+        return IdentificationResult(
+            provenance=prov,
+            method="backdoor",
+            scope="partial",
+            confidence="medium",
+            estimand_expression=expr,
+            assumptions_used=[
+                "Backdoor criterion holds with the returned adjustment set.",
+                "No unmeasured confounding conditional on Z.",
+                "Positivity/overlap for adjustment set.",
+            ],
+            witnesses={"adjustment_set": sorted(list(z))},
+            proof={
+                "type": "do-calculus",
+                "steps": [
+                    "In G_{X̄}, Z d-separates X and Y (backdoor).",
+                    "Apply Rule 2 to replace do(X) with observe(X) given Z.",
+                ],
+            },
+            limitations=["Identification assumes all confounding is captured by Z."],
+        )
+    # 2) Frontdoor
+    mediator = find_frontdoor_mediator(
+        graph, treatment, outcome, mediators=locked_spec.roles.mediators
+    )
+    if mediator is not None:
+        # The inner sum ranges over a fresh copy of the treatment (primed) so it
+        # is not confused with the intervened value in P(m|treatment).
+        expr = (
+            f"sum_m P(m|{treatment}) sum_{{{treatment}'}} P({outcome}|m,{treatment}') P({treatment}')"
+        )
+        return IdentificationResult(
+            provenance=prov,
+            method="frontdoor",
+            scope="partial",
+            confidence="medium",
+            estimand_expression=expr,
+            assumptions_used=[
+                "Frontdoor criterion holds for mediator M.",
+                "No unmeasured confounding between X and M.",
+                "All backdoor paths from M to Y are blocked by X.",
+            ],
+            witnesses={"mediator": mediator},
+            proof={
+                "type": "do-calculus",
+                "steps": [
+                    "Use Rule 3 to exchange do(X) with observe(X) for M→Y component.",
+                    "Use Rule 2 to exchange do(X) with observe(X) for X→M component.",
+                ],
+            },
+            limitations=["Frontdoor validity depends on strong mediator assumptions."],
+        )
+    # 3) Instrumental variable
+    instrument = find_instrument(
+        graph, treatment, outcome, instruments=locked_spec.roles.instruments
+    )
+    if instrument is not None:
+        expr = "IV estimand (see instrument assumptions)"
+        return IdentificationResult(
+            provenance=prov,
+            method="iv",
+            scope="partial",
+            confidence="low",
+            estimand_expression=expr,
+            assumptions_used=[
+                "Relevance: Z affects X.",
+                "Exclusion: Z affects Y only through X.",
+                "Independence: Z independent of unmeasured causes of Y.",
+            ],
+            witnesses={"instrument": instrument},
+            proof={
+                "type": "linear-IV",
+                "steps": [
+                    "Assume linear SCM with exclusion and independence.",
+                    "Derive β = Cov(Z,Y)/Cov(Z,X).",
+                ],
+            },
+            limitations=["IV estimand expression is left symbolic; estimator must implement IV."],
+        )
+    # 4) In-house ID algorithm (conservative)
+    id_expr = id_algorithm(graph, treatment, outcome)
+    if id_expr is not None:
+        method, expr = id_expr
+        return IdentificationResult(
+            provenance=prov,
+            method=method,
+            scope="conservative",
+            confidence="low",
+            estimand_expression=expr,
+            assumptions_used=["Causal graph is correct; no latent confounding beyond declared."],
+            witnesses={},
+            proof={
+                "type": "id-algorithm",
+                "steps": [
+                    "Apply ID recursion on C-components.",
+                    "Return g-formula when no bidirected edges.",
+                ],
+            },
+            limitations=[
+                "ID algorithm is conservative: may return non-identifiable for some identifiable cases with latent confounding."
+            ],
+        )
+    return RefusalResult(
+        message="Effect not identifiable under current graph/assumptions.",
+        reason_codes=[RefusalReasonCode.NOT_IDENTIFIABLE],
+        checklist=[
+            RefusalChecklistItem(
+                item="Revise causal model or add interventions/measurements",
+                rationale="Identification failed with backdoor/frontdoor/IV/ID checks.",
+            )
+        ],
+        suggested_next_steps=["Use design_intervention() to propose identifying experiments."],
+    )
+__all__ = ["identify_effect"]

crca_core/identify/backdoor.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""Backdoor identification helper."""
+from __future__ import annotations
+import itertools
+from typing import List, Optional, Sequence, Set
+from crca_core.identify.graph import CausalGraph
+def find_backdoor_adjustment_set(
+    graph: CausalGraph,
+    treatment: str,
+    outcome: str,
+    *,
+    max_candidates: int = 12,
+    max_set_size: int = 6,
+) -> Optional[Set[str]]:
+    """Search for a backdoor adjustment set between `treatment` and `outcome`.
+    Candidates are the observed variables other than the treatment, the
+    outcome, and anything `graph.descendants([treatment])` returns. Subsets
+    are tried smallest first, in sorted order, and the first one that
+    d-separates treatment from outcome once the treatment's outgoing edges
+    are removed is returned.
+    Returns None when there are more than `max_candidates` candidates or when
+    no subset of size up to `max_set_size` qualifies.
+    """
+    excluded = {treatment, outcome} | graph.descendants([treatment])
+    pool = sorted(name for name in set(graph.observed) if name not in excluded)
+    if len(pool) > max_candidates:
+        # Too many candidates for the bounded search; give up rather than enumerate.
+        return None
+    mutilated = graph.remove_outgoing([treatment])
+    sizes = range(min(max_set_size, len(pool)) + 1)
+    subsets = itertools.chain.from_iterable(
+        itertools.combinations(pool, size) for size in sizes
+    )
+    for subset in subsets:
+        adjustment = set(subset)
+        if mutilated.d_separated([treatment], [outcome], list(adjustment)):
+            return adjustment
+    return None

crca_core/identify/frontdoor.py ADDED Viewed

@@ -0,0 +1,48 @@
+"""Frontdoor identification helper."""
+from __future__ import annotations
+from typing import Iterable, Optional, Sequence
+import networkx as nx
+from crca_core.identify.graph import CausalGraph
+def _directed_paths_through(
+    graph: CausalGraph, treatment: str, outcome: str, mediator: str
+) -> bool:
+    """Return True if all directed paths from treatment to outcome go through mediator.
+    Returns False when no directed path exists, when treatment and outcome
+    coincide, or when an endpoint is not a node of the graph.
+    The check uses reachability instead of enumerating every simple path:
+    the mediator lies on every path exactly when deleting it disconnects
+    treatment from outcome. This keeps the cost linear in the graph size.
+    """
+    dag = graph.directed
+    if treatment == outcome or treatment not in dag or outcome not in dag:
+        return False
+    if not nx.has_path(dag, treatment, outcome):
+        return False
+    if mediator in (treatment, outcome):
+        # Every path trivially contains its own endpoints.
+        return True
+    if mediator not in dag:
+        return False
+    without_mediator = dag.subgraph(node for node in dag if node != mediator)
+    return not nx.has_path(without_mediator, treatment, outcome)
+def find_frontdoor_mediator(
+    graph: CausalGraph,
+    treatment: str,
+    outcome: str,
+    mediators: Sequence[str],
+) -> Optional[str]:
+    """Pick the first declared mediator that passes a conservative frontdoor check.
+    Mediators are examined in the order given; None is returned when none qualifies.
+    """
+    def _passes(candidate: str) -> bool:
+        """Apply the three frontdoor conditions to one candidate."""
+        if candidate == treatment or candidate == outcome:
+            return False
+        # Every directed treatment -> outcome path must pass through the candidate.
+        if not _directed_paths_through(graph, treatment, outcome, candidate):
+            return False
+        # No backdoor from X to M (empty set) in graph with outgoing edges removed.
+        x_cut = graph.remove_outgoing([treatment])
+        if not x_cut.d_separated([treatment], [candidate], []):
+            return False
+        # Backdoor from M to Y is blocked by X (conservative check).
+        m_cut = graph.remove_outgoing([candidate])
+        return bool(m_cut.d_separated([candidate], [outcome], [treatment]))
+    return next((m for m in mediators if _passes(m)), None)

crca_core/identify/graph.py ADDED Viewed

@@ -0,0 +1,106 @@
+"""Graph utilities for identification.
+Supports directed edges plus bidirected (latent confounding) edges.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple
+import networkx as nx
+from crca_core.models.spec import CausalGraphSpec
+def _parse_latent_confounders(latent: Sequence[str]) -> Set[Tuple[str, str]]:
+    """Turn latent-confounder strings into a set of sorted name pairs.
+    Accepted spellings (the arrow form takes precedence when both appear):
+    - "A<->B"
+    - "A,B"
+    Entries in any other format, with an empty side, or pairing a name with
+    itself are ignored.
+    """
+    def _endpoints(entry: str) -> Optional[Tuple[str, str]]:
+        """Split one entry at the first recognised separator, or return None."""
+        for separator in ("<->", ","):
+            left, found, right = entry.partition(separator)
+            if found:
+                return left.strip(), right.strip()
+        return None
+    parsed: Set[Tuple[str, str]] = set()
+    for entry in latent:
+        ends = _endpoints(entry)
+        if ends is None:
+            # Unknown format, skip conservatively.
+            continue
+        first, second = ends
+        if not first or not second or first == second:
+            continue
+        parsed.add(tuple(sorted(ends)))
+    return parsed
+@dataclass(frozen=True)
+class CausalGraph:
+    """Causal graph with directed edges plus bidirected (latent-confounding) pairs.
+    Attributes are documented inline. Graph-surgery helpers return new
+    `CausalGraph` objects and leave the receiver untouched.
+    """
+    # Declared node names, in spec order.
+    nodes: Tuple[str, ...]
+    # Names of the nodes flagged as observed in the spec.
+    observed: Set[str]
+    # Directed edges between nodes.
+    directed: nx.DiGraph
+    # Sorted name pairs that share a latent confounder.
+    bidirected: Set[Tuple[str, str]]
+    @classmethod
+    def from_spec(cls, spec: CausalGraphSpec) -> "CausalGraph":
+        """Build a graph from a spec's nodes, edges and latent-confounder strings."""
+        nodes = tuple(n.name for n in spec.nodes)
+        observed = {n.name for n in spec.nodes if n.observed}
+        g = nx.DiGraph()
+        for n in nodes:
+            g.add_node(n)
+        for e in spec.edges:
+            g.add_edge(e.source, e.target)
+        bidirected = _parse_latent_confounders(spec.latent_confounders)
+        return cls(nodes=nodes, observed=observed, directed=g, bidirected=bidirected)
+    def ancestors(self, nodes: Iterable[str]) -> Set[str]:
+        """Return the given nodes together with all their directed ancestors."""
+        # Materialise once so a one-shot iterable is not exhausted before the union.
+        members = list(nodes)
+        anc: Set[str] = set(members)
+        for n in members:
+            anc |= nx.ancestors(self.directed, n)
+        return anc
+    def descendants(self, nodes: Iterable[str]) -> Set[str]:
+        """Return the given nodes together with all their directed descendants."""
+        members = list(nodes)
+        desc: Set[str] = set(members)
+        for n in members:
+            desc |= nx.descendants(self.directed, n)
+        return desc
+    def induced_subgraph(self, nodes: Set[str]) -> "CausalGraph":
+        """Restrict the graph (directed and bidirected parts) to `nodes`."""
+        g = self.directed.subgraph(nodes).copy()
+        bidirected = {p for p in self.bidirected if p[0] in nodes and p[1] in nodes}
+        observed = {n for n in self.observed if n in nodes}
+        # Keep declared order first so the result does not depend on set iteration order.
+        declared = [n for n in self.nodes if n in nodes]
+        extra = sorted(n for n in nodes if n not in set(declared))
+        return CausalGraph(nodes=tuple(declared + extra), observed=observed, directed=g, bidirected=bidirected)
+    def remove_outgoing(self, x: Sequence[str]) -> "CausalGraph":
+        """Return a copy with every directed edge leaving a node in `x` removed."""
+        g = self.directed.copy()
+        for node in x:
+            g.remove_edges_from(list(g.out_edges(node)))
+        return CausalGraph(nodes=self.nodes, observed=set(self.observed), directed=g, bidirected=set(self.bidirected))
+    def remove_incoming(self, x: Sequence[str]) -> "CausalGraph":
+        """Return a copy with every directed edge entering a node in `x` removed."""
+        g = self.directed.copy()
+        for node in x:
+            g.remove_edges_from(list(g.in_edges(node)))
+        return CausalGraph(nodes=self.nodes, observed=set(self.observed), directed=g, bidirected=set(self.bidirected))
+    def c_components(self, nodes: Optional[Set[str]] = None) -> List[Set[str]]:
+        """Compute c-components (bidirected connected components).
+        `nodes=None` means all declared nodes; an explicit empty set yields no components.
+        """
+        if nodes is None:
+            nodes = set(self.nodes)
+        # Build undirected graph of bidirected connections
+        undirected = nx.Graph()
+        undirected.add_nodes_from(nodes)
+        for a, b in self.bidirected:
+            if a in nodes and b in nodes:
+                undirected.add_edge(a, b)
+        return [set(c) for c in nx.connected_components(undirected)]
+    def d_separated(self, x: Sequence[str], y: Sequence[str], z: Sequence[str]) -> bool:
+        """Return True if `z` d-separates `x` from `y`, honouring latent confounders.
+        Each bidirected pair is expanded into an explicit, never-conditioned
+        latent parent of both endpoints before running networkx's check, so
+        paths through latent confounders are not silently treated as blocked.
+        """
+        dag = self.directed
+        if self.bidirected:
+            dag = self.directed.copy()
+            for a, b in self.bidirected:
+                # Tuple-valued node cannot collide with string variable names.
+                latent = ("__latent__", a, b)
+                dag.add_edge(latent, a)
+                dag.add_edge(latent, b)
+        try:
+            from networkx.algorithms.d_separation import is_d_separator as _d_sep
+        except ImportError:
+            # networkx < 3.3 exposes the same check under the older name.
+            from networkx.algorithms.d_separation import d_separated as _d_sep
+        return bool(_d_sep(dag, set(x), set(y), set(z)))

crca_core/identify/id_algorithm.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""In-house identification (ID) algorithm scaffold.
+This module implements a conservative identification strategy:
+- If no bidirected edges (no latent confounding), return g-formula.
+- If latent confounding is present, we currently return not identifiable.
+This is intentionally strict and avoids over-claiming identifiability.
+"""
+from __future__ import annotations
+from typing import Optional, Sequence, Tuple
+from crca_core.identify.graph import CausalGraph
+def identify_g_formula(
+    graph: CausalGraph, treatment: str, outcome: str
+) -> Tuple[str, str]:
+    """Build the textual g-formula for a graph without latent confounding.
+    Returns a ``(method_id, expression)`` pair. The expression sums over every
+    node of ``graph`` other than ``treatment`` and ``outcome`` (in sorted
+    order); the summation prefix is omitted when no such node exists.
+    """
+    summed_over = [
+        name for name in sorted(graph.nodes) if name != treatment and name != outcome
+    ]
+    expression = f"Π_v P(v | Pa(v)) with do({treatment})"
+    if summed_over:
+        expression = "sum_{" + ",".join(summed_over) + "} " + expression
+    return "id_g_formula", expression
+def id_algorithm(
+    graph: CausalGraph,
+    treatment: str,
+    outcome: str,
+) -> Optional[Tuple[str, str]]:
+    """Identify the effect of ``treatment`` on ``outcome`` when it is safe to do so.
+    Returns the ``(method_id, expression)`` pair from ``identify_g_formula``
+    when the graph has no bidirected edges, and ``None`` otherwise.
+    """
+    # Conservative: any bidirected edge (latent confounding) means we refuse
+    # rather than risk over-claiming identifiability.
+    if graph.bidirected:
+        return None
+    # Without bidirected edges the g-formula identifies the interventional distribution.
+    return identify_g_formula(graph, treatment, outcome)

crca_core/identify/iv.py ADDED Viewed

@@ -0,0 +1,48 @@
+"""Instrumental variable identification helper (conservative)."""
+from __future__ import annotations
+from typing import Optional, Sequence
+import networkx as nx
+from crca_core.identify.graph import CausalGraph
+def find_instrument(
+    graph: CausalGraph,
+    treatment: str,
+    outcome: str,
+    instruments: Sequence[str],
+) -> Optional[str]:
+    """Return the first candidate instrument that passes conservative IV checks.
+    Candidates are tried in the order given. A candidate ``z`` is accepted when:
+    - relevance: there is a directed path from ``z`` to ``treatment``;
+    - exclusion: no directed path from ``z`` to ``outcome`` avoids ``treatment``;
+    - independence: ``z`` and ``outcome`` are d-separated given ``treatment``
+      (as implemented by ``CausalGraph.d_separated``) once the treatment's
+      outgoing edges are removed.
+    Args:
+        graph: Causal graph to inspect.
+        treatment: Name of the treatment node.
+        outcome: Name of the outcome node.
+        instruments: Candidate instrument names; names that are not nodes of
+            the directed graph are skipped.
+    Returns:
+        The first accepted candidate, or ``None`` when none qualifies, when
+        ``treatment`` or ``outcome`` is not a node, or when they coincide.
+    """
+    dag = graph.directed
+    # No instrument can be certified for nodes the graph does not contain, nor
+    # for the degenerate query where treatment and outcome are the same node.
+    if treatment == outcome or treatment not in dag or outcome not in dag:
+        return None
+    # Both helper graphs are independent of the candidate, so build them once.
+    without_treatment = dag.copy()
+    without_treatment.remove_node(treatment)
+    mutilated = graph.remove_outgoing([treatment])
+    for z in instruments:
+        # nx.has_path raises NodeNotFound (not a NetworkXError) for unknown
+        # nodes, so filter them out up front instead of catching exceptions.
+        if z in (treatment, outcome) or z not in dag:
+            continue
+        # Relevance: Z causes X
+        if not nx.has_path(dag, z, treatment):
+            continue
+        # Exclusion: a directed path from Z to Y that avoids X exists exactly
+        # when Y is still reachable from Z after X is deleted. This replaces an
+        # exponential enumeration of all simple paths with one reachability query.
+        if nx.has_path(without_treatment, z, outcome):
+            continue
+        # Independence (conservative): Z and Y d-separated given X in graph removing X's outgoing edges.
+        # With its outgoing edges removed, X can only sit on a path as a collider,
+        # so conditioning on it can open paths but never block one.
+        if not mutilated.d_separated([z], [outcome], [treatment]):
+            continue
+        return z
+    return None

crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

crca 1.4.0py3-none-any.whl → 1.5.0py3-none-any.whl