crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,341 @@
1
+ """
2
+ Non-destructive text correction pipeline for CRCA hybrid agent.
3
+
4
+ Provides three-layer correction:
5
+ 1. Orthographic normalization: Spelling errors
6
+ 2. Informal compression handling: Abbreviations, shortcuts
7
+ 3. Grammar recovery: Recover structure, not polish
8
+
9
+ All corrections are non-destructive - original form is preserved
10
+ with confidence scores and provenance.
11
+ """
12
+
13
+ from typing import Dict, List, Optional, Tuple, Any
14
+ import logging
15
+ import re
16
+
17
+ from schemas.hybrid import AnnotatedToken, DependencyTree
18
+ from utils.edit_distance import find_closest_match, damerau_levenshtein_distance
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class TextCorrector:
    """
    Non-destructive text corrector with three correction layers.

    Correction produces annotated tokens with:
    - original_form: What user typed
    - normalized_form: Corrected version
    - confidence: Correction confidence
    - correction_type: Type of correction
    - provenance: Why correction was made
    - metadata: Optional dictionary information
    """

    def __init__(
        self,
        vocabulary: Optional[List[str]] = None,
        lexical_compiler: Optional[Any] = None
    ):
        """
        Initialize text corrector.

        Args:
            vocabulary: Optional vocabulary list for spelling correction
            lexical_compiler: Optional LexicalCompiler instance for dictionary lookups
        """
        self.vocabulary = set(vocabulary) if vocabulary else set()
        self.lexical_compiler = lexical_compiler
        self.abbreviations = self._build_abbreviation_dict()
        self.common_words = self._build_common_words()

    def _build_abbreviation_dict(self) -> Dict[str, str]:
        """Build dictionary of common abbreviations/misspelling shortcuts."""
        return {
            "depnds": "depends",
            "demnad": "demand",
            "prce": "price",
            "qulity": "quality",
            "affcts": "affects",
            "influnces": "influences",
            "causes": "causes",  # Already correct, but included for completeness
        }

    def _build_common_words(self) -> set:
        """Build set of common English words (function words + causal vocabulary)."""
        return {
            "the", "a", "an", "and", "or", "but", "if", "then", "when",
            "what", "which", "how", "why", "where", "who",
            "is", "are", "was", "were", "be", "been", "being",
            "have", "has", "had", "do", "does", "did",
            "will", "would", "could", "should", "may", "might",
            "cause", "causes", "effect", "effects", "affect", "affects",
            "influence", "influences", "depend", "depends", "lead", "leads",
            "result", "results", "impact", "impacts", "determine", "determines"
        }

    def correct_orthographic(self, text: str) -> List[AnnotatedToken]:
        """
        Correct orthographic (spelling) errors.

        Each whitespace-separated token is checked against the local
        vocabulary, then fuzzy-matched (edit distance <= 2), with an
        optional dictionary lookup via the lexical compiler. Tokens are
        never dropped; unknown tokens are kept with reduced confidence.

        Args:
            text: Text to correct

        Returns:
            List of AnnotatedToken objects
        """
        tokens = text.split()
        corrected_tokens = []

        # Hoisted out of the loop: the candidate pool is loop-invariant.
        candidates = list(self.common_words) + list(self.vocabulary)
        dictionary_enabled = bool(
            self.lexical_compiler and self.lexical_compiler.enable_dictionary
        )

        for token in tokens:
            # Clean token (remove punctuation for matching).
            clean_token = re.sub(r'[^\w]', '', token.lower())

            # Pure-punctuation tokens (e.g. "--", "?") have an empty clean
            # form; fuzzy-matching an empty string would "correct" it into
            # an arbitrary short word, so pass it through unchanged.
            if not clean_token:
                corrected_tokens.append(AnnotatedToken(
                    original_form=token,
                    normalized_form=token,
                    confidence=1.0,
                    correction_type="none"
                ))
                continue

            # Check if token is already correct.
            if clean_token in self.common_words or clean_token in self.vocabulary:
                corrected_tokens.append(AnnotatedToken(
                    original_form=token,
                    normalized_form=token,
                    confidence=1.0,
                    correction_type="none"
                ))
                continue

            # Try to find closest match.
            match, distance = find_closest_match(clean_token, candidates, max_distance=2)

            # If dictionary is available, also check if word is valid.
            word_info = None
            if dictionary_enabled:
                word_info = self.lexical_compiler.get_word_info(clean_token)
                if word_info and word_info.get('found'):
                    # Word is valid, no correction needed.
                    match = clean_token
                    distance = 0

            if match and distance <= 2:
                if distance == 0:
                    # Dictionary-verified word: keep the user's exact form
                    # (punctuation and capitalization) — non-destructive.
                    normalized = token
                else:
                    # Confidence decays with edit distance.
                    normalized = match
                    # Preserve original capitalization if possible.
                    if token[0].isupper():
                        normalized = match.capitalize()

                confidence = max(0.0, 1.0 - (distance / 3.0))

                corrected_tokens.append(AnnotatedToken(
                    original_form=token,
                    normalized_form=normalized,
                    confidence=confidence,
                    correction_type="spelling" if distance > 0 else "none",
                    provenance=f"Edit distance: {distance}" if distance > 0 else "Valid word (dictionary verified)",
                    # Dictionary info (if any) travels with the token.
                    metadata=word_info if word_info else None
                ))
            else:
                # No correction found, but check dictionary to see if it's a valid word.
                if dictionary_enabled:
                    word_info = self.lexical_compiler.get_word_info(clean_token)
                    if word_info and word_info.get('found'):
                        # Word is valid, just not in our local vocabulary.
                        corrected_tokens.append(AnnotatedToken(
                            original_form=token,
                            normalized_form=token,
                            confidence=0.8,  # Medium confidence - valid word but not in local vocab
                            correction_type="none",
                            provenance="Valid word (dictionary verified)",
                            metadata=word_info
                        ))
                    else:
                        corrected_tokens.append(AnnotatedToken(
                            original_form=token,
                            normalized_form=token,
                            confidence=0.3,  # Low confidence - unknown word
                            correction_type="none",
                            provenance="Unknown word, not found in dictionary"
                        ))
                else:
                    # No dictionary available.
                    corrected_tokens.append(AnnotatedToken(
                        original_form=token,
                        normalized_form=token,
                        confidence=0.5,  # Low confidence - unknown word
                        correction_type="none",
                        provenance="Unknown word, no correction found"
                    ))

        return corrected_tokens

    def expand_abbreviations(self, tokens: List[AnnotatedToken]) -> List[AnnotatedToken]:
        """
        Expand abbreviations and shortcuts.

        Args:
            tokens: List of AnnotatedToken objects

        Returns:
            List of AnnotatedToken objects with abbreviations expanded
        """
        expanded_tokens = []

        for token in tokens:
            # NOTE: lookup is on the raw lowercased original form, so tokens
            # carrying punctuation (e.g. "depnds,") will not match.
            original = token.original_form.lower()

            if original in self.abbreviations:
                expanded = self.abbreviations[original]

                # Preserve original capitalization.
                if token.original_form[0].isupper():
                    expanded = expanded.capitalize()

                expanded_tokens.append(AnnotatedToken(
                    original_form=token.original_form,
                    normalized_form=expanded,
                    confidence=0.9,
                    correction_type="abbreviation",
                    provenance=f"Abbreviation expansion: {original} -> {expanded}"
                ))
            else:
                # No abbreviation found, keep token as-is.
                expanded_tokens.append(token)

        return expanded_tokens

    def recover_grammar(self, tokens: List[AnnotatedToken]) -> DependencyTree:
        """
        Recover grammar structure from tokens (not polish, just structure).

        Uses a minimal heuristic: the first recognized verb becomes the
        root; its left neighbor is the subject, its right neighbor the
        object. A full implementation would use a real parser.

        Args:
            tokens: List of AnnotatedToken objects

        Returns:
            DependencyTree representing recovered structure
        """
        words = [token.normalized_form for token in tokens]

        nodes = words
        edges = []
        root = None

        if len(words) >= 2:
            verbs = ["is", "are", "was", "were", "causes", "affects", "influences", "depends"]
            verb_idx = None
            for i, word in enumerate(words):
                if word.lower() in verbs:
                    verb_idx = i
                    break

            if verb_idx is not None:
                root = words[verb_idx]
                # Subject is before verb.
                if verb_idx > 0:
                    edges.append((words[verb_idx - 1], words[verb_idx], "nsubj"))
                # Object is after verb.
                if verb_idx < len(words) - 1:
                    edges.append((words[verb_idx], words[verb_idx + 1], "dobj"))

        # Fallback root: first word (no verb found, or single-word input).
        if root is None and words:
            root = words[0]

        return DependencyTree(nodes=nodes, edges=edges, root=root)

    def correct_text(
        self,
        text: str,
        use_abbreviation_expansion: bool = True,
        use_grammar_recovery: bool = True
    ) -> Dict[str, Any]:
        """
        Complete correction pipeline.

        Args:
            text: Text to correct
            use_abbreviation_expansion: Whether to expand abbreviations
            use_grammar_recovery: Whether to recover grammar structure

        Returns:
            Dictionary with original text, corrected tokens, joined corrected
            text, optional dependency tree, and the minimum token confidence.
        """
        # Step 1: Orthographic correction.
        tokens = self.correct_orthographic(text)

        # Step 2: Abbreviation expansion.
        if use_abbreviation_expansion:
            tokens = self.expand_abbreviations(tokens)

        # Step 3: Grammar recovery.
        dependency_tree = None
        if use_grammar_recovery:
            dependency_tree = self.recover_grammar(tokens)

        return {
            "original_text": text,
            "corrected_tokens": tokens,
            "corrected_text": " ".join([t.normalized_form for t in tokens]),
            "dependency_tree": dependency_tree,
            # Overall confidence is the weakest link.
            "confidence": min([t.confidence for t in tokens]) if tokens else 1.0
        }

    def disambiguate_with_graph(
        self,
        tokens: List[AnnotatedToken],
        graph_manager: Any,  # GraphManager instance
        expected_pattern: Optional[str] = None
    ) -> List[AnnotatedToken]:
        """
        Use graph structure for context-aware disambiguation.

        Tokens that exactly match a graph variable get a confidence boost;
        otherwise a fuzzy match (edit distance <= 2) against graph variables
        may rename the token to the matching variable.

        Args:
            tokens: List of AnnotatedToken objects
            graph_manager: GraphManager instance for graph structure
            expected_pattern: Optional expected pattern (e.g., "depends on");
                currently unused, reserved for future pattern-aware matching.

        Returns:
            List of disambiguated AnnotatedToken objects
        """
        disambiguated = []

        # Get variables from graph; lowercase view hoisted out of the loop.
        graph_variables = set(graph_manager.get_nodes())
        graph_variables_lower = {v.lower() for v in graph_variables}

        for token in tokens:
            normalized = token.normalized_form.lower()

            # If token matches a graph variable, increase confidence.
            if normalized in graph_variables_lower:
                disambiguated.append(AnnotatedToken(
                    original_form=token.original_form,
                    normalized_form=token.normalized_form,
                    confidence=min(1.0, token.confidence + 0.2),  # Boost confidence
                    correction_type=token.correction_type,
                    provenance=f"{token.provenance}; Graph variable match",
                    # Preserve any dictionary metadata gathered earlier.
                    metadata=token.metadata
                ))
            elif graph_variables:
                # Try to find closest graph variable.
                match, distance = find_closest_match(normalized, list(graph_variables), max_distance=2)
                if match and distance <= 2:
                    disambiguated.append(AnnotatedToken(
                        original_form=token.original_form,
                        normalized_form=match,
                        confidence=0.7,
                        correction_type="graph_disambiguation",
                        provenance=f"Matched to graph variable '{match}' (distance: {distance})",
                        metadata=token.metadata
                    ))
                else:
                    disambiguated.append(token)
            else:
                disambiguated.append(token)

        return disambiguated
@@ -0,0 +1,178 @@
1
+ {
2
+ "benchmarks": [
3
+ {
4
+ "result_type": "BenchmarkResult",
5
+ "benchmark": "linear_gaussian_chain",
6
+ "provenance": {
7
+ "run_id": "116d10c6-dd8a-4bd1-a19b-d1dde4192c23",
8
+ "timestamp_utc": "2026-01-24T00:15:54.840013+00:00",
9
+ "spec_hash": "ef1c101c6e1f853994f418127f20b81ace7f4a5dfd4ad1f75011c0d7612221ad",
10
+ "data_hash": null,
11
+ "library_versions": {
12
+ "python": "3.14.2",
13
+ "platform": "Windows-10-10.0.19044-SP0"
14
+ },
15
+ "random_seeds": {
16
+ "numpy": 1
17
+ },
18
+ "algorithm_config": {
19
+ "benchmark": "linear_gaussian_chain"
20
+ },
21
+ "hardware_notes": null
22
+ },
23
+ "metrics": {
24
+ "abduction_max_abs_error": 1.1102230246251565e-16
25
+ },
26
+ "artifacts": {
27
+ "factual": {
28
+ "X0": 0.345584192064786,
29
+ "X1": 1.1326439163594657,
30
+ "X2": 1.3498166009069061,
31
+ "X3": -0.08832229078814535
32
+ },
33
+ "counterfactual": {
34
+ "X0": 1.345584192064786,
35
+ "X1": 2.032643916359466,
36
+ "X2": 2.1598166009069066,
37
+ "X3": 0.6406777092118552
38
+ },
39
+ "notes": [
40
+ "This benchmark checks abduction correctness (noise recovery) under full observability.",
41
+ "Counterfactual uses abduction\u2013action\u2013prediction with fixed exogenous noise."
42
+ ]
43
+ }
44
+ },
45
+ {
46
+ "result_type": "BenchmarkResult",
47
+ "benchmark": "identification",
48
+ "provenance": {
49
+ "run_id": "f7724b4a-e289-45b4-8678-7b25f53c634d",
50
+ "timestamp_utc": "2026-01-24T00:15:54.841337+00:00",
51
+ "spec_hash": "5993a3988c56bf780cdf380e1d583aef633b4984d92c47af74a20aeacdc7c312",
52
+ "data_hash": null,
53
+ "library_versions": {
54
+ "python": "3.14.2",
55
+ "platform": "Windows-10-10.0.19044-SP0"
56
+ },
57
+ "random_seeds": {},
58
+ "algorithm_config": {},
59
+ "hardware_notes": null
60
+ },
61
+ "metrics": {},
62
+ "artifacts": {
63
+ "identifiable_case": {
64
+ "result_type": "IdentificationResult",
65
+ "provenance": {
66
+ "run_id": "89d3c7f1-1192-4810-882e-26178d164cea",
67
+ "timestamp_utc": "2026-01-24T00:15:54.840274+00:00",
68
+ "spec_hash": "ac1a7f2ad5f6987c2bd5bcab24ed0e00a9fe91cc58f826aedd347a1c4afe7485",
69
+ "data_hash": null,
70
+ "library_versions": {
71
+ "python": "3.14.2",
72
+ "platform": "Windows-10-10.0.19044-SP0"
73
+ },
74
+ "random_seeds": {},
75
+ "algorithm_config": {},
76
+ "hardware_notes": null
77
+ },
78
+ "assumptions": [],
79
+ "limitations": [],
80
+ "artifacts": {},
81
+ "method": "backdoor",
82
+ "estimand_expression": "sum_{z} P(Y|X,z) P(z)",
83
+ "assumptions_used": [
84
+ "Backdoor criterion holds with the returned adjustment set.",
85
+ "No unmeasured confounding conditional on Z.",
86
+ "Positivity/overlap for adjustment set."
87
+ ],
88
+ "witnesses": {
89
+ "adjustment_set": []
90
+ }
91
+ },
92
+ "latent_confounder_case": {
93
+ "result_type": "IdentificationResult",
94
+ "provenance": {
95
+ "run_id": "eede5b84-87b3-440f-8bbd-806414da1c65",
96
+ "timestamp_utc": "2026-01-24T00:15:54.841222+00:00",
97
+ "spec_hash": "4c10d99fa36dab1db0d2c447e4ca53ae3c97ae5417e822bd581519a817ad1849",
98
+ "data_hash": null,
99
+ "library_versions": {
100
+ "python": "3.14.2",
101
+ "platform": "Windows-10-10.0.19044-SP0"
102
+ },
103
+ "random_seeds": {},
104
+ "algorithm_config": {},
105
+ "hardware_notes": null
106
+ },
107
+ "assumptions": [],
108
+ "limitations": [],
109
+ "artifacts": {},
110
+ "method": "backdoor",
111
+ "estimand_expression": "sum_{z} P(Y|X,z) P(z)",
112
+ "assumptions_used": [
113
+ "Backdoor criterion holds with the returned adjustment set.",
114
+ "No unmeasured confounding conditional on Z.",
115
+ "Positivity/overlap for adjustment set."
116
+ ],
117
+ "witnesses": {
118
+ "adjustment_set": []
119
+ }
120
+ }
121
+ }
122
+ },
123
+ {
124
+ "result_type": "BenchmarkResult",
125
+ "benchmark": "discovery",
126
+ "provenance": {
127
+ "run_id": "e981afd2-56be-4191-be89-2d5ce369ffb4",
128
+ "timestamp_utc": "2026-01-24T00:15:55.601239+00:00",
129
+ "spec_hash": "221827b0c5dee7dca1d7b1ad0bdf4781ad8b225bc469e8c86e7d0ce5601a19c1",
130
+ "data_hash": null,
131
+ "library_versions": {
132
+ "python": "3.14.2",
133
+ "platform": "Windows-10-10.0.19044-SP0"
134
+ },
135
+ "random_seeds": {},
136
+ "algorithm_config": {},
137
+ "hardware_notes": null
138
+ },
139
+ "metrics": {},
140
+ "artifacts": {
141
+ "tabular": {
142
+ "result_type": "Refusal",
143
+ "reason_codes": [
144
+ "UNSUPPORTED_OPERATION"
145
+ ],
146
+ "message": "Tabular causal discovery backend not available.",
147
+ "checklist": [
148
+ {
149
+ "item": "Install causal-learn",
150
+ "rationale": "Tabular discovery is wrap-first; we refuse rather than run unvalidated heuristics."
151
+ }
152
+ ],
153
+ "suggested_next_steps": [
154
+ "pip install causal-learn"
155
+ ],
156
+ "details": null
157
+ },
158
+ "timeseries": {
159
+ "result_type": "Refusal",
160
+ "reason_codes": [
161
+ "UNSUPPORTED_OPERATION"
162
+ ],
163
+ "message": "Time-series causal discovery backend (tigramite) not available.",
164
+ "checklist": [
165
+ {
166
+ "item": "Install tigramite",
167
+ "rationale": "PCMCI/PCMCI+ discovery is wrap-first; we refuse rather than run unvalidated heuristics."
168
+ }
169
+ ],
170
+ "suggested_next_steps": [
171
+ "pip install tigramite"
172
+ ],
173
+ "details": null
174
+ }
175
+ }
176
+ }
177
+ ]
178
+ }
@@ -934,7 +934,10 @@ class SafetyInterlocks:
934
934
 
935
935
  def __init__(
936
936
  self,
937
- max_budget_change: float = 0.20, # 20% max change per period
937
+ # NOTE: Budget-share changes are measured using L1 distance over the simplex.
938
+ # A 30% shift between two categories yields an L1 distance of 0.60.
939
+ # The default is intentionally permissive; major changes are gated by approval.
940
+ max_budget_change: float = 0.60,
938
941
  major_change_threshold: float = 0.10, # 10% = major change
939
942
  confidence_threshold: float = 0.95, # 95% confidence required
940
943
  ) -> None:
@@ -1004,7 +1007,8 @@ class SafetyInterlocks:
1004
1007
 
1005
1008
  total_change = sum(budget_change.values())
1006
1009
 
1007
- if total_change > self.max_budget_change:
1010
+ # Allow tiny floating-point error at the threshold.
1011
+ if (total_change - self.max_budget_change) > 1e-12:
1008
1012
  return False, f"Budget change {total_change:.2%} exceeds limit {self.max_budget_change:.2%}", True
1009
1013
 
1010
1014
  return True, "Within rate limits", False
@@ -0,0 +1,102 @@
1
+ """
2
+ General Agent Module
3
+
4
+ A general-purpose conversational agent (GPT-style) that can handle diverse tasks,
5
+ use tools, and optionally access other specialized CRCA agents via AOP/router integration.
6
+ """
7
+
8
+ from typing import Optional
9
+
10
+ __version__ = "0.1.0"
11
+
12
# Lazy imports: keep this package importable even when a submodule (or one of
# its heavy dependencies) is missing. Availability flags record what loaded.
try:
    from branches.general_agent.general_agent import GeneralAgent, GeneralAgentConfig
    GENERAL_AGENT_AVAILABLE = True
except ImportError as e:
    # Stub the names so `from branches.general_agent import GeneralAgent`
    # still succeeds; the original error is kept so get_general_agent()
    # can surface a useful message later.
    GeneralAgent = None
    GeneralAgentConfig = None
    GENERAL_AGENT_AVAILABLE = False
    _import_error = e

try:
    from branches.general_agent.personality import (
        Personality,
        get_personality,
        create_custom_personality,
        list_personalities,
        PERSONALITIES,
    )
    PERSONALITY_AVAILABLE = True
except ImportError:
    # NOTE(review): unlike the block above, the import error is not recorded
    # here — personality support degrades silently to the stubs below.
    Personality = None
    get_personality = None
    create_custom_personality = None
    list_personalities = None
    PERSONALITIES = {}
    PERSONALITY_AVAILABLE = False

# Public API of the package (includes the factory helpers defined below).
__all__ = [
    "GeneralAgent",
    "GeneralAgentConfig",
    "Personality",
    "get_personality",
    "create_custom_personality",
    "list_personalities",
    "PERSONALITIES",
    "get_general_agent",
    "create_agent",
    "GENERAL_AGENT_AVAILABLE",
    "PERSONALITY_AVAILABLE",
    "__version__",
]
53
+
54
+
55
+ def get_general_agent(**kwargs) -> Optional["GeneralAgent"]:
56
+ """
57
+ Get GeneralAgent instance (simple factory function).
58
+
59
+ Simplest usage:
60
+ agent = get_general_agent()
61
+ agent = get_general_agent(model_name="gpt-4o")
62
+ agent = get_general_agent(personality="friendly")
63
+
64
+ Args:
65
+ **kwargs: Arguments to pass to GeneralAgent constructor
66
+
67
+ Returns:
68
+ GeneralAgent instance or None if not available
69
+ """
70
+ if not GENERAL_AGENT_AVAILABLE:
71
+ if '_import_error' in globals():
72
+ raise ImportError(f"GeneralAgent not available: {_import_error}")
73
+ return None
74
+
75
+ try:
76
+ return GeneralAgent(**kwargs)
77
+ except Exception as e:
78
+ raise RuntimeError(f"Failed to create GeneralAgent: {e}") from e
79
+
80
+
81
def create_agent(
    model_name: Optional[str] = None,
    personality: Optional[str] = None,
    **kwargs
) -> Optional["GeneralAgent"]:
    """
    Ultra-simple agent creation function.

    Usage:
        agent = create_agent()
        agent = create_agent("gpt-4o")
        agent = create_agent("gpt-4o", "friendly")

    Args:
        model_name: LLM model name (optional)
        personality: Personality name (optional)
        **kwargs: Additional parameters forwarded to GeneralAgent

    Returns:
        GeneralAgent instance (or None — see get_general_agent)

    Raises:
        ImportError: GeneralAgent could not be imported.
        RuntimeError: GeneralAgent construction failed.
    """
    # Forward the positional conveniences only when actually provided.
    # Previously None was always passed through, which (a) overrode the
    # constructor's own defaults and (b) raised TypeError if the caller
    # also supplied model_name/personality via **kwargs.
    if model_name is not None:
        kwargs.setdefault("model_name", model_name)
    if personality is not None:
        kwargs.setdefault("personality", personality)
    return get_general_agent(**kwargs)