PyPI - crca - Versions diffs - 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

crca 1.4.0py3-none-any.whl → 1.5.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (306) hide show

CRCA.py +172 -7
MODEL_CARD.md +53 -0
PKG-INFO +8 -2
RELEASE_NOTES.md +17 -0
STABILITY.md +19 -0
architecture/hybrid/consistency_engine.py +362 -0
architecture/hybrid/conversation_manager.py +421 -0
architecture/hybrid/explanation_generator.py +452 -0
architecture/hybrid/few_shot_learner.py +533 -0
architecture/hybrid/graph_compressor.py +286 -0
architecture/hybrid/hybrid_agent.py +4398 -0
architecture/hybrid/language_compiler.py +623 -0
architecture/hybrid/main,py +0 -0
architecture/hybrid/reasoning_tracker.py +322 -0
architecture/hybrid/self_verifier.py +524 -0
architecture/hybrid/task_decomposer.py +567 -0
architecture/hybrid/text_corrector.py +341 -0
benchmark_results/crca_core_benchmarks.json +178 -0
branches/crca_sd/crca_sd_realtime.py +6 -2
branches/general_agent/__init__.py +102 -0
branches/general_agent/general_agent.py +1400 -0
branches/general_agent/personality.py +169 -0
branches/general_agent/utils/__init__.py +19 -0
branches/general_agent/utils/prompt_builder.py +170 -0
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
crca_core/__init__.py +35 -0
crca_core/benchmarks/__init__.py +14 -0
crca_core/benchmarks/synthetic_scm.py +103 -0
crca_core/core/__init__.py +23 -0
crca_core/core/api.py +120 -0
crca_core/core/estimate.py +208 -0
crca_core/core/godclass.py +72 -0
crca_core/core/intervention_design.py +174 -0
crca_core/core/lifecycle.py +48 -0
crca_core/discovery/__init__.py +9 -0
crca_core/discovery/tabular.py +193 -0
crca_core/identify/__init__.py +171 -0
crca_core/identify/backdoor.py +39 -0
crca_core/identify/frontdoor.py +48 -0
crca_core/identify/graph.py +106 -0
crca_core/identify/id_algorithm.py +43 -0
crca_core/identify/iv.py +48 -0
crca_core/models/__init__.py +67 -0
crca_core/models/provenance.py +56 -0
crca_core/models/refusal.py +39 -0
crca_core/models/result.py +83 -0
crca_core/models/spec.py +151 -0
crca_core/models/validation.py +68 -0
crca_core/scm/__init__.py +9 -0
crca_core/scm/linear_gaussian.py +198 -0
crca_core/timeseries/__init__.py +6 -0
crca_core/timeseries/pcmci.py +181 -0
crca_llm/__init__.py +12 -0
crca_llm/client.py +85 -0
crca_llm/coauthor.py +118 -0
crca_llm/orchestrator.py +289 -0
crca_llm/types.py +21 -0
crca_reasoning/__init__.py +16 -0
crca_reasoning/critique.py +54 -0
crca_reasoning/godclass.py +206 -0
crca_reasoning/memory.py +24 -0
crca_reasoning/rationale.py +10 -0
crca_reasoning/react_controller.py +81 -0
crca_reasoning/tool_router.py +97 -0
crca_reasoning/types.py +40 -0
crca_sd/__init__.py +15 -0
crca_sd/crca_sd_core.py +2 -0
crca_sd/crca_sd_governance.py +2 -0
crca_sd/crca_sd_mpc.py +2 -0
crca_sd/crca_sd_realtime.py +2 -0
crca_sd/crca_sd_tui.py +2 -0
cuda-keyring_1.1-1_all.deb +0 -0
cuda-keyring_1.1-1_all.deb.1 +0 -0
docs/IMAGE_ANNOTATION_USAGE.md +539 -0
docs/INSTALL_DEEPSPEED.md +125 -0
docs/api/branches/crca-cg.md +19 -0
docs/api/branches/crca-q.md +27 -0
docs/api/branches/crca-sd.md +37 -0
docs/api/branches/general-agent.md +24 -0
docs/api/branches/overview.md +19 -0
docs/api/crca/agent-methods.md +62 -0
docs/api/crca/operations.md +79 -0
docs/api/crca/overview.md +32 -0
docs/api/image-annotation/engine.md +52 -0
docs/api/image-annotation/overview.md +17 -0
docs/api/schemas/annotation.md +34 -0
docs/api/schemas/core-schemas.md +82 -0
docs/api/schemas/overview.md +32 -0
docs/api/schemas/policy.md +30 -0
docs/api/utils/conversation.md +22 -0
docs/api/utils/graph-reasoner.md +32 -0
docs/api/utils/overview.md +21 -0
docs/api/utils/router.md +19 -0
docs/api/utils/utilities.md +97 -0
docs/architecture/causal-graphs.md +41 -0
docs/architecture/data-flow.md +29 -0
docs/architecture/design-principles.md +33 -0
docs/architecture/hybrid-agent/components.md +38 -0
docs/architecture/hybrid-agent/consistency.md +26 -0
docs/architecture/hybrid-agent/overview.md +44 -0
docs/architecture/hybrid-agent/reasoning.md +22 -0
docs/architecture/llm-integration.md +26 -0
docs/architecture/modular-structure.md +37 -0
docs/architecture/overview.md +69 -0
docs/architecture/policy-engine-arch.md +29 -0
docs/branches/crca-cg/corposwarm.md +39 -0
docs/branches/crca-cg/esg-scoring.md +30 -0
docs/branches/crca-cg/multi-agent.md +35 -0
docs/branches/crca-cg/overview.md +40 -0
docs/branches/crca-q/alternative-data.md +55 -0
docs/branches/crca-q/architecture.md +71 -0
docs/branches/crca-q/backtesting.md +45 -0
docs/branches/crca-q/causal-engine.md +33 -0
docs/branches/crca-q/execution.md +39 -0
docs/branches/crca-q/market-data.md +60 -0
docs/branches/crca-q/overview.md +58 -0
docs/branches/crca-q/philosophy.md +60 -0
docs/branches/crca-q/portfolio-optimization.md +66 -0
docs/branches/crca-q/risk-management.md +102 -0
docs/branches/crca-q/setup.md +65 -0
docs/branches/crca-q/signal-generation.md +61 -0
docs/branches/crca-q/signal-validation.md +43 -0
docs/branches/crca-sd/core.md +84 -0
docs/branches/crca-sd/governance.md +53 -0
docs/branches/crca-sd/mpc-solver.md +65 -0
docs/branches/crca-sd/overview.md +59 -0
docs/branches/crca-sd/realtime.md +28 -0
docs/branches/crca-sd/tui.md +20 -0
docs/branches/general-agent/overview.md +37 -0
docs/branches/general-agent/personality.md +36 -0
docs/branches/general-agent/prompt-builder.md +30 -0
docs/changelog/index.md +79 -0
docs/contributing/code-style.md +69 -0
docs/contributing/documentation.md +43 -0
docs/contributing/overview.md +29 -0
docs/contributing/testing.md +29 -0
docs/core/crcagent/async-operations.md +65 -0
docs/core/crcagent/automatic-extraction.md +107 -0
docs/core/crcagent/batch-prediction.md +80 -0
docs/core/crcagent/bayesian-inference.md +60 -0
docs/core/crcagent/causal-graph.md +92 -0
docs/core/crcagent/counterfactuals.md +96 -0
docs/core/crcagent/deterministic-simulation.md +78 -0
docs/core/crcagent/dual-mode-operation.md +82 -0
docs/core/crcagent/initialization.md +88 -0
docs/core/crcagent/optimization.md +65 -0
docs/core/crcagent/overview.md +63 -0
docs/core/crcagent/time-series.md +57 -0
docs/core/schemas/annotation.md +30 -0
docs/core/schemas/core-schemas.md +82 -0
docs/core/schemas/overview.md +30 -0
docs/core/schemas/policy.md +41 -0
docs/core/templates/base-agent.md +31 -0
docs/core/templates/feature-mixins.md +31 -0
docs/core/templates/overview.md +29 -0
docs/core/templates/templates-guide.md +75 -0
docs/core/tools/mcp-client.md +34 -0
docs/core/tools/overview.md +24 -0
docs/core/utils/conversation.md +27 -0
docs/core/utils/graph-reasoner.md +29 -0
docs/core/utils/overview.md +27 -0
docs/core/utils/router.md +27 -0
docs/core/utils/utilities.md +97 -0
docs/css/custom.css +84 -0
docs/examples/basic-usage.md +57 -0
docs/examples/general-agent/general-agent-examples.md +50 -0
docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
docs/examples/image-annotation/image-annotation-examples.md +54 -0
docs/examples/integration/integration-examples.md +58 -0
docs/examples/overview.md +37 -0
docs/examples/trading/trading-examples.md +46 -0
docs/features/causal-reasoning/advanced-topics.md +101 -0
docs/features/causal-reasoning/counterfactuals.md +43 -0
docs/features/causal-reasoning/do-calculus.md +50 -0
docs/features/causal-reasoning/overview.md +47 -0
docs/features/causal-reasoning/structural-models.md +52 -0
docs/features/hybrid-agent/advanced-components.md +55 -0
docs/features/hybrid-agent/core-components.md +64 -0
docs/features/hybrid-agent/overview.md +34 -0
docs/features/image-annotation/engine.md +82 -0
docs/features/image-annotation/features.md +113 -0
docs/features/image-annotation/integration.md +75 -0
docs/features/image-annotation/overview.md +53 -0
docs/features/image-annotation/quickstart.md +73 -0
docs/features/policy-engine/doctrine-ledger.md +105 -0
docs/features/policy-engine/monitoring.md +44 -0
docs/features/policy-engine/mpc-control.md +89 -0
docs/features/policy-engine/overview.md +46 -0
docs/getting-started/configuration.md +225 -0
docs/getting-started/first-agent.md +164 -0
docs/getting-started/installation.md +144 -0
docs/getting-started/quickstart.md +137 -0
docs/index.md +118 -0
docs/js/mathjax.js +13 -0
docs/lrm/discovery_proof_notes.md +25 -0
docs/lrm/finetune_full.md +83 -0
docs/lrm/math_appendix.md +120 -0
docs/lrm/overview.md +32 -0
docs/mkdocs.yml +238 -0
docs/stylesheets/extra.css +21 -0
docs_generated/crca_core/CounterfactualResult.md +12 -0
docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
docs_generated/crca_core/DraftSpec.md +13 -0
docs_generated/crca_core/EstimateResult.md +13 -0
docs_generated/crca_core/IdentificationResult.md +17 -0
docs_generated/crca_core/InterventionDesignResult.md +12 -0
docs_generated/crca_core/LockedSpec.md +15 -0
docs_generated/crca_core/RefusalResult.md +12 -0
docs_generated/crca_core/ValidationReport.md +9 -0
docs_generated/crca_core/index.md +13 -0
examples/general_agent_example.py +277 -0
examples/general_agent_quickstart.py +202 -0
examples/general_agent_simple.py +92 -0
examples/hybrid_agent_auto_extraction.py +84 -0
examples/hybrid_agent_dictionary_demo.py +104 -0
examples/hybrid_agent_enhanced.py +179 -0
examples/hybrid_agent_general_knowledge.py +107 -0
examples/image_annotation_quickstart.py +328 -0
examples/test_hybrid_fixes.py +77 -0
image_annotation/__init__.py +27 -0
image_annotation/annotation_engine.py +2593 -0
install_cuda_wsl2.sh +59 -0
install_deepspeed.sh +56 -0
install_deepspeed_simple.sh +87 -0
mkdocs.yml +252 -0
ollama/Modelfile +8 -0
prompts/__init__.py +2 -1
prompts/default_crca.py +9 -1
prompts/general_agent.py +227 -0
prompts/image_annotation.py +56 -0
pyproject.toml +17 -2
requirements-docs.txt +10 -0
requirements.txt +21 -2
schemas/__init__.py +26 -1
schemas/annotation.py +222 -0
schemas/conversation.py +193 -0
schemas/hybrid.py +211 -0
schemas/reasoning.py +276 -0
schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
schemas_export/crca_core/DraftSpec.schema.json +635 -0
schemas_export/crca_core/EstimateResult.schema.json +113 -0
schemas_export/crca_core/IdentificationResult.schema.json +145 -0
schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
schemas_export/crca_core/LockedSpec.schema.json +646 -0
schemas_export/crca_core/RefusalResult.schema.json +90 -0
schemas_export/crca_core/ValidationReport.schema.json +62 -0
scripts/build_lrm_dataset.py +80 -0
scripts/export_crca_core_schemas.py +54 -0
scripts/export_hf_lrm.py +37 -0
scripts/export_ollama_gguf.py +45 -0
scripts/generate_changelog.py +157 -0
scripts/generate_crca_core_docs_from_schemas.py +86 -0
scripts/run_crca_core_benchmarks.py +163 -0
scripts/run_full_finetune.py +198 -0
scripts/run_lrm_eval.py +31 -0
templates/graph_management.py +29 -0
tests/conftest.py +9 -0
tests/test_core.py +2 -3
tests/test_crca_core_discovery_tabular.py +15 -0
tests/test_crca_core_estimate_dowhy.py +36 -0
tests/test_crca_core_identify.py +18 -0
tests/test_crca_core_intervention_design.py +36 -0
tests/test_crca_core_linear_gaussian_scm.py +69 -0
tests/test_crca_core_spec.py +25 -0
tests/test_crca_core_timeseries_pcmci.py +15 -0
tests/test_crca_llm_coauthor.py +12 -0
tests/test_crca_llm_orchestrator.py +80 -0
tests/test_hybrid_agent_llm_enhanced.py +556 -0
tests/test_image_annotation_demo.py +376 -0
tests/test_image_annotation_operational.py +408 -0
tests/test_image_annotation_unit.py +551 -0
tests/test_training_moe.py +13 -0
training/__init__.py +42 -0
training/datasets.py +140 -0
training/deepspeed_zero2_0_5b.json +22 -0
training/deepspeed_zero2_1_5b.json +22 -0
training/deepspeed_zero3_0_5b.json +28 -0
training/deepspeed_zero3_14b.json +28 -0
training/deepspeed_zero3_h100_3gpu.json +20 -0
training/deepspeed_zero3_offload.json +28 -0
training/eval.py +92 -0
training/finetune.py +516 -0
training/public_datasets.py +89 -0
training_data/react_train.jsonl +7473 -0
utils/agent_discovery.py +311 -0
utils/batch_processor.py +317 -0
utils/conversation.py +78 -0
utils/edit_distance.py +118 -0
utils/formatter.py +33 -0
utils/graph_reasoner.py +530 -0
utils/rate_limiter.py +283 -0
utils/router.py +2 -2
utils/tool_discovery.py +307 -0
webui/__init__.py +10 -0
webui/app.py +229 -0
webui/config.py +104 -0
webui/static/css/style.css +332 -0
webui/static/js/main.js +284 -0
webui/templates/index.html +42 -0
tests/test_crca_excel.py +0 -166
tests/test_data_broker.py +0 -424
tests/test_palantir.py +0 -349
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
{crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0

utils/agent_discovery.py ADDED Viewed

@@ -0,0 +1,311 @@
+"""
+Agent discovery utilities.
+Provides functionality for:
+- Auto-discovery of AOP instances
+- Auto-discovery of router instances
+- Agent listing and metadata
+- Route-first routing helpers
+"""
+import inspect
+import sys
+from typing import Any, Dict, List, Optional, Union
+from loguru import logger
+# Try to import AOP and Router
+try:
+    from utils.aop import AOP
+    AOP_AVAILABLE = True
+except ImportError:
+    AOP = None
+    AOP_AVAILABLE = False
+    logger.debug("AOP not available for agent discovery")
+try:
+    from utils.router import SwarmRouter
+    ROUTER_AVAILABLE = True
+except ImportError:
+    SwarmRouter = None
+    ROUTER_AVAILABLE = False
+    logger.debug("Router not available for agent discovery")
+def discover_aop_instances() -> List[Any]:
+    """Auto-discover AOP instances in the current runtime.
+    Searches for AOP instances in:
+    - Global namespace
+    - Module-level variables
+    - Active objects
+    Returns:
+        List of discovered AOP instances
+    """
+    if not AOP_AVAILABLE:
+        return []
+    instances = []
+    # Search in global namespace
+    try:
+        frame = sys._getframe(1)
+        while frame:
+            for name, obj in frame.f_globals.items():
+                if isinstance(obj, AOP):
+                    instances.append(obj)
+            frame = frame.f_back
+    except Exception as e:
+        logger.debug(f"Error discovering AOP instances from frames: {e}")
+    # Search in module-level variables (limited approach)
+    # This is a best-effort discovery
+    try:
+        import gc
+        for obj in gc.get_objects():
+            if isinstance(obj, AOP):
+                if obj not in instances:
+                    instances.append(obj)
+    except Exception as e:
+        logger.debug(f"Error discovering AOP instances from GC: {e}")
+    logger.debug(f"Discovered {len(instances)} AOP instance(s)")
+    return instances
+def discover_router_instances() -> List[Any]:
+    """Auto-discover router instances in the current runtime.
+    Searches for router instances in:
+    - Global namespace
+    - Module-level variables
+    - Active objects
+    Returns:
+        List of discovered router instances
+    """
+    if not ROUTER_AVAILABLE:
+        return []
+    instances = []
+    # Search in global namespace
+    try:
+        frame = sys._getframe(1)
+        while frame:
+            for name, obj in frame.f_globals.items():
+                if isinstance(obj, SwarmRouter):
+                    instances.append(obj)
+            frame = frame.f_back
+    except Exception as e:
+        logger.debug(f"Error discovering router instances from frames: {e}")
+    # Search in module-level variables
+    try:
+        import gc
+        for obj in gc.get_objects():
+            if isinstance(obj, SwarmRouter):
+                if obj not in instances:
+                    instances.append(obj)
+    except Exception as e:
+        logger.debug(f"Error discovering router instances from GC: {e}")
+    logger.debug(f"Discovered {len(instances)} router instance(s)")
+    return instances
+def get_agents_from_aop(aop_instance: Any) -> Dict[str, Any]:
+    """Get list of agents from an AOP instance.
+    Args:
+        aop_instance: AOP instance to query
+    Returns:
+        Dictionary mapping agent names to agent metadata
+    """
+    if not AOP_AVAILABLE or not isinstance(aop_instance, AOP):
+        return {}
+    agents = {}
+    try:
+        if hasattr(aop_instance, 'agents'):
+            for agent_name, agent in aop_instance.agents.items():
+                agents[agent_name] = {
+                    "name": agent_name,
+                    "type": type(agent).__name__,
+                    "description": getattr(agent, 'agent_description', 'No description'),
+                    "available": True,
+                }
+        if hasattr(aop_instance, 'tool_configs'):
+            for tool_name, config in aop_instance.tool_configs.items():
+                if tool_name not in agents:
+                    agents[tool_name] = {
+                        "name": tool_name,
+                        "type": "tool",
+                        "description": getattr(config, 'tool_description', 'No description'),
+                        "available": True,
+                    }
+    except Exception as e:
+        logger.error(f"Error getting agents from AOP instance: {e}")
+    return agents
+def get_agents_from_router(router_instance: Any) -> Dict[str, Any]:
+    """Get list of agents from a router instance.
+    Args:
+        router_instance: Router instance to query
+    Returns:
+        Dictionary mapping agent names to agent metadata
+    """
+    if not ROUTER_AVAILABLE or not isinstance(router_instance, SwarmRouter):
+        return {}
+    agents = {}
+    try:
+        if hasattr(router_instance, 'agents'):
+            for i, agent in enumerate(router_instance.agents):
+                agent_name = getattr(agent, 'agent_name', f"agent_{i}")
+                agents[agent_name] = {
+                    "name": agent_name,
+                    "type": type(agent).__name__,
+                    "description": getattr(agent, 'agent_description', 'No description'),
+                    "available": True,
+                }
+    except Exception as e:
+        logger.error(f"Error getting agents from router instance: {e}")
+    return agents
+def discover_all_agents(
+    aop_instances: Optional[List[Any]] = None,
+    router_instances: Optional[List[Any]] = None,
+) -> Dict[str, Any]:
+    """Discover all available agents from AOP and router instances.
+    Args:
+        aop_instances: Optional list of AOP instances (auto-discovered if None)
+        router_instances: Optional list of router instances (auto-discovered if None)
+    Returns:
+        Dictionary mapping agent names to agent metadata
+    """
+    all_agents = {}
+    # Discover AOP instances if not provided
+    if aop_instances is None:
+        aop_instances = discover_aop_instances()
+    # Discover router instances if not provided
+    if router_instances is None:
+        router_instances = discover_router_instances()
+    # Get agents from AOP instances
+    for aop_instance in aop_instances:
+        agents = get_agents_from_aop(aop_instance)
+        all_agents.update(agents)
+    # Get agents from router instances
+    for router_instance in router_instances:
+        agents = get_agents_from_router(router_instance)
+        all_agents.update(agents)
+    logger.info(f"Discovered {len(all_agents)} total agent(s)")
+    return all_agents
+def find_best_agent_for_task(
+    task: str,
+    available_agents: Dict[str, Any],
+    aop_instances: Optional[List[Any]] = None,
+    router_instances: Optional[List[Any]] = None,
+) -> Optional[tuple[str, Any, str]]:
+    """Find the best agent for a given task (route-first strategy).
+    Args:
+        task: Task description
+        available_agents: Dictionary of available agents
+        aop_instances: Optional list of AOP instances
+        router_instances: Optional list of router instances
+    Returns:
+        Tuple of (agent_name, agent_instance, source) or None if no suitable agent found
+        source is either 'aop' or 'router'
+    """
+    if not available_agents:
+        return None
+    # Simple keyword-based matching (can be enhanced with LLM-based routing)
+    task_lower = task.lower()
+    # Check for specialized agents first (route-first strategy)
+    for agent_name, agent_info in available_agents.items():
+        description = agent_info.get("description", "").lower()
+        agent_type = agent_info.get("type", "").lower()
+        # Simple matching logic
+        if any(keyword in description or keyword in agent_type for keyword in task_lower.split()):
+            # Try to get the actual agent instance
+            if aop_instances:
+                for aop in aop_instances:
+                    if hasattr(aop, 'agents') and agent_name in aop.agents:
+                        return (agent_name, aop.agents[agent_name], "aop")
+            if router_instances:
+                for router in router_instances:
+                    if hasattr(router, 'agents'):
+                        for agent in router.agents:
+                            if getattr(agent, 'agent_name', None) == agent_name:
+                                return (agent_name, agent, "router")
+    # If no match found, return None (fallback to direct handling)
+    return None
+def route_to_agent(
+    agent_name: str,
+    task: str,
+    aop_instances: Optional[List[Any]] = None,
+    router_instances: Optional[List[Any]] = None,
+) -> Optional[Any]:
+    """Route a task to a specific agent.
+    Args:
+        agent_name: Name of the agent to route to
+        task: Task to execute
+        aop_instances: Optional list of AOP instances
+        router_instances: Optional list of router instances
+    Returns:
+        Agent response or None if agent not found
+    """
+    # Discover instances if not provided
+    if aop_instances is None:
+        aop_instances = discover_aop_instances()
+    if router_instances is None:
+        router_instances = discover_router_instances()
+    # Try AOP instances first
+    for aop in aop_instances:
+        if hasattr(aop, 'agents') and agent_name in aop.agents:
+            agent = aop.agents[agent_name]
+            if hasattr(agent, 'run'):
+                return agent.run(task)
+    # Try router instances
+    for router in router_instances:
+        if hasattr(router, 'agents'):
+            for agent in router.agents:
+                if getattr(agent, 'agent_name', None) == agent_name:
+                    if hasattr(agent, 'run'):
+                        return agent.run(task)
+    logger.warning(f"Agent '{agent_name}' not found for routing")
+    return None

utils/batch_processor.py ADDED Viewed

@@ -0,0 +1,317 @@
+"""
+Batch processing utilities.
+Provides functionality for:
+- Parallel task execution
+- Batch rate limiting
+- Progress tracking
+- Error aggregation
+"""
+import asyncio
+import concurrent.futures
+import time
+from dataclasses import dataclass, field
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from loguru import logger
+try:
+    from utils.rate_limiter import RateLimiter, RateLimitConfig
+    RATE_LIMITER_AVAILABLE = True
+except ImportError:
+    RATE_LIMITER_AVAILABLE = False
+    logger.debug("Rate limiter not available for batch processing")
+@dataclass
+class BatchResult:
+    """Outcome of one task run by a batch processing operation.
+    Attributes:
+        task_id: Task identifier
+        success: Whether task succeeded
+        result: Task result (if successful); stays None for a failed task
+        error: Error message (if failed); stays None for a successful task
+        execution_time: Time taken to execute task, in seconds
+    """
+    task_id: str
+    success: bool
+    result: Any = None
+    error: Optional[str] = None
+    execution_time: float = 0.0
+@dataclass
+class BatchStats:
+    """Aggregate statistics for one batch processing run.
+    All fields default to zero, which is what an empty batch reports.
+    Attributes:
+        total_tasks: Total number of tasks
+        completed_tasks: Number of tasks that finished successfully
+        failed_tasks: Number of failed tasks
+        total_time: Total execution time of the whole batch, in seconds
+        average_time: Average execution time per task; BatchProcessor fills
+            this with total_time divided by the number of results
+    """
+    total_tasks: int = 0
+    completed_tasks: int = 0
+    failed_tasks: int = 0
+    total_time: float = 0.0
+    average_time: float = 0.0
+class BatchProcessor:
+    """Batch processor for parallel task execution.
+    Provides functionality for:
+    - Parallel task execution with configurable workers
+    - Batch rate limiting
+    - Progress tracking
+    - Error aggregation
+    """
+    def __init__(
+        self,
+        max_workers: int = 4,
+        rate_limiter: Optional[Any] = None,
+        rate_limit_config: Optional[Any] = None,
+    ):
+        """Initialize batch processor.
+        Args:
+            max_workers: Maximum number of parallel workers
+            rate_limiter: Optional rate limiter instance
+            rate_limit_config: Optional rate limit configuration
+        """
+        self.max_workers = max_workers
+        # Set up rate limiting
+        if rate_limiter is not None:
+            self.rate_limiter = rate_limiter
+        elif RATE_LIMITER_AVAILABLE and rate_limit_config is not None:
+            self.rate_limiter = RateLimiter(rate_limit_config)
+        elif RATE_LIMITER_AVAILABLE:
+            # Default rate limiting for batch processing
+            config = RateLimitConfig(
+                requests_per_minute=100,
+                requests_per_hour=5000,
+            )
+            self.rate_limiter = RateLimiter(config)
+        else:
+            self.rate_limiter = None
+        logger.debug(f"Initialized BatchProcessor with {max_workers} workers")
+    def process_batch(
+        self,
+        tasks: List[Any],
+        task_fn: Callable,
+        task_ids: Optional[List[str]] = None,
+        user_id: str = "default",
+        show_progress: bool = True,
+    ) -> Tuple[List[BatchResult], BatchStats]:
+        """Process a batch of tasks in parallel.
+        Args:
+            tasks: List of task inputs
+            task_fn: Function to execute for each task
+            task_ids: Optional list of task identifiers
+            user_id: User identifier for rate limiting
+            show_progress: Whether to show progress updates
+        Returns:
+            Tuple of (results, stats)
+        """
+        if not tasks:
+            return [], BatchStats()
+        start_time = time.time()
+        results: List[BatchResult] = []
+        # Generate task IDs if not provided
+        if task_ids is None:
+            task_ids = [f"task_{i}" for i in range(len(tasks))]
+        if len(task_ids) != len(tasks):
+            logger.warning("Task IDs length doesn't match tasks length, generating new IDs")
+            task_ids = [f"task_{i}" for i in range(len(tasks))]
+        def process_single_task(task: Any, task_id: str) -> BatchResult:
+            """Process a single task with error handling."""
+            task_start = time.time()
+            try:
+                # Apply rate limiting if available
+                if self.rate_limiter:
+                    is_allowed, error_msg = self.rate_limiter.check_rate_limit(user_id)
+                    if not is_allowed:
+                        # Wait if rate limited
+                        self.rate_limiter.wait_if_rate_limited(user_id, max_wait=60.0)
+                # Execute task
+                result = task_fn(task)
+                execution_time = time.time() - task_start
+                return BatchResult(
+                    task_id=task_id,
+                    success=True,
+                    result=result,
+                    execution_time=execution_time,
+                )
+            except Exception as e:
+                execution_time = time.time() - task_start
+                error_msg = str(e)
+                logger.error(f"Error processing task {task_id}: {error_msg}")
+                return BatchResult(
+                    task_id=task_id,
+                    success=False,
+                    error=error_msg,
+                    execution_time=execution_time,
+                )
+        # Process tasks in parallel
+        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            # Submit all tasks
+            future_to_task = {
+                executor.submit(process_single_task, task, task_id): (task, task_id)
+                for task, task_id in zip(tasks, task_ids)
+            }
+            # Collect results as they complete
+            completed = 0
+            for future in concurrent.futures.as_completed(future_to_task):
+                result = future.result()
+                results.append(result)
+                completed += 1
+                if show_progress:
+                    logger.info(f"Batch progress: {completed}/{len(tasks)} tasks completed")
+        # Calculate statistics
+        total_time = time.time() - start_time
+        completed_tasks = sum(1 for r in results if r.success)
+        failed_tasks = len(results) - completed_tasks
+        average_time = total_time / len(results) if results else 0.0
+        stats = BatchStats(
+            total_tasks=len(tasks),
+            completed_tasks=completed_tasks,
+            failed_tasks=failed_tasks,
+            total_time=total_time,
+            average_time=average_time,
+        )
+        logger.info(
+            f"Batch processing complete: {completed_tasks}/{len(tasks)} succeeded, "
+            f"{failed_tasks} failed, {total_time:.2f}s total"
+        )
+        return results, stats
+    async def process_batch_async(
+        self,
+        tasks: List[Any],
+        task_fn: Callable,
+        task_ids: Optional[List[str]] = None,
+        user_id: str = "default",
+        show_progress: bool = True,
+    ) -> Tuple[List[BatchResult], BatchStats]:
+        """Process a batch of tasks asynchronously.
+        Args:
+            tasks: List of task inputs
+            task_fn: Async function to execute for each task
+            task_ids: Optional list of task identifiers
+            user_id: User identifier for rate limiting
+            show_progress: Whether to show progress updates
+        Returns:
+            Tuple of (results, stats)
+        """
+        if not tasks:
+            return [], BatchStats()
+        start_time = time.time()
+        results: List[BatchResult] = []
+        # Generate task IDs if not provided
+        if task_ids is None:
+            task_ids = [f"task_{i}" for i in range(len(tasks))]
+        if len(task_ids) != len(tasks):
+            logger.warning("Task IDs length doesn't match tasks length, generating new IDs")
+            task_ids = [f"task_{i}" for i in range(len(tasks))]
+        async def process_single_task_async(task: Any, task_id: str) -> BatchResult:
+            """Process a single task asynchronously with error handling."""
+            task_start = time.time()
+            try:
+                # Apply rate limiting if available
+                if self.rate_limiter:
+                    is_allowed, error_msg = self.rate_limiter.check_rate_limit(user_id)
+                    if not is_allowed:
+                        # Wait if rate limited (async sleep)
+                        await asyncio.sleep(1.0)
+                        # Try again
+                        is_allowed, error_msg = self.rate_limiter.check_rate_limit(user_id)
+                        if not is_allowed:
+                            await asyncio.sleep(5.0)
+                # Execute task (assume it's async or can be awaited)
+                if asyncio.iscoroutinefunction(task_fn):
+                    result = await task_fn(task)
+                else:
+                    # Run sync function in executor
+                    try:
+                        loop = asyncio.get_running_loop()
+                    except RuntimeError:
+                        # No running loop, create new one
+                        loop = asyncio.new_event_loop()
+                        asyncio.set_event_loop(loop)
+                    result = await loop.run_in_executor(None, task_fn, task)
+                execution_time = time.time() - task_start
+                return BatchResult(
+                    task_id=task_id,
+                    success=True,
+                    result=result,
+                    execution_time=execution_time,
+                )
+            except Exception as e:
+                execution_time = time.time() - task_start
+                error_msg = str(e)
+                logger.error(f"Error processing task {task_id}: {error_msg}")
+                return BatchResult(
+                    task_id=task_id,
+                    success=False,
+                    error=error_msg,
+                    execution_time=execution_time,
+                )
+        # Process tasks concurrently
+        tasks_to_run = [process_single_task_async(task, task_id) for task, task_id in zip(tasks, task_ids)]
+        results = await asyncio.gather(*tasks_to_run)
+        # Calculate statistics
+        total_time = time.time() - start_time
+        completed_tasks = sum(1 for r in results if r.success)
+        failed_tasks = len(results) - completed_tasks
+        average_time = total_time / len(results) if results else 0.0
+        stats = BatchStats(
+            total_tasks=len(tasks),
+            completed_tasks=completed_tasks,
+            failed_tasks=failed_tasks,
+            total_time=total_time,
+            average_time=average_time,
+        )
+        logger.info(
+            f"Async batch processing complete: {completed_tasks}/{len(tasks)} succeeded, "
+            f"{failed_tasks} failed, {total_time:.2f}s total"
+        )
+        return results, stats

utils/conversation.py CHANGED Viewed

@@ -1192,4 +1192,82 @@ class Conversation:
         except Exception as e:
             logger.error(f"Dynamic auto chunking failed: {e}")
             return self._return_history_as_string_worker()
+    def get_conversation_stats(self) -> Dict[str, Any]:
+        """Get statistics about the conversation.
+        Returns:
+            Dictionary with conversation statistics
+        """
+        stats = {
+            "total_messages": len(self.conversation_history),
+            "total_tokens": 0,
+            "messages_by_role": {},
+            "created_at": getattr(self, "created_at", None),
+            "last_updated": None,
+        }
+        # Count messages by role
+        for message in self.conversation_history:
+            role = message.get("role", "unknown")
+            stats["messages_by_role"][role] = stats["messages_by_role"].get(role, 0) + 1
+            # Sum token counts if available
+            if "token_count" in message:
+                stats["total_tokens"] += message["token_count"]
+            # Get last updated timestamp
+            if "timestamp" in message:
+                stats["last_updated"] = message["timestamp"]
+        return stats
+    def get_enhanced_metadata(self) -> Dict[str, Any]:
+        """Get enhanced metadata about the conversation.
+        Returns:
+            Dictionary with enhanced metadata
+        """
+        metadata = {
+            "id": self.id,
+            "name": self.name,
+            "created_at": getattr(self, "created_at", None),
+            "context_length": self.context_length,
+            "export_method": self.export_method,
+            "save_filepath": self.save_filepath,
+            "stats": self.get_conversation_stats(),
+        }
+        return metadata
+    def save_with_metadata(self, filepath: Optional[str] = None, force: bool = True) -> None:
+        """Save conversation with enhanced metadata.
+        Args:
+            filepath: Optional filepath to save to (uses default if None)
+            force: If True, saves regardless of autosave setting
+        """
+        if filepath:
+            self.save_filepath = filepath
+        # Prepare data with metadata
+        data = {
+            "metadata": self.get_enhanced_metadata(),
+            "history": self.conversation_history,
+        }
+        try:
+            self._ensure_save_path()
+            if self.export_method == "json":
+                with open(self.save_filepath, "w", encoding="utf-8") as f:
+                    json.dump(data, f, indent=4, default=str)
+            else:
+                with open(self.save_filepath, "w", encoding="utf-8") as f:
+                    yaml.dump(data, f, indent=4, default_flow_style=False, sort_keys=False)
+            logger.info(f"Conversation with metadata saved to {self.save_filepath}")
+        except Exception as e:
+            logger.error(f"Failed to save conversation with metadata: {str(e)}\n{traceback.format_exc()}")
+            raise

crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl