crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CRCA.py +172 -7
- MODEL_CARD.md +53 -0
- PKG-INFO +8 -2
- RELEASE_NOTES.md +17 -0
- STABILITY.md +19 -0
- architecture/hybrid/consistency_engine.py +362 -0
- architecture/hybrid/conversation_manager.py +421 -0
- architecture/hybrid/explanation_generator.py +452 -0
- architecture/hybrid/few_shot_learner.py +533 -0
- architecture/hybrid/graph_compressor.py +286 -0
- architecture/hybrid/hybrid_agent.py +4398 -0
- architecture/hybrid/language_compiler.py +623 -0
- architecture/hybrid/main,py +0 -0
- architecture/hybrid/reasoning_tracker.py +322 -0
- architecture/hybrid/self_verifier.py +524 -0
- architecture/hybrid/task_decomposer.py +567 -0
- architecture/hybrid/text_corrector.py +341 -0
- benchmark_results/crca_core_benchmarks.json +178 -0
- branches/crca_sd/crca_sd_realtime.py +6 -2
- branches/general_agent/__init__.py +102 -0
- branches/general_agent/general_agent.py +1400 -0
- branches/general_agent/personality.py +169 -0
- branches/general_agent/utils/__init__.py +19 -0
- branches/general_agent/utils/prompt_builder.py +170 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
- crca_core/__init__.py +35 -0
- crca_core/benchmarks/__init__.py +14 -0
- crca_core/benchmarks/synthetic_scm.py +103 -0
- crca_core/core/__init__.py +23 -0
- crca_core/core/api.py +120 -0
- crca_core/core/estimate.py +208 -0
- crca_core/core/godclass.py +72 -0
- crca_core/core/intervention_design.py +174 -0
- crca_core/core/lifecycle.py +48 -0
- crca_core/discovery/__init__.py +9 -0
- crca_core/discovery/tabular.py +193 -0
- crca_core/identify/__init__.py +171 -0
- crca_core/identify/backdoor.py +39 -0
- crca_core/identify/frontdoor.py +48 -0
- crca_core/identify/graph.py +106 -0
- crca_core/identify/id_algorithm.py +43 -0
- crca_core/identify/iv.py +48 -0
- crca_core/models/__init__.py +67 -0
- crca_core/models/provenance.py +56 -0
- crca_core/models/refusal.py +39 -0
- crca_core/models/result.py +83 -0
- crca_core/models/spec.py +151 -0
- crca_core/models/validation.py +68 -0
- crca_core/scm/__init__.py +9 -0
- crca_core/scm/linear_gaussian.py +198 -0
- crca_core/timeseries/__init__.py +6 -0
- crca_core/timeseries/pcmci.py +181 -0
- crca_llm/__init__.py +12 -0
- crca_llm/client.py +85 -0
- crca_llm/coauthor.py +118 -0
- crca_llm/orchestrator.py +289 -0
- crca_llm/types.py +21 -0
- crca_reasoning/__init__.py +16 -0
- crca_reasoning/critique.py +54 -0
- crca_reasoning/godclass.py +206 -0
- crca_reasoning/memory.py +24 -0
- crca_reasoning/rationale.py +10 -0
- crca_reasoning/react_controller.py +81 -0
- crca_reasoning/tool_router.py +97 -0
- crca_reasoning/types.py +40 -0
- crca_sd/__init__.py +15 -0
- crca_sd/crca_sd_core.py +2 -0
- crca_sd/crca_sd_governance.py +2 -0
- crca_sd/crca_sd_mpc.py +2 -0
- crca_sd/crca_sd_realtime.py +2 -0
- crca_sd/crca_sd_tui.py +2 -0
- cuda-keyring_1.1-1_all.deb +0 -0
- cuda-keyring_1.1-1_all.deb.1 +0 -0
- docs/IMAGE_ANNOTATION_USAGE.md +539 -0
- docs/INSTALL_DEEPSPEED.md +125 -0
- docs/api/branches/crca-cg.md +19 -0
- docs/api/branches/crca-q.md +27 -0
- docs/api/branches/crca-sd.md +37 -0
- docs/api/branches/general-agent.md +24 -0
- docs/api/branches/overview.md +19 -0
- docs/api/crca/agent-methods.md +62 -0
- docs/api/crca/operations.md +79 -0
- docs/api/crca/overview.md +32 -0
- docs/api/image-annotation/engine.md +52 -0
- docs/api/image-annotation/overview.md +17 -0
- docs/api/schemas/annotation.md +34 -0
- docs/api/schemas/core-schemas.md +82 -0
- docs/api/schemas/overview.md +32 -0
- docs/api/schemas/policy.md +30 -0
- docs/api/utils/conversation.md +22 -0
- docs/api/utils/graph-reasoner.md +32 -0
- docs/api/utils/overview.md +21 -0
- docs/api/utils/router.md +19 -0
- docs/api/utils/utilities.md +97 -0
- docs/architecture/causal-graphs.md +41 -0
- docs/architecture/data-flow.md +29 -0
- docs/architecture/design-principles.md +33 -0
- docs/architecture/hybrid-agent/components.md +38 -0
- docs/architecture/hybrid-agent/consistency.md +26 -0
- docs/architecture/hybrid-agent/overview.md +44 -0
- docs/architecture/hybrid-agent/reasoning.md +22 -0
- docs/architecture/llm-integration.md +26 -0
- docs/architecture/modular-structure.md +37 -0
- docs/architecture/overview.md +69 -0
- docs/architecture/policy-engine-arch.md +29 -0
- docs/branches/crca-cg/corposwarm.md +39 -0
- docs/branches/crca-cg/esg-scoring.md +30 -0
- docs/branches/crca-cg/multi-agent.md +35 -0
- docs/branches/crca-cg/overview.md +40 -0
- docs/branches/crca-q/alternative-data.md +55 -0
- docs/branches/crca-q/architecture.md +71 -0
- docs/branches/crca-q/backtesting.md +45 -0
- docs/branches/crca-q/causal-engine.md +33 -0
- docs/branches/crca-q/execution.md +39 -0
- docs/branches/crca-q/market-data.md +60 -0
- docs/branches/crca-q/overview.md +58 -0
- docs/branches/crca-q/philosophy.md +60 -0
- docs/branches/crca-q/portfolio-optimization.md +66 -0
- docs/branches/crca-q/risk-management.md +102 -0
- docs/branches/crca-q/setup.md +65 -0
- docs/branches/crca-q/signal-generation.md +61 -0
- docs/branches/crca-q/signal-validation.md +43 -0
- docs/branches/crca-sd/core.md +84 -0
- docs/branches/crca-sd/governance.md +53 -0
- docs/branches/crca-sd/mpc-solver.md +65 -0
- docs/branches/crca-sd/overview.md +59 -0
- docs/branches/crca-sd/realtime.md +28 -0
- docs/branches/crca-sd/tui.md +20 -0
- docs/branches/general-agent/overview.md +37 -0
- docs/branches/general-agent/personality.md +36 -0
- docs/branches/general-agent/prompt-builder.md +30 -0
- docs/changelog/index.md +79 -0
- docs/contributing/code-style.md +69 -0
- docs/contributing/documentation.md +43 -0
- docs/contributing/overview.md +29 -0
- docs/contributing/testing.md +29 -0
- docs/core/crcagent/async-operations.md +65 -0
- docs/core/crcagent/automatic-extraction.md +107 -0
- docs/core/crcagent/batch-prediction.md +80 -0
- docs/core/crcagent/bayesian-inference.md +60 -0
- docs/core/crcagent/causal-graph.md +92 -0
- docs/core/crcagent/counterfactuals.md +96 -0
- docs/core/crcagent/deterministic-simulation.md +78 -0
- docs/core/crcagent/dual-mode-operation.md +82 -0
- docs/core/crcagent/initialization.md +88 -0
- docs/core/crcagent/optimization.md +65 -0
- docs/core/crcagent/overview.md +63 -0
- docs/core/crcagent/time-series.md +57 -0
- docs/core/schemas/annotation.md +30 -0
- docs/core/schemas/core-schemas.md +82 -0
- docs/core/schemas/overview.md +30 -0
- docs/core/schemas/policy.md +41 -0
- docs/core/templates/base-agent.md +31 -0
- docs/core/templates/feature-mixins.md +31 -0
- docs/core/templates/overview.md +29 -0
- docs/core/templates/templates-guide.md +75 -0
- docs/core/tools/mcp-client.md +34 -0
- docs/core/tools/overview.md +24 -0
- docs/core/utils/conversation.md +27 -0
- docs/core/utils/graph-reasoner.md +29 -0
- docs/core/utils/overview.md +27 -0
- docs/core/utils/router.md +27 -0
- docs/core/utils/utilities.md +97 -0
- docs/css/custom.css +84 -0
- docs/examples/basic-usage.md +57 -0
- docs/examples/general-agent/general-agent-examples.md +50 -0
- docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
- docs/examples/image-annotation/image-annotation-examples.md +54 -0
- docs/examples/integration/integration-examples.md +58 -0
- docs/examples/overview.md +37 -0
- docs/examples/trading/trading-examples.md +46 -0
- docs/features/causal-reasoning/advanced-topics.md +101 -0
- docs/features/causal-reasoning/counterfactuals.md +43 -0
- docs/features/causal-reasoning/do-calculus.md +50 -0
- docs/features/causal-reasoning/overview.md +47 -0
- docs/features/causal-reasoning/structural-models.md +52 -0
- docs/features/hybrid-agent/advanced-components.md +55 -0
- docs/features/hybrid-agent/core-components.md +64 -0
- docs/features/hybrid-agent/overview.md +34 -0
- docs/features/image-annotation/engine.md +82 -0
- docs/features/image-annotation/features.md +113 -0
- docs/features/image-annotation/integration.md +75 -0
- docs/features/image-annotation/overview.md +53 -0
- docs/features/image-annotation/quickstart.md +73 -0
- docs/features/policy-engine/doctrine-ledger.md +105 -0
- docs/features/policy-engine/monitoring.md +44 -0
- docs/features/policy-engine/mpc-control.md +89 -0
- docs/features/policy-engine/overview.md +46 -0
- docs/getting-started/configuration.md +225 -0
- docs/getting-started/first-agent.md +164 -0
- docs/getting-started/installation.md +144 -0
- docs/getting-started/quickstart.md +137 -0
- docs/index.md +118 -0
- docs/js/mathjax.js +13 -0
- docs/lrm/discovery_proof_notes.md +25 -0
- docs/lrm/finetune_full.md +83 -0
- docs/lrm/math_appendix.md +120 -0
- docs/lrm/overview.md +32 -0
- docs/mkdocs.yml +238 -0
- docs/stylesheets/extra.css +21 -0
- docs_generated/crca_core/CounterfactualResult.md +12 -0
- docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
- docs_generated/crca_core/DraftSpec.md +13 -0
- docs_generated/crca_core/EstimateResult.md +13 -0
- docs_generated/crca_core/IdentificationResult.md +17 -0
- docs_generated/crca_core/InterventionDesignResult.md +12 -0
- docs_generated/crca_core/LockedSpec.md +15 -0
- docs_generated/crca_core/RefusalResult.md +12 -0
- docs_generated/crca_core/ValidationReport.md +9 -0
- docs_generated/crca_core/index.md +13 -0
- examples/general_agent_example.py +277 -0
- examples/general_agent_quickstart.py +202 -0
- examples/general_agent_simple.py +92 -0
- examples/hybrid_agent_auto_extraction.py +84 -0
- examples/hybrid_agent_dictionary_demo.py +104 -0
- examples/hybrid_agent_enhanced.py +179 -0
- examples/hybrid_agent_general_knowledge.py +107 -0
- examples/image_annotation_quickstart.py +328 -0
- examples/test_hybrid_fixes.py +77 -0
- image_annotation/__init__.py +27 -0
- image_annotation/annotation_engine.py +2593 -0
- install_cuda_wsl2.sh +59 -0
- install_deepspeed.sh +56 -0
- install_deepspeed_simple.sh +87 -0
- mkdocs.yml +252 -0
- ollama/Modelfile +8 -0
- prompts/__init__.py +2 -1
- prompts/default_crca.py +9 -1
- prompts/general_agent.py +227 -0
- prompts/image_annotation.py +56 -0
- pyproject.toml +17 -2
- requirements-docs.txt +10 -0
- requirements.txt +21 -2
- schemas/__init__.py +26 -1
- schemas/annotation.py +222 -0
- schemas/conversation.py +193 -0
- schemas/hybrid.py +211 -0
- schemas/reasoning.py +276 -0
- schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
- schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
- schemas_export/crca_core/DraftSpec.schema.json +635 -0
- schemas_export/crca_core/EstimateResult.schema.json +113 -0
- schemas_export/crca_core/IdentificationResult.schema.json +145 -0
- schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
- schemas_export/crca_core/LockedSpec.schema.json +646 -0
- schemas_export/crca_core/RefusalResult.schema.json +90 -0
- schemas_export/crca_core/ValidationReport.schema.json +62 -0
- scripts/build_lrm_dataset.py +80 -0
- scripts/export_crca_core_schemas.py +54 -0
- scripts/export_hf_lrm.py +37 -0
- scripts/export_ollama_gguf.py +45 -0
- scripts/generate_changelog.py +157 -0
- scripts/generate_crca_core_docs_from_schemas.py +86 -0
- scripts/run_crca_core_benchmarks.py +163 -0
- scripts/run_full_finetune.py +198 -0
- scripts/run_lrm_eval.py +31 -0
- templates/graph_management.py +29 -0
- tests/conftest.py +9 -0
- tests/test_core.py +2 -3
- tests/test_crca_core_discovery_tabular.py +15 -0
- tests/test_crca_core_estimate_dowhy.py +36 -0
- tests/test_crca_core_identify.py +18 -0
- tests/test_crca_core_intervention_design.py +36 -0
- tests/test_crca_core_linear_gaussian_scm.py +69 -0
- tests/test_crca_core_spec.py +25 -0
- tests/test_crca_core_timeseries_pcmci.py +15 -0
- tests/test_crca_llm_coauthor.py +12 -0
- tests/test_crca_llm_orchestrator.py +80 -0
- tests/test_hybrid_agent_llm_enhanced.py +556 -0
- tests/test_image_annotation_demo.py +376 -0
- tests/test_image_annotation_operational.py +408 -0
- tests/test_image_annotation_unit.py +551 -0
- tests/test_training_moe.py +13 -0
- training/__init__.py +42 -0
- training/datasets.py +140 -0
- training/deepspeed_zero2_0_5b.json +22 -0
- training/deepspeed_zero2_1_5b.json +22 -0
- training/deepspeed_zero3_0_5b.json +28 -0
- training/deepspeed_zero3_14b.json +28 -0
- training/deepspeed_zero3_h100_3gpu.json +20 -0
- training/deepspeed_zero3_offload.json +28 -0
- training/eval.py +92 -0
- training/finetune.py +516 -0
- training/public_datasets.py +89 -0
- training_data/react_train.jsonl +7473 -0
- utils/agent_discovery.py +311 -0
- utils/batch_processor.py +317 -0
- utils/conversation.py +78 -0
- utils/edit_distance.py +118 -0
- utils/formatter.py +33 -0
- utils/graph_reasoner.py +530 -0
- utils/rate_limiter.py +283 -0
- utils/router.py +2 -2
- utils/tool_discovery.py +307 -0
- webui/__init__.py +10 -0
- webui/app.py +229 -0
- webui/config.py +104 -0
- webui/static/css/style.css +332 -0
- webui/static/js/main.js +284 -0
- webui/templates/index.html +42 -0
- tests/test_crca_excel.py +0 -166
- tests/test_data_broker.py +0 -424
- tests/test_palantir.py +0 -349
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
tests/test_data_broker.py
DELETED
|
@@ -1,424 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Test Suite for Data Broker Agent
|
|
3
|
-
|
|
4
|
-
Tests comprehensive data broker capabilities including:
|
|
5
|
-
- Multi-source data collection
|
|
6
|
-
- Causal dependency modeling
|
|
7
|
-
- Intelligent data routing
|
|
8
|
-
- Pipeline management
|
|
9
|
-
- LLM-powered data discovery
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import pytest
|
|
13
|
-
import sys
|
|
14
|
-
import os
|
|
15
|
-
from unittest.mock import Mock, patch, MagicMock
|
|
16
|
-
from typing import Dict, Any, List
|
|
17
|
-
|
|
18
|
-
# Add parent directory to path
|
|
19
|
-
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
20
|
-
|
|
21
|
-
from data_broker import (
|
|
22
|
-
DataBrokerAgent,
|
|
23
|
-
APIDataSource,
|
|
24
|
-
DatabaseDataSource,
|
|
25
|
-
FileDataSource,
|
|
26
|
-
DataSchema,
|
|
27
|
-
ConsumerRequirement,
|
|
28
|
-
PipelineStage,
|
|
29
|
-
StageType,
|
|
30
|
-
RouteMatchStrategy
|
|
31
|
-
)
|
|
32
|
-
from data_broker.pipeline import filter_stage, validate_stage, aggregate_stage
|
|
33
|
-
from CRCA import CausalRelationType
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
@pytest.fixture
|
|
37
|
-
def broker():
|
|
38
|
-
"""Create a test broker instance."""
|
|
39
|
-
return DataBrokerAgent(
|
|
40
|
-
agent_name="test-broker",
|
|
41
|
-
model_name="gpt-4o-mini",
|
|
42
|
-
max_loops=2,
|
|
43
|
-
routing_strategy=RouteMatchStrategy.COMPOSITE
|
|
44
|
-
)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
@pytest.fixture
|
|
48
|
-
def sample_api_source():
|
|
49
|
-
"""Create a sample API data source."""
|
|
50
|
-
return APIDataSource(
|
|
51
|
-
name="test_sales_api",
|
|
52
|
-
url="https://api.test.com/sales",
|
|
53
|
-
method="GET",
|
|
54
|
-
headers={"Authorization": "Bearer test_token"},
|
|
55
|
-
schema=DataSchema(
|
|
56
|
-
fields={
|
|
57
|
-
"date": "datetime",
|
|
58
|
-
"product_id": "str",
|
|
59
|
-
"quantity": "int",
|
|
60
|
-
"revenue": "float"
|
|
61
|
-
},
|
|
62
|
-
timestamp_field="date"
|
|
63
|
-
),
|
|
64
|
-
update_frequency=3600.0
|
|
65
|
-
)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
@pytest.fixture
|
|
69
|
-
def sample_file_source():
|
|
70
|
-
"""Create a sample file data source."""
|
|
71
|
-
return FileDataSource(
|
|
72
|
-
name="test_inventory_file",
|
|
73
|
-
file_path="test_data/inventory.csv",
|
|
74
|
-
schema=DataSchema(
|
|
75
|
-
fields={
|
|
76
|
-
"product_id": "str",
|
|
77
|
-
"stock_level": "int",
|
|
78
|
-
"warehouse": "str"
|
|
79
|
-
},
|
|
80
|
-
primary_key="product_id"
|
|
81
|
-
)
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
class TestBasicSetup:
|
|
86
|
-
"""Test basic broker setup and initialization."""
|
|
87
|
-
|
|
88
|
-
def test_broker_initialization(self, broker):
|
|
89
|
-
"""Test that broker initializes correctly."""
|
|
90
|
-
assert broker.agent_name == "test-broker"
|
|
91
|
-
assert broker.model_name == "gpt-4o-mini"
|
|
92
|
-
assert len(broker.data_sources) == 0
|
|
93
|
-
assert len(broker.pipelines) == 0
|
|
94
|
-
|
|
95
|
-
def test_register_api_source(self, broker, sample_api_source):
|
|
96
|
-
"""Test registering an API data source."""
|
|
97
|
-
with patch.object(sample_api_source, 'connect', return_value=True):
|
|
98
|
-
result = broker.register_data_source(sample_api_source, auto_connect=True)
|
|
99
|
-
assert result is True
|
|
100
|
-
assert "test_sales_api" in broker.data_sources
|
|
101
|
-
assert "test_sales_api" in broker.data_catalog
|
|
102
|
-
|
|
103
|
-
def test_register_file_source(self, broker, sample_file_source):
|
|
104
|
-
"""Test registering a file data source."""
|
|
105
|
-
with patch.object(sample_file_source, 'connect', return_value=True):
|
|
106
|
-
result = broker.register_data_source(sample_file_source, auto_connect=True)
|
|
107
|
-
assert result is True
|
|
108
|
-
assert "test_inventory_file" in broker.data_sources
|
|
109
|
-
assert "test_inventory_file" in broker.data_catalog
|
|
110
|
-
|
|
111
|
-
def test_register_multiple_sources(self, broker, sample_api_source, sample_file_source):
|
|
112
|
-
"""Test registering multiple data sources."""
|
|
113
|
-
with patch.object(sample_api_source, 'connect', return_value=True), \
|
|
114
|
-
patch.object(sample_file_source, 'connect', return_value=True):
|
|
115
|
-
broker.register_data_source(sample_api_source, auto_connect=True)
|
|
116
|
-
broker.register_data_source(sample_file_source, auto_connect=True)
|
|
117
|
-
|
|
118
|
-
assert len(broker.data_sources) == 2
|
|
119
|
-
assert len(broker.data_catalog) == 2
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
class TestCausalModeling:
|
|
123
|
-
"""Test causal dependency modeling."""
|
|
124
|
-
|
|
125
|
-
def test_add_causal_relationship(self, broker):
|
|
126
|
-
"""Test adding causal relationships."""
|
|
127
|
-
broker.add_causal_relationship(
|
|
128
|
-
"source_a",
|
|
129
|
-
"target_b",
|
|
130
|
-
strength=0.8,
|
|
131
|
-
relation_type=CausalRelationType.DIRECT
|
|
132
|
-
)
|
|
133
|
-
|
|
134
|
-
assert "source_a" in broker.causal_graph
|
|
135
|
-
assert "target_b" in broker.causal_graph["source_a"]
|
|
136
|
-
|
|
137
|
-
def test_analyze_dependencies(self, broker):
|
|
138
|
-
"""Test analyzing data dependencies."""
|
|
139
|
-
# Add some causal relationships
|
|
140
|
-
broker.add_causal_relationship("source_a", "target_b", strength=0.8)
|
|
141
|
-
broker.add_causal_relationship("source_a", "target_c", strength=0.6)
|
|
142
|
-
broker.add_causal_relationship("target_b", "target_d", strength=0.7)
|
|
143
|
-
|
|
144
|
-
# Analyze dependencies
|
|
145
|
-
analysis = broker.analyze_data_dependencies("source_a", "target_b")
|
|
146
|
-
|
|
147
|
-
assert analysis["source"] == "source_a"
|
|
148
|
-
assert analysis["target"] == "target_b"
|
|
149
|
-
assert "dependencies" in analysis
|
|
150
|
-
assert "downstream_impacts" in analysis
|
|
151
|
-
assert "causal_strength" in analysis
|
|
152
|
-
|
|
153
|
-
def test_identify_causal_chain(self, broker):
|
|
154
|
-
"""Test identifying causal chains."""
|
|
155
|
-
broker.add_causal_relationship("a", "b", strength=0.8)
|
|
156
|
-
broker.add_causal_relationship("b", "c", strength=0.7)
|
|
157
|
-
|
|
158
|
-
chain = broker.identify_causal_chain("a", "c")
|
|
159
|
-
assert len(chain) == 3
|
|
160
|
-
assert chain[0] == "a"
|
|
161
|
-
assert chain[-1] == "c"
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
class TestDataCollection:
|
|
165
|
-
"""Test data collection functionality."""
|
|
166
|
-
|
|
167
|
-
def test_collect_data_with_cache(self, broker, sample_api_source):
|
|
168
|
-
"""Test collecting data with caching."""
|
|
169
|
-
with patch.object(sample_api_source, 'connect', return_value=True), \
|
|
170
|
-
patch.object(sample_api_source, 'get_cached_data', return_value={"test": "data"}):
|
|
171
|
-
broker.register_data_source(sample_api_source, auto_connect=False)
|
|
172
|
-
|
|
173
|
-
data = broker.collect_data(
|
|
174
|
-
sources=["test_sales_api"],
|
|
175
|
-
use_cache=True
|
|
176
|
-
)
|
|
177
|
-
|
|
178
|
-
assert "test_sales_api" in data
|
|
179
|
-
assert data["test_sales_api"] == {"test": "data"}
|
|
180
|
-
|
|
181
|
-
def test_collect_data_from_multiple_sources(self, broker, sample_api_source, sample_file_source):
|
|
182
|
-
"""Test collecting from multiple sources."""
|
|
183
|
-
with patch.object(sample_api_source, 'connect', return_value=True), \
|
|
184
|
-
patch.object(sample_file_source, 'connect', return_value=True), \
|
|
185
|
-
patch.object(sample_api_source, 'get_cached_data', return_value={"api": "data"}), \
|
|
186
|
-
patch.object(sample_file_source, 'get_cached_data', return_value={"file": "data"}):
|
|
187
|
-
|
|
188
|
-
broker.register_data_source(sample_api_source, auto_connect=False)
|
|
189
|
-
broker.register_data_source(sample_file_source, auto_connect=False)
|
|
190
|
-
|
|
191
|
-
data = broker.collect_data(
|
|
192
|
-
sources=["test_sales_api", "test_inventory_file"],
|
|
193
|
-
use_cache=True
|
|
194
|
-
)
|
|
195
|
-
|
|
196
|
-
assert len(data) == 2
|
|
197
|
-
assert "test_sales_api" in data
|
|
198
|
-
assert "test_inventory_file" in data
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
class TestIntelligentRouting:
|
|
202
|
-
"""Test intelligent data routing."""
|
|
203
|
-
|
|
204
|
-
def test_register_consumer(self, broker):
|
|
205
|
-
"""Test registering a consumer."""
|
|
206
|
-
consumer = ConsumerRequirement(
|
|
207
|
-
name="test_consumer",
|
|
208
|
-
required_fields=["product_id", "revenue"],
|
|
209
|
-
min_quality_score=0.7,
|
|
210
|
-
causal_dependencies=["test_sales_api"]
|
|
211
|
-
)
|
|
212
|
-
|
|
213
|
-
broker.register_consumer(consumer)
|
|
214
|
-
assert "test_consumer" in broker.routing_engine.consumers
|
|
215
|
-
|
|
216
|
-
def test_route_data_with_causal_matching(self, broker, sample_api_source):
|
|
217
|
-
"""Test routing data using causal matching."""
|
|
218
|
-
# Register source
|
|
219
|
-
with patch.object(sample_api_source, 'connect', return_value=True):
|
|
220
|
-
broker.register_data_source(sample_api_source, auto_connect=False)
|
|
221
|
-
|
|
222
|
-
# Add causal relationship
|
|
223
|
-
broker.add_causal_relationship("test_sales_api", "revenue_prediction", strength=0.8)
|
|
224
|
-
|
|
225
|
-
# Register consumer
|
|
226
|
-
consumer = ConsumerRequirement(
|
|
227
|
-
name="analytics_service",
|
|
228
|
-
required_fields=["product_id", "revenue"],
|
|
229
|
-
causal_dependencies=["test_sales_api"]
|
|
230
|
-
)
|
|
231
|
-
broker.register_consumer(consumer)
|
|
232
|
-
|
|
233
|
-
# Route data
|
|
234
|
-
routes = broker.route_data(
|
|
235
|
-
data="test_sales_api",
|
|
236
|
-
consumers=["analytics_service"]
|
|
237
|
-
)
|
|
238
|
-
|
|
239
|
-
assert "analytics_service" in routes
|
|
240
|
-
assert len(routes["analytics_service"]) > 0
|
|
241
|
-
assert routes["analytics_service"][0].producer == "test_sales_api"
|
|
242
|
-
|
|
243
|
-
def test_route_data_schema_matching(self, broker, sample_api_source):
|
|
244
|
-
"""Test routing based on schema compatibility."""
|
|
245
|
-
# Register source with schema
|
|
246
|
-
with patch.object(sample_api_source, 'connect', return_value=True):
|
|
247
|
-
broker.register_data_source(sample_api_source, auto_connect=False)
|
|
248
|
-
|
|
249
|
-
# Register consumer with matching schema
|
|
250
|
-
consumer = ConsumerRequirement(
|
|
251
|
-
name="matching_consumer",
|
|
252
|
-
required_fields=["product_id", "revenue", "quantity"],
|
|
253
|
-
schema_preferences={
|
|
254
|
-
"product_id": "str",
|
|
255
|
-
"revenue": "float"
|
|
256
|
-
}
|
|
257
|
-
)
|
|
258
|
-
broker.register_consumer(consumer)
|
|
259
|
-
|
|
260
|
-
# Route data
|
|
261
|
-
routes = broker.route_data(data="test_sales_api", consumers=["matching_consumer"])
|
|
262
|
-
|
|
263
|
-
if "matching_consumer" in routes:
|
|
264
|
-
match = routes["matching_consumer"][0]
|
|
265
|
-
assert match.schema_compatibility > 0
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
class TestPipelineManagement:
|
|
269
|
-
"""Test pipeline management."""
|
|
270
|
-
|
|
271
|
-
def test_create_pipeline(self, broker):
|
|
272
|
-
"""Test creating a pipeline."""
|
|
273
|
-
pipeline = broker.create_pipeline(
|
|
274
|
-
name="test_pipeline",
|
|
275
|
-
stages=[],
|
|
276
|
-
causal_optimization=False
|
|
277
|
-
)
|
|
278
|
-
|
|
279
|
-
assert pipeline.name == "test_pipeline"
|
|
280
|
-
assert "test_pipeline" in broker.pipelines
|
|
281
|
-
|
|
282
|
-
def test_pipeline_with_stages(self, broker):
|
|
283
|
-
"""Test creating pipeline with stages."""
|
|
284
|
-
validate_stage_obj = PipelineStage(
|
|
285
|
-
name="validate",
|
|
286
|
-
stage_type=StageType.VALIDATE,
|
|
287
|
-
function=validate_stage,
|
|
288
|
-
config={"required_fields": ["product_id"]}
|
|
289
|
-
)
|
|
290
|
-
|
|
291
|
-
pipeline = broker.create_pipeline(
|
|
292
|
-
name="test_pipeline",
|
|
293
|
-
stages=[validate_stage_obj]
|
|
294
|
-
)
|
|
295
|
-
|
|
296
|
-
assert len(pipeline.stages) == 1
|
|
297
|
-
assert pipeline.stages[0].name == "validate"
|
|
298
|
-
|
|
299
|
-
def test_pipeline_execution(self, broker):
|
|
300
|
-
"""Test executing a pipeline."""
|
|
301
|
-
# Create simple pipeline
|
|
302
|
-
def identity_stage(data, context, **kwargs):
|
|
303
|
-
return data
|
|
304
|
-
|
|
305
|
-
stage = PipelineStage(
|
|
306
|
-
name="identity",
|
|
307
|
-
stage_type=StageType.CUSTOM,
|
|
308
|
-
function=identity_stage
|
|
309
|
-
)
|
|
310
|
-
|
|
311
|
-
pipeline = broker.create_pipeline(
|
|
312
|
-
name="test_pipeline",
|
|
313
|
-
stages=[stage]
|
|
314
|
-
)
|
|
315
|
-
|
|
316
|
-
test_data = {"test": "data"}
|
|
317
|
-
result = pipeline.execute(test_data)
|
|
318
|
-
|
|
319
|
-
assert result == test_data
|
|
320
|
-
assert len(pipeline.execution_history) == 1
|
|
321
|
-
assert pipeline.execution_history[0]["success"] is True
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
class TestLLMDiscovery:
|
|
325
|
-
"""Test LLM-powered data discovery."""
|
|
326
|
-
|
|
327
|
-
def test_simple_discovery(self, broker, sample_api_source):
|
|
328
|
-
"""Test simple keyword-based discovery."""
|
|
329
|
-
with patch.object(sample_api_source, 'connect', return_value=True):
|
|
330
|
-
broker.register_data_source(sample_api_source, auto_connect=False)
|
|
331
|
-
|
|
332
|
-
# Update catalog description for better matching
|
|
333
|
-
broker.data_catalog["test_sales_api"]["description"] = "Sales data API"
|
|
334
|
-
|
|
335
|
-
results = broker.discover_data("sales", use_llm=False)
|
|
336
|
-
|
|
337
|
-
assert len(results) > 0
|
|
338
|
-
assert results[0]["source"] == "test_sales_api"
|
|
339
|
-
assert results[0]["relevance"] > 0
|
|
340
|
-
|
|
341
|
-
def test_llm_discovery(self, broker, sample_api_source):
|
|
342
|
-
"""Test LLM-powered discovery."""
|
|
343
|
-
with patch.object(sample_api_source, 'connect', return_value=True), \
|
|
344
|
-
patch.object(broker, 'step', return_value="test_sales_api"):
|
|
345
|
-
broker.register_data_source(sample_api_source, auto_connect=False)
|
|
346
|
-
|
|
347
|
-
broker.data_catalog["test_sales_api"]["description"] = "Sales data API"
|
|
348
|
-
|
|
349
|
-
results = broker.discover_data("sales data", use_llm=True)
|
|
350
|
-
|
|
351
|
-
assert len(results) >= 0 # May fall back to simple discovery
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
class TestDataQuality:
|
|
355
|
-
"""Test data quality assessment."""
|
|
356
|
-
|
|
357
|
-
def test_assess_data_quality(self, broker, sample_api_source):
|
|
358
|
-
"""Test data quality assessment."""
|
|
359
|
-
with patch.object(sample_api_source, 'connect', return_value=True), \
|
|
360
|
-
patch.object(sample_api_source, 'get_cached_data', return_value={"test": "data"}), \
|
|
361
|
-
patch.object(broker, 'step', return_value="Data quality is good"):
|
|
362
|
-
broker.register_data_source(sample_api_source, auto_connect=False)
|
|
363
|
-
|
|
364
|
-
assessment = broker.assess_data_quality("test_sales_api")
|
|
365
|
-
|
|
366
|
-
assert "quality_score" in assessment
|
|
367
|
-
assert "metrics" in assessment
|
|
368
|
-
assert assessment["source"] == "test_sales_api"
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
class TestErrorHandling:
|
|
372
|
-
"""Test error handling."""
|
|
373
|
-
|
|
374
|
-
def test_register_invalid_source(self, broker, sample_api_source):
|
|
375
|
-
"""Test handling invalid source registration."""
|
|
376
|
-
with patch.object(sample_api_source, 'connect', return_value=False):
|
|
377
|
-
result = broker.register_data_source(sample_api_source, auto_connect=True)
|
|
378
|
-
assert result is False
|
|
379
|
-
|
|
380
|
-
def test_collect_from_nonexistent_source(self, broker):
|
|
381
|
-
"""Test collecting from non-existent source."""
|
|
382
|
-
data = broker.collect_data(sources=["nonexistent"])
|
|
383
|
-
assert "nonexistent" not in data or data.get("nonexistent") is None
|
|
384
|
-
|
|
385
|
-
def test_analyze_nonexistent_source(self, broker):
|
|
386
|
-
"""Test analyzing non-existent source."""
|
|
387
|
-
analysis = broker.analyze_data_dependencies("nonexistent")
|
|
388
|
-
assert "error" in analysis
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
class TestIntegration:
|
|
392
|
-
"""Integration tests."""
|
|
393
|
-
|
|
394
|
-
def test_complete_workflow(self, broker, sample_api_source, sample_file_source):
|
|
395
|
-
"""Test complete workflow from setup to routing."""
|
|
396
|
-
# Setup
|
|
397
|
-
with patch.object(sample_api_source, 'connect', return_value=True), \
|
|
398
|
-
patch.object(sample_file_source, 'connect', return_value=True):
|
|
399
|
-
broker.register_data_source(sample_api_source, auto_connect=False)
|
|
400
|
-
broker.register_data_source(sample_file_source, auto_connect=False)
|
|
401
|
-
|
|
402
|
-
# Model dependencies
|
|
403
|
-
broker.add_causal_relationship("test_sales_api", "revenue_prediction", strength=0.8)
|
|
404
|
-
|
|
405
|
-
# Register consumer
|
|
406
|
-
consumer = ConsumerRequirement(
|
|
407
|
-
name="analytics",
|
|
408
|
-
required_fields=["product_id", "revenue"],
|
|
409
|
-
causal_dependencies=["test_sales_api"]
|
|
410
|
-
)
|
|
411
|
-
broker.register_consumer(consumer)
|
|
412
|
-
|
|
413
|
-
# Route data
|
|
414
|
-
routes = broker.route_data(data="test_sales_api", consumers=["analytics"])
|
|
415
|
-
|
|
416
|
-
# Verify
|
|
417
|
-
assert len(broker.data_sources) == 2
|
|
418
|
-
assert len(broker.routing_engine.consumers) == 1
|
|
419
|
-
assert "analytics" in routes or len(routes) >= 0 # May be empty if no match
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
if __name__ == "__main__":
|
|
423
|
-
pytest.main([__file__, "-v"])
|
|
424
|
-
|