crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
tests/test_data_broker.py DELETED
@@ -1,424 +0,0 @@
1
- """
2
- Test Suite for Data Broker Agent
3
-
4
- Tests comprehensive data broker capabilities including:
5
- - Multi-source data collection
6
- - Causal dependency modeling
7
- - Intelligent data routing
8
- - Pipeline management
9
- - LLM-powered data discovery
10
- """
11
-
12
- import pytest
13
- import sys
14
- import os
15
- from unittest.mock import Mock, patch, MagicMock
16
- from typing import Dict, Any, List
17
-
18
- # Add parent directory to path
19
- sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
20
-
21
- from data_broker import (
22
- DataBrokerAgent,
23
- APIDataSource,
24
- DatabaseDataSource,
25
- FileDataSource,
26
- DataSchema,
27
- ConsumerRequirement,
28
- PipelineStage,
29
- StageType,
30
- RouteMatchStrategy
31
- )
32
- from data_broker.pipeline import filter_stage, validate_stage, aggregate_stage
33
- from CRCA import CausalRelationType
34
-
35
-
36
- @pytest.fixture
37
- def broker():
38
- """Create a test broker instance."""
39
- return DataBrokerAgent(
40
- agent_name="test-broker",
41
- model_name="gpt-4o-mini",
42
- max_loops=2,
43
- routing_strategy=RouteMatchStrategy.COMPOSITE
44
- )
45
-
46
-
47
- @pytest.fixture
48
- def sample_api_source():
49
- """Create a sample API data source."""
50
- return APIDataSource(
51
- name="test_sales_api",
52
- url="https://api.test.com/sales",
53
- method="GET",
54
- headers={"Authorization": "Bearer test_token"},
55
- schema=DataSchema(
56
- fields={
57
- "date": "datetime",
58
- "product_id": "str",
59
- "quantity": "int",
60
- "revenue": "float"
61
- },
62
- timestamp_field="date"
63
- ),
64
- update_frequency=3600.0
65
- )
66
-
67
-
68
- @pytest.fixture
69
- def sample_file_source():
70
- """Create a sample file data source."""
71
- return FileDataSource(
72
- name="test_inventory_file",
73
- file_path="test_data/inventory.csv",
74
- schema=DataSchema(
75
- fields={
76
- "product_id": "str",
77
- "stock_level": "int",
78
- "warehouse": "str"
79
- },
80
- primary_key="product_id"
81
- )
82
- )
83
-
84
-
85
- class TestBasicSetup:
86
- """Test basic broker setup and initialization."""
87
-
88
- def test_broker_initialization(self, broker):
89
- """Test that broker initializes correctly."""
90
- assert broker.agent_name == "test-broker"
91
- assert broker.model_name == "gpt-4o-mini"
92
- assert len(broker.data_sources) == 0
93
- assert len(broker.pipelines) == 0
94
-
95
- def test_register_api_source(self, broker, sample_api_source):
96
- """Test registering an API data source."""
97
- with patch.object(sample_api_source, 'connect', return_value=True):
98
- result = broker.register_data_source(sample_api_source, auto_connect=True)
99
- assert result is True
100
- assert "test_sales_api" in broker.data_sources
101
- assert "test_sales_api" in broker.data_catalog
102
-
103
- def test_register_file_source(self, broker, sample_file_source):
104
- """Test registering a file data source."""
105
- with patch.object(sample_file_source, 'connect', return_value=True):
106
- result = broker.register_data_source(sample_file_source, auto_connect=True)
107
- assert result is True
108
- assert "test_inventory_file" in broker.data_sources
109
- assert "test_inventory_file" in broker.data_catalog
110
-
111
- def test_register_multiple_sources(self, broker, sample_api_source, sample_file_source):
112
- """Test registering multiple data sources."""
113
- with patch.object(sample_api_source, 'connect', return_value=True), \
114
- patch.object(sample_file_source, 'connect', return_value=True):
115
- broker.register_data_source(sample_api_source, auto_connect=True)
116
- broker.register_data_source(sample_file_source, auto_connect=True)
117
-
118
- assert len(broker.data_sources) == 2
119
- assert len(broker.data_catalog) == 2
120
-
121
-
122
- class TestCausalModeling:
123
- """Test causal dependency modeling."""
124
-
125
- def test_add_causal_relationship(self, broker):
126
- """Test adding causal relationships."""
127
- broker.add_causal_relationship(
128
- "source_a",
129
- "target_b",
130
- strength=0.8,
131
- relation_type=CausalRelationType.DIRECT
132
- )
133
-
134
- assert "source_a" in broker.causal_graph
135
- assert "target_b" in broker.causal_graph["source_a"]
136
-
137
- def test_analyze_dependencies(self, broker):
138
- """Test analyzing data dependencies."""
139
- # Add some causal relationships
140
- broker.add_causal_relationship("source_a", "target_b", strength=0.8)
141
- broker.add_causal_relationship("source_a", "target_c", strength=0.6)
142
- broker.add_causal_relationship("target_b", "target_d", strength=0.7)
143
-
144
- # Analyze dependencies
145
- analysis = broker.analyze_data_dependencies("source_a", "target_b")
146
-
147
- assert analysis["source"] == "source_a"
148
- assert analysis["target"] == "target_b"
149
- assert "dependencies" in analysis
150
- assert "downstream_impacts" in analysis
151
- assert "causal_strength" in analysis
152
-
153
- def test_identify_causal_chain(self, broker):
154
- """Test identifying causal chains."""
155
- broker.add_causal_relationship("a", "b", strength=0.8)
156
- broker.add_causal_relationship("b", "c", strength=0.7)
157
-
158
- chain = broker.identify_causal_chain("a", "c")
159
- assert len(chain) == 3
160
- assert chain[0] == "a"
161
- assert chain[-1] == "c"
162
-
163
-
164
- class TestDataCollection:
165
- """Test data collection functionality."""
166
-
167
- def test_collect_data_with_cache(self, broker, sample_api_source):
168
- """Test collecting data with caching."""
169
- with patch.object(sample_api_source, 'connect', return_value=True), \
170
- patch.object(sample_api_source, 'get_cached_data', return_value={"test": "data"}):
171
- broker.register_data_source(sample_api_source, auto_connect=False)
172
-
173
- data = broker.collect_data(
174
- sources=["test_sales_api"],
175
- use_cache=True
176
- )
177
-
178
- assert "test_sales_api" in data
179
- assert data["test_sales_api"] == {"test": "data"}
180
-
181
- def test_collect_data_from_multiple_sources(self, broker, sample_api_source, sample_file_source):
182
- """Test collecting from multiple sources."""
183
- with patch.object(sample_api_source, 'connect', return_value=True), \
184
- patch.object(sample_file_source, 'connect', return_value=True), \
185
- patch.object(sample_api_source, 'get_cached_data', return_value={"api": "data"}), \
186
- patch.object(sample_file_source, 'get_cached_data', return_value={"file": "data"}):
187
-
188
- broker.register_data_source(sample_api_source, auto_connect=False)
189
- broker.register_data_source(sample_file_source, auto_connect=False)
190
-
191
- data = broker.collect_data(
192
- sources=["test_sales_api", "test_inventory_file"],
193
- use_cache=True
194
- )
195
-
196
- assert len(data) == 2
197
- assert "test_sales_api" in data
198
- assert "test_inventory_file" in data
199
-
200
-
201
- class TestIntelligentRouting:
202
- """Test intelligent data routing."""
203
-
204
- def test_register_consumer(self, broker):
205
- """Test registering a consumer."""
206
- consumer = ConsumerRequirement(
207
- name="test_consumer",
208
- required_fields=["product_id", "revenue"],
209
- min_quality_score=0.7,
210
- causal_dependencies=["test_sales_api"]
211
- )
212
-
213
- broker.register_consumer(consumer)
214
- assert "test_consumer" in broker.routing_engine.consumers
215
-
216
- def test_route_data_with_causal_matching(self, broker, sample_api_source):
217
- """Test routing data using causal matching."""
218
- # Register source
219
- with patch.object(sample_api_source, 'connect', return_value=True):
220
- broker.register_data_source(sample_api_source, auto_connect=False)
221
-
222
- # Add causal relationship
223
- broker.add_causal_relationship("test_sales_api", "revenue_prediction", strength=0.8)
224
-
225
- # Register consumer
226
- consumer = ConsumerRequirement(
227
- name="analytics_service",
228
- required_fields=["product_id", "revenue"],
229
- causal_dependencies=["test_sales_api"]
230
- )
231
- broker.register_consumer(consumer)
232
-
233
- # Route data
234
- routes = broker.route_data(
235
- data="test_sales_api",
236
- consumers=["analytics_service"]
237
- )
238
-
239
- assert "analytics_service" in routes
240
- assert len(routes["analytics_service"]) > 0
241
- assert routes["analytics_service"][0].producer == "test_sales_api"
242
-
243
- def test_route_data_schema_matching(self, broker, sample_api_source):
244
- """Test routing based on schema compatibility."""
245
- # Register source with schema
246
- with patch.object(sample_api_source, 'connect', return_value=True):
247
- broker.register_data_source(sample_api_source, auto_connect=False)
248
-
249
- # Register consumer with matching schema
250
- consumer = ConsumerRequirement(
251
- name="matching_consumer",
252
- required_fields=["product_id", "revenue", "quantity"],
253
- schema_preferences={
254
- "product_id": "str",
255
- "revenue": "float"
256
- }
257
- )
258
- broker.register_consumer(consumer)
259
-
260
- # Route data
261
- routes = broker.route_data(data="test_sales_api", consumers=["matching_consumer"])
262
-
263
- if "matching_consumer" in routes:
264
- match = routes["matching_consumer"][0]
265
- assert match.schema_compatibility > 0
266
-
267
-
268
- class TestPipelineManagement:
269
- """Test pipeline management."""
270
-
271
- def test_create_pipeline(self, broker):
272
- """Test creating a pipeline."""
273
- pipeline = broker.create_pipeline(
274
- name="test_pipeline",
275
- stages=[],
276
- causal_optimization=False
277
- )
278
-
279
- assert pipeline.name == "test_pipeline"
280
- assert "test_pipeline" in broker.pipelines
281
-
282
- def test_pipeline_with_stages(self, broker):
283
- """Test creating pipeline with stages."""
284
- validate_stage_obj = PipelineStage(
285
- name="validate",
286
- stage_type=StageType.VALIDATE,
287
- function=validate_stage,
288
- config={"required_fields": ["product_id"]}
289
- )
290
-
291
- pipeline = broker.create_pipeline(
292
- name="test_pipeline",
293
- stages=[validate_stage_obj]
294
- )
295
-
296
- assert len(pipeline.stages) == 1
297
- assert pipeline.stages[0].name == "validate"
298
-
299
- def test_pipeline_execution(self, broker):
300
- """Test executing a pipeline."""
301
- # Create simple pipeline
302
- def identity_stage(data, context, **kwargs):
303
- return data
304
-
305
- stage = PipelineStage(
306
- name="identity",
307
- stage_type=StageType.CUSTOM,
308
- function=identity_stage
309
- )
310
-
311
- pipeline = broker.create_pipeline(
312
- name="test_pipeline",
313
- stages=[stage]
314
- )
315
-
316
- test_data = {"test": "data"}
317
- result = pipeline.execute(test_data)
318
-
319
- assert result == test_data
320
- assert len(pipeline.execution_history) == 1
321
- assert pipeline.execution_history[0]["success"] is True
322
-
323
-
324
- class TestLLMDiscovery:
325
- """Test LLM-powered data discovery."""
326
-
327
- def test_simple_discovery(self, broker, sample_api_source):
328
- """Test simple keyword-based discovery."""
329
- with patch.object(sample_api_source, 'connect', return_value=True):
330
- broker.register_data_source(sample_api_source, auto_connect=False)
331
-
332
- # Update catalog description for better matching
333
- broker.data_catalog["test_sales_api"]["description"] = "Sales data API"
334
-
335
- results = broker.discover_data("sales", use_llm=False)
336
-
337
- assert len(results) > 0
338
- assert results[0]["source"] == "test_sales_api"
339
- assert results[0]["relevance"] > 0
340
-
341
- def test_llm_discovery(self, broker, sample_api_source):
342
- """Test LLM-powered discovery."""
343
- with patch.object(sample_api_source, 'connect', return_value=True), \
344
- patch.object(broker, 'step', return_value="test_sales_api"):
345
- broker.register_data_source(sample_api_source, auto_connect=False)
346
-
347
- broker.data_catalog["test_sales_api"]["description"] = "Sales data API"
348
-
349
- results = broker.discover_data("sales data", use_llm=True)
350
-
351
- assert len(results) >= 0 # May fall back to simple discovery
352
-
353
-
354
- class TestDataQuality:
355
- """Test data quality assessment."""
356
-
357
- def test_assess_data_quality(self, broker, sample_api_source):
358
- """Test data quality assessment."""
359
- with patch.object(sample_api_source, 'connect', return_value=True), \
360
- patch.object(sample_api_source, 'get_cached_data', return_value={"test": "data"}), \
361
- patch.object(broker, 'step', return_value="Data quality is good"):
362
- broker.register_data_source(sample_api_source, auto_connect=False)
363
-
364
- assessment = broker.assess_data_quality("test_sales_api")
365
-
366
- assert "quality_score" in assessment
367
- assert "metrics" in assessment
368
- assert assessment["source"] == "test_sales_api"
369
-
370
-
371
- class TestErrorHandling:
372
- """Test error handling."""
373
-
374
- def test_register_invalid_source(self, broker, sample_api_source):
375
- """Test handling invalid source registration."""
376
- with patch.object(sample_api_source, 'connect', return_value=False):
377
- result = broker.register_data_source(sample_api_source, auto_connect=True)
378
- assert result is False
379
-
380
- def test_collect_from_nonexistent_source(self, broker):
381
- """Test collecting from non-existent source."""
382
- data = broker.collect_data(sources=["nonexistent"])
383
- assert "nonexistent" not in data or data.get("nonexistent") is None
384
-
385
- def test_analyze_nonexistent_source(self, broker):
386
- """Test analyzing non-existent source."""
387
- analysis = broker.analyze_data_dependencies("nonexistent")
388
- assert "error" in analysis
389
-
390
-
391
- class TestIntegration:
392
- """Integration tests."""
393
-
394
- def test_complete_workflow(self, broker, sample_api_source, sample_file_source):
395
- """Test complete workflow from setup to routing."""
396
- # Setup
397
- with patch.object(sample_api_source, 'connect', return_value=True), \
398
- patch.object(sample_file_source, 'connect', return_value=True):
399
- broker.register_data_source(sample_api_source, auto_connect=False)
400
- broker.register_data_source(sample_file_source, auto_connect=False)
401
-
402
- # Model dependencies
403
- broker.add_causal_relationship("test_sales_api", "revenue_prediction", strength=0.8)
404
-
405
- # Register consumer
406
- consumer = ConsumerRequirement(
407
- name="analytics",
408
- required_fields=["product_id", "revenue"],
409
- causal_dependencies=["test_sales_api"]
410
- )
411
- broker.register_consumer(consumer)
412
-
413
- # Route data
414
- routes = broker.route_data(data="test_sales_api", consumers=["analytics"])
415
-
416
- # Verify
417
- assert len(broker.data_sources) == 2
418
- assert len(broker.routing_engine.consumers) == 1
419
- assert "analytics" in routes or len(routes) >= 0 # May be empty if no match
420
-
421
-
422
- if __name__ == "__main__":
423
- pytest.main([__file__, "-v"])
424
-