crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
utils/edit_distance.py ADDED
@@ -0,0 +1,118 @@
1
+ """
2
+ Edit distance utilities for text correction.
3
+
4
+ Provides Levenshtein and Damerau-Levenshtein distance calculations
5
+ for non-destructive text correction.
6
+ """
7
+
8
from typing import List, Optional, Tuple
9
+
10
+
11
def levenshtein_distance(s1: str, s2: str) -> int:
    """
    Compute the Levenshtein distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one string into the other.
    Only two rows of the dynamic-programming table are kept at a time.

    Args:
        s1: First string
        s2: Second string

    Returns:
        Levenshtein distance
    """
    # The distance is symmetric, so walk the longer string in the outer loop
    # and keep the rolling row as short as the shorter string.
    longer, shorter = (s1, s2) if len(s1) >= len(s2) else (s2, s1)

    if not shorter:
        return len(longer)

    row = list(range(len(shorter) + 1))
    for row_no, ch_long in enumerate(longer, start=1):
        next_row = [row_no]
        for col, ch_short in enumerate(shorter):
            next_row.append(
                min(
                    row[col + 1] + 1,                 # insertion
                    next_row[col] + 1,                # deletion
                    row[col] + (ch_long != ch_short)  # substitution
                )
            )
        row = next_row

    return row[-1]
39
+
40
+
41
def damerau_levenshtein_distance(s1: str, s2: str) -> int:
    """
    Calculate Damerau-Levenshtein distance (includes transpositions).

    This is the unrestricted variant: the minimum number of insertions,
    deletions, substitutions and transpositions of two adjacent characters
    needed to turn ``s1`` into ``s2``. Identical strings have distance 0 and
    the result is symmetric in its arguments.

    Args:
        s1: First string
        s2: Second string

    Returns:
        Damerau-Levenshtein distance
    """
    if not s1:
        return len(s2)
    if not s2:
        return len(s1)

    len1, len2 = len(s1), len(s2)
    # Sentinel larger than any real distance; it fills the extra border
    # row/column so a transposition lookup with "no previous occurrence"
    # can never win the min().
    maxdist = len1 + len2

    # The table is shifted by one: d[i + 1][j + 1] holds the distance between
    # s1[:i] and s2[:j]; row 0 and column 0 are the sentinel border.
    d = [[0] * (len2 + 2) for _ in range(len1 + 2)]
    d[0][0] = maxdist
    for i in range(len1 + 1):
        d[i + 1][0] = maxdist
        d[i + 1][1] = i
    for j in range(len2 + 1):
        d[0][j + 1] = maxdist
        d[1][j + 1] = j

    # 1-based row in which each character of s1 was last seen (0 = never).
    last_row = {}

    for i in range(1, len1 + 1):
        # 1-based column of the last match in the current row (0 = none yet).
        last_match_col = 0
        for j in range(1, len2 + 1):
            last_match_row = last_row.get(s2[j - 1], 0)
            prev_match_col = last_match_col
            if s1[i - 1] == s2[j - 1]:
                cost = 0
                last_match_col = j
            else:
                cost = 1
            d[i + 1][j + 1] = min(
                d[i][j] + cost,      # substitution (or match)
                d[i + 1][j] + 1,     # insertion
                d[i][j + 1] + 1,     # deletion
                # transposition: characters between the two swapped
                # positions are deleted / inserted around a single swap
                d[last_match_row][prev_match_col]
                + (i - last_match_row - 1) + 1 + (j - prev_match_col - 1),
            )
        last_row[s1[i - 1]] = i

    return d[len1 + 1][len2 + 1]
90
+
91
+
92
def find_closest_match(
    word: str,
    candidates: List[str],
    max_distance: int = 3
) -> Tuple[Optional[str], int]:
    """
    Find closest match for a word from candidate list.

    Comparison is case-insensitive (both sides are lower-cased) and uses
    ``damerau_levenshtein_distance``. The first candidate with the smallest
    distance wins; the scan stops early on an exact (distance 0) match.

    Args:
        word: Word to match
        candidates: List of candidate words
        max_distance: Maximum allowed edit distance

    Returns:
        Tuple of (closest_match, distance) where closest_match is returned in
        its original casing. If no candidate is within ``max_distance`` the
        result is ``(None, max_distance + 1)``.
    """
    # Lower-case the query once instead of once per candidate.
    needle = word.lower()

    best_match = None
    # Start just out of range so only candidates within max_distance can win.
    best_distance = max_distance + 1

    for candidate in candidates:
        distance = damerau_levenshtein_distance(needle, candidate.lower())
        if distance < best_distance:
            best_distance = distance
            best_match = candidate
            if distance == 0:
                # Cannot do better than an exact match.
                break

    if best_distance <= max_distance:
        return best_match, best_distance
    return None, best_distance
utils/formatter.py CHANGED
@@ -470,5 +470,38 @@ class Formatter:
470
470
  logger.info(f" - {step.get('step_id')} ({step.get('priority')}): {step.get('description')}")
471
471
 
472
472
 
473
def format_markdown_streaming(self, content: str, chunk_size: int = 10) -> str:
    """Prepare content for streaming markdown output.

    Empty content yields an empty string. Otherwise the content is passed
    through ``self.markdown_handler._clean_output`` when a markdown handler
    is set, and returned untouched when it is not.

    Args:
        content: Content to format
        chunk_size: Size of chunks for streaming (accepted but not used by
            this method's body)

    Returns:
        Formatted markdown content
    """
    if not content:
        return ""

    # Without a handler there is nothing to clean with.
    if self.markdown_handler:
        return self.markdown_handler._clean_output(content)
    return content
489
+
490
def create_custom_formatter(self, style: str = "blue", border_style: str = "blue") -> Callable:
    """Create a custom formatter function with specific styling.

    The returned function is a closure that forwards its arguments to
    ``self.print_panel`` together with the ``border_style`` captured here.

    Args:
        style: Text style. NOTE(review): not referenced by the body of this
            method; only ``border_style`` reaches ``print_panel`` - confirm
            whether that is intended.
        border_style: Border style, passed as the third positional argument
            to ``self.print_panel``

    Returns:
        Custom formatter function taking ``(content, title="")`` and
        returning ``None``
    """
    def custom_format(content: str, title: str = "") -> None:
        # Delegates to the instance's panel printer with the captured border style.
        self.print_panel(content, title, border_style)

    return custom_format
504
+
505
+
473
506
  # Global formatter instance with markdown disabled by default
474
507
  formatter = Formatter(md=False)
@@ -0,0 +1,530 @@
1
+ """
2
+ Graph-first reasoning engine for CRCA.
3
+
4
+ A standalone module that provides pure graph-based reasoning capabilities.
5
+ All answers come from graph state only - never directly from text parsing.
6
+
7
+ This module is designed to be reusable across CRCA components and works
8
+ with any GraphManager instance.
9
+ """
10
+
11
+ from typing import Dict, List, Optional, Tuple, Any, Set
12
+ import logging
13
+ from collections import deque, defaultdict
14
+
15
+ from templates.graph_management import GraphManager
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ class GraphFirstReasoner:
21
+ """
22
+ Graph-first reasoning engine that ONLY uses graph state for answers.
23
+
24
+ Never parses text directly for answers. All reasoning is derived from
25
+ graph structure, edge strengths, temporal relationships, and graph traversal.
26
+
27
+ This is a standalone utility that can be used by:
28
+ - Hybrid agent
29
+ - Other CRCA components
30
+ - Any code needing graph-based reasoning
31
+ """
32
+
33
def __init__(self, graph_manager: Optional[GraphManager] = None) -> None:
    """
    Initialize the graph-first reasoner.

    Args:
        graph_manager: Optional GraphManager instance. If None, must be provided in method calls.
    """
    # Stored unchanged; may stay None when callers pass a manager per call.
    self.graph_manager = graph_manager
41
+
42
def reason_from_graph(
    self,
    task: str,
    graph_state: Optional[Dict[str, Any]] = None,
    graph_manager: Optional[GraphManager] = None
) -> Dict[str, Any]:
    """
    Answer a task from graph state alone.

    The task text is only used to classify the kind of question being
    asked; the answer itself is produced by ``_reason_from_graph_state``
    from the graph state and the manager.

    Args:
        task: Natural language task (used for intent detection only, not for answers)
        graph_state: Optional explicit graph state dict; extracted from the
            manager when omitted
        graph_manager: GraphManager instance (uses self.graph_manager if not provided)

    Returns:
        Dictionary with reasoning results derived from graph state

    Raises:
        ValueError: If no GraphManager is available
    """
    manager = graph_manager or self.graph_manager
    if manager is None:
        raise ValueError("GraphManager must be provided either in __init__ or as parameter")

    # Snapshot the manager only when the caller did not supply a state.
    state = self._extract_graph_state(manager) if graph_state is None else graph_state

    # Intent comes from the task wording; the reasoning uses the graph only.
    detected = self._detect_query_intent(task)
    return self._reason_from_graph_state(state, detected, manager)
77
+
78
def query_causal_path(
    self,
    source: str,
    target: str,
    graph_manager: Optional[GraphManager] = None
) -> List[str]:
    """
    Look up a causal path between two nodes.

    The manager's own ``identify_path`` is tried first; if it yields
    nothing, ``_find_indirect_path`` is used as a fallback.

    Args:
        source: Source node
        target: Target node
        graph_manager: GraphManager instance (uses self.graph_manager if not provided)

    Returns:
        List of nodes forming the causal path, or empty list if no path exists

    Raises:
        ValueError: If no GraphManager is available
    """
    manager = graph_manager or self.graph_manager
    if manager is None:
        raise ValueError("GraphManager must be provided")

    direct = manager.identify_path(source, target)
    if direct:
        return direct

    # Nothing from the manager: fall back to the search through intermediates.
    return self._find_indirect_path(source, target, manager)
107
+
108
def query_effects(
    self,
    intervention: Dict[str, float],
    graph_manager: Optional[GraphManager] = None,
    max_depth: int = 5
) -> Dict[str, float]:
    """
    Propagate an intervention through the graph breadth-first.

    Each child receives its parent's value multiplied by the edge strength.
    A node is recorded the first time it is dequeued; later arrivals via
    other paths are ignored. Intervened nodes themselves are never listed
    in the result.

    Args:
        intervention: Dictionary mapping variable names to intervention values
        graph_manager: GraphManager instance (uses self.graph_manager if not provided)
        max_depth: Maximum depth to traverse from intervention nodes

    Returns:
        Dictionary mapping affected variables to their expected values

    Raises:
        ValueError: If no GraphManager is available
    """
    manager = graph_manager or self.graph_manager
    if manager is None:
        raise ValueError("GraphManager must be provided")

    propagated: Dict[str, float] = {}
    seen: Set[str] = set()

    # Entries are (node, value reaching the node, hops from an intervention).
    frontier: deque = deque((name, level, 0) for name, level in intervention.items())

    while frontier:
        node, magnitude, hops = frontier.popleft()

        if hops > max_depth or node in seen:
            continue
        seen.add(node)

        # Intervention values are inputs, not effects.
        if node not in intervention:
            propagated[node] = magnitude

        for successor in manager.get_children(node):
            if successor not in seen:
                # Simple linear propagation along the edge.
                weight = manager.edge_strength(node, successor)
                frontier.append((successor, magnitude * weight, hops + 1))

    return propagated
162
+
163
def query_temporal_sequence(
    self,
    variable: str,
    time_horizon: int,
    graph_manager: Optional[GraphManager] = None
) -> Dict[int, float]:
    """
    Project a variable's value over a number of time steps.

    Every step starts from a fixed baseline of 1.0 and adds, for each
    parent edge, the edge strength multiplied by the value computed for
    the previous step (the baseline at step 0).

    Args:
        variable: Variable to track
        time_horizon: Number of time steps to project
        graph_manager: GraphManager instance (uses self.graph_manager if not provided)

    Returns:
        Dictionary mapping time step to expected value

    Raises:
        ValueError: If no GraphManager is available
    """
    manager = graph_manager or self.graph_manager
    if manager is None:
        raise ValueError("GraphManager must be provided")

    baseline = 1.0  # Default starting value
    sources = manager.get_parents(variable)

    timeline: Dict[int, float] = {}
    for step in range(time_horizon):
        # Value fed through every parent edge at this step.
        carried = timeline.get(step - 1, baseline) if step > 0 else baseline

        total = baseline
        for src in sources:
            total += carried * manager.edge_strength(src, variable)

        timeline[step] = total

    return timeline
204
+
205
def query_feedback_loops(
    self,
    variable: str,
    graph_manager: Optional[GraphManager] = None
) -> List[Dict[str, Any]]:
    """
    Collect the cycles that start and end at a variable.

    A depth-first walk follows child edges from ``variable``. Whenever the
    walk comes back to ``variable`` with at least two nodes on the current
    trail, the trail is recorded as a loop.

    Args:
        variable: Variable to check for feedback loops
        graph_manager: GraphManager instance (uses self.graph_manager if not provided)

    Returns:
        List of feedback loop descriptions, each with "path", "length"
        and "strength" keys

    Raises:
        ValueError: If no GraphManager is available
    """
    manager = graph_manager or self.graph_manager
    if manager is None:
        raise ValueError("GraphManager must be provided")

    cycles: List[Dict[str, Any]] = []
    on_trail: Set[str] = set()   # nodes on the current DFS trail
    trail: List[str] = []        # the trail itself, in visiting order

    def walk(current: str) -> None:
        """Extend the trail through ``current`` and record returns to the start."""
        if current in on_trail:
            # Only a return to the starting variable counts as a loop here.
            if current == variable and len(trail) > 1:
                closed = trail + [variable]
                cycles.append({
                    "path": closed,
                    "length": len(closed) - 1,
                    "strength": self._calculate_loop_strength(closed, manager)
                })
            return

        on_trail.add(current)
        trail.append(current)

        for successor in manager.get_children(current):
            walk(successor)

        # Backtrack so other branches may pass through this node again.
        trail.pop()
        on_trail.remove(current)

    walk(variable)

    return cycles
256
+
257
def query_graph_state(
    self,
    question: str,
    graph_manager: Optional[GraphManager] = None
) -> Dict[str, Any]:
    """
    Answer a question from the manager's current graph state.

    The state is always extracted fresh from the manager; the question
    text is used for intent detection only.

    Args:
        question: Natural language question
        graph_manager: GraphManager instance (uses self.graph_manager if not provided)

    Returns:
        Dictionary with answer derived from graph state

    Raises:
        ValueError: If no GraphManager is available
    """
    manager = graph_manager or self.graph_manager
    if manager is None:
        raise ValueError("GraphManager must be provided")

    snapshot = self._extract_graph_state(manager)
    detected = self._detect_query_intent(question)

    return self._reason_from_graph_state(snapshot, detected, manager)
286
+
287
def reason_from_graph_state(
    self,
    state: Dict[str, Any],
    query: str,
    graph_manager: Optional[GraphManager] = None
) -> Dict[str, Any]:
    """
    Run graph reasoning against a caller-supplied graph state.

    Unlike ``query_graph_state`` the state is taken as given rather than
    extracted from the manager.

    Args:
        state: Graph state dictionary
        query: Query string (for intent detection)
        graph_manager: GraphManager instance (uses self.graph_manager if not provided)

    Returns:
        Dictionary with reasoning results

    Raises:
        ValueError: If no GraphManager is available
    """
    manager = graph_manager if graph_manager else self.graph_manager
    if manager is None:
        raise ValueError("GraphManager must be provided")

    return self._reason_from_graph_state(
        state,
        self._detect_query_intent(query),
        manager,
    )
311
+
312
+ # Private helper methods
313
+
314
+ def _extract_graph_state(self, graph_manager: GraphManager) -> Dict[str, Any]:
315
+ """
316
+ Extract current graph state from GraphManager.
317
+
318
+ Args:
319
+ graph_manager: GraphManager instance
320
+
321
+ Returns:
322
+ Dictionary representing graph state
323
+ """
324
+ nodes = graph_manager.get_nodes()
325
+ edges = graph_manager.get_edges()
326
+
327
+ # Build edge structure with metadata
328
+ edge_data = {}
329
+ for source, target in edges:
330
+ edge_meta = graph_manager.graph.get(source, {}).get(target, {})
331
+ edge_data[(source, target)] = {
332
+ "strength": edge_meta.get("strength", 1.0),
333
+ "confidence": edge_meta.get("confidence", 1.0),
334
+ "relation_type": edge_meta.get("relation_type", "causal"),
335
+ **{k: v for k, v in edge_meta.items() if k not in ["strength", "confidence", "relation_type"]}
336
+ }
337
+
338
+ return {
339
+ "nodes": nodes,
340
+ "edges": edges,
341
+ "edge_data": edge_data,
342
+ "topological_order": graph_manager.topological_sort(),
343
+ "is_dag": graph_manager.is_dag()
344
+ }
345
+
346
+ def _detect_query_intent(self, query: str) -> Dict[str, Any]:
347
+ """
348
+ Detect intent from query (but don't use query for answers).
349
+
350
+ Args:
351
+ query: Query string
352
+
353
+ Returns:
354
+ Dictionary with intent information
355
+ """
356
+ query_lower = query.lower()
357
+
358
+ intent = {
359
+ "type": "analysis", # default
360
+ "question_type": None,
361
+ "target_variables": [],
362
+ "intervention_variables": [],
363
+ "temporal": False
364
+ }
365
+
366
+ # Question type detection
367
+ if any(word in query_lower for word in ["what", "which", "who"]):
368
+ intent["question_type"] = "what"
369
+ elif any(word in query_lower for word in ["how", "why"]):
370
+ intent["question_type"] = "how"
371
+ elif any(word in query_lower for word in ["when", "where"]):
372
+ intent["question_type"] = "when_where"
373
+
374
+ # Intent type detection
375
+ if any(word in query_lower for word in ["predict", "forecast", "estimate", "will", "would"]):
376
+ intent["type"] = "prediction"
377
+ elif any(word in query_lower for word in ["what if", "if", "suppose", "assume"]):
378
+ intent["type"] = "counterfactual"
379
+ elif any(word in query_lower for word in ["effect", "impact", "influence"]):
380
+ intent["type"] = "effect_analysis"
381
+ elif any(word in query_lower for word in ["path", "connection", "link"]):
382
+ intent["type"] = "path_query"
383
+ elif any(word in query_lower for word in ["feedback", "loop", "cycle"]):
384
+ intent["type"] = "feedback_analysis"
385
+
386
+ # Temporal detection
387
+ if any(word in query_lower for word in ["before", "after", "delay", "time", "days", "hours"]):
388
+ intent["temporal"] = True
389
+
390
+ return intent
391
+
392
+ def _reason_from_graph_state(
393
+ self,
394
+ graph_state: Dict[str, Any],
395
+ intent: Dict[str, Any],
396
+ graph_manager: GraphManager
397
+ ) -> Dict[str, Any]:
398
+ """
399
+ Core reasoning logic using graph state only.
400
+
401
+ Args:
402
+ graph_state: Graph state dictionary
403
+ intent: Intent dictionary
404
+ graph_manager: GraphManager instance
405
+
406
+ Returns:
407
+ Dictionary with reasoning results
408
+ """
409
+ result = {
410
+ "reasoning_type": intent["type"],
411
+ "graph_nodes": graph_state["nodes"],
412
+ "graph_edges": graph_state["edges"],
413
+ "answer": None,
414
+ "confidence": 1.0,
415
+ "supporting_evidence": []
416
+ }
417
+
418
+ intent_type = intent["type"]
419
+
420
+ if intent_type == "path_query":
421
+ # Find paths between variables mentioned in query
422
+ # This is a simplified version - in practice, extract variables from query
423
+ if len(graph_state["nodes"]) >= 2:
424
+ path = self.query_causal_path(
425
+ graph_state["nodes"][0],
426
+ graph_state["nodes"][-1],
427
+ graph_manager
428
+ )
429
+ result["answer"] = f"Path found: {' -> '.join(path)}" if path else "No path found"
430
+ result["supporting_evidence"] = [{"type": "path", "path": path}]
431
+
432
+ elif intent_type == "effect_analysis":
433
+ # Analyze effects from graph structure
434
+ if graph_state["nodes"]:
435
+ # Use first node as example (in practice, extract from query)
436
+ source = graph_state["nodes"][0]
437
+ effects = self.query_effects({source: 1.0}, graph_manager)
438
+ result["answer"] = f"Effects of {source}: {effects}"
439
+ result["supporting_evidence"] = [{"type": "effects", "effects": effects}]
440
+
441
+ elif intent_type == "feedback_analysis":
442
+ # Find feedback loops
443
+ if graph_state["nodes"]:
444
+ loops = self.query_feedback_loops(graph_state["nodes"][0], graph_manager)
445
+ result["answer"] = f"Found {len(loops)} feedback loops"
446
+ result["supporting_evidence"] = [{"type": "feedback_loops", "loops": loops}]
447
+
448
+ elif intent_type == "prediction":
449
+ # Make prediction from graph structure
450
+ if graph_state["nodes"]:
451
+ sequence = self.query_temporal_sequence(graph_state["nodes"][0], 10, graph_manager)
452
+ result["answer"] = f"Temporal sequence: {sequence}"
453
+ result["supporting_evidence"] = [{"type": "temporal_sequence", "sequence": sequence}]
454
+
455
+ else:
456
+ # Default: general graph analysis
457
+ result["answer"] = f"Graph contains {len(graph_state['nodes'])} nodes and {len(graph_state['edges'])} edges"
458
+ result["supporting_evidence"] = [
459
+ {"type": "graph_structure", "nodes": graph_state["nodes"], "edges": graph_state["edges"]}
460
+ ]
461
+
462
+ return result
463
+
464
+ def _find_indirect_path(
465
+ self,
466
+ source: str,
467
+ target: str,
468
+ graph_manager: GraphManager,
469
+ max_depth: int = 10
470
+ ) -> List[str]:
471
+ """
472
+ Find indirect path through intermediate nodes.
473
+
474
+ Args:
475
+ source: Source node
476
+ target: Target node
477
+ graph_manager: GraphManager instance
478
+ max_depth: Maximum search depth
479
+
480
+ Returns:
481
+ Path as list of nodes, or empty list
482
+ """
483
+ # Use BFS to find shortest path
484
+ queue: deque = deque([(source, [source])])
485
+ visited: Set[str] = {source}
486
+
487
+ while queue:
488
+ current, path = queue.popleft()
489
+
490
+ if len(path) > max_depth:
491
+ continue
492
+
493
+ children = graph_manager.get_children(current)
494
+ for child in children:
495
+ if child == target:
496
+ return path + [child]
497
+
498
+ if child not in visited:
499
+ visited.add(child)
500
+ queue.append((child, path + [child]))
501
+
502
+ return []
503
+
504
+ def _calculate_loop_strength(self, loop_path: List[str], graph_manager: GraphManager) -> float:
505
+ """
506
+ Calculate overall strength of a feedback loop.
507
+
508
+ Args:
509
+ loop_path: List of nodes forming the loop
510
+ graph_manager: GraphManager instance
511
+
512
+ Returns:
513
+ Overall loop strength
514
+ """
515
+ if len(loop_path) < 2:
516
+ return 0.0
517
+
518
+ strengths = []
519
+ for i in range(len(loop_path) - 1):
520
+ source = loop_path[i]
521
+ target = loop_path[i + 1]
522
+ strength = graph_manager.edge_strength(source, target)
523
+ strengths.append(strength)
524
+
525
+ # Multiply all edge strengths (assuming independence)
526
+ total_strength = 1.0
527
+ for s in strengths:
528
+ total_strength *= s
529
+
530
+ return total_strength