crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,193 @@
1
+ """
2
+ Schema definitions for conversation management and context tracking.
3
+
4
+ Implements data structures for multi-turn conversations with memory networks,
5
+ attention mechanisms, and coreference resolution.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import Dict, List, Optional, Any, Tuple, Set
10
+ from enum import Enum
11
+ import time
12
+
13
+
14
class MessageRole(Enum):
    """Enumerates the possible senders of a conversation message."""

    USER = "user"
    AGENT = "agent"
    SYSTEM = "system"
19
+
20
+
21
@dataclass
class ConversationMessage:
    """
    One message exchanged during a conversation.

    Attributes:
        role: Sender of the message (user/agent/system)
        content: Text of the message
        timestamp: Unix timestamp of the message; defaults to ``time.time()``
            at construction
        metadata: Free-form extra data attached to the message
        message_id: Identifier of the message. When omitted it is derived
            from the role value and the timestamp in whole milliseconds, so
            two messages of the same role created within the same
            millisecond receive the same id.
    """
    role: MessageRole
    content: str
    timestamp: float = field(default_factory=time.time)
    metadata: Dict[str, Any] = field(default_factory=dict)
    message_id: Optional[str] = None

    def __post_init__(self):
        """Derive ``message_id`` from role and timestamp when none was given."""
        if self.message_id is not None:
            return
        millis = int(self.timestamp * 1000)
        self.message_id = f"{self.role.value}_{millis}"
43
+
44
+
45
@dataclass
class VariableMapping:
    """
    Links a variable name in one conversation turn to its name in a later turn.

    Models the variable evolution map φ: V_t → V_{t+1}.

    Attributes:
        source_variable: Name of the variable at turn t
        target_variable: Name of the variable at turn t+1
        confidence: Confidence in the mapping (0.0-1.0); defaults to 1.0
        evidence: Free-text justification (e.g., "same context",
            "explicit mention"); defaults to an empty string
        turn_from: Turn number of the source variable; defaults to 0
        turn_to: Turn number of the target variable; defaults to 0
    """
    source_variable: str
    target_variable: str
    confidence: float = 1.0
    evidence: str = ""
    turn_from: int = 0
    turn_to: int = 0
66
+
67
+
68
@dataclass
class GraphSnapshot:
    """
    Record of the graph state captured at a specific turn.

    The dataclass is not frozen, so treating a snapshot as immutable is a
    convention for callers rather than something enforced here.

    Attributes:
        turn_number: Turn this snapshot belongs to
        nodes: Set of node names
        edges: List of (source, target) tuples
        node_attributes: Per-node attribute dictionaries, keyed by node name
        edge_attributes: Per-edge attribute dictionaries, keyed by
            (source, target)
        timestamp: Creation time; defaults to ``time.time()`` at construction
    """
    turn_number: int
    nodes: Set[str] = field(default_factory=set)
    edges: List[Tuple[str, str]] = field(default_factory=list)
    node_attributes: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    edge_attributes: Dict[Tuple[str, str], Dict[str, Any]] = field(
        default_factory=dict
    )
    timestamp: float = field(default_factory=time.time)
87
+
88
+
89
@dataclass
class ConversationContext:
    """
    Conversation state: the message log plus recency-based attention weights.

    Messages are weighted with exponential decay, so with a positive
    ``decay_lambda`` newer messages weigh more than older ones.

    Attributes:
        conversation_id: Unique identifier for this conversation
        messages: List of conversation messages (temporal ordering)
        graph_snapshots: Graph state snapshots keyed by turn number
        variable_mappings: Variable evolution mappings across turns
        current_turn: Current turn number (number of messages added so far)
        attention_weights: Weights from the most recent call to
            ``compute_attention_weights``; ``None`` until first computed
        decay_lambda: Exponential decay parameter for attention
        topic_history: Conversation topic transitions
        reference_resolution: Coreference resolution mappings
    """
    conversation_id: str
    messages: List[ConversationMessage] = field(default_factory=list)
    graph_snapshots: Dict[int, GraphSnapshot] = field(default_factory=dict)
    variable_mappings: List[VariableMapping] = field(default_factory=list)
    current_turn: int = 0
    attention_weights: Optional[Dict[int, float]] = None
    decay_lambda: float = 0.1  # Exponential decay parameter
    topic_history: List[Dict[str, Any]] = field(default_factory=list)
    reference_resolution: Dict[str, str] = field(default_factory=dict)

    def add_message(self, message: ConversationMessage) -> None:
        """
        Append a message and advance the turn counter.

        Args:
            message: Message to add
        """
        self.messages.append(message)
        self.current_turn = len(self.messages)

    def get_recent_messages(self, k: int = 10) -> List[ConversationMessage]:
        """
        Get the k most recent messages, oldest first.

        Args:
            k: Number of recent messages to retrieve

        Returns:
            A new list with at most k messages; empty when k <= 0
        """
        # Guard explicitly: ``messages[-0:]`` would return *every* message and
        # a negative k would slice from the front of the list.
        if k <= 0:
            return []
        # Slicing always produces a copy, so callers never receive (and
        # accidentally mutate) the live ``self.messages`` list.
        return self.messages[-k:]

    def compute_attention_weights(self, query: Optional[str] = None) -> Dict[int, float]:
        """
        Compute attention weights for messages using exponential decay.

        Attention weights: w_i = exp(-λ·(t-i)) where t is the number of
        messages and i is the zero-based message index.

        Args:
            query: Accepted for future query-based attention; currently
                ignored, every call uses plain exponential decay

        Returns:
            Dictionary mapping message index to attention weight. The result
            is also stored on ``self.attention_weights``.
        """
        import math

        # math.exp keeps the result identical whether or not numpy is
        # installed and avoids relying on module-level names defined after
        # this class.
        total = len(self.messages)
        decay = self.decay_lambda
        weights = {
            index: math.exp(-decay * (total - index)) for index in range(total)
        }
        self.attention_weights = weights
        return weights

    def get_context_window(self, window_size: int = 5) -> List[ConversationMessage]:
        """
        Get context window using attention-weighted selection.

        Args:
            window_size: Maximum number of messages to include

        Returns:
            The highest-weighted messages, returned in conversation order;
            empty when window_size <= 0
        """
        weights = self.compute_attention_weights()
        # A negative size would otherwise slice off the tail instead of
        # selecting nothing.
        if window_size <= 0:
            return []
        # Rank indices by weight (stable for ties), keep the top ones, then
        # restore temporal order.
        ranked = sorted(weights, key=weights.__getitem__, reverse=True)
        return [self.messages[i] for i in sorted(ranked[:window_size])]
183
+
184
+
185
# Optional numpy support: NUMPY_AVAILABLE records whether the import worked,
# and ``np`` is bound to None when it did not.
try:
    import numpy as np
except ImportError:
    import math
    np = None
    NUMPY_AVAILABLE = False
else:
    NUMPY_AVAILABLE = True
schemas/hybrid.py ADDED
@@ -0,0 +1,211 @@
1
+ """
2
+ Schema definitions for the hybrid agent system.
3
+
4
+ Defines data structures for provenance tracking, temporal edges,
5
+ language compilation, and error correction.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import Dict, List, Optional, Any, Tuple, Set
10
+ from enum import Enum
11
+ import time
12
+
13
+
14
class TemporalType(Enum):
    """Kinds of temporal relationship a causal edge can carry."""

    BEFORE = "before"
    AFTER = "after"
    DELAYED = "delayed"
    FEEDBACK_LOOP = "feedback_loop"
    # Default, no temporal delay
    IMMEDIATE = "immediate"
21
+
22
+
23
@dataclass
class EdgeProvenance:
    """
    Tracks the provenance of a causal edge - where it came from and how confident we are.

    Attributes:
        source_sentence: Original text that created the edge
        extraction_pattern: Pattern that matched (pattern ID/name)
        pattern_confidence: Initial confidence from pattern (0.0-1.0)
        extraction_timestamp: When edge was extracted (Unix timestamp)
        confidence_decay_rate: How confidence decays over time (per day, 0.0-1.0)
        validation_history: List of validations/updates with timestamps
        contradictions: List of contradictory edges found
    """
    source_sentence: str
    extraction_pattern: str
    pattern_confidence: float = 1.0
    extraction_timestamp: float = field(default_factory=time.time)
    confidence_decay_rate: float = 0.01  # 1% per day
    validation_history: List[Dict[str, Any]] = field(default_factory=list)
    contradictions: List[Dict[str, Any]] = field(default_factory=list)

    def get_current_confidence(self) -> float:
        """
        Calculate current confidence accounting for decay.

        Confidence is ``pattern_confidence * (1 - rate) ** days_elapsed``,
        clamped to the range 0.0-1.0.

        Returns:
            Current confidence value (0.0-1.0)
        """
        seconds_elapsed = time.time() - self.extraction_timestamp
        # A timestamp in the future must not inflate confidence above its
        # initial value, so elapsed time is floored at zero.
        days_elapsed = max(0.0, seconds_elapsed / 86400.0)
        # A rate above 1.0 would make the base negative; a negative float
        # raised to a fractional power is a complex number, which cannot be
        # compared by min()/max(). Cap the rate at full decay instead.
        rate = min(1.0, self.confidence_decay_rate)
        decay_factor = (1.0 - rate) ** days_elapsed
        return max(0.0, min(1.0, self.pattern_confidence * decay_factor))

    def add_validation(self, validation_type: str, result: bool, notes: str = "") -> None:
        """
        Add a validation entry to the history.

        Args:
            validation_type: Type of validation (e.g., "data_check", "expert_review")
            result: Whether validation passed
            notes: Additional notes about the validation
        """
        entry = {
            "type": validation_type,
            "result": result,
            "notes": notes,
            "timestamp": time.time(),
        }
        self.validation_history.append(entry)

    def add_contradiction(self, contradictory_edge: str, reason: str) -> None:
        """
        Record a contradiction with another edge.

        Args:
            contradictory_edge: Identifier of the contradictory edge
            reason: Why this is a contradiction
        """
        entry = {
            "edge": contradictory_edge,
            "reason": reason,
            "timestamp": time.time(),
        }
        self.contradictions.append(entry)
85
+
86
+
87
@dataclass
class TemporalEdge:
    """
    Represents a temporal causal relationship.

    Attributes:
        temporal_type: Type of temporal relationship
        delay: Optional delay duration (in time units, e.g., days)
        decay_function: How effect decays over time; "exponential", "linear"
            and "step" are recognized, any other value means no decay
        decay_params: Parameters of the decay function: "rate" (exponential,
            default 0.1), "max_time" (linear, default 100.0) and "threshold"
            (step, default 1.0)
        feedback_strength: For feedback loops, the strength of the feedback
    """
    temporal_type: TemporalType = TemporalType.IMMEDIATE
    delay: Optional[float] = None
    decay_function: Optional[str] = None  # e.g., "exponential", "linear", "step"
    decay_params: Dict[str, float] = field(default_factory=dict)
    feedback_strength: float = 1.0

    def apply_temporal_decay(self, base_strength: float, time_elapsed: float) -> float:
        """
        Apply temporal decay to a base strength value.

        Args:
            base_strength: Base strength of the effect
            time_elapsed: Time elapsed since the cause (in same units as delay);
                negative values are treated as zero elapsed time

        Returns:
            Decayed strength value; ``base_strength`` unchanged when no
            (or an unrecognized) decay function is configured
        """
        kind = self.decay_function
        if kind is None:
            return base_strength

        # Negative elapsed time would amplify the effect beyond its base
        # strength (and can divide by zero in the linear case), so floor it.
        elapsed = max(0.0, time_elapsed)

        if kind == "exponential":
            # A rate above 1.0 makes the base negative, which yields complex
            # or sign-flipping results; cap it at full decay.
            rate = min(1.0, self.decay_params.get("rate", 0.1))
            return base_strength * (1.0 - rate) ** elapsed

        if kind == "linear":
            max_time = self.decay_params.get("max_time", 100.0)
            # With elapsed >= 0 this also covers max_time <= 0, so the
            # division below never sees a zero denominator.
            if elapsed >= max_time:
                return 0.0
            return base_strength * (1.0 - elapsed / max_time)

        if kind == "step":
            threshold = self.decay_params.get("threshold", 1.0)
            return base_strength if elapsed < threshold else 0.0

        # Unknown decay function names leave the strength untouched.
        return base_strength
131
+
132
+
133
@dataclass
class AnnotatedToken:
    """
    A token paired with the metadata of any correction applied to it
    (used by the error correction pipeline).

    Attributes:
        original_form: The token as the user typed it
        normalized_form: The corrected version of the token
        confidence: Correction confidence (0.0-1.0); defaults to 1.0
        correction_type: One of spelling, abbreviation, inferred, none;
            defaults to "none"
        provenance: Why the correction was made; defaults to an empty string
        metadata: Optional additional metadata (e.g., dictionary info);
            defaults to None
    """
    original_form: str
    normalized_form: str
    confidence: float = 1.0
    correction_type: str = "none"
    provenance: str = ""
    metadata: Optional[Dict[str, Any]] = None
152
+
153
+
154
@dataclass
class LexicalGraph:
    """
    Represents a compiled lexical knowledge graph.

    Every container defaults to a fresh, empty instance per object.

    Attributes:
        synonym_sets: Dictionary mapping canonical terms to sets of synonyms
        hypernym_chains: Dictionary mapping terms to their hypernym chains
        vocabulary: Set of all known terms
    """
    synonym_sets: Dict[str, Set[str]] = field(default_factory=dict)
    hypernym_chains: Dict[str, List[str]] = field(default_factory=dict)
    # Typed as Set[str] to match the element type used by synonym_sets.
    vocabulary: Set[str] = field(default_factory=set)
167
+
168
+
169
@dataclass
class SynonymSet:
    """
    A set of synonymous terms.

    Attributes:
        canonical: Canonical form of the term
        synonyms: Set of synonymous terms; defaults to a fresh empty set
    """
    canonical: str
    # Typed as Set[str] rather than a bare ``set`` so the element type is explicit.
    synonyms: Set[str] = field(default_factory=set)
180
+
181
+
182
@dataclass
class DependencyTree:
    """
    Container for a dependency parse tree.

    Attributes:
        nodes: List of nodes (words/phrases); defaults to an empty list
        edges: List of (head, dependent, relation) tuples; defaults to an
            empty list
        root: Root node identifier, or None when not set
    """
    nodes: List[str] = field(default_factory=list)
    edges: List[Tuple[str, str, str]] = field(default_factory=list)
    root: Optional[str] = None
194
+ root: Optional[str] = None
195
+
196
+
197
@dataclass
class CausalStructure:
    """
    A cause/effect pair extracted from a sentence.

    Attributes:
        cause: Cause variable/phrase
        effect: Effect variable/phrase
        relation_type: Type of causal relation
        confidence: Confidence in the extraction; defaults to 1.0
    """
    cause: str
    effect: str
    relation_type: str
    confidence: float = 1.0
schemas/reasoning.py ADDED
@@ -0,0 +1,276 @@
1
+ """
2
+ Schema definitions for chain-of-thought reasoning and proof tracking.
3
+
4
+ Implements natural deduction with proof trees, explicit inference chains,
5
+ and evidence tracking for complete traceability.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import Dict, List, Optional, Any, Tuple, Set
10
+ from enum import Enum
11
+ import time
12
+
13
+
14
class InferenceRule(Enum):
    """Closed set of inference rules a reasoning step can be tagged with."""

    # Each member's value is its own name in lower snake case.
    MODUS_PONENS = "modus_ponens"
    UNIVERSAL_INSTANTIATION = "universal_instantiation"
    CAUSAL_INFERENCE = "causal_inference"
    EXTRACTION = "extraction"
    VALIDATION = "validation"
    INFERENCE = "inference"
    DEDUCTION = "deduction"
    INDUCTION = "induction"
    ABDUCTION = "abduction"
25
+
26
+
27
class StepType(Enum):
    """Closed set of categories a reasoning step can belong to."""

    # Each member's value is its own name in lower case.
    EXTRACTION = "extraction"
    INFERENCE = "inference"
    VALIDATION = "validation"
    TRANSFORMATION = "transformation"
    AGGREGATION = "aggregation"
    DECISION = "decision"
35
+
36
+
37
@dataclass
class Evidence:
    """
    One piece of evidence backing a conclusion.

    Attributes:
        source: Source of evidence (e.g., "extraction", "inference", "user_input")
        content: Evidence content (any object)
        confidence: Confidence in evidence (0.0-1.0); defaults to 1.0.
            The range is documented only, not enforced here.
        timestamp: Value of time.time() taken when the instance is created,
            unless given explicitly
        metadata: Additional metadata; a new empty dict per instance
    """
    # Required: where the evidence came from and what it is.
    source: str
    content: Any
    # Optional score; no validation is applied to it.
    confidence: float = 1.0
    # Stamped at construction time via the default factory.
    timestamp: float = field(default_factory=time.time)
    # Free-form extras, never shared between instances.
    metadata: Dict[str, Any] = field(default_factory=dict)
54
+
55
+
56
@dataclass
class ReasoningStep:
    """
    Single step of a proof chain, together with the evidence supporting it.

    Attributes:
        step_id: Unique identifier for this step
        step_type: Type of reasoning step
        inference_rule: Inference rule used (if applicable)
        input_state: Precondition graph G_pre (as dictionary representation)
        operation: Description of operation performed
        output_state: Postcondition graph G_post (as dictionary representation)
        confidence: Confidence in the conclusion, intended as the posterior
            P(conclusion | evidence); this class only stores the number
        uncertainty: Uncertainty quantification (standard deviation or
            credible interval), if available
        evidence: Evidence objects recorded for the conclusion
        premises: List of premise step IDs
        conclusion: Conclusion reached in this step
        timestamp: Value of time.time() taken when the step is created,
            unless given explicitly
        metadata: Additional metadata
    """
    # Identity and classification (both required).
    step_id: str
    step_type: StepType
    inference_rule: Optional[InferenceRule] = None
    # State before the operation, the operation itself, state after it.
    input_state: Dict[str, Any] = field(default_factory=dict)
    operation: str = ""
    output_state: Dict[str, Any] = field(default_factory=dict)
    # How strongly the conclusion is believed.
    confidence: float = 1.0
    uncertainty: Optional[float] = None
    # Support for the conclusion: direct evidence and/or earlier steps.
    evidence: List[Evidence] = field(default_factory=list)
    premises: List[str] = field(default_factory=list)
    conclusion: Optional[Any] = None
    # Bookkeeping.
    timestamp: float = field(default_factory=time.time)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def add_evidence(self, evidence: Evidence) -> None:
        """
        Append one piece of evidence to this step's evidence list.

        Args:
            evidence: Evidence to add
        """
        self.evidence.append(evidence)

    def is_valid_proof_step(self) -> bool:
        """
        Tell whether this step satisfies the minimal proof-step requirements.

        The step is rejected when any of the following holds:
        - it is an INFERENCE step without an inference rule
        - it has neither evidence nor premises
        - its conclusion is None

        Returns:
            True if valid, False otherwise
        """
        rule_missing = (
            self.step_type == StepType.INFERENCE and self.inference_rule is None
        )
        unsupported = not (self.evidence or self.premises)
        unconcluded = self.conclusion is None
        return not (rule_missing or unsupported or unconcluded)
120
+
121
+
122
@dataclass
class ReasoningChain:
    """
    Complete reasoning path (proof tree/DAG structure).

    Steps are stored in insertion order; each step names the steps it
    depends on through its ``premises`` list of step IDs.

    Attributes:
        chain_id: Unique identifier for this reasoning chain
        steps: List of reasoning steps (ordered sequence)
        root_step: Root step ID (ID of the first step added)
        leaf_steps: IDs of steps that no other step uses as a premise
        alternative_branches: Alternative proof branches explored
        decision_points: Choice points with alternatives explored
        success: Whether reasoning chain succeeded
        final_conclusion: Final conclusion reached
        timestamp: When chain was created
        metadata: Additional metadata
    """
    chain_id: str
    steps: List[ReasoningStep] = field(default_factory=list)
    root_step: Optional[str] = None
    leaf_steps: List[str] = field(default_factory=list)
    alternative_branches: List[List[str]] = field(default_factory=list)
    decision_points: List[Dict[str, Any]] = field(default_factory=list)
    success: bool = False
    final_conclusion: Optional[Any] = None
    timestamp: float = field(default_factory=time.time)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def add_step(self, step: ReasoningStep) -> None:
        """
        Add a step to the reasoning chain and keep root/leaf bookkeeping current.

        The first step ever added becomes the root. After the call,
        ``leaf_steps`` no longer contains any step the new step depends on,
        and contains the new step only if no step already in the chain
        lists it as a premise.

        Args:
            step: Reasoning step to add
        """
        self.steps.append(step)
        new_id = step.step_id

        if self.root_step is None:
            self.root_step = new_id

        # Steps the new one builds on stop being end points. A step that
        # lists itself as a premise does not demote itself.
        consumed = {premise for premise in step.premises if premise != new_id}
        if consumed:
            # Mutate in place so outside references to the list stay valid.
            self.leaf_steps[:] = [
                leaf for leaf in self.leaf_steps if leaf not in consumed
            ]

        # Steps may arrive out of order, so an earlier step can already
        # depend on the one being added.
        has_dependents = any(
            new_id in other.premises
            for other in self.steps
            if other.step_id != new_id
        )
        if has_dependents:
            if new_id in self.leaf_steps:
                self.leaf_steps.remove(new_id)
        elif new_id not in self.leaf_steps:
            self.leaf_steps.append(new_id)

    def get_step(self, step_id: str) -> Optional[ReasoningStep]:
        """
        Get step by ID.

        Args:
            step_id: Step ID

        Returns:
            The first step whose ID matches, or None if not found
        """
        return next((step for step in self.steps if step.step_id == step_id), None)

    def get_path_to_step(self, step_id: str) -> List[ReasoningStep]:
        """
        Get reasoning path to a specific step.

        Collects the target and every step reachable through its premises
        (transitively). Each step appears once and always after all of its
        own premises, so the list can be read front to back as a derivation.
        Premise IDs that match no step in the chain are skipped, and steps
        already seen are not revisited, so cyclic chains still terminate.

        Args:
            step_id: Target step ID

        Returns:
            List of steps ending with the target; empty if the target
            does not exist
        """
        target = self.get_step(step_id)
        if target is None:
            return []

        # First occurrence wins, matching get_step().
        by_id: Dict[str, ReasoningStep] = {}
        for candidate in self.steps:
            by_id.setdefault(candidate.step_id, candidate)

        path: List[ReasoningStep] = []
        seen: Set[str] = {step_id}
        # Iterative post-order walk: a step is emitted only once all of its
        # premises have been emitted. Each stack entry pairs a step with an
        # iterator over the premises still to be examined.
        stack = [(target, iter(target.premises))]
        while stack:
            current, pending = stack[-1]
            for premise_id in pending:
                if premise_id in seen:
                    continue
                seen.add(premise_id)
                premise_step = by_id.get(premise_id)
                if premise_step is not None:
                    stack.append((premise_step, iter(premise_step.premises)))
                    break
            else:
                # Every premise handled: the step itself can be emitted.
                path.append(current)
                stack.pop()

        return path

    def validate_chain(self) -> Tuple[bool, Optional[str]]:
        """
        Validate that reasoning chain is sound.

        Checks, in this order:
        - All steps are valid proof steps
        - All premises exist
        - No circular dependencies

        Connectivity of the chain is not checked.

        Returns:
            Tuple of (is_valid, error_message); error_message is None when
            the chain is valid
        """
        for step in self.steps:
            if not step.is_valid_proof_step():
                return False, f"Invalid proof step: {step.step_id}"

        step_ids = {step.step_id for step in self.steps}
        for step in self.steps:
            for premise_id in step.premises:
                if premise_id not in step_ids:
                    return False, f"Premise {premise_id} not found for step {step.step_id}"

        dependencies = {step.step_id: step.premises for step in self.steps}
        finished: Set[str] = set()

        # Depth-first search with an explicit stack, so long dependency
        # chains cannot exceed the interpreter's recursion limit. Starting
        # points follow insertion order to keep the reported ID stable.
        for step in self.steps:
            start = step.step_id
            if start in finished:
                continue
            on_path: Set[str] = {start}
            stack = [(start, iter(dependencies[start]))]
            while stack:
                node, pending = stack[-1]
                for dep in pending:
                    if dep in on_path:
                        # dep is being explored further up the stack, so it
                        # is reachable from itself.
                        return False, f"Circular dependency detected involving {dep}"
                    if dep not in finished:
                        on_path.add(dep)
                        stack.append((dep, iter(dependencies.get(dep, ()))))
                        break
                else:
                    stack.pop()
                    on_path.discard(node)
                    finished.add(node)

        return True, None