crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,623 @@
1
+ """
2
+ Language compilation system for CRCA hybrid agent.
3
+
4
+ Provides three-layer language compilation:
5
+ 1. Lexical layer: Words and phrases (synonyms, hypernyms, vocabulary)
6
+ 2. Grammatical layer: Sentence structure (dependency grammar, causal patterns)
7
+ 3. Pragmatic layer: Tone and style (confidence-based language decisions)
8
+
9
+ All language knowledge is compiled into queryable structures at initialization,
10
+ not parsed at runtime.
11
+ """
12
+
13
+ from typing import Dict, List, Optional, Set, Tuple, Any
14
+ import logging
15
+ import re
16
+ import time
17
+ from collections import defaultdict
18
+
19
+ try:
20
+ import requests
21
+ REQUESTS_AVAILABLE = True
22
+ except ImportError:
23
+ REQUESTS_AVAILABLE = False
24
+
25
+ from schemas.hybrid import LexicalGraph, SynonymSet, DependencyTree, CausalStructure
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class LexicalCompiler:
    """
    Compiles lexical knowledge (words and phrases) into queryable structures.

    Features:
    - Synonym sets keyed by a canonical term
    - Controlled vocabulary expansion
    - Term normalization (canonical forms)
    - Vocabulary validation
    - Optional online dictionary lookups (cached and rate limited)
    """

    def __init__(self, enable_dictionary: bool = True, cache_enabled: bool = True):
        """
        Initialize lexical compiler with dictionary integration.

        Args:
            enable_dictionary: Enable online dictionary lookups. Forced off
                when the ``requests`` package could not be imported.
            cache_enabled: Enable caching of dictionary lookups
        """
        self.lexical_graph = LexicalGraph()
        self.enable_dictionary = enable_dictionary and REQUESTS_AVAILABLE
        self.cache_enabled = cache_enabled

        # Dictionary cache to avoid repeated API calls
        self._dictionary_cache: Dict[str, Dict[str, Any]] = {}
        self._dictionary_cache_timestamps: Dict[str, float] = {}
        self._cache_ttl = 86400  # 24 hours

        # Dictionary API endpoint (Free Dictionary API - no key required)
        self.dictionary_api_url = "https://api.dictionaryapi.dev/api/v2/entries/en"

        # Rate limiting
        self._last_request_time = 0.0
        self._min_request_interval = 0.1  # 100ms between requests

        self._build_basic_vocabulary()

    def _build_basic_vocabulary(self) -> None:
        """Build basic vocabulary from common causal terms."""
        # Each canonical verb/noun maps to the inflected forms treated as
        # equivalent to it.
        causal_terms = {
            "cause": {"cause", "causes", "caused", "causing", "causation"},
            "effect": {"effect", "effects", "affected", "affecting", "affects"},
            "influence": {"influence", "influences", "influenced", "influencing"},
            "determine": {"determine", "determines", "determined", "determining"},
            "depend": {"depend", "depends", "depended", "depending", "dependent"},
            "lead": {"lead", "leads", "led", "leading"},
            "result": {"result", "results", "resulted", "resulting"},
            "impact": {"impact", "impacts", "impacted", "impacting"},
            "drive": {"drive", "drives", "drove", "driving", "driven"},
            "control": {"control", "controls", "controlled", "controlling"}
        }

        for canonical, synonyms in causal_terms.items():
            self.add_synonym_set(canonical, synonyms)

    def compile_lexicon(self, sources: List[str]) -> "LexicalGraph":
        """
        Compile lexicon from multiple sources.

        Args:
            sources: List of source identifiers. Currently unused; the
                built-in vocabulary is returned regardless.

        Returns:
            The compiler's LexicalGraph (the live object, not a copy)
        """
        # For now, use built-in vocabulary
        # In future, can load from dictionaries, WordNet, etc.
        return self.lexical_graph

    def add_synonym_set(self, canonical: str, synonyms: Set[str]) -> None:
        """
        Add a set of synonyms.

        Any set previously stored under ``canonical`` is replaced. A copy of
        ``synonyms`` is stored, so later changes to the argument do not leak
        into the graph.

        Args:
            canonical: Canonical form of the term
            synonyms: Set of synonymous terms
        """
        self.lexical_graph.synonym_sets[canonical] = synonyms.copy()
        self.lexical_graph.vocabulary.add(canonical)
        self.lexical_graph.vocabulary.update(synonyms)

    def expand_vocabulary(self, word: str) -> Set[str]:
        """
        Expand vocabulary for a word (get synonyms and related terms).

        Matching against the local synonym sets is case-insensitive.

        Args:
            word: Word to expand

        Returns:
            Set of related terms (including the word itself, as given)
        """
        expanded = {word}
        word_lower = word.lower()

        # Collect every synonym set the word belongs to.
        for canonical, synonyms in self.lexical_graph.synonym_sets.items():
            if word_lower == canonical.lower() or word_lower in {s.lower() for s in synonyms}:
                expanded.add(canonical)
                expanded.update(synonyms)

        return expanded

    def normalize_term(self, term: str) -> str:
        """
        Normalize a term to its canonical form.

        Args:
            term: Term to normalize

        Returns:
            The lowercased term when it is itself a canonical term or is
            unknown; otherwise the canonical key of the first synonym set
            that contains it (case-insensitive match).
        """
        term_lower = term.lower()
        synonym_sets = self.lexical_graph.synonym_sets

        # A term that is already canonical wins over membership in any other
        # set.
        if any(term_lower == canonical.lower() for canonical in synonym_sets):
            return term_lower

        for canonical, synonyms in synonym_sets.items():
            if any(term_lower == synonym.lower() for synonym in synonyms):
                return canonical

        # If not found, return original (lowercased)
        return term_lower

    def validate_vocabulary(self, term: str) -> bool:
        """
        Validate if a term is in the vocabulary.

        Uses both local vocabulary and online dictionary if enabled. A term
        confirmed by the dictionary is added (lowercased) to the local
        vocabulary.

        Args:
            term: Term to validate

        Returns:
            True if term is in vocabulary, False otherwise
        """
        term_lower = term.lower()

        # Check local vocabulary first (case-insensitive)
        if any(term_lower == known.lower() for known in self.lexical_graph.vocabulary):
            return True

        # Check online dictionary if enabled
        if self.enable_dictionary:
            word_info = self._lookup_dictionary(term)
            if word_info and word_info.get('found'):
                self.lexical_graph.vocabulary.add(term_lower)
                return True

        return False

    def _get_cached_entry(self, word_lower: str) -> Optional[Dict[str, Any]]:
        """Return the unexpired cache entry for ``word_lower``, or None."""
        if not self.cache_enabled or word_lower not in self._dictionary_cache:
            return None
        cached_at = self._dictionary_cache_timestamps.get(word_lower, 0)
        if time.time() - cached_at >= self._cache_ttl:
            return None
        return self._dictionary_cache[word_lower]

    def _store_cached_entry(self, word_lower: str, word_info: Dict[str, Any]) -> None:
        """Record ``word_info`` and its timestamp when caching is enabled."""
        if self.cache_enabled:
            self._dictionary_cache[word_lower] = word_info
            self._dictionary_cache_timestamps[word_lower] = time.time()

    @staticmethod
    def _parse_dictionary_entry(word_data: Dict[str, Any], fallback_word: str) -> Dict[str, Any]:
        """Flatten one API entry into the word-info dictionary used by this class."""
        word_info: Dict[str, Any] = {
            'found': True,
            'word': word_data.get('word', fallback_word),
            'phonetic': word_data.get('phonetic', ''),
            'meanings': [],
            'synonyms': set(),
            'antonyms': set(),
            'part_of_speech': []
        }

        for meaning in word_data.get('meanings', []):
            pos = meaning.get('partOfSpeech', '')
            word_info['part_of_speech'].append(pos)

            # Synonyms/antonyms are lowercased and recorded both per meaning
            # and in the word-level sets.
            meaning_synonyms = [syn.lower() for syn in meaning.get('synonyms', [])]
            meaning_antonyms = [ant.lower() for ant in meaning.get('antonyms', [])]
            word_info['synonyms'].update(meaning_synonyms)
            word_info['antonyms'].update(meaning_antonyms)

            word_info['meanings'].append({
                'part_of_speech': pos,
                'definitions': [
                    {
                        'definition': definition.get('definition', ''),
                        'example': definition.get('example', '')
                    }
                    for definition in meaning.get('definitions', [])
                ],
                'synonyms': meaning_synonyms,
                'antonyms': meaning_antonyms
            })

        return word_info

    def _lookup_dictionary(self, word: str) -> Optional[Dict[str, Any]]:
        """
        Look up a word in the online dictionary.

        Uses Free Dictionary API (dictionaryapi.dev) - no API key required.
        Results (including "not found") are cached when caching is enabled,
        and consecutive requests are spaced by ``_min_request_interval``.

        Args:
            word: Word to look up

        Returns:
            Dictionary with word information (``found`` is False for a 404),
            or None when lookups are disabled, the word is blank, the
            request fails, or the response is not usable
        """
        if not self.enable_dictionary:
            return None

        word_lower = word.lower().strip()
        if not word_lower:
            # A blank word would address the bare endpoint instead of an entry.
            return None

        cached = self._get_cached_entry(word_lower)
        if cached is not None:
            return cached

        # Imported here so the block does not depend on a file-level import.
        from urllib.parse import quote

        # Rate limiting
        wait = self._min_request_interval - (time.time() - self._last_request_time)
        if wait > 0:
            time.sleep(wait)

        # Percent-encode the word so characters such as '/', '?', '#' or
        # spaces cannot alter the request path or add a query string.
        url = f"{self.dictionary_api_url}/{quote(word_lower, safe='')}"

        try:
            try:
                response = requests.get(url, timeout=5)
            finally:
                # Count failed attempts too, so errors do not bypass throttling.
                self._last_request_time = time.time()

            if response.status_code == 200:
                data = response.json()
                if isinstance(data, list) and len(data) > 0:
                    # Only the first entry of the response is used.
                    word_info = self._parse_dictionary_entry(data[0], word_lower)
                    self._store_cached_entry(word_lower, word_info)
                    return word_info

            elif response.status_code == 404:
                # Word not found
                word_info = {'found': False, 'word': word_lower}
                self._store_cached_entry(word_lower, word_info)
                return word_info

            else:
                logger.debug(
                    "Dictionary lookup for '%s' returned HTTP %s",
                    word, response.status_code
                )

        except requests.exceptions.RequestException as e:
            logger.debug(f"Dictionary lookup failed for '{word}': {e}")
            return None
        except Exception as e:
            # Best-effort lookup: malformed payloads must not break callers.
            logger.warning(f"Unexpected error in dictionary lookup for '{word}': {e}")
            return None

        return None

    def get_word_info(self, word: str) -> Optional[Dict[str, Any]]:
        """
        Get comprehensive word information from dictionary.

        Args:
            word: Word to look up

        Returns:
            Dictionary with word information (definitions, synonyms, part of
            speech, etc.), or None when no information is available
        """
        return self._lookup_dictionary(word)

    def is_valid_word(self, word: str) -> bool:
        """
        Check if a word exists in the dictionary.

        Only the dictionary (cache or online lookup) is consulted; the local
        vocabulary is not.

        Args:
            word: Word to check

        Returns:
            True if word exists, False otherwise
        """
        if not word or not word.strip():
            return False

        # A cached answer is honoured even if lookups are currently disabled.
        cached = self._get_cached_entry(word.lower().strip())
        if cached is not None:
            return cached.get('found', False)

        word_info = self._lookup_dictionary(word)
        return word_info is not None and word_info.get('found', False)

    def get_synonyms(self, word: str) -> Set[str]:
        """
        Get synonyms for a word using local synonym sets and the dictionary.

        Dictionary synonyms are merged into the local synonym set of the
        dictionary's headword, preserving any terms already stored there.

        Args:
            word: Word to get synonyms for

        Returns:
            Set of synonyms (includes the word itself)
        """
        synonyms = set(self.expand_vocabulary(word))

        if self.enable_dictionary:
            word_info = self._lookup_dictionary(word)
            if word_info and word_info.get('found'):
                dict_synonyms = set(word_info.get('synonyms', ()))
                synonyms.update(dict_synonyms)

                if dict_synonyms:
                    canonical = word_info.get('word', word.lower())
                    # add_synonym_set replaces the stored set, so merge first
                    # to keep existing entries (e.g. built-in inflections).
                    existing = set(self.lexical_graph.synonym_sets.get(canonical, ()))
                    self.add_synonym_set(canonical, existing | dict_synonyms)

        return synonyms

    def get_part_of_speech(self, word: str) -> List[str]:
        """
        Get part of speech for a word.

        Args:
            word: Word to check

        Returns:
            List of parts of speech (noun, verb, adjective, etc.); empty when
            the dictionary is disabled or the word is not found
        """
        if not self.enable_dictionary:
            return []

        word_info = self._lookup_dictionary(word)
        if word_info and word_info.get('found'):
            return word_info.get('part_of_speech', [])

        return []

    def is_action_verb(self, word: str) -> bool:
        """
        Check if a word is a verb using dictionary.

        Args:
            word: Word to check

        Returns:
            True if the dictionary lists 'verb' as a part of speech
        """
        return 'verb' in self.get_part_of_speech(word)

    def is_noun(self, word: str) -> bool:
        """
        Check if a word is a noun using dictionary.

        Args:
            word: Word to check

        Returns:
            True if the dictionary lists 'noun' as a part of speech
        """
        return 'noun' in self.get_part_of_speech(word)
402
+
403
+
404
class GrammaticalCompiler:
    """
    Compiles grammatical knowledge (sentence structure) into queryable patterns.

    Features:
    - Regex patterns for causal expressions
    - Simple positional dependency parsing
    - Active-to-passive transformation for a few causal verbs
    """

    def __init__(self):
        """Initialize grammatical compiler with pattern definitions."""
        self.causal_patterns = self._build_causal_patterns()
        self.dependency_patterns = self._build_dependency_patterns()

    def _build_causal_patterns(self) -> List[Tuple[str, str, float]]:
        """
        Build patterns for causal expressions.

        Every pattern captures two phrases of one or two words each: group 1
        is the phrase before the connective, group 2 the phrase after it.

        Returns:
            List of (pattern, relation_type, confidence) tuples
        """
        return [
            # Direct causal verbs
            (r'(\w+(?:\s+\w+)?)\s+(?:causes?|leads?\s+to|results?\s+in)\s+(\w+(?:\s+\w+)?)', 'causes', 0.95),
            (r'(\w+(?:\s+\w+)?)\s+(?:affects?|influences?|impacts?)\s+(\w+(?:\s+\w+)?)', 'affects', 0.9),
            (r'(\w+(?:\s+\w+)?)\s+(?:depends?\s+on|depends?\s+upon)\s+(\w+(?:\s+\w+)?)', 'depends_on', 0.9),
            (r'(\w+(?:\s+\w+)?)\s+(?:determines?|controls?|drives?)\s+(\w+(?:\s+\w+)?)', 'determines', 0.95),

            # Passive voice
            (r'(\w+(?:\s+\w+)?)\s+is\s+(?:caused|affected|influenced|determined)\s+by\s+(\w+(?:\s+\w+)?)', 'caused_by', 0.95),
            (r'(\w+(?:\s+\w+)?)\s+results?\s+from\s+(\w+(?:\s+\w+)?)', 'results_from', 0.9),

            # Conditional / temporal. The leading \b stops "if"/"when" from
            # matching the tail of a longer word (e.g. "motif"); the
            # conditional form also accepts "if X, then Y" with a comma.
            (r'\bif\s+(\w+(?:\s+\w+)?)(?:\s*,\s*|\s+)then\s+(\w+(?:\s+\w+)?)', 'conditional', 0.85),
            (r'\bwhen\s+(\w+(?:\s+\w+)?)\s*,\s*(\w+(?:\s+\w+)?)', 'temporal', 0.8),
        ]

    def _build_dependency_patterns(self) -> List[Tuple[str, str]]:
        """
        Build dependency grammar patterns.

        Returns:
            List of (pattern, relation) tuples
        """
        return [
            # Subject-verb-object
            (r'(\w+)\s+(\w+)\s+(\w+)', 'SVO'),
            # Prepositional phrases
            (r'(\w+)\s+(?:in|on|at|by|with|for|from|to)\s+(\w+)', 'PREP'),
        ]

    def parse_dependencies(self, sentence: str) -> "DependencyTree":
        """
        Parse sentence into dependency tree.

        This is a positional heuristic, not a real parser: the sentence is
        split on whitespace and, when it has at least three tokens, the
        first three are treated as subject, verb and object.

        Args:
            sentence: Sentence to parse

        Returns:
            DependencyTree whose nodes are the whitespace-separated tokens
            and whose root is the first token (None for an empty sentence)
        """
        words = sentence.split()
        nodes = words
        edges = []

        # Simple dependency parsing (subject-verb-object)
        # In a full implementation, would use proper dependency parser
        if len(words) >= 3:
            edges.append((words[0], words[1], "nsubj"))  # subject
            edges.append((words[1], words[2], "dobj"))  # object

        return DependencyTree(nodes=nodes, edges=edges, root=words[0] if words else None)

    def extract_causal_structure(self, parse_tree: "DependencyTree") -> Optional["CausalStructure"]:
        """
        Extract causal structure from dependency parse tree.

        The tree's nodes are re-joined with single spaces and matched
        (case-insensitively) against ``self.causal_patterns`` in list order;
        the first pattern that matches wins.

        Args:
            parse_tree: DependencyTree to analyze

        Returns:
            CausalStructure if found, None otherwise
        """
        sentence = ' '.join(parse_tree.nodes)

        for pattern, relation_type, confidence in self.causal_patterns:
            match = re.search(pattern, sentence, re.IGNORECASE)
            if match and len(match.groups()) >= 2:
                # NOTE(review): group 1 is always stored as ``cause``. For
                # 'depends_on', 'caused_by' and 'results_from' that phrase is
                # grammatically the effect - confirm that consumers use
                # relation_type to orient the edge before relying on
                # cause/effect alone.
                return CausalStructure(
                    cause=match.group(1).strip(),
                    effect=match.group(2).strip(),
                    relation_type=relation_type,
                    confidence=confidence
                )

        return None

    def transform_voice(self, sentence: str, target_voice: str) -> str:
        """
        Transform sentence between active and passive voice.

        Only active-to-passive is implemented, and only for the verbs
        cause/affect/influence. The result contains just the matched clause
        ("Y is <participle> by X"); text around it is dropped. Any other
        target voice, or a sentence without such a clause, is returned
        unchanged.

        Args:
            sentence: Sentence to transform
            target_voice: Target voice ("active" or "passive")

        Returns:
            Transformed sentence
        """
        # Simplified transformation
        # In full implementation, would use proper grammar rules
        if target_voice != "passive":
            return sentence

        # Simple active to passive: "X causes Y" -> "Y is caused by X"
        active_pattern = r'(\w+(?:\s+\w+)?)\s+(causes?|affects?|influences?)\s+(\w+(?:\s+\w+)?)'
        match = re.search(active_pattern, sentence, re.IGNORECASE)
        if not match:
            return sentence

        subject, verb, object_ = match.group(1), match.group(2), match.group(3)

        # Convert verb to past participle
        verb_map = {
            "causes": "caused",
            "cause": "caused",
            "affects": "affected",
            "affect": "affected",
            "influences": "influenced",
            "influence": "influenced"
        }
        past_participle = verb_map.get(verb.lower(), verb + "ed")

        return f"{object_} is {past_participle} by {subject}"
545
+
546
+
547
class PragmaticCompiler:
    """
    Compiles pragmatic knowledge (tone and style) based on graph properties.

    Maps graph properties to language decisions:
    - Low confidence → hedging ("may", "possibly", "uncertain")
    - High confidence → assertive ("will", "determines", "causes")
    - Complex graph → explanatory phrasing
    - Simple graph → concise phrasing
    """

    def __init__(self):
        """Initialize pragmatic compiler."""
        self.hedging_phrases = [
            "may", "might", "possibly", "potentially", "could",
            "uncertain", "unclear", "suggests", "indicates"
        ]
        self.assertive_phrases = [
            "will", "determines", "causes", "leads to", "results in",
            "clearly", "definitely", "certainly", "always"
        ]

    def select_register(self, confidence: float, complexity: int) -> str:
        """
        Select appropriate language register based on confidence and complexity.

        Rules are checked in order, so the first matching one wins:
        1. confidence > 0.8 and complexity < 10 → "assertive"
        2. confidence < 0.5 → "hedging"
        3. complexity > 20 → "explanatory"
        4. otherwise → "neutral"

        Args:
            confidence: Confidence level (0.0-1.0)
            complexity: Graph complexity (number of nodes/edges)

        Returns:
            Register type ("assertive", "hedging", "explanatory" or "neutral")
        """
        if confidence > 0.8 and complexity < 10:
            return "assertive"
        if confidence < 0.5:
            return "hedging"
        if complexity > 20:
            return "explanatory"
        return "neutral"

    def generate_hedging(self, confidence: float) -> str:
        """
        Generate hedging phrase based on confidence level.

        Thresholds are exclusive: a confidence of exactly 0.7, 0.5 or 0.3
        falls into the next lower band.

        Args:
            confidence: Confidence level (0.0-1.0)

        Returns:
            Hedging phrase ("likely", "possibly", "may" or "uncertain")
        """
        if confidence > 0.7:
            return "likely"
        if confidence > 0.5:
            return "possibly"
        if confidence > 0.3:
            return "may"
        return "uncertain"

    def adjust_explicitness(self, depth: int) -> int:
        """
        Adjust explicitness level based on reasoning depth.

        Args:
            depth: Depth of reasoning chain

        Returns:
            Explicitness level: 1 (concise) for depth <= 1, 3 (moderate) for
            depth <= 3, and 5 (very explicit) for anything deeper
        """
        if depth <= 1:
            return 1  # Concise
        if depth <= 3:
            return 3  # Moderate
        return 5  # Very explicit
File without changes