crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. CRCA.py +172 -7
  2. MODEL_CARD.md +53 -0
  3. PKG-INFO +8 -2
  4. RELEASE_NOTES.md +17 -0
  5. STABILITY.md +19 -0
  6. architecture/hybrid/consistency_engine.py +362 -0
  7. architecture/hybrid/conversation_manager.py +421 -0
  8. architecture/hybrid/explanation_generator.py +452 -0
  9. architecture/hybrid/few_shot_learner.py +533 -0
  10. architecture/hybrid/graph_compressor.py +286 -0
  11. architecture/hybrid/hybrid_agent.py +4398 -0
  12. architecture/hybrid/language_compiler.py +623 -0
  13. architecture/hybrid/main,py +0 -0
  14. architecture/hybrid/reasoning_tracker.py +322 -0
  15. architecture/hybrid/self_verifier.py +524 -0
  16. architecture/hybrid/task_decomposer.py +567 -0
  17. architecture/hybrid/text_corrector.py +341 -0
  18. benchmark_results/crca_core_benchmarks.json +178 -0
  19. branches/crca_sd/crca_sd_realtime.py +6 -2
  20. branches/general_agent/__init__.py +102 -0
  21. branches/general_agent/general_agent.py +1400 -0
  22. branches/general_agent/personality.py +169 -0
  23. branches/general_agent/utils/__init__.py +19 -0
  24. branches/general_agent/utils/prompt_builder.py +170 -0
  25. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
  26. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
  27. crca_core/__init__.py +35 -0
  28. crca_core/benchmarks/__init__.py +14 -0
  29. crca_core/benchmarks/synthetic_scm.py +103 -0
  30. crca_core/core/__init__.py +23 -0
  31. crca_core/core/api.py +120 -0
  32. crca_core/core/estimate.py +208 -0
  33. crca_core/core/godclass.py +72 -0
  34. crca_core/core/intervention_design.py +174 -0
  35. crca_core/core/lifecycle.py +48 -0
  36. crca_core/discovery/__init__.py +9 -0
  37. crca_core/discovery/tabular.py +193 -0
  38. crca_core/identify/__init__.py +171 -0
  39. crca_core/identify/backdoor.py +39 -0
  40. crca_core/identify/frontdoor.py +48 -0
  41. crca_core/identify/graph.py +106 -0
  42. crca_core/identify/id_algorithm.py +43 -0
  43. crca_core/identify/iv.py +48 -0
  44. crca_core/models/__init__.py +67 -0
  45. crca_core/models/provenance.py +56 -0
  46. crca_core/models/refusal.py +39 -0
  47. crca_core/models/result.py +83 -0
  48. crca_core/models/spec.py +151 -0
  49. crca_core/models/validation.py +68 -0
  50. crca_core/scm/__init__.py +9 -0
  51. crca_core/scm/linear_gaussian.py +198 -0
  52. crca_core/timeseries/__init__.py +6 -0
  53. crca_core/timeseries/pcmci.py +181 -0
  54. crca_llm/__init__.py +12 -0
  55. crca_llm/client.py +85 -0
  56. crca_llm/coauthor.py +118 -0
  57. crca_llm/orchestrator.py +289 -0
  58. crca_llm/types.py +21 -0
  59. crca_reasoning/__init__.py +16 -0
  60. crca_reasoning/critique.py +54 -0
  61. crca_reasoning/godclass.py +206 -0
  62. crca_reasoning/memory.py +24 -0
  63. crca_reasoning/rationale.py +10 -0
  64. crca_reasoning/react_controller.py +81 -0
  65. crca_reasoning/tool_router.py +97 -0
  66. crca_reasoning/types.py +40 -0
  67. crca_sd/__init__.py +15 -0
  68. crca_sd/crca_sd_core.py +2 -0
  69. crca_sd/crca_sd_governance.py +2 -0
  70. crca_sd/crca_sd_mpc.py +2 -0
  71. crca_sd/crca_sd_realtime.py +2 -0
  72. crca_sd/crca_sd_tui.py +2 -0
  73. cuda-keyring_1.1-1_all.deb +0 -0
  74. cuda-keyring_1.1-1_all.deb.1 +0 -0
  75. docs/IMAGE_ANNOTATION_USAGE.md +539 -0
  76. docs/INSTALL_DEEPSPEED.md +125 -0
  77. docs/api/branches/crca-cg.md +19 -0
  78. docs/api/branches/crca-q.md +27 -0
  79. docs/api/branches/crca-sd.md +37 -0
  80. docs/api/branches/general-agent.md +24 -0
  81. docs/api/branches/overview.md +19 -0
  82. docs/api/crca/agent-methods.md +62 -0
  83. docs/api/crca/operations.md +79 -0
  84. docs/api/crca/overview.md +32 -0
  85. docs/api/image-annotation/engine.md +52 -0
  86. docs/api/image-annotation/overview.md +17 -0
  87. docs/api/schemas/annotation.md +34 -0
  88. docs/api/schemas/core-schemas.md +82 -0
  89. docs/api/schemas/overview.md +32 -0
  90. docs/api/schemas/policy.md +30 -0
  91. docs/api/utils/conversation.md +22 -0
  92. docs/api/utils/graph-reasoner.md +32 -0
  93. docs/api/utils/overview.md +21 -0
  94. docs/api/utils/router.md +19 -0
  95. docs/api/utils/utilities.md +97 -0
  96. docs/architecture/causal-graphs.md +41 -0
  97. docs/architecture/data-flow.md +29 -0
  98. docs/architecture/design-principles.md +33 -0
  99. docs/architecture/hybrid-agent/components.md +38 -0
  100. docs/architecture/hybrid-agent/consistency.md +26 -0
  101. docs/architecture/hybrid-agent/overview.md +44 -0
  102. docs/architecture/hybrid-agent/reasoning.md +22 -0
  103. docs/architecture/llm-integration.md +26 -0
  104. docs/architecture/modular-structure.md +37 -0
  105. docs/architecture/overview.md +69 -0
  106. docs/architecture/policy-engine-arch.md +29 -0
  107. docs/branches/crca-cg/corposwarm.md +39 -0
  108. docs/branches/crca-cg/esg-scoring.md +30 -0
  109. docs/branches/crca-cg/multi-agent.md +35 -0
  110. docs/branches/crca-cg/overview.md +40 -0
  111. docs/branches/crca-q/alternative-data.md +55 -0
  112. docs/branches/crca-q/architecture.md +71 -0
  113. docs/branches/crca-q/backtesting.md +45 -0
  114. docs/branches/crca-q/causal-engine.md +33 -0
  115. docs/branches/crca-q/execution.md +39 -0
  116. docs/branches/crca-q/market-data.md +60 -0
  117. docs/branches/crca-q/overview.md +58 -0
  118. docs/branches/crca-q/philosophy.md +60 -0
  119. docs/branches/crca-q/portfolio-optimization.md +66 -0
  120. docs/branches/crca-q/risk-management.md +102 -0
  121. docs/branches/crca-q/setup.md +65 -0
  122. docs/branches/crca-q/signal-generation.md +61 -0
  123. docs/branches/crca-q/signal-validation.md +43 -0
  124. docs/branches/crca-sd/core.md +84 -0
  125. docs/branches/crca-sd/governance.md +53 -0
  126. docs/branches/crca-sd/mpc-solver.md +65 -0
  127. docs/branches/crca-sd/overview.md +59 -0
  128. docs/branches/crca-sd/realtime.md +28 -0
  129. docs/branches/crca-sd/tui.md +20 -0
  130. docs/branches/general-agent/overview.md +37 -0
  131. docs/branches/general-agent/personality.md +36 -0
  132. docs/branches/general-agent/prompt-builder.md +30 -0
  133. docs/changelog/index.md +79 -0
  134. docs/contributing/code-style.md +69 -0
  135. docs/contributing/documentation.md +43 -0
  136. docs/contributing/overview.md +29 -0
  137. docs/contributing/testing.md +29 -0
  138. docs/core/crcagent/async-operations.md +65 -0
  139. docs/core/crcagent/automatic-extraction.md +107 -0
  140. docs/core/crcagent/batch-prediction.md +80 -0
  141. docs/core/crcagent/bayesian-inference.md +60 -0
  142. docs/core/crcagent/causal-graph.md +92 -0
  143. docs/core/crcagent/counterfactuals.md +96 -0
  144. docs/core/crcagent/deterministic-simulation.md +78 -0
  145. docs/core/crcagent/dual-mode-operation.md +82 -0
  146. docs/core/crcagent/initialization.md +88 -0
  147. docs/core/crcagent/optimization.md +65 -0
  148. docs/core/crcagent/overview.md +63 -0
  149. docs/core/crcagent/time-series.md +57 -0
  150. docs/core/schemas/annotation.md +30 -0
  151. docs/core/schemas/core-schemas.md +82 -0
  152. docs/core/schemas/overview.md +30 -0
  153. docs/core/schemas/policy.md +41 -0
  154. docs/core/templates/base-agent.md +31 -0
  155. docs/core/templates/feature-mixins.md +31 -0
  156. docs/core/templates/overview.md +29 -0
  157. docs/core/templates/templates-guide.md +75 -0
  158. docs/core/tools/mcp-client.md +34 -0
  159. docs/core/tools/overview.md +24 -0
  160. docs/core/utils/conversation.md +27 -0
  161. docs/core/utils/graph-reasoner.md +29 -0
  162. docs/core/utils/overview.md +27 -0
  163. docs/core/utils/router.md +27 -0
  164. docs/core/utils/utilities.md +97 -0
  165. docs/css/custom.css +84 -0
  166. docs/examples/basic-usage.md +57 -0
  167. docs/examples/general-agent/general-agent-examples.md +50 -0
  168. docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
  169. docs/examples/image-annotation/image-annotation-examples.md +54 -0
  170. docs/examples/integration/integration-examples.md +58 -0
  171. docs/examples/overview.md +37 -0
  172. docs/examples/trading/trading-examples.md +46 -0
  173. docs/features/causal-reasoning/advanced-topics.md +101 -0
  174. docs/features/causal-reasoning/counterfactuals.md +43 -0
  175. docs/features/causal-reasoning/do-calculus.md +50 -0
  176. docs/features/causal-reasoning/overview.md +47 -0
  177. docs/features/causal-reasoning/structural-models.md +52 -0
  178. docs/features/hybrid-agent/advanced-components.md +55 -0
  179. docs/features/hybrid-agent/core-components.md +64 -0
  180. docs/features/hybrid-agent/overview.md +34 -0
  181. docs/features/image-annotation/engine.md +82 -0
  182. docs/features/image-annotation/features.md +113 -0
  183. docs/features/image-annotation/integration.md +75 -0
  184. docs/features/image-annotation/overview.md +53 -0
  185. docs/features/image-annotation/quickstart.md +73 -0
  186. docs/features/policy-engine/doctrine-ledger.md +105 -0
  187. docs/features/policy-engine/monitoring.md +44 -0
  188. docs/features/policy-engine/mpc-control.md +89 -0
  189. docs/features/policy-engine/overview.md +46 -0
  190. docs/getting-started/configuration.md +225 -0
  191. docs/getting-started/first-agent.md +164 -0
  192. docs/getting-started/installation.md +144 -0
  193. docs/getting-started/quickstart.md +137 -0
  194. docs/index.md +118 -0
  195. docs/js/mathjax.js +13 -0
  196. docs/lrm/discovery_proof_notes.md +25 -0
  197. docs/lrm/finetune_full.md +83 -0
  198. docs/lrm/math_appendix.md +120 -0
  199. docs/lrm/overview.md +32 -0
  200. docs/mkdocs.yml +238 -0
  201. docs/stylesheets/extra.css +21 -0
  202. docs_generated/crca_core/CounterfactualResult.md +12 -0
  203. docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
  204. docs_generated/crca_core/DraftSpec.md +13 -0
  205. docs_generated/crca_core/EstimateResult.md +13 -0
  206. docs_generated/crca_core/IdentificationResult.md +17 -0
  207. docs_generated/crca_core/InterventionDesignResult.md +12 -0
  208. docs_generated/crca_core/LockedSpec.md +15 -0
  209. docs_generated/crca_core/RefusalResult.md +12 -0
  210. docs_generated/crca_core/ValidationReport.md +9 -0
  211. docs_generated/crca_core/index.md +13 -0
  212. examples/general_agent_example.py +277 -0
  213. examples/general_agent_quickstart.py +202 -0
  214. examples/general_agent_simple.py +92 -0
  215. examples/hybrid_agent_auto_extraction.py +84 -0
  216. examples/hybrid_agent_dictionary_demo.py +104 -0
  217. examples/hybrid_agent_enhanced.py +179 -0
  218. examples/hybrid_agent_general_knowledge.py +107 -0
  219. examples/image_annotation_quickstart.py +328 -0
  220. examples/test_hybrid_fixes.py +77 -0
  221. image_annotation/__init__.py +27 -0
  222. image_annotation/annotation_engine.py +2593 -0
  223. install_cuda_wsl2.sh +59 -0
  224. install_deepspeed.sh +56 -0
  225. install_deepspeed_simple.sh +87 -0
  226. mkdocs.yml +252 -0
  227. ollama/Modelfile +8 -0
  228. prompts/__init__.py +2 -1
  229. prompts/default_crca.py +9 -1
  230. prompts/general_agent.py +227 -0
  231. prompts/image_annotation.py +56 -0
  232. pyproject.toml +17 -2
  233. requirements-docs.txt +10 -0
  234. requirements.txt +21 -2
  235. schemas/__init__.py +26 -1
  236. schemas/annotation.py +222 -0
  237. schemas/conversation.py +193 -0
  238. schemas/hybrid.py +211 -0
  239. schemas/reasoning.py +276 -0
  240. schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
  241. schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
  242. schemas_export/crca_core/DraftSpec.schema.json +635 -0
  243. schemas_export/crca_core/EstimateResult.schema.json +113 -0
  244. schemas_export/crca_core/IdentificationResult.schema.json +145 -0
  245. schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
  246. schemas_export/crca_core/LockedSpec.schema.json +646 -0
  247. schemas_export/crca_core/RefusalResult.schema.json +90 -0
  248. schemas_export/crca_core/ValidationReport.schema.json +62 -0
  249. scripts/build_lrm_dataset.py +80 -0
  250. scripts/export_crca_core_schemas.py +54 -0
  251. scripts/export_hf_lrm.py +37 -0
  252. scripts/export_ollama_gguf.py +45 -0
  253. scripts/generate_changelog.py +157 -0
  254. scripts/generate_crca_core_docs_from_schemas.py +86 -0
  255. scripts/run_crca_core_benchmarks.py +163 -0
  256. scripts/run_full_finetune.py +198 -0
  257. scripts/run_lrm_eval.py +31 -0
  258. templates/graph_management.py +29 -0
  259. tests/conftest.py +9 -0
  260. tests/test_core.py +2 -3
  261. tests/test_crca_core_discovery_tabular.py +15 -0
  262. tests/test_crca_core_estimate_dowhy.py +36 -0
  263. tests/test_crca_core_identify.py +18 -0
  264. tests/test_crca_core_intervention_design.py +36 -0
  265. tests/test_crca_core_linear_gaussian_scm.py +69 -0
  266. tests/test_crca_core_spec.py +25 -0
  267. tests/test_crca_core_timeseries_pcmci.py +15 -0
  268. tests/test_crca_llm_coauthor.py +12 -0
  269. tests/test_crca_llm_orchestrator.py +80 -0
  270. tests/test_hybrid_agent_llm_enhanced.py +556 -0
  271. tests/test_image_annotation_demo.py +376 -0
  272. tests/test_image_annotation_operational.py +408 -0
  273. tests/test_image_annotation_unit.py +551 -0
  274. tests/test_training_moe.py +13 -0
  275. training/__init__.py +42 -0
  276. training/datasets.py +140 -0
  277. training/deepspeed_zero2_0_5b.json +22 -0
  278. training/deepspeed_zero2_1_5b.json +22 -0
  279. training/deepspeed_zero3_0_5b.json +28 -0
  280. training/deepspeed_zero3_14b.json +28 -0
  281. training/deepspeed_zero3_h100_3gpu.json +20 -0
  282. training/deepspeed_zero3_offload.json +28 -0
  283. training/eval.py +92 -0
  284. training/finetune.py +516 -0
  285. training/public_datasets.py +89 -0
  286. training_data/react_train.jsonl +7473 -0
  287. utils/agent_discovery.py +311 -0
  288. utils/batch_processor.py +317 -0
  289. utils/conversation.py +78 -0
  290. utils/edit_distance.py +118 -0
  291. utils/formatter.py +33 -0
  292. utils/graph_reasoner.py +530 -0
  293. utils/rate_limiter.py +283 -0
  294. utils/router.py +2 -2
  295. utils/tool_discovery.py +307 -0
  296. webui/__init__.py +10 -0
  297. webui/app.py +229 -0
  298. webui/config.py +104 -0
  299. webui/static/css/style.css +332 -0
  300. webui/static/js/main.js +284 -0
  301. webui/templates/index.html +42 -0
  302. tests/test_crca_excel.py +0 -166
  303. tests/test_data_broker.py +0 -424
  304. tests/test_palantir.py +0 -349
  305. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
  306. {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,341 @@
1
+ """
2
+ Non-destructive text correction pipeline for CRCA hybrid agent.
3
+
4
+ Provides three-layer correction:
5
+ 1. Orthographic normalization: Spelling errors
6
+ 2. Informal compression handling: Abbreviations, shortcuts
7
+ 3. Grammar recovery: Recover structure, not polish
8
+
9
+ All corrections are non-destructive - original form is preserved
10
+ with confidence scores and provenance.
11
+ """
12
+
13
+ from typing import Dict, List, Optional, Tuple, Any
14
+ import logging
15
+ import re
16
+
17
+ from schemas.hybrid import AnnotatedToken, DependencyTree
18
+ from utils.edit_distance import find_closest_match, damerau_levenshtein_distance
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class TextCorrector:
    """
    Non-destructive text corrector with three correction layers.

    Correction produces annotated tokens with:
    - original_form: What user typed
    - normalized_form: Corrected version
    - confidence: Correction confidence
    - correction_type: Type of correction
    - provenance: Why correction was made
    - metadata: Optional dictionary information
    """

    def __init__(
        self,
        vocabulary: Optional[List[str]] = None,
        lexical_compiler: Optional[Any] = None
    ):
        """
        Initialize text corrector.

        Args:
            vocabulary: Optional vocabulary list for spelling correction
            lexical_compiler: Optional LexicalCompiler instance for dictionary lookups
        """
        self.vocabulary = set(vocabulary) if vocabulary else set()
        self.lexical_compiler = lexical_compiler
        self.abbreviations = self._build_abbreviation_dict()
        self.common_words = self._build_common_words()

    def _build_abbreviation_dict(self) -> Dict[str, str]:
        """Build dictionary of common abbreviations/misspelling shortcuts."""
        return {
            "depnds": "depends",
            "demnad": "demand",
            "prce": "price",
            "qulity": "quality",
            "affcts": "affects",
            "influnces": "influences",
            "causes": "causes",  # Already correct, but included for completeness
        }

    def _build_common_words(self) -> set:
        """Build set of common English words (function words + causal vocabulary)."""
        return {
            "the", "a", "an", "and", "or", "but", "if", "then", "when",
            "what", "which", "how", "why", "where", "who",
            "is", "are", "was", "were", "be", "been", "being",
            "have", "has", "had", "do", "does", "did",
            "will", "would", "could", "should", "may", "might",
            "cause", "causes", "effect", "effects", "affect", "affects",
            "influence", "influences", "depend", "depends", "lead", "leads",
            "result", "results", "impact", "impacts", "determine", "determines"
        }

    def correct_orthographic(self, text: str) -> List[AnnotatedToken]:
        """
        Correct orthographic (spelling) errors.

        Each whitespace-separated token is checked against the local
        vocabulary, then fuzzy-matched (edit distance <= 2), with an
        optional dictionary lookup via the lexical compiler. Tokens are
        never dropped; unknown tokens are kept with reduced confidence.

        Args:
            text: Text to correct

        Returns:
            List of AnnotatedToken objects
        """
        tokens = text.split()
        corrected_tokens = []

        # Hoisted out of the loop: the candidate pool is loop-invariant.
        candidates = list(self.common_words) + list(self.vocabulary)
        dictionary_enabled = bool(
            self.lexical_compiler and self.lexical_compiler.enable_dictionary
        )

        for token in tokens:
            # Clean token (remove punctuation for matching).
            clean_token = re.sub(r'[^\w]', '', token.lower())

            # Pure-punctuation tokens (e.g. "--", "?") have an empty clean
            # form; fuzzy-matching an empty string would "correct" it into
            # an arbitrary short word, so pass it through unchanged.
            if not clean_token:
                corrected_tokens.append(AnnotatedToken(
                    original_form=token,
                    normalized_form=token,
                    confidence=1.0,
                    correction_type="none"
                ))
                continue

            # Check if token is already correct.
            if clean_token in self.common_words or clean_token in self.vocabulary:
                corrected_tokens.append(AnnotatedToken(
                    original_form=token,
                    normalized_form=token,
                    confidence=1.0,
                    correction_type="none"
                ))
                continue

            # Try to find closest match.
            match, distance = find_closest_match(clean_token, candidates, max_distance=2)

            # If dictionary is available, also check if word is valid.
            word_info = None
            if dictionary_enabled:
                word_info = self.lexical_compiler.get_word_info(clean_token)
                if word_info and word_info.get('found'):
                    # Word is valid, no correction needed.
                    match = clean_token
                    distance = 0

            if match and distance <= 2:
                if distance == 0:
                    # Dictionary-verified word: keep the user's exact form
                    # (punctuation and capitalization) — non-destructive.
                    normalized = token
                else:
                    # Confidence decays with edit distance.
                    normalized = match
                    # Preserve original capitalization if possible.
                    if token[0].isupper():
                        normalized = match.capitalize()

                confidence = max(0.0, 1.0 - (distance / 3.0))

                corrected_tokens.append(AnnotatedToken(
                    original_form=token,
                    normalized_form=normalized,
                    confidence=confidence,
                    correction_type="spelling" if distance > 0 else "none",
                    provenance=f"Edit distance: {distance}" if distance > 0 else "Valid word (dictionary verified)",
                    # Dictionary info (if any) travels with the token.
                    metadata=word_info if word_info else None
                ))
            else:
                # No correction found, but check dictionary to see if it's a valid word.
                if dictionary_enabled:
                    word_info = self.lexical_compiler.get_word_info(clean_token)
                    if word_info and word_info.get('found'):
                        # Word is valid, just not in our local vocabulary.
                        corrected_tokens.append(AnnotatedToken(
                            original_form=token,
                            normalized_form=token,
                            confidence=0.8,  # Medium confidence - valid word but not in local vocab
                            correction_type="none",
                            provenance="Valid word (dictionary verified)",
                            metadata=word_info
                        ))
                    else:
                        corrected_tokens.append(AnnotatedToken(
                            original_form=token,
                            normalized_form=token,
                            confidence=0.3,  # Low confidence - unknown word
                            correction_type="none",
                            provenance="Unknown word, not found in dictionary"
                        ))
                else:
                    # No dictionary available.
                    corrected_tokens.append(AnnotatedToken(
                        original_form=token,
                        normalized_form=token,
                        confidence=0.5,  # Low confidence - unknown word
                        correction_type="none",
                        provenance="Unknown word, no correction found"
                    ))

        return corrected_tokens

    def expand_abbreviations(self, tokens: List[AnnotatedToken]) -> List[AnnotatedToken]:
        """
        Expand abbreviations and shortcuts.

        Args:
            tokens: List of AnnotatedToken objects

        Returns:
            List of AnnotatedToken objects with abbreviations expanded
        """
        expanded_tokens = []

        for token in tokens:
            # NOTE: lookup is on the raw lowercased original form, so tokens
            # carrying punctuation (e.g. "depnds,") will not match.
            original = token.original_form.lower()

            if original in self.abbreviations:
                expanded = self.abbreviations[original]

                # Preserve original capitalization.
                if token.original_form[0].isupper():
                    expanded = expanded.capitalize()

                expanded_tokens.append(AnnotatedToken(
                    original_form=token.original_form,
                    normalized_form=expanded,
                    confidence=0.9,
                    correction_type="abbreviation",
                    provenance=f"Abbreviation expansion: {original} -> {expanded}"
                ))
            else:
                # No abbreviation found, keep token as-is.
                expanded_tokens.append(token)

        return expanded_tokens

    def recover_grammar(self, tokens: List[AnnotatedToken]) -> DependencyTree:
        """
        Recover grammar structure from tokens (not polish, just structure).

        Uses a minimal heuristic: the first recognized verb becomes the
        root; its left neighbor is the subject, its right neighbor the
        object. A full implementation would use a real parser.

        Args:
            tokens: List of AnnotatedToken objects

        Returns:
            DependencyTree representing recovered structure
        """
        words = [token.normalized_form for token in tokens]

        nodes = words
        edges = []
        root = None

        if len(words) >= 2:
            verbs = ["is", "are", "was", "were", "causes", "affects", "influences", "depends"]
            verb_idx = None
            for i, word in enumerate(words):
                if word.lower() in verbs:
                    verb_idx = i
                    break

            if verb_idx is not None:
                root = words[verb_idx]
                # Subject is before verb.
                if verb_idx > 0:
                    edges.append((words[verb_idx - 1], words[verb_idx], "nsubj"))
                # Object is after verb.
                if verb_idx < len(words) - 1:
                    edges.append((words[verb_idx], words[verb_idx + 1], "dobj"))

        # Fallback root: first word (no verb found, or single-word input).
        if root is None and words:
            root = words[0]

        return DependencyTree(nodes=nodes, edges=edges, root=root)

    def correct_text(
        self,
        text: str,
        use_abbreviation_expansion: bool = True,
        use_grammar_recovery: bool = True
    ) -> Dict[str, Any]:
        """
        Complete correction pipeline.

        Args:
            text: Text to correct
            use_abbreviation_expansion: Whether to expand abbreviations
            use_grammar_recovery: Whether to recover grammar structure

        Returns:
            Dictionary with original text, corrected tokens, joined corrected
            text, optional dependency tree, and the minimum token confidence.
        """
        # Step 1: Orthographic correction.
        tokens = self.correct_orthographic(text)

        # Step 2: Abbreviation expansion.
        if use_abbreviation_expansion:
            tokens = self.expand_abbreviations(tokens)

        # Step 3: Grammar recovery.
        dependency_tree = None
        if use_grammar_recovery:
            dependency_tree = self.recover_grammar(tokens)

        return {
            "original_text": text,
            "corrected_tokens": tokens,
            "corrected_text": " ".join([t.normalized_form for t in tokens]),
            "dependency_tree": dependency_tree,
            # Overall confidence is the weakest link.
            "confidence": min([t.confidence for t in tokens]) if tokens else 1.0
        }

    def disambiguate_with_graph(
        self,
        tokens: List[AnnotatedToken],
        graph_manager: Any,  # GraphManager instance
        expected_pattern: Optional[str] = None
    ) -> List[AnnotatedToken]:
        """
        Use graph structure for context-aware disambiguation.

        Tokens that exactly match a graph variable get a confidence boost;
        otherwise a fuzzy match (edit distance <= 2) against graph variables
        may rename the token to the matching variable.

        Args:
            tokens: List of AnnotatedToken objects
            graph_manager: GraphManager instance for graph structure
            expected_pattern: Optional expected pattern (e.g., "depends on");
                currently unused, reserved for future pattern-aware matching.

        Returns:
            List of disambiguated AnnotatedToken objects
        """
        disambiguated = []

        # Get variables from graph; lowercase view hoisted out of the loop.
        graph_variables = set(graph_manager.get_nodes())
        graph_variables_lower = {v.lower() for v in graph_variables}

        for token in tokens:
            normalized = token.normalized_form.lower()

            # If token matches a graph variable, increase confidence.
            if normalized in graph_variables_lower:
                disambiguated.append(AnnotatedToken(
                    original_form=token.original_form,
                    normalized_form=token.normalized_form,
                    confidence=min(1.0, token.confidence + 0.2),  # Boost confidence
                    correction_type=token.correction_type,
                    provenance=f"{token.provenance}; Graph variable match",
                    # Preserve any dictionary metadata gathered earlier.
                    metadata=token.metadata
                ))
            elif graph_variables:
                # Try to find closest graph variable.
                match, distance = find_closest_match(normalized, list(graph_variables), max_distance=2)
                if match and distance <= 2:
                    disambiguated.append(AnnotatedToken(
                        original_form=token.original_form,
                        normalized_form=match,
                        confidence=0.7,
                        correction_type="graph_disambiguation",
                        provenance=f"Matched to graph variable '{match}' (distance: {distance})",
                        metadata=token.metadata
                    ))
                else:
                    disambiguated.append(token)
            else:
                disambiguated.append(token)

        return disambiguated
@@ -0,0 +1,178 @@
1
+ {
2
+ "benchmarks": [
3
+ {
4
+ "result_type": "BenchmarkResult",
5
+ "benchmark": "linear_gaussian_chain",
6
+ "provenance": {
7
+ "run_id": "116d10c6-dd8a-4bd1-a19b-d1dde4192c23",
8
+ "timestamp_utc": "2026-01-24T00:15:54.840013+00:00",
9
+ "spec_hash": "ef1c101c6e1f853994f418127f20b81ace7f4a5dfd4ad1f75011c0d7612221ad",
10
+ "data_hash": null,
11
+ "library_versions": {
12
+ "python": "3.14.2",
13
+ "platform": "Windows-10-10.0.19044-SP0"
14
+ },
15
+ "random_seeds": {
16
+ "numpy": 1
17
+ },
18
+ "algorithm_config": {
19
+ "benchmark": "linear_gaussian_chain"
20
+ },
21
+ "hardware_notes": null
22
+ },
23
+ "metrics": {
24
+ "abduction_max_abs_error": 1.1102230246251565e-16
25
+ },
26
+ "artifacts": {
27
+ "factual": {
28
+ "X0": 0.345584192064786,
29
+ "X1": 1.1326439163594657,
30
+ "X2": 1.3498166009069061,
31
+ "X3": -0.08832229078814535
32
+ },
33
+ "counterfactual": {
34
+ "X0": 1.345584192064786,
35
+ "X1": 2.032643916359466,
36
+ "X2": 2.1598166009069066,
37
+ "X3": 0.6406777092118552
38
+ },
39
+ "notes": [
40
+ "This benchmark checks abduction correctness (noise recovery) under full observability.",
41
+ "Counterfactual uses abduction\u2013action\u2013prediction with fixed exogenous noise."
42
+ ]
43
+ }
44
+ },
45
+ {
46
+ "result_type": "BenchmarkResult",
47
+ "benchmark": "identification",
48
+ "provenance": {
49
+ "run_id": "f7724b4a-e289-45b4-8678-7b25f53c634d",
50
+ "timestamp_utc": "2026-01-24T00:15:54.841337+00:00",
51
+ "spec_hash": "5993a3988c56bf780cdf380e1d583aef633b4984d92c47af74a20aeacdc7c312",
52
+ "data_hash": null,
53
+ "library_versions": {
54
+ "python": "3.14.2",
55
+ "platform": "Windows-10-10.0.19044-SP0"
56
+ },
57
+ "random_seeds": {},
58
+ "algorithm_config": {},
59
+ "hardware_notes": null
60
+ },
61
+ "metrics": {},
62
+ "artifacts": {
63
+ "identifiable_case": {
64
+ "result_type": "IdentificationResult",
65
+ "provenance": {
66
+ "run_id": "89d3c7f1-1192-4810-882e-26178d164cea",
67
+ "timestamp_utc": "2026-01-24T00:15:54.840274+00:00",
68
+ "spec_hash": "ac1a7f2ad5f6987c2bd5bcab24ed0e00a9fe91cc58f826aedd347a1c4afe7485",
69
+ "data_hash": null,
70
+ "library_versions": {
71
+ "python": "3.14.2",
72
+ "platform": "Windows-10-10.0.19044-SP0"
73
+ },
74
+ "random_seeds": {},
75
+ "algorithm_config": {},
76
+ "hardware_notes": null
77
+ },
78
+ "assumptions": [],
79
+ "limitations": [],
80
+ "artifacts": {},
81
+ "method": "backdoor",
82
+ "estimand_expression": "sum_{z} P(Y|X,z) P(z)",
83
+ "assumptions_used": [
84
+ "Backdoor criterion holds with the returned adjustment set.",
85
+ "No unmeasured confounding conditional on Z.",
86
+ "Positivity/overlap for adjustment set."
87
+ ],
88
+ "witnesses": {
89
+ "adjustment_set": []
90
+ }
91
+ },
92
+ "latent_confounder_case": {
93
+ "result_type": "IdentificationResult",
94
+ "provenance": {
95
+ "run_id": "eede5b84-87b3-440f-8bbd-806414da1c65",
96
+ "timestamp_utc": "2026-01-24T00:15:54.841222+00:00",
97
+ "spec_hash": "4c10d99fa36dab1db0d2c447e4ca53ae3c97ae5417e822bd581519a817ad1849",
98
+ "data_hash": null,
99
+ "library_versions": {
100
+ "python": "3.14.2",
101
+ "platform": "Windows-10-10.0.19044-SP0"
102
+ },
103
+ "random_seeds": {},
104
+ "algorithm_config": {},
105
+ "hardware_notes": null
106
+ },
107
+ "assumptions": [],
108
+ "limitations": [],
109
+ "artifacts": {},
110
+ "method": "backdoor",
111
+ "estimand_expression": "sum_{z} P(Y|X,z) P(z)",
112
+ "assumptions_used": [
113
+ "Backdoor criterion holds with the returned adjustment set.",
114
+ "No unmeasured confounding conditional on Z.",
115
+ "Positivity/overlap for adjustment set."
116
+ ],
117
+ "witnesses": {
118
+ "adjustment_set": []
119
+ }
120
+ }
121
+ }
122
+ },
123
+ {
124
+ "result_type": "BenchmarkResult",
125
+ "benchmark": "discovery",
126
+ "provenance": {
127
+ "run_id": "e981afd2-56be-4191-be89-2d5ce369ffb4",
128
+ "timestamp_utc": "2026-01-24T00:15:55.601239+00:00",
129
+ "spec_hash": "221827b0c5dee7dca1d7b1ad0bdf4781ad8b225bc469e8c86e7d0ce5601a19c1",
130
+ "data_hash": null,
131
+ "library_versions": {
132
+ "python": "3.14.2",
133
+ "platform": "Windows-10-10.0.19044-SP0"
134
+ },
135
+ "random_seeds": {},
136
+ "algorithm_config": {},
137
+ "hardware_notes": null
138
+ },
139
+ "metrics": {},
140
+ "artifacts": {
141
+ "tabular": {
142
+ "result_type": "Refusal",
143
+ "reason_codes": [
144
+ "UNSUPPORTED_OPERATION"
145
+ ],
146
+ "message": "Tabular causal discovery backend not available.",
147
+ "checklist": [
148
+ {
149
+ "item": "Install causal-learn",
150
+ "rationale": "Tabular discovery is wrap-first; we refuse rather than run unvalidated heuristics."
151
+ }
152
+ ],
153
+ "suggested_next_steps": [
154
+ "pip install causal-learn"
155
+ ],
156
+ "details": null
157
+ },
158
+ "timeseries": {
159
+ "result_type": "Refusal",
160
+ "reason_codes": [
161
+ "UNSUPPORTED_OPERATION"
162
+ ],
163
+ "message": "Time-series causal discovery backend (tigramite) not available.",
164
+ "checklist": [
165
+ {
166
+ "item": "Install tigramite",
167
+ "rationale": "PCMCI/PCMCI+ discovery is wrap-first; we refuse rather than run unvalidated heuristics."
168
+ }
169
+ ],
170
+ "suggested_next_steps": [
171
+ "pip install tigramite"
172
+ ],
173
+ "details": null
174
+ }
175
+ }
176
+ }
177
+ ]
178
+ }
@@ -934,7 +934,10 @@ class SafetyInterlocks:
934
934
 
935
935
  def __init__(
936
936
  self,
937
- max_budget_change: float = 0.20, # 20% max change per period
937
+ # NOTE: Budget-share changes are measured using L1 distance over the simplex.
938
+ # A 30% shift between two categories yields an L1 distance of 0.60.
939
+ # The default is intentionally permissive; major changes are gated by approval.
940
+ max_budget_change: float = 0.60,
938
941
  major_change_threshold: float = 0.10, # 10% = major change
939
942
  confidence_threshold: float = 0.95, # 95% confidence required
940
943
  ) -> None:
@@ -1004,7 +1007,8 @@ class SafetyInterlocks:
1004
1007
 
1005
1008
  total_change = sum(budget_change.values())
1006
1009
 
1007
- if total_change > self.max_budget_change:
1010
+ # Allow tiny floating-point error at the threshold.
1011
+ if (total_change - self.max_budget_change) > 1e-12:
1008
1012
  return False, f"Budget change {total_change:.2%} exceeds limit {self.max_budget_change:.2%}", True
1009
1013
 
1010
1014
  return True, "Within rate limits", False
@@ -0,0 +1,102 @@
1
+ """
2
+ General Agent Module
3
+
4
+ A general-purpose conversational agent (GPT-style) that can handle diverse tasks,
5
+ use tools, and optionally access other specialized CRCA agents via AOP/router integration.
6
+ """
7
+
8
+ from typing import Optional
9
+
10
+ __version__ = "0.1.0"
11
+
12
# Lazy imports: keep this package importable even when a submodule (or one of
# its heavy dependencies) is missing. Availability flags record what loaded.
try:
    from branches.general_agent.general_agent import GeneralAgent, GeneralAgentConfig
    GENERAL_AGENT_AVAILABLE = True
except ImportError as e:
    # Stub the names so `from branches.general_agent import GeneralAgent`
    # still succeeds; the original error is kept so get_general_agent()
    # can surface a useful message later.
    GeneralAgent = None
    GeneralAgentConfig = None
    GENERAL_AGENT_AVAILABLE = False
    _import_error = e

try:
    from branches.general_agent.personality import (
        Personality,
        get_personality,
        create_custom_personality,
        list_personalities,
        PERSONALITIES,
    )
    PERSONALITY_AVAILABLE = True
except ImportError:
    # NOTE(review): unlike the block above, the import error is not recorded
    # here — personality support degrades silently to the stubs below.
    Personality = None
    get_personality = None
    create_custom_personality = None
    list_personalities = None
    PERSONALITIES = {}
    PERSONALITY_AVAILABLE = False

# Public API of the package (includes the factory helpers defined below).
__all__ = [
    "GeneralAgent",
    "GeneralAgentConfig",
    "Personality",
    "get_personality",
    "create_custom_personality",
    "list_personalities",
    "PERSONALITIES",
    "get_general_agent",
    "create_agent",
    "GENERAL_AGENT_AVAILABLE",
    "PERSONALITY_AVAILABLE",
    "__version__",
]
53
+
54
+
55
+ def get_general_agent(**kwargs) -> Optional["GeneralAgent"]:
56
+ """
57
+ Get GeneralAgent instance (simple factory function).
58
+
59
+ Simplest usage:
60
+ agent = get_general_agent()
61
+ agent = get_general_agent(model_name="gpt-4o")
62
+ agent = get_general_agent(personality="friendly")
63
+
64
+ Args:
65
+ **kwargs: Arguments to pass to GeneralAgent constructor
66
+
67
+ Returns:
68
+ GeneralAgent instance or None if not available
69
+ """
70
+ if not GENERAL_AGENT_AVAILABLE:
71
+ if '_import_error' in globals():
72
+ raise ImportError(f"GeneralAgent not available: {_import_error}")
73
+ return None
74
+
75
+ try:
76
+ return GeneralAgent(**kwargs)
77
+ except Exception as e:
78
+ raise RuntimeError(f"Failed to create GeneralAgent: {e}") from e
79
+
80
+
81
def create_agent(
    model_name: Optional[str] = None,
    personality: Optional[str] = None,
    **kwargs
) -> Optional["GeneralAgent"]:
    """
    Ultra-simple agent creation function.

    Usage:
        agent = create_agent()
        agent = create_agent("gpt-4o")
        agent = create_agent("gpt-4o", "friendly")

    Args:
        model_name: LLM model name (optional)
        personality: Personality name (optional)
        **kwargs: Additional parameters forwarded to GeneralAgent

    Returns:
        GeneralAgent instance (or None — see get_general_agent)

    Raises:
        ImportError: GeneralAgent could not be imported.
        RuntimeError: GeneralAgent construction failed.
    """
    # Forward the positional conveniences only when actually provided.
    # Previously None was always passed through, which (a) overrode the
    # constructor's own defaults and (b) raised TypeError if the caller
    # also supplied model_name/personality via **kwargs.
    if model_name is not None:
        kwargs.setdefault("model_name", model_name)
    if personality is not None:
        kwargs.setdefault("personality", personality)
    return get_general_agent(**kwargs)