crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CRCA.py +172 -7
- MODEL_CARD.md +53 -0
- PKG-INFO +8 -2
- RELEASE_NOTES.md +17 -0
- STABILITY.md +19 -0
- architecture/hybrid/consistency_engine.py +362 -0
- architecture/hybrid/conversation_manager.py +421 -0
- architecture/hybrid/explanation_generator.py +452 -0
- architecture/hybrid/few_shot_learner.py +533 -0
- architecture/hybrid/graph_compressor.py +286 -0
- architecture/hybrid/hybrid_agent.py +4398 -0
- architecture/hybrid/language_compiler.py +623 -0
- architecture/hybrid/main,py +0 -0
- architecture/hybrid/reasoning_tracker.py +322 -0
- architecture/hybrid/self_verifier.py +524 -0
- architecture/hybrid/task_decomposer.py +567 -0
- architecture/hybrid/text_corrector.py +341 -0
- benchmark_results/crca_core_benchmarks.json +178 -0
- branches/crca_sd/crca_sd_realtime.py +6 -2
- branches/general_agent/__init__.py +102 -0
- branches/general_agent/general_agent.py +1400 -0
- branches/general_agent/personality.py +169 -0
- branches/general_agent/utils/__init__.py +19 -0
- branches/general_agent/utils/prompt_builder.py +170 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
- crca_core/__init__.py +35 -0
- crca_core/benchmarks/__init__.py +14 -0
- crca_core/benchmarks/synthetic_scm.py +103 -0
- crca_core/core/__init__.py +23 -0
- crca_core/core/api.py +120 -0
- crca_core/core/estimate.py +208 -0
- crca_core/core/godclass.py +72 -0
- crca_core/core/intervention_design.py +174 -0
- crca_core/core/lifecycle.py +48 -0
- crca_core/discovery/__init__.py +9 -0
- crca_core/discovery/tabular.py +193 -0
- crca_core/identify/__init__.py +171 -0
- crca_core/identify/backdoor.py +39 -0
- crca_core/identify/frontdoor.py +48 -0
- crca_core/identify/graph.py +106 -0
- crca_core/identify/id_algorithm.py +43 -0
- crca_core/identify/iv.py +48 -0
- crca_core/models/__init__.py +67 -0
- crca_core/models/provenance.py +56 -0
- crca_core/models/refusal.py +39 -0
- crca_core/models/result.py +83 -0
- crca_core/models/spec.py +151 -0
- crca_core/models/validation.py +68 -0
- crca_core/scm/__init__.py +9 -0
- crca_core/scm/linear_gaussian.py +198 -0
- crca_core/timeseries/__init__.py +6 -0
- crca_core/timeseries/pcmci.py +181 -0
- crca_llm/__init__.py +12 -0
- crca_llm/client.py +85 -0
- crca_llm/coauthor.py +118 -0
- crca_llm/orchestrator.py +289 -0
- crca_llm/types.py +21 -0
- crca_reasoning/__init__.py +16 -0
- crca_reasoning/critique.py +54 -0
- crca_reasoning/godclass.py +206 -0
- crca_reasoning/memory.py +24 -0
- crca_reasoning/rationale.py +10 -0
- crca_reasoning/react_controller.py +81 -0
- crca_reasoning/tool_router.py +97 -0
- crca_reasoning/types.py +40 -0
- crca_sd/__init__.py +15 -0
- crca_sd/crca_sd_core.py +2 -0
- crca_sd/crca_sd_governance.py +2 -0
- crca_sd/crca_sd_mpc.py +2 -0
- crca_sd/crca_sd_realtime.py +2 -0
- crca_sd/crca_sd_tui.py +2 -0
- cuda-keyring_1.1-1_all.deb +0 -0
- cuda-keyring_1.1-1_all.deb.1 +0 -0
- docs/IMAGE_ANNOTATION_USAGE.md +539 -0
- docs/INSTALL_DEEPSPEED.md +125 -0
- docs/api/branches/crca-cg.md +19 -0
- docs/api/branches/crca-q.md +27 -0
- docs/api/branches/crca-sd.md +37 -0
- docs/api/branches/general-agent.md +24 -0
- docs/api/branches/overview.md +19 -0
- docs/api/crca/agent-methods.md +62 -0
- docs/api/crca/operations.md +79 -0
- docs/api/crca/overview.md +32 -0
- docs/api/image-annotation/engine.md +52 -0
- docs/api/image-annotation/overview.md +17 -0
- docs/api/schemas/annotation.md +34 -0
- docs/api/schemas/core-schemas.md +82 -0
- docs/api/schemas/overview.md +32 -0
- docs/api/schemas/policy.md +30 -0
- docs/api/utils/conversation.md +22 -0
- docs/api/utils/graph-reasoner.md +32 -0
- docs/api/utils/overview.md +21 -0
- docs/api/utils/router.md +19 -0
- docs/api/utils/utilities.md +97 -0
- docs/architecture/causal-graphs.md +41 -0
- docs/architecture/data-flow.md +29 -0
- docs/architecture/design-principles.md +33 -0
- docs/architecture/hybrid-agent/components.md +38 -0
- docs/architecture/hybrid-agent/consistency.md +26 -0
- docs/architecture/hybrid-agent/overview.md +44 -0
- docs/architecture/hybrid-agent/reasoning.md +22 -0
- docs/architecture/llm-integration.md +26 -0
- docs/architecture/modular-structure.md +37 -0
- docs/architecture/overview.md +69 -0
- docs/architecture/policy-engine-arch.md +29 -0
- docs/branches/crca-cg/corposwarm.md +39 -0
- docs/branches/crca-cg/esg-scoring.md +30 -0
- docs/branches/crca-cg/multi-agent.md +35 -0
- docs/branches/crca-cg/overview.md +40 -0
- docs/branches/crca-q/alternative-data.md +55 -0
- docs/branches/crca-q/architecture.md +71 -0
- docs/branches/crca-q/backtesting.md +45 -0
- docs/branches/crca-q/causal-engine.md +33 -0
- docs/branches/crca-q/execution.md +39 -0
- docs/branches/crca-q/market-data.md +60 -0
- docs/branches/crca-q/overview.md +58 -0
- docs/branches/crca-q/philosophy.md +60 -0
- docs/branches/crca-q/portfolio-optimization.md +66 -0
- docs/branches/crca-q/risk-management.md +102 -0
- docs/branches/crca-q/setup.md +65 -0
- docs/branches/crca-q/signal-generation.md +61 -0
- docs/branches/crca-q/signal-validation.md +43 -0
- docs/branches/crca-sd/core.md +84 -0
- docs/branches/crca-sd/governance.md +53 -0
- docs/branches/crca-sd/mpc-solver.md +65 -0
- docs/branches/crca-sd/overview.md +59 -0
- docs/branches/crca-sd/realtime.md +28 -0
- docs/branches/crca-sd/tui.md +20 -0
- docs/branches/general-agent/overview.md +37 -0
- docs/branches/general-agent/personality.md +36 -0
- docs/branches/general-agent/prompt-builder.md +30 -0
- docs/changelog/index.md +79 -0
- docs/contributing/code-style.md +69 -0
- docs/contributing/documentation.md +43 -0
- docs/contributing/overview.md +29 -0
- docs/contributing/testing.md +29 -0
- docs/core/crcagent/async-operations.md +65 -0
- docs/core/crcagent/automatic-extraction.md +107 -0
- docs/core/crcagent/batch-prediction.md +80 -0
- docs/core/crcagent/bayesian-inference.md +60 -0
- docs/core/crcagent/causal-graph.md +92 -0
- docs/core/crcagent/counterfactuals.md +96 -0
- docs/core/crcagent/deterministic-simulation.md +78 -0
- docs/core/crcagent/dual-mode-operation.md +82 -0
- docs/core/crcagent/initialization.md +88 -0
- docs/core/crcagent/optimization.md +65 -0
- docs/core/crcagent/overview.md +63 -0
- docs/core/crcagent/time-series.md +57 -0
- docs/core/schemas/annotation.md +30 -0
- docs/core/schemas/core-schemas.md +82 -0
- docs/core/schemas/overview.md +30 -0
- docs/core/schemas/policy.md +41 -0
- docs/core/templates/base-agent.md +31 -0
- docs/core/templates/feature-mixins.md +31 -0
- docs/core/templates/overview.md +29 -0
- docs/core/templates/templates-guide.md +75 -0
- docs/core/tools/mcp-client.md +34 -0
- docs/core/tools/overview.md +24 -0
- docs/core/utils/conversation.md +27 -0
- docs/core/utils/graph-reasoner.md +29 -0
- docs/core/utils/overview.md +27 -0
- docs/core/utils/router.md +27 -0
- docs/core/utils/utilities.md +97 -0
- docs/css/custom.css +84 -0
- docs/examples/basic-usage.md +57 -0
- docs/examples/general-agent/general-agent-examples.md +50 -0
- docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
- docs/examples/image-annotation/image-annotation-examples.md +54 -0
- docs/examples/integration/integration-examples.md +58 -0
- docs/examples/overview.md +37 -0
- docs/examples/trading/trading-examples.md +46 -0
- docs/features/causal-reasoning/advanced-topics.md +101 -0
- docs/features/causal-reasoning/counterfactuals.md +43 -0
- docs/features/causal-reasoning/do-calculus.md +50 -0
- docs/features/causal-reasoning/overview.md +47 -0
- docs/features/causal-reasoning/structural-models.md +52 -0
- docs/features/hybrid-agent/advanced-components.md +55 -0
- docs/features/hybrid-agent/core-components.md +64 -0
- docs/features/hybrid-agent/overview.md +34 -0
- docs/features/image-annotation/engine.md +82 -0
- docs/features/image-annotation/features.md +113 -0
- docs/features/image-annotation/integration.md +75 -0
- docs/features/image-annotation/overview.md +53 -0
- docs/features/image-annotation/quickstart.md +73 -0
- docs/features/policy-engine/doctrine-ledger.md +105 -0
- docs/features/policy-engine/monitoring.md +44 -0
- docs/features/policy-engine/mpc-control.md +89 -0
- docs/features/policy-engine/overview.md +46 -0
- docs/getting-started/configuration.md +225 -0
- docs/getting-started/first-agent.md +164 -0
- docs/getting-started/installation.md +144 -0
- docs/getting-started/quickstart.md +137 -0
- docs/index.md +118 -0
- docs/js/mathjax.js +13 -0
- docs/lrm/discovery_proof_notes.md +25 -0
- docs/lrm/finetune_full.md +83 -0
- docs/lrm/math_appendix.md +120 -0
- docs/lrm/overview.md +32 -0
- docs/mkdocs.yml +238 -0
- docs/stylesheets/extra.css +21 -0
- docs_generated/crca_core/CounterfactualResult.md +12 -0
- docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
- docs_generated/crca_core/DraftSpec.md +13 -0
- docs_generated/crca_core/EstimateResult.md +13 -0
- docs_generated/crca_core/IdentificationResult.md +17 -0
- docs_generated/crca_core/InterventionDesignResult.md +12 -0
- docs_generated/crca_core/LockedSpec.md +15 -0
- docs_generated/crca_core/RefusalResult.md +12 -0
- docs_generated/crca_core/ValidationReport.md +9 -0
- docs_generated/crca_core/index.md +13 -0
- examples/general_agent_example.py +277 -0
- examples/general_agent_quickstart.py +202 -0
- examples/general_agent_simple.py +92 -0
- examples/hybrid_agent_auto_extraction.py +84 -0
- examples/hybrid_agent_dictionary_demo.py +104 -0
- examples/hybrid_agent_enhanced.py +179 -0
- examples/hybrid_agent_general_knowledge.py +107 -0
- examples/image_annotation_quickstart.py +328 -0
- examples/test_hybrid_fixes.py +77 -0
- image_annotation/__init__.py +27 -0
- image_annotation/annotation_engine.py +2593 -0
- install_cuda_wsl2.sh +59 -0
- install_deepspeed.sh +56 -0
- install_deepspeed_simple.sh +87 -0
- mkdocs.yml +252 -0
- ollama/Modelfile +8 -0
- prompts/__init__.py +2 -1
- prompts/default_crca.py +9 -1
- prompts/general_agent.py +227 -0
- prompts/image_annotation.py +56 -0
- pyproject.toml +17 -2
- requirements-docs.txt +10 -0
- requirements.txt +21 -2
- schemas/__init__.py +26 -1
- schemas/annotation.py +222 -0
- schemas/conversation.py +193 -0
- schemas/hybrid.py +211 -0
- schemas/reasoning.py +276 -0
- schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
- schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
- schemas_export/crca_core/DraftSpec.schema.json +635 -0
- schemas_export/crca_core/EstimateResult.schema.json +113 -0
- schemas_export/crca_core/IdentificationResult.schema.json +145 -0
- schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
- schemas_export/crca_core/LockedSpec.schema.json +646 -0
- schemas_export/crca_core/RefusalResult.schema.json +90 -0
- schemas_export/crca_core/ValidationReport.schema.json +62 -0
- scripts/build_lrm_dataset.py +80 -0
- scripts/export_crca_core_schemas.py +54 -0
- scripts/export_hf_lrm.py +37 -0
- scripts/export_ollama_gguf.py +45 -0
- scripts/generate_changelog.py +157 -0
- scripts/generate_crca_core_docs_from_schemas.py +86 -0
- scripts/run_crca_core_benchmarks.py +163 -0
- scripts/run_full_finetune.py +198 -0
- scripts/run_lrm_eval.py +31 -0
- templates/graph_management.py +29 -0
- tests/conftest.py +9 -0
- tests/test_core.py +2 -3
- tests/test_crca_core_discovery_tabular.py +15 -0
- tests/test_crca_core_estimate_dowhy.py +36 -0
- tests/test_crca_core_identify.py +18 -0
- tests/test_crca_core_intervention_design.py +36 -0
- tests/test_crca_core_linear_gaussian_scm.py +69 -0
- tests/test_crca_core_spec.py +25 -0
- tests/test_crca_core_timeseries_pcmci.py +15 -0
- tests/test_crca_llm_coauthor.py +12 -0
- tests/test_crca_llm_orchestrator.py +80 -0
- tests/test_hybrid_agent_llm_enhanced.py +556 -0
- tests/test_image_annotation_demo.py +376 -0
- tests/test_image_annotation_operational.py +408 -0
- tests/test_image_annotation_unit.py +551 -0
- tests/test_training_moe.py +13 -0
- training/__init__.py +42 -0
- training/datasets.py +140 -0
- training/deepspeed_zero2_0_5b.json +22 -0
- training/deepspeed_zero2_1_5b.json +22 -0
- training/deepspeed_zero3_0_5b.json +28 -0
- training/deepspeed_zero3_14b.json +28 -0
- training/deepspeed_zero3_h100_3gpu.json +20 -0
- training/deepspeed_zero3_offload.json +28 -0
- training/eval.py +92 -0
- training/finetune.py +516 -0
- training/public_datasets.py +89 -0
- training_data/react_train.jsonl +7473 -0
- utils/agent_discovery.py +311 -0
- utils/batch_processor.py +317 -0
- utils/conversation.py +78 -0
- utils/edit_distance.py +118 -0
- utils/formatter.py +33 -0
- utils/graph_reasoner.py +530 -0
- utils/rate_limiter.py +283 -0
- utils/router.py +2 -2
- utils/tool_discovery.py +307 -0
- webui/__init__.py +10 -0
- webui/app.py +229 -0
- webui/config.py +104 -0
- webui/static/css/style.css +332 -0
- webui/static/js/main.js +284 -0
- webui/templates/index.html +42 -0
- tests/test_crca_excel.py +0 -166
- tests/test_data_broker.py +0 -424
- tests/test_palantir.py +0 -349
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,623 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Language compilation system for CRCA hybrid agent.
|
|
3
|
+
|
|
4
|
+
Provides three-layer language compilation:
|
|
5
|
+
1. Lexical layer: Words and phrases (synonyms, hypernyms, vocabulary)
|
|
6
|
+
2. Grammatical layer: Sentence structure (dependency grammar, causal patterns)
|
|
7
|
+
3. Pragmatic layer: Tone and style (confidence-based language decisions)
|
|
8
|
+
|
|
9
|
+
All language knowledge is compiled into queryable structures at initialization,
|
|
10
|
+
not parsed at runtime.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Dict, List, Optional, Set, Tuple, Any
|
|
14
|
+
import logging
|
|
15
|
+
import re
|
|
16
|
+
import time
|
|
17
|
+
from collections import defaultdict
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
import requests
|
|
21
|
+
REQUESTS_AVAILABLE = True
|
|
22
|
+
except ImportError:
|
|
23
|
+
REQUESTS_AVAILABLE = False
|
|
24
|
+
|
|
25
|
+
from schemas.hybrid import LexicalGraph, SynonymSet, DependencyTree, CausalStructure
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class LexicalCompiler:
    """
    Compiles lexical knowledge (words and phrases) into queryable structures.

    Features:
    - Synonym sets keyed by a canonical term
    - Controlled vocabulary expansion
    - Term normalization (canonical forms)
    - Vocabulary validation, optionally backed by an online dictionary
    """

    def __init__(self, enable_dictionary: bool = True, cache_enabled: bool = True):
        """
        Initialize lexical compiler with dictionary integration.

        Args:
            enable_dictionary: Enable online dictionary lookups. Forced off
                when the ``requests`` package could not be imported.
            cache_enabled: Enable caching of dictionary lookups
        """
        self.lexical_graph = LexicalGraph()
        self.enable_dictionary = enable_dictionary and REQUESTS_AVAILABLE
        self.cache_enabled = cache_enabled

        # Dictionary cache to avoid repeated API calls
        self._dictionary_cache: Dict[str, Dict[str, Any]] = {}
        self._dictionary_cache_timestamps: Dict[str, float] = {}
        self._cache_ttl = 86400  # 24 hours

        # Dictionary API endpoint (Free Dictionary API - no key required)
        self.dictionary_api_url = "https://api.dictionaryapi.dev/api/v2/entries/en"

        # Rate limiting
        self._last_request_time = 0.0
        self._min_request_interval = 0.1  # 100ms between requests

        self._build_basic_vocabulary()

    def _build_basic_vocabulary(self) -> None:
        """Build basic vocabulary from common causal terms."""
        # Each canonical verb/noun maps to its inflected forms.
        causal_terms = {
            "cause": {"cause", "causes", "caused", "causing", "causation"},
            "effect": {"effect", "effects", "affected", "affecting", "affects"},
            "influence": {"influence", "influences", "influenced", "influencing"},
            "determine": {"determine", "determines", "determined", "determining"},
            "depend": {"depend", "depends", "depended", "depending", "dependent"},
            "lead": {"lead", "leads", "led", "leading"},
            "result": {"result", "results", "resulted", "resulting"},
            "impact": {"impact", "impacts", "impacted", "impacting"},
            "drive": {"drive", "drives", "drove", "driving", "driven"},
            "control": {"control", "controls", "controlled", "controlling"}
        }

        for canonical, synonyms in causal_terms.items():
            self.add_synonym_set(canonical, synonyms)

    def compile_lexicon(self, sources: List[str]) -> "LexicalGraph":
        """
        Compile lexicon from multiple sources.

        Args:
            sources: List of source identifiers (currently unused; reserved
                for future expansion)

        Returns:
            The compiler's LexicalGraph as built so far
        """
        # For now, use built-in vocabulary
        # In future, can load from dictionaries, WordNet, etc.
        return self.lexical_graph

    def add_synonym_set(self, canonical: str, synonyms: Set[str]) -> None:
        """
        Add a set of synonyms.

        Any set previously registered under ``canonical`` is replaced; the
        vocabulary only ever grows.

        Args:
            canonical: Canonical form of the term
            synonyms: Set of synonymous terms (copied, not stored by reference)
        """
        self.lexical_graph.synonym_sets[canonical] = synonyms.copy()
        self.lexical_graph.vocabulary.add(canonical)
        self.lexical_graph.vocabulary.update(synonyms)

    def expand_vocabulary(self, word: str) -> Set[str]:
        """
        Expand vocabulary for a word (get synonyms and related terms).

        Matching is case-insensitive, but the returned terms keep the casing
        they were stored with.

        Args:
            word: Word to expand

        Returns:
            Set of related terms (including the word itself, as given)
        """
        expanded = {word}
        word_lower = word.lower()

        for canonical, synonyms in self.lexical_graph.synonym_sets.items():
            if word_lower == canonical.lower() or word_lower in {s.lower() for s in synonyms}:
                expanded.add(canonical)
                expanded.update(synonyms)

        return expanded

    def normalize_term(self, term: str) -> str:
        """
        Normalize a term to its canonical form.

        Args:
            term: Term to normalize

        Returns:
            The lowercased term when it is itself a canonical key
            (case-insensitively), otherwise the canonical key of the first
            synonym set containing it, otherwise the lowercased term.
        """
        term_lower = term.lower()
        synonym_sets = self.lexical_graph.synonym_sets

        # A term that is already canonical wins over any synonym membership.
        if any(term_lower == canonical.lower() for canonical in synonym_sets):
            return term_lower

        # Find canonical form in synonym sets
        for canonical, synonyms in synonym_sets.items():
            if term_lower in {s.lower() for s in synonyms}:
                return canonical

        # If not found, return original (lowercased)
        return term_lower

    def validate_vocabulary(self, term: str) -> bool:
        """
        Validate if a term is in the vocabulary.

        Uses both local vocabulary and online dictionary if enabled. A term
        confirmed by the dictionary is added (lowercased) to the local
        vocabulary.

        Args:
            term: Term to validate

        Returns:
            True if term is in vocabulary, False otherwise
        """
        term_lower = term.lower()
        vocabulary = self.lexical_graph.vocabulary

        # Check local vocabulary first (exact hit, then case-insensitive scan)
        if term_lower in vocabulary or any(t.lower() == term_lower for t in vocabulary):
            return True

        # Check online dictionary if enabled
        if self.enable_dictionary:
            word_info = self._lookup_dictionary(term)
            if word_info and word_info.get('found'):
                vocabulary.add(term_lower)
                return True

        return False

    def _get_cached_entry(self, word_lower: str) -> Optional[Dict[str, Any]]:
        """Return the cached lookup for ``word_lower`` if caching is on and the entry is within the TTL."""
        if not self.cache_enabled or word_lower not in self._dictionary_cache:
            return None
        cached_at = self._dictionary_cache_timestamps.get(word_lower, 0)
        if time.time() - cached_at >= self._cache_ttl:
            return None
        return self._dictionary_cache[word_lower]

    def _store_cached_entry(self, word_lower: str, word_info: Dict[str, Any]) -> None:
        """Record ``word_info`` in the cache with the current timestamp (no-op when caching is off)."""
        if self.cache_enabled:
            self._dictionary_cache[word_lower] = word_info
            self._dictionary_cache_timestamps[word_lower] = time.time()

    @staticmethod
    def _parse_dictionary_entry(word_lower: str, word_data: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten one API entry into the word-info dict (meanings, synonyms, antonyms, parts of speech)."""
        word_info = {
            'found': True,
            'word': word_data.get('word', word_lower),
            'phonetic': word_data.get('phonetic', ''),
            'meanings': [],
            'synonyms': set(),
            'antonyms': set(),
            'part_of_speech': []
        }

        for meaning in word_data.get('meanings', []):
            pos = meaning.get('partOfSpeech', '')
            word_info['part_of_speech'].append(pos)

            meaning_entry = {
                'part_of_speech': pos,
                'definitions': [
                    {
                        'definition': definition.get('definition', ''),
                        'example': definition.get('example', '')
                    }
                    for definition in meaning.get('definitions', [])
                ],
                'synonyms': [syn.lower() for syn in meaning.get('synonyms', [])],
                'antonyms': [ant.lower() for ant in meaning.get('antonyms', [])]
            }

            # Word-level sets aggregate across all meanings.
            word_info['synonyms'].update(meaning_entry['synonyms'])
            word_info['antonyms'].update(meaning_entry['antonyms'])
            word_info['meanings'].append(meaning_entry)

        return word_info

    def _lookup_dictionary(self, word: str) -> Optional[Dict[str, Any]]:
        """
        Look up a word in the online dictionary.

        Uses Free Dictionary API (dictionaryapi.dev) - no API key required.

        Args:
            word: Word to look up

        Returns:
            Dictionary with word information. ``{'found': False, ...}`` is
            returned for a 404. None is returned when lookups are disabled,
            the word is blank, the request fails, or the response has any
            other status/shape.
        """
        # Function-scope import keeps this block self-contained.
        from urllib.parse import quote

        if not self.enable_dictionary:
            return None

        word_lower = word.lower().strip()
        if not word_lower:
            # A blank word would otherwise query the bare endpoint.
            return None

        cached = self._get_cached_entry(word_lower)
        if cached is not None:
            return cached

        # Rate limiting
        time_since_last = time.time() - self._last_request_time
        if time_since_last < self._min_request_interval:
            time.sleep(self._min_request_interval - time_since_last)

        try:
            # Percent-encode so spaces, '/', '?' or '#' cannot alter the URL path.
            encoded_word = quote(word_lower, safe="")
            url = f"{self.dictionary_api_url}/{encoded_word}"
            try:
                response = requests.get(url, timeout=5)
            finally:
                # Count failed attempts too, so errors cannot bypass the throttle.
                self._last_request_time = time.time()

            if response.status_code == 200:
                data = response.json()
                if isinstance(data, list) and len(data) > 0:
                    # Only the first entry of the response is used.
                    word_info = self._parse_dictionary_entry(word_lower, data[0])
                    self._store_cached_entry(word_lower, word_info)
                    return word_info

            elif response.status_code == 404:
                # Word not found; cache the miss as well.
                word_info = {'found': False, 'word': word_lower}
                self._store_cached_entry(word_lower, word_info)
                return word_info

        except requests.exceptions.RequestException as e:
            logger.debug("Dictionary lookup failed for '%s': %s", word, e)
            return None
        except Exception as e:
            # Best-effort lookup: malformed payloads must not crash callers.
            logger.warning("Unexpected error in dictionary lookup for '%s': %s", word, e)
            return None

        return None

    def get_word_info(self, word: str) -> Optional[Dict[str, Any]]:
        """
        Get comprehensive word information from dictionary.

        Args:
            word: Word to look up

        Returns:
            Dictionary with word information (definitions, synonyms, part of speech, etc.),
            or None when no lookup result is available
        """
        return self._lookup_dictionary(word)

    def is_valid_word(self, word: str) -> bool:
        """
        Check if a word exists in the dictionary.

        Args:
            word: Word to check

        Returns:
            True if word exists, False otherwise (including blank input)
        """
        if not word or not word.strip():
            return False

        # A fresh cache entry answers even if lookups are currently disabled.
        cached = self._get_cached_entry(word.lower().strip())
        if cached is not None:
            return bool(cached.get('found', False))

        word_info = self._lookup_dictionary(word)
        return word_info is not None and bool(word_info.get('found', False))

    def get_synonyms(self, word: str) -> Set[str]:
        """
        Get synonyms for a word using local synonym sets and the dictionary.

        Dictionary synonyms are merged into the local synonym set of the
        dictionary's headword so later calls can use them offline.

        Args:
            word: Word to get synonyms for

        Returns:
            Set of synonyms (includes the word itself, as given)
        """
        # Check local synonym sets first
        synonyms = set(self.expand_vocabulary(word))

        # Look up in dictionary
        if self.enable_dictionary:
            word_info = self._lookup_dictionary(word)
            if word_info and word_info.get('found'):
                dict_synonyms = word_info.get('synonyms', set())
                synonyms.update(dict_synonyms)

                if dict_synonyms:
                    canonical = word_info.get('word', word.lower())
                    # Merge rather than replace: add_synonym_set overwrites,
                    # which would drop previously registered forms.
                    known = self.lexical_graph.synonym_sets.get(canonical, set())
                    self.add_synonym_set(canonical, set(known) | set(dict_synonyms))

        return synonyms

    def get_part_of_speech(self, word: str) -> List[str]:
        """
        Get part of speech for a word.

        Args:
            word: Word to check

        Returns:
            List of parts of speech as reported by the dictionary; empty when
            lookups are disabled or the word is not found
        """
        if not self.enable_dictionary:
            return []

        word_info = self._lookup_dictionary(word)
        if word_info and word_info.get('found'):
            # Copy so callers cannot mutate the cached entry.
            return list(word_info.get('part_of_speech', []))

        return []

    def is_action_verb(self, word: str) -> bool:
        """
        Check if a word is an action verb using dictionary.

        Args:
            word: Word to check

        Returns:
            True if the dictionary lists 'verb' for the word, False otherwise
        """
        return 'verb' in self.get_part_of_speech(word)

    def is_noun(self, word: str) -> bool:
        """
        Check if a word is a noun using dictionary.

        Args:
            word: Word to check

        Returns:
            True if the dictionary lists 'noun' for the word, False otherwise
        """
        return 'noun' in self.get_part_of_speech(word)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
class GrammaticalCompiler:
|
|
405
|
+
"""
|
|
406
|
+
Compiles grammatical knowledge (sentence structure) into queryable patterns.
|
|
407
|
+
|
|
408
|
+
Features:
|
|
409
|
+
- Dependency grammar rules
|
|
410
|
+
- Causal expression patterns
|
|
411
|
+
- Active/passive transformation
|
|
412
|
+
- Tense and aspect for temporal causality
|
|
413
|
+
"""
|
|
414
|
+
|
|
415
|
+
def __init__(self):
    """Precompute the causal and dependency pattern tables for this compiler."""
    causal_table = self._build_causal_patterns()
    dependency_table = self._build_dependency_patterns()
    self.causal_patterns = causal_table
    self.dependency_patterns = dependency_table
|
|
419
|
+
|
|
420
|
+
def _build_causal_patterns(self) -> List[Tuple[str, str, float]]:
|
|
421
|
+
"""
|
|
422
|
+
Build patterns for causal expressions.
|
|
423
|
+
|
|
424
|
+
Returns:
|
|
425
|
+
List of (pattern, relation_type, confidence) tuples
|
|
426
|
+
"""
|
|
427
|
+
return [
|
|
428
|
+
# Direct causal verbs
|
|
429
|
+
(r'(\w+(?:\s+\w+)?)\s+(?:causes?|leads?\s+to|results?\s+in)\s+(\w+(?:\s+\w+)?)', 'causes', 0.95),
|
|
430
|
+
(r'(\w+(?:\s+\w+)?)\s+(?:affects?|influences?|impacts?)\s+(\w+(?:\s+\w+)?)', 'affects', 0.9),
|
|
431
|
+
(r'(\w+(?:\s+\w+)?)\s+(?:depends?\s+on|depends?\s+upon)\s+(\w+(?:\s+\w+)?)', 'depends_on', 0.9),
|
|
432
|
+
(r'(\w+(?:\s+\w+)?)\s+(?:determines?|controls?|drives?)\s+(\w+(?:\s+\w+)?)', 'determines', 0.95),
|
|
433
|
+
|
|
434
|
+
# Passive voice
|
|
435
|
+
(r'(\w+(?:\s+\w+)?)\s+is\s+(?:caused|affected|influenced|determined)\s+by\s+(\w+(?:\s+\w+)?)', 'caused_by', 0.95),
|
|
436
|
+
(r'(\w+(?:\s+\w+)?)\s+results?\s+from\s+(\w+(?:\s+\w+)?)', 'results_from', 0.9),
|
|
437
|
+
|
|
438
|
+
# Conditional
|
|
439
|
+
(r'if\s+(\w+(?:\s+\w+)?)\s+then\s+(\w+(?:\s+\w+)?)', 'conditional', 0.85),
|
|
440
|
+
(r'when\s+(\w+(?:\s+\w+)?)\s*,\s*(\w+(?:\s+\w+)?)', 'temporal', 0.8),
|
|
441
|
+
]
|
|
442
|
+
|
|
443
|
+
def _build_dependency_patterns(self) -> List[Tuple[str, str]]:
|
|
444
|
+
"""
|
|
445
|
+
Build dependency grammar patterns.
|
|
446
|
+
|
|
447
|
+
Returns:
|
|
448
|
+
List of (pattern, relation) tuples
|
|
449
|
+
"""
|
|
450
|
+
return [
|
|
451
|
+
# Subject-verb-object
|
|
452
|
+
(r'(\w+)\s+(\w+)\s+(\w+)', 'SVO'),
|
|
453
|
+
# Prepositional phrases
|
|
454
|
+
(r'(\w+)\s+(?:in|on|at|by|with|for|from|to)\s+(\w+)', 'PREP'),
|
|
455
|
+
]
|
|
456
|
+
|
|
457
|
+
def parse_dependencies(self, sentence: str) -> DependencyTree:
    """
    Build a naive dependency tree from a whitespace-tokenised sentence.

    This is a positional heuristic, not a real parser: when at least three
    tokens are present, the first is linked to the second as "nsubj" and the
    second to the third as "dobj". Shorter sentences get no edges.

    Args:
        sentence: Sentence to parse

    Returns:
        DependencyTree whose nodes are the tokens and whose root is the
        first token (None for an empty sentence)
    """
    tokens = sentence.split()

    if len(tokens) >= 3:
        # Positional guess: token 0 = subject, token 1 = verb, token 2 = object
        subject, verb, obj = tokens[:3]
        links = [(subject, verb, "nsubj"), (verb, obj, "dobj")]
    else:
        links = []

    head = tokens[0] if tokens else None
    return DependencyTree(nodes=tokens, edges=links, root=head)
|
|
479
|
+
|
|
480
|
+
def extract_causal_structure(self, parse_tree: DependencyTree) -> Optional[CausalStructure]:
    """
    Find the first causal pattern that matches the tree's sentence.

    The tree's nodes are re-joined with single spaces and each entry of
    ``self.causal_patterns`` is tried in order, case-insensitively. The
    first pattern that matches and exposes at least two capture groups
    wins.

    NOTE(review): capture group 1 is always stored as ``cause`` and group 2
    as ``effect``, including for passive-style patterns (e.g. the
    'caused_by' relation) where group 1 is textually the affected item.
    Presumably consumers use ``relation_type`` to interpret direction --
    confirm against callers.

    Args:
        parse_tree: DependencyTree to analyze

    Returns:
        CausalStructure for the first usable match, None if nothing matches
    """
    text = ' '.join(parse_tree.nodes)

    for regex, label, score in self.causal_patterns:
        hit = re.search(regex, text, re.IGNORECASE)
        # Skip patterns that miss or that cannot supply both endpoints.
        if hit is None or len(hit.groups()) < 2:
            continue
        return CausalStructure(
            cause=hit.group(1).strip(),
            effect=hit.group(2).strip(),
            relation_type=label,
            confidence=score
        )

    return None
|
|
507
|
+
|
|
508
|
+
def transform_voice(self, sentence: str, target_voice: str) -> str:
    """
    Rewrite a simple active causal clause in the passive voice.

    Only the active-to-passive direction is implemented: "X causes Y"
    becomes "Y is caused by X" for the verbs cause/affect/influence.
    Any other ``target_voice`` value, or a sentence without such a clause,
    is returned unchanged. When a clause is found, only that clause is
    returned; surrounding text is not carried over.

    Args:
        sentence: Sentence to transform
        target_voice: Target voice ("active" or "passive")

    Returns:
        Transformed sentence, or the input sentence when nothing applies
    """
    if target_voice != "passive":
        return sentence

    clause = re.search(
        r'(\w+(?:\s+\w+)?)\s+(causes?|affects?|influences?)\s+(\w+(?:\s+\w+)?)',
        sentence,
        re.IGNORECASE,
    )
    if clause is None:
        return sentence

    agent, verb, patient = clause.group(1), clause.group(2), clause.group(3)

    # Both the bare and the third-person "-s" form map to one participle.
    stems = {"cause": "caused", "affect": "affected", "influence": "influenced"}
    participles = {}
    for stem, participle in stems.items():
        participles[stem] = participle
        participles[stem + "s"] = participle

    key = verb.lower()
    # Fallback kept for matched verbs missing from the table (possible with
    # case-insensitive matching of non-ASCII letter variants).
    participle = participles[key] if key in participles else verb + "ed"

    return patient + " is " + participle + " by " + agent
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
class PragmaticCompiler:
    """
    Chooses tone and level of detail from numeric graph properties.

    Decisions made by this class:
    - confidence below 0.5 -> "hedging" register
    - confidence above 0.8 with fewer than 10 elements -> "assertive"
    - more than 20 elements -> "explanatory"
    - anything else -> "neutral"

    It also maps a confidence value to a single hedge word and a reasoning
    depth to an explicitness level.
    """

    def __init__(self):
        """Set up the hedging and assertive vocabularies."""
        # Vocabulary for low-confidence statements.
        self.hedging_phrases = [
            "may",
            "might",
            "possibly",
            "potentially",
            "could",
            "uncertain",
            "unclear",
            "suggests",
            "indicates",
        ]
        # Vocabulary for high-confidence statements.
        self.assertive_phrases = [
            "will",
            "determines",
            "causes",
            "leads to",
            "results in",
            "clearly",
            "definitely",
            "certainly",
            "always",
        ]

    def select_register(self, confidence: float, complexity: int) -> str:
        """
        Pick a language register from confidence and graph complexity.

        Rules are checked in order; the first that applies wins.

        Args:
            confidence: Confidence level (0.0-1.0)
            complexity: Graph complexity (number of nodes/edges)

        Returns:
            One of "assertive", "hedging", "explanatory" or "neutral"
        """
        if confidence > 0.8 and complexity < 10:
            return "assertive"
        if confidence < 0.5:
            return "hedging"
        if complexity > 20:
            return "explanatory"
        return "neutral"

    def generate_hedging(self, confidence: float) -> str:
        """
        Map a confidence level to a single hedge word.

        Args:
            confidence: Confidence level (0.0-1.0)

        Returns:
            "likely" above 0.7, "possibly" above 0.5, "may" above 0.3,
            otherwise "uncertain" (all bounds are strict)
        """
        # Ordered from the highest floor down; the first floor exceeded wins.
        ladder = ((0.7, "likely"), (0.5, "possibly"), (0.3, "may"))
        for floor, word in ladder:
            if confidence > floor:
                return word
        return "uncertain"

    def adjust_explicitness(self, depth: int) -> int:
        """
        Map reasoning depth to an explicitness level.

        Args:
            depth: Depth of reasoning chain

        Returns:
            1 (concise) for depth <= 1, 3 (moderate) for depth <= 3,
            otherwise 5 (very explicit)
        """
        if depth <= 1:
            return 1
        return 3 if depth <= 3 else 5
|
|
File without changes
|