odin-engine 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/__init__.py +17 -17
- benchmarks/datasets.py +284 -284
- benchmarks/metrics.py +275 -275
- benchmarks/run_ablation.py +279 -279
- benchmarks/run_npll_benchmark.py +270 -270
- npll/__init__.py +10 -10
- npll/bootstrap.py +474 -474
- npll/core/__init__.py +33 -33
- npll/core/knowledge_graph.py +308 -308
- npll/core/logical_rules.py +496 -496
- npll/core/mln.py +474 -474
- npll/inference/__init__.py +40 -40
- npll/inference/e_step.py +419 -419
- npll/inference/elbo.py +434 -434
- npll/inference/m_step.py +576 -576
- npll/npll_model.py +631 -631
- npll/scoring/__init__.py +42 -42
- npll/scoring/embeddings.py +441 -441
- npll/scoring/probability.py +402 -402
- npll/scoring/scoring_module.py +369 -369
- npll/training/__init__.py +24 -24
- npll/training/evaluation.py +496 -496
- npll/training/npll_trainer.py +520 -520
- npll/utils/__init__.py +47 -47
- npll/utils/batch_utils.py +492 -492
- npll/utils/config.py +144 -144
- npll/utils/math_utils.py +338 -338
- odin/__init__.py +21 -20
- odin/engine.py +264 -264
- odin/schema.py +210 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/METADATA +503 -456
- odin_engine-0.2.0.dist-info/RECORD +63 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/licenses/LICENSE +21 -21
- retrieval/__init__.py +50 -50
- retrieval/adapters.py +140 -140
- retrieval/adapters_arango.py +1418 -1418
- retrieval/aggregators.py +707 -707
- retrieval/beam.py +127 -127
- retrieval/budget.py +60 -60
- retrieval/cache.py +159 -159
- retrieval/confidence.py +88 -88
- retrieval/eval.py +49 -49
- retrieval/linker.py +87 -87
- retrieval/metrics.py +105 -105
- retrieval/metrics_motifs.py +36 -36
- retrieval/orchestrator.py +571 -571
- retrieval/ppr/__init__.py +12 -12
- retrieval/ppr/anchors.py +41 -41
- retrieval/ppr/bippr.py +61 -61
- retrieval/ppr/engines.py +257 -257
- retrieval/ppr/global_pr.py +76 -76
- retrieval/ppr/indexes.py +78 -78
- retrieval/ppr.py +156 -156
- retrieval/ppr_cache.py +25 -25
- retrieval/scoring.py +294 -294
- retrieval/utils/pii_redaction.py +36 -36
- retrieval/writers/__init__.py +9 -9
- retrieval/writers/arango_writer.py +28 -28
- retrieval/writers/base.py +21 -21
- retrieval/writers/janus_writer.py +36 -36
- odin_engine-0.1.0.dist-info/RECORD +0 -62
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/WHEEL +0 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/top_level.txt +0 -0
odin/__init__.py
CHANGED
|
@@ -1,20 +1,21 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Odin Knowledge Graph Intelligence Engine
|
|
3
|
-
|
|
4
|
-
A library for intelligent knowledge graph exploration using:
|
|
5
|
-
- Personalized PageRank (PPR) for structural importance
|
|
6
|
-
- Beam Search for efficient path finding
|
|
7
|
-
- NPLL (Neural Probabilistic Logic Learning) for semantic plausibility
|
|
8
|
-
|
|
9
|
-
Usage:
|
|
10
|
-
from odin import OdinEngine
|
|
11
|
-
|
|
12
|
-
engine = OdinEngine(db=my_arango_db)
|
|
13
|
-
results = engine.retrieve(seeds=["Patient_123"])
|
|
14
|
-
score = engine.score_edge("Patient_A", "treated_by", "Dr_Smith")
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
from .engine import OdinEngine
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
1
|
+
"""
|
|
2
|
+
Odin Knowledge Graph Intelligence Engine
|
|
3
|
+
|
|
4
|
+
A library for intelligent knowledge graph exploration using:
|
|
5
|
+
- Personalized PageRank (PPR) for structural importance
|
|
6
|
+
- Beam Search for efficient path finding
|
|
7
|
+
- NPLL (Neural Probabilistic Logic Learning) for semantic plausibility
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
from odin import OdinEngine
|
|
11
|
+
|
|
12
|
+
engine = OdinEngine(db=my_arango_db)
|
|
13
|
+
results = engine.retrieve(seeds=["Patient_123"])
|
|
14
|
+
score = engine.score_edge("Patient_A", "treated_by", "Dr_Smith")
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .engine import OdinEngine
|
|
18
|
+
from .schema import SchemaInspector, inspect_arango_schema
|
|
19
|
+
|
|
20
|
+
__all__ = ["OdinEngine", "SchemaInspector", "inspect_arango_schema"]
|
|
21
|
+
__version__ = "0.2.0"
|
odin/engine.py
CHANGED
|
@@ -1,264 +1,264 @@
|
|
|
1
|
-
"""
|
|
2
|
-
OdinEngine: The main entry point for the Odin KG Intelligence Library.
|
|
3
|
-
|
|
4
|
-
This class orchestrates all components:
|
|
5
|
-
- Graph access (with caching)
|
|
6
|
-
- NPLL model management (auto-train if needed)
|
|
7
|
-
- Retrieval (PPR + Beam Search + Scoring)
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
import os
|
|
11
|
-
import sys
|
|
12
|
-
import logging
|
|
13
|
-
from typing import List, Dict, Any, Optional
|
|
14
|
-
|
|
15
|
-
from arango.database import StandardDatabase
|
|
16
|
-
|
|
17
|
-
# Add parent path for imports
|
|
18
|
-
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
19
|
-
|
|
20
|
-
from npll.bootstrap import KnowledgeBootstrapper
|
|
21
|
-
from npll.npll_model import NPLLModel
|
|
22
|
-
from retrieval.orchestrator import RetrievalOrchestrator, OrchestratorParams
|
|
23
|
-
from retrieval.adapters_arango import ArangoCommunityAccessor, GlobalGraphAccessor
|
|
24
|
-
from retrieval.cache import CachedGraphAccessor
|
|
25
|
-
from retrieval.confidence import NPLLConfidence, ConstantConfidence
|
|
26
|
-
from retrieval.ppr.anchors import APPRAnchors, APPRAnchorParams
|
|
27
|
-
|
|
28
|
-
logger = logging.getLogger("odin")
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class OdinEngine:
|
|
32
|
-
"""
|
|
33
|
-
Main entry point for the Odin Knowledge Graph Intelligence Library.
|
|
34
|
-
|
|
35
|
-
Handles:
|
|
36
|
-
- Graph access (with caching)
|
|
37
|
-
- NPLL model loading (auto-trains if needed)
|
|
38
|
-
- Retrieval orchestration (PPR + Beam Search + NPLL Scoring)
|
|
39
|
-
|
|
40
|
-
Example:
|
|
41
|
-
from odin import OdinEngine
|
|
42
|
-
from arango import ArangoClient
|
|
43
|
-
|
|
44
|
-
client = ArangoClient(hosts="http://localhost:8529")
|
|
45
|
-
db = client.db("KG-test", username="root", password="")
|
|
46
|
-
|
|
47
|
-
engine = OdinEngine(db)
|
|
48
|
-
results = engine.retrieve(seeds=["Patient_123"])
|
|
49
|
-
"""
|
|
50
|
-
|
|
51
|
-
def __init__(
|
|
52
|
-
self,
|
|
53
|
-
db: StandardDatabase,
|
|
54
|
-
community_id: str = "global",
|
|
55
|
-
cache_size: int = 5000,
|
|
56
|
-
auto_train: bool = True,
|
|
57
|
-
community_mode: str = "none", # "none" = global, "mapping" = scoped
|
|
58
|
-
):
|
|
59
|
-
"""
|
|
60
|
-
Initialize the Odin Engine.
|
|
61
|
-
|
|
62
|
-
Args:
|
|
63
|
-
db: Connected ArangoDB database instance
|
|
64
|
-
community_id: Community to scope queries to (default: "global")
|
|
65
|
-
cache_size: Size of the graph accessor cache (default: 5000)
|
|
66
|
-
auto_train: If True, automatically train NPLL if no model exists (default: True)
|
|
67
|
-
community_mode: "none" for global exploration, "mapping" for community-scoped
|
|
68
|
-
"""
|
|
69
|
-
self.db = db
|
|
70
|
-
self.community_id = community_id
|
|
71
|
-
|
|
72
|
-
logger.info(f"Initializing OdinEngine for community '{community_id}' (mode: {community_mode})...")
|
|
73
|
-
|
|
74
|
-
# 1. Setup Graph Accessor (with caching)
|
|
75
|
-
base_accessor = ArangoCommunityAccessor(
|
|
76
|
-
db=db,
|
|
77
|
-
community_id=community_id,
|
|
78
|
-
community_mode=community_mode,
|
|
79
|
-
)
|
|
80
|
-
self.accessor = CachedGraphAccessor(base_accessor, cache_size=cache_size)
|
|
81
|
-
|
|
82
|
-
# Global accessor for cross-community queries
|
|
83
|
-
self.global_accessor = GlobalGraphAccessor(db=db, algorithm="gnn")
|
|
84
|
-
|
|
85
|
-
# 2. Load/Train NPLL Model
|
|
86
|
-
self.npll_model: Optional[NPLLModel] = None
|
|
87
|
-
self.confidence = self._initialize_intelligence(auto_train)
|
|
88
|
-
|
|
89
|
-
# 3. Setup Orchestrator
|
|
90
|
-
self.orchestrator = RetrievalOrchestrator(
|
|
91
|
-
accessor=self.accessor,
|
|
92
|
-
edge_confidence=self.confidence,
|
|
93
|
-
)
|
|
94
|
-
|
|
95
|
-
# 4. Setup PPR Anchor Engine
|
|
96
|
-
self.anchor_engine = APPRAnchors(self.accessor)
|
|
97
|
-
|
|
98
|
-
mode = "NPLL" if self.npll_model else "Fallback"
|
|
99
|
-
logger.info(f"✓ OdinEngine initialized (Intelligence: {mode})")
|
|
100
|
-
|
|
101
|
-
def _initialize_intelligence(self, auto_train: bool):
|
|
102
|
-
"""Load or train NPLL model."""
|
|
103
|
-
if not auto_train:
|
|
104
|
-
logger.info("Auto-train disabled. Using constant confidence.")
|
|
105
|
-
return ConstantConfidence(0.8)
|
|
106
|
-
|
|
107
|
-
try:
|
|
108
|
-
bootstrapper = KnowledgeBootstrapper(db=self.db)
|
|
109
|
-
self.npll_model = bootstrapper.ensure_model_ready()
|
|
110
|
-
|
|
111
|
-
if self.npll_model:
|
|
112
|
-
return NPLLConfidence(self.npll_model, cache_size=10000)
|
|
113
|
-
else:
|
|
114
|
-
logger.warning("NPLL training failed. Using constant confidence.")
|
|
115
|
-
return ConstantConfidence(0.8)
|
|
116
|
-
|
|
117
|
-
except Exception as e:
|
|
118
|
-
logger.error(f"Failed to initialize NPLL: {e}")
|
|
119
|
-
return ConstantConfidence(0.8)
|
|
120
|
-
|
|
121
|
-
def retrieve(
|
|
122
|
-
self,
|
|
123
|
-
seeds: List[str],
|
|
124
|
-
max_paths: int = 50,
|
|
125
|
-
hop_limit: int = 3,
|
|
126
|
-
beam_width: int = 64,
|
|
127
|
-
) -> Dict[str, Any]:
|
|
128
|
-
"""
|
|
129
|
-
Retrieve relevant paths from seed nodes.
|
|
130
|
-
|
|
131
|
-
Uses PPR + Beam Search + NPLL Scoring to find the most relevant
|
|
132
|
-
paths in the knowledge graph starting from the given seeds.
|
|
133
|
-
|
|
134
|
-
Args:
|
|
135
|
-
seeds: List of starting node IDs (e.g., ["Patient_123", "Claim_456"])
|
|
136
|
-
max_paths: Maximum number of paths to return (default: 50)
|
|
137
|
-
hop_limit: Maximum path length (default: 3)
|
|
138
|
-
beam_width: Beam search width (default: 64)
|
|
139
|
-
|
|
140
|
-
Returns:
|
|
141
|
-
Dict containing:
|
|
142
|
-
- topk_ppr: Top nodes by PageRank importance
|
|
143
|
-
- paths: Discovered paths with scores
|
|
144
|
-
- insight_score: Overall quality score
|
|
145
|
-
- aggregates: Motifs, relations, anchors
|
|
146
|
-
"""
|
|
147
|
-
params = OrchestratorParams(
|
|
148
|
-
community_id=self.community_id,
|
|
149
|
-
max_paths=max_paths,
|
|
150
|
-
hop_limit=hop_limit,
|
|
151
|
-
beam_width=beam_width,
|
|
152
|
-
)
|
|
153
|
-
return self.orchestrator.retrieve(seeds=seeds, params=params)
|
|
154
|
-
|
|
155
|
-
def score_edge(self, src: str, rel: str, dst: str) -> float:
|
|
156
|
-
"""
|
|
157
|
-
Score how plausible an edge is (0.0 to 1.0).
|
|
158
|
-
|
|
159
|
-
Uses the trained NPLL model to estimate the probability
|
|
160
|
-
that the given edge (src --rel--> dst) is valid.
|
|
161
|
-
|
|
162
|
-
Args:
|
|
163
|
-
src: Source node ID
|
|
164
|
-
rel: Relationship type
|
|
165
|
-
dst: Destination node ID
|
|
166
|
-
|
|
167
|
-
Returns:
|
|
168
|
-
Probability score between 0.0 and 1.0
|
|
169
|
-
"""
|
|
170
|
-
return self.confidence.confidence(src, rel, dst)
|
|
171
|
-
|
|
172
|
-
def find_anchors(self, seeds: List[str], topn: int = 20) -> List[tuple]:
|
|
173
|
-
"""
|
|
174
|
-
Use PPR (PageRank) to find the most important nodes relative to seeds.
|
|
175
|
-
|
|
176
|
-
Args:
|
|
177
|
-
seeds: Starting node IDs
|
|
178
|
-
topn: Number of top nodes to return (default: 20)
|
|
179
|
-
|
|
180
|
-
Returns:
|
|
181
|
-
List of (node_id, ppr_score) tuples sorted by importance
|
|
182
|
-
"""
|
|
183
|
-
params = APPRAnchorParams(topn=topn)
|
|
184
|
-
return self.anchor_engine.build_for_community(
|
|
185
|
-
community_id=self.community_id,
|
|
186
|
-
seed_set=seeds,
|
|
187
|
-
params=params,
|
|
188
|
-
)
|
|
189
|
-
|
|
190
|
-
def get_neighbors(self, node_id: str) -> Dict[str, Any]:
|
|
191
|
-
"""
|
|
192
|
-
Get all neighbors of a node with relationship types.
|
|
193
|
-
|
|
194
|
-
Args:
|
|
195
|
-
node_id: The node to inspect
|
|
196
|
-
|
|
197
|
-
Returns:
|
|
198
|
-
Dict with node info and list of neighbors
|
|
199
|
-
"""
|
|
200
|
-
node = self.accessor.get_node(node_id)
|
|
201
|
-
|
|
202
|
-
neighbors = []
|
|
203
|
-
for neighbor_id, relation, weight in self.accessor.iter_out(node_id):
|
|
204
|
-
neighbors.append({
|
|
205
|
-
"id": neighbor_id,
|
|
206
|
-
"rel": relation,
|
|
207
|
-
"weight": weight,
|
|
208
|
-
"direction": "out"
|
|
209
|
-
})
|
|
210
|
-
|
|
211
|
-
for neighbor_id, relation, weight in self.accessor.iter_in(node_id):
|
|
212
|
-
neighbors.append({
|
|
213
|
-
"id": neighbor_id,
|
|
214
|
-
"rel": relation,
|
|
215
|
-
"weight": weight,
|
|
216
|
-
"direction": "in"
|
|
217
|
-
})
|
|
218
|
-
|
|
219
|
-
return {
|
|
220
|
-
"node": node,
|
|
221
|
-
"neighbors": neighbors,
|
|
222
|
-
"degree": len(neighbors),
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
def retrain_model(self) -> bool:
|
|
226
|
-
"""
|
|
227
|
-
Force retrain the NPLL model.
|
|
228
|
-
|
|
229
|
-
Useful after significant data changes.
|
|
230
|
-
|
|
231
|
-
Returns:
|
|
232
|
-
True if training succeeded, False otherwise
|
|
233
|
-
"""
|
|
234
|
-
try:
|
|
235
|
-
bootstrapper = KnowledgeBootstrapper(db=self.db)
|
|
236
|
-
self.npll_model = bootstrapper.ensure_model_ready(force_retrain=True)
|
|
237
|
-
|
|
238
|
-
if self.npll_model:
|
|
239
|
-
self.confidence = NPLLConfidence(self.npll_model, cache_size=10000)
|
|
240
|
-
self.orchestrator = RetrievalOrchestrator(
|
|
241
|
-
accessor=self.accessor,
|
|
242
|
-
edge_confidence=self.confidence,
|
|
243
|
-
)
|
|
244
|
-
logger.info("✓ Model retrained successfully")
|
|
245
|
-
return True
|
|
246
|
-
return False
|
|
247
|
-
|
|
248
|
-
except Exception as e:
|
|
249
|
-
logger.error(f"Retraining failed: {e}")
|
|
250
|
-
return False
|
|
251
|
-
|
|
252
|
-
@property
|
|
253
|
-
def has_npll(self) -> bool:
|
|
254
|
-
"""Check if NPLL model is loaded."""
|
|
255
|
-
return self.npll_model is not None
|
|
256
|
-
|
|
257
|
-
def get_status(self) -> Dict[str, Any]:
|
|
258
|
-
"""Get engine status information."""
|
|
259
|
-
return {
|
|
260
|
-
"community_id": self.community_id,
|
|
261
|
-
"npll_loaded": self.has_npll,
|
|
262
|
-
"intelligence_mode": "NPLL" if self.has_npll else "Constant",
|
|
263
|
-
"cache_size": getattr(self.accessor, 'cache_size', 'unknown'),
|
|
264
|
-
}
|
|
1
|
+
"""
|
|
2
|
+
OdinEngine: The main entry point for the Odin KG Intelligence Library.
|
|
3
|
+
|
|
4
|
+
This class orchestrates all components:
|
|
5
|
+
- Graph access (with caching)
|
|
6
|
+
- NPLL model management (auto-train if needed)
|
|
7
|
+
- Retrieval (PPR + Beam Search + Scoring)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import sys
|
|
12
|
+
import logging
|
|
13
|
+
from typing import List, Dict, Any, Optional
|
|
14
|
+
|
|
15
|
+
from arango.database import StandardDatabase
|
|
16
|
+
|
|
17
|
+
# Add parent path for imports
|
|
18
|
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
19
|
+
|
|
20
|
+
from npll.bootstrap import KnowledgeBootstrapper
|
|
21
|
+
from npll.npll_model import NPLLModel
|
|
22
|
+
from retrieval.orchestrator import RetrievalOrchestrator, OrchestratorParams
|
|
23
|
+
from retrieval.adapters_arango import ArangoCommunityAccessor, GlobalGraphAccessor
|
|
24
|
+
from retrieval.cache import CachedGraphAccessor
|
|
25
|
+
from retrieval.confidence import NPLLConfidence, ConstantConfidence
|
|
26
|
+
from retrieval.ppr.anchors import APPRAnchors, APPRAnchorParams
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger("odin")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class OdinEngine:
|
|
32
|
+
"""
|
|
33
|
+
Main entry point for the Odin Knowledge Graph Intelligence Library.
|
|
34
|
+
|
|
35
|
+
Handles:
|
|
36
|
+
- Graph access (with caching)
|
|
37
|
+
- NPLL model loading (auto-trains if needed)
|
|
38
|
+
- Retrieval orchestration (PPR + Beam Search + NPLL Scoring)
|
|
39
|
+
|
|
40
|
+
Example:
|
|
41
|
+
from odin import OdinEngine
|
|
42
|
+
from arango import ArangoClient
|
|
43
|
+
|
|
44
|
+
client = ArangoClient(hosts="http://localhost:8529")
|
|
45
|
+
db = client.db("KG-test", username="root", password="")
|
|
46
|
+
|
|
47
|
+
engine = OdinEngine(db)
|
|
48
|
+
results = engine.retrieve(seeds=["Patient_123"])
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
def __init__(
|
|
52
|
+
self,
|
|
53
|
+
db: StandardDatabase,
|
|
54
|
+
community_id: str = "global",
|
|
55
|
+
cache_size: int = 5000,
|
|
56
|
+
auto_train: bool = True,
|
|
57
|
+
community_mode: str = "none", # "none" = global, "mapping" = scoped
|
|
58
|
+
):
|
|
59
|
+
"""
|
|
60
|
+
Initialize the Odin Engine.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
db: Connected ArangoDB database instance
|
|
64
|
+
community_id: Community to scope queries to (default: "global")
|
|
65
|
+
cache_size: Size of the graph accessor cache (default: 5000)
|
|
66
|
+
auto_train: If True, automatically train NPLL if no model exists (default: True)
|
|
67
|
+
community_mode: "none" for global exploration, "mapping" for community-scoped
|
|
68
|
+
"""
|
|
69
|
+
self.db = db
|
|
70
|
+
self.community_id = community_id
|
|
71
|
+
|
|
72
|
+
logger.info(f"Initializing OdinEngine for community '{community_id}' (mode: {community_mode})...")
|
|
73
|
+
|
|
74
|
+
# 1. Setup Graph Accessor (with caching)
|
|
75
|
+
base_accessor = ArangoCommunityAccessor(
|
|
76
|
+
db=db,
|
|
77
|
+
community_id=community_id,
|
|
78
|
+
community_mode=community_mode,
|
|
79
|
+
)
|
|
80
|
+
self.accessor = CachedGraphAccessor(base_accessor, cache_size=cache_size)
|
|
81
|
+
|
|
82
|
+
# Global accessor for cross-community queries
|
|
83
|
+
self.global_accessor = GlobalGraphAccessor(db=db, algorithm="gnn")
|
|
84
|
+
|
|
85
|
+
# 2. Load/Train NPLL Model
|
|
86
|
+
self.npll_model: Optional[NPLLModel] = None
|
|
87
|
+
self.confidence = self._initialize_intelligence(auto_train)
|
|
88
|
+
|
|
89
|
+
# 3. Setup Orchestrator
|
|
90
|
+
self.orchestrator = RetrievalOrchestrator(
|
|
91
|
+
accessor=self.accessor,
|
|
92
|
+
edge_confidence=self.confidence,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
# 4. Setup PPR Anchor Engine
|
|
96
|
+
self.anchor_engine = APPRAnchors(self.accessor)
|
|
97
|
+
|
|
98
|
+
mode = "NPLL" if self.npll_model else "Fallback"
|
|
99
|
+
logger.info(f"✓ OdinEngine initialized (Intelligence: {mode})")
|
|
100
|
+
|
|
101
|
+
def _initialize_intelligence(self, auto_train: bool):
|
|
102
|
+
"""Load or train NPLL model."""
|
|
103
|
+
if not auto_train:
|
|
104
|
+
logger.info("Auto-train disabled. Using constant confidence.")
|
|
105
|
+
return ConstantConfidence(0.8)
|
|
106
|
+
|
|
107
|
+
try:
|
|
108
|
+
bootstrapper = KnowledgeBootstrapper(db=self.db)
|
|
109
|
+
self.npll_model = bootstrapper.ensure_model_ready()
|
|
110
|
+
|
|
111
|
+
if self.npll_model:
|
|
112
|
+
return NPLLConfidence(self.npll_model, cache_size=10000)
|
|
113
|
+
else:
|
|
114
|
+
logger.warning("NPLL training failed. Using constant confidence.")
|
|
115
|
+
return ConstantConfidence(0.8)
|
|
116
|
+
|
|
117
|
+
except Exception as e:
|
|
118
|
+
logger.error(f"Failed to initialize NPLL: {e}")
|
|
119
|
+
return ConstantConfidence(0.8)
|
|
120
|
+
|
|
121
|
+
def retrieve(
|
|
122
|
+
self,
|
|
123
|
+
seeds: List[str],
|
|
124
|
+
max_paths: int = 50,
|
|
125
|
+
hop_limit: int = 3,
|
|
126
|
+
beam_width: int = 64,
|
|
127
|
+
) -> Dict[str, Any]:
|
|
128
|
+
"""
|
|
129
|
+
Retrieve relevant paths from seed nodes.
|
|
130
|
+
|
|
131
|
+
Uses PPR + Beam Search + NPLL Scoring to find the most relevant
|
|
132
|
+
paths in the knowledge graph starting from the given seeds.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
seeds: List of starting node IDs (e.g., ["Patient_123", "Claim_456"])
|
|
136
|
+
max_paths: Maximum number of paths to return (default: 50)
|
|
137
|
+
hop_limit: Maximum path length (default: 3)
|
|
138
|
+
beam_width: Beam search width (default: 64)
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
Dict containing:
|
|
142
|
+
- topk_ppr: Top nodes by PageRank importance
|
|
143
|
+
- paths: Discovered paths with scores
|
|
144
|
+
- insight_score: Overall quality score
|
|
145
|
+
- aggregates: Motifs, relations, anchors
|
|
146
|
+
"""
|
|
147
|
+
params = OrchestratorParams(
|
|
148
|
+
community_id=self.community_id,
|
|
149
|
+
max_paths=max_paths,
|
|
150
|
+
hop_limit=hop_limit,
|
|
151
|
+
beam_width=beam_width,
|
|
152
|
+
)
|
|
153
|
+
return self.orchestrator.retrieve(seeds=seeds, params=params)
|
|
154
|
+
|
|
155
|
+
def score_edge(self, src: str, rel: str, dst: str) -> float:
|
|
156
|
+
"""
|
|
157
|
+
Score how plausible an edge is (0.0 to 1.0).
|
|
158
|
+
|
|
159
|
+
Uses the trained NPLL model to estimate the probability
|
|
160
|
+
that the given edge (src --rel--> dst) is valid.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
src: Source node ID
|
|
164
|
+
rel: Relationship type
|
|
165
|
+
dst: Destination node ID
|
|
166
|
+
|
|
167
|
+
Returns:
|
|
168
|
+
Probability score between 0.0 and 1.0
|
|
169
|
+
"""
|
|
170
|
+
return self.confidence.confidence(src, rel, dst)
|
|
171
|
+
|
|
172
|
+
def find_anchors(self, seeds: List[str], topn: int = 20) -> List[tuple]:
|
|
173
|
+
"""
|
|
174
|
+
Use PPR (PageRank) to find the most important nodes relative to seeds.
|
|
175
|
+
|
|
176
|
+
Args:
|
|
177
|
+
seeds: Starting node IDs
|
|
178
|
+
topn: Number of top nodes to return (default: 20)
|
|
179
|
+
|
|
180
|
+
Returns:
|
|
181
|
+
List of (node_id, ppr_score) tuples sorted by importance
|
|
182
|
+
"""
|
|
183
|
+
params = APPRAnchorParams(topn=topn)
|
|
184
|
+
return self.anchor_engine.build_for_community(
|
|
185
|
+
community_id=self.community_id,
|
|
186
|
+
seed_set=seeds,
|
|
187
|
+
params=params,
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
def get_neighbors(self, node_id: str) -> Dict[str, Any]:
|
|
191
|
+
"""
|
|
192
|
+
Get all neighbors of a node with relationship types.
|
|
193
|
+
|
|
194
|
+
Args:
|
|
195
|
+
node_id: The node to inspect
|
|
196
|
+
|
|
197
|
+
Returns:
|
|
198
|
+
Dict with node info and list of neighbors
|
|
199
|
+
"""
|
|
200
|
+
node = self.accessor.get_node(node_id)
|
|
201
|
+
|
|
202
|
+
neighbors = []
|
|
203
|
+
for neighbor_id, relation, weight in self.accessor.iter_out(node_id):
|
|
204
|
+
neighbors.append({
|
|
205
|
+
"id": neighbor_id,
|
|
206
|
+
"rel": relation,
|
|
207
|
+
"weight": weight,
|
|
208
|
+
"direction": "out"
|
|
209
|
+
})
|
|
210
|
+
|
|
211
|
+
for neighbor_id, relation, weight in self.accessor.iter_in(node_id):
|
|
212
|
+
neighbors.append({
|
|
213
|
+
"id": neighbor_id,
|
|
214
|
+
"rel": relation,
|
|
215
|
+
"weight": weight,
|
|
216
|
+
"direction": "in"
|
|
217
|
+
})
|
|
218
|
+
|
|
219
|
+
return {
|
|
220
|
+
"node": node,
|
|
221
|
+
"neighbors": neighbors,
|
|
222
|
+
"degree": len(neighbors),
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
def retrain_model(self) -> bool:
|
|
226
|
+
"""
|
|
227
|
+
Force retrain the NPLL model.
|
|
228
|
+
|
|
229
|
+
Useful after significant data changes.
|
|
230
|
+
|
|
231
|
+
Returns:
|
|
232
|
+
True if training succeeded, False otherwise
|
|
233
|
+
"""
|
|
234
|
+
try:
|
|
235
|
+
bootstrapper = KnowledgeBootstrapper(db=self.db)
|
|
236
|
+
self.npll_model = bootstrapper.ensure_model_ready(force_retrain=True)
|
|
237
|
+
|
|
238
|
+
if self.npll_model:
|
|
239
|
+
self.confidence = NPLLConfidence(self.npll_model, cache_size=10000)
|
|
240
|
+
self.orchestrator = RetrievalOrchestrator(
|
|
241
|
+
accessor=self.accessor,
|
|
242
|
+
edge_confidence=self.confidence,
|
|
243
|
+
)
|
|
244
|
+
logger.info("✓ Model retrained successfully")
|
|
245
|
+
return True
|
|
246
|
+
return False
|
|
247
|
+
|
|
248
|
+
except Exception as e:
|
|
249
|
+
logger.error(f"Retraining failed: {e}")
|
|
250
|
+
return False
|
|
251
|
+
|
|
252
|
+
@property
|
|
253
|
+
def has_npll(self) -> bool:
|
|
254
|
+
"""Check if NPLL model is loaded."""
|
|
255
|
+
return self.npll_model is not None
|
|
256
|
+
|
|
257
|
+
def get_status(self) -> Dict[str, Any]:
|
|
258
|
+
"""Get engine status information."""
|
|
259
|
+
return {
|
|
260
|
+
"community_id": self.community_id,
|
|
261
|
+
"npll_loaded": self.has_npll,
|
|
262
|
+
"intelligence_mode": "NPLL" if self.has_npll else "Constant",
|
|
263
|
+
"cache_size": getattr(self.accessor, 'cache_size', 'unknown'),
|
|
264
|
+
}
|