odin-engine 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/__init__.py +17 -17
- benchmarks/datasets.py +284 -284
- benchmarks/metrics.py +275 -275
- benchmarks/run_ablation.py +279 -279
- benchmarks/run_npll_benchmark.py +270 -270
- npll/__init__.py +10 -10
- npll/bootstrap.py +474 -474
- npll/core/__init__.py +33 -33
- npll/core/knowledge_graph.py +308 -308
- npll/core/logical_rules.py +496 -496
- npll/core/mln.py +474 -474
- npll/inference/__init__.py +40 -40
- npll/inference/e_step.py +419 -419
- npll/inference/elbo.py +434 -434
- npll/inference/m_step.py +576 -576
- npll/npll_model.py +631 -631
- npll/scoring/__init__.py +42 -42
- npll/scoring/embeddings.py +441 -441
- npll/scoring/probability.py +402 -402
- npll/scoring/scoring_module.py +369 -369
- npll/training/__init__.py +24 -24
- npll/training/evaluation.py +496 -496
- npll/training/npll_trainer.py +520 -520
- npll/utils/__init__.py +47 -47
- npll/utils/batch_utils.py +492 -492
- npll/utils/config.py +144 -144
- npll/utils/math_utils.py +338 -338
- odin/__init__.py +21 -20
- odin/engine.py +264 -264
- odin/schema.py +210 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/METADATA +503 -456
- odin_engine-0.2.0.dist-info/RECORD +63 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/licenses/LICENSE +21 -21
- retrieval/__init__.py +50 -50
- retrieval/adapters.py +140 -140
- retrieval/adapters_arango.py +1418 -1418
- retrieval/aggregators.py +707 -707
- retrieval/beam.py +127 -127
- retrieval/budget.py +60 -60
- retrieval/cache.py +159 -159
- retrieval/confidence.py +88 -88
- retrieval/eval.py +49 -49
- retrieval/linker.py +87 -87
- retrieval/metrics.py +105 -105
- retrieval/metrics_motifs.py +36 -36
- retrieval/orchestrator.py +571 -571
- retrieval/ppr/__init__.py +12 -12
- retrieval/ppr/anchors.py +41 -41
- retrieval/ppr/bippr.py +61 -61
- retrieval/ppr/engines.py +257 -257
- retrieval/ppr/global_pr.py +76 -76
- retrieval/ppr/indexes.py +78 -78
- retrieval/ppr.py +156 -156
- retrieval/ppr_cache.py +25 -25
- retrieval/scoring.py +294 -294
- retrieval/utils/pii_redaction.py +36 -36
- retrieval/writers/__init__.py +9 -9
- retrieval/writers/arango_writer.py +28 -28
- retrieval/writers/base.py +21 -21
- retrieval/writers/janus_writer.py +36 -36
- odin_engine-0.1.0.dist-info/RECORD +0 -62
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/WHEEL +0 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/top_level.txt +0 -0
odin/schema.py
ADDED
@@ -0,0 +1,210 @@
+"""
+Used by AI agents to understand graph structure and write valid AQL queries.
+"""
+from typing import Dict, List, Optional, Any
+from dataclasses import dataclass, asdict
+import json
+
+
+@dataclass
+class CollectionSchema:
+    """Schema information for a single collection."""
+    name: str
+    type: str  # "document" or "edge"
+    count: int
+    fields: List[str]
+
+
+@dataclass
+class EdgeSchema:
+    """Schema information for an edge collection."""
+    name: str
+    count: int
+    from_collections: List[str]
+    to_collections: List[str]
+    fields: List[str]
+
+
+@dataclass
+class SchemaMap:
+    """Complete schema map of an ArangoDB database."""
+    database_name: str
+    collections: List[CollectionSchema]
+    edges: List[EdgeSchema]
+
+
+class SchemaInspector:
+    """
+
+    Queries the database to discover:
+    - All collections (vertex and edge)
+    - Field names in each collection
+    - Edge relationships (_from/_to patterns)
+
+    Usage:
+        inspector = SchemaInspector(arango_db)
+        schema = inspector.get_schema_map()
+        entity_info = inspector.get_collection_info("ExtractedEntities")
+    """
+
+    def __init__(self, db, max_sample_docs: int = 5):
+        """
+        Initialize schema inspector.
+        """
+        self.db = db
+        self.max_sample_docs = max_sample_docs
+        self._schema_cache: Optional[SchemaMap] = None
+
+    def get_schema_map(self, refresh: bool = False) -> Dict[str, Any]:
+        if self._schema_cache is None or refresh:
+            self._schema_cache = self._build_schema_map()
+
+        return asdict(self._schema_cache)
+
+    def get_collection_info(self, collection_name: str) -> Optional[Dict[str, Any]]:
+        schema = self.get_schema_map()
+
+        # Check document collections
+        for col in schema['collections']:
+            if col['name'] == collection_name:
+                return col
+
+        # Check edge collections
+        for edge in schema['edges']:
+            if edge['name'] == collection_name:
+                return edge
+
+        return None
+
+    def get_edge_info(self, edge_collection: str) -> Optional[Dict[str, Any]]:
+        schema = self.get_schema_map()
+
+        for edge in schema['edges']:
+            if edge['name'] == edge_collection:
+                return edge
+
+        return None
+
+    def _build_schema_map(self) -> SchemaMap:
+        """Build complete schema map by querying ArangoDB."""
+        db_name = self.db.name
+
+        # Get all collections
+        all_collections = self.db.collections()
+
+        document_collections = []
+        edge_collections = []
+
+        for col_info in all_collections:
+            col_name = col_info['name']
+
+            # Skip system collections
+            if col_name.startswith('_'):
+                continue
+
+            col = self.db.collection(col_name)
+            is_edge = col_info['type'] == 3  # Edge collection type
+
+            if is_edge:
+                edge_schema = self._inspect_edge_collection(col_name)
+                edge_collections.append(edge_schema)
+            else:
+                doc_schema = self._inspect_document_collection(col_name)
+                document_collections.append(doc_schema)
+
+        return SchemaMap(
+            database_name=db_name,
+            collections=document_collections,
+            edges=edge_collections
+        )
+
+    def _inspect_document_collection(self, col_name: str) -> CollectionSchema:
+        """Inspect a document collection and extract schema."""
+        col = self.db.collection(col_name)
+        count = col.count()
+
+        # Get sample documents to extract fields (always fetch at least 1 for field discovery)
+        fields = set()
+
+        if count > 0:
+            # Use max(1, max_sample_docs) to ensure at least 1 doc for fields
+            sample_limit = max(1, self.max_sample_docs)
+            aql = f"""
+            FOR doc IN {col_name}
+                LIMIT {sample_limit}
+                RETURN doc
+            """
+            cursor = self.db.aql.execute(aql)
+
+            for doc in cursor:
+                # Extract all field names
+                fields.update(doc.keys())
+
+        return CollectionSchema(
+            name=col_name,
+            type="document",
+            count=count,
+            fields=sorted(list(fields))
+        )
+
+    def _inspect_edge_collection(self, col_name: str) -> EdgeSchema:
+        """Inspect an edge collection and extract schema."""
+        col = self.db.collection(col_name)
+        count = col.count()
+
+        # Get sample edges to extract fields and _from/_to patterns (always fetch at least 1)
+        fields = set()
+        from_collections = set()
+        to_collections = set()
+
+        if count > 0:
+            # Use max(1, max_sample_docs) to ensure at least 1 edge for fields
+            sample_limit = max(1, self.max_sample_docs)
+            aql = f"""
+            FOR edge IN {col_name}
+                LIMIT {sample_limit}
+                RETURN edge
+            """
+            cursor = self.db.aql.execute(aql)
+
+            for edge in cursor:
+                # Extract fields
+                fields.update(edge.keys())
+
+                # Extract _from/_to collection names
+                if '_from' in edge:
+                    from_col = edge['_from'].split('/')[0]
+                    from_collections.add(from_col)
+
+                if '_to' in edge:
+                    to_col = edge['_to'].split('/')[0]
+                    to_collections.add(to_col)
+
+        return EdgeSchema(
+            name=col_name,
+            count=count,
+            from_collections=sorted(list(from_collections)),
+            to_collections=sorted(list(to_collections)),
+            fields=sorted(list(fields))
+        )
+
+
+def inspect_arango_schema(db, output_file: Optional[str] = None) -> Dict[str, Any]:
+    """
+    Convenience function to inspect ArangoDB schema and optionally save to file.
+
+    Args:
+        db: ArangoDB database connection
+        output_file: Optional path to save schema as JSON
+
+    Returns:
+        Schema map as dictionary
+    """
+    inspector = SchemaInspector(db)
+    schema = inspector.get_schema_map()
+
+    if output_file:
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(schema, f, indent=2, default=str)
+
+    return schema
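
The added odin/schema.py module exposes SchemaInspector and the inspect_arango_schema convenience wrapper shown above. A minimal usage sketch, assuming the python-arango client and a reachable ArangoDB instance; the host, credentials, database name, and edge collection name below are placeholders, not values shipped with odin-engine:

    # Sketch only: assumes python-arango is installed and an ArangoDB server is reachable.
    from arango import ArangoClient

    from odin.schema import SchemaInspector, inspect_arango_schema

    client = ArangoClient(hosts="http://localhost:8529")        # placeholder host
    db = client.db("example_db", username="root", password="")  # placeholder database

    # One-shot inspection, optionally persisted to JSON (e.g. to feed an agent prompt).
    schema = inspect_arango_schema(db, output_file="schema_map.json")
    print(schema["database_name"], len(schema["collections"]), len(schema["edges"]))

    # Or keep an inspector around; get_schema_map() caches until refresh=True is passed.
    inspector = SchemaInspector(db, max_sample_docs=10)
    edge_info = inspector.get_edge_info("mentions")  # hypothetical edge collection name
    if edge_info:
        print(edge_info["from_collections"], "->", edge_info["to_collections"])

Because get_schema_map() returns asdict(SchemaMap), callers receive plain dictionaries and lists, which is why the output_file path above can hand the result straight to json.dump.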