odin-engine 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. benchmarks/__init__.py +17 -17
  2. benchmarks/datasets.py +284 -284
  3. benchmarks/metrics.py +275 -275
  4. benchmarks/run_ablation.py +279 -279
  5. benchmarks/run_npll_benchmark.py +270 -270
  6. npll/__init__.py +10 -10
  7. npll/bootstrap.py +474 -474
  8. npll/core/__init__.py +33 -33
  9. npll/core/knowledge_graph.py +308 -308
  10. npll/core/logical_rules.py +496 -496
  11. npll/core/mln.py +474 -474
  12. npll/inference/__init__.py +40 -40
  13. npll/inference/e_step.py +419 -419
  14. npll/inference/elbo.py +434 -434
  15. npll/inference/m_step.py +576 -576
  16. npll/npll_model.py +631 -631
  17. npll/scoring/__init__.py +42 -42
  18. npll/scoring/embeddings.py +441 -441
  19. npll/scoring/probability.py +402 -402
  20. npll/scoring/scoring_module.py +369 -369
  21. npll/training/__init__.py +24 -24
  22. npll/training/evaluation.py +496 -496
  23. npll/training/npll_trainer.py +520 -520
  24. npll/utils/__init__.py +47 -47
  25. npll/utils/batch_utils.py +492 -492
  26. npll/utils/config.py +144 -144
  27. npll/utils/math_utils.py +338 -338
  28. odin/__init__.py +21 -20
  29. odin/engine.py +264 -264
  30. odin/schema.py +210 -0
  31. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/METADATA +503 -456
  32. odin_engine-0.2.0.dist-info/RECORD +63 -0
  33. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/licenses/LICENSE +21 -21
  34. retrieval/__init__.py +50 -50
  35. retrieval/adapters.py +140 -140
  36. retrieval/adapters_arango.py +1418 -1418
  37. retrieval/aggregators.py +707 -707
  38. retrieval/beam.py +127 -127
  39. retrieval/budget.py +60 -60
  40. retrieval/cache.py +159 -159
  41. retrieval/confidence.py +88 -88
  42. retrieval/eval.py +49 -49
  43. retrieval/linker.py +87 -87
  44. retrieval/metrics.py +105 -105
  45. retrieval/metrics_motifs.py +36 -36
  46. retrieval/orchestrator.py +571 -571
  47. retrieval/ppr/__init__.py +12 -12
  48. retrieval/ppr/anchors.py +41 -41
  49. retrieval/ppr/bippr.py +61 -61
  50. retrieval/ppr/engines.py +257 -257
  51. retrieval/ppr/global_pr.py +76 -76
  52. retrieval/ppr/indexes.py +78 -78
  53. retrieval/ppr.py +156 -156
  54. retrieval/ppr_cache.py +25 -25
  55. retrieval/scoring.py +294 -294
  56. retrieval/utils/pii_redaction.py +36 -36
  57. retrieval/writers/__init__.py +9 -9
  58. retrieval/writers/arango_writer.py +28 -28
  59. retrieval/writers/base.py +21 -21
  60. retrieval/writers/janus_writer.py +36 -36
  61. odin_engine-0.1.0.dist-info/RECORD +0 -62
  62. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/WHEEL +0 -0
  63. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/top_level.txt +0 -0
odin/schema.py ADDED
@@ -0,0 +1,210 @@
1
+ """
2
+ Schema inspection for ArangoDB databases, used by AI agents to understand graph structure and write valid AQL queries.
3
+ """
4
+ from typing import Dict, List, Optional, Any
5
+ from dataclasses import dataclass, asdict
6
+ import json
7
+
8
+
9
@dataclass
class CollectionSchema:
    """Summary of one collection: its name, kind, size and observed fields."""

    # Collection name as reported by the database.
    name: str
    # Collection kind: "document" or "edge".
    type: str
    # Number of documents in the collection.
    count: int
    # Field names seen in the sampled documents.
    fields: List[str]
16
+
17
+
18
@dataclass
class EdgeSchema:
    """Summary of one edge collection, including which collections it links."""

    # Edge collection name as reported by the database.
    name: str
    # Number of edges in the collection.
    count: int
    # Collection names taken from the part of `_from` before the "/".
    from_collections: List[str]
    # Collection names taken from the part of `_to` before the "/".
    to_collections: List[str]
    # Field names seen in the sampled edges.
    fields: List[str]
26
+
27
+
28
@dataclass
class SchemaMap:
    """Full schema description of one ArangoDB database."""

    # Name of the inspected database.
    database_name: str
    # One entry per non-edge collection.
    collections: List[CollectionSchema]
    # One entry per edge collection.
    edges: List[EdgeSchema]
34
+
35
+
36
class SchemaInspector:
    """
    Inspect an ArangoDB database and summarise its structure.

    Queries the database to discover:
    - All collections (vertex and edge)
    - Field names in each collection
    - Edge relationships (_from/_to patterns)

    Field names and edge endpoints are taken from a small sample of
    documents per collection, so they describe what was sampled rather than
    every document in the collection.

    The ``db`` handle is used through ``db.name``, ``db.collections()``,
    ``db.collection(name).count()`` and ``db.aql.execute(...)``
    (presumably a python-arango database object -- confirm against callers).

    Usage:
        inspector = SchemaInspector(arango_db)
        schema = inspector.get_schema_map()
        entity_info = inspector.get_collection_info("ExtractedEntities")
    """

    # A collection counts as an edge collection when its listing entry
    # reports either the raw ArangoDB type code (3) or the string "edge".
    _EDGE_TYPES = (3, "edge")

    # Bind parameters keep the collection name out of the query text, so
    # names that are not plain identifiers (e.g. containing "-") still work
    # and cannot alter the query.
    _SAMPLE_QUERY = "FOR doc IN @@collection LIMIT @limit RETURN doc"

    def __init__(self, db, max_sample_docs: int = 5):
        """
        Initialize schema inspector.

        Args:
            db: Database handle to inspect.
            max_sample_docs: Number of documents sampled per collection for
                field discovery; values below 1 are treated as 1.
        """
        self.db = db
        self.max_sample_docs = max_sample_docs
        self._schema_cache: Optional["SchemaMap"] = None

    def get_schema_map(self, refresh: bool = False) -> Dict[str, Any]:
        """
        Return the schema map as a plain dictionary.

        The schema is built on first use and cached; pass ``refresh=True``
        to rebuild it from the database. Each call returns a new dictionary
        produced from the cached schema.
        """
        if self._schema_cache is None or refresh:
            self._schema_cache = self._build_schema_map()

        return asdict(self._schema_cache)

    def get_collection_info(self, collection_name: str) -> Optional[Dict[str, Any]]:
        """
        Return the schema entry for a collection of either kind.

        Document collections are searched before edge collections.
        Returns None when no collection has that name.
        """
        schema = self.get_schema_map()

        for entry in schema['collections'] + schema['edges']:
            if entry['name'] == collection_name:
                return entry

        return None

    def get_edge_info(self, edge_collection: str) -> Optional[Dict[str, Any]]:
        """Return the schema entry for an edge collection, or None if absent."""
        for edge in self.get_schema_map()['edges']:
            if edge['name'] == edge_collection:
                return edge

        return None

    @staticmethod
    def _is_edge_collection(col_info: Dict[str, Any]) -> bool:
        """Return True when a collection listing entry describes an edge collection."""
        return col_info['type'] in SchemaInspector._EDGE_TYPES

    def _sample_documents(self, col_name: str) -> List[Dict[str, Any]]:
        """Fetch up to ``max(1, max_sample_docs)`` documents from a collection."""
        cursor = self.db.aql.execute(
            self._SAMPLE_QUERY,
            bind_vars={
                '@collection': col_name,
                # Always sample at least one document so fields can be found.
                'limit': max(1, self.max_sample_docs),
            },
        )
        return list(cursor)

    def _build_schema_map(self) -> "SchemaMap":
        """Build complete schema map by querying ArangoDB."""
        document_collections = []
        edge_collections = []

        for col_info in self.db.collections():
            col_name = col_info['name']

            # Skip system collections
            if col_name.startswith('_'):
                continue

            if self._is_edge_collection(col_info):
                edge_collections.append(self._inspect_edge_collection(col_name))
            else:
                document_collections.append(
                    self._inspect_document_collection(col_name)
                )

        return SchemaMap(
            database_name=self.db.name,
            collections=document_collections,
            edges=edge_collections,
        )

    def _inspect_document_collection(self, col_name: str) -> "CollectionSchema":
        """Inspect a document collection and extract schema."""
        count = self.db.collection(col_name).count()

        fields = set()
        # Skip the sampling query entirely for empty collections.
        if count > 0:
            for doc in self._sample_documents(col_name):
                fields.update(doc.keys())

        return CollectionSchema(
            name=col_name,
            type="document",
            count=count,
            fields=sorted(fields),
        )

    def _inspect_edge_collection(self, col_name: str) -> "EdgeSchema":
        """Inspect an edge collection and extract schema."""
        count = self.db.collection(col_name).count()

        fields = set()
        from_collections = set()
        to_collections = set()

        # Skip the sampling query entirely for empty collections.
        if count > 0:
            for edge in self._sample_documents(col_name):
                fields.update(edge.keys())

                # `_from` / `_to` hold "collection/key"; keep the collection part.
                if '_from' in edge:
                    from_collections.add(edge['_from'].split('/')[0])

                if '_to' in edge:
                    to_collections.add(edge['_to'].split('/')[0])

        return EdgeSchema(
            name=col_name,
            count=count,
            from_collections=sorted(from_collections),
            to_collections=sorted(to_collections),
            fields=sorted(fields),
        )
190
+
191
+
192
def inspect_arango_schema(db, output_file: Optional[str] = None) -> Dict[str, Any]:
    """
    Inspect an ArangoDB schema in one call, optionally writing it to disk.

    Args:
        db: ArangoDB database connection
        output_file: Optional path to save schema as JSON; nothing is
            written when it is None or empty

    Returns:
        Schema map as dictionary
    """
    schema_map = SchemaInspector(db).get_schema_map()

    if not output_file:
        return schema_map

    # default=str stringifies any value the JSON encoder cannot handle.
    with open(output_file, 'w', encoding='utf-8') as handle:
        json.dump(schema_map, handle, indent=2, default=str)

    return schema_map