odin-engine 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/__init__.py +17 -17
- benchmarks/datasets.py +284 -284
- benchmarks/metrics.py +275 -275
- benchmarks/run_ablation.py +279 -279
- benchmarks/run_npll_benchmark.py +270 -270
- npll/__init__.py +10 -10
- npll/bootstrap.py +474 -474
- npll/core/__init__.py +33 -33
- npll/core/knowledge_graph.py +308 -308
- npll/core/logical_rules.py +496 -496
- npll/core/mln.py +474 -474
- npll/inference/__init__.py +40 -40
- npll/inference/e_step.py +419 -419
- npll/inference/elbo.py +434 -434
- npll/inference/m_step.py +576 -576
- npll/npll_model.py +631 -631
- npll/scoring/__init__.py +42 -42
- npll/scoring/embeddings.py +441 -441
- npll/scoring/probability.py +402 -402
- npll/scoring/scoring_module.py +369 -369
- npll/training/__init__.py +24 -24
- npll/training/evaluation.py +496 -496
- npll/training/npll_trainer.py +520 -520
- npll/utils/__init__.py +47 -47
- npll/utils/batch_utils.py +492 -492
- npll/utils/config.py +144 -144
- npll/utils/math_utils.py +338 -338
- odin/__init__.py +21 -20
- odin/engine.py +264 -264
- odin/schema.py +210 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/METADATA +503 -456
- odin_engine-0.2.0.dist-info/RECORD +63 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/licenses/LICENSE +21 -21
- retrieval/__init__.py +50 -50
- retrieval/adapters.py +140 -140
- retrieval/adapters_arango.py +1418 -1418
- retrieval/aggregators.py +707 -707
- retrieval/beam.py +127 -127
- retrieval/budget.py +60 -60
- retrieval/cache.py +159 -159
- retrieval/confidence.py +88 -88
- retrieval/eval.py +49 -49
- retrieval/linker.py +87 -87
- retrieval/metrics.py +105 -105
- retrieval/metrics_motifs.py +36 -36
- retrieval/orchestrator.py +571 -571
- retrieval/ppr/__init__.py +12 -12
- retrieval/ppr/anchors.py +41 -41
- retrieval/ppr/bippr.py +61 -61
- retrieval/ppr/engines.py +257 -257
- retrieval/ppr/global_pr.py +76 -76
- retrieval/ppr/indexes.py +78 -78
- retrieval/ppr.py +156 -156
- retrieval/ppr_cache.py +25 -25
- retrieval/scoring.py +294 -294
- retrieval/utils/pii_redaction.py +36 -36
- retrieval/writers/__init__.py +9 -9
- retrieval/writers/arango_writer.py +28 -28
- retrieval/writers/base.py +21 -21
- retrieval/writers/janus_writer.py +36 -36
- odin_engine-0.1.0.dist-info/RECORD +0 -62
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/WHEEL +0 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/top_level.txt +0 -0
odin/schema.py
ADDED
@@ -0,0 +1,210 @@
+"""
+Used by AI agents to understand graph structure and write valid AQL queries.
+"""
+from typing import Dict, List, Optional, Any
+from dataclasses import dataclass, asdict
+import json
+
+
+@dataclass
+class CollectionSchema:
+    """Schema information for a single collection."""
+    name: str
+    type: str  # "document" or "edge"
+    count: int
+    fields: List[str]
+
+
+@dataclass
+class EdgeSchema:
+    """Schema information for an edge collection."""
+    name: str
+    count: int
+    from_collections: List[str]
+    to_collections: List[str]
+    fields: List[str]
+
+
+@dataclass
+class SchemaMap:
+    """Complete schema map of an ArangoDB database."""
+    database_name: str
+    collections: List[CollectionSchema]
+    edges: List[EdgeSchema]
+
+
+class SchemaInspector:
+    """
+
+    Queries the database to discover:
+    - All collections (vertex and edge)
+    - Field names in each collection
+    - Edge relationships (_from/_to patterns)
+
+    Usage:
+        inspector = SchemaInspector(arango_db)
+        schema = inspector.get_schema_map()
+        entity_info = inspector.get_collection_info("ExtractedEntities")
+    """
+
+    def __init__(self, db, max_sample_docs: int = 5):
+        """
+        Initialize schema inspector.
+        """
+        self.db = db
+        self.max_sample_docs = max_sample_docs
+        self._schema_cache: Optional[SchemaMap] = None
+
+    def get_schema_map(self, refresh: bool = False) -> Dict[str, Any]:
+        if self._schema_cache is None or refresh:
+            self._schema_cache = self._build_schema_map()
+
+        return asdict(self._schema_cache)
+
+    def get_collection_info(self, collection_name: str) -> Optional[Dict[str, Any]]:
+        schema = self.get_schema_map()
+
+        # Check document collections
+        for col in schema['collections']:
+            if col['name'] == collection_name:
+                return col
+
+        # Check edge collections
+        for edge in schema['edges']:
+            if edge['name'] == collection_name:
+                return edge
+
+        return None
+
+    def get_edge_info(self, edge_collection: str) -> Optional[Dict[str, Any]]:
+        schema = self.get_schema_map()
+
+        for edge in schema['edges']:
+            if edge['name'] == edge_collection:
+                return edge
+
+        return None
+
+    def _build_schema_map(self) -> SchemaMap:
+        """Build complete schema map by querying ArangoDB."""
+        db_name = self.db.name
+
+        # Get all collections
+        all_collections = self.db.collections()
+
+        document_collections = []
+        edge_collections = []
+
+        for col_info in all_collections:
+            col_name = col_info['name']
+
+            # Skip system collections
+            if col_name.startswith('_'):
+                continue
+
+            col = self.db.collection(col_name)
+            is_edge = col_info['type'] == 3  # Edge collection type
+
+            if is_edge:
+                edge_schema = self._inspect_edge_collection(col_name)
+                edge_collections.append(edge_schema)
+            else:
+                doc_schema = self._inspect_document_collection(col_name)
+                document_collections.append(doc_schema)
+
+        return SchemaMap(
+            database_name=db_name,
+            collections=document_collections,
+            edges=edge_collections
+        )
+
+    def _inspect_document_collection(self, col_name: str) -> CollectionSchema:
+        """Inspect a document collection and extract schema."""
+        col = self.db.collection(col_name)
+        count = col.count()
+
+        # Get sample documents to extract fields (always fetch at least 1 for field discovery)
+        fields = set()
+
+        if count > 0:
+            # Use max(1, max_sample_docs) to ensure at least 1 doc for fields
+            sample_limit = max(1, self.max_sample_docs)
+            aql = f"""
+            FOR doc IN {col_name}
+                LIMIT {sample_limit}
+                RETURN doc
+            """
+            cursor = self.db.aql.execute(aql)
+
+            for doc in cursor:
+                # Extract all field names
+                fields.update(doc.keys())
+
+        return CollectionSchema(
+            name=col_name,
+            type="document",
+            count=count,
+            fields=sorted(list(fields))
+        )
+
+    def _inspect_edge_collection(self, col_name: str) -> EdgeSchema:
+        """Inspect an edge collection and extract schema."""
+        col = self.db.collection(col_name)
+        count = col.count()
+
+        # Get sample edges to extract fields and _from/_to patterns (always fetch at least 1)
+        fields = set()
+        from_collections = set()
+        to_collections = set()
+
+        if count > 0:
+            # Use max(1, max_sample_docs) to ensure at least 1 edge for fields
+            sample_limit = max(1, self.max_sample_docs)
+            aql = f"""
+            FOR edge IN {col_name}
+                LIMIT {sample_limit}
+                RETURN edge
+            """
+            cursor = self.db.aql.execute(aql)
+
+            for edge in cursor:
+                # Extract fields
+                fields.update(edge.keys())
+
+                # Extract _from/_to collection names
+                if '_from' in edge:
+                    from_col = edge['_from'].split('/')[0]
+                    from_collections.add(from_col)
+
+                if '_to' in edge:
+                    to_col = edge['_to'].split('/')[0]
+                    to_collections.add(to_col)
+
+        return EdgeSchema(
+            name=col_name,
+            count=count,
+            from_collections=sorted(list(from_collections)),
+            to_collections=sorted(list(to_collections)),
+            fields=sorted(list(fields))
+        )
+
+
+def inspect_arango_schema(db, output_file: Optional[str] = None) -> Dict[str, Any]:
+    """
+    Convenience function to inspect ArangoDB schema and optionally save to file.
+
+    Args:
+        db: ArangoDB database connection
+        output_file: Optional path to save schema as JSON
+
+    Returns:
+        Schema map as dictionary
+    """
+    inspector = SchemaInspector(db)
+    schema = inspector.get_schema_map()
+
+    if output_file:
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(schema, f, indent=2, default=str)
+
+    return schema
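
The added odin/schema.py module exposes SchemaInspector and the inspect_arango_schema convenience wrapper shown above. A minimal usage sketch, assuming the python-arango client and a reachable ArangoDB instance; the host, credentials, database name, and edge collection name below are placeholders, not values shipped with odin-engine:

    # Sketch only: assumes python-arango is installed and an ArangoDB server is reachable.
    from arango import ArangoClient

    from odin.schema import SchemaInspector, inspect_arango_schema

    client = ArangoClient(hosts="http://localhost:8529")        # placeholder host
    db = client.db("example_db", username="root", password="")  # placeholder database

    # One-shot inspection, optionally persisted to JSON (e.g. to feed an agent prompt).
    schema = inspect_arango_schema(db, output_file="schema_map.json")
    print(schema["database_name"], len(schema["collections"]), len(schema["edges"]))

    # Or keep an inspector around; get_schema_map() caches until refresh=True is passed.
    inspector = SchemaInspector(db, max_sample_docs=10)
    edge_info = inspector.get_edge_info("mentions")  # hypothetical edge collection name
    if edge_info:
        print(edge_info["from_collections"], "->", edge_info["to_collections"])

Because get_schema_map() returns asdict(SchemaMap), callers receive plain dictionaries and lists, which is why the output_file path above can hand the result straight to json.dump.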