odin-engine 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. benchmarks/__init__.py +17 -17
  2. benchmarks/datasets.py +284 -284
  3. benchmarks/metrics.py +275 -275
  4. benchmarks/run_ablation.py +279 -279
  5. benchmarks/run_npll_benchmark.py +270 -270
  6. npll/__init__.py +10 -10
  7. npll/bootstrap.py +474 -474
  8. npll/core/__init__.py +33 -33
  9. npll/core/knowledge_graph.py +308 -308
  10. npll/core/logical_rules.py +496 -496
  11. npll/core/mln.py +474 -474
  12. npll/inference/__init__.py +40 -40
  13. npll/inference/e_step.py +419 -419
  14. npll/inference/elbo.py +434 -434
  15. npll/inference/m_step.py +576 -576
  16. npll/npll_model.py +631 -631
  17. npll/scoring/__init__.py +42 -42
  18. npll/scoring/embeddings.py +441 -441
  19. npll/scoring/probability.py +402 -402
  20. npll/scoring/scoring_module.py +369 -369
  21. npll/training/__init__.py +24 -24
  22. npll/training/evaluation.py +496 -496
  23. npll/training/npll_trainer.py +520 -520
  24. npll/utils/__init__.py +47 -47
  25. npll/utils/batch_utils.py +492 -492
  26. npll/utils/config.py +144 -144
  27. npll/utils/math_utils.py +338 -338
  28. odin/__init__.py +21 -20
  29. odin/engine.py +264 -264
  30. odin/schema.py +210 -0
  31. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/METADATA +503 -456
  32. odin_engine-0.2.0.dist-info/RECORD +63 -0
  33. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/licenses/LICENSE +21 -21
  34. retrieval/__init__.py +50 -50
  35. retrieval/adapters.py +140 -140
  36. retrieval/adapters_arango.py +1418 -1418
  37. retrieval/aggregators.py +707 -707
  38. retrieval/beam.py +127 -127
  39. retrieval/budget.py +60 -60
  40. retrieval/cache.py +159 -159
  41. retrieval/confidence.py +88 -88
  42. retrieval/eval.py +49 -49
  43. retrieval/linker.py +87 -87
  44. retrieval/metrics.py +105 -105
  45. retrieval/metrics_motifs.py +36 -36
  46. retrieval/orchestrator.py +571 -571
  47. retrieval/ppr/__init__.py +12 -12
  48. retrieval/ppr/anchors.py +41 -41
  49. retrieval/ppr/bippr.py +61 -61
  50. retrieval/ppr/engines.py +257 -257
  51. retrieval/ppr/global_pr.py +76 -76
  52. retrieval/ppr/indexes.py +78 -78
  53. retrieval/ppr.py +156 -156
  54. retrieval/ppr_cache.py +25 -25
  55. retrieval/scoring.py +294 -294
  56. retrieval/utils/pii_redaction.py +36 -36
  57. retrieval/writers/__init__.py +9 -9
  58. retrieval/writers/arango_writer.py +28 -28
  59. retrieval/writers/base.py +21 -21
  60. retrieval/writers/janus_writer.py +36 -36
  61. odin_engine-0.1.0.dist-info/RECORD +0 -62
  62. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/WHEEL +0 -0
  63. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/top_level.txt +0 -0
retrieval/ppr/indexes.py CHANGED
@@ -1,78 +1,78 @@
(Both sides of this hunk are textually identical; the whole-file remove/re-add appears to be a formatting- or line-ending-only rewrite. The file content is shown once.)

from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Iterable, Optional
import random

NodeId = int


@dataclass
class WalkIndexConfig:
    omega: int = 10      # walks per node (cap)
    rmax: float = 1e-3   # residual threshold knob (for capacity heuristics)
    alpha: float = 0.15  # teleport
    seed: int = 42


class RandomWalkIndex:
    """
    FIRM-style random-walk index (skeleton):
    - stores short geometric walks per node to accelerate SSPPR queries
    - supports O(1) expected-time updates under random arrival model (sketch)
    """

    def __init__(self, cfg: WalkIndexConfig):
        self.cfg = cfg
        self.walks: Dict[NodeId, List[List[NodeId]]] = {}
        random.seed(cfg.seed)

    def build(self, graph, nodes: Optional[Iterable[NodeId]] = None):
        nodes = nodes or graph.nodes()
        for u in nodes:
            self.walks[u] = self._sample_walks(graph, u, self.cfg.omega)

    def _sample_walks(self, graph, u: NodeId, k: int) -> List[List[NodeId]]:
        walks: List[List[NodeId]] = []
        for _ in range(k):
            path = [u]
            v = u
            while True:
                if random.random() < self.cfg.alpha:
                    break
                nbrs = list(graph.out_neighbors(v))
                if not nbrs:
                    break
                v = random.choice(nbrs)
                path.append(v)
            walks.append(path)
        return walks

    def on_edge_insert(self, graph, u: NodeId, v: NodeId):
        if u not in self.walks:
            return
        W = self.walks[u]
        target = max(1, int(graph.out_degree(u) * self.cfg.rmax * self.cfg.omega))
        while len(W) < target:
            W.append(self._sample_walks(graph, u, 1)[0])
        while len(W) > target and W:
            W.pop()

    def on_edge_delete(self, graph, u: NodeId, v: NodeId):
        if u not in self.walks:
            return
        W = self.walks[u]
        for _ in range(min(2, len(W))):
            if W:
                W.pop()
        target = max(1, int(graph.out_degree(u) * self.cfg.rmax * self.cfg.omega))
        while len(W) < target:
            W.append(self._sample_walks(graph, u, 1)[0])

    def sample_hits(self, source: NodeId) -> Dict[NodeId, int]:
        counts: Dict[NodeId, int] = {}
        for w in self.walks.get(source, []):
            for x in w:
                counts[x] = counts.get(x, 0) + 1
        return counts
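A minimal usage sketch, assuming the definitions above are in scope. The ToyGraph class is hypothetical; RandomWalkIndex only assumes an object exposing nodes(), out_neighbors(v), and out_degree(v).

class ToyGraph:
    """Hypothetical adjacency-list graph with the duck-typed surface the index expects."""
    def __init__(self, adj):
        self.adj = adj  # node -> list of out-neighbors

    def nodes(self):
        return list(self.adj)

    def out_neighbors(self, v):
        return self.adj.get(v, [])

    def out_degree(self, v):
        return len(self.adj.get(v, []))

g = ToyGraph({0: [1, 2], 1: [2], 2: [0], 3: []})
idx = RandomWalkIndex(WalkIndexConfig(omega=5, alpha=0.15, seed=7))
idx.build(g)
print(idx.sample_hits(0))    # visit counts over node 0's stored walks, e.g. {0: 5, 2: 3, 1: 2}
g.adj[1].append(3)
idx.on_edge_insert(g, 1, 3)  # trims/refills node 1's walk pool per the capacity heuristic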
retrieval/ppr.py CHANGED
@@ -1,156 +1,156 @@
(Both sides of this hunk are likewise textually identical; the module content is shown once.)

from __future__ import annotations
from typing import Dict, List, Tuple, Optional
from collections import defaultdict, deque
from dataclasses import dataclass
import random
from .adapters import GraphAccessor, NodeId
from .budget import SearchBudget, BudgetTracker


@dataclass
class PPRParams:
    alpha: float = 0.2
    eps: float = 1e-6
    num_walks: int = 2000
    walk_len: int = 50
    topn: int = 200


@dataclass
class PPRResult:
    scores: List[Tuple[NodeId, float]]
    mass: float
    used_budget: Dict[str, int]
    trace: Dict[str, object]


def build_alias_table(weighted_neighbors: List[Tuple[NodeId, float]]):
    if not weighted_neighbors:
        return [], [], []
    total = sum(max(0.0, w) for _, w in weighted_neighbors) or 1.0
    probs = [(n, w / total) for n, w in weighted_neighbors]
    n = len(probs)
    scaled = [p * n for _, p in probs]
    alias, prob = [0] * n, [0.0] * n
    small, large = [], []
    for i, sp in enumerate(scaled):
        (small if sp < 1 else large).append(i)
    while small and large:
        s, l = small.pop(), large.pop()
        prob[s] = scaled[s]
        alias[s] = l
        scaled[l] = scaled[l] - (1 - prob[s])
        (small if scaled[l] < 1 else large).append(l)
    for i in small + large:
        prob[i] = 1.0
        alias[i] = i
    nodes = [n for n, _ in probs]
    return nodes, prob, alias


def alias_draw(nodes, prob, alias):
    if not nodes:
        return None
    i = random.randrange(len(nodes))
    return nodes[i] if random.random() < prob[i] else nodes[alias[i]]
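A self-contained sanity check of the alias-method pair above (a sketch using only these two helpers; node ids are strings here, since the helpers are duck-typed): empirical draw frequencies should converge to the normalized weights.

import random
from collections import Counter

random.seed(0)
nodes, prob, alias = build_alias_table([("a", 1.0), ("b", 3.0), ("c", 6.0)])
draws = Counter(alias_draw(nodes, prob, alias) for _ in range(100_000))
for node in ("a", "b", "c"):
    print(node, round(draws[node] / 100_000, 3))  # approaches 0.1, 0.3, 0.6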
class PushPPREngine:
    def __init__(self, accessor: GraphAccessor, community_id: str):
        self.A = accessor
        self.cid = community_id

    def run(self, seeds: List[NodeId], params: PPRParams, budget: Optional[SearchBudget] = None) -> PPRResult:
        bt = BudgetTracker(budget or SearchBudget())
        p: Dict[NodeId, float] = defaultdict(float)
        r: Dict[NodeId, float] = defaultdict(float)
        q: deque[NodeId] = deque()

        seeds = seeds or []
        if not seeds:
            return PPRResult([], 0.0, bt.usage.__dict__, {"engine": "push", "iters": 0, "cache_hit": False})

        for s in seeds:
            r[s] += 1.0 / len(seeds)
            q.append(s)

        iters = 0
        while q and not bt.over():
            u = q.popleft()
            iters += 1
            ru = r[u]
            if ru <= 0:
                continue
            p[u] += params.alpha * ru
            residual = (1 - params.alpha) * ru
            r[u] = 0.0

            nbrs = list(self.A.iter_out(u))
            deg = len(nbrs)
            if deg == 0:
                continue
            share = residual / deg
            for v, _, _ in nbrs:
                r[v] += share
                bt.tick_edges(1)
                if r[v] / max(1, self.A.degree(v)) > params.eps:
                    q.append(v)
                    bt.tick_nodes(1)
            if bt.timed_out():
                break

        items = sorted(p.items(), key=lambda kv: kv[1], reverse=True)[: params.topn]
        mass = sum(p.values())
        return PPRResult(scores=items, mass=mass, used_budget=bt.usage.__dict__, trace={"engine": "push", "iters": iters, "cache_hit": False})
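A runnable sketch of driving the push engine against an in-memory graph, assuming the definitions above are in scope. DictAccessor is hypothetical: it just implements the duck-typed surface the engine touches (iter_out(u) yielding (neighbor, edge, weight) triples, and degree(v)). SearchBudget's defaults are assumed permissive enough for a graph this small.

class DictAccessor:
    """Hypothetical in-memory stand-in for GraphAccessor."""
    def __init__(self, adj):
        self.adj = adj  # node -> list of (neighbor, weight)

    def iter_out(self, u):
        for v, w in self.adj.get(u, []):
            yield v, None, w  # (neighbor, edge payload, weight)

    def degree(self, v):
        return len(self.adj.get(v, []))

adj = {0: [(1, 1.0), (2, 1.0)], 1: [(2, 1.0)], 2: [(0, 1.0)]}
engine = PushPPREngine(DictAccessor(adj), community_id="demo")
res = engine.run(seeds=[0], params=PPRParams(alpha=0.2, eps=1e-4, topn=10))
print(res.scores)  # ranked (node, score) pairs; the seed typically ranks highest
print(res.mass)    # settled probability mass (approaches 1.0 as eps shrinks)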
class MonteCarloPPREngine:
    def __init__(self, accessor: GraphAccessor, community_id: str, walk_index=None):
        self.A = accessor
        self.cid = community_id
        self._alias_cache: Dict[NodeId, Tuple[List[NodeId], List[float], List[int]]] = {}
        self.walk_index = walk_index

    def _alias_for(self, u: NodeId):
        if u in self._alias_cache:
            return self._alias_cache[u]
        nbrs = list(self.A.iter_out(u))
        table = build_alias_table([(v, w) for v, _, w in nbrs])
        self._alias_cache[u] = table
        return table

    def run(self, seeds: List[NodeId], params: PPRParams, budget: Optional[SearchBudget] = None) -> PPRResult:
        bt = BudgetTracker(budget or SearchBudget())
        if not seeds:
            return PPRResult([], 0.0, bt.usage.__dict__, {"engine": "mc", "iters": 0, "cache_hit": False})

        hits: Dict[NodeId, int] = defaultdict(int)
        # Optional pre-hit sampling from walk index to save MC effort
        if self.walk_index is not None:
            for s in seeds:
                for v, c in self.walk_index.sample_hits(s).items():
                    hits[v] += int(c)
        for _ in range(params.num_walks):
            if bt.over():
                break
            u = random.choice(seeds)
            for _ in range(params.walk_len):
                hits[u] += 1
                bt.tick_nodes(1)
                if random.random() < params.alpha:
                    u = random.choice(seeds)
                    continue
                nodes, prob, alias = self._alias_for(u)
                if not nodes:
                    u = random.choice(seeds)
                    continue
                u = alias_draw(nodes, prob, alias)
                bt.tick_edges(1)
            if bt.timed_out():
                break

        total = float(sum(hits.values())) or 1.0
        scores = sorted(((n, c / total) for n, c in hits.items()), key=lambda kv: kv[1], reverse=True)[: params.topn]
        return PPRResult(scores=scores, mass=1.0, used_budget=bt.usage.__dict__, trace={"engine": "mc", "iters": params.num_walks, "cache_hit": False})
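Continuing the hypothetical DictAccessor sketch above (same assumptions), the Monte Carlo engine answers the same query by simulating restart walks, optionally warm-started by a RandomWalkIndex passed as walk_index:

mc = MonteCarloPPREngine(DictAccessor(adj), community_id="demo")
res = mc.run(seeds=[0], params=PPRParams(alpha=0.2, num_walks=500, walk_len=20, topn=10))
print(res.scores)  # normalized visit frequencies; noisier than push at low num_walks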
retrieval/ppr_cache.py CHANGED
@@ -1,25 +1,25 @@
(Both sides of this hunk are likewise textually identical; the module content is shown once.)

import json
import hashlib


def _key(community_id: str, seeds: list[str], alpha: float, engine: str, **kwargs) -> str:
    seed_hash = hashlib.md5(json.dumps(sorted(seeds)).encode(), usedforsecurity=False).hexdigest()
    prior_hash = kwargs.get('prior_hash', '')
    return f"{community_id}:{engine}:{alpha:.4f}:{seed_hash}:{prior_hash}"


class PPRCache:
    def __init__(self, capacity: int = 256):
        self.capacity = capacity
        self._cache: dict[str, object] = {}

    def get(self, key: str):
        return self._cache.get(key)

    def put(self, key: str, value: object):
        if len(self._cache) >= self.capacity:
            # Simple FIFO eviction
            self._cache.pop(next(iter(self._cache)))
        self._cache[key] = value
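A short usage sketch, assuming only the code above: keys are derived from the query parameters, and inserting past capacity evicts in insertion order (Python dicts preserve it since 3.7).

cache = PPRCache(capacity=2)
k = _key("demo", seeds=["0"], alpha=0.2, engine="push")
cache.put(k, {"scores": [(0, 0.44)]})
print(cache.get(k))    # the cached payload
cache.put("k2", "v2")
cache.put("k3", "v3")  # capacity exceeded: oldest entry (k) is evicted
print(cache.get(k))    # None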