odin-engine 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/__init__.py +17 -17
- benchmarks/datasets.py +284 -284
- benchmarks/metrics.py +275 -275
- benchmarks/run_ablation.py +279 -279
- benchmarks/run_npll_benchmark.py +270 -270
- npll/__init__.py +10 -10
- npll/bootstrap.py +474 -474
- npll/core/__init__.py +33 -33
- npll/core/knowledge_graph.py +308 -308
- npll/core/logical_rules.py +496 -496
- npll/core/mln.py +474 -474
- npll/inference/__init__.py +40 -40
- npll/inference/e_step.py +419 -419
- npll/inference/elbo.py +434 -434
- npll/inference/m_step.py +576 -576
- npll/npll_model.py +631 -631
- npll/scoring/__init__.py +42 -42
- npll/scoring/embeddings.py +441 -441
- npll/scoring/probability.py +402 -402
- npll/scoring/scoring_module.py +369 -369
- npll/training/__init__.py +24 -24
- npll/training/evaluation.py +496 -496
- npll/training/npll_trainer.py +520 -520
- npll/utils/__init__.py +47 -47
- npll/utils/batch_utils.py +492 -492
- npll/utils/config.py +144 -144
- npll/utils/math_utils.py +338 -338
- odin/__init__.py +21 -20
- odin/engine.py +264 -264
- odin/schema.py +210 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/METADATA +503 -456
- odin_engine-0.2.0.dist-info/RECORD +63 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/licenses/LICENSE +21 -21
- retrieval/__init__.py +50 -50
- retrieval/adapters.py +140 -140
- retrieval/adapters_arango.py +1418 -1418
- retrieval/aggregators.py +707 -707
- retrieval/beam.py +127 -127
- retrieval/budget.py +60 -60
- retrieval/cache.py +159 -159
- retrieval/confidence.py +88 -88
- retrieval/eval.py +49 -49
- retrieval/linker.py +87 -87
- retrieval/metrics.py +105 -105
- retrieval/metrics_motifs.py +36 -36
- retrieval/orchestrator.py +571 -571
- retrieval/ppr/__init__.py +12 -12
- retrieval/ppr/anchors.py +41 -41
- retrieval/ppr/bippr.py +61 -61
- retrieval/ppr/engines.py +257 -257
- retrieval/ppr/global_pr.py +76 -76
- retrieval/ppr/indexes.py +78 -78
- retrieval/ppr.py +156 -156
- retrieval/ppr_cache.py +25 -25
- retrieval/scoring.py +294 -294
- retrieval/utils/pii_redaction.py +36 -36
- retrieval/writers/__init__.py +9 -9
- retrieval/writers/arango_writer.py +28 -28
- retrieval/writers/base.py +21 -21
- retrieval/writers/janus_writer.py +36 -36
- odin_engine-0.1.0.dist-info/RECORD +0 -62
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/WHEEL +0 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/top_level.txt +0 -0
retrieval/ppr/engines.py
CHANGED
@@ -1,257 +1,257 @@

(All 257 lines are flagged as removed and re-added, but the removed and added text is identical, so the file content is shown once below.)

from __future__ import annotations
from typing import Dict, List, Tuple, Optional
from collections import defaultdict, deque
from dataclasses import dataclass
import random
from ..adapters import GraphAccessor, NodeId
from ..budget import SearchBudget, BudgetTracker


@dataclass
class PPRParams:
    alpha: float = 0.2
    eps: float = 1e-6
    num_walks: int = 2000
    walk_len: int = 50
    topn: int = 200


@dataclass
class PPRResult:
    scores: List[Tuple[NodeId, float]]
    mass: float
    used_budget: Dict[str, int]
    trace: Dict[str, object]


def build_alias_table(weighted_neighbors: List[Tuple[NodeId, float]]):
    if not weighted_neighbors:
        return [], [], []
    total = sum(max(0.0, w) for _, w in weighted_neighbors) or 1.0
    probs = [(n, w / total) for n, w in weighted_neighbors]
    n = len(probs)
    scaled = [p * n for _, p in probs]
    alias, prob = [0] * n, [0.0] * n
    small, large = [], []
    for i, sp in enumerate(scaled):
        (small if sp < 1 else large).append(i)
    while small and large:
        s, l = small.pop(), large.pop()
        prob[s] = scaled[s]
        alias[s] = l
        scaled[l] = scaled[l] - (1 - prob[s])
        (small if scaled[l] < 1 else large).append(l)
    for i in small + large:
        prob[i] = 1.0
        alias[i] = i
    nodes = [n for n, _ in probs]
    return nodes, prob, alias


def alias_draw(nodes, prob, alias):
    if not nodes:
        return None
    i = random.randrange(len(nodes))
    return nodes[i] if random.random() < prob[i] else nodes[alias[i]]
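build_alias_table implements the alias method (Vose's algorithm): after an O(deg) setup it supports O(1) weighted draws via alias_draw, which is why MonteCarloPPREngine below caches one table per node. A minimal sanity check against this module as published; the node ids and the 3:1 weight ratio are made up for illustration:

# Sketch: not part of engines.py; exercises the alias sampler above.
import random
from collections import Counter
from retrieval.ppr.engines import build_alias_table, alias_draw

nodes, prob, alias = build_alias_table([("a", 1.0), ("b", 3.0)])
random.seed(0)
draws = Counter(alias_draw(nodes, prob, alias) for _ in range(10000))
print(draws["b"] / draws["a"])  # should land near 3.0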
class PushPPREngine:
    def __init__(self, accessor: GraphAccessor, community_id: str):
        self.A = accessor
        self.cid = community_id

    def run(self, seeds: List[NodeId], params: PPRParams, budget: Optional[SearchBudget] = None, personalization: Optional[Dict[NodeId, float]] = None) -> PPRResult:
        bt = BudgetTracker(budget or SearchBudget())
        p: Dict[NodeId, float] = defaultdict(float)
        r: Dict[NodeId, float] = defaultdict(float)
        q: deque[NodeId] = deque()

        if personalization is None:
            # Default behavior if no personalization is provided (uniform over seeds)
            seeds = seeds or []
            if not seeds:
                return PPRResult([], 0.0, bt.usage.__dict__, {"engine": "push", "iters": 0, "cache_hit": False})
            init_mass = 1.0 / len(seeds)
            for s in seeds:
                r[s] += init_mass
                q.append(s)
        else:
            # Use provided personalization vector
            for s, mass in personalization.items():
                r[s] += mass
                q.append(s)
            if not personalization:
                return PPRResult([], 0.0, bt.usage.__dict__, {"engine": "push", "iters": 0, "cache_hit": False})

        early_stop_reason = None
        iters = 0
        while q and not bt.over():
            u = q.popleft()
            iters += 1
            ru = r[u]
            if ru <= 0:
                continue
            p[u] += params.alpha * ru
            residual = (1 - params.alpha) * ru
            r[u] = 0.0

            nbrs = list(self.A.iter_out(u))
            deg = len(nbrs)
            if deg == 0:
                continue
            share = residual / deg
            for v, _, _ in nbrs:
                r[v] += share
                bt.tick_edges(1)
                if r[v] / max(1, self.A.degree(v)) > params.eps:
                    q.append(v)
            bt.tick_nodes(1)
            if bt.timed_out():
                early_stop_reason = "timeout"
                break

        if not early_stop_reason and bt.over():
            early_stop_reason = "budget_exhausted"

        items = sorted(p.items(), key=lambda kv: kv[1], reverse=True)[: params.topn]
        mass = sum(p.values())
        return PPRResult(scores=items, mass=mass, used_budget=bt.usage.__dict__, trace={"engine": "push", "iters": iters, "cache_hit": False, "early_stop_reason": early_stop_reason})
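PushPPREngine is the classic forward-push approximation: each pop keeps alpha of a node's residual as settled score and spreads the rest over its out-edges, queueing a neighbor only while its degree-normalized residual exceeds eps. A hedged usage sketch; ToyAccessor is a made-up stand-in that implements only the two accessor methods run() actually calls (iter_out yielding (neighbor, relation, weight) triples, and degree), and it assumes the default SearchBudget is permissive enough to let the push finish:

# Sketch: not part of engines.py; ToyAccessor is hypothetical.
from retrieval.ppr.engines import PushPPREngine, PPRParams

class ToyAccessor:
    def __init__(self, adj):
        self.adj = adj  # node -> [(neighbor, relation, weight), ...]
    def iter_out(self, u):
        return iter(self.adj.get(u, []))
    def degree(self, u):
        return len(self.adj.get(u, []))

adj = {"a": [("b", "r", 1.0), ("c", "r", 1.0)], "b": [("c", "r", 1.0)], "c": []}
result = PushPPREngine(ToyAccessor(adj), "community-0").run(["a"], PPRParams())
print(result.scores)  # seed "a" first, then the mass that leaked to "b" and "c"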
class MonteCarloPPREngine:
    def __init__(self, accessor: GraphAccessor, community_id: str, walk_index=None):
        self.A = accessor
        self.cid = community_id
        self._alias_cache: Dict[NodeId, Tuple[List[NodeId], List[float], List[int]]] = {}
        self.walk_index = walk_index

    def _alias_for(self, u: NodeId):
        if u in self._alias_cache:
            return self._alias_cache[u]
        nbrs = list(self.A.iter_out(u))
        table = build_alias_table([(v, w) for v, _, w in nbrs])
        self._alias_cache[u] = table
        return table

    def run(self, seeds: List[NodeId], params: PPRParams, budget: Optional[SearchBudget] = None, personalization: Optional[Dict[NodeId, float]] = None) -> PPRResult:
        bt = BudgetTracker(budget or SearchBudget())
        if not seeds and (personalization is None or not personalization):
            return PPRResult([], 0.0, bt.usage.__dict__, {"engine": "mc", "iters": 0, "cache_hit": False})

        hits: Dict[NodeId, int] = defaultdict(int)
        # Optional pre-hit sampling from walk index to save MC effort
        if self.walk_index is not None:
            for s in seeds:
                for v, c in self.walk_index.sample_hits(s).items():
                    hits[v] += int(c)

        # Prepare for weighted random choice if personalization is provided
        personalization_nodes = list(personalization.keys()) if personalization else seeds
        personalization_weights = list(personalization.values()) if personalization else [1.0] * len(seeds)
        # Normalize weights for random.choices if provided
        total_personalization_weight = sum(personalization_weights)
        if total_personalization_weight == 0:
            # Fallback to uniform if all weights are zero
            personalization_weights = [1.0] * len(personalization_nodes)
            total_personalization_weight = float(len(personalization_nodes))
        normalized_personalization_weights = [w / total_personalization_weight for w in personalization_weights]

        early_stop_reason = None
        for _ in range(params.num_walks):
            if bt.over():
                early_stop_reason = "budget_exhausted"
                break

            # Start walk from personalized distribution if available, else uniform from seeds
            if personalization:
                u = random.choices(personalization_nodes, weights=normalized_personalization_weights, k=1)[0]
            else:
                u = random.choice(seeds)

            for _ in range(params.walk_len):
                if bt.over():  # Check budget before each step
                    early_stop_reason = "budget_exhausted"
                    break
                hits[u] += 1
                bt.tick_nodes(1)
                if random.random() < params.alpha:
                    # Teleport back to personalized distribution if available, else uniform from seeds
                    if personalization:
                        u = random.choices(personalization_nodes, weights=normalized_personalization_weights, k=1)[0]
                    else:
                        u = random.choice(seeds)
                    continue
                nodes, prob, alias = self._alias_for(u)
                if not nodes:
                    # If dangling, teleport back to personalized distribution if available, else uniform from seeds
                    if personalization:
                        u = random.choices(personalization_nodes, weights=normalized_personalization_weights, k=1)[0]
                    else:
                        u = random.choice(seeds)
                    break
                u = alias_draw(nodes, prob, alias)
                bt.tick_edges(1)
            if bt.timed_out():
                early_stop_reason = "timeout"
                break

        total = float(sum(hits.values()) or 1.0)
        scores = sorted(((n, c / total) for n, c in hits.items()), key=lambda kv: kv[1], reverse=True)[: params.topn]
        return PPRResult(scores=scores, mass=1.0, used_budget=bt.usage.__dict__, trace={"engine": "mc", "iters": params.num_walks, "cache_hit": False, "early_stop_reason": early_stop_reason})
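The Monte Carlo engine estimates the same PPR vector by frequency counting: each walk restarts from the seed (or personalization) distribution with probability alpha, and visit counts are normalized to sum to 1. A hedged sketch reusing the hypothetical ToyAccessor from the previous example; with a personalization dict, walk starts and teleports are drawn from the given weights instead of uniformly from seeds:

# Sketch: not part of engines.py; reuses the made-up ToyAccessor above.
from retrieval.ppr.engines import MonteCarloPPREngine, PPRParams

mc = MonteCarloPPREngine(ToyAccessor(adj), "community-0")
res = mc.run(seeds=["a", "b"],
             params=PPRParams(num_walks=500, walk_len=20),
             personalization={"a": 0.8, "b": 0.2})
print(res.scores[:3])  # normalized hit frequencies, biased toward "a"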
class BiPPREngine:
    """
    Bidirectional PPR for source→target scoring:
    - Reverse push from targets builds fingerprints
    - Forward random walks from source intersect with reverse mass
    """

    def __init__(self, graph: GraphAccessor, alpha=0.15, rmax=1e-4):
        self.G = graph
        self.alpha, self.rmax = alpha, rmax

    def _reverse_push(self, targets: List[NodeId]) -> Dict[NodeId, float]:
        p_t, r_t = defaultdict(float), defaultdict(float)
        q = deque()
        tgt = list(targets)
        if not tgt:
            return p_t
        mass = 1.0 / len(tgt)
        for t in tgt:
            r_t[t] = mass
            q.append(t)
        while q:
            u = q.popleft()
            if r_t[u] / max(1, self.G.in_degree(u)) <= self.rmax:
                continue
            push = (1 - self.alpha) * r_t[u]
            p_t[u] += self.alpha * r_t[u]
            r_t[u] = 0.0
            deg = self.G.in_degree(u)
            if deg == 0:
                continue
            share = push / deg
            for v in self.G.in_neighbors(u):
                r_t[v] += share
                if r_t[v] / max(1, self.G.in_degree(v)) > self.rmax:
                    q.append(v)
        return p_t

    def score(self, source: NodeId, targets: List[NodeId], walks=5000) -> List[Tuple[NodeId, float]]:
        fp = self._reverse_push(targets)
        hits = defaultdict(int)
        for _ in range(walks):
            u = source
            while True:
                hits[u] += 1
                if random.random() < self.alpha:
                    break
                nbrs = list(self.G.out_neighbors(u))
                if not nbrs:
                    break
                u = random.choice(nbrs)
        Z = float(sum(hits.values()) or 1)
        return sorted(((t, fp.get(t, 0.0) * hits.get(t, 0) / Z) for t in targets), key=lambda kv: kv[1], reverse=True)
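BiPPREngine multiplies the two estimates: _reverse_push propagates residual mass along in-edges until every degree-normalized residual falls below rmax, and score() weights each target's fingerprint by the forward-walk visit frequency. A hedged sketch; ToyBiAccessor is made up and implements only the calls this class uses (in_degree, in_neighbors, out_neighbors):

# Sketch: not part of engines.py; ToyBiAccessor is hypothetical.
from retrieval.ppr.engines import BiPPREngine

class ToyBiAccessor:
    def __init__(self, out_adj):
        self.out_adj = out_adj
        self.in_adj = {}
        for u, vs in out_adj.items():
            for v in vs:
                self.in_adj.setdefault(v, []).append(u)
    def out_neighbors(self, u):
        return self.out_adj.get(u, [])
    def in_neighbors(self, u):
        return self.in_adj.get(u, [])
    def in_degree(self, u):
        return len(self.in_adj.get(u, []))

g = ToyBiAccessor({"a": ["b"], "b": ["c"], "c": []})
print(BiPPREngine(g).score("a", targets=["b", "c"], walks=1000))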
retrieval/ppr/global_pr.py
CHANGED
@@ -1,76 +1,76 @@

(All 76 lines are flagged as removed and re-added, but the removed and added text is identical, so the file content is shown once below.)

from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, Iterable

from ..adapters import GraphAccessor, NodeId


@dataclass
class GlobalPRParams:
    alpha: float = 0.15
    tol: float = 1e-8
    max_iter: int = 100


class GlobalPR:
    """
    Power-iteration PageRank over nodes visible to the GraphAccessor in a given community.
    Handles dangling nodes by redistributing mass to personalization (uniform over nodes).
    """

    def __init__(self, accessor: GraphAccessor, community_id: str):
        self.A = accessor
        self.cid = community_id
        self.pr: Dict[NodeId, float] = {}

    def fit(self, params: GlobalPRParams = GlobalPRParams()) -> Dict[NodeId, float]:
        nodes = list(self.A.nodes(self.cid))
        if not nodes:
            self.pr = {}
            return self.pr
        n = len(nodes)
        idx: Dict[NodeId, int] = {u: i for i, u in enumerate(nodes)}
        pr_prev = [1.0 / n] * n
        alpha = params.alpha
        teleport = 1.0 / n

        # Precompute out-neighbors indices
        out_idx: Dict[int, list[int]] = {}
        for u in nodes:
            ui = idx[u]
            nbrs = [idx[v] for v, _, _ in self.A.iter_out(u)]
            out_idx[ui] = nbrs

        for _ in range(params.max_iter):
            pr = [0.0] * n
            dangling_mass = 0.0
            for ui in range(n):
                nbrs = out_idx[ui]
                if not nbrs:
                    dangling_mass += (1.0 - alpha) * pr_prev[ui]
                    pr[ui] += alpha * pr_prev[ui]
                    continue
                share = (1.0 - alpha) * pr_prev[ui] / len(nbrs)
                pr[ui] += alpha * pr_prev[ui]
                for vj in nbrs:
                    pr[vj] += share
            # Redistribute dangling to teleport set (uniform personalization)
            if dangling_mass > 0:
                add = dangling_mass * teleport
                pr = [x + add for x in pr]

            # Teleportation to uniform as well
            pr = [alpha * teleport + (1.0 - alpha) * (x - alpha * teleport) for x in pr]

            # Normalize and check convergence (L1)
            s = sum(pr) or 1.0
            pr = [x / s for x in pr]
            diff = sum(abs(pr[i] - pr_prev[i]) for i in range(n))
            pr_prev = pr
            if diff < params.tol:
                break

        self.pr = {u: pr_prev[idx[u]] for u in nodes}
        return self.pr
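GlobalPR.fit runs plain power iteration with uniform teleportation, normalizing each iterate and stopping once the L1 change between iterates drops below tol. A hedged sketch; ToyPRAccessor is made up and mirrors the two accessor calls fit() makes (nodes(community_id) and iter_out(u) yielding (neighbor, relation, weight) triples):

# Sketch: not part of global_pr.py; ToyPRAccessor is hypothetical.
from retrieval.ppr.global_pr import GlobalPR, GlobalPRParams

class ToyPRAccessor:
    def __init__(self, adj):
        self.adj = adj  # node -> [(neighbor, relation, weight), ...]
    def nodes(self, community_id):
        return list(self.adj)
    def iter_out(self, u):
        return iter(self.adj.get(u, []))

adj = {"a": [("b", "r", 1.0)], "b": [("a", "r", 1.0), ("c", "r", 1.0)], "c": []}
pr = GlobalPR(ToyPRAccessor(adj), "community-0").fit(GlobalPRParams(max_iter=50))
print(sorted(pr.items(), key=lambda kv: kv[1], reverse=True))  # scores sum to ~1.0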