odin-engine 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. benchmarks/__init__.py +17 -17
  2. benchmarks/datasets.py +284 -284
  3. benchmarks/metrics.py +275 -275
  4. benchmarks/run_ablation.py +279 -279
  5. benchmarks/run_npll_benchmark.py +270 -270
  6. npll/__init__.py +10 -10
  7. npll/bootstrap.py +474 -474
  8. npll/core/__init__.py +33 -33
  9. npll/core/knowledge_graph.py +308 -308
  10. npll/core/logical_rules.py +496 -496
  11. npll/core/mln.py +474 -474
  12. npll/inference/__init__.py +40 -40
  13. npll/inference/e_step.py +419 -419
  14. npll/inference/elbo.py +434 -434
  15. npll/inference/m_step.py +576 -576
  16. npll/npll_model.py +631 -631
  17. npll/scoring/__init__.py +42 -42
  18. npll/scoring/embeddings.py +441 -441
  19. npll/scoring/probability.py +402 -402
  20. npll/scoring/scoring_module.py +369 -369
  21. npll/training/__init__.py +24 -24
  22. npll/training/evaluation.py +496 -496
  23. npll/training/npll_trainer.py +520 -520
  24. npll/utils/__init__.py +47 -47
  25. npll/utils/batch_utils.py +492 -492
  26. npll/utils/config.py +144 -144
  27. npll/utils/math_utils.py +338 -338
  28. odin/__init__.py +21 -20
  29. odin/engine.py +264 -264
  30. odin/schema.py +210 -0
  31. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/METADATA +503 -456
  32. odin_engine-0.2.0.dist-info/RECORD +63 -0
  33. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/licenses/LICENSE +21 -21
  34. retrieval/__init__.py +50 -50
  35. retrieval/adapters.py +140 -140
  36. retrieval/adapters_arango.py +1418 -1418
  37. retrieval/aggregators.py +707 -707
  38. retrieval/beam.py +127 -127
  39. retrieval/budget.py +60 -60
  40. retrieval/cache.py +159 -159
  41. retrieval/confidence.py +88 -88
  42. retrieval/eval.py +49 -49
  43. retrieval/linker.py +87 -87
  44. retrieval/metrics.py +105 -105
  45. retrieval/metrics_motifs.py +36 -36
  46. retrieval/orchestrator.py +571 -571
  47. retrieval/ppr/__init__.py +12 -12
  48. retrieval/ppr/anchors.py +41 -41
  49. retrieval/ppr/bippr.py +61 -61
  50. retrieval/ppr/engines.py +257 -257
  51. retrieval/ppr/global_pr.py +76 -76
  52. retrieval/ppr/indexes.py +78 -78
  53. retrieval/ppr.py +156 -156
  54. retrieval/ppr_cache.py +25 -25
  55. retrieval/scoring.py +294 -294
  56. retrieval/utils/pii_redaction.py +36 -36
  57. retrieval/writers/__init__.py +9 -9
  58. retrieval/writers/arango_writer.py +28 -28
  59. retrieval/writers/base.py +21 -21
  60. retrieval/writers/janus_writer.py +36 -36
  61. odin_engine-0.1.0.dist-info/RECORD +0 -62
  62. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/WHEEL +0 -0
  63. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/top_level.txt +0 -0
retrieval/ppr/__init__.py CHANGED
@@ -1,12 +1,12 @@
- from .engines import PushPPREngine, MonteCarloPPREngine, PPRParams
- from .bippr import BiPPREngine
- from .anchors import APPRAnchors, APPRAnchorParams
- from .global_pr import GlobalPR, GlobalPRParams
- from .indexes import RandomWalkIndex, WalkIndexConfig
-
- __all__ = [
-     'PushPPREngine', 'MonteCarloPPREngine', 'BiPPREngine', 'PPRParams',
-     'APPRAnchors', 'APPRAnchorParams',
-     'GlobalPR', 'GlobalPRParams',
-     'RandomWalkIndex', 'WalkIndexConfig',
- ]
+ from .engines import PushPPREngine, MonteCarloPPREngine, PPRParams
+ from .bippr import BiPPREngine
+ from .anchors import APPRAnchors, APPRAnchorParams
+ from .global_pr import GlobalPR, GlobalPRParams
+ from .indexes import RandomWalkIndex, WalkIndexConfig
+
+ __all__ = [
+     'PushPPREngine', 'MonteCarloPPREngine', 'BiPPREngine', 'PPRParams',
+     'APPRAnchors', 'APPRAnchorParams',
+     'GlobalPR', 'GlobalPRParams',
+     'RandomWalkIndex', 'WalkIndexConfig',
+ ]
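
The names re-exported above are the subpackage's public surface, so consumers import the engines and helpers directly from retrieval.ppr rather than from the individual modules. A minimal import sketch (assuming odin-engine is installed; the trailing comments only reflect what the module and class names suggest):

from retrieval.ppr import (
    PushPPREngine, MonteCarloPPREngine, BiPPREngine, PPRParams,  # PPR engines and shared params
    APPRAnchors, APPRAnchorParams,                               # per-community anchor warm-up
    GlobalPR, GlobalPRParams,                                    # global PageRank pass
    RandomWalkIndex, WalkIndexConfig,                            # random-walk index support
)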
retrieval/ppr/anchors.py CHANGED
@@ -1,41 +1,41 @@
- from __future__ import annotations
- from dataclasses import dataclass
- from typing import Dict, List, Tuple, Iterable, Optional
-
- from ..adapters import GraphAccessor, NodeId
- from .engines import PushPPREngine, PPRParams
-
-
- @dataclass
- class APPRAnchorParams:
-     alpha: float = 0.15
-     eps: float = 1e-4
-     topn: int = 200
-
-
- class APPRAnchors:
-     """
-     Build per-community APPR anchor sets using push-based PPR.
-     Intended for offline or periodic refresh to warm personalization priors.
-     """
-
-     def __init__(self, accessor: GraphAccessor):
-         self.A = accessor
-         self.cache: Dict[Tuple[str, str], List[Tuple[NodeId, float]]] = {}
-
-     def build_for_community(
-         self,
-         community_id: str,
-         seed_set: List[NodeId],
-         params: APPRAnchorParams = APPRAnchorParams(),
-     ) -> List[Tuple[NodeId, float]]:
-         key = (community_id, ",".join(sorted(map(str, seed_set))))
-         if key in self.cache:
-             return self.cache[key]
-         engine = PushPPREngine(self.A, community_id)
-         p = PPRParams(alpha=params.alpha, eps=params.eps, topn=params.topn)
-         res = engine.run(seeds=seed_set, params=p)
-         self.cache[key] = res.scores
-         return res.scores
-
-
+ from __future__ import annotations
+ from dataclasses import dataclass
+ from typing import Dict, List, Tuple, Iterable, Optional
+
+ from ..adapters import GraphAccessor, NodeId
+ from .engines import PushPPREngine, PPRParams
+
+
+ @dataclass
+ class APPRAnchorParams:
+     alpha: float = 0.15
+     eps: float = 1e-4
+     topn: int = 200
+
+
+ class APPRAnchors:
+     """
+     Build per-community APPR anchor sets using push-based PPR.
+     Intended for offline or periodic refresh to warm personalization priors.
+     """
+
+     def __init__(self, accessor: GraphAccessor):
+         self.A = accessor
+         self.cache: Dict[Tuple[str, str], List[Tuple[NodeId, float]]] = {}
+
+     def build_for_community(
+         self,
+         community_id: str,
+         seed_set: List[NodeId],
+         params: APPRAnchorParams = APPRAnchorParams(),
+     ) -> List[Tuple[NodeId, float]]:
+         key = (community_id, ",".join(sorted(map(str, seed_set))))
+         if key in self.cache:
+             return self.cache[key]
+         engine = PushPPREngine(self.A, community_id)
+         p = PPRParams(alpha=params.alpha, eps=params.eps, topn=params.topn)
+         res = engine.run(seeds=seed_set, params=p)
+         self.cache[key] = res.scores
+         return res.scores
+
+
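
APPRAnchors delegates the actual computation to PushPPREngine in retrieval/ppr/engines.py, which is not part of this excerpt. As a point of reference only, here is a minimal standalone sketch of the push-style approximate PPR iteration the docstring refers to, run over a plain dict-of-lists adjacency map; it is not the package's engine, and the toy graph is made up.

from collections import defaultdict, deque

def appr_push(graph, seeds, alpha=0.15, eps=1e-4):
    """Approximate personalized PageRank via repeated local pushes."""
    p = defaultdict(float)               # settled PPR mass per node
    r = defaultdict(float)               # residual mass still waiting to be pushed
    for s in seeds:
        r[s] = 1.0 / len(seeds)
    q = deque(seeds)
    while q:
        u = q.popleft()
        deg = max(1, len(graph.get(u, [])))
        if r[u] / deg <= eps:            # residual too small to be worth pushing
            continue
        p[u] += alpha * r[u]             # keep the alpha share at u
        share = (1 - alpha) * r[u] / deg
        r[u] = 0.0
        for v in graph.get(u, []):       # spread the rest over out-neighbors
            r[v] += share
            if r[v] / max(1, len(graph.get(v, []))) > eps:
                q.append(v)
    return sorted(p.items(), key=lambda kv: kv[1], reverse=True)

toy = {"a": ["b", "c"], "b": ["c"], "c": ["a"]}
print(appr_push(toy, seeds=["a"])[:3])   # top nodes with their approximate PPR scores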
retrieval/ppr/bippr.py CHANGED
@@ -1,61 +1,61 @@
- from __future__ import annotations
- from typing import Dict, Iterable, Tuple, List
- from collections import defaultdict, deque
- import random
-
- from ..adapters import GraphAccessor, NodeId
-
- class BiPPREngine:
-     """
-     Bidirectional PPR for source→target scoring:
-     - Reverse push from targets builds fingerprints
-     - Forward random walks from source intersect with reverse mass
-     """
-
-     def __init__(self, graph: GraphAccessor, alpha=0.15, rmax=1e-4):
-         self.G = graph
-         self.alpha, self.rmax = alpha, rmax
-
-     def _reverse_push(self, targets: Iterable[NodeId]) -> Dict[NodeId, float]:
-         p_t, r_t = defaultdict(float), defaultdict(float)
-         q = deque()
-         tgt = list(targets)
-         if not tgt:
-             return p_t
-         mass = 1.0 / len(tgt)
-         for t in tgt:
-             r_t[t] = mass
-             q.append(t)
-         while q:
-             u = q.popleft()
-             if r_t[u] / max(1, self.G.in_degree(u)) <= self.rmax:
-                 continue
-             push = (1 - self.alpha) * r_t[u]
-             p_t[u] += self.alpha * r_t[u]
-             r_t[u] = 0.0
-             deg = self.G.in_degree(u)
-             if deg == 0:
-                 continue
-             share = push / deg
-             # BiPPR needs in_neighbors, which needs to be in GraphAccessor and Mock
-             for v in self.G.in_neighbors(u):
-                 r_t[v] += share
-                 if r_t[v] / max(1, self.G.in_degree(v)) > self.rmax:
-                     q.append(v)
-         return p_t
-
-     def score(self, source: NodeId, targets: List[NodeId], walks=5000) -> List[Tuple[NodeId, float]]:
-         fp = self._reverse_push(targets)
-         hits = defaultdict(int)
-         for _ in range(walks):
-             u = source
-             while True:
-                 hits[u] += 1
-                 if random.random() < self.alpha:
-                     break
-                 nbrs = list(self.G.out_neighbors(u))
-                 if not nbrs:
-                     break
-                 u = random.choice(nbrs)
-         Z = float(sum(hits.values()) or 1)
-         return sorted(((t, fp.get(t, 0.0) * hits.get(t, 0) / Z) for t in targets), key=lambda kv: kv[1], reverse=True)
+ from __future__ import annotations
+ from typing import Dict, Iterable, Tuple, List
+ from collections import defaultdict, deque
+ import random
+
+ from ..adapters import GraphAccessor, NodeId
+
+ class BiPPREngine:
+     """
+     Bidirectional PPR for source→target scoring:
+     - Reverse push from targets builds fingerprints
+     - Forward random walks from source intersect with reverse mass
+     """
+
+     def __init__(self, graph: GraphAccessor, alpha=0.15, rmax=1e-4):
+         self.G = graph
+         self.alpha, self.rmax = alpha, rmax
+
+     def _reverse_push(self, targets: Iterable[NodeId]) -> Dict[NodeId, float]:
+         p_t, r_t = defaultdict(float), defaultdict(float)
+         q = deque()
+         tgt = list(targets)
+         if not tgt:
+             return p_t
+         mass = 1.0 / len(tgt)
+         for t in tgt:
+             r_t[t] = mass
+             q.append(t)
+         while q:
+             u = q.popleft()
+             if r_t[u] / max(1, self.G.in_degree(u)) <= self.rmax:
+                 continue
+             push = (1 - self.alpha) * r_t[u]
+             p_t[u] += self.alpha * r_t[u]
+             r_t[u] = 0.0
+             deg = self.G.in_degree(u)
+             if deg == 0:
+                 continue
+             share = push / deg
+             # BiPPR needs in_neighbors, which needs to be in GraphAccessor and Mock
+             for v in self.G.in_neighbors(u):
+                 r_t[v] += share
+                 if r_t[v] / max(1, self.G.in_degree(v)) > self.rmax:
+                     q.append(v)
+         return p_t
+
+     def score(self, source: NodeId, targets: List[NodeId], walks=5000) -> List[Tuple[NodeId, float]]:
+         fp = self._reverse_push(targets)
+         hits = defaultdict(int)
+         for _ in range(walks):
+             u = source
+             while True:
+                 hits[u] += 1
+                 if random.random() < self.alpha:
+                     break
+                 nbrs = list(self.G.out_neighbors(u))
+                 if not nbrs:
+                     break
+                 u = random.choice(nbrs)
+         Z = float(sum(hits.values()) or 1)
+         return sorted(((t, fp.get(t, 0.0) * hits.get(t, 0) / Z) for t in targets), key=lambda kv: kv[1], reverse=True)
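
To show how BiPPREngine is driven end to end, here is a small usage sketch. TinyGraph is a hypothetical in-memory stand-in for the GraphAccessor dependency (the real accessors live in retrieval/adapters.py and retrieval/adapters_arango.py, which are not part of this excerpt) and implements only the three methods the engine actually calls; the edge list is made up.

import random
from retrieval.ppr import BiPPREngine

class TinyGraph:
    """Hypothetical stand-in exposing just what BiPPREngine touches."""
    def __init__(self, edges):
        self.out, self.inn = {}, {}
        for u, v in edges:
            self.out.setdefault(u, []).append(v)
            self.inn.setdefault(v, []).append(u)

    def out_neighbors(self, u):
        return self.out.get(u, [])

    def in_neighbors(self, u):
        return self.inn.get(u, [])

    def in_degree(self, u):
        return len(self.inn.get(u, []))

random.seed(0)  # the forward phase is Monte Carlo, so pin the seed for repeatability
g = TinyGraph([("a", "b"), ("b", "c"), ("a", "c"), ("c", "d"), ("d", "a")])
engine = BiPPREngine(g, alpha=0.15, rmax=1e-4)
print(engine.score("a", targets=["c", "d"], walks=2000))  # [(target, score), ...] best first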