odin-engine 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/__init__.py +17 -17
- benchmarks/datasets.py +284 -284
- benchmarks/metrics.py +275 -275
- benchmarks/run_ablation.py +279 -279
- benchmarks/run_npll_benchmark.py +270 -270
- npll/__init__.py +10 -10
- npll/bootstrap.py +474 -474
- npll/core/__init__.py +33 -33
- npll/core/knowledge_graph.py +308 -308
- npll/core/logical_rules.py +496 -496
- npll/core/mln.py +474 -474
- npll/inference/__init__.py +40 -40
- npll/inference/e_step.py +419 -419
- npll/inference/elbo.py +434 -434
- npll/inference/m_step.py +576 -576
- npll/npll_model.py +631 -631
- npll/scoring/__init__.py +42 -42
- npll/scoring/embeddings.py +441 -441
- npll/scoring/probability.py +402 -402
- npll/scoring/scoring_module.py +369 -369
- npll/training/__init__.py +24 -24
- npll/training/evaluation.py +496 -496
- npll/training/npll_trainer.py +520 -520
- npll/utils/__init__.py +47 -47
- npll/utils/batch_utils.py +492 -492
- npll/utils/config.py +144 -144
- npll/utils/math_utils.py +338 -338
- odin/__init__.py +21 -20
- odin/engine.py +264 -264
- odin/schema.py +210 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/METADATA +503 -456
- odin_engine-0.2.0.dist-info/RECORD +63 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/licenses/LICENSE +21 -21
- retrieval/__init__.py +50 -50
- retrieval/adapters.py +140 -140
- retrieval/adapters_arango.py +1418 -1418
- retrieval/aggregators.py +707 -707
- retrieval/beam.py +127 -127
- retrieval/budget.py +60 -60
- retrieval/cache.py +159 -159
- retrieval/confidence.py +88 -88
- retrieval/eval.py +49 -49
- retrieval/linker.py +87 -87
- retrieval/metrics.py +105 -105
- retrieval/metrics_motifs.py +36 -36
- retrieval/orchestrator.py +571 -571
- retrieval/ppr/__init__.py +12 -12
- retrieval/ppr/anchors.py +41 -41
- retrieval/ppr/bippr.py +61 -61
- retrieval/ppr/engines.py +257 -257
- retrieval/ppr/global_pr.py +76 -76
- retrieval/ppr/indexes.py +78 -78
- retrieval/ppr.py +156 -156
- retrieval/ppr_cache.py +25 -25
- retrieval/scoring.py +294 -294
- retrieval/utils/pii_redaction.py +36 -36
- retrieval/writers/__init__.py +9 -9
- retrieval/writers/arango_writer.py +28 -28
- retrieval/writers/base.py +21 -21
- retrieval/writers/janus_writer.py +36 -36
- odin_engine-0.1.0.dist-info/RECORD +0 -62
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/WHEEL +0 -0
- {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/top_level.txt +0 -0
retrieval/ppr/__init__.py
CHANGED
@@ -1,12 +1,12 @@ (all 12 lines removed and re-added with identical text; the file reads:)

from .engines import PushPPREngine, MonteCarloPPREngine, PPRParams
from .bippr import BiPPREngine
from .anchors import APPRAnchors, APPRAnchorParams
from .global_pr import GlobalPR, GlobalPRParams
from .indexes import RandomWalkIndex, WalkIndexConfig

__all__ = [
    'PushPPREngine', 'MonteCarloPPREngine', 'BiPPREngine', 'PPRParams',
    'APPRAnchors', 'APPRAnchorParams',
    'GlobalPR', 'GlobalPRParams',
    'RandomWalkIndex', 'WalkIndexConfig',
]
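For context, a minimal, hypothetical usage sketch of the API re-exported by this __init__.py. The PushPPREngine constructor and run() call shape are taken from anchors.py below; my_accessor stands in for any object implementing the GraphAccessor protocol from retrieval/adapters.py, and the community/seed ids are placeholders.

# Hypothetical sketch: my_accessor and the ids are placeholders;
# PushPPREngine(accessor, community_id) and engine.run(seeds=..., params=...)
# mirror how anchors.py drives the engine.
from retrieval.ppr import PushPPREngine, PPRParams

engine = PushPPREngine(my_accessor, "community-42")
params = PPRParams(alpha=0.15, eps=1e-4, topn=50)
result = engine.run(seeds=["node-1", "node-2"], params=params)
for node_id, score in result.scores:  # list of (NodeId, float) pairs
    print(node_id, score)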
retrieval/ppr/anchors.py
CHANGED
@@ -1,41 +1,41 @@ (all 41 lines removed and re-added with identical text; the file reads:)

from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Tuple, Iterable, Optional

from ..adapters import GraphAccessor, NodeId
from .engines import PushPPREngine, PPRParams


@dataclass
class APPRAnchorParams:
    alpha: float = 0.15
    eps: float = 1e-4
    topn: int = 200


class APPRAnchors:
    """
    Build per-community APPR anchor sets using push-based PPR.
    Intended for offline or periodic refresh to warm personalization priors.
    """

    def __init__(self, accessor: GraphAccessor):
        self.A = accessor
        self.cache: Dict[Tuple[str, str], List[Tuple[NodeId, float]]] = {}

    def build_for_community(
        self,
        community_id: str,
        seed_set: List[NodeId],
        params: APPRAnchorParams = APPRAnchorParams(),
    ) -> List[Tuple[NodeId, float]]:
        key = (community_id, ",".join(sorted(map(str, seed_set))))
        if key in self.cache:
            return self.cache[key]
        engine = PushPPREngine(self.A, community_id)
        p = PPRParams(alpha=params.alpha, eps=params.eps, topn=params.topn)
        res = engine.run(seeds=seed_set, params=p)
        self.cache[key] = res.scores
        return res.scores
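A short sketch of how build_for_community might be called during an offline anchor refresh; only APPRAnchors and APPRAnchorParams come from the module above, while my_accessor, the community id, and the seed ids are illustrative placeholders.

# Illustrative warm-up of per-community anchors (placeholders noted above).
from retrieval.ppr import APPRAnchors, APPRAnchorParams

anchors = APPRAnchors(my_accessor)
top = anchors.build_for_community(
    community_id="community-42",
    seed_set=["doc-123", "doc-456"],
    params=APPRAnchorParams(alpha=0.15, eps=1e-4, topn=100),
)
# A second call with the same community and (order-insensitive) seed set is served
# from the in-memory cache, since the key joins the sorted, stringified seeds.

Note that the default params=APPRAnchorParams() is created once at function-definition time and shared across calls; it is only read, so the shared instance is harmless, but callers wanting different settings should pass their own instance.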
retrieval/ppr/bippr.py
CHANGED
@@ -1,61 +1,61 @@ (all 61 lines removed and re-added with identical text; the file reads:)

from __future__ import annotations
from typing import Dict, Iterable, Tuple, List
from collections import defaultdict, deque
import random

from ..adapters import GraphAccessor, NodeId

class BiPPREngine:
    """
    Bidirectional PPR for source→target scoring:
    - Reverse push from targets builds fingerprints
    - Forward random walks from source intersect with reverse mass
    """

    def __init__(self, graph: GraphAccessor, alpha=0.15, rmax=1e-4):
        self.G = graph
        self.alpha, self.rmax = alpha, rmax

    def _reverse_push(self, targets: Iterable[NodeId]) -> Dict[NodeId, float]:
        p_t, r_t = defaultdict(float), defaultdict(float)
        q = deque()
        tgt = list(targets)
        if not tgt:
            return p_t
        mass = 1.0 / len(tgt)
        for t in tgt:
            r_t[t] = mass
            q.append(t)
        while q:
            u = q.popleft()
            if r_t[u] / max(1, self.G.in_degree(u)) <= self.rmax:
                continue
            push = (1 - self.alpha) * r_t[u]
            p_t[u] += self.alpha * r_t[u]
            r_t[u] = 0.0
            deg = self.G.in_degree(u)
            if deg == 0:
                continue
            share = push / deg
            # BiPPR needs in_neighbors, which needs to be in GraphAccessor and Mock
            for v in self.G.in_neighbors(u):
                r_t[v] += share
                if r_t[v] / max(1, self.G.in_degree(v)) > self.rmax:
                    q.append(v)
        return p_t

    def score(self, source: NodeId, targets: List[NodeId], walks=5000) -> List[Tuple[NodeId, float]]:
        fp = self._reverse_push(targets)
        hits = defaultdict(int)
        for _ in range(walks):
            u = source
            while True:
                hits[u] += 1
                if random.random() < self.alpha:
                    break
                nbrs = list(self.G.out_neighbors(u))
                if not nbrs:
                    break
                u = random.choice(nbrs)
        Z = float(sum(hits.values()) or 1)
        return sorted(((t, fp.get(t, 0.0) * hits.get(t, 0) / Z) for t in targets), key=lambda kv: kv[1], reverse=True)
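Since the inline comment flags that BiPPR needs in_neighbors on GraphAccessor, here is a self-contained sketch with a small in-memory stand-in exposing the three graph methods the engine actually calls (out_neighbors, in_neighbors, in_degree). DictGraph and the toy edge list are assumptions for illustration, not part of the package.

# Toy accessor exposing only what BiPPREngine calls; DictGraph stands in for a
# real GraphAccessor implementation such as the Arango adapter.
from collections import defaultdict
from retrieval.ppr import BiPPREngine

class DictGraph:
    def __init__(self, edges):
        self.out, self.inn = defaultdict(list), defaultdict(list)
        for u, v in edges:
            self.out[u].append(v)   # forward adjacency for the random walks
            self.inn[v].append(u)   # reverse adjacency for the reverse push

    def out_neighbors(self, u):
        return self.out[u]

    def in_neighbors(self, u):
        return self.inn[u]

    def in_degree(self, u):
        return len(self.inn[u])

g = DictGraph([("a", "b"), ("b", "c"), ("a", "c"), ("c", "d")])
engine = BiPPREngine(g, alpha=0.15, rmax=1e-4)
print(engine.score("a", targets=["c", "d"], walks=2000))  # [(target, score), ...] sorted descending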