metaspn-entities 0.1.5.tar.gz → 0.1.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/PKG-INFO +15 -1
  2. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/README.md +14 -0
  3. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities/__init__.py +3 -1
  4. metaspn_entities-0.1.6/metaspn_entities/context.py +191 -0
  5. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities/resolver.py +7 -1
  6. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities.egg-info/PKG-INFO +15 -1
  7. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities.egg-info/SOURCES.txt +1 -0
  8. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/pyproject.toml +1 -1
  9. metaspn_entities-0.1.6/tests/test_recommendation_context.py +89 -0
  10. metaspn_entities-0.1.5/metaspn_entities/context.py +0 -68
  11. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/LICENSE +0 -0
  12. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities/adapter.py +0 -0
  13. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities/events.py +0 -0
  14. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities/models.py +0 -0
  15. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities/normalize.py +0 -0
  16. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities/sqlite_backend.py +0 -0
  17. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities.egg-info/dependency_links.txt +0 -0
  18. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities.egg-info/requires.txt +0 -0
  19. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/metaspn_entities.egg-info/top_level.txt +0 -0
  20. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/setup.cfg +0 -0
  21. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/tests/test_adapter.py +0 -0
  22. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/tests/test_context.py +0 -0
  23. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/tests/test_event_contract.py +0 -0
  24. {metaspn_entities-0.1.5 → metaspn_entities-0.1.6}/tests/test_resolver.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metaspn-entities
-Version: 0.1.5
+Version: 0.1.6
 Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
 Author: MetaSPN Contributors
 License-Expression: MIT
@@ -108,3 +108,17 @@ Profiler/router workers can read consolidated context using:
 - `resolver.confidence_summary(entity_id)`

 Both APIs resolve canonical redirects first, so merged IDs return coherent context.
+
+## M2 Recommendation Context API
+
+Recommendation and drafter workers can consume:
+
+- `resolver.recommendation_context(entity_id)`
+
+The recommendation context includes:
+- identity confidence
+- activity recency (days)
+- interaction history summary (evidence count + source distribution)
+- preferred channel hint
+- relationship stage hint (`cold` / `warm` / `engaged`)
+- merge-safe continuity fields keyed to canonical entity IDs
@@ -83,3 +83,17 @@ Profiler/router workers can read consolidated context using:
 - `resolver.confidence_summary(entity_id)`

 Both APIs resolve canonical redirects first, so merged IDs return coherent context.
+
+## M2 Recommendation Context API
+
+Recommendation and drafter workers can consume:
+
+- `resolver.recommendation_context(entity_id)`
+
+The recommendation context includes:
+- identity confidence
+- activity recency (days)
+- interaction history summary (evidence count + source distribution)
+- preferred channel hint
+- relationship stage hint (`cold` / `warm` / `engaged`)
+- merge-safe continuity fields keyed to canonical entity IDs
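For readers skimming the diff, a minimal usage sketch of the new API follows. The store path and handle values are illustrative, not part of the package, and the printed values depend on accumulated evidence:

```python
from metaspn_entities.resolver import EntityResolver
from metaspn_entities.sqlite_backend import SQLiteEntityStore

# Illustrative store path; any resolvable identifier type/value pair works.
store = SQLiteEntityStore("entities.db")
resolver = EntityResolver(store)

resolution = resolver.resolve("twitter_handle", "rec_user")
rec = resolver.recommendation_context(resolution.entity_id)

print(rec.identity_confidence)        # 0.0-1.0, from the confidence summary
print(rec.activity_recency_days)      # float("inf") until a last_seen_at exists
print(rec.preferred_channel_hint)     # e.g. "twitter_handle"
print(rec.relationship_stage_hint)    # "cold" / "warm" / "engaged"
print(rec.continuity["canonical_entity_id"])
```

Because `recommendation_context` resolves the canonical entity ID first, the same sketch works unchanged with an entity ID that was later merged away.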
@@ -1,5 +1,5 @@
 from .adapter import SignalResolutionResult, resolve_normalized_social_signal
-from .context import EntityContext, build_confidence_summary
+from .context import RecommendationContext, EntityContext, build_confidence_summary, build_recommendation_context
 from .events import EmittedEvent
 from .models import EntityResolution
 from .resolver import EntityResolver
@@ -9,7 +9,9 @@ __all__ = [
     "resolve_normalized_social_signal",
     "SignalResolutionResult",
     "EntityContext",
+    "RecommendationContext",
     "build_confidence_summary",
+    "build_recommendation_context",
     "EntityResolver",
     "EntityResolution",
     "EmittedEvent",
@@ -0,0 +1,191 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Any, Dict, List
+
+
+@dataclass(frozen=True)
+class EntityContext:
+    entity_id: str
+    aliases: List[Dict[str, Any]]
+    identifiers: List[Dict[str, Any]]
+    recent_evidence: List[Dict[str, Any]]
+    confidence_summary: Dict[str, Any]
+
+
+@dataclass(frozen=True)
+class RecommendationContext:
+    entity_id: str
+    identity_confidence: float
+    activity_recency_days: float
+    interaction_history_summary: Dict[str, Any]
+    preferred_channel_hint: str
+    relationship_stage_hint: str
+    continuity: Dict[str, Any]
+
+
+def build_confidence_summary(
+    aliases: List[Dict[str, Any]],
+    identifiers: List[Dict[str, Any]],
+    evidence: List[Dict[str, Any]],
+) -> Dict[str, Any]:
+    identifier_confidences = sorted(float(item["confidence"]) for item in identifiers)
+    alias_confidences = sorted(float(item["confidence"]) for item in aliases)
+    source_set = sorted(
+        {
+            str(item.get("provenance"))
+            for item in evidence
+            if item.get("provenance") not in (None, "")
+        }
+    )
+
+    identifier_avg = _avg(identifier_confidences)
+    alias_avg = _avg(alias_confidences)
+    source_diversity = min(1.0, len(source_set) / 3.0)
+
+    overall = min(1.0, (0.65 * identifier_avg) + (0.25 * alias_avg) + (0.10 * source_diversity))
+    by_identifier_type = _rollup_by_identifier_type(identifiers)
+
+    return {
+        "overall_confidence": round(overall, 6),
+        "identifier_confidence_avg": round(identifier_avg, 6),
+        "alias_confidence_avg": round(alias_avg, 6),
+        "unique_source_count": len(source_set),
+        "evidence_count": len(evidence),
+        "by_identifier_type": by_identifier_type,
+    }
+
+
+def _avg(values: List[float]) -> float:
+    if not values:
+        return 0.0
+    return sum(values) / len(values)
+
+
+def _rollup_by_identifier_type(identifiers: List[Dict[str, Any]]) -> Dict[str, Dict[str, float]]:
+    grouped: Dict[str, List[float]] = {}
+    for item in identifiers:
+        key = str(item["identifier_type"])
+        grouped.setdefault(key, []).append(float(item["confidence"]))
+
+    rollup: Dict[str, Dict[str, float]] = {}
+    for key in sorted(grouped):
+        values = sorted(grouped[key])
+        rollup[key] = {
+            "count": float(len(values)),
+            "avg_confidence": round(_avg(values), 6),
+            "max_confidence": round(max(values), 6),
+        }
+    return rollup
+
+
+def build_recommendation_context(
+    entity_id: str,
+    aliases: List[Dict[str, Any]],
+    identifiers: List[Dict[str, Any]],
+    *,
+    now: datetime | None = None,
+) -> RecommendationContext:
+    current_now = now or datetime.now(timezone.utc)
+    evidence_count = len(identifiers)
+    recent_seen = _latest_seen(identifiers)
+    activity_recency_days = _recency_days(recent_seen, current_now)
+
+    summary = build_confidence_summary(aliases, identifiers, identifiers)
+    preferred_channel = _preferred_channel_hint(identifiers)
+    relationship_stage = _relationship_stage_hint(
+        evidence_count=evidence_count,
+        recency_days=activity_recency_days,
+        confidence=summary["overall_confidence"],
+    )
+
+    provenance_counts: Dict[str, int] = {}
+    for item in identifiers:
+        provenance = str(item.get("provenance") or "unknown")
+        provenance_counts[provenance] = provenance_counts.get(provenance, 0) + 1
+
+    interaction_history_summary = {
+        "evidence_count": evidence_count,
+        "distinct_sources": len(provenance_counts),
+        "sources": {k: provenance_counts[k] for k in sorted(provenance_counts)},
+    }
+
+    continuity = {
+        "canonical_entity_id": entity_id,
+        "alias_count": len(aliases),
+        "identifier_count": len(identifiers),
+    }
+
+    return RecommendationContext(
+        entity_id=entity_id,
+        identity_confidence=float(summary["overall_confidence"]),
+        activity_recency_days=activity_recency_days,
+        interaction_history_summary=interaction_history_summary,
+        preferred_channel_hint=preferred_channel,
+        relationship_stage_hint=relationship_stage,
+        continuity=continuity,
+    )
+
+
+def _latest_seen(identifiers: List[Dict[str, Any]]) -> datetime | None:
+    timestamps = [
+        _parse_iso(str(item.get("last_seen_at")))
+        for item in identifiers
+        if item.get("last_seen_at")
+    ]
+    clean = [ts for ts in timestamps if ts is not None]
+    if not clean:
+        return None
+    return max(clean)
+
+
+def _parse_iso(raw: str) -> datetime | None:
+    text = raw.strip()
+    if not text:
+        return None
+    if text.endswith("Z"):
+        text = text[:-1] + "+00:00"
+    try:
+        dt = datetime.fromisoformat(text)
+    except ValueError:
+        return None
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=timezone.utc)
+    return dt.astimezone(timezone.utc)
+
+
+def _recency_days(last_seen: datetime | None, now: datetime) -> float:
+    if last_seen is None:
+        return float("inf")
+    delta = now - last_seen
+    seconds = max(0.0, delta.total_seconds())
+    return round(seconds / 86400.0, 6)
+
+
+def _preferred_channel_hint(identifiers: List[Dict[str, Any]]) -> str:
+    weights = {
+        "email": 5,
+        "linkedin_handle": 4,
+        "twitter_handle": 3,
+        "github_handle": 3,
+        "canonical_url": 2,
+        "domain": 1,
+        "name": 0,
+    }
+    scores: Dict[str, int] = {}
+    for item in identifiers:
+        id_type = str(item["identifier_type"])
+        score = weights.get(id_type, 1)
+        scores[id_type] = scores.get(id_type, 0) + score
+    if not scores:
+        return "unknown"
+    return sorted(scores.items(), key=lambda kv: (-kv[1], kv[0]))[0][0]
+
+
+def _relationship_stage_hint(*, evidence_count: int, recency_days: float, confidence: float) -> str:
+    if evidence_count >= 6 and recency_days <= 30 and confidence >= 0.8:
+        return "engaged"
+    if evidence_count >= 3 and recency_days <= 90 and confidence >= 0.65:
+        return "warm"
+    return "cold"
@@ -2,7 +2,7 @@ from __future__ import annotations

 from typing import Any, Dict, List, Optional

-from .context import EntityContext, build_confidence_summary
+from .context import RecommendationContext, EntityContext, build_confidence_summary, build_recommendation_context
 from .events import EmittedEvent, EventFactory
 from .models import (
     DEFAULT_MATCH_CONFIDENCE,
@@ -160,6 +160,12 @@ class EntityResolver:
             confidence_summary=summary,
         )

+    def recommendation_context(self, entity_id: str) -> RecommendationContext:
+        canonical_id = self.store.canonical_entity_id(entity_id)
+        aliases = self.store.list_aliases_for_entity(canonical_id)
+        identifiers = self.store.list_identifier_records_for_entity(canonical_id)
+        return build_recommendation_context(canonical_id, aliases, identifiers)
+
     def export_snapshot(self, output_path: str) -> None:
         self.store.export_snapshot(output_path)

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metaspn-entities
-Version: 0.1.5
+Version: 0.1.6
 Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
 Author: MetaSPN Contributors
 License-Expression: MIT
@@ -108,3 +108,17 @@ Profiler/router workers can read consolidated context using:
 - `resolver.confidence_summary(entity_id)`

 Both APIs resolve canonical redirects first, so merged IDs return coherent context.
+
+## M2 Recommendation Context API
+
+Recommendation and drafter workers can consume:
+
+- `resolver.recommendation_context(entity_id)`
+
+The recommendation context includes:
+- identity confidence
+- activity recency (days)
+- interaction history summary (evidence count + source distribution)
+- preferred channel hint
+- relationship stage hint (`cold` / `warm` / `engaged`)
+- merge-safe continuity fields keyed to canonical entity IDs
@@ -17,4 +17,5 @@ metaspn_entities.egg-info/top_level.txt
 tests/test_adapter.py
 tests/test_context.py
 tests/test_event_contract.py
+tests/test_recommendation_context.py
 tests/test_resolver.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "metaspn-entities"
-version = "0.1.5"
+version = "0.1.6"
 description = "Canonical entity resolution, aliasing, and merges for MetaSPN systems"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -0,0 +1,89 @@
+import tempfile
+import unittest
+from pathlib import Path
+
+from metaspn_entities.adapter import resolve_normalized_social_signal
+from metaspn_entities.resolver import EntityResolver
+from metaspn_entities.sqlite_backend import SQLiteEntityStore
+
+
+class RecommendationContextTests(unittest.TestCase):
+    def setUp(self) -> None:
+        self.tempdir = tempfile.TemporaryDirectory()
+        self.db_path = str(Path(self.tempdir.name) / "entities.db")
+        self.store = SQLiteEntityStore(self.db_path)
+        self.resolver = EntityResolver(self.store)
+
+    def tearDown(self) -> None:
+        self.store.close()
+        self.tempdir.cleanup()
+
+    def test_cross_source_consistency(self) -> None:
+        signal_a = {
+            "source": "social.ingest.twitter",
+            "payload": {
+                "platform": "twitter",
+                "author_handle": "rec_user",
+                "profile_url": "https://example.com/p/rec_user",
+            },
+        }
+        signal_b = {
+            "source": "social.ingest.linkedin",
+            "payload": {
+                "platform": "linkedin",
+                "handle": "rec-user",
+                "profile_url": "http://www.example.com/p/rec_user/",
+            },
+        }
+
+        first = resolve_normalized_social_signal(self.resolver, signal_a)
+        second = resolve_normalized_social_signal(self.resolver, signal_b)
+        self.assertEqual(first.entity_id, second.entity_id)
+
+        rec = self.resolver.recommendation_context(first.entity_id)
+        self.assertEqual(rec.entity_id, first.entity_id)
+        self.assertGreaterEqual(rec.identity_confidence, 0.0)
+        self.assertIn(rec.relationship_stage_hint, {"cold", "warm", "engaged"})
+        self.assertIn("social.ingest.linkedin", rec.interaction_history_summary["sources"])
+        self.assertIn("social.ingest.twitter", rec.interaction_history_summary["sources"])
+
+    def test_merge_safe_continuity(self) -> None:
+        a = self.resolver.resolve("twitter_handle", "merge_rec_a")
+        b = self.resolver.resolve("twitter_handle", "merge_rec_b")
+        self.resolver.add_alias(a.entity_id, "email", "a@rec.dev")
+        self.resolver.add_alias(b.entity_id, "domain", "rec.dev")
+        self.resolver.merge_entities(a.entity_id, b.entity_id, reason="dedupe")
+
+        rec_from = self.resolver.recommendation_context(a.entity_id)
+        rec_to = self.resolver.recommendation_context(b.entity_id)
+
+        self.assertEqual(rec_from.entity_id, rec_to.entity_id)
+        self.assertEqual(rec_from.continuity["canonical_entity_id"], rec_to.continuity["canonical_entity_id"])
+        self.assertGreaterEqual(rec_from.continuity["identifier_count"], 2)
+
+    def test_rerun_determinism(self) -> None:
+        signal = {
+            "source": "social.ingest",
+            "payload": {
+                "platform": "twitter",
+                "author_handle": "deterministic_rec",
+                "profile_url": "https://example.org/deterministic_rec",
+            },
+        }
+
+        first = resolve_normalized_social_signal(self.resolver, signal)
+        rec_1 = self.resolver.recommendation_context(first.entity_id)
+
+        second = resolve_normalized_social_signal(self.resolver, signal)
+        rec_2 = self.resolver.recommendation_context(second.entity_id)
+
+        self.assertEqual(first.entity_id, second.entity_id)
+        self.assertEqual(rec_1.entity_id, rec_2.entity_id)
+        self.assertEqual(rec_1.preferred_channel_hint, rec_2.preferred_channel_hint)
+        self.assertEqual(rec_1.relationship_stage_hint, rec_2.relationship_stage_hint)
+        self.assertEqual(rec_1.continuity, rec_2.continuity)
+        self.assertEqual(rec_1.interaction_history_summary, rec_2.interaction_history_summary)
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -1,68 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Any, Dict, List
-
-
-@dataclass(frozen=True)
-class EntityContext:
-    entity_id: str
-    aliases: List[Dict[str, Any]]
-    identifiers: List[Dict[str, Any]]
-    recent_evidence: List[Dict[str, Any]]
-    confidence_summary: Dict[str, Any]
-
-
-def build_confidence_summary(
-    aliases: List[Dict[str, Any]],
-    identifiers: List[Dict[str, Any]],
-    evidence: List[Dict[str, Any]],
-) -> Dict[str, Any]:
-    identifier_confidences = sorted(float(item["confidence"]) for item in identifiers)
-    alias_confidences = sorted(float(item["confidence"]) for item in aliases)
-    source_set = sorted(
-        {
-            str(item.get("provenance"))
-            for item in evidence
-            if item.get("provenance") not in (None, "")
-        }
-    )
-
-    identifier_avg = _avg(identifier_confidences)
-    alias_avg = _avg(alias_confidences)
-    source_diversity = min(1.0, len(source_set) / 3.0)
-
-    overall = min(1.0, (0.65 * identifier_avg) + (0.25 * alias_avg) + (0.10 * source_diversity))
-    by_identifier_type = _rollup_by_identifier_type(identifiers)
-
-    return {
-        "overall_confidence": round(overall, 6),
-        "identifier_confidence_avg": round(identifier_avg, 6),
-        "alias_confidence_avg": round(alias_avg, 6),
-        "unique_source_count": len(source_set),
-        "evidence_count": len(evidence),
-        "by_identifier_type": by_identifier_type,
-    }
-
-
-def _avg(values: List[float]) -> float:
-    if not values:
-        return 0.0
-    return sum(values) / len(values)
-
-
-def _rollup_by_identifier_type(identifiers: List[Dict[str, Any]]) -> Dict[str, Dict[str, float]]:
-    grouped: Dict[str, List[float]] = {}
-    for item in identifiers:
-        key = str(item["identifier_type"])
-        grouped.setdefault(key, []).append(float(item["confidence"]))
-
-    rollup: Dict[str, Dict[str, float]] = {}
-    for key in sorted(grouped):
-        values = sorted(grouped[key])
-        rollup[key] = {
-            "count": float(len(values)),
-            "avg_confidence": round(_avg(values), 6),
-            "max_confidence": round(max(values), 6),
-        }
-    return rollup