metaspn-entities 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/PKG-INFO +28 -1
  2. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/README.md +27 -0
  3. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/__init__.py +5 -1
  4. metaspn_entities-0.1.7/metaspn_entities/attribution.py +88 -0
  5. metaspn_entities-0.1.7/metaspn_entities/context.py +191 -0
  6. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/resolver.py +36 -1
  7. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/sqlite_backend.py +6 -0
  8. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities.egg-info/PKG-INFO +28 -1
  9. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities.egg-info/SOURCES.txt +3 -0
  10. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/pyproject.toml +1 -1
  11. metaspn_entities-0.1.7/tests/test_attribution.py +66 -0
  12. metaspn_entities-0.1.7/tests/test_recommendation_context.py +89 -0
  13. metaspn_entities-0.1.5/metaspn_entities/context.py +0 -68
  14. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/LICENSE +0 -0
  15. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/adapter.py +0 -0
  16. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/events.py +0 -0
  17. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/models.py +0 -0
  18. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/normalize.py +0 -0
  19. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities.egg-info/dependency_links.txt +0 -0
  20. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities.egg-info/requires.txt +0 -0
  21. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities.egg-info/top_level.txt +0 -0
  22. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/setup.cfg +0 -0
  23. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/tests/test_adapter.py +0 -0
  24. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/tests/test_context.py +0 -0
  25. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/tests/test_event_contract.py +0 -0
  26. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/tests/test_resolver.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metaspn-entities
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
5
5
  Author: MetaSPN Contributors
6
6
  License-Expression: MIT
@@ -108,3 +108,30 @@ Profiler/router workers can read consolidated context using:
108
108
  - `resolver.confidence_summary(entity_id)`
109
109
 
110
110
  Both APIs resolve canonical redirects first, so merged IDs return coherent context.
111
+
112
+ ## M2 Recommendation Context API
113
+
114
+ Recommendation and drafter workers can consume:
115
+
116
+ - `resolver.recommendation_context(entity_id)`
117
+
118
+ The recommendation context includes:
119
+ - identity confidence
120
+ - activity recency (days)
121
+ - interaction history summary (evidence count + source distribution)
122
+ - preferred channel hint
123
+ - relationship stage hint (`cold` / `warm` / `engaged`)
124
+ - merge-safe continuity fields keyed to canonical entity IDs
125
+
126
+ ## M3 Outcome Attribution API
127
+
128
+ Outcome evaluators can map attempt/outcome references back to canonical entity lineage:
129
+
130
+ - `resolver.attribute_outcome(references)`
131
+
132
+ Supported references include `entity_id`, `email`, `canonical_url`, handles, domains, and names.
133
+
134
+ Attribution guarantees:
135
+ - canonical merge redirects are resolved before returning `entity_id`
136
+ - output includes explicit confidence for downstream learning logic
137
+ - deterministic tie-breaks are applied by score, then hit count, then entity ID
@@ -83,3 +83,30 @@ Profiler/router workers can read consolidated context using:
83
83
  - `resolver.confidence_summary(entity_id)`
84
84
 
85
85
  Both APIs resolve canonical redirects first, so merged IDs return coherent context.
86
+
87
+ ## M2 Recommendation Context API
88
+
89
+ Recommendation and drafter workers can consume:
90
+
91
+ - `resolver.recommendation_context(entity_id)`
92
+
93
+ The recommendation context includes:
94
+ - identity confidence
95
+ - activity recency (days)
96
+ - interaction history summary (evidence count + source distribution)
97
+ - preferred channel hint
98
+ - relationship stage hint (`cold` / `warm` / `engaged`)
99
+ - merge-safe continuity fields keyed to canonical entity IDs
100
+
101
+ ## M3 Outcome Attribution API
102
+
103
+ Outcome evaluators can map attempt/outcome references back to canonical entity lineage:
104
+
105
+ - `resolver.attribute_outcome(references)`
106
+
107
+ Supported references include `entity_id`, `email`, `canonical_url`, handles, domains, and names.
108
+
109
+ Attribution guarantees:
110
+ - canonical merge redirects are resolved before returning `entity_id`
111
+ - output includes explicit confidence for downstream learning logic
112
+ - deterministic tie-breaks are applied by score, then hit count, then entity ID
@@ -1,5 +1,6 @@
1
1
  from .adapter import SignalResolutionResult, resolve_normalized_social_signal
2
- from .context import EntityContext, build_confidence_summary
2
+ from .attribution import OutcomeAttribution
3
+ from .context import RecommendationContext, EntityContext, build_confidence_summary, build_recommendation_context
3
4
  from .events import EmittedEvent
4
5
  from .models import EntityResolution
5
6
  from .resolver import EntityResolver
@@ -8,8 +9,11 @@ from .sqlite_backend import SQLiteEntityStore
8
9
  __all__ = [
9
10
  "resolve_normalized_social_signal",
10
11
  "SignalResolutionResult",
12
+ "OutcomeAttribution",
11
13
  "EntityContext",
14
+ "RecommendationContext",
12
15
  "build_confidence_summary",
16
+ "build_recommendation_context",
13
17
  "EntityResolver",
14
18
  "EntityResolution",
15
19
  "EmittedEvent",
@@ -0,0 +1,88 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple
5
+
6
+ from .normalize import normalize_identifier
7
+
8
+
9
@dataclass(frozen=True)
class OutcomeAttribution:
    """Result of mapping attempt/outcome references back to one canonical entity."""

    # Canonical entity the outcome is attributed to; None when no reference matched.
    entity_id: Optional[str]
    # Aggregate attribution confidence (score / reference count, capped at 1.0).
    confidence: float
    # Per-reference resolution trail: type, value, normalized value, match, confidence.
    matched_references: List[Dict[str, Any]] = field(default_factory=list)
    # Name of the ranking algorithm that produced this attribution.
    strategy: str = "confidence-weighted-reference-v1"
15
+
16
+
17
def normalize_outcome_references(references: Mapping[str, Any] | Sequence[Mapping[str, Any]]) -> List[Tuple[str, str]]:
    """Flatten outcome references into ``(identifier_type, value)`` pairs.

    Accepts either a mapping of ``{identifier_type: value}`` or a sequence of
    items carrying ``identifier_type``/``type`` and ``value`` keys. Blank or
    missing values are dropped. Mapping keys are emitted in sorted order so
    downstream ranking is deterministic.
    """
    pairs: List[Tuple[str, str]] = []

    if isinstance(references, Mapping):
        for key in sorted(references):
            raw = references[key]
            # Only non-empty string values are accepted in the mapping form.
            if isinstance(raw, str):
                cleaned = raw.strip()
                if cleaned:
                    pairs.append((str(key), cleaned))
        return pairs

    for entry in references:
        ref_type = str(entry.get("identifier_type") or entry.get("type") or "").strip()
        ref_value = str(entry.get("value") or "").strip()
        if ref_type and ref_value:
            pairs.append((ref_type, ref_value))
    return pairs
35
+
36
+
37
def rank_entity_candidates(
    references: Iterable[Tuple[str, str]],
    resolve_reference: Any,
) -> OutcomeAttribution:
    """Score candidate entities across references and pick the best one.

    Each reference is resolved via ``resolve_reference(identifier_type, value)``,
    which must return a mapping with ``entity_id``, ``confidence`` and
    ``normalized_value`` keys. Per-entity scores are the sum of reference
    confidences; ties break deterministically by hit count, then entity ID.
    The winner's confidence is its total score divided by the number of
    references, capped at 1.0 and rounded to six decimals.
    """
    scores: Dict[str, float] = {}
    hits: Dict[str, int] = {}
    trail: List[Dict[str, Any]] = []
    reference_count = 0

    for ref_type, ref_value in references:
        reference_count += 1
        resolution = resolve_reference(ref_type, ref_value)
        resolved_id = resolution.get("entity_id")
        ref_confidence = float(resolution.get("confidence", 0.0))
        # Keep a full audit trail, including references that matched nothing.
        trail.append(
            {
                "identifier_type": ref_type,
                "value": ref_value,
                "normalized_value": resolution.get("normalized_value"),
                "matched_entity_id": resolved_id,
                "reference_confidence": ref_confidence,
            }
        )
        if resolved_id:
            scores[resolved_id] = scores.get(resolved_id, 0.0) + ref_confidence
            hits[resolved_id] = hits.get(resolved_id, 0) + 1

    if not scores:
        return OutcomeAttribution(entity_id=None, confidence=0.0, matched_references=trail)

    def _rank_key(item: Tuple[str, float]) -> Tuple[float, int, str]:
        # Highest score first, then most hits, then lexicographic entity ID.
        entity, score = item
        return (-score, -hits.get(entity, 0), entity)

    winner, winning_score = min(scores.items(), key=_rank_key)
    normalized = min(1.0, round(winning_score / float(max(1, reference_count)), 6))
    return OutcomeAttribution(
        entity_id=winner,
        confidence=normalized,
        matched_references=trail,
    )
83
+
84
+
85
def normalize_reference(identifier_type: str, value: str) -> Tuple[str, str]:
    """Return ``(identifier_type, normalized_value)`` for one reference.

    Entity IDs pass through untouched; every other identifier type is run
    through the shared ``normalize_identifier`` rules.
    """
    if identifier_type == "entity_id":
        normalized = value
    else:
        normalized = normalize_identifier(identifier_type, value)
    return identifier_type, normalized
@@ -0,0 +1,191 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime, timezone
5
+ from typing import Any, Dict, List
6
+
7
+
8
@dataclass(frozen=True)
class EntityContext:
    """Consolidated read-model of one canonical entity for profiler/router workers."""

    # Canonical entity ID (callers resolve merge redirects before building this).
    entity_id: str
    # Alias records attached to the entity.
    aliases: List[Dict[str, Any]]
    # Identifier records (identifier_type, confidence, provenance, ...).
    identifiers: List[Dict[str, Any]]
    # Most recent evidence rows backing the entity.
    recent_evidence: List[Dict[str, Any]]
    # Output of build_confidence_summary over the records above.
    confidence_summary: Dict[str, Any]
15
+
16
+
17
@dataclass(frozen=True)
class RecommendationContext:
    """M2 context consumed by recommendation/drafter workers."""

    # Canonical entity ID this context describes.
    entity_id: str
    # Blended identity confidence in [0.0, 1.0].
    identity_confidence: float
    # Days since the newest identifier was last seen (inf when unknown).
    activity_recency_days: float
    # Evidence count plus per-source distribution of identifier provenance.
    interaction_history_summary: Dict[str, Any]
    # Best outreach channel inferred from identifier types (e.g. "email").
    preferred_channel_hint: str
    # One of "cold" / "warm" / "engaged".
    relationship_stage_hint: str
    # Merge-safe continuity fields keyed to the canonical entity ID.
    continuity: Dict[str, Any]
26
+
27
+
28
def build_confidence_summary(
    aliases: List[Dict[str, Any]],
    identifiers: List[Dict[str, Any]],
    evidence: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Blend identifier, alias, and provenance signals into one summary dict.

    Overall confidence is a weighted mix — 65% identifier average, 25% alias
    average, 10% source diversity (saturating at three distinct provenance
    values) — capped at 1.0. All floats are rounded to six decimal places.
    """
    def mean(values: List[float]) -> float:
        return sum(values) / len(values) if values else 0.0

    identifier_scores = sorted(float(record["confidence"]) for record in identifiers)
    alias_scores = sorted(float(record["confidence"]) for record in aliases)
    sources = {
        str(record.get("provenance"))
        for record in evidence
        if record.get("provenance") not in (None, "")
    }

    identifier_mean = mean(identifier_scores)
    alias_mean = mean(alias_scores)
    diversity = min(1.0, len(sources) / 3.0)
    blended = min(1.0, (0.65 * identifier_mean) + (0.25 * alias_mean) + (0.10 * diversity))

    # Per-identifier-type rollup: count / average / maximum confidence.
    buckets: Dict[str, List[float]] = {}
    for record in identifiers:
        buckets.setdefault(str(record["identifier_type"]), []).append(float(record["confidence"]))
    rollup: Dict[str, Dict[str, float]] = {}
    for id_type in sorted(buckets):
        ordered = sorted(buckets[id_type])
        rollup[id_type] = {
            "count": float(len(ordered)),
            "avg_confidence": round(mean(ordered), 6),
            "max_confidence": round(max(ordered), 6),
        }

    return {
        "overall_confidence": round(blended, 6),
        "identifier_confidence_avg": round(identifier_mean, 6),
        "alias_confidence_avg": round(alias_mean, 6),
        "unique_source_count": len(sources),
        "evidence_count": len(evidence),
        "by_identifier_type": rollup,
    }
58
+
59
+
60
def _avg(values: List[float]) -> float:
    """Arithmetic mean of *values*; 0.0 for an empty list."""
    if values:
        return sum(values) / len(values)
    return 0.0
64
+
65
+
66
def _rollup_by_identifier_type(identifiers: List[Dict[str, Any]]) -> Dict[str, Dict[str, float]]:
    """Group identifier confidences by type, yielding count/avg/max per group.

    Keys are emitted in sorted order for deterministic output.
    """
    buckets: Dict[str, List[float]] = {}
    for record in identifiers:
        buckets.setdefault(str(record["identifier_type"]), []).append(float(record["confidence"]))

    summary: Dict[str, Dict[str, float]] = {}
    for id_type in sorted(buckets):
        ordered = sorted(buckets[id_type])
        # Buckets are never empty, so the mean is always well-defined.
        summary[id_type] = {
            "count": float(len(ordered)),
            "avg_confidence": round(sum(ordered) / len(ordered), 6),
            "max_confidence": round(max(ordered), 6),
        }
    return summary
81
+
82
+
83
def build_recommendation_context(
    entity_id: str,
    aliases: List[Dict[str, Any]],
    identifiers: List[Dict[str, Any]],
    *,
    now: datetime | None = None,
) -> RecommendationContext:
    """Assemble the M2 recommendation context for a canonical entity.

    Identifier records double as the evidence set: their count, provenance
    spread, and last-seen timestamps drive the confidence, recency, and
    relationship-stage hints. ``now`` is injectable for deterministic tests
    and defaults to the current UTC time.
    """
    reference_time = now if now is not None else datetime.now(timezone.utc)
    recency = _recency_days(_latest_seen(identifiers), reference_time)
    # Identifiers are passed as the evidence argument on purpose: they carry
    # the provenance fields the summary inspects.
    confidence = build_confidence_summary(aliases, identifiers, identifiers)

    source_tally: Dict[str, int] = {}
    for record in identifiers:
        label = str(record.get("provenance") or "unknown")
        source_tally[label] = source_tally.get(label, 0) + 1

    history = {
        "evidence_count": len(identifiers),
        "distinct_sources": len(source_tally),
        "sources": {label: source_tally[label] for label in sorted(source_tally)},
    }
    stage = _relationship_stage_hint(
        evidence_count=len(identifiers),
        recency_days=recency,
        confidence=confidence["overall_confidence"],
    )

    return RecommendationContext(
        entity_id=entity_id,
        identity_confidence=float(confidence["overall_confidence"]),
        activity_recency_days=recency,
        interaction_history_summary=history,
        preferred_channel_hint=_preferred_channel_hint(identifiers),
        relationship_stage_hint=stage,
        continuity={
            "canonical_entity_id": entity_id,
            "alias_count": len(aliases),
            "identifier_count": len(identifiers),
        },
    )
129
+
130
+
131
def _latest_seen(identifiers: List[Dict[str, Any]]) -> datetime | None:
    """Most recent parseable ``last_seen_at`` timestamp, or None when absent."""
    latest: datetime | None = None
    for record in identifiers:
        raw = record.get("last_seen_at")
        if not raw:
            continue
        parsed = _parse_iso(str(raw))
        # Unparseable timestamps are skipped rather than treated as errors.
        if parsed is not None and (latest is None or parsed > latest):
            latest = parsed
    return latest
141
+
142
+
143
def _parse_iso(raw: str) -> datetime | None:
    """Parse an ISO-8601 timestamp into an aware UTC datetime.

    A trailing ``Z`` suffix is accepted (rewritten to ``+00:00`` since
    ``fromisoformat`` only learned ``Z`` in Python 3.11); naive timestamps
    are assumed UTC. Returns None for blank or unparseable input.
    """
    candidate = raw.strip()
    if not candidate:
        return None
    if candidate.endswith("Z"):
        candidate = candidate[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(candidate)
    except ValueError:
        return None
    aware = parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
    return aware.astimezone(timezone.utc)
156
+
157
+
158
def _recency_days(last_seen: datetime | None, now: datetime) -> float:
    """Days elapsed since *last_seen*, clamped at 0.0; inf when unknown."""
    if last_seen is None:
        return float("inf")
    elapsed_seconds = (now - last_seen).total_seconds()
    if elapsed_seconds < 0.0:
        # Future timestamps (clock skew) count as "just now".
        elapsed_seconds = 0.0
    return round(elapsed_seconds / 86400.0, 6)
164
+
165
+
166
def _preferred_channel_hint(identifiers: List[Dict[str, Any]]) -> str:
    """Pick the highest-weighted identifier type as the outreach channel.

    Every identifier record adds its type's weight (unrecognized types count
    as 1) to that type's running total. Ties break alphabetically; returns
    "unknown" when there are no identifiers at all.
    """
    channel_weights = {
        "email": 5,
        "linkedin_handle": 4,
        "twitter_handle": 3,
        "github_handle": 3,
        "canonical_url": 2,
        "domain": 1,
        "name": 0,
    }
    totals: Dict[str, int] = {}
    for record in identifiers:
        channel = str(record["identifier_type"])
        totals[channel] = totals.get(channel, 0) + channel_weights.get(channel, 1)
    if not totals:
        return "unknown"
    # Highest total wins; equal totals resolve to the alphabetically first type.
    return min(totals, key=lambda channel: (-totals[channel], channel))
184
+
185
+
186
def _relationship_stage_hint(*, evidence_count: int, recency_days: float, confidence: float) -> str:
    """Classify the relationship as engaged / warm / cold from activity signals."""
    # Ordered thresholds: (label, min evidence, max recency days, min confidence).
    thresholds = (
        ("engaged", 6, 30.0, 0.8),
        ("warm", 3, 90.0, 0.65),
    )
    for label, min_evidence, max_recency, min_confidence in thresholds:
        if evidence_count >= min_evidence and recency_days <= max_recency and confidence >= min_confidence:
            return label
    return "cold"
@@ -2,7 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  from typing import Any, Dict, List, Optional
4
4
 
5
- from .context import EntityContext, build_confidence_summary
5
+ from .attribution import OutcomeAttribution, normalize_outcome_references, normalize_reference, rank_entity_candidates
6
+ from .context import RecommendationContext, EntityContext, build_confidence_summary, build_recommendation_context
6
7
  from .events import EmittedEvent, EventFactory
7
8
  from .models import (
8
9
  DEFAULT_MATCH_CONFIDENCE,
@@ -160,6 +161,40 @@ class EntityResolver:
160
161
  confidence_summary=summary,
161
162
  )
162
163
 
164
+ def recommendation_context(self, entity_id: str) -> RecommendationContext:
165
+ canonical_id = self.store.canonical_entity_id(entity_id)
166
+ aliases = self.store.list_aliases_for_entity(canonical_id)
167
+ identifiers = self.store.list_identifier_records_for_entity(canonical_id)
168
+ return build_recommendation_context(canonical_id, aliases, identifiers)
169
+
170
    def attribute_outcome(self, references: Any) -> OutcomeAttribution:
        """Attribute an attempt/outcome back to one canonical entity.

        ``references`` is either a mapping of ``{identifier_type: value}`` or
        a sequence of items with ``identifier_type``/``type`` and ``value``
        keys (see ``normalize_outcome_references``). Each reference is
        resolved against the store, canonical merge redirects are applied,
        and candidates are ranked via ``rank_entity_candidates``.
        """
        refs = normalize_outcome_references(references)

        def _resolve_ref(identifier_type: str, value: str) -> Dict[str, Any]:
            # Normalize the raw value (entity_id references pass through unchanged).
            raw_type, normalized = normalize_reference(identifier_type, value)
            if raw_type == "entity_id":
                # Direct entity reference: near-certain (0.99) when the entity exists.
                entity = self.store.get_entity(normalized)
                if not entity:
                    return {"entity_id": None, "confidence": 0.0, "normalized_value": normalized}
                return {
                    "entity_id": self.store.canonical_entity_id(str(entity["entity_id"])),
                    "confidence": 0.99,
                    "normalized_value": normalized,
                }

            alias = self.store.find_alias(raw_type, normalized)
            if not alias:
                return {"entity_id": None, "confidence": 0.0, "normalized_value": normalized}

            # Follow merge redirects before reporting a match.
            canonical = self.store.canonical_entity_id(str(alias["entity_id"]))
            identifier = self.store.get_identifier(raw_type, normalized)
            # Use the stronger of the alias and identifier confidences.
            alias_conf = float(alias["confidence"])
            identifier_conf = float(identifier["confidence"]) if identifier else 0.0
            confidence = round(max(alias_conf, identifier_conf), 6)
            return {"entity_id": canonical, "confidence": confidence, "normalized_value": normalized}

        return rank_entity_candidates(refs, _resolve_ref)
197
+
163
198
  def export_snapshot(self, output_path: str) -> None:
164
199
  self.store.export_snapshot(output_path)
165
200
 
@@ -103,6 +103,12 @@ class SQLiteEntityStore:
103
103
  (identifier_type, normalized_value),
104
104
  ).fetchone()
105
105
 
106
+ def get_identifier(self, identifier_type: str, normalized_value: str) -> Optional[sqlite3.Row]:
107
+ return self.conn.execute(
108
+ "SELECT * FROM identifiers WHERE identifier_type = ? AND normalized_value = ?",
109
+ (identifier_type, normalized_value),
110
+ ).fetchone()
111
+
106
112
  def upsert_identifier(
107
113
  self,
108
114
  identifier_type: str,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metaspn-entities
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
5
5
  Author: MetaSPN Contributors
6
6
  License-Expression: MIT
@@ -108,3 +108,30 @@ Profiler/router workers can read consolidated context using:
108
108
  - `resolver.confidence_summary(entity_id)`
109
109
 
110
110
  Both APIs resolve canonical redirects first, so merged IDs return coherent context.
111
+
112
+ ## M2 Recommendation Context API
113
+
114
+ Recommendation and drafter workers can consume:
115
+
116
+ - `resolver.recommendation_context(entity_id)`
117
+
118
+ The recommendation context includes:
119
+ - identity confidence
120
+ - activity recency (days)
121
+ - interaction history summary (evidence count + source distribution)
122
+ - preferred channel hint
123
+ - relationship stage hint (`cold` / `warm` / `engaged`)
124
+ - merge-safe continuity fields keyed to canonical entity IDs
125
+
126
+ ## M3 Outcome Attribution API
127
+
128
+ Outcome evaluators can map attempt/outcome references back to canonical entity lineage:
129
+
130
+ - `resolver.attribute_outcome(references)`
131
+
132
+ Supported references include `entity_id`, `email`, `canonical_url`, handles, domains, and names.
133
+
134
+ Attribution guarantees:
135
+ - canonical merge redirects are resolved before returning `entity_id`
136
+ - output includes explicit confidence for downstream learning logic
137
+ - deterministic tie-breaks are applied by score, then hit count, then entity ID
@@ -3,6 +3,7 @@ README.md
3
3
  pyproject.toml
4
4
  metaspn_entities/__init__.py
5
5
  metaspn_entities/adapter.py
6
+ metaspn_entities/attribution.py
6
7
  metaspn_entities/context.py
7
8
  metaspn_entities/events.py
8
9
  metaspn_entities/models.py
@@ -15,6 +16,8 @@ metaspn_entities.egg-info/dependency_links.txt
15
16
  metaspn_entities.egg-info/requires.txt
16
17
  metaspn_entities.egg-info/top_level.txt
17
18
  tests/test_adapter.py
19
+ tests/test_attribution.py
18
20
  tests/test_context.py
19
21
  tests/test_event_contract.py
22
+ tests/test_recommendation_context.py
20
23
  tests/test_resolver.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "metaspn-entities"
7
- version = "0.1.5"
7
+ version = "0.1.7"
8
8
  description = "Canonical entity resolution, aliasing, and merges for MetaSPN systems"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,66 @@
1
+ import tempfile
2
+ import unittest
3
+ from pathlib import Path
4
+
5
+ from metaspn_entities.resolver import EntityResolver
6
+ from metaspn_entities.sqlite_backend import SQLiteEntityStore
7
+
8
+
9
class AttributionTests(unittest.TestCase):
    """End-to-end tests for EntityResolver.attribute_outcome (M3 attribution)."""

    def setUp(self) -> None:
        # Fresh SQLite-backed store per test for isolation.
        self.tempdir = tempfile.TemporaryDirectory()
        self.db_path = str(Path(self.tempdir.name) / "entities.db")
        self.store = SQLiteEntityStore(self.db_path)
        self.resolver = EntityResolver(self.store)

    def tearDown(self) -> None:
        self.store.close()
        self.tempdir.cleanup()

    def test_merge_after_attempt_before_outcome(self) -> None:
        # An outcome referencing a merged-away entity must land on the merge winner.
        attempt = self.resolver.resolve("twitter_handle", "attempt_user")
        winner = self.resolver.resolve("twitter_handle", "winner_user")
        self.resolver.add_alias(attempt.entity_id, "email", "attempt@example.com", confidence=0.9)

        # Merge happens between attempt capture and outcome ingestion.
        self.resolver.merge_entities(attempt.entity_id, winner.entity_id, reason="dedupe")

        result = self.resolver.attribute_outcome(
            {
                "entity_id": attempt.entity_id,
                "email": "attempt@example.com",
            }
        )
        self.assertEqual(result.entity_id, winner.entity_id)
        self.assertGreater(result.confidence, 0.9)

    def test_undo_merge_edge_case(self) -> None:
        # Attribution must follow whatever redirect state exists after an undo.
        a = self.resolver.resolve("twitter_handle", "undo_attr_a")
        b = self.resolver.resolve("twitter_handle", "undo_attr_b")
        self.resolver.merge_entities(a.entity_id, b.entity_id, reason="dedupe")
        self.resolver.undo_merge(a.entity_id, b.entity_id)

        # After undo implementation, b redirects to a.
        result = self.resolver.attribute_outcome({"entity_id": b.entity_id})
        self.assertEqual(result.entity_id, a.entity_id)
        self.assertAlmostEqual(result.confidence, 0.99, places=6)

    def test_conflicting_aliases_tie_break_by_confidence(self) -> None:
        # Two entities match different references; the higher summed score wins.
        high = self.resolver.resolve("twitter_handle", "high_conf")
        low = self.resolver.resolve("twitter_handle", "low_conf")

        self.resolver.add_alias(high.entity_id, "email", "high@example.com", confidence=0.95)
        self.resolver.add_alias(low.entity_id, "canonical_url", "https://low.example.com/profile", confidence=0.60)

        # Case/trailing-slash variants exercise reference normalization too.
        result = self.resolver.attribute_outcome(
            {
                "email": "HIGH@example.com",
                "canonical_url": "https://low.example.com/profile/",
            }
        )
        self.assertEqual(result.entity_id, high.entity_id)
        # 0.95 winning score over 2 references -> 0.475 normalized confidence.
        self.assertAlmostEqual(result.confidence, 0.475, places=6)
63
+
64
+
65
+ if __name__ == "__main__":
66
+ unittest.main()
@@ -0,0 +1,89 @@
1
+ import tempfile
2
+ import unittest
3
+ from pathlib import Path
4
+
5
+ from metaspn_entities.adapter import resolve_normalized_social_signal
6
+ from metaspn_entities.resolver import EntityResolver
7
+ from metaspn_entities.sqlite_backend import SQLiteEntityStore
8
+
9
+
10
class RecommendationContextTests(unittest.TestCase):
    """Integration tests for EntityResolver.recommendation_context (M2)."""

    def setUp(self) -> None:
        # Fresh SQLite-backed store per test for isolation.
        self.tempdir = tempfile.TemporaryDirectory()
        self.db_path = str(Path(self.tempdir.name) / "entities.db")
        self.store = SQLiteEntityStore(self.db_path)
        self.resolver = EntityResolver(self.store)

    def tearDown(self) -> None:
        self.store.close()
        self.tempdir.cleanup()

    def test_cross_source_consistency(self) -> None:
        # Two signals from different platforms sharing a profile URL should
        # resolve to one entity whose context reflects both sources.
        signal_a = {
            "source": "social.ingest.twitter",
            "payload": {
                "platform": "twitter",
                "author_handle": "rec_user",
                "profile_url": "https://example.com/p/rec_user",
            },
        }
        signal_b = {
            "source": "social.ingest.linkedin",
            "payload": {
                "platform": "linkedin",
                "handle": "rec-user",
                "profile_url": "http://www.example.com/p/rec_user/",
            },
        }

        first = resolve_normalized_social_signal(self.resolver, signal_a)
        second = resolve_normalized_social_signal(self.resolver, signal_b)
        self.assertEqual(first.entity_id, second.entity_id)

        rec = self.resolver.recommendation_context(first.entity_id)
        self.assertEqual(rec.entity_id, first.entity_id)
        self.assertGreaterEqual(rec.identity_confidence, 0.0)
        self.assertIn(rec.relationship_stage_hint, {"cold", "warm", "engaged"})
        self.assertIn("social.ingest.linkedin", rec.interaction_history_summary["sources"])
        self.assertIn("social.ingest.twitter", rec.interaction_history_summary["sources"])

    def test_merge_safe_continuity(self) -> None:
        # Contexts fetched via either side of a merge must agree on canonical IDs.
        a = self.resolver.resolve("twitter_handle", "merge_rec_a")
        b = self.resolver.resolve("twitter_handle", "merge_rec_b")
        self.resolver.add_alias(a.entity_id, "email", "a@rec.dev")
        self.resolver.add_alias(b.entity_id, "domain", "rec.dev")
        self.resolver.merge_entities(a.entity_id, b.entity_id, reason="dedupe")

        rec_from = self.resolver.recommendation_context(a.entity_id)
        rec_to = self.resolver.recommendation_context(b.entity_id)

        self.assertEqual(rec_from.entity_id, rec_to.entity_id)
        self.assertEqual(rec_from.continuity["canonical_entity_id"], rec_to.continuity["canonical_entity_id"])
        self.assertGreaterEqual(rec_from.continuity["identifier_count"], 2)

    def test_rerun_determinism(self) -> None:
        # Re-resolving the same signal must not change any context field.
        signal = {
            "source": "social.ingest",
            "payload": {
                "platform": "twitter",
                "author_handle": "deterministic_rec",
                "profile_url": "https://example.org/deterministic_rec",
            },
        }

        first = resolve_normalized_social_signal(self.resolver, signal)
        rec_1 = self.resolver.recommendation_context(first.entity_id)

        second = resolve_normalized_social_signal(self.resolver, signal)
        rec_2 = self.resolver.recommendation_context(second.entity_id)

        self.assertEqual(first.entity_id, second.entity_id)
        self.assertEqual(rec_1.entity_id, rec_2.entity_id)
        self.assertEqual(rec_1.preferred_channel_hint, rec_2.preferred_channel_hint)
        self.assertEqual(rec_1.relationship_stage_hint, rec_2.relationship_stage_hint)
        self.assertEqual(rec_1.continuity, rec_2.continuity)
        self.assertEqual(rec_1.interaction_history_summary, rec_2.interaction_history_summary)
86
+
87
+
88
+ if __name__ == "__main__":
89
+ unittest.main()
@@ -1,68 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from dataclasses import dataclass
4
- from typing import Any, Dict, List
5
-
6
-
7
- @dataclass(frozen=True)
8
- class EntityContext:
9
- entity_id: str
10
- aliases: List[Dict[str, Any]]
11
- identifiers: List[Dict[str, Any]]
12
- recent_evidence: List[Dict[str, Any]]
13
- confidence_summary: Dict[str, Any]
14
-
15
-
16
- def build_confidence_summary(
17
- aliases: List[Dict[str, Any]],
18
- identifiers: List[Dict[str, Any]],
19
- evidence: List[Dict[str, Any]],
20
- ) -> Dict[str, Any]:
21
- identifier_confidences = sorted(float(item["confidence"]) for item in identifiers)
22
- alias_confidences = sorted(float(item["confidence"]) for item in aliases)
23
- source_set = sorted(
24
- {
25
- str(item.get("provenance"))
26
- for item in evidence
27
- if item.get("provenance") not in (None, "")
28
- }
29
- )
30
-
31
- identifier_avg = _avg(identifier_confidences)
32
- alias_avg = _avg(alias_confidences)
33
- source_diversity = min(1.0, len(source_set) / 3.0)
34
-
35
- overall = min(1.0, (0.65 * identifier_avg) + (0.25 * alias_avg) + (0.10 * source_diversity))
36
- by_identifier_type = _rollup_by_identifier_type(identifiers)
37
-
38
- return {
39
- "overall_confidence": round(overall, 6),
40
- "identifier_confidence_avg": round(identifier_avg, 6),
41
- "alias_confidence_avg": round(alias_avg, 6),
42
- "unique_source_count": len(source_set),
43
- "evidence_count": len(evidence),
44
- "by_identifier_type": by_identifier_type,
45
- }
46
-
47
-
48
- def _avg(values: List[float]) -> float:
49
- if not values:
50
- return 0.0
51
- return sum(values) / len(values)
52
-
53
-
54
- def _rollup_by_identifier_type(identifiers: List[Dict[str, Any]]) -> Dict[str, Dict[str, float]]:
55
- grouped: Dict[str, List[float]] = {}
56
- for item in identifiers:
57
- key = str(item["identifier_type"])
58
- grouped.setdefault(key, []).append(float(item["confidence"]))
59
-
60
- rollup: Dict[str, Dict[str, float]] = {}
61
- for key in sorted(grouped):
62
- values = sorted(grouped[key])
63
- rollup[key] = {
64
- "count": float(len(values)),
65
- "avg_confidence": round(_avg(values), 6),
66
- "max_confidence": round(max(values), 6),
67
- }
68
- return rollup