metaspn-entities 0.1.6__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/PKG-INFO +27 -1
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/README.md +26 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities/__init__.py +4 -0
- metaspn_entities-0.1.8/metaspn_entities/attribution.py +88 -0
- metaspn_entities-0.1.8/metaspn_entities/demo.py +81 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities/normalize.py +11 -1
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities/resolver.py +29 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities/sqlite_backend.py +6 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities.egg-info/PKG-INFO +27 -1
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities.egg-info/SOURCES.txt +4 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/pyproject.toml +1 -1
- metaspn_entities-0.1.8/tests/test_attribution.py +66 -0
- metaspn_entities-0.1.8/tests/test_demo_support.py +84 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/LICENSE +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities/adapter.py +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities/context.py +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities/events.py +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities/models.py +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities.egg-info/dependency_links.txt +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities.egg-info/requires.txt +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities.egg-info/top_level.txt +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/setup.cfg +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/tests/test_adapter.py +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/tests/test_context.py +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/tests/test_event_contract.py +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/tests/test_recommendation_context.py +0 -0
- {metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/tests/test_resolver.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: metaspn-entities
|
|
3
|
-
Version: 0.1.6
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
|
|
5
5
|
Author: MetaSPN Contributors
|
|
6
6
|
License-Expression: MIT
|
|
@@ -122,3 +122,29 @@ The recommendation context includes:
|
|
|
122
122
|
- preferred channel hint
|
|
123
123
|
- relationship stage hint (`cold` / `warm` / `engaged`)
|
|
124
124
|
- merge-safe continuity fields keyed to canonical entity IDs
|
|
125
|
+
|
|
126
|
+
## M3 Outcome Attribution API
|
|
127
|
+
|
|
128
|
+
Outcome evaluators can map attempt/outcome references back to canonical entity lineage:
|
|
129
|
+
|
|
130
|
+
- `resolver.attribute_outcome(references)`
|
|
131
|
+
|
|
132
|
+
Supported references include `entity_id`, `email`, `canonical_url`, handles, domains, and names.
|
|
133
|
+
|
|
134
|
+
Attribution guarantees:
|
|
135
|
+
- canonical merge redirects are resolved before returning `entity_id`
|
|
136
|
+
- output includes explicit confidence for downstream learning logic
|
|
137
|
+
- deterministic tie-breaks are applied by score, then hit count, then entity ID
|
|
138
|
+
|
|
139
|
+
## Demo Pipeline Invocation
|
|
140
|
+
|
|
141
|
+
For demo digest identity resolution (without direct DB queries in renderer), use:
|
|
142
|
+
|
|
143
|
+
- `resolve_demo_social_identity(resolver, social_payload)`
|
|
144
|
+
|
|
145
|
+
Returned payload includes:
|
|
146
|
+
- `entity_id`
|
|
147
|
+
- `confidence`
|
|
148
|
+
- `matched_identifiers`
|
|
149
|
+
- `why` metadata (confidence summary, counts, relationship hint)
|
|
150
|
+
- emitted event payloads for auditability
|
|
@@ -97,3 +97,29 @@ The recommendation context includes:
|
|
|
97
97
|
- preferred channel hint
|
|
98
98
|
- relationship stage hint (`cold` / `warm` / `engaged`)
|
|
99
99
|
- merge-safe continuity fields keyed to canonical entity IDs
|
|
100
|
+
|
|
101
|
+
## M3 Outcome Attribution API
|
|
102
|
+
|
|
103
|
+
Outcome evaluators can map attempt/outcome references back to canonical entity lineage:
|
|
104
|
+
|
|
105
|
+
- `resolver.attribute_outcome(references)`
|
|
106
|
+
|
|
107
|
+
Supported references include `entity_id`, `email`, `canonical_url`, handles, domains, and names.
|
|
108
|
+
|
|
109
|
+
Attribution guarantees:
|
|
110
|
+
- canonical merge redirects are resolved before returning `entity_id`
|
|
111
|
+
- output includes explicit confidence for downstream learning logic
|
|
112
|
+
- deterministic tie-breaks are applied by score, then hit count, then entity ID
|
|
113
|
+
|
|
114
|
+
## Demo Pipeline Invocation
|
|
115
|
+
|
|
116
|
+
For demo digest identity resolution (without direct DB queries in renderer), use:
|
|
117
|
+
|
|
118
|
+
- `resolve_demo_social_identity(resolver, social_payload)`
|
|
119
|
+
|
|
120
|
+
Returned payload includes:
|
|
121
|
+
- `entity_id`
|
|
122
|
+
- `confidence`
|
|
123
|
+
- `matched_identifiers`
|
|
124
|
+
- `why` metadata (confidence summary, counts, relationship hint)
|
|
125
|
+
- emitted event payloads for auditability
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from .adapter import SignalResolutionResult, resolve_normalized_social_signal
|
|
2
|
+
from .attribution import OutcomeAttribution
|
|
2
3
|
from .context import RecommendationContext, EntityContext, build_confidence_summary, build_recommendation_context
|
|
4
|
+
from .demo import resolve_demo_social_identity
|
|
3
5
|
from .events import EmittedEvent
|
|
4
6
|
from .models import EntityResolution
|
|
5
7
|
from .resolver import EntityResolver
|
|
@@ -8,6 +10,8 @@ from .sqlite_backend import SQLiteEntityStore
|
|
|
8
10
|
__all__ = [
|
|
9
11
|
"resolve_normalized_social_signal",
|
|
10
12
|
"SignalResolutionResult",
|
|
13
|
+
"OutcomeAttribution",
|
|
14
|
+
"resolve_demo_social_identity",
|
|
11
15
|
"EntityContext",
|
|
12
16
|
"RecommendationContext",
|
|
13
17
|
"build_confidence_summary",
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple
|
|
5
|
+
|
|
6
|
+
from .normalize import normalize_identifier
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
|
|
10
|
+
class OutcomeAttribution:
|
|
11
|
+
entity_id: Optional[str]
|
|
12
|
+
confidence: float
|
|
13
|
+
matched_references: List[Dict[str, Any]] = field(default_factory=list)
|
|
14
|
+
strategy: str = "confidence-weighted-reference-v1"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def normalize_outcome_references(references: Mapping[str, Any] | Sequence[Mapping[str, Any]]) -> List[Tuple[str, str]]:
|
|
18
|
+
refs: List[Tuple[str, str]] = []
|
|
19
|
+
if isinstance(references, Mapping):
|
|
20
|
+
for raw_type in sorted(references):
|
|
21
|
+
value = references[raw_type]
|
|
22
|
+
if value is None:
|
|
23
|
+
continue
|
|
24
|
+
if isinstance(value, str) and value.strip():
|
|
25
|
+
refs.append((str(raw_type), value.strip()))
|
|
26
|
+
return refs
|
|
27
|
+
|
|
28
|
+
for item in references:
|
|
29
|
+
id_type = str(item.get("identifier_type") or item.get("type") or "").strip()
|
|
30
|
+
value = str(item.get("value") or "").strip()
|
|
31
|
+
if not id_type or not value:
|
|
32
|
+
continue
|
|
33
|
+
refs.append((id_type, value))
|
|
34
|
+
return refs
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def rank_entity_candidates(
|
|
38
|
+
references: Iterable[Tuple[str, str]],
|
|
39
|
+
resolve_reference: Any,
|
|
40
|
+
) -> OutcomeAttribution:
|
|
41
|
+
candidate_scores: Dict[str, float] = {}
|
|
42
|
+
candidate_hits: Dict[str, int] = {}
|
|
43
|
+
matched: List[Dict[str, Any]] = []
|
|
44
|
+
total_refs = 0
|
|
45
|
+
|
|
46
|
+
for identifier_type, value in references:
|
|
47
|
+
total_refs += 1
|
|
48
|
+
match = resolve_reference(identifier_type, value)
|
|
49
|
+
matched.append(
|
|
50
|
+
{
|
|
51
|
+
"identifier_type": identifier_type,
|
|
52
|
+
"value": value,
|
|
53
|
+
"normalized_value": match.get("normalized_value"),
|
|
54
|
+
"matched_entity_id": match.get("entity_id"),
|
|
55
|
+
"reference_confidence": float(match.get("confidence", 0.0)),
|
|
56
|
+
}
|
|
57
|
+
)
|
|
58
|
+
entity_id = match.get("entity_id")
|
|
59
|
+
confidence = float(match.get("confidence", 0.0))
|
|
60
|
+
if entity_id:
|
|
61
|
+
candidate_scores[entity_id] = candidate_scores.get(entity_id, 0.0) + confidence
|
|
62
|
+
candidate_hits[entity_id] = candidate_hits.get(entity_id, 0) + 1
|
|
63
|
+
|
|
64
|
+
if not candidate_scores:
|
|
65
|
+
return OutcomeAttribution(entity_id=None, confidence=0.0, matched_references=matched)
|
|
66
|
+
|
|
67
|
+
ranked = sorted(
|
|
68
|
+
candidate_scores.items(),
|
|
69
|
+
key=lambda kv: (
|
|
70
|
+
-kv[1],
|
|
71
|
+
-candidate_hits.get(kv[0], 0),
|
|
72
|
+
kv[0],
|
|
73
|
+
),
|
|
74
|
+
)
|
|
75
|
+
best_entity_id, best_score = ranked[0]
|
|
76
|
+
denom = max(1, total_refs)
|
|
77
|
+
normalized_confidence = min(1.0, round(best_score / float(denom), 6))
|
|
78
|
+
return OutcomeAttribution(
|
|
79
|
+
entity_id=best_entity_id,
|
|
80
|
+
confidence=normalized_confidence,
|
|
81
|
+
matched_references=matched,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def normalize_reference(identifier_type: str, value: str) -> Tuple[str, str]:
|
|
86
|
+
if identifier_type == "entity_id":
|
|
87
|
+
return identifier_type, value
|
|
88
|
+
return identifier_type, normalize_identifier(identifier_type, value)
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Mapping
|
|
4
|
+
|
|
5
|
+
from .models import EntityType
|
|
6
|
+
from .resolver import EntityResolver
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def resolve_demo_social_identity(
|
|
10
|
+
resolver: EntityResolver,
|
|
11
|
+
social_payload: Mapping[str, Any],
|
|
12
|
+
*,
|
|
13
|
+
caused_by: str = "demo-pipeline",
|
|
14
|
+
) -> Dict[str, Any]:
|
|
15
|
+
platform = str(social_payload.get("platform") or "").strip().lower()
|
|
16
|
+
source = str(social_payload.get("source") or social_payload.get("provenance") or "demo")
|
|
17
|
+
|
|
18
|
+
handle = social_payload.get("author_handle") or social_payload.get("handle")
|
|
19
|
+
if not isinstance(handle, str) or not handle.strip():
|
|
20
|
+
raise ValueError("demo payload requires author_handle or handle")
|
|
21
|
+
handle = handle.strip()
|
|
22
|
+
|
|
23
|
+
handle_type = f"{platform}_handle" if platform else "handle"
|
|
24
|
+
resolution = resolver.resolve(
|
|
25
|
+
handle_type,
|
|
26
|
+
handle,
|
|
27
|
+
context={
|
|
28
|
+
"entity_type": EntityType.PERSON,
|
|
29
|
+
"caused_by": caused_by,
|
|
30
|
+
"provenance": source,
|
|
31
|
+
"confidence": 0.93,
|
|
32
|
+
},
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
for key in ("profile_url", "author_url", "canonical_url"):
|
|
36
|
+
url = social_payload.get(key)
|
|
37
|
+
if isinstance(url, str) and url.strip():
|
|
38
|
+
resolver.add_alias(
|
|
39
|
+
resolution.entity_id,
|
|
40
|
+
"canonical_url",
|
|
41
|
+
url.strip(),
|
|
42
|
+
confidence=0.96,
|
|
43
|
+
caused_by=caused_by,
|
|
44
|
+
provenance=source,
|
|
45
|
+
)
|
|
46
|
+
break
|
|
47
|
+
|
|
48
|
+
email = social_payload.get("email")
|
|
49
|
+
if isinstance(email, str) and email.strip():
|
|
50
|
+
resolver.add_alias(
|
|
51
|
+
resolution.entity_id,
|
|
52
|
+
"email",
|
|
53
|
+
email.strip(),
|
|
54
|
+
confidence=0.98,
|
|
55
|
+
caused_by=caused_by,
|
|
56
|
+
provenance=source,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
canonical_id = resolver.store.canonical_entity_id(resolution.entity_id)
|
|
60
|
+
context = resolver.entity_context(canonical_id)
|
|
61
|
+
digest_payload = {
|
|
62
|
+
"entity_id": canonical_id,
|
|
63
|
+
"confidence": context.confidence_summary["overall_confidence"],
|
|
64
|
+
"matched_identifiers": [
|
|
65
|
+
{
|
|
66
|
+
"identifier_type": item["identifier_type"],
|
|
67
|
+
"value": item["value"],
|
|
68
|
+
"confidence": item["confidence"],
|
|
69
|
+
"last_seen_at": item["last_seen_at"],
|
|
70
|
+
}
|
|
71
|
+
for item in context.identifiers
|
|
72
|
+
],
|
|
73
|
+
"why": {
|
|
74
|
+
"matched_identifier_count": len(context.identifiers),
|
|
75
|
+
"alias_count": len(context.aliases),
|
|
76
|
+
"confidence_summary": context.confidence_summary,
|
|
77
|
+
"relationship_stage_hint": resolver.recommendation_context(canonical_id).relationship_stage_hint,
|
|
78
|
+
},
|
|
79
|
+
"events": [event.payload for event in resolver.drain_events()],
|
|
80
|
+
}
|
|
81
|
+
return digest_payload
|
|
@@ -7,7 +7,17 @@ def normalize_identifier(identifier_type: str, value: str) -> str:
|
|
|
7
7
|
identifier_type = identifier_type.strip().lower()
|
|
8
8
|
value = value.strip()
|
|
9
9
|
|
|
10
|
-
if identifier_type in {
|
|
10
|
+
if identifier_type in {
|
|
11
|
+
"twitter_handle",
|
|
12
|
+
"x_handle",
|
|
13
|
+
"linkedin_handle",
|
|
14
|
+
"github_handle",
|
|
15
|
+
"instagram_handle",
|
|
16
|
+
"tiktok_handle",
|
|
17
|
+
"bluesky_handle",
|
|
18
|
+
"youtube_handle",
|
|
19
|
+
"handle",
|
|
20
|
+
}:
|
|
11
21
|
return value.lstrip("@").lower()
|
|
12
22
|
|
|
13
23
|
if identifier_type == "email":
|
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import Any, Dict, List, Optional
|
|
4
4
|
|
|
5
|
+
from .attribution import OutcomeAttribution, normalize_outcome_references, normalize_reference, rank_entity_candidates
|
|
5
6
|
from .context import RecommendationContext, EntityContext, build_confidence_summary, build_recommendation_context
|
|
6
7
|
from .events import EmittedEvent, EventFactory
|
|
7
8
|
from .models import (
|
|
@@ -166,6 +167,34 @@ class EntityResolver:
|
|
|
166
167
|
identifiers = self.store.list_identifier_records_for_entity(canonical_id)
|
|
167
168
|
return build_recommendation_context(canonical_id, aliases, identifiers)
|
|
168
169
|
|
|
170
|
+
def attribute_outcome(self, references: Any) -> OutcomeAttribution:
|
|
171
|
+
refs = normalize_outcome_references(references)
|
|
172
|
+
|
|
173
|
+
def _resolve_ref(identifier_type: str, value: str) -> Dict[str, Any]:
|
|
174
|
+
raw_type, normalized = normalize_reference(identifier_type, value)
|
|
175
|
+
if raw_type == "entity_id":
|
|
176
|
+
entity = self.store.get_entity(normalized)
|
|
177
|
+
if not entity:
|
|
178
|
+
return {"entity_id": None, "confidence": 0.0, "normalized_value": normalized}
|
|
179
|
+
return {
|
|
180
|
+
"entity_id": self.store.canonical_entity_id(str(entity["entity_id"])),
|
|
181
|
+
"confidence": 0.99,
|
|
182
|
+
"normalized_value": normalized,
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
alias = self.store.find_alias(raw_type, normalized)
|
|
186
|
+
if not alias:
|
|
187
|
+
return {"entity_id": None, "confidence": 0.0, "normalized_value": normalized}
|
|
188
|
+
|
|
189
|
+
canonical = self.store.canonical_entity_id(str(alias["entity_id"]))
|
|
190
|
+
identifier = self.store.get_identifier(raw_type, normalized)
|
|
191
|
+
alias_conf = float(alias["confidence"])
|
|
192
|
+
identifier_conf = float(identifier["confidence"]) if identifier else 0.0
|
|
193
|
+
confidence = round(max(alias_conf, identifier_conf), 6)
|
|
194
|
+
return {"entity_id": canonical, "confidence": confidence, "normalized_value": normalized}
|
|
195
|
+
|
|
196
|
+
return rank_entity_candidates(refs, _resolve_ref)
|
|
197
|
+
|
|
169
198
|
def export_snapshot(self, output_path: str) -> None:
|
|
170
199
|
self.store.export_snapshot(output_path)
|
|
171
200
|
|
|
@@ -103,6 +103,12 @@ class SQLiteEntityStore:
|
|
|
103
103
|
(identifier_type, normalized_value),
|
|
104
104
|
).fetchone()
|
|
105
105
|
|
|
106
|
+
def get_identifier(self, identifier_type: str, normalized_value: str) -> Optional[sqlite3.Row]:
|
|
107
|
+
return self.conn.execute(
|
|
108
|
+
"SELECT * FROM identifiers WHERE identifier_type = ? AND normalized_value = ?",
|
|
109
|
+
(identifier_type, normalized_value),
|
|
110
|
+
).fetchone()
|
|
111
|
+
|
|
106
112
|
def upsert_identifier(
|
|
107
113
|
self,
|
|
108
114
|
identifier_type: str,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: metaspn-entities
|
|
3
|
-
Version: 0.1.6
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
|
|
5
5
|
Author: MetaSPN Contributors
|
|
6
6
|
License-Expression: MIT
|
|
@@ -122,3 +122,29 @@ The recommendation context includes:
|
|
|
122
122
|
- preferred channel hint
|
|
123
123
|
- relationship stage hint (`cold` / `warm` / `engaged`)
|
|
124
124
|
- merge-safe continuity fields keyed to canonical entity IDs
|
|
125
|
+
|
|
126
|
+
## M3 Outcome Attribution API
|
|
127
|
+
|
|
128
|
+
Outcome evaluators can map attempt/outcome references back to canonical entity lineage:
|
|
129
|
+
|
|
130
|
+
- `resolver.attribute_outcome(references)`
|
|
131
|
+
|
|
132
|
+
Supported references include `entity_id`, `email`, `canonical_url`, handles, domains, and names.
|
|
133
|
+
|
|
134
|
+
Attribution guarantees:
|
|
135
|
+
- canonical merge redirects are resolved before returning `entity_id`
|
|
136
|
+
- output includes explicit confidence for downstream learning logic
|
|
137
|
+
- deterministic tie-breaks are applied by score, then hit count, then entity ID
|
|
138
|
+
|
|
139
|
+
## Demo Pipeline Invocation
|
|
140
|
+
|
|
141
|
+
For demo digest identity resolution (without direct DB queries in renderer), use:
|
|
142
|
+
|
|
143
|
+
- `resolve_demo_social_identity(resolver, social_payload)`
|
|
144
|
+
|
|
145
|
+
Returned payload includes:
|
|
146
|
+
- `entity_id`
|
|
147
|
+
- `confidence`
|
|
148
|
+
- `matched_identifiers`
|
|
149
|
+
- `why` metadata (confidence summary, counts, relationship hint)
|
|
150
|
+
- emitted event payloads for auditability
|
|
@@ -3,7 +3,9 @@ README.md
|
|
|
3
3
|
pyproject.toml
|
|
4
4
|
metaspn_entities/__init__.py
|
|
5
5
|
metaspn_entities/adapter.py
|
|
6
|
+
metaspn_entities/attribution.py
|
|
6
7
|
metaspn_entities/context.py
|
|
8
|
+
metaspn_entities/demo.py
|
|
7
9
|
metaspn_entities/events.py
|
|
8
10
|
metaspn_entities/models.py
|
|
9
11
|
metaspn_entities/normalize.py
|
|
@@ -15,7 +17,9 @@ metaspn_entities.egg-info/dependency_links.txt
|
|
|
15
17
|
metaspn_entities.egg-info/requires.txt
|
|
16
18
|
metaspn_entities.egg-info/top_level.txt
|
|
17
19
|
tests/test_adapter.py
|
|
20
|
+
tests/test_attribution.py
|
|
18
21
|
tests/test_context.py
|
|
22
|
+
tests/test_demo_support.py
|
|
19
23
|
tests/test_event_contract.py
|
|
20
24
|
tests/test_recommendation_context.py
|
|
21
25
|
tests/test_resolver.py
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import tempfile
|
|
2
|
+
import unittest
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from metaspn_entities.resolver import EntityResolver
|
|
6
|
+
from metaspn_entities.sqlite_backend import SQLiteEntityStore
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AttributionTests(unittest.TestCase):
|
|
10
|
+
def setUp(self) -> None:
|
|
11
|
+
self.tempdir = tempfile.TemporaryDirectory()
|
|
12
|
+
self.db_path = str(Path(self.tempdir.name) / "entities.db")
|
|
13
|
+
self.store = SQLiteEntityStore(self.db_path)
|
|
14
|
+
self.resolver = EntityResolver(self.store)
|
|
15
|
+
|
|
16
|
+
def tearDown(self) -> None:
|
|
17
|
+
self.store.close()
|
|
18
|
+
self.tempdir.cleanup()
|
|
19
|
+
|
|
20
|
+
def test_merge_after_attempt_before_outcome(self) -> None:
|
|
21
|
+
attempt = self.resolver.resolve("twitter_handle", "attempt_user")
|
|
22
|
+
winner = self.resolver.resolve("twitter_handle", "winner_user")
|
|
23
|
+
self.resolver.add_alias(attempt.entity_id, "email", "attempt@example.com", confidence=0.9)
|
|
24
|
+
|
|
25
|
+
# Merge happens between attempt capture and outcome ingestion.
|
|
26
|
+
self.resolver.merge_entities(attempt.entity_id, winner.entity_id, reason="dedupe")
|
|
27
|
+
|
|
28
|
+
result = self.resolver.attribute_outcome(
|
|
29
|
+
{
|
|
30
|
+
"entity_id": attempt.entity_id,
|
|
31
|
+
"email": "attempt@example.com",
|
|
32
|
+
}
|
|
33
|
+
)
|
|
34
|
+
self.assertEqual(result.entity_id, winner.entity_id)
|
|
35
|
+
self.assertGreater(result.confidence, 0.9)
|
|
36
|
+
|
|
37
|
+
def test_undo_merge_edge_case(self) -> None:
|
|
38
|
+
a = self.resolver.resolve("twitter_handle", "undo_attr_a")
|
|
39
|
+
b = self.resolver.resolve("twitter_handle", "undo_attr_b")
|
|
40
|
+
self.resolver.merge_entities(a.entity_id, b.entity_id, reason="dedupe")
|
|
41
|
+
self.resolver.undo_merge(a.entity_id, b.entity_id)
|
|
42
|
+
|
|
43
|
+
# After undo implementation, b redirects to a.
|
|
44
|
+
result = self.resolver.attribute_outcome({"entity_id": b.entity_id})
|
|
45
|
+
self.assertEqual(result.entity_id, a.entity_id)
|
|
46
|
+
self.assertAlmostEqual(result.confidence, 0.99, places=6)
|
|
47
|
+
|
|
48
|
+
def test_conflicting_aliases_tie_break_by_confidence(self) -> None:
|
|
49
|
+
high = self.resolver.resolve("twitter_handle", "high_conf")
|
|
50
|
+
low = self.resolver.resolve("twitter_handle", "low_conf")
|
|
51
|
+
|
|
52
|
+
self.resolver.add_alias(high.entity_id, "email", "high@example.com", confidence=0.95)
|
|
53
|
+
self.resolver.add_alias(low.entity_id, "canonical_url", "https://low.example.com/profile", confidence=0.60)
|
|
54
|
+
|
|
55
|
+
result = self.resolver.attribute_outcome(
|
|
56
|
+
{
|
|
57
|
+
"email": "HIGH@example.com",
|
|
58
|
+
"canonical_url": "https://low.example.com/profile/",
|
|
59
|
+
}
|
|
60
|
+
)
|
|
61
|
+
self.assertEqual(result.entity_id, high.entity_id)
|
|
62
|
+
self.assertAlmostEqual(result.confidence, 0.475, places=6)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
if __name__ == "__main__":
|
|
66
|
+
unittest.main()
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import tempfile
|
|
2
|
+
import unittest
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from metaspn_entities.demo import resolve_demo_social_identity
|
|
6
|
+
from metaspn_entities.resolver import EntityResolver
|
|
7
|
+
from metaspn_entities.sqlite_backend import SQLiteEntityStore
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DemoSupportTests(unittest.TestCase):
|
|
11
|
+
def setUp(self) -> None:
|
|
12
|
+
self.tempdir = tempfile.TemporaryDirectory()
|
|
13
|
+
self.db_path = str(Path(self.tempdir.name) / "entities.db")
|
|
14
|
+
self.store = SQLiteEntityStore(self.db_path)
|
|
15
|
+
self.resolver = EntityResolver(self.store)
|
|
16
|
+
|
|
17
|
+
def tearDown(self) -> None:
|
|
18
|
+
self.store.close()
|
|
19
|
+
self.tempdir.cleanup()
|
|
20
|
+
|
|
21
|
+
def test_same_author_repeated_days_stable_entity(self) -> None:
|
|
22
|
+
day_1 = {
|
|
23
|
+
"platform": "x",
|
|
24
|
+
"author_handle": "@DemoAuthor",
|
|
25
|
+
"profile_url": "https://x.com/demoauthor",
|
|
26
|
+
"source": "demo.day1",
|
|
27
|
+
}
|
|
28
|
+
day_2 = {
|
|
29
|
+
"platform": "x",
|
|
30
|
+
"author_handle": "demoauthor",
|
|
31
|
+
"profile_url": "https://x.com/demoauthor/",
|
|
32
|
+
"source": "demo.day2",
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
first = resolve_demo_social_identity(self.resolver, day_1)
|
|
36
|
+
second = resolve_demo_social_identity(self.resolver, day_2)
|
|
37
|
+
|
|
38
|
+
self.assertEqual(first["entity_id"], second["entity_id"])
|
|
39
|
+
|
|
40
|
+
def test_alias_collision_merge_safe_continuity(self) -> None:
|
|
41
|
+
one = resolve_demo_social_identity(
|
|
42
|
+
self.resolver,
|
|
43
|
+
{
|
|
44
|
+
"platform": "twitter",
|
|
45
|
+
"author_handle": "alpha_demo",
|
|
46
|
+
"profile_url": "https://example.com/u/shared",
|
|
47
|
+
"source": "demo.a",
|
|
48
|
+
},
|
|
49
|
+
)
|
|
50
|
+
two = resolve_demo_social_identity(
|
|
51
|
+
self.resolver,
|
|
52
|
+
{
|
|
53
|
+
"platform": "linkedin",
|
|
54
|
+
"author_handle": "beta_demo",
|
|
55
|
+
"profile_url": "http://www.example.com/u/shared/",
|
|
56
|
+
"source": "demo.b",
|
|
57
|
+
},
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# canonical_url collision should keep one canonical identity for both reruns.
|
|
61
|
+
self.assertEqual(one["entity_id"], two["entity_id"])
|
|
62
|
+
|
|
63
|
+
def test_digest_payload_contains_explainability_context(self) -> None:
|
|
64
|
+
payload = resolve_demo_social_identity(
|
|
65
|
+
self.resolver,
|
|
66
|
+
{
|
|
67
|
+
"platform": "bluesky",
|
|
68
|
+
"author_handle": "DigestUser",
|
|
69
|
+
"profile_url": "https://bsky.app/profile/digestuser",
|
|
70
|
+
"source": "demo.digest",
|
|
71
|
+
},
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
self.assertIn("entity_id", payload)
|
|
75
|
+
self.assertIn("confidence", payload)
|
|
76
|
+
self.assertIn("matched_identifiers", payload)
|
|
77
|
+
self.assertIn("why", payload)
|
|
78
|
+
self.assertIn("confidence_summary", payload["why"])
|
|
79
|
+
self.assertGreaterEqual(payload["why"]["matched_identifier_count"], 1)
|
|
80
|
+
self.assertIsInstance(payload["events"], list)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
if __name__ == "__main__":
|
|
84
|
+
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{metaspn_entities-0.1.6 → metaspn_entities-0.1.8}/metaspn_entities.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|