metaspn-entities 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,12 @@
1
+ from .adapter import SignalResolutionResult, resolve_normalized_social_signal
1
2
  from .events import EmittedEvent
2
3
  from .models import EntityResolution
3
4
  from .resolver import EntityResolver
4
5
  from .sqlite_backend import SQLiteEntityStore
5
6
 
6
7
  __all__ = [
8
+ "resolve_normalized_social_signal",
9
+ "SignalResolutionResult",
7
10
  "EntityResolver",
8
11
  "EntityResolution",
9
12
  "EmittedEvent",
@@ -0,0 +1,131 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
5
+
6
+ from .events import EmittedEvent
7
+ from .models import EntityType
8
+ from .resolver import EntityResolver
9
+
10
+
11
@dataclass(frozen=True)
class SignalResolutionResult:
    """Immutable summary returned by ``resolve_normalized_social_signal``.

    ``frozen=True`` only prevents rebinding the attributes; the
    ``emitted_events`` list object itself can still be mutated by a caller.
    """

    # Identifier of the entity the signal was resolved to.
    entity_id: str
    # Confidence reported by the resolver for the primary identifier.
    confidence: float
    # Events drained from the resolver at the end of the adapter call.
    emitted_events: List[EmittedEvent]
16
+
17
+
18
def resolve_normalized_social_signal(
    resolver: EntityResolver,
    signal_envelope: Mapping[str, Any] | Any,
    *,
    default_entity_type: str = EntityType.PERSON,
    caused_by: str = "m0-ingestion",
) -> SignalResolutionResult:
    """Map a normalized social signal envelope onto a single canonical entity.

    The envelope's ``payload`` is scanned for identifiers in a fixed priority
    order. The first identifier is resolved through ``resolver.resolve``;
    every remaining identifier is attached to the resulting entity with
    ``resolver.add_alias``, in the same order.

    Args:
        resolver: Resolver used for resolution, aliasing and event draining.
        signal_envelope: A mapping, or an object exposing ``to_dict()``.
        default_entity_type: Passed to the resolver as ``entity_type``.
        caused_by: Attribution forwarded to the resolver for every action.

    Returns:
        The resolved entity id, the resolution confidence, and the events
        drained from the resolver at the end of this call.

    Raises:
        TypeError: If the envelope or its payload cannot be coerced to a dict.
        ValueError: If the payload yields no usable identifier.
    """
    # Throw away anything already queued on the resolver so that the events
    # returned below were produced by this invocation only.
    resolver.drain_events()

    envelope = _coerce_envelope(signal_envelope)
    payload = _coerce_payload(envelope.get("payload"))
    provenance = str(envelope.get("source") or "unknown-source")

    ranked = _extract_identifiers(payload)
    if not ranked:
        raise ValueError("No resolvable identifiers found in normalized social signal payload")

    # Highest-priority identifier drives resolution; the rest become aliases.
    (lead_type, lead_value, lead_confidence), *secondary = ranked
    resolve_context = {
        "confidence": lead_confidence,
        "entity_type": default_entity_type,
        "caused_by": caused_by,
        "provenance": provenance,
    }
    resolution = resolver.resolve(lead_type, lead_value, context=resolve_context)

    for alias_type, alias_value, alias_confidence in secondary:
        resolver.add_alias(
            resolution.entity_id,
            alias_type,
            alias_value,
            confidence=alias_confidence,
            caused_by=caused_by,
            provenance=provenance,
        )

    produced = resolver.drain_events()
    return SignalResolutionResult(
        entity_id=resolution.entity_id,
        confidence=resolution.confidence,
        emitted_events=produced,
    )
72
+
73
+
74
def _coerce_envelope(signal_envelope: Mapping[str, Any] | Any) -> Dict[str, Any]:
    """Return a plain ``dict`` copy of the signal envelope.

    Mappings are copied directly; any other object must expose a callable
    ``to_dict`` attribute whose result is copied instead.

    Raises:
        TypeError: If the argument is neither a mapping nor provides
            a callable ``to_dict``.
    """
    if isinstance(signal_envelope, Mapping):
        return dict(signal_envelope)

    converter = getattr(signal_envelope, "to_dict", None)
    if not callable(converter):
        raise TypeError("signal_envelope must be a mapping or provide to_dict()")
    return dict(converter())
80
+
81
+
82
def _coerce_payload(payload: Any) -> Dict[str, Any]:
    """Return a plain ``dict`` copy of the envelope payload.

    ``None`` is treated as an empty payload. Mappings are copied directly;
    any other object must expose a callable ``to_dict`` attribute.

    Raises:
        TypeError: If the payload is neither ``None``, a mapping, nor an
            object providing a callable ``to_dict``.
    """
    if payload is None:
        return {}
    if isinstance(payload, Mapping):
        return dict(payload)

    exporter = getattr(payload, "to_dict", None)
    if callable(exporter):
        return dict(exporter())
    raise TypeError("signal payload must be a mapping or provide to_dict()")
90
+
91
+
92
def _first_text(payload: Dict[str, Any], keys: Iterable[str]) -> Optional[str]:
    """Return the stripped value of the first key in *keys* holding a non-blank string."""
    for key in keys:
        value = payload.get(key)
        if isinstance(value, str):
            text = value.strip()
            if text:
                return text
    return None


def _extract_identifiers(payload: Dict[str, Any]) -> List[Tuple[str, str, float]]:
    """Collect identifier candidates from a normalized payload, best first.

    Each rule contributes at most one identifier: the first of its payload
    keys that holds a non-blank string (surrounding whitespace is stripped).
    Blank or non-string values are skipped so a later key of the same rule
    can still supply the identifier; in particular a blank ``author_handle``
    no longer hides a usable ``handle``.

    Args:
        payload: Normalized signal payload.

    Returns:
        ``(identifier_type, value, confidence)`` tuples ordered by priority:
        email, canonical URL, handle, domain, name. The handle type is
        ``"<platform>_handle"`` when ``platform`` is set, else ``"handle"``.
    """
    platform = str(payload.get("platform") or "").strip().lower()
    handle_type = f"{platform}_handle" if platform else "handle"

    # (priority, identifier type, payload keys tried in order, confidence).
    # Highest confidence identifiers first.
    rules = (
        (0, "email", ("email",), 0.98),
        (1, "canonical_url", ("profile_url", "author_url", "canonical_url"), 0.96),
        (2, handle_type, ("author_handle", "handle"), 0.93),
        (3, "domain", ("domain",), 0.9),
        (4, "name", ("display_name", "name"), 0.7),
    )

    candidates: List[Tuple[int, str, str, float]] = []
    for priority, id_type, keys, confidence in rules:
        text = _first_text(payload, keys)
        if text is not None:
            candidates.append((priority, id_type, text, confidence))

    # Deduplicate by (identifier_type, value) while preserving deterministic order.
    seen = set()
    ordered: List[Tuple[str, str, float]] = []
    for _, id_type, id_value, confidence in sorted(candidates, key=lambda c: c[:3]):
        if (id_type, id_value) in seen:
            continue
        seen.add((id_type, id_value))
        ordered.append((id_type, id_value, confidence))

    return ordered
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metaspn-entities
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
5
5
  Author: MetaSPN Contributors
6
6
  License-Expression: MIT
@@ -69,3 +69,33 @@ schema-compatible with `metaspn-schemas` entity events.
69
69
  - `entity_id`, `alias`, `alias_type`, `added_at`, `schema_version`
70
70
 
71
71
  Datetime fields are emitted as UTC ISO-8601 strings for deterministic serialization.
72
+
73
+ ## M0 Ingestion Adapter
74
+
75
+ For worker/runtime integration, use `resolve_normalized_social_signal(...)` with a
76
+ normalized signal envelope.
77
+
78
+ ```python
79
+ from metaspn_entities import EntityResolver, resolve_normalized_social_signal
80
+
81
+ resolver = EntityResolver()
82
+ signal = {
83
+ "source": "social.ingest",
84
+ "payload_type": "SocialPostSeen",
85
+ "payload": {
86
+ "platform": "twitter",
87
+ "author_handle": "@some_handle",
88
+ "profile_url": "https://example.com/profiles/some-handle",
89
+ },
90
+ }
91
+
92
+ result = resolve_normalized_social_signal(resolver, signal)
93
+ print(result.entity_id, result.confidence)
94
+ for event in result.emitted_events:
95
+ print(event.event_type, event.payload)
96
+ ```
97
+
98
+ Adapter behavior:
99
+ - Extracts deterministic identifier candidates from normalized payloads.
100
+ - Resolves a primary identifier, then adds remaining identifiers as aliases.
101
+ - Returns only events produced during the adapter call.
@@ -1,11 +1,12 @@
1
- metaspn_entities/__init__.py,sha256=7hsZse74SfsKugEKs3P6hIyfSDuzDWdeUFQdnev2yBo,259
1
+ metaspn_entities/__init__.py,sha256=BkVyTYAyQBV8h9EO66VAygcIoK4oPbPQe6wW6oMdMTk,407
2
+ metaspn_entities/adapter.py,sha256=eNB5kr1tinav85WPA4YCldRDJBgb6uYe3ZWCRVjdOms,4654
2
3
  metaspn_entities/events.py,sha256=Hkc3gy5_vRTSR0MKUvF24dTqNqOkG423_PTUe7csUfw,2066
3
4
  metaspn_entities/models.py,sha256=b2EFsc1EIT9Ao_bKA2I52-5W_0fTwhsyO6VFRG8gZg8,1377
4
5
  metaspn_entities/normalize.py,sha256=nPAHRfipgS6zHy2x70ZFd5HB1W4FKmeTF8Kd4TYz5tI,1125
5
6
  metaspn_entities/resolver.py,sha256=350XMAng6qJvZqRmmikP7mRZtA22pZ2CwZcvmk-q8tU,6654
6
7
  metaspn_entities/sqlite_backend.py,sha256=0QmHkMd4XZxdSmgvn_s6xsRr5Ocv0b6dERF_QSUuelM,10562
7
- metaspn_entities-0.1.3.dist-info/licenses/LICENSE,sha256=tvVpto97dUnh1-KVYPs1rCr5dzyX8jUyNmT7F7ZPVAM,1077
8
- metaspn_entities-0.1.3.dist-info/METADATA,sha256=NWxTysmpuXB1KY-lQl4QqKOnisF6eqAfBmJWi7PtzWI,2524
9
- metaspn_entities-0.1.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
10
- metaspn_entities-0.1.3.dist-info/top_level.txt,sha256=YP2V8Z1Statrs3YAI-tGvyC73vLjPHr9Vkal4yqXkhs,17
11
- metaspn_entities-0.1.3.dist-info/RECORD,,
8
+ metaspn_entities-0.1.4.dist-info/licenses/LICENSE,sha256=tvVpto97dUnh1-KVYPs1rCr5dzyX8jUyNmT7F7ZPVAM,1077
9
+ metaspn_entities-0.1.4.dist-info/METADATA,sha256=qg5xn2Rg0Aw4KTakgJTKchlUjodGHutwxYxCqiOYeAY,3436
10
+ metaspn_entities-0.1.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
11
+ metaspn_entities-0.1.4.dist-info/top_level.txt,sha256=YP2V8Z1Statrs3YAI-tGvyC73vLjPHr9Vkal4yqXkhs,17
12
+ metaspn_entities-0.1.4.dist-info/RECORD,,