metaspn-entities 0.1.0__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaspn_entities/events.py +28 -12
- metaspn_entities/resolver.py +9 -14
- {metaspn_entities-0.1.0.dist-info → metaspn_entities-0.1.3.dist-info}/METADATA +15 -1
- metaspn_entities-0.1.3.dist-info/RECORD +11 -0
- metaspn_entities-0.1.0.dist-info/RECORD +0 -11
- {metaspn_entities-0.1.0.dist-info → metaspn_entities-0.1.3.dist-info}/WHEEL +0 -0
- {metaspn_entities-0.1.0.dist-info → metaspn_entities-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {metaspn_entities-0.1.0.dist-info → metaspn_entities-0.1.3.dist-info}/top_level.txt +0 -0
metaspn_entities/events.py
CHANGED
|
@@ -1,8 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime, timezone
|
|
4
5
|
from typing import Any, Dict
|
|
5
6
|
|
|
7
|
+
DEFAULT_SCHEMA_VERSION = "0.1"
|
|
8
|
+
try:
|
|
9
|
+
from metaspn_schemas.core import DEFAULT_SCHEMA_VERSION as _SCHEMA_VERSION
|
|
10
|
+
|
|
11
|
+
DEFAULT_SCHEMA_VERSION = _SCHEMA_VERSION
|
|
12
|
+
except Exception:
|
|
13
|
+
# Keep local behavior deterministic when dependency is not importable in dev sandboxes.
|
|
14
|
+
pass
|
|
15
|
+
|
|
6
16
|
|
|
7
17
|
@dataclass(frozen=True)
|
|
8
18
|
class EmittedEvent:
|
|
@@ -12,38 +22,44 @@ class EmittedEvent:
|
|
|
12
22
|
|
|
13
23
|
class EventFactory:
|
|
14
24
|
@staticmethod
|
|
15
|
-
def
|
|
25
|
+
def _now() -> datetime:
|
|
26
|
+
return datetime.now(timezone.utc).replace(microsecond=0)
|
|
27
|
+
|
|
28
|
+
@staticmethod
|
|
29
|
+
def entity_resolved(entity_id: str, resolver: str, confidence: float) -> EmittedEvent:
|
|
16
30
|
return EmittedEvent(
|
|
17
31
|
event_type="EntityResolved",
|
|
18
32
|
payload={
|
|
19
33
|
"entity_id": entity_id,
|
|
20
|
-
"
|
|
21
|
-
"
|
|
34
|
+
"resolver": resolver,
|
|
35
|
+
"resolved_at": EventFactory._now().isoformat(),
|
|
22
36
|
"confidence": confidence,
|
|
23
|
-
"
|
|
37
|
+
"schema_version": DEFAULT_SCHEMA_VERSION,
|
|
24
38
|
},
|
|
25
39
|
)
|
|
26
40
|
|
|
27
41
|
@staticmethod
|
|
28
|
-
def entity_merged(
|
|
42
|
+
def entity_merged(entity_id: str, merged_from: tuple[str, ...], reason: str | None = None) -> EmittedEvent:
|
|
29
43
|
return EmittedEvent(
|
|
30
44
|
event_type="EntityMerged",
|
|
31
45
|
payload={
|
|
32
|
-
"
|
|
33
|
-
"
|
|
46
|
+
"entity_id": entity_id,
|
|
47
|
+
"merged_from": list(merged_from),
|
|
48
|
+
"merged_at": EventFactory._now().isoformat(),
|
|
34
49
|
"reason": reason,
|
|
35
|
-
"
|
|
50
|
+
"schema_version": DEFAULT_SCHEMA_VERSION,
|
|
36
51
|
},
|
|
37
52
|
)
|
|
38
53
|
|
|
39
54
|
@staticmethod
|
|
40
|
-
def entity_alias_added(entity_id: str,
|
|
55
|
+
def entity_alias_added(entity_id: str, alias: str, alias_type: str) -> EmittedEvent:
|
|
41
56
|
return EmittedEvent(
|
|
42
57
|
event_type="EntityAliasAdded",
|
|
43
58
|
payload={
|
|
44
59
|
"entity_id": entity_id,
|
|
45
|
-
"
|
|
46
|
-
"
|
|
47
|
-
"
|
|
60
|
+
"alias": alias,
|
|
61
|
+
"alias_type": alias_type,
|
|
62
|
+
"added_at": EventFactory._now().isoformat(),
|
|
63
|
+
"schema_version": DEFAULT_SCHEMA_VERSION,
|
|
48
64
|
},
|
|
49
65
|
)
|
metaspn_entities/resolver.py
CHANGED
|
@@ -39,12 +39,11 @@ class EntityResolver:
|
|
|
39
39
|
created_new_entity=False,
|
|
40
40
|
matched_identifiers=matched_identifiers,
|
|
41
41
|
)
|
|
42
|
-
self._event_buffer.append(
|
|
43
|
-
EventFactory.entity_resolved(entity_id, identifier_type, value, resolution.confidence, False)
|
|
44
|
-
)
|
|
42
|
+
self._event_buffer.append(EventFactory.entity_resolved(entity_id, caused_by, resolution.confidence))
|
|
45
43
|
return resolution
|
|
46
44
|
|
|
47
45
|
entity_id = self.store.create_entity(entity_type)
|
|
46
|
+
created_entity_id = entity_id
|
|
48
47
|
added, conflicting_entity_id = self.store.add_alias(
|
|
49
48
|
identifier_type=identifier_type,
|
|
50
49
|
normalized_value=normalized,
|
|
@@ -58,9 +57,7 @@ class EntityResolver:
|
|
|
58
57
|
merge_reason = f"auto-merge on {identifier_type}:{normalized}"
|
|
59
58
|
self.store.merge_entities(entity_id, conflicting_entity_id, merge_reason, "auto-merge")
|
|
60
59
|
entity_id = self.store.canonical_entity_id(conflicting_entity_id)
|
|
61
|
-
self._event_buffer.append(
|
|
62
|
-
EventFactory.entity_merged(entity_id, conflicting_entity_id, merge_reason, "auto-merge")
|
|
63
|
-
)
|
|
60
|
+
self._event_buffer.append(EventFactory.entity_merged(entity_id, (created_entity_id,), merge_reason))
|
|
64
61
|
|
|
65
62
|
matched_identifiers = list(self.store.iter_identifiers_for_entity(entity_id))
|
|
66
63
|
resolution = EntityResolution(
|
|
@@ -70,10 +67,8 @@ class EntityResolver:
|
|
|
70
67
|
matched_identifiers=matched_identifiers,
|
|
71
68
|
)
|
|
72
69
|
if added:
|
|
73
|
-
self._event_buffer.append(EventFactory.entity_alias_added(entity_id,
|
|
74
|
-
self._event_buffer.append(
|
|
75
|
-
EventFactory.entity_resolved(entity_id, identifier_type, value, resolution.confidence, True)
|
|
76
|
-
)
|
|
70
|
+
self._event_buffer.append(EventFactory.entity_alias_added(entity_id, normalized, identifier_type))
|
|
71
|
+
self._event_buffer.append(EventFactory.entity_resolved(entity_id, caused_by, resolution.confidence))
|
|
77
72
|
return resolution
|
|
78
73
|
|
|
79
74
|
def add_alias(
|
|
@@ -102,7 +97,7 @@ class EntityResolver:
|
|
|
102
97
|
if identifier_type in AUTO_MERGE_IDENTIFIER_TYPES:
|
|
103
98
|
reason = f"auto-merge on {identifier_type}:{normalized}"
|
|
104
99
|
self.store.merge_entities(canonical_entity_id, conflicting_entity_id, reason, "auto-merge")
|
|
105
|
-
event = EventFactory.entity_merged(
|
|
100
|
+
event = EventFactory.entity_merged(conflicting_entity_id, (canonical_entity_id,), reason)
|
|
106
101
|
self._event_buffer.append(event)
|
|
107
102
|
return [event]
|
|
108
103
|
raise ValueError(
|
|
@@ -112,7 +107,7 @@ class EntityResolver:
|
|
|
112
107
|
if not added:
|
|
113
108
|
return []
|
|
114
109
|
|
|
115
|
-
event = EventFactory.entity_alias_added(canonical_entity_id,
|
|
110
|
+
event = EventFactory.entity_alias_added(canonical_entity_id, normalized, identifier_type)
|
|
116
111
|
self._event_buffer.append(event)
|
|
117
112
|
return [event]
|
|
118
113
|
|
|
@@ -120,7 +115,7 @@ class EntityResolver:
|
|
|
120
115
|
self.store.ensure_entity(from_entity_id)
|
|
121
116
|
self.store.ensure_entity(to_entity_id)
|
|
122
117
|
self.store.merge_entities(from_entity_id, to_entity_id, reason, caused_by)
|
|
123
|
-
event = EventFactory.entity_merged(
|
|
118
|
+
event = EventFactory.entity_merged(self.store.canonical_entity_id(to_entity_id), (from_entity_id,), reason)
|
|
124
119
|
self._event_buffer.append(event)
|
|
125
120
|
return event
|
|
126
121
|
|
|
@@ -130,7 +125,7 @@ class EntityResolver:
|
|
|
130
125
|
self.store.remove_redirect(from_entity_id)
|
|
131
126
|
self.store.set_entity_status(from_entity_id, EntityStatus.ACTIVE)
|
|
132
127
|
self.store.merge_entities(to_entity_id, from_entity_id, reason, caused_by)
|
|
133
|
-
event = EventFactory.entity_merged(
|
|
128
|
+
event = EventFactory.entity_merged(self.store.canonical_entity_id(from_entity_id), (to_entity_id,), reason)
|
|
134
129
|
self._event_buffer.append(event)
|
|
135
130
|
return event
|
|
136
131
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: metaspn-entities
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
|
|
5
5
|
Author: MetaSPN Contributors
|
|
6
6
|
License-Expression: MIT
|
|
@@ -55,3 +55,17 @@ print(resolution.entity_id, resolution.confidence)
|
|
|
55
55
|
- `undo_merge(from_entity_id, to_entity_id, ...)` (implemented as reverse merge with redirect correction)
|
|
56
56
|
- `drain_events() -> list[EmittedEvent]`
|
|
57
57
|
- `export_snapshot(output_path)` to inspect SQLite state as JSON
|
|
58
|
+
|
|
59
|
+
## Event Contract Guarantees
|
|
60
|
+
|
|
61
|
+
`drain_events()` returns `EmittedEvent` objects whose `event_type` and `payload` are
|
|
62
|
+
schema-compatible with `metaspn-schemas` entity events.
|
|
63
|
+
|
|
64
|
+
- `EntityResolved` payload keys:
|
|
65
|
+
- `entity_id`, `resolver`, `resolved_at`, `confidence`, `schema_version`
|
|
66
|
+
- `EntityMerged` payload keys:
|
|
67
|
+
- `entity_id`, `merged_from`, `merged_at`, `reason`, `schema_version`
|
|
68
|
+
- `EntityAliasAdded` payload keys:
|
|
69
|
+
- `entity_id`, `alias`, `alias_type`, `added_at`, `schema_version`
|
|
70
|
+
|
|
71
|
+
Datetime fields are emitted as UTC ISO-8601 strings for deterministic serialization.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
metaspn_entities/__init__.py,sha256=7hsZse74SfsKugEKs3P6hIyfSDuzDWdeUFQdnev2yBo,259
|
|
2
|
+
metaspn_entities/events.py,sha256=Hkc3gy5_vRTSR0MKUvF24dTqNqOkG423_PTUe7csUfw,2066
|
|
3
|
+
metaspn_entities/models.py,sha256=b2EFsc1EIT9Ao_bKA2I52-5W_0fTwhsyO6VFRG8gZg8,1377
|
|
4
|
+
metaspn_entities/normalize.py,sha256=nPAHRfipgS6zHy2x70ZFd5HB1W4FKmeTF8Kd4TYz5tI,1125
|
|
5
|
+
metaspn_entities/resolver.py,sha256=350XMAng6qJvZqRmmikP7mRZtA22pZ2CwZcvmk-q8tU,6654
|
|
6
|
+
metaspn_entities/sqlite_backend.py,sha256=0QmHkMd4XZxdSmgvn_s6xsRr5Ocv0b6dERF_QSUuelM,10562
|
|
7
|
+
metaspn_entities-0.1.3.dist-info/licenses/LICENSE,sha256=tvVpto97dUnh1-KVYPs1rCr5dzyX8jUyNmT7F7ZPVAM,1077
|
|
8
|
+
metaspn_entities-0.1.3.dist-info/METADATA,sha256=NWxTysmpuXB1KY-lQl4QqKOnisF6eqAfBmJWi7PtzWI,2524
|
|
9
|
+
metaspn_entities-0.1.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
10
|
+
metaspn_entities-0.1.3.dist-info/top_level.txt,sha256=YP2V8Z1Statrs3YAI-tGvyC73vLjPHr9Vkal4yqXkhs,17
|
|
11
|
+
metaspn_entities-0.1.3.dist-info/RECORD,,
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
metaspn_entities/__init__.py,sha256=7hsZse74SfsKugEKs3P6hIyfSDuzDWdeUFQdnev2yBo,259
|
|
2
|
-
metaspn_entities/events.py,sha256=plfAg6GYwAwYPz0CC6ryS82WUl6oo23NR-vPZyFAJdA,1567
|
|
3
|
-
metaspn_entities/models.py,sha256=b2EFsc1EIT9Ao_bKA2I52-5W_0fTwhsyO6VFRG8gZg8,1377
|
|
4
|
-
metaspn_entities/normalize.py,sha256=nPAHRfipgS6zHy2x70ZFd5HB1W4FKmeTF8Kd4TYz5tI,1125
|
|
5
|
-
metaspn_entities/resolver.py,sha256=aM37vYLev_CmYVxriRyVrQq8x49LW4olIdPyeZIH4Ag,6737
|
|
6
|
-
metaspn_entities/sqlite_backend.py,sha256=0QmHkMd4XZxdSmgvn_s6xsRr5Ocv0b6dERF_QSUuelM,10562
|
|
7
|
-
metaspn_entities-0.1.0.dist-info/licenses/LICENSE,sha256=tvVpto97dUnh1-KVYPs1rCr5dzyX8jUyNmT7F7ZPVAM,1077
|
|
8
|
-
metaspn_entities-0.1.0.dist-info/METADATA,sha256=R-cIauos_ZIaWF10KiICr_TsCw8zATTj8VPkXQIDazM,1950
|
|
9
|
-
metaspn_entities-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
10
|
-
metaspn_entities-0.1.0.dist-info/top_level.txt,sha256=YP2V8Z1Statrs3YAI-tGvyC73vLjPHr9Vkal4yqXkhs,17
|
|
11
|
-
metaspn_entities-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|