metaspn-entities 0.1.0__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/PKG-INFO +15 -1
  2. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/README.md +14 -0
  3. metaspn_entities-0.1.3/metaspn_entities/events.py +65 -0
  4. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/metaspn_entities/resolver.py +9 -14
  5. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/metaspn_entities.egg-info/PKG-INFO +15 -1
  6. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/metaspn_entities.egg-info/SOURCES.txt +1 -0
  7. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/pyproject.toml +1 -1
  8. metaspn_entities-0.1.3/tests/test_event_contract.py +98 -0
  9. metaspn_entities-0.1.0/metaspn_entities/events.py +0 -49
  10. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/LICENSE +0 -0
  11. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/metaspn_entities/__init__.py +0 -0
  12. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/metaspn_entities/models.py +0 -0
  13. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/metaspn_entities/normalize.py +0 -0
  14. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/metaspn_entities/sqlite_backend.py +0 -0
  15. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/metaspn_entities.egg-info/dependency_links.txt +0 -0
  16. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/metaspn_entities.egg-info/requires.txt +0 -0
  17. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/metaspn_entities.egg-info/top_level.txt +0 -0
  18. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/setup.cfg +0 -0
  19. {metaspn_entities-0.1.0 → metaspn_entities-0.1.3}/tests/test_resolver.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metaspn-entities
3
- Version: 0.1.0
3
+ Version: 0.1.3
4
4
  Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
5
5
  Author: MetaSPN Contributors
6
6
  License-Expression: MIT
@@ -55,3 +55,17 @@ print(resolution.entity_id, resolution.confidence)
55
55
  - `undo_merge(from_entity_id, to_entity_id, ...)` (implemented as reverse merge with redirect correction)
56
56
  - `drain_events() -> list[EmittedEvent]`
57
57
  - `export_snapshot(output_path)` to inspect SQLite state as JSON
58
+
59
+ ## Event Contract Guarantees
60
+
61
+ `drain_events()` returns `EmittedEvent` objects whose `event_type` and `payload` are
62
+ schema-compatible with `metaspn-schemas` entity events.
63
+
64
+ - `EntityResolved` payload keys:
65
+ - `entity_id`, `resolver`, `resolved_at`, `confidence`, `schema_version`
66
+ - `EntityMerged` payload keys:
67
+ - `entity_id`, `merged_from`, `merged_at`, `reason`, `schema_version`
68
+ - `EntityAliasAdded` payload keys:
69
+ - `entity_id`, `alias`, `alias_type`, `added_at`, `schema_version`
70
+
71
+ Datetime fields are emitted as UTC ISO-8601 strings for deterministic serialization.
@@ -30,3 +30,17 @@ print(resolution.entity_id, resolution.confidence)
30
30
  - `undo_merge(from_entity_id, to_entity_id, ...)` (implemented as reverse merge with redirect correction)
31
31
  - `drain_events() -> list[EmittedEvent]`
32
32
  - `export_snapshot(output_path)` to inspect SQLite state as JSON
33
+
34
+ ## Event Contract Guarantees
35
+
36
+ `drain_events()` returns `EmittedEvent` objects whose `event_type` and `payload` are
37
+ schema-compatible with `metaspn-schemas` entity events.
38
+
39
+ - `EntityResolved` payload keys:
40
+ - `entity_id`, `resolver`, `resolved_at`, `confidence`, `schema_version`
41
+ - `EntityMerged` payload keys:
42
+ - `entity_id`, `merged_from`, `merged_at`, `reason`, `schema_version`
43
+ - `EntityAliasAdded` payload keys:
44
+ - `entity_id`, `alias`, `alias_type`, `added_at`, `schema_version`
45
+
46
+ Datetime fields are emitted as UTC ISO-8601 strings for deterministic serialization.
@@ -0,0 +1,65 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime, timezone
5
+ from typing import Any, Dict
6
+
7
+ DEFAULT_SCHEMA_VERSION = "0.1"
8
+ try:
9
+ from metaspn_schemas.core import DEFAULT_SCHEMA_VERSION as _SCHEMA_VERSION
10
+
11
+ DEFAULT_SCHEMA_VERSION = _SCHEMA_VERSION
12
+ except Exception:
13
+ # Keep local behavior deterministic when dependency is not importable in dev sandboxes.
14
+ pass
15
+
16
+
17
+ @dataclass(frozen=True)
18
+ class EmittedEvent:
19
+ event_type: str
20
+ payload: Dict[str, Any]
21
+
22
+
23
+ class EventFactory:
24
+ @staticmethod
25
+ def _now() -> datetime:
26
+ return datetime.now(timezone.utc).replace(microsecond=0)
27
+
28
+ @staticmethod
29
+ def entity_resolved(entity_id: str, resolver: str, confidence: float) -> EmittedEvent:
30
+ return EmittedEvent(
31
+ event_type="EntityResolved",
32
+ payload={
33
+ "entity_id": entity_id,
34
+ "resolver": resolver,
35
+ "resolved_at": EventFactory._now().isoformat(),
36
+ "confidence": confidence,
37
+ "schema_version": DEFAULT_SCHEMA_VERSION,
38
+ },
39
+ )
40
+
41
+ @staticmethod
42
+ def entity_merged(entity_id: str, merged_from: tuple[str, ...], reason: str | None = None) -> EmittedEvent:
43
+ return EmittedEvent(
44
+ event_type="EntityMerged",
45
+ payload={
46
+ "entity_id": entity_id,
47
+ "merged_from": list(merged_from),
48
+ "merged_at": EventFactory._now().isoformat(),
49
+ "reason": reason,
50
+ "schema_version": DEFAULT_SCHEMA_VERSION,
51
+ },
52
+ )
53
+
54
+ @staticmethod
55
+ def entity_alias_added(entity_id: str, alias: str, alias_type: str) -> EmittedEvent:
56
+ return EmittedEvent(
57
+ event_type="EntityAliasAdded",
58
+ payload={
59
+ "entity_id": entity_id,
60
+ "alias": alias,
61
+ "alias_type": alias_type,
62
+ "added_at": EventFactory._now().isoformat(),
63
+ "schema_version": DEFAULT_SCHEMA_VERSION,
64
+ },
65
+ )
@@ -39,12 +39,11 @@ class EntityResolver:
39
39
  created_new_entity=False,
40
40
  matched_identifiers=matched_identifiers,
41
41
  )
42
- self._event_buffer.append(
43
- EventFactory.entity_resolved(entity_id, identifier_type, value, resolution.confidence, False)
44
- )
42
+ self._event_buffer.append(EventFactory.entity_resolved(entity_id, caused_by, resolution.confidence))
45
43
  return resolution
46
44
 
47
45
  entity_id = self.store.create_entity(entity_type)
46
+ created_entity_id = entity_id
48
47
  added, conflicting_entity_id = self.store.add_alias(
49
48
  identifier_type=identifier_type,
50
49
  normalized_value=normalized,
@@ -58,9 +57,7 @@ class EntityResolver:
58
57
  merge_reason = f"auto-merge on {identifier_type}:{normalized}"
59
58
  self.store.merge_entities(entity_id, conflicting_entity_id, merge_reason, "auto-merge")
60
59
  entity_id = self.store.canonical_entity_id(conflicting_entity_id)
61
- self._event_buffer.append(
62
- EventFactory.entity_merged(entity_id, conflicting_entity_id, merge_reason, "auto-merge")
63
- )
60
+ self._event_buffer.append(EventFactory.entity_merged(entity_id, (created_entity_id,), merge_reason))
64
61
 
65
62
  matched_identifiers = list(self.store.iter_identifiers_for_entity(entity_id))
66
63
  resolution = EntityResolution(
@@ -70,10 +67,8 @@ class EntityResolver:
70
67
  matched_identifiers=matched_identifiers,
71
68
  )
72
69
  if added:
73
- self._event_buffer.append(EventFactory.entity_alias_added(entity_id, identifier_type, normalized, caused_by))
74
- self._event_buffer.append(
75
- EventFactory.entity_resolved(entity_id, identifier_type, value, resolution.confidence, True)
76
- )
70
+ self._event_buffer.append(EventFactory.entity_alias_added(entity_id, normalized, identifier_type))
71
+ self._event_buffer.append(EventFactory.entity_resolved(entity_id, caused_by, resolution.confidence))
77
72
  return resolution
78
73
 
79
74
  def add_alias(
@@ -102,7 +97,7 @@ class EntityResolver:
102
97
  if identifier_type in AUTO_MERGE_IDENTIFIER_TYPES:
103
98
  reason = f"auto-merge on {identifier_type}:{normalized}"
104
99
  self.store.merge_entities(canonical_entity_id, conflicting_entity_id, reason, "auto-merge")
105
- event = EventFactory.entity_merged(canonical_entity_id, conflicting_entity_id, reason, "auto-merge")
100
+ event = EventFactory.entity_merged(conflicting_entity_id, (canonical_entity_id,), reason)
106
101
  self._event_buffer.append(event)
107
102
  return [event]
108
103
  raise ValueError(
@@ -112,7 +107,7 @@ class EntityResolver:
112
107
  if not added:
113
108
  return []
114
109
 
115
- event = EventFactory.entity_alias_added(canonical_entity_id, identifier_type, normalized, caused_by)
110
+ event = EventFactory.entity_alias_added(canonical_entity_id, normalized, identifier_type)
116
111
  self._event_buffer.append(event)
117
112
  return [event]
118
113
 
@@ -120,7 +115,7 @@ class EntityResolver:
120
115
  self.store.ensure_entity(from_entity_id)
121
116
  self.store.ensure_entity(to_entity_id)
122
117
  self.store.merge_entities(from_entity_id, to_entity_id, reason, caused_by)
123
- event = EventFactory.entity_merged(from_entity_id, to_entity_id, reason, caused_by)
118
+ event = EventFactory.entity_merged(self.store.canonical_entity_id(to_entity_id), (from_entity_id,), reason)
124
119
  self._event_buffer.append(event)
125
120
  return event
126
121
 
@@ -130,7 +125,7 @@ class EntityResolver:
130
125
  self.store.remove_redirect(from_entity_id)
131
126
  self.store.set_entity_status(from_entity_id, EntityStatus.ACTIVE)
132
127
  self.store.merge_entities(to_entity_id, from_entity_id, reason, caused_by)
133
- event = EventFactory.entity_merged(to_entity_id, from_entity_id, reason, caused_by)
128
+ event = EventFactory.entity_merged(self.store.canonical_entity_id(from_entity_id), (to_entity_id,), reason)
134
129
  self._event_buffer.append(event)
135
130
  return event
136
131
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metaspn-entities
3
- Version: 0.1.0
3
+ Version: 0.1.3
4
4
  Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
5
5
  Author: MetaSPN Contributors
6
6
  License-Expression: MIT
@@ -55,3 +55,17 @@ print(resolution.entity_id, resolution.confidence)
55
55
  - `undo_merge(from_entity_id, to_entity_id, ...)` (implemented as reverse merge with redirect correction)
56
56
  - `drain_events() -> list[EmittedEvent]`
57
57
  - `export_snapshot(output_path)` to inspect SQLite state as JSON
58
+
59
+ ## Event Contract Guarantees
60
+
61
+ `drain_events()` returns `EmittedEvent` objects whose `event_type` and `payload` are
62
+ schema-compatible with `metaspn-schemas` entity events.
63
+
64
+ - `EntityResolved` payload keys:
65
+ - `entity_id`, `resolver`, `resolved_at`, `confidence`, `schema_version`
66
+ - `EntityMerged` payload keys:
67
+ - `entity_id`, `merged_from`, `merged_at`, `reason`, `schema_version`
68
+ - `EntityAliasAdded` payload keys:
69
+ - `entity_id`, `alias`, `alias_type`, `added_at`, `schema_version`
70
+
71
+ Datetime fields are emitted as UTC ISO-8601 strings for deterministic serialization.
@@ -12,4 +12,5 @@ metaspn_entities.egg-info/SOURCES.txt
12
12
  metaspn_entities.egg-info/dependency_links.txt
13
13
  metaspn_entities.egg-info/requires.txt
14
14
  metaspn_entities.egg-info/top_level.txt
15
+ tests/test_event_contract.py
15
16
  tests/test_resolver.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "metaspn-entities"
7
- version = "0.1.0"
7
+ version = "0.1.3"
8
8
  description = "Canonical entity resolution, aliasing, and merges for MetaSPN systems"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,98 @@
1
+ import importlib
2
+ import sys
3
+ import tempfile
4
+ import unittest
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+
8
+ from metaspn_entities import EntityResolver, SQLiteEntityStore
9
+
10
+
11
+ class EventContractTests(unittest.TestCase):
12
+ def setUp(self) -> None:
13
+ self.tempdir = tempfile.TemporaryDirectory()
14
+ self.db_path = str(Path(self.tempdir.name) / "entities.db")
15
+ self.store = SQLiteEntityStore(self.db_path)
16
+ self.resolver = EntityResolver(self.store)
17
+
18
+ def tearDown(self) -> None:
19
+ self.store.close()
20
+ self.tempdir.cleanup()
21
+
22
+ def test_entity_resolved_payload_shape(self) -> None:
23
+ self.resolver.resolve("twitter_handle", "contract_user")
24
+ event = self.resolver.drain_events()[-1]
25
+
26
+ self.assertEqual(event.event_type, "EntityResolved")
27
+ self.assertEqual(
28
+ sorted(event.payload.keys()),
29
+ ["confidence", "entity_id", "resolved_at", "resolver", "schema_version"],
30
+ )
31
+ self.assertIsInstance(event.payload["resolver"], str)
32
+ self.assertGreaterEqual(float(event.payload["confidence"]), 0.0)
33
+ datetime.fromisoformat(event.payload["resolved_at"])
34
+
35
+ def test_entity_merged_payload_shape(self) -> None:
36
+ a = self.resolver.resolve("twitter_handle", "merge_a")
37
+ b = self.resolver.resolve("twitter_handle", "merge_b")
38
+ self.resolver.drain_events()
39
+
40
+ self.resolver.merge_entities(a.entity_id, b.entity_id, reason="dedupe")
41
+ event = self.resolver.drain_events()[-1]
42
+
43
+ self.assertEqual(event.event_type, "EntityMerged")
44
+ self.assertEqual(
45
+ sorted(event.payload.keys()),
46
+ ["entity_id", "merged_at", "merged_from", "reason", "schema_version"],
47
+ )
48
+ self.assertEqual(event.payload["entity_id"], b.entity_id)
49
+ self.assertEqual(event.payload["merged_from"], [a.entity_id])
50
+ datetime.fromisoformat(event.payload["merged_at"])
51
+
52
+ def test_entity_alias_added_payload_shape(self) -> None:
53
+ created = self.resolver.resolve("twitter_handle", "alias_contract")
54
+ self.resolver.drain_events()
55
+
56
+ events = self.resolver.add_alias(created.entity_id, "email", "alias@example.com")
57
+ self.assertEqual(len(events), 1)
58
+ event = events[0]
59
+
60
+ self.assertEqual(event.event_type, "EntityAliasAdded")
61
+ self.assertEqual(
62
+ sorted(event.payload.keys()),
63
+ ["added_at", "alias", "alias_type", "entity_id", "schema_version"],
64
+ )
65
+ self.assertEqual(event.payload["entity_id"], created.entity_id)
66
+ self.assertEqual(event.payload["alias_type"], "email")
67
+ self.assertEqual(event.payload["alias"], "alias@example.com")
68
+ datetime.fromisoformat(event.payload["added_at"])
69
+
70
+ def test_schema_round_trip_when_metaspn_schemas_is_available(self) -> None:
71
+ # Try import from installed package first, then from sibling checkout if present.
72
+ entities_module = None
73
+ try:
74
+ entities_module = importlib.import_module("metaspn_schemas.entities")
75
+ except Exception:
76
+ sibling_src = Path(__file__).resolve().parents[2] / "metaspn-schemas" / "src"
77
+ if sibling_src.exists():
78
+ sys.path.insert(0, str(sibling_src))
79
+ entities_module = importlib.import_module("metaspn_schemas.entities")
80
+
81
+ if entities_module is None:
82
+ self.skipTest("metaspn_schemas is unavailable")
83
+
84
+ self.resolver.resolve("twitter_handle", "roundtrip_user")
85
+ resolved_event = self.resolver.drain_events()[-1]
86
+
87
+ entity_resolved = entities_module.EntityResolved.from_dict(resolved_event.payload)
88
+ self.assertEqual(entity_resolved.entity_id, resolved_event.payload["entity_id"])
89
+
90
+ created = self.resolver.resolve("twitter_handle", "roundtrip_alias")
91
+ self.resolver.drain_events()
92
+ alias_event = self.resolver.add_alias(created.entity_id, "email", "rt@example.com")[0]
93
+ entity_alias = entities_module.EntityAliasAdded.from_dict(alias_event.payload)
94
+ self.assertEqual(entity_alias.alias, "rt@example.com")
95
+
96
+
97
+ if __name__ == "__main__":
98
+ unittest.main()
@@ -1,49 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from dataclasses import dataclass
4
- from typing import Any, Dict
5
-
6
-
7
- @dataclass(frozen=True)
8
- class EmittedEvent:
9
- event_type: str
10
- payload: Dict[str, Any]
11
-
12
-
13
- class EventFactory:
14
- @staticmethod
15
- def entity_resolved(entity_id: str, identifier_type: str, value: str, confidence: float, created_new_entity: bool) -> EmittedEvent:
16
- return EmittedEvent(
17
- event_type="EntityResolved",
18
- payload={
19
- "entity_id": entity_id,
20
- "identifier_type": identifier_type,
21
- "value": value,
22
- "confidence": confidence,
23
- "created_new_entity": created_new_entity,
24
- },
25
- )
26
-
27
- @staticmethod
28
- def entity_merged(from_entity_id: str, to_entity_id: str, reason: str, caused_by: str) -> EmittedEvent:
29
- return EmittedEvent(
30
- event_type="EntityMerged",
31
- payload={
32
- "from_entity_id": from_entity_id,
33
- "to_entity_id": to_entity_id,
34
- "reason": reason,
35
- "caused_by": caused_by,
36
- },
37
- )
38
-
39
- @staticmethod
40
- def entity_alias_added(entity_id: str, identifier_type: str, normalized_value: str, caused_by: str) -> EmittedEvent:
41
- return EmittedEvent(
42
- event_type="EntityAliasAdded",
43
- payload={
44
- "entity_id": entity_id,
45
- "identifier_type": identifier_type,
46
- "normalized_value": normalized_value,
47
- "caused_by": caused_by,
48
- },
49
- )