metaspn-entities 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/PKG-INFO +28 -1
  2. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/README.md +27 -0
  3. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/__init__.py +5 -1
  4. metaspn_entities-0.1.7/metaspn_entities/attribution.py +88 -0
  5. metaspn_entities-0.1.7/metaspn_entities/context.py +191 -0
  6. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/resolver.py +36 -1
  7. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/sqlite_backend.py +6 -0
  8. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities.egg-info/PKG-INFO +28 -1
  9. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities.egg-info/SOURCES.txt +3 -0
  10. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/pyproject.toml +1 -1
  11. metaspn_entities-0.1.7/tests/test_attribution.py +66 -0
  12. metaspn_entities-0.1.7/tests/test_recommendation_context.py +89 -0
  13. metaspn_entities-0.1.5/metaspn_entities/context.py +0 -68
  14. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/LICENSE +0 -0
  15. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/adapter.py +0 -0
  16. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/events.py +0 -0
  17. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/models.py +0 -0
  18. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities/normalize.py +0 -0
  19. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities.egg-info/dependency_links.txt +0 -0
  20. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities.egg-info/requires.txt +0 -0
  21. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/metaspn_entities.egg-info/top_level.txt +0 -0
  22. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/setup.cfg +0 -0
  23. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/tests/test_adapter.py +0 -0
  24. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/tests/test_context.py +0 -0
  25. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/tests/test_event_contract.py +0 -0
  26. {metaspn_entities-0.1.5 → metaspn_entities-0.1.7}/tests/test_resolver.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metaspn-entities
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
5
5
  Author: MetaSPN Contributors
6
6
  License-Expression: MIT
@@ -108,3 +108,30 @@ Profiler/router workers can read consolidated context using:
108
108
  - `resolver.confidence_summary(entity_id)`
109
109
 
110
110
  Both APIs resolve canonical redirects first, so merged IDs return coherent context.
111
+
112
+ ## M2 Recommendation Context API
113
+
114
+ Recommendation and drafter workers can consume:
115
+
116
+ - `resolver.recommendation_context(entity_id)`
117
+
118
+ The recommendation context includes:
119
+ - identity confidence
120
+ - activity recency (days)
121
+ - interaction history summary (evidence count + source distribution)
122
+ - preferred channel hint
123
+ - relationship stage hint (`cold` / `warm` / `engaged`)
124
+ - merge-safe continuity fields keyed to canonical entity IDs
125
+
126
+ ## M3 Outcome Attribution API
127
+
128
+ Outcome evaluators can map attempt/outcome references back to canonical entity lineage:
129
+
130
+ - `resolver.attribute_outcome(references)`
131
+
132
+ Supported references include `entity_id`, `email`, `canonical_url`, handles, domains, and names.
133
+
134
+ Attribution guarantees:
135
+ - canonical merge redirects are resolved before returning `entity_id`
136
+ - output includes explicit confidence for downstream learning logic
137
+ - deterministic tie-breaks are applied by score, then hit count, then entity ID
@@ -83,3 +83,30 @@ Profiler/router workers can read consolidated context using:
83
83
  - `resolver.confidence_summary(entity_id)`
84
84
 
85
85
  Both APIs resolve canonical redirects first, so merged IDs return coherent context.
86
+
87
+ ## M2 Recommendation Context API
88
+
89
+ Recommendation and drafter workers can consume:
90
+
91
+ - `resolver.recommendation_context(entity_id)`
92
+
93
+ The recommendation context includes:
94
+ - identity confidence
95
+ - activity recency (days)
96
+ - interaction history summary (evidence count + source distribution)
97
+ - preferred channel hint
98
+ - relationship stage hint (`cold` / `warm` / `engaged`)
99
+ - merge-safe continuity fields keyed to canonical entity IDs
100
+
101
+ ## M3 Outcome Attribution API
102
+
103
+ Outcome evaluators can map attempt/outcome references back to canonical entity lineage:
104
+
105
+ - `resolver.attribute_outcome(references)`
106
+
107
+ Supported references include `entity_id`, `email`, `canonical_url`, handles, domains, and names.
108
+
109
+ Attribution guarantees:
110
+ - canonical merge redirects are resolved before returning `entity_id`
111
+ - output includes explicit confidence for downstream learning logic
112
+ - deterministic tie-breaks are applied by score, then hit count, then entity ID
@@ -1,5 +1,6 @@
1
1
  from .adapter import SignalResolutionResult, resolve_normalized_social_signal
2
- from .context import EntityContext, build_confidence_summary
2
+ from .attribution import OutcomeAttribution
3
+ from .context import RecommendationContext, EntityContext, build_confidence_summary, build_recommendation_context
3
4
  from .events import EmittedEvent
4
5
  from .models import EntityResolution
5
6
  from .resolver import EntityResolver
@@ -8,8 +9,11 @@ from .sqlite_backend import SQLiteEntityStore
8
9
  __all__ = [
9
10
  "resolve_normalized_social_signal",
10
11
  "SignalResolutionResult",
12
+ "OutcomeAttribution",
11
13
  "EntityContext",
14
+ "RecommendationContext",
12
15
  "build_confidence_summary",
16
+ "build_recommendation_context",
13
17
  "EntityResolver",
14
18
  "EntityResolution",
15
19
  "EmittedEvent",
@@ -0,0 +1,88 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple
5
+
6
+ from .normalize import normalize_identifier
7
+
8
+
9
@dataclass(frozen=True)
class OutcomeAttribution:
    """Result of mapping attempt/outcome references back to one canonical entity."""

    # Canonical entity the outcome is attributed to; None when no reference matched.
    entity_id: Optional[str]
    # Aggregate attribution confidence (score / reference count, capped at 1.0).
    confidence: float
    # Per-reference resolution trail: type, value, normalized value, match, confidence.
    matched_references: List[Dict[str, Any]] = field(default_factory=list)
    # Name of the ranking algorithm that produced this attribution.
    strategy: str = "confidence-weighted-reference-v1"
15
+
16
+
17
def normalize_outcome_references(references: Mapping[str, Any] | Sequence[Mapping[str, Any]]) -> List[Tuple[str, str]]:
    """Flatten outcome references into ``(identifier_type, value)`` pairs.

    Accepts either a mapping of ``{identifier_type: value}`` or a sequence of
    items carrying ``identifier_type``/``type`` and ``value`` keys. Blank or
    missing values are dropped. Mapping keys are emitted in sorted order so
    downstream ranking is deterministic.
    """
    pairs: List[Tuple[str, str]] = []

    if isinstance(references, Mapping):
        for key in sorted(references):
            raw = references[key]
            # Only non-empty string values are accepted in the mapping form.
            if isinstance(raw, str):
                cleaned = raw.strip()
                if cleaned:
                    pairs.append((str(key), cleaned))
        return pairs

    for entry in references:
        ref_type = str(entry.get("identifier_type") or entry.get("type") or "").strip()
        ref_value = str(entry.get("value") or "").strip()
        if ref_type and ref_value:
            pairs.append((ref_type, ref_value))
    return pairs
35
+
36
+
37
def rank_entity_candidates(
    references: Iterable[Tuple[str, str]],
    resolve_reference: Any,
) -> OutcomeAttribution:
    """Score candidate entities across references and pick the best one.

    Each reference is resolved via ``resolve_reference(identifier_type, value)``,
    which must return a mapping with ``entity_id``, ``confidence`` and
    ``normalized_value`` keys. Per-entity scores are the sum of reference
    confidences; ties break deterministically by hit count, then entity ID.
    The winner's confidence is its total score divided by the number of
    references, capped at 1.0 and rounded to six decimals.
    """
    scores: Dict[str, float] = {}
    hits: Dict[str, int] = {}
    trail: List[Dict[str, Any]] = []
    reference_count = 0

    for ref_type, ref_value in references:
        reference_count += 1
        resolution = resolve_reference(ref_type, ref_value)
        resolved_id = resolution.get("entity_id")
        ref_confidence = float(resolution.get("confidence", 0.0))
        # Keep a full audit trail, including references that matched nothing.
        trail.append(
            {
                "identifier_type": ref_type,
                "value": ref_value,
                "normalized_value": resolution.get("normalized_value"),
                "matched_entity_id": resolved_id,
                "reference_confidence": ref_confidence,
            }
        )
        if resolved_id:
            scores[resolved_id] = scores.get(resolved_id, 0.0) + ref_confidence
            hits[resolved_id] = hits.get(resolved_id, 0) + 1

    if not scores:
        return OutcomeAttribution(entity_id=None, confidence=0.0, matched_references=trail)

    def _rank_key(item: Tuple[str, float]) -> Tuple[float, int, str]:
        # Highest score first, then most hits, then lexicographic entity ID.
        entity, score = item
        return (-score, -hits.get(entity, 0), entity)

    winner, winning_score = min(scores.items(), key=_rank_key)
    normalized = min(1.0, round(winning_score / float(max(1, reference_count)), 6))
    return OutcomeAttribution(
        entity_id=winner,
        confidence=normalized,
        matched_references=trail,
    )
83
+
84
+
85
def normalize_reference(identifier_type: str, value: str) -> Tuple[str, str]:
    """Return ``(identifier_type, normalized_value)`` for one reference.

    Entity IDs pass through untouched; every other identifier type is run
    through the shared ``normalize_identifier`` rules.
    """
    if identifier_type == "entity_id":
        normalized = value
    else:
        normalized = normalize_identifier(identifier_type, value)
    return identifier_type, normalized
@@ -0,0 +1,191 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime, timezone
5
+ from typing import Any, Dict, List
6
+
7
+
8
@dataclass(frozen=True)
class EntityContext:
    """Consolidated read-model of one canonical entity for profiler/router workers."""

    # Canonical entity ID (callers resolve merge redirects before building this).
    entity_id: str
    # Alias records attached to the entity.
    aliases: List[Dict[str, Any]]
    # Identifier records (identifier_type, confidence, provenance, ...).
    identifiers: List[Dict[str, Any]]
    # Most recent evidence rows backing the entity.
    recent_evidence: List[Dict[str, Any]]
    # Output of build_confidence_summary over the records above.
    confidence_summary: Dict[str, Any]
15
+
16
+
17
@dataclass(frozen=True)
class RecommendationContext:
    """M2 context consumed by recommendation/drafter workers."""

    # Canonical entity ID this context describes.
    entity_id: str
    # Blended identity confidence in [0.0, 1.0].
    identity_confidence: float
    # Days since the newest identifier was last seen (inf when unknown).
    activity_recency_days: float
    # Evidence count plus per-source distribution of identifier provenance.
    interaction_history_summary: Dict[str, Any]
    # Best outreach channel inferred from identifier types (e.g. "email").
    preferred_channel_hint: str
    # One of "cold" / "warm" / "engaged".
    relationship_stage_hint: str
    # Merge-safe continuity fields keyed to the canonical entity ID.
    continuity: Dict[str, Any]
26
+
27
+
28
def build_confidence_summary(
    aliases: List[Dict[str, Any]],
    identifiers: List[Dict[str, Any]],
    evidence: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Blend identifier, alias, and provenance signals into one summary dict.

    Overall confidence is a weighted mix — 65% identifier average, 25% alias
    average, 10% source diversity (saturating at three distinct provenance
    values) — capped at 1.0. All floats are rounded to six decimal places.
    """
    def mean(values: List[float]) -> float:
        return sum(values) / len(values) if values else 0.0

    identifier_scores = sorted(float(record["confidence"]) for record in identifiers)
    alias_scores = sorted(float(record["confidence"]) for record in aliases)
    sources = {
        str(record.get("provenance"))
        for record in evidence
        if record.get("provenance") not in (None, "")
    }

    identifier_mean = mean(identifier_scores)
    alias_mean = mean(alias_scores)
    diversity = min(1.0, len(sources) / 3.0)
    blended = min(1.0, (0.65 * identifier_mean) + (0.25 * alias_mean) + (0.10 * diversity))

    # Per-identifier-type rollup: count / average / maximum confidence.
    buckets: Dict[str, List[float]] = {}
    for record in identifiers:
        buckets.setdefault(str(record["identifier_type"]), []).append(float(record["confidence"]))
    rollup: Dict[str, Dict[str, float]] = {}
    for id_type in sorted(buckets):
        ordered = sorted(buckets[id_type])
        rollup[id_type] = {
            "count": float(len(ordered)),
            "avg_confidence": round(mean(ordered), 6),
            "max_confidence": round(max(ordered), 6),
        }

    return {
        "overall_confidence": round(blended, 6),
        "identifier_confidence_avg": round(identifier_mean, 6),
        "alias_confidence_avg": round(alias_mean, 6),
        "unique_source_count": len(sources),
        "evidence_count": len(evidence),
        "by_identifier_type": rollup,
    }
58
+
59
+
60
def _avg(values: List[float]) -> float:
    """Arithmetic mean of *values*; 0.0 for an empty list."""
    if values:
        return sum(values) / len(values)
    return 0.0
64
+
65
+
66
def _rollup_by_identifier_type(identifiers: List[Dict[str, Any]]) -> Dict[str, Dict[str, float]]:
    """Group identifier confidences by type, yielding count/avg/max per group.

    Keys are emitted in sorted order for deterministic output.
    """
    buckets: Dict[str, List[float]] = {}
    for record in identifiers:
        buckets.setdefault(str(record["identifier_type"]), []).append(float(record["confidence"]))

    summary: Dict[str, Dict[str, float]] = {}
    for id_type in sorted(buckets):
        ordered = sorted(buckets[id_type])
        # Buckets are never empty, so the mean is always well-defined.
        summary[id_type] = {
            "count": float(len(ordered)),
            "avg_confidence": round(sum(ordered) / len(ordered), 6),
            "max_confidence": round(max(ordered), 6),
        }
    return summary
81
+
82
+
83
def build_recommendation_context(
    entity_id: str,
    aliases: List[Dict[str, Any]],
    identifiers: List[Dict[str, Any]],
    *,
    now: datetime | None = None,
) -> RecommendationContext:
    """Assemble the M2 recommendation context for a canonical entity.

    Identifier records double as the evidence set: their count, provenance
    spread, and last-seen timestamps drive the confidence, recency, and
    relationship-stage hints. ``now`` is injectable for deterministic tests
    and defaults to the current UTC time.
    """
    reference_time = now if now is not None else datetime.now(timezone.utc)
    recency = _recency_days(_latest_seen(identifiers), reference_time)
    # Identifiers are passed as the evidence argument on purpose: they carry
    # the provenance fields the summary inspects.
    confidence = build_confidence_summary(aliases, identifiers, identifiers)

    source_tally: Dict[str, int] = {}
    for record in identifiers:
        label = str(record.get("provenance") or "unknown")
        source_tally[label] = source_tally.get(label, 0) + 1

    history = {
        "evidence_count": len(identifiers),
        "distinct_sources": len(source_tally),
        "sources": {label: source_tally[label] for label in sorted(source_tally)},
    }
    stage = _relationship_stage_hint(
        evidence_count=len(identifiers),
        recency_days=recency,
        confidence=confidence["overall_confidence"],
    )

    return RecommendationContext(
        entity_id=entity_id,
        identity_confidence=float(confidence["overall_confidence"]),
        activity_recency_days=recency,
        interaction_history_summary=history,
        preferred_channel_hint=_preferred_channel_hint(identifiers),
        relationship_stage_hint=stage,
        continuity={
            "canonical_entity_id": entity_id,
            "alias_count": len(aliases),
            "identifier_count": len(identifiers),
        },
    )
129
+
130
+
131
def _latest_seen(identifiers: List[Dict[str, Any]]) -> datetime | None:
    """Most recent parseable ``last_seen_at`` timestamp, or None when absent."""
    latest: datetime | None = None
    for record in identifiers:
        raw = record.get("last_seen_at")
        if not raw:
            continue
        parsed = _parse_iso(str(raw))
        # Unparseable timestamps are skipped rather than treated as errors.
        if parsed is not None and (latest is None or parsed > latest):
            latest = parsed
    return latest
141
+
142
+
143
def _parse_iso(raw: str) -> datetime | None:
    """Parse an ISO-8601 timestamp into an aware UTC datetime.

    A trailing ``Z`` suffix is accepted (rewritten to ``+00:00`` since
    ``fromisoformat`` only learned ``Z`` in Python 3.11); naive timestamps
    are assumed UTC. Returns None for blank or unparseable input.
    """
    candidate = raw.strip()
    if not candidate:
        return None
    if candidate.endswith("Z"):
        candidate = candidate[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(candidate)
    except ValueError:
        return None
    aware = parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
    return aware.astimezone(timezone.utc)
156
+
157
+
158
def _recency_days(last_seen: datetime | None, now: datetime) -> float:
    """Days elapsed since *last_seen*, clamped at 0.0; inf when unknown."""
    if last_seen is None:
        return float("inf")
    elapsed_seconds = (now - last_seen).total_seconds()
    if elapsed_seconds < 0.0:
        # Future timestamps (clock skew) count as "just now".
        elapsed_seconds = 0.0
    return round(elapsed_seconds / 86400.0, 6)
164
+
165
+
166
def _preferred_channel_hint(identifiers: List[Dict[str, Any]]) -> str:
    """Pick the highest-weighted identifier type as the outreach channel.

    Every identifier record adds its type's weight (unrecognized types count
    as 1) to that type's running total. Ties break alphabetically; returns
    "unknown" when there are no identifiers at all.
    """
    channel_weights = {
        "email": 5,
        "linkedin_handle": 4,
        "twitter_handle": 3,
        "github_handle": 3,
        "canonical_url": 2,
        "domain": 1,
        "name": 0,
    }
    totals: Dict[str, int] = {}
    for record in identifiers:
        channel = str(record["identifier_type"])
        totals[channel] = totals.get(channel, 0) + channel_weights.get(channel, 1)
    if not totals:
        return "unknown"
    # Highest total wins; equal totals resolve to the alphabetically first type.
    return min(totals, key=lambda channel: (-totals[channel], channel))
184
+
185
+
186
def _relationship_stage_hint(*, evidence_count: int, recency_days: float, confidence: float) -> str:
    """Classify the relationship as engaged / warm / cold from activity signals."""
    # Ordered thresholds: (label, min evidence, max recency days, min confidence).
    thresholds = (
        ("engaged", 6, 30.0, 0.8),
        ("warm", 3, 90.0, 0.65),
    )
    for label, min_evidence, max_recency, min_confidence in thresholds:
        if evidence_count >= min_evidence and recency_days <= max_recency and confidence >= min_confidence:
            return label
    return "cold"
@@ -2,7 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  from typing import Any, Dict, List, Optional
4
4
 
5
- from .context import EntityContext, build_confidence_summary
5
+ from .attribution import OutcomeAttribution, normalize_outcome_references, normalize_reference, rank_entity_candidates
6
+ from .context import RecommendationContext, EntityContext, build_confidence_summary, build_recommendation_context
6
7
  from .events import EmittedEvent, EventFactory
7
8
  from .models import (
8
9
  DEFAULT_MATCH_CONFIDENCE,
@@ -160,6 +161,40 @@ class EntityResolver:
160
161
  confidence_summary=summary,
161
162
  )
162
163
 
164
+ def recommendation_context(self, entity_id: str) -> RecommendationContext:
165
+ canonical_id = self.store.canonical_entity_id(entity_id)
166
+ aliases = self.store.list_aliases_for_entity(canonical_id)
167
+ identifiers = self.store.list_identifier_records_for_entity(canonical_id)
168
+ return build_recommendation_context(canonical_id, aliases, identifiers)
169
+
170
    def attribute_outcome(self, references: Any) -> OutcomeAttribution:
        """Attribute an attempt/outcome back to one canonical entity.

        ``references`` is either a mapping of ``{identifier_type: value}`` or
        a sequence of items with ``identifier_type``/``type`` and ``value``
        keys (see ``normalize_outcome_references``). Each reference is
        resolved against the store, canonical merge redirects are applied,
        and candidates are ranked via ``rank_entity_candidates``.
        """
        refs = normalize_outcome_references(references)

        def _resolve_ref(identifier_type: str, value: str) -> Dict[str, Any]:
            # Normalize the raw value (entity_id references pass through unchanged).
            raw_type, normalized = normalize_reference(identifier_type, value)
            if raw_type == "entity_id":
                # Direct entity reference: near-certain (0.99) when the entity exists.
                entity = self.store.get_entity(normalized)
                if not entity:
                    return {"entity_id": None, "confidence": 0.0, "normalized_value": normalized}
                return {
                    "entity_id": self.store.canonical_entity_id(str(entity["entity_id"])),
                    "confidence": 0.99,
                    "normalized_value": normalized,
                }

            alias = self.store.find_alias(raw_type, normalized)
            if not alias:
                return {"entity_id": None, "confidence": 0.0, "normalized_value": normalized}

            # Follow merge redirects before reporting a match.
            canonical = self.store.canonical_entity_id(str(alias["entity_id"]))
            identifier = self.store.get_identifier(raw_type, normalized)
            # Use the stronger of the alias and identifier confidences.
            alias_conf = float(alias["confidence"])
            identifier_conf = float(identifier["confidence"]) if identifier else 0.0
            confidence = round(max(alias_conf, identifier_conf), 6)
            return {"entity_id": canonical, "confidence": confidence, "normalized_value": normalized}

        return rank_entity_candidates(refs, _resolve_ref)
197
+
163
198
  def export_snapshot(self, output_path: str) -> None:
164
199
  self.store.export_snapshot(output_path)
165
200
 
@@ -103,6 +103,12 @@ class SQLiteEntityStore:
103
103
  (identifier_type, normalized_value),
104
104
  ).fetchone()
105
105
 
106
+ def get_identifier(self, identifier_type: str, normalized_value: str) -> Optional[sqlite3.Row]:
107
+ return self.conn.execute(
108
+ "SELECT * FROM identifiers WHERE identifier_type = ? AND normalized_value = ?",
109
+ (identifier_type, normalized_value),
110
+ ).fetchone()
111
+
106
112
  def upsert_identifier(
107
113
  self,
108
114
  identifier_type: str,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metaspn-entities
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: Canonical entity resolution, aliasing, and merges for MetaSPN systems
5
5
  Author: MetaSPN Contributors
6
6
  License-Expression: MIT
@@ -108,3 +108,30 @@ Profiler/router workers can read consolidated context using:
108
108
  - `resolver.confidence_summary(entity_id)`
109
109
 
110
110
  Both APIs resolve canonical redirects first, so merged IDs return coherent context.
111
+
112
+ ## M2 Recommendation Context API
113
+
114
+ Recommendation and drafter workers can consume:
115
+
116
+ - `resolver.recommendation_context(entity_id)`
117
+
118
+ The recommendation context includes:
119
+ - identity confidence
120
+ - activity recency (days)
121
+ - interaction history summary (evidence count + source distribution)
122
+ - preferred channel hint
123
+ - relationship stage hint (`cold` / `warm` / `engaged`)
124
+ - merge-safe continuity fields keyed to canonical entity IDs
125
+
126
+ ## M3 Outcome Attribution API
127
+
128
+ Outcome evaluators can map attempt/outcome references back to canonical entity lineage:
129
+
130
+ - `resolver.attribute_outcome(references)`
131
+
132
+ Supported references include `entity_id`, `email`, `canonical_url`, handles, domains, and names.
133
+
134
+ Attribution guarantees:
135
+ - canonical merge redirects are resolved before returning `entity_id`
136
+ - output includes explicit confidence for downstream learning logic
137
+ - deterministic tie-breaks are applied by score, then hit count, then entity ID
@@ -3,6 +3,7 @@ README.md
3
3
  pyproject.toml
4
4
  metaspn_entities/__init__.py
5
5
  metaspn_entities/adapter.py
6
+ metaspn_entities/attribution.py
6
7
  metaspn_entities/context.py
7
8
  metaspn_entities/events.py
8
9
  metaspn_entities/models.py
@@ -15,6 +16,8 @@ metaspn_entities.egg-info/dependency_links.txt
15
16
  metaspn_entities.egg-info/requires.txt
16
17
  metaspn_entities.egg-info/top_level.txt
17
18
  tests/test_adapter.py
19
+ tests/test_attribution.py
18
20
  tests/test_context.py
19
21
  tests/test_event_contract.py
22
+ tests/test_recommendation_context.py
20
23
  tests/test_resolver.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "metaspn-entities"
7
- version = "0.1.5"
7
+ version = "0.1.7"
8
8
  description = "Canonical entity resolution, aliasing, and merges for MetaSPN systems"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,66 @@
1
+ import tempfile
2
+ import unittest
3
+ from pathlib import Path
4
+
5
+ from metaspn_entities.resolver import EntityResolver
6
+ from metaspn_entities.sqlite_backend import SQLiteEntityStore
7
+
8
+
9
class AttributionTests(unittest.TestCase):
    """End-to-end tests for EntityResolver.attribute_outcome (M3 attribution)."""

    def setUp(self) -> None:
        # Fresh SQLite-backed store per test for isolation.
        self.tempdir = tempfile.TemporaryDirectory()
        self.db_path = str(Path(self.tempdir.name) / "entities.db")
        self.store = SQLiteEntityStore(self.db_path)
        self.resolver = EntityResolver(self.store)

    def tearDown(self) -> None:
        self.store.close()
        self.tempdir.cleanup()

    def test_merge_after_attempt_before_outcome(self) -> None:
        # An outcome referencing a merged-away entity must land on the merge winner.
        attempt = self.resolver.resolve("twitter_handle", "attempt_user")
        winner = self.resolver.resolve("twitter_handle", "winner_user")
        self.resolver.add_alias(attempt.entity_id, "email", "attempt@example.com", confidence=0.9)

        # Merge happens between attempt capture and outcome ingestion.
        self.resolver.merge_entities(attempt.entity_id, winner.entity_id, reason="dedupe")

        result = self.resolver.attribute_outcome(
            {
                "entity_id": attempt.entity_id,
                "email": "attempt@example.com",
            }
        )
        self.assertEqual(result.entity_id, winner.entity_id)
        self.assertGreater(result.confidence, 0.9)

    def test_undo_merge_edge_case(self) -> None:
        # Attribution must follow whatever redirect state exists after an undo.
        a = self.resolver.resolve("twitter_handle", "undo_attr_a")
        b = self.resolver.resolve("twitter_handle", "undo_attr_b")
        self.resolver.merge_entities(a.entity_id, b.entity_id, reason="dedupe")
        self.resolver.undo_merge(a.entity_id, b.entity_id)

        # After undo implementation, b redirects to a.
        result = self.resolver.attribute_outcome({"entity_id": b.entity_id})
        self.assertEqual(result.entity_id, a.entity_id)
        self.assertAlmostEqual(result.confidence, 0.99, places=6)

    def test_conflicting_aliases_tie_break_by_confidence(self) -> None:
        # Two entities match different references; the higher summed score wins.
        high = self.resolver.resolve("twitter_handle", "high_conf")
        low = self.resolver.resolve("twitter_handle", "low_conf")

        self.resolver.add_alias(high.entity_id, "email", "high@example.com", confidence=0.95)
        self.resolver.add_alias(low.entity_id, "canonical_url", "https://low.example.com/profile", confidence=0.60)

        # Case/trailing-slash variants exercise reference normalization too.
        result = self.resolver.attribute_outcome(
            {
                "email": "HIGH@example.com",
                "canonical_url": "https://low.example.com/profile/",
            }
        )
        self.assertEqual(result.entity_id, high.entity_id)
        # 0.95 winning score over 2 references -> 0.475 normalized confidence.
        self.assertAlmostEqual(result.confidence, 0.475, places=6)
63
+
64
+
65
+ if __name__ == "__main__":
66
+ unittest.main()
@@ -0,0 +1,89 @@
1
+ import tempfile
2
+ import unittest
3
+ from pathlib import Path
4
+
5
+ from metaspn_entities.adapter import resolve_normalized_social_signal
6
+ from metaspn_entities.resolver import EntityResolver
7
+ from metaspn_entities.sqlite_backend import SQLiteEntityStore
8
+
9
+
10
class RecommendationContextTests(unittest.TestCase):
    """Integration tests for EntityResolver.recommendation_context (M2)."""

    def setUp(self) -> None:
        # Fresh SQLite-backed store per test for isolation.
        self.tempdir = tempfile.TemporaryDirectory()
        self.db_path = str(Path(self.tempdir.name) / "entities.db")
        self.store = SQLiteEntityStore(self.db_path)
        self.resolver = EntityResolver(self.store)

    def tearDown(self) -> None:
        self.store.close()
        self.tempdir.cleanup()

    def test_cross_source_consistency(self) -> None:
        # Two signals from different platforms sharing a profile URL should
        # resolve to one entity whose context reflects both sources.
        signal_a = {
            "source": "social.ingest.twitter",
            "payload": {
                "platform": "twitter",
                "author_handle": "rec_user",
                "profile_url": "https://example.com/p/rec_user",
            },
        }
        signal_b = {
            "source": "social.ingest.linkedin",
            "payload": {
                "platform": "linkedin",
                "handle": "rec-user",
                "profile_url": "http://www.example.com/p/rec_user/",
            },
        }

        first = resolve_normalized_social_signal(self.resolver, signal_a)
        second = resolve_normalized_social_signal(self.resolver, signal_b)
        self.assertEqual(first.entity_id, second.entity_id)

        rec = self.resolver.recommendation_context(first.entity_id)
        self.assertEqual(rec.entity_id, first.entity_id)
        self.assertGreaterEqual(rec.identity_confidence, 0.0)
        self.assertIn(rec.relationship_stage_hint, {"cold", "warm", "engaged"})
        self.assertIn("social.ingest.linkedin", rec.interaction_history_summary["sources"])
        self.assertIn("social.ingest.twitter", rec.interaction_history_summary["sources"])

    def test_merge_safe_continuity(self) -> None:
        # Contexts fetched via either side of a merge must agree on canonical IDs.
        a = self.resolver.resolve("twitter_handle", "merge_rec_a")
        b = self.resolver.resolve("twitter_handle", "merge_rec_b")
        self.resolver.add_alias(a.entity_id, "email", "a@rec.dev")
        self.resolver.add_alias(b.entity_id, "domain", "rec.dev")
        self.resolver.merge_entities(a.entity_id, b.entity_id, reason="dedupe")

        rec_from = self.resolver.recommendation_context(a.entity_id)
        rec_to = self.resolver.recommendation_context(b.entity_id)

        self.assertEqual(rec_from.entity_id, rec_to.entity_id)
        self.assertEqual(rec_from.continuity["canonical_entity_id"], rec_to.continuity["canonical_entity_id"])
        self.assertGreaterEqual(rec_from.continuity["identifier_count"], 2)

    def test_rerun_determinism(self) -> None:
        # Re-resolving the same signal must not change any context field.
        signal = {
            "source": "social.ingest",
            "payload": {
                "platform": "twitter",
                "author_handle": "deterministic_rec",
                "profile_url": "https://example.org/deterministic_rec",
            },
        }

        first = resolve_normalized_social_signal(self.resolver, signal)
        rec_1 = self.resolver.recommendation_context(first.entity_id)

        second = resolve_normalized_social_signal(self.resolver, signal)
        rec_2 = self.resolver.recommendation_context(second.entity_id)

        self.assertEqual(first.entity_id, second.entity_id)
        self.assertEqual(rec_1.entity_id, rec_2.entity_id)
        self.assertEqual(rec_1.preferred_channel_hint, rec_2.preferred_channel_hint)
        self.assertEqual(rec_1.relationship_stage_hint, rec_2.relationship_stage_hint)
        self.assertEqual(rec_1.continuity, rec_2.continuity)
        self.assertEqual(rec_1.interaction_history_summary, rec_2.interaction_history_summary)
86
+
87
+
88
+ if __name__ == "__main__":
89
+ unittest.main()
@@ -1,68 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from dataclasses import dataclass
4
- from typing import Any, Dict, List
5
-
6
-
7
- @dataclass(frozen=True)
8
- class EntityContext:
9
- entity_id: str
10
- aliases: List[Dict[str, Any]]
11
- identifiers: List[Dict[str, Any]]
12
- recent_evidence: List[Dict[str, Any]]
13
- confidence_summary: Dict[str, Any]
14
-
15
-
16
- def build_confidence_summary(
17
- aliases: List[Dict[str, Any]],
18
- identifiers: List[Dict[str, Any]],
19
- evidence: List[Dict[str, Any]],
20
- ) -> Dict[str, Any]:
21
- identifier_confidences = sorted(float(item["confidence"]) for item in identifiers)
22
- alias_confidences = sorted(float(item["confidence"]) for item in aliases)
23
- source_set = sorted(
24
- {
25
- str(item.get("provenance"))
26
- for item in evidence
27
- if item.get("provenance") not in (None, "")
28
- }
29
- )
30
-
31
- identifier_avg = _avg(identifier_confidences)
32
- alias_avg = _avg(alias_confidences)
33
- source_diversity = min(1.0, len(source_set) / 3.0)
34
-
35
- overall = min(1.0, (0.65 * identifier_avg) + (0.25 * alias_avg) + (0.10 * source_diversity))
36
- by_identifier_type = _rollup_by_identifier_type(identifiers)
37
-
38
- return {
39
- "overall_confidence": round(overall, 6),
40
- "identifier_confidence_avg": round(identifier_avg, 6),
41
- "alias_confidence_avg": round(alias_avg, 6),
42
- "unique_source_count": len(source_set),
43
- "evidence_count": len(evidence),
44
- "by_identifier_type": by_identifier_type,
45
- }
46
-
47
-
48
- def _avg(values: List[float]) -> float:
49
- if not values:
50
- return 0.0
51
- return sum(values) / len(values)
52
-
53
-
54
- def _rollup_by_identifier_type(identifiers: List[Dict[str, Any]]) -> Dict[str, Dict[str, float]]:
55
- grouped: Dict[str, List[float]] = {}
56
- for item in identifiers:
57
- key = str(item["identifier_type"])
58
- grouped.setdefault(key, []).append(float(item["confidence"]))
59
-
60
- rollup: Dict[str, Dict[str, float]] = {}
61
- for key in sorted(grouped):
62
- values = sorted(grouped[key])
63
- rollup[key] = {
64
- "count": float(len(values)),
65
- "avg_confidence": round(_avg(values), 6),
66
- "max_confidence": round(max(values), 6),
67
- }
68
- return rollup