@pentatonic-ai/ai-agent-sdk 0.10.18 → 0.10.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,218 @@
1
+ """Structured diff between two extractions (student vs teacher gold).
2
+
3
+ Replaces the crude agreement proxies we'd been quoting (entity-name-exact-match;
4
+ word-Jaccard on whole statements) with a STRUCTURED comparison that:
5
+
6
+ - matches entities on fuzzy name + compatible type (not exact lowercased string,
7
+ which penalised "Acme" vs "Acme Corp" / normalisation variants),
8
+ - matches facts on their s·p·o STRUCTURE plus the statement as a fallback
9
+ (not bag-of-words on the statement, which ignored who-did-what),
10
+ - matches relationships as (from, type, to) triples,
11
+ - reports precision / recall / F1 PER AXIS, and facts broken down PER CATEGORY
12
+ (so `decision`/`commitment` agreement is isolated — the cascade's
13
+ high-value gate question).
14
+
15
+ Deterministic + stdlib-only (difflib) so it runs offline, in CI, and on any box
16
+ without a GPU. Semantic-embedding matching is a deliberate non-goal here: a
17
+ deterministic structural diff is the defensible, un-game-able baseline (no model
18
+ judging another model's output); an embedding tiebreak can layer on later if the
19
+ fuzzy threshold proves too strict.
20
+
21
+ Shapes (mirror _parse_guided_json output):
22
+ entity = {"name", "type", "aliases"?: [emails]}
23
+ fact = {"category", "subject", "predicate", "object"?, "statement"}
24
+ relationship = {"from", "to", "type"}
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import re
30
+ from dataclasses import dataclass
31
+ from difflib import SequenceMatcher
32
+ from typing import Any
33
+
34
# Org/legal-form suffixes stripped before name comparison so "Acme" == "Acme Inc".
_ORG_SUFFIX = re.compile(
    r"\b(inc|incorporated|ltd|limited|llc|llp|plc|corp|corporation|co|gmbh|ag|sa|"
    r"sas|bv|nv|pty|group|holdings?|company)\b\.?",
    re.IGNORECASE,
)
_NONWORD = re.compile(r"[^a-z0-9 ]+")
_WS = re.compile(r"\s+")


def _squash(s: str) -> str:
    """Turn every non-[a-z0-9 ] run into a space, collapse whitespace, trim."""
    return _WS.sub(" ", _NONWORD.sub(" ", s)).strip()


def normalize_name(s: str | None) -> str:
    """Lowercase, drop punctuation + org suffixes, collapse whitespace.

    Returns "" for a missing/empty input. If removing the org suffixes would
    leave nothing (the name consists only of legal-form tokens, e.g. "Group"
    or "Holdings Ltd"), the suffixes are kept instead: otherwise every such
    name would normalise to "" and compare as identical to every other one.
    """
    if not s:
        return ""
    lowered = s.lower()
    stripped = _squash(_ORG_SUFFIX.sub(" ", lowered))
    if stripped:
        return stripped
    # Nothing but suffix tokens/punctuation: fall back to the unstripped form.
    return _squash(lowered)
52
+
53
+
54
def _tokens(s: str) -> set[str]:
    """Return the distinct space-separated words of the normalised form of `s`."""
    normalised = normalize_name(s)
    return set(normalised.split())
56
+
57
+
58
def sim(a: str | None, b: str | None) -> float:
    """Similarity in [0,1] between two strings after normalisation.

    Both inputs normalising to "" counts as 1.0; exactly one doing so counts
    as 0.0; equal normalised strings are 1.0. Otherwise the result is the
    larger of two measures: the difflib character-level ratio (rewards
    near-identical short strings) and the Jaccard overlap of the word sets
    (rewards shared words regardless of order or length, e.g.
    "ship the Q3 release" vs "Q3 release will ship").
    """
    left = normalize_name(a)
    right = normalize_name(b)
    if not (left or right):
        return 1.0
    if not (left and right):
        return 0.0
    if left == right:
        return 1.0

    char_ratio = SequenceMatcher(None, left, right).ratio()

    left_words = set(left.split())
    right_words = set(right.split())
    union = left_words | right_words
    if union:
        jaccard = len(left_words & right_words) / len(union)
    else:
        jaccard = 0.0
    return max(char_ratio, jaccard)
74
+
75
+
76
@dataclass
class PRF:
    """Precision/recall/F1 for one axis.

    `n_gold` and `n_pred` are the item counts on each side; `matched` is the
    number of 1:1 matched pairs. An axis that is empty on both sides scores
    1.0 on every measure; empty on one side only scores 0.0.
    """

    n_gold: int
    n_pred: int
    matched: int

    @property
    def precision(self) -> float:
        """matched / n_pred, with the empty-prediction cases defined above."""
        if self.n_pred:
            return self.matched / self.n_pred
        return 0.0 if self.n_gold else 1.0

    @property
    def recall(self) -> float:
        """matched / n_gold, with the empty-gold cases defined above."""
        if self.n_gold:
            return self.matched / self.n_gold
        return 0.0 if self.n_pred else 1.0

    @property
    def f1(self) -> float:
        """Harmonic mean of precision and recall (0.0 when both are 0)."""
        p = self.precision
        r = self.recall
        total = p + r
        if not total:
            # Both-empty never lands here: it yields p == r == 1.0.
            return 0.0
        return 2 * p * r / total

    def as_dict(self) -> dict[str, float | int]:
        """Counts plus precision/recall/f1 rounded to 4 decimal places."""
        summary: dict[str, float | int] = {
            "n_gold": self.n_gold,
            "n_pred": self.n_pred,
            "matched": self.matched,
        }
        summary["precision"] = round(self.precision, 4)
        summary["recall"] = round(self.recall, 4)
        summary["f1"] = round(self.f1, 4)
        return summary
106
+
107
+
108
+ def _greedy_match(gold: list, pred: list, score_fn, threshold: float) -> int:
109
+ """Count matched gold items via greedy 1:1 alignment. All (gold, pred) pairs
110
+ scored, sorted desc, claimed highest-first so each item matches at most once.
111
+ Deterministic (stable sort, then index tiebreak)."""
112
+ pairs = []
113
+ for gi, g in enumerate(gold):
114
+ for pi, p in enumerate(pred):
115
+ s = score_fn(g, p)
116
+ if s >= threshold:
117
+ pairs.append((-s, gi, pi))
118
+ pairs.sort()
119
+ used_g: set[int] = set()
120
+ used_p: set[int] = set()
121
+ matched = 0
122
+ for _, gi, pi in pairs:
123
+ if gi in used_g or pi in used_p:
124
+ continue
125
+ used_g.add(gi)
126
+ used_p.add(pi)
127
+ matched += 1
128
+ return matched
129
+
130
+
131
+ # ── per-item scorers ─────────────────────────────────────────────────────
132
+
133
def _entity_score(g: dict, p: dict) -> float:
    """Score an entity pair: name similarity, gated by type compatibility.

    Types are compatible when they are equal, when either side is missing,
    or when either side is 'other' (the LLMs disagree on type far more than
    on identity). Incompatible types score 0.0 outright.
    """
    gold_type = (g.get("type") or "").lower()
    pred_type = (p.get("type") or "").lower()

    both_typed = bool(gold_type and pred_type)
    either_other = gold_type == "other" or pred_type == "other"
    if both_typed and gold_type != pred_type and not either_other:
        return 0.0

    return sim(g.get("name"), p.get("name"))
141
+
142
+
143
def _fact_score(g: dict, p: dict) -> float:
    """Structural s·p·o similarity, OR statement similarity as a fallback.

    Structure: mean of subject/predicate/object sims (object absent on both
    sides contributes 1.0 for that term). The result is the max of the
    structural score and the statement score, so a well-phrased statement
    still matches even if s/p/o were split differently.

    The statement fallback only applies when BOTH facts carry a usable
    statement. `sim` scores two empty strings as 1.0, which would otherwise
    make every pair of statement-less facts a perfect match irrespective of
    their subject/predicate/object.
    """
    subj = sim(g.get("subject"), p.get("subject"))
    pred = sim(g.get("predicate"), p.get("predicate"))
    go, po = g.get("object"), p.get("object")
    obj = 1.0 if not go and not po else sim(go, po)
    structural = (subj + pred + obj) / 3

    gs, ps = g.get("statement"), p.get("statement")
    if normalize_name(gs) and normalize_name(ps):
        statement = sim(gs, ps)
    else:
        # A missing statement is no evidence of agreement.
        statement = 0.0
    return max(structural, statement)
155
+
156
+
157
def _rel_score(g: dict, p: dict) -> float:
    """Score a relationship pair as the mean sim of its (from, type, to) terms."""
    from_sim = sim(g.get("from"), p.get("from"))
    type_sim = sim(g.get("type"), p.get("type"))
    to_sim = sim(g.get("to"), p.get("to"))
    return (from_sim + type_sim + to_sim) / 3
162
+
163
+
164
+ # ── public API ─────────────────────────────────────────────────────────────
165
+
166
+ ENTITY_THRESHOLD = 0.85
167
+ FACT_THRESHOLD = 0.60
168
+ REL_THRESHOLD = 0.60
169
+
170
+
171
def diff_axis(gold: list[dict], pred: list[dict], kind: str) -> PRF:
    """Match one axis and return its PRF.

    `kind` selects the per-item scorer and its threshold and must be one of
    'entities', 'facts' or 'relationships'; any other value raises KeyError.
    """
    axis_config = {
        "entities": (_entity_score, ENTITY_THRESHOLD),
        "facts": (_fact_score, FACT_THRESHOLD),
        "relationships": (_rel_score, REL_THRESHOLD),
    }
    score_fn, threshold = axis_config[kind]
    n_matched = _greedy_match(gold, pred, score_fn, threshold)
    return PRF(n_gold=len(gold), n_pred=len(pred), matched=n_matched)
180
+
181
+
182
+ def _facts_in(extraction: dict, categories: set[str] | None) -> list[dict]:
183
+ facts = extraction.get("facts") or []
184
+ if categories is None:
185
+ return facts
186
+ return [f for f in facts if (f.get("category") or "").lower() in categories]
187
+
188
+
189
def diff_extractions(
    gold: dict, pred: dict, fact_categories: set[str] | None = None
) -> dict[str, Any]:
    """Full structured diff between two extraction dicts.

    The result holds one PRF dict per axis ('entities', 'facts',
    'relationships') and, under 'facts_by_category', one PRF dict per
    non-empty fact category seen on either side. When `fact_categories` is
    given, 'facts_filtered' holds the PRF over just those categories (e.g.
    {'decision','commitment'} for the high-value-gate question) and
    'facts_filtered_categories' lists them sorted.
    """

    def _axis_items(extraction: dict, axis: str) -> list:
        # A missing or None axis is treated as empty.
        return extraction.get(axis) or []

    report: dict[str, Any] = {}
    for axis in ("entities", "facts", "relationships"):
        prf = diff_axis(_axis_items(gold, axis), _axis_items(pred, axis), axis)
        report[axis] = prf.as_dict()

    # Per-category fact breakdown over every category present on either side.
    labels = {
        (fact.get("category") or "").lower()
        for side in (gold, pred)
        for fact in _axis_items(side, "facts")
    }
    labels.discard("")
    report["facts_by_category"] = {
        label: diff_axis(
            _facts_in(gold, {label}), _facts_in(pred, {label}), "facts"
        ).as_dict()
        for label in sorted(labels)
    }

    if fact_categories is not None:
        gold_subset = _facts_in(gold, fact_categories)
        pred_subset = _facts_in(pred, fact_categories)
        report["facts_filtered"] = diff_axis(gold_subset, pred_subset, "facts").as_dict()
        report["facts_filtered_categories"] = sorted(fact_categories)
    return report
@@ -0,0 +1,78 @@
1
+ """Unit tests for the email-alias guard (_email_plausibly_belongs).
2
+
3
+ Pins the live pollution case (the "Johann Boedecker" node, 2026-06-22): keep the
4
+ person's own addresses; drop the bystander emails (a hotel, newsletters, unrelated
5
+ gmails) the LLM stapled on from co-occurring documents.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import importlib.util
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ import pytest
15
+
16
+ _THIS = Path(__file__).resolve().parent
17
+
18
+
19
+ def _load(name="extractor_async_worker_aliasguard"):
20
+ spec = importlib.util.spec_from_file_location(name, _THIS / "worker.py")
21
+ mod = importlib.util.module_from_spec(spec)
22
+ sys.modules[name] = mod
23
+ spec.loader.exec_module(mod)
24
+ return mod
25
+
26
+
27
+ try:
28
+ worker = _load()
29
+ except ImportError as e:
30
+ pytest.skip(f"extractor-async deps unavailable: {e}", allow_module_level=True)
31
+
32
+ belongs = lambda n, e: worker._email_plausibly_belongs(n, e)
33
+
34
+
35
+ # ── KEEP: the person's own addresses ─────────────────────────────────────
36
+ @pytest.mark.parametrize("email", [
37
+ "johann@pentatonic.com",
38
+ "johann.boedecker@pentatonic.com",
39
+ "boedeckerjohann@gmail.com",
40
+ "JOHANN@pentatonic.com", # case-insensitive
41
+ "jb@pentatonic.com", # initials
42
+ "j.boedecker@pentatonic.com", # surname token
43
+ ])
44
+ def test_keeps_owner_emails(email):
45
+ assert belongs("Johann Boedecker", email) is True
46
+
47
+
48
+ # ── DROP: the actual bystander emails found on the live Johann node ──────
49
+ @pytest.mark.parametrize("email", [
50
+ "reservations.nyc@acehotel.com",
51
+ "marketingadmin@sustainablebrands.com",
52
+ "martinvasquez87@gmail.com",
53
+ "schwaabd@yahoo.de",
54
+ "cvanderlip@redish.com",
55
+ "leechihshan33@gmail.com",
56
+ ])
57
+ def test_drops_bystander_emails(email):
58
+ assert belongs("Johann Boedecker", email) is False
59
+
60
+
61
+ # ── edges ────────────────────────────────────────────────────────────────
62
+ def test_initials_either_order():
63
+ assert belongs("Johann Boedecker", "bj@pentatonic.com") is True # reversed initials
64
+
65
+
66
+ def test_no_usable_name_does_not_overfilter():
67
+ # a bare/empty name has nothing to check against → keep (don't strip)
68
+ assert belongs("", "anything@x.com") is True
69
+ assert belongs("J", "anything@x.com") is True # single letter < 2 → no tokens
70
+
71
+
72
+ def test_surname_only_person_keeps_surname_email():
73
+ assert belongs("Vickers", "will.vickers@vickers-oil.com") is True
74
+ assert belongs("Vickers", "reservations.nyc@acehotel.com") is False
75
+
76
+
77
+ def test_guard_flag_default_on():
78
+ assert worker.EMAIL_ALIAS_GUARD is True
@@ -0,0 +1,180 @@
1
+ """Unit tests for the structured-diff agreement metric (extraction_diff).
2
+
3
+ Pins the behaviours that make it a real metric rather than the old proxies:
4
+ fuzzy/normalised name matching, structural s·p·o fact matching, per-axis P/R/F1,
5
+ per-category fact breakdown, and the high-value filtered view.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import importlib.util
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ import pytest
15
+
16
+ _THIS = Path(__file__).resolve().parent
17
+
18
+
19
+ def _load(name="extraction_diff_mod"):
20
+ spec = importlib.util.spec_from_file_location(name, _THIS / "extraction_diff.py")
21
+ mod = importlib.util.module_from_spec(spec)
22
+ # Register before exec so @dataclass's type resolution (which walks
23
+ # sys.modules[cls.__module__]) works under the importlib custom-name load
24
+ # on Python 3.12+/3.14. A normal `import extraction_diff` (CI/prod) is fine.
25
+ sys.modules[name] = mod
26
+ spec.loader.exec_module(mod)
27
+ return mod
28
+
29
+
30
+ ed = _load()
31
+
32
+
33
+ # ── normalize_name / sim ─────────────────────────────────────────────────
34
+
35
+ def test_normalize_strips_org_suffix_and_punct():
36
+ assert ed.normalize_name("Acme Corp.") == "acme"
37
+ assert ed.normalize_name("Acme, Inc.") == "acme"
38
+ assert ed.normalize_name("ACME Limited") == "acme"
39
+
40
+
41
+ def test_sim_normalisation_variants_are_high():
42
+ # the exact-string proxy scored these 0; structured sim should be ~1.
43
+ assert ed.sim("Acme", "Acme Corp") >= 0.99
44
+ assert ed.sim("Acme Inc.", "ACME") >= 0.99
45
+
46
+
47
+ def test_sim_token_overlap_order_invariant():
48
+ assert ed.sim("ship the Q3 release", "Q3 release will ship") >= 0.6
49
+
50
+
51
+ def test_sim_unrelated_low():
52
+ assert ed.sim("Acme", "Globex") < 0.5
53
+
54
+
55
+ def test_sim_both_empty_is_one():
56
+ assert ed.sim("", "") == 1.0
57
+ assert ed.sim(None, "x") == 0.0
58
+
59
+
60
+ # ── PRF arithmetic ───────────────────────────────────────────────────────
61
+
62
+ def test_prf_basic():
63
+ prf = ed.PRF(n_gold=4, n_pred=5, matched=3)
64
+ assert prf.recall == 0.75
65
+ assert prf.precision == 0.6
66
+ assert round(prf.f1, 3) == 0.667
67
+
68
+
69
+ def test_prf_empty_both_is_perfect():
70
+ prf = ed.PRF(n_gold=0, n_pred=0, matched=0)
71
+ assert prf.precision == 1.0 and prf.recall == 1.0 and prf.f1 == 1.0
72
+
73
+
74
+ def test_prf_pred_without_gold_is_zero_precision():
75
+ prf = ed.PRF(n_gold=0, n_pred=2, matched=0)
76
+ assert prf.precision == 0.0
77
+
78
+
79
+ # ── entity matching ──────────────────────────────────────────────────────
80
+
81
+ def test_entity_match_fuzzy_name_same_type():
82
+ g = [{"name": "Acme Corp", "type": "org"}]
83
+ p = [{"name": "Acme", "type": "org"}]
84
+ prf = ed.diff_axis(g, p, "entities")
85
+ assert prf.matched == 1 and prf.f1 == 1.0
86
+
87
+
88
+ def test_entity_type_mismatch_blocks_match():
89
+ g = [{"name": "Apple", "type": "org"}]
90
+ p = [{"name": "Apple", "type": "person"}]
91
+ assert ed.diff_axis(g, p, "entities").matched == 0
92
+
93
+
94
+ def test_entity_other_type_is_compatible():
95
+ g = [{"name": "Apple", "type": "org"}]
96
+ p = [{"name": "Apple", "type": "other"}]
97
+ assert ed.diff_axis(g, p, "entities").matched == 1
98
+
99
+
100
+ def test_entity_greedy_one_to_one():
101
+ # two golds, one pred → at most one match
102
+ g = [{"name": "Acme", "type": "org"}, {"name": "Acme", "type": "org"}]
103
+ p = [{"name": "Acme", "type": "org"}]
104
+ prf = ed.diff_axis(g, p, "entities")
105
+ assert prf.matched == 1 and prf.recall == 0.5 and prf.precision == 1.0
106
+
107
+
108
+ # ── fact matching (structural vs statement) ──────────────────────────────
109
+
110
+ def test_fact_match_on_structure_despite_statement_rephrase():
111
+ g = [{"category": "decision", "subject": "Acme", "predicate": "will renew",
112
+ "object": "the contract", "statement": "Acme decided to renew the contract for 2027."}]
113
+ p = [{"category": "decision", "subject": "Acme", "predicate": "renews",
114
+ "object": "contract", "statement": "The 2027 contract renewal was agreed by Acme."}]
115
+ prf = ed.diff_axis(g, p, "facts")
116
+ assert prf.matched == 1
117
+
118
+
119
+ def test_fact_match_on_statement_when_spo_split_differs():
120
+ g = [{"category": "commitment", "subject": "Bob", "predicate": "owns", "object": "migration",
121
+ "statement": "Bob will lead the migration starting in March."}]
122
+ p = [{"category": "commitment", "subject": "Bob Chen", "predicate": "leads",
123
+ "object": "the data migration", "statement": "Bob will lead the migration starting in March."}]
124
+ assert ed.diff_axis(g, p, "facts").matched == 1
125
+
126
+
127
+ def test_fact_unrelated_no_match():
128
+ g = [{"category": "decision", "subject": "Acme", "predicate": "hired", "object": "a CFO",
129
+ "statement": "Acme hired a new CFO."}]
130
+ p = [{"category": "decision", "subject": "Globex", "predicate": "closed", "object": "the Berlin office",
131
+ "statement": "Globex shut its Berlin office."}]
132
+ assert ed.diff_axis(g, p, "facts").matched == 0
133
+
134
+
135
+ # ── relationships ────────────────────────────────────────────────────────
136
+
137
def test_relationship_triple_match():
    """Same endpoints with a differently-worded type still count as one match."""
    g = [{"from": "Jane", "to": "Acme Corp", "type": "works at"}]
    p = [{"from": "Jane", "to": "Acme", "type": "employed by"}]
    # from and to are equal after normalisation (1.0 each), so the mean of the
    # three term sims is at least 2/3 whatever the type sim is, which clears
    # REL_THRESHOLD (0.60).
    prf = ed.diff_axis(g, p, "relationships")
    assert prf.n_gold == 1 and prf.n_pred == 1
    assert prf.matched == 1
143
+
144
+
145
+ # ── full diff + per-category + filtered ──────────────────────────────────
146
+
147
+ def test_diff_extractions_per_category_and_filtered():
148
+ gold = {
149
+ "entities": [{"name": "Acme", "type": "org"}],
150
+ "facts": [
151
+ {"category": "decision", "subject": "Acme", "predicate": "will renew",
152
+ "object": "contract", "statement": "Acme will renew the contract."},
153
+ {"category": "state", "subject": "Acme", "predicate": "is", "object": "a customer",
154
+ "statement": "Acme is a customer."},
155
+ ],
156
+ "relationships": [],
157
+ }
158
+ pred = {
159
+ "entities": [{"name": "Acme Corp", "type": "org"}],
160
+ "facts": [
161
+ {"category": "decision", "subject": "Acme", "predicate": "renews",
162
+ "object": "the contract", "statement": "Acme renews its contract."},
163
+ ],
164
+ "relationships": [],
165
+ }
166
+ out = ed.diff_extractions(gold, pred, fact_categories={"decision", "commitment"})
167
+ assert out["entities"]["matched"] == 1
168
+ # per-category: decision matched 1/1; state missing (recall 0)
169
+ assert out["facts_by_category"]["decision"]["matched"] == 1
170
+ assert out["facts_by_category"]["state"]["recall"] == 0.0
171
+ # filtered to decision/commitment: 1 gold, 1 pred, 1 matched → perfect
172
+ assert out["facts_filtered"]["recall"] == 1.0
173
+ assert out["facts_filtered"]["precision"] == 1.0
174
+ assert out["facts_filtered_categories"] == ["commitment", "decision"]
175
+
176
+
177
+ def test_diff_extractions_empty_both():
178
+ out = ed.diff_extractions({"entities": [], "facts": [], "relationships": []},
179
+ {"entities": [], "facts": [], "relationships": []})
180
+ assert out["facts"]["f1"] == 1.0
@@ -0,0 +1,58 @@
1
+ """Guard tests for the distiller system-prompt content rules.
2
+
3
+ Pins that the email-discipline + entity-separation rules (this change) and the
4
+ #126 modality/attribution rules are present in BOTH prompt variants — a cheap
5
+ regression guard so a future prompt edit can't silently drop them.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import importlib.util
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ import pytest
15
+
16
+ _THIS = Path(__file__).resolve().parent
17
+
18
+
19
+ def _load(name="extractor_async_worker_prompts"):
20
+ spec = importlib.util.spec_from_file_location(name, _THIS / "worker.py")
21
+ mod = importlib.util.module_from_spec(spec)
22
+ sys.modules[name] = mod
23
+ spec.loader.exec_module(mod)
24
+ return mod
25
+
26
+
27
+ try:
28
+ worker = _load()
29
+ except ImportError as e:
30
+ pytest.skip(f"extractor-async deps unavailable: {e}", allow_module_level=True)
31
+
32
+ PROMPTS = lambda: (worker.BATCH_SYSTEM_PROMPT, worker.GUIDED_JSON_SYSTEM_PROMPT)
33
+
34
+
35
+ def test_email_discipline_in_both_prompts():
36
+ for p in PROMPTS():
37
+ assert "An email address is NOT a person" in p
38
+ assert "reservations@" in p # role/transactional examples present
39
+ assert "clearly THEIRS" in p # bystander-attachment ban
40
+
41
+
42
+ def test_distinct_entities_rule_in_both_prompts():
43
+ for p in PROMPTS():
44
+ assert "DISTINCT ENTITIES" in p
45
+ assert "Acme & Globex" in p # conflation-split example
46
+ assert "warehouse" in p # generic-token suppression
47
+
48
+
49
+ def test_126_rules_not_regressed():
50
+ for p in PROMPTS():
51
+ assert "TENSE & MODALITY" in p
52
+ assert "ATTRIBUTION FIDELITY" in p
53
+
54
+
55
+ def test_active_prompt_carries_the_rules_and_fresh_hash():
56
+ assert "An email address is NOT a person" in worker.ACTIVE_SYSTEM_PROMPT
57
+ assert "DISTINCT ENTITIES" in worker.ACTIVE_SYSTEM_PROMPT
58
+ assert len(worker.SYSTEM_PROMPT_HASH) == 16 # hash recomputed off ACTIVE prompt
@@ -284,6 +284,38 @@ and NEVER drop a field.
284
284
  object MAY be an entity name OR a literal string OR `-` if absent.
285
285
  statement ≤ 140 characters, a self-contained sentence.
286
286
  WORKED EXAMPLE: `FCT|commitment|Timothy Bradley|agreed to|SAFE amendments|Timothy confirmed the SAFE amendments are set (14 May 2026)`
287
+ - TENSE & MODALITY — never record the future or the merely-planned as done:
288
+ * A SCHEDULED or FUTURE event (a calendar invite, a meeting/call dated \
289
+ later than this event, a recurring-meeting instance) is UPCOMING, not \
290
+ completed. NEVER emit "attended / hosted / met / reported / decided" for a \
291
+ meeting that has not happened — use category=commitment with predicate "is \
292
+ scheduled to" / "plans to".
293
+ * A PLAN, PROPOSAL, INTENT or next-step ("will", "plans to", "aims to", \
294
+ "to organise", "we'll", "next steps:") is category=commitment, NOT state and \
295
+ NOT a completed act. Keep "will send" as a commitment — never record it as "sent".
296
+ - ATTRIBUTION FIDELITY — the subject must be who the source actually credits:
297
+ * Attribute a document/deck/message's content to a person ONLY if the source \
298
+ names them as its author or speaker. Do NOT attribute an unauthored doc, agenda \
299
+ or deck to whoever it merely mentions or whoever shared it.
300
+ * Do NOT promote a meeting ATTENDEE to organiser/host without explicit evidence.
301
+ * Do NOT attribute an ORGANISATION's activity (deals, intros, pipeline) to an \
302
+ individual person.
303
+ - IDENTITY & EMAILS — do NOT infer a person's employer/affiliation from an email \
304
+ signature or a company's standard footer/boilerplate; affiliation needs an \
305
+ explicit statement in the body. \
306
+ An email address is NOT a person: NEVER emit an entity whose name is an email \
307
+ address, and NEVER treat a role/transactional address (reservations@, bookings@, \
308
+ no-reply@, info@, office@, marketing@, support@, notifications@, admin@) as a \
309
+ person. Attach an email to a person ONLY when it is clearly THEIRS (the author, \
310
+ or a local-part matching their name) — NEVER attach an address that merely \
311
+ co-occurs in the same thread, CC list, or document (a hotel booking, a \
312
+ newsletter, an unrelated contact).
313
+ - DISTINCT ENTITIES — two names joined by "and" / "&" / "/" are TWO separate \
314
+ entities, never one merged node ("Acme & Globex" → emit Acme AND Globex). Do NOT \
315
+ turn a sentence fragment or phrase into an entity, and do NOT mint generic \
316
+ infrastructure / environment tokens (prod, staging, preview, UAT, warehouse, \
317
+ main, PRD, CRM, platform, localhost) as entities — they are not named people, \
318
+ orgs, products, or projects.
287
319
  - REL lines have exactly 4 fields: `REL`, from, to, rel_type.
288
320
  from and to MUST be entity names declared in THIS event's ENT lines.
289
321
  rel_type is a short verb / preposition phrase.
@@ -337,6 +369,38 @@ observation, preference}.
337
369
  WORKED EXAMPLE: {"category": "commitment", "subject": "Timothy \
338
370
  Bradley", "predicate": "agreed to", "object": "SAFE amendments", \
339
371
  "statement": "Timothy confirmed the SAFE amendments are set (14 May 2026)"}
372
+ - TENSE & MODALITY — never record the future or the merely-planned as done:
373
+ * A SCHEDULED or FUTURE event (a calendar invite, a meeting/call dated \
374
+ later than this event, a recurring-meeting instance) is UPCOMING, not \
375
+ completed. NEVER emit "attended / hosted / met / reported / decided" for a \
376
+ meeting that has not happened — use category=commitment with predicate "is \
377
+ scheduled to" / "plans to".
378
+ * A PLAN, PROPOSAL, INTENT or next-step ("will", "plans to", "aims to", \
379
+ "to organise", "we'll", "next steps:") is category=commitment, NOT state and \
380
+ NOT a completed act. Keep "will send" as a commitment — never record it as "sent".
381
+ - ATTRIBUTION FIDELITY — the subject must be who the source actually credits:
382
+ * Attribute a document/deck/message's content to a person ONLY if the source \
383
+ names them as its author or speaker. Do NOT attribute an unauthored doc, agenda \
384
+ or deck to whoever it merely mentions or whoever shared it.
385
+ * Do NOT promote a meeting ATTENDEE to organiser/host without explicit evidence.
386
+ * Do NOT attribute an ORGANISATION's activity (deals, intros, pipeline) to an \
387
+ individual person.
388
+ - IDENTITY & EMAILS — do NOT infer a person's employer/affiliation from an email \
389
+ signature or a company's standard footer/boilerplate; affiliation needs an \
390
+ explicit statement in the body. \
391
+ An email address is NOT a person: NEVER emit an entity whose name is an email \
392
+ address, and NEVER treat a role/transactional address (reservations@, bookings@, \
393
+ no-reply@, info@, office@, marketing@, support@, notifications@, admin@) as a \
394
+ person. Attach an email to a person ONLY when it is clearly THEIRS (the author, \
395
+ or a local-part matching their name) — NEVER attach an address that merely \
396
+ co-occurs in the same thread, CC list, or document (a hotel booking, a \
397
+ newsletter, an unrelated contact).
398
+ - DISTINCT ENTITIES — two names joined by "and" / "&" / "/" are TWO separate \
399
+ entities, never one merged node ("Acme & Globex" → emit Acme AND Globex). Do NOT \
400
+ turn a sentence fragment or phrase into an entity, and do NOT mint generic \
401
+ infrastructure / environment tokens (prod, staging, preview, UAT, warehouse, \
402
+ main, PRD, CRM, platform, localhost) as entities — they are not named people, \
403
+ orgs, products, or projects.
340
404
  - relationships: "from" and "to" MUST be entity names declared in THIS \
341
405
  event's "entities". "type" is a short verb / preposition phrase.
342
406
  - HARD CAPS per event: 8 entities, 6 facts, 6 relationships. Pick the \
@@ -1189,6 +1253,43 @@ def org_node_id_key(entity_type: str, name: str, stamped_domain: str | None) ->
1189
1253
  return name
1190
1254
 
1191
1255
 
1256
+ # --------------------------------------------------------------------
1257
+ # Email-alias guard — stop bystander emails polluting a person
1258
+ # --------------------------------------------------------------------
1259
+ # The async LLM pass sometimes emits a PERSON entity whose `email` is a BYSTANDER
1260
+ # address co-occurring in the same doc/thread (a hotel booking, a newsletter, an
1261
+ # unrelated gmail). _parse_guided_json promotes it into the entity's aliases and
1262
+ # upsert_entities then stores + RESOLVES on it — folding strangers' identities
1263
+ # (and their facts) onto the person. Measured live (pentatonic-team): a "Johann
1264
+ # Boedecker" node carrying reservations.nyc@acehotel.com + unrelated gmails, all
1265
+ # from STUDENT-distilled `doc` events. This guard keeps an email alias on a person
1266
+ # only when its local-part plausibly relates to the person's name; clear bystanders
1267
+ # are dropped BEFORE resolution/storage. Conservative: dropping a genuine but
1268
+ # non-name-matching alias is a mild loss; keeping a bystander is a confabulation
1269
+ # source. Flag-revertible (EMAIL_ALIAS_GUARD=false). Persons only — org domain
1270
+ # stamping is untouched.
1271
+ EMAIL_ALIAS_GUARD = _envflag("EMAIL_ALIAS_GUARD", "true")
1272
+ _ALIAS_NONALPHA = re.compile(r"[^a-z]")
1273
+ _ALIAS_SPLIT = re.compile(r"[^a-z]+")
1274
+
1275
+
1276
+ def _email_plausibly_belongs(person_name: str, email: str) -> bool:
1277
+ """True ⇒ keep this email as an alias of `person_name`; False ⇒ drop (clear
1278
+ bystander). Match = a name token appears in the local-part, OR the local-part
1279
+ is the person's initials. Pure + deterministic."""
1280
+ local = email.split("@", 1)[0].lower()
1281
+ local_letters = _ALIAS_NONALPHA.sub("", local)
1282
+ name_tokens = {t for t in _ALIAS_SPLIT.split(person_name.lower()) if len(t) >= 2}
1283
+ if not name_tokens or not local_letters:
1284
+ return True # nothing to check against — don't over-filter
1285
+ if any(nt in local_letters for nt in name_tokens):
1286
+ return True # johann@…, johann.boedecker@…, boedeckerjohann@…
1287
+ initials = "".join(t[0] for t in person_name.lower().split() if t[:1].isalpha())
1288
+ if len(initials) >= 2 and local_letters in (initials, initials[::-1]):
1289
+ return True # jb@… / bj@… for "Johann Boedecker"
1290
+ return False
1291
+
1292
+
1192
1293
  def upsert_entities(
1193
1294
  conn: psycopg.Connection,
1194
1295
  arena: str,
@@ -1281,6 +1382,21 @@ def upsert_entities(
1281
1382
  continue
1282
1383
  aliases = [a for a in (e.get("aliases") or []) if a]
1283
1384
 
1385
+ # Email-alias guard (persons only): drop bystander emails the LLM
1386
+ # stapled on from a co-occurring doc/thread, BEFORE they reach
1387
+ # resolution or storage. See _email_plausibly_belongs.
1388
+ if EMAIL_ALIAS_GUARD and etype == "person" and aliases:
1389
+ kept = []
1390
+ for a in aliases:
1391
+ if "@" in a and " " not in a and not _email_plausibly_belongs(name, a):
1392
+ log.info(
1393
+ f"alias-guard: dropped bystander email {a!r} from "
1394
+ f"person {name!r} (arena={arena})"
1395
+ )
1396
+ continue
1397
+ kept.append(a)
1398
+ aliases = kept
1399
+
1284
1400
  # Hard-key stamps for THIS entity, merged onto the node's attributes
1285
1401
  # and (for domain) into the resolution aliases. Adding domain to
1286
1402
  # aliases before forms are computed is deliberate — that's what makes