@reconcrap/people-network-memory 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +476 -0
  2. package/docs/mcp_tools.md +138 -0
  3. package/harness_adapters/openclaw/mcp.managed.unix.template.json +25 -0
  4. package/harness_adapters/openclaw/mcp.managed.windows.template.json +26 -0
  5. package/harness_adapters/openclaw/mcp.template.json +14 -0
  6. package/harness_adapters/openclaw/ppl/SKILL.md +114 -0
  7. package/package.json +30 -0
  8. package/pyproject.toml +26 -0
  9. package/scripts/install_windows.ps1 +92 -0
  10. package/scripts/npm/people-memory.js +276 -0
  11. package/scripts/people_memory_bootstrap.py +247 -0
  12. package/scripts/run_graphiti_live_from_liepin.ps1 +87 -0
  13. package/scripts/run_tests_with_artifacts.ps1 +307 -0
  14. package/src/people_network_memory/__init__.py +6 -0
  15. package/src/people_network_memory/application/__init__.py +16 -0
  16. package/src/people_network_memory/application/normalization.py +1441 -0
  17. package/src/people_network_memory/application/services.py +921 -0
  18. package/src/people_network_memory/cli.py +1212 -0
  19. package/src/people_network_memory/config.py +268 -0
  20. package/src/people_network_memory/domain/__init__.py +55 -0
  21. package/src/people_network_memory/domain/identity.py +77 -0
  22. package/src/people_network_memory/domain/models.py +355 -0
  23. package/src/people_network_memory/fixtures/__init__.py +6 -0
  24. package/src/people_network_memory/fixtures/eval.py +398 -0
  25. package/src/people_network_memory/fixtures/extractor_eval.py +364 -0
  26. package/src/people_network_memory/fixtures/generator.py +290 -0
  27. package/src/people_network_memory/fixtures/report.py +252 -0
  28. package/src/people_network_memory/graphiti_adapter/__init__.py +9 -0
  29. package/src/people_network_memory/graphiti_adapter/episode_formatter.py +70 -0
  30. package/src/people_network_memory/graphiti_adapter/graphiti_store.py +655 -0
  31. package/src/people_network_memory/graphiti_adapter/indexer.py +194 -0
  32. package/src/people_network_memory/graphiti_adapter/ontology.py +68 -0
  33. package/src/people_network_memory/harness_adapters/__init__.py +2 -0
  34. package/src/people_network_memory/harness_adapters/openclaw/__init__.py +9 -0
  35. package/src/people_network_memory/harness_adapters/openclaw/installer.py +577 -0
  36. package/src/people_network_memory/harness_adapters/openclaw/integration_eval.py +508 -0
  37. package/src/people_network_memory/harness_adapters/openclaw/smoke.py +292 -0
  38. package/src/people_network_memory/infrastructure/__init__.py +2 -0
  39. package/src/people_network_memory/infrastructure/archive_backup.py +171 -0
  40. package/src/people_network_memory/infrastructure/diagnostics.py +171 -0
  41. package/src/people_network_memory/infrastructure/embeddings.py +155 -0
  42. package/src/people_network_memory/infrastructure/file_store.py +129 -0
  43. package/src/people_network_memory/infrastructure/graphiti_promotion.py +212 -0
  44. package/src/people_network_memory/infrastructure/id_generator.py +40 -0
  45. package/src/people_network_memory/infrastructure/in_memory_store.py +1008 -0
  46. package/src/people_network_memory/infrastructure/llm_extractor.py +476 -0
  47. package/src/people_network_memory/infrastructure/llm_identity_advisor.py +200 -0
  48. package/src/people_network_memory/infrastructure/llm_judge.py +162 -0
  49. package/src/people_network_memory/infrastructure/redaction.py +21 -0
  50. package/src/people_network_memory/infrastructure/release_check.py +186 -0
  51. package/src/people_network_memory/infrastructure/retrieval_intent.py +98 -0
  52. package/src/people_network_memory/infrastructure/semantic_index.py +262 -0
  53. package/src/people_network_memory/mcp_server/__init__.py +2 -0
  54. package/src/people_network_memory/mcp_server/contracts.py +85 -0
  55. package/src/people_network_memory/mcp_server/runtime.py +133 -0
  56. package/src/people_network_memory/mcp_server/tools.py +588 -0
  57. package/src/people_network_memory/ports/__init__.py +2 -0
  58. package/src/people_network_memory/ports/errors.py +25 -0
  59. package/src/people_network_memory/ports/interfaces.py +103 -0
  60. package/src/people_network_memory/projection/__init__.py +6 -0
  61. package/src/people_network_memory/projection/builders.py +46 -0
@@ -0,0 +1,1008 @@
1
+ """In-memory adapter used for tests, fixtures, and CLI test mode."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from datetime import datetime, timezone
8
+ from typing import Iterable
9
+
10
+ from people_network_memory.domain.models import (
11
+ AttributedClaim,
12
+ ContactMethod,
13
+ DirectFact,
14
+ EducationRecord,
15
+ Evidence,
16
+ FollowUpTask,
17
+ IdentityCandidate,
18
+ PersonMemoryRecord,
19
+ PersonRef,
20
+ RecordInteractionResult,
21
+ RetrievalItem,
22
+ ReviewItem,
23
+ SensitivityLabel,
24
+ SocialInteraction,
25
+ WorkHistoryRecord,
26
+ )
27
+ from people_network_memory.infrastructure.retrieval_intent import (
28
+ is_follow_up_query,
29
+ mentioned_query_target,
30
+ person_is_only_mentioned_target,
31
+ text_answers_mentioned_query,
32
+ )
33
+ from people_network_memory.ports.errors import PersistenceError
34
+ from people_network_memory.infrastructure.id_generator import SequentialIdGenerator
35
+
36
+
37
# Runs of ASCII letters, digits, and underscores, matched case-insensitively.
ASCII_TOKEN_RE = re.compile(r"[a-z0-9_]+", re.IGNORECASE)

# Runs of characters in the U+4E00..U+9FFF range.
CJK_RUN_RE = re.compile(r"[\u4e00-\u9fff]+")

# Single characters removed from a CJK run before it is tokenized.
CJK_STOPWORDS = {
    "的",
    "了",
    "和",
    "与",
    "在",
}

# CJK characters that may sit directly before / after a CJK alias in a query
# and still count as a boundary for that alias.
CJK_ALIAS_LEFT_BOUNDARY = set(
    "叫找查问和与跟给向说提约帮为"
)
CJK_ALIAS_RIGHT_BOUNDARY = set(
    "是的在有做聊说问要还关喜哪什现吗呢呀吧了"
)
42
+
43
+
44
class InMemoryPeopleStore:
    """Dictionary-backed people store used for tests, fixtures, and CLI test mode.

    All state lives in plain containers on the instance:

    * ``people`` maps a person id to its ``PersonMemoryRecord``.
    * ``interactions`` and ``evidence`` map generated ids to the saved objects.
    * ``review_items`` is an ordered list of ``ReviewItem`` objects.
    """

    def __init__(self) -> None:
        """Create an empty store with its own sequential id generator."""
        self._ids = SequentialIdGenerator()
        self.people: dict[str, PersonMemoryRecord] = {}
        self.review_items: list[ReviewItem] = []
        self.interactions: dict[str, SocialInteraction] = {}
        self.evidence: dict[str, Evidence] = {}

    @property
    def id_generator(self) -> SequentialIdGenerator:
        """Return the id generator this store draws new ids from."""
        return self._ids

    def find_identity_candidates(self, ref: PersonRef) -> list[IdentityCandidate]:
        """Return up to five stored people that ``ref`` may refer to, best first.

        A person is a candidate when one of their email/phone contacts matches
        the reference exactly (score 1.0), or when the best name score plus the
        company-hint bonus reaches 0.45 (capped at 0.95).
        """
        ref_label = _norm(ref.label)
        ref_names = [
            name
            for name in (ref_label, *(_norm(alias) for alias in ref.aliases))
            if name
        ]
        explicit_name_matches = _explicit_name_match_ids(self.people.values(), ref_names)
        # Normalize the reference identifiers once. An identifier that
        # normalizes to "" (e.g. a phone string without digits) must never
        # count as an exact match, otherwise two junk values match each other.
        ref_email = _norm(ref.email)
        ref_phone = _digits(ref.phone)
        company_hint = _norm(ref.company_hint)

        candidates: list[IdentityCandidate] = []
        for person in self.people.values():
            labels = [person.display_name, *person.aliases]
            # Derived CJK short names are only considered when nobody matches
            # one of the reference names explicitly.
            if not explicit_name_matches:
                labels.extend(_cjk_name_aliases(person.display_name))
            normalized_labels = [name for name in map(_norm, labels) if name]
            exact_identifier = any(
                bool(
                    ref_email
                    and contact.kind == "email"
                    and _norm(contact.value) == ref_email
                )
                or bool(
                    ref_phone
                    and contact.kind == "phone"
                    and _digits(contact.value) == ref_phone
                )
                for contact in person.contacts
            )
            name_score = max(
                (
                    _name_score(ref_name, candidate_name)
                    for ref_name in ref_names
                    for candidate_name in normalized_labels
                ),
                default=0.0,
            )
            exact_name_match = ref_label in normalized_labels
            # A blank hint is a substring of every organization name, so it
            # must not earn the bonus.
            company_score = (
                0.2
                if company_hint
                and any(
                    company_hint in _norm(work.organization)
                    for work in person.work_history
                )
                else 0.0
            )
            score = 1.0 if exact_identifier else min(0.95, name_score + company_score)
            if exact_identifier or score >= 0.45:
                candidates.append(
                    IdentityCandidate(
                        person_id=person.person_id,
                        display_name=person.display_name,
                        score=score,
                        evidence=labels,
                        exact_identifier_match=exact_identifier,
                        exact_name_match=exact_name_match,
                    )
                )
        return sorted(candidates, key=lambda item: item.score, reverse=True)[:5]

    def save_interaction(
        self, interaction: SocialInteraction, identity_map: dict[str, str | None]
    ) -> RecordInteractionResult:
        """Store ``interaction`` and attach it to every person it references.

        ``identity_map`` maps a reference key (see ``_ref_key``) to a resolved
        person id. References without a resolved id, or whose id is not stored
        yet, create a new person record. Returns the generated interaction id,
        the created/updated person ids, the key-to-id map, and the evidence
        object created for this interaction.
        """
        now = datetime.now(timezone.utc)
        interaction_id = self._new_unique_id("interaction", self.interactions)
        evidence = Evidence(
            evidence_id=self._new_unique_id("evidence", self.evidence),
            source_text=interaction.source_text,
            recorded_at=interaction.occurred_at or now,
        )
        self.evidence[evidence.evidence_id] = evidence
        self.interactions[interaction_id] = interaction
        created: list[str] = []
        updated: list[str] = []
        all_refs = list(_iter_refs(interaction))
        ref_to_id: dict[str, str] = {}
        for ref in all_refs:
            key = _ref_key(ref)
            if key in ref_to_id:
                continue
            person_id = identity_map.get(key) or ref.person_id
            if not person_id:
                person_id = self._create_person(ref, evidence)
                created.append(person_id)
            elif person_id not in self.people:
                person_id = self._create_person(ref, evidence, explicit_id=person_id)
                created.append(person_id)
            else:
                updated.append(person_id)
                self._append_unique_evidence(person_id, evidence)
                self._merge_aliases(self.people[person_id], [ref.label, *ref.aliases])
            ref_to_id[key] = person_id
        # Attach the interaction once per person even when several references
        # resolve to the same id.
        attached_person_ids: set[str] = set()
        for ref in all_refs:
            person_id = ref_to_id[_ref_key(ref)]
            if person_id in attached_person_ids:
                continue
            attached_person_ids.add(person_id)
            record = self.people[person_id]
            record.interactions.append(interaction)
            self._append_unique_evidence(person_id, evidence)
        self._attach_interaction_details(interaction, ref_to_id)
        self._complete_matching_follow_ups(interaction, ref_to_id)
        return RecordInteractionResult(
            interaction_id=interaction_id,
            created_people=sorted(set(created)),
            updated_people=sorted(set(updated)),
            person_ref_map=ref_to_id,
            evidence=[evidence],
        )

    def get_person_memory(self, person_id: str) -> PersonMemoryRecord | None:
        """Return the record stored under ``person_id``, or ``None``."""
        return self.people.get(person_id)

    def find_person_memory_by_name(self, name: str) -> list[PersonMemoryRecord]:
        """Return people whose display name or an alias equals ``name``.

        Comparison uses the normalized (stripped, lowercased) form. Derived CJK
        short names are only considered when no display name or alias matches.
        Results are ordered by case-folded display name.
        """
        normalized = _norm(name)
        if not normalized:
            return []
        matches: list[PersonMemoryRecord] = []
        explicit_name_matches = _explicit_name_match_ids(
            self.people.values(), [normalized]
        )
        for person in self.people.values():
            labels = [person.display_name, *person.aliases]
            if not explicit_name_matches:
                labels.extend(_cjk_name_aliases(person.display_name))
            if any(_norm(label) == normalized for label in labels):
                matches.append(person)
        return sorted(matches, key=lambda person: person.display_name.casefold())

    def merge_people(
        self, *, source_person_id: str, target_person_id: str, note: str | None = None
    ) -> PersonMemoryRecord:
        """Fold the source person into the target person and drop the source.

        ``note`` is accepted but not used by this in-memory implementation.

        Raises:
            PersistenceError: if both ids are equal or either id is not stored.
        """
        if source_person_id == target_person_id:
            raise PersistenceError("source_person_id and target_person_id must differ")
        source = self.people.get(source_person_id)
        target = self.people.get(target_person_id)
        if source is None:
            raise PersistenceError(f"Source person not found: {source_person_id}")
        if target is None:
            raise PersistenceError(f"Target person not found: {target_person_id}")

        self._merge_aliases(target, [source.display_name, *source.aliases])
        _extend_unique(target.work_history, source.work_history)
        _extend_unique(target.education, source.education)
        _extend_unique(target.interests, source.interests)
        _extend_unique(target.important_dates, source.important_dates)
        _extend_unique(target.contacts, source.contacts)
        _extend_unique(target.preferences, source.preferences)
        _extend_unique(target.direct_facts, source.direct_facts)
        _extend_unique(target.attributed_claims, source.attributed_claims)
        _extend_unique(target.relationships, source.relationships)
        _extend_unique(target.follow_ups, source.follow_ups)
        _extend_unique(target.evidence, source.evidence)

        moved_interactions = list(source.interactions)
        for interaction in moved_interactions:
            _remap_interaction(interaction, source_person_id, target_person_id)
        _extend_unique(target.interactions, moved_interactions)
        for interaction in self.interactions.values():
            _remap_interaction(interaction, source_person_id, target_person_id)
        _remap_person_record(target, source_person_id, target_person_id)
        del self.people[source_person_id]
        return target

    def search(
        self,
        query: str,
        *,
        limit: int = 10,
        include_sensitive: bool = False,
        mode: str = "recall",
    ) -> list[RetrievalItem]:
        """Return the best-scoring retrieval items for ``query``, best first.

        When the query names an alias explicitly, only those people are
        searched. When several people share a display name mentioned in the
        query, only their profile summaries are returned. At most ``limit``
        items are returned; a non-positive ``limit`` yields an empty list.
        """
        query_tokens = _tokens(query)
        mention_target = mentioned_query_target(query)
        follow_up_intent = is_follow_up_query(query)
        explicit_query_person_ids = (
            set()
            if mention_target
            else _explicit_query_alias_match_ids(self.people.values(), query)
        )
        ambiguous_name_person_ids = (
            set()
            if (explicit_query_person_ids or mention_target or follow_up_intent)
            else _ambiguous_query_display_name_match_ids(self.people.values(), query)
        )
        items: list[RetrievalItem] = []
        for person in self.people.values():
            if (
                explicit_query_person_ids
                and person.person_id not in explicit_query_person_ids
            ):
                continue
            if (
                ambiguous_name_person_ids
                and person.person_id not in ambiguous_name_person_ids
            ):
                continue
            items.extend(
                self._person_items(
                    person,
                    query_tokens,
                    include_sensitive,
                    mode,
                    mention_target=mention_target,
                    follow_up_intent=follow_up_intent,
                    profile_only=bool(ambiguous_name_person_ids),
                )
            )
        ranked = sorted(items, key=lambda item: item.score, reverse=True)
        # A negative slice bound would silently drop the tail instead of
        # limiting the result, so clamp it.
        return ranked[: max(limit, 0)]

    def add_review_item(self, item: ReviewItem) -> None:
        """Append ``item`` to the review queue."""
        self.review_items.append(item)

    def list_review_items(self, *, status: str | None = None) -> list[ReviewItem]:
        """Return a new list of review items, optionally filtered by ``status``."""
        if status is None:
            return list(self.review_items)
        return [item for item in self.review_items if item.status == status]

    def update_review_item(self, item: ReviewItem) -> ReviewItem:
        """Replace the stored review item that has the same ``review_id``.

        Raises:
            PersistenceError: if no stored item has that ``review_id``.
        """
        for index, existing in enumerate(self.review_items):
            if existing.review_id == item.review_id:
                self.review_items[index] = item
                return item
        raise PersistenceError(f"Review item not found: {item.review_id}")

    def export_data(self) -> dict[str, object]:
        """Return people, review items, and interactions as JSON-mode dumps."""
        return {
            "people": [person.model_dump(mode="json") for person in self.people.values()],
            "review_items": [item.model_dump(mode="json") for item in self.review_items],
            "interactions": [
                interaction.model_dump(mode="json")
                for interaction in self.interactions.values()
            ],
        }

    def _create_person(
        self, ref: PersonRef, evidence: Evidence, *, explicit_id: str | None = None
    ) -> str:
        """Create a record for ``ref`` and return its id.

        Uses ``explicit_id`` when given, otherwise a freshly generated id.

        Raises:
            PersistenceError: if the chosen id is already stored.
        """
        person_id = explicit_id or self._new_unique_id("person", self.people)
        if person_id in self.people:
            raise PersistenceError(f"Person id already exists: {person_id}")
        aliases = [alias for alias in ref.aliases if alias != ref.label]
        self.people[person_id] = PersonMemoryRecord(
            person_id=person_id,
            display_name=ref.label,
            aliases=aliases,
            evidence=[evidence],
        )
        return person_id

    def _new_unique_id(self, prefix: str, existing: Iterable[str]) -> str:
        """Draw ids from the generator until one is not among ``existing``.

        ``existing`` may be any iterable of ids; iterating a dict yields its keys.
        """
        existing_ids = set(existing)
        while True:
            item_id = self._ids.new_id(prefix)
            if item_id not in existing_ids:
                return item_id

    def _append_unique_evidence(self, person_id: str, evidence: Evidence) -> None:
        """Attach ``evidence`` to the person unless its id is already attached."""
        record = self.people[person_id]
        if all(item.evidence_id != evidence.evidence_id for item in record.evidence):
            record.evidence.append(evidence)

    def _merge_aliases(self, record: PersonMemoryRecord, aliases: list[str]) -> None:
        """Add stripped, non-empty aliases the record does not already carry."""
        for alias in aliases:
            alias = alias.strip()
            if alias and alias != record.display_name and alias not in record.aliases:
                record.aliases.append(alias)

    def _attach_interaction_details(
        self, interaction: SocialInteraction, ref_to_id: dict[str, str]
    ) -> None:
        """Copy facts, claims, follow-ups, relationships, and topics to people.

        Every copied item has its person references rewritten to the stored
        display name and aliases (see ``_resolved_ref``).
        """
        for fact in interaction.direct_facts:
            person_id = ref_to_id.get(_ref_key(fact.subject))
            if person_id:
                resolved_fact = fact.model_copy(
                    update={"subject": self._resolved_ref(fact.subject, ref_to_id)}
                )
                self.people[person_id].direct_facts.append(resolved_fact)
                self._derive_profile_fact(self.people[person_id], resolved_fact)
        for claim in interaction.attributed_claims:
            resolved_claim = claim.model_copy(
                update={
                    "speaker": (
                        self._resolved_ref(claim.speaker, ref_to_id)
                        if claim.speaker
                        else None
                    ),
                    "subject": (
                        self._resolved_ref(claim.subject, ref_to_id)
                        if claim.subject
                        else None
                    ),
                }
            )
            if claim.subject:
                person_id = ref_to_id.get(_ref_key(claim.subject))
                if person_id:
                    self.people[person_id].attributed_claims.append(resolved_claim)
            if claim.speaker:
                speaker_id = ref_to_id.get(_ref_key(claim.speaker))
                if (
                    speaker_id
                    and resolved_claim not in self.people[speaker_id].attributed_claims
                ):
                    self.people[speaker_id].attributed_claims.append(resolved_claim)
        for follow_up in interaction.follow_ups:
            resolved_follow_up = follow_up.model_copy(
                update={
                    "related_people": [
                        self._resolved_ref(ref, ref_to_id)
                        for ref in follow_up.related_people
                    ]
                }
            )
            # Two references may resolve to the same person; attach only once
            # so the task does not show up twice on that record.
            attached_ids: set[str] = set()
            for ref in follow_up.related_people:
                person_id = ref_to_id.get(_ref_key(ref))
                if person_id and person_id not in attached_ids:
                    attached_ids.add(person_id)
                    self.people[person_id].follow_ups.append(resolved_follow_up)
        for relationship in interaction.relationships:
            resolved_relationship = relationship.model_copy(
                update={
                    "source": self._resolved_ref(relationship.source, ref_to_id),
                    "target": self._resolved_ref(relationship.target, ref_to_id),
                }
            )
            # Source and target may be the same person; attach only once.
            attached_ids = set()
            for ref in (relationship.source, relationship.target):
                person_id = ref_to_id.get(_ref_key(ref))
                if person_id and person_id not in attached_ids:
                    attached_ids.add(person_id)
                    self.people[person_id].relationships.append(resolved_relationship)
        for topic in interaction.topics:
            for participant in interaction.participants:
                person_id = ref_to_id.get(_ref_key(participant.person))
                if person_id and topic not in self.people[person_id].interests:
                    self.people[person_id].interests.append(topic)

    def _complete_matching_follow_ups(
        self, interaction: SocialInteraction, ref_to_id: dict[str, str]
    ) -> None:
        """Mark open follow-ups as done when the interaction text completes them.

        Does nothing unless the source text looks like a completion report.
        Matching follow-ups are updated on the referenced people's records, on
        those people's interactions, and on every stored interaction.
        """
        if not _looks_like_follow_up_completion(interaction.source_text):
            return
        candidate_person_ids = {
            person_id
            for ref in _iter_refs(interaction)
            for person_id in [ref_to_id.get(_ref_key(ref))]
            if person_id
        }
        for person_id in candidate_person_ids:
            record = self.people.get(person_id)
            if record is None:
                continue
            for index, follow_up in enumerate(record.follow_ups):
                if not _completion_matches_follow_up(interaction.source_text, follow_up):
                    continue
                record.follow_ups[index] = follow_up.model_copy(update={"status": "done"})
            _mark_matching_follow_ups_done(record.interactions, interaction.source_text)
        _mark_matching_follow_ups_done(self.interactions.values(), interaction.source_text)

    def _resolved_ref(
        self, ref: PersonRef, ref_to_id: dict[str, str]
    ) -> PersonRef:
        """Return ``ref`` relabelled with the stored display name and aliases.

        The reference is returned unchanged when it does not resolve to a
        stored person.
        """
        person_id = ref_to_id.get(_ref_key(ref))
        record = self.people.get(person_id or "")
        if record is None:
            return ref
        aliases = [alias for alias in record.aliases if alias != record.display_name]
        return ref.model_copy(update={"label": record.display_name, "aliases": aliases})

    def _derive_profile_fact(self, record: PersonMemoryRecord, fact: DirectFact) -> None:
        """Mirror a direct fact into the structured profile fields of ``record``.

        The lowercased predicate selects the target field: work history,
        education, contacts, interests, or preferences. Other predicates are
        ignored.
        """
        predicate = fact.predicate.lower()
        value = fact.value.strip()
        if predicate in {"works_at", "worked_at", "work", "current_job"}:
            work = WorkHistoryRecord(
                organization=value,
                role=_metadata_str(fact.metadata, "role"),
                is_current=predicate != "worked_at",
            )
            _append_unique(record.work_history, work)
            return
        if predicate in {"studied_at", "school", "education"}:
            education = EducationRecord(
                school=value,
                degree=_metadata_str(fact.metadata, "degree"),
                major=_metadata_str(fact.metadata, "major"),
            )
            _append_unique(record.education, education)
            return
        if predicate in {"email", "phone", "linkedin", "wechat"}:
            contact = ContactMethod(kind=predicate, value=value)
            _append_unique(record.contacts, contact)
            return
        if predicate in {"interest", "interested_in", "hobby"} and value not in record.interests:
            record.interests.append(value)
        if predicate in {"preference", "likes"} and value not in record.preferences:
            record.preferences.append(value)

    @staticmethod
    def _stable_text_digest(text: str) -> int:
        """Return a non-negative integer digest of ``text`` that is stable across runs.

        The built-in ``hash()`` of a string is salted per process, so ids
        derived from it change between runs; a fixed digest keeps them
        reproducible.
        """
        import hashlib

        digest = hashlib.blake2b(text.encode("utf-8"), digest_size=8).digest()
        return int.from_bytes(digest, "big")

    def _person_items(
        self,
        person: PersonMemoryRecord,
        query_tokens: set[str],
        include_sensitive: bool,
        mode: str,
        *,
        mention_target: str | None,
        follow_up_intent: bool,
        profile_only: bool = False,
    ) -> list[RetrievalItem]:
        """Build the scored retrieval items that ``person`` contributes to a query.

        Candidates are ``(kind, title, text, sensitivity, secondhand)`` tuples
        collected from the person's name, profile summary, interactions, facts,
        claims, relationships, and open follow-ups. Follow-up queries keep only
        follow-ups; mention queries keep only interactions and claims that
        answer the mention. With ``profile_only`` only the profile summary is
        kept. Candidates blocked by sensitivity or scoring <= 0 are dropped.
        """
        candidates: list[tuple[str, str, str, list[SensitivityLabel], bool]] = []
        if not follow_up_intent and not mention_target:
            candidates.append(("person", person.display_name, person.display_name, [], False))
        profile_summary = _profile_summary_text(person)
        if profile_summary and not follow_up_intent and not mention_target:
            candidates.append(
                (
                    "fact",
                    f"Profile summary for {person.display_name}",
                    profile_summary,
                    [],
                    False,
                )
            )
        for interaction in person.interactions:
            if mention_target:
                mentioned_pairs = [
                    (
                        mention.person.label,
                        mention.mentioned_by.label if mention.mentioned_by else None,
                    )
                    for mention in interaction.mentioned_people
                ]
                if person_is_only_mentioned_target(
                    person_label=person.display_name,
                    target_label=mention_target,
                    mentioned_pairs=mentioned_pairs,
                ):
                    continue
                if not text_answers_mentioned_query(interaction.source_text, mention_target):
                    continue
            if follow_up_intent:
                continue
            interaction_sensitivity = _interaction_sensitivity(interaction)
            candidates.append(
                (
                    "interaction",
                    f"Interaction with {person.display_name}",
                    interaction.source_text,
                    interaction_sensitivity,
                    False,
                )
            )
        for fact in person.direct_facts:
            if follow_up_intent or mention_target:
                continue
            candidates.append(
                (
                    "fact",
                    f"{person.display_name}: {fact.predicate}",
                    f"{fact.subject.label} {fact.predicate} {fact.value}",
                    fact.sensitivity,
                    False,
                )
            )
        for claim in person.attributed_claims:
            if follow_up_intent:
                continue
            if mention_target and not text_answers_mentioned_query(claim.claim_text, mention_target):
                continue
            candidates.append(
                (
                    "claim",
                    f"Claim involving {person.display_name}",
                    claim.claim_text,
                    claim.sensitivity,
                    True,
                )
            )
        for relationship in person.relationships:
            if follow_up_intent or mention_target:
                continue
            candidates.append(
                (
                    "fact",
                    f"Relationship involving {person.display_name}",
                    _relationship_text(relationship),
                    relationship.sensitivity,
                    False,
                )
            )
        for follow_up in person.follow_ups:
            if follow_up.status != "open":
                continue
            if mention_target:
                continue
            candidates.append(
                (
                    "follow_up",
                    f"Follow-up for {person.display_name}",
                    follow_up.description,
                    follow_up.sensitivity,
                    False,
                )
            )
        if profile_only:
            candidates = [
                candidate
                for candidate in candidates
                if candidate[0] == "fact" and candidate[1].startswith("Profile summary")
            ]
        items: list[RetrievalItem] = []
        for kind, title, text, sensitivity, secondhand in candidates:
            if _blocked_by_sensitivity(sensitivity, include_sensitive):
                continue
            # The bare "person" item only appears when every query token is
            # part of the display name.
            if kind == "person" and not query_tokens.issubset(_tokens(person.display_name)):
                continue
            score = _score(query_tokens, text, person.display_name)
            if kind == "follow_up" and follow_up_intent:
                score += 2.0
            if title.startswith("Profile summary") and query_tokens.issubset(_tokens(text)):
                score += 2.0
            if mention_target and text_answers_mentioned_query(text, mention_target):
                score += 1.5
            if mode == "brief" and kind in {"interaction", "follow_up", "claim"}:
                score += 0.25
            if score <= 0:
                continue
            items.append(
                RetrievalItem(
                    item_id=f"{person.person_id}:{kind}:{self._stable_text_digest(text)}",
                    kind=kind,  # type: ignore[arg-type]
                    title=title,
                    matched_text=text,
                    score=score,
                    why_matched="Matched query terms and related person context.",
                    person_ids=[person.person_id],
                    sensitivity=sensitivity,
                    evidence=_result_evidence(person, include_sensitive),
                    is_secondhand=secondhand,
                )
            )
        return items
592
+
593
+
594
def _iter_refs(interaction: SocialInteraction) -> Iterable[PersonRef]:
    """Yield every person reference carried by ``interaction``.

    Order: participants, mentioned people (each followed by whoever mentioned
    them, when set), claim speakers and subjects, fact subjects, follow-up
    related people, then relationship sources and targets. Duplicates are not
    removed.
    """
    yield from (entry.person for entry in interaction.participants)
    for mention in interaction.mentioned_people:
        yield mention.person
        if mention.mentioned_by:
            yield mention.mentioned_by
    for claim in interaction.attributed_claims:
        # Speaker and subject are both optional on a claim.
        yield from (ref for ref in (claim.speaker, claim.subject) if ref)
    yield from (fact.subject for fact in interaction.direct_facts)
    for task in interaction.follow_ups:
        yield from task.related_people
    for link in interaction.relationships:
        yield link.source
        yield link.target
613
+
614
+
615
def _ref_key(ref: PersonRef) -> str:
    """Return the key used to identify ``ref`` within one interaction.

    The first non-empty value of person id, email, and phone wins; the label
    is the fallback.
    """
    for identifier in (ref.person_id, ref.email, ref.phone):
        if identifier:
            return identifier
    return ref.label
617
+
618
+
619
def _norm(value: str | None) -> str:
    """Return ``value`` stripped and lowercased; ``None`` or empty gives ``""``."""
    if not value:
        return ""
    return value.strip().lower()
621
+
622
+
623
def _digits(value: str | None) -> str:
    """Return only the digit characters of ``value``; ``None`` gives ``""``."""
    text = value if value else ""
    non_digit_runs = re.compile(r"\D+")
    return non_digit_runs.sub("", text)
625
+
626
+
627
def _name_score(ref_name: str, candidate_name: str) -> float:
    """Score how well two already-normalized names agree.

    Returns 0.9 for equal names, 0.65 when one contains the other, 0.5 when
    they share a whitespace-separated word, and 0.0 otherwise or when either
    name is empty.
    """
    if not (ref_name and candidate_name):
        return 0.0
    if ref_name == candidate_name:
        return 0.9
    if ref_name in candidate_name or candidate_name in ref_name:
        return 0.65
    shares_word = not set(ref_name.split()).isdisjoint(candidate_name.split())
    return 0.5 if shares_word else 0.0
639
+
640
+
641
def _explicit_name_match_ids(
    people: Iterable[PersonMemoryRecord], ref_names: list[str]
) -> set[str]:
    """Return ids of people whose display name or an alias is in ``ref_names``.

    Labels are normalized before comparison; ``ref_names`` is expected to hold
    normalized names already. Empty reference names are ignored.
    """
    wanted = {name for name in ref_names if name}
    matched: set[str] = set()
    if not wanted:
        return matched
    for person in people:
        for label in (person.display_name, *person.aliases):
            if _norm(label) in wanted:
                matched.add(person.person_id)
                break
    return matched
655
+
656
+
657
def _explicit_query_alias_match_ids(
    people: Iterable[PersonMemoryRecord], query: str
) -> set[str]:
    """Return ids of people with an alias that the query mentions explicitly.

    Only aliases are checked here, not display names. An empty query matches
    nobody.
    """
    needle = _norm(query)
    matched: set[str] = set()
    if not needle:
        return matched
    for person in people:
        for alias in person.aliases:
            if _query_mentions_explicit_label(needle, _norm(alias)):
                matched.add(person.person_id)
                break
    return matched
671
+
672
+
673
def _ambiguous_query_display_name_match_ids(
    people: Iterable[PersonMemoryRecord], query: str
) -> set[str]:
    """Return ids of people who share a display name mentioned in the query.

    A display name is ambiguous when more than one person has it after
    normalization; names carried by a single person contribute nothing.
    """
    needle = _norm(query)
    if not needle:
        return set()
    ids_by_name: dict[str, set[str]] = {}
    for person in people:
        display = _norm(person.display_name)
        if _query_mentions_explicit_label(needle, display):
            ids_by_name.setdefault(display, set()).add(person.person_id)
    return {
        person_id
        for sharing_ids in ids_by_name.values()
        if len(sharing_ids) > 1
        for person_id in sharing_ids
    }
690
+
691
+
692
def _query_mentions_explicit_label(query: str, label: str) -> bool:
    """Return whether ``query`` mentions ``label`` as a standalone name.

    Both arguments are expected to be normalized already. An empty query or
    label never matches; an identical query and label always match. Labels
    made only of CJK characters need at least two characters and are checked
    with the CJK boundary rules. Any other label must appear without a word
    character directly before or after it.
    """
    if not query or not label:
        return False
    if query == label:
        return True
    if re.fullmatch(r"[\u4e00-\u9fff]+", label):
        return len(label) >= 2 and _query_mentions_cjk_alias(query, label)
    # Lookarounds instead of ``\b``: ``\b`` needs a word character on one side,
    # so a label that starts or ends with punctuation ("j.r.", "c++") could
    # never match when followed by a space or the end of the query.
    pattern = rf"(?<!\w){re.escape(label)}(?!\w)"
    return re.search(pattern, query, flags=re.IGNORECASE) is not None
700
+
701
+
702
def _query_mentions_cjk_alias(query: str, label: str) -> bool:
    """Return whether ``label`` occurs in ``query`` with acceptable neighbours.

    An occurrence counts when the character on each side is missing (start or
    end of the query), is not a CJK character, or belongs to the matching
    left/right boundary set.
    """

    def _edge_ok(neighbour: str, allowed: set[str]) -> bool:
        # No neighbour, a non-CJK neighbour, or a listed boundary character.
        if not neighbour:
            return True
        if not re.fullmatch(r"[\u4e00-\u9fff]", neighbour):
            return True
        return neighbour in allowed

    for hit in re.finditer(re.escape(label), query):
        begin, end = hit.span()
        before = query[begin - 1] if begin > 0 else ""
        after = query[end] if end < len(query) else ""
        if _edge_ok(before, CJK_ALIAS_LEFT_BOUNDARY) and _edge_ok(
            after, CJK_ALIAS_RIGHT_BOUNDARY
        ):
            return True
    return False
719
+
720
+
721
def _cjk_name_aliases(label: str) -> list[str]:
    """Derive short-name aliases from a three- or four-character CJK name.

    Whitespace and a leading "测试" prefix are removed first. When the result is
    three or four CJK characters, the candidates are the compact name and its
    last two characters; candidates equal to ``label`` are dropped. Any other
    input yields an empty list.
    """
    squeezed = re.sub(r"\s+", "", label.strip()).removeprefix("测试")
    if re.fullmatch(r"[\u4e00-\u9fff]{3,4}", squeezed) is None:
        return []
    derived: list[str] = []
    for option in (squeezed, squeezed[-2:]):
        if option and option != label and option not in derived:
            derived.append(option)
    return derived
729
+
730
+
731
def _tokens(text: str) -> set[str]:
    """Return the search tokens of ``text``.

    The text is lowercased. ASCII runs become whole tokens. Each CJK run has
    its stopword characters removed and then contributes its single
    characters plus its 2-grams and 3-grams.
    """
    folded = text.lower()
    found: set[str] = set(ASCII_TOKEN_RE.findall(folded))
    for segment in CJK_RUN_RE.findall(folded):
        kept = [char for char in segment if char not in CJK_STOPWORDS]
        found.update(kept)
        for width in (2, 3):
            found.update(_ngrams(kept, width))
    return found
740
+
741
+
742
def _looks_like_follow_up_completion(text: str) -> bool:
    """Return whether ``text`` reads like a report that a task was carried out.

    Three alternatives are recognized, case-insensitively: a Chinese
    "给/把 ... 发了" style sending phrase, a Chinese completion verb
    (完成/办完/搞定), or one of the English words sent, shared, emailed,
    completed, finished, done as a whole word.
    """
    completion_pattern = (
        r"(?:已经|已|刚刚|刚)?(?:给|把)[^。;;,.,]{1,40}"
        r"(?:发了|发送了|发过去了|分享了|转发了)"
        r"|(?:完成|办完|搞定)[^。;;,.,]{0,40}"
        r"|\b(?:sent|shared|emailed|completed|finished|done)\b"
    )
    found = re.search(completion_pattern, text, flags=re.IGNORECASE)
    return found is not None
753
+
754
+
755
def _completion_matches_follow_up(source_text: str, follow_up: FollowUpTask) -> bool:
    """Return whether ``source_text`` reports completing the open ``follow_up``.

    Follow-ups that are not open never match. When both texts name sent
    objects, any similar pair is a match. Otherwise the texts must reach a
    token overlap of 0.58.
    """
    if follow_up.status != "open":
        return False
    reported = _send_objects(source_text)
    planned = _send_objects(follow_up.description)
    if not (reported and planned):
        return _task_token_overlap(source_text, follow_up.description) >= 0.58
    for reported_object in reported:
        for planned_object in planned:
            if _similar_task_object(reported_object, planned_object):
                return True
    return False
767
+
768
+
769
def _mark_matching_follow_ups_done(
    interactions: Iterable[SocialInteraction], source_text: str
) -> None:
    """Set status "done" on follow-ups that ``source_text`` completes.

    Matching follow-ups are replaced by updated copies. An interaction's
    ``follow_ups`` list is only reassigned when at least one entry changed.
    """
    for interaction in interactions:
        refreshed: list[FollowUpTask] = []
        touched = 0
        for task in interaction.follow_ups:
            if _completion_matches_follow_up(source_text, task):
                task = task.model_copy(update={"status": "done"})
                touched += 1
            refreshed.append(task)
        if touched:
            interaction.follow_ups = refreshed
783
+
784
+
785
def _send_objects(text: str) -> list[str]:
    """Extract the normalized objects of "send"-style verbs found in ``text``.

    Two patterns are applied case-insensitively: a Chinese one anchored on
    发/发送 and an English one anchored on sent/shared/emailed. Each captured
    object is passed through ``_normalize_task_object``; empty results are
    discarded.

    Returns:
        The normalized objects in order of first appearance, de-duplicated.
    """
    patterns = [
        # Alternation branches are tried left to right and the first one that
        # lets the overall match succeed wins, so the longer verb (发送) and
        # the longer aspect marker (过去了) must precede their own prefixes.
        # Otherwise "发送了简历" captures "送了简历" instead of "简历".
        r"(?:发送|发)(?:过去了|了|过)?(?:一份|一封|一个|些|一下)?(?P<object>[^,。,.;;]+)",
        r"(?:sent|shared|emailed)\s+(?P<object>[^.;,]+)",
    ]
    objects: list[str] = []
    for pattern in patterns:
        for match in re.finditer(pattern, text, flags=re.IGNORECASE):
            item = _normalize_task_object(match.group("object"))
            if item:
                objects.append(item)
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(objects))
797
+
798
+
799
+ def _normalize_task_object(text: str) -> str:
800
+ text = text.casefold().strip()
801
+ text = re.sub(r"^(?:给|给他|给她|给他们|给她们|给我|to\s+)", "", text).strip()
802
+ text = re.sub(r"^(?:一份|一封|一个|些|一下|the\s+|a\s+|an\s+)", "", text).strip()
803
+ return re.sub(r"\s+", " ", text.strip(" \t\r\n,.;:,。!?!?\"'"))
804
+
805
+
806
def _similar_task_object(left: str, right: str) -> bool:
    """Report whether two task-object phrases refer to the same thing.

    Both phrases are normalized first. They are considered similar when
    one normalized phrase contains the other, or when their token overlap
    reaches 0.72. An empty phrase on either side never matches.
    """
    first = _normalize_task_object(left)
    second = _normalize_task_object(right)
    if not (first and second):
        return False
    if first in second or second in first:
        return True
    return _task_token_overlap(first, second) >= 0.72
814
+
815
+
816
def _task_token_overlap(left: str, right: str) -> float:
    """Return the share of the smaller token set that both texts have in common.

    Tokens come from ``_task_match_tokens``. The result is the number of
    shared tokens divided by the size of the smaller token set, or 0.0 when
    either text produces no tokens.
    """
    first = _task_match_tokens(left)
    second = _task_match_tokens(right)
    if not (first and second):
        return 0.0
    shared = first.intersection(second)
    smaller_size = min(len(first), len(second))
    return len(shared) / smaller_size
823
+
824
+
825
def _task_match_tokens(text: str) -> set[str]:
    """Tokenize ``text`` for task matching, ignoring filler words.

    ASCII tokens (from ``ASCII_TOKEN_RE``) are kept when they are at least
    three characters long and not one of a few common English words. For
    each run matched by ``CJK_RUN_RE``, time words and send/complete verbs
    are removed, then stopword characters and digits are dropped, and the
    remaining characters contribute their 2-, 3- and 4-character n-grams.
    """
    folded = text.casefold()
    ignored_words = {"the", "and", "for", "with", "sent", "done"}
    filler_pattern = (
        r"(今天|昨天|明天|后天|下周[一二三四五六日天]?|上午|下午|晚上|"
        r"已经|刚刚|一份|一封|一个|给|把|发了|发送了|发过去了|分享了|转发了|完成|搞定)"
    )

    result: set[str] = set()
    for word in ASCII_TOKEN_RE.findall(folded):
        if len(word) >= 3 and word not in ignored_words:
            result.add(word)

    for cjk_run in CJK_RUN_RE.findall(folded):
        stripped = re.sub(filler_pattern, "", cjk_run)
        glyphs = [
            glyph
            for glyph in stripped
            if glyph not in CJK_STOPWORDS and not glyph.isdigit()
        ]
        for width in (2, 3, 4):
            result.update(_ngrams(glyphs, width))
    return result
844
+
845
+
846
def _score(query_tokens: set[str], text: str, person_name: str) -> float:
    """Score how well ``text`` (plus ``person_name``) matches the query tokens.

    The score is the fraction of query tokens that also appear among the
    tokens of ``text + " " + person_name``, plus a bonus of 0.15 for every
    non-empty query token that occurs as a substring of the lowercased
    ``text``, capped at 0.6. An empty query scores 0.0.
    """
    candidate_tokens = _tokens(text + " " + person_name)
    if not query_tokens:
        return 0.0
    shared = query_tokens & candidate_tokens
    lowered_text = text.lower()
    substring_hits = sum(
        1 for token in query_tokens if token and token in lowered_text
    )
    bonus = min(0.6, substring_hits * 0.15)
    coverage = len(shared) / max(len(query_tokens), 1)
    return coverage + bonus
856
+
857
+
858
+ def _profile_summary_text(person: PersonMemoryRecord) -> str:
859
+ parts = [f"Person: {person.display_name}"]
860
+ if person.aliases:
861
+ parts.append("Aliases: " + "; ".join(person.aliases))
862
+ if person.work_history:
863
+ parts.append(
864
+ "Works at: "
865
+ + "; ".join(
866
+ work.organization
867
+ + (f" as {work.role}" if work.role else "")
868
+ for work in person.work_history
869
+ )
870
+ )
871
+ if person.education:
872
+ parts.append("Studied at: " + "; ".join(edu.school for edu in person.education))
873
+ if person.interests:
874
+ parts.append("Interests and discussed topics: " + "; ".join(person.interests))
875
+ if person.preferences:
876
+ parts.append("Preferences: " + "; ".join(person.preferences))
877
+ if len(parts) == 1:
878
+ return ""
879
+ return ". ".join(parts) + "."
880
+
881
+
882
+ def _relationship_text(relationship: object) -> str:
883
+ source = getattr(getattr(relationship, "source", None), "label", "Someone")
884
+ target = getattr(getattr(relationship, "target", None), "label", "someone")
885
+ relationship_type = str(getattr(relationship, "relationship_type", "associated_with"))
886
+ return f"{source} {relationship_type.replace('_', ' ')} {target}"
887
+
888
+
889
+ def _ngrams(chars: list[str], size: int) -> list[str]:
890
+ if len(chars) < size:
891
+ return []
892
+ return ["".join(chars[index : index + size]) for index in range(len(chars) - size + 1)]
893
+
894
+
895
def _blocked_by_sensitivity(
    sensitivity: list[SensitivityLabel], include_sensitive: bool
) -> bool:
    """Tell whether content with these labels must be withheld.

    Nothing is blocked when ``include_sensitive`` is set. Otherwise content
    is blocked as soon as one of its labels is ``SENSITIVE`` or
    ``DO_NOT_SURFACE_UNPROMPTED``.
    """
    if include_sensitive:
        return False
    withheld = frozenset(
        (
            SensitivityLabel.SENSITIVE,
            SensitivityLabel.DO_NOT_SURFACE_UNPROMPTED,
        )
    )
    for label in sensitivity:
        if label in withheld:
            return True
    return False
905
+
906
+
907
+ def _interaction_sensitivity(interaction: SocialInteraction) -> list[SensitivityLabel]:
908
+ sensitivity = list(interaction.sensitivity)
909
+ for claim in interaction.attributed_claims:
910
+ for label in claim.sensitivity:
911
+ if label not in sensitivity:
912
+ sensitivity.append(label)
913
+ for fact in interaction.direct_facts:
914
+ for label in fact.sensitivity:
915
+ if label not in sensitivity:
916
+ sensitivity.append(label)
917
+ return sensitivity
918
+
919
+
920
def _result_evidence(
    person: PersonMemoryRecord, include_sensitive: bool
) -> list[Evidence]:
    """Pick up to the last three evidence items that may be shown for ``person``.

    With ``include_sensitive`` set, the last three evidence items are
    returned unfiltered. Otherwise evidence whose ``source_text`` equals the
    source text of an interaction blocked by its sensitivity labels is
    removed before taking the last three.

    Returns:
        At most three evidence items; an empty list when nothing may be
        shown.
    """
    if include_sensitive:
        return person.evidence[-3:]
    blocked_sources = {
        interaction.source_text
        for interaction in person.interactions
        if _blocked_by_sensitivity(_interaction_sensitivity(interaction), include_sensitive)
    }
    safe = [
        evidence for evidence in person.evidence if evidence.source_text not in blocked_sources
    ]
    # Do not fall back to the unfiltered list when everything was filtered
    # out: that would surface exactly the sensitive evidence the caller asked
    # to exclude. An empty result is the correct answer in that case.
    return safe[-3:]
936
+
937
+
938
def _extend_unique(target: list, source: list) -> None:
    """Append to ``target`` each item of ``source`` it does not already hold.

    Items are compared by their ``_stable_key`` serialization, so duplicates
    within ``source`` itself are also added only once. ``target`` is
    modified in place.
    """
    seen = set(map(_stable_key, target))
    for candidate in source:
        fingerprint = _stable_key(candidate)
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        target.append(candidate)
945
+
946
+
947
def _append_unique(target: list, item: object) -> None:
    """Append ``item`` to ``target`` unless an equivalent entry is present.

    Equivalence is decided by comparing ``_stable_key`` serializations.
    ``target`` is modified in place.
    """
    fingerprint = _stable_key(item)
    known = set(map(_stable_key, target))
    if fingerprint in known:
        return
    target.append(item)
951
+
952
+
953
+ def _stable_key(item: object) -> str:
954
+ if hasattr(item, "model_dump"):
955
+ return json.dumps(item.model_dump(mode="json"), ensure_ascii=False, sort_keys=True)
956
+ return json.dumps(item, ensure_ascii=False, sort_keys=True)
957
+
958
+
959
+ def _metadata_str(metadata: dict[str, object], key: str) -> str | None:
960
+ value = metadata.get(key)
961
+ return value if isinstance(value, str) and value.strip() else None
962
+
963
+
964
def _remap_person_record(record: PersonMemoryRecord, source_person_id: str, target_person_id: str) -> None:
    """Rewrite person references inside ``record`` from one id to another.

    Every person reference reachable from the record's interactions, direct
    facts, attributed claims, follow-ups and relationships is passed to
    ``_remap_ref`` with the given source and target ids. The record is
    modified in place.
    """

    def swap(ref):
        _remap_ref(ref, source_person_id, target_person_id)

    for interaction in record.interactions:
        _remap_interaction(interaction, source_person_id, target_person_id)

    for fact in record.direct_facts:
        swap(fact.subject)

    for claim in record.attributed_claims:
        # Speaker and subject are optional on a claim.
        if claim.speaker:
            swap(claim.speaker)
        if claim.subject:
            swap(claim.subject)

    for follow_up in record.follow_ups:
        for related in follow_up.related_people:
            swap(related)

    for relationship in record.relationships:
        swap(relationship.source)
        swap(relationship.target)
980
+
981
+
982
def _remap_interaction(
    interaction: SocialInteraction, source_person_id: str, target_person_id: str
) -> None:
    """Rewrite person references inside ``interaction`` from one id to another.

    Participants, mentioned people (and who mentioned them), attributed
    claims, direct facts, follow-ups and relationships are all visited, and
    each person reference is passed to ``_remap_ref``. The interaction is
    modified in place.
    """

    def swap(ref):
        _remap_ref(ref, source_person_id, target_person_id)

    for participant in interaction.participants:
        swap(participant.person)

    for mention in interaction.mentioned_people:
        swap(mention.person)
        # The person who made the mention is optional.
        if mention.mentioned_by:
            swap(mention.mentioned_by)

    for claim in interaction.attributed_claims:
        if claim.speaker:
            swap(claim.speaker)
        if claim.subject:
            swap(claim.subject)

    for fact in interaction.direct_facts:
        swap(fact.subject)

    for follow_up in interaction.follow_ups:
        for related in follow_up.related_people:
            swap(related)

    for relationship in interaction.relationships:
        swap(relationship.source)
        swap(relationship.target)
1004
+
1005
+
1006
+ def _remap_ref(ref: PersonRef, source_person_id: str, target_person_id: str) -> None:
1007
+ if ref.person_id == source_person_id:
1008
+ ref.person_id = target_person_id