@reconcrap/people-network-memory 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +476 -0
- package/docs/mcp_tools.md +138 -0
- package/harness_adapters/openclaw/mcp.managed.unix.template.json +25 -0
- package/harness_adapters/openclaw/mcp.managed.windows.template.json +26 -0
- package/harness_adapters/openclaw/mcp.template.json +14 -0
- package/harness_adapters/openclaw/ppl/SKILL.md +114 -0
- package/package.json +30 -0
- package/pyproject.toml +26 -0
- package/scripts/install_windows.ps1 +92 -0
- package/scripts/npm/people-memory.js +276 -0
- package/scripts/people_memory_bootstrap.py +247 -0
- package/scripts/run_graphiti_live_from_liepin.ps1 +87 -0
- package/scripts/run_tests_with_artifacts.ps1 +307 -0
- package/src/people_network_memory/__init__.py +6 -0
- package/src/people_network_memory/application/__init__.py +16 -0
- package/src/people_network_memory/application/normalization.py +1441 -0
- package/src/people_network_memory/application/services.py +921 -0
- package/src/people_network_memory/cli.py +1212 -0
- package/src/people_network_memory/config.py +268 -0
- package/src/people_network_memory/domain/__init__.py +55 -0
- package/src/people_network_memory/domain/identity.py +77 -0
- package/src/people_network_memory/domain/models.py +355 -0
- package/src/people_network_memory/fixtures/__init__.py +6 -0
- package/src/people_network_memory/fixtures/eval.py +398 -0
- package/src/people_network_memory/fixtures/extractor_eval.py +364 -0
- package/src/people_network_memory/fixtures/generator.py +290 -0
- package/src/people_network_memory/fixtures/report.py +252 -0
- package/src/people_network_memory/graphiti_adapter/__init__.py +9 -0
- package/src/people_network_memory/graphiti_adapter/episode_formatter.py +70 -0
- package/src/people_network_memory/graphiti_adapter/graphiti_store.py +655 -0
- package/src/people_network_memory/graphiti_adapter/indexer.py +194 -0
- package/src/people_network_memory/graphiti_adapter/ontology.py +68 -0
- package/src/people_network_memory/harness_adapters/__init__.py +2 -0
- package/src/people_network_memory/harness_adapters/openclaw/__init__.py +9 -0
- package/src/people_network_memory/harness_adapters/openclaw/installer.py +577 -0
- package/src/people_network_memory/harness_adapters/openclaw/integration_eval.py +508 -0
- package/src/people_network_memory/harness_adapters/openclaw/smoke.py +292 -0
- package/src/people_network_memory/infrastructure/__init__.py +2 -0
- package/src/people_network_memory/infrastructure/archive_backup.py +171 -0
- package/src/people_network_memory/infrastructure/diagnostics.py +171 -0
- package/src/people_network_memory/infrastructure/embeddings.py +155 -0
- package/src/people_network_memory/infrastructure/file_store.py +129 -0
- package/src/people_network_memory/infrastructure/graphiti_promotion.py +212 -0
- package/src/people_network_memory/infrastructure/id_generator.py +40 -0
- package/src/people_network_memory/infrastructure/in_memory_store.py +1008 -0
- package/src/people_network_memory/infrastructure/llm_extractor.py +476 -0
- package/src/people_network_memory/infrastructure/llm_identity_advisor.py +200 -0
- package/src/people_network_memory/infrastructure/llm_judge.py +162 -0
- package/src/people_network_memory/infrastructure/redaction.py +21 -0
- package/src/people_network_memory/infrastructure/release_check.py +186 -0
- package/src/people_network_memory/infrastructure/retrieval_intent.py +98 -0
- package/src/people_network_memory/infrastructure/semantic_index.py +262 -0
- package/src/people_network_memory/mcp_server/__init__.py +2 -0
- package/src/people_network_memory/mcp_server/contracts.py +85 -0
- package/src/people_network_memory/mcp_server/runtime.py +133 -0
- package/src/people_network_memory/mcp_server/tools.py +588 -0
- package/src/people_network_memory/ports/__init__.py +2 -0
- package/src/people_network_memory/ports/errors.py +25 -0
- package/src/people_network_memory/ports/interfaces.py +103 -0
- package/src/people_network_memory/projection/__init__.py +6 -0
- package/src/people_network_memory/projection/builders.py +46 -0
|
@@ -0,0 +1,1008 @@
|
|
|
1
|
+
"""In-memory adapter used for tests, fixtures, and CLI test mode."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import Iterable
|
|
9
|
+
|
|
10
|
+
from people_network_memory.domain.models import (
|
|
11
|
+
AttributedClaim,
|
|
12
|
+
ContactMethod,
|
|
13
|
+
DirectFact,
|
|
14
|
+
EducationRecord,
|
|
15
|
+
Evidence,
|
|
16
|
+
FollowUpTask,
|
|
17
|
+
IdentityCandidate,
|
|
18
|
+
PersonMemoryRecord,
|
|
19
|
+
PersonRef,
|
|
20
|
+
RecordInteractionResult,
|
|
21
|
+
RetrievalItem,
|
|
22
|
+
ReviewItem,
|
|
23
|
+
SensitivityLabel,
|
|
24
|
+
SocialInteraction,
|
|
25
|
+
WorkHistoryRecord,
|
|
26
|
+
)
|
|
27
|
+
from people_network_memory.infrastructure.retrieval_intent import (
|
|
28
|
+
is_follow_up_query,
|
|
29
|
+
mentioned_query_target,
|
|
30
|
+
person_is_only_mentioned_target,
|
|
31
|
+
text_answers_mentioned_query,
|
|
32
|
+
)
|
|
33
|
+
from people_network_memory.ports.errors import PersistenceError
|
|
34
|
+
from people_network_memory.infrastructure.id_generator import SequentialIdGenerator
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Runs of ASCII letters, digits, or underscores (matched case-insensitively).
ASCII_TOKEN_RE = re.compile(r"[a-z0-9_]+", flags=re.IGNORECASE)
# Runs of characters in the U+4E00-U+9FFF range (CJK Unified Ideographs).
CJK_RUN_RE = re.compile(r"[\u4e00-\u9fff]+")
# Single-character CJK function words.
# NOTE(review): presumably dropped during tokenization; the consumer is not
# visible in this part of the file -- confirm.
CJK_STOPWORDS = {"的", "了", "和", "与", "在"}
# CJK characters that may sit immediately to the LEFT of a CJK name and still
# let _query_mentions_cjk_alias treat the name as a standalone mention.
CJK_ALIAS_LEFT_BOUNDARY = set("叫找查问和与跟给向说提约帮为")
# CJK characters that may sit immediately to the RIGHT of a CJK name and still
# let _query_mentions_cjk_alias treat the name as a standalone mention.
CJK_ALIAS_RIGHT_BOUNDARY = set("是的在有做聊说问要还关喜哪什现吗呢呀吧了")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class InMemoryPeopleStore:
    """Dictionary-backed people store that lives entirely in process memory.

    State is held in four containers: ``people`` (person_id -> record),
    ``review_items``, ``interactions`` (interaction_id -> interaction) and
    ``evidence`` (evidence_id -> evidence).  Nothing is written to disk.
    """

    def __init__(self) -> None:
        self._ids = SequentialIdGenerator()
        self.people: dict[str, PersonMemoryRecord] = {}
        self.review_items: list[ReviewItem] = []
        self.interactions: dict[str, SocialInteraction] = {}
        self.evidence: dict[str, Evidence] = {}

    @property
    def id_generator(self) -> SequentialIdGenerator:
        """Return the id generator used for people, interactions and evidence."""
        return self._ids

    def find_identity_candidates(self, ref: PersonRef) -> list[IdentityCandidate]:
        """Rank stored people that could be the person described by *ref*.

        An exact email or phone match scores 1.0.  Otherwise the score is the
        best name score plus a 0.2 bonus when the company hint appears in any
        work-history organization, capped at 0.95.  People scoring below 0.45
        are dropped.  At most five candidates are returned, best first.
        """
        candidates: list[IdentityCandidate] = []
        ref_names = [_norm(ref.label), *[_norm(alias) for alias in ref.aliases]]
        ref_names = [name for name in ref_names if name]
        explicit_name_matches = _explicit_name_match_ids(self.people.values(), ref_names)
        # Normalize the reference identifiers once.  A value that normalizes
        # to "" (blank email, phone without digits, whitespace-only company)
        # must never match: "" == "" and "" in "anything" are both true.
        ref_email = _norm(ref.email)
        ref_phone = _digits(ref.phone)
        company_hint = _norm(ref.company_hint)
        for person in self.people.values():
            labels = [person.display_name, *person.aliases]
            # Derived CJK aliases are only considered when nobody matches the
            # reference by an explicit display name or alias.
            if not explicit_name_matches:
                labels.extend(_cjk_name_aliases(person.display_name))
            normalized_labels = [_norm(label) for label in labels if _norm(label)]
            exact_identifier = False
            for contact in person.contacts:
                if ref_email and contact.kind == "email" and _norm(contact.value) == ref_email:
                    exact_identifier = True
                if ref_phone and contact.kind == "phone" and _digits(contact.value) == ref_phone:
                    exact_identifier = True
            name_score = max(
                (
                    _name_score(ref_name, candidate_name)
                    for ref_name in ref_names
                    for candidate_name in normalized_labels
                ),
                default=0.0,
            )
            exact_name_match = _norm(ref.label) in normalized_labels
            company_score = 0.0
            if company_hint and any(
                company_hint in _norm(work.organization) for work in person.work_history
            ):
                company_score = 0.2
            score = 1.0 if exact_identifier else min(0.95, name_score + company_score)
            if exact_identifier or score >= 0.45:
                candidates.append(
                    IdentityCandidate(
                        person_id=person.person_id,
                        display_name=person.display_name,
                        score=score,
                        evidence=labels,
                        exact_identifier_match=exact_identifier,
                        exact_name_match=exact_name_match,
                    )
                )
        return sorted(candidates, key=lambda item: item.score, reverse=True)[:5]

    def save_interaction(
        self, interaction: SocialInteraction, identity_map: dict[str, str | None]
    ) -> RecordInteractionResult:
        """Store *interaction* and attach it to every person it references.

        Each reference is resolved through ``identity_map`` (keyed by
        ``_ref_key``) and then ``ref.person_id``.  References without an id,
        or whose id is unknown to the store, create a new person; the rest are
        reported as updated.  One evidence entry is created for the whole
        interaction, timestamped with ``occurred_at`` or the current UTC time.
        """
        now = datetime.now(timezone.utc)
        interaction_id = self._new_unique_id("interaction", self.interactions)
        evidence = Evidence(
            evidence_id=self._new_unique_id("evidence", self.evidence),
            source_text=interaction.source_text,
            recorded_at=interaction.occurred_at or now,
        )
        self.evidence[evidence.evidence_id] = evidence
        self.interactions[interaction_id] = interaction
        created: list[str] = []
        updated: list[str] = []
        all_refs = list(_iter_refs(interaction))
        ref_to_id: dict[str, str] = {}
        for ref in all_refs:
            key = _ref_key(ref)
            if key in ref_to_id:
                continue
            person_id = identity_map.get(key) or ref.person_id
            if not person_id:
                person_id = self._create_person(ref, evidence)
                created.append(person_id)
            elif person_id not in self.people:
                person_id = self._create_person(ref, evidence, explicit_id=person_id)
                created.append(person_id)
            else:
                updated.append(person_id)
                self._append_unique_evidence(person_id, evidence)
                self._merge_aliases(self.people[person_id], [ref.label, *ref.aliases])
            ref_to_id[key] = person_id
        # Several references can resolve to the same person; attach the
        # interaction to each person only once.
        attached_person_ids: set[str] = set()
        for ref in all_refs:
            person_id = ref_to_id[_ref_key(ref)]
            if person_id in attached_person_ids:
                continue
            attached_person_ids.add(person_id)
            record = self.people[person_id]
            record.interactions.append(interaction)
            self._append_unique_evidence(person_id, evidence)
        self._attach_interaction_details(interaction, ref_to_id)
        self._complete_matching_follow_ups(interaction, ref_to_id)
        return RecordInteractionResult(
            interaction_id=interaction_id,
            created_people=sorted(set(created)),
            updated_people=sorted(set(updated)),
            person_ref_map=ref_to_id,
            evidence=[evidence],
        )

    def get_person_memory(self, person_id: str) -> PersonMemoryRecord | None:
        """Return the record stored under *person_id*, or ``None``."""
        return self.people.get(person_id)

    def find_person_memory_by_name(self, name: str) -> list[PersonMemoryRecord]:
        """Return people whose display name or alias equals *name* once normalized.

        Derived CJK aliases are only consulted when no person matches by an
        explicit display name or alias.  Results are sorted by case-folded
        display name.
        """
        normalized = _norm(name)
        if not normalized:
            return []
        matches: list[PersonMemoryRecord] = []
        explicit_name_matches = _explicit_name_match_ids(
            self.people.values(), [normalized]
        )
        for person in self.people.values():
            labels = [person.display_name, *person.aliases]
            if not explicit_name_matches:
                labels.extend(_cjk_name_aliases(person.display_name))
            if any(_norm(label) == normalized for label in labels):
                matches.append(person)
        return sorted(matches, key=lambda person: person.display_name.casefold())

    def merge_people(
        self, *, source_person_id: str, target_person_id: str, note: str | None = None
    ) -> PersonMemoryRecord:
        """Fold the source person into the target and delete the source.

        ``note`` is accepted for interface compatibility; this implementation
        does not use it.

        Raises:
            PersistenceError: if both ids are equal or either person is missing.
        """
        if source_person_id == target_person_id:
            raise PersistenceError("source_person_id and target_person_id must differ")
        source = self.people.get(source_person_id)
        target = self.people.get(target_person_id)
        if source is None:
            raise PersistenceError(f"Source person not found: {source_person_id}")
        if target is None:
            raise PersistenceError(f"Target person not found: {target_person_id}")

        self._merge_aliases(target, [source.display_name, *source.aliases])
        _extend_unique(target.work_history, source.work_history)
        _extend_unique(target.education, source.education)
        _extend_unique(target.interests, source.interests)
        _extend_unique(target.important_dates, source.important_dates)
        _extend_unique(target.contacts, source.contacts)
        _extend_unique(target.preferences, source.preferences)
        _extend_unique(target.direct_facts, source.direct_facts)
        _extend_unique(target.attributed_claims, source.attributed_claims)
        _extend_unique(target.relationships, source.relationships)
        _extend_unique(target.follow_ups, source.follow_ups)
        _extend_unique(target.evidence, source.evidence)

        # Point the source's interactions at the target before moving them,
        # then remap every stored interaction and the merged record itself.
        moved_interactions = list(source.interactions)
        for interaction in moved_interactions:
            _remap_interaction(interaction, source_person_id, target_person_id)
        _extend_unique(target.interactions, moved_interactions)
        for interaction in self.interactions.values():
            _remap_interaction(interaction, source_person_id, target_person_id)
        _remap_person_record(target, source_person_id, target_person_id)
        del self.people[source_person_id]
        return target

    def search(
        self,
        query: str,
        *,
        limit: int = 10,
        include_sensitive: bool = False,
        mode: str = "recall",
    ) -> list[RetrievalItem]:
        """Return up to *limit* retrieval items for *query*, best score first.

        When the query explicitly names an alias, only those people are
        searched.  When it names a display name shared by several people (and
        is neither a mention query nor a follow-up query), only those people
        are searched and only their profile summaries are returned.
        A ``limit`` of zero or less yields an empty list.
        """
        query_tokens = _tokens(query)
        mention_target = mentioned_query_target(query)
        follow_up_intent = is_follow_up_query(query)
        explicit_query_person_ids = (
            set()
            if mention_target
            else _explicit_query_alias_match_ids(self.people.values(), query)
        )
        ambiguous_name_person_ids = (
            set()
            if (
                explicit_query_person_ids
                or mention_target
                or follow_up_intent
            )
            else _ambiguous_query_display_name_match_ids(self.people.values(), query)
        )
        items: list[RetrievalItem] = []
        for person in self.people.values():
            if (
                explicit_query_person_ids
                and person.person_id not in explicit_query_person_ids
            ):
                continue
            if (
                ambiguous_name_person_ids
                and person.person_id not in ambiguous_name_person_ids
            ):
                continue
            items.extend(
                self._person_items(
                    person,
                    query_tokens,
                    include_sensitive,
                    mode,
                    mention_target=mention_target,
                    follow_up_intent=follow_up_intent,
                    profile_only=bool(ambiguous_name_person_ids),
                )
            )
        ranked = sorted(items, key=lambda item: item.score, reverse=True)
        # Clamp so a negative limit cannot slice results off the tail.
        return ranked[: max(limit, 0)]

    def add_review_item(self, item: ReviewItem) -> None:
        """Append *item* to the review queue."""
        self.review_items.append(item)

    def list_review_items(self, *, status: str | None = None) -> list[ReviewItem]:
        """Return a new list of review items, optionally filtered by *status*."""
        if status is None:
            return list(self.review_items)
        return [item for item in self.review_items if item.status == status]

    def update_review_item(self, item: ReviewItem) -> ReviewItem:
        """Replace the stored review item that has the same ``review_id``.

        Raises:
            PersistenceError: if no stored item has that ``review_id``.
        """
        for index, existing in enumerate(self.review_items):
            if existing.review_id == item.review_id:
                self.review_items[index] = item
                return item
        raise PersistenceError(f"Review item not found: {item.review_id}")

    def export_data(self) -> dict[str, object]:
        """Dump people, review items and interactions in JSON mode."""
        return {
            "people": [person.model_dump(mode="json") for person in self.people.values()],
            "review_items": [item.model_dump(mode="json") for item in self.review_items],
            "interactions": [
                interaction.model_dump(mode="json")
                for interaction in self.interactions.values()
            ],
        }

    def _create_person(
        self, ref: PersonRef, evidence: Evidence, *, explicit_id: str | None = None
    ) -> str:
        """Create a person from *ref* and return the new person id.

        Raises:
            PersistenceError: if the chosen id is already in use.
        """
        person_id = explicit_id or self._new_unique_id("person", self.people)
        if person_id in self.people:
            raise PersistenceError(f"Person id already exists: {person_id}")
        aliases = [alias for alias in ref.aliases if alias != ref.label]
        self.people[person_id] = PersonMemoryRecord(
            person_id=person_id,
            display_name=ref.label,
            aliases=aliases,
            evidence=[evidence],
        )
        return person_id

    def _new_unique_id(self, prefix: str, existing: Iterable[str]) -> str:
        """Draw ids from the generator until one is absent from *existing*.

        *existing* may be any iterable of ids; a dict contributes its keys.
        """
        existing_ids = set(existing)
        while True:
            item_id = self._ids.new_id(prefix)
            if item_id not in existing_ids:
                return item_id

    def _append_unique_evidence(self, person_id: str, evidence: Evidence) -> None:
        """Add *evidence* to the person unless its evidence_id is already there."""
        record = self.people[person_id]
        if all(item.evidence_id != evidence.evidence_id for item in record.evidence):
            record.evidence.append(evidence)

    def _merge_aliases(self, record: PersonMemoryRecord, aliases: list[str]) -> None:
        """Add stripped, non-empty aliases that the record does not already carry."""
        for alias in aliases:
            alias = alias.strip()
            if alias and alias != record.display_name and alias not in record.aliases:
                record.aliases.append(alias)

    def _attach_interaction_details(
        self, interaction: SocialInteraction, ref_to_id: dict[str, str]
    ) -> None:
        """Copy facts, claims, follow-ups, relationships and topics onto people.

        Every copied item has its person references rewritten through
        ``_resolved_ref`` so they carry the stored display name and aliases.
        """
        for fact in interaction.direct_facts:
            person_id = ref_to_id.get(_ref_key(fact.subject))
            if person_id:
                resolved_fact = fact.model_copy(
                    update={"subject": self._resolved_ref(fact.subject, ref_to_id)}
                )
                self.people[person_id].direct_facts.append(resolved_fact)
                self._derive_profile_fact(self.people[person_id], resolved_fact)
        for claim in interaction.attributed_claims:
            resolved_claim = claim.model_copy(
                update={
                    "speaker": (
                        self._resolved_ref(claim.speaker, ref_to_id)
                        if claim.speaker
                        else None
                    ),
                    "subject": (
                        self._resolved_ref(claim.subject, ref_to_id)
                        if claim.subject
                        else None
                    ),
                }
            )
            if claim.subject:
                person_id = ref_to_id.get(_ref_key(claim.subject))
                if person_id:
                    self.people[person_id].attributed_claims.append(resolved_claim)
            if claim.speaker:
                speaker_id = ref_to_id.get(_ref_key(claim.speaker))
                # Skip when the claim is already attached (speaker == subject).
                if (
                    speaker_id
                    and resolved_claim not in self.people[speaker_id].attributed_claims
                ):
                    self.people[speaker_id].attributed_claims.append(resolved_claim)
        for follow_up in interaction.follow_ups:
            resolved_follow_up = follow_up.model_copy(
                update={
                    "related_people": [
                        self._resolved_ref(ref, ref_to_id)
                        for ref in follow_up.related_people
                    ]
                }
            )
            # Two references may resolve to one person; attach only once.
            follow_up_owner_ids: set[str] = set()
            for ref in follow_up.related_people:
                person_id = ref_to_id.get(_ref_key(ref))
                if person_id and person_id not in follow_up_owner_ids:
                    follow_up_owner_ids.add(person_id)
                    self.people[person_id].follow_ups.append(resolved_follow_up)
        for relationship in interaction.relationships:
            resolved_relationship = relationship.model_copy(
                update={
                    "source": self._resolved_ref(relationship.source, ref_to_id),
                    "target": self._resolved_ref(relationship.target, ref_to_id),
                }
            )
            # Source and target may be the same person; attach only once.
            relationship_owner_ids: set[str] = set()
            for ref in [relationship.source, relationship.target]:
                person_id = ref_to_id.get(_ref_key(ref))
                if person_id and person_id not in relationship_owner_ids:
                    relationship_owner_ids.add(person_id)
                    self.people[person_id].relationships.append(resolved_relationship)
        for topic in interaction.topics:
            for participant in interaction.participants:
                person_id = ref_to_id.get(_ref_key(participant.person))
                if person_id and topic not in self.people[person_id].interests:
                    self.people[person_id].interests.append(topic)

    def _complete_matching_follow_ups(
        self, interaction: SocialInteraction, ref_to_id: dict[str, str]
    ) -> None:
        """Mark follow-ups as done when the interaction text reports completion.

        Does nothing unless the source text looks like a completion.  For each
        referenced person, matching follow-ups on the record get status
        ``"done"``; the same marking is then applied to the follow-ups held by
        that person's interactions and by all stored interactions.
        """
        if not _looks_like_follow_up_completion(interaction.source_text):
            return
        candidate_person_ids = {
            person_id
            for ref in _iter_refs(interaction)
            for person_id in [ref_to_id.get(_ref_key(ref))]
            if person_id
        }
        for person_id in candidate_person_ids:
            record = self.people.get(person_id)
            if record is None:
                continue
            for index, follow_up in enumerate(record.follow_ups):
                if not _completion_matches_follow_up(interaction.source_text, follow_up):
                    continue
                record.follow_ups[index] = follow_up.model_copy(update={"status": "done"})
            _mark_matching_follow_ups_done(record.interactions, interaction.source_text)
        _mark_matching_follow_ups_done(self.interactions.values(), interaction.source_text)

    def _resolved_ref(
        self, ref: PersonRef, ref_to_id: dict[str, str]
    ) -> PersonRef:
        """Return *ref* relabelled with the stored display name and aliases.

        The reference is returned unchanged when it does not resolve to a
        stored person.
        """
        person_id = ref_to_id.get(_ref_key(ref))
        record = self.people.get(person_id or "")
        if record is None:
            return ref
        aliases = [alias for alias in record.aliases if alias != record.display_name]
        return ref.model_copy(update={"label": record.display_name, "aliases": aliases})

    def _derive_profile_fact(self, record: PersonMemoryRecord, fact: DirectFact) -> None:
        """Project a direct fact onto the structured profile fields of *record*.

        The lower-cased predicate selects work history, education, contacts,
        interests or preferences; any other predicate leaves the record as is.
        """
        predicate = fact.predicate.lower()
        value = fact.value.strip()
        if predicate in {"works_at", "worked_at", "work", "current_job"}:
            work = WorkHistoryRecord(
                organization=value,
                role=_metadata_str(fact.metadata, "role"),
                # Only the past-tense predicate marks a former job.
                is_current=predicate != "worked_at",
            )
            _append_unique(record.work_history, work)
            return
        if predicate in {"studied_at", "school", "education"}:
            education = EducationRecord(
                school=value,
                degree=_metadata_str(fact.metadata, "degree"),
                major=_metadata_str(fact.metadata, "major"),
            )
            _append_unique(record.education, education)
            return
        if predicate in {"email", "phone", "linkedin", "wechat"}:
            contact = ContactMethod(kind=predicate, value=value)
            _append_unique(record.contacts, contact)
            return
        if predicate in {"interest", "interested_in", "hobby"} and value not in record.interests:
            record.interests.append(value)
        if predicate in {"preference", "likes"} and value not in record.preferences:
            record.preferences.append(value)

    def _person_items(
        self,
        person: PersonMemoryRecord,
        query_tokens: set[str],
        include_sensitive: bool,
        mode: str,
        *,
        mention_target: str | None,
        follow_up_intent: bool,
        profile_only: bool = False,
    ) -> list[RetrievalItem]:
        """Build the scored retrieval items contributed by one person.

        Candidates are gathered as ``(kind, title, text, sensitivity,
        is_secondhand)`` tuples and then filtered and scored.  Follow-up
        queries keep only open follow-ups; mention queries keep only
        interactions and claims whose text answers the mention; with
        ``profile_only`` only the profile summary survives.  Items with a
        non-positive score are dropped.
        """
        # Function-scope import: hashlib is not among the module's imports.
        import hashlib

        candidates: list[tuple[str, str, str, list[SensitivityLabel], bool]] = []
        if not follow_up_intent and not mention_target:
            candidates.append(("person", person.display_name, person.display_name, [], False))
        profile_summary = _profile_summary_text(person)
        if profile_summary and not follow_up_intent and not mention_target:
            candidates.append(
                (
                    "fact",
                    f"Profile summary for {person.display_name}",
                    profile_summary,
                    [],
                    False,
                )
            )
        for interaction in person.interactions:
            if mention_target:
                mentioned_pairs = [
                    (
                        mention.person.label,
                        mention.mentioned_by.label if mention.mentioned_by else None,
                    )
                    for mention in interaction.mentioned_people
                ]
                if person_is_only_mentioned_target(
                    person_label=person.display_name,
                    target_label=mention_target,
                    mentioned_pairs=mentioned_pairs,
                ):
                    continue
                if not text_answers_mentioned_query(interaction.source_text, mention_target):
                    continue
            if follow_up_intent:
                continue
            interaction_sensitivity = _interaction_sensitivity(interaction)
            candidates.append(
                (
                    "interaction",
                    f"Interaction with {person.display_name}",
                    interaction.source_text,
                    interaction_sensitivity,
                    False,
                )
            )
        for fact in person.direct_facts:
            if follow_up_intent or mention_target:
                continue
            candidates.append(
                (
                    "fact",
                    f"{person.display_name}: {fact.predicate}",
                    f"{fact.subject.label} {fact.predicate} {fact.value}",
                    fact.sensitivity,
                    False,
                )
            )
        for claim in person.attributed_claims:
            if follow_up_intent:
                continue
            if mention_target and not text_answers_mentioned_query(claim.claim_text, mention_target):
                continue
            candidates.append(
                (
                    "claim",
                    f"Claim involving {person.display_name}",
                    claim.claim_text,
                    claim.sensitivity,
                    True,
                )
            )
        for relationship in person.relationships:
            if follow_up_intent or mention_target:
                continue
            candidates.append(
                (
                    "fact",
                    f"Relationship involving {person.display_name}",
                    _relationship_text(relationship),
                    relationship.sensitivity,
                    False,
                )
            )
        for follow_up in person.follow_ups:
            if follow_up.status != "open":
                continue
            if mention_target:
                continue
            candidates.append(
                (
                    "follow_up",
                    f"Follow-up for {person.display_name}",
                    follow_up.description,
                    follow_up.sensitivity,
                    False,
                )
            )
        if profile_only:
            candidates = [
                candidate
                for candidate in candidates
                if candidate[0] == "fact" and candidate[1].startswith("Profile summary")
            ]
        items: list[RetrievalItem] = []
        for kind, title, text, sensitivity, secondhand in candidates:
            if _blocked_by_sensitivity(sensitivity, include_sensitive):
                continue
            # The bare "person" item only qualifies when every query token is
            # part of the display name.
            if kind == "person" and not query_tokens.issubset(_tokens(person.display_name)):
                continue
            score = _score(query_tokens, text, person.display_name)
            if kind == "follow_up" and follow_up_intent:
                score += 2.0
            if title.startswith("Profile summary") and query_tokens.issubset(_tokens(text)):
                score += 2.0
            if mention_target and text_answers_mentioned_query(text, mention_target):
                score += 1.5
            if mode == "brief" and kind in {"interaction", "follow_up", "claim"}:
                score += 0.25
            if score <= 0:
                continue
            # The built-in hash() of a str is salted per process, which would
            # give the same item a different id on every run; use a content
            # digest so ids are reproducible.
            text_digest = hashlib.sha256(text.encode("utf-8", "replace")).hexdigest()[:16]
            items.append(
                RetrievalItem(
                    item_id=f"{person.person_id}:{kind}:{text_digest}",
                    kind=kind,  # type: ignore[arg-type]
                    title=title,
                    matched_text=text,
                    score=score,
                    why_matched="Matched query terms and related person context.",
                    person_ids=[person.person_id],
                    sensitivity=sensitivity,
                    evidence=_result_evidence(person, include_sensitive),
                    is_secondhand=secondhand,
                )
            )
        return items
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def _iter_refs(interaction: SocialInteraction) -> Iterable[PersonRef]:
|
|
595
|
+
for participant in interaction.participants:
|
|
596
|
+
yield participant.person
|
|
597
|
+
for mentioned in interaction.mentioned_people:
|
|
598
|
+
yield mentioned.person
|
|
599
|
+
if mentioned.mentioned_by:
|
|
600
|
+
yield mentioned.mentioned_by
|
|
601
|
+
for claim in interaction.attributed_claims:
|
|
602
|
+
if claim.speaker:
|
|
603
|
+
yield claim.speaker
|
|
604
|
+
if claim.subject:
|
|
605
|
+
yield claim.subject
|
|
606
|
+
for fact in interaction.direct_facts:
|
|
607
|
+
yield fact.subject
|
|
608
|
+
for follow_up in interaction.follow_ups:
|
|
609
|
+
yield from follow_up.related_people
|
|
610
|
+
for relationship in interaction.relationships:
|
|
611
|
+
yield relationship.source
|
|
612
|
+
yield relationship.target
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def _ref_key(ref: PersonRef) -> str:
|
|
616
|
+
return ref.person_id or ref.email or ref.phone or ref.label
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def _norm(value: str | None) -> str:
|
|
620
|
+
return (value or "").strip().lower()
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def _digits(value: str | None) -> str:
|
|
624
|
+
return re.sub(r"\D+", "", value or "")
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
def _name_score(ref_name: str, candidate_name: str) -> float:
|
|
628
|
+
if not ref_name or not candidate_name:
|
|
629
|
+
return 0.0
|
|
630
|
+
if ref_name == candidate_name:
|
|
631
|
+
return 0.9
|
|
632
|
+
if ref_name in candidate_name or candidate_name in ref_name:
|
|
633
|
+
return 0.65
|
|
634
|
+
ref_parts = set(ref_name.split())
|
|
635
|
+
candidate_parts = set(candidate_name.split())
|
|
636
|
+
if ref_parts & candidate_parts:
|
|
637
|
+
return 0.5
|
|
638
|
+
return 0.0
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def _explicit_name_match_ids(
|
|
642
|
+
people: Iterable[PersonMemoryRecord], ref_names: list[str]
|
|
643
|
+
) -> set[str]:
|
|
644
|
+
ref_name_set = {name for name in ref_names if name}
|
|
645
|
+
if not ref_name_set:
|
|
646
|
+
return set()
|
|
647
|
+
return {
|
|
648
|
+
person.person_id
|
|
649
|
+
for person in people
|
|
650
|
+
if any(
|
|
651
|
+
_norm(label) in ref_name_set
|
|
652
|
+
for label in [person.display_name, *person.aliases]
|
|
653
|
+
)
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
def _explicit_query_alias_match_ids(
    people: Iterable[PersonMemoryRecord], query: str
) -> set[str]:
    """Return ids of people with at least one alias explicitly named in *query*.

    Only aliases are checked, not display names.  An empty query gives an
    empty set.
    """
    needle = _norm(query)
    hits: set[str] = set()
    if not needle:
        return hits
    for person in people:
        for alias in person.aliases:
            if _query_mentions_explicit_label(needle, _norm(alias)):
                hits.add(person.person_id)
                break
    return hits
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def _ambiguous_query_display_name_match_ids(
    people: Iterable[PersonMemoryRecord], query: str
) -> set[str]:
    """Return ids of people sharing a display name that ``query`` mentions.

    People are grouped by normalized display name; only groups holding more
    than one distinct ``person_id`` contribute to the result.
    """
    needle = _norm(query)
    if not needle:
        return set()
    ids_by_name: dict[str, set[str]] = {}
    for person in people:
        name = _norm(person.display_name)
        if _query_mentions_explicit_label(needle, name):
            ids_by_name.setdefault(name, set()).add(person.person_id)
    ambiguous: set[str] = set()
    for ids in ids_by_name.values():
        if len(ids) > 1:
            ambiguous |= ids
    return ambiguous
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def _query_mentions_explicit_label(query: str, label: str) -> bool:
|
|
693
|
+
if not query or not label:
|
|
694
|
+
return False
|
|
695
|
+
if query == label:
|
|
696
|
+
return True
|
|
697
|
+
if re.fullmatch(r"[\u4e00-\u9fff]+", label):
|
|
698
|
+
return len(label) >= 2 and _query_mentions_cjk_alias(query, label)
|
|
699
|
+
return bool(re.search(rf"\b{re.escape(label)}\b", query, flags=re.IGNORECASE))
|
|
700
|
+
|
|
701
|
+
|
|
702
|
+
def _query_mentions_cjk_alias(query: str, label: str) -> bool:
|
|
703
|
+
for match in re.finditer(re.escape(label), query):
|
|
704
|
+
left = query[match.start() - 1] if match.start() > 0 else ""
|
|
705
|
+
right = query[match.end()] if match.end() < len(query) else ""
|
|
706
|
+
left_ok = (
|
|
707
|
+
not left
|
|
708
|
+
or not re.fullmatch(r"[\u4e00-\u9fff]", left)
|
|
709
|
+
or left in CJK_ALIAS_LEFT_BOUNDARY
|
|
710
|
+
)
|
|
711
|
+
right_ok = (
|
|
712
|
+
not right
|
|
713
|
+
or not re.fullmatch(r"[\u4e00-\u9fff]", right)
|
|
714
|
+
or right in CJK_ALIAS_RIGHT_BOUNDARY
|
|
715
|
+
)
|
|
716
|
+
if left_ok and right_ok:
|
|
717
|
+
return True
|
|
718
|
+
return False
|
|
719
|
+
|
|
720
|
+
|
|
721
|
+
def _cjk_name_aliases(label: str) -> list[str]:
|
|
722
|
+
compact = re.sub(r"\s+", "", label.strip())
|
|
723
|
+
if compact.startswith("测试"):
|
|
724
|
+
compact = compact.removeprefix("测试")
|
|
725
|
+
if not re.fullmatch(r"[\u4e00-\u9fff]{3,4}", compact):
|
|
726
|
+
return []
|
|
727
|
+
aliases = [compact, compact[-2:]]
|
|
728
|
+
return list(dict.fromkeys(alias for alias in aliases if alias and alias != label))
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
def _tokens(text: str) -> set[str]:
    """Tokenize ``text`` for overlap scoring.

    The lower-cased text contributes every ``ASCII_TOKEN_RE`` match. Each
    ``CJK_RUN_RE`` run contributes, after dropping ``CJK_STOPWORDS``
    characters, its single characters plus all 2- and 3-character n-grams.
    """
    lowered = text.lower()
    found = set(ASCII_TOKEN_RE.findall(lowered))
    for run in CJK_RUN_RE.findall(lowered):
        kept = [char for char in run if char not in CJK_STOPWORDS]
        found.update(kept, _ngrams(kept, 2), _ngrams(kept, 3))
    return found
|
|
740
|
+
|
|
741
|
+
|
|
742
|
+
def _looks_like_follow_up_completion(text: str) -> bool:
|
|
743
|
+
return bool(
|
|
744
|
+
re.search(
|
|
745
|
+
r"(?:已经|已|刚刚|刚)?(?:给|把)[^。;;,.,]{1,40}"
|
|
746
|
+
r"(?:发了|发送了|发过去了|分享了|转发了)"
|
|
747
|
+
r"|(?:完成|办完|搞定)[^。;;,.,]{0,40}"
|
|
748
|
+
r"|\b(?:sent|shared|emailed|completed|finished|done)\b",
|
|
749
|
+
text,
|
|
750
|
+
flags=re.IGNORECASE,
|
|
751
|
+
)
|
|
752
|
+
)
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
def _completion_matches_follow_up(source_text: str, follow_up: FollowUpTask) -> bool:
    """Decide whether ``source_text`` reports completion of ``follow_up``.

    Only follow-ups whose status is ``"open"`` can match. When both texts
    yield sent objects (via ``_send_objects``), any similar pair is a match;
    otherwise the texts must reach a task-token overlap of at least 0.58.
    """
    if follow_up.status != "open":
        return False
    sent_items = _send_objects(source_text)
    pending_items = _send_objects(follow_up.description)
    if not (sent_items and pending_items):
        return _task_token_overlap(source_text, follow_up.description) >= 0.58
    for sent_item in sent_items:
        for pending_item in pending_items:
            if _similar_task_object(sent_item, pending_item):
                return True
    return False
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def _mark_matching_follow_ups_done(
    interactions: Iterable[SocialInteraction], source_text: str
) -> None:
    """Set matching follow-ups of each interaction to status ``"done"``.

    A follow-up matches when ``_completion_matches_follow_up`` accepts it
    for ``source_text``. Matching tasks are replaced by copies carrying the
    new status; an interaction's ``follow_ups`` list is only reassigned
    when at least one task changed.
    """
    for interaction in interactions:
        completed_any = False
        refreshed: list[FollowUpTask] = []
        for task in interaction.follow_ups:
            if _completion_matches_follow_up(source_text, task):
                task = task.model_copy(update={"status": "done"})
                completed_any = True
            refreshed.append(task)
        if completed_any:
            interaction.follow_ups = refreshed
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
def _send_objects(text: str) -> list[str]:
|
|
786
|
+
objects: list[str] = []
|
|
787
|
+
patterns = [
|
|
788
|
+
r"(?:发|发送)(?:了|过|过去了)?(?:一份|一封|一个|些|一下)?(?P<object>[^,。,.;;]+)",
|
|
789
|
+
r"(?:sent|shared|emailed)\s+(?P<object>[^.;,]+)",
|
|
790
|
+
]
|
|
791
|
+
for pattern in patterns:
|
|
792
|
+
for match in re.finditer(pattern, text, flags=re.IGNORECASE):
|
|
793
|
+
item = _normalize_task_object(match.group("object"))
|
|
794
|
+
if item:
|
|
795
|
+
objects.append(item)
|
|
796
|
+
return list(dict.fromkeys(objects))
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
def _normalize_task_object(text: str) -> str:
|
|
800
|
+
text = text.casefold().strip()
|
|
801
|
+
text = re.sub(r"^(?:给|给他|给她|给他们|给她们|给我|to\s+)", "", text).strip()
|
|
802
|
+
text = re.sub(r"^(?:一份|一封|一个|些|一下|the\s+|a\s+|an\s+)", "", text).strip()
|
|
803
|
+
return re.sub(r"\s+", " ", text.strip(" \t\r\n,.;:,。!?!?\"'"))
|
|
804
|
+
|
|
805
|
+
|
|
806
|
+
def _similar_task_object(left: str, right: str) -> bool:
    """Return True when two task objects refer to roughly the same thing.

    Both sides are normalized with ``_normalize_task_object``. Empty results
    never match; otherwise containment in either direction, or a task-token
    overlap of at least 0.72, counts as similar.
    """
    first = _normalize_task_object(left)
    second = _normalize_task_object(right)
    if not (first and second):
        return False
    if first in second or second in first:
        return True
    return _task_token_overlap(first, second) >= 0.72
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
def _task_token_overlap(left: str, right: str) -> float:
    """Return the share of the smaller token set that both texts have in common.

    Tokens come from ``_task_match_tokens``. The result is 0.0 when either
    text produces no tokens.
    """
    first = _task_match_tokens(left)
    second = _task_match_tokens(right)
    smaller_size = min(len(first), len(second))
    if smaller_size == 0:
        return 0.0
    return len(first & second) / smaller_size
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
def _task_match_tokens(text: str) -> set[str]:
    """Tokenize ``text`` for comparing task descriptions.

    ASCII tokens (from ``ASCII_TOKEN_RE``) are kept when they have at least
    three characters and are not one of a few filler words. From each CJK
    run, time words and send/finish verbs are removed, then stopword and
    digit characters are dropped, and the 2-, 3- and 4-character n-grams of
    what remains are added.
    """
    lowered = text.casefold()
    ignored_words = {"the", "and", "for", "with", "sent", "done"}
    tokens: set[str] = set()
    for token in ASCII_TOKEN_RE.findall(lowered):
        if len(token) >= 3 and token not in ignored_words:
            tokens.add(token)

    filler = (
        r"(今天|昨天|明天|后天|下周[一二三四五六日天]?|上午|下午|晚上|"
        r"已经|刚刚|一份|一封|一个|给|把|发了|发送了|发过去了|分享了|转发了|完成|搞定)"
    )
    for run in CJK_RUN_RE.findall(lowered):
        stripped = re.sub(filler, "", run)
        chars = [
            char
            for char in stripped
            if char not in CJK_STOPWORDS and not char.isdigit()
        ]
        for size in (2, 3, 4):
            tokens.update(_ngrams(chars, size))
    return tokens
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
def _score(query_tokens: set[str], text: str, person_name: str) -> float:
|
|
847
|
+
text_tokens = _tokens(text + " " + person_name)
|
|
848
|
+
if not query_tokens:
|
|
849
|
+
return 0.0
|
|
850
|
+
overlap = query_tokens & text_tokens
|
|
851
|
+
substring_bonus = min(
|
|
852
|
+
0.6,
|
|
853
|
+
sum(1 for token in query_tokens if token and token in text.lower()) * 0.15,
|
|
854
|
+
)
|
|
855
|
+
return (len(overlap) / max(len(query_tokens), 1)) + substring_bonus
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
def _profile_summary_text(person: PersonMemoryRecord) -> str:
    """Render a one-line profile summary for ``person``.

    Sections (aliases, work history, education, interests, preferences) are
    included only when populated and are joined with ``". "``. When nothing
    but the name is known, an empty string is returned.
    """
    sections = [f"Person: {person.display_name}"]

    if person.aliases:
        sections.append("Aliases: " + "; ".join(person.aliases))

    if person.work_history:
        jobs = []
        for work in person.work_history:
            role_suffix = f" as {work.role}" if work.role else ""
            jobs.append(work.organization + role_suffix)
        sections.append("Works at: " + "; ".join(jobs))

    if person.education:
        schools = [edu.school for edu in person.education]
        sections.append("Studied at: " + "; ".join(schools))

    if person.interests:
        sections.append(
            "Interests and discussed topics: " + "; ".join(person.interests)
        )

    if person.preferences:
        sections.append("Preferences: " + "; ".join(person.preferences))

    # Only the "Person: ..." header means there is nothing worth summarizing.
    if len(sections) == 1:
        return ""
    return ". ".join(sections) + "."
|
|
880
|
+
|
|
881
|
+
|
|
882
|
+
def _relationship_text(relationship: object) -> str:
|
|
883
|
+
source = getattr(getattr(relationship, "source", None), "label", "Someone")
|
|
884
|
+
target = getattr(getattr(relationship, "target", None), "label", "someone")
|
|
885
|
+
relationship_type = str(getattr(relationship, "relationship_type", "associated_with"))
|
|
886
|
+
return f"{source} {relationship_type.replace('_', ' ')} {target}"
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
def _ngrams(chars: list[str], size: int) -> list[str]:
|
|
890
|
+
if len(chars) < size:
|
|
891
|
+
return []
|
|
892
|
+
return ["".join(chars[index : index + size]) for index in range(len(chars) - size + 1)]
|
|
893
|
+
|
|
894
|
+
|
|
895
|
+
def _blocked_by_sensitivity(
    sensitivity: list[SensitivityLabel], include_sensitive: bool
) -> bool:
    """Return True when content with these labels must be withheld.

    Nothing is blocked when ``include_sensitive`` is set. Otherwise any
    ``SENSITIVE`` or ``DO_NOT_SURFACE_UNPROMPTED`` label blocks the content.
    """
    if include_sensitive:
        return False
    blocking_labels = (
        SensitivityLabel.SENSITIVE,
        SensitivityLabel.DO_NOT_SURFACE_UNPROMPTED,
    )
    for label in sensitivity:
        if label in blocking_labels:
            return True
    return False
|
|
905
|
+
|
|
906
|
+
|
|
907
|
+
def _interaction_sensitivity(interaction: SocialInteraction) -> list[SensitivityLabel]:
    """Collect the distinct sensitivity labels attached to ``interaction``.

    The interaction's own labels come first, followed by labels from its
    attributed claims and then its direct facts, each label at most once
    beyond what the interaction already listed.
    """
    merged = list(interaction.sensitivity)
    labelled_items = (*interaction.attributed_claims, *interaction.direct_facts)
    for item in labelled_items:
        for label in item.sensitivity:
            if label not in merged:
                merged.append(label)
    return merged
|
|
918
|
+
|
|
919
|
+
|
|
920
|
+
def _result_evidence(
    person: PersonMemoryRecord, include_sensitive: bool
) -> list[Evidence]:
    """Return up to the three most recent evidence items for ``person``.

    With ``include_sensitive`` the last three items are returned as-is.
    Otherwise evidence whose ``source_text`` equals that of an interaction
    blocked by ``_blocked_by_sensitivity`` is filtered out first.
    """
    latest = person.evidence[-3:]
    if include_sensitive:
        return latest

    blocked_sources = set()
    for interaction in person.interactions:
        labels = _interaction_sensitivity(interaction)
        if _blocked_by_sensitivity(labels, include_sensitive):
            blocked_sources.add(interaction.source_text)

    safe = [
        item for item in person.evidence if item.source_text not in blocked_sources
    ]
    # NOTE(review): when every evidence item comes from a blocked
    # interaction, this falls back to the unfiltered tail, which surfaces
    # sensitive source text even though include_sensitive is False.
    # Confirm whether callers rely on non-empty evidence before changing it.
    return safe[-3:] if safe else latest
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
def _extend_unique(target: list, source: list) -> None:
    """Append to ``target`` each item of ``source`` it does not already hold.

    Items are compared by their ``_stable_key``; duplicates inside
    ``source`` itself are added only once. ``target`` is modified in place.
    """
    seen = {_stable_key(existing) for existing in target}
    for candidate in source:
        fingerprint = _stable_key(candidate)
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        target.append(candidate)
|
|
945
|
+
|
|
946
|
+
|
|
947
|
+
def _append_unique(target: list, item: object) -> None:
    """Append ``item`` to ``target`` unless an equal item is already present.

    Equality is decided by comparing ``_stable_key`` values.
    """
    wanted = _stable_key(item)
    known = [_stable_key(existing) for existing in target]
    if wanted in known:
        return
    target.append(item)
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
def _stable_key(item: object) -> str:
|
|
954
|
+
if hasattr(item, "model_dump"):
|
|
955
|
+
return json.dumps(item.model_dump(mode="json"), ensure_ascii=False, sort_keys=True)
|
|
956
|
+
return json.dumps(item, ensure_ascii=False, sort_keys=True)
|
|
957
|
+
|
|
958
|
+
|
|
959
|
+
def _metadata_str(metadata: dict[str, object], key: str) -> str | None:
|
|
960
|
+
value = metadata.get(key)
|
|
961
|
+
return value if isinstance(value, str) and value.strip() else None
|
|
962
|
+
|
|
963
|
+
|
|
964
|
+
def _remap_person_record(record: PersonMemoryRecord, source_person_id: str, target_person_id: str) -> None:
    """Re-point every person reference in ``record`` from one id to another.

    Covers the record's interactions (via ``_remap_interaction``), direct
    facts, attributed claims, follow-ups and relationships. Each reference
    is updated in place through ``_remap_ref``.
    """

    def remap(ref) -> None:
        _remap_ref(ref, source_person_id, target_person_id)

    for interaction in record.interactions:
        _remap_interaction(interaction, source_person_id, target_person_id)

    for fact in record.direct_facts:
        remap(fact.subject)

    for claim in record.attributed_claims:
        # Speaker and subject are only remapped when set.
        for ref in (claim.speaker, claim.subject):
            if ref:
                remap(ref)

    for follow_up in record.follow_ups:
        for ref in follow_up.related_people:
            remap(ref)

    for relationship in record.relationships:
        remap(relationship.source)
        remap(relationship.target)
|
|
980
|
+
|
|
981
|
+
|
|
982
|
+
def _remap_interaction(
    interaction: SocialInteraction, source_person_id: str, target_person_id: str
) -> None:
    """Re-point every person reference in ``interaction`` from one id to another.

    Covers participants, mentioned people (and who mentioned them),
    attributed claims, direct facts, follow-ups and relationships. Each
    reference is updated in place through ``_remap_ref``.
    """

    def remap(ref) -> None:
        _remap_ref(ref, source_person_id, target_person_id)

    for participant in interaction.participants:
        remap(participant.person)

    for mention in interaction.mentioned_people:
        remap(mention.person)
        if mention.mentioned_by:
            remap(mention.mentioned_by)

    for claim in interaction.attributed_claims:
        # Speaker and subject are only remapped when set.
        for ref in (claim.speaker, claim.subject):
            if ref:
                remap(ref)

    for fact in interaction.direct_facts:
        remap(fact.subject)

    for follow_up in interaction.follow_ups:
        for ref in follow_up.related_people:
            remap(ref)

    for relationship in interaction.relationships:
        remap(relationship.source)
        remap(relationship.target)
|
|
1004
|
+
|
|
1005
|
+
|
|
1006
|
+
def _remap_ref(ref: PersonRef, source_person_id: str, target_person_id: str) -> None:
|
|
1007
|
+
if ref.person_id == source_person_id:
|
|
1008
|
+
ref.person_id = target_person_id
|