@reconcrap/people-network-memory 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +476 -0
  2. package/docs/mcp_tools.md +138 -0
  3. package/harness_adapters/openclaw/mcp.managed.unix.template.json +25 -0
  4. package/harness_adapters/openclaw/mcp.managed.windows.template.json +26 -0
  5. package/harness_adapters/openclaw/mcp.template.json +14 -0
  6. package/harness_adapters/openclaw/ppl/SKILL.md +114 -0
  7. package/package.json +30 -0
  8. package/pyproject.toml +26 -0
  9. package/scripts/install_windows.ps1 +92 -0
  10. package/scripts/npm/people-memory.js +276 -0
  11. package/scripts/people_memory_bootstrap.py +247 -0
  12. package/scripts/run_graphiti_live_from_liepin.ps1 +87 -0
  13. package/scripts/run_tests_with_artifacts.ps1 +307 -0
  14. package/src/people_network_memory/__init__.py +6 -0
  15. package/src/people_network_memory/application/__init__.py +16 -0
  16. package/src/people_network_memory/application/normalization.py +1441 -0
  17. package/src/people_network_memory/application/services.py +921 -0
  18. package/src/people_network_memory/cli.py +1212 -0
  19. package/src/people_network_memory/config.py +268 -0
  20. package/src/people_network_memory/domain/__init__.py +55 -0
  21. package/src/people_network_memory/domain/identity.py +77 -0
  22. package/src/people_network_memory/domain/models.py +355 -0
  23. package/src/people_network_memory/fixtures/__init__.py +6 -0
  24. package/src/people_network_memory/fixtures/eval.py +398 -0
  25. package/src/people_network_memory/fixtures/extractor_eval.py +364 -0
  26. package/src/people_network_memory/fixtures/generator.py +290 -0
  27. package/src/people_network_memory/fixtures/report.py +252 -0
  28. package/src/people_network_memory/graphiti_adapter/__init__.py +9 -0
  29. package/src/people_network_memory/graphiti_adapter/episode_formatter.py +70 -0
  30. package/src/people_network_memory/graphiti_adapter/graphiti_store.py +655 -0
  31. package/src/people_network_memory/graphiti_adapter/indexer.py +194 -0
  32. package/src/people_network_memory/graphiti_adapter/ontology.py +68 -0
  33. package/src/people_network_memory/harness_adapters/__init__.py +2 -0
  34. package/src/people_network_memory/harness_adapters/openclaw/__init__.py +9 -0
  35. package/src/people_network_memory/harness_adapters/openclaw/installer.py +577 -0
  36. package/src/people_network_memory/harness_adapters/openclaw/integration_eval.py +508 -0
  37. package/src/people_network_memory/harness_adapters/openclaw/smoke.py +292 -0
  38. package/src/people_network_memory/infrastructure/__init__.py +2 -0
  39. package/src/people_network_memory/infrastructure/archive_backup.py +171 -0
  40. package/src/people_network_memory/infrastructure/diagnostics.py +171 -0
  41. package/src/people_network_memory/infrastructure/embeddings.py +155 -0
  42. package/src/people_network_memory/infrastructure/file_store.py +129 -0
  43. package/src/people_network_memory/infrastructure/graphiti_promotion.py +212 -0
  44. package/src/people_network_memory/infrastructure/id_generator.py +40 -0
  45. package/src/people_network_memory/infrastructure/in_memory_store.py +1008 -0
  46. package/src/people_network_memory/infrastructure/llm_extractor.py +476 -0
  47. package/src/people_network_memory/infrastructure/llm_identity_advisor.py +200 -0
  48. package/src/people_network_memory/infrastructure/llm_judge.py +162 -0
  49. package/src/people_network_memory/infrastructure/redaction.py +21 -0
  50. package/src/people_network_memory/infrastructure/release_check.py +186 -0
  51. package/src/people_network_memory/infrastructure/retrieval_intent.py +98 -0
  52. package/src/people_network_memory/infrastructure/semantic_index.py +262 -0
  53. package/src/people_network_memory/mcp_server/__init__.py +2 -0
  54. package/src/people_network_memory/mcp_server/contracts.py +85 -0
  55. package/src/people_network_memory/mcp_server/runtime.py +133 -0
  56. package/src/people_network_memory/mcp_server/tools.py +588 -0
  57. package/src/people_network_memory/ports/__init__.py +2 -0
  58. package/src/people_network_memory/ports/errors.py +25 -0
  59. package/src/people_network_memory/ports/interfaces.py +103 -0
  60. package/src/people_network_memory/projection/__init__.py +6 -0
  61. package/src/people_network_memory/projection/builders.py +46 -0
@@ -0,0 +1,476 @@
1
+ """Optional OpenAI-compatible ingestion extractor.
2
+
3
+ The extractor is intentionally an infrastructure adapter. Application services
4
+ only depend on the InteractionExtractor port and keep deterministic validation,
5
+ identity policy, and evidence handling after this step.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import re
12
+ from dataclasses import dataclass
13
+ from typing import Any
14
+
15
+ import httpx
16
+ from pydantic import ValidationError
17
+
18
+ from people_network_memory.config import PeopleMemoryConfig
19
+ from people_network_memory.domain.models import SocialInteraction
20
+ from people_network_memory.ports.errors import ConfigError
21
+
22
+
23
# Top-level keys an extractor reply may contribute to an interaction patch.
# Any other key returned by the model is dropped before validation.
INTERACTION_PATCH_FIELDS = set(
    (
        # Scalar descriptors of the interaction.
        "occurred_at",
        "interaction_type",
        "place",
        # People and what was said.
        "participants",
        "mentioned_people",
        "topics",
        "direct_facts",
        "attributed_claims",
        "follow_ups",
        "relationships",
        # Miscellaneous.
        "sensitivity",
        "metadata",
    )
)
37
+
38
# A bare "contact via <channel>" phrase: optional time lead-in, optional
# preference word, optional "use/via", a channel name, then a
# contact/communicate/follow-up verb. Matching is case-insensitive.
CONTACT_PREFERENCE_ONLY_RE = re.compile(
    r"(?:之后|以后|后续|下次|以后有事)?\s*"
    r"(?:最好|偏好|喜欢|倾向于)?\s*(?:用|通过)?\s*"
    r"(?:微信|短信|电话|邮件|邮箱|whatsapp|飞书|钉钉)\s*"
    r"(?:联系|沟通|跟进)",
    re.IGNORECASE,
)

# Cues that a follow-up carries a concrete task or date: relative or absolute
# dates, commitment wording, and action verbs, in Chinese and English.
CONCRETE_FOLLOW_UP_CUE_RE = re.compile(
    r"(?:"
    # Relative days / weekdays.
    r"明天|后天|下周|周[一二三四五六日天]|星期[一二三四五六日天]|礼拜[一二三四五六日天]|"
    # Numeric dates such as 3/15 or 2024-03-15.
    r"\d{1,2}[月/.-]\d{1,2}|20\d{2}[-/]\d{1,2}[-/]\d{1,2}|"
    # Commitment wording.
    r"答应|承诺|说好|约好|提醒我|需要我|让我|"
    # Action verbs.
    r"发|发送|推荐|介绍|引荐|安排|确认|准备|"
    # English equivalents.
    r"tomorrow|next\s+week|next\s+\w+day|by\s+\w+day|"
    r"promised|asked\s+me\s+to|need\s+to|send|introduce|share|confirm|schedule"
    r")",
    re.IGNORECASE,
)
56
+
57
+
58
@dataclass(frozen=True)
class LlmExtractorSettings:
    """Immutable connection settings for the LLM ingestion extractor."""

    base_url: str
    model: str
    api_key: str
    timeout_seconds: float = 30.0
    response_format: str = "none"

    @classmethod
    def from_config(cls, config: PeopleMemoryConfig) -> "LlmExtractorSettings":
        """Build settings from application config.

        Raises:
            ConfigError: if the base URL, model, or API key is unset; the
                message names every missing environment variable.
        """
        required = (
            ("PEOPLE_MEMORY_LLM_BASE_URL", config.llm_base_url),
            ("PEOPLE_MEMORY_LLM_MODEL", config.llm_model),
            ("PEOPLE_MEMORY_LLM_API_KEY", config.llm_api_key),
        )
        absent = [env_name for env_name, configured in required if not configured]
        if absent:
            raise ConfigError("LLM ingestion extractor requires: " + ", ".join(absent))
        return cls(
            base_url=config.llm_base_url or "",
            model=config.llm_model or "",
            api_key=config.llm_api_key or "",
            timeout_seconds=config.ingestion_extractor_timeout_seconds,
            response_format=config.llm_response_format,
        )
84
+
85
+
86
class OpenAICompatibleInteractionExtractor:
    """Extract structure from messy social notes before deterministic normalization."""

    def __init__(self, settings: LlmExtractorSettings) -> None:
        """Store the endpoint settings used for every extraction request."""
        self._settings = settings

    @classmethod
    def from_config(
        cls, config: PeopleMemoryConfig
    ) -> "OpenAICompatibleInteractionExtractor":
        """Build an extractor from application config.

        Raises whatever ``LlmExtractorSettings.from_config`` raises when a
        required setting is missing.
        """
        return cls(LlmExtractorSettings.from_config(config))

    def extract(self, interaction: SocialInteraction) -> SocialInteraction:
        """Return ``interaction`` merged with LLM-extracted structure.

        This is best effort: any transport failure, malformed response, or
        patch that fails validation returns the input interaction unchanged
        instead of raising.
        """
        try:
            payload = self._request_interaction_patch(interaction)
            extracted = _interaction_from_patch(interaction, payload)
        except (
            httpx.HTTPError,
            KeyError,
            # An empty ``choices`` array raises IndexError, which KeyError
            # does not cover (both only share the LookupError base).
            IndexError,
            TypeError,
            ValueError,
            json.JSONDecodeError,
            ValidationError,
        ):
            return interaction
        return _merge_interactions(interaction, extracted)

    def _request_interaction_patch(self, interaction: SocialInteraction) -> dict[str, Any]:
        """POST the note to ``/chat/completions`` and return the parsed JSON patch.

        Raises:
            httpx.HTTPError: on transport errors or a non-success status.
            KeyError, IndexError, TypeError: if the response body does not
                have the expected ``choices[0].message.content`` shape.
            ValueError: if the content is missing or is not a JSON object.
        """
        url = self._settings.base_url.rstrip("/") + "/chat/completions"
        payload = self._build_request_payload(interaction)
        response = httpx.post(
            url,
            headers={
                "Authorization": f"Bearer {self._settings.api_key}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=self._settings.timeout_seconds,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        # Some servers return ``content: null``; fail with an exception that
        # ``extract`` handles rather than an AttributeError further down.
        if not isinstance(content, str):
            raise ValueError("LLM extractor response has no text content")
        return _parse_json_object(content)

    def _build_request_payload(self, interaction: SocialInteraction) -> dict[str, Any]:
        """Assemble the chat-completions request body for one interaction."""
        payload: dict[str, Any] = {
            "model": self._settings.model,
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "You extract structured social-memory data from messy user notes. "
                        "Return only JSON. Preserve the user's original source text. "
                        "Use only facts visible in the note. Do not invent person IDs, "
                        "dates, companies, or relationships. Separate people present from "
                        "people merely mentioned. Store 'A said B...' as an attributed "
                        "claim, not a direct fact about B. For Chinese aliases like "
                        "胡八一(胖子), 胡八一又叫胖子, or 胡八一绰号胖子, use 胡八一 as "
                        "the person label and put 胖子 in that PersonRef's aliases. "
                        "Preserve visible Chinese names exactly; do not remove prefixes "
                        "such as 测试 or infer a different legal name. Extract follow-ups "
                        "and commitments broadly, not only literal promise wording: examples "
                        "include 我答应, 我回头, 记得, 提醒我, 她让我, 需要我, 下次帮她, "
                        "need to, should, promised, asked me to, follow up, send, introduce. "
                        "A follow-up is a user action or reminder after the interaction; do "
                        "not store it as an attributed claim. Contact preferences such as "
                        "之后最好用邮件联系, prefers WeChat, or likes email are preferences, "
                        "not follow-ups, unless the user gave a concrete task or date. "
                        "If a relative due date is clear "
                        "and existing_structured_fields has occurred_at, normalize due_at as "
                        "YYYY-MM-DD; otherwise omit due_at or set it to null. Keep follow-up "
                        "descriptions specific enough to act on, including person, action, "
                        "object, and visible time phrase when present."
                    ),
                },
                {
                    "role": "user",
                    "content": json.dumps(
                        {
                            "source_text": interaction.source_text,
                            "existing_structured_fields": _existing_structured_payload(
                                interaction
                            ),
                            "return_schema": {
                                "interaction_type": (
                                    "meeting|coffee|dinner|call|message|intro|event|other"
                                ),
                                "place": "string or null",
                                "participants": [
                                    {"person": {"label": "string", "aliases": ["string"]}}
                                ],
                                "mentioned_people": [
                                    {
                                        "person": {"label": "string"},
                                        "mentioned_by": {"label": "string"},
                                        "context": "string or null",
                                    }
                                ],
                                "topics": ["string"],
                                "direct_facts": [
                                    {
                                        "subject": {"label": "string"},
                                        "predicate": "works_at|studied_at|interest|...",
                                        "value": "string",
                                        "metadata": {
                                            "role": "string when a role/title is visible"
                                        },
                                    }
                                ],
                                "attributed_claims": [
                                    {
                                        "speaker": {"label": "string"},
                                        "subject": {"label": "string"},
                                        "claim_text": "verbatim claim sentence",
                                        "claim_type": "optional short category",
                                    }
                                ],
                                "follow_ups": [
                                    {
                                        "description": (
                                            "specific action item, preserving user language"
                                        ),
                                        "due_at": "YYYY-MM-DD or null",
                                        "related_people": [{"label": "string"}],
                                    }
                                ],
                                "relationships": [
                                    {
                                        "source": {"label": "string"},
                                        "target": {"label": "string"},
                                        "relationship_type": "knows|works_with|...",
                                    }
                                ],
                            },
                        },
                        # Keep CJK text readable in the prompt instead of \u escapes.
                        ensure_ascii=False,
                    ),
                },
            ],
            "temperature": 0,
        }
        # Only request JSON mode when configured to do so.
        if self._settings.response_format == "json_object":
            payload["response_format"] = {"type": "json_object"}
        return payload
225
+
226
+
227
+ def _existing_structured_payload(interaction: SocialInteraction) -> dict[str, object]:
228
+ payload = interaction.model_dump(mode="json", exclude={"source_text"})
229
+ return {key: value for key, value in payload.items() if value not in (None, [], {})}
230
+
231
+
232
def _interaction_from_patch(
    original: SocialInteraction, patch: dict[str, Any]
) -> SocialInteraction:
    """Validate an LLM patch into a ``SocialInteraction`` carrying the original text.

    An ``interaction`` or ``record_interaction`` envelope is unwrapped first
    (in that order). Only keys listed in ``INTERACTION_PATCH_FIELDS`` survive;
    each value has ``person_id`` keys removed and is shape-coerced.
    ``source_text`` always comes from ``original``.
    """
    for envelope in ("interaction", "record_interaction"):
        if envelope in patch and isinstance(patch[envelope], dict):
            patch = patch[envelope]

    clean_patch: dict[str, Any] = {}
    for key, value in patch.items():
        if key not in INTERACTION_PATCH_FIELDS:
            continue
        clean_patch[key] = _coerce_extractor_field(key, _without_person_ids(value))

    clean_patch["source_text"] = original.source_text
    return SocialInteraction.model_validate(clean_patch)
246
+
247
+
248
+ def _without_person_ids(value: Any) -> Any:
249
+ if isinstance(value, dict):
250
+ return {
251
+ key: _without_person_ids(item)
252
+ for key, item in value.items()
253
+ if key != "person_id"
254
+ }
255
+ if isinstance(value, list):
256
+ return [_without_person_ids(item) for item in value]
257
+ return value
258
+
259
+
260
+ def _coerce_extractor_field(field_name: str, value: Any) -> Any:
261
+ if field_name == "participants" and isinstance(value, list):
262
+ return [_coerce_participant(item) for item in value]
263
+ if field_name == "mentioned_people" and isinstance(value, list):
264
+ return [_coerce_mentioned_person(item) for item in value]
265
+ if field_name == "direct_facts" and isinstance(value, list):
266
+ return [_coerce_direct_fact(item) for item in value]
267
+ if field_name == "attributed_claims" and isinstance(value, list):
268
+ return [_coerce_attributed_claim(item) for item in value]
269
+ if field_name == "follow_ups" and isinstance(value, list):
270
+ return [
271
+ coerced
272
+ for coerced in (_coerce_follow_up(item) for item in value)
273
+ if not _looks_like_contact_preference_only_follow_up(coerced)
274
+ ]
275
+ if field_name == "relationships" and isinstance(value, list):
276
+ return [_coerce_relationship(item) for item in value]
277
+ return value
278
+
279
+
280
+ def _coerce_participant(item: Any) -> Any:
281
+ if isinstance(item, str):
282
+ return {"person": {"label": item}}
283
+ if not isinstance(item, dict):
284
+ return item
285
+ if "person" in item:
286
+ return {**item, "person": _coerce_person_ref(item["person"])}
287
+ if "name" in item:
288
+ role = item.get("role", "participant")
289
+ return {"person": {"label": item["name"]}, "role": role}
290
+ return item
291
+
292
+
293
+ def _coerce_mentioned_person(item: Any) -> Any:
294
+ if isinstance(item, str):
295
+ return {"person": {"label": item}}
296
+ if not isinstance(item, dict):
297
+ return item
298
+ coerced = dict(item)
299
+ if "person" in coerced:
300
+ coerced["person"] = _coerce_person_ref(coerced["person"])
301
+ elif "name" in coerced:
302
+ coerced["person"] = {"label": coerced.pop("name")}
303
+ if "mentioned_by" in coerced:
304
+ coerced["mentioned_by"] = _coerce_person_ref(coerced["mentioned_by"])
305
+ return coerced
306
+
307
+
308
+ def _coerce_direct_fact(item: Any) -> Any:
309
+ if not isinstance(item, dict):
310
+ return item
311
+ coerced = dict(item)
312
+ if "subject" in coerced:
313
+ coerced["subject"] = _coerce_person_ref(coerced["subject"])
314
+ return coerced
315
+
316
+
317
+ def _coerce_attributed_claim(item: Any) -> Any:
318
+ if not isinstance(item, dict):
319
+ return item
320
+ coerced = dict(item)
321
+ if "speaker" in coerced:
322
+ coerced["speaker"] = _coerce_person_ref(coerced["speaker"])
323
+ if "subject" in coerced:
324
+ coerced["subject"] = _coerce_person_ref(coerced["subject"])
325
+ return coerced
326
+
327
+
328
+ def _coerce_follow_up(item: Any) -> Any:
329
+ if isinstance(item, str):
330
+ return {"description": item}
331
+ if not isinstance(item, dict):
332
+ return item
333
+ coerced = dict(item)
334
+ if "what" in coerced and "description" not in coerced:
335
+ coerced["description"] = coerced.pop("what")
336
+ if "due" in coerced and "due_at" not in coerced:
337
+ coerced["due_at"] = coerced.pop("due")
338
+ if isinstance(coerced.get("related_people"), list):
339
+ coerced["related_people"] = [
340
+ _coerce_person_ref(ref) for ref in coerced["related_people"]
341
+ ]
342
+ return coerced
343
+
344
+
345
+ def _looks_like_contact_preference_only_follow_up(item: Any) -> bool:
346
+ if isinstance(item, str):
347
+ description = item
348
+ elif isinstance(item, dict):
349
+ description = str(item.get("description") or item.get("what") or "")
350
+ else:
351
+ return False
352
+ normalized = " ".join(description.casefold().split())
353
+ if not CONTACT_PREFERENCE_ONLY_RE.search(normalized):
354
+ return False
355
+ without_contact_phrase = CONTACT_PREFERENCE_ONLY_RE.sub("", normalized).strip()
356
+ without_contact_phrase = re.sub(
357
+ r"^[,,。;;\s]+|[,,。;;\s]+$", "", without_contact_phrase
358
+ )
359
+ if CONCRETE_FOLLOW_UP_CUE_RE.search(without_contact_phrase):
360
+ return False
361
+ return True
362
+
363
+
364
+ def _coerce_relationship(item: Any) -> Any:
365
+ if not isinstance(item, dict):
366
+ return item
367
+ coerced = dict(item)
368
+ if "source" in coerced:
369
+ coerced["source"] = _coerce_person_ref(coerced["source"])
370
+ if "target" in coerced:
371
+ coerced["target"] = _coerce_person_ref(coerced["target"])
372
+ return coerced
373
+
374
+
375
+ def _coerce_person_ref(item: Any) -> Any:
376
+ if isinstance(item, str):
377
+ return {"label": item}
378
+ if isinstance(item, dict) and "name" in item and "label" not in item:
379
+ coerced = dict(item)
380
+ coerced["label"] = coerced.pop("name")
381
+ return coerced
382
+ return item
383
+
384
+
385
def _merge_interactions(
    base: SocialInteraction, extracted: SocialInteraction
) -> SocialInteraction:
    """Return a copy of ``base`` enriched with data from ``extracted``.

    Values already on ``base`` win: scalars are only filled when missing,
    list fields keep base items first and append unseen extracted items,
    and base metadata keys override extracted ones.
    """
    changes: dict[str, object] = {}

    if extracted.occurred_at and not base.occurred_at:
        changes["occurred_at"] = extracted.occurred_at

    # A base type of "meeting" may be replaced by a different extracted type.
    # NOTE(review): presumably "meeting" is the model default — confirm.
    base_is_meeting = base.interaction_type == "meeting"
    if base_is_meeting and extracted.interaction_type != "meeting":
        changes["interaction_type"] = extracted.interaction_type

    if extracted.place and not base.place:
        changes["place"] = extracted.place

    collection_fields = (
        "participants",
        "mentioned_people",
        "topics",
        "direct_facts",
        "attributed_claims",
        "follow_ups",
        "relationships",
        "sensitivity",
    )
    for name in collection_fields:
        changes[name] = _merge_list_field(
            name, list(getattr(base, name)), list(getattr(extracted, name))
        )

    if extracted.metadata:
        merged_metadata = dict(extracted.metadata)
        merged_metadata.update(base.metadata)
        changes["metadata"] = merged_metadata

    return base.model_copy(update=changes)
411
+
412
+
413
+ def _merge_list_field(
414
+ field_name: str, base_items: list[Any], extracted_items: list[Any]
415
+ ) -> list[Any]:
416
+ result = list(base_items)
417
+ seen = {_dedupe_key(field_name, item) for item in result}
418
+ for item in extracted_items:
419
+ key = _dedupe_key(field_name, item)
420
+ if key in seen:
421
+ continue
422
+ seen.add(key)
423
+ result.append(item)
424
+ return result
425
+
426
+
427
+ def _dedupe_key(field_name: str, item: Any) -> tuple[object, ...]:
428
+ if field_name == "topics" or field_name == "sensitivity":
429
+ return (str(item).casefold(),)
430
+ if field_name == "participants":
431
+ return (_person_key(item.person),)
432
+ if field_name == "mentioned_people":
433
+ mentioned_by = item.mentioned_by.label if item.mentioned_by else ""
434
+ return (_person_key(item.person), mentioned_by.casefold())
435
+ if field_name == "direct_facts":
436
+ return (
437
+ _person_key(item.subject),
438
+ item.predicate.casefold(),
439
+ item.value.casefold(),
440
+ )
441
+ if field_name == "attributed_claims":
442
+ speaker = item.speaker.label if item.speaker else ""
443
+ subject = item.subject.label if item.subject else ""
444
+ return (speaker.casefold(), subject.casefold(), item.claim_text.casefold())
445
+ if field_name == "follow_ups":
446
+ return (item.description.casefold(),)
447
+ if field_name == "relationships":
448
+ return (
449
+ _person_key(item.source),
450
+ _person_key(item.target),
451
+ item.relationship_type.casefold(),
452
+ )
453
+ return (repr(item),)
454
+
455
+
456
+ def _person_key(ref: Any) -> str:
457
+ return str(getattr(ref, "label", "")).casefold().strip()
458
+
459
+
460
+ def _parse_json_object(text: str) -> dict[str, Any]:
461
+ stripped = text.strip()
462
+ if stripped.startswith("```"):
463
+ lines = stripped.splitlines()
464
+ if len(lines) >= 3 and lines[-1].strip() == "```":
465
+ stripped = "\n".join(lines[1:-1]).strip()
466
+ try:
467
+ parsed = json.loads(stripped)
468
+ except json.JSONDecodeError:
469
+ start = stripped.find("{")
470
+ end = stripped.rfind("}")
471
+ if start < 0 or end <= start:
472
+ raise
473
+ parsed = json.loads(stripped[start : end + 1])
474
+ if not isinstance(parsed, dict):
475
+ raise ValueError("LLM extractor response must be a JSON object")
476
+ return parsed
@@ -0,0 +1,200 @@
1
+ """Optional OpenAI-compatible identity disambiguation advisor."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass
7
+ from typing import Any
8
+
9
+ import httpx
10
+ from pydantic import ValidationError
11
+
12
+ from people_network_memory.config import PeopleMemoryConfig
13
+ from people_network_memory.domain.models import (
14
+ IdentityAdvice,
15
+ IdentityCandidate,
16
+ PersonRef,
17
+ SocialInteraction,
18
+ )
19
+ from people_network_memory.ports.errors import ConfigError
20
+
21
+
22
@dataclass(frozen=True)
class LlmIdentityAdvisorSettings:
    """Immutable connection settings for the LLM identity advisor."""

    base_url: str
    model: str
    api_key: str
    timeout_seconds: float = 30.0
    response_format: str = "none"

    @classmethod
    def from_config(cls, config: PeopleMemoryConfig) -> "LlmIdentityAdvisorSettings":
        """Build settings from application config.

        Raises:
            ConfigError: if the base URL, model, or API key is unset; the
                message names every missing environment variable.
        """
        required = (
            ("PEOPLE_MEMORY_LLM_BASE_URL", config.llm_base_url),
            ("PEOPLE_MEMORY_LLM_MODEL", config.llm_model),
            ("PEOPLE_MEMORY_LLM_API_KEY", config.llm_api_key),
        )
        absent = [env_name for env_name, configured in required if not configured]
        if absent:
            raise ConfigError("LLM identity advisor requires: " + ", ".join(absent))
        return cls(
            base_url=config.llm_base_url or "",
            model=config.llm_model or "",
            api_key=config.llm_api_key or "",
            timeout_seconds=config.identity_advisor_timeout_seconds,
            response_format=config.llm_response_format,
        )
48
+
49
+
50
class OpenAICompatibleIdentityAdvisor:
    """Advises on identity ambiguity without owning merge/link decisions."""

    # Recommendation values accepted from the model; anything else is
    # downgraded to "unknown".
    _ALLOWED_RECOMMENDATIONS = frozenset(
        {
            "same_person",
            "different_person",
            "ambiguous_needs_review",
            "unknown",
        }
    )

    def __init__(self, settings: LlmIdentityAdvisorSettings) -> None:
        """Store the endpoint settings used for every advice request."""
        self._settings = settings

    @classmethod
    def from_config(cls, config: PeopleMemoryConfig) -> "OpenAICompatibleIdentityAdvisor":
        """Build an advisor from application config.

        Raises whatever ``LlmIdentityAdvisorSettings.from_config`` raises
        when a required setting is missing.
        """
        return cls(LlmIdentityAdvisorSettings.from_config(config))

    def advise(
        self,
        *,
        interaction: SocialInteraction,
        ref: PersonRef,
        candidates: list[IdentityCandidate],
    ) -> IdentityAdvice:
        """Ask the LLM whether ``ref`` matches one of ``candidates``.

        This is best effort: with no candidates, or on any transport failure
        or malformed response, the result is an ``"unknown"`` recommendation
        rather than an exception.
        """
        if not candidates:
            return IdentityAdvice(recommendation="unknown")
        try:
            return self._request_advice(interaction=interaction, ref=ref, candidates=candidates)
        except (
            httpx.HTTPError,
            KeyError,
            # An empty ``choices`` array raises IndexError, which KeyError
            # does not cover (both only share the LookupError base).
            IndexError,
            TypeError,
            ValueError,
            json.JSONDecodeError,
            ValidationError,
        ):
            return IdentityAdvice(recommendation="unknown")

    def _request_advice(
        self,
        *,
        interaction: SocialInteraction,
        ref: PersonRef,
        candidates: list[IdentityCandidate],
    ) -> IdentityAdvice:
        """POST the disambiguation prompt and convert the reply to ``IdentityAdvice``.

        Raises:
            httpx.HTTPError: on transport errors or a non-success status.
            KeyError, IndexError, TypeError: if the response body does not
                have the expected ``choices[0].message.content`` shape.
            ValueError: if the content is missing, is not a JSON object, or
                ``confidence`` cannot be converted to a float.
        """
        url = self._settings.base_url.rstrip("/") + "/chat/completions"
        payload: dict[str, Any] = {
            "model": self._settings.model,
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "You are an identity disambiguation advisor for a private "
                        "personal-network memory tool. Decide whether the current "
                        "reference appears to mean the same person as an existing "
                        "candidate, a different person, or ambiguous. Use only the "
                        "provided source note and candidate evidence. Return JSON "
                        "only. This is advisory: never recommend merging unless the "
                        "source clearly supports the same person."
                    ),
                },
                {
                    "role": "user",
                    "content": json.dumps(
                        {
                            "source_text": interaction.source_text,
                            "current_reference": {
                                "label": ref.label,
                                "aliases": ref.aliases,
                                "company_hint": ref.company_hint,
                            },
                            "candidates": [_candidate_payload(item) for item in candidates],
                            "return_schema": {
                                "recommendation": (
                                    "same_person|different_person|"
                                    "ambiguous_needs_review|unknown"
                                ),
                                "confidence": "number 0..1",
                                "candidate_person_id": "string or null",
                                "reasons": ["short reason strings"],
                                "evidence": ["short source/candidate evidence strings"],
                            },
                        },
                        # Keep CJK text readable in the prompt instead of \u escapes.
                        ensure_ascii=False,
                    ),
                },
            ],
            "temperature": 0,
        }
        # Only request JSON mode when configured to do so.
        if self._settings.response_format == "json_object":
            payload["response_format"] = {"type": "json_object"}
        response = httpx.post(
            url,
            headers={
                "Authorization": f"Bearer {self._settings.api_key}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=self._settings.timeout_seconds,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        # Some servers return ``content: null``; fail with an exception that
        # ``advise`` handles rather than an AttributeError further down.
        if not isinstance(content, str):
            raise ValueError("LLM identity advisor response has no text content")
        parsed = _parse_json_object(content)

        recommendation = str(parsed.get("recommendation", "unknown"))
        if recommendation not in self._ALLOWED_RECOMMENDATIONS:
            recommendation = "unknown"

        # NOTE(review): candidate_person_id is passed through as returned by
        # the model and is not checked against ``candidates`` here — confirm
        # the caller validates it before acting on a "same_person" advice.
        raw_candidate_id = parsed.get("candidate_person_id")
        return IdentityAdvice(
            recommendation=recommendation,  # type: ignore[arg-type]
            confidence=float(parsed.get("confidence") or 0.0),
            candidate_person_id=(
                str(raw_candidate_id) if raw_candidate_id is not None else None
            ),
            reasons=_string_list(parsed.get("reasons")),
            evidence=_string_list(parsed.get("evidence")),
        )
165
+
166
+
167
+ def _candidate_payload(candidate: IdentityCandidate) -> dict[str, object]:
168
+ return {
169
+ "person_id": candidate.person_id,
170
+ "display_name": candidate.display_name,
171
+ "score": candidate.score,
172
+ "evidence": candidate.evidence[:5],
173
+ "exact_identifier_match": candidate.exact_identifier_match,
174
+ "exact_name_match": candidate.exact_name_match,
175
+ }
176
+
177
+
178
+ def _string_list(value: object) -> list[str]:
179
+ if not isinstance(value, list):
180
+ return []
181
+ return [str(item) for item in value if str(item).strip()]
182
+
183
+
184
+ def _parse_json_object(text: str) -> dict[str, Any]:
185
+ stripped = text.strip()
186
+ if stripped.startswith("```"):
187
+ lines = stripped.splitlines()
188
+ if len(lines) >= 3 and lines[-1].strip() == "```":
189
+ stripped = "\n".join(lines[1:-1]).strip()
190
+ try:
191
+ parsed = json.loads(stripped)
192
+ except json.JSONDecodeError:
193
+ start = stripped.find("{")
194
+ end = stripped.rfind("}")
195
+ if start < 0 or end <= start:
196
+ raise
197
+ parsed = json.loads(stripped[start : end + 1])
198
+ if not isinstance(parsed, dict):
199
+ raise ValueError("LLM identity advisor response must be a JSON object")
200
+ return parsed