@reconcrap/people-network-memory 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +476 -0
  2. package/docs/mcp_tools.md +138 -0
  3. package/harness_adapters/openclaw/mcp.managed.unix.template.json +25 -0
  4. package/harness_adapters/openclaw/mcp.managed.windows.template.json +26 -0
  5. package/harness_adapters/openclaw/mcp.template.json +14 -0
  6. package/harness_adapters/openclaw/ppl/SKILL.md +114 -0
  7. package/package.json +30 -0
  8. package/pyproject.toml +26 -0
  9. package/scripts/install_windows.ps1 +92 -0
  10. package/scripts/npm/people-memory.js +276 -0
  11. package/scripts/people_memory_bootstrap.py +247 -0
  12. package/scripts/run_graphiti_live_from_liepin.ps1 +87 -0
  13. package/scripts/run_tests_with_artifacts.ps1 +307 -0
  14. package/src/people_network_memory/__init__.py +6 -0
  15. package/src/people_network_memory/application/__init__.py +16 -0
  16. package/src/people_network_memory/application/normalization.py +1441 -0
  17. package/src/people_network_memory/application/services.py +921 -0
  18. package/src/people_network_memory/cli.py +1212 -0
  19. package/src/people_network_memory/config.py +268 -0
  20. package/src/people_network_memory/domain/__init__.py +55 -0
  21. package/src/people_network_memory/domain/identity.py +77 -0
  22. package/src/people_network_memory/domain/models.py +355 -0
  23. package/src/people_network_memory/fixtures/__init__.py +6 -0
  24. package/src/people_network_memory/fixtures/eval.py +398 -0
  25. package/src/people_network_memory/fixtures/extractor_eval.py +364 -0
  26. package/src/people_network_memory/fixtures/generator.py +290 -0
  27. package/src/people_network_memory/fixtures/report.py +252 -0
  28. package/src/people_network_memory/graphiti_adapter/__init__.py +9 -0
  29. package/src/people_network_memory/graphiti_adapter/episode_formatter.py +70 -0
  30. package/src/people_network_memory/graphiti_adapter/graphiti_store.py +655 -0
  31. package/src/people_network_memory/graphiti_adapter/indexer.py +194 -0
  32. package/src/people_network_memory/graphiti_adapter/ontology.py +68 -0
  33. package/src/people_network_memory/harness_adapters/__init__.py +2 -0
  34. package/src/people_network_memory/harness_adapters/openclaw/__init__.py +9 -0
  35. package/src/people_network_memory/harness_adapters/openclaw/installer.py +577 -0
  36. package/src/people_network_memory/harness_adapters/openclaw/integration_eval.py +508 -0
  37. package/src/people_network_memory/harness_adapters/openclaw/smoke.py +292 -0
  38. package/src/people_network_memory/infrastructure/__init__.py +2 -0
  39. package/src/people_network_memory/infrastructure/archive_backup.py +171 -0
  40. package/src/people_network_memory/infrastructure/diagnostics.py +171 -0
  41. package/src/people_network_memory/infrastructure/embeddings.py +155 -0
  42. package/src/people_network_memory/infrastructure/file_store.py +129 -0
  43. package/src/people_network_memory/infrastructure/graphiti_promotion.py +212 -0
  44. package/src/people_network_memory/infrastructure/id_generator.py +40 -0
  45. package/src/people_network_memory/infrastructure/in_memory_store.py +1008 -0
  46. package/src/people_network_memory/infrastructure/llm_extractor.py +476 -0
  47. package/src/people_network_memory/infrastructure/llm_identity_advisor.py +200 -0
  48. package/src/people_network_memory/infrastructure/llm_judge.py +162 -0
  49. package/src/people_network_memory/infrastructure/redaction.py +21 -0
  50. package/src/people_network_memory/infrastructure/release_check.py +186 -0
  51. package/src/people_network_memory/infrastructure/retrieval_intent.py +98 -0
  52. package/src/people_network_memory/infrastructure/semantic_index.py +262 -0
  53. package/src/people_network_memory/mcp_server/__init__.py +2 -0
  54. package/src/people_network_memory/mcp_server/contracts.py +85 -0
  55. package/src/people_network_memory/mcp_server/runtime.py +133 -0
  56. package/src/people_network_memory/mcp_server/tools.py +588 -0
  57. package/src/people_network_memory/ports/__init__.py +2 -0
  58. package/src/people_network_memory/ports/errors.py +25 -0
  59. package/src/people_network_memory/ports/interfaces.py +103 -0
  60. package/src/people_network_memory/projection/__init__.py +6 -0
  61. package/src/people_network_memory/projection/builders.py +46 -0
@@ -0,0 +1,398 @@
1
+ """Fixture retrieval evaluation helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+
7
+ from people_network_memory.application.services import RecordInteractionService, RetrieveContextService
8
+ from people_network_memory.domain.models import RetrievalItem, SensitivityLabel
9
+ from people_network_memory.fixtures.generator import MockDataset
10
+ from people_network_memory.infrastructure.id_generator import SequentialIdGenerator
11
+ from people_network_memory.infrastructure.in_memory_store import InMemoryPeopleStore
12
+ from people_network_memory.infrastructure.retrieval_intent import text_answers_mentioned_query
13
+
14
+
15
@dataclass(frozen=True)
class EvalResult:
    """Immutable summary of one retrieval evaluation run.

    Stores the raw counters collected while running evaluation queries together
    with the per-query ``cases`` records. The rate properties and ``to_json``
    derive every other number from these fields.
    """

    # Denominator for recall_at_3, recall_at_5 and evidence_rate.
    checked: int
    top3_hits: int
    top5_hits: int
    evidence_complete: int
    ingested_interactions: int = 0
    # Denominator / numerator for returned_result_evidence_rate.
    returned_results: int = 0
    evidence_results: int = 0
    sensitive_leaks: int = 0
    no_result_count: int = 0
    candidate_queries: int = 0
    skipped_unanswerable_queries: int = 0
    # One dict per evaluated query; read by the breakdown and semantic counters.
    cases: list[dict[str, object]] = field(default_factory=list)

    @property
    def recall_at_3(self) -> float:
        """Return ``top3_hits / checked``, or 0.0 when nothing was checked."""
        if not self.checked:
            return 0.0
        return self.top3_hits / self.checked

    @property
    def recall_at_5(self) -> float:
        """Return ``top5_hits / checked``, or 0.0 when nothing was checked."""
        if not self.checked:
            return 0.0
        return self.top5_hits / self.checked

    @property
    def evidence_rate(self) -> float:
        """Return ``evidence_complete / checked``, or 0.0 when nothing was checked."""
        if not self.checked:
            return 0.0
        return self.evidence_complete / self.checked

    @property
    def returned_result_evidence_rate(self) -> float:
        """Return ``evidence_results / returned_results``, or 0.0 with no results."""
        return self.evidence_results / self.returned_results if self.returned_results else 0.0

    def to_json(
        self,
        *,
        include_cases: bool = True,
        failures_only: bool = False,
    ) -> dict[str, object]:
        """Build a plain-dict report of the counters and derived rates.

        Args:
            include_cases: When true, add ``"cases"`` and ``"case_filter"`` keys.
            failures_only: When true (and cases are included), keep only the
                cases for which ``_case_failed`` returns true.

        Returns:
            A dict with rates rounded to 4 decimal places. The
            ``"passes_v1_thresholds"`` flag requires recall@3 >= 0.70,
            recall@5 >= 0.85, a returned-result evidence rate of exactly 1.0
            and zero sensitive leaks.
        """
        thresholds_met = (
            self.recall_at_3 >= 0.70
            and self.recall_at_5 >= 0.85
            and self.returned_result_evidence_rate == 1.0
            and self.sensitive_leaks == 0
        )
        report: dict[str, object] = {
            "checked": self.checked,
            "candidate_queries": self.candidate_queries,
            "skipped_unanswerable_queries": self.skipped_unanswerable_queries,
            "ingested_interactions": self.ingested_interactions,
            "top3_hits": self.top3_hits,
            "top5_hits": self.top5_hits,
            "recall_at_3": round(self.recall_at_3, 4),
            "recall_at_5": round(self.recall_at_5, 4),
            "evidence_rate": round(self.evidence_rate, 4),
            "returned_results": self.returned_results,
            "evidence_results": self.evidence_results,
            "returned_result_evidence_rate": round(self.returned_result_evidence_rate, 4),
            "semantic_result_count": self._semantic_result_count(),
            "sensitive_leaks": self.sensitive_leaks,
            "no_result_count": self.no_result_count,
            "category_breakdown": self._category_breakdown(),
            "passes_v1_thresholds": thresholds_met,
        }
        if not include_cases:
            return report
        if failures_only:
            report["cases"] = [case for case in self.cases if _case_failed(case)]
            report["case_filter"] = "failures_only"
        else:
            # Copy so the report does not alias the stored list.
            report["cases"] = list(self.cases)
            report["case_filter"] = "all"
        return report

    def _category_breakdown(self) -> dict[str, dict[str, object]]:
        """Tally the cases per ``"category"`` and attach per-category rates.

        Cases without a category are grouped under ``"unknown"``; the result
        is ordered by category name.
        """
        tallies: dict[str, dict[str, int]] = {}
        for case in self.cases:
            name = str(case.get("category", "unknown"))
            if name not in tallies:
                tallies[name] = {
                    "checked": 0,
                    "top3_hits": 0,
                    "top5_hits": 0,
                    "evidence_complete": 0,
                    "sensitive_leaks": 0,
                    "no_result_count": 0,
                }
            tally = tallies[name]
            tally["checked"] += 1
            tally["top3_hits"] += int(bool(case.get("hit_top3")))
            tally["top5_hits"] += int(bool(case.get("hit_top5")))
            tally["evidence_complete"] += int(bool(case.get("evidence_complete")))
            tally["sensitive_leaks"] += int(case.get("sensitive_leak_count", 0))
            tally["no_result_count"] += int(int(case.get("result_count", 0)) == 0)

        breakdown: dict[str, dict[str, object]] = {}
        for name in sorted(tallies):
            tally = tallies[name]
            # Every tally was incremented at least once, so this is never zero.
            total = tally["checked"]
            breakdown[name] = {
                **tally,
                "recall_at_3": round(tally["top3_hits"] / total, 4),
                "recall_at_5": round(tally["top5_hits"] / total, 4),
                "evidence_rate": round(tally["evidence_complete"] / total, 4),
            }
        return breakdown

    def _semantic_result_count(self) -> int:
        """Count ``actual_results`` dicts titled ``"Semantic interaction match"``."""
        return sum(
            1
            for case in self.cases
            for result in case.get("actual_results", [])
            if isinstance(result, dict) and result.get("title") == "Semantic interaction match"
        )
121
+
122
+
123
def evaluate_dataset(
    dataset: MockDataset,
    *,
    max_interactions: int | None = None,
    max_queries: int | None = None,
    only_answerable: bool = True,
) -> EvalResult:
    """Evaluate *dataset* against a freshly created in-memory store.

    A single ``InMemoryPeopleStore`` backs every port of both services
    (memory store, identity index, review queue and graph search). The limits
    and the ``only_answerable`` flag are forwarded unchanged to
    ``evaluate_services``.
    """
    backing_store = InMemoryPeopleStore()
    recorder = RecordInteractionService(
        memory_store=backing_store,
        identity_index=backing_store,
        review_queue=backing_store,
        id_generator=SequentialIdGenerator(),
    )
    retriever = RetrieveContextService(
        graph_search=backing_store,
        review_queue=backing_store,
    )
    forwarded = {
        "max_interactions": max_interactions,
        "max_queries": max_queries,
        "only_answerable": only_answerable,
    }
    return evaluate_services(
        dataset,
        record_service=recorder,
        retrieve_service=retriever,
        **forwarded,
    )
146
+
147
+
148
def evaluate_services(
    dataset: MockDataset,
    *,
    record_service: RecordInteractionService,
    retrieve_service: RetrieveContextService,
    max_interactions: int | None = None,
    max_queries: int | None = None,
    only_answerable: bool = True,
) -> EvalResult:
    """Record the dataset's interactions, then evaluate retrieval over them.

    Args:
        dataset: Source of the interactions and the evaluation queries.
        record_service: Service whose ``record`` is called once per interaction.
        retrieve_service: Service handed on to ``evaluate_retrieval_service``.
        max_interactions: Cap on the interactions to record. Any falsy value
            (``None`` or ``0``) means "record all of them".
        max_queries: Forwarded to ``evaluate_retrieval_service``.
        only_answerable: Forwarded to ``evaluate_retrieval_service``.

    Returns:
        The result of ``evaluate_retrieval_service``, with the number of
        recorded interactions passed as ``indexed_interactions``.
    """
    if max_interactions:
        to_record = dataset.interactions[:max_interactions]
    else:
        to_record = dataset.interactions

    for entry in to_record:
        record_service.record(entry)

    recorded_total = len(to_record)
    return evaluate_retrieval_service(
        dataset,
        retrieve_service=retrieve_service,
        indexed_interactions=recorded_total,
        max_queries=max_queries,
        only_answerable=only_answerable,
    )
167
+
168
+
169
def evaluate_retrieval_service(
    dataset: MockDataset,
    *,
    retrieve_service: RetrieveContextService,
    indexed_interactions: int | None = None,
    max_queries: int | None = None,
    only_answerable: bool = True,
) -> EvalResult:
    """Run the dataset's evaluation queries and score what comes back.

    Each query is retrieved with ``limit=5``, the ``"strict"`` sensitivity
    policy and the ``"private"`` output context. A query counts as a top-3 /
    top-5 hit when ``_first_strict_match_rank`` finds a satisfying result
    within that many ranks.

    Args:
        dataset: Provides ``eval_queries`` and, as a fallback for the indexed
            count, ``interactions``.
        retrieve_service: Service whose ``retrieve`` is called once per query.
        indexed_interactions: How many interactions were indexed. When ``None``
            the full dataset length is reported and no query is skipped.
        max_queries: Cap on the candidate queries. Any falsy value (``None``
            or ``0``) means "use all of them".
        only_answerable: When true and ``indexed_interactions`` is given, drop
            queries whose ``source_interaction_index`` is not below the
            indexed count.

    Returns:
        An ``EvalResult`` holding the aggregate counters and one case record
        per evaluated query.
    """
    if indexed_interactions is None:
        indexed_count = len(dataset.interactions)
    else:
        indexed_count = indexed_interactions

    if max_queries:
        candidate_queries = dataset.eval_queries[:max_queries]
    else:
        candidate_queries = dataset.eval_queries

    queries = candidate_queries
    if only_answerable and indexed_interactions is not None:
        queries = [
            candidate
            for candidate in candidate_queries
            if candidate.source_interaction_index is None
            or candidate.source_interaction_index < indexed_count
        ]
    # Zero whenever no filtering took place.
    skipped_unanswerable_queries = len(candidate_queries) - len(queries)

    top3_hits = top5_hits = evidence_complete = 0
    returned_results = evidence_results = sensitive_leaks = no_result_count = 0
    cases: list[dict[str, object]] = []

    for position, query in enumerate(queries, start=1):
        response = retrieve_service.retrieve(
            query.query,
            limit=5,
            sensitivity_policy="strict",
            output_context="private",
        )
        hits = response.results

        returned_results += len(hits)
        evidence_results += sum(1 for hit in hits if hit.evidence)
        leak_count = sum(1 for hit in hits if _has_sensitive_leak(hit.sensitivity))
        sensitive_leaks += leak_count
        no_result_count += 0 if hits else 1

        # An empty response never counts as evidence-complete.
        evidence_ok = bool(hits) and all(hit.evidence for hit in hits)
        evidence_complete += int(evidence_ok)

        expected = query.expected_people + query.expected_terms
        rank_in_top5, rank_in_top3 = (
            _first_strict_match_rank(
                category=query.category,
                results=hits,
                expected_people=query.expected_people,
                expected_terms=query.expected_terms,
                max_rank=cutoff,
            )
            for cutoff in (5, 3)
        )
        hit_top5 = rank_in_top5 is not None
        hit_top3 = rank_in_top3 is not None
        top5_hits += int(hit_top5)
        top3_hits += int(hit_top3)

        # Diagnostic only: which expected strings appear anywhere in the results.
        searchable = " ".join(hit.matched_text + " " + hit.title for hit in hits)
        matched = [name for name in expected if _contains(searchable, name)]
        missed = [name for name in expected if name not in matched]

        top_matches = [
            {
                "title": hit.title,
                "matched_text": hit.matched_text,
                "why_matched": hit.why_matched,
                "score": round(hit.score, 4),
                "person_ids": hit.person_ids,
                "sensitivity": [label.value for label in hit.sensitivity],
                "evidence_ids": [source.evidence_id for source in hit.evidence],
            }
            for hit in hits[:3]
        ]
        actual_results = [
            _serialize_actual_result(rank, hit)
            for rank, hit in enumerate(hits, start=1)
        ]

        cases.append(
            {
                "query": query.query,
                "query_index": position,
                "category": query.category,
                "source_interaction_index": query.source_interaction_index,
                "expected": expected,
                "expected_people": query.expected_people,
                "expected_terms": query.expected_terms,
                "matched_expected": matched,
                "missed_expected": missed,
                "hit_top3": hit_top3,
                "hit_top5": hit_top5,
                "strict_match_rank": rank_in_top5,
                "expected_person_rank": _first_expected_person_rank(
                    hits,
                    query.expected_people,
                    max_rank=5,
                ),
                "result_count": len(hits),
                "evidence_complete": evidence_ok,
                "sensitive_leak_count": leak_count,
                "top_matches": top_matches,
                "actual_results": actual_results,
            }
        )

    return EvalResult(
        checked=len(queries),
        ingested_interactions=indexed_count,
        candidate_queries=len(candidate_queries),
        skipped_unanswerable_queries=skipped_unanswerable_queries,
        top3_hits=top3_hits,
        top5_hits=top5_hits,
        evidence_complete=evidence_complete,
        returned_results=returned_results,
        evidence_results=evidence_results,
        sensitive_leaks=sensitive_leaks,
        no_result_count=no_result_count,
        cases=cases,
    )
291
+
292
+
293
def _has_sensitive_leak(labels: list[SensitivityLabel]) -> bool:
    """Return True if *labels* contains a label treated as a leak.

    The leaking labels are ``SENSITIVE`` and ``DO_NOT_SURFACE_UNPROMPTED``.
    """
    leaking = {
        SensitivityLabel.SENSITIVE,
        SensitivityLabel.DO_NOT_SURFACE_UNPROMPTED,
    }
    for label in labels:
        if label in leaking:
            return True
    return False
299
+
300
+
301
def _serialize_actual_result(rank: int, item: RetrievalItem) -> dict[str, object]:
    """Flatten one retrieval result into a plain dict for the case report.

    Args:
        rank: Position of the result, as supplied by the caller.
        item: The retrieval result to serialize.

    Returns:
        A dict of the item's fields with ``score`` and each evidence
        ``confidence`` rounded to 4 decimal places, sensitivity labels reduced
        to their ``value`` and ``recorded_at`` rendered with ``isoformat()``.
    """
    serialized: dict[str, object] = {
        "rank": rank,
        "item_id": item.item_id,
        "kind": item.kind,
        "title": item.title,
        "matched_text": item.matched_text,
        "why_matched": item.why_matched,
        "score": round(item.score, 4),
        "person_ids": item.person_ids,
        "sensitivity": [label.value for label in item.sensitivity],
        "is_secondhand": item.is_secondhand,
    }
    evidence_rows = []
    for source in item.evidence:
        evidence_rows.append(
            {
                "evidence_id": source.evidence_id,
                "source_text": source.source_text,
                "recorded_at": source.recorded_at.isoformat(),
                "speaker_person_id": source.speaker_person_id,
                "speaker_label": source.speaker_label,
                "confidence": round(source.confidence, 4),
            }
        )
    # Added last so "evidence" stays the final key of the payload.
    serialized["evidence"] = evidence_rows
    return serialized
325
+
326
+
327
def _first_strict_match_rank(
    *,
    category: str,
    results: list[RetrievalItem],
    expected_people: list[str],
    expected_terms: list[str],
    max_rank: int,
) -> int | None:
    """Return the 1-based rank of the first result that satisfies the query.

    Only the first *max_rank* results are examined, using
    ``_result_satisfies_query``. Returns ``None`` when none of them qualifies.
    """
    window = results[:max_rank]
    satisfying_ranks = (
        position
        for position, candidate in enumerate(window, start=1)
        if _result_satisfies_query(
            category=category,
            item=candidate,
            expected_people=expected_people,
            expected_terms=expected_terms,
        )
    )
    # The generator is lazy, so scanning stops at the first match.
    return next(satisfying_ranks, None)
344
+
345
+
346
def _result_satisfies_query(
    *,
    category: str,
    item: RetrievalItem,
    expected_people: list[str],
    expected_terms: list[str],
) -> bool:
    """Decide whether one result answers a query of the given category.

    Matching is done on the result's title and matched text joined by a space:

    * ``"mentioned"``: at least one expected person must be present and
      ``text_answers_mentioned_query`` must accept the first expected term
      (an empty string when there are no terms).
    * ``"follow_up"``: the item kind must be ``"follow_up"`` and every
      expected person and term must be present.
    * ``"profile"``, ``"vague"``, ``"bilingual"``: every expected term must
      be present; expected people are not checked.
    * any other category: every expected person and term must be present.

    An empty expectation list makes the corresponding "every" check pass.
    """
    haystack = f"{item.title} {item.matched_text}"

    def present(needle: str) -> bool:
        return _contains(haystack, needle)

    if category == "mentioned":
        if not expected_people:
            return False
        if not any(present(person) for person in expected_people):
            return False
        target = expected_terms[0] if expected_terms else ""
        return text_answers_mentioned_query(haystack, target)

    if category == "follow_up":
        if item.kind != "follow_up":
            return False
        return all(present(person) for person in expected_people) and all(
            present(term) for term in expected_terms
        )

    if category in {"profile", "vague", "bilingual"}:
        return all(present(term) for term in expected_terms)

    return all(present(needle) for needle in (*expected_people, *expected_terms))
372
+
373
+
374
def _first_expected_person_rank(
    results: list[RetrievalItem],
    expected_people: list[str],
    *,
    max_rank: int,
) -> int | None:
    """Return the 1-based rank of the first result naming an expected person.

    Only the first *max_rank* results are examined; a result qualifies when
    its title or matched text contains any of *expected_people*. Returns
    ``None`` when *expected_people* is empty or no result qualifies.
    """
    if not expected_people:
        return None
    ranks = (
        position
        for position, candidate in enumerate(results[:max_rank], start=1)
        if any(
            _contains(f"{candidate.title} {candidate.matched_text}", person)
            for person in expected_people
        )
    )
    return next(ranks, None)
387
+
388
+
389
def _contains(text: str, expected: str) -> bool:
    """Return True when *expected* occurs in *text*, ignoring case.

    Both strings are compared after ``str.casefold()``, which is the caseless
    form intended for matching; ``str.lower()`` misses equivalences such as
    "ß" versus "SS". An empty *expected* is contained in every text, so it
    always matches.
    """
    return expected.casefold() in text.casefold()
391
+
392
+
393
def _case_failed(case: dict[str, object]) -> bool:
    """Return True when a case record counts as a failure.

    A case fails if it has no top-5 hit, if its evidence is incomplete, or if
    its ``"sensitive_leak_count"`` (0 when absent) is positive.
    """
    if not case.get("hit_top5"):
        return True
    if not case.get("evidence_complete"):
        return True
    # Only reached when both flags are truthy, as in a short-circuiting `or`.
    return int(case.get("sensitive_leak_count", 0)) > 0