@reconcrap/people-network-memory 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +476 -0
- package/docs/mcp_tools.md +138 -0
- package/harness_adapters/openclaw/mcp.managed.unix.template.json +25 -0
- package/harness_adapters/openclaw/mcp.managed.windows.template.json +26 -0
- package/harness_adapters/openclaw/mcp.template.json +14 -0
- package/harness_adapters/openclaw/ppl/SKILL.md +114 -0
- package/package.json +30 -0
- package/pyproject.toml +26 -0
- package/scripts/install_windows.ps1 +92 -0
- package/scripts/npm/people-memory.js +276 -0
- package/scripts/people_memory_bootstrap.py +247 -0
- package/scripts/run_graphiti_live_from_liepin.ps1 +87 -0
- package/scripts/run_tests_with_artifacts.ps1 +307 -0
- package/src/people_network_memory/__init__.py +6 -0
- package/src/people_network_memory/application/__init__.py +16 -0
- package/src/people_network_memory/application/normalization.py +1441 -0
- package/src/people_network_memory/application/services.py +921 -0
- package/src/people_network_memory/cli.py +1212 -0
- package/src/people_network_memory/config.py +268 -0
- package/src/people_network_memory/domain/__init__.py +55 -0
- package/src/people_network_memory/domain/identity.py +77 -0
- package/src/people_network_memory/domain/models.py +355 -0
- package/src/people_network_memory/fixtures/__init__.py +6 -0
- package/src/people_network_memory/fixtures/eval.py +398 -0
- package/src/people_network_memory/fixtures/extractor_eval.py +364 -0
- package/src/people_network_memory/fixtures/generator.py +290 -0
- package/src/people_network_memory/fixtures/report.py +252 -0
- package/src/people_network_memory/graphiti_adapter/__init__.py +9 -0
- package/src/people_network_memory/graphiti_adapter/episode_formatter.py +70 -0
- package/src/people_network_memory/graphiti_adapter/graphiti_store.py +655 -0
- package/src/people_network_memory/graphiti_adapter/indexer.py +194 -0
- package/src/people_network_memory/graphiti_adapter/ontology.py +68 -0
- package/src/people_network_memory/harness_adapters/__init__.py +2 -0
- package/src/people_network_memory/harness_adapters/openclaw/__init__.py +9 -0
- package/src/people_network_memory/harness_adapters/openclaw/installer.py +577 -0
- package/src/people_network_memory/harness_adapters/openclaw/integration_eval.py +508 -0
- package/src/people_network_memory/harness_adapters/openclaw/smoke.py +292 -0
- package/src/people_network_memory/infrastructure/__init__.py +2 -0
- package/src/people_network_memory/infrastructure/archive_backup.py +171 -0
- package/src/people_network_memory/infrastructure/diagnostics.py +171 -0
- package/src/people_network_memory/infrastructure/embeddings.py +155 -0
- package/src/people_network_memory/infrastructure/file_store.py +129 -0
- package/src/people_network_memory/infrastructure/graphiti_promotion.py +212 -0
- package/src/people_network_memory/infrastructure/id_generator.py +40 -0
- package/src/people_network_memory/infrastructure/in_memory_store.py +1008 -0
- package/src/people_network_memory/infrastructure/llm_extractor.py +476 -0
- package/src/people_network_memory/infrastructure/llm_identity_advisor.py +200 -0
- package/src/people_network_memory/infrastructure/llm_judge.py +162 -0
- package/src/people_network_memory/infrastructure/redaction.py +21 -0
- package/src/people_network_memory/infrastructure/release_check.py +186 -0
- package/src/people_network_memory/infrastructure/retrieval_intent.py +98 -0
- package/src/people_network_memory/infrastructure/semantic_index.py +262 -0
- package/src/people_network_memory/mcp_server/__init__.py +2 -0
- package/src/people_network_memory/mcp_server/contracts.py +85 -0
- package/src/people_network_memory/mcp_server/runtime.py +133 -0
- package/src/people_network_memory/mcp_server/tools.py +588 -0
- package/src/people_network_memory/ports/__init__.py +2 -0
- package/src/people_network_memory/ports/errors.py +25 -0
- package/src/people_network_memory/ports/interfaces.py +103 -0
- package/src/people_network_memory/projection/__init__.py +6 -0
- package/src/people_network_memory/projection/builders.py +46 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Human-readable eval report rendering."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def render_eval_markdown_report(
    payload: dict[str, Any],
    *,
    title: str = "Personal Network Memory Retrieval Test Report",
    compare_payload: dict[str, Any] | None = None,
) -> str:
    """Render an eval payload as a Markdown report and return it as text.

    The report has a summary section, an optional "Comparison Baseline"
    section (only when ``compare_payload`` is truthy), and one section per
    dict entry found under ``payload["cases"]``. Non-dict case entries are
    skipped. When a baseline case with the same ``query_index`` exists, the
    case section shows the baseline results next to the current ones and
    adds a comparison note.

    Args:
        payload: Eval result mapping; metrics are read with ``.get`` so
            missing keys render as ``None``.
        title: Text of the top-level Markdown heading.
        compare_payload: Optional second eval payload used as the baseline.

    Returns:
        The Markdown document, with trailing whitespace trimmed and exactly
        one trailing newline.
    """
    baseline_by_index = _cases_by_index(compare_payload) if compare_payload else {}

    # (label, payload key) pairs for the fixed part of the summary.
    summary_fields = (
        ("Checked queries", "checked"),
        ("Recall@3", "recall_at_3"),
        ("Recall@5", "recall_at_5"),
        ("Evidence rate", "evidence_rate"),
        ("Returned-result evidence rate", "returned_result_evidence_rate"),
        ("Sensitive leaks", "sensitive_leaks"),
        ("Semantic result count", "semantic_result_count"),
        ("Passes V1 thresholds", "passes_v1_thresholds"),
    )
    report: list[str] = [f"# {title}", "", "## Summary", ""]
    report += [f"- {label}: {payload.get(key)}" for label, key in summary_fields]
    report += _rank_summary_lines(payload)
    report.append("")

    if compare_payload:
        baseline_fields = (
            ("checked queries", "checked"),
            ("Recall@3", "recall_at_3"),
            ("Recall@5", "recall_at_5"),
            ("passes V1 thresholds", "passes_v1_thresholds"),
        )
        report += ["## Comparison Baseline", ""]
        report += [
            f"- Baseline {label}: {compare_payload.get(key)}"
            for label, key in baseline_fields
        ]
        report += _rank_summary_lines(compare_payload, prefix="Baseline ")
        top_changes = _changed_top_count(payload, compare_payload)
        report += [f"- Top result changed by judged retrieval: {top_changes}", ""]

    for case in payload.get("cases", []):
        if not isinstance(case, dict):
            continue
        index = case.get("query_index")
        baseline_case = baseline_by_index.get(index)
        verdict = "PASS" if case.get("hit_top5") else "FAIL"
        report += [
            f"## Case {index}: {case.get('category')} - {verdict}",
            "",
            f"Query: `{case.get('query')}`",
            "",
            "Expected Results:",
            _expected_line(case),
            "",
        ]

        # With a baseline, its results are listed first under their own
        # heading; the current case's results always follow.
        if baseline_case:
            report.append("Deterministic Actual Results:")
            report += _actual_lines(baseline_case.get("actual_results", []))
            report += ["", "LLM-Judged Actual Results:"]
        else:
            report.append("Actual Results:")
        report += _actual_lines(case.get("actual_results", []))
        report.append("")

        report += ["Evaluation Reasoning:", _reasoning_line(case), ""]
        if baseline_case:
            report += ["Comparison Note:", _comparison_line(case, baseline_case), ""]

    return "\n".join(report).rstrip() + "\n"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def write_eval_markdown_report(
    payload: dict[str, Any],
    output: str | Path,
    *,
    title: str = "Personal Network Memory Retrieval Test Report",
    compare_payload: dict[str, Any] | None = None,
) -> None:
    """Render the eval report and write it to ``output`` as UTF-8 text.

    Missing parent directories of ``output`` are created first.

    Args:
        payload: Eval result mapping passed to ``render_eval_markdown_report``.
        output: Destination file path.
        title: Report heading, forwarded to the renderer.
        compare_payload: Optional baseline payload, forwarded to the renderer.
    """
    destination = Path(output)
    # Directory creation happens before rendering, as in the original flow.
    destination.parent.mkdir(parents=True, exist_ok=True)
    markdown = render_eval_markdown_report(
        payload,
        title=title,
        compare_payload=compare_payload,
    )
    destination.write_text(markdown, encoding="utf-8")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _cases_by_index(payload: dict[str, Any] | None) -> dict[object, dict[str, Any]]:
|
|
117
|
+
if not payload:
|
|
118
|
+
return {}
|
|
119
|
+
cases: dict[object, dict[str, Any]] = {}
|
|
120
|
+
for case in payload.get("cases", []):
|
|
121
|
+
if isinstance(case, dict):
|
|
122
|
+
cases[case.get("query_index")] = case
|
|
123
|
+
return cases
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _rank_summary_lines(payload: dict[str, Any], *, prefix: str = "") -> list[str]:
|
|
127
|
+
cases = [case for case in payload.get("cases", []) if isinstance(case, dict)]
|
|
128
|
+
if not cases:
|
|
129
|
+
return []
|
|
130
|
+
strict_top1 = sum(1 for case in cases if case.get("strict_match_rank") == 1)
|
|
131
|
+
expected_top1 = sum(1 for case in cases if case.get("expected_person_rank") == 1)
|
|
132
|
+
expected_top3 = sum(
|
|
133
|
+
1
|
|
134
|
+
for case in cases
|
|
135
|
+
if isinstance(case.get("expected_person_rank"), int)
|
|
136
|
+
and case["expected_person_rank"] <= 3
|
|
137
|
+
)
|
|
138
|
+
expected_top5 = sum(
|
|
139
|
+
1
|
|
140
|
+
for case in cases
|
|
141
|
+
if isinstance(case.get("expected_person_rank"), int)
|
|
142
|
+
and case["expected_person_rank"] <= 5
|
|
143
|
+
)
|
|
144
|
+
total = len(cases)
|
|
145
|
+
return [
|
|
146
|
+
f"- {prefix}Strict match at rank 1: {strict_top1}/{total}",
|
|
147
|
+
f"- {prefix}Expected person at rank 1: {expected_top1}/{total}",
|
|
148
|
+
f"- {prefix}Expected person within top 3: {expected_top3}/{total}",
|
|
149
|
+
f"- {prefix}Expected person within top 5: {expected_top5}/{total}",
|
|
150
|
+
]
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _changed_top_count(payload: dict[str, Any], baseline: dict[str, Any]) -> str:
    """Return ``"changed/compared"`` for top results that differ from the baseline.

    A case is compared only when it is a dict and the baseline has a
    non-empty case with the same ``query_index``. Returns ``"0/0"`` when no
    case could be compared.
    """
    reference = _cases_by_index(baseline)
    compared = 0
    differing = 0
    for entry in payload.get("cases", []):
        if not isinstance(entry, dict):
            continue
        counterpart = reference.get(entry.get("query_index"))
        if counterpart:
            compared += 1
            differing += _top_item_id(entry) != _top_item_id(counterpart)
    if not compared:
        return "0/0"
    return f"{differing}/{compared}"
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _expected_line(case: dict[str, Any]) -> str:
|
|
170
|
+
people = ", ".join(str(item) for item in case.get("expected_people", [])) or "None"
|
|
171
|
+
terms = ", ".join(str(item) for item in case.get("expected_terms", [])) or "None"
|
|
172
|
+
return f"- Expected people: {people}; expected terms/facts: {terms}"
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _actual_lines(results: object) -> list[str]:
    """Render up to the first five results as Markdown bullets.

    Each dict result produces a top-level bullet (rank, title, kind and
    compacted matched text), a nested "Why returned" bullet and, when the
    first evidence entry is a dict with a non-empty ``source_text``, a
    nested "Evidence" bullet. Non-dict results are skipped.

    Args:
        results: Expected to be a list of result dicts; anything else is
            treated as "no results".

    Returns:
        The rendered lines, or the single placeholder bullet
        ``"- No results returned."`` when nothing could be rendered. That
        includes a non-empty list containing no dict entries, so a results
        heading in the report is never left without content.
    """

    def as_text(value: object) -> str:
        # A key that is present but None should read as empty text, the same
        # as a missing key, rather than the literal string "None".
        return "" if value is None else str(value)

    rendered: list[str] = []
    if isinstance(results, list):
        for result in results[:5]:
            if not isinstance(result, dict):
                continue
            rendered.append(
                "- Rank {rank}: {title} ({kind}) - {matched_text}".format(
                    rank=result.get("rank"),
                    title=result.get("title"),
                    kind=result.get("kind"),
                    matched_text=_compact(as_text(result.get("matched_text"))),
                )
            )
            # Two-space indent: the minimum that nests a sub-item under a
            # "- " parent bullet in CommonMark.
            rendered.append(
                f"  - Why returned: {_compact(as_text(result.get('why_matched')))}"
            )
            evidence = result.get("evidence", [])
            if isinstance(evidence, list) and evidence and isinstance(evidence[0], dict):
                # Only the first evidence entry is shown.
                evidence_text = as_text(evidence[0].get("source_text"))
                if evidence_text:
                    rendered.append(f"  - Evidence: {_compact(evidence_text)}")
    return rendered or ["- No results returned."]
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _reasoning_line(case: dict[str, Any]) -> str:
|
|
203
|
+
category = str(case.get("category"))
|
|
204
|
+
strict_rank = case.get("strict_match_rank")
|
|
205
|
+
expected_rank = case.get("expected_person_rank")
|
|
206
|
+
if strict_rank is None:
|
|
207
|
+
return (
|
|
208
|
+
"- FAIL: no top-5 result satisfied the intent-aware rule for this category. "
|
|
209
|
+
f"Matched expected strings: {case.get('matched_expected', [])}; "
|
|
210
|
+
f"missed: {case.get('missed_expected', [])}."
|
|
211
|
+
)
|
|
212
|
+
if category == "mentioned":
|
|
213
|
+
rule = "a result must identify the speaker and state that they mentioned the target"
|
|
214
|
+
elif category == "follow_up":
|
|
215
|
+
rule = "a result must be a follow-up item for the requested person"
|
|
216
|
+
elif category == "profile":
|
|
217
|
+
rule = "one result must combine the requested profile terms for a person"
|
|
218
|
+
elif category in {"vague", "bilingual"}:
|
|
219
|
+
rule = "one result must include the requested vague place/topic clues"
|
|
220
|
+
else:
|
|
221
|
+
rule = "one result must satisfy all expected terms"
|
|
222
|
+
suffix = (
|
|
223
|
+
f" Expected fixture person first appears at rank {expected_rank}."
|
|
224
|
+
if expected_rank is not None
|
|
225
|
+
else ""
|
|
226
|
+
)
|
|
227
|
+
return f"- PASS: strict match at rank {strict_rank}; {rule}.{suffix}"
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _comparison_line(case: dict[str, Any], baseline: dict[str, Any]) -> str:
    """Describe whether the top result of ``case`` differs from ``baseline``.

    Both top results are identified via ``_top_item_id``; the returned bullet
    either states they are the same or names the old and new identifiers.
    """
    current_top = _top_item_id(case)
    previous_top = _top_item_id(baseline)
    if current_top != previous_top:
        return (
            "- The LLM judge changed the top result from "
            f"`{previous_top}` to `{current_top}`."
        )
    return "- The LLM-judged top result is the same as the deterministic top result."
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _top_item_id(case: dict[str, Any]) -> str:
|
|
242
|
+
results = case.get("actual_results", [])
|
|
243
|
+
if isinstance(results, list) and results and isinstance(results[0], dict):
|
|
244
|
+
return str(results[0].get("item_id") or results[0].get("title"))
|
|
245
|
+
return "none"
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _compact(text: str, *, limit: int = 420) -> str:
|
|
249
|
+
compacted = " ".join(text.split())
|
|
250
|
+
if len(compacted) <= limit:
|
|
251
|
+
return compacted
|
|
252
|
+
return compacted[: limit - 3].rstrip() + "..."
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Compact Graphiti episode formatting.
|
|
2
|
+
|
|
3
|
+
Graphiti's extraction step is LLM-backed, so large structured JSON episodes can
|
|
4
|
+
be slow and fragile. This formatter keeps the social-memory semantics explicit
|
|
5
|
+
while sending short text snippets to Graphiti.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from people_network_memory.domain.models import SocialInteraction
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def format_graphiti_episode(interaction: SocialInteraction) -> str:
    """Format an interaction as short newline-separated text lines.

    Lines are emitted in a fixed order: date, interaction type, place,
    participants, mentioned people, topics, direct facts, attributed claims,
    relationships, follow-ups, sensitivity labels, and finally the original
    note text. Optional sections are omitted when their value is falsy or
    their collection is empty; the "Original note" line is always present.

    Args:
        interaction: The interaction to format.

    Returns:
        The lines joined with ``"\\n"``.
    """
    parts: list[str] = []

    if interaction.occurred_at:
        day = interaction.occurred_at.date().isoformat()
        parts.append(f"Date: {day}.")
    if interaction.interaction_type:
        parts.append(f"Interaction type: {interaction.interaction_type}.")
    if interaction.place:
        parts.append(f"Place: {interaction.place}.")

    participant_labels = [entry.person.label for entry in interaction.participants]
    if participant_labels:
        parts.append("Participants: " + ", ".join(participant_labels) + ".")

    # Mentioned people are numbered starting at 1.
    parts.extend(
        _mentioned_sentence(position, mention)
        for position, mention in enumerate(interaction.mentioned_people, start=1)
    )
    parts.extend(f"Discussed topic: {topic}." for topic in interaction.topics)
    # Underscores in predicates and relationship types are shown as spaces.
    parts.extend(
        f"Direct fact: {fact.subject.label} {fact.predicate.replace('_', ' ')} {fact.value}."
        for fact in interaction.direct_facts
    )
    parts.extend(
        f"Attributed claim: {claim.speaker.label} said about {claim.subject.label}: {claim.claim_text}"
        for claim in interaction.attributed_claims
    )
    parts.extend(
        f"Relationship: {link.source.label} {link.relationship_type.replace('_', ' ')} {link.target.label}."
        for link in interaction.relationships
    )

    for item in interaction.follow_ups:
        related = ", ".join(person.label for person in item.related_people)
        if related:
            parts.append(f"Follow-up: {item.description} Related people: {related}.")
        else:
            parts.append(f"Follow-up: {item.description}.")

    if interaction.sensitivity:
        labels = ", ".join(label.value for label in interaction.sensitivity)
        parts.append(f"Sensitivity labels: {labels}.")

    parts.append(f"Original note: {interaction.source_text}")
    return "\n".join(parts)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _mentioned_sentence(index: int, mention: object) -> str:
|
|
60
|
+
person = getattr(mention, "person")
|
|
61
|
+
mentioned_by = getattr(mention, "mentioned_by", None)
|
|
62
|
+
context = getattr(mention, "context", None)
|
|
63
|
+
prefix = f"Mentioned person {index}: "
|
|
64
|
+
if mentioned_by:
|
|
65
|
+
sentence = f"{mentioned_by.label} mentioned {person.label}"
|
|
66
|
+
else:
|
|
67
|
+
sentence = f"{person.label} was mentioned"
|
|
68
|
+
if context:
|
|
69
|
+
sentence += f" in context: {context}"
|
|
70
|
+
return prefix + sentence + "."
|