@reconcrap/people-network-memory 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +476 -0
- package/docs/mcp_tools.md +138 -0
- package/harness_adapters/openclaw/mcp.managed.unix.template.json +25 -0
- package/harness_adapters/openclaw/mcp.managed.windows.template.json +26 -0
- package/harness_adapters/openclaw/mcp.template.json +14 -0
- package/harness_adapters/openclaw/ppl/SKILL.md +114 -0
- package/package.json +30 -0
- package/pyproject.toml +26 -0
- package/scripts/install_windows.ps1 +92 -0
- package/scripts/npm/people-memory.js +276 -0
- package/scripts/people_memory_bootstrap.py +247 -0
- package/scripts/run_graphiti_live_from_liepin.ps1 +87 -0
- package/scripts/run_tests_with_artifacts.ps1 +307 -0
- package/src/people_network_memory/__init__.py +6 -0
- package/src/people_network_memory/application/__init__.py +16 -0
- package/src/people_network_memory/application/normalization.py +1441 -0
- package/src/people_network_memory/application/services.py +921 -0
- package/src/people_network_memory/cli.py +1212 -0
- package/src/people_network_memory/config.py +268 -0
- package/src/people_network_memory/domain/__init__.py +55 -0
- package/src/people_network_memory/domain/identity.py +77 -0
- package/src/people_network_memory/domain/models.py +355 -0
- package/src/people_network_memory/fixtures/__init__.py +6 -0
- package/src/people_network_memory/fixtures/eval.py +398 -0
- package/src/people_network_memory/fixtures/extractor_eval.py +364 -0
- package/src/people_network_memory/fixtures/generator.py +290 -0
- package/src/people_network_memory/fixtures/report.py +252 -0
- package/src/people_network_memory/graphiti_adapter/__init__.py +9 -0
- package/src/people_network_memory/graphiti_adapter/episode_formatter.py +70 -0
- package/src/people_network_memory/graphiti_adapter/graphiti_store.py +655 -0
- package/src/people_network_memory/graphiti_adapter/indexer.py +194 -0
- package/src/people_network_memory/graphiti_adapter/ontology.py +68 -0
- package/src/people_network_memory/harness_adapters/__init__.py +2 -0
- package/src/people_network_memory/harness_adapters/openclaw/__init__.py +9 -0
- package/src/people_network_memory/harness_adapters/openclaw/installer.py +577 -0
- package/src/people_network_memory/harness_adapters/openclaw/integration_eval.py +508 -0
- package/src/people_network_memory/harness_adapters/openclaw/smoke.py +292 -0
- package/src/people_network_memory/infrastructure/__init__.py +2 -0
- package/src/people_network_memory/infrastructure/archive_backup.py +171 -0
- package/src/people_network_memory/infrastructure/diagnostics.py +171 -0
- package/src/people_network_memory/infrastructure/embeddings.py +155 -0
- package/src/people_network_memory/infrastructure/file_store.py +129 -0
- package/src/people_network_memory/infrastructure/graphiti_promotion.py +212 -0
- package/src/people_network_memory/infrastructure/id_generator.py +40 -0
- package/src/people_network_memory/infrastructure/in_memory_store.py +1008 -0
- package/src/people_network_memory/infrastructure/llm_extractor.py +476 -0
- package/src/people_network_memory/infrastructure/llm_identity_advisor.py +200 -0
- package/src/people_network_memory/infrastructure/llm_judge.py +162 -0
- package/src/people_network_memory/infrastructure/redaction.py +21 -0
- package/src/people_network_memory/infrastructure/release_check.py +186 -0
- package/src/people_network_memory/infrastructure/retrieval_intent.py +98 -0
- package/src/people_network_memory/infrastructure/semantic_index.py +262 -0
- package/src/people_network_memory/mcp_server/__init__.py +2 -0
- package/src/people_network_memory/mcp_server/contracts.py +85 -0
- package/src/people_network_memory/mcp_server/runtime.py +133 -0
- package/src/people_network_memory/mcp_server/tools.py +588 -0
- package/src/people_network_memory/ports/__init__.py +2 -0
- package/src/people_network_memory/ports/errors.py +25 -0
- package/src/people_network_memory/ports/interfaces.py +103 -0
- package/src/people_network_memory/projection/__init__.py +6 -0
- package/src/people_network_memory/projection/builders.py +46 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Optional OpenAI-compatible final retrieval judge."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from people_network_memory.config import PeopleMemoryConfig
|
|
12
|
+
from people_network_memory.domain.models import RetrievalItem
|
|
13
|
+
from people_network_memory.ports.errors import ConfigError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class LlmJudgeSettings:
    """Immutable connection settings for the LLM retrieval judge."""

    base_url: str
    model: str
    api_key: str
    timeout_seconds: float = 30.0
    response_format: str = "none"

    @classmethod
    def from_config(cls, config: PeopleMemoryConfig) -> "LlmJudgeSettings":
        """Build settings from ``config``.

        Raises:
            ConfigError: if the base URL, model or API key is empty; the
                message lists every missing environment variable name.
        """
        # (environment variable name, configured value), in reporting order.
        required = (
            ("PEOPLE_MEMORY_LLM_BASE_URL", config.llm_base_url),
            ("PEOPLE_MEMORY_LLM_MODEL", config.llm_model),
            ("PEOPLE_MEMORY_LLM_API_KEY", config.llm_api_key),
        )
        absent: list[str] = [env_name for env_name, value in required if not value]
        if absent:
            raise ConfigError("LLM retrieval judge requires: " + ", ".join(absent))
        return cls(
            base_url=config.llm_base_url or "",
            model=config.llm_model or "",
            api_key=config.llm_api_key or "",
            timeout_seconds=config.retrieval_judge_timeout_seconds,
            response_format=config.llm_response_format,
        )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class OpenAICompatibleRetrievalJudge:
    """Rerank already-retrieved candidates using a small JSON LLM decision.

    The judge never adds or drops candidates: ids returned by the model are
    moved to the front (in model order) and every other candidate keeps its
    original relative order behind them. Any request or parsing failure
    degrades to the original candidate order.
    """

    def __init__(self, settings: LlmJudgeSettings) -> None:
        self._settings = settings

    @classmethod
    def from_config(cls, config: PeopleMemoryConfig) -> "OpenAICompatibleRetrievalJudge":
        """Create a judge from ``config`` via ``LlmJudgeSettings.from_config``."""
        return cls(LlmJudgeSettings.from_config(config))

    def judge(
        self,
        query: str,
        candidates: list[RetrievalItem],
        *,
        limit: int,
    ) -> list[RetrievalItem]:
        """Return at most ``limit`` candidates, best first.

        Args:
            query: The user query the candidates were retrieved for.
            candidates: Items in their current (pre-judge) order.
            limit: Maximum number of items to return.

        Returns:
            The reranked items, or ``candidates[:limit]`` when the LLM call
            fails or its response cannot be interpreted.
        """
        if not candidates:
            return []
        try:
            ranked_ids = self._request_ranked_ids(query, candidates, limit=limit)
        except (
            httpx.HTTPError,
            ValueError,
            KeyError,
            IndexError,  # e.g. an empty "choices" list in the response
            TypeError,
            json.JSONDecodeError,
        ):
            return candidates[:limit]
        by_id = {item.item_id: item for item in candidates}
        ranked: list[RetrievalItem] = []
        seen: set[Any] = set()
        for item_id in ranked_ids:
            # Skip ids the model invented and ids it repeated, so a candidate
            # can never appear twice in the result.
            if item_id in by_id and item_id not in seen:
                seen.add(item_id)
                ranked.append(by_id[item_id])
        # Candidates the model did not rank keep their original order.
        ranked.extend(item for item in candidates if item.item_id not in seen)
        return ranked[:limit]

    def _request_ranked_ids(
        self,
        query: str,
        candidates: list[RetrievalItem],
        *,
        limit: int,
    ) -> list[str]:
        """POST the candidates to ``/chat/completions`` and return ranked ids.

        Raises:
            httpx.HTTPError: on transport errors or a non-success status.
            ValueError: when the message content is not a string, is not a
                JSON object, or ``ranked_item_ids`` is not a list.
            KeyError, IndexError, TypeError: when the response body does not
                have the ``choices[0].message.content`` shape.
        """
        url = self._settings.base_url.rstrip("/") + "/chat/completions"
        payload: dict[str, Any] = {
            "model": self._settings.model,
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "You are a retrieval judge for a personal network memory tool. "
                        "Rank only candidates that directly answer the query. Prefer one "
                        "candidate that satisfies all query constraints for the same person. "
                        "Return JSON with ranked_item_ids and rejected_item_ids."
                    ),
                },
                {
                    "role": "user",
                    "content": json.dumps(
                        {
                            "query": query,
                            "limit": limit,
                            "candidates": [_candidate_payload(item) for item in candidates],
                        },
                        ensure_ascii=False,
                    ),
                },
            ],
            "temperature": 0,
        }
        # Only request structured output when the settings ask for it.
        if self._settings.response_format == "json_object":
            payload["response_format"] = {"type": "json_object"}
        response = httpx.post(
            url,
            headers={
                "Authorization": f"Bearer {self._settings.api_key}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=self._settings.timeout_seconds,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        if not isinstance(content, str):
            # A null/non-text message would otherwise fail inside the parser
            # with an AttributeError that judge() does not treat as a fallback.
            raise ValueError("LLM judge response content must be a string")
        parsed = _parse_json_object(content)
        ranked = parsed.get("ranked_item_ids", [])
        if not isinstance(ranked, list):
            raise ValueError("LLM judge response ranked_item_ids must be a list")
        return [str(item_id) for item_id in ranked]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _candidate_payload(item: RetrievalItem) -> dict[str, object]:
    """Serialize one candidate into the JSON-friendly dict sent to the judge.

    Only the first three evidence entries are included, each reduced to its
    id, source text and ISO-formatted ``recorded_at``.
    """
    payload: dict[str, object] = {
        "item_id": item.item_id,
        "kind": item.kind,
        "title": item.title,
        "matched_text": item.matched_text,
        "why_matched": item.why_matched,
        "person_ids": item.person_ids,
    }
    evidence_rows: list[dict[str, object]] = []
    for entry in item.evidence[:3]:
        evidence_rows.append(
            {
                "evidence_id": entry.evidence_id,
                "source_text": entry.source_text,
                "recorded_at": entry.recorded_at.isoformat(),
            }
        )
    payload["evidence"] = evidence_rows
    return payload
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _parse_json_object(text: str) -> dict[str, Any]:
    """Extract a JSON object from an LLM reply.

    A surrounding triple-backtick fence is removed first. If the remaining
    text is not valid JSON, the span from the first ``{`` to the last ``}``
    is tried instead.

    Raises:
        json.JSONDecodeError: when no parseable JSON can be found.
        ValueError: when the parsed value is not a JSON object.
    """
    body = text.strip()
    if body.startswith("```"):
        rows = body.splitlines()
        fenced = len(rows) >= 3 and rows[-1].strip() == "```"
        if fenced:
            # Drop the opening fence line (with any language tag) and the
            # closing fence line.
            body = "\n".join(rows[1:-1]).strip()
    try:
        result = json.loads(body)
    except json.JSONDecodeError:
        opening, closing = body.find("{"), body.rfind("}")
        if not (0 <= opening < closing):
            raise
        result = json.loads(body[opening : closing + 1])
    if isinstance(result, dict):
        return result
    raise ValueError("LLM judge response must be a JSON object")
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Redaction helpers for logs and diagnostics."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# local-part@domain.tld with a TLD of at least two letters.
EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
# Optional "+", a digit, six or more digits/whitespace/"()"/"."/"-", then a
# digit; the run must not touch another digit on either side.
PHONE_RE = re.compile(r"(?<!\d)(?:\+?\d[\d\s().-]{6,}\d)(?!\d)")
# Whole-word, case-insensitive marker words; only the word itself is replaced.
SENSITIVE_MARKERS = re.compile(
    r"\b(?:leaving|salary|compensation|health|private|confidential|sensitive)\b",
    flags=re.IGNORECASE,
)


def redact_sensitive_text(text: str) -> str:
    """Return ``text`` with emails, phone-like runs and marker words replaced.

    Substitutions run in a fixed order: emails, then phone numbers, then
    sensitive marker words.
    """
    substitutions = (
        (EMAIL_RE, "[REDACTED_EMAIL]"),
        (PHONE_RE, "[REDACTED_PHONE]"),
        (SENSITIVE_MARKERS, "[REDACTED_SENSITIVE]"),
    )
    scrubbed = text
    for pattern, placeholder in substitutions:
        scrubbed = pattern.sub(placeholder, scrubbed)
    return scrubbed
|
|
21
|
+
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Local V1 release readiness checks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import tempfile
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from people_network_memory.config import PeopleMemoryConfig
|
|
11
|
+
from people_network_memory.fixtures.eval import evaluate_dataset
|
|
12
|
+
from people_network_memory.fixtures.generator import generate_mock_dataset
|
|
13
|
+
from people_network_memory.harness_adapters.openclaw.integration_eval import (
|
|
14
|
+
run_harness_memory_integration_eval,
|
|
15
|
+
)
|
|
16
|
+
from people_network_memory.harness_adapters.openclaw.smoke import run_openclaw_adapter_smoke
|
|
17
|
+
from people_network_memory.infrastructure.archive_backup import (
|
|
18
|
+
create_archive_backup,
|
|
19
|
+
restore_archive_backup,
|
|
20
|
+
)
|
|
21
|
+
from people_network_memory.infrastructure.redaction import redact_sensitive_text
|
|
22
|
+
from people_network_memory.mcp_server.contracts import public_tool_contracts, public_tool_names
|
|
23
|
+
from people_network_memory.mcp_server.runtime import build_runtime
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class ReleaseCheck:
    """Result of a single named release-readiness check."""

    name: str
    ok: bool
    detail: dict[str, Any]

    def to_json(self) -> dict[str, Any]:
        """Return the check as a dict with ``name``, ``ok`` and ``detail`` keys."""
        return dict(name=self.name, ok=self.ok, detail=self.detail)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def run_local_release_check() -> dict[str, Any]:
    """Run every local V1 check in order and summarize the results.

    Returns:
        A dict with the overall ``ok`` flag (true only if every check
        passed), the fixed ``scope`` label, and each check's JSON form.
    """
    runners = (
        _check_test_mode_mcp_start,
        _check_tool_contracts,
        _check_fixture_eval,
        _check_openclaw_smoke,
        _check_harness_memory_integration,
        _check_backup_restore,
        _check_redaction,
    )
    results = [run() for run in runners]
    overall_ok = all(result.ok for result in results)
    return {
        "ok": overall_ok,
        "scope": "local_v1",
        "checks": [result.to_json() for result in results],
    }
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _check_test_mode_mcp_start() -> ReleaseCheck:
    """Check that a test-mode runtime exposes exactly the public tool names.

    Both name collections are sorted before comparison, so order is ignored.
    The runtime is always closed, even if reading the tool names fails.
    """
    runtime = build_runtime(PeopleMemoryConfig(test_mode=True))
    try:
        actual = sorted(runtime.tools.tool_names())
        wanted = sorted(public_tool_names())
        outcome = ReleaseCheck(
            name="mcp_test_mode_start",
            ok=actual == wanted,
            detail={"tools": actual, "expected_tools": wanted},
        )
    finally:
        runtime.close()
    return outcome
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _check_tool_contracts() -> ReleaseCheck:
    """Check that the public contracts match the public tool names, all at v1."""
    contracts = public_tool_contracts()
    contract_names = []
    for contract in contracts:
        contract_names.append(contract["name"])
    # Versions are only inspected once the names (including order) match.
    names_match = contract_names == public_tool_names()
    passed = names_match and all(contract["version"] == "v1" for contract in contracts)
    return ReleaseCheck(
        name="tool_contracts",
        ok=passed,
        detail={"contract_version": "v1", "tool_names": contract_names},
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _check_fixture_eval() -> ReleaseCheck:
    """Evaluate retrieval on the seed-42 mock dataset and report key metrics.

    The check passes when the evaluation's ``passes_v1_thresholds`` value is
    truthy; per-case results are not requested.
    """
    dataset = generate_mock_dataset(seed=42)
    summary = evaluate_dataset(dataset).to_json(include_cases=False)
    # Metrics copied verbatim from the evaluation summary into the detail.
    reported_keys = (
        "checked",
        "recall_at_3",
        "recall_at_5",
        "returned_result_evidence_rate",
        "sensitive_leaks",
        "no_result_count",
    )
    return ReleaseCheck(
        name="fixture_retrieval_eval",
        ok=bool(summary["passes_v1_thresholds"]),
        detail={key: summary[key] for key in reported_keys},
    )
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _check_openclaw_smoke() -> ReleaseCheck:
    """Run the OpenClaw adapter smoke test and condense its report.

    Each route check is reduced to its ``prompt``, ``ok`` and ``actual``
    fields in the detail.
    """
    report = run_openclaw_adapter_smoke()
    passed = bool(report["ok"])
    workflow_ok = report["workflow"]["ok"]
    route_summaries = []
    for route in report["route_checks"]:
        route_summaries.append(
            {"prompt": route["prompt"], "ok": route["ok"], "actual": route["actual"]}
        )
    return ReleaseCheck(
        name="openclaw_adapter_smoke",
        ok=passed,
        detail={"workflow_ok": workflow_ok, "route_checks": route_summaries},
    )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _check_harness_memory_integration() -> ReleaseCheck:
    """Run the harness memory integration eval and list the failing case ids."""
    report = run_harness_memory_integration_eval()
    passed = bool(report["ok"])
    detail: dict[str, Any] = {
        "checked": report["checked"],
        "passed": report["passed"],
        "failed": report["failed"],
    }
    failing_ids = []
    for entry in report["cases"]:
        if not entry["ok"]:
            failing_ids.append(entry["case"]["case_id"])
    detail["failed_cases"] = failing_ids
    return ReleaseCheck(
        name="harness_memory_integration_eval",
        ok=passed,
        detail=detail,
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _check_backup_restore() -> ReleaseCheck:
    """Round-trip a local JSON store through backup and restore.

    One interaction is recorded into a temporary store, the store is
    archived, the archive is restored into a second directory, and
    ``person_0001`` is then looked up in the restored store. The check
    passes when backup, restore and lookup all report success.
    """

    def local_json_config(path: Path) -> PeopleMemoryConfig:
        # A fresh config object is built at every use site.
        return PeopleMemoryConfig(backend="local_json", data_path=str(path))

    with tempfile.TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        source_dir = workdir / "data"
        target_dir = workdir / "restore"
        archive_file = workdir / "backup.zip"

        writer = build_runtime(local_json_config(source_dir))
        try:
            writer.tools.record_interaction(
                {
                    "source_text": (
                        "Remember Alice Zhang. Alice Zhang works at Tencent Robotics "
                        "as product lead. Alice Zhang studied at Tsinghua."
                    )
                }
            )
        finally:
            # Close the writing runtime before the store is archived.
            writer.close()

        backup_report = create_archive_backup(local_json_config(source_dir), archive_file)
        restore_report = restore_archive_backup(
            local_json_config(target_dir),
            archive_file,
            confirm="OVERWRITE",
        )

        reader = build_runtime(local_json_config(target_dir))
        try:
            person_card = reader.tools.get_person({"person_id": "person_0001"})
        finally:
            reader.close()

        roundtrip_ok = (
            bool(backup_report["ok"])
            and bool(restore_report["ok"])
            and bool(person_card["found"])
        )
        return ReleaseCheck(
            name="backup_restore_roundtrip",
            ok=roundtrip_ok,
            detail={
                "backup_people": backup_report["people"],
                "restore_people": restore_report["people"],
                "person_found": person_card["found"],
                "display_name": person_card.get("display_name"),
            },
        )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _check_redaction() -> ReleaseCheck:
    """Check that redaction removes an email, a phone number and marker words.

    The check passes when none of the known sensitive tokens from the sample
    sentence survive in the redacted text.
    """
    sample = "Email alice@example.com or +1 415 555 1234 about private salary."
    scrubbed = redact_sensitive_text(sample)
    sensitive_tokens = ("alice@example.com", "+1 415 555 1234", "private", "salary")
    survivors = []
    for token in sensitive_tokens:
        if token in scrubbed:
            survivors.append(token)
    return ReleaseCheck(
        name="redaction",
        ok=not survivors,
        detail={"leaked_tokens": survivors, "redacted": scrubbed},
    )
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Small query-intent helpers for retrieval and eval.
|
|
2
|
+
|
|
3
|
+
These are intentionally deterministic. They do not replace semantic search;
|
|
4
|
+
they keep common social-memory intents from being scored by accidental token
|
|
5
|
+
overlap alone.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# English "who mentioned/said/told me about <target>"; the target runs to the
# end of the query.
MENTIONED_QUERY_RE = re.compile(
    r"\bwho\s+(?:mentioned|said|told\s+me\s+about)\s+(?P<target>.+?)\s*$",
    flags=re.IGNORECASE,
)
# Chinese target-first form "<target>是谁提到的". Both 谁 and 誰 are accepted,
# matching CHINESE_MENTIONED_QUERY_RE; otherwise a traditional-script query
# falls through to that pattern and yields the trailing "的" as the target.
CHINESE_TARGET_WHO_MENTIONED_RE = re.compile(
    r"(?P<target>[\u4e00-\u9fffA-Za-z0-9 _-]{1,40})是(?:谁|誰)(?:提到|提起|说到|說到)的"
)
# Chinese who-first form "谁...提到<target>"; the target is whatever follows
# the last mention verb.
CHINESE_MENTIONED_QUERY_RE = re.compile(
    r"(?:谁|誰).*(?:提到|提起|说到|說到)(?P<target>.+?)$"
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def mentioned_query_target(query: str) -> str | None:
    """Return the thing a "who mentioned X" style query asks about.

    Surrounding spaces and sentence punctuation are stripped, then the
    English pattern and the two Chinese patterns are tried in that order.
    Returns the cleaned target of the first match, or ``None`` when the
    query is not a "who mentioned" question.
    """
    trimmed = query.strip(" ?.!。!?")
    patterns = (
        MENTIONED_QUERY_RE,
        CHINESE_TARGET_WHO_MENTIONED_RE,
        CHINESE_MENTIONED_QUERY_RE,
    )
    for pattern in patterns:
        found = pattern.search(trimmed)
        if found:
            return _clean_target(found.group("target"))
    return None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# English follow-up vocabulary, matched as whole ASCII words on lowercased
# text. ASCII-letter lookarounds are used instead of ``\b`` so a term glued to
# CJK text (e.g. "我的todo") still matches, while a term embedded in a longer
# English word or name ("power", "Owen", "compromise") does not.
_FOLLOW_UP_ENGLISH_RE = re.compile(
    r"(?<![a-z])"
    r"(?:follow[ -]ups?|promise[sd]?|commitments?|owe[sd]?|todos?|next steps?)"
    r"(?![a-z])"
)
# Chinese follow-up vocabulary, matched as plain substrings.
_FOLLOW_UP_CHINESE_TERMS = ("跟进", "承诺", "答应", "待办")


def is_follow_up_query(query: str) -> bool:
    """Return True when ``query`` asks about follow-ups, promises or to-dos.

    English terms must appear as whole words (simple plural/past forms
    included); plain substring matching previously treated words such as
    "power", "however" or "compromise" as follow-up intent because they
    contain "owe" or "promise". Chinese terms are matched as substrings.
    """
    if _FOLLOW_UP_ENGLISH_RE.search(query.lower()):
        return True
    return any(term in query for term in _FOLLOW_UP_CHINESE_TERMS)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def text_answers_mentioned_query(text: str, target: str) -> bool:
    """Return True when ``text`` says that someone mentioned ``target``.

    Both strings are normalized with ``_squash`` first. The target must
    occur in the text and be either preceded by mentioned/said/told,
    followed by "was/is mentioned", or (ignoring spaces) directly preceded
    by one of the Chinese mention verbs.
    """
    haystack = _squash(text)
    needle = _squash(target)
    if not needle or needle not in haystack:
        return False
    escaped = re.escape(needle)
    english_patterns = (
        rf"\b(?:mentioned|said|told)\b.*\b{escaped}\b",
        rf"\b{escaped}\b.*\b(?:was|is)\s+mentioned\b",
    )
    if any(re.search(pattern, haystack) for pattern in english_patterns):
        return True
    # Spaces are removed on both sides so "提到 X" and "提到X" compare equal.
    dense_text = haystack.replace(" ", "")
    dense_target = needle.replace(" ", "")
    return any(
        f"{verb}{dense_target}" in dense_text for verb in ("提到", "提起", "说到")
    )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def person_is_only_mentioned_target(
    *,
    person_label: str,
    target_label: str,
    mentioned_pairs: list[tuple[str, str | None]],
) -> bool:
    """Return True when the person is the query target but never its speaker.

    Args:
        person_label: Label of the candidate person.
        target_label: Label of the thing the query asks about.
        mentioned_pairs: ``(mentioned, speaker)`` pairs; ``speaker`` may be
            ``None``.

    Labels are compared after ``_squash`` normalization. The result is True
    only if the person equals the target, at least one pair mentions the
    target, and no pair that mentions the target has the person as speaker.
    """
    key = _squash(person_label)
    if key != _squash(target_label):
        return False
    saw_target = False
    for mentioned, speaker in mentioned_pairs:
        if _squash(mentioned) != key:
            continue
        saw_target = True
        if _squash(speaker or "") == key:
            # The person brought the target up themselves.
            return False
    return saw_target
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _clean_target(value: str) -> str:
    """Strip surrounding spaces, sentence punctuation and quotes from a target."""
    removable = " ?.!。!?'\""
    return value.strip(removable)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _squash(value: str) -> str:
    """Lowercase ``value``, trim it, and collapse whitespace runs to one space."""
    # Splitting on whitespace drops leading/trailing runs and separates the
    # words; joining with a single space collapses every inner run.
    words = value.lower().split()
    return " ".join(words)
|