@pentatonic-ai/ai-agent-sdk 0.10.5 → 0.10.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/packages/memory-engine-v2/compat/requirements.txt +6 -0
- package/packages/memory-engine-v2/compat/server.py +258 -18
- package/packages/memory-engine-v2/eval/recall_at_k.py +242 -0
- package/packages/memory-engine-v2/eval/retrieval_golden.seed.json +69 -0
- package/packages/memory-engine-v2/extractor-async/Dockerfile +1 -1
- package/packages/memory-engine-v2/extractor-async/extraction_schema.py +246 -0
- package/packages/memory-engine-v2/extractor-async/test_guided_json_parser.py +411 -0
- package/packages/memory-engine-v2/extractor-async/worker.py +361 -31
- package/packages/memory-engine-v2/resolution-queue-design.md +165 -0
- package/packages/memory-engine-v2/scripts/backfill_entity_reconciliation.py +11 -2
- package/packages/memory-engine-v2/scripts/backfill_sparse_vectors.py +369 -0
- package/packages/memory-engine-v2/scripts/bakeoff_guided_vs_kv.py +607 -0
- package/packages/memory-engine-v2/scripts/entity_resolution_v2.py +1041 -0
- package/packages/memory-engine-v2/tests/test_entity_resolution_v2.py +507 -0
- package/packages/memory-engine-v2/tests/test_hybrid_retrieval.py +810 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_instructions": [
|
|
3
|
+
"Retrieval-eval golden seed for hybrid BM25+RRF retrieval (BET 3).",
|
|
4
|
+
"Each question maps to the event_ids that a correct /search MUST surface.",
|
|
5
|
+
"HOW TO FILL IN: replace each 'EVENT_ID_PLACEHOLDER_*' with a real",
|
|
6
|
+
"`events.id` from the engine's Postgres (the same id /search returns as",
|
|
7
|
+
"`results[].id`). Find them by running the question through /search on",
|
|
8
|
+
"the live engine and/or `SELECT id, content FROM events WHERE arena = $1",
|
|
9
|
+
"AND content ILIKE '%<distinctive phrase>%'`, then manually verifying the",
|
|
10
|
+
"event actually answers the question. relevance: 2 = directly answers,",
|
|
11
|
+
"1 = useful supporting context. Leave a question's `relevant` empty to",
|
|
12
|
+
"exclude it from metrics (recall_at_k.py skips unfilled questions).",
|
|
13
|
+
"Add questions freely — lexical-heavy ones (exact names, codes, file",
|
|
14
|
+
"names, invoice numbers) are the cases hybrid BM25 is meant to win;",
|
|
15
|
+
"keep paraphrase-style ones too so dense regressions are caught.",
|
|
16
|
+
"No live calls happen from this file — it is data for eval/recall_at_k.py."
|
|
17
|
+
],
|
|
18
|
+
"version": 1,
|
|
19
|
+
"default_arena": "REPLACE_WITH_ARENA (e.g. pentatonic-team:usr_xxx)",
|
|
20
|
+
"questions": [
|
|
21
|
+
{
|
|
22
|
+
"id": "q-exact-name-1",
|
|
23
|
+
"class": "lexical",
|
|
24
|
+
"note": "Exact proper-noun lookup — BM25 should dominate.",
|
|
25
|
+
"query": "REPLACE: a question naming a specific person/company verbatim",
|
|
26
|
+
"relevant": [
|
|
27
|
+
{"event_id": "EVENT_ID_PLACEHOLDER_1A", "relevance": 2},
|
|
28
|
+
{"event_id": "EVENT_ID_PLACEHOLDER_1B", "relevance": 1}
|
|
29
|
+
]
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"id": "q-exact-code-1",
|
|
33
|
+
"class": "lexical",
|
|
34
|
+
"note": "Identifier/code/file-name lookup (e.g. invoice no, PR #, doc title) — the classic dense-retrieval miss.",
|
|
35
|
+
"query": "REPLACE: a question containing an exact identifier",
|
|
36
|
+
"relevant": [
|
|
37
|
+
{"event_id": "EVENT_ID_PLACEHOLDER_2A", "relevance": 2}
|
|
38
|
+
]
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"id": "q-paraphrase-1",
|
|
42
|
+
"class": "semantic",
|
|
43
|
+
"note": "Paraphrase with zero keyword overlap — dense should carry; guards against hybrid regressing semantic recall.",
|
|
44
|
+
"query": "REPLACE: a question that paraphrases the source content",
|
|
45
|
+
"relevant": [
|
|
46
|
+
{"event_id": "EVENT_ID_PLACEHOLDER_3A", "relevance": 2}
|
|
47
|
+
]
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"id": "q-temporal-1",
|
|
51
|
+
"class": "temporal",
|
|
52
|
+
"note": "'last meeting'-class query — checks the temporal re-rank still works on RRF-fused candidates.",
|
|
53
|
+
"query": "REPLACE: when did we last meet <person>?",
|
|
54
|
+
"relevant": [
|
|
55
|
+
{"event_id": "EVENT_ID_PLACEHOLDER_4A", "relevance": 2}
|
|
56
|
+
]
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"id": "q-mixed-1",
|
|
60
|
+
"class": "mixed",
|
|
61
|
+
"note": "Named entity + semantic intent — the case RRF fusion is built for.",
|
|
62
|
+
"query": "REPLACE: e.g. summary of the <Company> contract discussion",
|
|
63
|
+
"relevant": [
|
|
64
|
+
{"event_id": "EVENT_ID_PLACEHOLDER_5A", "relevance": 2},
|
|
65
|
+
{"event_id": "EVENT_ID_PLACEHOLDER_5B", "relevance": 1}
|
|
66
|
+
]
|
|
67
|
+
}
|
|
68
|
+
]
|
|
69
|
+
}
|
|
@@ -14,6 +14,6 @@ COPY worker.py .
|
|
|
14
14
|
# add a new sibling module, add it here too — missing COPY makes the
|
|
15
15
|
# container crash-loop on import at startup (observed 2026-06-08 deploy).
|
|
16
16
|
# The test_*.py files are intentionally excluded; pytest only, not runtime.
|
|
17
|
-
COPY noise_filter.py confidence.py entity_id.py sensitive_filter.py ./
|
|
17
|
+
COPY noise_filter.py confidence.py entity_id.py sensitive_filter.py extraction_schema.py ./
|
|
18
18
|
|
|
19
19
|
CMD ["python", "worker.py"]
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""extraction_schema — JSON Schema for guided-JSON distiller output.
|
|
2
|
+
|
|
3
|
+
Used by worker.py when DISTILL_OUTPUT_MODE=guided_json: the schema is
|
|
4
|
+
handed to vLLM's structured-output engine (xgrammar/outlines), which
|
|
5
|
+
masks logits during decoding so the model is INCAPABLE of emitting
|
|
6
|
+
schema-invalid bytes. This is what makes JSON output safe to revisit
|
|
7
|
+
after the 2026-05-18 removal of guided_json (see worker.py ~87-92):
|
|
8
|
+
|
|
9
|
+
- The old failure ("one bad char in a 13k-char JSON blob nukes the
|
|
10
|
+
whole 15-event chunk") is answered twice over: (a) guided decoding
|
|
11
|
+
means the server, not the model's goodwill, guarantees well-formed
|
|
12
|
+
output; (b) the schema is an ARRAY OF PER-EVENT OBJECTS, so one
|
|
13
|
+
event's content can never corrupt another event's parse — the only
|
|
14
|
+
residual failure mode is max_tokens truncation, and the parser
|
|
15
|
+
salvages every complete event object before the cut.
|
|
16
|
+
- The old "server half-ignored structured output" applied to the
|
|
17
|
+
Qwen3-VL-30B gateway deployment; the self-hosted Qwen2.5-7B vLLM
|
|
18
|
+
box enforces it.
|
|
19
|
+
|
|
20
|
+
This module is the single source of truth for the allowed entity
|
|
21
|
+
types and fact categories (worker.py imports them from here — leaf
|
|
22
|
+
module, so no circular import). The fact object carries EXACTLY the
|
|
23
|
+
same 5 semantic fields the KV parser yields (category, subject,
|
|
24
|
+
predicate, object, statement) so the upsert path is untouched.
|
|
25
|
+
|
|
26
|
+
Pure module — no I/O, stdlib only. Importable from worker.py, tests,
|
|
27
|
+
and scripts/bakeoff_guided_vs_kv.py without psycopg/httpx.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import json
|
|
33
|
+
from typing import Any
|
|
34
|
+
|
|
35
|
+
# Allowed-value enums. Moved here from worker.py (which now imports
|
|
36
|
+
# them) so the schema pins to the SAME constants the KV prompt and
|
|
37
|
+
# downstream normalisation use — change them in one place only.
|
|
38
|
+
ALLOWED_ENT_TYPES = {
    "person",
    "org",
    "product",
    "place",
    "project",
    "concept",
    "topic",
    "date",
    "other",
}
ALLOWED_FCT_CATEGORIES = {
    "decision",
    "commitment",
    "state",
    "mention",
    "observation",
    "preference",
}

# Per-event hard caps. They mirror the "HARD CAPS per event: 8 ENT, 6 FCT,
# 6 REL" rule of BATCH_SYSTEM_PROMPT, so guided decoding enforces what the
# KV prompt could only ask for.
MAX_ENTITIES_PER_EVENT = 8
MAX_FACTS_PER_EVENT = 6
MAX_RELATIONSHIPS_PER_EVENT = 6
MAX_STATEMENT_CHARS = 140

# Sets iterate in a hash-randomised order; sorting makes the schema (and
# anything hashed from it) byte-stable across processes.
_ENT_TYPE_ENUM = sorted(ALLOWED_ENT_TYPES)
_FCT_CATEGORY_ENUM = sorted(ALLOWED_FCT_CATEGORIES)

# One extracted entity. `email` is optional and person-only (prompt rule);
# the parser promotes it into aliases exactly like the KV 4th field.
_ENTITY_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "minLength": 1},
        "type": {"type": "string", "enum": _ENT_TYPE_ENUM},
        "email": {"type": "string"},
    },
    "required": ["name", "type"],
    "additionalProperties": False,
}

# One extracted fact: EXACTLY the 5 semantic fields the KV parser yields
# (FCT line = literal `FCT` + these 5), so upserts stay untouched.
_FACT_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "category": {"type": "string", "enum": _FCT_CATEGORY_ENUM},
        "subject": {"type": "string", "minLength": 1},
        "predicate": {"type": "string", "minLength": 1},
        # null when absent — the KV format's `-`.
        "object": {"type": ["string", "null"]},
        "statement": {
            "type": "string",
            "minLength": 1,
            "maxLength": MAX_STATEMENT_CHARS,
        },
    },
    "required": ["category", "subject", "predicate", "object", "statement"],
    "additionalProperties": False,
}

# One relationship, mirroring the KV REL fields: REL|from|to|rel_type.
_RELATIONSHIP_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "from": {"type": "string", "minLength": 1},
        "to": {"type": "string", "minLength": 1},
        "type": {"type": "string", "minLength": 1},
    },
    "required": ["from", "to", "type"],
    "additionalProperties": False,
}

# One per-event result object.
_EVENT_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        # Zero-indexed position of the event in the input batch — mirrors
        # the `[event K]` header so parsed objects reattach to the right
        # queue item even if the model reorders or a truncation drops the
        # tail.
        "index": {"type": "integer", "minimum": 0},
        "entities": {
            "type": "array",
            "maxItems": MAX_ENTITIES_PER_EVENT,
            "items": _ENTITY_SCHEMA,
        },
        "facts": {
            "type": "array",
            "maxItems": MAX_FACTS_PER_EVENT,
            "items": _FACT_SCHEMA,
        },
        "relationships": {
            "type": "array",
            "maxItems": MAX_RELATIONSHIPS_PER_EVENT,
            "items": _RELATIONSHIP_SCHEMA,
        },
    },
    "required": ["index", "entities", "facts", "relationships"],
    "additionalProperties": False,
}

# Top-level payload: a single `events` array of per-event objects.
EXTRACTION_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "events": {
            "type": "array",
            "items": _EVENT_SCHEMA,
        },
    },
    "required": ["events"],
    "additionalProperties": False,
}
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def extraction_schema_json() -> str:
    """Serialise EXTRACTION_SCHEMA as compact JSON with sorted keys.

    Sorting the keys and fixing the separators keeps the text stable, so
    it can be used in request bodies and for fingerprinting.
    """
    compact_separators = (",", ":")
    return json.dumps(
        EXTRACTION_SCHEMA,
        separators=compact_separators,
        sort_keys=True,
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ----------------------------------------------------------------------
|
|
150
|
+
# Hand-rolled payload validation.
|
|
151
|
+
#
|
|
152
|
+
# The runtime guarantee comes from vLLM's logit masking, NOT from this
|
|
153
|
+
# function — it exists so tests and the bake-off script can check
|
|
154
|
+
# payloads against the same constraints without adding a `jsonschema`
|
|
155
|
+
# dependency (not currently in requirements.txt). It covers exactly the
|
|
156
|
+
# constraints EXTRACTION_SCHEMA expresses; if you extend the schema,
|
|
157
|
+
# extend this too.
|
|
158
|
+
# ----------------------------------------------------------------------
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _is_nonempty_str(value: Any) -> bool:
    """Return True when *value* is a str with at least one character."""
    return isinstance(value, str) and bool(value)


def _entity_errors(ents: Any, where: str) -> list[str]:
    """Return the violations found in one event's ``entities`` value."""
    if not isinstance(ents, list):
        return [f"{where}.entities: not an array"]
    errors: list[str] = []
    if len(ents) > MAX_ENTITIES_PER_EVENT:
        errors.append(f"{where}.entities: exceeds {MAX_ENTITIES_PER_EVENT}")
    for j, ent in enumerate(ents):
        loc = f"{where}.entities[{j}]"
        if not isinstance(ent, dict):
            errors.append(f"{loc}: not an object")
            continue
        if set(ent) - {"name", "type", "email"}:
            errors.append(f"{loc}: unexpected keys")
        if not _is_nonempty_str(ent.get("name")):
            errors.append(f"{loc}.name: missing/empty")
        ent_type = ent.get("type")
        # Check for str first: a set lookup with an unhashable value (a JSON
        # array or object decodes to list/dict) raises TypeError, and a
        # validator must report the violation rather than crash.
        if not isinstance(ent_type, str) or ent_type not in ALLOWED_ENT_TYPES:
            errors.append(f"{loc}.type: not in ALLOWED_ENT_TYPES")
        if "email" in ent and not isinstance(ent["email"], str):
            errors.append(f"{loc}.email: not a string")
    return errors


def _fact_errors(facts: Any, where: str) -> list[str]:
    """Return the violations found in one event's ``facts`` value."""
    if not isinstance(facts, list):
        return [f"{where}.facts: not an array"]
    errors: list[str] = []
    if len(facts) > MAX_FACTS_PER_EVENT:
        errors.append(f"{where}.facts: exceeds {MAX_FACTS_PER_EVENT}")
    required = {"category", "subject", "predicate", "object", "statement"}
    for j, fact in enumerate(facts):
        loc = f"{where}.facts[{j}]"
        if not isinstance(fact, dict):
            errors.append(f"{loc}: not an object")
            continue
        # All five keys are required and nothing else is allowed; with the
        # key set wrong the per-field checks below are skipped.
        if set(fact) != required:
            errors.append(f"{loc}: keys != {{category,subject,predicate,object,statement}}")
            continue
        category = fact["category"]
        # Same str-first guard as for entity types (unhashable values).
        if not isinstance(category, str) or category not in ALLOWED_FCT_CATEGORIES:
            errors.append(f"{loc}.category: not in ALLOWED_FCT_CATEGORIES")
        if not _is_nonempty_str(fact["subject"]):
            errors.append(f"{loc}.subject: missing/empty")
        if not _is_nonempty_str(fact["predicate"]):
            errors.append(f"{loc}.predicate: missing/empty")
        if fact["object"] is not None and not isinstance(fact["object"], str):
            errors.append(f"{loc}.object: not string-or-null")
        stmt = fact["statement"]
        if not _is_nonempty_str(stmt):
            errors.append(f"{loc}.statement: missing/empty")
        elif len(stmt) > MAX_STATEMENT_CHARS:
            errors.append(f"{loc}.statement: exceeds {MAX_STATEMENT_CHARS} chars")
    return errors


def _relationship_errors(rels: Any, where: str) -> list[str]:
    """Return the violations found in one event's ``relationships`` value."""
    if not isinstance(rels, list):
        return [f"{where}.relationships: not an array"]
    errors: list[str] = []
    if len(rels) > MAX_RELATIONSHIPS_PER_EVENT:
        errors.append(
            f"{where}.relationships: exceeds {MAX_RELATIONSHIPS_PER_EVENT}"
        )
    for j, rel in enumerate(rels):
        loc = f"{where}.relationships[{j}]"
        if not isinstance(rel, dict):
            errors.append(f"{loc}: not an object")
            continue
        if set(rel) != {"from", "to", "type"}:
            errors.append(f"{loc}: keys != {{from,to,type}}")
            continue
        for key in ("from", "to", "type"):
            if not _is_nonempty_str(rel[key]):
                errors.append(f"{loc}.{key}: missing/empty")
    return errors


def validate_payload(payload: Any) -> list[str]:
    """Return a list of human-readable violations ([] == valid).

    Checks a decoded JSON payload against the constraints expressed by
    EXTRACTION_SCHEMA without needing a `jsonschema` dependency. Violations
    are reported in document order: top-level keys, then for each event its
    keys, index, entities, facts and relationships.

    Args:
        payload: The decoded JSON value to check; any Python object is
            accepted.

    Returns:
        One message per violation, each prefixed with the location of the
        offending value (e.g. ``events[0].facts[2].statement``). The list is
        empty when the payload is valid. Malformed input is reported, never
        raised — including enum fields holding unhashable values.
    """
    if not isinstance(payload, dict):
        return ["payload: not an object"]
    errors: list[str] = []
    if set(payload) - {"events"}:
        errors.append("payload: unexpected top-level keys")
    events = payload.get("events")
    if not isinstance(events, list):
        return errors + ["events: missing or not an array"]
    for i, ev in enumerate(events):
        where = f"events[{i}]"
        if not isinstance(ev, dict):
            errors.append(f"{where}: not an object")
            continue
        if set(ev) - {"index", "entities", "facts", "relationships"}:
            errors.append(f"{where}: unexpected keys")
        idx = ev.get("index")
        # bool is a subclass of int, so True/False must be rejected
        # explicitly.
        if not isinstance(idx, int) or isinstance(idx, bool) or idx < 0:
            errors.append(f"{where}.index: not a non-negative integer")
        # A missing section comes back from .get() as None and is reported
        # as "not an array" by the helper.
        errors.extend(_entity_errors(ev.get("entities"), where))
        errors.extend(_fact_errors(ev.get("facts"), where))
        errors.extend(_relationship_errors(ev.get("relationships"), where))
    return errors
|