@pentatonic-ai/ai-agent-sdk 0.10.4 → 0.10.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
+ {
2
+ "_instructions": [
3
+ "Retrieval-eval golden seed for hybrid BM25+RRF retrieval (BET 3).",
4
+ "Each question maps to the event_ids that a correct /search MUST surface.",
5
+ "HOW TO FILL IN: replace each 'EVENT_ID_PLACEHOLDER_*' with a real",
6
+ "`events.id` from the engine's Postgres (the same id /search returns as",
7
+ "`results[].id`). Find them by running the question through /search on",
8
+ "the live engine and/or `SELECT id, content FROM events WHERE arena = $1",
9
+ "AND content ILIKE '%<distinctive phrase>%'`, then manually verifying the",
10
+ "event actually answers the question. relevance: 2 = directly answers,",
11
+ "1 = useful supporting context. Leave a question's `relevant` empty to",
12
+ "exclude it from metrics (recall_at_k.py skips unfilled questions).",
13
+ "Add questions freely — lexical-heavy ones (exact names, codes, file",
14
+ "names, invoice numbers) are the cases hybrid BM25 is meant to win;",
15
+ "keep paraphrase-style ones too so dense regressions are caught.",
16
+ "No live calls happen from this file — it is data for eval/recall_at_k.py."
17
+ ],
18
+ "version": 1,
19
+ "default_arena": "REPLACE_WITH_ARENA (e.g. pentatonic-team:usr_xxx)",
20
+ "questions": [
21
+ {
22
+ "id": "q-exact-name-1",
23
+ "class": "lexical",
24
+ "note": "Exact proper-noun lookup — BM25 should dominate.",
25
+ "query": "REPLACE: a question naming a specific person/company verbatim",
26
+ "relevant": [
27
+ {"event_id": "EVENT_ID_PLACEHOLDER_1A", "relevance": 2},
28
+ {"event_id": "EVENT_ID_PLACEHOLDER_1B", "relevance": 1}
29
+ ]
30
+ },
31
+ {
32
+ "id": "q-exact-code-1",
33
+ "class": "lexical",
34
+ "note": "Identifier/code/file-name lookup (e.g. invoice no, PR #, doc title) — the classic dense-retrieval miss.",
35
+ "query": "REPLACE: a question containing an exact identifier",
36
+ "relevant": [
37
+ {"event_id": "EVENT_ID_PLACEHOLDER_2A", "relevance": 2}
38
+ ]
39
+ },
40
+ {
41
+ "id": "q-paraphrase-1",
42
+ "class": "semantic",
43
+ "note": "Paraphrase with zero keyword overlap — dense should carry; guards against hybrid regressing semantic recall.",
44
+ "query": "REPLACE: a question that paraphrases the source content",
45
+ "relevant": [
46
+ {"event_id": "EVENT_ID_PLACEHOLDER_3A", "relevance": 2}
47
+ ]
48
+ },
49
+ {
50
+ "id": "q-temporal-1",
51
+ "class": "temporal",
52
+ "note": "'last meeting'-class query — checks the temporal re-rank still works on RRF-fused candidates.",
53
+ "query": "REPLACE: when did we last meet <person>?",
54
+ "relevant": [
55
+ {"event_id": "EVENT_ID_PLACEHOLDER_4A", "relevance": 2}
56
+ ]
57
+ },
58
+ {
59
+ "id": "q-mixed-1",
60
+ "class": "mixed",
61
+ "note": "Named entity + semantic intent — the case RRF fusion is built for.",
62
+ "query": "REPLACE: e.g. summary of the <Company> contract discussion",
63
+ "relevant": [
64
+ {"event_id": "EVENT_ID_PLACEHOLDER_5A", "relevance": 2},
65
+ {"event_id": "EVENT_ID_PLACEHOLDER_5B", "relevance": 1}
66
+ ]
67
+ }
68
+ ]
69
+ }
@@ -14,6 +14,6 @@ COPY worker.py .
14
14
  # add a new sibling module, add it here too — missing COPY makes the
15
15
  # container crash-loop on import at startup (observed 2026-06-08 deploy).
16
16
  # The test_*.py files are intentionally excluded; pytest only, not runtime.
17
- COPY noise_filter.py confidence.py entity_id.py sensitive_filter.py ./
17
+ COPY noise_filter.py confidence.py entity_id.py sensitive_filter.py extraction_schema.py ./
18
18
 
19
19
  CMD ["python", "worker.py"]
@@ -0,0 +1,246 @@
1
+ """extraction_schema — JSON Schema for guided-JSON distiller output.
2
+
3
+ Used by worker.py when DISTILL_OUTPUT_MODE=guided_json: the schema is
4
+ handed to vLLM's structured-output engine (xgrammar/outlines), which
5
+ masks logits during decoding so the model is INCAPABLE of emitting
6
+ schema-invalid bytes. This is what makes JSON output safe to revisit
7
+ after the 2026-05-18 removal of guided_json (see worker.py ~87-92):
8
+
9
+ - The old failure ("one bad char in a 13k-char JSON blob nukes the
10
+ whole 15-event chunk") is answered twice over: (a) guided decoding
11
+ means the server, not the model's goodwill, guarantees well-formed
12
+ output; (b) the schema is an ARRAY OF PER-EVENT OBJECTS, so one
13
+ event's content can never corrupt another event's parse — the only
14
+ residual failure mode is max_tokens truncation, and the parser
15
+ salvages every complete event object before the cut.
16
+ - The old "server half-ignored structured output" applied to the
17
+ Qwen3-VL-30B gateway deployment; the self-hosted Qwen2.5-7B vLLM
18
+ box enforces it.
19
+
20
+ This module is the single source of truth for the allowed entity
21
+ types and fact categories (worker.py imports them from here — leaf
22
+ module, so no circular import). The fact object carries EXACTLY the
23
+ same 5 semantic fields the KV parser yields (category, subject,
24
+ predicate, object, statement) so the upsert path is untouched.
25
+
26
+ Pure module — no I/O, stdlib only. Importable from worker.py, tests,
27
+ and scripts/bakeoff_guided_vs_kv.py without psycopg/httpx.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import json
33
+ from typing import Any
34
+
35
# Closed vocabularies for the distiller output. They were moved here from
# worker.py (which now imports them), so the schema, the KV prompt and the
# downstream normalisation all pin to one definition — edit them here only.
#
# Each set is paired with a sorted list: set iteration order is
# hash-randomised, so sorting keeps the schema (and anything hashed from
# it) byte-stable across processes.
ALLOWED_ENT_TYPES = {
    "person",
    "org",
    "product",
    "place",
    "project",
    "concept",
    "topic",
    "date",
    "other",
}
_ENT_TYPE_ENUM = sorted(ALLOWED_ENT_TYPES)

ALLOWED_FCT_CATEGORIES = {
    "decision",
    "commitment",
    "state",
    "mention",
    "observation",
    "preference",
}
_FCT_CATEGORY_ENUM = sorted(ALLOWED_FCT_CATEGORIES)

# Per-event hard caps. These mirror BATCH_SYSTEM_PROMPT's "HARD CAPS per
# event: 8 ENT, 6 FCT, 6 REL", so guided decoding enforces what the KV
# prompt could only request.
MAX_ENTITIES_PER_EVENT = 8
MAX_FACTS_PER_EVENT = 6
MAX_RELATIONSHIPS_PER_EVENT = 6
MAX_STATEMENT_CHARS = 140
59
+
60
# One extracted entity. `email` is optional and person-only (prompt rule);
# the parser promotes it into aliases exactly like the KV 4th field.
_ENTITY_ITEM_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "minLength": 1},
        "type": {"type": "string", "enum": _ENT_TYPE_ENUM},
        "email": {"type": "string"},
    },
    "required": ["name", "type"],
    "additionalProperties": False,
}

# One extracted fact: EXACTLY the 5 semantic fields the KV parser yields
# (FCT line = literal `FCT` + these 5), so upserts stay untouched.
# `object` is null when absent — the KV format's `-`.
_FACT_ITEM_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "category": {"type": "string", "enum": _FCT_CATEGORY_ENUM},
        "subject": {"type": "string", "minLength": 1},
        "predicate": {"type": "string", "minLength": 1},
        "object": {"type": ["string", "null"]},
        "statement": {
            "type": "string",
            "minLength": 1,
            "maxLength": MAX_STATEMENT_CHARS,
        },
    },
    "required": [
        "category", "subject", "predicate",
        "object", "statement",
    ],
    "additionalProperties": False,
}

# One extracted relationship; mirrors the KV REL fields: REL|from|to|rel_type.
_RELATIONSHIP_ITEM_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "from": {"type": "string", "minLength": 1},
        "to": {"type": "string", "minLength": 1},
        "type": {"type": "string", "minLength": 1},
    },
    "required": ["from", "to", "type"],
    "additionalProperties": False,
}

# One per-event result object. `index` is the zero-indexed position of the
# event in the input batch — it mirrors the `[event K]` header so parsed
# objects reattach to the right queue item even if the model reorders or a
# truncation drops the tail.
_EVENT_ITEM_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "index": {"type": "integer", "minimum": 0},
        "entities": {
            "type": "array",
            "maxItems": MAX_ENTITIES_PER_EVENT,
            "items": _ENTITY_ITEM_SCHEMA,
        },
        "facts": {
            "type": "array",
            "maxItems": MAX_FACTS_PER_EVENT,
            "items": _FACT_ITEM_SCHEMA,
        },
        "relationships": {
            "type": "array",
            "maxItems": MAX_RELATIONSHIPS_PER_EVENT,
            "items": _RELATIONSHIP_ITEM_SCHEMA,
        },
    },
    "required": ["index", "entities", "facts", "relationships"],
    "additionalProperties": False,
}

# Top-level envelope: a single `events` array of per-event objects.
EXTRACTION_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "events": {
            "type": "array",
            "items": _EVENT_ITEM_SCHEMA,
        },
    },
    "required": ["events"],
    "additionalProperties": False,
}
141
+
142
+
143
def extraction_schema_json() -> str:
    """Serialise EXTRACTION_SCHEMA to a compact, key-sorted JSON string.

    Keys are sorted and separators carry no whitespace, so the text is
    stable and suitable for request bodies and fingerprinting.
    """
    compact_separators = (",", ":")
    serialised = json.dumps(
        EXTRACTION_SCHEMA,
        separators=compact_separators,
        sort_keys=True,
    )
    return serialised
147
+
148
+
149
+ # ----------------------------------------------------------------------
150
+ # Hand-rolled payload validation.
151
+ #
152
+ # The runtime guarantee comes from vLLM's logit masking, NOT from this
153
+ # function — it exists so tests and the bake-off script can check
154
+ # payloads against the same constraints without adding a `jsonschema`
155
+ # dependency (not currently in requirements.txt). It covers exactly the
156
+ # constraints EXTRACTION_SCHEMA expresses; if you extend the schema,
157
+ # extend this too.
158
+ # ----------------------------------------------------------------------
159
+
160
+
161
# Key sets allowed on each object kind (mirrors the schema's `properties`).
_EVENT_KEYS = frozenset({"index", "entities", "facts", "relationships"})
_ENTITY_KEYS = frozenset({"name", "type", "email"})
_FACT_KEYS = frozenset({"category", "subject", "predicate", "object", "statement"})
_RELATIONSHIP_KEYS = ("from", "to", "type")


def _is_nonempty_str(value: Any) -> bool:
    """Return True when *value* is a string with at least one character."""
    return isinstance(value, str) and bool(value)


def _in_enum(value: Any, allowed: set[str]) -> bool:
    """Return True when *value* is a string member of *allowed*.

    The isinstance check runs first so that unhashable values (lists,
    dicts) are reported as violations instead of raising TypeError from
    the set membership test.
    """
    return isinstance(value, str) and value in allowed


def _bounded_array(
    ev: dict, key: str, limit: int, loc: str, errors: list[str]
) -> list:
    """Return ev[key] if it is a list (else []), recording type/size errors."""
    items = ev.get(key)
    if not isinstance(items, list):
        errors.append(f"{loc}.{key}: not an array")
        return []
    if len(items) > limit:
        errors.append(f"{loc}.{key}: exceeds {limit}")
    return items


def _validate_entity(ent: Any, loc: str, errors: list[str]) -> None:
    """Append violations for one entity object to *errors*."""
    if not isinstance(ent, dict):
        errors.append(f"{loc}: not an object")
        return
    if set(ent) - _ENTITY_KEYS:
        errors.append(f"{loc}: unexpected keys")
    if not _is_nonempty_str(ent.get("name")):
        errors.append(f"{loc}.name: missing/empty")
    if not _in_enum(ent.get("type"), ALLOWED_ENT_TYPES):
        errors.append(f"{loc}.type: not in ALLOWED_ENT_TYPES")
    # `email` is optional; only its type is constrained when present.
    if "email" in ent and not isinstance(ent["email"], str):
        errors.append(f"{loc}.email: not a string")


def _validate_fact(fact: Any, loc: str, errors: list[str]) -> None:
    """Append violations for one fact object to *errors*."""
    if not isinstance(fact, dict):
        errors.append(f"{loc}: not an object")
        return
    # All five fields are required and no others are allowed, so the key
    # set must match exactly; bail out early so the lookups below are safe.
    if set(fact) != _FACT_KEYS:
        errors.append(f"{loc}: keys != {{category,subject,predicate,object,statement}}")
        return
    if not _in_enum(fact["category"], ALLOWED_FCT_CATEGORIES):
        errors.append(f"{loc}.category: not in ALLOWED_FCT_CATEGORIES")
    if not _is_nonempty_str(fact["subject"]):
        errors.append(f"{loc}.subject: missing/empty")
    if not _is_nonempty_str(fact["predicate"]):
        errors.append(f"{loc}.predicate: missing/empty")
    if fact["object"] is not None and not isinstance(fact["object"], str):
        errors.append(f"{loc}.object: not string-or-null")
    stmt = fact["statement"]
    if not _is_nonempty_str(stmt):
        errors.append(f"{loc}.statement: missing/empty")
    elif len(stmt) > MAX_STATEMENT_CHARS:
        errors.append(f"{loc}.statement: exceeds {MAX_STATEMENT_CHARS} chars")


def _validate_relationship(rel: Any, loc: str, errors: list[str]) -> None:
    """Append violations for one relationship object to *errors*."""
    if not isinstance(rel, dict):
        errors.append(f"{loc}: not an object")
        return
    if set(rel) != set(_RELATIONSHIP_KEYS):
        errors.append(f"{loc}: keys != {{from,to,type}}")
        return
    for key in _RELATIONSHIP_KEYS:
        if not _is_nonempty_str(rel[key]):
            errors.append(f"{loc}.{key}: missing/empty")


def _validate_event(ev: Any, loc: str, errors: list[str]) -> None:
    """Append violations for one per-event object to *errors*."""
    if not isinstance(ev, dict):
        errors.append(f"{loc}: not an object")
        return
    if set(ev) - _EVENT_KEYS:
        errors.append(f"{loc}: unexpected keys")
    idx = ev.get("index")
    # bool is a subclass of int, so exclude it explicitly.
    if not isinstance(idx, int) or isinstance(idx, bool) or idx < 0:
        errors.append(f"{loc}.index: not a non-negative integer")
    entities = _bounded_array(ev, "entities", MAX_ENTITIES_PER_EVENT, loc, errors)
    for j, ent in enumerate(entities):
        _validate_entity(ent, f"{loc}.entities[{j}]", errors)
    facts = _bounded_array(ev, "facts", MAX_FACTS_PER_EVENT, loc, errors)
    for j, fact in enumerate(facts):
        _validate_fact(fact, f"{loc}.facts[{j}]", errors)
    rels = _bounded_array(
        ev, "relationships", MAX_RELATIONSHIPS_PER_EVENT, loc, errors
    )
    for j, rel in enumerate(rels):
        _validate_relationship(rel, f"{loc}.relationships[{j}]", errors)


def validate_payload(payload: Any) -> list[str]:
    """Return a list of human-readable violations ([] == valid).

    Hand-rolled check of a decoded payload against the constraints that
    EXTRACTION_SCHEMA expresses, for callers that want validation without
    a `jsonschema` dependency.

    Args:
        payload: The decoded JSON value to check (any Python object).

    Returns:
        One message per violation, in document order. A non-object
        payload or a missing/non-array `events` stops validation early.
        Never raises on malformed input: values of the wrong type,
        including unhashable ones such as a list in an enum field, are
        reported as violations.
    """
    errors: list[str] = []
    if not isinstance(payload, dict):
        return ["payload: not an object"]
    if set(payload) - {"events"}:
        errors.append("payload: unexpected top-level keys")
    events = payload.get("events")
    if not isinstance(events, list):
        return errors + ["events: missing or not an array"]
    for i, ev in enumerate(events):
        _validate_event(ev, f"events[{i}]", errors)
    return errors