@pentatonic-ai/ai-agent-sdk 0.10.5 → 0.10.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. package/dist/index.cjs +1 -1
  2. package/dist/index.js +1 -1
  3. package/package.json +1 -1
  4. package/packages/memory-engine-v2/compat/requirements.txt +6 -0
  5. package/packages/memory-engine-v2/compat/server.py +258 -18
  6. package/packages/memory-engine-v2/docker-compose.aws.yml +62 -1
  7. package/packages/memory-engine-v2/docker-compose.yml +8 -1
  8. package/packages/memory-engine-v2/eval/recall_at_k.py +242 -0
  9. package/packages/memory-engine-v2/eval/retrieval_golden.seed.json +69 -0
  10. package/packages/memory-engine-v2/extractor-async/Dockerfile +1 -1
  11. package/packages/memory-engine-v2/extractor-async/extraction_schema.py +246 -0
  12. package/packages/memory-engine-v2/extractor-async/test_guided_json_parser.py +455 -0
  13. package/packages/memory-engine-v2/extractor-async/worker.py +391 -31
  14. package/packages/memory-engine-v2/extractor-sync/server.py +6 -2
  15. package/packages/memory-engine-v2/extractor-sync/test_paired_extraction.py +82 -1
  16. package/packages/memory-engine-v2/org-model/migrations/004_source_kind_code_reference.sql +12 -0
  17. package/packages/memory-engine-v2/org-model/migrations/005_fk_indexes.sql +20 -0
  18. package/packages/memory-engine-v2/resolution-queue-design.md +165 -0
  19. package/packages/memory-engine-v2/scripts/backfill_entity_reconciliation.py +11 -2
  20. package/packages/memory-engine-v2/scripts/backfill_sparse_vectors.py +369 -0
  21. package/packages/memory-engine-v2/scripts/bakeoff_guided_vs_kv.py +607 -0
  22. package/packages/memory-engine-v2/scripts/entity_resolution_v2.py +1041 -0
  23. package/packages/memory-engine-v2/tests/test_entity_resolution_v2.py +507 -0
  24. package/packages/memory-engine-v2/tests/test_hybrid_retrieval.py +810 -0
@@ -0,0 +1,455 @@
1
+ """Unit tests for the guided-JSON output mode (DISTILL_OUTPUT_MODE).
2
+
3
+ Covers: the extraction schema (enum pinning, caps, validation), the
4
+ _parse_guided_json sibling parser (well-formed, malformed, truncated),
5
+ kv↔guided parse parity (identical upsert-ready dicts), and the flag
6
+ contract (default 'kv' = byte-identical request body + prompt hash to
7
+ the pre-flag worker — the whole change is a no-op until an operator
8
+ flips the env var).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import hashlib
14
+ import importlib.util
15
+ import json
16
+ from pathlib import Path
17
+
18
+ import pytest
19
+
20
+ _THIS = Path(__file__).resolve().parent
21
+
22
+
23
def _load_worker(name: str = "extractor_async_worker"):
    """Execute the sibling ``worker.py`` as a brand-new module object.

    Every call runs the module source again, which is what lets the
    env-var tests below observe module-level settings recomputed after
    ``monkeypatch`` has changed the environment. The module is not
    registered in ``sys.modules`` by this helper.

    Args:
        name: Module name given to the import spec; use a distinct name
            per load to keep the copies apart.

    Returns:
        The freshly executed module.
    """
    worker_path = _THIS / "worker.py"
    spec = importlib.util.spec_from_file_location(name, worker_path)
    # A missing spec/loader means the file could not be located at all.
    assert spec and spec.loader
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
29
+
30
+
31
+ try:
32
+ worker = _load_worker()
33
+ except ImportError as e:
34
+ pytest.skip(f"extractor-async deps unavailable: {e}", allow_module_level=True)
35
+
36
+ import extraction_schema as xs
37
+
38
+
39
+ # ----------------------------------------------------------------------
40
+ # Schema structure — enums pinned to the shared constants, caps mirror
41
+ # the prompt's hard caps, statement length capped.
42
+ # ----------------------------------------------------------------------
43
+
44
def _event_item_schema() -> dict:
    """Return the sub-schema describing one element of ``events``."""
    events_schema = xs.EXTRACTION_SCHEMA["properties"]["events"]
    return events_schema["items"]
46
+
47
+
48
def test_schema_enums_pin_to_shared_constants() -> None:
    """Worker allowed-sets and schema enums come from one source.

    The worker must expose the very same objects as ``extraction_schema``
    (identity, not mere equality), and the enum lists in the schema must
    contain exactly those values in sorted order.
    """
    # Identity check: the worker re-uses the schema module's constants.
    assert worker.ALLOWED_ENT_TYPES is xs.ALLOWED_ENT_TYPES
    assert worker.ALLOWED_FCT_CATEGORIES is xs.ALLOWED_FCT_CATEGORIES

    props = _event_item_schema()["properties"]
    entity_types = props["entities"]["items"]["properties"]["type"]["enum"]
    fact_categories = props["facts"]["items"]["properties"]["category"]["enum"]

    assert set(entity_types) == xs.ALLOWED_ENT_TYPES
    assert set(fact_categories) == xs.ALLOWED_FCT_CATEGORIES

    # Sorted enums keep the serialised schema stable between processes.
    assert entity_types == sorted(entity_types)
    assert fact_categories == sorted(fact_categories)
62
+
63
+
64
def test_schema_caps_mirror_prompt_hard_caps() -> None:
    """Per-event caps are 8 entities / 6 facts / 6 relationships, and a
    fact statement is limited to 140 characters."""
    props = _event_item_schema()["properties"]

    expected_caps = {"entities": 8, "facts": 6, "relationships": 6}
    for field, cap in expected_caps.items():
        assert props[field]["maxItems"] == cap

    statement = props["facts"]["items"]["properties"]["statement"]
    assert statement["maxLength"] == 140
73
+
74
+
75
def test_schema_fact_fields_match_kv_parser_output() -> None:
    """A fact declares exactly five fields, and all five are required."""
    fact_schema = _event_item_schema()["properties"]["facts"]["items"]
    declared = set(fact_schema["properties"].keys())

    assert declared == {
        "category", "subject", "predicate", "object", "statement",
    }
    # No optional fact fields: required list covers every declared one.
    assert set(fact_schema["required"]) == declared
83
+
84
+
85
def test_schema_json_helper_is_stable_and_parseable() -> None:
    """``extraction_schema_json`` returns the same text on repeated calls
    and that text decodes back to ``EXTRACTION_SCHEMA``."""
    first = xs.extraction_schema_json()
    second = xs.extraction_schema_json()
    assert first == second
    assert json.loads(first) == xs.EXTRACTION_SCHEMA
89
+
90
+
91
+ # ----------------------------------------------------------------------
92
+ # validate_payload — hand-rolled checks (jsonschema is not a dep).
93
+ # ----------------------------------------------------------------------
94
+
95
+ def _good_payload() -> dict:
96
+ return {
97
+ "events": [
98
+ {
99
+ "index": 0,
100
+ "entities": [
101
+ {"name": "Alex Wong", "type": "person", "email": "alex@example.com"},
102
+ {"name": "Acme Corp", "type": "org"},
103
+ ],
104
+ "facts": [
105
+ {
106
+ "category": "commitment",
107
+ "subject": "Alex Wong",
108
+ "predicate": "agreed to",
109
+ "object": "Acme Corp",
110
+ "statement": "Alex agreed to the Acme deal",
111
+ },
112
+ {
113
+ "category": "state",
114
+ "subject": "Acme Corp",
115
+ "predicate": "is based in",
116
+ "object": None,
117
+ "statement": "Acme is based in London",
118
+ },
119
+ ],
120
+ "relationships": [
121
+ {"from": "Alex Wong", "to": "Acme Corp", "type": "works_at"},
122
+ ],
123
+ },
124
+ {"index": 1, "entities": [], "facts": [], "relationships": []},
125
+ ]
126
+ }
127
+
128
+
129
def test_validate_payload_accepts_good() -> None:
    """The reference payload produces an empty error list."""
    errors = xs.validate_payload(_good_payload())
    assert errors == []
131
+
132
+
133
def test_validate_payload_rejects_bad_enum() -> None:
    """Out-of-enum entity type and fact category are each reported with
    an error message that names the offending field."""
    bad_entity = _good_payload()
    bad_entity["events"][0]["entities"][0]["type"] = "spaceship"
    entity_errors = xs.validate_payload(bad_entity)
    assert any(".type" in msg for msg in entity_errors)

    bad_fact = _good_payload()
    bad_fact["events"][0]["facts"][0]["category"] = "vibe"
    fact_errors = xs.validate_payload(bad_fact)
    assert any(".category" in msg for msg in fact_errors)
140
+
141
+
142
def test_validate_payload_rejects_cap_and_length_violations() -> None:
    """Nine entities (cap 8) and a 141-character statement (cap 140) are
    both flagged by ``validate_payload``."""
    too_many = _good_payload()
    too_many["events"][0]["entities"] = [
        {"name": f"E{i}", "type": "concept"} for i in range(9)
    ]
    cap_errors = xs.validate_payload(too_many)
    assert any("exceeds 8" in msg for msg in cap_errors)

    too_long = _good_payload()
    too_long["events"][0]["facts"][0]["statement"] = "x" * 141
    length_errors = xs.validate_payload(too_long)
    assert any("exceeds 140" in msg for msg in length_errors)
151
+
152
+
153
def test_validate_payload_rejects_missing_fact_field_and_extra_keys() -> None:
    """A fact lacking ``object`` yields at least one error; an unknown
    key on an event yields an "unexpected keys" error."""
    missing_field = _good_payload()
    del missing_field["events"][0]["facts"][0]["object"]
    assert xs.validate_payload(missing_field)

    extra_key = _good_payload()
    extra_key["events"][0]["surprise"] = True
    extra_errors = xs.validate_payload(extra_key)
    assert any("unexpected keys" in msg for msg in extra_errors)
160
+
161
+
162
+ # ----------------------------------------------------------------------
163
+ # _parse_guided_json — sibling of _parse_kv_records, identical shape.
164
+ # ----------------------------------------------------------------------
165
+
166
def test_guided_parser_well_formed() -> None:
    """The reference payload parses into two records of the expected
    shape: the email becomes an alias, a ``None`` object survives, and
    the empty event yields three empty lists."""
    raw = json.dumps(_good_payload())
    records = worker._parse_guided_json(raw, expected_n=2)
    assert len(records) == 2

    populated = records[0]
    assert populated["entities"] == [
        {"type": "person", "name": "Alex Wong", "aliases": ["alex@example.com"]},
        {"type": "org", "name": "Acme Corp"},
    ]
    assert populated["facts"][1]["object"] is None
    assert populated["relationships"] == [
        {"from": "Alex Wong", "to": "Acme Corp", "type": "works_at"}
    ]

    assert records[1] == {"entities": [], "facts": [], "relationships": []}
178
+
179
+
180
def test_guided_parser_normalises_like_kv() -> None:
    """Normalisation applied by the guided parser.

    Mixed-case type/category values are lowercased, surrounding
    whitespace is stripped, an object of ``"-"`` becomes ``None``, and an
    email value that is not an email produces no alias.
    """
    entity = {"name": "  Sam Patel ", "type": "Person", "email": "not an email"}
    fact = {
        "category": "Mention",
        "subject": " Sam Patel",
        "predicate": " mentioned ",
        "object": "-",
        "statement": "  Sam mentioned the launch  ",
    }
    payload = {
        "events": [
            {
                "index": 0,
                "entities": [entity],
                "facts": [fact],
                "relationships": [],
            }
        ]
    }

    records = worker._parse_guided_json(json.dumps(payload), expected_n=1)

    assert records[0]["entities"] == [{"type": "person", "name": "Sam Patel"}]
    parsed_fact = records[0]["facts"][0]
    assert parsed_fact["category"] == "mention"
    assert parsed_fact["subject"] == "Sam Patel"
    assert parsed_fact["predicate"] == "mentioned"
    assert parsed_fact["object"] is None
    assert parsed_fact["statement"] == "Sam mentioned the launch"
211
+
212
+
213
def test_guided_parser_enforces_caps_defensively() -> None:
    """The parser itself trims over-long lists: twelve entities in, eight
    entities out."""
    oversized_entities = [
        {"name": f"E{i}", "type": "concept"} for i in range(12)
    ]
    event = {
        "index": 0,
        "entities": oversized_entities,
        "facts": [],
        "relationships": [],
    }
    raw = json.dumps({"events": [event]})

    records = worker._parse_guided_json(raw, expected_n=1)
    assert len(records[0]["entities"]) == 8
228
+
229
+
230
def test_guided_parser_index_routing_and_out_of_range() -> None:
    """Events are slotted by their ``index`` value, not list position,
    and an event whose index and position both fall outside
    ``expected_n`` is discarded without disturbing the other slots."""

    def _event(index: int, entity_name: str) -> dict:
        # Minimal event holding a single org entity.
        return {
            "index": index,
            "entities": [{"name": entity_name, "type": "org"}],
            "facts": [],
            "relationships": [],
        }

    payload = {"events": [_event(1, "B"), _event(0, "A"), _event(9, "Rogue")]}
    records = worker._parse_guided_json(json.dumps(payload), expected_n=2)

    assert records[0]["entities"][0]["name"] == "A"
    assert records[1]["entities"][0]["name"] == "B"
    assert all(
        entity["name"] != "Rogue"
        for record in records
        for entity in record["entities"]
    )
250
+
251
+
252
def test_guided_parser_garbage_input_degrades_to_empty() -> None:
    """Empty text, non-JSON, a JSON array, and an object without
    ``events`` all parse to ``expected_n`` empty records."""
    garbage_inputs = ("", "not json at all", "[1,2,3]", '{"weird": true}')
    for raw in garbage_inputs:
        expected = [
            {"entities": [], "facts": [], "relationships": []} for _ in range(3)
        ]
        assert worker._parse_guided_json(raw, expected_n=3) == expected
258
+
259
+
260
def test_guided_parser_salvages_truncated_output() -> None:
    """A payload cut off part-way through event 1 still yields event 0
    intact; the truncated event comes back as an empty record."""
    serialised = json.dumps(_good_payload())
    marker_pos = serialised.find('"index": 1')
    assert marker_pos > 0

    # Keep a few characters past the marker so the cut lands inside the
    # second event object, leaving the text as invalid JSON.
    truncated = serialised[: marker_pos + 15]
    records = worker._parse_guided_json(truncated, expected_n=2)

    assert records[0]["entities"][0]["name"] == "Alex Wong"
    assert len(records[0]["facts"]) == 2
    assert records[1] == {"entities": [], "facts": [], "relationships": []}
272
+
273
+
274
def test_guided_parser_strips_markdown_fences() -> None:
    """A payload wrapped in a ```json fence is still parsed."""
    fenced = "```json\n" + json.dumps(_good_payload()) + "\n```"
    records = worker._parse_guided_json(fenced, expected_n=2)
    assert records[0]["entities"][0]["name"] == "Alex Wong"
280
+
281
+
282
def test_guided_event_slices_shape_contract() -> None:
    """``_guided_event_slices`` returns ``expected_n`` strings: a slice
    that re-parses to the source event where one exists, and an empty
    string for every missing event."""
    only_event = {
        "index": 0,
        "entities": [{"name": "A", "type": "org"}],
        "facts": [],
        "relationships": [],
    }
    payload = {"events": [only_event]}

    slices = worker._guided_event_slices(json.dumps(payload), expected_n=3)

    assert len(slices) == 3
    assert json.loads(slices[0]) == payload["events"][0]
    assert slices[1] == ""
    assert slices[2] == ""
296
+
297
+
298
+ # ----------------------------------------------------------------------
299
+ # Parity — same logical content through both parsers yields IDENTICAL
300
+ # upsert-ready dicts. This is what lets the upsert path stay untouched.
301
+ # ----------------------------------------------------------------------
302
+
303
def test_kv_and_guided_parsers_yield_identical_dicts() -> None:
    """The pipe-delimited text and the JSON payload describing the same
    content parse to equal record lists."""
    kv_lines = [
        "=== event 0 ===",
        "ENT|person|Alex Wong|alex@example.com",
        "ENT|org|Acme Corp",
        "FCT|commitment|Alex Wong|agreed to|Acme Corp|Alex agreed to the Acme deal",
        "FCT|state|Acme Corp|is based in|-|Acme is based in London",
        "REL|Alex Wong|Acme Corp|works_at",
        "=== event 1 ===",
    ]
    # Every line, including the last, is newline-terminated.
    kv_text = "\n".join(kv_lines) + "\n"

    kv_records = worker._parse_kv_records(kv_text, expected_n=2)
    guided_records = worker._parse_guided_json(
        json.dumps(_good_payload()), expected_n=2
    )
    assert kv_records == guided_records
316
+
317
+
318
+ # ----------------------------------------------------------------------
319
+ # Flag contract — DEFAULT IS A NO-OP. Until an operator sets
320
+ # DISTILL_OUTPUT_MODE=guided_json the worker behaves byte-for-byte as
321
+ # before this change.
322
+ # ----------------------------------------------------------------------
323
+
324
def test_default_mode_is_kv(monkeypatch: pytest.MonkeyPatch) -> None:
    """With ``DISTILL_OUTPUT_MODE`` unset the worker runs in ``kv`` mode,
    uses ``BATCH_SYSTEM_PROMPT``, and derives its prompt hash from it."""
    monkeypatch.delenv("DISTILL_OUTPUT_MODE", raising=False)
    w = _load_worker("worker_default_mode")

    assert w.DISTILL_OUTPUT_MODE == "kv"
    assert w.ACTIVE_SYSTEM_PROMPT is w.BATCH_SYSTEM_PROMPT

    # The hash is the first 16 hex chars of the active prompt's SHA-256,
    # so in the default mode it must match the batch prompt's digest.
    batch_digest = hashlib.sha256(w.BATCH_SYSTEM_PROMPT.encode()).hexdigest()
    assert w.SYSTEM_PROMPT_HASH == batch_digest[:16]
335
+
336
+
337
def test_default_request_body_unchanged(monkeypatch: pytest.MonkeyPatch) -> None:
    """Default-mode request body holds only model, messages, temperature
    and max_tokens — no structured-output parameters.

    Both knobs that can add keys to the body are cleared first. The
    comparison below is an exact dict equality, so an ambient
    ``DISTILL_CHAT_TEMPLATE_KWARGS`` in the developer/CI environment would
    otherwise inject a ``chat_template_kwargs`` key and fail this test for
    reasons unrelated to the output-mode flag.
    """
    monkeypatch.delenv("DISTILL_OUTPUT_MODE", raising=False)
    monkeypatch.delenv("DISTILL_CHAT_TEMPLATE_KWARGS", raising=False)
    w = _load_worker("worker_default_body")

    body = w._build_request_body("PROMPT", 15)

    assert body == {
        "model": w.LLM_MODEL,
        "messages": [
            {"role": "system", "content": w.BATCH_SYSTEM_PROMPT},
            {"role": "user", "content": "PROMPT"},
        ],
        "temperature": 0.0,
        "max_tokens": w.LLM_MAX_TOKENS_PER_EVENT * 15,
    }
    # Implied by the equality above; kept as an explicit statement of intent.
    assert "response_format" not in body and "guided_json" not in body
353
+
354
+
355
def test_unrecognised_mode_falls_back_to_kv(monkeypatch: pytest.MonkeyPatch) -> None:
    """An unknown ``DISTILL_OUTPUT_MODE`` value is treated as ``kv``."""
    monkeypatch.setenv("DISTILL_OUTPUT_MODE", "yaml-please")
    loaded = _load_worker("worker_bad_mode")

    assert loaded.DISTILL_OUTPUT_MODE == "kv"
    assert loaded.ACTIVE_SYSTEM_PROMPT is loaded.BATCH_SYSTEM_PROMPT
360
+
361
+
362
def test_guided_mode_flips_prompt_hash_and_params(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``guided_json`` mode switches prompt, hash, token budget and adds
    an OpenAI-style ``response_format`` carrying the extraction schema."""
    monkeypatch.setenv("DISTILL_OUTPUT_MODE", "guided_json")
    monkeypatch.delenv("DISTILL_GUIDED_PARAM_STYLE", raising=False)
    w = _load_worker("worker_guided_mode")

    assert w.DISTILL_OUTPUT_MODE == "guided_json"
    assert w.ACTIVE_SYSTEM_PROMPT is w.GUIDED_JSON_SYSTEM_PROMPT

    # The hash tracks the active prompt, so it must equal the guided
    # prompt's digest and differ from the batch prompt's digest.
    guided_hash = hashlib.sha256(
        w.GUIDED_JSON_SYSTEM_PROMPT.encode()
    ).hexdigest()[:16]
    batch_hash = hashlib.sha256(
        w.BATCH_SYSTEM_PROMPT.encode()
    ).hexdigest()[:16]
    assert w.SYSTEM_PROMPT_HASH == guided_hash
    assert w.SYSTEM_PROMPT_HASH != batch_hash

    body = w._build_request_body("PROMPT", 15)
    system_message = body["messages"][0]
    assert system_message["content"] == w.GUIDED_JSON_SYSTEM_PROMPT
    assert body["max_tokens"] == w.LLM_MAX_TOKENS_PER_EVENT_JSON * 15

    # With no param style set, the schema travels in response_format and
    # the top-level guided_json key is absent.
    assert "guided_json" not in body
    response_format = body["response_format"]
    assert response_format["type"] == "json_schema"
    assert response_format["json_schema"]["schema"] == xs.EXTRACTION_SCHEMA
387
+
388
+
389
def test_guided_mode_legacy_param_style(monkeypatch: pytest.MonkeyPatch) -> None:
    """With ``DISTILL_GUIDED_PARAM_STYLE=guided_json`` the schema is sent
    as a top-level ``guided_json`` key and ``response_format`` is absent."""
    env = {
        "DISTILL_OUTPUT_MODE": "guided_json",
        "DISTILL_GUIDED_PARAM_STYLE": "guided_json",
    }
    for key, value in env.items():
        monkeypatch.setenv(key, value)

    loaded = _load_worker("worker_guided_legacy")
    body = loaded._build_request_body("PROMPT", 2)

    assert "response_format" not in body
    assert body["guided_json"] == xs.EXTRACTION_SCHEMA
396
+
397
+
398
def test_guided_prompt_keeps_content_rules() -> None:
    """The guided-JSON system prompt contains each content-rule phrase
    listed below and none of the pipe-format scaffolding phrases."""
    prompt = worker.GUIDED_JSON_SYSTEM_PROMPT

    required_phrases = (
        "conservative",                                   # conservatism
        "8 entities, 6 facts, 6 relationships",           # hard caps
        "NOT variables, types, or method names",          # code-content rule
        'subject MUST be an entity name declared in THIS event',
        "unambiguously identifies the person",            # email-alias pairing
        "140 characters",
        "NEVER skip an event",
    )
    for phrase in required_phrases:
        assert phrase in prompt

    # Pipe scaffolding must be gone from the JSON variant.
    forbidden_phrases = ("COUNT THE PIPES", "PIPE-DELIMITED")
    for phrase in forbidden_phrases:
        assert phrase not in prompt
412
+
413
+
414
+ # ----------------------------------------------------------------------
415
+ # DISTILL_CHAT_TEMPLATE_KWARGS — thinking-teacher template switch
416
+ # ----------------------------------------------------------------------
417
+
418
+
419
def test_default_body_has_no_chat_template_kwargs(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With ``DISTILL_CHAT_TEMPLATE_KWARGS`` unset the parsed setting is
    ``None`` and the request body has no ``chat_template_kwargs`` key."""
    monkeypatch.delenv("DISTILL_CHAT_TEMPLATE_KWARGS", raising=False)
    loaded = _load_worker("worker_no_ctk")

    assert loaded.DISTILL_CHAT_TEMPLATE_KWARGS is None
    body = loaded._build_request_body("PROMPT", 5)
    assert "chat_template_kwargs" not in body
428
+
429
+
430
def test_chat_template_kwargs_forwarded(monkeypatch: pytest.MonkeyPatch) -> None:
    """``DISTILL_CHAT_TEMPLATE_KWARGS`` is parsed as JSON and copied
    verbatim into the request body in both output modes.

    The output-mode and guided-param-style variables are cleared up front
    so the first load really exercises the default mode and the second
    load really uses the default (``response_format``) param style,
    whatever the surrounding environment happens to contain.
    """
    monkeypatch.delenv("DISTILL_OUTPUT_MODE", raising=False)
    monkeypatch.delenv("DISTILL_GUIDED_PARAM_STYLE", raising=False)
    monkeypatch.setenv("DISTILL_CHAT_TEMPLATE_KWARGS", '{"enable_thinking": false}')

    # Default output mode.
    w = _load_worker("worker_ctk")
    assert w.DISTILL_CHAT_TEMPLATE_KWARGS == {"enable_thinking": False}
    body = w._build_request_body("PROMPT", 5)
    assert body["chat_template_kwargs"] == {"enable_thinking": False}

    # Guided-JSON output mode: kwargs still forwarded next to the schema.
    monkeypatch.setenv("DISTILL_OUTPUT_MODE", "guided_json")
    w2 = _load_worker("worker_ctk_guided")
    body2 = w2._build_request_body("PROMPT", 5)
    assert body2["chat_template_kwargs"] == {"enable_thinking": False}
    assert "response_format" in body2
444
+
445
+
446
def test_chat_template_kwargs_invalid_ignored(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Malformed JSON or a non-object value is ignored: the parsed
    setting is ``None`` and nothing is added to the request body.

    Each case is loaded under a module name built from its position in
    the tuple. ``hash()`` of a string varies between interpreter runs
    under hash randomisation, so position-based names keep the loaded
    module names reproducible and guaranteed distinct.
    """
    bad_values = ("{not json", '["a", "list"]', '"a string"')
    for case_no, bad in enumerate(bad_values):
        monkeypatch.setenv("DISTILL_CHAT_TEMPLATE_KWARGS", bad)
        w = _load_worker(f"worker_ctk_bad_{case_no}")
        assert w.DISTILL_CHAT_TEMPLATE_KWARGS is None
        assert "chat_template_kwargs" not in w._build_request_body("PROMPT", 5)