npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.10.5 → 0.10.7 - Mend

@pentatonic-ai/ai-agent-sdk 0.10.5 → 0.10.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/packages/memory-engine-v2/extractor-async/test_guided_json_parser.py ADDED Viewed

@@ -0,0 +1,455 @@
+"""Unit tests for the guided-JSON output mode (DISTILL_OUTPUT_MODE).
+Covers: the extraction schema (enum pinning, caps, validation), the
+_parse_guided_json sibling parser (well-formed, malformed, truncated),
+kv↔guided parse parity (identical upsert-ready dicts), and the flag
+contract (default 'kv' = byte-identical request body + prompt hash to
+the pre-flag worker — the whole change is a no-op until an operator
+flips the env var).
+"""
+from __future__ import annotations
+import hashlib
+import importlib.util
+import json
+from pathlib import Path
+import pytest
+_THIS = Path(__file__).resolve().parent
+def _load_worker(name: str = "extractor_async_worker"):
+    """Load the sibling ``worker.py`` as a brand-new module named *name*.
+    The file is executed on every call, so each caller gets its own copy
+    of the worker's module-level state. The module is not registered in
+    ``sys.modules``."""
+    worker_path = _THIS / "worker.py"
+    module_spec = importlib.util.spec_from_file_location(name, worker_path)
+    assert module_spec and module_spec.loader
+    fresh = importlib.util.module_from_spec(module_spec)
+    module_spec.loader.exec_module(fresh)
+    return fresh
+try:
+    # Executing worker.py imports its runtime dependencies; when one is
+    # missing, skip this whole test module instead of failing collection.
+    worker = _load_worker()
+except ImportError as exc:
+    pytest.skip(f"extractor-async deps unavailable: {exc}", allow_module_level=True)
+import extraction_schema as xs
+# ----------------------------------------------------------------------
+# Schema structure — enums pinned to the shared constants, caps mirror
+# the prompt's hard caps, statement length capped.
+# ----------------------------------------------------------------------
+def _event_item_schema() -> dict:
+    """Return the schema applied to each element of the ``events`` array."""
+    events_schema = xs.EXTRACTION_SCHEMA["properties"]["events"]
+    return events_schema["items"]
+def test_schema_enums_pin_to_shared_constants() -> None:
+    """Worker allowed-sets ARE the schema module's constants (same
+    objects), and the schema enums list exactly those values in sorted
+    order — one edit moves everything together."""
+    assert worker.ALLOWED_ENT_TYPES is xs.ALLOWED_ENT_TYPES
+    assert worker.ALLOWED_FCT_CATEGORIES is xs.ALLOWED_FCT_CATEGORIES
+    event_props = _event_item_schema()["properties"]
+    pinned = (
+        (
+            event_props["entities"]["items"]["properties"]["type"]["enum"],
+            xs.ALLOWED_ENT_TYPES,
+        ),
+        (
+            event_props["facts"]["items"]["properties"]["category"]["enum"],
+            xs.ALLOWED_FCT_CATEGORIES,
+        ),
+    )
+    for enum_values, allowed in pinned:
+        assert set(enum_values) == allowed
+    # sorted → byte-stable serialisation across processes
+    for enum_values, _allowed in pinned:
+        assert enum_values == sorted(enum_values)
+def test_schema_caps_mirror_prompt_hard_caps() -> None:
+    """Per-event array caps are 8 ENT / 6 FCT / 6 REL and a fact
+    statement is limited to 140 characters — the schema enforces what
+    BATCH_SYSTEM_PROMPT asks for."""
+    event_props = _event_item_schema()["properties"]
+    expected_caps = {"entities": 8, "facts": 6, "relationships": 6}
+    for field, cap in expected_caps.items():
+        assert event_props[field]["maxItems"] == cap
+    statement_schema = event_props["facts"]["items"]["properties"]["statement"]
+    assert statement_schema["maxLength"] == 140
+def test_schema_fact_fields_match_kv_parser_output() -> None:
+    """A fact declares exactly five properties, all of them required —
+    the same five semantic fields _parse_kv_records yields, so the
+    upsert path does not need to change."""
+    fact_schema = _event_item_schema()["properties"]["facts"]["items"]
+    declared = set(fact_schema["properties"])
+    assert declared == {"category", "subject", "predicate", "object", "statement"}
+    assert set(fact_schema["required"]) == declared
+def test_schema_json_helper_is_stable_and_parseable() -> None:
+    """Two serialisations are equal strings and parse back to the schema dict."""
+    first = xs.extraction_schema_json()
+    second = xs.extraction_schema_json()
+    assert first == second
+    assert json.loads(first) == xs.EXTRACTION_SCHEMA
+# ----------------------------------------------------------------------
+# validate_payload — hand-rolled checks (jsonschema is not a dep).
+# ----------------------------------------------------------------------
+def _good_payload() -> dict:
+    """Build a fresh, well-formed two-event payload.
+    Event 0 carries two entities, two facts (the second with a ``None``
+    object) and one relationship; event 1 is entirely empty. A new dict
+    is built on every call so callers may mutate the result freely."""
+    alex = {"name": "Alex Wong", "type": "person", "email": "alex@example.com"}
+    acme = {"name": "Acme Corp", "type": "org"}
+    commitment = {
+        "category": "commitment",
+        "subject": "Alex Wong",
+        "predicate": "agreed to",
+        "object": "Acme Corp",
+        "statement": "Alex agreed to the Acme deal",
+    }
+    location = {
+        "category": "state",
+        "subject": "Acme Corp",
+        "predicate": "is based in",
+        "object": None,
+        "statement": "Acme is based in London",
+    }
+    employment = {"from": "Alex Wong", "to": "Acme Corp", "type": "works_at"}
+    populated_event = {
+        "index": 0,
+        "entities": [alex, acme],
+        "facts": [commitment, location],
+        "relationships": [employment],
+    }
+    empty_event = {"index": 1, "entities": [], "facts": [], "relationships": []}
+    return {"events": [populated_event, empty_event]}
+def test_validate_payload_accepts_good() -> None:
+    """The reference payload validates with an empty error list."""
+    errors = xs.validate_payload(_good_payload())
+    assert errors == []
+def test_validate_payload_rejects_bad_enum() -> None:
+    """An unknown entity type or fact category is reported, and the
+    message mentions the offending field (".type" / ".category")."""
+    bad_entity = _good_payload()
+    bad_entity["events"][0]["entities"][0]["type"] = "spaceship"
+    entity_errors = xs.validate_payload(bad_entity)
+    assert any(".type" in err for err in entity_errors)
+    bad_fact = _good_payload()
+    bad_fact["events"][0]["facts"][0]["category"] = "vibe"
+    fact_errors = xs.validate_payload(bad_fact)
+    assert any(".category" in err for err in fact_errors)
+def test_validate_payload_rejects_cap_and_length_violations() -> None:
+    """Nine entities (cap 8) and a 141-character statement (cap 140)
+    each produce an "exceeds N" error."""
+    overfull = _good_payload()
+    overfull["events"][0]["entities"] = [
+        {"name": f"E{n}", "type": "concept"} for n in range(9)
+    ]
+    cap_errors = xs.validate_payload(overfull)
+    assert any("exceeds 8" in err for err in cap_errors)
+    overlong = _good_payload()
+    overlong["events"][0]["facts"][0]["statement"] = "x" * 141
+    length_errors = xs.validate_payload(overlong)
+    assert any("exceeds 140" in err for err in length_errors)
+def test_validate_payload_rejects_missing_fact_field_and_extra_keys() -> None:
+    """A fact without its "object" key yields at least one error, and an
+    unknown key on an event is reported as "unexpected keys"."""
+    incomplete = _good_payload()
+    incomplete["events"][0]["facts"][0].pop("object")
+    missing_errors = xs.validate_payload(incomplete)
+    assert missing_errors
+    padded = _good_payload()
+    padded["events"][0]["surprise"] = True
+    extra_errors = xs.validate_payload(padded)
+    assert any("unexpected keys" in err for err in extra_errors)
+# ----------------------------------------------------------------------
+# _parse_guided_json — sibling of _parse_kv_records, identical shape.
+# ----------------------------------------------------------------------
+def test_guided_parser_well_formed() -> None:
+    """The reference payload parses into two records: event 0 with the
+    email surfaced as an alias, a ``None`` fact object preserved and the
+    relationship intact; event 1 as the empty record."""
+    raw = json.dumps(_good_payload())
+    records = worker._parse_guided_json(raw, expected_n=2)
+    assert len(records) == 2
+    populated = records[0]
+    expected_entities = [
+        {"type": "person", "name": "Alex Wong", "aliases": ["alex@example.com"]},
+        {"type": "org", "name": "Acme Corp"},
+    ]
+    assert populated["entities"] == expected_entities
+    assert populated["facts"][1]["object"] is None
+    expected_relationships = [
+        {"from": "Alex Wong", "to": "Acme Corp", "type": "works_at"}
+    ]
+    assert populated["relationships"] == expected_relationships
+    assert records[1] == {"entities": [], "facts": [], "relationships": []}
+def test_guided_parser_normalises_like_kv() -> None:
+    """Type/category are lowercased, a '-' object becomes None, an
+    email that is not an email is dropped, and surrounding whitespace
+    is stripped from every text field."""
+    messy_entity = {"name": "  Sam Patel ", "type": "Person", "email": "not an email"}
+    messy_fact = {
+        "category": "Mention",
+        "subject": " Sam Patel",
+        "predicate": " mentioned ",
+        "object": "-",
+        "statement": "  Sam mentioned the launch  ",
+    }
+    payload = {
+        "events": [
+            {
+                "index": 0,
+                "entities": [messy_entity],
+                "facts": [messy_fact],
+                "relationships": [],
+            }
+        ]
+    }
+    records = worker._parse_guided_json(json.dumps(payload), expected_n=1)
+    assert records[0]["entities"] == [{"type": "person", "name": "Sam Patel"}]
+    fact = records[0]["facts"][0]
+    assert fact["category"] == "mention"
+    assert fact["subject"] == "Sam Patel"
+    assert fact["predicate"] == "mentioned"
+    assert fact["object"] is None
+    assert fact["statement"] == "Sam mentioned the launch"
+def test_guided_parser_enforces_caps_defensively() -> None:
+    """Twelve entities in, eight out: the parser re-applies the cap
+    itself because salvaged or replayed unguided output may not have
+    had maxItems enforced server-side."""
+    twelve_entities = [{"name": f"E{n}", "type": "concept"} for n in range(12)]
+    event = {
+        "index": 0,
+        "entities": twelve_entities,
+        "facts": [],
+        "relationships": [],
+    }
+    raw = json.dumps({"events": [event]})
+    records = worker._parse_guided_json(raw, expected_n=1)
+    assert len(records[0]["entities"]) == 8
+def test_guided_parser_index_routing_and_out_of_range() -> None:
+    """Each event lands in the slot named by its "index" even when the
+    events arrive out of order; an event whose index and position are
+    both out of range is dropped and leaves the other slots intact."""
+    def org_event(index: int, name: str) -> dict:
+        return {"index": index, "entities": [{"name": name, "type": "org"}],
+                "facts": [], "relationships": []}
+    payload = {
+        "events": [org_event(1, "B"), org_event(0, "A"), org_event(9, "Rogue")]
+    }
+    records = worker._parse_guided_json(json.dumps(payload), expected_n=2)
+    assert records[0]["entities"][0]["name"] == "A"
+    assert records[1]["entities"][0]["name"] == "B"
+    surviving_names = [
+        entity["name"] for record in records for entity in record["entities"]
+    ]
+    assert "Rogue" not in surviving_names
+def test_guided_parser_garbage_input_degrades_to_empty() -> None:
+    """Empty text, non-JSON, a JSON array and an object without
+    "events" all come back as expected_n empty records."""
+    empty_record = {"entities": [], "facts": [], "relationships": []}
+    expected = [empty_record, empty_record, empty_record]
+    garbage_inputs = ("", "not json at all", "[1,2,3]", '{"weird": true}')
+    for garbage in garbage_inputs:
+        records = worker._parse_guided_json(garbage, expected_n=3)
+        assert records == expected
+def test_guided_parser_salvages_truncated_output() -> None:
+    """THE design point: a truncated response loses only the event that
+    was cut off, never the whole chunk. The serialised payload is cut
+    part-way through event 1, so event 0 must still be recovered in
+    full and event 1 must come back empty."""
+    serialised = json.dumps(_good_payload())
+    marker_pos = serialised.find('"index": 1')
+    assert marker_pos > 0
+    # 15 characters past the marker is still inside event 1's object,
+    # so the remaining text is not valid JSON.
+    cut_off = serialised[: marker_pos + 15]
+    records = worker._parse_guided_json(cut_off, expected_n=2)
+    salvaged = records[0]
+    assert salvaged["entities"][0]["name"] == "Alex Wong"
+    assert len(salvaged["facts"]) == 2
+    assert records[1] == {"entities": [], "facts": [], "relationships": []}
+def test_guided_parser_strips_markdown_fences() -> None:
+    """A payload wrapped in a ```json fence still parses. Guided
+    decoding never emits fences; this matters for bake-off replay of
+    unguided output."""
+    fenced = f"```json\n{json.dumps(_good_payload())}\n```"
+    records = worker._parse_guided_json(fenced, expected_n=2)
+    assert records[0]["entities"][0]["name"] == "Alex Wong"
+def test_guided_event_slices_shape_contract() -> None:
+    """Same shape contract as _split_event_blocks: exactly expected_n
+    slices, an empty string for every missing event, and each present
+    slice parses back to its source object (trace-logging fidelity)."""
+    only_event = {"index": 0, "entities": [{"name": "A", "type": "org"}],
+                  "facts": [], "relationships": []}
+    payload = {"events": [only_event]}
+    slices = worker._guided_event_slices(json.dumps(payload), expected_n=3)
+    assert len(slices) == 3
+    assert json.loads(slices[0]) == payload["events"][0]
+    assert slices[1] == ""
+    assert slices[2] == ""
+# ----------------------------------------------------------------------
+# Parity — same logical content through both parsers yields IDENTICAL
+# upsert-ready dicts. This is what lets the upsert path stay untouched.
+# ----------------------------------------------------------------------
+def test_kv_and_guided_parsers_yield_identical_dicts() -> None:
+    """The pipe-delimited rendering of the reference payload and its
+    JSON rendering parse to equal record lists."""
+    kv_lines = (
+        "=== event 0 ===",
+        "ENT|person|Alex Wong|alex@example.com",
+        "ENT|org|Acme Corp",
+        "FCT|commitment|Alex Wong|agreed to|Acme Corp|Alex agreed to the Acme deal",
+        "FCT|state|Acme Corp|is based in|-|Acme is based in London",
+        "REL|Alex Wong|Acme Corp|works_at",
+        "=== event 1 ===",
+    )
+    # Every line, including the last, is newline-terminated.
+    kv_text = "".join(f"{line}\n" for line in kv_lines)
+    guided_text = json.dumps(_good_payload())
+    from_kv = worker._parse_kv_records(kv_text, expected_n=2)
+    from_guided = worker._parse_guided_json(guided_text, expected_n=2)
+    assert from_kv == from_guided
+# ----------------------------------------------------------------------
+# Flag contract — DEFAULT IS A NO-OP. Until an operator sets
+# DISTILL_OUTPUT_MODE=guided_json the worker behaves byte-for-byte as
+# before this change.
+# ----------------------------------------------------------------------
+def test_default_mode_is_kv(monkeypatch: pytest.MonkeyPatch) -> None:
+    """With DISTILL_OUTPUT_MODE unset the worker runs in 'kv' mode with
+    the batch prompt active."""
+    monkeypatch.delenv("DISTILL_OUTPUT_MODE", raising=False)
+    w = _load_worker("worker_default_mode")
+    assert w.DISTILL_OUTPUT_MODE == "kv"
+    assert w.ACTIVE_SYSTEM_PROMPT is w.BATCH_SYSTEM_PROMPT
+    # Prompt-hash property: the hash is derived from the ACTIVE prompt,
+    # so by default it equals the historical BATCH_SYSTEM_PROMPT hash and
+    # existing distillation_traces segmentation is unchanged.
+    batch_digest = hashlib.sha256(w.BATCH_SYSTEM_PROMPT.encode()).hexdigest()
+    assert w.SYSTEM_PROMPT_HASH == batch_digest[:16]
+def test_default_request_body_unchanged(monkeypatch: pytest.MonkeyPatch) -> None:
+    """No structured-output params, KV prompt, KV token budget — the
+    exact pre-flag request body.
+    The body is compared for exact equality, so the env vars that can
+    add keys to it are cleared first and the result does not depend on
+    the ambient environment."""
+    monkeypatch.delenv("DISTILL_OUTPUT_MODE", raising=False)
+    # When set, DISTILL_CHAT_TEMPLATE_KWARGS adds a "chat_template_kwargs"
+    # key to the body, which would break the equality check below.
+    monkeypatch.delenv("DISTILL_CHAT_TEMPLATE_KWARGS", raising=False)
+    w = _load_worker("worker_default_body")
+    body = w._build_request_body("PROMPT", 15)
+    assert body == {
+        "model": w.LLM_MODEL,
+        "messages": [
+            {"role": "system", "content": w.BATCH_SYSTEM_PROMPT},
+            {"role": "user", "content": "PROMPT"},
+        ],
+        "temperature": 0.0,
+        "max_tokens": w.LLM_MAX_TOKENS_PER_EVENT * 15,
+    }
+    assert "response_format" not in body and "guided_json" not in body
+def test_unrecognised_mode_falls_back_to_kv(monkeypatch: pytest.MonkeyPatch) -> None:
+    """An unknown DISTILL_OUTPUT_MODE value is treated as 'kv'."""
+    monkeypatch.setenv("DISTILL_OUTPUT_MODE", "yaml-please")
+    fallback = _load_worker("worker_bad_mode")
+    assert fallback.DISTILL_OUTPUT_MODE == "kv"
+    assert fallback.ACTIVE_SYSTEM_PROMPT is fallback.BATCH_SYSTEM_PROMPT
+def test_guided_mode_flips_prompt_hash_and_params(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Opting into guided_json swaps the active prompt, its hash, the
+    token budget and adds the structured-output request param."""
+    def short_hash(prompt: str) -> str:
+        return hashlib.sha256(prompt.encode()).hexdigest()[:16]
+    monkeypatch.setenv("DISTILL_OUTPUT_MODE", "guided_json")
+    monkeypatch.delenv("DISTILL_GUIDED_PARAM_STYLE", raising=False)
+    w = _load_worker("worker_guided_mode")
+    assert w.DISTILL_OUTPUT_MODE == "guided_json"
+    assert w.ACTIVE_SYSTEM_PROMPT is w.GUIDED_JSON_SYSTEM_PROMPT
+    # Hash follows the ACTIVE prompt → traces auto-segment by teacher
+    # version when the mode flips (migration 003's contract).
+    assert w.SYSTEM_PROMPT_HASH == short_hash(w.GUIDED_JSON_SYSTEM_PROMPT)
+    assert w.SYSTEM_PROMPT_HASH != short_hash(w.BATCH_SYSTEM_PROMPT)
+    body = w._build_request_body("PROMPT", 15)
+    system_message = body["messages"][0]
+    assert system_message["content"] == w.GUIDED_JSON_SYSTEM_PROMPT
+    assert body["max_tokens"] == w.LLM_MAX_TOKENS_PER_EVENT_JSON * 15
+    # Default param style: OpenAI-style response_format json_schema.
+    assert "guided_json" not in body
+    response_format = body["response_format"]
+    assert response_format["type"] == "json_schema"
+    assert response_format["json_schema"]["schema"] == xs.EXTRACTION_SCHEMA
+def test_guided_mode_legacy_param_style(monkeypatch: pytest.MonkeyPatch) -> None:
+    """With DISTILL_GUIDED_PARAM_STYLE=guided_json the schema is sent as
+    a top-level "guided_json" param and no "response_format" is set."""
+    for var, value in (
+        ("DISTILL_OUTPUT_MODE", "guided_json"),
+        ("DISTILL_GUIDED_PARAM_STYLE", "guided_json"),
+    ):
+        monkeypatch.setenv(var, value)
+    legacy = _load_worker("worker_guided_legacy")
+    body = legacy._build_request_body("PROMPT", 2)
+    assert "response_format" not in body
+    assert body["guided_json"] == xs.EXTRACTION_SCHEMA
+def test_guided_prompt_keeps_content_rules() -> None:
+    """The JSON prompt variant must keep every content rule of
+    BATCH_SYSTEM_PROMPT; only the pipe-format scaffolding is removed."""
+    prompt = worker.GUIDED_JSON_SYSTEM_PROMPT
+    required_phrases = (
+        "conservative",                                   # conservatism
+        "8 entities, 6 facts, 6 relationships",           # hard caps
+        "NOT variables, types, or method names",          # code-content rule
+        'subject MUST be an entity name declared in THIS event',
+        "unambiguously identifies the person",            # email-alias pairing
+        "140 characters",
+        "NEVER skip an event",
+    )
+    for phrase in required_phrases:
+        assert phrase in prompt
+    # Pipe scaffolding gone
+    for scaffolding in ("COUNT THE PIPES", "PIPE-DELIMITED"):
+        assert scaffolding not in prompt
+# ----------------------------------------------------------------------
+# DISTILL_CHAT_TEMPLATE_KWARGS — thinking-teacher template switch
+# ----------------------------------------------------------------------
+def test_default_body_has_no_chat_template_kwargs(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """With the env var unset the parsed setting is None and the
+    request body carries no "chat_template_kwargs" key — identical to
+    before the knob existed (Qwen2.5-class teachers need no template
+    switches)."""
+    monkeypatch.delenv("DISTILL_CHAT_TEMPLATE_KWARGS", raising=False)
+    plain = _load_worker("worker_no_ctk")
+    assert plain.DISTILL_CHAT_TEMPLATE_KWARGS is None
+    body = plain._build_request_body("PROMPT", 5)
+    assert "chat_template_kwargs" not in body
+def test_chat_template_kwargs_forwarded(monkeypatch: pytest.MonkeyPatch) -> None:
+    """The Qwen3.x swap case: {"enable_thinking": false} must land
+    verbatim in every request body, in both output modes.
+    The mode and guided-param-style env vars are cleared up front so the
+    first load really is the default mode and the second really uses the
+    default ``response_format`` style, whatever the ambient environment."""
+    monkeypatch.delenv("DISTILL_OUTPUT_MODE", raising=False)
+    # DISTILL_GUIDED_PARAM_STYLE=guided_json would replace response_format
+    # with a top-level guided_json param and fail the last assertion.
+    monkeypatch.delenv("DISTILL_GUIDED_PARAM_STYLE", raising=False)
+    monkeypatch.setenv("DISTILL_CHAT_TEMPLATE_KWARGS", '{"enable_thinking": false}')
+    w = _load_worker("worker_ctk")
+    assert w.DISTILL_CHAT_TEMPLATE_KWARGS == {"enable_thinking": False}
+    body = w._build_request_body("PROMPT", 5)
+    assert body["chat_template_kwargs"] == {"enable_thinking": False}
+    monkeypatch.setenv("DISTILL_OUTPUT_MODE", "guided_json")
+    w2 = _load_worker("worker_ctk_guided")
+    body2 = w2._build_request_body("PROMPT", 5)
+    assert body2["chat_template_kwargs"] == {"enable_thinking": False}
+    assert "response_format" in body2
+def test_chat_template_kwargs_invalid_ignored(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Malformed JSON or a non-object must not take the worker down —
+    log + ignore, requests stay clean."""
+    bad_values = ("{not json", '["a", "list"]', '"a string"')
+    for case_no, bad in enumerate(bad_values):
+        monkeypatch.setenv("DISTILL_CHAT_TEMPLATE_KWARGS", bad)
+        # Name each load by its loop index: str hashes are salted per
+        # process, so a hash-derived name would differ between runs.
+        w = _load_worker(f"worker_ctk_bad_{case_no}")
+        assert w.DISTILL_CHAT_TEMPLATE_KWARGS is None
+        assert "chat_template_kwargs" not in w._build_request_body("PROMPT", 5)