npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.10.17 → 0.10.18 - Mend

@pentatonic-ai/ai-agent-sdk 0.10.17 → 0.10.18

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.cjs +1 -1
package/dist/index.js +1 -1
package/package.json +1 -1
package/packages/memory-engine-v2/extractor-async/test_queue_attempts.py +69 -0
package/packages/memory-engine-v2/extractor-async/worker.py +30 -5

package/dist/index.cjs CHANGED Viewed

@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.17";
+var VERSION = "0.10.18";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/dist/index.js CHANGED Viewed

@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.17";
+var VERSION = "0.10.18";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.10.17",
+  "version": "0.10.18",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",

package/packages/memory-engine-v2/extractor-async/test_queue_attempts.py ADDED Viewed

@@ -0,0 +1,69 @@
+"""Unit tests for the distillation_queue attempts/retry accounting.
+Regression guard for the lease-reclaim bug (gotcha #11): claiming must NOT
+consume the retry budget — only genuine processing failures do — so a worker
+restart (deploy recreating the container) can re-claim stranded in-flight work
+indefinitely instead of stranding it in `claimed` forever. The DB-touching
+claim/release/fail SQL can't be executed here (no DB in this suite), so we pin
+the pure give-up decision directly and guard the SQL text by source inspection.
+"""
+from __future__ import annotations
+import importlib.util
+from pathlib import Path
+import pytest
+_THIS = Path(__file__).resolve().parent
+def _load_worker(name: str = "extractor_async_worker_qa"):
+    spec = importlib.util.spec_from_file_location(name, _THIS / "worker.py")
+    assert spec and spec.loader
+    mod = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mod)
+    return mod
+try:
+    worker = _load_worker()
+except ImportError as e:
+    pytest.skip(f"extractor-async deps unavailable: {e}", allow_module_level=True)
+def test_attempts_exhausted_gives_exactly_max_genuine_tries(monkeypatch) -> None:
+    """`attempts` is the count of PRIOR genuine failures at claim time. With
+    MAX_ATTEMPTS=3 the sequence is: fail#1 (attempts=0)→retry, fail#2
+    (attempts=1)→retry, fail#3 (attempts=2)→terminal. Exactly 3 tries."""
+    monkeypatch.setattr(worker, "MAX_ATTEMPTS", 3)
+    assert worker._attempts_exhausted(0) is False  # 1st failure → retry
+    assert worker._attempts_exhausted(1) is False  # 2nd failure → retry
+    assert worker._attempts_exhausted(2) is True   # 3rd failure → give up
+    assert worker._attempts_exhausted(3) is True
+def test_attempts_exhausted_respects_max(monkeypatch) -> None:
+    monkeypatch.setattr(worker, "MAX_ATTEMPTS", 1)
+    assert worker._attempts_exhausted(0) is True   # single try, no retry
+    monkeypatch.setattr(worker, "MAX_ATTEMPTS", 5)
+    assert worker._attempts_exhausted(3) is False
+    assert worker._attempts_exhausted(4) is True
+def test_claim_sql_does_not_increment_attempts() -> None:
+    """The fix: claiming must not touch `attempts` (only release/fail do). Guard
+    against a regression that reintroduces the increment at claim time. We check
+    the source of claim_next_batch rather than execute it (no DB here)."""
+    import inspect
+    src = inspect.getsource(worker.claim_next_batch)
+    # the claim UPDATE must not bump attempts (explanatory SQL comments may
+    # mention the column); the eligibility predicate `attempts < %s` must remain.
+    assert "attempts = attempts + 1" not in src
+    assert "attempts <" in src  # eligibility gate still present
+def test_release_and_fail_increment_attempts() -> None:
+    import inspect
+    assert "attempts = attempts + 1" in inspect.getsource(worker.release_claim)
+    assert "attempts = attempts + 1" in inspect.getsource(worker.mark_failed)

package/packages/memory-engine-v2/extractor-async/worker.py CHANGED Viewed

@@ -1905,8 +1905,16 @@ def claim_next_batch(conn: psycopg.Connection) -> list[dict[str, Any]]:
               status = 'claimed',
               claimed_by = %s,
               claimed_at = NOW(),
-              claim_expires_at = NOW() + (%s || ' seconds')::interval,
-              attempts = attempts + 1
+              claim_expires_at = NOW() + (%s || ' seconds')::interval
+              -- NB: claiming does NOT increment `attempts`. `attempts` counts
+              -- genuine PROCESSING failures (release_claim / mark_failed), not
+              -- claim-grabs. A worker that dies mid-batch (e.g. a deploy
+              -- recreates the container) leaves its rows in `claimed`; the lease
+              -- expires and they are re-claimed here WITHOUT burning the retry
+              -- budget — so restarts can't strand in-flight work. (Pre-fix, the
+              -- increment lived here and ~3 deploys could push a row to
+              -- attempts=MAX, making it forever-ineligible for reclaim AND never
+              -- marked failed → orphaned in `claimed`. See gotcha #11.)
             WHERE id IN (
               SELECT id FROM distillation_queue
               WHERE (
@@ -1943,14 +1951,21 @@ def mark_done(conn: psycopg.Connection, queue_id: int) -> None:
 def mark_failed(conn: psycopg.Connection, queue_id: int, error: str) -> None:
+    # Terminal genuine-failure path → count the attempt (claiming no longer
+    # does; see claim_next_batch). Leaves the row's `attempts` reflecting the
+    # true number of processing attempts on a failed row.
     with conn.cursor() as cur:
         cur.execute(
-            "UPDATE distillation_queue SET status = 'failed', last_error = %s WHERE id = %s",
+            "UPDATE distillation_queue SET status = 'failed', "
+            "attempts = attempts + 1, last_error = %s WHERE id = %s",
             (error[:1024], queue_id),
         )
 def release_claim(conn: psycopg.Connection, queue_id: int, error: str) -> None:
+    # Recoverable genuine-failure path (will retry) → count the attempt. This is
+    # where the retry budget is spent — NOT at claim time — so a deploy-induced
+    # reclaim never consumes it.
     with conn.cursor() as cur:
         cur.execute(
             """
@@ -1959,6 +1974,7 @@ def release_claim(conn: psycopg.Connection, queue_id: int, error: str) -> None:
               claimed_by = NULL,
               claimed_at = NULL,
               claim_expires_at = NULL,
+              attempts = attempts + 1,
               last_error = %s
             WHERE id = %s
             """,
@@ -1966,6 +1982,15 @@ def release_claim(conn: psycopg.Connection, queue_id: int, error: str) -> None:
         )
+def _attempts_exhausted(attempts: int) -> bool:
+    """Whether THIS processing failure should be terminal (mark_failed) rather
+    than retried (release_claim). `attempts` is the row's value at claim time =
+    the count of PRIOR genuine failures (claiming no longer increments it). This
+    failure is attempt #(attempts+1), so we give up once that reaches
+    MAX_ATTEMPTS — giving exactly MAX_ATTEMPTS genuine tries before failing."""
+    return attempts + 1 >= MAX_ATTEMPTS
 # --------------------------------------------------------------------
 # Main loop
 # --------------------------------------------------------------------
@@ -2142,7 +2167,7 @@ async def _run_teacher(
                 log.warning(
                     f"extraction failed queue_id={queue_id} attempts={attempts}: {err}"
                 )
-                if attempts >= MAX_ATTEMPTS:
+                if _attempts_exhausted(attempts):
                     mark_failed(conn, queue_id, err)
                 else:
                     release_claim(conn, queue_id, err)
@@ -2254,7 +2279,7 @@ def _apply_extraction(
         log.warning(
             f"db upsert failed queue_id={queue_id} attempts={attempts}: {err}"
         )
-        if attempts >= MAX_ATTEMPTS:
+        if _attempts_exhausted(attempts):
             mark_failed(conn, queue_id, err)
         else:
             release_claim(conn, queue_id, err)