@pentatonic-ai/ai-agent-sdk 0.10.17 → 0.10.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs
CHANGED
|
@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
878
878
|
}
|
|
879
879
|
|
|
880
880
|
// src/telemetry.js
|
|
881
|
-
var VERSION = "0.10.17";
|
|
881
|
+
var VERSION = "0.10.18";
|
|
882
882
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
883
883
|
function machineId() {
|
|
884
884
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/dist/index.js
CHANGED
|
@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
847
847
|
}
|
|
848
848
|
|
|
849
849
|
// src/telemetry.js
|
|
850
|
-
var VERSION = "0.10.17";
|
|
850
|
+
var VERSION = "0.10.18";
|
|
851
851
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
852
852
|
function machineId() {
|
|
853
853
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.10.17",
|
|
3
|
+
"version": "0.10.18",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Unit tests for the distillation_queue attempts/retry accounting.
|
|
2
|
+
|
|
3
|
+
Regression guard for the lease-reclaim bug (gotcha #11): claiming must NOT
|
|
4
|
+
consume the retry budget — only genuine processing failures do — so a worker
|
|
5
|
+
restart (deploy recreating the container) can re-claim stranded in-flight work
|
|
6
|
+
indefinitely instead of stranding it in `claimed` forever. The DB-touching
|
|
7
|
+
claim/release/fail SQL isn't unit-testable here (no DB in this suite), but the
|
|
8
|
+
give-up decision is pure logic, so we pin it.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import importlib.util
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
import pytest
|
|
17
|
+
|
|
18
|
+
_THIS = Path(__file__).resolve().parent
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _load_worker(name: str = "extractor_async_worker_qa"):
|
|
22
|
+
spec = importlib.util.spec_from_file_location(name, _THIS / "worker.py")
|
|
23
|
+
assert spec and spec.loader
|
|
24
|
+
mod = importlib.util.module_from_spec(spec)
|
|
25
|
+
spec.loader.exec_module(mod)
|
|
26
|
+
return mod
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
worker = _load_worker()
|
|
31
|
+
except ImportError as e:
|
|
32
|
+
pytest.skip(f"extractor-async deps unavailable: {e}", allow_module_level=True)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_attempts_exhausted_gives_exactly_max_genuine_tries(monkeypatch) -> None:
|
|
36
|
+
"""`attempts` is the count of PRIOR genuine failures at claim time. With
|
|
37
|
+
MAX_ATTEMPTS=3 the sequence is: fail#1 (attempts=0)→retry, fail#2
|
|
38
|
+
(attempts=1)→retry, fail#3 (attempts=2)→terminal. Exactly 3 tries."""
|
|
39
|
+
monkeypatch.setattr(worker, "MAX_ATTEMPTS", 3)
|
|
40
|
+
assert worker._attempts_exhausted(0) is False # 1st failure → retry
|
|
41
|
+
assert worker._attempts_exhausted(1) is False # 2nd failure → retry
|
|
42
|
+
assert worker._attempts_exhausted(2) is True # 3rd failure → give up
|
|
43
|
+
assert worker._attempts_exhausted(3) is True
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_attempts_exhausted_respects_max(monkeypatch) -> None:
|
|
47
|
+
monkeypatch.setattr(worker, "MAX_ATTEMPTS", 1)
|
|
48
|
+
assert worker._attempts_exhausted(0) is True # single try, no retry
|
|
49
|
+
monkeypatch.setattr(worker, "MAX_ATTEMPTS", 5)
|
|
50
|
+
assert worker._attempts_exhausted(3) is False
|
|
51
|
+
assert worker._attempts_exhausted(4) is True
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_claim_sql_does_not_increment_attempts() -> None:
|
|
55
|
+
"""The fix: claiming must not touch `attempts` (only release/fail do). Guard
|
|
56
|
+
against a regression that reintroduces the increment at claim time. We check
|
|
57
|
+
the source of claim_next_batch rather than execute it (no DB here)."""
|
|
58
|
+
import inspect
|
|
59
|
+
src = inspect.getsource(worker.claim_next_batch)
|
|
60
|
+
# the claim UPDATE must not bump attempts; the only attempts reference is the
|
|
61
|
+
# eligibility predicate `attempts < %s`.
|
|
62
|
+
assert "attempts = attempts + 1" not in src
|
|
63
|
+
assert "attempts <" in src # eligibility gate still present
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_release_and_fail_increment_attempts() -> None:
|
|
67
|
+
import inspect
|
|
68
|
+
assert "attempts = attempts + 1" in inspect.getsource(worker.release_claim)
|
|
69
|
+
assert "attempts = attempts + 1" in inspect.getsource(worker.mark_failed)
|
|
@@ -1905,8 +1905,16 @@ def claim_next_batch(conn: psycopg.Connection) -> list[dict[str, Any]]:
|
|
|
1905
1905
|
status = 'claimed',
|
|
1906
1906
|
claimed_by = %s,
|
|
1907
1907
|
claimed_at = NOW(),
|
|
1908
|
-
claim_expires_at = NOW() + (%s || ' seconds')::interval
|
|
1909
|
-
|
|
1908
|
+
claim_expires_at = NOW() + (%s || ' seconds')::interval
|
|
1909
|
+
-- NB: claiming does NOT increment `attempts`. `attempts` counts
|
|
1910
|
+
-- genuine PROCESSING failures (release_claim / mark_failed), not
|
|
1911
|
+
-- claim-grabs. A worker that dies mid-batch (e.g. a deploy
|
|
1912
|
+
-- recreates the container) leaves its rows in `claimed`; the lease
|
|
1913
|
+
-- expires and they are re-claimed here WITHOUT burning the retry
|
|
1914
|
+
-- budget — so restarts can't strand in-flight work. (Pre-fix, the
|
|
1915
|
+
-- increment lived here and ~3 deploys could push a row to
|
|
1916
|
+
-- attempts=MAX, making it forever-ineligible for reclaim AND never
|
|
1917
|
+
-- marked failed → orphaned in `claimed`. See gotcha #11.)
|
|
1910
1918
|
WHERE id IN (
|
|
1911
1919
|
SELECT id FROM distillation_queue
|
|
1912
1920
|
WHERE (
|
|
@@ -1943,14 +1951,21 @@ def mark_done(conn: psycopg.Connection, queue_id: int) -> None:
|
|
|
1943
1951
|
|
|
1944
1952
|
|
|
1945
1953
|
def mark_failed(conn: psycopg.Connection, queue_id: int, error: str) -> None:
|
|
1954
|
+
# Terminal genuine-failure path → count the attempt (claiming no longer
|
|
1955
|
+
# does; see claim_next_batch). Leaves the row's `attempts` reflecting the
|
|
1956
|
+
# true number of processing attempts on a failed row.
|
|
1946
1957
|
with conn.cursor() as cur:
|
|
1947
1958
|
cur.execute(
|
|
1948
|
-
"UPDATE distillation_queue SET status = 'failed', last_error = %s WHERE id = %s",
|
|
1959
|
+
"UPDATE distillation_queue SET status = 'failed', "
|
|
1960
|
+
"attempts = attempts + 1, last_error = %s WHERE id = %s",
|
|
1949
1961
|
(error[:1024], queue_id),
|
|
1950
1962
|
)
|
|
1951
1963
|
|
|
1952
1964
|
|
|
1953
1965
|
def release_claim(conn: psycopg.Connection, queue_id: int, error: str) -> None:
|
|
1966
|
+
# Recoverable genuine-failure path (will retry) → count the attempt. This is
|
|
1967
|
+
# where the retry budget is spent — NOT at claim time — so a deploy-induced
|
|
1968
|
+
# reclaim never consumes it.
|
|
1954
1969
|
with conn.cursor() as cur:
|
|
1955
1970
|
cur.execute(
|
|
1956
1971
|
"""
|
|
@@ -1959,6 +1974,7 @@ def release_claim(conn: psycopg.Connection, queue_id: int, error: str) -> None:
|
|
|
1959
1974
|
claimed_by = NULL,
|
|
1960
1975
|
claimed_at = NULL,
|
|
1961
1976
|
claim_expires_at = NULL,
|
|
1977
|
+
attempts = attempts + 1,
|
|
1962
1978
|
last_error = %s
|
|
1963
1979
|
WHERE id = %s
|
|
1964
1980
|
""",
|
|
@@ -1966,6 +1982,15 @@ def release_claim(conn: psycopg.Connection, queue_id: int, error: str) -> None:
|
|
|
1966
1982
|
)
|
|
1967
1983
|
|
|
1968
1984
|
|
|
1985
|
+
def _attempts_exhausted(attempts: int) -> bool:
|
|
1986
|
+
"""Whether THIS processing failure should be terminal (mark_failed) rather
|
|
1987
|
+
than retried (release_claim). `attempts` is the row's value at claim time =
|
|
1988
|
+
the count of PRIOR genuine failures (claiming no longer increments it). This
|
|
1989
|
+
failure is attempt #(attempts+1), so we give up once that reaches
|
|
1990
|
+
MAX_ATTEMPTS — giving exactly MAX_ATTEMPTS genuine tries before failing."""
|
|
1991
|
+
return attempts + 1 >= MAX_ATTEMPTS
|
|
1992
|
+
|
|
1993
|
+
|
|
1969
1994
|
# --------------------------------------------------------------------
|
|
1970
1995
|
# Main loop
|
|
1971
1996
|
# --------------------------------------------------------------------
|
|
@@ -2142,7 +2167,7 @@ async def _run_teacher(
|
|
|
2142
2167
|
log.warning(
|
|
2143
2168
|
f"extraction failed queue_id={queue_id} attempts={attempts}: {err}"
|
|
2144
2169
|
)
|
|
2145
|
-
if attempts
|
|
2170
|
+
if _attempts_exhausted(attempts):
|
|
2146
2171
|
mark_failed(conn, queue_id, err)
|
|
2147
2172
|
else:
|
|
2148
2173
|
release_claim(conn, queue_id, err)
|
|
@@ -2254,7 +2279,7 @@ def _apply_extraction(
|
|
|
2254
2279
|
log.warning(
|
|
2255
2280
|
f"db upsert failed queue_id={queue_id} attempts={attempts}: {err}"
|
|
2256
2281
|
)
|
|
2257
|
-
if attempts
|
|
2282
|
+
if _attempts_exhausted(attempts):
|
|
2258
2283
|
mark_failed(conn, queue_id, err)
|
|
2259
2284
|
else:
|
|
2260
2285
|
release_claim(conn, queue_id, err)
|