npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.10.3 → 0.10.5 - Mend

@pentatonic-ai/ai-agent-sdk 0.10.3 → 0.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.cjs +1 -1
package/dist/index.js +1 -1
package/package.json +1 -1
package/packages/memory-engine-v2/extractor-async/Dockerfile +8 -5
package/packages/memory-engine-v2/extractor-async/worker.py +56 -0

package/dist/index.cjs CHANGED

@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.3";
+var VERSION = "0.10.5";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/dist/index.js CHANGED

@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.3";
+var VERSION = "0.10.5";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.10.3",
+  "version": "0.10.5",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",

package/packages/memory-engine-v2/extractor-async/Dockerfile CHANGED

@@ -7,10 +7,13 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY worker.py .
 # Pure helper modules — sibling imports inside worker.py
-# (noise_filter, confidence, entity_id). entity_id.py is byte-identical to
-# extractor-sync's copy (per-service build contexts prevent a shared module;
-# tests/test_entity_id_parity.py guards drift). The test_*.py files are
-# intentionally excluded; they're for local pytest, not container runtime.
-COPY noise_filter.py confidence.py entity_id.py ./
+# (noise_filter, confidence, entity_id, sensitive_filter). entity_id.py is
+# byte-identical to extractor-sync's copy (per-service build contexts
+# prevent a shared module; tests/test_entity_id_parity.py guards drift).
+# sensitive_filter was added in #67 (interpersonal-gossip guard); if you
+# add a new sibling module, add it here too — missing COPY makes the
+# container crash-loop on import at startup (observed 2026-06-08 deploy).
+# The test_*.py files are intentionally excluded; pytest only, not runtime.
+COPY noise_filter.py confidence.py entity_id.py sensitive_filter.py ./
 CMD ["python", "worker.py"]

package/packages/memory-engine-v2/extractor-async/worker.py CHANGED

@@ -761,6 +761,25 @@ SKIP_ATTRIBUTE_SOURCES = set(
 )
 DISTILL_MAX_AGE_DAYS = int(os.environ.get("DISTILL_MAX_AGE_DAYS", "90"))
+# Layer-1 content pre-filter (cascade tier 1 — the cheap deterministic gate in
+# front of the student/7B). Skips events with NO extractable signal BEFORE the
+# LLM, so GPU is spent only on text that can yield facts.
+#   - bytes-garbage: a binary doc (raw PDF bytes) stored as text decodes to a
+#     wall of U+FFFD replacement chars. `build_event_block` feeds `content` to
+#     the LLM, so it sees the garbage and extracts nothing (live 2026-06-10:
+#     35,296/39,453 pentatonic-team `doc` events are bytes-garbage). HIGH
+#     PRECISION — real text effectively never crosses a 5–10% replacement-char
+#     ratio, so this is a zero-quality-loss skip. (Durable fix = extract PDF
+#     text at INGEST; this stops the GPU waste meanwhile.)
+#   - too-short: trivially short content (one-line acks / emoji) has no facts.
+#     Conservative and OFF by default (0) to guarantee zero quality loss; tune
+#     up once layer-2 (the student model) owns the borderline cases.
+SKIP_BYTES_GARBAGE = os.environ.get(
+    "DISTILL_SKIP_BYTES_GARBAGE", "true"
+).strip().lower() not in ("false", "0", "no", "off")
+GARBAGE_CHAR_RATIO = float(os.environ.get("DISTILL_GARBAGE_CHAR_RATIO", "0.10"))
+MIN_CONTENT_CHARS = int(os.environ.get("DISTILL_MIN_CONTENT_CHARS", "0"))
 def claim_next_batch(conn: psycopg.Connection) -> list[dict[str, Any]]:
     """Atomically claim up to BATCH_SIZE pending items. SKIP LOCKED so
@@ -843,6 +862,43 @@ def claim_next_batch(conn: psycopg.Connection) -> list[dict[str, Any]]:
             """,
             (DISTILL_MAX_AGE_DAYS, DISTILL_MAX_AGE_DAYS),
         )
+        # Pre-filter: bytes-garbage content. A binary doc (raw PDF bytes)
+        # stored as text decodes to mostly U+FFFD (chr(65533)); the LLM
+        # extracts nothing from it. Skip when the replacement-char ratio
+        # exceeds GARBAGE_CHAR_RATIO — real text never crosses it, so no
+        # quality loss. Scoped to the pending set; one cheap UPDATE/cycle.
+        if SKIP_BYTES_GARBAGE:
+            cur.execute(
+                """
+                UPDATE distillation_queue dq SET
+                  status = 'done',
+                  completed_at = NOW(),
+                  last_error = 'filtered: bytes_garbage'
+                FROM events e
+                WHERE dq.event_id = e.id
+                  AND dq.status = 'pending'
+                  AND length(e.content) > 0
+                  AND (length(e.content) - length(replace(e.content, chr(65533), '')))::float
+                      / length(e.content) > %s
+                """,
+                (GARBAGE_CHAR_RATIO,),
+            )
+        # Pre-filter: trivially-short content (one-line acks / emoji). OFF by
+        # default (MIN_CONTENT_CHARS=0) so it never costs a fact unless tuned on.
+        if MIN_CONTENT_CHARS > 0:
+            cur.execute(
+                """
+                UPDATE distillation_queue dq SET
+                  status = 'done',
+                  completed_at = NOW(),
+                  last_error = 'filtered: too_short'
+                FROM events e
+                WHERE dq.event_id = e.id
+                  AND dq.status = 'pending'
+                  AND length(trim(e.content)) < %s
+                """,
+                (MIN_CONTENT_CHARS,),
+            )
     with conn.cursor(row_factory=psycopg.rows.dict_row) as cur:
         cur.execute(