npm - watchmyagents - Versions diffs - 1.1.1 → 1.1.2 - Mend

watchmyagents 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/package.json +1 -1
package/scripts/fetch-anthropic.js +21 -1
package/scripts/upload-fortress.js +20 -1
package/src/shield/policy.js +15 -1
package/src/shield/sources/fortress.js +36 -1
package/src/shield/stream.js +17 -0
package/src/sources/anthropic-managed.js +21 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "watchmyagents",
-  "version": "1.1.1",
+  "version": "1.1.2",
   "description": "Security observability + real-time policy enforcement for AI agents. Local-first NDJSON capture with a continuous Watch daemon that auto-uploads anonymized signals, Shield CLI that blocks policy violations live (with policies pulled from Fortress cloud), anonymizer producing signals-only payloads, bidirectional sync with WatchMyAgents Fortress, and one-command install as an always-on launchd/systemd service — closing the recursive Watch→Guardian→Shield security loop.",
   "type": "module",
   "files": [

package/scripts/fetch-anthropic.js CHANGED Viewed

@@ -85,6 +85,12 @@ function resolveModel(agent) {
 }
 // HTTPS POST helper for the --upload signals push (mirrors wma-upload-fortress).
+// v1.1.2 F-17: response body cap for the Fortress ingest-signals POST.
+// The expected reply is a small JSON confirmation ({signal_id, agent_id,
+// registered_new_agent}) — well under 1 MB. Any larger and the endpoint
+// is misconfigured or compromised; abort.
+const MAX_FORTRESS_RESPONSE_BYTES = 1 * 1024 * 1024;
 function postJson(url, headers, body) {
   return new Promise((resolveReq, rejectReq) => {
     const u = new URL(url);
@@ -97,8 +103,22 @@ function postJson(url, headers, body) {
       rejectUnauthorized: true,
     }, (res) => {
       const chunks = [];
-      res.on('data', (c) => chunks.push(c));
+      let receivedBytes = 0;
+      let aborted = false;
+      res.on('data', (c) => {
+        if (aborted) return;
+        receivedBytes += c.length;
+        if (receivedBytes > MAX_FORTRESS_RESPONSE_BYTES) {
+          aborted = true;
+          chunks.length = 0;
+          try { req.destroy(); } catch { /* already destroyed */ }
+          rejectReq(new Error(`Fortress response exceeded ${MAX_FORTRESS_RESPONSE_BYTES} bytes — aborting`));
+          return;
+        }
+        chunks.push(c);
+      });
       res.on('end', () => {
+        if (aborted) return;
         const raw = Buffer.concat(chunks).toString('utf8');
         let parsed = null; try { parsed = JSON.parse(raw); } catch { /* keep raw */ }
         resolveReq({ status: res.statusCode || 0, body: parsed ?? raw });

package/scripts/upload-fortress.js CHANGED Viewed

@@ -63,6 +63,11 @@ async function collectFiles(p) {
   return out;
 }
+// v1.1.2 F-17: Fortress ingest-signals response is a small confirmation
+// JSON. Cap at 1 MB and abort if the endpoint streams more — defensive
+// against a compromised or misconfigured response.
+const MAX_FORTRESS_RESPONSE_BYTES = 1 * 1024 * 1024;
 function postJson(url, headers, body) {
   return new Promise((resolveReq, rejectReq) => {
     const u = new URL(url);
@@ -85,8 +90,22 @@ function postJson(url, headers, body) {
       },
       (res) => {
         const chunks = [];
-        res.on('data', (c) => chunks.push(c));
+        let receivedBytes = 0;
+        let aborted = false;
+        res.on('data', (c) => {
+          if (aborted) return;
+          receivedBytes += c.length;
+          if (receivedBytes > MAX_FORTRESS_RESPONSE_BYTES) {
+            aborted = true;
+            chunks.length = 0;
+            try { req.destroy(); } catch { /* already destroyed */ }
+            rejectReq(new Error(`Fortress response exceeded ${MAX_FORTRESS_RESPONSE_BYTES} bytes — aborting`));
+            return;
+          }
+          chunks.push(c);
+        });
         res.on('end', () => {
+          if (aborted) return;
           const raw = Buffer.concat(chunks).toString('utf8');
           let parsed = null;
           try { parsed = JSON.parse(raw); } catch { /* keep raw */ }

package/src/shield/policy.js CHANGED Viewed

@@ -32,13 +32,27 @@ export async function loadPolicies(path) {
     throw new Error(`policy file ${path} has no "policies" array`);
   }
   // Pre-compile regex for performance + early failure on bad patterns.
+  const VALID_ACTIONS = ['allow', 'deny', 'interrupt'];
   for (const p of data.policies) {
     compileMatchRegexes(p.match || {});
-    if (!['allow', 'deny', 'interrupt'].includes(p.action)) {
+    if (!VALID_ACTIONS.includes(p.action)) {
       throw new Error(`policy ${p.id || p.name}: unsupported action "${p.action}"`);
     }
   }
+  // v1.1.2 F-14 (P2 Codex audit): validate the ruleset's default.action
+  // against the SAME canonical set as per-policy actions. Before this fix
+  // a typo like `default: { action: "drop" }` was accepted silently — at
+  // evaluation time evaluate() returned `decision: "drop"`, which the
+  // interrupt-mode runtime treated as a no-op (only deny/interrupt trigger
+  // termination) and the tool_confirmation-mode runtime left dangling
+  // (no allow/deny event sent). Either way the agent ran without
+  // enforcement — the exact opposite of the operator's intent.
   data.default = data.default || { action: 'allow' };
+  if (!VALID_ACTIONS.includes(data.default.action)) {
+    throw new Error(
+      `policy file ${path} default.action "${data.default.action}" is invalid — must be one of: ${VALID_ACTIONS.join(', ')}`,
+    );
+  }
   return data;
 }

package/src/shield/sources/fortress.js CHANGED Viewed

@@ -14,6 +14,15 @@ import { URL } from 'node:url';
 import { fortressEndpoint } from '../../fortress/url.js';
 const DEFAULT_TIMEOUT_MS = 15_000;
+// v1.1.2 F-17 (P3 Codex audit): cap on the total bytes we'll accumulate
+// for a Fortress JSON response before aborting the request. A misconfigured
+// or compromised endpoint streaming an unbounded body would otherwise
+// exhaust Shield's memory, despite the HTTPS-only + timeout guards.
+// 8 MB is far above the realistic ceiling for a customer's policy ruleset
+// (hundreds of policies × ~1 KB each → ~hundreds of KB). On overflow we
+// destroy the request, which propagates to onError + cached-ruleset
+// fallback.
+const MAX_RESPONSE_BYTES = 8 * 1024 * 1024;
 function httpsJson(method, url, headers, body, timeoutMs = DEFAULT_TIMEOUT_MS) {
   return new Promise((resolveReq, rejectReq) => {
@@ -35,8 +44,23 @@ function httpsJson(method, url, headers, body, timeoutMs = DEFAULT_TIMEOUT_MS) {
     };
     const req = httpsRequest(opts, (res) => {
       const chunks = [];
-      res.on('data', (c) => chunks.push(c));
+      let receivedBytes = 0;
+      let aborted = false;
+      res.on('data', (c) => {
+        if (aborted) return;
+        receivedBytes += c.length;
+        if (receivedBytes > MAX_RESPONSE_BYTES) {
+          aborted = true;
+          // Free anything we already buffered, then tear down the request.
+          chunks.length = 0;
+          try { req.destroy(); } catch { /* already destroyed */ }
+          rejectReq(new Error(`Fortress response exceeded ${MAX_RESPONSE_BYTES} bytes — aborting (received ${receivedBytes} so far)`));
+          return;
+        }
+        chunks.push(c);
+      });
       res.on('end', () => {
+        if (aborted) return;
         const raw = Buffer.concat(chunks).toString('utf8');
         let parsed = null;
         try { parsed = raw ? JSON.parse(raw) : null; } catch { /* keep raw */ }
@@ -179,6 +203,17 @@ export class FortressPolicySource {
           this.onError(new Error(`skipping invalid Fortress policy "${p?.rule_id || p?.name || '?'}": ${e.message}`));
         }
       }
+      // v1.1.2 F-15 (P2 Codex audit): the policy evaluator is "first match
+      // wins" (src/shield/policy.js evaluate()), so policy order matters.
+      // Fortress validates `priority` server-side, but the API does not
+      // contractually guarantee that the returned array is sorted by
+      // priority. If a wide "allow" rule sat before a higher-priority
+      // "deny" rule in the response, the deny would never fire. Sort
+      // client-side by descending priority (higher priority first) before
+      // assigning to ruleset. A missing `priority` counts as 0, and equal
+      // priorities keep their relative order because Array.prototype.sort
+      // is stable (guaranteed since ES2019) — predictable behavior.
+      compiled.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0));
       this.ruleset = {
         version: 1,
         policies: compiled,

package/src/shield/stream.js CHANGED Viewed

@@ -9,6 +9,15 @@
 const API_BASE = 'https://api.anthropic.com';
 const BETA = 'managed-agents-2026-04-01';
 const VERSION = '2023-06-01';
+// v1.1.2 F-16 (P2 Codex audit): hard cap on a single SSE frame buffer.
+// A buggy upstream proxy that strips event separators OR a compromised
+// Anthropic-style endpoint streaming bytes forever without "\n\n" would
+// otherwise OOM Shield's host. 1 MB is far above any real Anthropic
+// event payload (the heaviest events are agent.thinking + agent.message
+// which carry at most a few hundred KB of text). On overflow we throw,
+// which propagates through the generator and triggers the caller's
+// reconnect logic — same outcome as a network error.
+const MAX_SSE_FRAME_BYTES = 1 * 1024 * 1024;
 function authHeaders(apiKey) {
   return {
@@ -43,6 +52,14 @@ export async function* openEventStream({ apiKey, sessionId, signal }) {
       if (done) break;
       buffer += decoder.decode(value, { stream: true });
+      // v1.1.2 F-16: guard against an upstream that never emits "\n\n" —
+      // drop the buffer and throw; the caller's reconnect logic takes over.
+      // buffer.length is UTF-16 code units, not bytes: the cap is approximate.
+      if (buffer.length > MAX_SSE_FRAME_BYTES) {
+        buffer = '';
+        throw new Error(`SSE frame exceeded ${MAX_SSE_FRAME_BYTES} bytes — aborting stream (caller should reconnect)`);
+      }
       // SSE frames are separated by a blank line ("\n\n"). Each frame may
       // contain multiple lines; we only care about `data:` lines for now.
       let nlIdx;

package/src/sources/anthropic-managed.js CHANGED Viewed

@@ -29,6 +29,12 @@ const VERSION = '2023-06-01';
 // Hard cap on any single GET so a hung connection can't pin Watch/Shield
 // forever. getWithRetry will retry on timeout (the error propagates here).
 const REQUEST_TIMEOUT_MS = 30_000;
+// v1.1.2 F-17 (P3 Codex audit): cap on a single Anthropic response body.
+// Event history pages (/v1/sessions/{id}/events) can carry up to ~1000
+// events × thousands of bytes each, so 16 MB is the headroom we leave
+// before we conclude something is wrong. Above this we abort the
+// request and getWithRetry will retry on the next attempt.
+const MAX_ANTHROPIC_RESPONSE_BYTES = 16 * 1024 * 1024;
 function httpGet(apiKey, path) {
   return new Promise((resolve, reject) => {
@@ -43,8 +49,22 @@ function httpGet(apiKey, path) {
       },
     }, res => {
       const chunks = [];
-      res.on('data', c => chunks.push(c));
+      let receivedBytes = 0;
+      let aborted = false;
+      res.on('data', c => {
+        if (aborted) return;
+        receivedBytes += c.length;
+        if (receivedBytes > MAX_ANTHROPIC_RESPONSE_BYTES) {
+          aborted = true;
+          chunks.length = 0;
+          try { req.destroy(); } catch { /* already destroyed */ }
+          reject(new Error(`Anthropic response exceeded ${MAX_ANTHROPIC_RESPONSE_BYTES} bytes — aborting (${path})`));
+          return;
+        }
+        chunks.push(c);
+      });
       res.on('end', () => {
+        if (aborted) return;
         const body = Buffer.concat(chunks).toString('utf8');
         if (res.statusCode >= 200 && res.statusCode < 300) {
           try { resolve(JSON.parse(body)); } catch (e) { reject(e); }