npm - @circuitwall/jarela - Versions diffs - 0.9.3 → 0.10.0 - Mend

@circuitwall/jarela 0.9.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

package/lib/agents/run-registry.ts CHANGED Viewed

@@ -15,6 +15,23 @@ type Subscriber = (chunk: StreamChunk) => void;
 const MAX_BUFFERED = 4000;        // text_delta chunks accumulate fast; cap them
 const RECENT_TTL_MS = 5 * 60_000; // keep finished runs visible for 5 min
+// Idle (no-progress) ceiling: if no chunk has been broadcast for this
+// long the registry assumes the LLM/tool call wedged and force-finishes
+// the run. This is the user-perceived "stream is dead" signal and is
+// short by design — long legitimate turns keep streaming text/tool
+// chunks, so they reset the idle clock on every broadcast(). The
+// wall-clock ceiling (runMaxMs) is the absolute safety net for the
+// degenerate case where broadcast() is never called at all (or fires
+// faster than the idle window forever).
+// Override with JARELA_RUN_IDLE_MS / JARELA_RUN_MAX_MS.
+/**
+ * Idle (no-progress) timeout in milliseconds for the idle watchdog.
+ *
+ * Reads `JARELA_RUN_IDLE_MS`; any value that is missing, non-numeric,
+ * non-finite, zero or negative falls back to 90 seconds.
+ *
+ * @returns a positive delay that is safe to hand to `setTimeout`.
+ */
+function runIdleMs(): number {
+  // setTimeout only honours delays up to 2^31-1 ms. Anything larger is
+  // coerced to 1 ms, which would make the self-rearming idle watchdog wake
+  // up roughly every millisecond for the life of the run. Clamp instead.
+  const MAX_TIMER_DELAY_MS = 2_147_483_647;
+  const raw = Number(process.env.JARELA_RUN_IDLE_MS);
+  if (!Number.isFinite(raw) || raw <= 0) return 90_000;
+  return Math.min(raw, MAX_TIMER_DELAY_MS);
+}
+/**
+ * Wall-clock ceiling in milliseconds for a single run.
+ *
+ * Reads `JARELA_RUN_MAX_MS`; any value that is missing, non-numeric,
+ * non-finite, zero or negative falls back to 15 minutes.
+ *
+ * @returns a positive delay that is safe to hand to `setTimeout`.
+ */
+function runMaxMs(): number {
+  // setTimeout only honours delays up to 2^31-1 ms. A larger override
+  // would be coerced to 1 ms and the wall-clock watchdog would abort every
+  // run almost immediately. Clamp to the largest delay the timer supports.
+  const MAX_TIMER_DELAY_MS = 2_147_483_647;
+  const raw = Number(process.env.JARELA_RUN_MAX_MS);
+  if (!Number.isFinite(raw) || raw <= 0) return 15 * 60_000;
+  return Math.min(raw, MAX_TIMER_DELAY_MS);
+}
 export interface ActiveRun {
   thread_id: string;
@@ -30,6 +47,9 @@ export interface ActiveRun {
   // disconnects), we signal this controller so the LangGraph stream cancels
   // itself instead of running to completion in the background.
   abort: AbortController;
+  // Last activity timestamp — bumped on every broadcast() so the idle
+  // watchdog can tell live progress from a wedged stream.
+  last_chunk_at: number;
 }
 const runs = new Map<string, ActiveRun>();
@@ -40,25 +60,64 @@ export function startRun(thread_id: string, agent_id: string | null): ActiveRun
   if (existing && existing.status === "running") {
     throw new Error(`A run is already active for thread ${thread_id}`);
   }
+  const now = Date.now();
   const run: ActiveRun = {
     thread_id,
     agent_id,
-    started_at: Date.now(),
+    started_at: now,
     finished_at: null,
     status: "running",
     events: [],
     subscribers: new Set(),
     final_text: "",
     abort: new AbortController(),
+    last_chunk_at: now,
   };
   runs.set(thread_id, run);
+  scheduleIdleWatchdog(run);
+  scheduleMaxWatchdog(run);
   return run;
 }
+/**
+ * Arms a one-shot timer that checks whether `run` has gone quiet.
+ *
+ * The timer is due at `last_chunk_at + idleMs`. When it fires and the run
+ * has produced a chunk in the meantime, the watchdog re-arms itself for
+ * the new deadline; otherwise it aborts the run with reason
+ * "run_idle_timeout" and finishes it with status "error". No timer handle
+ * is stored on the run: the callback simply does nothing if the run is
+ * no longer the registry entry for its thread or is no longer running.
+ */
+function scheduleIdleWatchdog(run: ActiveRun): void {
+  const idleMs = runIdleMs();
+  const deadline = run.last_chunk_at + idleMs;
+  const delay = Math.max(0, deadline - Date.now());
+  const onTick = (): void => {
+    const stillRegistered = runs.get(run.thread_id) === run;
+    if (!stillRegistered) return;
+    if (run.status !== "running") return;
+    const idle = Date.now() - run.last_chunk_at;
+    if (idle >= idleMs) {
+      console.warn(`[run-registry] idle watchdog: force-finishing stalled run for thread ${run.thread_id} after ${idle}ms of no progress`);
+      try { run.abort.abort("run_idle_timeout"); } catch { /* best-effort: ignore errors thrown by abort() */ }
+      finishRun(run, "error");
+      return;
+    }
+    // Progress was made since this timer was armed; wait out the new window.
+    scheduleIdleWatchdog(run);
+  };
+  // unref (where available) so a pending watchdog never keeps the process alive.
+  setTimeout(onTick, delay).unref?.();
+}
+/**
+ * Arms the wall-clock watchdog for `run`.
+ *
+ * After `runMaxMs()` milliseconds, if `run` is still the registry entry
+ * for its thread and still "running", the run is aborted with reason
+ * "run_watchdog_timeout" and finished with status "error". Unlike the
+ * idle watchdog this timer is never re-armed.
+ */
+function scheduleMaxWatchdog(run: ActiveRun): void {
+  const ceilingMs = runMaxMs();
+  const onExpire = (): void => {
+    const superseded = runs.get(run.thread_id) !== run;
+    if (superseded) return;
+    if (run.status !== "running") return;
+    console.warn(`[run-registry] wall-clock watchdog: force-finishing run for thread ${run.thread_id} after ${ceilingMs}ms`);
+    try { run.abort.abort("run_watchdog_timeout"); } catch { /* best-effort: ignore errors thrown by abort() */ }
+    finishRun(run, "error");
+  };
+  // unref (where available) so a pending watchdog never keeps the process alive.
+  setTimeout(onExpire, ceilingMs).unref?.();
+}
 export function broadcast(run: ActiveRun, chunk: StreamChunk): void {
   // Identity-check: a superseded run must not smear trailing chunks onto
   // the replacement entry in the registry.
   if (runs.get(run.thread_id) !== run) return;
+  run.last_chunk_at = Date.now();
   if (chunk.type === "text_delta") {
     run.final_text += (chunk.data.delta as string) ?? "";
   }

package/lib/stores/dashboard-metrics.test.ts ADDED Viewed

@@ -0,0 +1,33 @@
+import { describe, it, expect } from "vitest";
+import { computeDataQuality } from "./dashboard-metrics";
+// Unit tests for computeDataQuality(measured, estimated): the counters are
+// echoed back unchanged and measured_pct is the measured share of the total.
+describe("computeDataQuality", () => {
+  // No traffic at all: the ratio is defined as 1 rather than 0/0.
+  it("treats empty windows as fully measured to avoid a misleading red chip", () => {
+    expect(computeDataQuality(0, 0)).toEqual({
+      measured_messages: 0,
+      estimated_messages: 0,
+      measured_pct: 1,
+    });
+  });
+  // 9 measured out of 10 total -> 0.9.
+  it("reports the measured ratio for mixed windows", () => {
+    expect(computeDataQuality(9, 1)).toEqual({
+      measured_messages: 9,
+      estimated_messages: 1,
+      measured_pct: 0.9,
+    });
+  });
+  // 1/3 = 0.3333... is expected to come back as exactly 0.3333.
+  it("rounds to four decimals", () => {
+    const q = computeDataQuality(1, 2);
+    expect(q.measured_pct).toBe(0.3333);
+  });
+  // Traffic exists but none of it is measured -> 0, not the empty-window 1.
+  it("reports 0% when every row is a legacy estimate", () => {
+    expect(computeDataQuality(0, 7)).toEqual({
+      measured_messages: 0,
+      estimated_messages: 7,
+      measured_pct: 0,
+    });
+  });
+});

package/lib/stores/dashboard-metrics.ts CHANGED Viewed

@@ -7,6 +7,24 @@ import type { PersistedToolEvent } from "@/lib/stores/threads";
 const CHARS_PER_TOKEN = 4;
 const DEFAULT_WINDOW_DAYS = 30;
+/**
+ * Input-token totals split by tier, as exposed on dashboard totals,
+ * per-day series points and day breakdowns. Values are sums of the
+ * per-row `mu_*_tokens` snapshot columns; rows whose tier columns are
+ * null contribute zero.
+ */
+export interface DashboardTierTokens {
+  /** Sum of `mu_hot_tokens` over the aggregated rows. */
+  hot_tokens: number;
+  /** Sum of `mu_warm_tokens` over the aggregated rows. */
+  warm_tokens: number;
+  /** Sum of `mu_facts_tokens` over the aggregated rows. */
+  facts_tokens: number;
+  /** Sum of `mu_overhead_tokens` over the aggregated rows. */
+  overhead_tokens: number;
+  /** Sum of the four tiers — convenience for stacked-bar totals. */
+  measured_input_tokens: number;
+}
+/**
+ * Summary of how much of the window's assistant traffic is backed by a
+ * usage snapshot versus a content-length estimate. Produced by
+ * `computeDataQuality`.
+ */
+export interface DashboardDataQuality {
+  /** Assistant turns in the window that have an immutable message_usage snapshot. */
+  measured_messages: number;
+  /** Assistant turns falling back to content-length estimates. */
+  estimated_messages: number;
+  /** measured / (measured + estimated), 0..1; 1 when no traffic. */
+  measured_pct: number;
+}
 export interface DashboardSeriesPoint {
   day: string;
   input_tokens_est: number;
@@ -17,6 +35,11 @@ export interface DashboardSeriesPoint {
   tool_errors: number;
   success_rate: number;
   error_rate: number;
+  /** Per-tier breakdown of authoritative snapshot input tokens for the
+   *  day. Zero for legacy rows with no message_usage entry — these are
+   *  surfaced via the `data_quality` chip instead so users know the bar
+   *  reflects only measured traffic. */
+  tier_tokens: DashboardTierTokens;
 }
 export interface DashboardToolTop {
@@ -92,6 +115,7 @@ export interface DashboardDayBreakdown {
     tool_errors: number;
     success_rate: number;
     error_rate: number;
+    tier_tokens: DashboardTierTokens;
   };
   top_agents: DashboardAgentTop[];
   by_provider: DashboardProviderBreakdown[];
@@ -110,6 +134,8 @@ export interface DashboardMetrics {
     tool_errors: number;
     success_rate: number;
     error_rate: number;
+    tier_tokens: DashboardTierTokens;
+    data_quality: DashboardDataQuality;
   };
   series: DashboardSeriesPoint[];
   top_tools: DashboardToolTop[];
@@ -146,6 +172,17 @@ type UsageRow = {
   mu_model_config_name: string | null;
   mu_agent_id: string | null;
   mu_agent_name: string | null;
+  mu_hot_tokens: number | null;
+  mu_warm_tokens: number | null;
+  mu_facts_tokens: number | null;
+  mu_overhead_tokens: number | null;
+};
+// Internal running totals for the four token tiers. Accumulated per day
+// and for the whole window, then converted to the public
+// DashboardTierTokens shape by tierBucketToTokens().
+type TierBucket = {
+  hot: number;
+  warm: number;
+  facts: number;
+  overhead: number;
 };
 type DayBucket = {
@@ -155,6 +192,7 @@ type DayBucket = {
   toolCalls: number;
   toolSuccesses: number;
   toolErrors: number;
+  tier: TierBucket;
 };
 type AgentBucket = {
@@ -235,7 +273,11 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
               mu.model_id         AS mu_model_id,
               mu.model_config_name AS mu_model_config_name,
               mu.agent_id         AS mu_agent_id,
-              mu.agent_name       AS mu_agent_name
+              mu.agent_name       AS mu_agent_name,
+              mu.hot_tokens       AS mu_hot_tokens,
+              mu.warm_tokens      AS mu_warm_tokens,
+              mu.facts_tokens     AS mu_facts_tokens,
+              mu.overhead_tokens  AS mu_overhead_tokens
          FROM messages m
          JOIN threads t ON t.thread_id = m.thread_id
          LEFT JOIN agent_configs a ON a.id = t.agent_id
@@ -301,6 +343,11 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
   let totalCalls = 0;
   let totalSuccesses = 0;
   let totalErrors = 0;
+  const tierTotals: TierBucket = { hot: 0, warm: 0, facts: 0, overhead: 0 };
+  // Data-quality counters: only assistant turns are eligible since
+  // user/system rows never carry a message_usage snapshot by design.
+  let measuredAssistantMessages = 0;
+  let estimatedAssistantMessages = 0;
   for (const row of usageRows) {
     const day = row.created_at.slice(0, 10);
@@ -332,6 +379,22 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
       attribModelConfig = row.mu_model_config_name ?? attribModelConfig;
       attribAgentId = row.mu_agent_id ?? attribAgentId;
       attribAgentName = row.mu_agent_name ?? attribAgentName;
+      if (row.role === "assistant") measuredAssistantMessages += 1;
+      // Accumulate tier breakdown — null columns (legacy snapshots
+      // before the tier wire-up) contribute zero, which is the right
+      // behaviour for a stacked bar that visualises *known* tier split.
+      const hot = row.mu_hot_tokens ?? 0;
+      const warm = row.mu_warm_tokens ?? 0;
+      const facts = row.mu_facts_tokens ?? 0;
+      const overhead = row.mu_overhead_tokens ?? 0;
+      tierTotals.hot += hot;
+      tierTotals.warm += warm;
+      tierTotals.facts += facts;
+      tierTotals.overhead += overhead;
+      dayBucket.tier.hot += hot;
+      dayBucket.tier.warm += warm;
+      dayBucket.tier.facts += facts;
+      dayBucket.tier.overhead += overhead;
     } else if (row.role === "user" && threadHasSnapshot) {
       // Suppressed: snapshotted assistant turns in this thread already
       // capture this user message's tokens in their input_tokens.
@@ -343,6 +406,7 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
       outputTokens = isInput ? 0 : tokenEstimate;
       const rates = modelRatesFor(byProvider, byProviderModel, byModel, row.provider, row.model_id);
       estCost = estimateCostUsd(inputTokens, outputTokens, rates);
+      if (row.role === "assistant") estimatedAssistantMessages += 1;
     }
     dayBucket.inputTokens += inputTokens;
@@ -469,6 +533,7 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
       tool_errors: b.toolErrors,
       success_rate: round4(successRate),
       error_rate: round4(errorRate),
+      tier_tokens: tierBucketToTokens(b.tier),
     } satisfies DashboardSeriesPoint;
   });
@@ -561,6 +626,7 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
         tool_errors: dayPoint?.tool_errors ?? 0,
         success_rate: dayPoint?.success_rate ?? 1,
         error_rate: dayPoint?.error_rate ?? 0,
+        tier_tokens: dayPoint?.tier_tokens ?? emptyTierTokens(),
       },
       top_agents: dayAgents,
       by_provider: dayProviders,
@@ -580,6 +646,8 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
       tool_errors: totalErrors,
       success_rate: round4(overallSuccessRate),
       error_rate: round4(overallErrorRate),
+      tier_tokens: tierBucketToTokens(tierTotals),
+      data_quality: computeDataQuality(measuredAssistantMessages, estimatedAssistantMessages),
     },
     series,
     top_tools,
@@ -641,11 +709,35 @@ function seedDayBuckets(now: Date, days: number): Map<string, DayBucket> {
       toolCalls: 0,
       toolSuccesses: 0,
       toolErrors: 0,
+      tier: { hot: 0, warm: 0, facts: 0, overhead: 0 },
     });
   }
   return out;
 }
+/** Builds a fresh tier breakdown with every counter, including the total, at zero. */
+function emptyTierTokens(): DashboardTierTokens {
+  const zeroed: DashboardTierTokens = {
+    hot_tokens: 0,
+    warm_tokens: 0,
+    facts_tokens: 0,
+    overhead_tokens: 0,
+    measured_input_tokens: 0,
+  };
+  return zeroed;
+}
+/**
+ * Converts an internal tier accumulator into the public token shape,
+ * adding the four-tier sum as `measured_input_tokens`.
+ */
+function tierBucketToTokens(b: TierBucket): DashboardTierTokens {
+  const { hot, warm, facts, overhead } = b;
+  const measured = hot + warm + facts + overhead;
+  return {
+    hot_tokens: hot,
+    warm_tokens: warm,
+    facts_tokens: facts,
+    overhead_tokens: overhead,
+    measured_input_tokens: measured,
+  };
+}
+/**
+ * Summarises how many assistant turns were measured versus estimated.
+ *
+ * @param measured  count of turns backed by a usage snapshot
+ * @param estimated count of turns that fell back to an estimate
+ * @returns both counts plus `measured_pct`, the measured share rounded via
+ *          `round4`; when both counts sum to zero the share is reported as 1.
+ */
+export function computeDataQuality(measured: number, estimated: number): DashboardDataQuality {
+  const denominator = measured + estimated;
+  let pct = 1;
+  if (denominator !== 0) {
+    pct = round4(measured / denominator);
+  }
+  return {
+    measured_messages: measured,
+    estimated_messages: estimated,
+    measured_pct: pct,
+  };
+}
 function estimateTokens(text: string): number {
   const trimmed = text.trim();
   if (!trimmed) return 0;

package/lib/tools/exec.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import { tool } from "@langchain/core/tools";
 import { z } from "zod";
 import { registerTools } from "./registry";
 import { getInjectedSubprocessEnv } from "@/lib/env/allowlist";
+import { checkExecAllowed, resolveSafetyMode } from "./safety";
 const MAX_OUTPUT_BYTES = 8_000;
 const DEFAULT_TIMEOUT_MS = 10_000;
@@ -40,11 +41,14 @@ function runLocalCommand(
   const timeout = Math.min(options.timeout_ms ?? DEFAULT_TIMEOUT_MS, MAX_TIMEOUT_MS);
-  if (!options.allow_unsafe && isBlockedCommand(command)) {
-    return JSON.stringify({
-      exit_code: 126,
-      stderr: "Command blocked by safety policy. Pass allow_unsafe=true only when you fully trust the command.",
-    });
+  const mode = resolveSafetyMode();
+  const gate = checkExecAllowed(command, {
+    mode,
+    allowUnsafe: options.allow_unsafe,
+    blockedByPattern: isBlockedCommand(command),
+  });
+  if (!gate.allowed) {
+    return JSON.stringify({ exit_code: 126, stderr: gate.reason, safety_mode: mode });
   }
   const cwd = options.cwd?.trim() ? options.cwd : process.cwd();

package/lib/tools/files.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import path from "node:path";
 import { tool } from "@langchain/core/tools";
 import { z } from "zod";
 import { registerTools } from "./registry";
+import { checkFsAllowed, resolveSafetyMode } from "./safety";
 // Dedicated file tools. Agents previously had to drive every edit through
 // `local_exec` / `shell_exec`, which works for "create a new file with this
@@ -90,6 +91,11 @@ function jarelaDataDir(): string {
 }
 function assertSafePath(abs: string, op: "read" | "write"): void {
+  const mode = resolveSafetyMode();
+  const gate = checkFsAllowed(op, { mode });
+  if (!gate.allowed) throw new Error(gate.reason);
+  // bypass mode disables every guard, including the credential denylist.
+  if (mode === "bypass") return;
   if (process.env.JARELA_ALLOW_SENSITIVE_FILES === "1") return;
   for (const base of sensitiveBase()) {
     if (isInside(abs, base)) {

package/lib/tools/safety.test.ts ADDED Viewed

@@ -0,0 +1,95 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { checkExecAllowed, checkFsAllowed, resolveSafetyMode } from "./safety";
+// Snapshot of JARELA_TOOL_SAFETY taken when the test module loads, so each
+// test can mutate the variable freely.
+const ORIGINAL = process.env.JARELA_TOOL_SAFETY;
+// Restore the snapshot after every test; an originally-unset variable is
+// deleted rather than set to the string "undefined".
+afterEach(() => {
+  if (ORIGINAL === undefined) delete process.env.JARELA_TOOL_SAFETY;
+  else process.env.JARELA_TOOL_SAFETY = ORIGINAL;
+});
+// resolveSafetyMode(): mapping from the JARELA_TOOL_SAFETY env var to a tier.
+describe("resolveSafetyMode", () => {
+  it("defaults to mostly_safe", () => {
+    delete process.env.JARELA_TOOL_SAFETY;
+    expect(resolveSafetyMode()).toBe("mostly_safe");
+  });
+  // Covers case-insensitivity ("BYPASS"), the "unsafe" alias for bypass, and
+  // the fallback to mostly_safe for unrecognised values.
+  it("accepts safe / mostly_safe / bypass", () => {
+    process.env.JARELA_TOOL_SAFETY = "safe";
+    expect(resolveSafetyMode()).toBe("safe");
+    process.env.JARELA_TOOL_SAFETY = "BYPASS";
+    expect(resolveSafetyMode()).toBe("bypass");
+    process.env.JARELA_TOOL_SAFETY = "unsafe";
+    expect(resolveSafetyMode()).toBe("bypass");
+    process.env.JARELA_TOOL_SAFETY = "garbage";
+    expect(resolveSafetyMode()).toBe("mostly_safe");
+  });
+});
+// bypass tier: even a command flagged by the dangerous-pattern list passes.
+describe("checkExecAllowed - bypass", () => {
+  it("allows anything", () => {
+    expect(checkExecAllowed("rm -rf /", { mode: "bypass", blockedByPattern: true }).allowed).toBe(true);
+  });
+});
+// mostly_safe tier: the decision depends only on blockedByPattern and the
+// per-call allowUnsafe flag.
+describe("checkExecAllowed - mostly_safe", () => {
+  it("blocks dangerous pattern without allow_unsafe", () => {
+    const r = checkExecAllowed("rm -rf /", { mode: "mostly_safe", blockedByPattern: true });
+    expect(r.allowed).toBe(false);
+  });
+  it("permits dangerous pattern with allow_unsafe", () => {
+    const r = checkExecAllowed("rm -rf /", { mode: "mostly_safe", blockedByPattern: true, allowUnsafe: true });
+    expect(r.allowed).toBe(true);
+  });
+  it("permits normal commands", () => {
+    expect(checkExecAllowed("ls -la", { mode: "mostly_safe", blockedByPattern: false }).allowed).toBe(true);
+  });
+});
+// safe tier: only allowlisted inspection commands pass, composition is
+// rejected outright, and allowUnsafe has no effect.
+describe("checkExecAllowed - safe", () => {
+  // Shared options: safe tier, command not flagged by the pattern list.
+  const opts = { mode: "safe" as const, blockedByPattern: false };
+  it("allows ls", () => {
+    expect(checkExecAllowed("ls -la", opts).allowed).toBe(true);
+  });
+  it("allows git status", () => {
+    expect(checkExecAllowed("git status", opts).allowed).toBe(true);
+  });
+  // `git` is allowlisted but `push` is not among its read-only subcommands.
+  it("blocks git push", () => {
+    expect(checkExecAllowed("git push origin main", opts).allowed).toBe(false);
+  });
+  it("blocks unknown commands", () => {
+    expect(checkExecAllowed("rm file", opts).allowed).toBe(false);
+  });
+  // Pipe, &&, ;, redirection and $(...) each trigger rejection even though
+  // the first token (`ls` / `echo`) is allowlisted.
+  it("blocks pipelines and composition", () => {
+    expect(checkExecAllowed("ls | grep foo", opts).allowed).toBe(false);
+    expect(checkExecAllowed("ls && pwd", opts).allowed).toBe(false);
+    expect(checkExecAllowed("ls; pwd", opts).allowed).toBe(false);
+    expect(checkExecAllowed("ls > out.txt", opts).allowed).toBe(false);
+    expect(checkExecAllowed("echo $(whoami)", opts).allowed).toBe(false);
+  });
+  it("blocks tools that execute arbitrary code", () => {
+    expect(checkExecAllowed("node -e 'process.exit()'", opts).allowed).toBe(false);
+    expect(checkExecAllowed("python -c 'print(1)'", opts).allowed).toBe(false);
+    expect(checkExecAllowed("npx some-pkg", opts).allowed).toBe(false);
+  });
+  it("ignores allow_unsafe", () => {
+    expect(
+      checkExecAllowed("rm -rf /", { mode: "safe", blockedByPattern: true, allowUnsafe: true }).allowed,
+    ).toBe(false);
+  });
+  // Whitespace-only input is treated as a missing command.
+  it("rejects empty command", () => {
+    expect(checkExecAllowed("   ", opts).allowed).toBe(false);
+  });
+});
+// checkFsAllowed(): only the safe tier distinguishes reads from writes.
+describe("checkFsAllowed", () => {
+  it("bypass + mostly_safe always permit", () => {
+    expect(checkFsAllowed("write", { mode: "bypass" }).allowed).toBe(true);
+    expect(checkFsAllowed("write", { mode: "mostly_safe" }).allowed).toBe(true);
+    expect(checkFsAllowed("read", { mode: "bypass" }).allowed).toBe(true);
+    expect(checkFsAllowed("read", { mode: "mostly_safe" }).allowed).toBe(true);
+  });
+  it("safe permits reads, blocks writes", () => {
+    expect(checkFsAllowed("read", { mode: "safe" }).allowed).toBe(true);
+    expect(checkFsAllowed("write", { mode: "safe" }).allowed).toBe(false);
+  });
+});

package/lib/tools/safety.ts ADDED Viewed

@@ -0,0 +1,147 @@
+// Safety mode for destructive built-in tools (exec + filesystem writes).
+//
+// Resolved once per call from `JARELA_TOOL_SAFETY`. Three tiers:
+//
+//   "safe"        — read-only. Exec accepts only an allowlisted set of
+//                   inspection commands (ls, git status, …); filesystem
+//                   tools refuse every write, edit, move, copy, delete,
+//                   or mkdir. Per-call `allow_unsafe` is IGNORED.
+//   "mostly_safe" — default. Exec blocks the obviously-dangerous pattern
+//                   list (rm -rf /, shutdown, fork bomb, …); filesystem
+//                   tools refuse credential paths and the Jarela data dir.
+//                   Per-call `allow_unsafe=true` lifts the exec block for
+//                   that single call.
+//   "bypass"      — every guard off. For local development on a machine
+//                   you control and trust completely. NOT for use behind
+//                   a tunnel or with untrusted prompt sources.
+//
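+// The tier is read only from the process environment, so a prompt-injected
+// tool call cannot switch tiers by passing arguments. The one per-call
+// knob is `allow_unsafe`: in "mostly_safe", setting it to true lifts the
+// exec pattern block for that single call; in "safe" it is ignored; and
+// leaving it false simply means "don't try to bypass".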
+/** The three tool-safety tiers, from most to least restrictive. */
+export type SafetyMode = "safe" | "mostly_safe" | "bypass";
+/**
+ * Reads `JARELA_TOOL_SAFETY` and maps it to a safety tier.
+ *
+ * Matching ignores surrounding whitespace and letter case. "safe" selects
+ * the safe tier, "bypass" and its alias "unsafe" select bypass, and every
+ * other value (including unset or empty) selects "mostly_safe".
+ */
+export function resolveSafetyMode(): SafetyMode {
+  const setting = (process.env.JARELA_TOOL_SAFETY ?? "").trim().toLowerCase();
+  switch (setting) {
+    case "safe":
+      return "safe";
+    case "bypass":
+    case "unsafe":
+      return "bypass";
+    default:
+      return "mostly_safe";
+  }
+}
+// Inspection-only commands allowed in `safe` mode. Matched as the FIRST
+// token (after stripping leading whitespace) — pipelines, redirections,
+// command substitution, &&, ;, etc. are all rejected because we cannot
+// reason about what the right-hand side will do.
+const SAFE_EXEC_ALLOWLIST = new Set([
+  "ls", "dir", "pwd", "cd", "echo", "cat", "type", "head", "tail",
+  "wc", "stat", "file", "which", "where", "whoami", "hostname",
+  "date", "uname", "df", "du", "ps", "env", "printenv",
+  "git", "node", "npm", "npx", "deno", "python", "python3", "pip", "pip3",
+]);
+// Subcommands considered read-only for tools that take a verb. We only
+// need to enumerate the dangerous tools here — anything not listed falls
+// back to "the whole tool is read-only" (e.g. `cat`, `ls`).
+// NOTE(review): several listed verbs can still mutate through their
+// arguments (e.g. `git branch -D`, `git tag -d`, `git remote add`,
+// `git diff --output=<file>`). Only `config` is argument-checked below;
+// the rest need a follow-up if `safe` must be strictly read-only.
+const SAFE_SUBCOMMANDS: Record<string, Set<string>> = {
+  git: new Set([
+    "status", "log", "diff", "show", "blame", "branch", "tag",
+    "remote", "ls-files", "ls-tree", "config", "rev-parse",
+    "describe", "shortlog", "reflog",
+  ]),
+  npm: new Set(["ls", "list", "view", "info", "outdated", "config", "whoami", "ping", "doctor"]),
+  npx: new Set([]), // npx runs arbitrary code; never allow under "safe"
+  node: new Set([]), // bare `node` opens a REPL; `node script.js` runs anything
+  python: new Set([]),
+  python3: new Set([]),
+  deno: new Set(["info", "doc"]),
+  pip: new Set(["list", "show", "freeze", "config"]),
+  pip3: new Set(["list", "show", "freeze", "config"]),
+};
+// Allowlisted tools that are only read-only when invoked with NO
+// arguments. `env <cmd>` / `env NAME=value <cmd>` executes `<cmd>`, so
+// anything beyond a bare `env` would be an arbitrary-command escape hatch.
+const SAFE_BARE_ONLY = new Set(["env"]);
+// For `<tool> config ...` the first argument after `config` must be one of
+// these read actions. Without this, `git config core.fsmonitor ./x.sh`
+// (and the npm/pip equivalents) would write configuration in a tier that
+// is documented as read-only.
+const READ_ONLY_CONFIG_ACTIONS = new Set([
+  "--get", "--get-all", "--get-regexp", "--get-urlmatch", "--list", "-l",
+  "get", "list", "ls",
+]);
+// Shell metacharacters that compose commands or redirect IO. Their
+// presence in `safe` mode is grounds for rejection because the
+// allowlist check only inspects the first token. CR/LF are included
+// because a newline separates commands for the shell exactly like `;`,
+// while the whitespace tokenizer below would otherwise fold the second
+// command into the first one's arguments.
+const COMPOSER_RE = /[|&;`$<>\r\n]/;
+/** Outcome of a safety gate: `reason` is set whenever `allowed` is false. */
+export interface ExecAllowResult {
+  allowed: boolean;
+  reason?: string;
+}
+/**
+ * Decides whether `command` may be executed under the given safety tier.
+ *
+ * - "bypass": always allowed.
+ * - "mostly_safe": allowed unless `blockedByPattern` is true and
+ *   `allowUnsafe` is not set.
+ * - "safe": `allowUnsafe` and `blockedByPattern` are ignored. The command
+ *   must be non-empty, contain no shell composition characters or line
+ *   breaks, start with an allowlisted tool, and — for tools that take a
+ *   verb — use an allowlisted read-only subcommand.
+ *
+ * @param command raw command line as it would be handed to the shell
+ * @param opts.mode safety tier to enforce
+ * @param opts.allowUnsafe per-call override, honoured only in "mostly_safe"
+ * @param opts.blockedByPattern whether the caller's dangerous-pattern list matched
+ * @returns `{ allowed: true }`, or `{ allowed: false, reason }`
+ */
+export function checkExecAllowed(
+  command: string,
+  opts: { mode: SafetyMode; allowUnsafe?: boolean; blockedByPattern: boolean },
+): ExecAllowResult {
+  if (opts.mode === "bypass") return { allowed: true };
+  if (opts.mode === "mostly_safe") {
+    if (opts.blockedByPattern && !opts.allowUnsafe) {
+      return {
+        allowed: false,
+        reason:
+          "Command blocked by safety policy (mode=mostly_safe). Pass allow_unsafe=true only when you fully trust the command.",
+      };
+    }
+    return { allowed: true };
+  }
+  // safe mode
+  const trimmed = command.trim();
+  if (!trimmed) return { allowed: false, reason: "command is required" };
+  if (COMPOSER_RE.test(trimmed)) {
+    return {
+      allowed: false,
+      reason:
+        "safe mode rejects pipelines, redirection, command substitution, &&, ;, and multi-line commands. " +
+        "Set JARELA_TOOL_SAFETY=mostly_safe (or bypass) to allow composite commands.",
+    };
+  }
+  const tokens = trimmed.split(/\s+/);
+  const head = tokens[0]?.toLowerCase();
+  if (!head || !SAFE_EXEC_ALLOWLIST.has(head)) {
+    return {
+      allowed: false,
+      reason:
+        `safe mode allows only inspection commands (${[...SAFE_EXEC_ALLOWLIST].sort().join(", ")}). ` +
+        "Set JARELA_TOOL_SAFETY=mostly_safe to enable the broader policy.",
+    };
+  }
+  if (SAFE_BARE_ONLY.has(head) && tokens.length > 1) {
+    return {
+      allowed: false,
+      reason:
+        `safe mode allows '${head}' only without arguments because '${head} <command>' executes that command. ` +
+        "Use printenv to read individual variables, or switch to mostly_safe or bypass.",
+    };
+  }
+  const subAllowlist = SAFE_SUBCOMMANDS[head];
+  if (!subAllowlist) return { allowed: true };
+  // Tools with an empty verb list (node/python/npx) run arbitrary code no
+  // matter what follows, so they are refused even when invoked bare.
+  if (subAllowlist.size === 0) {
+    return {
+      allowed: false,
+      reason: `safe mode refuses '${head}' because it can execute arbitrary code. Use mostly_safe or bypass.`,
+    };
+  }
+  const rawSub = tokens[1];
+  // Bare invocation of a verb-taking tool (e.g. `git` alone) is permitted.
+  if (rawSub === undefined) return { allowed: true };
+  // A flag in the verb position must never be read as the verb: with the
+  // dashes stripped, `npm --list x install pkg` would look like the
+  // read-only `list` while npm actually runs `install`.
+  if (rawSub.startsWith("-")) {
+    return {
+      allowed: false,
+      reason:
+        `safe mode requires '${head}' to be followed directly by a subcommand (${[...subAllowlist].sort().join(", ")}), not an option. ` +
+        "Use mostly_safe or bypass for other invocations.",
+    };
+  }
+  const sub = rawSub.toLowerCase();
+  if (!subAllowlist.has(sub)) {
+    return {
+      allowed: false,
+      reason:
+        `safe mode allows '${head}' only for: ${[...subAllowlist].sort().join(", ")}. ` +
+        "Use mostly_safe or bypass for other subcommands.",
+    };
+  }
+  if (sub === "config" && !READ_ONLY_CONFIG_ACTIONS.has((tokens[2] ?? "").toLowerCase())) {
+    return {
+      allowed: false,
+      reason:
+        `safe mode allows '${head} config' only for reading (${[...READ_ONLY_CONFIG_ACTIONS].sort().join(", ")}). ` +
+        "Use mostly_safe or bypass to change configuration.",
+    };
+  }
+  return { allowed: true };
+}
+// Filesystem operations are classified only as read or write; every
+// mutating tool (write/edit/move/copy/delete/mkdir) counts as "write".
+export type FsOp = "read" | "write";
+/**
+ * Decides whether a filesystem operation may proceed under `opts.mode`.
+ *
+ * "bypass" and "mostly_safe" permit every operation at this gate. Any
+ * other tier permits reads and refuses writes with an explanatory reason.
+ */
+export function checkFsAllowed(
+  op: FsOp,
+  opts: { mode: SafetyMode },
+): ExecAllowResult {
+  const unrestricted = opts.mode === "bypass" || opts.mode === "mostly_safe";
+  if (unrestricted || op === "read") {
+    return { allowed: true };
+  }
+  // safe mode + mutation: refuse and tell the operator how to opt in.
+  const reason =
+    "safe mode refuses filesystem mutations (write/edit/move/copy/delete/mkdir). " +
+    "Set JARELA_TOOL_SAFETY=mostly_safe to enable writes outside credential dirs.";
+  return { allowed: false, reason };
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@circuitwall/jarela",
-  "version": "0.9.3",
+  "version": "0.10.0",
   "description": "Jarela — local chat interface for LangGraph agents (multi-provider, single-process, SQLite-backed).",
   "license": "Apache-2.0",
   "author": "Andrew Ge Wu",