@circuitwall/jarela 0.9.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/.next/standalone/.next/BUILD_ID +1 -1
  2. package/.next/standalone/.next/app-path-routes-manifest.json +2 -2
  3. package/.next/standalone/.next/build-manifest.json +2 -2
  4. package/.next/standalone/.next/prerender-manifest.json +3 -3
  5. package/.next/standalone/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
  6. package/.next/standalone/.next/server/app/_global-error.html +1 -1
  7. package/.next/standalone/.next/server/app/_global-error.rsc +1 -1
  8. package/.next/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  9. package/.next/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  10. package/.next/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  11. package/.next/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  12. package/.next/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  13. package/.next/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  14. package/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  15. package/.next/standalone/.next/server/app/_not-found.html +2 -2
  16. package/.next/standalone/.next/server/app/_not-found.rsc +2 -2
  17. package/.next/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +2 -2
  18. package/.next/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  19. package/.next/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +2 -2
  20. package/.next/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  21. package/.next/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  22. package/.next/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +2 -2
  23. package/.next/standalone/.next/server/app/api/v1/dashboard/metrics/route.js +72 -5
  24. package/.next/standalone/.next/server/app/api/v1/dashboard/metrics/route.js.map +1 -1
  25. package/.next/standalone/.next/server/app/api/v1/extensions/route.js +2 -2
  26. package/.next/standalone/.next/server/app/api/v1/extensions/tools/[name]/secrets/route.js +2 -2
  27. package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/run/route.js +136 -26
  28. package/.next/standalone/.next/server/app/api/v1/threads/[thread_id]/run/route.js.map +1 -1
  29. package/.next/standalone/.next/server/app/api/v1/tools/route.js +2 -2
  30. package/.next/standalone/.next/server/app/index.html +2 -2
  31. package/.next/standalone/.next/server/app/index.rsc +3 -3
  32. package/.next/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +2 -2
  33. package/.next/standalone/.next/server/app/index.segments/_full.segment.rsc +3 -3
  34. package/.next/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  35. package/.next/standalone/.next/server/app/index.segments/_index.segment.rsc +2 -2
  36. package/.next/standalone/.next/server/app/index.segments/_tree.segment.rsc +2 -2
  37. package/.next/standalone/.next/server/app/page.js +266 -40
  38. package/.next/standalone/.next/server/app/page.js.map +1 -1
  39. package/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
  40. package/.next/standalone/.next/server/app/setup/page_client-reference-manifest.js +1 -1
  41. package/.next/standalone/.next/server/app/setup.html +1 -1
  42. package/.next/standalone/.next/server/app/setup.rsc +2 -2
  43. package/.next/standalone/.next/server/app/setup.segments/_full.segment.rsc +2 -2
  44. package/.next/standalone/.next/server/app/setup.segments/_head.segment.rsc +1 -1
  45. package/.next/standalone/.next/server/app/setup.segments/_index.segment.rsc +2 -2
  46. package/.next/standalone/.next/server/app/setup.segments/_tree.segment.rsc +2 -2
  47. package/.next/standalone/.next/server/app/setup.segments/setup/__PAGE__.segment.rsc +1 -1
  48. package/.next/standalone/.next/server/app/setup.segments/setup.segment.rsc +1 -1
  49. package/.next/standalone/.next/server/app-paths-manifest.json +2 -2
  50. package/.next/standalone/.next/server/chunks/210.js +1 -1
  51. package/.next/standalone/.next/server/chunks/2151.js +60 -2
  52. package/.next/standalone/.next/server/chunks/2151.js.map +1 -1
  53. package/.next/standalone/.next/server/chunks/614.js +336 -93
  54. package/.next/standalone/.next/server/chunks/614.js.map +1 -1
  55. package/.next/standalone/.next/server/chunks/6765.js +35 -0
  56. package/.next/standalone/.next/server/chunks/6765.js.map +1 -1
  57. package/.next/standalone/.next/server/chunks/8697.js +15246 -15002
  58. package/.next/standalone/.next/server/chunks/8697.js.map +1 -1
  59. package/.next/standalone/.next/server/middleware-build-manifest.js +2 -2
  60. package/.next/standalone/.next/server/pages/404.html +2 -2
  61. package/.next/standalone/.next/server/pages/500.html +1 -1
  62. package/.next/standalone/.next/server/server-reference-manifest.json +1 -1
  63. package/.next/standalone/.next/static/chunks/{3741-344e2bfc5028b9c8.js → 3741-2d64471ff763b8fa.js} +36 -1
  64. package/.next/standalone/.next/static/chunks/3741-2d64471ff763b8fa.js.map +1 -0
  65. package/.next/standalone/.next/static/chunks/app/{page-c77ab600642bbfc2.js → page-318743bf47fac345.js} +267 -41
  66. package/.next/standalone/.next/static/chunks/app/page-318743bf47fac345.js.map +1 -0
  67. package/.next/standalone/.next/static/css/b6b85b0f13bc0e98.css +5 -0
  68. package/.next/standalone/.next/static/css/b6b85b0f13bc0e98.css.map +1 -0
  69. package/.next/standalone/package.json +1 -1
  70. package/CHANGELOG.md +48 -0
  71. package/README.md +2 -0
  72. package/api/client.ts +37 -1
  73. package/api/types.ts +18 -0
  74. package/app/api/v1/threads/[thread_id]/run/route.ts +69 -22
  75. package/components/agents/AgentEditor.tsx +7 -4
  76. package/components/chat/MessageBubble.tsx +108 -1
  77. package/components/dashboard/DashboardPanel.tsx +79 -21
  78. package/hooks/useSSE.ts +22 -9
  79. package/lib/agents/prepare/system-prompt.ts +30 -0
  80. package/lib/agents/run-registry.test.ts +94 -0
  81. package/lib/agents/run-registry.ts +60 -1
  82. package/lib/stores/dashboard-metrics.test.ts +33 -0
  83. package/lib/stores/dashboard-metrics.ts +93 -1
  84. package/lib/tools/exec.ts +9 -5
  85. package/lib/tools/files.ts +6 -0
  86. package/lib/tools/safety.test.ts +95 -0
  87. package/lib/tools/safety.ts +147 -0
  88. package/package.json +1 -1
  89. package/.next/standalone/.next/static/chunks/3741-344e2bfc5028b9c8.js.map +0 -1
  90. package/.next/standalone/.next/static/chunks/app/page-c77ab600642bbfc2.js.map +0 -1
  91. package/.next/standalone/.next/static/css/53f85613a5500253.css +0 -5
  92. package/.next/standalone/.next/static/css/53f85613a5500253.css.map +0 -1
  93. /package/.next/standalone/.next/static/{6uLoytvvEtLKIblEB53e0 → 8qTBpUDFnSMYwe3Zc0bGV}/_buildManifest.js +0 -0
  94. /package/.next/standalone/.next/static/{6uLoytvvEtLKIblEB53e0 → 8qTBpUDFnSMYwe3Zc0bGV}/_ssgManifest.js +0 -0
@@ -15,6 +15,23 @@ type Subscriber = (chunk: StreamChunk) => void;
15
15
 
16
16
  const MAX_BUFFERED = 4000; // text_delta chunks accumulate fast; cap them
17
17
  const RECENT_TTL_MS = 5 * 60_000; // keep finished runs visible for 5 min
18
+ // Idle (no-progress) ceiling: if no chunk has been broadcast for this
19
+ // long the registry assumes the LLM/tool call wedged and force-finishes
20
+ // the run. This is the user-perceived "stream is dead" signal and is
21
+ // short by design — long legitimate turns keep streaming text/tool
22
+ // chunks, so they reset the idle clock on every broadcast(). The
23
+ // wall-clock ceiling (runMaxMs) is the absolute safety net for the
24
+ // degenerate case where broadcast() is never called at all (or fires
25
+ // faster than the idle window forever).
26
+ // Override with JARELA_RUN_IDLE_MS / JARELA_RUN_MAX_MS.
27
/**
 * Idle (no-progress) timeout for a run, in milliseconds.
 *
 * Reads `JARELA_RUN_IDLE_MS`. Anything that is not a finite number greater
 * than zero (unset, empty, non-numeric, zero, negative) falls back to the
 * 90 s default.
 *
 * The result is capped at 2^31 - 1 because the value is used as a
 * `setTimeout` delay: Node coerces larger delays to 1 ms, so an oversized
 * override would make the idle watchdog re-arm itself every millisecond
 * instead of waiting.
 *
 * @returns the idle timeout in ms, in the range (0, 2_147_483_647].
 */
function runIdleMs(): number {
  const MAX_TIMER_DELAY_MS = 2_147_483_647;
  const DEFAULT_IDLE_MS = 90_000;
  const raw = Number(process.env.JARELA_RUN_IDLE_MS);
  if (!Number.isFinite(raw) || raw <= 0) return DEFAULT_IDLE_MS;
  return Math.min(raw, MAX_TIMER_DELAY_MS);
}
31
/**
 * Wall-clock ceiling for a run, in milliseconds.
 *
 * Reads `JARELA_RUN_MAX_MS`. Anything that is not a finite number greater
 * than zero (unset, empty, non-numeric, zero, negative) falls back to the
 * 15 minute default.
 *
 * The result is capped at 2^31 - 1 because the value is used as a
 * `setTimeout` delay: Node coerces larger delays to 1 ms, so an oversized
 * override (e.g. one meant to effectively disable the ceiling) would
 * force-finish every run almost immediately.
 *
 * @returns the wall-clock limit in ms, in the range (0, 2_147_483_647].
 */
function runMaxMs(): number {
  const MAX_TIMER_DELAY_MS = 2_147_483_647;
  const DEFAULT_MAX_MS = 15 * 60_000;
  const raw = Number(process.env.JARELA_RUN_MAX_MS);
  if (!Number.isFinite(raw) || raw <= 0) return DEFAULT_MAX_MS;
  return Math.min(raw, MAX_TIMER_DELAY_MS);
}
18
35
 
19
36
  export interface ActiveRun {
20
37
  thread_id: string;
@@ -30,6 +47,9 @@ export interface ActiveRun {
30
47
  // disconnects), we signal this controller so the LangGraph stream cancels
31
48
  // itself instead of running to completion in the background.
32
49
  abort: AbortController;
50
+ // Last activity timestamp — bumped on every broadcast() so the idle
51
+ // watchdog can tell live progress from a wedged stream.
52
+ last_chunk_at: number;
33
53
  }
34
54
 
35
55
  const runs = new Map<string, ActiveRun>();
@@ -40,25 +60,64 @@ export function startRun(thread_id: string, agent_id: string | null): ActiveRun
40
60
  if (existing && existing.status === "running") {
41
61
  throw new Error(`A run is already active for thread ${thread_id}`);
42
62
  }
63
+ const now = Date.now();
43
64
  const run: ActiveRun = {
44
65
  thread_id,
45
66
  agent_id,
46
- started_at: Date.now(),
67
+ started_at: now,
47
68
  finished_at: null,
48
69
  status: "running",
49
70
  events: [],
50
71
  subscribers: new Set(),
51
72
  final_text: "",
52
73
  abort: new AbortController(),
74
+ last_chunk_at: now,
53
75
  };
54
76
  runs.set(thread_id, run);
77
+ scheduleIdleWatchdog(run);
78
+ scheduleMaxWatchdog(run);
55
79
  return run;
56
80
  }
57
81
 
82
+ // Self-rearming idle watchdog. Fires when no chunk has arrived for
83
+ // `idleMs`; otherwise reschedules itself for `(last_chunk_at + idleMs) -
84
+ // now`. We never carry a handle on the run — the closure just bails if
85
+ // the run is no longer the registry's entry or no longer running.
86
+ function scheduleIdleWatchdog(run: ActiveRun): void {
87
+ const idleMs = runIdleMs();
88
+ const fireIn = Math.max(0, (run.last_chunk_at + idleMs) - Date.now());
89
+ setTimeout(() => {
90
+ const cur = runs.get(run.thread_id);
91
+ if (cur !== run) return;
92
+ if (run.status !== "running") return;
93
+ const idle = Date.now() - run.last_chunk_at;
94
+ if (idle < idleMs) {
95
+ scheduleIdleWatchdog(run);
96
+ return;
97
+ }
98
+ console.warn(`[run-registry] idle watchdog: force-finishing stalled run for thread ${run.thread_id} after ${idle}ms of no progress`);
99
+ try { run.abort.abort("run_idle_timeout"); } catch { /* */ }
100
+ finishRun(run, "error");
101
+ }, fireIn).unref?.();
102
+ }
103
+
104
+ function scheduleMaxWatchdog(run: ActiveRun): void {
105
+ const max = runMaxMs();
106
+ setTimeout(() => {
107
+ const cur = runs.get(run.thread_id);
108
+ if (cur !== run) return;
109
+ if (run.status !== "running") return;
110
+ console.warn(`[run-registry] wall-clock watchdog: force-finishing run for thread ${run.thread_id} after ${max}ms`);
111
+ try { run.abort.abort("run_watchdog_timeout"); } catch { /* */ }
112
+ finishRun(run, "error");
113
+ }, max).unref?.();
114
+ }
115
+
58
116
  export function broadcast(run: ActiveRun, chunk: StreamChunk): void {
59
117
  // Identity-check: a superseded run must not smear trailing chunks onto
60
118
  // the replacement entry in the registry.
61
119
  if (runs.get(run.thread_id) !== run) return;
120
+ run.last_chunk_at = Date.now();
62
121
  if (chunk.type === "text_delta") {
63
122
  run.final_text += (chunk.data.delta as string) ?? "";
64
123
  }
@@ -0,0 +1,33 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { computeDataQuality } from "./dashboard-metrics";
3
+
4
+ describe("computeDataQuality", () => {
5
+ it("treats empty windows as fully measured to avoid a misleading red chip", () => {
6
+ expect(computeDataQuality(0, 0)).toEqual({
7
+ measured_messages: 0,
8
+ estimated_messages: 0,
9
+ measured_pct: 1,
10
+ });
11
+ });
12
+
13
+ it("reports the measured ratio for mixed windows", () => {
14
+ expect(computeDataQuality(9, 1)).toEqual({
15
+ measured_messages: 9,
16
+ estimated_messages: 1,
17
+ measured_pct: 0.9,
18
+ });
19
+ });
20
+
21
+ it("rounds to four decimals", () => {
22
+ const q = computeDataQuality(1, 2);
23
+ expect(q.measured_pct).toBe(0.3333);
24
+ });
25
+
26
+ it("reports 0% when every row is a legacy estimate", () => {
27
+ expect(computeDataQuality(0, 7)).toEqual({
28
+ measured_messages: 0,
29
+ estimated_messages: 7,
30
+ measured_pct: 0,
31
+ });
32
+ });
33
+ });
@@ -7,6 +7,24 @@ import type { PersistedToolEvent } from "@/lib/stores/threads";
7
7
  const CHARS_PER_TOKEN = 4;
8
8
  const DEFAULT_WINDOW_DAYS = 30;
9
9
 
10
+ export interface DashboardTierTokens {
11
+ hot_tokens: number;
12
+ warm_tokens: number;
13
+ facts_tokens: number;
14
+ overhead_tokens: number;
15
+ /** Sum of the four tiers — convenience for stacked-bar totals. */
16
+ measured_input_tokens: number;
17
+ }
18
+
19
+ export interface DashboardDataQuality {
20
+ /** Assistant turns in the window that have an immutable message_usage snapshot. */
21
+ measured_messages: number;
22
+ /** Assistant turns falling back to content-length estimates. */
23
+ estimated_messages: number;
24
+ /** measured / (measured + estimated), 0..1; 1 when no traffic. */
25
+ measured_pct: number;
26
+ }
27
+
10
28
  export interface DashboardSeriesPoint {
11
29
  day: string;
12
30
  input_tokens_est: number;
@@ -17,6 +35,11 @@ export interface DashboardSeriesPoint {
17
35
  tool_errors: number;
18
36
  success_rate: number;
19
37
  error_rate: number;
38
+ /** Per-tier breakdown of authoritative snapshot input tokens for the
39
+ * day. Zero for legacy rows with no message_usage entry — these are
40
+ * surfaced via the `data_quality` chip instead so users know the bar
41
+ * reflects only measured traffic. */
42
+ tier_tokens: DashboardTierTokens;
20
43
  }
21
44
 
22
45
  export interface DashboardToolTop {
@@ -92,6 +115,7 @@ export interface DashboardDayBreakdown {
92
115
  tool_errors: number;
93
116
  success_rate: number;
94
117
  error_rate: number;
118
+ tier_tokens: DashboardTierTokens;
95
119
  };
96
120
  top_agents: DashboardAgentTop[];
97
121
  by_provider: DashboardProviderBreakdown[];
@@ -110,6 +134,8 @@ export interface DashboardMetrics {
110
134
  tool_errors: number;
111
135
  success_rate: number;
112
136
  error_rate: number;
137
+ tier_tokens: DashboardTierTokens;
138
+ data_quality: DashboardDataQuality;
113
139
  };
114
140
  series: DashboardSeriesPoint[];
115
141
  top_tools: DashboardToolTop[];
@@ -146,6 +172,17 @@ type UsageRow = {
146
172
  mu_model_config_name: string | null;
147
173
  mu_agent_id: string | null;
148
174
  mu_agent_name: string | null;
175
+ mu_hot_tokens: number | null;
176
+ mu_warm_tokens: number | null;
177
+ mu_facts_tokens: number | null;
178
+ mu_overhead_tokens: number | null;
179
+ };
180
+
181
+ type TierBucket = {
182
+ hot: number;
183
+ warm: number;
184
+ facts: number;
185
+ overhead: number;
149
186
  };
150
187
 
151
188
  type DayBucket = {
@@ -155,6 +192,7 @@ type DayBucket = {
155
192
  toolCalls: number;
156
193
  toolSuccesses: number;
157
194
  toolErrors: number;
195
+ tier: TierBucket;
158
196
  };
159
197
 
160
198
  type AgentBucket = {
@@ -235,7 +273,11 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
235
273
  mu.model_id AS mu_model_id,
236
274
  mu.model_config_name AS mu_model_config_name,
237
275
  mu.agent_id AS mu_agent_id,
238
- mu.agent_name AS mu_agent_name
276
+ mu.agent_name AS mu_agent_name,
277
+ mu.hot_tokens AS mu_hot_tokens,
278
+ mu.warm_tokens AS mu_warm_tokens,
279
+ mu.facts_tokens AS mu_facts_tokens,
280
+ mu.overhead_tokens AS mu_overhead_tokens
239
281
  FROM messages m
240
282
  JOIN threads t ON t.thread_id = m.thread_id
241
283
  LEFT JOIN agent_configs a ON a.id = t.agent_id
@@ -301,6 +343,11 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
301
343
  let totalCalls = 0;
302
344
  let totalSuccesses = 0;
303
345
  let totalErrors = 0;
346
+ const tierTotals: TierBucket = { hot: 0, warm: 0, facts: 0, overhead: 0 };
347
+ // Data-quality counters: only assistant turns are eligible since
348
+ // user/system rows never carry a message_usage snapshot by design.
349
+ let measuredAssistantMessages = 0;
350
+ let estimatedAssistantMessages = 0;
304
351
 
305
352
  for (const row of usageRows) {
306
353
  const day = row.created_at.slice(0, 10);
@@ -332,6 +379,22 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
332
379
  attribModelConfig = row.mu_model_config_name ?? attribModelConfig;
333
380
  attribAgentId = row.mu_agent_id ?? attribAgentId;
334
381
  attribAgentName = row.mu_agent_name ?? attribAgentName;
382
+ if (row.role === "assistant") measuredAssistantMessages += 1;
383
+ // Accumulate tier breakdown — null columns (legacy snapshots
384
+ // before the tier wire-up) contribute zero, which is the right
385
+ // behaviour for a stacked bar that visualises *known* tier split.
386
+ const hot = row.mu_hot_tokens ?? 0;
387
+ const warm = row.mu_warm_tokens ?? 0;
388
+ const facts = row.mu_facts_tokens ?? 0;
389
+ const overhead = row.mu_overhead_tokens ?? 0;
390
+ tierTotals.hot += hot;
391
+ tierTotals.warm += warm;
392
+ tierTotals.facts += facts;
393
+ tierTotals.overhead += overhead;
394
+ dayBucket.tier.hot += hot;
395
+ dayBucket.tier.warm += warm;
396
+ dayBucket.tier.facts += facts;
397
+ dayBucket.tier.overhead += overhead;
335
398
  } else if (row.role === "user" && threadHasSnapshot) {
336
399
  // Suppressed: snapshotted assistant turns in this thread already
337
400
  // capture this user message's tokens in their input_tokens.
@@ -343,6 +406,7 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
343
406
  outputTokens = isInput ? 0 : tokenEstimate;
344
407
  const rates = modelRatesFor(byProvider, byProviderModel, byModel, row.provider, row.model_id);
345
408
  estCost = estimateCostUsd(inputTokens, outputTokens, rates);
409
+ if (row.role === "assistant") estimatedAssistantMessages += 1;
346
410
  }
347
411
 
348
412
  dayBucket.inputTokens += inputTokens;
@@ -469,6 +533,7 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
469
533
  tool_errors: b.toolErrors,
470
534
  success_rate: round4(successRate),
471
535
  error_rate: round4(errorRate),
536
+ tier_tokens: tierBucketToTokens(b.tier),
472
537
  } satisfies DashboardSeriesPoint;
473
538
  });
474
539
 
@@ -561,6 +626,7 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
561
626
  tool_errors: dayPoint?.tool_errors ?? 0,
562
627
  success_rate: dayPoint?.success_rate ?? 1,
563
628
  error_rate: dayPoint?.error_rate ?? 0,
629
+ tier_tokens: dayPoint?.tier_tokens ?? emptyTierTokens(),
564
630
  },
565
631
  top_agents: dayAgents,
566
632
  by_provider: dayProviders,
@@ -580,6 +646,8 @@ export async function getDashboardMetrics(days = DEFAULT_WINDOW_DAYS): Promise<D
580
646
  tool_errors: totalErrors,
581
647
  success_rate: round4(overallSuccessRate),
582
648
  error_rate: round4(overallErrorRate),
649
+ tier_tokens: tierBucketToTokens(tierTotals),
650
+ data_quality: computeDataQuality(measuredAssistantMessages, estimatedAssistantMessages),
583
651
  },
584
652
  series,
585
653
  top_tools,
@@ -641,11 +709,35 @@ function seedDayBuckets(now: Date, days: number): Map<string, DayBucket> {
641
709
  toolCalls: 0,
642
710
  toolSuccesses: 0,
643
711
  toolErrors: 0,
712
+ tier: { hot: 0, warm: 0, facts: 0, overhead: 0 },
644
713
  });
645
714
  }
646
715
  return out;
647
716
  }
648
717
 
718
+ function emptyTierTokens(): DashboardTierTokens {
719
+ return { hot_tokens: 0, warm_tokens: 0, facts_tokens: 0, overhead_tokens: 0, measured_input_tokens: 0 };
720
+ }
721
+
722
+ function tierBucketToTokens(b: TierBucket): DashboardTierTokens {
723
+ return {
724
+ hot_tokens: b.hot,
725
+ warm_tokens: b.warm,
726
+ facts_tokens: b.facts,
727
+ overhead_tokens: b.overhead,
728
+ measured_input_tokens: b.hot + b.warm + b.facts + b.overhead,
729
+ };
730
+ }
731
+
732
+ export function computeDataQuality(measured: number, estimated: number): DashboardDataQuality {
733
+ const total = measured + estimated;
734
+ return {
735
+ measured_messages: measured,
736
+ estimated_messages: estimated,
737
+ measured_pct: total === 0 ? 1 : round4(measured / total),
738
+ };
739
+ }
740
+
649
741
  function estimateTokens(text: string): number {
650
742
  const trimmed = text.trim();
651
743
  if (!trimmed) return 0;
package/lib/tools/exec.ts CHANGED
@@ -3,6 +3,7 @@ import { tool } from "@langchain/core/tools";
3
3
  import { z } from "zod";
4
4
  import { registerTools } from "./registry";
5
5
  import { getInjectedSubprocessEnv } from "@/lib/env/allowlist";
6
+ import { checkExecAllowed, resolveSafetyMode } from "./safety";
6
7
 
7
8
  const MAX_OUTPUT_BYTES = 8_000;
8
9
  const DEFAULT_TIMEOUT_MS = 10_000;
@@ -40,11 +41,14 @@ function runLocalCommand(
40
41
 
41
42
  const timeout = Math.min(options.timeout_ms ?? DEFAULT_TIMEOUT_MS, MAX_TIMEOUT_MS);
42
43
 
43
- if (!options.allow_unsafe && isBlockedCommand(command)) {
44
- return JSON.stringify({
45
- exit_code: 126,
46
- stderr: "Command blocked by safety policy. Pass allow_unsafe=true only when you fully trust the command.",
47
- });
44
+ const mode = resolveSafetyMode();
45
+ const gate = checkExecAllowed(command, {
46
+ mode,
47
+ allowUnsafe: options.allow_unsafe,
48
+ blockedByPattern: isBlockedCommand(command),
49
+ });
50
+ if (!gate.allowed) {
51
+ return JSON.stringify({ exit_code: 126, stderr: gate.reason, safety_mode: mode });
48
52
  }
49
53
 
50
54
  const cwd = options.cwd?.trim() ? options.cwd : process.cwd();
@@ -4,6 +4,7 @@ import path from "node:path";
4
4
  import { tool } from "@langchain/core/tools";
5
5
  import { z } from "zod";
6
6
  import { registerTools } from "./registry";
7
+ import { checkFsAllowed, resolveSafetyMode } from "./safety";
7
8
 
8
9
  // Dedicated file tools. Agents previously had to drive every edit through
9
10
  // `local_exec` / `shell_exec`, which works for "create a new file with this
@@ -90,6 +91,11 @@ function jarelaDataDir(): string {
90
91
  }
91
92
 
92
93
  function assertSafePath(abs: string, op: "read" | "write"): void {
94
+ const mode = resolveSafetyMode();
95
+ const gate = checkFsAllowed(op, { mode });
96
+ if (!gate.allowed) throw new Error(gate.reason);
97
+ // bypass mode disables every guard, including the credential denylist.
98
+ if (mode === "bypass") return;
93
99
  if (process.env.JARELA_ALLOW_SENSITIVE_FILES === "1") return;
94
100
  for (const base of sensitiveBase()) {
95
101
  if (isInside(abs, base)) {
@@ -0,0 +1,95 @@
1
+ import { describe, it, expect, beforeEach, afterEach } from "vitest";
2
+ import { checkExecAllowed, checkFsAllowed, resolveSafetyMode } from "./safety";
3
+
4
+ const ORIGINAL = process.env.JARELA_TOOL_SAFETY;
5
+
6
+ afterEach(() => {
7
+ if (ORIGINAL === undefined) delete process.env.JARELA_TOOL_SAFETY;
8
+ else process.env.JARELA_TOOL_SAFETY = ORIGINAL;
9
+ });
10
+
11
+ describe("resolveSafetyMode", () => {
12
+ it("defaults to mostly_safe", () => {
13
+ delete process.env.JARELA_TOOL_SAFETY;
14
+ expect(resolveSafetyMode()).toBe("mostly_safe");
15
+ });
16
+ it("accepts safe / mostly_safe / bypass", () => {
17
+ process.env.JARELA_TOOL_SAFETY = "safe";
18
+ expect(resolveSafetyMode()).toBe("safe");
19
+ process.env.JARELA_TOOL_SAFETY = "BYPASS";
20
+ expect(resolveSafetyMode()).toBe("bypass");
21
+ process.env.JARELA_TOOL_SAFETY = "unsafe";
22
+ expect(resolveSafetyMode()).toBe("bypass");
23
+ process.env.JARELA_TOOL_SAFETY = "garbage";
24
+ expect(resolveSafetyMode()).toBe("mostly_safe");
25
+ });
26
+ });
27
+
28
+ describe("checkExecAllowed - bypass", () => {
29
+ it("allows anything", () => {
30
+ expect(checkExecAllowed("rm -rf /", { mode: "bypass", blockedByPattern: true }).allowed).toBe(true);
31
+ });
32
+ });
33
+
34
+ describe("checkExecAllowed - mostly_safe", () => {
35
+ it("blocks dangerous pattern without allow_unsafe", () => {
36
+ const r = checkExecAllowed("rm -rf /", { mode: "mostly_safe", blockedByPattern: true });
37
+ expect(r.allowed).toBe(false);
38
+ });
39
+ it("permits dangerous pattern with allow_unsafe", () => {
40
+ const r = checkExecAllowed("rm -rf /", { mode: "mostly_safe", blockedByPattern: true, allowUnsafe: true });
41
+ expect(r.allowed).toBe(true);
42
+ });
43
+ it("permits normal commands", () => {
44
+ expect(checkExecAllowed("ls -la", { mode: "mostly_safe", blockedByPattern: false }).allowed).toBe(true);
45
+ });
46
+ });
47
+
48
+ describe("checkExecAllowed - safe", () => {
49
+ const opts = { mode: "safe" as const, blockedByPattern: false };
50
+ it("allows ls", () => {
51
+ expect(checkExecAllowed("ls -la", opts).allowed).toBe(true);
52
+ });
53
+ it("allows git status", () => {
54
+ expect(checkExecAllowed("git status", opts).allowed).toBe(true);
55
+ });
56
+ it("blocks git push", () => {
57
+ expect(checkExecAllowed("git push origin main", opts).allowed).toBe(false);
58
+ });
59
+ it("blocks unknown commands", () => {
60
+ expect(checkExecAllowed("rm file", opts).allowed).toBe(false);
61
+ });
62
+ it("blocks pipelines and composition", () => {
63
+ expect(checkExecAllowed("ls | grep foo", opts).allowed).toBe(false);
64
+ expect(checkExecAllowed("ls && pwd", opts).allowed).toBe(false);
65
+ expect(checkExecAllowed("ls; pwd", opts).allowed).toBe(false);
66
+ expect(checkExecAllowed("ls > out.txt", opts).allowed).toBe(false);
67
+ expect(checkExecAllowed("echo $(whoami)", opts).allowed).toBe(false);
68
+ });
69
+ it("blocks tools that execute arbitrary code", () => {
70
+ expect(checkExecAllowed("node -e 'process.exit()'", opts).allowed).toBe(false);
71
+ expect(checkExecAllowed("python -c 'print(1)'", opts).allowed).toBe(false);
72
+ expect(checkExecAllowed("npx some-pkg", opts).allowed).toBe(false);
73
+ });
74
+ it("ignores allow_unsafe", () => {
75
+ expect(
76
+ checkExecAllowed("rm -rf /", { mode: "safe", blockedByPattern: true, allowUnsafe: true }).allowed,
77
+ ).toBe(false);
78
+ });
79
+ it("rejects empty command", () => {
80
+ expect(checkExecAllowed(" ", opts).allowed).toBe(false);
81
+ });
82
+ });
83
+
84
+ describe("checkFsAllowed", () => {
85
+ it("bypass + mostly_safe always permit", () => {
86
+ expect(checkFsAllowed("write", { mode: "bypass" }).allowed).toBe(true);
87
+ expect(checkFsAllowed("write", { mode: "mostly_safe" }).allowed).toBe(true);
88
+ expect(checkFsAllowed("read", { mode: "bypass" }).allowed).toBe(true);
89
+ expect(checkFsAllowed("read", { mode: "mostly_safe" }).allowed).toBe(true);
90
+ });
91
+ it("safe permits reads, blocks writes", () => {
92
+ expect(checkFsAllowed("read", { mode: "safe" }).allowed).toBe(true);
93
+ expect(checkFsAllowed("write", { mode: "safe" }).allowed).toBe(false);
94
+ });
95
+ });
@@ -0,0 +1,147 @@
1
+ // Safety mode for destructive built-in tools (exec + filesystem writes).
2
+ //
3
+ // Resolved once per call from `JARELA_TOOL_SAFETY`. Three tiers:
4
+ //
5
+ // "safe" — read-only. Exec accepts only an allowlisted set of
6
+ // inspection commands (ls, git status, …); filesystem
7
+ // tools refuse every write, edit, move, copy, delete,
8
+ // or mkdir. Per-call `allow_unsafe` is IGNORED.
9
+ // "mostly_safe" — default. Exec blocks the obviously-dangerous pattern
10
+ // list (rm -rf /, shutdown, fork bomb, …); filesystem
11
+ // tools refuse credential paths and the Jarela data dir.
12
+ // Per-call `allow_unsafe=true` lifts the exec block for
13
+ // that single call.
14
+ // "bypass" — every guard off. For local development on a machine
15
+ // you control and trust completely. NOT for use behind
16
+ // a tunnel or with untrusted prompt sources.
17
+ //
18
+ // The mode is process-wide so prompt injection cannot escalate by
19
+ // passing arguments — the LLM can only ever *downgrade* (via
20
+ // `allow_unsafe=false` semantics, which is just "don't try to bypass").
21
+
22
export type SafetyMode = "safe" | "mostly_safe" | "bypass";

/**
 * Resolve the process-wide safety mode from `JARELA_TOOL_SAFETY`.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 * `"safe"` selects safe mode; `"bypass"` and its alias `"unsafe"` select
 * bypass. Anything else — including an unset or unrecognised value — yields
 * the default, `"mostly_safe"`.
 */
export function resolveSafetyMode(): SafetyMode {
  const raw = (process.env.JARELA_TOOL_SAFETY ?? "").trim().toLowerCase();
  if (raw === "safe") return "safe";
  if (raw === "bypass" || raw === "unsafe") return "bypass";
  return "mostly_safe";
}

// Inspection-only commands allowed in `safe` mode. Matched as the FIRST
// token (after stripping leading whitespace) — pipelines, redirections,
// command substitution, &&, ;, line breaks, etc. are all rejected because
// we cannot reason about what the right-hand side will do.
const SAFE_EXEC_ALLOWLIST = new Set([
  "ls", "dir", "pwd", "cd", "echo", "cat", "type", "head", "tail",
  "wc", "stat", "file", "which", "where", "whoami", "hostname",
  "date", "uname", "df", "du", "ps", "env", "printenv",
  "git", "node", "npm", "npx", "deno", "python", "python3", "pip", "pip3",
]);

// Allowlisted commands that are read-only ONLY when invoked with no
// arguments. `env` on its own prints the environment, but `env <cmd> …`
// executes <cmd>, which would let any program through the first-token check.
const SAFE_BARE_ONLY = new Set(["env"]);

// Subcommands considered read-only for tools that take a verb. We only
// need to enumerate the dangerous tools here — anything not listed falls
// back to "the whole tool is read-only" (e.g. `cat`, `ls`).
//
// NOTE(review): several verbs below can still mutate state depending on
// their arguments (e.g. `git config <key> <value>`, `git branch -D`,
// `git tag -d`, `git remote add`, `npm config set`, `pip config set`).
// Only the verb is checked here; tighten if `safe` must be strictly
// read-only.
const SAFE_SUBCOMMANDS: Record<string, Set<string>> = {
  git: new Set([
    "status", "log", "diff", "show", "blame", "branch", "tag",
    "remote", "ls-files", "ls-tree", "config", "rev-parse",
    "describe", "shortlog", "reflog",
  ]),
  npm: new Set(["ls", "list", "view", "info", "outdated", "config", "whoami", "ping", "doctor"]),
  npx: new Set<string>(), // npx runs arbitrary code; never allow under "safe"
  node: new Set<string>(), // bare `node` opens a REPL; `node script.js` runs anything
  python: new Set<string>(),
  python3: new Set<string>(),
  deno: new Set(["info", "doc"]),
  pip: new Set(["list", "show", "freeze", "config"]),
  pip3: new Set(["list", "show", "freeze", "config"]),
};

// Shell metacharacters that compose commands or redirect IO. Their
// presence in `safe` mode is grounds for rejection because the
// allowlist check only inspects the first token.
const COMPOSER_RE = /[|&;`$<>]|\$\(|\|\||&&/;

// A line break inside the command. A shell treats it as a command
// separator, so "ls\nrm x" would otherwise pass on the strength of "ls".
const LINE_BREAK_RE = /[\r\n]/;

export interface ExecAllowResult {
  allowed: boolean;
  reason?: string;
}

/**
 * Decide whether `command` may be executed under the given safety mode.
 *
 * - `bypass`: always allowed.
 * - `mostly_safe`: allowed unless the caller's pattern list flagged the
 *   command (`blockedByPattern`) and `allowUnsafe` was not set.
 * - `safe`: `allowUnsafe` and `blockedByPattern` are ignored. The command
 *   must be non-empty, contain no shell composition characters or line
 *   breaks, start with an allowlisted command, and — for tools with a
 *   subcommand allowlist — use a listed subcommand.
 *
 * @param command raw command line as it would be handed to the shell.
 * @param opts.mode resolved safety mode (see `resolveSafetyMode`).
 * @param opts.allowUnsafe per-call override; honoured only in `mostly_safe`.
 * @param opts.blockedByPattern whether the caller's dangerous-pattern check matched.
 * @returns `{ allowed: true }`, or `{ allowed: false, reason }` with a
 *          human-readable explanation.
 */
export function checkExecAllowed(
  command: string,
  opts: { mode: SafetyMode; allowUnsafe?: boolean; blockedByPattern: boolean },
): ExecAllowResult {
  if (opts.mode === "bypass") return { allowed: true };
  if (opts.mode === "mostly_safe") {
    if (opts.blockedByPattern && !opts.allowUnsafe) {
      return {
        allowed: false,
        reason:
          "Command blocked by safety policy (mode=mostly_safe). Pass allow_unsafe=true only when you fully trust the command.",
      };
    }
    return { allowed: true };
  }
  // safe mode
  const trimmed = command.trim();
  if (!trimmed) return { allowed: false, reason: "command is required" };
  if (LINE_BREAK_RE.test(trimmed)) {
    return {
      allowed: false,
      reason:
        "safe mode rejects multi-line commands because every line would run as its own command. " +
        "Set JARELA_TOOL_SAFETY=mostly_safe (or bypass) to allow composite commands.",
    };
  }
  if (COMPOSER_RE.test(trimmed)) {
    return {
      allowed: false,
      reason:
        "safe mode rejects pipelines, redirection, command substitution, &&, and ;. " +
        "Set JARELA_TOOL_SAFETY=mostly_safe (or bypass) to allow composite commands.",
    };
  }
  const tokens = trimmed.split(/\s+/);
  const head = tokens[0]?.toLowerCase();
  if (!head || !SAFE_EXEC_ALLOWLIST.has(head)) {
    return {
      allowed: false,
      reason:
        `safe mode allows only inspection commands (${[...SAFE_EXEC_ALLOWLIST].sort().join(", ")}). ` +
        "Set JARELA_TOOL_SAFETY=mostly_safe to enable the broader policy.",
    };
  }
  if (SAFE_BARE_ONLY.has(head) && tokens.length > 1) {
    return {
      allowed: false,
      reason:
        `safe mode allows '${head}' only without arguments because '${head} <command>' executes that command. ` +
        "Use mostly_safe or bypass.",
    };
  }
  const subAllowlist = SAFE_SUBCOMMANDS[head];
  if (subAllowlist) {
    // Leading "-" / "--" is stripped so the verb is compared bare.
    const sub = tokens[1]?.toLowerCase().replace(/^--?/, "");
    // An empty allowlist marks a tool that is never safe (node/python/npx
    // → arbitrary code), with or without a subcommand.
    if (subAllowlist.size === 0) {
      return {
        allowed: false,
        reason: `safe mode refuses '${head}' because it can execute arbitrary code. Use mostly_safe or bypass.`,
      };
    }
    // A bare invocation (no subcommand) is allowed — e.g. `git` alone
    // prints help.
    if (sub && !subAllowlist.has(sub)) {
      return {
        allowed: false,
        reason:
          `safe mode allows '${head}' only for: ${[...subAllowlist].sort().join(", ")}. ` +
          "Use mostly_safe or bypass for other subcommands.",
      };
    }
  }
  return { allowed: true };
}

// File-system op classification.
export type FsOp = "read" | "write";

/**
 * Decide whether a filesystem operation may proceed under the given mode.
 *
 * `bypass` and `mostly_safe` permit both reads and writes at this layer;
 * `safe` permits reads only.
 *
 * @param op whether the caller is about to read or mutate the filesystem.
 * @param opts.mode resolved safety mode (see `resolveSafetyMode`).
 * @returns `{ allowed: true }`, or `{ allowed: false, reason }` for a write
 *          in `safe` mode.
 */
export function checkFsAllowed(
  op: FsOp,
  opts: { mode: SafetyMode },
): ExecAllowResult {
  if (opts.mode === "bypass" || opts.mode === "mostly_safe") return { allowed: true };
  // safe mode: reads are fine, writes are not.
  if (op === "read") return { allowed: true };
  return {
    allowed: false,
    reason:
      "safe mode refuses filesystem mutations (write/edit/move/copy/delete/mkdir). " +
      "Set JARELA_TOOL_SAFETY=mostly_safe to enable writes outside credential dirs.",
  };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@circuitwall/jarela",
3
- "version": "0.9.3",
3
+ "version": "0.10.0",
4
4
  "description": "Jarela — local chat interface for LangGraph agents (multi-provider, single-process, SQLite-backed).",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Andrew Ge Wu",