@compilr-dev/agents 0.3.14 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,6 +15,9 @@
15
15
  [![npm version](https://img.shields.io/npm/v/@compilr-dev/agents.svg)](https://www.npmjs.com/package/@compilr-dev/agents)
16
16
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
17
17
 
18
+ > [!WARNING]
19
+ > This package is in beta. APIs may change between minor versions.
20
+
18
21
  ## Features
19
22
 
20
23
  - **Multi-LLM Support**: 9 providers -- Claude, OpenAI, Gemini, Ollama (local), Together AI, Groq, Fireworks, Perplexity, OpenRouter
package/dist/agent.js CHANGED
@@ -1784,14 +1784,23 @@ export class Agent {
1784
1784
  messages.push(assistantMsg);
1785
1785
  newMessages.push(assistantMsg);
1786
1786
  // Execute tools and add results
1787
- // Check if we can parallelize - only parallelize tools marked as parallel-safe
1788
- const parallelTools = toolUses.filter((tu) => {
1787
+ // A tool is parallel-safe if explicitly marked parallel OR readonly
1788
+ const isParallelSafe = (tu) => {
1789
1789
  const tool = this.toolRegistry.get(tu.name);
1790
- return tool?.parallel === true;
1791
- });
1792
- const canParallelize = parallelTools.length > 1 && parallelTools.length === toolUses.length;
1790
+ return tool?.parallel === true || tool?.readonly === true;
1791
+ };
1792
+ const groups = [];
1793
+ for (const tu of toolUses) {
1794
+ const safe = isParallelSafe(tu);
1795
+ if (groups.length > 0 && groups[groups.length - 1].parallel === safe) {
1796
+ groups[groups.length - 1].tools.push(tu);
1797
+ }
1798
+ else {
1799
+ groups.push({ tools: [tu], parallel: safe });
1800
+ }
1801
+ }
1793
1802
  // Helper to execute a single tool with all checks
1794
- const executeSingleTool = async (toolUse) => {
1803
+ const executeSingleTool = async (toolUse, inParallelGroup = false) => {
1795
1804
  // Check for abort
1796
1805
  if (signal?.aborted) {
1797
1806
  return {
@@ -1992,7 +2001,7 @@ export class Agent {
1992
2001
  ? JSON.stringify(result.result)
1993
2002
  : `Error: ${result.error ?? 'Unknown error'}`;
1994
2003
  // Context management (only for sequential - parallel handles this after)
1995
- if (!canParallelize && this.contextManager && this.autoContextManagement) {
2004
+ if (!inParallelGroup && this.contextManager && this.autoContextManagement) {
1996
2005
  const estimatedTokens = this.contextManager.estimateTokens(toolResultContent);
1997
2006
  const preflight = this.contextManager.canAddContent(estimatedTokens, 'toolResults');
1998
2007
  if (!preflight.allowed) {
@@ -2023,78 +2032,83 @@ export class Agent {
2023
2032
  aborted: false,
2024
2033
  };
2025
2034
  };
2026
- // Execute tools - parallel if all are parallel-safe, otherwise sequential
2027
- if (canParallelize) {
2028
- // Parallel execution
2029
- const results = await Promise.all(toolUses.map((tu) => executeSingleTool(tu)));
2030
- for (let i = 0; i < toolUses.length; i++) {
2031
- const toolUse = toolUses[i];
2032
- const { result, toolResultMsg, aborted: wasAborted } = results[i];
2033
- if (wasAborted) {
2034
- aborted = true;
2035
- break;
2036
- }
2037
- // Tool loop detection (still applies per-tool)
2038
- if (this.maxConsecutiveToolCalls > 0) {
2039
- const currentHash = hashToolCall(toolUse.name, toolUse.input);
2040
- if (currentHash === lastToolCallHash) {
2041
- consecutiveIdenticalCalls++;
2042
- if (consecutiveIdenticalCalls >= this.maxConsecutiveToolCalls) {
2043
- throw new ToolLoopError(toolUse.name, consecutiveIdenticalCalls, toolUse.input);
2044
- }
2045
- emit({
2046
- type: 'tool_loop_warning',
2047
- toolName: toolUse.name,
2048
- consecutiveCalls: consecutiveIdenticalCalls,
2049
- });
2035
+ // Execute tools using a group-based scheduler
2036
+ // Parallel-safe groups with >1 tool run concurrently; everything else runs sequentially.
2037
+ for (const group of groups) {
2038
+ if (aborted)
2039
+ break;
2040
+ if (group.parallel && group.tools.length > 1) {
2041
+ // Parallel execution for this group
2042
+ const results = await Promise.all(group.tools.map((tu) => executeSingleTool(tu, true)));
2043
+ for (let i = 0; i < group.tools.length; i++) {
2044
+ const toolUse = group.tools[i];
2045
+ const { result, toolResultMsg, aborted: wasAborted } = results[i];
2046
+ if (wasAborted) {
2047
+ aborted = true;
2048
+ break;
2050
2049
  }
2051
- else {
2052
- lastToolCallHash = currentHash;
2053
- consecutiveIdenticalCalls = 1;
2050
+ // Tool loop detection (still applies per-tool)
2051
+ if (this.maxConsecutiveToolCalls > 0) {
2052
+ const currentHash = hashToolCall(toolUse.name, toolUse.input);
2053
+ if (currentHash === lastToolCallHash) {
2054
+ consecutiveIdenticalCalls++;
2055
+ if (consecutiveIdenticalCalls >= this.maxConsecutiveToolCalls) {
2056
+ throw new ToolLoopError(toolUse.name, consecutiveIdenticalCalls, toolUse.input);
2057
+ }
2058
+ emit({
2059
+ type: 'tool_loop_warning',
2060
+ toolName: toolUse.name,
2061
+ consecutiveCalls: consecutiveIdenticalCalls,
2062
+ });
2063
+ }
2064
+ else {
2065
+ lastToolCallHash = currentHash;
2066
+ consecutiveIdenticalCalls = 1;
2067
+ }
2054
2068
  }
2069
+ const toolCallEntry = { name: toolUse.name, input: toolUse.input, result };
2070
+ toolCalls.push(toolCallEntry);
2071
+ iterationToolCalls.push(toolCallEntry);
2072
+ messages.push(toolResultMsg);
2073
+ newMessages.push(toolResultMsg);
2055
2074
  }
2056
- const toolCallEntry = { name: toolUse.name, input: toolUse.input, result };
2057
- toolCalls.push(toolCallEntry);
2058
- iterationToolCalls.push(toolCallEntry);
2059
- messages.push(toolResultMsg);
2060
- newMessages.push(toolResultMsg);
2061
2075
  }
2062
- }
2063
- else {
2064
- // Sequential execution (original loop, but using the helper)
2065
- for (const toolUse of toolUses) {
2066
- const { result, toolResultMsg, skipped, aborted: wasAborted, } = await executeSingleTool(toolUse);
2067
- if (wasAborted) {
2068
- aborted = true;
2069
- break;
2070
- }
2071
- // Tool loop detection
2072
- if (this.maxConsecutiveToolCalls > 0) {
2073
- const currentHash = hashToolCall(toolUse.name, toolUse.input);
2074
- if (currentHash === lastToolCallHash) {
2075
- consecutiveIdenticalCalls++;
2076
- if (consecutiveIdenticalCalls >= this.maxConsecutiveToolCalls) {
2077
- throw new ToolLoopError(toolUse.name, consecutiveIdenticalCalls, toolUse.input);
2076
+ else {
2077
+ // Sequential execution for this group
2078
+ for (const toolUse of group.tools) {
2079
+ const { result, toolResultMsg, skipped, aborted: wasAborted, } = await executeSingleTool(toolUse);
2080
+ if (wasAborted) {
2081
+ aborted = true;
2082
+ break;
2083
+ }
2084
+ // Tool loop detection
2085
+ if (this.maxConsecutiveToolCalls > 0) {
2086
+ const currentHash = hashToolCall(toolUse.name, toolUse.input);
2087
+ if (currentHash === lastToolCallHash) {
2088
+ consecutiveIdenticalCalls++;
2089
+ if (consecutiveIdenticalCalls >= this.maxConsecutiveToolCalls) {
2090
+ throw new ToolLoopError(toolUse.name, consecutiveIdenticalCalls, toolUse.input);
2091
+ }
2092
+ emit({
2093
+ type: 'tool_loop_warning',
2094
+ toolName: toolUse.name,
2095
+ consecutiveCalls: consecutiveIdenticalCalls,
2096
+ });
2097
+ }
2098
+ else {
2099
+ lastToolCallHash = currentHash;
2100
+ consecutiveIdenticalCalls = 1;
2078
2101
  }
2079
- emit({
2080
- type: 'tool_loop_warning',
2081
- toolName: toolUse.name,
2082
- consecutiveCalls: consecutiveIdenticalCalls,
2083
- });
2084
2102
  }
2085
- else {
2086
- lastToolCallHash = currentHash;
2087
- consecutiveIdenticalCalls = 1;
2103
+ const toolCallEntry = { name: toolUse.name, input: toolUse.input, result };
2104
+ toolCalls.push(toolCallEntry);
2105
+ iterationToolCalls.push(toolCallEntry);
2106
+ messages.push(toolResultMsg);
2107
+ newMessages.push(toolResultMsg);
2108
+ if (skipped) {
2109
+ continue;
2088
2110
  }
2089
2111
  }
2090
- const toolCallEntry = { name: toolUse.name, input: toolUse.input, result };
2091
- toolCalls.push(toolCallEntry);
2092
- iterationToolCalls.push(toolCallEntry);
2093
- messages.push(toolResultMsg);
2094
- newMessages.push(toolResultMsg);
2095
- if (skipped) {
2096
- continue;
2097
- }
2098
2112
  }
2099
2113
  }
2100
2114
  if (aborted) {
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Effort Estimation
3
+ *
4
+ * Pure function to estimate effort level from raw signals.
5
+ * Score-based: each signal contributes points, thresholds map to effort levels.
6
+ */
7
+ import type { Effort, EffortSignals, EffortWeights } from './types.js';
8
+ /**
9
+ * Default weights for effort estimation.
10
+ */
11
+ export declare const DEFAULT_WEIGHTS: EffortWeights;
12
+ /**
13
+ * Ordered effort levels for ordinal comparison.
14
+ * Index 0 = lowest, index 4 = highest.
15
+ */
16
+ export declare const EFFORT_ORDER: readonly Effort[];
17
+ /**
18
+ * Estimate effort level from raw signals.
19
+ *
20
+ * Score formula:
21
+ * fileCount * fileCountMultiplier
22
+ * + min(linesChanged / linesPerPoint, 10)
23
+ * + toolCallCount * toolCallWeight
24
+ * + (durationMs / 60000) / minutesPerPoint
25
+ * + complexity bonuses: newFiles(+5), multiLang(+3), tests(+5), config(+2)
26
+ *
27
+ * Thresholds:
28
+ * < 5 = trivial, < 15 = low, < 40 = medium, < 100 = high, else = significant
29
+ */
30
+ export declare function estimateEffort(signals: EffortSignals, weights?: Partial<EffortWeights>): Effort;
@@ -0,0 +1,86 @@
/**
 * Effort Estimation
 *
 * Pure function to estimate effort level from raw signals.
 * Score-based: each signal contributes points, thresholds map to effort levels.
 */
// =============================================================================
// Constants
// =============================================================================
/**
 * Default weights for effort estimation.
 */
export const DEFAULT_WEIGHTS = {
    fileCountMultiplier: 2,
    linesPerPoint: 50,
    minutesPerPoint: 1,
    toolCallWeight: 1,
};
/**
 * Ordered effort levels for ordinal comparison.
 * Index 0 = lowest, index 4 = highest.
 */
export const EFFORT_ORDER = [
    'trivial',
    'low',
    'medium',
    'high',
    'significant',
];
// Lines-changed contribution is capped at this many points
const LINES_CAP = 10;
// Complexity bonuses
const BONUS_NEW_FILES = 5;
const BONUS_MULTI_LANG = 3;
const BONUS_TESTS = 5;
const BONUS_CONFIG = 2;
// Score thresholds (exclusive upper bounds)
const THRESHOLD_TRIVIAL = 5;
const THRESHOLD_LOW = 15;
const THRESHOLD_MEDIUM = 40;
const THRESHOLD_HIGH = 100;
// Milliseconds in one minute, used to convert durationMs to minutes
const MS_PER_MINUTE = 60_000;
// =============================================================================
// Internal helpers
// =============================================================================
/**
 * Merge caller overrides onto DEFAULT_WEIGHTS, skipping nullish values.
 *
 * A plain object spread would let `{ linesPerPoint: undefined }` replace the
 * default with `undefined`, which turns the whole score into NaN.
 */
function resolveWeights(overrides) {
    const resolved = { ...DEFAULT_WEIGHTS };
    for (const [key, value] of Object.entries(overrides ?? {})) {
        if (value !== undefined && value !== null) {
            resolved[key] = value;
        }
    }
    return resolved;
}
/**
 * Replace a NaN score component with 0 so that one unusable signal
 * (missing field, 0 / 0) contributes nothing instead of poisoning the sum.
 * Infinity is deliberately left alone: it is a meaningful "unbounded" result.
 */
function nanToZero(value) {
    return Number.isNaN(value) ? 0 : value;
}
// =============================================================================
// Public API
// =============================================================================
/**
 * Estimate effort level from raw signals.
 *
 * Score formula:
 *   fileCount * fileCountMultiplier
 *   + min(linesChanged / linesPerPoint, 10)
 *   + toolCallCount * toolCallWeight
 *   + (durationMs / 60000) / minutesPerPoint
 *   + complexity bonuses: newFiles(+5), multiLang(+3), tests(+5), config(+2)
 *
 * Thresholds:
 *   < 5 = trivial, < 15 = low, < 40 = medium, < 100 = high, else = significant
 *
 * Any component that evaluates to NaN is counted as 0. Without this, a NaN
 * score fails every `<` comparison below and is reported as 'significant'.
 *
 * @param signals - Raw signals (file count, lines changed, tool calls,
 *   duration, complexity indicators). `iterationCount` is not used in the score.
 * @param weights - Optional overrides for DEFAULT_WEIGHTS; nullish entries
 *   are ignored and the default is kept.
 * @returns One of the EFFORT_ORDER levels.
 */
export function estimateEffort(signals, weights) {
    const w = resolveWeights(weights);
    // Base score components
    const fileScore = nanToZero(signals.fileCount * w.fileCountMultiplier);
    const linesScore = nanToZero(Math.min(signals.linesChanged / w.linesPerPoint, LINES_CAP));
    const toolScore = nanToZero(signals.toolCallCount * w.toolCallWeight);
    const timeScore = nanToZero(signals.durationMs / MS_PER_MINUTE / w.minutesPerPoint);
    // Complexity bonuses (tolerate a missing indicator object)
    const indicators = signals.complexityIndicators ?? {};
    let bonus = 0;
    if (indicators.newFiles) {
        bonus += BONUS_NEW_FILES;
    }
    if (indicators.multiLanguage) {
        bonus += BONUS_MULTI_LANG;
    }
    if (indicators.tests) {
        bonus += BONUS_TESTS;
    }
    if (indicators.configChanges) {
        bonus += BONUS_CONFIG;
    }
    const score = fileScore + linesScore + toolScore + timeScore + bonus;
    // Map score to effort level
    if (score < THRESHOLD_TRIVIAL) {
        return 'trivial';
    }
    if (score < THRESHOLD_LOW) {
        return 'low';
    }
    if (score < THRESHOLD_MEDIUM) {
        return 'medium';
    }
    if (score < THRESHOLD_HIGH) {
        return 'high';
    }
    return 'significant';
}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Episodes Module
3
+ *
4
+ * Work history tracking with effort estimation.
5
+ */
6
+ export type { Effort, WorkEpisode, EffortSignals, EffortWeights, EffortSummary, ProjectWorkSummary, EpisodeStore, } from './types.js';
7
+ export { estimateEffort, DEFAULT_WEIGHTS, EFFORT_ORDER } from './effort.js';
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Episodes Module
3
+ *
4
+ * Work history tracking with effort estimation.
5
+ */
6
+ // Functions and constants
7
+ export { estimateEffort, DEFAULT_WEIGHTS, EFFORT_ORDER } from './effort.js';
@@ -0,0 +1,158 @@
1
+ /**
2
+ * Episodic Memory Types
3
+ *
4
+ * Core types for tracking work history with effort estimation.
5
+ * These types enable agents to understand what work has been done,
6
+ * by whom, and how much effort was involved.
7
+ */
8
+ /**
9
+ * Effort level for a work episode.
10
+ * Ordered from least to most significant.
11
+ */
12
+ export type Effort = 'trivial' | 'low' | 'medium' | 'high' | 'significant';
13
+ /**
14
+ * A single unit of tracked work.
15
+ * Represents something an agent did — e.g., editing files, running tests, committing.
16
+ */
17
+ export interface WorkEpisode {
18
+ /** Unique episode ID (UUID) */
19
+ id: string;
20
+ /** Agent ID that performed this work (e.g., 'default', 'backend', 'tester') */
21
+ agentId: string;
22
+ /** Terminal session prefix (first 8 chars of session ID) */
23
+ terminalPrefix: string;
24
+ /** High-level action label (e.g., 'edit', 'test', 'commit', 'refactor') */
25
+ action: string;
26
+ /** Human-readable summary of what was done */
27
+ summary: string;
28
+ /** Files affected by this episode */
29
+ files: string[];
30
+ /** Total lines changed (added + removed), if known */
31
+ linesChanged?: number;
32
+ /** ISO timestamp when the episode was recorded */
33
+ timestamp: string;
34
+ /** Session ID for grouping episodes within a session */
35
+ sessionId: string;
36
+ /** Estimated effort level */
37
+ effort: Effort;
38
+ /** Duration in milliseconds, if tracked */
39
+ durationMs?: number;
40
+ /** Number of tool calls in this episode */
41
+ toolCalls?: number;
42
+ /** Related work item ID (from workitem system) */
43
+ workItemId?: string;
44
+ /** Related git commit hashes */
45
+ relatedCommits?: string[];
46
+ /** Parent episode ID (for sub-tasks) */
47
+ parentEpisode?: string;
48
+ }
49
+ /**
50
+ * Raw signals used to estimate effort.
51
+ * These are collected from tool calls and timing data.
52
+ */
53
+ export interface EffortSignals {
54
+ /** Number of unique files touched */
55
+ fileCount: number;
56
+ /** Total lines changed (added + removed) */
57
+ linesChanged: number;
58
+ /** Total number of tool calls */
59
+ toolCallCount: number;
60
+ /** Duration in milliseconds */
61
+ durationMs: number;
62
+ /** Number of edit/write iterations on same files */
63
+ iterationCount: number;
64
+ /** Complexity indicators detected */
65
+ complexityIndicators: {
66
+ /** New files were created (not just edited) */
67
+ newFiles?: boolean;
68
+ /** Multiple languages involved */
69
+ multiLanguage?: boolean;
70
+ /** Test files were created or modified */
71
+ tests?: boolean;
72
+ /** Config files were modified */
73
+ configChanges?: boolean;
74
+ };
75
+ }
76
+ /**
77
+ * Tunable weights for effort estimation.
78
+ * All weights are multipliers or divisors applied to raw signals.
79
+ */
80
+ export interface EffortWeights {
81
+ /** Points per file (default: 2) */
82
+ fileCountMultiplier: number;
83
+ /** Lines per point (default: 50) — higher means lines matter less */
84
+ linesPerPoint: number;
85
+ /** Minutes per point (default: 1) */
86
+ minutesPerPoint: number;
87
+ /** Points per tool call (default: 1) */
88
+ toolCallWeight: number;
89
+ }
90
+ /**
91
+ * Summary of effort across multiple episodes.
92
+ */
93
+ export interface EffortSummary {
94
+ /** Number of episodes included */
95
+ episodeCount: number;
96
+ /** Maximum effort level across episodes */
97
+ totalEffort: Effort;
98
+ /** Total time spent in milliseconds */
99
+ timeSpentMs: number;
100
+ /** Unique agent IDs involved */
101
+ agents: string[];
102
+ /** Human-readable description */
103
+ description: string;
104
+ }
105
+ /**
106
+ * Project-level work summary with breakdown.
107
+ */
108
+ export interface ProjectWorkSummary {
109
+ /** Total number of episodes */
110
+ episodeCount: number;
111
+ /** Maximum effort level */
112
+ totalEffort: Effort;
113
+ /** Total time spent in milliseconds */
114
+ timeSpentMs: number;
115
+ /** Effort breakdown by agent */
116
+ agentBreakdown: Array<{
117
+ agentId: string;
118
+ episodeCount: number;
119
+ maxEffort: Effort;
120
+ timeSpentMs: number;
121
+ }>;
122
+ /** Most frequently touched files */
123
+ topFiles: Array<{
124
+ path: string;
125
+ touchCount: number;
126
+ }>;
127
+ /** Episodes since the last git commit */
128
+ uncommittedWork: WorkEpisode[];
129
+ }
130
+ /**
131
+ * Persistence interface for work episodes.
132
+ * Write methods may be async (for file I/O), read methods are synchronous
133
+ * (read from in-memory cache).
134
+ */
135
+ export interface EpisodeStore {
136
+ /** Save a single episode */
137
+ save(episode: WorkEpisode): void | Promise<void>;
138
+ /** Save multiple episodes at once */
139
+ saveBatch(episodes: WorkEpisode[]): void | Promise<void>;
140
+ /** Get all episodes */
141
+ getAll(): WorkEpisode[];
142
+ /** Get episodes for specific files */
143
+ getByFiles(files: string[]): WorkEpisode[];
144
+ /** Get episodes by agent ID */
145
+ getByAgent(agentId: string): WorkEpisode[];
146
+ /** Get episodes by session ID */
147
+ getBySession(sessionId: string): WorkEpisode[];
148
+ /** Get episodes within a time range (ISO timestamps) */
149
+ getByTimeRange(start: string, end: string): WorkEpisode[];
150
+ /** Get the N most recent episodes */
151
+ getRecent(count: number): WorkEpisode[];
152
+ /** Get project work summary */
153
+ getWorkSummary(): ProjectWorkSummary;
154
+ /** Get the maximum effort level across all episodes (or a subset) */
155
+ getTotalEffort(episodes?: WorkEpisode[]): Effort;
156
+ /** Remove episodes older than maxAgeMs milliseconds. Returns the number of episodes removed. */
157
+ cleanup(maxAgeMs: number): number | Promise<number>;
158
+ }
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Episodic Memory Types
3
+ *
4
+ * Core types for tracking work history with effort estimation.
5
+ * These types enable agents to understand what work has been done,
6
+ * by whom, and how much effort was involved.
7
+ */
8
+ export {};
@@ -2,5 +2,7 @@
2
2
  * Guardrails module - Pattern-based safety checks for tool execution
3
3
  */
4
4
  export { GuardrailManager } from './manager.js';
5
+ export { parseShellCommand } from './shell-parser.js';
6
+ export type { ShellToken } from './shell-parser.js';
5
7
  export { getBuiltinGuardrails, isBuiltinGuardrail, getBuiltinGuardrailIds, getGuardrailsByTag, BUILTIN_GUARDRAILS, } from './builtin.js';
6
8
  export type { Guardrail, GuardrailInput, GuardrailAction, GuardrailResult, GuardrailContext, GuardrailManagerOptions, GuardrailTriggeredHandler, GuardrailEventType, GuardrailEvent, GuardrailEventHandler, } from './types.js';
@@ -2,4 +2,5 @@
2
2
  * Guardrails module - Pattern-based safety checks for tool execution
3
3
  */
4
4
  export { GuardrailManager } from './manager.js';
5
+ export { parseShellCommand } from './shell-parser.js';
5
6
  export { getBuiltinGuardrails, isBuiltinGuardrail, getBuiltinGuardrailIds, getGuardrailsByTag, BUILTIN_GUARDRAILS, } from './builtin.js';
@@ -91,11 +91,32 @@ export declare class GuardrailManager {
91
91
  /**
92
92
  * Check tool input against all applicable guardrails
93
93
  *
94
+ * For inputs with a `command` field (e.g. bash tool), automatically
95
+ * parses compound commands and checks each subcommand independently.
96
+ *
94
97
  * @param toolName - Name of the tool being called
95
98
  * @param input - Tool input to check
96
99
  * @returns GuardrailResult indicating if any guardrail was triggered
97
100
  */
98
101
  check(toolName: string, input: unknown): GuardrailResult;
102
+ /**
103
+ * Check a compound shell command against guardrails.
104
+ *
105
+ * Parses the command into subcommands (splitting on |, &&, ||, ;)
106
+ * and validates each independently. Also checks the full command string
107
+ * to catch cross-subcommand patterns (e.g. `curl ... | bash`).
108
+ * Returns the highest-severity match with subcommand context.
109
+ *
110
+ * @param toolName - Name of the tool being called
111
+ * @param command - The shell command string
112
+ * @param originalInput - The original tool input (for result metadata)
113
+ * @returns GuardrailResult indicating if any guardrail was triggered
114
+ */
115
+ checkCommand(toolName: string, command: string, originalInput?: unknown): GuardrailResult;
116
+ /**
117
+ * Check an input string against all applicable guardrail patterns
118
+ */
119
+ private checkPatterns;
99
120
  /**
100
121
  * Check and handle guardrail triggering
101
122
  *
@@ -2,6 +2,7 @@
2
2
  * GuardrailManager - Pattern-based safety checks for tool execution
3
3
  */
4
4
  import { getBuiltinGuardrails } from './builtin.js';
5
+ import { parseShellCommand } from './shell-parser.js';
5
6
  /**
6
7
  * Default options for GuardrailManager
7
8
  */
@@ -159,6 +160,9 @@ export class GuardrailManager {
159
160
  /**
160
161
  * Check tool input against all applicable guardrails
161
162
  *
163
+ * For inputs with a `command` field (e.g. bash tool), automatically
164
+ * parses compound commands and checks each subcommand independently.
165
+ *
162
166
  * @param toolName - Name of the tool being called
163
167
  * @param input - Tool input to check
164
168
  * @returns GuardrailResult indicating if any guardrail was triggered
@@ -168,8 +172,72 @@ export class GuardrailManager {
168
172
  if (!this.options.enabled) {
169
173
  return { triggered: false };
170
174
  }
171
- // Stringify the input for pattern matching
175
+ // Auto-detect compound shell commands (bash tool sends { command: "..." })
176
+ if (typeof input === 'object' && input !== null && 'command' in input) {
177
+ const cmd = input.command;
178
+ if (typeof cmd === 'string') {
179
+ return this.checkCommand(toolName, cmd, input);
180
+ }
181
+ }
182
+ // Original: pattern match on stringified input
172
183
  const inputString = this.stringifyInput(input);
184
+ return this.checkPatterns(toolName, inputString, input);
185
+ }
186
+ /**
187
+ * Check a compound shell command against guardrails.
188
+ *
189
+ * Parses the command into subcommands (splitting on |, &&, ||, ;)
190
+ * and validates each independently. Also checks the full command string
191
+ * to catch cross-subcommand patterns (e.g. `curl ... | bash`).
192
+ * Returns the highest-severity match with subcommand context.
193
+ *
194
+ * @param toolName - Name of the tool being called
195
+ * @param command - The shell command string
196
+ * @param originalInput - The original tool input (for result metadata)
197
+ * @returns GuardrailResult indicating if any guardrail was triggered
198
+ */
199
+ checkCommand(toolName, command, originalInput) {
200
+ const tokens = parseShellCommand(command);
201
+ if (tokens.length <= 1) {
202
+ // Single command — delegate to pattern matching directly
203
+ return this.checkPatterns(toolName, command, originalInput);
204
+ }
205
+ const applicableGuardrails = this.getForTool(toolName);
206
+ const severityOrder = { block: 3, confirm: 2, warn: 1 };
207
+ // Start with full-string check (catches cross-subcommand patterns like curl|bash)
208
+ let worst = this.checkPatterns(toolName, command, originalInput);
209
+ // Check each subcommand against all guardrails, keep highest severity
210
+ for (const token of tokens) {
211
+ for (const guardrail of applicableGuardrails) {
212
+ for (const pattern of guardrail.patterns) {
213
+ const match = token.command.match(pattern);
214
+ if (match) {
215
+ const result = {
216
+ triggered: true,
217
+ guardrail,
218
+ match: match[0],
219
+ action: guardrail.action,
220
+ toolName,
221
+ input: originalInput,
222
+ subcommand: token.command,
223
+ subcommandIndex: token.index,
224
+ };
225
+ const resultSeverity = severityOrder[result.action ?? 'warn'] ?? 0;
226
+ const worstSeverity = severityOrder[worst.action ?? ''] ?? 0;
227
+ if (resultSeverity >= worstSeverity) {
228
+ worst = result;
229
+ }
230
+ break; // Found match for this guardrail, move to next
231
+ }
232
+ }
233
+ }
234
+ }
235
+ return worst;
236
+ }
237
+ /**
238
+ * Check an input string against all applicable guardrail patterns
239
+ */
240
+ checkPatterns(toolName, inputString, input) {
173
241
  // Get guardrails that apply to this tool
174
242
  const applicableGuardrails = this.getForTool(toolName);
175
243
  // Check each guardrail