@compilr-dev/agents 0.3.14 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/dist/agent.js +85 -71
- package/dist/episodes/effort.d.ts +30 -0
- package/dist/episodes/effort.js +86 -0
- package/dist/episodes/index.d.ts +7 -0
- package/dist/episodes/index.js +7 -0
- package/dist/episodes/types.d.ts +158 -0
- package/dist/episodes/types.js +8 -0
- package/dist/guardrails/index.d.ts +2 -0
- package/dist/guardrails/index.js +1 -0
- package/dist/guardrails/manager.d.ts +21 -0
- package/dist/guardrails/manager.js +69 -1
- package/dist/guardrails/shell-parser.d.ts +36 -0
- package/dist/guardrails/shell-parser.js +103 -0
- package/dist/guardrails/types.d.ts +8 -0
- package/dist/index.d.ts +4 -2
- package/dist/index.js +5 -1
- package/dist/providers/gemini-native.js +8 -1
- package/dist/tools/builtin/glob.js +2 -0
- package/dist/tools/builtin/grep.js +2 -0
- package/dist/tools/builtin/read-file.js +2 -0
- package/dist/tools/builtin/todo.js +2 -0
- package/dist/tools/builtin/web-fetch.js +2 -0
- package/dist/tools/define.d.ts +7 -0
- package/dist/tools/define.js +1 -0
- package/dist/tools/types.d.ts +7 -0
- package/dist/tracing/index.d.ts +3 -0
- package/dist/tracing/index.js +4 -1
- package/dist/tracing/otel-attributes.d.ts +59 -0
- package/dist/tracing/otel-attributes.js +71 -0
- package/dist/tracing/otel-hooks.d.ts +61 -0
- package/dist/tracing/otel-hooks.js +220 -0
- package/package.json +3 -1
package/README.md
CHANGED
|
@@ -15,6 +15,9 @@
|
|
|
15
15
|
[npm package](https://www.npmjs.com/package/@compilr-dev/agents)
|
|
16
16
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
17
17
|
|
|
18
|
+
> [!WARNING]
|
|
19
|
+
> This package is in beta. APIs may change between minor versions.
|
|
20
|
+
|
|
18
21
|
## Features
|
|
19
22
|
|
|
20
23
|
- **Multi-LLM Support**: 9 providers -- Claude, OpenAI, Gemini, Ollama (local), Together AI, Groq, Fireworks, Perplexity, OpenRouter
|
package/dist/agent.js
CHANGED
|
@@ -1784,14 +1784,23 @@ export class Agent {
|
|
|
1784
1784
|
messages.push(assistantMsg);
|
|
1785
1785
|
newMessages.push(assistantMsg);
|
|
1786
1786
|
// Execute tools and add results
|
|
1787
|
-
//
|
|
1788
|
-
const
|
|
1787
|
+
// A tool is parallel-safe if explicitly marked parallel OR readonly
|
|
1788
|
+
const isParallelSafe = (tu) => {
|
|
1789
1789
|
const tool = this.toolRegistry.get(tu.name);
|
|
1790
|
-
return tool?.parallel === true;
|
|
1791
|
-
}
|
|
1792
|
-
const
|
|
1790
|
+
return tool?.parallel === true || tool?.readonly === true;
|
|
1791
|
+
};
|
|
1792
|
+
const groups = [];
|
|
1793
|
+
for (const tu of toolUses) {
|
|
1794
|
+
const safe = isParallelSafe(tu);
|
|
1795
|
+
if (groups.length > 0 && groups[groups.length - 1].parallel === safe) {
|
|
1796
|
+
groups[groups.length - 1].tools.push(tu);
|
|
1797
|
+
}
|
|
1798
|
+
else {
|
|
1799
|
+
groups.push({ tools: [tu], parallel: safe });
|
|
1800
|
+
}
|
|
1801
|
+
}
|
|
1793
1802
|
// Helper to execute a single tool with all checks
|
|
1794
|
-
const executeSingleTool = async (toolUse) => {
|
|
1803
|
+
const executeSingleTool = async (toolUse, inParallelGroup = false) => {
|
|
1795
1804
|
// Check for abort
|
|
1796
1805
|
if (signal?.aborted) {
|
|
1797
1806
|
return {
|
|
@@ -1992,7 +2001,7 @@ export class Agent {
|
|
|
1992
2001
|
? JSON.stringify(result.result)
|
|
1993
2002
|
: `Error: ${result.error ?? 'Unknown error'}`;
|
|
1994
2003
|
// Context management (only for sequential - parallel handles this after)
|
|
1995
|
-
if (!
|
|
2004
|
+
if (!inParallelGroup && this.contextManager && this.autoContextManagement) {
|
|
1996
2005
|
const estimatedTokens = this.contextManager.estimateTokens(toolResultContent);
|
|
1997
2006
|
const preflight = this.contextManager.canAddContent(estimatedTokens, 'toolResults');
|
|
1998
2007
|
if (!preflight.allowed) {
|
|
@@ -2023,78 +2032,83 @@ export class Agent {
|
|
|
2023
2032
|
aborted: false,
|
|
2024
2033
|
};
|
|
2025
2034
|
};
|
|
2026
|
-
// Execute tools
|
|
2027
|
-
|
|
2028
|
-
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
if (currentHash === lastToolCallHash) {
|
|
2041
|
-
consecutiveIdenticalCalls++;
|
|
2042
|
-
if (consecutiveIdenticalCalls >= this.maxConsecutiveToolCalls) {
|
|
2043
|
-
throw new ToolLoopError(toolUse.name, consecutiveIdenticalCalls, toolUse.input);
|
|
2044
|
-
}
|
|
2045
|
-
emit({
|
|
2046
|
-
type: 'tool_loop_warning',
|
|
2047
|
-
toolName: toolUse.name,
|
|
2048
|
-
consecutiveCalls: consecutiveIdenticalCalls,
|
|
2049
|
-
});
|
|
2035
|
+
// Execute tools — group-based scheduler
|
|
2036
|
+
// Parallel-safe groups with >1 tool run concurrently; everything else runs sequentially.
|
|
2037
|
+
for (const group of groups) {
|
|
2038
|
+
if (aborted)
|
|
2039
|
+
break;
|
|
2040
|
+
if (group.parallel && group.tools.length > 1) {
|
|
2041
|
+
// Parallel execution for this group
|
|
2042
|
+
const results = await Promise.all(group.tools.map((tu) => executeSingleTool(tu, true)));
|
|
2043
|
+
for (let i = 0; i < group.tools.length; i++) {
|
|
2044
|
+
const toolUse = group.tools[i];
|
|
2045
|
+
const { result, toolResultMsg, aborted: wasAborted } = results[i];
|
|
2046
|
+
if (wasAborted) {
|
|
2047
|
+
aborted = true;
|
|
2048
|
+
break;
|
|
2050
2049
|
}
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2050
|
+
// Tool loop detection (still applies per-tool)
|
|
2051
|
+
if (this.maxConsecutiveToolCalls > 0) {
|
|
2052
|
+
const currentHash = hashToolCall(toolUse.name, toolUse.input);
|
|
2053
|
+
if (currentHash === lastToolCallHash) {
|
|
2054
|
+
consecutiveIdenticalCalls++;
|
|
2055
|
+
if (consecutiveIdenticalCalls >= this.maxConsecutiveToolCalls) {
|
|
2056
|
+
throw new ToolLoopError(toolUse.name, consecutiveIdenticalCalls, toolUse.input);
|
|
2057
|
+
}
|
|
2058
|
+
emit({
|
|
2059
|
+
type: 'tool_loop_warning',
|
|
2060
|
+
toolName: toolUse.name,
|
|
2061
|
+
consecutiveCalls: consecutiveIdenticalCalls,
|
|
2062
|
+
});
|
|
2063
|
+
}
|
|
2064
|
+
else {
|
|
2065
|
+
lastToolCallHash = currentHash;
|
|
2066
|
+
consecutiveIdenticalCalls = 1;
|
|
2067
|
+
}
|
|
2054
2068
|
}
|
|
2069
|
+
const toolCallEntry = { name: toolUse.name, input: toolUse.input, result };
|
|
2070
|
+
toolCalls.push(toolCallEntry);
|
|
2071
|
+
iterationToolCalls.push(toolCallEntry);
|
|
2072
|
+
messages.push(toolResultMsg);
|
|
2073
|
+
newMessages.push(toolResultMsg);
|
|
2055
2074
|
}
|
|
2056
|
-
const toolCallEntry = { name: toolUse.name, input: toolUse.input, result };
|
|
2057
|
-
toolCalls.push(toolCallEntry);
|
|
2058
|
-
iterationToolCalls.push(toolCallEntry);
|
|
2059
|
-
messages.push(toolResultMsg);
|
|
2060
|
-
newMessages.push(toolResultMsg);
|
|
2061
2075
|
}
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2077
|
-
|
|
2076
|
+
else {
|
|
2077
|
+
// Sequential execution for this group
|
|
2078
|
+
for (const toolUse of group.tools) {
|
|
2079
|
+
const { result, toolResultMsg, skipped, aborted: wasAborted, } = await executeSingleTool(toolUse);
|
|
2080
|
+
if (wasAborted) {
|
|
2081
|
+
aborted = true;
|
|
2082
|
+
break;
|
|
2083
|
+
}
|
|
2084
|
+
// Tool loop detection
|
|
2085
|
+
if (this.maxConsecutiveToolCalls > 0) {
|
|
2086
|
+
const currentHash = hashToolCall(toolUse.name, toolUse.input);
|
|
2087
|
+
if (currentHash === lastToolCallHash) {
|
|
2088
|
+
consecutiveIdenticalCalls++;
|
|
2089
|
+
if (consecutiveIdenticalCalls >= this.maxConsecutiveToolCalls) {
|
|
2090
|
+
throw new ToolLoopError(toolUse.name, consecutiveIdenticalCalls, toolUse.input);
|
|
2091
|
+
}
|
|
2092
|
+
emit({
|
|
2093
|
+
type: 'tool_loop_warning',
|
|
2094
|
+
toolName: toolUse.name,
|
|
2095
|
+
consecutiveCalls: consecutiveIdenticalCalls,
|
|
2096
|
+
});
|
|
2097
|
+
}
|
|
2098
|
+
else {
|
|
2099
|
+
lastToolCallHash = currentHash;
|
|
2100
|
+
consecutiveIdenticalCalls = 1;
|
|
2078
2101
|
}
|
|
2079
|
-
emit({
|
|
2080
|
-
type: 'tool_loop_warning',
|
|
2081
|
-
toolName: toolUse.name,
|
|
2082
|
-
consecutiveCalls: consecutiveIdenticalCalls,
|
|
2083
|
-
});
|
|
2084
2102
|
}
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2103
|
+
const toolCallEntry = { name: toolUse.name, input: toolUse.input, result };
|
|
2104
|
+
toolCalls.push(toolCallEntry);
|
|
2105
|
+
iterationToolCalls.push(toolCallEntry);
|
|
2106
|
+
messages.push(toolResultMsg);
|
|
2107
|
+
newMessages.push(toolResultMsg);
|
|
2108
|
+
if (skipped) {
|
|
2109
|
+
continue;
|
|
2088
2110
|
}
|
|
2089
2111
|
}
|
|
2090
|
-
const toolCallEntry = { name: toolUse.name, input: toolUse.input, result };
|
|
2091
|
-
toolCalls.push(toolCallEntry);
|
|
2092
|
-
iterationToolCalls.push(toolCallEntry);
|
|
2093
|
-
messages.push(toolResultMsg);
|
|
2094
|
-
newMessages.push(toolResultMsg);
|
|
2095
|
-
if (skipped) {
|
|
2096
|
-
continue;
|
|
2097
|
-
}
|
|
2098
2112
|
}
|
|
2099
2113
|
}
|
|
2100
2114
|
if (aborted) {
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Effort Estimation
|
|
3
|
+
*
|
|
4
|
+
* Pure function to estimate effort level from raw signals.
|
|
5
|
+
* Score-based: each signal contributes points, thresholds map to effort levels.
|
|
6
|
+
*/
|
|
7
|
+
import type { Effort, EffortSignals, EffortWeights } from './types.js';
|
|
8
|
+
/**
|
|
9
|
+
* Default weights for effort estimation.
|
|
10
|
+
*/
|
|
11
|
+
export declare const DEFAULT_WEIGHTS: EffortWeights;
|
|
12
|
+
/**
|
|
13
|
+
* Ordered effort levels for ordinal comparison.
|
|
14
|
+
* Index 0 = lowest, index 4 = highest.
|
|
15
|
+
*/
|
|
16
|
+
export declare const EFFORT_ORDER: readonly Effort[];
|
|
17
|
+
/**
|
|
18
|
+
* Estimate effort level from raw signals.
|
|
19
|
+
*
|
|
20
|
+
* Score formula:
|
|
21
|
+
* fileCount * fileCountMultiplier
|
|
22
|
+
* + min(linesChanged / linesPerPoint, 10)
|
|
23
|
+
* + toolCallCount * toolCallWeight
|
|
24
|
+
* + (durationMs / 60000) / minutesPerPoint
|
|
25
|
+
* + complexity bonuses: newFiles(+5), multiLang(+3), tests(+5), config(+2)
|
|
26
|
+
*
|
|
27
|
+
* Thresholds:
|
|
28
|
+
* < 5 = trivial, < 15 = low, < 40 = medium, < 100 = high, else = significant
|
|
29
|
+
*/
|
|
30
|
+
export declare function estimateEffort(signals: EffortSignals, weights?: Partial<EffortWeights>): Effort;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
 * Effort Estimation
 *
 * Pure function to estimate effort level from raw signals.
 * Score-based: each signal contributes points, thresholds map to effort levels.
 */
// =============================================================================
// Constants
// =============================================================================
/**
 * Default weights for effort estimation.
 */
export const DEFAULT_WEIGHTS = {
    fileCountMultiplier: 2,
    linesPerPoint: 50,
    minutesPerPoint: 1,
    toolCallWeight: 1,
};
/**
 * Ordered effort levels for ordinal comparison.
 * Index 0 = lowest, index 4 = highest.
 */
export const EFFORT_ORDER = [
    'trivial',
    'low',
    'medium',
    'high',
    'significant',
];
// Lines-changed contribution is capped at this many points
const LINES_CAP = 10;
// Complexity bonuses
const BONUS_NEW_FILES = 5;
const BONUS_MULTI_LANG = 3;
const BONUS_TESTS = 5;
const BONUS_CONFIG = 2;
// Score thresholds (exclusive upper bounds)
const THRESHOLD_TRIVIAL = 5;
const THRESHOLD_LOW = 15;
const THRESHOLD_MEDIUM = 40;
const THRESHOLD_HIGH = 100;
// Milliseconds in one minute (durationMs is converted to minutes before weighting)
const MS_PER_MINUTE = 60_000;
// =============================================================================
// Internal helpers
// =============================================================================
/**
 * Merge caller overrides onto DEFAULT_WEIGHTS.
 *
 * Only real numbers are accepted as overrides. A plain object spread would let
 * an explicit `undefined` (allowed by `Partial<EffortWeights>`) or `NaN`
 * replace a default, which turns the whole score into NaN.
 */
function resolveWeights(overrides) {
    const resolved = { ...DEFAULT_WEIGHTS };
    for (const key of Object.keys(DEFAULT_WEIGHTS)) {
        const value = overrides?.[key];
        if (typeof value === 'number' && !Number.isNaN(value)) {
            resolved[key] = value;
        }
    }
    return resolved;
}
/**
 * Treat an undefined score component (missing signal, 0/0, 0 * Infinity) as
 * contributing no points instead of poisoning the total with NaN.
 */
function pointsOrZero(points) {
    return Number.isNaN(points) ? 0 : points;
}
// =============================================================================
// Public API
// =============================================================================
/**
 * Estimate effort level from raw signals.
 *
 * Score formula:
 *   fileCount * fileCountMultiplier
 *   + min(linesChanged / linesPerPoint, 10)
 *   + toolCallCount * toolCallWeight
 *   + (durationMs / 60000) / minutesPerPoint
 *   + complexity bonuses: newFiles(+5), multiLang(+3), tests(+5), config(+2)
 *
 * Thresholds:
 *   < 5 = trivial, < 15 = low, < 40 = medium, < 100 = high, else = significant
 *
 * Robustness notes:
 *   - Weight overrides that are not real numbers (e.g. `undefined`, `NaN`) are
 *     ignored and the corresponding default is used.
 *   - A missing numeric signal or a missing `complexityIndicators` object
 *     contributes 0 points rather than throwing or producing a NaN score
 *     (a NaN score fails every `<` comparison and would be reported as
 *     'significant').
 *   - `signals.iterationCount` is not read by this function.
 *
 * @param signals - Raw effort signals (file/line/tool-call counts, duration, complexity flags)
 * @param [weights] - Optional partial overrides for DEFAULT_WEIGHTS
 * @returns One of the EFFORT_ORDER levels
 */
export function estimateEffort(signals, weights) {
    const w = resolveWeights(weights);
    const indicators = signals.complexityIndicators ?? {};
    // Base score components
    const fileScore = pointsOrZero(signals.fileCount * w.fileCountMultiplier);
    const linesScore = pointsOrZero(Math.min(signals.linesChanged / w.linesPerPoint, LINES_CAP));
    const toolScore = pointsOrZero(signals.toolCallCount * w.toolCallWeight);
    const timeScore = pointsOrZero(signals.durationMs / MS_PER_MINUTE / w.minutesPerPoint);
    // Complexity bonuses
    let bonus = 0;
    if (indicators.newFiles) {
        bonus += BONUS_NEW_FILES;
    }
    if (indicators.multiLanguage) {
        bonus += BONUS_MULTI_LANG;
    }
    if (indicators.tests) {
        bonus += BONUS_TESTS;
    }
    if (indicators.configChanges) {
        bonus += BONUS_CONFIG;
    }
    const score = fileScore + linesScore + toolScore + timeScore + bonus;
    // Map score to effort level
    if (score < THRESHOLD_TRIVIAL) {
        return 'trivial';
    }
    if (score < THRESHOLD_LOW) {
        return 'low';
    }
    if (score < THRESHOLD_MEDIUM) {
        return 'medium';
    }
    if (score < THRESHOLD_HIGH) {
        return 'high';
    }
    return 'significant';
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Episodes Module
|
|
3
|
+
*
|
|
4
|
+
* Work history tracking with effort estimation.
|
|
5
|
+
*/
|
|
6
|
+
export type { Effort, WorkEpisode, EffortSignals, EffortWeights, EffortSummary, ProjectWorkSummary, EpisodeStore, } from './types.js';
|
|
7
|
+
export { estimateEffort, DEFAULT_WEIGHTS, EFFORT_ORDER } from './effort.js';
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Episodic Memory Types
|
|
3
|
+
*
|
|
4
|
+
* Core types for tracking work history with effort estimation.
|
|
5
|
+
* These types enable agents to understand what work has been done,
|
|
6
|
+
* by whom, and how much effort was involved.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* Effort level for a work episode.
|
|
10
|
+
* Ordered from least to most significant.
|
|
11
|
+
*/
|
|
12
|
+
export type Effort = 'trivial' | 'low' | 'medium' | 'high' | 'significant';
|
|
13
|
+
/**
|
|
14
|
+
* A single unit of tracked work.
|
|
15
|
+
* Represents something an agent did — e.g., editing files, running tests, committing.
|
|
16
|
+
*/
|
|
17
|
+
export interface WorkEpisode {
|
|
18
|
+
/** Unique episode ID (UUID) */
|
|
19
|
+
id: string;
|
|
20
|
+
/** Agent ID that performed this work (e.g., 'default', 'backend', 'tester') */
|
|
21
|
+
agentId: string;
|
|
22
|
+
/** Terminal session prefix (first 8 chars of session ID) */
|
|
23
|
+
terminalPrefix: string;
|
|
24
|
+
/** High-level action label (e.g., 'edit', 'test', 'commit', 'refactor') */
|
|
25
|
+
action: string;
|
|
26
|
+
/** Human-readable summary of what was done */
|
|
27
|
+
summary: string;
|
|
28
|
+
/** Files affected by this episode */
|
|
29
|
+
files: string[];
|
|
30
|
+
/** Total lines changed (added + removed), if known */
|
|
31
|
+
linesChanged?: number;
|
|
32
|
+
/** ISO timestamp when the episode was recorded */
|
|
33
|
+
timestamp: string;
|
|
34
|
+
/** Session ID for grouping episodes within a session */
|
|
35
|
+
sessionId: string;
|
|
36
|
+
/** Estimated effort level */
|
|
37
|
+
effort: Effort;
|
|
38
|
+
/** Duration in milliseconds, if tracked */
|
|
39
|
+
durationMs?: number;
|
|
40
|
+
/** Number of tool calls in this episode */
|
|
41
|
+
toolCalls?: number;
|
|
42
|
+
/** Related work item ID (from workitem system) */
|
|
43
|
+
workItemId?: string;
|
|
44
|
+
/** Related git commit hashes */
|
|
45
|
+
relatedCommits?: string[];
|
|
46
|
+
/** Parent episode ID (for sub-tasks) */
|
|
47
|
+
parentEpisode?: string;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Raw signals used to estimate effort.
|
|
51
|
+
* These are collected from tool calls and timing data.
|
|
52
|
+
*/
|
|
53
|
+
export interface EffortSignals {
|
|
54
|
+
/** Number of unique files touched */
|
|
55
|
+
fileCount: number;
|
|
56
|
+
/** Total lines changed (added + removed) */
|
|
57
|
+
linesChanged: number;
|
|
58
|
+
/** Total number of tool calls */
|
|
59
|
+
toolCallCount: number;
|
|
60
|
+
/** Duration in milliseconds */
|
|
61
|
+
durationMs: number;
|
|
62
|
+
/** Number of edit/write iterations on same files */
|
|
63
|
+
iterationCount: number;
|
|
64
|
+
/** Complexity indicators detected */
|
|
65
|
+
complexityIndicators: {
|
|
66
|
+
/** New files were created (not just edited) */
|
|
67
|
+
newFiles?: boolean;
|
|
68
|
+
/** Multiple languages involved */
|
|
69
|
+
multiLanguage?: boolean;
|
|
70
|
+
/** Test files were created or modified */
|
|
71
|
+
tests?: boolean;
|
|
72
|
+
/** Config files were modified */
|
|
73
|
+
configChanges?: boolean;
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Tunable weights for effort estimation.
|
|
78
|
+
* All weights are multipliers or divisors applied to raw signals.
|
|
79
|
+
*/
|
|
80
|
+
export interface EffortWeights {
|
|
81
|
+
/** Points per file (default: 2) */
|
|
82
|
+
fileCountMultiplier: number;
|
|
83
|
+
/** Lines per point (default: 50) — higher means lines matter less */
|
|
84
|
+
linesPerPoint: number;
|
|
85
|
+
/** Minutes per point (default: 1) */
|
|
86
|
+
minutesPerPoint: number;
|
|
87
|
+
/** Points per tool call (default: 1) */
|
|
88
|
+
toolCallWeight: number;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* Summary of effort across multiple episodes.
|
|
92
|
+
*/
|
|
93
|
+
export interface EffortSummary {
|
|
94
|
+
/** Number of episodes included */
|
|
95
|
+
episodeCount: number;
|
|
96
|
+
/** Maximum effort level across episodes */
|
|
97
|
+
totalEffort: Effort;
|
|
98
|
+
/** Total time spent in milliseconds */
|
|
99
|
+
timeSpentMs: number;
|
|
100
|
+
/** Unique agent IDs involved */
|
|
101
|
+
agents: string[];
|
|
102
|
+
/** Human-readable description */
|
|
103
|
+
description: string;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Project-level work summary with breakdown.
|
|
107
|
+
*/
|
|
108
|
+
export interface ProjectWorkSummary {
|
|
109
|
+
/** Total number of episodes */
|
|
110
|
+
episodeCount: number;
|
|
111
|
+
/** Maximum effort level */
|
|
112
|
+
totalEffort: Effort;
|
|
113
|
+
/** Total time spent in milliseconds */
|
|
114
|
+
timeSpentMs: number;
|
|
115
|
+
/** Effort breakdown by agent */
|
|
116
|
+
agentBreakdown: Array<{
|
|
117
|
+
agentId: string;
|
|
118
|
+
episodeCount: number;
|
|
119
|
+
maxEffort: Effort;
|
|
120
|
+
timeSpentMs: number;
|
|
121
|
+
}>;
|
|
122
|
+
/** Most frequently touched files */
|
|
123
|
+
topFiles: Array<{
|
|
124
|
+
path: string;
|
|
125
|
+
touchCount: number;
|
|
126
|
+
}>;
|
|
127
|
+
/** Episodes since the last git commit */
|
|
128
|
+
uncommittedWork: WorkEpisode[];
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Persistence interface for work episodes.
|
|
132
|
+
* Write methods may be async (for file I/O), read methods are synchronous
|
|
133
|
+
* (read from in-memory cache).
|
|
134
|
+
*/
|
|
135
|
+
export interface EpisodeStore {
|
|
136
|
+
/** Save a single episode */
|
|
137
|
+
save(episode: WorkEpisode): void | Promise<void>;
|
|
138
|
+
/** Save multiple episodes at once */
|
|
139
|
+
saveBatch(episodes: WorkEpisode[]): void | Promise<void>;
|
|
140
|
+
/** Get all episodes */
|
|
141
|
+
getAll(): WorkEpisode[];
|
|
142
|
+
/** Get episodes for specific files */
|
|
143
|
+
getByFiles(files: string[]): WorkEpisode[];
|
|
144
|
+
/** Get episodes by agent ID */
|
|
145
|
+
getByAgent(agentId: string): WorkEpisode[];
|
|
146
|
+
/** Get episodes by session ID */
|
|
147
|
+
getBySession(sessionId: string): WorkEpisode[];
|
|
148
|
+
/** Get episodes within a time range (ISO timestamps) */
|
|
149
|
+
getByTimeRange(start: string, end: string): WorkEpisode[];
|
|
150
|
+
/** Get the N most recent episodes */
|
|
151
|
+
getRecent(count: number): WorkEpisode[];
|
|
152
|
+
/** Get project work summary */
|
|
153
|
+
getWorkSummary(): ProjectWorkSummary;
|
|
154
|
+
/** Get the maximum effort level across all episodes (or a subset) */
|
|
155
|
+
getTotalEffort(episodes?: WorkEpisode[]): Effort;
|
|
156
|
+
/** Remove episodes older than maxAge milliseconds. Returns count removed. */
|
|
157
|
+
cleanup(maxAgeMs: number): number | Promise<number>;
|
|
158
|
+
}
|
|
@@ -2,5 +2,7 @@
|
|
|
2
2
|
* Guardrails module - Pattern-based safety checks for tool execution
|
|
3
3
|
*/
|
|
4
4
|
export { GuardrailManager } from './manager.js';
|
|
5
|
+
export { parseShellCommand } from './shell-parser.js';
|
|
6
|
+
export type { ShellToken } from './shell-parser.js';
|
|
5
7
|
export { getBuiltinGuardrails, isBuiltinGuardrail, getBuiltinGuardrailIds, getGuardrailsByTag, BUILTIN_GUARDRAILS, } from './builtin.js';
|
|
6
8
|
export type { Guardrail, GuardrailInput, GuardrailAction, GuardrailResult, GuardrailContext, GuardrailManagerOptions, GuardrailTriggeredHandler, GuardrailEventType, GuardrailEvent, GuardrailEventHandler, } from './types.js';
|
package/dist/guardrails/index.js
CHANGED
|
@@ -2,4 +2,5 @@
|
|
|
2
2
|
* Guardrails module - Pattern-based safety checks for tool execution
|
|
3
3
|
*/
|
|
4
4
|
export { GuardrailManager } from './manager.js';
|
|
5
|
+
export { parseShellCommand } from './shell-parser.js';
|
|
5
6
|
export { getBuiltinGuardrails, isBuiltinGuardrail, getBuiltinGuardrailIds, getGuardrailsByTag, BUILTIN_GUARDRAILS, } from './builtin.js';
|
|
@@ -91,11 +91,32 @@ export declare class GuardrailManager {
|
|
|
91
91
|
/**
|
|
92
92
|
* Check tool input against all applicable guardrails
|
|
93
93
|
*
|
|
94
|
+
* For inputs with a `command` field (e.g. bash tool), automatically
|
|
95
|
+
* parses compound commands and checks each subcommand independently.
|
|
96
|
+
*
|
|
94
97
|
* @param toolName - Name of the tool being called
|
|
95
98
|
* @param input - Tool input to check
|
|
96
99
|
* @returns GuardrailResult indicating if any guardrail was triggered
|
|
97
100
|
*/
|
|
98
101
|
check(toolName: string, input: unknown): GuardrailResult;
|
|
102
|
+
/**
|
|
103
|
+
* Check a compound shell command against guardrails.
|
|
104
|
+
*
|
|
105
|
+
* Parses the command into subcommands (splitting on |, &&, ||, ;)
|
|
106
|
+
* and validates each independently. Also checks the full command string
|
|
107
|
+
* to catch cross-subcommand patterns (e.g. `curl ... | bash`).
|
|
108
|
+
* Returns the highest-severity match with subcommand context.
|
|
109
|
+
*
|
|
110
|
+
* @param toolName - Name of the tool being called
|
|
111
|
+
* @param command - The shell command string
|
|
112
|
+
* @param originalInput - The original tool input (for result metadata)
|
|
113
|
+
* @returns GuardrailResult indicating if any guardrail was triggered
|
|
114
|
+
*/
|
|
115
|
+
checkCommand(toolName: string, command: string, originalInput?: unknown): GuardrailResult;
|
|
116
|
+
/**
|
|
117
|
+
* Check an input string against all applicable guardrail patterns
|
|
118
|
+
*/
|
|
119
|
+
private checkPatterns;
|
|
99
120
|
/**
|
|
100
121
|
* Check and handle guardrail triggering
|
|
101
122
|
*
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* GuardrailManager - Pattern-based safety checks for tool execution
|
|
3
3
|
*/
|
|
4
4
|
import { getBuiltinGuardrails } from './builtin.js';
|
|
5
|
+
import { parseShellCommand } from './shell-parser.js';
|
|
5
6
|
/**
|
|
6
7
|
* Default options for GuardrailManager
|
|
7
8
|
*/
|
|
@@ -159,6 +160,9 @@ export class GuardrailManager {
|
|
|
159
160
|
/**
|
|
160
161
|
* Check tool input against all applicable guardrails
|
|
161
162
|
*
|
|
163
|
+
* For inputs with a `command` field (e.g. bash tool), automatically
|
|
164
|
+
* parses compound commands and checks each subcommand independently.
|
|
165
|
+
*
|
|
162
166
|
* @param toolName - Name of the tool being called
|
|
163
167
|
* @param input - Tool input to check
|
|
164
168
|
* @returns GuardrailResult indicating if any guardrail was triggered
|
|
@@ -168,8 +172,72 @@ export class GuardrailManager {
|
|
|
168
172
|
if (!this.options.enabled) {
|
|
169
173
|
return { triggered: false };
|
|
170
174
|
}
|
|
171
|
-
//
|
|
175
|
+
// Auto-detect compound shell commands (bash tool sends { command: "..." })
|
|
176
|
+
if (typeof input === 'object' && input !== null && 'command' in input) {
|
|
177
|
+
const cmd = input.command;
|
|
178
|
+
if (typeof cmd === 'string') {
|
|
179
|
+
return this.checkCommand(toolName, cmd, input);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
// Original: pattern match on stringified input
|
|
172
183
|
const inputString = this.stringifyInput(input);
|
|
184
|
+
return this.checkPatterns(toolName, inputString, input);
|
|
185
|
+
}
|
|
186
|
+
/**
|
|
187
|
+
* Check a compound shell command against guardrails.
|
|
188
|
+
*
|
|
189
|
+
* Parses the command into subcommands (splitting on |, &&, ||, ;)
|
|
190
|
+
* and validates each independently. Also checks the full command string
|
|
191
|
+
* to catch cross-subcommand patterns (e.g. `curl ... | bash`).
|
|
192
|
+
* Returns the highest-severity match with subcommand context.
|
|
193
|
+
*
|
|
194
|
+
* @param toolName - Name of the tool being called
|
|
195
|
+
* @param command - The shell command string
|
|
196
|
+
* @param originalInput - The original tool input (for result metadata)
|
|
197
|
+
* @returns GuardrailResult indicating if any guardrail was triggered
|
|
198
|
+
*/
|
|
199
|
+
checkCommand(toolName, command, originalInput) {
|
|
200
|
+
const tokens = parseShellCommand(command);
|
|
201
|
+
if (tokens.length <= 1) {
|
|
202
|
+
// Single command — delegate to pattern matching directly
|
|
203
|
+
return this.checkPatterns(toolName, command, originalInput);
|
|
204
|
+
}
|
|
205
|
+
const applicableGuardrails = this.getForTool(toolName);
|
|
206
|
+
const severityOrder = { block: 3, confirm: 2, warn: 1 };
|
|
207
|
+
// Start with full-string check (catches cross-subcommand patterns like curl|bash)
|
|
208
|
+
let worst = this.checkPatterns(toolName, command, originalInput);
|
|
209
|
+
// Check each subcommand against all guardrails, keep highest severity
|
|
210
|
+
for (const token of tokens) {
|
|
211
|
+
for (const guardrail of applicableGuardrails) {
|
|
212
|
+
for (const pattern of guardrail.patterns) {
|
|
213
|
+
const match = token.command.match(pattern);
|
|
214
|
+
if (match) {
|
|
215
|
+
const result = {
|
|
216
|
+
triggered: true,
|
|
217
|
+
guardrail,
|
|
218
|
+
match: match[0],
|
|
219
|
+
action: guardrail.action,
|
|
220
|
+
toolName,
|
|
221
|
+
input: originalInput,
|
|
222
|
+
subcommand: token.command,
|
|
223
|
+
subcommandIndex: token.index,
|
|
224
|
+
};
|
|
225
|
+
const resultSeverity = severityOrder[result.action ?? 'warn'] ?? 0;
|
|
226
|
+
const worstSeverity = severityOrder[worst.action ?? ''] ?? 0;
|
|
227
|
+
if (resultSeverity >= worstSeverity) {
|
|
228
|
+
worst = result;
|
|
229
|
+
}
|
|
230
|
+
break; // Found match for this guardrail, move to next
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
return worst;
|
|
236
|
+
}
|
|
237
|
+
/**
|
|
238
|
+
* Check an input string against all applicable guardrail patterns
|
|
239
|
+
*/
|
|
240
|
+
checkPatterns(toolName, inputString, input) {
|
|
173
241
|
// Get guardrails that apply to this tool
|
|
174
242
|
const applicableGuardrails = this.getForTool(toolName);
|
|
175
243
|
// Check each guardrail
|