@helmiq/crew 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/defaults/personas/architect.persona.yaml +72 -0
- package/defaults/personas/engineer.persona.yaml +137 -0
- package/defaults/personas/persona-spec.schema.yaml +149 -0
- package/defaults/personas/reviewer.persona.yaml +47 -0
- package/defaults/rubrics/adr.rubric.yaml +48 -0
- package/defaults/rubrics/code-review.rubric.yaml +39 -0
- package/defaults/rubrics/pull-request.rubric.yaml +40 -0
- package/dist/actions/actions.test.d.ts +2 -0
- package/dist/actions/actions.test.d.ts.map +1 -0
- package/dist/actions/actions.test.js +158 -0
- package/dist/actions/direct-dispatcher.d.ts +10 -0
- package/dist/actions/direct-dispatcher.d.ts.map +1 -0
- package/dist/actions/direct-dispatcher.js +27 -0
- package/dist/actions/dispatcher.d.ts +11 -0
- package/dist/actions/dispatcher.d.ts.map +1 -0
- package/dist/actions/dispatcher.js +1 -0
- package/dist/actions/index.d.ts +7 -0
- package/dist/actions/index.d.ts.map +1 -0
- package/dist/actions/index.js +3 -0
- package/dist/actions/registry.d.ts +13 -0
- package/dist/actions/registry.d.ts.map +1 -0
- package/dist/actions/registry.js +40 -0
- package/dist/actions/resolver.d.ts +47 -0
- package/dist/actions/resolver.d.ts.map +1 -0
- package/dist/actions/resolver.js +43 -0
- package/dist/cli/cli.test.d.ts +2 -0
- package/dist/cli/cli.test.d.ts.map +1 -0
- package/dist/cli/cli.test.js +392 -0
- package/dist/cli/run.d.ts +45 -0
- package/dist/cli/run.d.ts.map +1 -0
- package/dist/cli/run.js +236 -0
- package/dist/common/errors.d.ts +76 -0
- package/dist/common/errors.d.ts.map +1 -0
- package/dist/common/errors.js +74 -0
- package/dist/config/config.test.d.ts +2 -0
- package/dist/config/config.test.d.ts.map +1 -0
- package/dist/config/config.test.js +691 -0
- package/dist/config/index.d.ts +7 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +4 -0
- package/dist/config/loader.d.ts +16 -0
- package/dist/config/loader.d.ts.map +1 -0
- package/dist/config/loader.js +56 -0
- package/dist/config/model-resolver.d.ts +24 -0
- package/dist/config/model-resolver.d.ts.map +1 -0
- package/dist/config/model-resolver.js +39 -0
- package/dist/config/resolver.d.ts +22 -0
- package/dist/config/resolver.d.ts.map +1 -0
- package/dist/config/resolver.js +115 -0
- package/dist/config/schemas.d.ts +266 -0
- package/dist/config/schemas.d.ts.map +1 -0
- package/dist/config/schemas.js +115 -0
- package/dist/context/artifact-reader.d.ts +12 -0
- package/dist/context/artifact-reader.d.ts.map +1 -0
- package/dist/context/artifact-reader.js +92 -0
- package/dist/context/assembler.d.ts +22 -0
- package/dist/context/assembler.d.ts.map +1 -0
- package/dist/context/assembler.js +126 -0
- package/dist/context/code-reader.d.ts +14 -0
- package/dist/context/code-reader.d.ts.map +1 -0
- package/dist/context/code-reader.js +56 -0
- package/dist/context/context.test.d.ts +2 -0
- package/dist/context/context.test.d.ts.map +1 -0
- package/dist/context/context.test.js +260 -0
- package/dist/context/index.d.ts +9 -0
- package/dist/context/index.d.ts.map +1 -0
- package/dist/context/index.js +5 -0
- package/dist/context/section-extractor.d.ts +9 -0
- package/dist/context/section-extractor.d.ts.map +1 -0
- package/dist/context/section-extractor.js +32 -0
- package/dist/context/token-budget.d.ts +11 -0
- package/dist/context/token-budget.d.ts.map +1 -0
- package/dist/context/token-budget.js +22 -0
- package/dist/control/control.test.d.ts +2 -0
- package/dist/control/control.test.d.ts.map +1 -0
- package/dist/control/control.test.js +137 -0
- package/dist/control/id-generator.d.ts +12 -0
- package/dist/control/id-generator.d.ts.map +1 -0
- package/dist/control/id-generator.js +20 -0
- package/dist/control/index.d.ts +5 -0
- package/dist/control/index.d.ts.map +1 -0
- package/dist/control/index.js +3 -0
- package/dist/control/lock-manager.d.ts +13 -0
- package/dist/control/lock-manager.d.ts.map +1 -0
- package/dist/control/lock-manager.js +72 -0
- package/dist/control/run-state.d.ts +16 -0
- package/dist/control/run-state.d.ts.map +1 -0
- package/dist/control/run-state.js +55 -0
- package/dist/engine/composite.d.ts +34 -0
- package/dist/engine/composite.d.ts.map +1 -0
- package/dist/engine/composite.js +192 -0
- package/dist/engine/composite.test.d.ts +2 -0
- package/dist/engine/composite.test.d.ts.map +1 -0
- package/dist/engine/composite.test.js +1947 -0
- package/dist/engine/engine.test.d.ts +2 -0
- package/dist/engine/engine.test.d.ts.map +1 -0
- package/dist/engine/engine.test.js +334 -0
- package/dist/engine/index.d.ts +10 -0
- package/dist/engine/index.d.ts.map +1 -0
- package/dist/engine/index.js +5 -0
- package/dist/engine/llm-client.d.ts +27 -0
- package/dist/engine/llm-client.d.ts.map +1 -0
- package/dist/engine/llm-client.js +46 -0
- package/dist/engine/simple.d.ts +21 -0
- package/dist/engine/simple.d.ts.map +1 -0
- package/dist/engine/simple.js +59 -0
- package/dist/engine/tool-dispatch.d.ts +37 -0
- package/dist/engine/tool-dispatch.d.ts.map +1 -0
- package/dist/engine/tool-dispatch.js +146 -0
- package/dist/engine/tool-dispatch.test.d.ts +2 -0
- package/dist/engine/tool-dispatch.test.d.ts.map +1 -0
- package/dist/engine/tool-dispatch.test.js +348 -0
- package/dist/engine/tool-filter.d.ts +13 -0
- package/dist/engine/tool-filter.d.ts.map +1 -0
- package/dist/engine/tool-filter.js +25 -0
- package/dist/evaluation/evaluation.test.d.ts +2 -0
- package/dist/evaluation/evaluation.test.d.ts.map +1 -0
- package/dist/evaluation/evaluation.test.js +490 -0
- package/dist/evaluation/evaluator.d.ts +19 -0
- package/dist/evaluation/evaluator.d.ts.map +1 -0
- package/dist/evaluation/evaluator.js +78 -0
- package/dist/evaluation/index.d.ts +4 -0
- package/dist/evaluation/index.d.ts.map +1 -0
- package/dist/evaluation/index.js +2 -0
- package/dist/evaluation/scorer.d.ts +38 -0
- package/dist/evaluation/scorer.d.ts.map +1 -0
- package/dist/evaluation/scorer.js +94 -0
- package/dist/index.d.ts +47 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +28 -0
- package/dist/providers/index.d.ts +2 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +1 -0
- package/dist/providers/provider-factory.d.ts +11 -0
- package/dist/providers/provider-factory.d.ts.map +1 -0
- package/dist/providers/provider-factory.js +30 -0
- package/dist/publication/frontmatter.d.ts +21 -0
- package/dist/publication/frontmatter.d.ts.map +1 -0
- package/dist/publication/frontmatter.js +15 -0
- package/dist/publication/git-ops.d.ts +18 -0
- package/dist/publication/git-ops.d.ts.map +1 -0
- package/dist/publication/git-ops.js +74 -0
- package/dist/publication/index.d.ts +9 -0
- package/dist/publication/index.d.ts.map +1 -0
- package/dist/publication/index.js +5 -0
- package/dist/publication/provenance-writer.d.ts +27 -0
- package/dist/publication/provenance-writer.d.ts.map +1 -0
- package/dist/publication/provenance-writer.js +21 -0
- package/dist/publication/publication.test.d.ts +2 -0
- package/dist/publication/publication.test.d.ts.map +1 -0
- package/dist/publication/publication.test.js +235 -0
- package/dist/publication/publisher.d.ts +32 -0
- package/dist/publication/publisher.d.ts.map +1 -0
- package/dist/publication/publisher.js +113 -0
- package/dist/publication/secret-scanner.d.ts +6 -0
- package/dist/publication/secret-scanner.d.ts.map +1 -0
- package/dist/publication/secret-scanner.js +19 -0
- package/dist/tools/index.d.ts +4 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +2 -0
- package/dist/tools/registry.d.ts +15 -0
- package/dist/tools/registry.d.ts.map +1 -0
- package/dist/tools/registry.js +288 -0
- package/dist/tools/registry.test.d.ts +2 -0
- package/dist/tools/registry.test.d.ts.map +1 -0
- package/dist/tools/registry.test.js +131 -0
- package/dist/tools/tool-groups.d.ts +20 -0
- package/dist/tools/tool-groups.d.ts.map +1 -0
- package/dist/tools/tool-groups.js +48 -0
- package/dist/tools/tool-groups.test.d.ts +2 -0
- package/dist/tools/tool-groups.test.d.ts.map +1 -0
- package/dist/tools/tool-groups.test.js +127 -0
- package/dist/types/artifact-store.d.ts +33 -0
- package/dist/types/artifact-store.d.ts.map +1 -0
- package/dist/types/artifact-store.js +9 -0
- package/dist/types/evaluation-rubric.d.ts +18 -0
- package/dist/types/evaluation-rubric.d.ts.map +1 -0
- package/dist/types/evaluation-rubric.js +1 -0
- package/dist/types/index.d.ts +10 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/llm-provider.d.ts +47 -0
- package/dist/types/llm-provider.d.ts.map +1 -0
- package/dist/types/llm-provider.js +8 -0
- package/dist/types/persona-spec.d.ts +79 -0
- package/dist/types/persona-spec.d.ts.map +1 -0
- package/dist/types/persona-spec.js +1 -0
- package/dist/types/project-config.d.ts +28 -0
- package/dist/types/project-config.d.ts.map +1 -0
- package/dist/types/project-config.js +1 -0
- package/dist/types/provenance.d.ts +67 -0
- package/dist/types/provenance.d.ts.map +1 -0
- package/dist/types/provenance.js +1 -0
- package/dist/types/run-state.d.ts +11 -0
- package/dist/types/run-state.d.ts.map +1 -0
- package/dist/types/run-state.js +1 -0
- package/dist/types/tool-runtime.d.ts +43 -0
- package/dist/types/tool-runtime.d.ts.map +1 -0
- package/dist/types/tool-runtime.js +30 -0
- package/dist/workspace/detect.d.ts +11 -0
- package/dist/workspace/detect.d.ts.map +1 -0
- package/dist/workspace/detect.js +28 -0
- package/dist/workspace/detect.test.d.ts +2 -0
- package/dist/workspace/detect.test.d.ts.map +1 -0
- package/dist/workspace/detect.test.js +53 -0
- package/dist/workspace/index.d.ts +2 -0
- package/dist/workspace/index.d.ts.map +1 -0
- package/dist/workspace/index.js +1 -0
- package/package.json +51 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Runtime-managed tool-call dispatch loop per ADR-0010.
|
|
3
|
+
*
|
|
4
|
+
* Calls the provider with schema-only tools, intercepts ToolCallRecord
|
|
5
|
+
* results, dispatches each to the corresponding CrewTool execute function,
|
|
6
|
+
* appends tool-result messages, and repeats until the LLM produces a
|
|
7
|
+
* final text response or the max round limit is reached.
|
|
8
|
+
*/
|
|
9
|
+
import { callLlm } from './llm-client.js';
|
|
10
|
+
import { expandToolNames } from '../tools/tool-groups.js';
|
|
11
|
+
const DEFAULT_MAX_ROUNDS = 8;
|
|
12
|
+
/**
 * Build a schema-only view of a tool set.
 *
 * For every own enumerable entry of `tools`, the result carries an object
 * holding just that tool's `description` and `parameters`; any other field
 * on the tool (such as `execute`) is left out.
 *
 * @param tools Map of tool name to tool definition.
 * @returns A new object keyed by the same tool names.
 */
export function toSchemaOnlyToolSet(tools) {
    const schemaByName = {};
    Object.keys(tools).forEach((toolName) => {
        const { description, parameters } = tools[toolName];
        schemaByName[toolName] = { description, parameters };
    });
    return schemaByName;
}
|
|
22
|
+
/**
 * Select the subset of `available` tools that a persona (and optionally a
 * sub-agent) is allowed to use.
 *
 * `permitted`, `denied` and `subAgentTools` are each passed through
 * `expandToolNames` first, so they may contain tool group names
 * (e.g. "write-code", "git") as well as individual tool names.
 *
 * A tool is kept only when its name:
 *   - is not in the expanded denied set,
 *   - is in the expanded permitted set, and
 *   - is in the expanded sub-agent set, when `subAgentTools` is provided.
 *
 * @param available Map of tool name to tool.
 * @param permitted Names or group names the persona may use.
 * @param denied Names or group names the persona may never use.
 * @param subAgentTools Optional further restriction declared by a sub-agent.
 * @returns A new object holding only the tools that pass every check.
 */
export function filterCrewTools(available, permitted, denied, subAgentTools) {
    const allowList = expandToolNames(permitted);
    const denyList = expandToolNames(denied);
    const subAgentList = subAgentTools ? expandToolNames(subAgentTools) : undefined;
    // Deny wins over permit; the sub-agent list can only narrow the result.
    const isVisible = (toolName) => !denyList.has(toolName) &&
        allowList.has(toolName) &&
        (!subAgentList || subAgentList.has(toolName));
    const visibleTools = {};
    Object.entries(available).forEach(([toolName, tool]) => {
        if (isVisible(toolName)) {
            visibleTools[toolName] = tool;
        }
    });
    return visibleTools;
}
|
|
45
|
+
/**
 * Run the LLM / tool-call loop until the model answers without tool calls
 * or `maxRounds` provider calls have been made.
 *
 * Each round calls `callLlm` with the running message list and the
 * schema-only tool set. Token counts and duration are accumulated across
 * rounds. When the reply carries tool calls, each one is dispatched in order
 * through `dispatchToolCall`, recorded, and answered with a `role: 'tool'`
 * message before the next round.
 *
 * @param options `{ callOptions, tools, context, maxRounds }`; `maxRounds`
 *   defaults to `DEFAULT_MAX_ROUNDS`. `callOptions.messages` is copied, not
 *   mutated.
 * @returns `{ text, toolExecutions, totalTokensIn, totalTokensOut,
 *   totalDurationMs, rounds, model }`. When the round limit is hit the
 *   returned `text` is the empty string and `rounds` equals `maxRounds`.
 */
export async function executeWithToolDispatch(options) {
    const { callOptions, tools, context, maxRounds = DEFAULT_MAX_ROUNDS } = options;
    const toolSchemas = toSchemaOnlyToolSet(tools);
    const conversation = [...callOptions.messages];
    const executions = [];
    const usage = { tokensIn: 0, tokensOut: 0, durationMs: 0 };
    let model = '';
    // Shared shape for both exits (final answer and round-limit exhaustion).
    const buildOutcome = (text, rounds) => ({
        text,
        toolExecutions: executions,
        totalTokensIn: usage.tokensIn,
        totalTokensOut: usage.tokensOut,
        totalDurationMs: usage.durationMs,
        rounds,
        model,
    });
    let roundsDone = 0;
    while (roundsDone < maxRounds) {
        const reply = await callLlm({
            ...callOptions,
            messages: conversation,
            tools: toolSchemas,
        });
        roundsDone += 1;
        usage.tokensIn += reply.tokensIn;
        usage.tokensOut += reply.tokensOut;
        usage.durationMs += reply.durationMs;
        model = reply.model;
        // No tool calls means the model has produced its final answer.
        if (reply.toolCalls.length === 0) {
            return buildOutcome(reply.text, roundsDone);
        }
        // NOTE(review): only the assistant's text is appended here, not the
        // tool calls themselves -- confirm the provider layer does not need them.
        if (reply.text) {
            conversation.push({ role: 'assistant', content: reply.text });
        }
        for (const call of reply.toolCalls) {
            const execution = await dispatchToolCall(call, tools, context);
            executions.push(execution);
            // Failures are reported back to the model as text rather than thrown.
            const content = execution.success
                ? execution.outputSummary
                : `Error: ${execution.errorCode ?? 'TOOL_EXECUTION_FAILED'}: ${execution.outputSummary}`;
            conversation.push({
                role: 'tool',
                toolCallId: call.toolCallId,
                toolName: call.toolName,
                content,
            });
        }
    }
    return buildOutcome('', maxRounds);
}
|
|
101
|
+
/**
 * Execute a single tool call and describe the outcome as an execution record.
 *
 * The tool is looked up as an OWN property of `tools`. The tool name is
 * chosen by the model, so a plain `tools[name]` lookup would resolve names
 * such as "constructor" or "toString" to inherited Object.prototype members
 * and misreport them as execution failures instead of TOOL_NOT_FOUND.
 *
 * This function does not throw for tool failures: a rejected or throwing
 * `execute` is converted into a record with `success: false`.
 *
 * @param tc Tool call with `toolCallId`, `toolName` and `input`.
 * @param tools Map of tool name to tool; each tool exposes `execute(input, context)`.
 * @param context Value passed through unchanged as the second `execute` argument.
 * @returns A record with `toolCallId`, `toolName`, `inputSummary`,
 *   `outputSummary`, `durationMs`, `success` and, on failure, `errorCode`
 *   ('TOOL_NOT_FOUND', the thrown value's `code`, or 'TOOL_EXECUTION_FAILED').
 */
async function dispatchToolCall(tc, tools, context) {
    const startMs = Date.now();
    // Own-property check: never resolve a tool through the prototype chain.
    const tool = Object.prototype.hasOwnProperty.call(tools, tc.toolName)
        ? tools[tc.toolName]
        : undefined;
    if (!tool) {
        return {
            toolCallId: tc.toolCallId,
            toolName: tc.toolName,
            inputSummary: summarize(tc.input),
            outputSummary: `Tool not found: ${tc.toolName}`,
            durationMs: 0,
            success: false,
            errorCode: 'TOOL_NOT_FOUND',
        };
    }
    try {
        const result = await tool.execute(tc.input, context);
        const durationMs = Date.now() - startMs;
        return {
            toolCallId: tc.toolCallId,
            toolName: tc.toolName,
            inputSummary: summarize(tc.input),
            outputSummary: summarize(result),
            durationMs,
            success: true,
        };
    }
    catch (err) {
        const durationMs = Date.now() - startMs;
        // Prefer a `code` carried by the thrown value; otherwise use the generic code.
        const errorCode = err && typeof err === 'object' && 'code' in err
            ? String(err.code)
            : 'TOOL_EXECUTION_FAILED';
        return {
            toolCallId: tc.toolCallId,
            toolName: tc.toolName,
            inputSummary: summarize(tc.input),
            outputSummary: err instanceof Error ? err.message : String(err),
            durationMs,
            success: false,
            errorCode,
        };
    }
}
|
|
143
|
+
/**
 * Render a value as a string of at most 500 characters.
 *
 * Strings are used as-is; everything else goes through `JSON.stringify`.
 * `JSON.stringify` returns `undefined` for `undefined`, functions and
 * symbols, and throws for BigInt and circular structures. In those cases
 * the value is rendered with `String(value)` so that this function does
 * not throw on them.
 *
 * @param value Any value (tool input, tool result, ...).
 * @returns The rendered string; when longer than 500 characters it is cut
 *   to the first 497 characters followed by '...'.
 */
function summarize(value) {
    let str;
    if (typeof value === 'string') {
        str = value;
    }
    else {
        try {
            str = JSON.stringify(value);
        }
        catch {
            // BigInt, circular references, throwing toJSON, ...
            str = undefined;
        }
        if (typeof str !== 'string') {
            str = String(value);
        }
    }
    return str.length > 500 ? str.slice(0, 497) + '...' : str;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tool-dispatch.test.d.ts","sourceRoot":"","sources":["../../src/engine/tool-dispatch.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
import { describe, it, expect, vi } from 'vitest';
|
|
2
|
+
import { toSchemaOnlyToolSet, filterCrewTools, executeWithToolDispatch } from './tool-dispatch.js';
|
|
3
|
+
/**
 * Build a stub tool for tests: an empty object schema and a mocked
 * `execute` that resolves to `{ ok: true }`.
 */
function makeTool(name) {
    const parameters = { type: 'object', properties: {} };
    const execute = vi.fn().mockResolvedValue({ ok: true });
    return { name, description: `Test tool: ${name}`, parameters, execute };
}
|
|
11
|
+
const mockContext = {
|
|
12
|
+
workspacePath: '/tmp/ws',
|
|
13
|
+
targetRepoPath: '/tmp/target',
|
|
14
|
+
project: {
|
|
15
|
+
project: { name: 'test', key: 'T' },
|
|
16
|
+
workspace: { path: '/tmp/ws', work: 'work/{EPIC_ID}', runs: 'runs' },
|
|
17
|
+
source: { repo: 'github:test/repo', path: '/tmp/target' },
|
|
18
|
+
llm: { default_model: 'test', providers: {} },
|
|
19
|
+
},
|
|
20
|
+
persona: 'engineer',
|
|
21
|
+
task: 'implement-story',
|
|
22
|
+
runId: 'run-test-001',
|
|
23
|
+
protectedPaths: ['product/**', 'AGENTS.md'],
|
|
24
|
+
logger: () => { },
|
|
25
|
+
};
|
|
26
|
+
describe('tool-dispatch', () => {
|
|
27
|
+
// T-02-006a: tool-name/runtime alignment
|
|
28
|
+
describe('toSchemaOnlyToolSet', () => {
|
|
29
|
+
it('strips execute and preserves schema fields', () => {
|
|
30
|
+
const tools = {
|
|
31
|
+
'read-file': makeTool('read-file'),
|
|
32
|
+
'write-file': makeTool('write-file'),
|
|
33
|
+
};
|
|
34
|
+
const schema = toSchemaOnlyToolSet(tools);
|
|
35
|
+
expect(Object.keys(schema)).toEqual(['read-file', 'write-file']);
|
|
36
|
+
expect(schema['read-file']).toHaveProperty('description');
|
|
37
|
+
expect(schema['read-file']).toHaveProperty('parameters');
|
|
38
|
+
expect(schema['read-file']).not.toHaveProperty('execute');
|
|
39
|
+
});
|
|
40
|
+
});
|
|
41
|
+
// T-02-006b: public export contract -- CrewToolSet filters by permission
|
|
42
|
+
describe('filterCrewTools', () => {
|
|
43
|
+
it('filters by permitted and denied sets', () => {
|
|
44
|
+
const all = {
|
|
45
|
+
'read-file': makeTool('read-file'),
|
|
46
|
+
'write-file': makeTool('write-file'),
|
|
47
|
+
'git-branch': makeTool('git-branch'),
|
|
48
|
+
'write-strategy-artifacts': makeTool('write-strategy-artifacts'),
|
|
49
|
+
};
|
|
50
|
+
const filtered = filterCrewTools(all, new Set(['read-file', 'write-file', 'git-branch', 'write-strategy-artifacts']), new Set(['write-strategy-artifacts']));
|
|
51
|
+
expect(Object.keys(filtered).sort()).toEqual(['git-branch', 'read-file', 'write-file']);
|
|
52
|
+
});
|
|
53
|
+
it('further restricts by sub-agent tools list', () => {
|
|
54
|
+
const all = {
|
|
55
|
+
'read-file': makeTool('read-file'),
|
|
56
|
+
'write-file': makeTool('write-file'),
|
|
57
|
+
'run-command': makeTool('run-command'),
|
|
58
|
+
};
|
|
59
|
+
const filtered = filterCrewTools(all, new Set(['read-file', 'write-file', 'run-command']), new Set(), ['run-command']);
|
|
60
|
+
expect(Object.keys(filtered)).toEqual(['run-command']);
|
|
61
|
+
});
|
|
62
|
+
it('expands tool group names in permitted set', () => {
|
|
63
|
+
const all = {
|
|
64
|
+
'read-file': makeTool('read-file'),
|
|
65
|
+
'write-file': makeTool('write-file'),
|
|
66
|
+
'list-directory': makeTool('list-directory'),
|
|
67
|
+
'git-branch': makeTool('git-branch'),
|
|
68
|
+
'run-command': makeTool('run-command'),
|
|
69
|
+
};
|
|
70
|
+
const filtered = filterCrewTools(all, new Set(['code', 'git-operations', 'shell']), new Set());
|
|
71
|
+
expect(Object.keys(filtered).sort()).toEqual([
|
|
72
|
+
'git-branch',
|
|
73
|
+
'list-directory',
|
|
74
|
+
'read-file',
|
|
75
|
+
'run-command',
|
|
76
|
+
'write-file',
|
|
77
|
+
]);
|
|
78
|
+
});
|
|
79
|
+
it('expands tool group names in sub-agent tools', () => {
|
|
80
|
+
const all = {
|
|
81
|
+
'read-file': makeTool('read-file'),
|
|
82
|
+
'write-file': makeTool('write-file'),
|
|
83
|
+
'git-branch': makeTool('git-branch'),
|
|
84
|
+
'git-commit': makeTool('git-commit'),
|
|
85
|
+
'run-command': makeTool('run-command'),
|
|
86
|
+
};
|
|
87
|
+
const filtered = filterCrewTools(all, new Set(['code', 'git-operations', 'shell']), new Set(), ['code', 'git']);
|
|
88
|
+
expect(Object.keys(filtered).sort()).toEqual([
|
|
89
|
+
'git-branch',
|
|
90
|
+
'git-commit',
|
|
91
|
+
'read-file',
|
|
92
|
+
'write-file',
|
|
93
|
+
]);
|
|
94
|
+
expect(filtered['run-command']).toBeUndefined();
|
|
95
|
+
});
|
|
96
|
+
it('expands tool group names in denied set', () => {
|
|
97
|
+
const all = {
|
|
98
|
+
'read-file': makeTool('read-file'),
|
|
99
|
+
'write-file': makeTool('write-file'),
|
|
100
|
+
'git-branch': makeTool('git-branch'),
|
|
101
|
+
};
|
|
102
|
+
const filtered = filterCrewTools(all, new Set(['code', 'git-operations']), new Set(['git-operations']));
|
|
103
|
+
expect(Object.keys(filtered).sort()).toEqual(['read-file', 'write-file']);
|
|
104
|
+
});
|
|
105
|
+
it('matches Engineer implementer tools correctly', () => {
|
|
106
|
+
const all = {
|
|
107
|
+
'read-file': makeTool('read-file'),
|
|
108
|
+
'write-file': makeTool('write-file'),
|
|
109
|
+
'list-directory': makeTool('list-directory'),
|
|
110
|
+
'search-codebase': makeTool('search-codebase'),
|
|
111
|
+
'git-branch': makeTool('git-branch'),
|
|
112
|
+
'git-commit': makeTool('git-commit'),
|
|
113
|
+
'git-push': makeTool('git-push'),
|
|
114
|
+
'git-diff': makeTool('git-diff'),
|
|
115
|
+
'git-log': makeTool('git-log'),
|
|
116
|
+
'create-pr': makeTool('create-pr'),
|
|
117
|
+
'run-command': makeTool('run-command'),
|
|
118
|
+
};
|
|
119
|
+
const filtered = filterCrewTools(all, new Set(['read-artifact', 'write-artifact', 'code', 'git-operations', 'shell']), new Set(['write-strategy-artifacts', 'write-standards']), ['code', 'git', 'shell']);
|
|
120
|
+
expect(Object.keys(filtered).sort()).toEqual([
|
|
121
|
+
'create-pr',
|
|
122
|
+
'git-branch',
|
|
123
|
+
'git-commit',
|
|
124
|
+
'git-diff',
|
|
125
|
+
'git-log',
|
|
126
|
+
'git-push',
|
|
127
|
+
'list-directory',
|
|
128
|
+
'read-file',
|
|
129
|
+
'run-command',
|
|
130
|
+
'search-codebase',
|
|
131
|
+
'write-file',
|
|
132
|
+
]);
|
|
133
|
+
});
|
|
134
|
+
it('matches Reviewer tools correctly -- read-only, no write-code or git-write', () => {
|
|
135
|
+
const all = {
|
|
136
|
+
'read-file': makeTool('read-file'),
|
|
137
|
+
'write-file': makeTool('write-file'),
|
|
138
|
+
'list-directory': makeTool('list-directory'),
|
|
139
|
+
'search-codebase': makeTool('search-codebase'),
|
|
140
|
+
'git-branch': makeTool('git-branch'),
|
|
141
|
+
'git-commit': makeTool('git-commit'),
|
|
142
|
+
'git-push': makeTool('git-push'),
|
|
143
|
+
'git-diff': makeTool('git-diff'),
|
|
144
|
+
'git-log': makeTool('git-log'),
|
|
145
|
+
'create-pr': makeTool('create-pr'),
|
|
146
|
+
'run-command': makeTool('run-command'),
|
|
147
|
+
'read-artifact': makeTool('read-artifact'),
|
|
148
|
+
'write-artifact': makeTool('write-artifact'),
|
|
149
|
+
};
|
|
150
|
+
const filtered = filterCrewTools(all, new Set(['read-artifact', 'write-artifact', 'read-code', 'git-read', 'shell']), new Set(['write-code', 'git-write']), ['read-artifact', 'read-code', 'git-read', 'shell']);
|
|
151
|
+
expect(Object.keys(filtered).sort()).toEqual([
|
|
152
|
+
'git-diff',
|
|
153
|
+
'git-log',
|
|
154
|
+
'list-directory',
|
|
155
|
+
'read-artifact',
|
|
156
|
+
'read-file',
|
|
157
|
+
'run-command',
|
|
158
|
+
'search-codebase',
|
|
159
|
+
]);
|
|
160
|
+
expect(filtered['write-file']).toBeUndefined();
|
|
161
|
+
expect(filtered['git-branch']).toBeUndefined();
|
|
162
|
+
expect(filtered['git-commit']).toBeUndefined();
|
|
163
|
+
expect(filtered['git-push']).toBeUndefined();
|
|
164
|
+
expect(filtered['create-pr']).toBeUndefined();
|
|
165
|
+
});
|
|
166
|
+
});
|
|
167
|
+
// T-02-006c: dispatch loop handles tool calls and returns results
|
|
168
|
+
describe('executeWithToolDispatch', () => {
|
|
169
|
+
it('returns text directly when no tool calls', async () => {
|
|
170
|
+
const mockProvider = {
|
|
171
|
+
generateText: vi.fn().mockResolvedValue({
|
|
172
|
+
text: 'Final answer',
|
|
173
|
+
toolCalls: [],
|
|
174
|
+
tokensIn: 100,
|
|
175
|
+
tokensOut: 50,
|
|
176
|
+
}),
|
|
177
|
+
};
|
|
178
|
+
const result = await executeWithToolDispatch({
|
|
179
|
+
callOptions: {
|
|
180
|
+
provider: mockProvider,
|
|
181
|
+
model: {
|
|
182
|
+
alias: 'test',
|
|
183
|
+
provider: 'test',
|
|
184
|
+
concreteModel: 'test-model',
|
|
185
|
+
apiKeyEnv: 'TEST_KEY',
|
|
186
|
+
},
|
|
187
|
+
system: 'You are a test.',
|
|
188
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
189
|
+
},
|
|
190
|
+
tools: {},
|
|
191
|
+
context: mockContext,
|
|
192
|
+
});
|
|
193
|
+
expect(result.text).toBe('Final answer');
|
|
194
|
+
expect(result.toolExecutions).toEqual([]);
|
|
195
|
+
expect(result.rounds).toBe(1);
|
|
196
|
+
});
|
|
197
|
+
it('dispatches tool calls and feeds results back', async () => {
|
|
198
|
+
const readTool = makeTool('read-file');
|
|
199
|
+
readTool.execute.mockResolvedValue({ content: 'file content' });
|
|
200
|
+
const mockProvider = {
|
|
201
|
+
generateText: vi
|
|
202
|
+
.fn()
|
|
203
|
+
.mockResolvedValueOnce({
|
|
204
|
+
text: '',
|
|
205
|
+
toolCalls: [
|
|
206
|
+
{ toolCallId: 'tc-1', toolName: 'read-file', input: { path: 'src/app.ts' } },
|
|
207
|
+
],
|
|
208
|
+
tokensIn: 100,
|
|
209
|
+
tokensOut: 50,
|
|
210
|
+
})
|
|
211
|
+
.mockResolvedValueOnce({
|
|
212
|
+
text: 'Done reading the file.',
|
|
213
|
+
toolCalls: [],
|
|
214
|
+
tokensIn: 200,
|
|
215
|
+
tokensOut: 100,
|
|
216
|
+
}),
|
|
217
|
+
};
|
|
218
|
+
const result = await executeWithToolDispatch({
|
|
219
|
+
callOptions: {
|
|
220
|
+
provider: mockProvider,
|
|
221
|
+
model: {
|
|
222
|
+
alias: 'test',
|
|
223
|
+
provider: 'test',
|
|
224
|
+
concreteModel: 'test-model',
|
|
225
|
+
apiKeyEnv: 'TEST_KEY',
|
|
226
|
+
},
|
|
227
|
+
system: 'You are a test.',
|
|
228
|
+
messages: [{ role: 'user', content: 'Read app.ts' }],
|
|
229
|
+
},
|
|
230
|
+
tools: { 'read-file': readTool },
|
|
231
|
+
context: mockContext,
|
|
232
|
+
});
|
|
233
|
+
expect(result.text).toBe('Done reading the file.');
|
|
234
|
+
expect(result.toolExecutions.length).toBe(1);
|
|
235
|
+
expect(result.toolExecutions[0].toolName).toBe('read-file');
|
|
236
|
+
expect(result.toolExecutions[0].success).toBe(true);
|
|
237
|
+
expect(result.rounds).toBe(2);
|
|
238
|
+
expect(result.totalTokensIn).toBe(300);
|
|
239
|
+
});
|
|
240
|
+
it('records failed tool executions without crashing the loop', async () => {
|
|
241
|
+
const failTool = {
|
|
242
|
+
name: 'fail-tool',
|
|
243
|
+
description: 'Always fails',
|
|
244
|
+
parameters: {},
|
|
245
|
+
execute: () => Promise.reject(new Error('Boom')),
|
|
246
|
+
};
|
|
247
|
+
const mockProvider = {
|
|
248
|
+
generateText: vi
|
|
249
|
+
.fn()
|
|
250
|
+
.mockResolvedValueOnce({
|
|
251
|
+
text: '',
|
|
252
|
+
toolCalls: [{ toolCallId: 'tc-1', toolName: 'fail-tool', input: {} }],
|
|
253
|
+
tokensIn: 50,
|
|
254
|
+
tokensOut: 20,
|
|
255
|
+
})
|
|
256
|
+
.mockResolvedValueOnce({
|
|
257
|
+
text: 'Handled error.',
|
|
258
|
+
toolCalls: [],
|
|
259
|
+
tokensIn: 80,
|
|
260
|
+
tokensOut: 30,
|
|
261
|
+
}),
|
|
262
|
+
};
|
|
263
|
+
const result = await executeWithToolDispatch({
|
|
264
|
+
callOptions: {
|
|
265
|
+
provider: mockProvider,
|
|
266
|
+
model: {
|
|
267
|
+
alias: 'test',
|
|
268
|
+
provider: 'test',
|
|
269
|
+
concreteModel: 'test-model',
|
|
270
|
+
apiKeyEnv: 'TEST_KEY',
|
|
271
|
+
},
|
|
272
|
+
system: 'You are a test.',
|
|
273
|
+
messages: [{ role: 'user', content: 'Try failing' }],
|
|
274
|
+
},
|
|
275
|
+
tools: { 'fail-tool': failTool },
|
|
276
|
+
context: mockContext,
|
|
277
|
+
});
|
|
278
|
+
expect(result.toolExecutions.length).toBe(1);
|
|
279
|
+
expect(result.toolExecutions[0].success).toBe(false);
|
|
280
|
+
expect(result.toolExecutions[0].outputSummary).toContain('Boom');
|
|
281
|
+
expect(result.text).toBe('Handled error.');
|
|
282
|
+
});
|
|
283
|
+
it('rejects tool calls for tools not in the filtered set', async () => {
|
|
284
|
+
const mockProvider = {
|
|
285
|
+
generateText: vi
|
|
286
|
+
.fn()
|
|
287
|
+
.mockResolvedValueOnce({
|
|
288
|
+
text: '',
|
|
289
|
+
toolCalls: [{ toolCallId: 'tc-1', toolName: 'unknown-tool', input: {} }],
|
|
290
|
+
tokensIn: 50,
|
|
291
|
+
tokensOut: 20,
|
|
292
|
+
})
|
|
293
|
+
.mockResolvedValueOnce({
|
|
294
|
+
text: 'OK.',
|
|
295
|
+
toolCalls: [],
|
|
296
|
+
tokensIn: 50,
|
|
297
|
+
tokensOut: 20,
|
|
298
|
+
}),
|
|
299
|
+
};
|
|
300
|
+
const result = await executeWithToolDispatch({
|
|
301
|
+
callOptions: {
|
|
302
|
+
provider: mockProvider,
|
|
303
|
+
model: {
|
|
304
|
+
alias: 'test',
|
|
305
|
+
provider: 'test',
|
|
306
|
+
concreteModel: 'test-model',
|
|
307
|
+
apiKeyEnv: 'TEST_KEY',
|
|
308
|
+
},
|
|
309
|
+
system: 'test',
|
|
310
|
+
messages: [{ role: 'user', content: 'test' }],
|
|
311
|
+
},
|
|
312
|
+
tools: {},
|
|
313
|
+
context: mockContext,
|
|
314
|
+
});
|
|
315
|
+
expect(result.toolExecutions[0].success).toBe(false);
|
|
316
|
+
expect(result.toolExecutions[0].errorCode).toBe('TOOL_NOT_FOUND');
|
|
317
|
+
});
|
|
318
|
+
it('stops after maxRounds', async () => {
|
|
319
|
+
const tool = makeTool('loop-tool');
|
|
320
|
+
const mockProvider = {
|
|
321
|
+
generateText: vi.fn().mockResolvedValue({
|
|
322
|
+
text: '',
|
|
323
|
+
toolCalls: [{ toolCallId: 'tc-1', toolName: 'loop-tool', input: {} }],
|
|
324
|
+
tokensIn: 10,
|
|
325
|
+
tokensOut: 5,
|
|
326
|
+
}),
|
|
327
|
+
};
|
|
328
|
+
const result = await executeWithToolDispatch({
|
|
329
|
+
callOptions: {
|
|
330
|
+
provider: mockProvider,
|
|
331
|
+
model: {
|
|
332
|
+
alias: 'test',
|
|
333
|
+
provider: 'test',
|
|
334
|
+
concreteModel: 'test-model',
|
|
335
|
+
apiKeyEnv: 'TEST_KEY',
|
|
336
|
+
},
|
|
337
|
+
system: 'test',
|
|
338
|
+
messages: [{ role: 'user', content: 'test' }],
|
|
339
|
+
},
|
|
340
|
+
tools: { 'loop-tool': tool },
|
|
341
|
+
context: mockContext,
|
|
342
|
+
maxRounds: 3,
|
|
343
|
+
});
|
|
344
|
+
expect(result.rounds).toBe(3);
|
|
345
|
+
expect(result.toolExecutions.length).toBe(3);
|
|
346
|
+
});
|
|
347
|
+
});
|
|
348
|
+
});
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { ToolPermissions, ToolSet } from '../types/index.js';
|
|
2
|
+
/**
|
|
3
|
+
* Filter a set of available tools based on the persona's tool permissions.
|
|
4
|
+
*
|
|
5
|
+
* Only tools whose names appear in `permitted` and do NOT appear in `denied`
|
|
6
|
+
* are returned. The runtime enforces this boundary -- the LLM never sees
|
|
7
|
+
* tools outside the permitted set.
|
|
8
|
+
*
|
|
9
|
+
* If a sub-agent declares a `tools` restriction, apply it as an additional
|
|
10
|
+
* subset filter on top of the persona-level permissions.
|
|
11
|
+
*/
|
|
12
|
+
export declare function filterTools(available: ToolSet, permissions: ToolPermissions, subAgentTools?: string[]): ToolSet;
|
|
13
|
+
//# sourceMappingURL=tool-filter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tool-filter.d.ts","sourceRoot":"","sources":["../../src/engine/tool-filter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAElE;;;;;;;;;GASG;AACH,wBAAgB,WAAW,CACzB,SAAS,EAAE,OAAO,EAClB,WAAW,EAAE,eAAe,EAC5B,aAAa,CAAC,EAAE,MAAM,EAAE,GACvB,OAAO,CAcT"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Narrow `available` to the tools a persona is allowed to use.
 *
 * A tool is kept only when its name is listed in `permissions.permitted`,
 * is not listed in `permissions.denied`, and -- when `subAgentTools` is
 * given -- is also included in `subAgentTools`. Names are compared exactly;
 * no group expansion happens here.
 *
 * @param available Map of tool name to tool definition.
 * @param permissions Object with `permitted` and `denied` name lists.
 * @param subAgentTools Optional list that further restricts the result.
 * @returns A new object holding only the tools that pass every check.
 */
export function filterTools(available, permissions, subAgentTools) {
    const blocked = new Set(permissions.denied);
    const allowed = new Set(permissions.permitted);
    const visibleTools = {};
    for (const toolName of Object.keys(available)) {
        // Deny wins over permit; the sub-agent list can only narrow further.
        const visible = !blocked.has(toolName) &&
            allowed.has(toolName) &&
            (!subAgentTools || subAgentTools.includes(toolName));
        if (visible) {
            visibleTools[toolName] = available[toolName];
        }
    }
    return visibleTools;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluation.test.d.ts","sourceRoot":"","sources":["../../src/evaluation/evaluation.test.ts"],"names":[],"mappings":""}
|