oxe-cc 1.8.0 → 1.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -0
- package/README.md +1 -1
- package/bin/lib/oxe-manifest.cjs +20 -13
- package/bin/lib/oxe-operational.cjs +96 -10
- package/bin/lib/oxe-project-health.cjs +77 -18
- package/bin/lib/oxe-rationality.cjs +9 -7
- package/bin/oxe-cc.js +202 -39
- package/lib/runtime/compiler/graph-compiler.js +1 -1
- package/lib/runtime/executor/action-tool-map.js +4 -0
- package/lib/runtime/executor/built-in-tools.js +27 -0
- package/lib/runtime/executor/llm-task-executor.d.ts +4 -1
- package/lib/runtime/executor/llm-task-executor.js +41 -5
- package/lib/runtime/executor/node-prompt-builder.d.ts +4 -1
- package/lib/runtime/executor/node-prompt-builder.js +13 -2
- package/lib/runtime/models/failure.d.ts +1 -1
- package/lib/runtime/scheduler/scheduler.d.ts +5 -1
- package/lib/runtime/scheduler/scheduler.js +82 -14
- package/lib/runtime/verification/verification-compiler.js +7 -5
- package/lib/sdk/index.cjs +48 -44
- package/oxe/templates/PLAN.template.md +23 -9
- package/oxe/templates/SPEC.template.md +55 -22
- package/oxe/workflows/plan.md +18 -6
- package/oxe/workflows/spec.md +31 -9
- package/package.json +103 -100
- package/packages/runtime/package.json +18 -18
- package/packages/runtime/src/compiler/graph-compiler.ts +1 -1
- package/packages/runtime/src/evidence/evidence-store.ts +2 -2
- package/packages/runtime/src/executor/action-tool-map.ts +4 -0
- package/packages/runtime/src/executor/built-in-tools.ts +29 -0
- package/packages/runtime/src/executor/llm-task-executor.ts +46 -4
- package/packages/runtime/src/executor/node-prompt-builder.ts +18 -1
- package/packages/runtime/src/models/failure.ts +2 -0
- package/packages/runtime/src/scheduler/scheduler.ts +93 -15
- package/packages/runtime/src/verification/verification-compiler.ts +7 -5
- package/vscode-extension/package.json +185 -185
- package/vscode-extension/oxe-agents-0.9.1.vsix +0 -0
- package/vscode-extension/oxe-agents-0.9.2.vsix +0 -0
- package/vscode-extension/oxe-agents-1.0.0.vsix +0 -0
- package/vscode-extension/oxe-agents-1.4.0.vsix +0 -0
- package/vscode-extension/oxe-agents-1.5.0.vsix +0 -0
- package/vscode-extension/oxe-agents-1.5.1.vsix +0 -0
- package/vscode-extension/oxe-agents-1.6.0.vsix +0 -0
- package/vscode-extension/oxe-agents-1.7.0.vsix +0 -0
- package/vscode-extension/oxe-agents-1.8.0.vsix +0 -0
package/package.json
CHANGED
|
@@ -1,100 +1,103 @@
|
|
|
1
|
-
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
1
|
+
{
|
|
2
|
+
"name": "oxe-cc",
|
|
3
|
+
"version": "1.8.3",
|
|
4
|
+
"description": "OXE — spec-driven workflows in .oxe/ with runtime enterprise, evidence-first verification and multi-runtime integrations (npx)",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "",
|
|
7
|
+
"homepage": "https://www.npmjs.com/package/oxe-cc",
|
|
8
|
+
"bugs": {
|
|
9
|
+
"url": "https://github.com/propagno/oxe-build/issues"
|
|
10
|
+
},
|
|
11
|
+
"repository": {
|
|
12
|
+
"type": "git",
|
|
13
|
+
"url": "git+https://github.com/propagno/oxe-build.git"
|
|
14
|
+
},
|
|
15
|
+
"keywords": [
|
|
16
|
+
"cursor",
|
|
17
|
+
"github-copilot",
|
|
18
|
+
"copilot",
|
|
19
|
+
"claude",
|
|
20
|
+
"claude-code",
|
|
21
|
+
"opencode",
|
|
22
|
+
"gemini-cli",
|
|
23
|
+
"codex",
|
|
24
|
+
"windsurf",
|
|
25
|
+
"antigravity",
|
|
26
|
+
"spec-driven",
|
|
27
|
+
"context-engineering",
|
|
28
|
+
"ai-agents",
|
|
29
|
+
"oxe"
|
|
30
|
+
],
|
|
31
|
+
"engines": {
|
|
32
|
+
"node": ">=18.0.0"
|
|
33
|
+
},
|
|
34
|
+
"main": "lib/sdk/index.cjs",
|
|
35
|
+
"exports": {
|
|
36
|
+
".": "./lib/sdk/index.cjs",
|
|
37
|
+
"./package.json": "./package.json"
|
|
38
|
+
},
|
|
39
|
+
"types": "lib/sdk/index.d.ts",
|
|
40
|
+
"bin": {
|
|
41
|
+
"oxe-cc": "bin/oxe-cc.js",
|
|
42
|
+
"oxe": "bin/oxe-cc.js"
|
|
43
|
+
},
|
|
44
|
+
"files": [
|
|
45
|
+
"bin",
|
|
46
|
+
"lib",
|
|
47
|
+
"oxe",
|
|
48
|
+
"assets",
|
|
49
|
+
".cursor",
|
|
50
|
+
".github",
|
|
51
|
+
"commands",
|
|
52
|
+
"vscode-extension/package.json",
|
|
53
|
+
"vscode-extension/.vscodeignore",
|
|
54
|
+
"vscode-extension/LICENSE",
|
|
55
|
+
"vscode-extension/src",
|
|
56
|
+
"docs",
|
|
57
|
+
"QUICKSTART.md",
|
|
58
|
+
"packages/runtime/src",
|
|
59
|
+
"packages/runtime/package.json",
|
|
60
|
+
"packages/runtime/tsconfig.json",
|
|
61
|
+
"AGENTS.md",
|
|
62
|
+
"README.md",
|
|
63
|
+
"CHANGELOG.md"
|
|
64
|
+
],
|
|
65
|
+
"scripts": {
|
|
66
|
+
"build:runtime": "cd packages/runtime && npm run build",
|
|
67
|
+
"sync:runtime-metadata": "node scripts/sync-runtime-metadata.cjs",
|
|
68
|
+
"sync:cursor": "node scripts/sync-cursor-from-prompts.cjs",
|
|
69
|
+
"release:doctor": "node scripts/release-doctor.cjs",
|
|
70
|
+
"release:manifest": "node scripts/release-doctor.cjs --write-manifest",
|
|
71
|
+
"test:root": "node --test tests/install.test.cjs tests/oxe-project-health.test.cjs tests/oxe-dashboard.test.cjs tests/oxe-operational.test.cjs tests/oxe-azure.test.cjs tests/oxe-sdk.test.cjs tests/oxe-manifest.test.cjs tests/oxe-agent-install.test.cjs tests/oxe-install-resolve-full.test.cjs tests/oxe-health-extended.test.cjs tests/oxe-workflows-edge.test.cjs tests/oxe-sdk-edge.test.cjs tests/oxe-cli-edge.test.cjs tests/oxe-npm-version.test.cjs tests/oxe-scripts.test.cjs tests/oxe-retro-health.test.cjs tests/oxe-security-permissions.test.cjs tests/oxe-runtime-semantics.test.cjs tests/oxe-plugins.test.cjs",
|
|
72
|
+
"test:runtime": "cd packages/runtime && npm test",
|
|
73
|
+
"test:runtime-smoke": "node scripts/runtime-smoke-matrix.cjs",
|
|
74
|
+
"test:recovery-fixtures": "node scripts/run-recovery-fixtures.cjs",
|
|
75
|
+
"test:multi-agent-soak": "node scripts/run-multi-agent-soak.cjs",
|
|
76
|
+
"test": "npm run build:runtime && npm run test:root && npm run test:runtime && npm run test:runtime-smoke && npm run test:recovery-fixtures && npm run test:multi-agent-soak",
|
|
77
|
+
"test:coverage": "c8 --check-coverage --lines 82 --functions 85 --branches 58 --statements 82 npm test",
|
|
78
|
+
"scan:assets": "node scripts/oxe-assets-scan.cjs",
|
|
79
|
+
"build:vscode-ext": "cd vscode-extension && npx @vscode/vsce package --no-yarn --allow-missing-repository",
|
|
80
|
+
"prepublishOnly": "npm test && npm run scan:assets && npm run build:vscode-ext && npm run release:manifest && node bin/oxe-cc.js --version"
|
|
81
|
+
},
|
|
82
|
+
"c8": {
|
|
83
|
+
"all": true,
|
|
84
|
+
"include": [
|
|
85
|
+
"bin/oxe-cc.js",
|
|
86
|
+
"bin/lib/**/*.cjs",
|
|
87
|
+
"lib/**/*.cjs",
|
|
88
|
+
"scripts/**/*.cjs"
|
|
89
|
+
],
|
|
90
|
+
"exclude": [
|
|
91
|
+
"**/node_modules/**"
|
|
92
|
+
],
|
|
93
|
+
"reporter": [
|
|
94
|
+
"text-summary"
|
|
95
|
+
]
|
|
96
|
+
},
|
|
97
|
+
"devDependencies": {
|
|
98
|
+
"c8": "^11.0.0"
|
|
99
|
+
},
|
|
100
|
+
"dependencies": {
|
|
101
|
+
"semver": "^7.7.4"
|
|
102
|
+
}
|
|
103
|
+
}
|
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@oxe/runtime",
|
|
3
|
-
"version": "1.8.0",
|
|
4
|
-
"private": true,
|
|
5
|
-
"license": "MIT",
|
|
6
|
-
"description": "OXE agentic execution engine — enterprise runtime core",
|
|
7
|
-
"main": "../../lib/runtime/index.js",
|
|
8
|
-
"types": "../../lib/runtime/index.d.ts",
|
|
9
|
-
"scripts": {
|
|
10
|
-
"build": "tsc",
|
|
11
|
-
"test": "tsc --project tsconfig.test.json && node --test dist-tests/tests/*.test.js",
|
|
12
|
-
"clean": "node -e \"const fs=require('fs');['../../lib/runtime','dist-tests'].forEach(d=>{try{fs.rmSync(d,{recursive:true})}catch{}});\""
|
|
13
|
-
},
|
|
14
|
-
"devDependencies": {
|
|
15
|
-
"@types/node": "^25.6.0",
|
|
16
|
-
"typescript": "^5.4.5"
|
|
17
|
-
}
|
|
18
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "@oxe/runtime",
|
|
3
|
+
"version": "1.8.3",
|
|
4
|
+
"private": true,
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"description": "OXE agentic execution engine — enterprise runtime core",
|
|
7
|
+
"main": "../../lib/runtime/index.js",
|
|
8
|
+
"types": "../../lib/runtime/index.d.ts",
|
|
9
|
+
"scripts": {
|
|
10
|
+
"build": "tsc",
|
|
11
|
+
"test": "tsc --project tsconfig.test.json && node --test dist-tests/tests/*.test.js",
|
|
12
|
+
"clean": "node -e \"const fs=require('fs');['../../lib/runtime','dist-tests'].forEach(d=>{try{fs.rmSync(d,{recursive:true})}catch{}});\""
|
|
13
|
+
},
|
|
14
|
+
"devDependencies": {
|
|
15
|
+
"@types/node": "^25.6.0",
|
|
16
|
+
"typescript": "^5.4.5"
|
|
17
|
+
}
|
|
18
|
+
}
|
|
@@ -123,7 +123,7 @@ export function compile(
|
|
|
123
123
|
mutation_scope: task.files,
|
|
124
124
|
actions: buildActions(task),
|
|
125
125
|
verify: {
|
|
126
|
-
must_pass: task.verifyCommand ? ['tests'] : [],
|
|
126
|
+
must_pass: task.verifyCommand ? (task.aceite.length > 0 ? task.aceite : ['tests']) : [],
|
|
127
127
|
acceptance_refs: task.aceite,
|
|
128
128
|
command: task.verifyCommand,
|
|
129
129
|
},
|
|
@@ -75,8 +75,8 @@ export class EvidenceStore {
|
|
|
75
75
|
const ext = EXT_MAP[type] ?? 'bin';
|
|
76
76
|
|
|
77
77
|
const existing = this.readIndex(run_id, work_item_id, attempt_number);
|
|
78
|
-
const seq = existing.filter((e) => e.type === type).length + 1;
|
|
79
|
-
const filename = seq === 1 ? `${type}.${ext}` : `${type}-${seq}.${ext}`;
|
|
78
|
+
const seq = existing.filter((e) => e.type === type).length + 1;
|
|
79
|
+
const filename = seq === 1 ? `${type}.${ext}` : `${type}-${seq}.${ext}`;
|
|
80
80
|
const filePath = path.join(dir, filename);
|
|
81
81
|
|
|
82
82
|
fs.writeFileSync(filePath, buf);
|
|
@@ -42,5 +42,9 @@ export function selectToolsForActions(actions: Action[]): ToolSchema[] {
|
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
44
|
}
|
|
45
|
+
// finish_task is always available so the LLM can signal authoritative completion
|
|
46
|
+
if (!seen.has('finish_task') && BUILT_IN_TOOLS.finish_task) {
|
|
47
|
+
result.push(BUILT_IN_TOOLS.finish_task.schema);
|
|
48
|
+
}
|
|
45
49
|
return result;
|
|
46
50
|
}
|
|
@@ -262,6 +262,34 @@ function runShell(command: string, cwd: string, timeoutMs: number): Promise<stri
|
|
|
262
262
|
});
|
|
263
263
|
}
|
|
264
264
|
|
|
265
|
+
// ─── finish_task ──────────────────────────────────────────────────────────────
|
|
266
|
+
|
|
267
|
+
const finishTask: BuiltInToolHandler = {
|
|
268
|
+
idempotent: true,
|
|
269
|
+
schema: {
|
|
270
|
+
type: 'function',
|
|
271
|
+
function: {
|
|
272
|
+
name: 'finish_task',
|
|
273
|
+
description: 'Signal that the task is complete. Call this when ALL required actions have been performed.',
|
|
274
|
+
parameters: {
|
|
275
|
+
type: 'object',
|
|
276
|
+
properties: {
|
|
277
|
+
summary: { type: 'string', description: 'Summary of what was accomplished' },
|
|
278
|
+
evidence_paths: { type: 'array', items: { type: 'string' }, description: 'Paths to files created or modified' },
|
|
279
|
+
},
|
|
280
|
+
required: ['summary'],
|
|
281
|
+
},
|
|
282
|
+
},
|
|
283
|
+
},
|
|
284
|
+
async execute(args, _cwd) {
|
|
285
|
+
return JSON.stringify({
|
|
286
|
+
__finish_task__: true,
|
|
287
|
+
summary: String(args.summary || ''),
|
|
288
|
+
evidence_paths: Array.isArray(args.evidence_paths) ? args.evidence_paths : [],
|
|
289
|
+
});
|
|
290
|
+
},
|
|
291
|
+
};
|
|
292
|
+
|
|
265
293
|
// ─── Registry ─────────────────────────────────────────────────────────────────
|
|
266
294
|
|
|
267
295
|
export const BUILT_IN_TOOLS: Record<string, BuiltInToolHandler> = {
|
|
@@ -271,6 +299,7 @@ export const BUILT_IN_TOOLS: Record<string, BuiltInToolHandler> = {
|
|
|
271
299
|
glob,
|
|
272
300
|
grep,
|
|
273
301
|
run_command: runCommand,
|
|
302
|
+
finish_task: finishTask,
|
|
274
303
|
};
|
|
275
304
|
|
|
276
305
|
export const ALL_BUILT_IN_SCHEMAS: ToolSchema[] = Object.values(BUILT_IN_TOOLS).map((t) => t.schema);
|
|
@@ -32,9 +32,13 @@ export interface LlmExecutorEvent {
|
|
|
32
32
|
detail?: Record<string, unknown>;
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
+
export interface LlmExecuteOptions {
|
|
36
|
+
previousError?: string | null;
|
|
37
|
+
}
|
|
38
|
+
|
|
35
39
|
const DEFAULT_SYSTEM_PROMPT =
|
|
36
40
|
'You are a precise software engineering agent. Use the tools provided to complete the task. ' +
|
|
37
|
-
'When
|
|
41
|
+
'When all actions are done, call finish_task with a summary of what was accomplished.';
|
|
38
42
|
|
|
39
43
|
export class LlmTaskExecutor implements TaskExecutor {
|
|
40
44
|
constructor(
|
|
@@ -48,8 +52,9 @@ export class LlmTaskExecutor implements TaskExecutor {
|
|
|
48
52
|
lease: WorkspaceLease,
|
|
49
53
|
runId: string,
|
|
50
54
|
attempt: number,
|
|
55
|
+
options: LlmExecuteOptions = {},
|
|
51
56
|
): Promise<TaskResult> {
|
|
52
|
-
const prompt = buildNodePrompt(node, lease, runId, attempt);
|
|
57
|
+
const prompt = buildNodePrompt(node, lease, runId, attempt, { previousError: options.previousError ?? null });
|
|
53
58
|
const tools = selectToolsForActions(node.actions);
|
|
54
59
|
const cwd = lease.root_path;
|
|
55
60
|
const maxTurns = this.provider.maxTurns ?? 10;
|
|
@@ -62,8 +67,11 @@ export class LlmTaskExecutor implements TaskExecutor {
|
|
|
62
67
|
|
|
63
68
|
let finalOutput = '';
|
|
64
69
|
const evidencePaths: string[] = [];
|
|
70
|
+
let completedByFinishTask = false;
|
|
71
|
+
let finishTaskSummary = '';
|
|
65
72
|
|
|
66
|
-
|
|
73
|
+
let turn = 0;
|
|
74
|
+
for (; turn < maxTurns; turn++) {
|
|
67
75
|
this.emit({ type: 'turn_start', nodeId: node.id, attempt, detail: { turn } });
|
|
68
76
|
|
|
69
77
|
let response;
|
|
@@ -105,13 +113,47 @@ export class LlmTaskExecutor implements TaskExecutor {
|
|
|
105
113
|
}
|
|
106
114
|
|
|
107
115
|
messages.push(...concurrentResults, ...serialResults);
|
|
116
|
+
|
|
117
|
+
// Detect finish_task call — authoritative completion signal
|
|
118
|
+
const allResults = [...concurrentResults, ...serialResults];
|
|
119
|
+
const finishResult = allResults.find((r) => r.name === 'finish_task');
|
|
120
|
+
if (finishResult) {
|
|
121
|
+
try {
|
|
122
|
+
const parsed = JSON.parse(finishResult.content as string);
|
|
123
|
+
if (parsed.__finish_task__) {
|
|
124
|
+
completedByFinishTask = true;
|
|
125
|
+
finishTaskSummary = parsed.summary || '';
|
|
126
|
+
if (Array.isArray(parsed.evidence_paths)) {
|
|
127
|
+
evidencePaths.push(...parsed.evidence_paths.filter((p: unknown) => typeof p === 'string'));
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
} catch { /* ignore parse errors */ }
|
|
131
|
+
if (completedByFinishTask) break;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
const completedBy = completedByFinishTask
|
|
136
|
+
? 'finish_task'
|
|
137
|
+
: turn < maxTurns
|
|
138
|
+
? 'no_tool_call'
|
|
139
|
+
: 'turn_limit_exhausted';
|
|
140
|
+
|
|
141
|
+
if (completedBy === 'turn_limit_exhausted') {
|
|
142
|
+
return {
|
|
143
|
+
success: false,
|
|
144
|
+
failure_class: 'llm',
|
|
145
|
+
evidence: evidencePaths,
|
|
146
|
+
output: finalOutput || `Task exhausted ${maxTurns} turns without calling finish_task`,
|
|
147
|
+
completed_by: completedBy,
|
|
148
|
+
};
|
|
108
149
|
}
|
|
109
150
|
|
|
110
151
|
return {
|
|
111
152
|
success: true,
|
|
112
153
|
failure_class: null,
|
|
113
154
|
evidence: evidencePaths,
|
|
114
|
-
output: finalOutput,
|
|
155
|
+
output: completedByFinishTask ? (finishTaskSummary || finalOutput) : finalOutput,
|
|
156
|
+
completed_by: completedBy,
|
|
115
157
|
};
|
|
116
158
|
}
|
|
117
159
|
|
|
@@ -1,11 +1,16 @@
|
|
|
1
1
|
import type { GraphNode } from '../compiler/graph-compiler';
|
|
2
2
|
import type { WorkspaceLease } from '../models/workspace';
|
|
3
3
|
|
|
4
|
+
export interface NodePromptOptions {
|
|
5
|
+
previousError?: string | null;
|
|
6
|
+
}
|
|
7
|
+
|
|
4
8
|
export function buildNodePrompt(
|
|
5
9
|
node: GraphNode,
|
|
6
10
|
lease: WorkspaceLease,
|
|
7
11
|
runId: string,
|
|
8
12
|
attempt: number,
|
|
13
|
+
options: NodePromptOptions = {},
|
|
9
14
|
): string {
|
|
10
15
|
const lines: string[] = [
|
|
11
16
|
`# Tarefa: ${node.title}`,
|
|
@@ -18,6 +23,15 @@ export function buildNodePrompt(
|
|
|
18
23
|
lines.push(`**Escopo de mutação:** ${node.mutation_scope.join(', ')}`);
|
|
19
24
|
}
|
|
20
25
|
|
|
26
|
+
if (attempt > 1 && options.previousError) {
|
|
27
|
+
lines.push('', '## Contexto da tentativa anterior');
|
|
28
|
+
lines.push(`Esta é a tentativa **${attempt}**. A tentativa anterior falhou:`);
|
|
29
|
+
lines.push('', '```');
|
|
30
|
+
lines.push(String(options.previousError).slice(0, 2000));
|
|
31
|
+
lines.push('```', '');
|
|
32
|
+
lines.push('Analise o erro e tente uma abordagem diferente.');
|
|
33
|
+
}
|
|
34
|
+
|
|
21
35
|
if (node.actions.length > 0) {
|
|
22
36
|
lines.push('', '## Ações requeridas');
|
|
23
37
|
for (const action of node.actions) {
|
|
@@ -39,7 +53,10 @@ export function buildNodePrompt(
|
|
|
39
53
|
lines.push('', `**Verificação:** \`${node.verify.command}\``);
|
|
40
54
|
}
|
|
41
55
|
|
|
42
|
-
lines.push('', '
|
|
56
|
+
lines.push('', '## Conclusão da tarefa');
|
|
57
|
+
lines.push('Quando **todas** as ações estiverem concluídas, chame `finish_task` com um resumo do que foi realizado.');
|
|
58
|
+
lines.push('NÃO chame `finish_task` antes de completar todas as ações requeridas.');
|
|
59
|
+
lines.push('', 'Execute as ações acima usando as ferramentas disponíveis.');
|
|
43
60
|
|
|
44
61
|
return lines.join('\n');
|
|
45
62
|
}
|
|
@@ -8,4 +8,6 @@ export type FailureClass =
|
|
|
8
8
|
| 'test' // verification / acceptance test failed
|
|
9
9
|
| 'timeout' // task or run exceeded time budget
|
|
10
10
|
| 'evidence_missing' // required evidence was not collected
|
|
11
|
+
| 'verify' // inline verification command failed after execution
|
|
12
|
+
| 'llm' // LLM exhausted turn budget without calling finish_task
|
|
11
13
|
| null; // success — no failure
|
|
@@ -24,12 +24,15 @@ import type { FailureClass } from '../models/failure';
|
|
|
24
24
|
import { listMemos } from '../decision/decision-memo';
|
|
25
25
|
import type { RollbackPlan } from '../decision/decision-memo';
|
|
26
26
|
import { runCapabilityAsync } from '../plugins/capability-adapter';
|
|
27
|
+
import { verifyRun } from '../verification/verification-compiler';
|
|
28
|
+
import type { AcceptanceCheckSuite } from '../verification/verification-compiler';
|
|
27
29
|
|
|
28
30
|
export interface TaskResult {
|
|
29
31
|
success: boolean;
|
|
30
32
|
failure_class: FailureClass;
|
|
31
33
|
evidence: string[];
|
|
32
34
|
output: string;
|
|
35
|
+
completed_by?: string;
|
|
33
36
|
}
|
|
34
37
|
|
|
35
38
|
export interface TaskExecutor {
|
|
@@ -37,7 +40,8 @@ export interface TaskExecutor {
|
|
|
37
40
|
node: GraphNode,
|
|
38
41
|
lease: WorkspaceLease,
|
|
39
42
|
runId: string,
|
|
40
|
-
attemptNumber: number
|
|
43
|
+
attemptNumber: number,
|
|
44
|
+
options?: { previousError?: string | null }
|
|
41
45
|
): Promise<TaskResult>;
|
|
42
46
|
}
|
|
43
47
|
|
|
@@ -477,6 +481,7 @@ export class Scheduler {
|
|
|
477
481
|
|
|
478
482
|
let lease: WorkspaceLease | null = null;
|
|
479
483
|
let lastResult: TaskResult | null = null;
|
|
484
|
+
let lastError: string | null = null;
|
|
480
485
|
const maxAttempts = node.policy.max_retries + 1;
|
|
481
486
|
const quotaBlocked = this.consumeQuotaForNode(ctx, node);
|
|
482
487
|
if (quotaBlocked) {
|
|
@@ -542,21 +547,33 @@ export class Scheduler {
|
|
|
542
547
|
payload: { workspace_id: lease.workspace_id, strategy: lease.strategy },
|
|
543
548
|
});
|
|
544
549
|
|
|
545
|
-
lastResult = await this.executeNode(node, lease, ctx, attempt, attemptId);
|
|
550
|
+
lastResult = await this.executeNode(node, lease, ctx, attempt, attemptId, { previousError: lastError });
|
|
546
551
|
|
|
547
552
|
if (lastResult.success) {
|
|
548
|
-
this.emit(ctx, {
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
553
|
+
const verifyResult = await this.verifyNode(node, lease, ctx, attemptId, attempt);
|
|
554
|
+
if (verifyResult && verifyResult.status === 'failed') {
|
|
555
|
+
lastResult = {
|
|
556
|
+
success: false,
|
|
557
|
+
failure_class: 'verify',
|
|
558
|
+
evidence: lastResult.evidence,
|
|
559
|
+
output: `Verification failed: ${(verifyResult.gaps || []).join('; ') || 'checks did not pass'}`,
|
|
560
|
+
};
|
|
561
|
+
} else {
|
|
562
|
+
this.emit(ctx, {
|
|
563
|
+
type: 'WorkItemCompleted',
|
|
564
|
+
work_item_id: nodeId,
|
|
565
|
+
attempt_id: attemptId,
|
|
566
|
+
payload: { attempt_number: attempt, evidence: lastResult.evidence },
|
|
567
|
+
});
|
|
568
|
+
status.set(nodeId, 'completed');
|
|
569
|
+
completed.push(nodeId);
|
|
570
|
+
this.recordProgress();
|
|
571
|
+
return;
|
|
572
|
+
}
|
|
558
573
|
}
|
|
559
574
|
|
|
575
|
+
lastError = lastResult.output || (lastResult.failure_class ?? 'unknown error');
|
|
576
|
+
|
|
560
577
|
if (lastResult.failure_class === 'policy') break;
|
|
561
578
|
|
|
562
579
|
if (attempt < maxAttempts) {
|
|
@@ -592,6 +609,7 @@ export class Scheduler {
|
|
|
592
609
|
evidence: [],
|
|
593
610
|
output: `[error_boundary] ${message}`,
|
|
594
611
|
};
|
|
612
|
+
lastError = lastResult.output;
|
|
595
613
|
if (attempt < maxAttempts) {
|
|
596
614
|
const backoffMs = Math.min(1_000 * Math.pow(2, attempt - 1) + Math.random() * 500, 30_000);
|
|
597
615
|
await new Promise<void>(resolve => setTimeout(resolve, backoffMs));
|
|
@@ -660,12 +678,13 @@ export class Scheduler {
|
|
|
660
678
|
lease: WorkspaceLease,
|
|
661
679
|
ctx: SchedulerContext,
|
|
662
680
|
attempt: number,
|
|
663
|
-
attemptId: string
|
|
681
|
+
attemptId: string,
|
|
682
|
+
options: { previousError?: string | null } = {},
|
|
664
683
|
): Promise<TaskResult> {
|
|
665
684
|
const primaryAction = pickPrimaryAction(node, ctx.pluginRegistry);
|
|
666
685
|
const provider = primaryAction ? ctx.pluginRegistry?.toolProviderFor(primaryAction.type) : null;
|
|
667
686
|
if (!provider || !primaryAction) {
|
|
668
|
-
return ctx.executor.execute(node, lease, ctx.runId, attempt);
|
|
687
|
+
return ctx.executor.execute(node, lease, ctx.runId, attempt, options);
|
|
669
688
|
}
|
|
670
689
|
|
|
671
690
|
ctx.auditTrail?.record('plugin_invoked', ctx.policyActor ?? 'runtime', {
|
|
@@ -733,6 +752,62 @@ export class Scheduler {
|
|
|
733
752
|
};
|
|
734
753
|
}
|
|
735
754
|
|
|
755
|
+
private async verifyNode(
|
|
756
|
+
node: GraphNode,
|
|
757
|
+
lease: WorkspaceLease,
|
|
758
|
+
ctx: SchedulerContext,
|
|
759
|
+
attemptId: string,
|
|
760
|
+
attempt: number,
|
|
761
|
+
): Promise<{ status: string; gaps?: string[] } | null> {
|
|
762
|
+
if (!node.verify?.command) return null;
|
|
763
|
+
this.emit(ctx, {
|
|
764
|
+
type: 'VerificationStarted',
|
|
765
|
+
work_item_id: node.id,
|
|
766
|
+
payload: { command: node.verify.command, attempt_number: attempt },
|
|
767
|
+
});
|
|
768
|
+
const suite: AcceptanceCheckSuite = {
|
|
769
|
+
checks: [{
|
|
770
|
+
id: `inline-${node.id}`,
|
|
771
|
+
type: 'custom',
|
|
772
|
+
command: node.verify.command,
|
|
773
|
+
evidence_type_expected: 'stdout',
|
|
774
|
+
acceptance_ref: null,
|
|
775
|
+
description: `Verify ${node.id}`,
|
|
776
|
+
}],
|
|
777
|
+
compiled_at: new Date().toISOString(),
|
|
778
|
+
spec_hash: '',
|
|
779
|
+
plan_hash: '',
|
|
780
|
+
};
|
|
781
|
+
let result: { status: string; gaps?: string[] };
|
|
782
|
+
try {
|
|
783
|
+
result = await verifyRun({
|
|
784
|
+
suite,
|
|
785
|
+
cwd: lease.root_path,
|
|
786
|
+
timeoutMs: (ctx.options as Record<string, unknown>)?.verifyTimeoutMs as number ?? 60_000,
|
|
787
|
+
runId: ctx.runId,
|
|
788
|
+
workItemId: node.id,
|
|
789
|
+
attemptNumber: attempt,
|
|
790
|
+
projectRoot: ctx.projectRoot,
|
|
791
|
+
pluginRegistry: ctx.pluginRegistry,
|
|
792
|
+
});
|
|
793
|
+
} catch (err) {
|
|
794
|
+
this.emit(ctx, {
|
|
795
|
+
type: 'VerificationCompleted',
|
|
796
|
+
work_item_id: node.id,
|
|
797
|
+
attempt_id: attemptId,
|
|
798
|
+
payload: { status: 'error', error: String(err) },
|
|
799
|
+
});
|
|
800
|
+
return null;
|
|
801
|
+
}
|
|
802
|
+
this.emit(ctx, {
|
|
803
|
+
type: 'VerificationCompleted',
|
|
804
|
+
work_item_id: node.id,
|
|
805
|
+
attempt_id: attemptId,
|
|
806
|
+
payload: { status: result.status },
|
|
807
|
+
});
|
|
808
|
+
return result;
|
|
809
|
+
}
|
|
810
|
+
|
|
736
811
|
private evaluatePolicyForNode(node: GraphNode, ctx: SchedulerContext): PersistedPolicyDecision | null {
|
|
737
812
|
if (!ctx.policyEngine) return null;
|
|
738
813
|
const primaryAction = pickPrimaryAction(node, ctx.pluginRegistry);
|
|
@@ -767,7 +842,10 @@ export class Scheduler {
|
|
|
767
842
|
ctx: SchedulerContext,
|
|
768
843
|
decision: PersistedPolicyDecision | null
|
|
769
844
|
): Promise<string> {
|
|
770
|
-
if (!ctx.gateManager)
|
|
845
|
+
if (!ctx.gateManager) {
|
|
846
|
+
console.warn('[scheduler] ctx.gateManager not configured — gates will not be persisted');
|
|
847
|
+
return 'gate-missing-manager';
|
|
848
|
+
}
|
|
771
849
|
const scope = inferGateScope(node);
|
|
772
850
|
const primaryAction = pickPrimaryAction(node, ctx.pluginRegistry);
|
|
773
851
|
const gate = await ctx.gateManager.request(scope, {
|