funolio-agent 0.17.9 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/approval.d.ts +7 -6
- package/dist/approval.d.ts.map +1 -1
- package/dist/approval.js +40 -10
- package/dist/approval.js.map +1 -1
- package/dist/auth/auto-detect.d.ts.map +1 -1
- package/dist/auth/auto-detect.js +11 -1
- package/dist/auth/auto-detect.js.map +1 -1
- package/dist/backfill.js +2 -2
- package/dist/backfill.js.map +1 -1
- package/dist/bot-manager.d.ts +2 -1
- package/dist/bot-manager.d.ts.map +1 -1
- package/dist/bot-manager.js +9 -2
- package/dist/bot-manager.js.map +1 -1
- package/dist/clerk-model.d.ts +0 -1
- package/dist/clerk-model.d.ts.map +1 -1
- package/dist/clerk-model.js +24 -50
- package/dist/clerk-model.js.map +1 -1
- package/dist/commands/configure-provider.js +2 -2
- package/dist/commands/configure-provider.js.map +1 -1
- package/dist/commands/configure.d.ts.map +1 -1
- package/dist/commands/configure.js +10 -14
- package/dist/commands/configure.js.map +1 -1
- package/dist/commands/start.d.ts.map +1 -1
- package/dist/commands/start.js +23 -4
- package/dist/commands/start.js.map +1 -1
- package/dist/config.d.ts +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +2 -2
- package/dist/config.js.map +1 -1
- package/dist/context-window.d.ts +2 -8
- package/dist/context-window.d.ts.map +1 -1
- package/dist/context-window.js +4 -4
- package/dist/context-window.js.map +1 -1
- package/dist/eval/orchestrator-front-door-replay.js +43 -2
- package/dist/eval/orchestrator-front-door-replay.js.map +1 -1
- package/dist/eval/orchestrator-todo-dispatch-replay.d.ts +2 -0
- package/dist/eval/orchestrator-todo-dispatch-replay.d.ts.map +1 -0
- package/dist/eval/orchestrator-todo-dispatch-replay.js +253 -0
- package/dist/eval/orchestrator-todo-dispatch-replay.js.map +1 -0
- package/dist/eval/orchestrator-todo-planning-replay.d.ts +2 -0
- package/dist/eval/orchestrator-todo-planning-replay.d.ts.map +1 -0
- package/dist/eval/orchestrator-todo-planning-replay.js +247 -0
- package/dist/eval/orchestrator-todo-planning-replay.js.map +1 -0
- package/dist/eval/policy-detection-replay.d.ts +2 -0
- package/dist/eval/policy-detection-replay.d.ts.map +1 -0
- package/dist/eval/policy-detection-replay.js +122 -0
- package/dist/eval/policy-detection-replay.js.map +1 -0
- package/dist/eval/todo-worker-runtime-replay.d.ts +2 -0
- package/dist/eval/todo-worker-runtime-replay.d.ts.map +1 -0
- package/dist/eval/todo-worker-runtime-replay.js +520 -0
- package/dist/eval/todo-worker-runtime-replay.js.map +1 -0
- package/dist/integration-tokens.d.ts +6 -0
- package/dist/integration-tokens.d.ts.map +1 -1
- package/dist/integration-tokens.js +43 -0
- package/dist/integration-tokens.js.map +1 -1
- package/dist/local-data.d.ts +128 -1
- package/dist/local-data.d.ts.map +1 -1
- package/dist/local-data.js +702 -18
- package/dist/local-data.js.map +1 -1
- package/dist/local-db.d.ts.map +1 -1
- package/dist/local-db.js +216 -12
- package/dist/local-db.js.map +1 -1
- package/dist/local-funnel.d.ts.map +1 -1
- package/dist/local-funnel.js +7 -0
- package/dist/local-funnel.js.map +1 -1
- package/dist/local-server.d.ts.map +1 -1
- package/dist/local-server.js +119 -96
- package/dist/local-server.js.map +1 -1
- package/dist/mcp/bridge-server.d.ts.map +1 -1
- package/dist/mcp/bridge-server.js +8 -2
- package/dist/mcp/bridge-server.js.map +1 -1
- package/dist/mcp/manager.d.ts +5 -0
- package/dist/mcp/manager.d.ts.map +1 -1
- package/dist/mcp/manager.js +36 -0
- package/dist/mcp/manager.js.map +1 -1
- package/dist/mcp/sync-cli-config.d.ts +5 -0
- package/dist/mcp/sync-cli-config.d.ts.map +1 -1
- package/dist/mcp/sync-cli-config.js +10 -2
- package/dist/mcp/sync-cli-config.js.map +1 -1
- package/dist/message-loop.d.ts +1 -0
- package/dist/message-loop.d.ts.map +1 -1
- package/dist/message-loop.js +122 -17
- package/dist/message-loop.js.map +1 -1
- package/dist/mqtt-client.d.ts +44 -0
- package/dist/mqtt-client.d.ts.map +1 -1
- package/dist/mqtt-client.js.map +1 -1
- package/dist/orchestration/front-door-policy.d.ts +26 -9
- package/dist/orchestration/front-door-policy.d.ts.map +1 -1
- package/dist/orchestration/front-door-policy.js +242 -69
- package/dist/orchestration/front-door-policy.js.map +1 -1
- package/dist/orchestration/orchestrator-blocked-prompt.d.ts +18 -0
- package/dist/orchestration/orchestrator-blocked-prompt.d.ts.map +1 -0
- package/dist/orchestration/orchestrator-blocked-prompt.js +46 -0
- package/dist/orchestration/orchestrator-blocked-prompt.js.map +1 -0
- package/dist/orchestration/orchestrator-final-response-prompt.d.ts +10 -0
- package/dist/orchestration/orchestrator-final-response-prompt.d.ts.map +1 -0
- package/dist/orchestration/orchestrator-final-response-prompt.js +39 -0
- package/dist/orchestration/orchestrator-final-response-prompt.js.map +1 -0
- package/dist/orchestration/orchestrator-operating-prompt.d.ts +11 -0
- package/dist/orchestration/orchestrator-operating-prompt.d.ts.map +1 -1
- package/dist/orchestration/orchestrator-operating-prompt.js +106 -36
- package/dist/orchestration/orchestrator-operating-prompt.js.map +1 -1
- package/dist/orchestration/policy-prompt.d.ts +6 -0
- package/dist/orchestration/policy-prompt.d.ts.map +1 -0
- package/dist/orchestration/policy-prompt.js +40 -0
- package/dist/orchestration/policy-prompt.js.map +1 -0
- package/dist/orchestration/worker-operating-prompt.d.ts +16 -0
- package/dist/orchestration/worker-operating-prompt.d.ts.map +1 -0
- package/dist/orchestration/worker-operating-prompt.js +75 -0
- package/dist/orchestration/worker-operating-prompt.js.map +1 -0
- package/dist/orchestrator.d.ts +19 -0
- package/dist/orchestrator.d.ts.map +1 -1
- package/dist/orchestrator.js +614 -54
- package/dist/orchestrator.js.map +1 -1
- package/dist/policy-detection.d.ts +40 -0
- package/dist/policy-detection.d.ts.map +1 -0
- package/dist/policy-detection.js +298 -0
- package/dist/policy-detection.js.map +1 -0
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +35 -6
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/claude-cli.d.ts.map +1 -1
- package/dist/providers/claude-cli.js +11 -2
- package/dist/providers/claude-cli.js.map +1 -1
- package/dist/providers/codex-cli.d.ts.map +1 -1
- package/dist/providers/codex-cli.js +121 -2
- package/dist/providers/codex-cli.js.map +1 -1
- package/dist/providers/index.d.ts +4 -0
- package/dist/providers/index.d.ts.map +1 -1
- package/dist/providers/index.js.map +1 -1
- package/dist/summarization-pipeline.d.ts +1 -4
- package/dist/summarization-pipeline.d.ts.map +1 -1
- package/dist/summarization-pipeline.js +43 -56
- package/dist/summarization-pipeline.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +2 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/todo-tasks.d.ts +2 -0
- package/dist/tools/todo-tasks.d.ts.map +1 -1
- package/dist/tools/todo-tasks.js +203 -6
- package/dist/tools/todo-tasks.js.map +1 -1
- package/dist/types.d.ts +2 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/wizard-state.d.ts.map +1 -1
- package/dist/wizard-state.js +30 -8
- package/dist/wizard-state.js.map +1 -1
- package/dist/wizard-support.d.ts +2 -2
- package/dist/wizard-support.d.ts.map +1 -1
- package/dist/wizard-support.js +80 -93
- package/dist/wizard-support.js.map +1 -1
- package/dist/workflow-engine.d.ts +3 -0
- package/dist/workflow-engine.d.ts.map +1 -1
- package/dist/workflow-engine.js +111 -82
- package/dist/workflow-engine.js.map +1 -1
- package/package.json +5 -1
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted (TypeScript CommonJS interop) helper; reuses `this.__createBinding` when one already exists.
// Exposes `m[k]` on `o` under the name `k2` (which defaults to `k`).
// When `Object.create` is available the property is defined via a descriptor: the descriptor found on `m`
// is reused as-is, unless it is missing, is an accessor on a module not flagged `__esModule`, or is a
// writable/configurable data property -- in those cases an enumerable getter returning `m[k]` is used instead.
// Without `Object.create` the value is simply copied with `o[k2] = m[k]`.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
// Compiler-emitted helper; reuses `this.__setModuleDefault` when one already exists.
// Sets `o.default` to `v`: as an enumerable property via Object.defineProperty when `Object.create`
// is available, otherwise by plain assignment.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
// Compiler-emitted helper backing `import * as x from '...'`; reuses `this.__importStar` when one already exists.
// Returns `mod` unchanged when it is flagged `__esModule`. Otherwise builds a new object that binds every
// own property name of `mod` except "default" (via __createBinding) and sets `default` to `mod` itself
// (via __setModuleDefault).
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a for-in/hasOwnProperty
// fallback when that function is unavailable.
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
// Compiler-emitted helper backing default imports; reuses `this.__importDefault` when one already exists.
// Returns `mod` unchanged when it is flagged `__esModule`; otherwise wraps it as `{ default: mod }`
// so callers can always read `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
const fs_1 = __importDefault(require("fs"));
|
|
40
|
+
const path_1 = __importDefault(require("path"));
|
|
41
|
+
const clerk_model_1 = require("../clerk-model");
|
|
42
|
+
const data = __importStar(require("../local-data"));
|
|
43
|
+
const policy_detection_1 = require("../policy-detection");
|
|
44
|
+
/**
 * Extracts the report destination from the command-line arguments.
 *
 * Two spellings are accepted, and the first one encountered wins:
 *   --output <path>   the value is the argument that follows the flag
 *   --output=<path>   the value is inline
 *
 * @param {string[]} argv - Arguments after the script name (e.g. `process.argv.slice(2)`).
 * @returns {string|null} The raw path as typed, or `null` when no `--output` flag is
 *   present or the flag carries no (non-empty) value.
 */
function parseOutputPath(argv) {
    const inlinePrefix = '--output=';
    for (let i = 0; i < argv.length; i += 1) {
        const arg = argv[i];
        if (arg === '--output') {
            // A trailing `--output` with nothing after it yields null rather than undefined.
            return argv[i + 1] || null;
        }
        if (typeof arg === 'string' && arg.startsWith(inlinePrefix)) {
            // `--output=` with an empty value is treated the same as a missing value.
            return arg.slice(inlinePrefix.length) || null;
        }
    }
    return null;
}
|
|
50
|
+
/**
 * Replays a fixed set of sample prompts through both policy detectors and emits a JSON report.
 *
 * For every sample prompt this:
 *   1. runs the code-based detector (`detectPolicySignals`),
 *   2. asks the configured clerk to interpret the prompt (`clerk.interpretPolicyUpdate`),
 *   3. merges the code-extracted patch with the clerk patch (clerk keys win on conflict)
 *      and passes the result through `sanitizePolicyPatch`,
 *   4. records all of the above as one row of the report.
 *
 * A clerk call that throws does not abort the replay: the row is produced with the clerk
 * fields at their empty defaults and the failure message is recorded in `clerkError`, so a
 * failed call can be told apart from a genuine "no policy change" answer. The report also
 * carries a `clerkFailures` count.
 *
 * The report is written to the path given by `--output` (resolved against the current
 * working directory, parent directories created as needed) or printed to stdout.
 *
 * @returns {Promise<void>}
 * @throws {Error} When no clerk is configured; file-system errors from writing the report
 *   also propagate to the caller.
 */
async function main() {
    const clerk = (0, clerk_model_1.getClerk)();
    if (!clerk) {
        throw new Error('Clerk is not configured. Configure a clerk model before running policy replay.');
    }
    // Agent names are handed to the code-based detectors; entries with a falsy name are dropped.
    const agentNames = data.listAgentProfiles().map((agent) => agent.name).filter(Boolean);
    const currentPolicy = data.getEffectiveOrchestrationPolicy();
    const prompts = [
        { label: 'no_policy', prompt: 'Code this responsive landing page.' },
        { label: 'deploy_permission', prompt: "Don't deploy without my permission." },
        { label: 'default_coder_project', prompt: 'For this project, Ben should code by default.' },
        { label: 'mixed_work_and_policy', prompt: "Build this with Ben and John, and don't deploy without my permission." },
        { label: 'subtle_release_guard', prompt: 'Releases need my green light first.' },
        { label: 'status_heartbeat', prompt: 'Keep me updated every 30 seconds.' },
        { label: 'multiple_defaults', prompt: 'For this project, John should QA by default and Brain should review ideas by default.' },
        { label: 'rename_only', prompt: 'Call you O from now on.' },
    ];
    const results = [];
    // Prompts are replayed one at a time, in order, so rows appear in the order listed above.
    for (const entry of prompts) {
        const codeSignal = (0, policy_detection_1.detectPolicySignals)(entry.prompt, { agentNames });
        let interpreted = null;
        let clerkError = null;
        try {
            interpreted = await clerk.interpretPolicyUpdate({
                prompt: entry.prompt,
                currentPolicy,
            });
        }
        catch (err) {
            // Best effort: keep replaying the remaining prompts, but remember why this one failed.
            interpreted = null;
            clerkError = err instanceof Error ? err.message : String(err);
        }
        // Later spread wins: keys present in the clerk patch override the code-extracted ones.
        const mergedPatch = (0, policy_detection_1.sanitizePolicyPatch)(entry.prompt, {
            ...(0, policy_detection_1.extractPolicyPatchFromPrompt)(entry.prompt, currentPolicy, { agentNames }),
            ...(interpreted?.patch || {}),
        }, currentPolicy);
        results.push({
            label: entry.label,
            prompt: entry.prompt,
            codeSignals: codeSignal,
            clerkPolicyChange: interpreted?.policyChange ?? false,
            clerkScope: interpreted?.scope ?? null,
            clerkSummaryLines: interpreted?.summaryLines ?? [],
            clerkPatch: interpreted?.patch ?? {},
            clerkError,
            fallbackSummaryLines: (0, policy_detection_1.summarizePolicyPatch)(interpreted?.patch ?? {}),
            stagedPatchPreview: mergedPatch,
            stagedSummaryPreview: (0, policy_detection_1.summarizePolicyPatch)(mergedPatch),
            // These two flags are exact complements, both derived from the code-based detector.
            turnPathWouldCallClerk: codeSignal.hasSignals,
            summaryBackstopWouldCallClerk: !codeSignal.hasSignals,
        });
    }
    const summary = {
        generatedAt: new Date().toISOString(),
        agentNames,
        samples: results.length,
        positivesByCode: results.filter((row) => row.codeSignals.hasSignals).length,
        positivesByClerk: results.filter((row) => row.clerkPolicyChange).length,
        clerkFailures: results.filter((row) => row.clerkError !== null).length,
        results,
    };
    const outputPath = parseOutputPath(process.argv.slice(2));
    const payload = JSON.stringify(summary, null, 2);
    if (outputPath) {
        const resolved = path_1.default.resolve(process.cwd(), outputPath);
        // Allow `--output some/new/dir/report.json` without requiring the directory to exist.
        fs_1.default.mkdirSync(path_1.default.dirname(resolved), { recursive: true });
        fs_1.default.writeFileSync(resolved, payload, 'utf8');
        console.log(`Wrote ${resolved}`);
    }
    else {
        console.log(payload);
    }
}
|
|
118
|
+
// Script entry point: run the replay. On rejection, print only the error's message
// (or its string form for non-Error values) to stderr and mark the run as failed by
// setting process.exitCode instead of calling process.exit().
main().catch((err) => {
|
|
119
|
+
console.error(err instanceof Error ? err.message : String(err));
|
|
120
|
+
process.exitCode = 1;
|
|
121
|
+
});
|
|
122
|
+
//# sourceMappingURL=policy-detection-replay.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"policy-detection-replay.js","sourceRoot":"","sources":["../../src/eval/policy-detection-replay.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,4CAAoB;AACpB,gDAAwB;AACxB,gDAA0C;AAC1C,oDAAsC;AACtC,0DAK6B;AAE7B,SAAS,eAAe,CAAC,IAAc;IACrC,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,UAAU,CAAC,CAAC;IAC9D,IAAI,KAAK,KAAK,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IAC9B,OAAO,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC;AACjC,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,KAAK,GAAG,IAAA,sBAAQ,GAAE,CAAC;IACzB,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,gFAAgF,CAAC,CAAC;IACpG,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACvF,MAAM,aAAa,GAAG,IAAI,CAAC,+BAA+B,EAAE,CAAC;IAC7D,MAAM,OAAO,GAAG;QACd,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,oCAAoC,EAAE;QACpE,EAAE,KAAK,EAAE,mBAAmB,EAAE,MAAM,EAAE,qCAAqC,EAAE;QAC7E,EAAE,KAAK,EAAE,uBAAuB,EAAE,MAAM,EAAE,+CAA+C,EAAE;QAC3F,EAAE,KAAK,EAAE,uBAAuB,EAAE,MAAM,EAAE,uEAAuE,EAAE;QACnH,EAAE,KAAK,EAAE,sBAAsB,EAAE,MAAM,EAAE,qCAAqC,EAAE;QAChF,EAAE,KAAK,EAAE,kBAAkB,EAAE,MAAM,EAAE,mCAAmC,EAAE;QAC1E,EAAE,KAAK,EAAE,mBAAmB,EAAE,MAAM,EAAE,uFAAuF,EAAE;QAC/H,EAAE,KAAK,EAAE,aAAa,EAAE,MAAM,EAAE,yBAAyB,EAAE;KAC5D,CAAC;IAEF,MAAM,OAAO,GAAU,EAAE,CAAC;IAC1B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,IAAA,sCAAmB,EAAC,KAAK,CAAC,MAAM,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC;QACrE,IAAI,WAAW,GAAG,IAAsE,CAAC;QACzF,IAAI,CAAC;YACH,WAAW,GAAG,MAAM,KAAK,CAAC,qBAAqB,CAAC;gBAC9C,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,aAAa;aACd,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACP,WAAW,GAAG,IAAI,CAAC;QACrB,CAAC;QACD,MAAM,WAAW,GAAG,IAAA,sCAAmB,EACrC,KAAK,CAAC,MAAM,EACZ;YACE,GAAG,IAAA,+CAA4B,EAAC,KAAK,CAAC,MAAM,EAAE,aAAa,EAAE,EAAE,UAAU,EAAE,CAAC;YAC5E,GAAG,CAAC,WAAW,EAAE,KAAK,IAAI,EAAE,CAAC;SAC9B,EACD,aAAa,CACd,CAAC;QACF,OAAO,CAAC,IAAI,CAAC;YACX,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,WAAW,EAAE,UAAU;YACvB,iBAAiB,EAAE,WAAW,EAAE,YAAY,IAAI,KAAK;YACrD
,UAAU,EAAE,WAAW,EAAE,KAAK,IAAI,IAAI;YACtC,iBAAiB,EAAE,WAAW,EAAE,YAAY,IAAI,EAAE;YAClD,UAAU,EAAE,WAAW,EAAE,KAAK,IAAI,EAAE;YACpC,oBAAoB,EAAE,IAAA,uCAAoB,EAAC,WAAW,EAAE,KAAK,IAAI,EAAE,CAAC;YACpE,kBAAkB,EAAE,WAAW;YAC/B,oBAAoB,EAAE,IAAA,uCAAoB,EAAC,WAAW,CAAC;YACvD,sBAAsB,EAAE,UAAU,CAAC,UAAU;YAC7C,6BAA6B,EAAE,CAAC,UAAU,CAAC,UAAU;SACtD,CAAC,CAAC;IACL,CAAC;IAED,MAAM,OAAO,GAAG;QACd,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACrC,UAAU;QACV,OAAO,EAAE,OAAO,CAAC,MAAM;QACvB,eAAe,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC,MAAM;QAC3E,gBAAgB,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC,MAAM;QACvE,OAAO;KACR,CAAC;IAEF,MAAM,UAAU,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1D,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IACjD,IAAI,UAAU,EAAE,CAAC;QACf,MAAM,QAAQ,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,UAAU,CAAC,CAAC;QACzD,YAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,SAAS,QAAQ,EAAE,CAAC,CAAC;IACnC,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACvB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;IAChE,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;AACvB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"todo-worker-runtime-replay.d.ts","sourceRoot":"","sources":["../../src/eval/todo-worker-runtime-replay.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted (TypeScript CommonJS interop) helper; reuses `this.__createBinding` when one already exists.
// Exposes `m[k]` on `o` under the name `k2` (which defaults to `k`).
// When `Object.create` is available the property is defined via a descriptor: the descriptor found on `m`
// is reused as-is, unless it is missing, is an accessor on a module not flagged `__esModule`, or is a
// writable/configurable data property -- in those cases an enumerable getter returning `m[k]` is used instead.
// Without `Object.create` the value is simply copied with `o[k2] = m[k]`.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
// Compiler-emitted helper; reuses `this.__setModuleDefault` when one already exists.
// Sets `o.default` to `v`: as an enumerable property via Object.defineProperty when `Object.create`
// is available, otherwise by plain assignment.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
// Compiler-emitted helper backing `import * as x from '...'`; reuses `this.__importStar` when one already exists.
// Returns `mod` unchanged when it is flagged `__esModule`. Otherwise builds a new object that binds every
// own property name of `mod` except "default" (via __createBinding) and sets `default` to `mod` itself
// (via __setModuleDefault).
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a for-in/hasOwnProperty
// fallback when that function is unavailable.
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
const fs = __importStar(require("fs"));
|
|
37
|
+
const os = __importStar(require("os"));
|
|
38
|
+
const path = __importStar(require("path"));
|
|
39
|
+
const data = __importStar(require("../local-data"));
|
|
40
|
+
const todo_tasks_1 = require("../tools/todo-tasks");
|
|
41
|
+
/**
 * Parses the command-line options understood by this eval script.
 *
 * Only `output` is recognized, in either spelling:
 *   --output <path>   the value is the argument that follows the flag
 *   --output=<path>   the value is inline
 *
 * A flag without a (non-empty) value is ignored. When the flag is given more than once,
 * the last occurrence that carries a value wins.
 *
 * @param {string[]} argv - Arguments after the script name (e.g. `process.argv.slice(2)`).
 * @returns {{ output?: string }} Parsed options; `output` is absent when not supplied.
 */
function parseArgs(argv) {
    const inlinePrefix = '--output=';
    const args = {};
    for (let i = 0; i < argv.length; i += 1) {
        const arg = argv[i];
        if (arg === '--output' && argv[i + 1]) {
            args.output = argv[i + 1];
            // Skip the value we just consumed so it is not re-examined as a flag.
            i += 1;
        }
        else if (typeof arg === 'string' && arg.startsWith(inlinePrefix) && arg.length > inlinePrefix.length) {
            args.output = arg.slice(inlinePrefix.length);
        }
    }
    return args;
}
|
|
51
|
+
// Remembers, per temp directory, the FUNOLIO_LOCAL_DB_PATH value that was in effect before
// setupTempDb() overrode it (undefined when it was unset), so teardownTempDb() can put it back
// instead of unconditionally deleting a value the caller may have configured.
const savedDbPathByTempRoot = new Map();
/**
 * Creates a fresh temporary directory and points FUNOLIO_LOCAL_DB_PATH at `local.db` inside it.
 *
 * `data.closeDb()` is called after the switch -- presumably so the next `data` call opens a
 * database at the new path rather than reusing an already-open handle; verify against local-data.
 *
 * @returns {string} Absolute path of the temp directory; pass it to `teardownTempDb`.
 */
function setupTempDb() {
    const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'funolio-todo-runtime-eval-'));
    savedDbPathByTempRoot.set(tempRoot, process.env.FUNOLIO_LOCAL_DB_PATH);
    process.env.FUNOLIO_LOCAL_DB_PATH = path.join(tempRoot, 'local.db');
    data.closeDb();
    return tempRoot;
}
/**
 * Undoes `setupTempDb`: closes the database, restores FUNOLIO_LOCAL_DB_PATH to the value it
 * had before the matching setup call, and removes the temp directory and its contents.
 *
 * @param {string} tempRoot - The directory returned by `setupTempDb`.
 */
function teardownTempDb(tempRoot) {
    data.closeDb();
    const previousDbPath = savedDbPathByTempRoot.get(tempRoot);
    savedDbPathByTempRoot.delete(tempRoot);
    if (previousDbPath === undefined) {
        // Was unset before (or this root is unknown): assigning undefined would store the
        // string "undefined" in process.env, so the variable has to be deleted.
        delete process.env.FUNOLIO_LOCAL_DB_PATH;
    }
    else {
        process.env.FUNOLIO_LOCAL_DB_PATH = previousDbPath;
    }
    // `force: true` makes removal a no-op when the directory is already gone.
    fs.rmSync(tempRoot, { recursive: true, force: true });
}
|
|
62
|
+
/**
 * Seeds the database currently used by `data` with the fixture a scenario starts from:
 * three agent profiles (Ben, John, and Brain, the latter flagged as orchestrator), one
 * project, and one conversation created with Brain's id and linked to that project.
 *
 * Records are created in the order ben, john, brain, project, conversation.
 *
 * @returns {{ ben: object, john: object, brain: object, project: object, conversation: object }}
 *   The created records, exactly as returned by the corresponding `data.create*` calls.
 */
function seedProgrammingContext() {
    const coder = data.createAgentProfile({
        provider: 'claude-cli',
        model: 'claude-opus-4-6',
        name: 'Ben',
        roleClass: 'coding',
    });
    const tester = data.createAgentProfile({
        provider: 'codex-cli',
        model: 'gpt-5.2-codex',
        name: 'John',
        roleClass: 'qa',
    });
    const orchestrator = data.createAgentProfile({
        provider: 'openai',
        model: 'gpt-5.2-chat-latest',
        name: 'Brain',
        roleClass: 'brainstorm',
        isOrchestrator: true,
    });
    const evalProject = data.createProject({ name: 'Runtime Eval Project' });
    // The conversation needs both the orchestrator and the project, so it is created last.
    const evalConversation = data.createConversation(
        orchestrator.id,
        'Runtime Eval Conversation',
        'local',
        { projectId: evalProject.id },
    );
    return {
        ben: coder,
        john: tester,
        brain: orchestrator,
        project: evalProject,
        conversation: evalConversation,
    };
}
|
|
70
|
+
async function runScenarios() {
|
|
71
|
+
const scenarios = [];
|
|
72
|
+
{
|
|
73
|
+
const tempRoot = setupTempDb();
|
|
74
|
+
try {
|
|
75
|
+
const { ben, brain, project, conversation } = seedProgrammingContext();
|
|
76
|
+
const task = data.addTodoTask({
|
|
77
|
+
projectId: project.id,
|
|
78
|
+
conversationId: conversation.id,
|
|
79
|
+
title: 'Implement the change',
|
|
80
|
+
details: 'Write the requested feature.',
|
|
81
|
+
participants: [ben.name],
|
|
82
|
+
successCriteria: 'Feature is implemented',
|
|
83
|
+
ownerBotId: ben.id,
|
|
84
|
+
ownerName: ben.name,
|
|
85
|
+
taskType: 'coding',
|
|
86
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
87
|
+
});
|
|
88
|
+
const result = data.completeTodoTaskByWorker(task.id, {
|
|
89
|
+
outputSummary: 'Implemented the feature',
|
|
90
|
+
handoffPrompt: 'Tell the user the feature is ready.',
|
|
91
|
+
actor: { actorType: 'llm', actorId: ben.name },
|
|
92
|
+
});
|
|
93
|
+
scenarios.push({
|
|
94
|
+
name: 'return_to_orchestrator',
|
|
95
|
+
expected: 'Completing a worker TODO without insertion returns control to orchestrator and persists the completed task.',
|
|
96
|
+
pass: result.returnedToOrchestrator === true && !result.insertedTask && !data.getTodoTask(task.id, 'active'),
|
|
97
|
+
details: {
|
|
98
|
+
returnedToOrchestrator: result.returnedToOrchestrator,
|
|
99
|
+
completedState: data.getTodoTask(task.id, 'completed')?.state ?? null,
|
|
100
|
+
auditActions: data.listTodoAudit({ taskId: task.id, limit: 10 }).map((entry) => entry.action),
|
|
101
|
+
},
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
finally {
|
|
105
|
+
teardownTempDb(tempRoot);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
{
|
|
109
|
+
const tempRoot = setupTempDb();
|
|
110
|
+
try {
|
|
111
|
+
const { ben, john, brain, project, conversation } = seedProgrammingContext();
|
|
112
|
+
const first = data.addTodoTask({
|
|
113
|
+
projectId: project.id,
|
|
114
|
+
conversationId: conversation.id,
|
|
115
|
+
title: 'Brainstorm layout',
|
|
116
|
+
details: 'Produce a design direction.',
|
|
117
|
+
participants: [brain.name],
|
|
118
|
+
successCriteria: 'Direction chosen',
|
|
119
|
+
ownerBotId: brain.id,
|
|
120
|
+
ownerName: brain.name,
|
|
121
|
+
taskType: 'research',
|
|
122
|
+
nextWorkerBotId: ben.id,
|
|
123
|
+
nextWorkerName: ben.name,
|
|
124
|
+
nextWorkerRole: ben.role_class,
|
|
125
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
126
|
+
});
|
|
127
|
+
const later = data.addTodoTask({
|
|
128
|
+
projectId: project.id,
|
|
129
|
+
conversationId: conversation.id,
|
|
130
|
+
title: 'QA final result',
|
|
131
|
+
details: 'Check the final page.',
|
|
132
|
+
participants: [john.name],
|
|
133
|
+
successCriteria: 'QA complete',
|
|
134
|
+
ownerBotId: john.id,
|
|
135
|
+
ownerName: john.name,
|
|
136
|
+
taskType: 'qa',
|
|
137
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
138
|
+
});
|
|
139
|
+
const result = data.completeTodoTaskByWorker(first.id, {
|
|
140
|
+
outputSummary: 'Created a design brief',
|
|
141
|
+
insertTask: {
|
|
142
|
+
title: 'Build the page',
|
|
143
|
+
prompt: 'Code the page using the design brief.',
|
|
144
|
+
nextWorker: ben.name,
|
|
145
|
+
},
|
|
146
|
+
actor: { actorType: 'llm', actorId: brain.name },
|
|
147
|
+
});
|
|
148
|
+
scenarios.push({
|
|
149
|
+
name: 'insert_and_shift_order',
|
|
150
|
+
expected: 'Inserted task lands immediately after the completed task and later queued tasks shift down.',
|
|
151
|
+
pass: result.insertedTask?.position === 2 && data.getTodoTask(later.id)?.position === 3,
|
|
152
|
+
details: {
|
|
153
|
+
insertedTaskPosition: result.insertedTask?.position ?? null,
|
|
154
|
+
insertedTaskOwner: result.insertedTask?.owner_name ?? null,
|
|
155
|
+
shiftedTaskPosition: data.getTodoTask(later.id)?.position ?? null,
|
|
156
|
+
},
|
|
157
|
+
});
|
|
158
|
+
}
|
|
159
|
+
finally {
|
|
160
|
+
teardownTempDb(tempRoot);
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
{
|
|
164
|
+
const tempRoot = setupTempDb();
|
|
165
|
+
try {
|
|
166
|
+
const { ben, brain, project, conversation } = seedProgrammingContext();
|
|
167
|
+
const task = data.addTodoTask({
|
|
168
|
+
projectId: project.id,
|
|
169
|
+
conversationId: conversation.id,
|
|
170
|
+
title: 'Implement the page',
|
|
171
|
+
details: 'Build the requested page.',
|
|
172
|
+
participants: [ben.name],
|
|
173
|
+
successCriteria: 'Page exists',
|
|
174
|
+
ownerBotId: ben.id,
|
|
175
|
+
ownerName: ben.name,
|
|
176
|
+
taskType: 'coding',
|
|
177
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
178
|
+
});
|
|
179
|
+
const result = data.blockTodoTaskByWorker(task.id, {
|
|
180
|
+
blockerSummary: 'The output path is not specified.',
|
|
181
|
+
checkedContext: 'Checked the current TODO, summary, and workspace.',
|
|
182
|
+
userQuestion: 'Where should the HTML file be written?',
|
|
183
|
+
artifactRefs: ['C:\\Users\\sp\\Desktop\\notes.txt'],
|
|
184
|
+
actor: { actorType: 'llm', actorId: ben.name },
|
|
185
|
+
});
|
|
186
|
+
scenarios.push({
|
|
187
|
+
name: 'worker_blocked_handoff',
|
|
188
|
+
expected: 'A blocked worker should persist blocker details and return a structured orchestrator handoff without completing the TODO.',
|
|
189
|
+
pass: result.returnedToOrchestrator === true
|
|
190
|
+
&& result.blockedTask.state === 'active'
|
|
191
|
+
&& result.blockedTask.blocker_summary === 'The output path is not specified.'
|
|
192
|
+
&& /Where should the HTML file be written\?/i.test(result.orchestratorPrompt),
|
|
193
|
+
details: {
|
|
194
|
+
returnedToOrchestrator: result.returnedToOrchestrator,
|
|
195
|
+
blockerSummary: result.blockedTask.blocker_summary,
|
|
196
|
+
blockerQuestion: result.blockedTask.blocker_question,
|
|
197
|
+
},
|
|
198
|
+
});
|
|
199
|
+
}
|
|
200
|
+
finally {
|
|
201
|
+
teardownTempDb(tempRoot);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
{
|
|
205
|
+
const tempRoot = setupTempDb();
|
|
206
|
+
try {
|
|
207
|
+
const { ben, john, brain, project, conversation } = seedProgrammingContext();
|
|
208
|
+
const task = data.addTodoTask({
|
|
209
|
+
projectId: project.id,
|
|
210
|
+
conversationId: conversation.id,
|
|
211
|
+
title: 'Brainstorm layout',
|
|
212
|
+
details: 'Produce a design direction.',
|
|
213
|
+
participants: [brain.name],
|
|
214
|
+
successCriteria: 'Direction chosen',
|
|
215
|
+
ownerBotId: brain.id,
|
|
216
|
+
ownerName: brain.name,
|
|
217
|
+
taskType: 'research',
|
|
218
|
+
nextWorkerBotId: ben.id,
|
|
219
|
+
nextWorkerName: ben.name,
|
|
220
|
+
nextWorkerRole: ben.role_class,
|
|
221
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
222
|
+
});
|
|
223
|
+
let error = '';
|
|
224
|
+
try {
|
|
225
|
+
data.completeTodoTaskByWorker(task.id, {
|
|
226
|
+
outputSummary: 'Ready for next step',
|
|
227
|
+
insertTask: {
|
|
228
|
+
title: 'Unexpected QA',
|
|
229
|
+
prompt: 'Jump straight to QA.',
|
|
230
|
+
nextWorker: john.name,
|
|
231
|
+
},
|
|
232
|
+
actor: { actorType: 'llm', actorId: brain.name },
|
|
233
|
+
});
|
|
234
|
+
}
|
|
235
|
+
catch (err) {
|
|
236
|
+
error = err.message;
|
|
237
|
+
}
|
|
238
|
+
scenarios.push({
|
|
239
|
+
name: 'reject_unapproved_next_worker',
|
|
240
|
+
expected: 'Worker cannot insert a task for an unapproved next worker.',
|
|
241
|
+
pass: /not allowed/i.test(error),
|
|
242
|
+
details: { error },
|
|
243
|
+
});
|
|
244
|
+
}
|
|
245
|
+
finally {
|
|
246
|
+
teardownTempDb(tempRoot);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
{
|
|
250
|
+
const tempRoot = setupTempDb();
|
|
251
|
+
try {
|
|
252
|
+
const { ben, john, brain, project, conversation } = seedProgrammingContext();
|
|
253
|
+
const coding = data.addTodoTask({
|
|
254
|
+
projectId: project.id,
|
|
255
|
+
conversationId: conversation.id,
|
|
256
|
+
title: 'Code the page',
|
|
257
|
+
details: 'Implement the requested page.',
|
|
258
|
+
participants: [ben.name],
|
|
259
|
+
successCriteria: 'Page exists',
|
|
260
|
+
ownerBotId: ben.id,
|
|
261
|
+
ownerName: ben.name,
|
|
262
|
+
taskType: 'coding',
|
|
263
|
+
nextWorkerBotId: john.id,
|
|
264
|
+
nextWorkerName: john.name,
|
|
265
|
+
nextWorkerRole: john.role_class,
|
|
266
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
267
|
+
});
|
|
268
|
+
const qa = data.addTodoTask({
|
|
269
|
+
projectId: project.id,
|
|
270
|
+
conversationId: conversation.id,
|
|
271
|
+
title: 'QA the page',
|
|
272
|
+
details: 'Verify the implementation.',
|
|
273
|
+
participants: [john.name],
|
|
274
|
+
successCriteria: 'QA done',
|
|
275
|
+
ownerBotId: john.id,
|
|
276
|
+
ownerName: john.name,
|
|
277
|
+
taskType: 'qa',
|
|
278
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
279
|
+
});
|
|
280
|
+
data.completeTodoTaskByWorker(coding.id, {
|
|
281
|
+
outputSummary: 'Built the page',
|
|
282
|
+
actor: { actorType: 'llm', actorId: ben.name },
|
|
283
|
+
});
|
|
284
|
+
const qaResult = data.completeTodoTaskByWorker(qa.id, {
|
|
285
|
+
outputSummary: 'Found two defects',
|
|
286
|
+
insertTask: {
|
|
287
|
+
title: 'Fix QA findings',
|
|
288
|
+
prompt: 'Fix the two layout defects and keep the rest unchanged.',
|
|
289
|
+
nextWorker: ben.name,
|
|
290
|
+
taskType: 'coding',
|
|
291
|
+
},
|
|
292
|
+
actor: { actorType: 'llm', actorId: john.name },
|
|
293
|
+
});
|
|
294
|
+
scenarios.push({
|
|
295
|
+
name: 'qa_back_to_previous_worker',
|
|
296
|
+
expected: 'QA can insert a fix task back to the previous worker using completed-task history.',
|
|
297
|
+
pass: qaResult.insertedTask?.owner_name === ben.name && qaResult.insertedTask?.task_type === 'coding',
|
|
298
|
+
details: {
|
|
299
|
+
insertedTaskOwner: qaResult.insertedTask?.owner_name ?? null,
|
|
300
|
+
insertedTaskType: qaResult.insertedTask?.task_type ?? null,
|
|
301
|
+
insertedTaskPosition: qaResult.insertedTask?.position ?? null,
|
|
302
|
+
},
|
|
303
|
+
});
|
|
304
|
+
}
|
|
305
|
+
finally {
|
|
306
|
+
teardownTempDb(tempRoot);
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
{
|
|
310
|
+
const tempRoot = setupTempDb();
|
|
311
|
+
try {
|
|
312
|
+
const { ben, john, brain, project, conversation } = seedProgrammingContext();
|
|
313
|
+
const coding = data.addTodoTask({
|
|
314
|
+
projectId: project.id,
|
|
315
|
+
conversationId: conversation.id,
|
|
316
|
+
title: 'Code the page',
|
|
317
|
+
details: 'Implement the requested page.',
|
|
318
|
+
participants: [ben.name],
|
|
319
|
+
successCriteria: 'Page exists',
|
|
320
|
+
ownerBotId: ben.id,
|
|
321
|
+
ownerName: ben.name,
|
|
322
|
+
taskType: 'coding',
|
|
323
|
+
nextWorkerBotId: john.id,
|
|
324
|
+
nextWorkerName: john.name,
|
|
325
|
+
nextWorkerRole: john.role_class,
|
|
326
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
327
|
+
});
|
|
328
|
+
const codingResult = data.completeTodoTaskByWorker(coding.id, {
|
|
329
|
+
outputSummary: 'Initial build complete',
|
|
330
|
+
artifactRefs: ['C:\\Users\\sp\\Desktop\\funtest.html'],
|
|
331
|
+
insertTask: {
|
|
332
|
+
title: 'QA the page',
|
|
333
|
+
prompt: 'QA the generated HTML file.',
|
|
334
|
+
nextWorker: john.name,
|
|
335
|
+
},
|
|
336
|
+
actor: { actorType: 'llm', actorId: ben.name },
|
|
337
|
+
});
|
|
338
|
+
const qaTask = codingResult.insertedTask;
|
|
339
|
+
const qaResult = data.completeTodoTaskByWorker(qaTask.id, {
|
|
340
|
+
outputSummary: 'Logged findings',
|
|
341
|
+
artifactRefs: [
|
|
342
|
+
'C:\\Users\\sp\\Desktop\\funtest.html',
|
|
343
|
+
'C:\\Users\\sp\\Desktop\\qa-report.txt',
|
|
344
|
+
],
|
|
345
|
+
actor: { actorType: 'llm', actorId: john.name },
|
|
346
|
+
});
|
|
347
|
+
scenarios.push({
|
|
348
|
+
name: 'artifact_dedupe_and_carry_forward',
|
|
349
|
+
expected: 'Artifacts carry forward and duplicate references collapse to a single artifact row.',
|
|
350
|
+
pass: qaResult.artifacts.length === 2 && qaResult.completedTask.artifact_ids.length === 2,
|
|
351
|
+
details: {
|
|
352
|
+
artifactCount: qaResult.artifacts.length,
|
|
353
|
+
artifactRefs: qaResult.artifacts.map((item) => item.path_or_ref),
|
|
354
|
+
completedArtifactIds: qaResult.completedTask.artifact_ids,
|
|
355
|
+
},
|
|
356
|
+
});
|
|
357
|
+
}
|
|
358
|
+
finally {
|
|
359
|
+
teardownTempDb(tempRoot);
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
{
|
|
363
|
+
const tempRoot = setupTempDb();
|
|
364
|
+
try {
|
|
365
|
+
const { ben, john, brain, project, conversation } = seedProgrammingContext();
|
|
366
|
+
const task = data.addTodoTask({
|
|
367
|
+
projectId: project.id,
|
|
368
|
+
conversationId: conversation.id,
|
|
369
|
+
title: 'Implement the change',
|
|
370
|
+
details: 'Write the requested feature.',
|
|
371
|
+
participants: [ben.name],
|
|
372
|
+
successCriteria: 'Feature is implemented',
|
|
373
|
+
ownerBotId: ben.id,
|
|
374
|
+
ownerName: ben.name,
|
|
375
|
+
taskType: 'coding',
|
|
376
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
377
|
+
});
|
|
378
|
+
let error = '';
|
|
379
|
+
try {
|
|
380
|
+
data.completeTodoTaskByWorker(task.id, {
|
|
381
|
+
outputSummary: 'Tried to complete someone else’s work',
|
|
382
|
+
actor: { actorType: 'llm', actorId: john.name },
|
|
383
|
+
});
|
|
384
|
+
}
|
|
385
|
+
catch (err) {
|
|
386
|
+
error = err.message;
|
|
387
|
+
}
|
|
388
|
+
scenarios.push({
|
|
389
|
+
name: 'reject_non_owner_completion',
|
|
390
|
+
expected: 'Only the assigned owner can complete a worker TODO.',
|
|
391
|
+
pass: /not the owner/i.test(error),
|
|
392
|
+
details: { error },
|
|
393
|
+
});
|
|
394
|
+
}
|
|
395
|
+
finally {
|
|
396
|
+
teardownTempDb(tempRoot);
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
{
|
|
400
|
+
const tempRoot = setupTempDb();
|
|
401
|
+
try {
|
|
402
|
+
const { ben, brain, project, conversation } = seedProgrammingContext();
|
|
403
|
+
const task = data.addTodoTask({
|
|
404
|
+
projectId: project.id,
|
|
405
|
+
conversationId: conversation.id,
|
|
406
|
+
title: 'Implement the change',
|
|
407
|
+
details: 'Write the requested feature.',
|
|
408
|
+
participants: [ben.name],
|
|
409
|
+
successCriteria: 'Feature is implemented',
|
|
410
|
+
ownerBotId: ben.id,
|
|
411
|
+
ownerName: ben.name,
|
|
412
|
+
taskType: 'coding',
|
|
413
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
414
|
+
});
|
|
415
|
+
const toolResult = await todo_tasks_1.completeWorkerTaskTool.execute({
|
|
416
|
+
task_id: task.id,
|
|
417
|
+
output_summary: 'Implemented through tool path',
|
|
418
|
+
handoff_prompt: 'Return results to orchestrator',
|
|
419
|
+
artifact_refs: ['C:\\Users\\sp\\Desktop\\tool-output.txt'],
|
|
420
|
+
}, {
|
|
421
|
+
projectDir: process.cwd(),
|
|
422
|
+
projectId: project.id,
|
|
423
|
+
commandTimeout: 60,
|
|
424
|
+
maxCommandTimeout: 300,
|
|
425
|
+
maxFileSize: 1024 * 1024,
|
|
426
|
+
actorType: 'llm',
|
|
427
|
+
actorId: ben.name,
|
|
428
|
+
});
|
|
429
|
+
const parsed = toolResult.success ? JSON.parse(toolResult.output) : null;
|
|
430
|
+
scenarios.push({
|
|
431
|
+
name: 'tool_wrapper_complete_worker_task',
|
|
432
|
+
expected: 'The worker completion tool should reach the same runtime path and return orchestrator handoff data.',
|
|
433
|
+
pass: toolResult.success === true && parsed?.returnedToOrchestrator === true,
|
|
434
|
+
details: {
|
|
435
|
+
success: toolResult.success,
|
|
436
|
+
error: toolResult.error ?? null,
|
|
437
|
+
returnedToOrchestrator: parsed?.returnedToOrchestrator ?? null,
|
|
438
|
+
},
|
|
439
|
+
});
|
|
440
|
+
}
|
|
441
|
+
finally {
|
|
442
|
+
teardownTempDb(tempRoot);
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
{
|
|
446
|
+
const tempRoot = setupTempDb();
|
|
447
|
+
try {
|
|
448
|
+
const { ben, brain, project, conversation } = seedProgrammingContext();
|
|
449
|
+
const task = data.addTodoTask({
|
|
450
|
+
projectId: project.id,
|
|
451
|
+
conversationId: conversation.id,
|
|
452
|
+
title: 'Implement the change',
|
|
453
|
+
details: 'Write the requested feature.',
|
|
454
|
+
participants: [ben.name],
|
|
455
|
+
successCriteria: 'Feature is implemented',
|
|
456
|
+
ownerBotId: ben.id,
|
|
457
|
+
ownerName: ben.name,
|
|
458
|
+
taskType: 'coding',
|
|
459
|
+
actor: { actorType: 'orchestrator', actorId: brain.id },
|
|
460
|
+
});
|
|
461
|
+
const toolResult = await todo_tasks_1.blockWorkerTaskTool.execute({
|
|
462
|
+
task_id: task.id,
|
|
463
|
+
blocker_summary: 'Missing destination folder.',
|
|
464
|
+
checked_context: 'Checked the task, summary, and local files.',
|
|
465
|
+
user_question: 'Which folder should receive the output file?',
|
|
466
|
+
}, {
|
|
467
|
+
projectDir: process.cwd(),
|
|
468
|
+
projectId: project.id,
|
|
469
|
+
commandTimeout: 60,
|
|
470
|
+
maxCommandTimeout: 300,
|
|
471
|
+
maxFileSize: 1024 * 1024,
|
|
472
|
+
actorType: 'llm',
|
|
473
|
+
actorId: ben.name,
|
|
474
|
+
});
|
|
475
|
+
const parsed = toolResult.success ? JSON.parse(toolResult.output) : null;
|
|
476
|
+
scenarios.push({
|
|
477
|
+
name: 'tool_wrapper_block_worker_task',
|
|
478
|
+
expected: 'The blocked worker tool should persist the blocker and return orchestrator handoff data.',
|
|
479
|
+
pass: toolResult.success === true
|
|
480
|
+
&& parsed?.returnedToOrchestrator === true
|
|
481
|
+
&& parsed?.blockedTask?.blocker_summary === 'Missing destination folder.',
|
|
482
|
+
details: {
|
|
483
|
+
success: toolResult.success,
|
|
484
|
+
error: toolResult.error ?? null,
|
|
485
|
+
returnedToOrchestrator: parsed?.returnedToOrchestrator ?? null,
|
|
486
|
+
blockerSummary: parsed?.blockedTask?.blocker_summary ?? null,
|
|
487
|
+
},
|
|
488
|
+
});
|
|
489
|
+
}
|
|
490
|
+
finally {
|
|
491
|
+
teardownTempDb(tempRoot);
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
return scenarios;
|
|
495
|
+
}
|
|
496
|
+
/**
 * Command-line entry point for the 'todo-worker-runtime' replay.
 *
 * Parses the process arguments with parseArgs, awaits runScenarios(), and
 * serializes a JSON report holding the generation timestamp, the slice name,
 * pass/total counts, an overall success flag and the raw scenario entries.
 * When the parsed arguments carry a truthy `output`, the report is written to
 * that path (resolved with path.resolve); otherwise it is printed to stdout.
 *
 * This function itself never changes the process exit status: a failing
 * scenario is only visible through the report's `success` field.
 *
 * @returns {Promise<void>} Settles once the report has been written or printed.
 */
async function main() {
    const cliOptions = parseArgs(process.argv.slice(2));
    const results = await runScenarios();
    // Count the scenarios whose `pass` flag is truthy.
    const passCount = results.reduce((count, result) => (result.pass ? count + 1 : count), 0);
    // Key order is kept stable because it determines the layout of the emitted JSON.
    const json = JSON.stringify({
        generatedAt: new Date().toISOString(),
        slice: 'todo-worker-runtime',
        passed: passCount,
        total: results.length,
        success: passCount === results.length,
        scenarios: results,
    }, null, 2);
    if (!cliOptions.output) {
        console.log(json);
        return;
    }
    fs.writeFileSync(path.resolve(cliOptions.output), json, 'utf8');
}
|
|
516
|
+
// Kick off the replay. If main() rejects, print the error to stderr and
// terminate the process with exit status 1.
main().catch(function onFatal(failure) {
    console.error(failure);
    process.exit(1);
});
|
|
520
|
+
//# sourceMappingURL=todo-worker-runtime-replay.js.map
|