task-o-matic 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +286 -23
- package/dist/commands/benchmark.d.ts +3 -0
- package/dist/commands/benchmark.d.ts.map +1 -0
- package/dist/commands/benchmark.js +569 -0
- package/dist/commands/prd.d.ts.map +1 -1
- package/dist/commands/prd.js +203 -9
- package/dist/commands/tasks/execute-loop.d.ts +3 -0
- package/dist/commands/tasks/execute-loop.d.ts.map +1 -0
- package/dist/commands/tasks/execute-loop.js +118 -0
- package/dist/commands/tasks/index.d.ts +1 -0
- package/dist/commands/tasks/index.d.ts.map +1 -1
- package/dist/commands/tasks/index.js +1 -0
- package/dist/commands/tasks.d.ts.map +1 -1
- package/dist/commands/tasks.js +1 -0
- package/dist/commands/workflow.d.ts.map +1 -1
- package/dist/commands/workflow.js +491 -331
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/lib/ai-service/ai-operations.d.ts +5 -0
- package/dist/lib/ai-service/ai-operations.d.ts.map +1 -1
- package/dist/lib/ai-service/ai-operations.js +167 -0
- package/dist/lib/benchmark/registry.d.ts +11 -0
- package/dist/lib/benchmark/registry.d.ts.map +1 -0
- package/dist/lib/benchmark/registry.js +89 -0
- package/dist/lib/benchmark/runner.d.ts +6 -0
- package/dist/lib/benchmark/runner.d.ts.map +1 -0
- package/dist/lib/benchmark/runner.js +150 -0
- package/dist/lib/benchmark/storage.d.ts +13 -0
- package/dist/lib/benchmark/storage.d.ts.map +1 -0
- package/dist/lib/benchmark/storage.js +99 -0
- package/dist/lib/benchmark/types.d.ts +104 -0
- package/dist/lib/benchmark/types.d.ts.map +1 -0
- package/dist/lib/benchmark/types.js +2 -0
- package/dist/lib/index.d.ts +9 -0
- package/dist/lib/index.d.ts.map +1 -1
- package/dist/lib/index.js +7 -1
- package/dist/lib/prompt-registry.d.ts.map +1 -1
- package/dist/lib/prompt-registry.js +23 -0
- package/dist/lib/task-loop-execution.d.ts +25 -0
- package/dist/lib/task-loop-execution.d.ts.map +1 -0
- package/dist/lib/task-loop-execution.js +473 -0
- package/dist/prompts/index.d.ts +7 -6
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +1 -0
- package/dist/prompts/prd-question.d.ts +3 -0
- package/dist/prompts/prd-question.d.ts.map +1 -0
- package/dist/prompts/prd-question.js +40 -0
- package/dist/services/benchmark.d.ts +12 -0
- package/dist/services/benchmark.d.ts.map +1 -0
- package/dist/services/benchmark.js +18 -0
- package/dist/services/prd.d.ts +25 -0
- package/dist/services/prd.d.ts.map +1 -1
- package/dist/services/prd.js +224 -29
- package/dist/services/tasks.d.ts.map +1 -1
- package/dist/services/tasks.js +90 -3
- package/dist/services/workflow-benchmark.d.ts +34 -0
- package/dist/services/workflow-benchmark.d.ts.map +1 -0
- package/dist/services/workflow-benchmark.js +317 -0
- package/dist/services/workflow.d.ts +85 -0
- package/dist/services/workflow.d.ts.map +1 -0
- package/dist/services/workflow.js +476 -0
- package/dist/test/task-loop-git.test.d.ts +2 -0
- package/dist/test/task-loop-git.test.d.ts.map +1 -0
- package/dist/test/task-loop-git.test.js +62 -0
- package/dist/types/index.d.ts +53 -0
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/options.d.ts +2 -1
- package/dist/types/options.d.ts.map +1 -1
- package/dist/types/options.js +16 -0
- package/dist/types/results.d.ts +29 -1
- package/dist/types/results.d.ts.map +1 -1
- package/dist/types/workflow-options.d.ts +45 -0
- package/dist/types/workflow-options.d.ts.map +1 -0
- package/dist/types/workflow-options.js +2 -0
- package/dist/types/workflow-results.d.ts +82 -0
- package/dist/types/workflow-results.d.ts.map +1 -0
- package/dist/types/workflow-results.js +2 -0
- package/package.json +1 -1
|
@@ -0,0 +1,569 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.benchmarkCommand = void 0;
|
|
40
|
+
const commander_1 = require("commander");
|
|
41
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
42
|
+
const benchmark_1 = require("../services/benchmark");
|
|
43
|
+
const workflow_prompts_1 = require("../utils/workflow-prompts");
|
|
44
|
+
exports.benchmarkCommand = new commander_1.Command("benchmark").description("Run and manage AI benchmarks");
|
|
45
|
+
/**
 * Parse a CLI model specifier of the form `provider:model[:reasoning=<tokens>]`.
 *
 * The provider is everything before the first colon. The model id is everything
 * after it up to an optional `reasoning=<tokens>` segment, so model ids that
 * themselves contain colons (for example `vendor/model:free`) are kept intact
 * instead of being cut at their first colon. Segments after the `reasoning=`
 * segment are ignored.
 *
 * @param {string} modelStr - Raw specifier taken from the `--models` list.
 * @returns {{provider: string, model: string, reasoningTokens: (number|undefined)}}
 *   Parsed parts; `reasoningTokens` is `undefined` when no reasoning segment is given.
 * @throws {Error} If the provider or model is missing, or the reasoning value
 *   is not a non-negative integer.
 */
function parseModelString(modelStr) {
    const REASONING_PREFIX = "reasoning=";
    const formatError = () => new Error(`Invalid model format: ${modelStr}. Expected provider:model[:reasoning=<tokens>]`);
    const parts = modelStr.split(":");
    if (parts.length < 2) {
        throw formatError();
    }
    const provider = parts[0];
    // Only segments after the first model segment can be the reasoning suffix,
    // so `provider:reasoning=5` still names a model called "reasoning=5".
    const reasoningIndex = parts.findIndex((part, i) => i >= 2 && part.startsWith(REASONING_PREFIX));
    const modelParts = reasoningIndex === -1 ? parts.slice(1) : parts.slice(1, reasoningIndex);
    const model = modelParts.join(":");
    if (provider === "" || model === "") {
        throw formatError();
    }
    let reasoningTokens;
    if (reasoningIndex !== -1) {
        const rawTokens = parts[reasoningIndex].slice(REASONING_PREFIX.length);
        // Reject values such as "abc" or "12k" rather than passing NaN or a
        // silently truncated number on to the benchmark service.
        if (!/^\d+$/.test(rawTokens)) {
            throw new Error(`Invalid reasoning token count in ${modelStr}: expected a non-negative integer`);
        }
        reasoningTokens = Number.parseInt(rawTokens, 10);
    }
    return { provider, model, reasoningTokens };
}
|
|
63
|
+
// `benchmark run <operation>`: runs one operation against every model in
// --models, shows a per-model progress line while the run is in flight, then
// prints a summary table of the results.
exports.benchmarkCommand
    .command("run")
    .description("Run a benchmark operation")
    .argument("<operation>", "Operation to benchmark (e.g., prd-parse, task-breakdown)")
    .requiredOption("--models <list>", "Comma-separated list of models (provider:model[:reasoning=<tokens>])")
    .option("--file <path>", "Input file path (for PRD ops)")
    .option("--task-id <id>", "Task ID (for Task ops)")
    .option("--concurrency <number>", "Max concurrent requests", "5")
    .option("--delay <number>", "Delay between requests in ms", "250")
    .option("--prompt <prompt>", "Override prompt")
    .option("--message <message>", "User message")
    .option("--tools", "Enable filesystem tools")
    .option("--feedback <feedback>", "Feedback (for rework)")
    .action(async (operation, options) => {
    try {
        // Numeric flags arrive as strings; fail early instead of handing NaN
        // or a negative number to the benchmark service.
        const toInt = (raw, flag, min) => {
            const value = Number.parseInt(raw, 10);
            if (!Number.isInteger(value) || value < min) {
                throw new Error(`Invalid ${flag} value: ${raw}. Expected an integer >= ${min}`);
            }
            return value;
        };
        const modelStrings = options.models.split(",");
        const models = modelStrings.map((s) => parseModelString(s.trim()));
        const config = {
            models,
            concurrency: toInt(options.concurrency, "--concurrency", 1),
            delay: toInt(options.delay, "--delay", 0),
        };
        console.log(chalk_1.default.blue(`Starting benchmark for ${operation}...`));
        console.log(chalk_1.default.dim(`Models: ${models.length}, Concurrency: ${config.concurrency}, Delay: ${config.delay}ms`));
        // Construct input object with all potential options
        const input = {
            file: options.file,
            taskId: options.taskId,
            prompt: options.prompt,
            message: options.message,
            tools: options.tools,
            feedback: options.feedback,
            workingDirectory: process.cwd(), // Always pass current working directory
        };
        // Prepare dashboard: one line per model, remembered by its position so
        // that later events can rewrite exactly that line.
        console.log(chalk_1.default.bold("\nBenchmark Progress:"));
        const modelMap = new Map();
        models.forEach((m, i) => {
            const id = `${m.provider}:${m.model}${m.reasoningTokens ? `:reasoning=${m.reasoningTokens}` : ""}`;
            modelMap.set(id, i);
            console.log(chalk_1.default.dim(`- ${id}: Waiting...`));
        });
        const totalModels = models.length;
        // Cursor-movement escape codes only make sense on a terminal; when the
        // output is piped or redirected they would end up as garbage in the log.
        const interactive = Boolean(process.stdout.isTTY);
        const run = await benchmark_1.benchmarkService.runBenchmark(operation, input, config, (event) => {
            const index = modelMap.get(event.modelId);
            if (index === undefined)
                return;
            let statusStr = "";
            if (event.type === "start") {
                statusStr = chalk_1.default.yellow("Starting...");
            }
            else if (event.type === "progress") {
                const bps = event.currentBps ? `${event.currentBps} B/s` : "0 B/s";
                const size = event.currentSize ? `${event.currentSize} B` : "0 B";
                statusStr = `${chalk_1.default.blue("Running")} - Size: ${size}, Speed: ${bps}`;
            }
            else if (event.type === "complete") {
                statusStr = chalk_1.default.green(`Completed (${event.duration}ms)`);
            }
            else if (event.type === "error") {
                statusStr = chalk_1.default.red(`Failed: ${event.error}`);
            }
            const line = `- ${chalk_1.default.bold(event.modelId)}: ${statusStr}`;
            if (!interactive) {
                // Without a terminal there is no line to rewrite: append state
                // changes and drop the high-frequency progress updates.
                if (event.type !== "progress") {
                    console.log(line);
                }
                return;
            }
            // The cursor rests just below the last model line, so the line for
            // `index` is (totalModels - index) rows above it.
            const up = totalModels - index;
            process.stdout.write(`\x1B[${up}A`); // Move up
            process.stdout.write(`\x1B[2K`); // Clear line
            process.stdout.write(`${line}\r`);
            process.stdout.write(`\x1B[${up}B`); // Move down
        });
        // NOTE(review): the glyph before "Benchmark completed!" is mis-encoded
        // in this copy of the file and is kept as found - restore the intended symbol.
        console.log(chalk_1.default.green(`\nā Benchmark completed! Run ID: ${run.id}`));
        console.log(chalk_1.default.bold(`\n${"Model".padEnd(40)} | ${"Duration".padEnd(10)} | ${"TTFT".padEnd(8)} | ${"Tokens".padEnd(10)} | ${"TPS".padEnd(8)} | ${"BPS".padEnd(8)} | ${"Size".padEnd(10)} | ${"Cost".padEnd(10)}`));
        console.log("-".repeat(130)); // Adjusted line length for new columns
        run.results.forEach((r) => {
            // Metrics that are missing (or zero) are rendered as "-".
            const duration = `${r.duration}ms`.padEnd(10);
            const ttft = r.timeToFirstToken
                ? `${r.timeToFirstToken}ms`.padEnd(8)
                : "-".padEnd(8);
            const tokens = r.tokenUsage
                ? `${r.tokenUsage.total}`.padEnd(10)
                : "-".padEnd(10);
            const tps = r.tps ? `${r.tps}`.padEnd(8) : "-".padEnd(8);
            const bps = r.bps ? `${r.bps}`.padEnd(8) : "-".padEnd(8);
            const size = r.responseSize
                ? `${r.responseSize}`.padEnd(10)
                : "-".padEnd(10);
            const cost = r.cost
                ? `$${r.cost.toFixed(6)}`.padEnd(10)
                : "-".padEnd(10);
            console.log(`${r.modelId.padEnd(40)} | ${duration} | ${ttft} | ${tokens} | ${tps} | ${bps} | ${size} | ${cost}`);
            if (r.error) {
                console.log(chalk_1.default.red(` Error: ${r.error}`));
            }
        });
    }
    catch (error) {
        // Anything can be thrown; do not assume an Error instance.
        console.error(chalk_1.default.red("Benchmark failed:"), error instanceof Error ? error.message : String(error));
        process.exit(1);
    }
});
|
|
169
|
+
// `benchmark list`: prints id, local timestamp and command of every run
// returned by the benchmark service.
exports.benchmarkCommand
    .command("list")
    .description("List past benchmark runs")
    .action(() => {
    const storedRuns = benchmark_1.benchmarkService.listRuns();
    if (storedRuns.length !== 0) {
        console.log(chalk_1.default.bold("Benchmark Runs:"));
        for (const entry of storedRuns) {
            const when = new Date(entry.timestamp).toLocaleString();
            const coloredId = chalk_1.default.cyan(entry.id);
            console.log(`- ${coloredId} (${when}) - ${entry.command}`);
        }
        return;
    }
    console.log(chalk_1.default.yellow("No benchmark runs found."));
});
|
|
184
|
+
// `benchmark show <id>`: prints the stored input, configuration and per-model
// results of one run. Exits with status 1 when the run id is unknown.
exports.benchmarkCommand
    .command("show")
    .description("Show details of a benchmark run")
    .argument("<id>", "Run ID")
    .action((id) => {
    const run = benchmark_1.benchmarkService.getRun(id);
    if (!run) {
        console.error(chalk_1.default.red(`Run ${id} not found`));
        process.exit(1);
    }
    console.log(chalk_1.default.bold(`Run: ${run.id}`));
    console.log(`Date: ${new Date(run.timestamp).toLocaleString()}`);
    console.log(`Command: ${run.command}`);
    console.log(`Input: ${JSON.stringify(run.input, null, 2)}`); // Might be large
    console.log(chalk_1.default.bold("\nConfiguration:"));
    console.log(`Concurrency: ${run.config.concurrency}`);
    console.log(`Delay: ${run.config.delay}ms`);
    console.log(chalk_1.default.bold("\nResults:"));
    const results = run.results;
    results.forEach((result) => {
        console.log(chalk_1.default.bold(`\n[${result.modelId}]`));
        console.log(`Duration: ${result.duration}ms`);
        // Optional metrics are printed only when present (and non-zero).
        if (result.timeToFirstToken) {
            console.log(`TTFT: ${result.timeToFirstToken}ms`);
        }
        if (result.tokenUsage) {
            console.log(`Tokens: ${result.tokenUsage.total} (Prompt: ${result.tokenUsage.prompt}, Completion: ${result.tokenUsage.completion})`);
        }
        if (result.bps) {
            console.log(`Throughput: ${result.bps} B/s`);
        }
        if (result.responseSize) {
            console.log(`Size: ${result.responseSize} bytes`);
        }
        if (result.cost) {
            console.log(`Estimated Cost: $${result.cost.toFixed(6)}`);
        }
        if (result.error) {
            console.log(chalk_1.default.red(`Error: ${result.error}`));
            return;
        }
        const outputStr = typeof result.output === "string"
            ? result.output
            : JSON.stringify(result.output, null, 2);
        // JSON.stringify returns undefined for an undefined value; a result
        // with neither error nor output must not crash the listing.
        if (outputStr === undefined) {
            console.log("Output: (no output)");
            return;
        }
        // Long outputs are cut to a 500-character preview.
        const preview = outputStr.length > 500
            ? outputStr.substring(0, 500) + "..."
            : outputStr;
        console.log(`Output: ${preview}`);
    });
});
|
|
235
|
+
// `benchmark compare <id>`: renders one table row per model of a stored run
// (status, duration, tokens, throughput, size). Exits with status 1 when the
// run id is unknown.
exports.benchmarkCommand
    .command("compare")
    .description("Compare results of a benchmark run")
    .argument("<id>", "Run ID")
    .action((id) => {
    const run = benchmark_1.benchmarkService.getRun(id);
    if (!run) {
        console.error(chalk_1.default.red(`Run ${id} not found`));
        process.exit(1);
    }
    console.log(chalk_1.default.bold(`Comparison for Run: ${run.id}`));
    // Only duration and success/failure are compared; outputs are not diffed.
    const rows = [];
    for (const entry of run.results) {
        // Fall back to the serialized output length when no size was recorded.
        const fallbackSize = entry.output ? JSON.stringify(entry.output).length : 0;
        rows.push({
            Model: entry.modelId,
            Status: entry.error ? "FAILED" : "SUCCESS",
            Duration: `${entry.duration}ms`,
            Tokens: entry.tokenUsage ? entry.tokenUsage.total : "?",
            BPS: entry.bps || "?",
            Size: entry.responseSize || fallbackSize,
        });
    }
    console.table(rows);
});
|
|
262
|
+
// `benchmark workflow`: registers the benchmark-specific flags plus the
// workflow command's own flags, then delegates to runWorkflowBenchmark.
exports.benchmarkCommand
    .command("workflow")
    .description("Benchmark complete workflow execution across multiple models")
    .requiredOption("--models <list>", "Comma-separated list of models (provider:model[:reasoning=<tokens>])")
    .option("--concurrency <number>", "Max concurrent requests", "3")
    .option("--delay <number>", "Delay between requests in ms", "1000")
    // Inherit all workflow command options
    .option("--stream", "Show streaming AI output")
    .option("--skip-all", "Skip all optional steps (use defaults)")
    .option("--auto-accept", "Auto-accept all AI suggestions")
    .option("--config-file <path>", "Load workflow options from JSON file")
    // Step 1: Initialize
    .option("--skip-init", "Skip initialization step")
    .option("--project-name <name>", "Project name")
    .option("--init-method <method>", "Initialization method: quick, custom, ai")
    .option("--project-description <desc>", "Project description for AI-assisted init")
    .option("--frontend <framework>", "Frontend framework")
    .option("--backend <framework>", "Backend framework")
    .option("--database <db>", "Database choice")
    .option("--auth", "Include authentication")
    .option("--no-auth", "Exclude authentication")
    .option("--bootstrap", "Bootstrap with Better-T-Stack")
    .option("--no-bootstrap", "Skip bootstrapping")
    // Step 2: Define PRD
    .option("--skip-prd", "Skip PRD definition")
    .option("--prd-method <method>", "PRD method: upload, manual, ai, skip")
    .option("--prd-file <path>", "Path to existing PRD file")
    .option("--prd-description <desc>", "Product description for AI-assisted PRD")
    .option("--prd-content <content>", "Direct PRD content")
    // Step 3: Refine PRD
    .option("--skip-refine", "Skip PRD refinement")
    .option("--refine-method <method>", "Refinement method: manual, ai, skip")
    .option("--refine-feedback <feedback>", "Feedback for AI refinement")
    // Step 4: Generate Tasks
    .option("--skip-generate", "Skip task generation")
    .option("--generate-method <method>", "Generation method: standard, ai")
    .option("--generate-instructions <instructions>", "Custom task generation instructions")
    // Step 5: Split Tasks
    .option("--skip-split", "Skip task splitting")
    .option("--split-tasks <ids>", "Comma-separated task IDs to split")
    .option("--split-all", "Split all tasks")
    .option("--split-method <method>", "Split method: interactive, standard, custom")
    .option("--split-instructions <instructions>", "Custom split instructions")
    .action(async (options) => {
    try {
        await runWorkflowBenchmark(options);
    }
    catch (error) {
        // Anything can be thrown; do not assume an Error instance.
        const reason = error instanceof Error ? error.message : String(error);
        console.error(chalk_1.default.red("Workflow benchmark failed:"), reason);
        process.exit(1);
    }
});
|
|
314
|
+
/**
 * Execute the workflow benchmark across multiple models.
 *
 * Phase 1 collects the workflow answers once (from flags or interactive
 * prompts). Phase 2 runs the "workflow-full" benchmark operation with those
 * answers for every model in `options.models`, showing one progress line per
 * model. Afterwards the results are printed and the user may pick one model's
 * result to set up a project.
 *
 * @param {object} options - Parsed options of the `benchmark workflow` command;
 *   `models`, `concurrency` and `delay` are read here, the whole object is
 *   forwarded as `workflowOptions`.
 * @returns {Promise<void>}
 * @throws {Error} If a model specifier is malformed or `--concurrency` /
 *   `--delay` is not a valid integer; errors from the benchmark service
 *   propagate to the caller.
 */
async function runWorkflowBenchmark(options) {
    // NOTE(review): the glyphs at the start of this banner and of the Phase 1
    // heading are mis-encoded in this copy of the file and are kept as found.
    console.log(chalk_1.default.blue.bold("\nš Task-O-Matic Workflow Benchmark\n"));
    // Numeric flags arrive as strings; fail early instead of handing NaN or a
    // negative number to the benchmark service.
    const toInt = (raw, flag, min) => {
        const value = Number.parseInt(raw, 10);
        if (!Number.isInteger(value) || value < min) {
            throw new Error(`Invalid ${flag} value: ${raw}. Expected an integer >= ${min}`);
        }
        return value;
    };
    // Parse models
    const modelStrings = options.models.split(",");
    const models = modelStrings.map((s) => parseModelString(s.trim()));
    const config = {
        models,
        concurrency: toInt(options.concurrency, "--concurrency", 1),
        delay: toInt(options.delay, "--delay", 0),
    };
    console.log(chalk_1.default.dim(`Models: ${models.length}, Concurrency: ${config.concurrency}, Delay: ${config.delay}ms`));
    // Phase 1: Collect user responses interactively
    console.log(chalk_1.default.blue.bold("\nš Phase 1: Collecting Workflow Responses\n"));
    console.log(chalk_1.default.gray("Please answer the following questions. Your responses will be used for all models."));
    const collectedResponses = await collectWorkflowResponses(options);
    // Phase 2: Execute workflow on all models
    console.log(chalk_1.default.blue.bold("\n⚡ Phase 2: Executing Workflows\n"));
    console.log(chalk_1.default.gray(`Running workflow on ${models.length} models...\n`));
    // Prepare workflow input
    const workflowInput = {
        collectedResponses,
        workflowOptions: options,
        // Use the platform's temp directory; a literal "/tmp" does not exist
        // on every operating system.
        tempDirBase: require("os").tmpdir(),
    };
    // Prepare dashboard: one line per model, remembered by its position so
    // that later events can rewrite exactly that line.
    console.log(chalk_1.default.bold("Benchmark Progress:"));
    const modelMap = new Map();
    models.forEach((m, i) => {
        const id = `${m.provider}:${m.model}${m.reasoningTokens ? `:reasoning=${m.reasoningTokens}` : ""}`;
        modelMap.set(id, i);
        console.log(chalk_1.default.dim(`- ${id}: Waiting...`));
    });
    const totalModels = models.length;
    // Cursor-movement escape codes only make sense on a terminal; when the
    // output is piped or redirected they would end up as garbage in the log.
    const interactive = Boolean(process.stdout.isTTY);
    const run = await benchmark_1.benchmarkService.runBenchmark("workflow-full", workflowInput, config, (event) => {
        const index = modelMap.get(event.modelId);
        if (index === undefined)
            return;
        let statusStr = "";
        if (event.type === "start") {
            statusStr = chalk_1.default.yellow("Starting...");
        }
        else if (event.type === "progress") {
            statusStr = chalk_1.default.blue("Running workflow...");
        }
        else if (event.type === "complete") {
            statusStr = chalk_1.default.green(`Completed (${event.duration}ms)`);
        }
        else if (event.type === "error") {
            statusStr = chalk_1.default.red(`Failed: ${event.error}`);
        }
        const line = `- ${chalk_1.default.bold(event.modelId)}: ${statusStr}`;
        if (!interactive) {
            // Without a terminal there is no line to rewrite: append state
            // changes and drop the repeated progress updates.
            if (event.type !== "progress") {
                console.log(line);
            }
            return;
        }
        // The cursor rests just below the last model line, so the line for
        // `index` is (totalModels - index) rows above it.
        const up = totalModels - index;
        process.stdout.write(`\x1B[${up}A`); // Move up
        process.stdout.write(`\x1B[2K`); // Clear line
        process.stdout.write(`${line}\r`);
        process.stdout.write(`\x1B[${up}B`); // Move down
    });
    console.log(chalk_1.default.green(`\n✅ Workflow benchmark completed! Run ID: ${run.id}`));
    // Display results
    await displayWorkflowBenchmarkResults(run);
    // Optional: Let user select a model for project setup
    await promptForModelSelection(run, collectedResponses);
}
|
|
385
|
+
/**
 * Collect the workflow answers that are reused for every benchmarked model.
 *
 * Each answer is taken from the matching CLI option when one was supplied and
 * is otherwise asked interactively. Follow-up questions are only asked when
 * they apply (stack details for the "custom" init method, PRD input for the
 * chosen PRD method, refinement/splitting details when those steps are on).
 *
 * @param {object} options - Parsed options of the `benchmark workflow` command.
 * @returns {Promise<object>} The collected answers: projectName, initMethod,
 *   projectDescription, stackConfig, prdMethod, prdContent, prdDescription,
 *   prdFile, refinePrd, refineFeedback, generateTasks, customInstructions,
 *   splitTasks, splitInstructions. Answers that do not apply are `undefined`.
 */
async function collectWorkflowResponses(options) {
    // Use the value given on the command line, or ask for it.
    const getOrPrompt = async (preAnswered, promptFn) => {
        if (preAnswered !== undefined) {
            return preAnswered;
        }
        return promptFn();
    };
    // Project setup questions
    const projectName = await getOrPrompt(options.projectName, () => (0, workflow_prompts_1.textInputPrompt)("What is the name of your project?", "my-benchmark-project"));
    const initMethod = await getOrPrompt(options.initMethod, () => (0, workflow_prompts_1.selectPrompt)("How would you like to configure your project stack?", [
        { name: "Quick start (recommended defaults)", value: "quick" },
        { name: "Custom configuration", value: "custom" },
        { name: "AI-assisted (describe your project)", value: "ai" },
    ]));
    let projectDescription;
    if (initMethod === "ai") {
        projectDescription = await getOrPrompt(options.projectDescription, () => (0, workflow_prompts_1.textInputPrompt)("Describe your project (e.g., 'A SaaS app for team collaboration'):"));
    }
    // Stack configuration (for custom method)
    const stackConfig = {};
    if (initMethod === "custom") {
        stackConfig.frontend = await getOrPrompt(options.frontend, () => (0, workflow_prompts_1.selectPrompt)("Frontend framework:", ["next", "react", "vue", "svelte"]));
        stackConfig.backend = await getOrPrompt(options.backend, () => (0, workflow_prompts_1.selectPrompt)("Backend framework:", ["hono", "express", "fastify", "nestjs"]));
        stackConfig.database = await getOrPrompt(options.database, () => (0, workflow_prompts_1.selectPrompt)("Database:", ["sqlite", "postgres", "mysql", "mongodb"]));
        stackConfig.auth = await getOrPrompt(options.auth, () => (0, workflow_prompts_1.confirmPrompt)("Include authentication?", true));
    }
    // PRD questions. --skip-prd answers this question with "skip", the same
    // way --skip-refine / --skip-generate / --skip-split suppress their steps.
    const prdMethod = options.skipPrd
        ? "skip"
        : await getOrPrompt(options.prdMethod, () => (0, workflow_prompts_1.selectPrompt)("How would you like to define your PRD?", [
            { name: "AI-assisted creation", value: "ai" },
            { name: "Upload existing file", value: "upload" },
            { name: "Write manually", value: "manual" },
            { name: "Skip PRD", value: "skip" },
        ]));
    let prdDescription;
    let prdFile;
    let prdContent;
    if (prdMethod === "ai") {
        prdDescription = await getOrPrompt(options.prdDescription, () => (0, workflow_prompts_1.textInputPrompt)("Describe your product in detail:"));
    }
    else if (prdMethod === "upload") {
        prdFile = await getOrPrompt(options.prdFile, () => (0, workflow_prompts_1.textInputPrompt)("Path to PRD file:"));
    }
    else if (prdMethod === "manual") {
        prdContent = await getOrPrompt(options.prdContent, () => (0, workflow_prompts_1.editorPrompt)("Write your PRD:", "# Product Requirements Document\n\n## Overview\n\n## Features\n\n"));
    }
    // Additional workflow questions. Refinement, generation and splitting all
    // need a PRD, so they are off whenever the PRD step is skipped.
    const refinePrd = !options.skipRefine && prdMethod !== "skip" ?
        await (0, workflow_prompts_1.confirmPrompt)("Refine PRD with AI feedback?", false) : false;
    let refineFeedback;
    if (refinePrd) {
        refineFeedback = await getOrPrompt(options.refineFeedback, () => (0, workflow_prompts_1.textInputPrompt)("What feedback should be used for PRD refinement?", "Add more technical details and clarify requirements"));
    }
    const generateTasks = !options.skipGenerate && prdMethod !== "skip";
    const customInstructions = options.generateInstructions ||
        (generateTasks ? await (0, workflow_prompts_1.textInputPrompt)("Custom task generation instructions (optional):", "") : undefined);
    const splitTasks = !options.skipSplit && generateTasks ?
        await (0, workflow_prompts_1.confirmPrompt)("Split complex tasks into subtasks?", true) : false;
    const splitInstructions = splitTasks && options.splitInstructions ?
        options.splitInstructions :
        (splitTasks ? await (0, workflow_prompts_1.textInputPrompt)("Custom splitting instructions (optional):", "Break into 2-4 hour chunks") : undefined);
    return {
        projectName,
        initMethod: initMethod,
        projectDescription,
        stackConfig,
        prdMethod: prdMethod,
        prdContent,
        prdDescription,
        prdFile,
        refinePrd,
        refineFeedback,
        generateTasks,
        customInstructions,
        splitTasks,
        splitInstructions,
    };
}
|
|
469
|
+
/**
 * Print the outcome of a workflow benchmark run: first a one-row-per-model
 * summary table, then a detailed section for every model that did not fail.
 *
 * @param {object} run - Benchmark run whose `results` array is rendered.
 * @returns {Promise<void>}
 */
async function displayWorkflowBenchmarkResults(run) {
    const DASH = "-";
    console.log(chalk_1.default.bold("\nš Workflow Benchmark Results\n"));
    // Summary table
    const header = [
        "Model".padEnd(40),
        "Duration".padEnd(10),
        "Tasks".padEnd(8),
        "PRD Size".padEnd(10),
        "Steps".padEnd(8),
        "Cost".padEnd(10),
    ];
    console.log(chalk_1.default.bold(header.join(" | ")));
    console.log(DASH.repeat(130));
    for (const entry of run.results) {
        const summary = entry.output?.stats;
        const durationCell = `${entry.duration}ms`.padEnd(10);
        const taskCell = `${summary?.totalTasks || 0}`.padEnd(8);
        const prdCell = (summary?.prdSize ? `${summary.prdSize} chars` : DASH).padEnd(10);
        const stepCell = (summary ? `${summary.successfulSteps}/${summary.totalSteps}` : DASH).padEnd(8);
        const costCell = (entry.cost ? `$${entry.cost.toFixed(6)}` : DASH).padEnd(10);
        const cells = [entry.modelId.padEnd(40), durationCell, taskCell, prdCell, stepCell, costCell];
        console.log(cells.join(" | "));
        if (entry.error) {
            console.log(chalk_1.default.red(` Error: ${entry.error}`));
        }
    }
    // Detailed comparison
    console.log(chalk_1.default.bold("\nš Detailed Comparison\n"));
    for (const [position, entry] of run.results.entries()) {
        // Failed models only appear in the summary table above.
        if (entry.error) {
            continue;
        }
        console.log(chalk_1.default.cyan(`\n[${position + 1}] ${entry.modelId}`));
        console.log(`Duration: ${entry.duration}ms`);
        const details = entry.output?.stats;
        if (details) {
            console.log(`Steps Completed: ${details.successfulSteps}/${details.totalSteps}`);
            // Per-step timings are listed only when recorded (and non-zero).
            const timings = [
                ["Init", details.initDuration],
                ["PRD Generation", details.prdGenerationDuration],
                ["Task Generation", details.taskGenerationDuration],
                ["Task Splitting", details.taskSplittingDuration],
            ];
            for (const [label, elapsed] of timings) {
                if (elapsed) {
                    console.log(` ${label}: ${elapsed}ms`);
                }
            }
            console.log(`Tasks Created: ${details.totalTasks}`);
            if (details.tasksWithSubtasks) {
                console.log(`Tasks with Subtasks: ${details.tasksWithSubtasks}`);
            }
            if (details.prdSize) {
                console.log(`PRD Size: ${details.prdSize} characters`);
            }
        }
        if (entry.tokenUsage) {
            const usage = entry.tokenUsage;
            console.log(`Tokens: ${usage.total} (Prompt: ${usage.prompt}, Completion: ${usage.completion})`);
        }
        if (entry.cost) {
            console.log(`Cost: $${entry.cost.toFixed(6)}`);
        }
    }
}
|
|
521
|
+
/**
 * Let the user pick one successful benchmark result and set up a project
 * from it.
 *
 * Nothing is offered when there are no successful results or only one. When
 * there are several, the user is asked whether to select one, which one, and
 * which target directory to use; the choice is then passed to
 * `workflowBenchmarkService.applyBenchmarkResult`. Failures while applying are
 * reported on the console and not rethrown.
 *
 * @param {object} run - Benchmark run whose `results` are offered.
 * @param {object} responses - Collected workflow answers; `projectName`
 *   provides the default target directory.
 * @returns {Promise<void>}
 */
async function promptForModelSelection(run, responses) {
    const successfulResults = run.results.filter((r) => !r.error);
    if (successfulResults.length === 0) {
        console.log(chalk_1.default.yellow("\n⚠️ No successful results to select from."));
        return;
    }
    if (successfulResults.length === 1) {
        console.log(chalk_1.default.green(`\n✅ Only one successful result from ${successfulResults[0].modelId}`));
        return;
    }
    console.log(chalk_1.default.blue.bold("\n🎯 Model Selection\n"));
    const shouldSelect = await (0, workflow_prompts_1.confirmPrompt)("Would you like to select a model and set up your project with its results?", false);
    if (!shouldSelect) {
        console.log(chalk_1.default.gray("Benchmark complete. Results have been saved."));
        return;
    }
    const choices = successfulResults.map((r, index) => {
        // A missing cost is labelled in words instead of rendering "$unknown".
        const costLabel = r.cost != null ? `$${r.cost.toFixed(6)}` : "cost unknown";
        return {
            name: `${r.modelId} (${r.duration}ms, ${r.output?.stats?.totalTasks || 0} tasks, ${costLabel})`,
            // The value is the position in successfulResults, not in run.results.
            value: index,
        };
    });
    const selectedIndex = await (0, workflow_prompts_1.selectPrompt)("Select the model whose results you want to use for your project:", choices);
    const selectedResult = successfulResults[selectedIndex];
    console.log(chalk_1.default.green(`\n✅ Selected: ${selectedResult.modelId}`));
    console.log(chalk_1.default.gray("Setting up your project with the selected results..."));
    // Get target directory
    const targetDir = await (0, workflow_prompts_1.textInputPrompt)("Enter target directory for your project:", `./${responses.projectName}`);
    try {
        console.log(chalk_1.default.cyan("\n🔧 Applying benchmark results..."));
        // Loaded lazily, only when a result is actually applied.
        const { workflowBenchmarkService } = await Promise.resolve().then(() => __importStar(require("../services/workflow-benchmark")));
        const result = await workflowBenchmarkService.applyBenchmarkResult(selectedResult, targetDir, responses);
        if (result.success) {
            console.log(chalk_1.default.green(`\n✅ ${result.message}`));
            console.log(chalk_1.default.cyan("\nNext steps:"));
            console.log(chalk_1.default.gray(` • Navigate to: cd ${targetDir}`));
            console.log(chalk_1.default.gray(" • Review your tasks: task-o-matic tasks list"));
            console.log(chalk_1.default.gray(" • View task tree: task-o-matic tasks tree"));
            console.log(chalk_1.default.gray(" • Start working: task-o-matic tasks next"));
        }
        else {
            // NOTE(review): the glyph that prefixes the two failure messages
            // below is mis-encoded in this copy of the file and is kept as found.
            console.log(chalk_1.default.red(`\nā ${result.message}`));
        }
    }
    catch (error) {
        console.log(chalk_1.default.red(`\nā Failed to apply results: ${error instanceof Error ? error.message : String(error)}`));
    }
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prd.d.ts","sourceRoot":"","sources":["../../src/commands/prd.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"prd.d.ts","sourceRoot":"","sources":["../../src/commands/prd.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,eAAO,MAAM,UAAU,SAEtB,CAAC"}
|