task-o-matic 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +86 -23
  2. package/dist/commands/benchmark.d.ts +3 -0
  3. package/dist/commands/benchmark.d.ts.map +1 -0
  4. package/dist/commands/benchmark.js +227 -0
  5. package/dist/commands/prd.d.ts.map +1 -1
  6. package/dist/commands/prd.js +203 -9
  7. package/dist/commands/workflow.d.ts.map +1 -1
  8. package/dist/commands/workflow.js +464 -286
  9. package/dist/index.d.ts.map +1 -1
  10. package/dist/index.js +2 -0
  11. package/dist/lib/ai-service/ai-operations.d.ts +5 -0
  12. package/dist/lib/ai-service/ai-operations.d.ts.map +1 -1
  13. package/dist/lib/ai-service/ai-operations.js +167 -0
  14. package/dist/lib/benchmark/registry.d.ts +11 -0
  15. package/dist/lib/benchmark/registry.d.ts.map +1 -0
  16. package/dist/lib/benchmark/registry.js +78 -0
  17. package/dist/lib/benchmark/runner.d.ts +6 -0
  18. package/dist/lib/benchmark/runner.d.ts.map +1 -0
  19. package/dist/lib/benchmark/runner.js +150 -0
  20. package/dist/lib/benchmark/storage.d.ts +13 -0
  21. package/dist/lib/benchmark/storage.d.ts.map +1 -0
  22. package/dist/lib/benchmark/storage.js +99 -0
  23. package/dist/lib/benchmark/types.d.ts +54 -0
  24. package/dist/lib/benchmark/types.d.ts.map +1 -0
  25. package/dist/lib/benchmark/types.js +2 -0
  26. package/dist/lib/index.d.ts +9 -0
  27. package/dist/lib/index.d.ts.map +1 -1
  28. package/dist/lib/index.js +7 -1
  29. package/dist/lib/prompt-registry.d.ts.map +1 -1
  30. package/dist/lib/prompt-registry.js +23 -0
  31. package/dist/prompts/index.d.ts +7 -6
  32. package/dist/prompts/index.d.ts.map +1 -1
  33. package/dist/prompts/index.js +1 -0
  34. package/dist/prompts/prd-question.d.ts +3 -0
  35. package/dist/prompts/prd-question.d.ts.map +1 -0
  36. package/dist/prompts/prd-question.js +40 -0
  37. package/dist/services/benchmark.d.ts +12 -0
  38. package/dist/services/benchmark.d.ts.map +1 -0
  39. package/dist/services/benchmark.js +18 -0
  40. package/dist/services/prd.d.ts +25 -0
  41. package/dist/services/prd.d.ts.map +1 -1
  42. package/dist/services/prd.js +188 -28
  43. package/dist/services/workflow.d.ts +85 -0
  44. package/dist/services/workflow.d.ts.map +1 -0
  45. package/dist/services/workflow.js +363 -0
  46. package/dist/types/index.d.ts +3 -0
  47. package/dist/types/index.d.ts.map +1 -1
  48. package/dist/types/options.d.ts +3 -1
  49. package/dist/types/options.d.ts.map +1 -1
  50. package/dist/types/options.js +16 -0
  51. package/dist/types/workflow-options.d.ts +45 -0
  52. package/dist/types/workflow-options.d.ts.map +1 -0
  53. package/dist/types/workflow-options.js +2 -0
  54. package/dist/types/workflow-results.d.ts +55 -0
  55. package/dist/types/workflow-results.d.ts.map +1 -0
  56. package/dist/types/workflow-results.js +2 -0
  57. package/package.json +1 -1
package/README.md CHANGED
@@ -6,6 +6,8 @@ AI-powered task management for CLI, TUI, and web applications. Parse PRDs, enhan
 
  - šŸ¤– **AI-Powered**: Parse PRDs and enhance tasks using multiple AI providers
  - šŸŽ­ **Interactive Workflow**: Guided setup from project init to task generation with AI assistance
+ - ā“ **PRD Question/Refine**: AI generates clarifying questions and can answer them automatically
+ - 🧠 **AI Reasoning Support**: Enable advanced reasoning for better PRD refinement
  - šŸ“¦ **Multi-Purpose Package**: Use as CLI tool, library, or MCP server
  - šŸ“ **Project-Local Storage**: All data stored locally in `.task-o-matic/` directory
  - šŸŽÆ **Task Management**: Full CRUD operations with AI enhancement
@@ -14,6 +16,7 @@ AI-powered task management for CLI, TUI, and web applications. Parse PRDs, enhan
  - šŸ”§ **Multi-Provider AI**: Support for OpenAI, Anthropic, OpenRouter, and custom providers
  - šŸ“Š **Smart Breakdown**: AI-powered task decomposition into subtasks
  - 🌊 **Real-time Streaming**: Watch AI responses generate live with streaming output
+ - šŸ“Š **Model Benchmarking**: Compare performance and quality across different AI models
  - šŸ  **Single-Project Focus**: Self-contained within each project directory
  - šŸ’» **Framework-Agnostic**: Easily integrate into TUI, web apps, or any Node.js project
 
@@ -55,14 +58,14 @@ task-o-matic/
  ā”œā”€ā”€ dist/ # Compiled output (published)
  │ ā”œā”€ā”€ lib/ # Library entry point + core exports
  │ ā”œā”€ā”€ cli/ # CLI binary
- │ ā”œā”€ā”€ services/ # Business logic layer
+ │ ā”œā”€ā”€ services/ # Business logic layer (WorkflowService, PRDService, TaskService)
  │ ā”œā”€ā”€ commands/ # CLI commands
  │ ā”œā”€ā”€ mcp/ # MCP server
  │ └── types/ # TypeScript definitions
  ā”œā”€ā”€ src/
  │ ā”œā”€ā”€ lib/ # Core library (Storage, Config, AI, etc.)
  │ │ └── index.ts # Main library exports
- │ ā”œā”€ā”€ services/ # TaskService, PRDService (framework-agnostic)
+ │ ā”œā”€ā”€ services/ # WorkflowService, PRDService, TaskService (framework-agnostic)
  │ ā”œā”€ā”€ cli/ # CLI-specific logic
  │ │ └── bin.ts # CLI binary entry point
  │ ā”œā”€ā”€ commands/ # Commander.js command implementations
@@ -75,8 +78,8 @@ task-o-matic/
 
  ### Core Components
 
- - **Service Layer** (`TaskService`, `PRDService`): Framework-agnostic business logic
- - **AI Service**: Uses Vercel AI SDK for multi-provider support
+ - **Service Layer** (`WorkflowService`, `PRDService`, `TaskService`): Framework-agnostic business logic
+ - **AI Service**: Uses Vercel AI SDK for multi-provider support with reasoning capabilities
  - **Local Storage**: JSON-based file storage in `.task-o-matic/` directory
  - **Configuration**: Project-local config with AI provider settings
  - **Prompt Templates**: Structured AI prompts for consistent results
@@ -104,24 +107,45 @@ npm install task-o-matic
 
  ```typescript
  import {
+   WorkflowService,
    TaskService,
    PRDService,
    type Task,
    type AIConfig,
  } from "task-o-matic";
 
- // Initialize the service
+ // Use the workflow service for complete project setup
+ const workflowService = new WorkflowService();
+
+ const result = await workflowService.initializeProject({
+   projectName: "my-app",
+   initMethod: "quick",
+   bootstrap: true,
+   aiOptions: {
+     aiProvider: "anthropic",
+     aiModel: "claude-3-5-sonnet",
+     aiKey: process.env.ANTHROPIC_API_KEY,
+   },
+   callbacks: {
+     onProgress: (event) => {
+       console.log(`Progress: ${event.message}`);
+     },
+   },
+ });
+
+ console.log("Project initialized:", result.projectName);
+
+ // Or use task service directly
  const taskService = new TaskService();
 
- // Create a task with AI enhancement
- const result = await taskService.createTask({
+ const taskResult = await taskService.createTask({
    title: "Implement user authentication",
    content: "Add login and signup functionality",
    aiEnhance: true,
    aiOptions: {
-     provider: "anthropic",
-     model: "claude-3-5-sonnet",
-     apiKey: process.env.ANTHROPIC_API_KEY,
+     aiProvider: "anthropic",
+     aiModel: "claude-3-5-sonnet",
+     aiKey: process.env.ANTHROPIC_API_KEY,
    },
    callbacks: {
      onProgress: (event) => {
@@ -130,7 +154,7 @@ const result = await taskService.createTask({
    },
  });
 
- console.log("Task created:", result.task);
+ console.log("Task created:", taskResult.task);
  ```
 
  #### TUI Integration Example
@@ -167,20 +191,28 @@ const result = await taskService.createTask({
  });
  ```
 
- #### PRD Parsing Example
+ #### PRD Question/Refine Example
 
  ```typescript
  import { PRDService } from "task-o-matic";
 
  const prdService = new PRDService();
 
- const result = await prdService.parsePRD({
+ // Generate questions and refine PRD with AI answering
+ const result = await prdService.refinePRDWithQuestions({
    file: "./requirements.md",
+   questionMode: "ai", // or "user" for interactive
+   questionAIOptions: {
+     // Optional: use a different AI for answering
+     aiProvider: "openrouter",
+     aiModel: "anthropic/claude-3-opus",
+     aiReasoning: "enabled", // Enable reasoning for better answers
+   },
    workingDirectory: process.cwd(),
    aiOptions: {
-     provider: "openrouter",
-     model: "anthropic/claude-3.5-sonnet",
-     apiKey: process.env.OPENROUTER_API_KEY,
+     aiProvider: "anthropic",
+     aiModel: "claude-3-5-sonnet",
+     aiKey: process.env.ANTHROPIC_API_KEY,
    },
    callbacks: {
      onProgress: (event) => {
@@ -189,9 +221,10 @@ const result = await prdService.parsePRD({
    },
  });
 
- console.log(`Created ${result.tasks.length} tasks from PRD`);
- result.tasks.forEach((task) => {
-   console.log(`- ${task.title}`);
+ console.log(`Refined PRD with ${result.questions.length} questions`);
+ result.questions.forEach((q, i) => {
+   console.log(`Q${i + 1}: ${q}`);
+   console.log(`A${i + 1}: ${result.answers[q]}`);
  });
  ```
 
@@ -227,6 +260,13 @@ import type {
    CreateTaskOptions,
    PRDParseResult,
    TaskAIMetadata,
+   // Workflow types
+   WorkflowService,
+   InitializeResult,
+   DefinePRDResult,
+   RefinePRDResult,
+   GenerateTasksResult,
+   SplitTasksResult,
  } from "task-o-matic";
  ```
 
@@ -316,9 +356,14 @@ task-o-matic workflow --stream
 
  1. **Project Initialization** - Choose quick start, custom, or AI-assisted configuration
  2. **PRD Definition** - Upload file, write manually, or use AI to generate from description
- 3. **PRD Refinement** - Optional AI-assisted improvements
- 4. **Task Generation** - Parse PRD into actionable tasks
- 5. **Task Splitting** - Break down complex tasks into subtasks
+ 3. **PRD Question/Refine** (NEW) - AI generates clarifying questions and refines PRD
+    - User can answer questions interactively
+    - OR AI can answer with PRD + stack context
+    - Optional: Use different AI model for answering (e.g., smarter model)
+    - Optional: Enable reasoning for better answers
+ 4. **PRD Manual Refinement** - Optional additional AI-assisted improvements
+ 5. **Task Generation** - Parse PRD into actionable tasks
+ 6. **Task Splitting** - Break down complex tasks into subtasks
 
  **AI Assistance at Every Step:**
 
@@ -395,7 +440,25 @@ task-o-matic tasks create --title "Add payment system" --ai-enhance --stream
  task-o-matic tasks split --task-id <task-id>
  ```
 
- ### Workflow 3: Project Bootstrapping
+ ### Workflow 3: Benchmarking Models
+
+ Compare different AI models for performance, cost, and quality.
+
+ ```bash
+ # 1. Run a benchmark for PRD parsing
+ task-o-matic benchmark run prd-parse \
+   --file requirements.md \
+   --models "openai:gpt-4o,openrouter:anthropic/claude-3.5-sonnet" \
+   --concurrency 5
+
+ # 2. Compare results
+ task-o-matic benchmark compare <run-id>
+
+ # 3. View detailed metrics (Tokens, BPS, Size)
+ task-o-matic benchmark show <run-id>
+ ```
+
+ ### Workflow 4: Project Bootstrapping
 
  ```bash
  # Option 1: One-step setup (recommended)
package/dist/commands/benchmark.d.ts ADDED
@@ -0,0 +1,3 @@
+ import { Command } from "commander";
+ export declare const benchmarkCommand: Command;
+ //# sourceMappingURL=benchmark.d.ts.map
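The declaration file only exports a Commander `Command` instance. Together with the `package/dist/index.js +2 -0` entry in the file list, this suggests the new subcommand is simply registered on the root CLI program. A minimal sketch of that wiring, as an assumption rather than the package's actual code (the program name and import path are hypothetical):

```typescript
import { Command } from "commander";
import { benchmarkCommand } from "./commands/benchmark";

// Hypothetical wiring: attach the benchmark subcommand to the root program,
// the same way the other task-o-matic commands are presumably registered.
const program = new Command("task-o-matic");
program.addCommand(benchmarkCommand);
program.parse(process.argv);
```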
package/dist/commands/benchmark.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC,eAAO,MAAM,gBAAgB,SAE5B,CAAC"}
package/dist/commands/benchmark.js ADDED
@@ -0,0 +1,227 @@
+ "use strict";
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.benchmarkCommand = void 0;
+ const commander_1 = require("commander");
+ const chalk_1 = __importDefault(require("chalk"));
+ const benchmark_1 = require("../services/benchmark");
+ exports.benchmarkCommand = new commander_1.Command("benchmark").description("Run and manage AI benchmarks");
+ // Helper to parse model string
+ // Format: provider:model[:reasoning=<tokens>]
+ function parseModelString(modelStr) {
+     const parts = modelStr.split(":");
+     if (parts.length < 2) {
+         throw new Error(`Invalid model format: ${modelStr}. Expected provider:model[:reasoning=<tokens>]`);
+     }
+     const provider = parts[0];
+     const model = parts[1];
+     let reasoningTokens;
+     if (parts.length > 2) {
+         const extra = parts[2];
+         if (extra.startsWith("reasoning=")) {
+             reasoningTokens = parseInt(extra.split("=")[1], 10);
+         }
+     }
+     return { provider, model, reasoningTokens };
+ }
+ exports.benchmarkCommand
+     .command("run")
+     .description("Run a benchmark operation")
+     .argument("<operation>", "Operation to benchmark (e.g., prd-parse, task-breakdown)")
+     .requiredOption("--models <list>", "Comma-separated list of models (provider:model[:reasoning=<tokens>])")
+     .option("--file <path>", "Input file path (for PRD ops)")
+     .option("--task-id <id>", "Task ID (for Task ops)")
+     .option("--concurrency <number>", "Max concurrent requests", "5")
+     .option("--delay <number>", "Delay between requests in ms", "250")
+     .option("--prompt <prompt>", "Override prompt")
+     .option("--message <message>", "User message")
+     .option("--tools", "Enable filesystem tools")
+     .option("--feedback <feedback>", "Feedback (for rework)")
+     .action(async (operation, options) => {
+         try {
+             const modelStrings = options.models.split(",");
+             const models = modelStrings.map((s) => parseModelString(s.trim()));
+             const config = {
+                 models,
+                 concurrency: parseInt(options.concurrency, 10),
+                 delay: parseInt(options.delay, 10),
+             };
+             console.log(chalk_1.default.blue(`Starting benchmark for ${operation}...`));
+             console.log(chalk_1.default.dim(`Models: ${models.length}, Concurrency: ${config.concurrency}, Delay: ${config.delay}ms`));
+             // Construct input object with all potential options
+             const input = {
+                 file: options.file,
+                 taskId: options.taskId,
+                 prompt: options.prompt,
+                 message: options.message,
+                 tools: options.tools,
+                 feedback: options.feedback,
+                 workingDirectory: process.cwd(), // Always pass current working directory
+             };
+             // Prepare dashboard
+             console.log(chalk_1.default.bold("\nBenchmark Progress:"));
+             const modelMap = new Map();
+             const modelStatus = new Map();
+             // Print initial lines and map indices
+             models.forEach((m, i) => {
+                 const id = `${m.provider}:${m.model}${m.reasoningTokens ? `:reasoning=${m.reasoningTokens}` : ""}`;
+                 modelMap.set(id, i);
+                 modelStatus.set(id, "Waiting...");
+                 console.log(chalk_1.default.dim(`- ${id}: Waiting...`));
+             });
+             const totalModels = models.length;
+             const run = await benchmark_1.benchmarkService.runBenchmark(operation, input, config, (event) => {
+                 const index = modelMap.get(event.modelId);
+                 if (index === undefined)
+                     return;
+                 // Update status in memory
+                 let statusStr = "";
+                 if (event.type === "start") {
+                     statusStr = chalk_1.default.yellow("Starting...");
+                 }
+                 else if (event.type === "progress") {
+                     const bps = event.currentBps ? `${event.currentBps} B/s` : "0 B/s";
+                     const size = event.currentSize ? `${event.currentSize} B` : "0 B";
+                     statusStr = `${chalk_1.default.blue("Running")} - Size: ${size}, Speed: ${bps}`;
+                 }
+                 else if (event.type === "complete") {
+                     statusStr = chalk_1.default.green(`Completed (${event.duration}ms)`);
+                 }
+                 else if (event.type === "error") {
+                     statusStr = chalk_1.default.red(`Failed: ${event.error}`);
+                 }
+                 modelStatus.set(event.modelId, statusStr);
+                 // Update display
+                 // Move cursor up to the specific line
+                 // Distance from bottom = totalModels - index
+                 const up = totalModels - index;
+                 process.stdout.write(`\x1B[${up}A`); // Move up
+                 process.stdout.write(`\x1B[2K`); // Clear line
+                 process.stdout.write(`- ${chalk_1.default.bold(event.modelId)}: ${statusStr}\r`);
+                 process.stdout.write(`\x1B[${up}B`); // Move down
+             });
+             console.log(chalk_1.default.green(`\nāœ“ Benchmark completed! Run ID: ${run.id}`));
+             console.log(chalk_1.default.bold(`\n${"Model".padEnd(40)} | ${"Duration".padEnd(10)} | ${"TTFT".padEnd(8)} | ${"Tokens".padEnd(10)} | ${"TPS".padEnd(8)} | ${"BPS".padEnd(8)} | ${"Size".padEnd(10)} | ${"Cost".padEnd(10)}`));
+             console.log("-".repeat(130)); // Adjusted line length for new columns
+             run.results.forEach((r) => {
+                 const duration = `${r.duration}ms`.padEnd(10);
+                 const ttft = r.timeToFirstToken
+                     ? `${r.timeToFirstToken}ms`.padEnd(8)
+                     : "-".padEnd(8);
+                 const tokens = r.tokenUsage
+                     ? `${r.tokenUsage.total}`.padEnd(10)
+                     : "-".padEnd(10);
+                 const tps = r.tps ? `${r.tps}`.padEnd(8) : "-".padEnd(8);
+                 const bps = r.bps ? `${r.bps}`.padEnd(8) : "-".padEnd(8);
+                 const size = r.responseSize
+                     ? `${r.responseSize}`.padEnd(10)
+                     : "-".padEnd(10);
+                 const cost = r.cost
+                     ? `$${r.cost.toFixed(6)}`.padEnd(10)
+                     : "-".padEnd(10);
+                 console.log(`${r.modelId.padEnd(40)} | ${duration} | ${ttft} | ${tokens} | ${tps} | ${bps} | ${size} | ${cost}`);
+                 if (r.error) {
+                     console.log(chalk_1.default.red(` Error: ${r.error}`));
+                 }
+             });
+         }
+         catch (error) {
+             console.error(chalk_1.default.red("Benchmark failed:"), error.message);
+             process.exit(1);
+         }
+     });
+ exports.benchmarkCommand
+     .command("list")
+     .description("List past benchmark runs")
+     .action(() => {
+         const runs = benchmark_1.benchmarkService.listRuns();
+         if (runs.length === 0) {
+             console.log(chalk_1.default.yellow("No benchmark runs found."));
+             return;
+         }
+         console.log(chalk_1.default.bold("Benchmark Runs:"));
+         runs.forEach((run) => {
+             const date = new Date(run.timestamp).toLocaleString();
+             console.log(`- ${chalk_1.default.cyan(run.id)} (${date}) - ${run.command}`);
+         });
+     });
+ exports.benchmarkCommand
+     .command("show")
+     .description("Show details of a benchmark run")
+     .argument("<id>", "Run ID")
+     .action((id) => {
+         const run = benchmark_1.benchmarkService.getRun(id);
+         if (!run) {
+             console.error(chalk_1.default.red(`Run ${id} not found`));
+             process.exit(1);
+         }
+         console.log(chalk_1.default.bold(`Run: ${run.id}`));
+         console.log(`Date: ${new Date(run.timestamp).toLocaleString()}`);
+         console.log(`Command: ${run.command}`);
+         console.log(`Input: ${JSON.stringify(run.input, null, 2)}`); // Might be large
+         console.log(chalk_1.default.bold("\nConfiguration:"));
+         console.log(`Concurrency: ${run.config.concurrency}`);
+         console.log(`Delay: ${run.config.delay}ms`);
+         console.log(chalk_1.default.bold("\nResults:"));
+         const results = run.results;
+         results.forEach((result) => {
+             console.log(chalk_1.default.bold(`\n[${result.modelId}]`));
+             console.log(`Duration: ${result.duration}ms`);
+             if (result.timeToFirstToken) {
+                 console.log(`TTFT: ${result.timeToFirstToken}ms`);
+             }
+             if (result.tokenUsage) {
+                 console.log(`Tokens: ${result.tokenUsage.total} (Prompt: ${result.tokenUsage.prompt}, Completion: ${result.tokenUsage.completion})`);
+             }
+             if (result.bps) {
+                 console.log(`Throughput: ${result.bps} B/s`);
+             }
+             if (result.responseSize) {
+                 console.log(`Size: ${result.responseSize} bytes`);
+             }
+             if (result.cost) {
+                 console.log(`Estimated Cost: $${result.cost.toFixed(6)}`);
+             }
+             if (result.error) {
+                 console.log(chalk_1.default.red(`Error: ${result.error}`));
+             }
+             else {
+                 const outputStr = typeof result.output === "string"
+                     ? result.output
+                     : JSON.stringify(result.output, null, 2);
+                 const preview = outputStr.length > 500
+                     ? outputStr.substring(0, 500) + "..."
+                     : outputStr;
+                 console.log(`Output: ${preview}`);
+             }
+         });
+     });
+ exports.benchmarkCommand
+     .command("compare")
+     .description("Compare results of a benchmark run")
+     .argument("<id>", "Run ID")
+     .action((id) => {
+         const run = benchmark_1.benchmarkService.getRun(id);
+         if (!run) {
+             console.error(chalk_1.default.red(`Run ${id} not found`));
+             process.exit(1);
+         }
+         console.log(chalk_1.default.bold(`Comparison for Run: ${run.id}`));
+         // Simple comparison: Duration and Success/Fail
+         // In future could add diffing of outputs
+         const table = run.results.map((res) => ({
+             Model: res.modelId,
+             Status: res.error ? "FAILED" : "SUCCESS",
+             Duration: `${res.duration}ms`,
+             Tokens: res.tokenUsage ? res.tokenUsage.total : "?",
+             BPS: res.bps ? res.bps : "?",
+             Size: res.responseSize
+                 ? res.responseSize
+                 : res.output
+                     ? JSON.stringify(res.output).length
+                     : 0,
+         }));
+         console.table(table);
+     });
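The progress dashboard in the `run` action above relies on raw ANSI escape codes rather than a TUI library: each model gets one terminal line at startup, and each event moves the cursor up to that model's line, clears it, rewrites the status, and moves back down. A self-contained sketch of the same technique, for illustration only (the row labels and statuses are invented):

```typescript
// Print one status row per entry; the cursor ends up on the line below the last row.
const rows = ["anthropic:claude-3-5-sonnet", "openai:gpt-4o"];
rows.forEach((id) => console.log(`- ${id}: Waiting...`));

function updateRow(index: number, status: string): void {
  const up = rows.length - index; // distance from the cursor back up to this row
  process.stdout.write(`\x1B[${up}A`); // move the cursor up to the row
  process.stdout.write(`\x1B[2K`); // erase the whole line
  process.stdout.write(`- ${rows[index]}: ${status}\r`); // rewrite it, return to column 0
  process.stdout.write(`\x1B[${up}B`); // move the cursor back down
}

updateRow(0, "Running - Size: 1024 B, Speed: 512 B/s");
updateRow(1, "Completed (4200ms)");
```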
package/dist/commands/prd.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"prd.d.ts","sourceRoot":"","sources":["../../src/commands/prd.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAMpC,eAAO,MAAM,UAAU,SAAmE,CAAC"}
+ {"version":3,"file":"prd.d.ts","sourceRoot":"","sources":["../../src/commands/prd.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,eAAO,MAAM,UAAU,SAEtB,CAAC"}