task-o-matic 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -23
- package/dist/commands/benchmark.d.ts +3 -0
- package/dist/commands/benchmark.d.ts.map +1 -0
- package/dist/commands/benchmark.js +227 -0
- package/dist/commands/prd.d.ts.map +1 -1
- package/dist/commands/prd.js +203 -9
- package/dist/commands/workflow.d.ts.map +1 -1
- package/dist/commands/workflow.js +452 -331
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/lib/ai-service/ai-operations.d.ts +5 -0
- package/dist/lib/ai-service/ai-operations.d.ts.map +1 -1
- package/dist/lib/ai-service/ai-operations.js +167 -0
- package/dist/lib/benchmark/registry.d.ts +11 -0
- package/dist/lib/benchmark/registry.d.ts.map +1 -0
- package/dist/lib/benchmark/registry.js +78 -0
- package/dist/lib/benchmark/runner.d.ts +6 -0
- package/dist/lib/benchmark/runner.d.ts.map +1 -0
- package/dist/lib/benchmark/runner.js +150 -0
- package/dist/lib/benchmark/storage.d.ts +13 -0
- package/dist/lib/benchmark/storage.d.ts.map +1 -0
- package/dist/lib/benchmark/storage.js +99 -0
- package/dist/lib/benchmark/types.d.ts +54 -0
- package/dist/lib/benchmark/types.d.ts.map +1 -0
- package/dist/lib/benchmark/types.js +2 -0
- package/dist/lib/index.d.ts +9 -0
- package/dist/lib/index.d.ts.map +1 -1
- package/dist/lib/index.js +7 -1
- package/dist/lib/prompt-registry.d.ts.map +1 -1
- package/dist/lib/prompt-registry.js +23 -0
- package/dist/prompts/index.d.ts +7 -6
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +1 -0
- package/dist/prompts/prd-question.d.ts +3 -0
- package/dist/prompts/prd-question.d.ts.map +1 -0
- package/dist/prompts/prd-question.js +40 -0
- package/dist/services/benchmark.d.ts +12 -0
- package/dist/services/benchmark.d.ts.map +1 -0
- package/dist/services/benchmark.js +18 -0
- package/dist/services/prd.d.ts +25 -0
- package/dist/services/prd.d.ts.map +1 -1
- package/dist/services/prd.js +188 -28
- package/dist/services/workflow.d.ts +85 -0
- package/dist/services/workflow.d.ts.map +1 -0
- package/dist/services/workflow.js +363 -0
- package/dist/types/index.d.ts +3 -0
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/options.d.ts +2 -1
- package/dist/types/options.d.ts.map +1 -1
- package/dist/types/options.js +16 -0
- package/dist/types/workflow-options.d.ts +45 -0
- package/dist/types/workflow-options.d.ts.map +1 -0
- package/dist/types/workflow-options.js +2 -0
- package/dist/types/workflow-results.d.ts +55 -0
- package/dist/types/workflow-results.d.ts.map +1 -0
- package/dist/types/workflow-results.js +2 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,6 +6,8 @@ AI-powered task management for CLI, TUI, and web applications. Parse PRDs, enhan
|
|
|
6
6
|
|
|
7
7
|
- 🤖 **AI-Powered**: Parse PRDs and enhance tasks using multiple AI providers
|
|
8
8
|
- š **Interactive Workflow**: Guided setup from project init to task generation with AI assistance
|
|
9
|
+
- ❓ **PRD Question/Refine**: AI generates clarifying questions and can answer them automatically
|
|
10
|
+
- 🧠 **AI Reasoning Support**: Enable advanced reasoning for better PRD refinement
|
|
9
11
|
- 📦 **Multi-Purpose Package**: Use as CLI tool, library, or MCP server
|
|
10
12
|
- š **Project-Local Storage**: All data stored locally in `.task-o-matic/` directory
|
|
11
13
|
- 🎯 **Task Management**: Full CRUD operations with AI enhancement
|
|
@@ -14,6 +16,7 @@ AI-powered task management for CLI, TUI, and web applications. Parse PRDs, enhan
|
|
|
14
16
|
- 🔧 **Multi-Provider AI**: Support for OpenAI, Anthropic, OpenRouter, and custom providers
|
|
15
17
|
- š **Smart Breakdown**: AI-powered task decomposition into subtasks
|
|
16
18
|
- š **Real-time Streaming**: Watch AI responses generate live with streaming output
|
|
19
|
+
- š **Model Benchmarking**: Compare performance and quality across different AI models
|
|
17
20
|
- š **Single-Project Focus**: Self-contained within each project directory
|
|
18
21
|
- 💻 **Framework-Agnostic**: Easily integrate into TUI, web apps, or any Node.js project
|
|
19
22
|
|
|
@@ -55,14 +58,14 @@ task-o-matic/
|
|
|
55
58
|
āāā dist/ # Compiled output (published)
|
|
56
59
|
ā āāā lib/ # Library entry point + core exports
|
|
57
60
|
ā āāā cli/ # CLI binary
|
|
58
|
-
ā āāā services/ # Business logic layer
|
|
61
|
+
ā āāā services/ # Business logic layer (WorkflowService, PRDService, TaskService)
|
|
59
62
|
ā āāā commands/ # CLI commands
|
|
60
63
|
ā āāā mcp/ # MCP server
|
|
61
64
|
ā āāā types/ # TypeScript definitions
|
|
62
65
|
āāā src/
|
|
63
66
|
ā āāā lib/ # Core library (Storage, Config, AI, etc.)
|
|
64
67
|
ā ā āāā index.ts # Main library exports
|
|
65
|
-
ā āāā services/ #
|
|
68
|
+
ā āāā services/ # WorkflowService, PRDService, TaskService (framework-agnostic)
|
|
66
69
|
ā āāā cli/ # CLI-specific logic
|
|
67
70
|
ā ā āāā bin.ts # CLI binary entry point
|
|
68
71
|
ā āāā commands/ # Commander.js command implementations
|
|
@@ -75,8 +78,8 @@ task-o-matic/
|
|
|
75
78
|
|
|
76
79
|
### Core Components
|
|
77
80
|
|
|
78
|
-
- **Service Layer** (`
|
|
79
|
-
- **AI Service**: Uses Vercel AI SDK for multi-provider support
|
|
81
|
+
- **Service Layer** (`WorkflowService`, `PRDService`, `TaskService`): Framework-agnostic business logic
|
|
82
|
+
- **AI Service**: Uses Vercel AI SDK for multi-provider support with reasoning capabilities
|
|
80
83
|
- **Local Storage**: JSON-based file storage in `.task-o-matic/` directory
|
|
81
84
|
- **Configuration**: Project-local config with AI provider settings
|
|
82
85
|
- **Prompt Templates**: Structured AI prompts for consistent results
|
|
@@ -104,24 +107,45 @@ npm install task-o-matic
|
|
|
104
107
|
|
|
105
108
|
```typescript
|
|
106
109
|
import {
|
|
110
|
+
WorkflowService,
|
|
107
111
|
TaskService,
|
|
108
112
|
PRDService,
|
|
109
113
|
type Task,
|
|
110
114
|
type AIConfig,
|
|
111
115
|
} from "task-o-matic";
|
|
112
116
|
|
|
113
|
-
//
|
|
117
|
+
// Use the workflow service for complete project setup
|
|
118
|
+
const workflowService = new WorkflowService();
|
|
119
|
+
|
|
120
|
+
const result = await workflowService.initializeProject({
|
|
121
|
+
projectName: "my-app",
|
|
122
|
+
initMethod: "quick",
|
|
123
|
+
bootstrap: true,
|
|
124
|
+
aiOptions: {
|
|
125
|
+
aiProvider: "anthropic",
|
|
126
|
+
aiModel: "claude-3-5-sonnet",
|
|
127
|
+
aiKey: process.env.ANTHROPIC_API_KEY,
|
|
128
|
+
},
|
|
129
|
+
callbacks: {
|
|
130
|
+
onProgress: (event) => {
|
|
131
|
+
console.log(`Progress: ${event.message}`);
|
|
132
|
+
},
|
|
133
|
+
},
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
console.log("Project initialized:", result.projectName);
|
|
137
|
+
|
|
138
|
+
// Or use task service directly
|
|
114
139
|
const taskService = new TaskService();
|
|
115
140
|
|
|
116
|
-
|
|
117
|
-
const result = await taskService.createTask({
|
|
141
|
+
const taskResult = await taskService.createTask({
|
|
118
142
|
title: "Implement user authentication",
|
|
119
143
|
content: "Add login and signup functionality",
|
|
120
144
|
aiEnhance: true,
|
|
121
145
|
aiOptions: {
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
146
|
+
aiProvider: "anthropic",
|
|
147
|
+
aiModel: "claude-3-5-sonnet",
|
|
148
|
+
aiKey: process.env.ANTHROPIC_API_KEY,
|
|
125
149
|
},
|
|
126
150
|
callbacks: {
|
|
127
151
|
onProgress: (event) => {
|
|
@@ -130,7 +154,7 @@ const result = await taskService.createTask({
|
|
|
130
154
|
},
|
|
131
155
|
});
|
|
132
156
|
|
|
133
|
-
console.log("Task created:",
|
|
157
|
+
console.log("Task created:", taskResult.task);
|
|
134
158
|
```
|
|
135
159
|
|
|
136
160
|
#### TUI Integration Example
|
|
@@ -167,20 +191,28 @@ const result = await taskService.createTask({
|
|
|
167
191
|
});
|
|
168
192
|
```
|
|
169
193
|
|
|
170
|
-
#### PRD
|
|
194
|
+
#### PRD Question/Refine Example
|
|
171
195
|
|
|
172
196
|
```typescript
|
|
173
197
|
import { PRDService } from "task-o-matic";
|
|
174
198
|
|
|
175
199
|
const prdService = new PRDService();
|
|
176
200
|
|
|
177
|
-
|
|
201
|
+
// Generate questions and refine PRD with AI answering
|
|
202
|
+
const result = await prdService.refinePRDWithQuestions({
|
|
178
203
|
file: "./requirements.md",
|
|
204
|
+
questionMode: "ai", // or "user" for interactive
|
|
205
|
+
questionAIOptions: {
|
|
206
|
+
// Optional: use a different AI for answering
|
|
207
|
+
aiProvider: "openrouter",
|
|
208
|
+
aiModel: "anthropic/claude-3-opus",
|
|
209
|
+
aiReasoning: "enabled", // Enable reasoning for better answers
|
|
210
|
+
},
|
|
179
211
|
workingDirectory: process.cwd(),
|
|
180
212
|
aiOptions: {
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
213
|
+
aiProvider: "anthropic",
|
|
214
|
+
aiModel: "claude-3-5-sonnet",
|
|
215
|
+
aiKey: process.env.ANTHROPIC_API_KEY,
|
|
184
216
|
},
|
|
185
217
|
callbacks: {
|
|
186
218
|
onProgress: (event) => {
|
|
@@ -189,9 +221,10 @@ const result = await prdService.parsePRD({
|
|
|
189
221
|
},
|
|
190
222
|
});
|
|
191
223
|
|
|
192
|
-
console.log(`
|
|
193
|
-
result.
|
|
194
|
-
console.log(
|
|
224
|
+
console.log(`Refined PRD with ${result.questions.length} questions`);
|
|
225
|
+
result.questions.forEach((q, i) => {
|
|
226
|
+
console.log(`Q${i + 1}: ${q}`);
|
|
227
|
+
console.log(`A${i + 1}: ${result.answers[q]}`);
|
|
195
228
|
});
|
|
196
229
|
```
|
|
197
230
|
|
|
@@ -227,6 +260,13 @@ import type {
|
|
|
227
260
|
CreateTaskOptions,
|
|
228
261
|
PRDParseResult,
|
|
229
262
|
TaskAIMetadata,
|
|
263
|
+
// Workflow types
|
|
264
|
+
WorkflowService,
|
|
265
|
+
InitializeResult,
|
|
266
|
+
DefinePRDResult,
|
|
267
|
+
RefinePRDResult,
|
|
268
|
+
GenerateTasksResult,
|
|
269
|
+
SplitTasksResult,
|
|
230
270
|
} from "task-o-matic";
|
|
231
271
|
```
|
|
232
272
|
|
|
@@ -316,9 +356,14 @@ task-o-matic workflow --stream
|
|
|
316
356
|
|
|
317
357
|
1. **Project Initialization** - Choose quick start, custom, or AI-assisted configuration
|
|
318
358
|
2. **PRD Definition** - Upload file, write manually, or use AI to generate from description
|
|
319
|
-
3. **PRD
|
|
320
|
-
|
|
321
|
-
|
|
359
|
+
3. **PRD Question/Refine** (NEW) - AI generates clarifying questions and refines PRD
|
|
360
|
+
- User can answer questions interactively
|
|
361
|
+
- OR AI can answer with PRD + stack context
|
|
362
|
+
- Optional: Use different AI model for answering (e.g., smarter model)
|
|
363
|
+
- Optional: Enable reasoning for better answers
|
|
364
|
+
4. **PRD Manual Refinement** - Optional additional AI-assisted improvements
|
|
365
|
+
5. **Task Generation** - Parse PRD into actionable tasks
|
|
366
|
+
6. **Task Splitting** - Break down complex tasks into subtasks
|
|
322
367
|
|
|
323
368
|
**AI Assistance at Every Step:**
|
|
324
369
|
|
|
@@ -395,7 +440,25 @@ task-o-matic tasks create --title "Add payment system" --ai-enhance --stream
|
|
|
395
440
|
task-o-matic tasks split --task-id <task-id>
|
|
396
441
|
```
|
|
397
442
|
|
|
398
|
-
### Workflow 3:
|
|
443
|
+
### Workflow 3: Benchmarking Models
|
|
444
|
+
|
|
445
|
+
Compare different AI models for performance, cost, and quality.
|
|
446
|
+
|
|
447
|
+
```bash
|
|
448
|
+
# 1. Run a benchmark for PRD parsing
|
|
449
|
+
task-o-matic benchmark run prd-parse \
|
|
450
|
+
--file requirements.md \
|
|
451
|
+
--models "openai:gpt-4o,openrouter:anthropic/claude-3.5-sonnet" \
|
|
452
|
+
--concurrency 5
|
|
453
|
+
|
|
454
|
+
# 2. Compare results
|
|
455
|
+
task-o-matic benchmark compare <run-id>
|
|
456
|
+
|
|
457
|
+
# 3. View detailed metrics (Tokens, BPS, Size)
|
|
458
|
+
task-o-matic benchmark show <run-id>
|
|
459
|
+
```
|
|
460
|
+
|
|
461
|
+
### Workflow 4: Project Bootstrapping
|
|
399
462
|
|
|
400
463
|
```bash
|
|
401
464
|
# Option 1: One-step setup (recommended)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC,eAAO,MAAM,gBAAgB,SAE5B,CAAC"}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.benchmarkCommand = void 0;
|
|
7
|
+
const commander_1 = require("commander");
|
|
8
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
9
|
+
const benchmark_1 = require("../services/benchmark");
|
|
10
|
+
exports.benchmarkCommand = new commander_1.Command("benchmark").description("Run and manage AI benchmarks");
|
|
11
|
+
// Helper to parse model string
|
|
12
|
+
// Format: provider:model[:reasoning=<tokens>]
|
|
13
|
+
/**
 * Parse a single model specifier of the form
 * `provider:model[:reasoning=<tokens>]`.
 *
 * The provider is everything before the first colon. The model id is the
 * remainder and may itself contain colons (for example a `vendor/name:variant`
 * id); only a trailing `reasoning=<tokens>` segment is stripped from it.
 *
 * @param {string} modelStr - Raw specifier, already trimmed by the caller.
 * @returns {{provider: string, model: string, reasoningTokens: (number|undefined)}}
 *   The parsed parts; `reasoningTokens` is `undefined` when no reasoning
 *   segment was supplied.
 * @throws {Error} If the provider or model is missing, or if the reasoning
 *   token count is not a non-negative integer.
 */
function parseModelString(modelStr) {
    const formatError = `Invalid model format: ${modelStr}. Expected provider:model[:reasoning=<tokens>]`;
    const reasoningPrefix = "reasoning=";
    const parts = modelStr.split(":");
    if (parts.length < 2) {
        throw new Error(formatError);
    }
    const [provider, ...modelParts] = parts;
    let reasoningTokens;
    // Only a *trailing* segment can be the reasoning option, and only when a
    // model segment precedes it; otherwise the text belongs to the model id.
    if (modelParts.length > 1 &&
        modelParts[modelParts.length - 1].startsWith(reasoningPrefix)) {
        const rawTokens = modelParts.pop().slice(reasoningPrefix.length);
        reasoningTokens = Number.parseInt(rawTokens, 10);
        // Reject NaN up front: a NaN budget would otherwise be dropped silently.
        if (Number.isNaN(reasoningTokens) || reasoningTokens < 0) {
            throw new Error(`Invalid reasoning token count "${rawTokens}" in model: ${modelStr}. Expected a non-negative integer`);
        }
    }
    // Re-join so model ids that contain colons are not truncated.
    const model = modelParts.join(":");
    if (!provider || !model) {
        throw new Error(formatError);
    }
    return { provider, model, reasoningTokens };
}
|
|
29
|
+
// `benchmark run <operation>`: runs one operation against every model listed
// in --models, renders a live one-line-per-model progress dashboard, then
// prints a summary table of the results returned by the benchmark service.
exports.benchmarkCommand
    .command("run")
    .description("Run a benchmark operation")
    .argument("<operation>", "Operation to benchmark (e.g., prd-parse, task-breakdown)")
    .requiredOption("--models <list>", "Comma-separated list of models (provider:model[:reasoning=<tokens>])")
    .option("--file <path>", "Input file path (for PRD ops)")
    .option("--task-id <id>", "Task ID (for Task ops)")
    .option("--concurrency <number>", "Max concurrent requests", "5")
    .option("--delay <number>", "Delay between requests in ms", "250")
    .option("--prompt <prompt>", "Override prompt")
    .option("--message <message>", "User message")
    .option("--tools", "Enable filesystem tools")
    .option("--feedback <feedback>", "Feedback (for rework)")
    .action(async (operation, options) => {
    try {
        // Drop blank entries so a trailing or doubled comma does not abort
        // the whole run with an "invalid model" error for the empty string.
        const models = options.models
            .split(",")
            .map((s) => s.trim())
            .filter((s) => s.length > 0)
            .map((s) => parseModelString(s));
        if (models.length === 0) {
            throw new Error("No models specified. Expected --models provider:model[,provider:model...]");
        }
        // Commander hands option values over as strings; validate them here so
        // NaN never reaches the runner.
        const concurrency = Number.parseInt(options.concurrency, 10);
        if (Number.isNaN(concurrency) || concurrency < 1) {
            throw new Error(`Invalid --concurrency value: ${options.concurrency}. Expected a positive integer`);
        }
        const delay = Number.parseInt(options.delay, 10);
        if (Number.isNaN(delay) || delay < 0) {
            throw new Error(`Invalid --delay value: ${options.delay}. Expected a non-negative integer`);
        }
        const config = {
            models,
            concurrency,
            delay,
        };
        console.log(chalk_1.default.blue(`Starting benchmark for ${operation}...`));
        console.log(chalk_1.default.dim(`Models: ${models.length}, Concurrency: ${config.concurrency}, Delay: ${config.delay}ms`));
        // Construct input object with all potential options
        const input = {
            file: options.file,
            taskId: options.taskId,
            prompt: options.prompt,
            message: options.message,
            tools: options.tools,
            feedback: options.feedback,
            workingDirectory: process.cwd(), // Always pass current working directory
        };
        // Prepare dashboard
        console.log(chalk_1.default.bold("\nBenchmark Progress:"));
        // Maps a model id to the index of its dashboard line.
        const modelMap = new Map();
        // Print initial lines and map indices
        models.forEach((m, i) => {
            const id = `${m.provider}:${m.model}${m.reasoningTokens ? `:reasoning=${m.reasoningTokens}` : ""}`;
            modelMap.set(id, i);
            console.log(chalk_1.default.dim(`- ${id}: Waiting...`));
        });
        const totalModels = models.length;
        const run = await benchmark_1.benchmarkService.runBenchmark(operation, input, config, (event) => {
            const index = modelMap.get(event.modelId);
            // Events for ids that have no dashboard line are ignored.
            if (index === undefined)
                return;
            let statusStr = "";
            if (event.type === "start") {
                statusStr = chalk_1.default.yellow("Starting...");
            }
            else if (event.type === "progress") {
                const bps = event.currentBps ? `${event.currentBps} B/s` : "0 B/s";
                const size = event.currentSize ? `${event.currentSize} B` : "0 B";
                statusStr = `${chalk_1.default.blue("Running")} - Size: ${size}, Speed: ${bps}`;
            }
            else if (event.type === "complete") {
                statusStr = chalk_1.default.green(`Completed (${event.duration}ms)`);
            }
            else if (event.type === "error") {
                statusStr = chalk_1.default.red(`Failed: ${event.error}`);
            }
            // Update display
            // The cursor rests on the line below the last model line, so the
            // distance up to this model's line is totalModels - index.
            const up = totalModels - index;
            process.stdout.write(`\x1B[${up}A`); // Move up
            process.stdout.write(`\x1B[2K`); // Clear line
            process.stdout.write(`- ${chalk_1.default.bold(event.modelId)}: ${statusStr}\r`);
            process.stdout.write(`\x1B[${up}B`); // Move down
        });
        console.log(chalk_1.default.green(`\n✓ Benchmark completed! Run ID: ${run.id}`));
        console.log(chalk_1.default.bold(`\n${"Model".padEnd(40)} | ${"Duration".padEnd(10)} | ${"TTFT".padEnd(8)} | ${"Tokens".padEnd(10)} | ${"TPS".padEnd(8)} | ${"BPS".padEnd(8)} | ${"Size".padEnd(10)} | ${"Cost".padEnd(10)}`));
        console.log("-".repeat(130)); // Adjusted line length for new columns
        run.results.forEach((r) => {
            // Missing (falsy) metrics are rendered as "-" in the table.
            const duration = `${r.duration}ms`.padEnd(10);
            const ttft = r.timeToFirstToken
                ? `${r.timeToFirstToken}ms`.padEnd(8)
                : "-".padEnd(8);
            const tokens = r.tokenUsage
                ? `${r.tokenUsage.total}`.padEnd(10)
                : "-".padEnd(10);
            const tps = r.tps ? `${r.tps}`.padEnd(8) : "-".padEnd(8);
            const bps = r.bps ? `${r.bps}`.padEnd(8) : "-".padEnd(8);
            const size = r.responseSize
                ? `${r.responseSize}`.padEnd(10)
                : "-".padEnd(10);
            const cost = r.cost
                ? `$${r.cost.toFixed(6)}`.padEnd(10)
                : "-".padEnd(10);
            console.log(`${r.modelId.padEnd(40)} | ${duration} | ${ttft} | ${tokens} | ${tps} | ${bps} | ${size} | ${cost}`);
            if (r.error) {
                console.log(chalk_1.default.red(` Error: ${r.error}`));
            }
        });
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error instance.
        const message = error instanceof Error ? error.message : String(error);
        console.error(chalk_1.default.red("Benchmark failed:"), message);
        process.exit(1);
    }
});
|
|
135
|
+
// `benchmark list`: prints one line per stored benchmark run (id, local
// timestamp, command), or a notice when no runs exist.
exports.benchmarkCommand
    .command("list")
    .description("List past benchmark runs")
    .action(() => {
    const storedRuns = benchmark_1.benchmarkService.listRuns();
    const hasRuns = storedRuns.length !== 0;
    if (!hasRuns) {
        console.log(chalk_1.default.yellow("No benchmark runs found."));
        return;
    }
    console.log(chalk_1.default.bold("Benchmark Runs:"));
    for (const entry of storedRuns) {
        const when = new Date(entry.timestamp).toLocaleString();
        const label = chalk_1.default.cyan(entry.id);
        console.log(`- ${label} (${when}) - ${entry.command}`);
    }
});
|
|
150
|
+
// `benchmark show <id>`: prints the stored input, configuration and per-model
// metrics of one benchmark run, with a preview (first 500 characters) of each
// successful model's output. Exits with status 1 when the run id is unknown.
exports.benchmarkCommand
    .command("show")
    .description("Show details of a benchmark run")
    .argument("<id>", "Run ID")
    .action((id) => {
    const run = benchmark_1.benchmarkService.getRun(id);
    if (!run) {
        console.error(chalk_1.default.red(`Run ${id} not found`));
        process.exit(1);
    }
    console.log(chalk_1.default.bold(`Run: ${run.id}`));
    console.log(`Date: ${new Date(run.timestamp).toLocaleString()}`);
    console.log(`Command: ${run.command}`);
    console.log(`Input: ${JSON.stringify(run.input, null, 2)}`); // Might be large
    console.log(chalk_1.default.bold("\nConfiguration:"));
    console.log(`Concurrency: ${run.config.concurrency}`);
    console.log(`Delay: ${run.config.delay}ms`);
    console.log(chalk_1.default.bold("\nResults:"));
    const results = run.results;
    results.forEach((result) => {
        console.log(chalk_1.default.bold(`\n[${result.modelId}]`));
        console.log(`Duration: ${result.duration}ms`);
        // Optional metrics are printed only when present (truthy).
        if (result.timeToFirstToken) {
            console.log(`TTFT: ${result.timeToFirstToken}ms`);
        }
        if (result.tokenUsage) {
            console.log(`Tokens: ${result.tokenUsage.total} (Prompt: ${result.tokenUsage.prompt}, Completion: ${result.tokenUsage.completion})`);
        }
        if (result.bps) {
            console.log(`Throughput: ${result.bps} B/s`);
        }
        if (result.responseSize) {
            console.log(`Size: ${result.responseSize} bytes`);
        }
        if (result.cost) {
            console.log(`Estimated Cost: $${result.cost.toFixed(6)}`);
        }
        if (result.error) {
            console.log(chalk_1.default.red(`Error: ${result.error}`));
        }
        else {
            const serialized = typeof result.output === "string"
                ? result.output
                : JSON.stringify(result.output, null, 2);
            // JSON.stringify yields undefined (not a string) for an undefined
            // output; guard so reading .length below cannot throw.
            const outputStr = serialized === undefined ? "(none)" : serialized;
            const preview = outputStr.length > 500
                ? outputStr.substring(0, 500) + "..."
                : outputStr;
            console.log(`Output: ${preview}`);
        }
    });
});
|
|
201
|
+
// `benchmark compare <id>`: renders a console table with one row per model of
// the given run (status, duration, tokens, throughput, size). Exits with
// status 1 when the run id is unknown.
exports.benchmarkCommand
    .command("compare")
    .description("Compare results of a benchmark run")
    .argument("<id>", "Run ID")
    .action((id) => {
    const run = benchmark_1.benchmarkService.getRun(id);
    if (!run) {
        console.error(chalk_1.default.red(`Run ${id} not found`));
        process.exit(1);
    }
    console.log(chalk_1.default.bold(`Comparison for Run: ${run.id}`));
    // Simple comparison: Duration and Success/Fail
    // In future could add diffing of outputs
    const rows = [];
    for (const res of run.results) {
        const row = {};
        row.Model = res.modelId;
        row.Status = res.error ? "FAILED" : "SUCCESS";
        row.Duration = `${res.duration}ms`;
        row.Tokens = res.tokenUsage ? res.tokenUsage.total : "?";
        row.BPS = res.bps || "?";
        // Prefer the recorded size; otherwise fall back to the length of the
        // serialized output, or 0 when there is no output at all.
        if (res.responseSize) {
            row.Size = res.responseSize;
        }
        else if (res.output) {
            row.Size = JSON.stringify(res.output).length;
        }
        else {
            row.Size = 0;
        }
        rows.push(row);
    }
    console.table(rows);
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prd.d.ts","sourceRoot":"","sources":["../../src/commands/prd.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"prd.d.ts","sourceRoot":"","sources":["../../src/commands/prd.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,eAAO,MAAM,UAAU,SAEtB,CAAC"}
|