task-o-matic 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +200 -0
  2. package/dist/commands/benchmark.d.ts.map +1 -1
  3. package/dist/commands/benchmark.js +342 -0
  4. package/dist/commands/tasks/execute-loop.d.ts +3 -0
  5. package/dist/commands/tasks/execute-loop.d.ts.map +1 -0
  6. package/dist/commands/tasks/execute-loop.js +118 -0
  7. package/dist/commands/tasks/index.d.ts +1 -0
  8. package/dist/commands/tasks/index.d.ts.map +1 -1
  9. package/dist/commands/tasks/index.js +1 -0
  10. package/dist/commands/tasks.d.ts.map +1 -1
  11. package/dist/commands/tasks.js +1 -0
  12. package/dist/commands/workflow.js +39 -0
  13. package/dist/lib/benchmark/registry.d.ts.map +1 -1
  14. package/dist/lib/benchmark/registry.js +11 -0
  15. package/dist/lib/benchmark/types.d.ts +50 -0
  16. package/dist/lib/benchmark/types.d.ts.map +1 -1
  17. package/dist/lib/task-loop-execution.d.ts +25 -0
  18. package/dist/lib/task-loop-execution.d.ts.map +1 -0
  19. package/dist/lib/task-loop-execution.js +473 -0
  20. package/dist/services/prd.d.ts.map +1 -1
  21. package/dist/services/prd.js +36 -1
  22. package/dist/services/tasks.d.ts.map +1 -1
  23. package/dist/services/tasks.js +90 -3
  24. package/dist/services/workflow-benchmark.d.ts +34 -0
  25. package/dist/services/workflow-benchmark.d.ts.map +1 -0
  26. package/dist/services/workflow-benchmark.js +317 -0
  27. package/dist/services/workflow.d.ts.map +1 -1
  28. package/dist/services/workflow.js +120 -7
  29. package/dist/test/task-loop-git.test.d.ts +2 -0
  30. package/dist/test/task-loop-git.test.d.ts.map +1 -0
  31. package/dist/test/task-loop-git.test.js +62 -0
  32. package/dist/types/index.d.ts +50 -0
  33. package/dist/types/index.d.ts.map +1 -1
  34. package/dist/types/results.d.ts +29 -1
  35. package/dist/types/results.d.ts.map +1 -1
  36. package/dist/types/workflow-results.d.ts +27 -0
  37. package/dist/types/workflow-results.d.ts.map +1 -1
  38. package/package.json +1 -1
package/README.md CHANGED
@@ -17,6 +17,7 @@ AI-powered task management for CLI, TUI, and web applications. Parse PRDs, enhan
17
17
  - 📊 **Smart Breakdown**: AI-powered task decomposition into subtasks
18
18
  - 🌊 **Real-time Streaming**: Watch AI responses generate live with streaming output
19
19
  - 📊 **Model Benchmarking**: Compare performance and quality across different AI models
20
+ - 🏁 **Workflow Benchmarking**: Test complete workflows across multiple models and compare results
20
21
  - 🏠 **Single-Project Focus**: Self-contained within each project directory
21
22
  - 💻 **Framework-Agnostic**: Easily integrate into TUI, web apps, or any Node.js project
22
23
 
@@ -267,6 +268,11 @@ import type {
267
268
  RefinePRDResult,
268
269
  GenerateTasksResult,
269
270
  SplitTasksResult,
271
+ // Benchmark types
272
+ WorkflowBenchmarkInput,
273
+ WorkflowBenchmarkResult,
274
+ BenchmarkConfig,
275
+ BenchmarkResult,
270
276
  } from "task-o-matic";
271
277
  ```
272
278
 
@@ -350,6 +356,9 @@ task-o-matic workflow
350
356
 
351
357
  # With streaming AI output
352
358
  task-o-matic workflow --stream
359
+
360
+ # Want to test multiple AI models? Try workflow benchmarking:
361
+ task-o-matic benchmark workflow --models "openai:gpt-4o,anthropic:claude-3-5-sonnet"
353
362
  ```
354
363
 
355
364
  **The workflow will guide you through:**
@@ -389,6 +398,7 @@ task-o-matic workflow --stream
389
398
  - [AI Integration](docs/ai-integration.md) - AI providers and prompt engineering
390
399
  - [Project Initialization](docs/projects.md) - Project setup and bootstrapping
391
400
  - [Streaming Output](docs/streaming.md) - Real-time AI streaming capabilities
401
+ - [Model Benchmarking](docs/benchmarking.md) - Compare AI models and workflow performance
392
402
 
393
403
  ## 🎯 Common Workflows
394
404
 
@@ -458,6 +468,55 @@ task-o-matic benchmark compare <run-id>
458
468
  task-o-matic benchmark show <run-id>
459
469
  ```
460
470
 
471
+ ### Workflow 3b: Complete Workflow Benchmarking
472
+
473
+ Test entire workflows across multiple AI models and automatically set up your project with the best results.
474
+
475
+ ```bash
476
+ # 1. Basic workflow benchmark with interactive setup
477
+ task-o-matic benchmark workflow \
478
+ --models "openai:gpt-4o,anthropic:claude-3-5-sonnet,openrouter:qwen/qwen-2.5-72b-instruct" \
479
+ --concurrency 2 \
480
+ --delay 1000
481
+
482
+ # 2. Automated workflow benchmark
483
+ task-o-matic benchmark workflow \
484
+ --models "openai:gpt-4o,anthropic:claude-3-5-sonnet" \
485
+ --project-name "my-saas-app" \
486
+ --project-description "Team collaboration platform with real-time chat" \
487
+ --init-method ai \
488
+ --prd-method ai \
489
+ --auto-accept \
490
+ --skip-all
491
+
492
+ # 3. Benchmark with specific workflow options
493
+ task-o-matic benchmark workflow \
494
+ --models "openai:gpt-4o,anthropic:claude-3-5-sonnet" \
495
+ --project-name "e-commerce-app" \
496
+ --init-method custom \
497
+ --frontend next \
498
+ --backend hono \
499
+ --database postgres \
500
+ --prd-method ai \
501
+ --prd-description "Modern e-commerce platform with AI recommendations" \
502
+ --refine-feedback "Focus on scalability and security" \
503
+ --split-all
504
+
505
+ # Results include:
506
+ # - Comprehensive comparison table (duration, tasks, PRD size, costs)
507
+ # - Detailed per-model breakdowns with timing and token metrics
508
+ # - Interactive selection to choose the best model
509
+ # - Automatic project setup with selected model's results
510
+ ```
511
+
512
+ **Workflow Benchmark Features:**
513
+
514
+ - **Two-Phase Execution**: Interactive question collection, then parallel execution
515
+ - **Complete Workflow**: Project init → PRD creation → task generation → task splitting
516
+ - **Comprehensive Metrics**: Performance, cost, quality, and output comparison
517
+ - **Model Selection**: Choose the best performer and auto-setup your project
518
+ - **Identical Conditions**: All models receive the same inputs for fair comparison
519
+
461
520
  ### Workflow 4: Project Bootstrapping
462
521
 
463
522
  ```bash
@@ -477,6 +536,130 @@ task-o-matic init bootstrap my-app
477
536
  task-o-matic tasks create --title "Set up development environment" --ai-enhance --stream
478
537
  ```
479
538
 
539
+ ## 📊 Benchmarking Commands
540
+
541
+ ### Basic Model Benchmarking
542
+
543
+ Compare different AI models on specific operations:
544
+
545
+ ```bash
546
+ # Benchmark PRD parsing across multiple models
547
+ task-o-matic benchmark run prd-parse \
548
+ --file requirements.md \
549
+ --models "openai:gpt-4o,anthropic:claude-3-5-sonnet,openrouter:qwen/qwen-2.5-72b-instruct" \
550
+ --concurrency 3 \
551
+ --delay 1000
552
+
553
+ # Benchmark task splitting
554
+ task-o-matic benchmark run task-breakdown \
555
+ --task-id <task-id> \
556
+ --models "openai:gpt-4o,anthropic:claude-3-5-sonnet" \
557
+ --concurrency 2
558
+
559
+ # View benchmark results
560
+ task-o-matic benchmark list
561
+ task-o-matic benchmark show <run-id>
562
+ task-o-matic benchmark compare <run-id>
563
+ ```
564
+
565
+ ### Complete Workflow Benchmarking
566
+
567
+ Test entire project workflows across multiple AI models:
568
+
569
+ ```bash
570
+ # Interactive workflow benchmark (recommended)
571
+ task-o-matic benchmark workflow \
572
+ --models "openai:gpt-4o,anthropic:claude-3-5-sonnet,openrouter:qwen/qwen-2.5-72b-instruct"
573
+ ```
574
+
575
+ **What happens:**
576
+ 1. **Phase 1**: You answer workflow questions once (project setup, PRD creation, etc.)
577
+ 2. **Phase 2**: All models execute the identical workflow in parallel
578
+ 3. **Results**: Comprehensive comparison table with metrics and model selection
579
+
580
+ **Full automation example:**
581
+
582
+ ```bash
583
+ task-o-matic benchmark workflow \
584
+ --models "openai:gpt-4o,anthropic:claude-3-5-sonnet" \
585
+ --project-name "my-saas-platform" \
586
+ --project-description "Team collaboration platform with real-time messaging" \
587
+ --init-method ai \
588
+ --prd-method ai \
589
+ --auto-accept \
590
+ --refine-feedback "Add more technical details and security considerations" \
591
+ --generate-instructions "Focus on MVP features and break into small tasks" \
592
+ --split-all \
593
+ --concurrency 2 \
594
+ --delay 2000
595
+ ```
596
+
597
+ **Output includes:**
598
+
599
+ ```
600
+ 📊 Workflow Benchmark Results
601
+
602
+ Model | Duration | Tasks | PRD Size | Steps | Cost
603
+ ---------------------------------------- | ---------- | ----- | ---------- | ----- | ----------
604
+ openai:gpt-4o | 45234ms | 12 | 2843 chars | 5/5 | $0.023400
605
+ anthropic:claude-3-5-sonnet | 42156ms | 15 | 3021 chars | 5/5 | $0.019800
606
+
607
+ šŸ” Detailed Comparison
608
+
609
+ [1] openai:gpt-4o
610
+ Duration: 45234ms
611
+ Steps Completed: 5/5
612
+ Init: 2341ms
613
+ PRD Generation: 12456ms
614
+ Task Generation: 8234ms
615
+ Task Splitting: 3421ms
616
+ Tasks Created: 12
617
+ PRD Size: 2843 characters
618
+ Tokens: 4521 (Prompt: 2341, Completion: 2180)
619
+ Cost: $0.023400
620
+
621
+ 🎯 Model Selection
622
+ Would you like to select a model and set up your project with its results? (y/N)
623
+ ```
624
+
625
+ ### Benchmark Options
626
+
627
+ All benchmark commands support:
628
+
629
+ - `--models <list>`: Comma-separated model list (required)
630
+ - `--concurrency <number>`: Max parallel requests (default: 3)
631
+ - `--delay <ms>`: Delay between requests (default: 1000ms)
632
+
633
+ **Model format:** `provider:model[:reasoning=<tokens>]`
634
+
635
+ **Examples:**
636
+ - `openai:gpt-4o`
637
+ - `anthropic:claude-3-5-sonnet`
638
+ - `openrouter:anthropic/claude-3.5-sonnet`
639
+ - `openrouter:openai/o1-preview:reasoning=50000`
640
+
641
+ ### Workflow Benchmark Inheritance
642
+
643
+ The `benchmark workflow` command supports ALL workflow command options:
644
+
645
+ ```bash
646
+ # All these workflow options work in benchmarks:
647
+ --project-name, --init-method, --project-description
648
+ --frontend, --backend, --database, --auth/--no-auth
649
+ --prd-method, --prd-file, --prd-description, --prd-content
650
+ --refine-feedback, --generate-instructions
651
+ --split-tasks, --split-all, --split-instructions
652
+ --skip-init, --skip-prd, --skip-refine, --skip-generate, --skip-split
653
+ --stream, --auto-accept, --config-file
654
+ ```
655
+
656
+ This allows you to:
657
+ - **Pre-configure workflow steps** via command-line options
658
+ - **Skip interactive questions** for automated benchmarking
659
+ - **Compare identical workflows** across different models
660
+ - **Test specific scenarios** (e.g., only AI vs only custom stack)
661
+
662
+
480
663
  ## 🔧 Environment Variables
481
664
 
482
665
  ```bash
@@ -508,6 +691,23 @@ AI_TEMPERATURE=0.7
508
691
  - **PRD Parsing**: `claude-3.5-sonnet` or `gpt-4`
509
692
  - **Task Enhancement**: `claude-3-haiku` or `gpt-3.5-turbo`
510
693
  - **Task Breakdown**: `claude-3.5-sonnet` for complex tasks
694
+ - **Workflow Benchmarking**: Test 2-3 models to find optimal performance for your use case
695
+
696
+ ### Choosing the Right Model
697
+
698
+ Not sure which model to use? Try workflow benchmarking:
699
+
700
+ ```bash
701
+ # Test your specific workflow across multiple models
702
+ task-o-matic benchmark workflow \
703
+ --models "openai:gpt-4o,anthropic:claude-3-5-sonnet,openrouter:qwen/qwen-2.5-72b-instruct" \
704
+ --project-description "Your project description here"
705
+
706
+ # The benchmark will show you:
707
+ # - Performance (speed, tokens, cost)
708
+ # - Quality (tasks created, PRD completeness)
709
+ # - Best model for your specific needs
710
+ ```
511
711
 
512
712
  ## šŸ“ Storage Structure
513
713
 
@@ -1 +1 @@
1
- {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC,eAAO,MAAM,gBAAgB,SAE5B,CAAC"}
1
+ {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAapC,eAAO,MAAM,gBAAgB,SAE5B,CAAC"}
@@ -1,4 +1,37 @@
1
1
  "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
2
35
  var __importDefault = (this && this.__importDefault) || function (mod) {
3
36
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
37
  };
@@ -7,6 +40,7 @@ exports.benchmarkCommand = void 0;
7
40
  const commander_1 = require("commander");
8
41
  const chalk_1 = __importDefault(require("chalk"));
9
42
  const benchmark_1 = require("../services/benchmark");
43
+ const workflow_prompts_1 = require("../utils/workflow-prompts");
10
44
  exports.benchmarkCommand = new commander_1.Command("benchmark").description("Run and manage AI benchmarks");
11
45
  // Helper to parse model string
12
46
  // Format: provider:model[:reasoning=<tokens>]
@@ -225,3 +259,311 @@ exports.benchmarkCommand
225
259
  }));
226
260
  console.table(table);
227
261
  });
262
+ exports.benchmarkCommand
263
+ .command("workflow")
264
+ .description("Benchmark complete workflow execution across multiple models")
265
+ .requiredOption("--models <list>", "Comma-separated list of models (provider:model[:reasoning=<tokens>])")
266
+ .option("--concurrency <number>", "Max concurrent requests", "3")
267
+ .option("--delay <number>", "Delay between requests in ms", "1000")
268
+ // Inherit all workflow command options
269
+ .option("--stream", "Show streaming AI output")
270
+ .option("--skip-all", "Skip all optional steps (use defaults)")
271
+ .option("--auto-accept", "Auto-accept all AI suggestions")
272
+ .option("--config-file <path>", "Load workflow options from JSON file")
273
+ // Step 1: Initialize
274
+ .option("--skip-init", "Skip initialization step")
275
+ .option("--project-name <name>", "Project name")
276
+ .option("--init-method <method>", "Initialization method: quick, custom, ai")
277
+ .option("--project-description <desc>", "Project description for AI-assisted init")
278
+ .option("--frontend <framework>", "Frontend framework")
279
+ .option("--backend <framework>", "Backend framework")
280
+ .option("--database <db>", "Database choice")
281
+ .option("--auth", "Include authentication")
282
+ .option("--no-auth", "Exclude authentication")
283
+ .option("--bootstrap", "Bootstrap with Better-T-Stack")
284
+ .option("--no-bootstrap", "Skip bootstrapping")
285
+ // Step 2: Define PRD
286
+ .option("--skip-prd", "Skip PRD definition")
287
+ .option("--prd-method <method>", "PRD method: upload, manual, ai, skip")
288
+ .option("--prd-file <path>", "Path to existing PRD file")
289
+ .option("--prd-description <desc>", "Product description for AI-assisted PRD")
290
+ .option("--prd-content <content>", "Direct PRD content")
291
+ // Step 3: Refine PRD
292
+ .option("--skip-refine", "Skip PRD refinement")
293
+ .option("--refine-method <method>", "Refinement method: manual, ai, skip")
294
+ .option("--refine-feedback <feedback>", "Feedback for AI refinement")
295
+ // Step 4: Generate Tasks
296
+ .option("--skip-generate", "Skip task generation")
297
+ .option("--generate-method <method>", "Generation method: standard, ai")
298
+ .option("--generate-instructions <instructions>", "Custom task generation instructions")
299
+ // Step 5: Split Tasks
300
+ .option("--skip-split", "Skip task splitting")
301
+ .option("--split-tasks <ids>", "Comma-separated task IDs to split")
302
+ .option("--split-all", "Split all tasks")
303
+ .option("--split-method <method>", "Split method: interactive, standard, custom")
304
+ .option("--split-instructions <instructions>", "Custom split instructions")
305
+ .action(async (options) => {
306
+ try {
307
+ await runWorkflowBenchmark(options);
308
+ }
309
+ catch (error) {
310
+ console.error(chalk_1.default.red("Workflow benchmark failed:"), error.message);
311
+ process.exit(1);
312
+ }
313
+ });
314
+ /**
315
+ * Execute workflow benchmark across multiple models
316
+ */
317
+ async function runWorkflowBenchmark(options) {
318
+ console.log(chalk_1.default.blue.bold("\nšŸš€ Task-O-Matic Workflow Benchmark\n"));
319
+ // Parse models
320
+ const modelStrings = options.models.split(",");
321
+ const models = modelStrings.map((s) => parseModelString(s.trim()));
322
+ const config = {
323
+ models,
324
+ concurrency: parseInt(options.concurrency, 10),
325
+ delay: parseInt(options.delay, 10),
326
+ };
327
+ console.log(chalk_1.default.dim(`Models: ${models.length}, Concurrency: ${config.concurrency}, Delay: ${config.delay}ms`));
328
+ // Phase 1: Collect user responses interactively
329
+ console.log(chalk_1.default.blue.bold("\nšŸ“‹ Phase 1: Collecting Workflow Responses\n"));
330
+ console.log(chalk_1.default.gray("Please answer the following questions. Your responses will be used for all models."));
331
+ const collectedResponses = await collectWorkflowResponses(options);
332
+ // Phase 2: Execute workflow on all models
333
+ console.log(chalk_1.default.blue.bold("\n⚔ Phase 2: Executing Workflows\n"));
334
+ console.log(chalk_1.default.gray(`Running workflow on ${models.length} models...\n`));
335
+ // Prepare workflow input
336
+ const workflowInput = {
337
+ collectedResponses,
338
+ workflowOptions: options,
339
+ tempDirBase: "/tmp",
340
+ };
341
+ // Prepare dashboard
342
+ console.log(chalk_1.default.bold("Benchmark Progress:"));
343
+ const modelMap = new Map();
344
+ const modelStatus = new Map();
345
+ // Print initial lines and map indices
346
+ models.forEach((m, i) => {
347
+ const id = `${m.provider}:${m.model}${m.reasoningTokens ? `:reasoning=${m.reasoningTokens}` : ""}`;
348
+ modelMap.set(id, i);
349
+ modelStatus.set(id, "Waiting...");
350
+ console.log(chalk_1.default.dim(`- ${id}: Waiting...`));
351
+ });
352
+ const totalModels = models.length;
353
+ const run = await benchmark_1.benchmarkService.runBenchmark("workflow-full", workflowInput, config, (event) => {
354
+ const index = modelMap.get(event.modelId);
355
+ if (index === undefined)
356
+ return;
357
+ // Update status in memory
358
+ let statusStr = "";
359
+ if (event.type === "start") {
360
+ statusStr = chalk_1.default.yellow("Starting...");
361
+ }
362
+ else if (event.type === "progress") {
363
+ statusStr = chalk_1.default.blue("Running workflow...");
364
+ }
365
+ else if (event.type === "complete") {
366
+ statusStr = chalk_1.default.green(`Completed (${event.duration}ms)`);
367
+ }
368
+ else if (event.type === "error") {
369
+ statusStr = chalk_1.default.red(`Failed: ${event.error}`);
370
+ }
371
+ modelStatus.set(event.modelId, statusStr);
372
+ // Update display
373
+ const up = totalModels - index;
374
+ process.stdout.write(`\x1B[${up}A`); // Move up
375
+ process.stdout.write(`\x1B[2K`); // Clear line
376
+ process.stdout.write(`- ${chalk_1.default.bold(event.modelId)}: ${statusStr}\r`);
377
+ process.stdout.write(`\x1B[${up}B`); // Move down
378
+ });
379
+ console.log(chalk_1.default.green(`\nāœ… Workflow benchmark completed! Run ID: ${run.id}`));
380
+ // Display results
381
+ await displayWorkflowBenchmarkResults(run);
382
+ // Optional: Let user select a model for project setup
383
+ await promptForModelSelection(run, collectedResponses);
384
+ }
385
+ /**
386
+ * Collect workflow responses from user interactively
387
+ */
388
+ async function collectWorkflowResponses(options) {
389
+ // Use provided options or prompt user
390
+ const getOrPrompt = async (preAnswered, promptFn, skipCondition = false) => {
391
+ if (skipCondition) {
392
+ throw new Error("Step skipped");
393
+ }
394
+ if (preAnswered !== undefined) {
395
+ return preAnswered;
396
+ }
397
+ return promptFn();
398
+ };
399
+ // Project setup questions
400
+ const projectName = await getOrPrompt(options.projectName, () => (0, workflow_prompts_1.textInputPrompt)("What is the name of your project?", "my-benchmark-project"));
401
+ const initMethod = await getOrPrompt(options.initMethod, () => (0, workflow_prompts_1.selectPrompt)("How would you like to configure your project stack?", [
402
+ { name: "Quick start (recommended defaults)", value: "quick" },
403
+ { name: "Custom configuration", value: "custom" },
404
+ { name: "AI-assisted (describe your project)", value: "ai" },
405
+ ]));
406
+ let projectDescription;
407
+ if (initMethod === "ai") {
408
+ projectDescription = await getOrPrompt(options.projectDescription, () => (0, workflow_prompts_1.textInputPrompt)("Describe your project (e.g., 'A SaaS app for team collaboration'):"));
409
+ }
410
+ // Stack configuration (for custom method)
411
+ let stackConfig = {};
412
+ if (initMethod === "custom") {
413
+ stackConfig.frontend = await getOrPrompt(options.frontend, () => (0, workflow_prompts_1.selectPrompt)("Frontend framework:", ["next", "react", "vue", "svelte"]));
414
+ stackConfig.backend = await getOrPrompt(options.backend, () => (0, workflow_prompts_1.selectPrompt)("Backend framework:", ["hono", "express", "fastify", "nestjs"]));
415
+ stackConfig.database = await getOrPrompt(options.database, () => (0, workflow_prompts_1.selectPrompt)("Database:", ["sqlite", "postgres", "mysql", "mongodb"]));
416
+ stackConfig.auth = await getOrPrompt(options.auth, () => (0, workflow_prompts_1.confirmPrompt)("Include authentication?", true));
417
+ }
418
+ // PRD questions
419
+ const prdMethod = await getOrPrompt(options.prdMethod, () => (0, workflow_prompts_1.selectPrompt)("How would you like to define your PRD?", [
420
+ { name: "AI-assisted creation", value: "ai" },
421
+ { name: "Upload existing file", value: "upload" },
422
+ { name: "Write manually", value: "manual" },
423
+ { name: "Skip PRD", value: "skip" },
424
+ ]));
425
+ let prdDescription;
426
+ let prdFile;
427
+ let prdContent;
428
+ if (prdMethod === "ai") {
429
+ prdDescription = await getOrPrompt(options.prdDescription, () => (0, workflow_prompts_1.textInputPrompt)("Describe your product in detail:"));
430
+ }
431
+ else if (prdMethod === "upload") {
432
+ prdFile = await getOrPrompt(options.prdFile, () => (0, workflow_prompts_1.textInputPrompt)("Path to PRD file:"));
433
+ }
434
+ else if (prdMethod === "manual") {
435
+ prdContent = await getOrPrompt(options.prdContent, () => (0, workflow_prompts_1.editorPrompt)("Write your PRD:", "# Product Requirements Document\n\n## Overview\n\n## Features\n\n"));
436
+ }
437
+ // Additional workflow questions
438
+ const refinePrd = !options.skipRefine && prdMethod !== "skip" ?
439
+ await (0, workflow_prompts_1.confirmPrompt)("Refine PRD with AI feedback?", false) : false;
440
+ let refineFeedback;
441
+ if (refinePrd) {
442
+ refineFeedback = await getOrPrompt(options.refineFeedback, () => (0, workflow_prompts_1.textInputPrompt)("What feedback should be used for PRD refinement?", "Add more technical details and clarify requirements"));
443
+ }
444
+ const generateTasks = !options.skipGenerate && prdMethod !== "skip";
445
+ const customInstructions = options.generateInstructions ||
446
+ (generateTasks ? await (0, workflow_prompts_1.textInputPrompt)("Custom task generation instructions (optional):", "") : undefined);
447
+ const splitTasks = !options.skipSplit && generateTasks ?
448
+ await (0, workflow_prompts_1.confirmPrompt)("Split complex tasks into subtasks?", true) : false;
449
+ const splitInstructions = splitTasks && options.splitInstructions ?
450
+ options.splitInstructions :
451
+ (splitTasks ? await (0, workflow_prompts_1.textInputPrompt)("Custom splitting instructions (optional):", "Break into 2-4 hour chunks") : undefined);
452
+ return {
453
+ projectName,
454
+ initMethod: initMethod,
455
+ projectDescription,
456
+ stackConfig,
457
+ prdMethod: prdMethod,
458
+ prdContent,
459
+ prdDescription,
460
+ prdFile,
461
+ refinePrd,
462
+ refineFeedback,
463
+ generateTasks,
464
+ customInstructions,
465
+ splitTasks,
466
+ splitInstructions,
467
+ };
468
+ }
469
+ /**
470
+ * Display workflow benchmark results in a comprehensive format
471
+ */
472
+ async function displayWorkflowBenchmarkResults(run) {
473
+ console.log(chalk_1.default.bold("\nšŸ“Š Workflow Benchmark Results\n"));
474
+ // Summary table
475
+ console.log(chalk_1.default.bold(`${"Model".padEnd(40)} | ${"Duration".padEnd(10)} | ${"Tasks".padEnd(8)} | ${"PRD Size".padEnd(10)} | ${"Steps".padEnd(8)} | ${"Cost".padEnd(10)}`));
476
+ console.log("-".repeat(130));
477
+ run.results.forEach((r) => {
478
+ const duration = `${r.duration}ms`.padEnd(10);
479
+ const taskCount = r.output?.stats?.totalTasks || 0;
480
+ const tasks = `${taskCount}`.padEnd(8);
481
+ const prdSize = r.output?.stats?.prdSize ? `${r.output.stats.prdSize} chars`.padEnd(10) : "-".padEnd(10);
482
+ const steps = r.output?.stats ? `${r.output.stats.successfulSteps}/${r.output.stats.totalSteps}`.padEnd(8) : "-".padEnd(8);
483
+ const cost = r.cost ? `$${r.cost.toFixed(6)}`.padEnd(10) : "-".padEnd(10);
484
+ console.log(`${r.modelId.padEnd(40)} | ${duration} | ${tasks} | ${prdSize} | ${steps} | ${cost}`);
485
+ if (r.error) {
486
+ console.log(chalk_1.default.red(` Error: ${r.error}`));
487
+ }
488
+ });
489
+ // Detailed comparison
490
+ console.log(chalk_1.default.bold("\nšŸ” Detailed Comparison\n"));
491
+ run.results.forEach((r, index) => {
492
+ if (r.error)
493
+ return;
494
+ console.log(chalk_1.default.cyan(`\n[${index + 1}] ${r.modelId}`));
495
+ console.log(`Duration: ${r.duration}ms`);
496
+ if (r.output?.stats) {
497
+ const stats = r.output.stats;
498
+ console.log(`Steps Completed: ${stats.successfulSteps}/${stats.totalSteps}`);
499
+ if (stats.initDuration)
500
+ console.log(` Init: ${stats.initDuration}ms`);
501
+ if (stats.prdGenerationDuration)
502
+ console.log(` PRD Generation: ${stats.prdGenerationDuration}ms`);
503
+ if (stats.taskGenerationDuration)
504
+ console.log(` Task Generation: ${stats.taskGenerationDuration}ms`);
505
+ if (stats.taskSplittingDuration)
506
+ console.log(` Task Splitting: ${stats.taskSplittingDuration}ms`);
507
+ console.log(`Tasks Created: ${stats.totalTasks}`);
508
+ if (stats.tasksWithSubtasks)
509
+ console.log(`Tasks with Subtasks: ${stats.tasksWithSubtasks}`);
510
+ if (stats.prdSize)
511
+ console.log(`PRD Size: ${stats.prdSize} characters`);
512
+ }
513
+ if (r.tokenUsage) {
514
+ console.log(`Tokens: ${r.tokenUsage.total} (Prompt: ${r.tokenUsage.prompt}, Completion: ${r.tokenUsage.completion})`);
515
+ }
516
+ if (r.cost) {
517
+ console.log(`Cost: $${r.cost.toFixed(6)}`);
518
+ }
519
+ });
520
+ }
521
+ /**
522
+ * Allow user to select a model and set up project with its results
523
+ */
524
+ async function promptForModelSelection(run, responses) {
525
+ const successfulResults = run.results.filter((r) => !r.error);
526
+ if (successfulResults.length === 0) {
527
+ console.log(chalk_1.default.yellow("\nāš ļø No successful results to select from."));
528
+ return;
529
+ }
530
+ if (successfulResults.length === 1) {
531
+ console.log(chalk_1.default.green(`\nāœ… Only one successful result from ${successfulResults[0].modelId}`));
532
+ return;
533
+ }
534
+ console.log(chalk_1.default.blue.bold("\nšŸŽÆ Model Selection\n"));
535
+ const shouldSelect = await (0, workflow_prompts_1.confirmPrompt)("Would you like to select a model and set up your project with its results?", false);
536
+ if (!shouldSelect) {
537
+ console.log(chalk_1.default.gray("Benchmark complete. Results have been saved."));
538
+ return;
539
+ }
540
+ const choices = successfulResults.map((r, index) => ({
541
+ name: `${r.modelId} (${r.duration}ms, ${r.output?.stats?.totalTasks || 0} tasks, $${r.cost?.toFixed(6) || 'unknown'})`,
542
+ value: index,
543
+ }));
544
+ const selectedIndex = await (0, workflow_prompts_1.selectPrompt)("Select the model whose results you want to use for your project:", choices);
545
+ const selectedResult = successfulResults[selectedIndex];
546
+ console.log(chalk_1.default.green(`\nāœ… Selected: ${selectedResult.modelId}`));
547
+ console.log(chalk_1.default.gray("Setting up your project with the selected results..."));
548
+ // Get target directory
549
+ const targetDir = await (0, workflow_prompts_1.textInputPrompt)("Enter target directory for your project:", `./${responses.projectName}`);
550
+ try {
551
+ console.log(chalk_1.default.cyan("\nšŸ”§ Applying benchmark results..."));
552
+ const { workflowBenchmarkService } = await Promise.resolve().then(() => __importStar(require("../services/workflow-benchmark")));
553
+ const result = await workflowBenchmarkService.applyBenchmarkResult(selectedResult, targetDir, responses);
554
+ if (result.success) {
555
+ console.log(chalk_1.default.green(`\nāœ… ${result.message}`));
556
+ console.log(chalk_1.default.cyan("\nNext steps:"));
557
+ console.log(chalk_1.default.gray(` • Navigate to: cd ${targetDir}`));
558
+ console.log(chalk_1.default.gray(" • Review your tasks: task-o-matic tasks list"));
559
+ console.log(chalk_1.default.gray(" • View task tree: task-o-matic tasks tree"));
560
+ console.log(chalk_1.default.gray(" • Start working: task-o-matic tasks next"));
561
+ }
562
+ else {
563
+ console.log(chalk_1.default.red(`\nāŒ ${result.message}`));
564
+ }
565
+ }
566
+ catch (error) {
567
+ console.log(chalk_1.default.red(`\nāŒ Failed to apply results: ${error instanceof Error ? error.message : String(error)}`));
568
+ }
569
+ }
@@ -0,0 +1,3 @@
1
+ import { Command } from "commander";
2
+ export declare const executeLoopCommand: Command;
3
+ //# sourceMappingURL=execute-loop.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"execute-loop.d.ts","sourceRoot":"","sources":["../../../src/commands/tasks/execute-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAiDpC,eAAO,MAAM,kBAAkB,SAoI3B,CAAC"}