task-o-matic 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +286 -23
- package/dist/commands/benchmark.d.ts +3 -0
- package/dist/commands/benchmark.d.ts.map +1 -0
- package/dist/commands/benchmark.js +569 -0
- package/dist/commands/prd.d.ts.map +1 -1
- package/dist/commands/prd.js +203 -9
- package/dist/commands/tasks/execute-loop.d.ts +3 -0
- package/dist/commands/tasks/execute-loop.d.ts.map +1 -0
- package/dist/commands/tasks/execute-loop.js +118 -0
- package/dist/commands/tasks/index.d.ts +1 -0
- package/dist/commands/tasks/index.d.ts.map +1 -1
- package/dist/commands/tasks/index.js +1 -0
- package/dist/commands/tasks.d.ts.map +1 -1
- package/dist/commands/tasks.js +1 -0
- package/dist/commands/workflow.d.ts.map +1 -1
- package/dist/commands/workflow.js +491 -331
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/lib/ai-service/ai-operations.d.ts +5 -0
- package/dist/lib/ai-service/ai-operations.d.ts.map +1 -1
- package/dist/lib/ai-service/ai-operations.js +167 -0
- package/dist/lib/benchmark/registry.d.ts +11 -0
- package/dist/lib/benchmark/registry.d.ts.map +1 -0
- package/dist/lib/benchmark/registry.js +89 -0
- package/dist/lib/benchmark/runner.d.ts +6 -0
- package/dist/lib/benchmark/runner.d.ts.map +1 -0
- package/dist/lib/benchmark/runner.js +150 -0
- package/dist/lib/benchmark/storage.d.ts +13 -0
- package/dist/lib/benchmark/storage.d.ts.map +1 -0
- package/dist/lib/benchmark/storage.js +99 -0
- package/dist/lib/benchmark/types.d.ts +104 -0
- package/dist/lib/benchmark/types.d.ts.map +1 -0
- package/dist/lib/benchmark/types.js +2 -0
- package/dist/lib/index.d.ts +9 -0
- package/dist/lib/index.d.ts.map +1 -1
- package/dist/lib/index.js +7 -1
- package/dist/lib/prompt-registry.d.ts.map +1 -1
- package/dist/lib/prompt-registry.js +23 -0
- package/dist/lib/task-loop-execution.d.ts +25 -0
- package/dist/lib/task-loop-execution.d.ts.map +1 -0
- package/dist/lib/task-loop-execution.js +473 -0
- package/dist/prompts/index.d.ts +7 -6
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +1 -0
- package/dist/prompts/prd-question.d.ts +3 -0
- package/dist/prompts/prd-question.d.ts.map +1 -0
- package/dist/prompts/prd-question.js +40 -0
- package/dist/services/benchmark.d.ts +12 -0
- package/dist/services/benchmark.d.ts.map +1 -0
- package/dist/services/benchmark.js +18 -0
- package/dist/services/prd.d.ts +25 -0
- package/dist/services/prd.d.ts.map +1 -1
- package/dist/services/prd.js +224 -29
- package/dist/services/tasks.d.ts.map +1 -1
- package/dist/services/tasks.js +90 -3
- package/dist/services/workflow-benchmark.d.ts +34 -0
- package/dist/services/workflow-benchmark.d.ts.map +1 -0
- package/dist/services/workflow-benchmark.js +317 -0
- package/dist/services/workflow.d.ts +85 -0
- package/dist/services/workflow.d.ts.map +1 -0
- package/dist/services/workflow.js +476 -0
- package/dist/test/task-loop-git.test.d.ts +2 -0
- package/dist/test/task-loop-git.test.d.ts.map +1 -0
- package/dist/test/task-loop-git.test.js +62 -0
- package/dist/types/index.d.ts +53 -0
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/options.d.ts +2 -1
- package/dist/types/options.d.ts.map +1 -1
- package/dist/types/options.js +16 -0
- package/dist/types/results.d.ts +29 -1
- package/dist/types/results.d.ts.map +1 -1
- package/dist/types/workflow-options.d.ts +45 -0
- package/dist/types/workflow-options.d.ts.map +1 -0
- package/dist/types/workflow-options.js +2 -0
- package/dist/types/workflow-results.d.ts +82 -0
- package/dist/types/workflow-results.d.ts.map +1 -0
- package/dist/types/workflow-results.js +2 -0
- package/package.json +1 -1
package/README.md
CHANGED
@@ -6,6 +6,8 @@ AI-powered task management for CLI, TUI, and web applications. Parse PRDs, enhan
 
 - 🤖 **AI-Powered**: Parse PRDs and enhance tasks using multiple AI providers
 - 🎭 **Interactive Workflow**: Guided setup from project init to task generation with AI assistance
+- ❓ **PRD Question/Refine**: AI generates clarifying questions and can answer them automatically
+- 🧠 **AI Reasoning Support**: Enable advanced reasoning for better PRD refinement
 - 📦 **Multi-Purpose Package**: Use as CLI tool, library, or MCP server
 - 📁 **Project-Local Storage**: All data stored locally in `.task-o-matic/` directory
 - 🎯 **Task Management**: Full CRUD operations with AI enhancement
@@ -14,6 +16,8 @@ AI-powered task management for CLI, TUI, and web applications. Parse PRDs, enhan
 - 🔧 **Multi-Provider AI**: Support for OpenAI, Anthropic, OpenRouter, and custom providers
 - 📊 **Smart Breakdown**: AI-powered task decomposition into subtasks
 - 🌊 **Real-time Streaming**: Watch AI responses generate live with streaming output
+- 📊 **Model Benchmarking**: Compare performance and quality across different AI models
+- 🏁 **Workflow Benchmarking**: Test complete workflows across multiple models and compare results
 - 🏠 **Single-Project Focus**: Self-contained within each project directory
 - 💻 **Framework-Agnostic**: Easily integrate into TUI, web apps, or any Node.js project
 
@@ -55,14 +59,14 @@ task-o-matic/
 ├── dist/            # Compiled output (published)
 │   ├── lib/         # Library entry point + core exports
 │   ├── cli/         # CLI binary
-│   ├── services/    # Business logic layer
+│   ├── services/    # Business logic layer (WorkflowService, PRDService, TaskService)
 │   ├── commands/    # CLI commands
 │   ├── mcp/         # MCP server
 │   └── types/       # TypeScript definitions
 ├── src/
 │   ├── lib/         # Core library (Storage, Config, AI, etc.)
 │   │   └── index.ts # Main library exports
-│   ├── services/    #
+│   ├── services/    # WorkflowService, PRDService, TaskService (framework-agnostic)
 │   ├── cli/         # CLI-specific logic
 │   │   └── bin.ts   # CLI binary entry point
 │   ├── commands/    # Commander.js command implementations
@@ -75,8 +79,8 @@ task-o-matic/
 
 ### Core Components
 
-- **Service Layer** (`
-- **AI Service**: Uses Vercel AI SDK for multi-provider support
+- **Service Layer** (`WorkflowService`, `PRDService`, `TaskService`): Framework-agnostic business logic
+- **AI Service**: Uses Vercel AI SDK for multi-provider support with reasoning capabilities
 - **Local Storage**: JSON-based file storage in `.task-o-matic/` directory
 - **Configuration**: Project-local config with AI provider settings
 - **Prompt Templates**: Structured AI prompts for consistent results
@@ -104,24 +108,45 @@ npm install task-o-matic
 
 ```typescript
 import {
+  WorkflowService,
   TaskService,
   PRDService,
   type Task,
   type AIConfig,
 } from "task-o-matic";
 
-//
+// Use the workflow service for complete project setup
+const workflowService = new WorkflowService();
+
+const result = await workflowService.initializeProject({
+  projectName: "my-app",
+  initMethod: "quick",
+  bootstrap: true,
+  aiOptions: {
+    aiProvider: "anthropic",
+    aiModel: "claude-3-5-sonnet",
+    aiKey: process.env.ANTHROPIC_API_KEY,
+  },
+  callbacks: {
+    onProgress: (event) => {
+      console.log(`Progress: ${event.message}`);
+    },
+  },
+});
+
+console.log("Project initialized:", result.projectName);
+
+// Or use task service directly
 const taskService = new TaskService();
 
-
-const result = await taskService.createTask({
+const taskResult = await taskService.createTask({
   title: "Implement user authentication",
   content: "Add login and signup functionality",
   aiEnhance: true,
   aiOptions: {
-
-
-
+    aiProvider: "anthropic",
+    aiModel: "claude-3-5-sonnet",
+    aiKey: process.env.ANTHROPIC_API_KEY,
   },
   callbacks: {
     onProgress: (event) => {
@@ -130,7 +155,7 @@ const result = await taskService.createTask({
   },
 });
 
-console.log("Task created:",
+console.log("Task created:", taskResult.task);
 ```
 
 #### TUI Integration Example
@@ -167,20 +192,28 @@ const result = await taskService.createTask({
 });
 ```
 
-#### PRD
+#### PRD Question/Refine Example
 
 ```typescript
 import { PRDService } from "task-o-matic";
 
 const prdService = new PRDService();
 
-
+// Generate questions and refine PRD with AI answering
+const result = await prdService.refinePRDWithQuestions({
   file: "./requirements.md",
+  questionMode: "ai", // or "user" for interactive
+  questionAIOptions: {
+    // Optional: use a different AI for answering
+    aiProvider: "openrouter",
+    aiModel: "anthropic/claude-3-opus",
+    aiReasoning: "enabled", // Enable reasoning for better answers
+  },
   workingDirectory: process.cwd(),
   aiOptions: {
-
-
-
+    aiProvider: "anthropic",
+    aiModel: "claude-3-5-sonnet",
+    aiKey: process.env.ANTHROPIC_API_KEY,
   },
   callbacks: {
     onProgress: (event) => {
@@ -189,9 +222,10 @@ const result = await prdService.parsePRD({
   },
 });
 
-console.log(`
-result.
-console.log(
+console.log(`Refined PRD with ${result.questions.length} questions`);
+result.questions.forEach((q, i) => {
+  console.log(`Q${i + 1}: ${q}`);
+  console.log(`A${i + 1}: ${result.answers[q]}`);
 });
 ```
 
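Editor's note on the example above: the diffed README indexes `result.answers` by question text (`result.answers[q]`), which implies a result shape roughly like the sketch below. This is an illustration inferred from the snippet, not the package's published typings; `RefinePRDQuestionsResult` and `printQA` are hypothetical names.

```typescript
// Hypothetical shape implied by the refinePRDWithQuestions example above.
// NOT a task-o-matic export -- an editorial sketch only.
interface RefinePRDQuestionsResult {
  questions: string[];
  // result.answers[q] in the README suggests a map keyed by question text:
  answers: Record<string, string>;
}

// Equivalent of the README's printing loop, returning the lines it would log.
function printQA(result: RefinePRDQuestionsResult): string[] {
  return result.questions.flatMap((q, i) => [
    `Q${i + 1}: ${q}`,
    `A${i + 1}: ${result.answers[q]}`,
  ]);
}
```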
@@ -227,6 +261,18 @@ import type {
   CreateTaskOptions,
   PRDParseResult,
   TaskAIMetadata,
+  // Workflow types
+  WorkflowService,
+  InitializeResult,
+  DefinePRDResult,
+  RefinePRDResult,
+  GenerateTasksResult,
+  SplitTasksResult,
+  // Benchmark types
+  WorkflowBenchmarkInput,
+  WorkflowBenchmarkResult,
+  BenchmarkConfig,
+  BenchmarkResult,
 } from "task-o-matic";
 ```
 
@@ -310,15 +356,23 @@ task-o-matic workflow
 
 # With streaming AI output
 task-o-matic workflow --stream
+
+# Want to test multiple AI models? Try workflow benchmarking:
+task-o-matic benchmark workflow --models "openai:gpt-4o,anthropic:claude-3-5-sonnet"
 ```
 
 **The workflow will guide you through:**
 
 1. **Project Initialization** - Choose quick start, custom, or AI-assisted configuration
 2. **PRD Definition** - Upload file, write manually, or use AI to generate from description
-3. **PRD
-
-
+3. **PRD Question/Refine** (NEW) - AI generates clarifying questions and refines PRD
+   - User can answer questions interactively
+   - OR AI can answer with PRD + stack context
+   - Optional: Use different AI model for answering (e.g., smarter model)
+   - Optional: Enable reasoning for better answers
+4. **PRD Manual Refinement** - Optional additional AI-assisted improvements
+5. **Task Generation** - Parse PRD into actionable tasks
+6. **Task Splitting** - Break down complex tasks into subtasks
 
 **AI Assistance at Every Step:**
 
@@ -344,6 +398,7 @@ task-o-matic workflow --stream
 - [AI Integration](docs/ai-integration.md) - AI providers and prompt engineering
 - [Project Initialization](docs/projects.md) - Project setup and bootstrapping
 - [Streaming Output](docs/streaming.md) - Real-time AI streaming capabilities
+- [Model Benchmarking](docs/benchmarking.md) - Compare AI models and workflow performance
 
 ## 🎯 Common Workflows
 
@@ -395,7 +450,74 @@ task-o-matic tasks create --title "Add payment system" --ai-enhance --stream
 task-o-matic tasks split --task-id <task-id>
 ```
 
-### Workflow 3:
+### Workflow 3: Benchmarking Models
+
+Compare different AI models for performance, cost, and quality.
+
+```bash
+# 1. Run a benchmark for PRD parsing
+task-o-matic benchmark run prd-parse \
+  --file requirements.md \
+  --models "openai:gpt-4o,openrouter:anthropic/claude-3.5-sonnet" \
+  --concurrency 5
+
+# 2. Compare results
+task-o-matic benchmark compare <run-id>
+
+# 3. View detailed metrics (Tokens, BPS, Size)
+task-o-matic benchmark show <run-id>
+```
+
+### Workflow 3b: Complete Workflow Benchmarking
+
+Test entire workflows across multiple AI models and automatically set up your project with the best results.
+
+```bash
+# 1. Basic workflow benchmark with interactive setup
+task-o-matic benchmark workflow \
+  --models "openai:gpt-4o,anthropic:claude-3-5-sonnet,openrouter:qwen/qwen-2.5-72b-instruct" \
+  --concurrency 2 \
+  --delay 1000
+
+# 2. Automated workflow benchmark
+task-o-matic benchmark workflow \
+  --models "openai:gpt-4o,anthropic:claude-3-5-sonnet" \
+  --project-name "my-saas-app" \
+  --project-description "Team collaboration platform with real-time chat" \
+  --init-method ai \
+  --prd-method ai \
+  --auto-accept \
+  --skip-all
+
+# 3. Benchmark with specific workflow options
+task-o-matic benchmark workflow \
+  --models "openai:gpt-4o,anthropic:claude-3-5-sonnet" \
+  --project-name "e-commerce-app" \
+  --init-method custom \
+  --frontend next \
+  --backend hono \
+  --database postgres \
+  --prd-method ai \
+  --prd-description "Modern e-commerce platform with AI recommendations" \
+  --refine-feedback "Focus on scalability and security" \
+  --split-all
+
+# Results include:
+# - Comprehensive comparison table (duration, tasks, PRD size, costs)
+# - Detailed per-model breakdowns with timing and token metrics
+# - Interactive selection to choose the best model
+# - Automatic project setup with selected model's results
+```
+
+**Workflow Benchmark Features:**
+
+- **Two-Phase Execution**: Interactive question collection, then parallel execution
+- **Complete Workflow**: Project init → PRD creation → task generation → task splitting
+- **Comprehensive Metrics**: Performance, cost, quality, and output comparison
+- **Model Selection**: Choose the best performer and auto-setup your project
+- **Identical Conditions**: All models receive the same inputs for fair comparison
+
+### Workflow 4: Project Bootstrapping
 
 ```bash
 # Option 1: One-step setup (recommended)
@@ -414,6 +536,130 @@ task-o-matic init bootstrap my-app
 task-o-matic tasks create --title "Set up development environment" --ai-enhance --stream
 ```
 
+## 📊 Benchmarking Commands
+
+### Basic Model Benchmarking
+
+Compare different AI models on specific operations:
+
+```bash
+# Benchmark PRD parsing across multiple models
+task-o-matic benchmark run prd-parse \
+  --file requirements.md \
+  --models "openai:gpt-4o,anthropic:claude-3-5-sonnet,openrouter:qwen/qwen-2.5-72b-instruct" \
+  --concurrency 3 \
+  --delay 1000
+
+# Benchmark task splitting
+task-o-matic benchmark run task-breakdown \
+  --task-id <task-id> \
+  --models "openai:gpt-4o,anthropic:claude-3-5-sonnet" \
+  --concurrency 2
+
+# View benchmark results
+task-o-matic benchmark list
+task-o-matic benchmark show <run-id>
+task-o-matic benchmark compare <run-id>
+```
+
+### Complete Workflow Benchmarking
+
+Test entire project workflows across multiple AI models:
+
+```bash
+# Interactive workflow benchmark (recommended)
+task-o-matic benchmark workflow \
+  --models "openai:gpt-4o,anthropic:claude-3-5-sonnet,openrouter:qwen/qwen-2.5-72b-instruct"
+```
+
+**What happens:**
+1. **Phase 1**: You answer workflow questions once (project setup, PRD creation, etc.)
+2. **Phase 2**: All models execute the identical workflow in parallel
+3. **Results**: Comprehensive comparison table with metrics and model selection
+
+**Full automation example:**
+
+```bash
+task-o-matic benchmark workflow \
+  --models "openai:gpt-4o,anthropic:claude-3-5-sonnet" \
+  --project-name "my-saas-platform" \
+  --project-description "Team collaboration platform with real-time messaging" \
+  --init-method ai \
+  --prd-method ai \
+  --auto-accept \
+  --refine-feedback "Add more technical details and security considerations" \
+  --generate-instructions "Focus on MVP features and break into small tasks" \
+  --split-all \
+  --concurrency 2 \
+  --delay 2000
+```
+
+**Output includes:**
+
+```
+📊 Workflow Benchmark Results
+
+Model                                    | Duration   | Tasks | PRD Size   | Steps | Cost
+---------------------------------------- | ---------- | ----- | ---------- | ----- | ----------
+openai:gpt-4o                            | 45234ms    | 12    | 2843 chars | 5/5   | $0.023400
+anthropic:claude-3-5-sonnet              | 42156ms    | 15    | 3021 chars | 5/5   | $0.019800
+
+🔍 Detailed Comparison
+
+[1] openai:gpt-4o
+    Duration: 45234ms
+    Steps Completed: 5/5
+    Init: 2341ms
+    PRD Generation: 12456ms
+    Task Generation: 8234ms
+    Task Splitting: 3421ms
+    Tasks Created: 12
+    PRD Size: 2843 characters
+    Tokens: 4521 (Prompt: 2341, Completion: 2180)
+    Cost: $0.023400
+
+🎯 Model Selection
+Would you like to select a model and set up your project with its results? (y/N)
+```
+
+### Benchmark Options
+
+All benchmark commands support:
+
+- `--models <list>`: Comma-separated model list (required)
+- `--concurrency <number>`: Max parallel requests (default: 3)
+- `--delay <ms>`: Delay between requests (default: 1000ms)
+
+**Model format:** `provider:model[:reasoning=<tokens>]`
+
+**Examples:**
+- `openai:gpt-4o`
+- `anthropic:claude-3-5-sonnet`
+- `openrouter:anthropic/claude-3.5-sonnet`
+- `openrouter:openai/o1-preview:reasoning=50000`
+
+### Workflow Benchmark Inheritance
+
+The `benchmark workflow` command supports ALL workflow command options:
+
+```bash
+# All these workflow options work in benchmarks:
+--project-name, --init-method, --project-description
+--frontend, --backend, --database, --auth/--no-auth
+--prd-method, --prd-file, --prd-description, --prd-content
+--refine-feedback, --generate-instructions
+--split-tasks, --split-all, --split-instructions
+--skip-init, --skip-prd, --skip-refine, --skip-generate, --skip-split
+--stream, --auto-accept, --config-file
+```
+
+This allows you to:
+- **Pre-configure workflow steps** via command-line options
+- **Skip interactive questions** for automated benchmarking
+- **Compare identical workflows** across different models
+- **Test specific scenarios** (e.g., only AI vs only custom stack)
+```
+
 ## 🔧 Environment Variables
 
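Editor's note: the `provider:model[:reasoning=<tokens>]` spec format documented in the hunk above can be parsed in a few lines. The helper below is an editorial sketch, not a task-o-matic export; it only relies on the format and examples shown in the README (OpenRouter model names contain `/` but never `:`).

```typescript
// Editorial sketch: parse "provider:model[:reasoning=<tokens>]" spec strings.
// parseModelSpec is a hypothetical helper, not part of the package.
function parseModelSpec(spec: string): {
  provider: string;
  model: string;
  reasoningTokens?: number;
} {
  const parts = spec.split(":");
  if (parts.length < 2) throw new Error(`invalid model spec: ${spec}`);
  const provider = parts[0];
  let rest = parts.slice(1);
  let reasoningTokens: number | undefined;
  const last = rest[rest.length - 1];
  // The optional trailing segment carries the reasoning token budget.
  if (rest.length > 1 && last.startsWith("reasoning=")) {
    reasoningTokens = Number(last.slice("reasoning=".length));
    rest = rest.slice(0, -1);
  }
  return { provider, model: rest.join(":"), reasoningTokens };
}
```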
@@ -445,6 +691,23 @@ AI_TEMPERATURE=0.7
 - **PRD Parsing**: `claude-3.5-sonnet` or `gpt-4`
 - **Task Enhancement**: `claude-3-haiku` or `gpt-3.5-turbo`
 - **Task Breakdown**: `claude-3.5-sonnet` for complex tasks
+- **Workflow Benchmarking**: Test 2-3 models to find optimal performance for your use case
+
+### Choosing the Right Model
+
+Not sure which model to use? Try workflow benchmarking:
+
+```bash
+# Test your specific workflow across multiple models
+task-o-matic benchmark workflow \
+  --models "openai:gpt-4o,anthropic:claude-3-5-sonnet,openrouter:qwen/qwen-2.5-72b-instruct" \
+  --project-description "Your project description here"
+
+# The benchmark will show you:
+# - Performance (speed, tokens, cost)
+# - Quality (tasks created, PRD completeness)
+# - Best model for your specific needs
+```
 
 ## 📁 Storage Structure
 
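Editor's note: the benchmark comparison table shown earlier reports duration, task count, and cost per model. One simple way to rank such results is cost per task, sketched below with the sample numbers from that table. `ModelRun` and `pickByCostPerTask` are illustrative names, not task-o-matic exports, and cost-per-task is just one possible ranking metric.

```typescript
// Editorial sketch: rank per-model benchmark results by cost per task.
// Not part of the package; field names mirror the comparison table columns.
interface ModelRun {
  model: string;
  durationMs: number;
  tasksCreated: number;
  costUsd: number;
}

function pickByCostPerTask(runs: ModelRun[]): ModelRun {
  if (runs.length === 0) throw new Error("no runs to compare");
  // Lower cost per created task wins.
  return runs.reduce((best, r) =>
    r.costUsd / r.tasksCreated < best.costUsd / best.tasksCreated ? r : best
  );
}
```

With the sample table above (gpt-4o: $0.0234 for 12 tasks; claude-3-5-sonnet: $0.0198 for 15 tasks), the sonnet run wins on cost per task.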
package/dist/commands/benchmark.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAapC,eAAO,MAAM,gBAAgB,SAE5B,CAAC"}