npm - @arvorco/relentless - Versions diffs - 0.3.1 → 0.4.3 - Mend

@arvorco/relentless 0.3.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

package/.claude/commands/relentless.convert.md +25 -0
package/.claude/skills/analyze/SKILL.md +113 -40
package/.claude/skills/analyze/templates/analysis-report.md +138 -0
package/.claude/skills/checklist/SKILL.md +144 -51
package/.claude/skills/checklist/templates/checklist.md +43 -11
package/.claude/skills/clarify/SKILL.md +70 -11
package/.claude/skills/constitution/SKILL.md +61 -3
package/.claude/skills/constitution/templates/constitution.md +241 -160
package/.claude/skills/constitution/templates/prompt.md +150 -20
package/.claude/skills/convert/SKILL.md +248 -0
package/.claude/skills/implement/SKILL.md +82 -34
package/.claude/skills/plan/SKILL.md +139 -27
package/.claude/skills/plan/templates/plan.md +92 -9
package/.claude/skills/specify/SKILL.md +112 -20
package/.claude/skills/specify/templates/spec.md +40 -5
package/.claude/skills/tasks/SKILL.md +76 -1
package/.claude/skills/tasks/templates/tasks.md +5 -4
package/CHANGELOG.md +84 -1
package/MANUAL.md +40 -0
package/README.md +268 -13
package/bin/relentless.ts +292 -5
package/package.json +2 -2
package/relentless/config.json +45 -1
package/relentless/constitution.md +41 -19
package/relentless/prompt.md +142 -72
package/src/agents/amp.ts +53 -13
package/src/agents/claude.ts +70 -15
package/src/agents/codex.ts +73 -14
package/src/agents/droid.ts +68 -14
package/src/agents/exec.ts +96 -0
package/src/agents/gemini.ts +59 -16
package/src/agents/opencode.ts +188 -9
package/src/cli/fallback-order.ts +210 -0
package/src/cli/index.ts +63 -0
package/src/cli/mode-flag.ts +198 -0
package/src/cli/review-flags.ts +192 -0
package/src/config/loader.ts +16 -1
package/src/config/schema.ts +157 -2
package/src/execution/runner.ts +144 -21
package/src/init/scaffolder.ts +285 -25
package/src/prd/parser.ts +111 -6
package/src/prd/types.ts +136 -0
package/src/review/index.ts +92 -0
package/src/review/prompt.ts +293 -0
package/src/review/runner.ts +337 -0
package/src/review/tasks/docs.ts +529 -0
package/src/review/tasks/index.ts +80 -0
package/src/review/tasks/lint.ts +436 -0
package/src/review/tasks/quality.ts +760 -0
package/src/review/tasks/security.ts +452 -0
package/src/review/tasks/test.ts +456 -0
package/src/review/tasks/typecheck.ts +323 -0
package/src/review/types.ts +139 -0
package/src/routing/cascade.ts +310 -0
package/src/routing/classifier.ts +338 -0
package/src/routing/estimate.ts +270 -0
package/src/routing/fallback.ts +512 -0
package/src/routing/index.ts +124 -0
package/src/routing/registry.ts +501 -0
package/src/routing/report.ts +570 -0
package/src/routing/router.ts +287 -0
package/src/tui/App.tsx +2 -0
package/src/tui/TUIRunner.tsx +103 -8
package/src/tui/components/CurrentStory.tsx +23 -1
package/src/tui/hooks/useTUI.ts +1 -0
package/src/tui/types.ts +9 -0

package/src/routing/cascade.ts ADDED Viewed

@@ -0,0 +1,310 @@
+/**
+ * Cascade/Escalation Logic Module
+ *
+ * Wraps task execution with automatic retry/escalation logic.
+ * When a task fails with a smaller model, it automatically retries
+ * with a more capable model from the escalation path.
+ *
+ * @module src/routing/cascade
+ */
+import { z } from "zod";
+import type { AgentResult } from "../agents/types";
+import type { EscalationConfig } from "../config/schema";
+import type { UserStory } from "../prd/types";
+import { getHarnessForModel } from "./registry";
+import { calculateCost, estimateTokens } from "./router";
+/**
+ * Zod schema describing the outcome of a single escalation attempt.
+ *
+ * `EscalationStep`, declared right after the schema, is the static type
+ * inferred from it, so the runtime validator and the compile-time type
+ * cannot drift apart.
+ */
+export const EscalationStepSchema = z.object({
+  /** Attempt number (1-based integer) */
+  attempt: z.number().int().min(1),
+  /** Harness used for this attempt */
+  harness: z.string(),
+  /** Model used for this attempt */
+  model: z.string(),
+  /** Result of the attempt: success, failure, or rate_limited */
+  result: z.enum(["success", "failure", "rate_limited"]),
+  /** Error message; optional, expected only when the attempt did not succeed */
+  error: z.string().optional(),
+  /** Cost of this attempt (optional) */
+  cost: z.number().optional(),
+  /** Duration of this attempt in milliseconds (optional) */
+  duration: z.number().optional(),
+});
+export type EscalationStep = z.infer<typeof EscalationStepSchema>;
+/**
+ * Zod schema describing the overall outcome of a cascade execution.
+ *
+ * `EscalationResult`, declared right after the schema, is the static type
+ * inferred from it. `blocked` and `blockReason` are optional and are only
+ * expected on results where no further progress was possible.
+ */
+export const EscalationResultSchema = z.object({
+  /** Whether the task ultimately succeeded */
+  success: z.boolean(),
+  /** Final harness that executed the task (or last attempted) */
+  finalHarness: z.string(),
+  /** Final model that executed the task (or last attempted) */
+  finalModel: z.string(),
+  /** Total number of attempts made (integer, at least 1) */
+  attempts: z.number().int().min(1),
+  /** List of all escalation steps, one entry per attempt */
+  escalations: z.array(EscalationStepSchema),
+  /** Total actual cost across all attempts */
+  actualCost: z.number(),
+  /** Whether the task was marked as blocked */
+  blocked: z.boolean().optional(),
+  /** Human-readable reason why the task was blocked */
+  blockReason: z.string().optional(),
+});
+export type EscalationResult = z.infer<typeof EscalationResultSchema>;
+/**
+ * Callback that runs one attempt of a task on a specific harness and model.
+ *
+ * It receives the harness name, the model ID and the task prompt (in that
+ * order) and resolves with the agent's result for that single attempt.
+ */
+export type TaskExecutor = (
+  harness: string,
+  model: string,
+  prompt: string
+) => Promise<AgentResult>;
+/**
+ * Gets the next model in the escalation path
+ *
+ * Only keys that the escalation path defines itself are considered. A plain
+ * index lookup would also resolve inherited `Object.prototype` members
+ * (for example `"constructor"` or `"toString"`) and hand back a function
+ * instead of a model ID.
+ *
+ * @param currentModel - Current model ID
+ * @param escalationPath - Map of current model to next model
+ * @returns Next model ID or undefined if no next model exists
+ */
+export function getNextModel(
+  currentModel: string,
+  escalationPath: Record<string, string>
+): string | undefined {
+  // Own-property check keeps inherited prototype members out of the lookup.
+  if (!Object.prototype.hasOwnProperty.call(escalationPath, currentModel)) {
+    return undefined;
+  }
+  return escalationPath[currentModel];
+}
+/**
+ * Tells whether an agent result counts as a failed attempt.
+ *
+ * An attempt only passes when the process exited with code 0 and the agent
+ * reported completion; anything else is a failure.
+ *
+ * @param result - Agent execution result
+ * @returns true when the task failed, false otherwise
+ */
+function isTaskFailure(result: AgentResult): boolean {
+  const exitedCleanly = result.exitCode === 0;
+  return !(exitedCleanly && result.isComplete);
+}
+/**
+ * Maps an agent result onto one of the three escalation outcome labels.
+ *
+ * A rate-limited result is reported as such regardless of its exit status;
+ * otherwise the label follows isTaskFailure().
+ *
+ * @param result - Agent execution result
+ * @returns "rate_limited", "failure" or "success"
+ */
+function getResultType(
+  result: AgentResult
+): "success" | "failure" | "rate_limited" {
+  // Rate limiting takes precedence over the pass/fail distinction.
+  if (result.rateLimited) return "rate_limited";
+  return isTaskFailure(result) ? "failure" : "success";
+}
+/**
+ * Derives a short error description from an agent result.
+ *
+ * - Rate-limited results yield the fixed text "Rate limit exceeded".
+ * - Failed results yield the first output line (trimmed) that mentions
+ *   "error", "failed" or "exception" in any letter case, or a generic
+ *   fallback when no such line exists.
+ * - Successful results yield undefined.
+ *
+ * @param result - Agent execution result
+ * @returns Error message or undefined
+ */
+function extractErrorMessage(result: AgentResult): string | undefined {
+  if (result.rateLimited) {
+    return "Rate limit exceeded";
+  }
+  if (!isTaskFailure(result)) {
+    return undefined;
+  }
+  // Substrings that mark a line as describing an error.
+  const markers = ["error", "failed", "exception"];
+  const firstMatch = result.output.split("\n").find((rawLine) => {
+    const lowered = rawLine.toLowerCase();
+    return markers.some((marker) => lowered.includes(marker));
+  });
+  const trimmed = firstMatch?.trim();
+  // A missing or whitespace-only match falls back to the generic message.
+  return trimmed ? trimmed : "Task execution failed";
+}
+/**
+ * Executes a task with automatic cascade/escalation logic
+ *
+ * When a task fails with the initial model, this function automatically
+ * escalates to more capable models according to the escalation path
+ * until the task succeeds or max attempts is reached.
+ *
+ * Behaviour per attempt:
+ * - the executor is called and timed, and the attempt's cost is computed
+ *   from a token estimate made once for the story;
+ * - a step is appended to `escalations` for every attempt, with an error
+ *   message when the attempt did not succeed;
+ * - success returns immediately; a non-success with `config.enabled` false
+ *   returns without retrying and without being marked as blocked;
+ * - a missing next model returns a blocked result;
+ * - rate-limited attempts are escalated exactly like failures.
+ *
+ * `finalHarness` and `finalModel` always name the harness/model of the last
+ * attempt that actually ran. When the attempt budget is exhausted, no
+ * escalation is logged and the model is not switched, because no further
+ * attempt will be made.
+ *
+ * If `config.maxAttempts` is less than 1 the executor is never called and a
+ * blocked result with `attempts: 0` is returned.
+ *
+ * @param story - User story being executed
+ * @param initialHarness - Starting harness
+ * @param initialModel - Starting model
+ * @param prompt - Task prompt
+ * @param config - Escalation configuration
+ * @param executor - Function to execute the task
+ * @returns Escalation result with success status and all attempts
+ *
+ * @example
+ * ```typescript
+ * const result = await executeWithCascade(
+ *   story,
+ *   "claude",
+ *   "haiku-4.5",
+ *   "Fix the bug",
+ *   config,
+ *   async (harness, model, prompt) => {
+ *     const agent = getAgent(harness);
+ *     return agent.invoke(prompt, { model });
+ *   }
+ * );
+ *
+ * if (result.success) {
+ *   console.log(`Completed with ${result.finalModel}`);
+ * } else if (result.blocked) {
+ *   console.log(`Blocked: ${result.blockReason}`);
+ * }
+ * ```
+ */
+export async function executeWithCascade(
+  story: UserStory,
+  initialHarness: string,
+  initialModel: string,
+  prompt: string,
+  config: EscalationConfig,
+  executor: TaskExecutor
+): Promise<EscalationResult> {
+  const escalations: EscalationStep[] = [];
+  let totalCost = 0;
+  let currentHarness = initialHarness;
+  let currentModel = initialModel;
+  let attempt = 0;
+  // Estimate tokens once; every attempt's cost is derived from this estimate
+  const estimatedTokens = estimateTokens(story);
+  // Snapshot of the current state as a result object. `blocked` and
+  // `blockReason` are only present when a reason is supplied.
+  const buildResult = (
+    success: boolean,
+    blockReason?: string
+  ): EscalationResult => ({
+    success,
+    finalHarness: currentHarness,
+    finalModel: currentModel,
+    attempts: attempt,
+    escalations,
+    actualCost: totalCost,
+    ...(blockReason !== undefined ? { blocked: true, blockReason } : {}),
+  });
+  while (attempt < config.maxAttempts) {
+    attempt++;
+    // Execute the task and measure its wall-clock duration
+    const startTime = Date.now();
+    const result = await executor(currentHarness, currentModel, prompt);
+    const duration = Date.now() - startTime;
+    // Calculate cost for this attempt
+    const attemptCost = calculateCost(currentModel, estimatedTokens);
+    totalCost += attemptCost;
+    // Determine result type
+    const resultType = getResultType(result);
+    // Record the escalation step
+    const step: EscalationStep = {
+      attempt,
+      harness: currentHarness,
+      model: currentModel,
+      result: resultType,
+      cost: attemptCost,
+      duration,
+    };
+    // Add error message if applicable
+    if (resultType !== "success") {
+      step.error = extractErrorMessage(result);
+    }
+    escalations.push(step);
+    // If successful, return immediately
+    if (resultType === "success") {
+      return buildResult(true);
+    }
+    // If escalation is disabled, don't retry
+    if (!config.enabled) {
+      return buildResult(false);
+    }
+    const outOfAttempts = attempt >= config.maxAttempts;
+    // Try to get next model from escalation path
+    const nextModel = getNextModel(currentModel, config.escalationPath);
+    // If no next model, we're blocked
+    if (!nextModel) {
+      const reason =
+        Object.keys(config.escalationPath).length === 0
+          ? "no escalation path configured"
+          : `no next model in escalation path for ${currentModel}`;
+      return buildResult(
+        false,
+        outOfAttempts
+          ? `Task blocked: max attempts (${config.maxAttempts}) reached and ${reason}`
+          : `Task blocked: ${reason}`
+      );
+    }
+    // No attempt budget left: stop here so the result keeps reporting the
+    // harness/model that actually ran, and no escalation is announced.
+    if (outOfAttempts) {
+      break;
+    }
+    // Log escalation
+    console.log(`Escalating from ${currentModel} to ${nextModel}`);
+    // Update model (and possibly harness) for next attempt
+    currentModel = nextModel;
+    // Switch harness only when the registry knows one for the next model
+    const nextHarness = getHarnessForModel(nextModel);
+    if (nextHarness) {
+      currentHarness = nextHarness;
+    }
+  }
+  // Max attempts reached
+  return buildResult(
+    false,
+    `Task blocked: max attempts (${config.maxAttempts}) reached`
+  );
+}

package/src/routing/classifier.ts ADDED Viewed

@@ -0,0 +1,338 @@
+/**
+ * Hybrid Complexity Classifier
+ *
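+ * Implements two-phase task complexity classification:
+ * 1. Fast heuristic analysis (< 50ms, no API calls)
+ * 2. LLM fallback when confidence < 0.8 (planned; not implemented yet, so the
+ *    heuristic result is returned with a low-confidence note instead)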
+ *
+ * The classifier analyzes user story title, description, and acceptance criteria
+ * to determine task complexity: simple, medium, complex, or expert.
+ *
+ * @module src/routing/classifier
+ */
+import type { UserStory } from "../prd/types";
+import type { Complexity } from "../config/schema";
+/**
+ * Result of task complexity classification.
+ *
+ * Returned by both classifyTask() and classifyWithLLM().
+ */
+export interface ClassificationResult {
+  /** The determined complexity level */
+  complexity: Complexity;
+  /** Confidence score from 0.0 to 1.0 */
+  confidence: number;
+  /** Human-readable explanation of the classification */
+  reasoning: string;
+  /** Whether the LLM was used for classification (false for heuristic-only results) */
+  usedLLM: boolean;
+}
+/**
+ * A group of keyword patterns for one complexity level.
+ *
+ * The weight belongs to the group as a whole, not to an individual pattern:
+ * countPatternMatches() multiplies the number of matching patterns by it.
+ */
+interface KeywordPattern {
+  patterns: RegExp[];
+  weight: number;
+}
+/**
+ * Simple task keyword patterns.
+ * Tasks that are trivial: typos, comments, renaming, formatting.
+ *
+ * Several patterns overlap (for example `typo` and `fix typo`), so one phrase
+ * can match more than one pattern and count more than once.
+ */
+const SIMPLE_PATTERNS: KeywordPattern = {
+  patterns: [
+    /\bfix\s+typo/i,
+    /\btypo\b/i,
+    /\bupdate\s+docs?/i,
+    /\bdocumentation\s+update/i,
+    /\badd\s+comment/i,
+    /\brename\b/i,
+    /\bformat\b/i,
+    /\bformatting\b/i,
+    /\breadme/i,
+    /\bcleanup\s+comment/i,
+    /\bfix\s+spelling/i,
+    /\bcorrect\s+typo/i,
+    /\bupdate\s+readme/i,
+    /\bfix\s+indent/i,
+    /\bremove\s+whitespace/i,
+    /\bfix\s+lint/i,
+    /\blint\s+error/i,
+  ],
+  weight: 0.3,
+};
+/**
+ * Medium task keyword patterns.
+ * Standard development tasks: features, tests, refactoring, API work.
+ *
+ * Patterns without a trailing `\b` are prefix matches, so they also match
+ * inflected forms of the keyword.
+ */
+const MEDIUM_PATTERNS: KeywordPattern = {
+  patterns: [
+    /\bimplement\b/i,
+    /\badd\s+feature/i,
+    /\badd\s+new\b/i,
+    /\brefactor\b/i,
+    /\btest\b/i,
+    /\bapi\b/i,
+    /\bendpoint\b/i,
+    /\bcreate\b/i,
+    /\bbuild\b/i,
+    /\bfix\s+bug/i,
+    /\bvalidat/i, // validation, validate
+    /\bhandl/i, // handle, handler, handling
+    /\bintegrat/i, // integrate (but not as strong as complex)
+    /\bmodify\b/i,
+    /\bupdate\s+logic/i,
+    /\badd\s+support/i,
+    /\bfeature\b/i, // standalone feature keyword
+    /\bprofile\b/i, // user profile work
+  ],
+  weight: 0.25,
+};
+/**
+ * Complex task keyword patterns.
+ * Advanced tasks: architecture, security, authentication, migrations.
+ *
+ * Some entries require a keyword followed by a qualifying noun (for example
+ * "integrat…" followed by service, api or system), so the bare keyword alone
+ * does not match them.
+ */
+const COMPLEX_PATTERNS: KeywordPattern = {
+  patterns: [
+    /\barchitect/i,
+    /\bintegrat\w*\s+(?:service|api|system)/i,
+    /\bmigrat/i,
+    /\bsecurity\b/i,
+    /\bauth\b/i,
+    /\boauth/i,
+    /\bjwt\b/i,
+    /\bencrypt/i,
+    /\bdatabase\s+(?:design|schema|migrat)/i,
+    /\bscalabil/i,
+    /\bcaching\s+(?:strategy|layer)/i,
+    /\berror\s+handling\s+(?:strategy|system)/i,
+    /\bthird[- ]party/i,
+    /\bexternal\s+(?:api|service)/i,
+    /\bpayment/i,
+    /\bwebhook/i,
+    /\bqueue\s+(?:system|processing)/i,
+  ],
+  weight: 0.35,
+};
+/**
+ * Expert task keyword patterns.
+ * Highly complex tasks: performance, distributed systems, concurrency.
+ *
+ * This group carries the largest weight of the four keyword groups (0.4).
+ */
+const EXPERT_PATTERNS: KeywordPattern = {
+  patterns: [
+    /\bredesign\b/i,
+    /\bperformance\s+(?:optim|improv|tun)/i,
+    /\bdistribut/i,
+    /\bconcurren/i,
+    /\bparallel\b/i,
+    /\basync\b/i,
+    /\breal[- ]time/i,
+    /\bmicroservice/i,
+    /\bevent[- ]driven/i,
+    /\bcritical\s+path/i,
+    /\bhigh\s+availabil/i,
+    /\bfault\s+toleran/i,
+    /\bload\s+balanc/i,
+    /\bsharding\b/i,
+    /\breplication\b/i,
+    /\brace\s+condition/i,
+    /\bdeadlock/i,
+    /\bthread[- ]safe/i,
+  ],
+  weight: 0.4,
+};
+/**
+ * File patterns that boost confidence for certain complexity levels.
+ *
+ * Each entry adds `boost` to the score of `complexity` when `pattern` matches.
+ * Note that `/\.md$/i` has no multiline flag, so it only matches at the very
+ * end of the text it is tested against.
+ * NOTE(review): in classifyTask() these are tested against the combined story
+ * text, not against file paths - confirm that this is intended.
+ */
+const FILE_PATTERN_BOOSTS: Array<{
+  pattern: RegExp;
+  complexity: Complexity;
+  boost: number;
+}> = [
+  // Documentation files boost simple confidence
+  { pattern: /readme/i, complexity: "simple", boost: 0.15 },
+  { pattern: /\.md$/i, complexity: "simple", boost: 0.1 },
+  { pattern: /docs?\b/i, complexity: "simple", boost: 0.1 },
+  { pattern: /changelog/i, complexity: "simple", boost: 0.1 },
+  // Auth/security patterns boost complex confidence
+  { pattern: /\bjwt\b/i, complexity: "complex", boost: 0.15 },
+  { pattern: /\bauth/i, complexity: "complex", boost: 0.15 },
+  { pattern: /\boauth/i, complexity: "complex", boost: 0.2 },
+  { pattern: /\bsecurity/i, complexity: "complex", boost: 0.1 },
+  { pattern: /\btoken/i, complexity: "complex", boost: 0.1 },
+  { pattern: /\bencrypt/i, complexity: "complex", boost: 0.15 },
+  // Performance patterns boost expert confidence
+  { pattern: /\bperformance/i, complexity: "expert", boost: 0.15 },
+  { pattern: /\boptimiz/i, complexity: "expert", boost: 0.1 },
+  { pattern: /\bconcurren/i, complexity: "expert", boost: 0.15 },
+  { pattern: /\bparallel/i, complexity: "expert", boost: 0.15 },
+];
+/**
+ * Scores a text against one keyword group.
+ *
+ * Every pattern of the group that matches the text counts once; the number
+ * of matching patterns is then multiplied by the group's weight.
+ *
+ * @param text - Text to test the patterns against
+ * @param patterns - Keyword group (patterns plus weight)
+ * @returns Number of matching patterns times the group weight
+ */
+function countPatternMatches(text: string, patterns: KeywordPattern): number {
+  const hits = patterns.patterns.filter((regex) => regex.test(text)).length;
+  return hits * patterns.weight;
+}
+/**
+ * Builds the text that the heuristics analyze for a user story.
+ *
+ * Title, description and every acceptance criterion are joined with single
+ * spaces and lower-cased. Missing fields contribute an empty string (title,
+ * description) or nothing at all (acceptance criteria).
+ *
+ * @param story - User story to read the text from
+ * @returns Lower-cased, space-joined story text
+ */
+function getAnalyzableText(story: UserStory): string {
+  const title = story.title || "";
+  const description = story.description || "";
+  const criteria = story.acceptanceCriteria || [];
+  return [title, description].concat(criteria).join(" ").toLowerCase();
+}
+/**
+ * Computes the keyword score of a text for each complexity level.
+ *
+ * @param text - Text to score
+ * @returns Scores keyed by complexity, in the order simple, medium, complex, expert
+ */
+function calculateHeuristicScores(text: string): Record<Complexity, number> {
+  const scoreFor = (group: KeywordPattern): number =>
+    countPatternMatches(text, group);
+  const simple = scoreFor(SIMPLE_PATTERNS);
+  const medium = scoreFor(MEDIUM_PATTERNS);
+  const complex = scoreFor(COMPLEX_PATTERNS);
+  const expert = scoreFor(EXPERT_PATTERNS);
+  // Key order is kept as simple -> expert.
+  return { simple, medium, complex, expert };
+}
+/**
+ * Adds the boost of every matching file pattern to the given scores.
+ *
+ * The `scores` object is modified in place; nothing is returned.
+ *
+ * @param text - Text to test the boost patterns against
+ * @param scores - Complexity scores to update
+ */
+function applyFilePatternBoosts(
+  text: string,
+  scores: Record<Complexity, number>
+): void {
+  FILE_PATTERN_BOOSTS.filter((entry) => entry.pattern.test(text)).forEach(
+    (entry) => {
+      scores[entry.complexity] += entry.boost;
+    }
+  );
+}
+/**
+ * Picks the winning complexity level from a score table and rates how
+ * confident that pick is.
+ *
+ * Levels are ranked by score, highest first. The confidence starts at
+ * 0.5 plus the winner's score, gains up to 0.15 from the gap to the
+ * runner-up, and is capped at 0.95.
+ *
+ * @param scores - Score per complexity level
+ * @returns Winning complexity, confidence and a human-readable reasoning
+ */
+function determineComplexity(scores: Record<Complexity, number>): {
+  complexity: Complexity;
+  confidence: number;
+  reasoning: string;
+} {
+  const ranked = (Object.entries(scores) as Array<[Complexity, number]>).sort(
+    (left, right) => right[1] - left[1]
+  );
+  const [winner, winnerScore] = ranked[0];
+  const runnerUpScore = ranked[1][1];
+  // Confidence = absolute strength of the winner + lead over the runner-up
+  const baseConfidence = Math.min(0.5 + winnerScore, 0.95);
+  const gapBoost = Math.min((winnerScore - runnerUpScore) * 0.2, 0.15);
+  const confidence = Math.min(baseConfidence + gapBoost, 0.95);
+  // List every level that received a positive score, in ranking order
+  const signals: string[] = [];
+  for (const [level, score] of ranked) {
+    if (score > 0) {
+      signals.push(`${level}(${score.toFixed(2)})`);
+    }
+  }
+  const signalsFound = signals.join(", ");
+  let reasoning: string;
+  if (signalsFound.length > 0) {
+    reasoning = `Heuristic analysis found signals: ${signalsFound}. Winner: ${winner}`;
+  } else {
+    reasoning = `No strong signals found, defaulting to ${winner}`;
+  }
+  return { complexity: winner, confidence, reasoning };
+}
+/**
+ * Classifies a user story by complexity.
+ *
+ * Phase 1 scores the story text with keyword heuristics and file-pattern
+ * boosts. Phase 2 (LLM fallback for confidence below 0.8) is not implemented
+ * yet: low-confidence stories still get the heuristic complexity, with the
+ * confidence raised by 0.1 but capped at 0.79 and a manual-review note
+ * appended to the reasoning.
+ *
+ * @param story - The user story to classify
+ * @returns Classification result with complexity, confidence, reasoning, and usedLLM flag
+ */
+export async function classifyTask(
+  story: UserStory
+): Promise<ClassificationResult> {
+  const text = getAnalyzableText(story);
+  // Phase 1: keyword scores, then file-pattern boosts applied in place
+  const scores = calculateHeuristicScores(text);
+  applyFilePatternBoosts(text, scores);
+  const verdict = determineComplexity(scores);
+  const isConfident = verdict.confidence >= 0.8;
+  if (!isConfident) {
+    // Phase 2 placeholder
+    // TODO: Implement actual LLM call for low confidence cases
+    // The LLM call would use Haiku for cost efficiency
+    return {
+      complexity: verdict.complexity,
+      confidence: Math.min(verdict.confidence + 0.1, 0.79), // Keep below 0.8 to indicate uncertainty
+      reasoning: `${verdict.reasoning} (Low confidence - consider manual review)`,
+      usedLLM: false, // Will be true once LLM fallback is implemented
+    };
+  }
+  // High confidence: the heuristic result is used as is
+  return {
+    complexity: verdict.complexity,
+    confidence: verdict.confidence,
+    reasoning: verdict.reasoning,
+    usedLLM: false,
+  };
+}
+/**
+ * Placeholder for LLM-based classification.
+ *
+ * Not implemented: every call rejects with an Error. Neither argument is
+ * read.
+ *
+ * @throws Error always, stating that LLM classification is not implemented
+ * @internal
+ */
+export async function classifyWithLLM(
+  _story: UserStory,
+  _heuristicSuggestion: Complexity
+): Promise<ClassificationResult> {
+  // TODO: Implement LLM-based classification using Haiku
+  // Planned prompt contents:
+  // - Task title and description
+  // - Acceptance criteria
+  // - Heuristic suggestion for context
+  //
+  // Planned response format (JSON):
+  // { complexity: "simple"|"medium"|"complex"|"expert", reasoning: "..." }
+  const notImplemented = new Error(
+    "LLM classification not implemented yet. Use classifyTask() which handles fallback."
+  );
+  throw notImplemented;
+}