@ryanfw/prompt-orchestration-pipeline 0.0.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +415 -24
  2. package/package.json +45 -8
  3. package/src/api/files.js +48 -0
  4. package/src/api/index.js +149 -53
  5. package/src/api/validators/seed.js +141 -0
  6. package/src/cli/index.js +456 -29
  7. package/src/cli/run-orchestrator.js +39 -0
  8. package/src/cli/update-pipeline-json.js +47 -0
  9. package/src/components/DAGGrid.jsx +649 -0
  10. package/src/components/JobCard.jsx +96 -0
  11. package/src/components/JobDetail.jsx +159 -0
  12. package/src/components/JobTable.jsx +202 -0
  13. package/src/components/Layout.jsx +134 -0
  14. package/src/components/TaskFilePane.jsx +570 -0
  15. package/src/components/UploadSeed.jsx +239 -0
  16. package/src/components/ui/badge.jsx +20 -0
  17. package/src/components/ui/button.jsx +43 -0
  18. package/src/components/ui/card.jsx +20 -0
  19. package/src/components/ui/focus-styles.css +60 -0
  20. package/src/components/ui/progress.jsx +26 -0
  21. package/src/components/ui/select.jsx +27 -0
  22. package/src/components/ui/separator.jsx +6 -0
  23. package/src/config/paths.js +99 -0
  24. package/src/core/config.js +270 -9
  25. package/src/core/file-io.js +202 -0
  26. package/src/core/module-loader.js +157 -0
  27. package/src/core/orchestrator.js +275 -294
  28. package/src/core/pipeline-runner.js +95 -41
  29. package/src/core/progress.js +66 -0
  30. package/src/core/status-writer.js +331 -0
  31. package/src/core/task-runner.js +719 -73
  32. package/src/core/validation.js +120 -1
  33. package/src/lib/utils.js +6 -0
  34. package/src/llm/README.md +139 -30
  35. package/src/llm/index.js +222 -72
  36. package/src/pages/PipelineDetail.jsx +111 -0
  37. package/src/pages/PromptPipelineDashboard.jsx +223 -0
  38. package/src/providers/deepseek.js +3 -15
  39. package/src/ui/client/adapters/job-adapter.js +258 -0
  40. package/src/ui/client/bootstrap.js +120 -0
  41. package/src/ui/client/hooks/useJobDetailWithUpdates.js +619 -0
  42. package/src/ui/client/hooks/useJobList.js +50 -0
  43. package/src/ui/client/hooks/useJobListWithUpdates.js +335 -0
  44. package/src/ui/client/hooks/useTicker.js +26 -0
  45. package/src/ui/client/index.css +31 -0
  46. package/src/ui/client/index.html +18 -0
  47. package/src/ui/client/main.jsx +38 -0
  48. package/src/ui/config-bridge.browser.js +149 -0
  49. package/src/ui/config-bridge.js +149 -0
  50. package/src/ui/config-bridge.node.js +310 -0
  51. package/src/ui/dist/assets/index-BDABnI-4.js +33399 -0
  52. package/src/ui/dist/assets/style-Ks8LY8gB.css +28496 -0
  53. package/src/ui/dist/index.html +19 -0
  54. package/src/ui/endpoints/job-endpoints.js +300 -0
  55. package/src/ui/file-reader.js +216 -0
  56. package/src/ui/job-change-detector.js +83 -0
  57. package/src/ui/job-index.js +231 -0
  58. package/src/ui/job-reader.js +274 -0
  59. package/src/ui/job-scanner.js +188 -0
  60. package/src/ui/public/app.js +3 -1
  61. package/src/ui/server.js +1636 -59
  62. package/src/ui/sse-enhancer.js +149 -0
  63. package/src/ui/sse.js +204 -0
  64. package/src/ui/state-snapshot.js +252 -0
  65. package/src/ui/transformers/list-transformer.js +347 -0
  66. package/src/ui/transformers/status-transformer.js +307 -0
  67. package/src/ui/watcher.js +61 -7
  68. package/src/utils/dag.js +101 -0
  69. package/src/utils/duration.js +126 -0
  70. package/src/utils/id-generator.js +30 -0
  71. package/src/utils/jobs.js +7 -0
  72. package/src/utils/pipelines.js +44 -0
  73. package/src/utils/task-files.js +271 -0
  74. package/src/utils/ui.jsx +76 -0
  75. package/src/ui/public/index.html +0 -53
  76. package/src/ui/public/style.css +0 -341
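The largest single change is the rewritten task runner; the diff below is package/src/core/task-runner.js (+719 -73). It replaces the flat ORDER array of stage names with a PIPELINE_STAGES table carrying per-stage skip predicates and iteration limits, and it now requires every stage handler to return an { output, flags } envelope, enforced by assertStageResult and the FLAG_SCHEMAS type checks. The sketch below shows what a task module conforming to that contract could look like; the stage names and the envelope shape come from the diff, while the stage bodies are hypothetical placeholders rather than code from the package.

// Hypothetical task module for the new task-runner contract. Each stage receives
// a per-stage context ({ io, llm, meta, data, flags, output, previousStage, ... })
// and must return { output, flags }, where flags is a plain object.
export default {
  async ingestion(context) {
    // For the first stage, context.output is seeded from data.seed.
    return { output: { raw: context.output }, flags: {} };
  },
  async promptTemplating(context) {
    return {
      output: { prompt: `Summarize:\n${JSON.stringify(context.output.raw)}` },
      flags: {},
    };
  },
  async inference(context) {
    // A real module would call the injected context.llm client here; its API
    // lives in src/llm/index.js and is not shown in this diff, so stub it.
    return { output: { text: "stub model response" }, flags: {} };
  },
  async validateStructure(context) {
    const ok = typeof context.output.text === "string" && context.output.text.length > 0;
    // FLAG_SCHEMAS declares validationFailed as a boolean produced by this stage.
    return { output: context.output, flags: { validationFailed: !ok } };
  },
};

runPipeline loads such a module via loadFreshModule, wires each exported function into PIPELINE_STAGES by name, skips any stage without a handler, and repeats critique/refine cycles while flags.validationFailed is set, up to seed.maxRefinements (default 1).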
@@ -1,26 +1,358 @@
  import path from "node:path";
  import { pathToFileURL } from "node:url";
+ import fs from "fs";
  import { createLLM, getLLMEvents } from "../llm/index.js";
+ import { loadFreshModule } from "./module-loader.js";
  import { loadEnvironment } from "./environment.js";
  import { getConfig } from "./config.js";
+ import { createTaskFileIO } from "./file-io.js";
+ import { writeJobStatus } from "./status-writer.js";
+ import { computeDeterministicProgress } from "./progress.js";

- /** Canonical order using the field terms we discussed */
- const ORDER = [
-   "ingestion",
-   "preProcessing",
-   "promptTemplating",
-   "inference",
-   "parsing",
-   "validateStructure",
-   "validateQuality",
-   "critique",
-   "refine",
-   "finalValidation",
-   "integration",
+ /**
+  * Validates that a value is a plain object (not array, null, or class instance).
+  * @param {*} value - The value to check
+  * @returns {boolean} True if the value is a plain object, false otherwise
+  */
+ function isPlainObject(value) {
+   if (typeof value !== "object") {
+     return false;
+   }
+   if (value === null) {
+     return false;
+   }
+   if (Array.isArray(value)) {
+     return false;
+   }
+   if (Object.getPrototypeOf(value) === Object.prototype) {
+     return true;
+   }
+   return false;
+ }
+
+ /**
+  * Validates stage handler return values conform to { output, flags } contract.
+  * @param {string} stageName - The name of the stage for error reporting
+  * @param {*} result - The result returned by the stage handler
+  * @throws {Error} If the result doesn't conform to the expected contract
+  */
+ function assertStageResult(stageName, result) {
+   if (result === null || result === undefined) {
+     throw new Error(`Stage "${stageName}" returned null or undefined`);
+   }
+
+   if (typeof result !== "object") {
+     throw new Error(
+       `Stage "${stageName}" must return an object, got ${typeof result}`
+     );
+   }
+
+   if (!result.hasOwnProperty("output")) {
+     throw new Error(
+       `Stage "${stageName}" result missing required property: output`
+     );
+   }
+
+   if (!result.hasOwnProperty("flags")) {
+     throw new Error(
+       `Stage "${stageName}" result missing required property: flags`
+     );
+   }
+
+   if (!isPlainObject(result.flags)) {
+     throw new Error(
+       `Stage "${stageName}" flags must be a plain object, got ${typeof result.flags}`
+     );
+   }
+ }
+
+ /**
+  * Validates flag values match declared types in schema.
+  * @param {string} stageName - The name of the stage for error reporting
+  * @param {object} flags - The flags object to validate
+  * @param {object} schema - The schema defining expected types for each flag
+  * @throws {Error} If flag types don't match the schema
+  */
+ function validateFlagTypes(stageName, flags, schema) {
+   if (schema === undefined || schema === null) {
+     return;
+   }
+
+   for (const key in schema) {
+     const expectedTypes = schema[key];
+     const actualType = typeof flags[key];
+
+     // Allow undefined flags (they may be optional)
+     if (flags[key] === undefined) {
+       continue;
+     }
+
+     if (typeof expectedTypes === "string") {
+       // Single expected type
+       if (actualType !== expectedTypes) {
+         throw new Error(
+           `Stage "${stageName}" flag "${key}" has type ${actualType}, expected ${expectedTypes}`
+         );
+       }
+     } else if (Array.isArray(expectedTypes)) {
+       // Multiple allowed types
+       if (!expectedTypes.includes(actualType)) {
+         throw new Error(
+           `Stage "${stageName}" flag "${key}" has type ${actualType}, expected one of: ${expectedTypes.join(", ")}`
+         );
+       }
+     }
+   }
+ }
+
+ /**
+  * Detects type conflicts when merging new flags into existing flags.
+  * @param {object} currentFlags - The existing flags object
+  * @param {object} newFlags - The new flags to merge
+  * @param {string} stageName - The name of the stage for error reporting
+  * @throws {Error} If any flag would change type when merged
+  */
+ function checkFlagTypeConflicts(currentFlags, newFlags, stageName) {
+   for (const key of Object.keys(newFlags)) {
+     if (key in currentFlags) {
+       const currentType = typeof currentFlags[key];
+       const newType = typeof newFlags[key];
+       if (currentType !== newType) {
+         throw new Error(
+           `Stage "${stageName}" attempted to change flag "${key}" type from ${currentType} to ${newType}`
+         );
+       }
+     }
+   }
+ }
+
+ /**
+  * Ensures log directory exists before creating log files.
+  * @param {string} workDir - The working directory path
+  * @param {string} jobId - The job ID
+  * @returns {string} The full path to the logs directory
+  */
+ function ensureLogDirectory(workDir, jobId) {
+   const logsPath = path.join(workDir, "files", "logs");
+   fs.mkdirSync(logsPath, { recursive: true });
+   return logsPath;
+ }
+
+ /**
+  * Writes a compact pre-execution snapshot for debugging stage inputs.
+  * Safe: does not throw on write failure; logs warnings instead.
+  * @param {string} stageName - Name of the stage
+  * @param {object} snapshot - Summary data to persist
+  * @param {string} logsDir - Directory to write the snapshot into
+  */
+ function writePreExecutionSnapshot(stageName, snapshot, logsDir) {
+   const snapshotPath = path.join(logsDir, `stage-${stageName}-context.json`);
+   try {
+     fs.writeFileSync(snapshotPath, JSON.stringify(snapshot, null, 2));
+   } catch (error) {
+     console.warn(
+       `[task-runner] Failed to write pre-execution snapshot for ${stageName}: ${error.message}`
+     );
+   }
+ }
+
+ /**
+  * Redirects console output to a log file for a stage.
+  * @param {string} logPath - The path to the log file
+  * @returns {() => void} A function that restores console output and closes the log stream
+  */
+ function captureConsoleOutput(logPath) {
+   // Ensure the directory for the log file exists
+   const logDir = path.dirname(logPath);
+   fs.mkdirSync(logDir, { recursive: true });
+
+   const logStream = fs.createWriteStream(logPath, { flags: "w" });
+
+   // Store original console methods
+   const originalLog = console.log;
+   const originalError = console.error;
+   const originalWarn = console.warn;
+   const originalInfo = console.info;
+   const originalDebug = console.debug;
+
+   // Override console methods to write to stream
+   console.log = (...args) => logStream.write(args.join(" ") + "\n");
+   console.error = (...args) =>
+     logStream.write("[ERROR] " + args.join(" ") + "\n");
+   console.warn = (...args) =>
+     logStream.write("[WARN] " + args.join(" ") + "\n");
+   console.info = (...args) =>
+     logStream.write("[INFO] " + args.join(" ") + "\n");
+   console.debug = (...args) =>
+     logStream.write("[DEBUG] " + args.join(" ") + "\n");
+
+   // Return restoration function
+   return () => {
+     logStream.end();
+     console.log = originalLog;
+     console.error = originalError;
+     console.warn = originalWarn;
+     console.info = originalInfo;
+     console.debug = originalDebug;
+   };
+ }
+
+ function readStatusSnapshot(statusPath) {
+   try {
+     if (!statusPath || !fs.existsSync(statusPath)) {
+       return null;
+     }
+     const raw = fs.readFileSync(statusPath, "utf8");
+     if (!raw) {
+       return null;
+     }
+     return JSON.parse(raw);
+   } catch (error) {
+     console.warn(
+       `[task-runner] Failed to read existing status file at ${statusPath}: ${error.message}`
+     );
+     return null;
+   }
+ }
+
+ function mergeStatusSnapshot(existing, updates) {
+   const base =
+     existing && typeof existing === "object" && !Array.isArray(existing)
+       ? { ...existing }
+       : {};
+
+   if (updates?.data) {
+     base.data = { ...(existing?.data || {}), ...updates.data };
+   }
+   if (updates?.flags) {
+     base.flags = { ...(existing?.flags || {}), ...updates.flags };
+   }
+   if (Object.prototype.hasOwnProperty.call(updates || {}, "logs")) {
+     base.logs = updates.logs;
+   }
+
+   for (const [key, value] of Object.entries(updates || {})) {
+     if (key === "data" || key === "flags" || key === "logs") continue;
+     base[key] = value;
+   }
+
+   return base;
+ }
+
+ function persistStatusSnapshot(statusPath, updates) {
+   if (!statusPath || !updates) {
+     return;
+   }
+   const existing = readStatusSnapshot(statusPath);
+   const merged = mergeStatusSnapshot(existing, updates);
+   fs.writeFileSync(statusPath, JSON.stringify(merged, null, 2));
+ }
+
+ /**
+  * Flag schemas for each pipeline stage.
+  * Defines required flags (prerequisites) and produced flags (outputs) with their types.
+  */
+ const FLAG_SCHEMAS = {
+   validateStructure: {
+     requires: {},
+     produces: {
+       validationFailed: "boolean",
+       lastValidationError: ["string", "object", "undefined"],
+     },
+   },
+   critique: {
+     requires: {},
+     produces: {
+       critiqueComplete: "boolean",
+     },
+   },
+   refine: {
+     requires: {
+       validationFailed: "boolean",
+     },
+     produces: {
+       refined: "boolean",
+     },
+   },
+ };
+
+ /**
+  * Canonical pipeline stage execution order for the modern pipeline.
+  * Each stage defines its handler, skip predicate, and iteration limits.
+  * Stages with missing handlers are automatically skipped during execution.
+  * This is the single, unified pipeline with no legacy execution paths.
+  */
+ const PIPELINE_STAGES = [
+   {
+     name: "ingestion",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: null,
+     maxIterations: null,
+   },
+   {
+     name: "preProcessing",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: null,
+     maxIterations: null,
+   },
+   {
+     name: "promptTemplating",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: null,
+     maxIterations: null,
+   },
+   {
+     name: "inference",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: null,
+     maxIterations: null,
+   },
+   {
+     name: "parsing",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: null,
+     maxIterations: null,
+   },
+   {
+     name: "validateStructure",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: null,
+     maxIterations: null,
+   },
+   {
+     name: "validateQuality",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: null,
+     maxIterations: null,
+   },
+   {
+     name: "critique",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: (flags) => flags.validationFailed === false,
+     maxIterations: null,
+   },
+   {
+     name: "refine",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: (flags) => flags.validationFailed === false,
+     maxIterations: (seed) => seed.maxRefinements || 1,
+   },
+   {
+     name: "finalValidation",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: null,
+     maxIterations: null,
+   },
+   {
+     name: "integration",
+     handler: null, // Will be populated from dynamic module import
+     skipIf: null,
+     maxIterations: null,
+   },
  ];

  /**
-  * Runs a pipeline by loading a module that exports functions keyed by ORDER.
+  * Runs a pipeline by loading a module that exports functions keyed by stage name.
   */
  export async function runPipeline(modulePath, initialContext = {}) {
    if (!initialContext.envLoaded) {
@@ -28,20 +360,15 @@ export async function runPipeline(modulePath, initialContext = {}) {
      initialContext.envLoaded = true;
    }

-   if (!initialContext.llm) {
-     initialContext.llm = createLLM({
-       defaultProvider: initialContext.modelConfig?.defaultProvider || "openai",
-     });
-   }
+   if (!initialContext.llm) initialContext.llm = createLLM();

-   const config = getConfig();
    const llmMetrics = [];
    const llmEvents = getLLMEvents();

    const onLLMComplete = (metric) => {
      llmMetrics.push({
        ...metric,
-       task: context.taskName,
+       task: context.meta.taskName,
        stage: context.currentStage,
      });
    };
@@ -52,35 +379,111 @@ export async function runPipeline(modulePath, initialContext = {}) {
    );

    const abs = toAbsFileURL(modulePath);
-   // Add cache busting to force module reload
-   const modUrl = `${abs.href}?t=${Date.now()}`;
-   const mod = await import(modUrl);
+   const mod = await loadFreshModule(abs);
    const tasks = mod.default ?? mod;

-   const context = { ...initialContext, currentStage: null };
+   // Populate PIPELINE_STAGES handlers from dynamically loaded tasks or test override
+   const handlersSource = initialContext.tasksOverride || tasks;
+   PIPELINE_STAGES.forEach((stageConfig) => {
+     if (
+       handlersSource[stageConfig.name] &&
+       typeof handlersSource[stageConfig.name] === "function"
+     ) {
+       stageConfig.handler = handlersSource[stageConfig.name];
+     } else {
+       // Set handler to null when not available - will be skipped
+       stageConfig.handler = null;
+     }
+   });
+
+   // Create fileIO singleton if we have the required context
+   let fileIO = null;
+   if (
+     initialContext.workDir &&
+     initialContext.taskName &&
+     initialContext.statusPath
+   ) {
+     fileIO = createTaskFileIO({
+       workDir: initialContext.workDir,
+       taskName: initialContext.taskName,
+       getStage: () => context.currentStage,
+       statusPath: initialContext.statusPath,
+     });
+   }
+
+   // Extract seed and maxRefinements for new context structure
+   const seed = initialContext.seed || initialContext;
+   const maxRefinements = seed.maxRefinements ?? 1; // Default to 1 unless explicitly set
+
+   // Create new context structure with io, llm, meta, data, flags, logs, currentStage
+   const context = {
+     io: fileIO,
+     llm: initialContext.llm,
+     meta: {
+       taskName: initialContext.taskName,
+       workDir: initialContext.workDir,
+       statusPath: initialContext.statusPath,
+       jobId: initialContext.jobId,
+       envLoaded: initialContext.envLoaded,
+       modelConfig: initialContext.modelConfig,
+       pipelineTasks:
+         initialContext.meta?.pipelineTasks ||
+         initialContext.pipelineTasks ||
+         [],
+     },
+     data: {
+       seed: seed,
+     },
+     flags: {},
+     logs: [],
+     currentStage: null,
+   };
    const logs = [];
    let needsRefinement = false;
    let refinementCount = 0;
-   const maxRefinements = config.taskRunner.maxRefinementAttempts;
+   let lastStageOutput = context.data.seed;
+   let lastStageName = "seed";
+   let lastExecutedStageName = "seed";
+
+   // Ensure log directory exists before stage execution
+   const logsDir = ensureLogDirectory(context.meta.workDir, context.meta.jobId);

    do {
      needsRefinement = false;
      let preRefinedThisCycle = false;

-     for (const stage of ORDER) {
-       context.currentStage = stage;
-       const fn = tasks[stage];
-       if (typeof fn !== "function") {
-         logs.push({ stage, skipped: true, refinementCycle: refinementCount });
+     for (const stageConfig of PIPELINE_STAGES) {
+       const stageName = stageConfig.name;
+       const stageHandler = stageConfig.handler;
+
+       // Skip stages when skipIf predicate returns true
+       if (stageConfig.skipIf && stageConfig.skipIf(context.flags)) {
+         context.logs.push({
+           stage: stageName,
+           action: "skipped",
+           reason: "skipIf predicate returned true",
+           timestamp: new Date().toISOString(),
+         });
          continue;
        }

+       // Skip if handler is not available (not implemented)
+       if (typeof stageHandler !== "function") {
+         logs.push({
+           stage: stageName,
+           skipped: true,
+           refinementCycle: refinementCount,
+         });
+         continue;
+       }
+
+       // Skip ingestion and preProcessing during refinement cycles
        if (
          refinementCount > 0 &&
-         ["ingestion", "preProcessing"].includes(stage)
+         ["ingestion", "preProcessing"].includes(stageName)
        ) {
          logs.push({
-           stage,
+           stage: stageName,
            skipped: true,
            reason: "refinement-cycle",
            refinementCycle: refinementCount,
@@ -88,15 +491,17 @@ export async function runPipeline(modulePath, initialContext = {}) {
          continue;
        }

+       // Handle pre-refinement logic for validation stages
        if (
          refinementCount > 0 &&
          !preRefinedThisCycle &&
-         !context.refined &&
-         (stage === "validateStructure" || stage === "validateQuality")
+         !context.flags.refined &&
+         (stageName === "validateStructure" || stageName === "validateQuality")
        ) {
          for (const s of ["critique", "refine"]) {
-           const f = tasks[s];
-           if (typeof f !== "function") {
+           const sConfig = PIPELINE_STAGES.find((config) => config.name === s);
+           const sHandler = sConfig?.handler;
+           if (typeof sHandler !== "function") {
              logs.push({
                stage: s,
                skipped: true,
@@ -107,8 +512,7 @@ export async function runPipeline(modulePath, initialContext = {}) {
            }
            const sStart = performance.now();
            try {
-             const r = await f(context);
-             if (r && typeof r === "object") Object.assign(context, r);
+             const r = await sHandler(context);
              const sMs = +(performance.now() - sStart).toFixed(2);
              logs.push({
                stage: s,
@@ -140,9 +544,13 @@ export async function runPipeline(modulePath, initialContext = {}) {
        preRefinedThisCycle = true;
      }

-     if (preRefinedThisCycle && (stage === "critique" || stage === "refine")) {
+     // Skip critique and refine if already pre-refined
+     if (
+       preRefinedThisCycle &&
+       (stageName === "critique" || stageName === "refine")
+     ) {
        logs.push({
-         stage,
+         stage: stageName,
          skipped: true,
          reason: "already-pre-refined",
          refinementCycle: refinementCount,
@@ -150,52 +558,286 @@ export async function runPipeline(modulePath, initialContext = {}) {
        continue;
      }

+     // Add console output capture before stage execution
+     const logPath = path.join(
+       context.meta.workDir,
+       "files",
+       "logs",
+       `stage-${stageName}.log`
+     );
+     console.debug("[task-runner] stage log path resolution", {
+       stage: stageName,
+       workDir: context.meta.workDir,
+       jobId: context.meta.jobId,
+       logPath,
+     });
+     const restoreConsole = captureConsoleOutput(logPath);
+
+     // Set current stage before execution
+     context.currentStage = stageName;
+
+     // Write stage start status using writeJobStatus
+     if (context.meta.workDir && context.meta.taskName) {
+       try {
+         await writeJobStatus(context.meta.workDir, (snapshot) => {
+           snapshot.current = context.meta.taskName;
+           snapshot.currentStage = stageName;
+           snapshot.lastUpdated = new Date().toISOString();
+
+           // Ensure task exists and update task-specific fields
+           if (!snapshot.tasks[context.meta.taskName]) {
+             snapshot.tasks[context.meta.taskName] = {};
+           }
+           snapshot.tasks[context.meta.taskName].currentStage = stageName;
+           snapshot.tasks[context.meta.taskName].state = "running";
+         });
+       } catch (error) {
+         // Don't fail the pipeline if status write fails
+         console.warn(`Failed to write stage start status: ${error.message}`);
+       }
+     }
+
+     // Clone data and flags before stage execution
+     const stageData = JSON.parse(JSON.stringify(context.data));
+     const stageFlags = JSON.parse(JSON.stringify(context.flags));
+     const stageContext = {
+       io: context.io,
+       llm: context.llm,
+       meta: context.meta,
+       data: stageData,
+       flags: stageFlags,
+       currentStage: stageName,
+       output: JSON.parse(
+         JSON.stringify(
+           lastStageOutput !== undefined
+             ? lastStageOutput
+             : (context.data.seed ?? null)
+         )
+       ),
+       previousStage: lastExecutedStageName,
+     };
+
+     // Write pre-execution snapshot for debugging inputs
+     const snapshot = {
+       meta: { taskName: context.meta.taskName, jobId: context.meta.jobId },
+       previousStage: lastExecutedStageName,
+       refinementCycle: refinementCount,
+       dataSummary: {
+         keys: Object.keys(context.data),
+         hasSeed: !!context.data?.seed,
+         seedKeys: Object.keys(context.data?.seed || {}),
+         seedHasData: context.data?.seed?.data !== undefined,
+       },
+       flagsSummary: {
+         keys: Object.keys(context.flags),
+       },
+       outputSummary: {
+         type: typeof stageContext.output,
+         keys:
+           stageContext.output && typeof stageContext.output === "object"
+             ? Object.keys(stageContext.output).slice(0, 20)
+             : [],
+       },
+     };
+     writePreExecutionSnapshot(stageName, snapshot, logsDir);
+
+     // Validate prerequisite flags before stage execution
+     const requiredFlags = FLAG_SCHEMAS[stageName]?.requires;
+     if (requiredFlags && Object.keys(requiredFlags).length > 0) {
+       validateFlagTypes(stageName, context.flags, requiredFlags);
+     }
+
+     // Execute the stage
      const start = performance.now();
+     let stageResult;
      try {
-       const result = await fn(context);
-       if (result && typeof result === "object")
-         Object.assign(context, result);
+       context.logs.push({
+         stage: stageName,
+         action: "debugging",
+         data: stageContext,
+       });
+
+       console.log("STAGE CONTEXT", JSON.stringify(stageContext, null, 2));
+       stageResult = await stageHandler(stageContext);
+
+       // Validate stage result shape after execution
+       assertStageResult(stageName, stageResult);
+
+       // Validate produced flags against schema
+       const producedFlagsSchema = FLAG_SCHEMAS[stageName]?.produces;
+       if (producedFlagsSchema) {
+         validateFlagTypes(stageName, stageResult.flags, producedFlagsSchema);
+       }
+
+       // Check for flag type conflicts before merging
+       checkFlagTypeConflicts(context.flags, stageResult.flags, stageName);
+
+       // Store stage output in context.data
+       context.data[stageName] = stageResult.output;
+       lastStageName = stageName;
+
+       // Only update lastStageOutput and lastExecutedStageName for non-validation stages
+       // This ensures previousStage and context.output skip validation stages
+       const validationStages = [
+         "validateStructure",
+         "validateQuality",
+         "validateFinal",
+         "finalValidation",
+       ];
+       if (!validationStages.includes(stageName)) {
+         lastStageOutput = stageResult.output;
+         lastExecutedStageName = stageName;
+       }
+
+       // Merge stage flags into context.flags
+       context.flags = { ...context.flags, ...stageResult.flags };
+
+       // Add audit log entry after stage completes
+       context.logs.push({
+         stage: stageName,
+         action: "completed",
+         outputType: typeof stageResult.output,
+         flagKeys: Object.keys(stageResult.flags),
+         timestamp: new Date().toISOString(),
+       });
+
+       // Write stage completion status
+       if (context.meta.workDir && context.meta.taskName) {
+         try {
+           await writeJobStatus(context.meta.workDir, (snapshot) => {
+             // Keep current task and stage as-is since we're still within the same task
+             snapshot.current = context.meta.taskName;
+             snapshot.currentStage = stageName;
+             snapshot.lastUpdated = new Date().toISOString();
+
+             // Compute deterministic progress after stage completion
+             const pct = computeDeterministicProgress(
+               context.meta.pipelineTasks || [],
+               context.meta.taskName,
+               stageName
+             );
+             snapshot.progress = pct;
+
+             // Debug log for progress computation
+             console.debug("[task-runner] stage completion progress", {
+               task: context.meta.taskName,
+               stage: stageName,
+               progress: pct,
+             });
+
+             // Ensure task exists and update task-specific fields
+             if (!snapshot.tasks[context.meta.taskName]) {
+               snapshot.tasks[context.meta.taskName] = {};
+             }
+             snapshot.tasks[context.meta.taskName].currentStage = stageName;
+             snapshot.tasks[context.meta.taskName].state = "running";
+           });
+         } catch (error) {
+           // Don't fail the pipeline if status write fails
+           console.warn(
+             `Failed to write stage completion status: ${error.message}`
+           );
+         }
+       }

        const ms = +(performance.now() - start).toFixed(2);
-       logs.push({ stage, ok: true, ms, refinementCycle: refinementCount });
+       logs.push({
+         stage: stageName,
+         ok: true,
+         ms,
+         refinementCycle: refinementCount,
+       });

        if (
-         (stage === "validateStructure" || stage === "validateQuality") &&
-         context.validationFailed &&
+         (stageName === "validateStructure" ||
+           stageName === "validateQuality") &&
+         context.flags.validationFailed &&
          refinementCount < maxRefinements
        ) {
          needsRefinement = true;
-         context.validationFailed = false;
+         // Don't reset validationFailed here - let the refinement cycle handle it
          break;
        }
      } catch (error) {
+       console.error(`Stage ${stageName} failed:`, error);
        const ms = +(performance.now() - start).toFixed(2);
        const errInfo = normalizeError(error);
+
+       // Attach debug metadata to the error envelope for richer diagnostics
+       errInfo.debug = {
+         stage: stageName,
+         previousStage: lastExecutedStageName,
+         refinementCycle: refinementCount,
+         logPath: path.join(
+           context.meta.workDir,
+           "files",
+           "logs",
+           `stage-${stageName}.log`
+         ),
+         snapshotPath: path.join(logsDir, `stage-${stageName}-context.json`),
+         dataHasSeed: !!context.data?.seed,
+         seedHasData: context.data?.seed?.data !== undefined,
+         flagsKeys: Object.keys(context.flags || {}),
+       };
+
        logs.push({
-         stage,
+         stage: stageName,
          ok: false,
          ms,
          error: errInfo,
          refinementCycle: refinementCount,
        });

+       // For validation stages, trigger refinement if we haven't exceeded max refinements AND maxRefinements > 0
        if (
-         (stage === "validateStructure" || stage === "validateQuality") &&
+         (stageName === "validateStructure" ||
+           stageName === "validateQuality") &&
+         maxRefinements > 0 &&
          refinementCount < maxRefinements
        ) {
-         context.lastValidationError = errInfo;
+         context.flags.lastValidationError = errInfo;
+         context.flags.validationFailed = true; // Set the flag to trigger refinement
          needsRefinement = true;
          break;
        }

+       // Write failure status using writeJobStatus
+       if (context.meta.workDir && context.meta.taskName) {
+         try {
+           await writeJobStatus(context.meta.workDir, (snapshot) => {
+             snapshot.current = context.meta.taskName;
+             snapshot.currentStage = stageName;
+             snapshot.state = "failed";
+             snapshot.lastUpdated = new Date().toISOString();
+
+             // Ensure task exists and update task-specific fields
+             if (!snapshot.tasks[context.meta.taskName]) {
+               snapshot.tasks[context.meta.taskName] = {};
+             }
+             snapshot.tasks[context.meta.taskName].state = "failed";
+             snapshot.tasks[context.meta.taskName].failedStage = stageName;
+             snapshot.tasks[context.meta.taskName].currentStage = stageName;
+           });
+         } catch (error) {
+           // Don't fail the pipeline if status write fails
+           console.warn(`Failed to write failure status: ${error.message}`);
+         }
+       }
+
+       // For non-validation stages or when refinements are exhausted, fail immediately
        return {
          ok: false,
-         failedStage: stage,
+         failedStage: stageName,
          error: errInfo,
          logs,
          context,
          refinementAttempts: refinementCount,
        };
+     } finally {
+       // Add console output restoration after stage execution
+       if (restoreConsole) {
+         restoreConsole();
+       }
      }
    }

@@ -204,7 +846,7 @@ export async function runPipeline(modulePath, initialContext = {}) {
      logs.push({
        stage: "refinement-trigger",
        refinementCycle: refinementCount,
-       reason: context.lastValidationError
+       reason: context.flags.lastValidationError
          ? "validation-error"
          : "validation-failed-flag",
      });
@@ -216,7 +858,7 @@ export async function runPipeline(modulePath, initialContext = {}) {
      typeof tasks.validateStructure === "function" ||
      typeof tasks.validateQuality === "function";

-   if (context.validationFailed && hasValidation) {
+   if (context.flags.validationFailed && hasValidation) {
      return {
        ok: false,
        failedStage: "final-validation",
@@ -229,6 +871,29 @@ export async function runPipeline(modulePath, initialContext = {}) {

    llmEvents.off("llm:request:complete", onLLMComplete);

+   // Write final status with currentStage: null to indicate completion
+   if (context.meta.workDir && context.meta.taskName) {
+     try {
+       await writeJobStatus(context.meta.workDir, (snapshot) => {
+         snapshot.current = null;
+         snapshot.currentStage = null;
+         snapshot.state = "done";
+         snapshot.progress = 100;
+         snapshot.lastUpdated = new Date().toISOString();
+
+         // Update task state to done
+         if (!snapshot.tasks[context.meta.taskName]) {
+           snapshot.tasks[context.meta.taskName] = {};
+         }
+         snapshot.tasks[context.meta.taskName].state = "done";
+         snapshot.tasks[context.meta.taskName].currentStage = null;
+       });
+     } catch (error) {
+       // Don't fail the pipeline if final status write fails
+       console.warn(`Failed to write final status: ${error.message}`);
+     }
+   }
+
    return {
      ok: true,
      logs,
@@ -252,25 +917,6 @@ export async function runPipelineWithModelRouting(
    return runPipeline(modulePath, context);
  }

- export function selectModel(taskType, complexity, speed = "normal") {
-   const modelMap = {
-     "simple-fast": "gpt-3.5-turbo",
-     "simple-accurate": "gpt-4",
-     "complex-fast": "gpt-4",
-     "complex-accurate": "gpt-4-turbo",
-     specialized: "claude-3-opus",
-   };
-   const key =
-     complexity === "high"
-       ? speed === "fast"
-         ? "complex-fast"
-         : "complex-accurate"
-       : speed === "fast"
-         ? "simple-fast"
-         : "simple-accurate";
-   return modelMap[key] || "gpt-4";
- }
-
  function toAbsFileURL(p) {
    if (!path.isAbsolute(p)) {
      throw new Error(