npm - @polka-codes/core - Versions diffs - 0.9.79 → 0.9.80 - Mend

@polka-codes/core 0.9.79 → 0.9.80

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md +3 -3
package/dist/_tsup-dts-rollup.d.ts +30 -21
package/dist/index.d.ts +3 -3
package/dist/index.js +539 -82
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -29,7 +29,7 @@ import {
   agentWorkflow,
   createContext,
   makeStepFn,
-  type ToolResponse,
+  type AgentToolResponse,
   ToolResponseType,
 } from '@polka-codes/core';
 import { z } from 'zod';
@@ -52,10 +52,10 @@ async function main() {
         const { location } = input as z.infer<typeof getCurrentWeather.parameters>;
         // In a real app, you would call a weather API here
         const weather = `The weather in ${location} is 70°F and sunny.`;
-        const response: ToolResponse = { type: ToolResponseType.Reply, message: weather };
+        const response: AgentToolResponse = { type: ToolResponseType.Reply, message: weather };
         return response;
       }
-      const response: ToolResponse = { type: ToolResponseType.Error, message: 'Tool not found' };
+      const response: AgentToolResponse = { type: ToolResponseType.Error, message: 'Tool not found' };
       return response;
     },
     // A simple text generation function

package/dist/_tsup-dts-rollup.d.ts CHANGED Viewed

@@ -21,6 +21,14 @@ import { ZodObject } from 'zod';
 import { ZodOptional } from 'zod';
 import { ZodString } from 'zod';
+declare type AgentToolInfo = {
+    name: string;
+    description: string;
+    parameters: z.ZodObject<any>;
+};
+export { AgentToolInfo }
+export { AgentToolInfo as AgentToolInfo_alias_1 }
 declare type AgentToolRegistry = {
     generateText: {
         input: {
@@ -39,20 +47,24 @@ declare type AgentToolRegistry = {
             toolName: string;
             input: any;
         };
-        output: ToolResponse;
+        output: AgentToolResponse;
     };
 };
 export { AgentToolRegistry }
 export { AgentToolRegistry as AgentToolRegistry_alias_1 }
 export { AgentToolRegistry as AgentToolRegistry_alias_2 }
+declare type AgentToolResponse = ToolResponseReply | ToolResponseExit | ToolResponseError;
+export { AgentToolResponse }
+export { AgentToolResponse as AgentToolResponse_alias_1 }
 declare const agentWorkflow: WorkflowFn<AgentWorkflowInput, ExitReason, AgentToolRegistry>;
 export { agentWorkflow }
 export { agentWorkflow as agentWorkflow_alias_1 }
 export { agentWorkflow as agentWorkflow_alias_2 }
 declare type AgentWorkflowInput = {
-    tools: Readonly<FullToolInfo[]>;
+    tools: Readonly<FullAgentToolInfo[]>;
     maxToolRoundTrips?: number;
     userMessage: readonly JsonUserModelMessage[];
     outputSchema?: z.ZodSchema;
@@ -569,7 +581,7 @@ declare type DynamicStepRuntimeContext<TTools extends ToolRegistry> = {
     logger: Logger;
     step: StepFn;
     runWorkflow: (workflowId: string, input?: Record<string, any>) => Promise<any>;
-    toolInfo: Readonly<FullToolInfo[]> | undefined;
+    toolInfo: Readonly<FullAgentToolInfo[]> | undefined;
 };
 export { DynamicStepRuntimeContext }
 export { DynamicStepRuntimeContext as DynamicStepRuntimeContext_alias_1 }
@@ -595,7 +607,7 @@ declare type DynamicWorkflowRunnerOptions = {
      * Tool definitions used when a step does not have persisted `code`
      * and needs to be executed via `agentWorkflow`.
      */
-    toolInfo?: Readonly<FullToolInfo[]>;
+    toolInfo?: Readonly<FullAgentToolInfo[]>;
     /**
      * Model id forwarded to `agentWorkflow` for agent-executed steps.
      */
@@ -618,6 +630,11 @@ declare type DynamicWorkflowRunnerOptions = {
         input: any;
         state: any;
     }) => string;
+    /**
+     * Whether to wrap plain text agent responses in an object { result: ... }.
+     * Defaults to false.
+     */
+    wrapAgentResultInObject?: boolean;
 };
 export { DynamicWorkflowRunnerOptions }
 export { DynamicWorkflowRunnerOptions as DynamicWorkflowRunnerOptions_alias_1 }
@@ -663,11 +680,11 @@ export { fromJsonModelMessage }
 export { fromJsonModelMessage as fromJsonModelMessage_alias_1 }
 export { fromJsonModelMessage as fromJsonModelMessage_alias_2 }
-declare type FullToolInfo = ToolInfo & {
-    handler: ToolHandler<ToolInfo, any>;
+declare type FullAgentToolInfo = AgentToolInfo & {
+    handler: ToolHandler<AgentToolInfo, any>;
 };
-export { FullToolInfo }
-export { FullToolInfo as FullToolInfo_alias_1 }
+export { FullAgentToolInfo }
+export { FullAgentToolInfo as FullAgentToolInfo_alias_1 }
 declare type GenerateWorkflowCodeInput = z.infer<typeof GenerateWorkflowCodeInputSchema>;
 export { GenerateWorkflowCodeInput }
@@ -690,6 +707,7 @@ declare const GenerateWorkflowCodeInputSchema: z.ZodObject<{
                 expected_outcome: z.ZodOptional<z.ZodNullable<z.ZodString>>;
                 code: z.ZodOptional<z.ZodNullable<z.ZodString>>;
                 outputSchema: z.ZodOptional<z.ZodNullable<z.ZodAny>>;
+                timeout: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
             }, z.core.$strip>>;
             output: z.ZodOptional<z.ZodNullable<z.ZodString>>;
         }, z.core.$strip>>;
@@ -1266,18 +1284,10 @@ export { toJsonModelMessage }
 export { toJsonModelMessage as toJsonModelMessage_alias_1 }
 export { toJsonModelMessage as toJsonModelMessage_alias_2 }
-declare type ToolHandler<_T, P> = (provider: P, args: Partial<Record<string, ToolParameterValue>>) => Promise<ToolResponse>;
+declare type ToolHandler<_T, P> = (provider: P, args: Partial<Record<string, ToolParameterValue>>) => Promise<AgentToolResponse>;
 export { ToolHandler }
 export { ToolHandler as ToolHandler_alias_1 }
-declare type ToolInfo = {
-    name: string;
-    description: string;
-    parameters: z.ZodObject<any>;
-};
-export { ToolInfo }
-export { ToolInfo as ToolInfo_alias_1 }
 export declare const toolInfo: {
     readonly name: "askFollowupQuestion";
     readonly description: "Call this when vital details are missing. Pose each follow-up as one direct, unambiguous question. If it speeds the reply, add up to five short, mutually-exclusive answer options. Group any related questions in the same call to avoid a back-and-forth chain.";
@@ -1483,10 +1493,6 @@ export { ToolRegistry }
 export { ToolRegistry as ToolRegistry_alias_1 }
 export { ToolRegistry as ToolRegistry_alias_2 }
-declare type ToolResponse = ToolResponseReply | ToolResponseExit | ToolResponseError;
-export { ToolResponse }
-export { ToolResponse as ToolResponse_alias_1 }
 declare type ToolResponseError = {
     type: ToolResponseType.Error;
     message: ToolResponseResult;
@@ -1690,6 +1696,7 @@ declare const WorkflowDefinitionSchema: z.ZodObject<{
         expected_outcome: z.ZodOptional<z.ZodNullable<z.ZodString>>;
         code: z.ZodOptional<z.ZodNullable<z.ZodString>>;
         outputSchema: z.ZodOptional<z.ZodNullable<z.ZodAny>>;
+        timeout: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
     }, z.core.$strip>>;
     output: z.ZodOptional<z.ZodNullable<z.ZodString>>;
 }, z.core.$strip>;
@@ -1716,6 +1723,7 @@ declare const WorkflowFileSchema: z.ZodObject<{
             expected_outcome: z.ZodOptional<z.ZodNullable<z.ZodString>>;
             code: z.ZodOptional<z.ZodNullable<z.ZodString>>;
             outputSchema: z.ZodOptional<z.ZodNullable<z.ZodAny>>;
+            timeout: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
         }, z.core.$strip>>;
         output: z.ZodOptional<z.ZodNullable<z.ZodString>>;
     }, z.core.$strip>>;
@@ -1752,6 +1760,7 @@ declare const WorkflowStepDefinitionSchema: z.ZodObject<{
     expected_outcome: z.ZodOptional<z.ZodNullable<z.ZodString>>;
     code: z.ZodOptional<z.ZodNullable<z.ZodString>>;
     outputSchema: z.ZodOptional<z.ZodNullable<z.ZodAny>>;
+    timeout: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
 }, z.core.$strip>;
 export { WorkflowStepDefinitionSchema }
 export { WorkflowStepDefinitionSchema as WorkflowStepDefinitionSchema_alias_1 }

package/dist/index.d.ts CHANGED Viewed

@@ -10,15 +10,15 @@ export { configSchema_alias_1 as configSchema } from './_tsup-dts-rollup.js';
 export { Config_alias_1 as Config } from './_tsup-dts-rollup.js';
 export { ToolParameterValue } from './_tsup-dts-rollup.js';
 export { ToolParameter } from './_tsup-dts-rollup.js';
-export { ToolInfo } from './_tsup-dts-rollup.js';
-export { FullToolInfo } from './_tsup-dts-rollup.js';
+export { AgentToolInfo } from './_tsup-dts-rollup.js';
+export { FullAgentToolInfo } from './_tsup-dts-rollup.js';
 export { ToolResponseType } from './_tsup-dts-rollup.js';
 export { ToolResponseResultMedia } from './_tsup-dts-rollup.js';
 export { ToolResponseResult } from './_tsup-dts-rollup.js';
 export { ToolResponseReply } from './_tsup-dts-rollup.js';
 export { ToolResponseExit } from './_tsup-dts-rollup.js';
 export { ToolResponseError } from './_tsup-dts-rollup.js';
-export { ToolResponse } from './_tsup-dts-rollup.js';
+export { AgentToolResponse } from './_tsup-dts-rollup.js';
 export { ToolHandler } from './_tsup-dts-rollup.js';
 export { askFollowupQuestion } from './_tsup-dts-rollup.js';
 export { executeCommand } from './_tsup-dts-rollup.js';

package/dist/index.js CHANGED Viewed

@@ -1930,7 +1930,11 @@ var WorkflowStepDefinitionSchema = z21.object({
    * Optional JSON schema or other metadata for future structured outputs.
    * Not interpreted by core today.
    */
-  outputSchema: z21.any().nullish()
+  outputSchema: z21.any().nullish(),
+  /**
+   * Optional timeout in milliseconds. Step execution will be aborted if it exceeds this duration.
+   */
+  timeout: z21.number().positive().nullish()
 });
 var WorkflowDefinitionSchema = z21.object({
   task: z21.string(),
@@ -1957,6 +1961,28 @@ function parseDynamicWorkflowDefinition(source) {
 }
 var AsyncFunction = Object.getPrototypeOf(async () => {
 }).constructor;
+function validateAndApplyDefaults(workflowId, workflow, input) {
+  if (!workflow.inputs || workflow.inputs.length === 0) {
+    return input;
+  }
+  const validatedInput = {};
+  const errors = [];
+  for (const inputDef of workflow.inputs) {
+    const providedValue = input[inputDef.id];
+    if (providedValue !== void 0 && providedValue !== null) {
+      validatedInput[inputDef.id] = providedValue;
+    } else if (inputDef.default !== void 0 && inputDef.default !== null) {
+      validatedInput[inputDef.id] = inputDef.default;
+    } else {
+      errors.push(`Missing required input '${inputDef.id}'${inputDef.description ? `: ${inputDef.description}` : ""}`);
+    }
+  }
+  if (errors.length > 0) {
+    throw new Error(`Workflow '${workflowId}' input validation failed:
+${errors.map((e) => `  - ${e}`).join("\n")}`);
+  }
+  return validatedInput;
+}
 function createRunWorkflowFn(args) {
   return async (subWorkflowId, subInput) => {
     const mergedInput = { ...args.input, ...args.state, ...subInput ?? {} };
@@ -1977,8 +2003,13 @@ function compileStep(stepDef, workflowId, compiledSteps) {
     compiledSteps.set(key, fn);
     return fn;
   } catch (error) {
+    const errorMsg = error instanceof Error ? error.message : String(error);
+    const codePreview = stepDef.code.length > 200 ? `${stepDef.code.substring(0, 200)}...` : stepDef.code;
     throw new Error(
-      `Failed to compile code for step '${stepDef.id}' in workflow '${workflowId}': ${error instanceof Error ? error.message : String(error)}`
+      `Failed to compile code for step '${stepDef.id}' in workflow '${workflowId}':
+  Error: ${errorMsg}
+  Code:
+${codePreview.split("\n").map((line) => `    ${line}`).join("\n")}`
     );
   }
 }
@@ -2010,6 +2041,7 @@ async function executeStepWithAgent(stepDef, workflowId, input, state, context,
     });
   }
   const allowedToolNameSet = new Set(toolsForAgent.map((t) => t.name));
+  context.logger.debug(`[Agent] Available tools for step '${stepDef.id}': ${toolsForAgent.map((t) => t.name).join(", ")}`);
   const systemPrompt = options.stepSystemPrompt?.({ workflowId, step: stepDef, input, state }) ?? [
     `You are an AI assistant executing a workflow step.`,
     "",
@@ -2079,28 +2111,86 @@ async function executeStepWithAgent(stepDef, workflowId, input, state, context,
     if (parsed.success) {
       return parsed.data;
     }
+    if (options.wrapAgentResultInObject) {
+      context.logger.warn(`[Agent] Step '${stepDef.id}' returned plain text instead of JSON. Wrapping in {result: ...}`);
+      return { result: result.message };
+    }
     return result.message;
   }
-  throw new Error(`Agent execution for step '${stepDef.id}' in workflow '${workflowId}' did not exit cleanly.`);
+  if (result.type === "Error") {
+    throw new Error(`Agent step '${stepDef.id}' in workflow '${workflowId}' failed: ${result.error?.message || "Unknown error"}`);
+  }
+  if (result.type === "UsageExceeded") {
+    throw new Error(`Agent step '${stepDef.id}' in workflow '${workflowId}' exceeded usage limits (tokens or rounds)`);
+  }
+  throw new Error(`Agent step '${stepDef.id}' in workflow '${workflowId}' exited unexpectedly with type: ${result.type}`);
+}
+async function executeStepWithTimeout(stepDef, workflowId, input, state, context, options, compiledSteps, runInternal) {
+  const executeStepLogic = async () => {
+    if (stepDef.code && options.allowUnsafeCodeExecution) {
+      context.logger.debug(`[Step] Executing step '${stepDef.id}' with compiled code`);
+      const fn = compileStep(stepDef, workflowId, compiledSteps);
+      const runWorkflow = createRunWorkflowFn({ input, state, context, runInternal });
+      const runtimeCtx = {
+        workflowId,
+        stepId: stepDef.id,
+        input,
+        state,
+        tools: context.tools,
+        logger: context.logger,
+        step: context.step,
+        runWorkflow,
+        toolInfo: options.toolInfo
+      };
+      const result2 = await fn(runtimeCtx);
+      context.logger.debug(`[Step] Compiled code execution completed for step '${stepDef.id}'`);
+      return result2;
+    }
+    context.logger.debug(`[Step] Executing step '${stepDef.id}' with agent`);
+    const result = await executeStepWithAgent(stepDef, workflowId, input, state, context, options, runInternal);
+    context.logger.debug(`[Step] Agent execution completed for step '${stepDef.id}'`);
+    return result;
+  };
+  if (stepDef.timeout && stepDef.timeout > 0) {
+    context.logger.debug(`[Step] Step '${stepDef.id}' has timeout of ${stepDef.timeout}ms`);
+    let timeoutId;
+    const timeoutPromise = new Promise((_, reject) => {
+      timeoutId = setTimeout(
+        () => reject(new Error(`Step '${stepDef.id}' in workflow '${workflowId}' timed out after ${stepDef.timeout}ms`)),
+        stepDef.timeout
+      );
+    });
+    try {
+      return await Promise.race([executeStepLogic(), timeoutPromise]);
+    } finally {
+      if (timeoutId) clearTimeout(timeoutId);
+    }
+  }
+  return await executeStepLogic();
 }
 async function executeStep(stepDef, workflowId, input, state, context, options, compiledSteps, runInternal) {
-  if (stepDef.code && options.allowUnsafeCodeExecution) {
-    const fn = compileStep(stepDef, workflowId, compiledSteps);
-    const runWorkflow = createRunWorkflowFn({ input, state, context, runInternal });
-    const runtimeCtx = {
-      workflowId,
-      stepId: stepDef.id,
-      input,
-      state,
-      tools: context.tools,
-      logger: context.logger,
-      step: context.step,
-      runWorkflow,
-      toolInfo: options.toolInfo
-    };
-    return await fn(runtimeCtx);
+  const result = await executeStepWithTimeout(stepDef, workflowId, input, state, context, options, compiledSteps, runInternal);
+  if (stepDef.outputSchema) {
+    try {
+      const _schema = z22.any();
+      if (typeof stepDef.outputSchema === "object") {
+        context.logger.debug(`[Step] Validating output for step '${stepDef.id}' against schema`);
+        if (stepDef.outputSchema.type === "object") {
+          if (typeof result !== "object" || result === null || Array.isArray(result)) {
+            throw new Error(`Expected object output, got ${Array.isArray(result) ? "array" : result === null ? "null" : typeof result}`);
+          }
+        }
+        if (stepDef.outputSchema.type === "array" && !Array.isArray(result)) {
+          throw new Error(`Expected array output, got ${typeof result}`);
+        }
+      }
+    } catch (error) {
+      throw new Error(
+        `Step '${stepDef.id}' in workflow '${workflowId}' output validation failed: ${error instanceof Error ? error.message : String(error)}`
+      );
+    }
   }
-  return await executeStepWithAgent(stepDef, workflowId, input, state, context, options, runInternal);
+  return result;
 }
 function createDynamicWorkflow(definition, options = {}) {
   if (typeof definition === "string") {
@@ -2116,19 +2206,37 @@ function createDynamicWorkflow(definition, options = {}) {
     if (!workflow) {
       throw new Error(`Workflow '${workflowId}' not found`);
     }
+    const validatedInput = validateAndApplyDefaults(workflowId, workflow, input);
+    context.logger.info(`[Workflow] Starting workflow '${workflowId}'`);
+    context.logger.debug(`[Workflow] Input: ${JSON.stringify(validatedInput)}`);
+    context.logger.debug(`[Workflow] Inherited state: ${JSON.stringify(inheritedState)}`);
+    context.logger.debug(`[Workflow] Steps: ${workflow.steps.map((s) => s.id).join(", ")}`);
     const state = { ...inheritedState };
     let lastOutput;
-    for (const stepDef of workflow.steps) {
+    for (let i = 0; i < workflow.steps.length; i++) {
+      const stepDef = workflow.steps[i];
       const stepName = `${workflowId}.${stepDef.id}`;
+      context.logger.info(`[Workflow] Step ${i + 1}/${workflow.steps.length}: ${stepDef.id}`);
+      context.logger.debug(`[Workflow] Step task: ${stepDef.task}`);
+      if (stepDef.expected_outcome) {
+        context.logger.debug(`[Workflow] Expected outcome: ${stepDef.expected_outcome}`);
+      }
+      context.logger.debug(`[Workflow] Current state keys: ${Object.keys(state).join(", ")}`);
       lastOutput = await context.step(stepName, async () => {
-        return await executeStep(stepDef, workflowId, input, state, context, options, compiledSteps, runInternal);
+        return await executeStep(stepDef, workflowId, validatedInput, state, context, options, compiledSteps, runInternal);
       });
       const outputKey = stepDef.output ?? stepDef.id;
       state[outputKey] = lastOutput;
+      context.logger.debug(
+        `[Workflow] Step output stored as '${outputKey}': ${typeof lastOutput === "object" ? JSON.stringify(lastOutput).substring(0, 200) : lastOutput}`
+      );
     }
+    context.logger.info(`[Workflow] Completed workflow '${workflowId}'`);
     if (workflow.output) {
+      context.logger.debug(`[Workflow] Returning output field: ${workflow.output}`);
       return state[workflow.output];
     }
+    context.logger.debug(`[Workflow] Returning full state with keys: ${Object.keys(state).join(", ")}`);
     return state;
   };
   return async (workflowId, input, context) => {
@@ -2165,8 +2273,11 @@ The workflow definition must follow this structure:
         {
           "id": "stepId",
           "task": "Description of the step",
-          "tools": ["toolName1", "toolName2"], // Optional list of tools needed
-          "output": "outputVariableName", // Optional
+          "tools": ["toolName1", "toolName2"], // Optional: restrict which tools can be used
+          "output": "outputVariableName", // Optional: defaults to step id
+          "timeout": 30000, // Optional: timeout in milliseconds
+          "expected_outcome": "What this step produces", // Optional: documentation
+          "outputSchema": { "type": "object" } // Optional: validation schema
         }
       ],
       "output": "outputVariableName" // Optional
@@ -2180,6 +2291,15 @@ Constraints:
 - Break down complex tasks into logical steps.
 - Define clear inputs and outputs.
+Quality Guidelines:
+- Add "timeout" field (in milliseconds) for steps that might take long (file I/O, API calls, searches)
+- Use "expected_outcome" field to document what each step should produce
+- Use descriptive step IDs (e.g., "validateInput", "fetchUserData", not "step1", "step2")
+- Design steps to be focused - one responsibility per step
+- For steps that process multiple items, consider creating a sub-workflow
+- Add "outputSchema" with type information for validation-critical steps
+- Order steps logically with clear data flow
 Example 1:
 User: "Research a topic and summarize it."
 Output:
@@ -2267,70 +2387,407 @@ Your task is to implement the TypeScript code for the steps in the provided work
 You will receive a JSON workflow definition where the "code" field is null.
 You must fill in the "code" field for each step with valid TypeScript code.
-The code will be executed in an async function with the following signature:
-async (ctx) => {
-  // Your code here
-}
+CRITICAL: Each step "code" field must contain ONLY the function body statements (the code inside the curly braces).
+DO NOT include function declaration, arrow function syntax, async keyword, parameter list, or outer curly braces.
-The \`ctx\` object provides access to:
-- \`ctx.input\`: The workflow inputs.
-- \`ctx.state\`: A shared state object for passing data between steps.
-- \`ctx.tools\`: An object containing available tools.
-- \`ctx.runWorkflow\`: (workflowId: string, input?: any) => Promise<any>. Use this to run other workflows.
-Guidelines:
-- Use \`await\` for asynchronous operations.
-- Return the output value of the step.
-- Access inputs via \`ctx.input.inputName\`.
-- Access previous step outputs via \`ctx.state.stepOutputName\`.
-- Use \`ctx.tools.invokeTool({ toolName: 'name', input: { ... } })\` to call tools.
-- Use \`ctx.tools.generateText({ messages: [...] })\` for LLM calls.
-- Use \`ctx.tools.invokeTool({ toolName: 'runAgent', input: { prompt: '...' } })\` for complex sub-tasks that require multiple steps or tools. Prefer this over \`generateText\` for advanced tasks.
-Example Code for a step:
-\`\`\`typescript
-const searchResults = await ctx.tools.invokeTool({
-  toolName: 'search',
-  input: { query: ctx.input.topic }
-});
-return searchResults;
-\`\`\`
+The code will be wrapped automatically in: \`async (ctx) => { YOUR_CODE_HERE }\`
-Example Code for LLM step:
-\`\`\`typescript
-const summary = await ctx.tools.generateText({
-  messages: [
-    { role: 'system', content: 'Summarize the following text.' },
-    { role: 'user', content: ctx.state.searchResults }
-  ]
-});
-return summary;
-\`\`\`
+Example of CORRECT code field:
+	\`\`\`ts
+	const result = await ctx.tools.readFile({ path: 'README.md' })
+	if (!result) throw new Error('File not found')
+	return result
+	\`\`\`
-Example Code for runAgent:
-\`\`\`typescript
-const result = await ctx.tools.invokeTool({
-  toolName: 'runAgent',
-  input: {
-    prompt: 'Research the history of the internet and write a summary.',
-    tools: ['search', 'generateText']
-  }
-});
-return result;
-\`\`\`
+Example of INCORRECT code field (DO NOT DO THIS):
+	\`\`\`ts
+	async (ctx) => {
+	  const result = await ctx.tools.readFile({ path: 'README.md' })
+	  return result
+	}
+	\`\`\`
-Example Code for invoking a sub-workflow:
-\`\`\`typescript
-const results = [];
-for (const pr of ctx.state.prs) {
-  const review = await ctx.runWorkflow('reviewPR', { prId: pr.id });
-  results.push(review);
-}
-return results;
-\`\`\`
+Example of INCORRECT code field (DO NOT DO THIS):
+	\`\`\`ts
+	(ctx) => {
+	  return 'hello'
+	}
+	\`\`\`
-Return the complete workflow JSON with the "code" fields populated.
-`;
+	## Runtime context (ctx)
+	\`\`\`ts
+	// Runtime types (for reference)
+	type Logger = {
+	  debug: (...args: any[]) => void
+	  info: (...args: any[]) => void
+	  warn: (...args: any[]) => void
+	  error: (...args: any[]) => void
+	}
+	type StepFn = {
+	  <T>(name: string, fn: () => Promise<T>): Promise<T>
+	  <T>(name: string, options: { retry?: number }, fn: () => Promise<T>): Promise<T>
+	}
+	type JsonModelMessage = { role: 'system' | 'user' | 'assistant' | 'tool'; content: any }
+	type JsonResponseMessage = { role: 'assistant' | 'tool'; content: any }
+	type ToolSet = Record<string, any>
+	type ToolResponseResult =
+	  | { type: 'text'; value: string }
+	  | { type: 'json'; value: any }
+	  | { type: 'error-text'; value: string }
+	  | { type: 'error-json'; value: any }
+	  | { type: 'content'; value: any[] }
+	type AgentToolResponse =
+	  | { type: 'Reply'; message: ToolResponseResult }
+	  | { type: 'Exit'; message: string; object?: any }
+	  | { type: 'Error'; message: ToolResponseResult }
+	type ExitReason =
+	  | { type: 'UsageExceeded' }
+	  | { type: 'Exit'; message: string; object?: any }
+	  | { type: 'Error'; error: { message: string; stack?: string } }
+	type FullAgentToolInfo = { name: string; description: string; parameters: any; handler: any }
+	// Tools available on ctx.tools in dynamic steps
+	type DynamicWorkflowTools = {
+	  // LLM + agent helpers
+	  generateText: (input: { messages: JsonModelMessage[]; tools: ToolSet }) => Promise<JsonResponseMessage[]>
+	  runAgent: (input: {
+	    tools: Readonly<FullAgentToolInfo[]>
+	    maxToolRoundTrips?: number
+	    userMessage: readonly JsonModelMessage[]
+	  } & ({ messages: JsonModelMessage[] } | { systemPrompt: string })) => Promise<ExitReason>
+	  // Generic bridge to "agent tools" by name
+	  invokeTool: (input: { toolName: string; input: any }) => Promise<AgentToolResponse>
+	  // File + command helpers (direct)
+	  readFile: (input: { path: string }) => Promise<string | null>
+	  writeToFile: (input: { path: string; content: string }) => Promise<void>
+	  executeCommand: (input: { command: string; pipe?: boolean } & ({ args: string[]; shell?: false } | { shell: true })) => Promise<{
+	    exitCode: number
+	    stdout: string
+	    stderr: string
+	  }>
+	  // CLI UX helpers
+	  confirm: (input: { message: string }) => Promise<boolean>
+	  input: (input: { message: string; default?: string }) => Promise<string>
+	  select: (input: { message: string; choices: { name: string; value: string }[] }) => Promise<string>
+	}
+	type DynamicStepRuntimeContext = {
+	  workflowId: string
+	  stepId: string
+	  input: Record<string, any>
+	  state: Record<string, any>
+	  tools: DynamicWorkflowTools
+	  logger: Logger
+	  step: StepFn
+	  runWorkflow: (workflowId: string, input?: Record<string, any>) => Promise<any>
+	  toolInfo?: ReadonlyArray<FullAgentToolInfo>
+	}
+	\`\`\`
+- \`ctx.input\`: workflow inputs (read-only).
+- \`ctx.state\`: shared state between steps (previous step outputs are stored here).
+- \`ctx.tools\`: async tool functions. Call tools as \`await ctx.tools.someTool({ ... })\`.
+- \`ctx.runWorkflow\`: run a sub-workflow by id.
+	## Guidelines
+	- Use \`await\` for all async operations.
+	- Return the output value for the step (this becomes the step output).
+	- Access inputs via \`ctx.input.<inputId>\`.
+	- Access previous step outputs via \`ctx.state.<stepOutputKey>\` (defaults to the step \`output\` or \`id\`).
+	## Quality Guidelines for Code Implementation
+	### Error Handling
+	- ALWAYS validate inputs at the start of steps
+	- Use try-catch for operations that might fail (file I/O, parsing, API calls)
+	- Preserve stack traces: re-throw original errors rather than creating new ones
+	- Use error type guards: \`const err = error instanceof Error ? error : new Error(String(error))\`
+	- Check for null/undefined before using values
+	- Handle edge cases (empty arrays, missing files, invalid data)
+	### Logging
+	- Use \`ctx.logger.info()\` for important progress updates
+	- Use \`ctx.logger.debug()\` for detailed information
+	- Use \`ctx.logger.warn()\` for recoverable issues
+	- Use \`ctx.logger.error()\` before throwing errors
+	- Log when starting and completing significant operations
+	- Use template literals for readability: \`ctx.logger.info(\\\`Processing \${items.length} items...\\\`)\`
+	### User Experience
+	- Provide progress feedback for long operations
+	- Return structured data (objects/arrays), not strings when possible
+	- Include helpful metadata in results (counts, timestamps, status)
+	- For batch operations, report progress: \`Processed 5/10 items\`
+	### Data Validation
+	- Validate required fields exist before accessing
+	- Check data types match expectations
+	- Validate array lengths before iteration
+	- Example: \`if (!data?.users || !Array.isArray(data.users)) throw new Error('Invalid data format')\`
+	### Best Practices
+	- Use meaningful variable names
+	- Avoid nested callbacks - use async/await
+	- Clean up resources (close files, clear timeouts)
+	- Return consistent data structures across similar steps
+	- For iteration, consider batching or rate limiting
+	### When to Simplify
+	- Simple transformation steps (e.g., formatting strings) need only basic error handling
+	- Internal sub-workflow steps with validated inputs from parent can skip redundant validation
+	- Minimal logging is fine for fast steps (<100ms) that don't perform I/O or external calls
+	- Use judgment: match error handling complexity to the step's failure risk and impact
+	## Tool calling examples (every tool)
+	### Direct ctx.tools methods
+	\`\`\`ts
+	// readFile
+	const readme = await ctx.tools.readFile({ path: 'README.md' })
+	if (readme == null) throw new Error('README.md not found')
+	// writeToFile
+	await ctx.tools.writeToFile({ path: 'notes.txt', content: 'hello\\n' })
+	// executeCommand (args form)
+	const rg = await ctx.tools.executeCommand({ command: 'rg', args: ['-n', 'TODO', '.'] })
+	if (rg.exitCode !== 0) throw new Error(rg.stderr)
+	// executeCommand (shell form)
+	await ctx.tools.executeCommand({ command: 'ls -la', shell: true, pipe: true })
+	// generateText (LLM call; pass tools: {})
+	const msgs = await ctx.tools.generateText({
+	  messages: [
+	    { role: 'system', content: 'Summarize the following text.' },
+	    { role: 'user', content: readme },
+	  ],
+	  tools: {},
+	})
+	const last = msgs[msgs.length - 1]
+	const lastText = typeof last?.content === 'string' ? last.content : JSON.stringify(last?.content)
+	// runAgent (nested agent; use ctx.toolInfo as the tool list)
+	const agentRes = await ctx.tools.runAgent({
+	  systemPrompt: 'You are a helpful assistant.',
+	  userMessage: [{ role: 'user', content: 'Summarize README.md in 3 bullets.' }],
+	  tools: (ctx.toolInfo ?? []) as any,
+	})
+	if (agentRes.type !== 'Exit') throw new Error('runAgent failed')
+	// confirm / input / select (interactive)
+	const ok = await ctx.tools.confirm({ message: 'Proceed?' })
+	const name = await ctx.tools.input({ message: 'Name?', default: 'main' })
+	const flavor = await ctx.tools.select({
+	  message: 'Pick one',
+	  choices: [
+	    { name: 'A', value: 'a' },
+	    { name: 'B', value: 'b' },
+	  ],
+	})
+	\`\`\`
+	### Agent tools via ctx.tools.invokeTool (toolName examples)
+	\`\`\`ts
+	// Helper to unwrap a successful tool reply
+	function unwrapToolValue(resp: any) {
+	  if (!resp || resp.type !== 'Reply') {
+	    const msg = resp?.message?.value
+	    throw new Error(typeof msg === 'string' ? msg : JSON.stringify(resp))
+	  }
+	  return resp.message.value
+	}
+	// askFollowupQuestion
+	const answersText = unwrapToolValue(
+	  await ctx.tools.invokeTool({
+	    toolName: 'askFollowupQuestion',
+	    input: { questions: [{ prompt: 'Which directory?', options: ['src', 'packages'] }] },
+	  }),
+	)
+	// listFiles
+	const filesText = unwrapToolValue(
+	  await ctx.tools.invokeTool({
+	    toolName: 'listFiles',
+	    input: { path: 'src', recursive: true, maxCount: 2000, includeIgnored: false },
+	  }),
+	)
+	// searchFiles
+	const hitsText = unwrapToolValue(
+	  await ctx.tools.invokeTool({
+	    toolName: 'searchFiles',
+	    input: { path: '.', regex: 'generateWorkflowCodeWorkflow', filePattern: '*.ts' },
+	  }),
+	)
+	// fetchUrl
+	const pageText = unwrapToolValue(await ctx.tools.invokeTool({ toolName: 'fetchUrl', input: { url: 'https://example.com' } }))
+	// search (web search)
+	const webResults = unwrapToolValue(
+	  await ctx.tools.invokeTool({ toolName: 'search', input: { query: 'TypeScript zod schema examples' } }),
+	)
+	// executeCommand (provider-backed; may require approval in some environments)
+	const cmdText = unwrapToolValue(
+	  await ctx.tools.invokeTool({ toolName: 'executeCommand', input: { command: 'bun test', requiresApproval: false } }),
+	)
+	// readFile / writeToFile (provider-backed)
+	const fileText = unwrapToolValue(
+	  await ctx.tools.invokeTool({ toolName: 'readFile', input: { path: 'README.md', includeIgnored: false } }),
+	)
+	const writeText = unwrapToolValue(await ctx.tools.invokeTool({ toolName: 'writeToFile', input: { path: 'out.txt', content: 'hi' } }))
+	// replaceInFile
+	const diff = ['<<<<<<< SEARCH', 'old', '=======', 'new', '>>>>>>> REPLACE'].join('\\n')
+	const replaceText = unwrapToolValue(await ctx.tools.invokeTool({ toolName: 'replaceInFile', input: { path: 'out.txt', diff } }))
+	// removeFile / renameFile
+	const rmText = unwrapToolValue(await ctx.tools.invokeTool({ toolName: 'removeFile', input: { path: 'out.txt' } }))
+	const mvText = unwrapToolValue(
+	  await ctx.tools.invokeTool({ toolName: 'renameFile', input: { source_path: 'a.txt', target_path: 'b.txt' } }),
+	)
+	// readBinaryFile (returns { type: 'content', value: [...] } in resp.message)
+	const binResp = await ctx.tools.invokeTool({ toolName: 'readBinaryFile', input: { url: 'file://path/to/image.png' } })
+	\`\`\`
+	### Sub-workflow example (ctx.runWorkflow)
+	\`\`\`ts
+	const results: any[] = []
+	for (const pr of ctx.state.prs ?? []) {
+	  results.push(await ctx.runWorkflow('reviewPR', { prId: pr.id }))
+	}
+	return results
+	\`\`\`
+	## Complete Example: High-Quality Step Implementation
+	This example demonstrates all quality guidelines in a single step:
+	\`\`\`ts
+	// Step: processUserData
+	// Task: Read, validate, and process user data from a file
+	// Input validation
+	if (!ctx.input.dataFile) {
+	  throw new Error('Missing required input: dataFile')
+	}
+	ctx.logger.info(\`Starting user data processing for: \${ctx.input.dataFile}\`)
+	// Read file with error handling
+	let rawData
+	try {
+	  ctx.logger.debug(\`Reading file: \${ctx.input.dataFile}\`)
+	  rawData = await ctx.tools.readFile({ path: ctx.input.dataFile })
+	  if (!rawData) {
+	    throw new Error(\`File not found or empty: \${ctx.input.dataFile}\`)
+	  }
+	} catch (error) {
+	  const err = error instanceof Error ? error : new Error(String(error))
+	  ctx.logger.error(\`Failed to read file: \${err.message}\`)
+	  throw err  // Preserve original stack trace
+	}
+	// Parse and validate data
+	let users
+	try {
+	  ctx.logger.debug('Parsing JSON data')
+	  const parsed = JSON.parse(rawData)
+	  if (!parsed?.users || !Array.isArray(parsed.users)) {
+	    throw new Error('Invalid data format: expected {users: [...]}')
+	  }
+	  users = parsed.users
+	  ctx.logger.info(\`Found \${users.length} users to process\`)
+	} catch (error) {
+	  const err = error instanceof Error ? error : new Error(String(error))
+	  ctx.logger.error(\`Data parsing failed: \${err.message}\`)
+	  throw err  // Preserve original stack trace
+	}
+	// Process each user with progress reporting
+	const results = []
+	for (let i = 0; i < users.length; i++) {
+	  const user = users[i]
+	  // Validate each user object
+	  if (!user?.id || !user?.email) {
+	    ctx.logger.warn(\`Skipping invalid user at index \${i}: missing id or email\`)
+	    continue
+	  }
+	  // Process user
+	  const processed = {
+	    id: user.id,
+	    email: user.email.toLowerCase().trim(),
+	    name: user.name?.trim() || 'Unknown',
+	    processedAt: new Date().toISOString(),
+	    status: 'active'
+	  }
+	  results.push(processed)
+	  // Progress feedback every 10 items
+	  if ((i + 1) % 10 === 0) {
+	    ctx.logger.info(\`Processed \${i + 1}/\${users.length} users\`)
+	  }
+	}
+	ctx.logger.info(\`Successfully processed \${results.length}/\${users.length} users\`)
+	// Return structured result with metadata
+	return {
+	  users: results,
+	  metadata: {
+	    totalInput: users.length,
+	    totalProcessed: results.length,
+	    skipped: users.length - results.length,
+	    processedAt: new Date().toISOString()
+	  }
+	}
+	\`\`\`
+	Key features demonstrated:
+	- Input validation at start
+	- Comprehensive error handling with try-catch that preserves stack traces
+	- Logging at info, debug, warn, and error levels
+	- Progress reporting for long operations (every 10 items)
+	- Data validation throughout (null checks, type checks, array validation)
+	- Structured return value with metadata for observability
+	- Descriptive error messages with context
+	- Meaningful variable names (rawData, users, processed)
+	- Clean async/await usage
+	- Template literals for readable string interpolation
+	- Proper error type guards (error instanceof Error)
+	## Final Instructions
+	REMEMBER: The "code" field must be ONLY the function body statements.
+	- DO NOT wrap code in arrow functions: \`(ctx) => { ... }\`
+	- DO NOT wrap code in async functions: \`async (ctx) => { ... }\`
+	- DO NOT include outer curly braces
+	- DO include a return statement if the step should produce output
+	- Each "code" field should be a string containing multiple statements separated by newlines
+	Return the complete workflow JSON with the "code" fields populated.
+	`;
 var generateWorkflowDefinitionWorkflow = async (input, ctx) => {
   let systemPrompt = WORKFLOW_DEFINITION_SYSTEM_PROMPT;
   if (input.availableTools && input.availableTools.length > 0) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@polka-codes/core",
-  "version": "0.9.79",
+  "version": "0.9.80",
   "license": "AGPL-3.0",
   "author": "github@polka.codes",
   "type": "module",