@polka-codes/core 0.9.79 → 0.9.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -29,7 +29,7 @@ import {
29
29
  agentWorkflow,
30
30
  createContext,
31
31
  makeStepFn,
32
- type ToolResponse,
32
+ type AgentToolResponse,
33
33
  ToolResponseType,
34
34
  } from '@polka-codes/core';
35
35
  import { z } from 'zod';
@@ -52,10 +52,10 @@ async function main() {
52
52
  const { location } = input as z.infer<typeof getCurrentWeather.parameters>;
53
53
  // In a real app, you would call a weather API here
54
54
  const weather = `The weather in ${location} is 70°F and sunny.`;
55
- const response: ToolResponse = { type: ToolResponseType.Reply, message: weather };
55
+ const response: AgentToolResponse = { type: ToolResponseType.Reply, message: weather };
56
56
  return response;
57
57
  }
58
- const response: ToolResponse = { type: ToolResponseType.Error, message: 'Tool not found' };
58
+ const response: AgentToolResponse = { type: ToolResponseType.Error, message: 'Tool not found' };
59
59
  return response;
60
60
  },
61
61
  // A simple text generation function
@@ -21,6 +21,14 @@ import { ZodObject } from 'zod';
21
21
  import { ZodOptional } from 'zod';
22
22
  import { ZodString } from 'zod';
23
23
 
24
+ declare type AgentToolInfo = {
25
+ name: string;
26
+ description: string;
27
+ parameters: z.ZodObject<any>;
28
+ };
29
+ export { AgentToolInfo }
30
+ export { AgentToolInfo as AgentToolInfo_alias_1 }
31
+
24
32
  declare type AgentToolRegistry = {
25
33
  generateText: {
26
34
  input: {
@@ -39,20 +47,24 @@ declare type AgentToolRegistry = {
39
47
  toolName: string;
40
48
  input: any;
41
49
  };
42
- output: ToolResponse;
50
+ output: AgentToolResponse;
43
51
  };
44
52
  };
45
53
  export { AgentToolRegistry }
46
54
  export { AgentToolRegistry as AgentToolRegistry_alias_1 }
47
55
  export { AgentToolRegistry as AgentToolRegistry_alias_2 }
48
56
 
57
+ declare type AgentToolResponse = ToolResponseReply | ToolResponseExit | ToolResponseError;
58
+ export { AgentToolResponse }
59
+ export { AgentToolResponse as AgentToolResponse_alias_1 }
60
+
49
61
  declare const agentWorkflow: WorkflowFn<AgentWorkflowInput, ExitReason, AgentToolRegistry>;
50
62
  export { agentWorkflow }
51
63
  export { agentWorkflow as agentWorkflow_alias_1 }
52
64
  export { agentWorkflow as agentWorkflow_alias_2 }
53
65
 
54
66
  declare type AgentWorkflowInput = {
55
- tools: Readonly<FullToolInfo[]>;
67
+ tools: Readonly<FullAgentToolInfo[]>;
56
68
  maxToolRoundTrips?: number;
57
69
  userMessage: readonly JsonUserModelMessage[];
58
70
  outputSchema?: z.ZodSchema;
@@ -569,7 +581,7 @@ declare type DynamicStepRuntimeContext<TTools extends ToolRegistry> = {
569
581
  logger: Logger;
570
582
  step: StepFn;
571
583
  runWorkflow: (workflowId: string, input?: Record<string, any>) => Promise<any>;
572
- toolInfo: Readonly<FullToolInfo[]> | undefined;
584
+ toolInfo: Readonly<FullAgentToolInfo[]> | undefined;
573
585
  };
574
586
  export { DynamicStepRuntimeContext }
575
587
  export { DynamicStepRuntimeContext as DynamicStepRuntimeContext_alias_1 }
@@ -595,7 +607,7 @@ declare type DynamicWorkflowRunnerOptions = {
595
607
  * Tool definitions used when a step does not have persisted `code`
596
608
  * and needs to be executed via `agentWorkflow`.
597
609
  */
598
- toolInfo?: Readonly<FullToolInfo[]>;
610
+ toolInfo?: Readonly<FullAgentToolInfo[]>;
599
611
  /**
600
612
  * Model id forwarded to `agentWorkflow` for agent-executed steps.
601
613
  */
@@ -618,6 +630,11 @@ declare type DynamicWorkflowRunnerOptions = {
618
630
  input: any;
619
631
  state: any;
620
632
  }) => string;
633
+ /**
634
+ * Whether to wrap plain text agent responses in an object { result: ... }.
635
+ * Defaults to false.
636
+ */
637
+ wrapAgentResultInObject?: boolean;
621
638
  };
622
639
  export { DynamicWorkflowRunnerOptions }
623
640
  export { DynamicWorkflowRunnerOptions as DynamicWorkflowRunnerOptions_alias_1 }
@@ -663,11 +680,11 @@ export { fromJsonModelMessage }
663
680
  export { fromJsonModelMessage as fromJsonModelMessage_alias_1 }
664
681
  export { fromJsonModelMessage as fromJsonModelMessage_alias_2 }
665
682
 
666
- declare type FullToolInfo = ToolInfo & {
667
- handler: ToolHandler<ToolInfo, any>;
683
+ declare type FullAgentToolInfo = AgentToolInfo & {
684
+ handler: ToolHandler<AgentToolInfo, any>;
668
685
  };
669
- export { FullToolInfo }
670
- export { FullToolInfo as FullToolInfo_alias_1 }
686
+ export { FullAgentToolInfo }
687
+ export { FullAgentToolInfo as FullAgentToolInfo_alias_1 }
671
688
 
672
689
  declare type GenerateWorkflowCodeInput = z.infer<typeof GenerateWorkflowCodeInputSchema>;
673
690
  export { GenerateWorkflowCodeInput }
@@ -690,6 +707,7 @@ declare const GenerateWorkflowCodeInputSchema: z.ZodObject<{
690
707
  expected_outcome: z.ZodOptional<z.ZodNullable<z.ZodString>>;
691
708
  code: z.ZodOptional<z.ZodNullable<z.ZodString>>;
692
709
  outputSchema: z.ZodOptional<z.ZodNullable<z.ZodAny>>;
710
+ timeout: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
693
711
  }, z.core.$strip>>;
694
712
  output: z.ZodOptional<z.ZodNullable<z.ZodString>>;
695
713
  }, z.core.$strip>>;
@@ -1266,18 +1284,10 @@ export { toJsonModelMessage }
1266
1284
  export { toJsonModelMessage as toJsonModelMessage_alias_1 }
1267
1285
  export { toJsonModelMessage as toJsonModelMessage_alias_2 }
1268
1286
 
1269
- declare type ToolHandler<_T, P> = (provider: P, args: Partial<Record<string, ToolParameterValue>>) => Promise<ToolResponse>;
1287
+ declare type ToolHandler<_T, P> = (provider: P, args: Partial<Record<string, ToolParameterValue>>) => Promise<AgentToolResponse>;
1270
1288
  export { ToolHandler }
1271
1289
  export { ToolHandler as ToolHandler_alias_1 }
1272
1290
 
1273
- declare type ToolInfo = {
1274
- name: string;
1275
- description: string;
1276
- parameters: z.ZodObject<any>;
1277
- };
1278
- export { ToolInfo }
1279
- export { ToolInfo as ToolInfo_alias_1 }
1280
-
1281
1291
  export declare const toolInfo: {
1282
1292
  readonly name: "askFollowupQuestion";
1283
1293
  readonly description: "Call this when vital details are missing. Pose each follow-up as one direct, unambiguous question. If it speeds the reply, add up to five short, mutually-exclusive answer options. Group any related questions in the same call to avoid a back-and-forth chain.";
@@ -1483,10 +1493,6 @@ export { ToolRegistry }
1483
1493
  export { ToolRegistry as ToolRegistry_alias_1 }
1484
1494
  export { ToolRegistry as ToolRegistry_alias_2 }
1485
1495
 
1486
- declare type ToolResponse = ToolResponseReply | ToolResponseExit | ToolResponseError;
1487
- export { ToolResponse }
1488
- export { ToolResponse as ToolResponse_alias_1 }
1489
-
1490
1496
  declare type ToolResponseError = {
1491
1497
  type: ToolResponseType.Error;
1492
1498
  message: ToolResponseResult;
@@ -1690,6 +1696,7 @@ declare const WorkflowDefinitionSchema: z.ZodObject<{
1690
1696
  expected_outcome: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1691
1697
  code: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1692
1698
  outputSchema: z.ZodOptional<z.ZodNullable<z.ZodAny>>;
1699
+ timeout: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
1693
1700
  }, z.core.$strip>>;
1694
1701
  output: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1695
1702
  }, z.core.$strip>;
@@ -1716,6 +1723,7 @@ declare const WorkflowFileSchema: z.ZodObject<{
1716
1723
  expected_outcome: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1717
1724
  code: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1718
1725
  outputSchema: z.ZodOptional<z.ZodNullable<z.ZodAny>>;
1726
+ timeout: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
1719
1727
  }, z.core.$strip>>;
1720
1728
  output: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1721
1729
  }, z.core.$strip>>;
@@ -1752,6 +1760,7 @@ declare const WorkflowStepDefinitionSchema: z.ZodObject<{
1752
1760
  expected_outcome: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1753
1761
  code: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1754
1762
  outputSchema: z.ZodOptional<z.ZodNullable<z.ZodAny>>;
1763
+ timeout: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
1755
1764
  }, z.core.$strip>;
1756
1765
  export { WorkflowStepDefinitionSchema }
1757
1766
  export { WorkflowStepDefinitionSchema as WorkflowStepDefinitionSchema_alias_1 }
package/dist/index.d.ts CHANGED
@@ -10,15 +10,15 @@ export { configSchema_alias_1 as configSchema } from './_tsup-dts-rollup.js';
10
10
  export { Config_alias_1 as Config } from './_tsup-dts-rollup.js';
11
11
  export { ToolParameterValue } from './_tsup-dts-rollup.js';
12
12
  export { ToolParameter } from './_tsup-dts-rollup.js';
13
- export { ToolInfo } from './_tsup-dts-rollup.js';
14
- export { FullToolInfo } from './_tsup-dts-rollup.js';
13
+ export { AgentToolInfo } from './_tsup-dts-rollup.js';
14
+ export { FullAgentToolInfo } from './_tsup-dts-rollup.js';
15
15
  export { ToolResponseType } from './_tsup-dts-rollup.js';
16
16
  export { ToolResponseResultMedia } from './_tsup-dts-rollup.js';
17
17
  export { ToolResponseResult } from './_tsup-dts-rollup.js';
18
18
  export { ToolResponseReply } from './_tsup-dts-rollup.js';
19
19
  export { ToolResponseExit } from './_tsup-dts-rollup.js';
20
20
  export { ToolResponseError } from './_tsup-dts-rollup.js';
21
- export { ToolResponse } from './_tsup-dts-rollup.js';
21
+ export { AgentToolResponse } from './_tsup-dts-rollup.js';
22
22
  export { ToolHandler } from './_tsup-dts-rollup.js';
23
23
  export { askFollowupQuestion } from './_tsup-dts-rollup.js';
24
24
  export { executeCommand } from './_tsup-dts-rollup.js';
package/dist/index.js CHANGED
@@ -1930,7 +1930,11 @@ var WorkflowStepDefinitionSchema = z21.object({
1930
1930
  * Optional JSON schema or other metadata for future structured outputs.
1931
1931
  * Not interpreted by core today.
1932
1932
  */
1933
- outputSchema: z21.any().nullish()
1933
+ outputSchema: z21.any().nullish(),
1934
+ /**
1935
+ * Optional timeout in milliseconds. Step execution will be aborted if it exceeds this duration.
1936
+ */
1937
+ timeout: z21.number().positive().nullish()
1934
1938
  });
1935
1939
  var WorkflowDefinitionSchema = z21.object({
1936
1940
  task: z21.string(),
@@ -1957,6 +1961,28 @@ function parseDynamicWorkflowDefinition(source) {
1957
1961
  }
1958
1962
  var AsyncFunction = Object.getPrototypeOf(async () => {
1959
1963
  }).constructor;
1964
+ function validateAndApplyDefaults(workflowId, workflow, input) {
1965
+ if (!workflow.inputs || workflow.inputs.length === 0) {
1966
+ return input;
1967
+ }
1968
+ const validatedInput = {};
1969
+ const errors = [];
1970
+ for (const inputDef of workflow.inputs) {
1971
+ const providedValue = input[inputDef.id];
1972
+ if (providedValue !== void 0 && providedValue !== null) {
1973
+ validatedInput[inputDef.id] = providedValue;
1974
+ } else if (inputDef.default !== void 0 && inputDef.default !== null) {
1975
+ validatedInput[inputDef.id] = inputDef.default;
1976
+ } else {
1977
+ errors.push(`Missing required input '${inputDef.id}'${inputDef.description ? `: ${inputDef.description}` : ""}`);
1978
+ }
1979
+ }
1980
+ if (errors.length > 0) {
1981
+ throw new Error(`Workflow '${workflowId}' input validation failed:
1982
+ ${errors.map((e) => ` - ${e}`).join("\n")}`);
1983
+ }
1984
+ return validatedInput;
1985
+ }
1960
1986
  function createRunWorkflowFn(args) {
1961
1987
  return async (subWorkflowId, subInput) => {
1962
1988
  const mergedInput = { ...args.input, ...args.state, ...subInput ?? {} };
@@ -1977,8 +2003,13 @@ function compileStep(stepDef, workflowId, compiledSteps) {
1977
2003
  compiledSteps.set(key, fn);
1978
2004
  return fn;
1979
2005
  } catch (error) {
2006
+ const errorMsg = error instanceof Error ? error.message : String(error);
2007
+ const codePreview = stepDef.code.length > 200 ? `${stepDef.code.substring(0, 200)}...` : stepDef.code;
1980
2008
  throw new Error(
1981
- `Failed to compile code for step '${stepDef.id}' in workflow '${workflowId}': ${error instanceof Error ? error.message : String(error)}`
2009
+ `Failed to compile code for step '${stepDef.id}' in workflow '${workflowId}':
2010
+ Error: ${errorMsg}
2011
+ Code:
2012
+ ${codePreview.split("\n").map((line) => ` ${line}`).join("\n")}`
1982
2013
  );
1983
2014
  }
1984
2015
  }
@@ -2010,6 +2041,7 @@ async function executeStepWithAgent(stepDef, workflowId, input, state, context,
2010
2041
  });
2011
2042
  }
2012
2043
  const allowedToolNameSet = new Set(toolsForAgent.map((t) => t.name));
2044
+ context.logger.debug(`[Agent] Available tools for step '${stepDef.id}': ${toolsForAgent.map((t) => t.name).join(", ")}`);
2013
2045
  const systemPrompt = options.stepSystemPrompt?.({ workflowId, step: stepDef, input, state }) ?? [
2014
2046
  `You are an AI assistant executing a workflow step.`,
2015
2047
  "",
@@ -2079,28 +2111,86 @@ async function executeStepWithAgent(stepDef, workflowId, input, state, context,
2079
2111
  if (parsed.success) {
2080
2112
  return parsed.data;
2081
2113
  }
2114
+ if (options.wrapAgentResultInObject) {
2115
+ context.logger.warn(`[Agent] Step '${stepDef.id}' returned plain text instead of JSON. Wrapping in {result: ...}`);
2116
+ return { result: result.message };
2117
+ }
2082
2118
  return result.message;
2083
2119
  }
2084
- throw new Error(`Agent execution for step '${stepDef.id}' in workflow '${workflowId}' did not exit cleanly.`);
2120
+ if (result.type === "Error") {
2121
+ throw new Error(`Agent step '${stepDef.id}' in workflow '${workflowId}' failed: ${result.error?.message || "Unknown error"}`);
2122
+ }
2123
+ if (result.type === "UsageExceeded") {
2124
+ throw new Error(`Agent step '${stepDef.id}' in workflow '${workflowId}' exceeded usage limits (tokens or rounds)`);
2125
+ }
2126
+ throw new Error(`Agent step '${stepDef.id}' in workflow '${workflowId}' exited unexpectedly with type: ${result.type}`);
2127
+ }
2128
+ async function executeStepWithTimeout(stepDef, workflowId, input, state, context, options, compiledSteps, runInternal) {
2129
+ const executeStepLogic = async () => {
2130
+ if (stepDef.code && options.allowUnsafeCodeExecution) {
2131
+ context.logger.debug(`[Step] Executing step '${stepDef.id}' with compiled code`);
2132
+ const fn = compileStep(stepDef, workflowId, compiledSteps);
2133
+ const runWorkflow = createRunWorkflowFn({ input, state, context, runInternal });
2134
+ const runtimeCtx = {
2135
+ workflowId,
2136
+ stepId: stepDef.id,
2137
+ input,
2138
+ state,
2139
+ tools: context.tools,
2140
+ logger: context.logger,
2141
+ step: context.step,
2142
+ runWorkflow,
2143
+ toolInfo: options.toolInfo
2144
+ };
2145
+ const result2 = await fn(runtimeCtx);
2146
+ context.logger.debug(`[Step] Compiled code execution completed for step '${stepDef.id}'`);
2147
+ return result2;
2148
+ }
2149
+ context.logger.debug(`[Step] Executing step '${stepDef.id}' with agent`);
2150
+ const result = await executeStepWithAgent(stepDef, workflowId, input, state, context, options, runInternal);
2151
+ context.logger.debug(`[Step] Agent execution completed for step '${stepDef.id}'`);
2152
+ return result;
2153
+ };
2154
+ if (stepDef.timeout && stepDef.timeout > 0) {
2155
+ context.logger.debug(`[Step] Step '${stepDef.id}' has timeout of ${stepDef.timeout}ms`);
2156
+ let timeoutId;
2157
+ const timeoutPromise = new Promise((_, reject) => {
2158
+ timeoutId = setTimeout(
2159
+ () => reject(new Error(`Step '${stepDef.id}' in workflow '${workflowId}' timed out after ${stepDef.timeout}ms`)),
2160
+ stepDef.timeout
2161
+ );
2162
+ });
2163
+ try {
2164
+ return await Promise.race([executeStepLogic(), timeoutPromise]);
2165
+ } finally {
2166
+ if (timeoutId) clearTimeout(timeoutId);
2167
+ }
2168
+ }
2169
+ return await executeStepLogic();
2085
2170
  }
2086
2171
  async function executeStep(stepDef, workflowId, input, state, context, options, compiledSteps, runInternal) {
2087
- if (stepDef.code && options.allowUnsafeCodeExecution) {
2088
- const fn = compileStep(stepDef, workflowId, compiledSteps);
2089
- const runWorkflow = createRunWorkflowFn({ input, state, context, runInternal });
2090
- const runtimeCtx = {
2091
- workflowId,
2092
- stepId: stepDef.id,
2093
- input,
2094
- state,
2095
- tools: context.tools,
2096
- logger: context.logger,
2097
- step: context.step,
2098
- runWorkflow,
2099
- toolInfo: options.toolInfo
2100
- };
2101
- return await fn(runtimeCtx);
2172
+ const result = await executeStepWithTimeout(stepDef, workflowId, input, state, context, options, compiledSteps, runInternal);
2173
+ if (stepDef.outputSchema) {
2174
+ try {
2175
+ const _schema = z22.any();
2176
+ if (typeof stepDef.outputSchema === "object") {
2177
+ context.logger.debug(`[Step] Validating output for step '${stepDef.id}' against schema`);
2178
+ if (stepDef.outputSchema.type === "object") {
2179
+ if (typeof result !== "object" || result === null || Array.isArray(result)) {
2180
+ throw new Error(`Expected object output, got ${Array.isArray(result) ? "array" : result === null ? "null" : typeof result}`);
2181
+ }
2182
+ }
2183
+ if (stepDef.outputSchema.type === "array" && !Array.isArray(result)) {
2184
+ throw new Error(`Expected array output, got ${typeof result}`);
2185
+ }
2186
+ }
2187
+ } catch (error) {
2188
+ throw new Error(
2189
+ `Step '${stepDef.id}' in workflow '${workflowId}' output validation failed: ${error instanceof Error ? error.message : String(error)}`
2190
+ );
2191
+ }
2102
2192
  }
2103
- return await executeStepWithAgent(stepDef, workflowId, input, state, context, options, runInternal);
2193
+ return result;
2104
2194
  }
2105
2195
  function createDynamicWorkflow(definition, options = {}) {
2106
2196
  if (typeof definition === "string") {
@@ -2116,19 +2206,37 @@ function createDynamicWorkflow(definition, options = {}) {
2116
2206
  if (!workflow) {
2117
2207
  throw new Error(`Workflow '${workflowId}' not found`);
2118
2208
  }
2209
+ const validatedInput = validateAndApplyDefaults(workflowId, workflow, input);
2210
+ context.logger.info(`[Workflow] Starting workflow '${workflowId}'`);
2211
+ context.logger.debug(`[Workflow] Input: ${JSON.stringify(validatedInput)}`);
2212
+ context.logger.debug(`[Workflow] Inherited state: ${JSON.stringify(inheritedState)}`);
2213
+ context.logger.debug(`[Workflow] Steps: ${workflow.steps.map((s) => s.id).join(", ")}`);
2119
2214
  const state = { ...inheritedState };
2120
2215
  let lastOutput;
2121
- for (const stepDef of workflow.steps) {
2216
+ for (let i = 0; i < workflow.steps.length; i++) {
2217
+ const stepDef = workflow.steps[i];
2122
2218
  const stepName = `${workflowId}.${stepDef.id}`;
2219
+ context.logger.info(`[Workflow] Step ${i + 1}/${workflow.steps.length}: ${stepDef.id}`);
2220
+ context.logger.debug(`[Workflow] Step task: ${stepDef.task}`);
2221
+ if (stepDef.expected_outcome) {
2222
+ context.logger.debug(`[Workflow] Expected outcome: ${stepDef.expected_outcome}`);
2223
+ }
2224
+ context.logger.debug(`[Workflow] Current state keys: ${Object.keys(state).join(", ")}`);
2123
2225
  lastOutput = await context.step(stepName, async () => {
2124
- return await executeStep(stepDef, workflowId, input, state, context, options, compiledSteps, runInternal);
2226
+ return await executeStep(stepDef, workflowId, validatedInput, state, context, options, compiledSteps, runInternal);
2125
2227
  });
2126
2228
  const outputKey = stepDef.output ?? stepDef.id;
2127
2229
  state[outputKey] = lastOutput;
2230
+ context.logger.debug(
2231
+ `[Workflow] Step output stored as '${outputKey}': ${typeof lastOutput === "object" ? JSON.stringify(lastOutput).substring(0, 200) : lastOutput}`
2232
+ );
2128
2233
  }
2234
+ context.logger.info(`[Workflow] Completed workflow '${workflowId}'`);
2129
2235
  if (workflow.output) {
2236
+ context.logger.debug(`[Workflow] Returning output field: ${workflow.output}`);
2130
2237
  return state[workflow.output];
2131
2238
  }
2239
+ context.logger.debug(`[Workflow] Returning full state with keys: ${Object.keys(state).join(", ")}`);
2132
2240
  return state;
2133
2241
  };
2134
2242
  return async (workflowId, input, context) => {
@@ -2165,8 +2273,11 @@ The workflow definition must follow this structure:
2165
2273
  {
2166
2274
  "id": "stepId",
2167
2275
  "task": "Description of the step",
2168
- "tools": ["toolName1", "toolName2"], // Optional list of tools needed
2169
- "output": "outputVariableName", // Optional
2276
+ "tools": ["toolName1", "toolName2"], // Optional: restrict which tools can be used
2277
+ "output": "outputVariableName", // Optional: defaults to step id
2278
+ "timeout": 30000, // Optional: timeout in milliseconds
2279
+ "expected_outcome": "What this step produces", // Optional: documentation
2280
+ "outputSchema": { "type": "object" } // Optional: validation schema
2170
2281
  }
2171
2282
  ],
2172
2283
  "output": "outputVariableName" // Optional
@@ -2180,6 +2291,15 @@ Constraints:
2180
2291
  - Break down complex tasks into logical steps.
2181
2292
  - Define clear inputs and outputs.
2182
2293
 
2294
+ Quality Guidelines:
2295
+ - Add "timeout" field (in milliseconds) for steps that might take long (file I/O, API calls, searches)
2296
+ - Use "expected_outcome" field to document what each step should produce
2297
+ - Use descriptive step IDs (e.g., "validateInput", "fetchUserData", not "step1", "step2")
2298
+ - Design steps to be focused - one responsibility per step
2299
+ - For steps that process multiple items, consider creating a sub-workflow
2300
+ - Add "outputSchema" with type information for validation-critical steps
2301
+ - Order steps logically with clear data flow
2302
+
2183
2303
  Example 1:
2184
2304
  User: "Research a topic and summarize it."
2185
2305
  Output:
@@ -2267,70 +2387,407 @@ Your task is to implement the TypeScript code for the steps in the provided work
2267
2387
  You will receive a JSON workflow definition where the "code" field is null.
2268
2388
  You must fill in the "code" field for each step with valid TypeScript code.
2269
2389
 
2270
- The code will be executed in an async function with the following signature:
2271
- async (ctx) => {
2272
- // Your code here
2273
- }
2390
+ CRITICAL: Each step "code" field must contain ONLY the function body statements (the code inside the curly braces).
2391
+ DO NOT include function declaration, arrow function syntax, async keyword, parameter list, or outer curly braces.
2274
2392
 
2275
- The \`ctx\` object provides access to:
2276
- - \`ctx.input\`: The workflow inputs.
2277
- - \`ctx.state\`: A shared state object for passing data between steps.
2278
- - \`ctx.tools\`: An object containing available tools.
2279
- - \`ctx.runWorkflow\`: (workflowId: string, input?: any) => Promise<any>. Use this to run other workflows.
2280
-
2281
- Guidelines:
2282
- - Use \`await\` for asynchronous operations.
2283
- - Return the output value of the step.
2284
- - Access inputs via \`ctx.input.inputName\`.
2285
- - Access previous step outputs via \`ctx.state.stepOutputName\`.
2286
- - Use \`ctx.tools.invokeTool({ toolName: 'name', input: { ... } })\` to call tools.
2287
- - Use \`ctx.tools.generateText({ messages: [...] })\` for LLM calls.
2288
- - Use \`ctx.tools.invokeTool({ toolName: 'runAgent', input: { prompt: '...' } })\` for complex sub-tasks that require multiple steps or tools. Prefer this over \`generateText\` for advanced tasks.
2289
-
2290
- Example Code for a step:
2291
- \`\`\`typescript
2292
- const searchResults = await ctx.tools.invokeTool({
2293
- toolName: 'search',
2294
- input: { query: ctx.input.topic }
2295
- });
2296
- return searchResults;
2297
- \`\`\`
2393
+ The code will be wrapped automatically in: \`async (ctx) => { YOUR_CODE_HERE }\`
2298
2394
 
2299
- Example Code for LLM step:
2300
- \`\`\`typescript
2301
- const summary = await ctx.tools.generateText({
2302
- messages: [
2303
- { role: 'system', content: 'Summarize the following text.' },
2304
- { role: 'user', content: ctx.state.searchResults }
2305
- ]
2306
- });
2307
- return summary;
2308
- \`\`\`
2395
+ Example of CORRECT code field:
2396
+ \`\`\`ts
2397
+ const result = await ctx.tools.readFile({ path: 'README.md' })
2398
+ if (!result) throw new Error('File not found')
2399
+ return result
2400
+ \`\`\`
2309
2401
 
2310
- Example Code for runAgent:
2311
- \`\`\`typescript
2312
- const result = await ctx.tools.invokeTool({
2313
- toolName: 'runAgent',
2314
- input: {
2315
- prompt: 'Research the history of the internet and write a summary.',
2316
- tools: ['search', 'generateText']
2317
- }
2318
- });
2319
- return result;
2320
- \`\`\`
2402
+ Example of INCORRECT code field (DO NOT DO THIS):
2403
+ \`\`\`ts
2404
+ async (ctx) => {
2405
+ const result = await ctx.tools.readFile({ path: 'README.md' })
2406
+ return result
2407
+ }
2408
+ \`\`\`
2321
2409
 
2322
- Example Code for invoking a sub-workflow:
2323
- \`\`\`typescript
2324
- const results = [];
2325
- for (const pr of ctx.state.prs) {
2326
- const review = await ctx.runWorkflow('reviewPR', { prId: pr.id });
2327
- results.push(review);
2328
- }
2329
- return results;
2330
- \`\`\`
2410
+ Example of INCORRECT code field (DO NOT DO THIS):
2411
+ \`\`\`ts
2412
+ (ctx) => {
2413
+ return 'hello'
2414
+ }
2415
+ \`\`\`
2331
2416
 
2332
- Return the complete workflow JSON with the "code" fields populated.
2333
- `;
2417
+ ## Runtime context (ctx)
2418
+ \`\`\`ts
2419
+ // Runtime types (for reference)
2420
+ type Logger = {
2421
+ debug: (...args: any[]) => void
2422
+ info: (...args: any[]) => void
2423
+ warn: (...args: any[]) => void
2424
+ error: (...args: any[]) => void
2425
+ }
2426
+
2427
+ type StepFn = {
2428
+ <T>(name: string, fn: () => Promise<T>): Promise<T>
2429
+ <T>(name: string, options: { retry?: number }, fn: () => Promise<T>): Promise<T>
2430
+ }
2431
+
2432
+ type JsonModelMessage = { role: 'system' | 'user' | 'assistant' | 'tool'; content: any }
2433
+ type JsonResponseMessage = { role: 'assistant' | 'tool'; content: any }
2434
+ type ToolSet = Record<string, any>
2435
+
2436
+ type ToolResponseResult =
2437
+ | { type: 'text'; value: string }
2438
+ | { type: 'json'; value: any }
2439
+ | { type: 'error-text'; value: string }
2440
+ | { type: 'error-json'; value: any }
2441
+ | { type: 'content'; value: any[] }
2442
+
2443
+ type AgentToolResponse =
2444
+ | { type: 'Reply'; message: ToolResponseResult }
2445
+ | { type: 'Exit'; message: string; object?: any }
2446
+ | { type: 'Error'; message: ToolResponseResult }
2447
+
2448
+ type ExitReason =
2449
+ | { type: 'UsageExceeded' }
2450
+ | { type: 'Exit'; message: string; object?: any }
2451
+ | { type: 'Error'; error: { message: string; stack?: string } }
2452
+
2453
+ type FullAgentToolInfo = { name: string; description: string; parameters: any; handler: any }
2454
+
2455
+ // Tools available on ctx.tools in dynamic steps
2456
+ type DynamicWorkflowTools = {
2457
+ // LLM + agent helpers
2458
+ generateText: (input: { messages: JsonModelMessage[]; tools: ToolSet }) => Promise<JsonResponseMessage[]>
2459
+ runAgent: (input: {
2460
+ tools: Readonly<FullAgentToolInfo[]>
2461
+ maxToolRoundTrips?: number
2462
+ userMessage: readonly JsonModelMessage[]
2463
+ } & ({ messages: JsonModelMessage[] } | { systemPrompt: string })) => Promise<ExitReason>
2464
+
2465
+ // Generic bridge to "agent tools" by name
2466
+ invokeTool: (input: { toolName: string; input: any }) => Promise<AgentToolResponse>
2467
+
2468
+ // File + command helpers (direct)
2469
+ readFile: (input: { path: string }) => Promise<string | null>
2470
+ writeToFile: (input: { path: string; content: string }) => Promise<void>
2471
+ executeCommand: (input: { command: string; pipe?: boolean } & ({ args: string[]; shell?: false } | { shell: true })) => Promise<{
2472
+ exitCode: number
2473
+ stdout: string
2474
+ stderr: string
2475
+ }>
2476
+
2477
+ // CLI UX helpers
2478
+ confirm: (input: { message: string }) => Promise<boolean>
2479
+ input: (input: { message: string; default?: string }) => Promise<string>
2480
+ select: (input: { message: string; choices: { name: string; value: string }[] }) => Promise<string>
2481
+ }
2482
+
2483
+ type DynamicStepRuntimeContext = {
2484
+ workflowId: string
2485
+ stepId: string
2486
+ input: Record<string, any>
2487
+ state: Record<string, any>
2488
+ tools: DynamicWorkflowTools
2489
+ logger: Logger
2490
+ step: StepFn
2491
+ runWorkflow: (workflowId: string, input?: Record<string, any>) => Promise<any>
2492
+ toolInfo?: ReadonlyArray<FullAgentToolInfo>
2493
+ }
2494
+ \`\`\`
2495
+
2496
+ - \`ctx.input\`: workflow inputs (read-only).
2497
+ - \`ctx.state\`: shared state between steps (previous step outputs are stored here).
2498
+ - \`ctx.tools\`: async tool functions. Call tools as \`await ctx.tools.someTool({ ... })\`.
2499
+ - \`ctx.runWorkflow\`: run a sub-workflow by id.
2500
+
2501
+ ## Guidelines
2502
+ - Use \`await\` for all async operations.
2503
+ - Return the output value for the step (this becomes the step output).
2504
+ - Access inputs via \`ctx.input.<inputId>\`.
2505
+ - Access previous step outputs via \`ctx.state.<stepOutputKey>\` (defaults to the step \`output\` or \`id\`).
2506
+
2507
+ ## Quality Guidelines for Code Implementation
2508
+
2509
+ ### Error Handling
2510
+ - ALWAYS validate inputs at the start of steps
2511
+ - Use try-catch for operations that might fail (file I/O, parsing, API calls)
2512
+ - Preserve stack traces: re-throw original errors rather than creating new ones
2513
+ - Use error type guards: \`const err = error instanceof Error ? error : new Error(String(error))\`
2514
+ - Check for null/undefined before using values
2515
+ - Handle edge cases (empty arrays, missing files, invalid data)
2516
+
2517
+ ### Logging
2518
+ - Use \`ctx.logger.info()\` for important progress updates
2519
+ - Use \`ctx.logger.debug()\` for detailed information
2520
+ - Use \`ctx.logger.warn()\` for recoverable issues
2521
+ - Use \`ctx.logger.error()\` before throwing errors
2522
+ - Log when starting and completing significant operations
2523
+ - Use template literals for readability: \`ctx.logger.info(\\\`Processing \${items.length} items...\\\`)\`
2524
+
2525
+ ### User Experience
2526
+ - Provide progress feedback for long operations
2527
+ - Return structured data (objects/arrays), not strings when possible
2528
+ - Include helpful metadata in results (counts, timestamps, status)
2529
+ - For batch operations, report progress: \`Processed 5/10 items\`
2530
+
2531
+ ### Data Validation
2532
+ - Validate required fields exist before accessing
2533
+ - Check data types match expectations
2534
+ - Validate array lengths before iteration
2535
+ - Example: \`if (!data?.users || !Array.isArray(data.users)) throw new Error('Invalid data format')\`
2536
+
2537
+ ### Best Practices
2538
+ - Use meaningful variable names
2539
+ - Avoid nested callbacks - use async/await
2540
+ - Clean up resources (close files, clear timeouts)
2541
+ - Return consistent data structures across similar steps
2542
+ - For iteration, consider batching or rate limiting
2543
+
2544
+ ### When to Simplify
2545
+ - Simple transformation steps (e.g., formatting strings) need only basic error handling
2546
+ - Internal sub-workflow steps with validated inputs from parent can skip redundant validation
2547
+ - Minimal logging is fine for fast steps (<100ms) that don't perform I/O or external calls
2548
+ - Use judgment: match error handling complexity to the step's failure risk and impact
2549
+
2550
+ ## Tool calling examples (every tool)
2551
+
2552
+ ### Direct ctx.tools methods
2553
+ \`\`\`ts
2554
+ // readFile
2555
+ const readme = await ctx.tools.readFile({ path: 'README.md' })
2556
+ if (readme == null) throw new Error('README.md not found')
2557
+
2558
+ // writeToFile
2559
+ await ctx.tools.writeToFile({ path: 'notes.txt', content: 'hello\\n' })
2560
+
2561
+ // executeCommand (args form)
2562
+ const rg = await ctx.tools.executeCommand({ command: 'rg', args: ['-n', 'TODO', '.'] })
2563
+ if (rg.exitCode !== 0) throw new Error(rg.stderr)
2564
+
2565
+ // executeCommand (shell form)
2566
+ await ctx.tools.executeCommand({ command: 'ls -la', shell: true, pipe: true })
2567
+
2568
+ // generateText (LLM call; pass tools: {})
2569
+ const msgs = await ctx.tools.generateText({
2570
+ messages: [
2571
+ { role: 'system', content: 'Summarize the following text.' },
2572
+ { role: 'user', content: readme },
2573
+ ],
2574
+ tools: {},
2575
+ })
2576
+ const last = msgs[msgs.length - 1]
2577
+ const lastText = typeof last?.content === 'string' ? last.content : JSON.stringify(last?.content)
2578
+
2579
+ // runAgent (nested agent; use ctx.toolInfo as the tool list)
2580
+ const agentRes = await ctx.tools.runAgent({
2581
+ systemPrompt: 'You are a helpful assistant.',
2582
+ userMessage: [{ role: 'user', content: 'Summarize README.md in 3 bullets.' }],
2583
+ tools: (ctx.toolInfo ?? []) as any,
2584
+ })
2585
+ if (agentRes.type !== 'Exit') throw new Error('runAgent failed')
2586
+
2587
+ // confirm / input / select (interactive)
2588
+ const ok = await ctx.tools.confirm({ message: 'Proceed?' })
2589
+ const name = await ctx.tools.input({ message: 'Name?', default: 'main' })
2590
+ const flavor = await ctx.tools.select({
2591
+ message: 'Pick one',
2592
+ choices: [
2593
+ { name: 'A', value: 'a' },
2594
+ { name: 'B', value: 'b' },
2595
+ ],
2596
+ })
2597
+
2598
+ \`\`\`
2599
+
2600
+ ### Agent tools via ctx.tools.invokeTool (toolName examples)
2601
+ \`\`\`ts
2602
+ // Helper to unwrap a successful tool reply
2603
+ function unwrapToolValue(resp: any) {
2604
+ if (!resp || resp.type !== 'Reply') {
2605
+ const msg = resp?.message?.value
2606
+ throw new Error(typeof msg === 'string' ? msg : JSON.stringify(resp))
2607
+ }
2608
+ return resp.message.value
2609
+ }
2610
+
2611
+ // askFollowupQuestion
2612
+ const answersText = unwrapToolValue(
2613
+ await ctx.tools.invokeTool({
2614
+ toolName: 'askFollowupQuestion',
2615
+ input: { questions: [{ prompt: 'Which directory?', options: ['src', 'packages'] }] },
2616
+ }),
2617
+ )
2618
+
2619
+ // listFiles
2620
+ const filesText = unwrapToolValue(
2621
+ await ctx.tools.invokeTool({
2622
+ toolName: 'listFiles',
2623
+ input: { path: 'src', recursive: true, maxCount: 2000, includeIgnored: false },
2624
+ }),
2625
+ )
2626
+
2627
+ // searchFiles
2628
+ const hitsText = unwrapToolValue(
2629
+ await ctx.tools.invokeTool({
2630
+ toolName: 'searchFiles',
2631
+ input: { path: '.', regex: 'generateWorkflowCodeWorkflow', filePattern: '*.ts' },
2632
+ }),
2633
+ )
2634
+
2635
+ // fetchUrl
2636
+ const pageText = unwrapToolValue(await ctx.tools.invokeTool({ toolName: 'fetchUrl', input: { url: 'https://example.com' } }))
2637
+
2638
+ // search (web search)
2639
+ const webResults = unwrapToolValue(
2640
+ await ctx.tools.invokeTool({ toolName: 'search', input: { query: 'TypeScript zod schema examples' } }),
2641
+ )
2642
+
2643
+ // executeCommand (provider-backed; may require approval in some environments)
2644
+ const cmdText = unwrapToolValue(
2645
+ await ctx.tools.invokeTool({ toolName: 'executeCommand', input: { command: 'bun test', requiresApproval: false } }),
2646
+ )
2647
+
2648
+ // readFile / writeToFile (provider-backed)
2649
+ const fileText = unwrapToolValue(
2650
+ await ctx.tools.invokeTool({ toolName: 'readFile', input: { path: 'README.md', includeIgnored: false } }),
2651
+ )
2652
+ const writeText = unwrapToolValue(await ctx.tools.invokeTool({ toolName: 'writeToFile', input: { path: 'out.txt', content: 'hi' } }))
2653
+
2654
+ // replaceInFile
2655
+ const diff = ['<<<<<<< SEARCH', 'old', '=======', 'new', '>>>>>>> REPLACE'].join('\\n')
2656
+ const replaceText = unwrapToolValue(await ctx.tools.invokeTool({ toolName: 'replaceInFile', input: { path: 'out.txt', diff } }))
2657
+
2658
+ // removeFile / renameFile
2659
+ const rmText = unwrapToolValue(await ctx.tools.invokeTool({ toolName: 'removeFile', input: { path: 'out.txt' } }))
2660
+ const mvText = unwrapToolValue(
2661
+ await ctx.tools.invokeTool({ toolName: 'renameFile', input: { source_path: 'a.txt', target_path: 'b.txt' } }),
2662
+ )
2663
+
2664
+ // readBinaryFile (returns { type: 'content', value: [...] } in resp.message)
2665
+ const binResp = await ctx.tools.invokeTool({ toolName: 'readBinaryFile', input: { url: 'file://path/to/image.png' } })
2666
+ \`\`\`
2667
+
2668
+ ### Sub-workflow example (ctx.runWorkflow)
2669
+ \`\`\`ts
2670
+ const results: any[] = []
2671
+ for (const pr of ctx.state.prs ?? []) {
2672
+ results.push(await ctx.runWorkflow('reviewPR', { prId: pr.id }))
2673
+ }
2674
+ return results
2675
+ \`\`\`
2676
+
2677
+ ## Complete Example: High-Quality Step Implementation
2678
+
2679
+ This example demonstrates all quality guidelines in a single step:
2680
+
2681
+ \`\`\`ts
2682
+ // Step: processUserData
2683
+ // Task: Read, validate, and process user data from a file
2684
+
2685
+ // Input validation
2686
+ if (!ctx.input.dataFile) {
2687
+ throw new Error('Missing required input: dataFile')
2688
+ }
2689
+
2690
+ ctx.logger.info(\`Starting user data processing for: \${ctx.input.dataFile}\`)
2691
+
2692
+ // Read file with error handling
2693
+ let rawData
2694
+ try {
2695
+ ctx.logger.debug(\`Reading file: \${ctx.input.dataFile}\`)
2696
+ rawData = await ctx.tools.readFile({ path: ctx.input.dataFile })
2697
+
2698
+ if (!rawData) {
2699
+ throw new Error(\`File not found or empty: \${ctx.input.dataFile}\`)
2700
+ }
2701
+ } catch (error) {
2702
+ const err = error instanceof Error ? error : new Error(String(error))
2703
+ ctx.logger.error(\`Failed to read file: \${err.message}\`)
2704
+ throw err // Preserve original stack trace
2705
+ }
2706
+
2707
+ // Parse and validate data
2708
+ let users
2709
+ try {
2710
+ ctx.logger.debug('Parsing JSON data')
2711
+ const parsed = JSON.parse(rawData)
2712
+
2713
+ if (!parsed?.users || !Array.isArray(parsed.users)) {
2714
+ throw new Error('Invalid data format: expected {users: [...]}')
2715
+ }
2716
+
2717
+ users = parsed.users
2718
+ ctx.logger.info(\`Found \${users.length} users to process\`)
2719
+ } catch (error) {
2720
+ const err = error instanceof Error ? error : new Error(String(error))
2721
+ ctx.logger.error(\`Data parsing failed: \${err.message}\`)
2722
+ throw err // Preserve original stack trace
2723
+ }
2724
+
2725
+ // Process each user with progress reporting
2726
+ const results = []
2727
+ for (let i = 0; i < users.length; i++) {
2728
+ const user = users[i]
2729
+
2730
+ // Validate each user object
2731
+ if (!user?.id || !user?.email) {
2732
+ ctx.logger.warn(\`Skipping invalid user at index \${i}: missing id or email\`)
2733
+ continue
2734
+ }
2735
+
2736
+ // Process user
2737
+ const processed = {
2738
+ id: user.id,
2739
+ email: user.email.toLowerCase().trim(),
2740
+ name: user.name?.trim() || 'Unknown',
2741
+ processedAt: new Date().toISOString(),
2742
+ status: 'active'
2743
+ }
2744
+
2745
+ results.push(processed)
2746
+
2747
+ // Progress feedback every 10 items
2748
+ if ((i + 1) % 10 === 0) {
2749
+ ctx.logger.info(\`Processed \${i + 1}/\${users.length} users\`)
2750
+ }
2751
+ }
2752
+
2753
+ ctx.logger.info(\`Successfully processed \${results.length}/\${users.length} users\`)
2754
+
2755
+ // Return structured result with metadata
2756
+ return {
2757
+ users: results,
2758
+ metadata: {
2759
+ totalInput: users.length,
2760
+ totalProcessed: results.length,
2761
+ skipped: users.length - results.length,
2762
+ processedAt: new Date().toISOString()
2763
+ }
2764
+ }
2765
+ \`\`\`
2766
+
2767
+ Key features demonstrated:
2768
+ - Input validation at start
2769
+ - Comprehensive error handling with try-catch that preserves stack traces
2770
+ - Logging at info, debug, warn, and error levels
2771
+ - Progress reporting for long operations (every 10 items)
2772
+ - Data validation throughout (null checks, type checks, array validation)
2773
+ - Structured return value with metadata for observability
2774
+ - Descriptive error messages with context
2775
+ - Meaningful variable names (rawData, users, processed)
2776
+ - Clean async/await usage
2777
+ - Template literals for readable string interpolation
2778
+ - Proper error type guards (error instanceof Error)
2779
+
2780
+ ## Final Instructions
2781
+
2782
+ REMEMBER: The "code" field must be ONLY the function body statements.
2783
+ - DO NOT wrap code in arrow functions: \`(ctx) => { ... }\`
2784
+ - DO NOT wrap code in async functions: \`async (ctx) => { ... }\`
2785
+ - DO NOT include outer curly braces
2786
+ - DO include a return statement if the step should produce output
2787
+ - Each "code" field should be a string containing multiple statements separated by newlines
2788
+
2789
+ Return the complete workflow JSON with the "code" fields populated.
2790
+ `;
2334
2791
  var generateWorkflowDefinitionWorkflow = async (input, ctx) => {
2335
2792
  let systemPrompt = WORKFLOW_DEFINITION_SYSTEM_PROMPT;
2336
2793
  if (input.availableTools && input.availableTools.length > 0) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polka-codes/core",
3
- "version": "0.9.79",
3
+ "version": "0.9.80",
4
4
  "license": "AGPL-3.0",
5
5
  "author": "github@polka.codes",
6
6
  "type": "module",