npm - @output.ai/cli - Versions diffs - 0.7.6 → 0.7.8-dev.pr263-a59dd0e - Mend

@output.ai/cli 0.7.6 → 0.7.8-dev.pr263-a59dd0e

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/dist/api/http_client.d.ts CHANGED Viewed

@@ -2,6 +2,19 @@
  * Custom ky-based HTTP client for Orval-generated API
  */
 import type { Options as KyOptions } from 'ky';
+/**
+ * Custom error class for HTTP errors with response details
+ */
+export declare class HttpError extends Error {
+    response: {
+        status: number;
+        data?: unknown;
+    };
+    constructor(message: string, response: {
+        status: number;
+        data?: unknown;
+    });
+}
 /**
  * Custom API request options that extend RequestInit with additional config
  */

package/dist/api/http_client.js CHANGED Viewed

@@ -3,6 +3,17 @@
  */
 import ky from 'ky';
 import { config } from '#config.js';
+/**
+ * Custom error class for HTTP errors with response details
+ */
+export class HttpError extends Error {
+    response;
+    constructor(message, response) {
+        super(message);
+        this.response = response;
+        this.name = 'HttpError';
+    }
+}
 const api = ky.create({
     prefixUrl: config.apiUrl,
     timeout: config.requestTimeout,
@@ -46,5 +57,14 @@ const wrapResponse = (response, data) => ({
 export const customFetchInstance = async (url, options) => {
     const response = await api(stripLeadingSlash(url), buildKyOptions(options));
     const data = await response.json().catch(() => undefined);
+    // Throw for non-2xx responses so catch handlers can process errors
+    if (!response.ok) {
+        const errorData = data;
+        const message = errorData?.message || `HTTP ${response.status} error`;
+        throw new HttpError(message, {
+            status: response.status,
+            data: errorData
+        });
+    }
     return wrapResponse(response, data);
 };

package/dist/assets/docker/docker-compose-dev.yml CHANGED Viewed

@@ -98,7 +98,7 @@ services:
     depends_on:
       temporal:
         condition: service_healthy
-    image: node:24.3-slim
+    image: node:24.13.0-slim
     networks:
       - main
     env_file: ./.env

package/dist/commands/workflow/run.js CHANGED Viewed

@@ -65,7 +65,8 @@ export default class WorkflowRun extends Command {
     }
     async catch(error) {
         return handleApiError(error, (...args) => this.error(...args), {
-            404: 'Workflow not found. Check the workflow name.'
+            404: 'Workflow not found. Check the workflow name.',
+            500: 'Workflow execution failed.'
         });
     }
 }

package/dist/services/messages.js CHANGED Viewed

@@ -177,7 +177,7 @@ export const getProjectSuccessMessage = (folderName, installSuccess, envConfigur
         note: 'Launches Temporal, Redis, PostgreSQL, API, Worker, and UI'
     }, {
         step: 'Run example workflow',
-        command: 'npx output workflow run example_question --input src/workflows/example_question/scenarios/question_ada_lovelace.json',
+        command: 'npx output workflow run poke_battle --input src/workflows/poke_battle/scenarios/pikachu_vs_charmander.json',
         note: 'Execute in a new terminal after services are running'
     }, {
         step: 'Monitor workflows',

package/dist/templates/agent_instructions/dotoutputai/AGENTS.md.template CHANGED Viewed

@@ -2,7 +2,7 @@
 ## Overview
-This project uses Output Framework to build durable, LLM-powered workflows orchestrated by Temporal. Output Framework provides abstractions for creating reliable AI workflows with automatic retry, tracing, and error handling. Developers use it to build workflows like fact checkers, content generators, data extractors, research assistants, and multi-step AI agents.
+This project uses Output.ai Framework to build durable, LLM-powered workflows orchestrated by Temporal. Output Framework provides abstractions for creating reliable AI workflows with automatic retry, tracing, and error handling. Developers use it to build workflows like fact checkers, content generators, data extractors, research assistants, and multi-step AI agents mixing API clients.
 ### Project Overview
@@ -26,17 +26,127 @@ Each workflow is self-contained in a single folder with a predictable structure:
 - **Steps**: All external operations (APIs, DBs, LLMs) must be wrapped in steps
 - **Schemas**: Use Zod (`z`) from `@output.ai/core` to define input/output schemas
+## Code Reuse Rules
+**IMPORTANT: Workflows are isolated by design.** Do not reuse code directly between sibling workflows.
+### Allowed Code Sharing Locations:
+- `src/clients/` - HTTP API clients shared across ALL workflows
+- `src/workflows/{name}/shared/` - Shared utilities within a workflow group
+- `src/workflows/{name}/lib/` - Internal libraries for a workflow group
+### Forbidden:
+- Importing from sibling workflow folders (e.g., `../other_workflow/steps.js`)
+- Copying step implementations between workflows
+- Referencing types from other workflow folders
+Each workflow must remain independently deployable and testable. Cross-workflow imports create hidden dependencies that break this isolation.
 ## Project Structure
 ```
-src/workflows/{name}/
-  workflow.ts          # Orchestration logic (deterministic)
-  steps.ts             # I/O operations (APIs, LLM, DB)
-  evaluators.ts        # Analysis steps returning EvaluationResult
-  prompts/*.prompt             # LLM prompts (name@v1.prompt)
-  scenarios/*.json             # Test scenarios
+src/
+  clients/             # Shared HTTP API clients (one file per external service)
+    jina.ts            # Example: Jina Reader API client
+    stripe.ts          # Example: Stripe API client
+  workflows/{name}/
+    types.ts           # Zod schemas and TypeScript types (should hold ALL types for the workflow)
+    workflow.ts        # Orchestration logic (deterministic)
+    steps.ts           # I/O operations (APIs, LLM, DB)
+    evaluators.ts      # LLM-as-a-judge analysis steps returning EvaluationResult
+    prompts/*.prompt   # LLM prompts (name@v1.prompt)
+    scenarios/*.json   # Test scenarios
+```
+## API Clients Pattern
+External API integrations should be placed in `src/clients/` as reusable modules. Each client wraps `@output.ai/http` for automatic tracing and retries.
+```typescript
+// src/clients/jina.ts
+import { httpClient } from '@output.ai/http';
+const client = httpClient({
+  prefixUrl: 'https://r.jina.ai',
+  timeout: 30000
+});
+export const jinaClient = {
+  read: async (url: string): Promise<string> => {
+    const response = await client.get(url);
+    return response.text();
+  }
+};
 ```
+**Usage in steps:**
+```typescript
+// src/workflows/my_workflow/steps.ts
+import { jinaClient } from '../../clients/jina.js';
+export const scrapeUrl = step({
+  name: 'scrapeUrl',
+  // ...
+  fn: async (url) => jinaClient.read(url)
+});
+```
+**Key requirements:**
+- One file per external service in `src/clients/`
+- Always use `@output.ai/http` (never axios or fetch directly)
+- Export a named client object with typed methods
+- Import clients in steps using relative paths (`../../clients/`)
+## Types Pattern
+Every workflow MUST have a `types.ts` file containing all Zod schemas and TypeScript types. This ensures type safety and schema reusability across workflow components.
+```typescript
+import { z } from '@output.ai/core';
+// Workflow input/output schemas
+export const WorkflowInputSchema = z.object({
+  query: z.string().describe('The search query'),
+  maxResults: z.number().optional().default(10)
+});
+export const WorkflowOutputSchema = z.object({
+  results: z.array(z.object({
+    title: z.string(),
+    content: z.string()
+  })),
+  totalCount: z.number()
+});
+// Step schemas
+export const FetchDataInputSchema = z.object({
+  url: z.string().url()
+});
+export const FetchDataOutputSchema = z.object({
+  data: z.unknown(),
+  status: z.number()
+});
+// Inferred TypeScript types
+export type WorkflowInput = z.infer<typeof WorkflowInputSchema>;
+export type WorkflowOutput = z.infer<typeof WorkflowOutputSchema>;
+export type FetchDataInput = z.infer<typeof FetchDataInputSchema>;
+export type FetchDataOutput = z.infer<typeof FetchDataOutputSchema>;
+// Shared interfaces (non-Zod types used within the workflow)
+export interface ApiResponse {
+  code: number;
+  data: unknown;
+}
+```
+**Key requirements:**
+- All Zod schemas used in `workflow.ts` and `steps.ts` MUST be defined in `types.ts`
+- Export both schemas (for runtime validation) and inferred types (for TypeScript)
+- Use `.describe()` on schema fields for documentation
+- Keep API response interfaces separate from Zod schemas
 ## Commands
 ```bash
@@ -53,6 +163,8 @@ npx output workflow result <workflowId>             # Get result when complete
 npx output workflow stop <workflowId>               # Cancel running workflow
 ```
+**When running workflows for users**: After execution completes, try to format the result nicely for readability. Use markdown formatting, tables for structured data, and highlight key values. Don't just dump raw JSON.
 ## Workflow Pattern
 Workflows orchestrate steps. They must be deterministic (no direct I/O).
@@ -74,7 +186,7 @@ export default workflow({
 });
 ```
-**Allowed imports**: steps.ts, evaluators.ts, shared_steps.ts, types.ts, consts.ts, utils.ts
+**Allowed imports**: steps.ts, evaluators.ts, ../../shared/steps/*.ts, types.ts, consts.ts, utils.ts
 **Forbidden in workflows**: Direct API calls, Math.random(), Date.now(), dynamic imports
@@ -116,7 +228,7 @@ temperature: 0.7
 maxTokens: 2000
 ---
 <system>You are a helpful assistant.</system>
-<user>Summarize: \{{ content }}</user>
+<user>Summarize: {{ content }}</user>
 ```
 **Step using prompt**:
@@ -129,10 +241,11 @@ export const summarize = step({
   inputSchema: z.object({ content: z.string() }),
   outputSchema: z.string(),
   fn: async ({ content }) => {
-    return generateText({
+    const { result } = await generateText({
       prompt: 'summarize@v1',
       variables: { content }
     });
+    return result;
   }
 });
@@ -142,11 +255,12 @@ export const extractInfo = step({
   inputSchema: z.object({ text: z.string() }),
   outputSchema: z.object({ title: z.string(), summary: z.string() }),
   fn: async ({ text }) => {
-    return generateObject({
+    const { result } = await generateObject({
       prompt: 'extract@v1',
       variables: { text },
       schema: z.object({ title: z.string(), summary: z.string() })
     });
+    return result;
   }
 });
 ```
@@ -175,7 +289,7 @@ const result = await client.post('endpoint', { json: payload }).json();
 ## Evaluator Pattern
-Evaluators analyze data and return confidence-scored results.
+Evaluators are LLM-as-a-judge steps that analyze data and return confidence-scored results. They are highly recommended for any high-value LLM output that can benefit from self-improvement loops or from scores logged to traces.
 ```typescript
 import { evaluator, EvaluationStringResult } from '@output.ai/core';
@@ -205,23 +319,116 @@ throw new FatalError('Critical failure - do not retry');
 throw new ValidationError('Invalid input format');
 ```
-## Claude Code Sub-Agents
+## Creating New Workflows
+**IMPORTANT**: When creating a new workflow, you MUST use the following agents and commands in order. Do not skip steps.
+### Mandatory Workflow Creation Process
+1. **Plan** → `/outputai:plan_workflow` or `workflow-planner` agent
+   - Defines workflow architecture, steps, and data flow
+   - Identifies required external APIs and LLM operations
+   - MUST be run first before any implementation
+2. **Build** → `/outputai:build_workflow`
+   - Creates the workflow folder structure
+   - Generates `types.ts`, `workflow.ts`, `steps.ts`
+   - Sets up test scenarios
+3. **Prompts** → `workflow-prompt-writer` agent
+   - Creates `.prompt` files for all LLM operations
+   - Reviews and optimizes prompt templates
+   - Ensures proper Liquid.js templating
+4. **Quality** → `workflow-quality` agent
+   - Validates implementation against best practices
+   - Checks for proper error handling and retries
+   - Ensures schema consistency across components
+### Available Sub-Agents
+| Agent | Purpose |
+|-------|---------|
+| `workflow-planner` | Architecture design, step breakdown, data flow planning |
+| `workflow-prompt-writer` | Create and review `.prompt` files with proper templates |
+| `workflow-quality` | Validate best practices, error handling, schema consistency |
+| `workflow-context-fetcher` | Retrieve project context (used by other agents) |
+| `workflow-debugger` | Debug workflow issues, analyze execution traces |
+### Available Commands
+| Command | When to Use |
+|---------|-------------|
+| `/outputai:plan_workflow` | **ALWAYS FIRST** - Plan new workflow architecture |
+| `/outputai:build_workflow` | Implement the planned workflow |
+| `/outputai:debug_workflow` | Debug failing or misbehaving workflows |
+## Working with This Codebase
+**CRITICAL: Trust the documentation.** When creating or modifying workflows:
+1. **Do NOT scan the entire codebase** - The patterns and examples in this document are authoritative
+2. **Follow the documented patterns** - Use the step, workflow, and evaluator patterns exactly as shown
+3. **Use the sub-agents** - They have the context needed to create correct implementations
+### What to Read:
+- This current file (you're reading it)
+- The specific workflow folder you're modifying
+- `src/clients/` for available HTTP clients
+### What NOT to Do:
+- Grep through all workflows looking for "examples"
+- Read multiple workflow implementations to "understand patterns"
+- Second-guess the documented patterns based on existing code variations
+## Common Issues
+### Restarting Worker After Adding Workflows
+After creating a new workflow, you will likely need to restart the worker container for the changes to take effect.
+```bash
+# Check running containers
+docker ps --filter "name=output" --format "{{.Names}}: {{.Status}}"
+# Restart the worker (adjust container name based on your project)
+docker restart <project-name>-worker-1
+# Wait for worker to restart, then run the workflow
+sleep 5 && npx output workflow run <workflow_name> --input '<json>'
+```
+### Payload Size Limits
+**Plan for size limits when designing workflows.**
+- **Temporal limit**: ~2MB per workflow input/output payload
+- **gRPC limit**: ~4MB per message
+When planning workflows that process large data (documents, images, API responses):
+- Chunk large arrays and process in batches
+- Summarize or extract only needed fields from large API responses
+- When dealing with files, prompt the user about using cloud storage (S3, etc.)
-For workflow planning and implementation:
+### Docker-Based Development
-- workflow-planner: Workflow architecture specialist
-- workflow-quality: Workflow quality and best practices specialist
-- workflow-prompt-writer: Prompt file creation and review specialist
-- workflow-context-fetcher: Efficient context retrieval (used by other agents)
-- workflow-debugger: Workflow debugging specialist
+`npx output dev` runs services in Docker containers. For debugging:
-## Claude Code Commands
+```bash
+# View worker logs
+docker logs -f output-worker-1
+# View API logs
+docker logs -f output-api-1
-For workflow planning and implementation:
+# View Temporal logs
+docker logs -f output-temporal-1
+# Shell into a container
+docker exec -it output-worker-1 sh
+```
-- /outputai:plan_workflow: Workflow Planning command
-- /outputai:build_workflow: Workflow Implementation command
-- /outputai:debug_workflow: Workflow Debugging command
+Logs do not appear in the terminal directly - check container logs when debugging workflow issues.
 ## Configuration

package/dist/templates/project/README.md.template CHANGED Viewed

@@ -7,6 +7,43 @@
 - Node.js >= 24.3
 - Docker and Docker Compose (for local development)
+## Project Structure
+```
+src/
+├── shared/                    # Shared code across workflows
+│   ├── clients/               # API clients (e.g., pokeapi.ts)
+│   └── utils/                 # Utility functions (e.g., string.ts)
+└── workflows/                 # Workflow definitions
+    └── poke_battle/           # Example workflow
+        ├── workflow.ts        # Main workflow
+        ├── steps.ts           # Workflow steps
+        ├── evaluators.ts      # Quality evaluators
+        ├── utils.ts           # Local utilities
+        ├── prompts/           # LLM prompts
+        └── scenarios/         # Test scenarios
+```
+### Shared Directory
+The `src/shared/` directory contains code shared across multiple workflows:
+- **`shared/clients/`** - API clients using `@output.ai/http` for external services
+- **`shared/utils/`** - Helper functions and utilities
+### Import Rules
+**Workflows** can import from:
+- Local steps, evaluators, and utilities
+- Shared steps, evaluators, clients, and utilities
+**Steps and Evaluators** can import from:
+- Local utilities and clients
+- Shared utilities and clients
+**Steps and Evaluators cannot** import from:
+- Other steps or evaluators (Temporal activity isolation)
 ## Getting Started
 ### 1. Install Dependencies
@@ -44,7 +81,7 @@ This starts:
 In a new terminal:
 ```bash
-npx output workflow run example_question --input '{"question": "who really is ada lovelace?"}'
+npx output workflow run poke_battle --input src/workflows/poke_battle/scenarios/pikachu_vs_charmander.json
 ```
 ### 5. Stop Services

package/dist/templates/project/src/shared/clients/pokeapi.ts.template ADDED Viewed

@@ -0,0 +1,29 @@
+import { httpClient, HttpClientOptions } from '@output.ai/http';
+export interface Pokemon {
+  name: string;
+  types: string[];
+  abilities: string[];
+  stats: { name: string; value: number }[];
+}
+/** Subset of the raw PokeAPI `GET /pokemon/{name}` payload that this client reads. */
+interface PokeApiPokemon {
+  name: string;
+  types: { type: { name: string } }[];
+  abilities: { ability: { name: string } }[];
+  stats: { base_stat: number; stat: { name: string } }[];
+}
+const basePokeApiClient = httpClient( {
+  prefixUrl: 'https://pokeapi.co/api/v2'
+} );
+const pokemonClient = basePokeApiClient.extend( options => ( {
+  prefixUrl: `${options.prefixUrl}/pokemon`,
+  timeout: 3000
+} ) as HttpClientOptions );
+/**
+ * Fetch a Pokemon by name and flatten the raw API payload into the `Pokemon` shape.
+ *
+ * The raw response nests every value (e.g. `types[].type.name`), so it is mapped
+ * explicitly instead of being cast; a cast would hand callers objects where the
+ * `Pokemon` interface promises strings and `{ name, value }` pairs.
+ *
+ * @param name - Pokemon name, appended to the `/pokemon` prefix URL.
+ * @returns The Pokemon's name, type names, ability names and base stats.
+ */
+export async function getPokemon( name: string ): Promise<Pokemon> {
+  const response = await pokemonClient.get<PokeApiPokemon>( name );
+  const raw = await response.json();
+  return {
+    name: raw.name,
+    types: raw.types.map( t => t.type.name ),
+    abilities: raw.abilities.map( a => a.ability.name ),
+    stats: raw.stats.map( s => ( { name: s.stat.name, value: s.base_stat } ) )
+  };
+}

package/dist/templates/project/src/shared/utils/string.ts.template ADDED Viewed

@@ -0,0 +1,3 @@
+export function lowercase( str: string ): string {
+  return str.toLowerCase();
+}

package/dist/templates/project/src/workflows/poke_battle/evaluators.ts.template ADDED Viewed

@@ -0,0 +1,44 @@
+import { evaluator, z, EvaluationNumberResult } from '@output.ai/core';
+import { generateObject } from '@output.ai/llm';
+import type { Pokemon } from '../../shared/clients/pokeapi.js';
+/**
+ * Evaluator that asks the LLM to score how realistic a battle screenplay is.
+ *
+ * Input: the screenplay plus both Pokemon. Output: an `EvaluationNumberResult`
+ * whose value is the 0-100 score returned by the `evaluate_realism@v1` prompt.
+ */
+export const evaluateBattleRealism = evaluator( {
+  name: 'evaluate_battle_realism',
+  description: 'Evaluate how realistic the Pokemon battle screenplay is',
+  inputSchema: z.object( {
+    screenplay: z.string(),
+    pokemon1: z.object( {
+      name: z.string(),
+      types: z.array( z.string() ),
+      abilities: z.array( z.string() ),
+      stats: z.array( z.object( { name: z.string(), value: z.number() } ) )
+    } ),
+    pokemon2: z.object( {
+      name: z.string(),
+      types: z.array( z.string() ),
+      abilities: z.array( z.string() ),
+      stats: z.array( z.object( { name: z.string(), value: z.number() } ) )
+    } )
+  } ),
+  fn: async ( input: { screenplay: string; pokemon1: Pokemon; pokemon2: Pokemon } ) => {
+    const { result } = await generateObject( {
+      prompt: 'evaluate_realism@v1',
+      // Variable names must match the placeholders in evaluate_realism@v1.prompt,
+      // which reads pokemon{1,2}Name, pokemon{1,2}Description and screenplay.
+      // The stat sheets are serialized the same way generateScreenplay does.
+      variables: {
+        pokemon1Name: input.pokemon1.name,
+        pokemon2Name: input.pokemon2.name,
+        pokemon1Description: JSON.stringify( input.pokemon1 ),
+        pokemon2Description: JSON.stringify( input.pokemon2 ),
+        screenplay: input.screenplay
+      },
+      schema: z.object( {
+        score: z.number().min( 0 ).max( 100 ).describe( 'Realism score 0-100' )
+      } )
+    } );
+    return new EvaluationNumberResult( {
+      value: result.score,
+      // Fixed confidence: the prompt only returns a score, not a confidence.
+      confidence: 0.9
+    } );
+  }
+} );

package/dist/templates/project/src/workflows/poke_battle/prompts/evaluate_realism@v1.prompt.template ADDED Viewed

@@ -0,0 +1,24 @@
+---
+provider: anthropic
+model: claude-haiku-4-5
+temperature: 0.7
+maxTokens: 8192
+---
+Evaluate this Pokemon battle screenplay for realism.
+Consider:
+- Are the moves/abilities used consistent with each Pokemon's actual abilities?
+- Is the battle outcome realistic given their types and stats?
+- Does the pacing feel like a real battle?
+\{{ pokemon1Name }}'s Stat Sheet:
+\{{ pokemon1Description }}
+\{{ pokemon2Name }}'s Stat Sheet:
+\{{ pokemon2Description }}
+Screenplay:
+\{{ screenplay }}

package/dist/templates/project/src/workflows/poke_battle/prompts/generate_screenplay@v1.prompt.template ADDED Viewed

@@ -0,0 +1,18 @@
+---
+provider: anthropic
+model: claude-haiku-4-5
+temperature: 0.7
+maxTokens: 8192
+---
+Write a dramatic 5-minute screenplay of a Pokemon battle between \{{ pokemon1Name }} and \{{ pokemon2Name }}.
+\{{ pokemon1Name }}'s Stat Sheet:
+\{{ pokemon1Description }}
+\{{ pokemon2Name }}'s Stat Sheet:
+\{{ pokemon2Description }}
+Include dramatic moments, skill usage that matches their actual abilities, and a decisive outcome based on their type matchups. Format it as a proper screenplay with scene descriptions and dialogue.

package/dist/templates/project/src/workflows/poke_battle/scenarios/pikachu_vs_charmander.json.template ADDED Viewed

@@ -0,0 +1,8 @@
+{
+  "name": "pikachu_vs_charmander",
+  "description": "A classic battle between Pikachu and Charmander",
+  "input": {
+    "pokemon1Name": "pikachu",
+    "pokemon2Name": "charmander"
+  }
+}

package/dist/templates/project/src/workflows/poke_battle/steps.ts.template ADDED Viewed

@@ -0,0 +1,37 @@
+import { step, z } from '@output.ai/core';
+import { generateText } from '@output.ai/llm';
+import type { Pokemon } from '../../shared/clients/pokeapi.js';
+// Runtime schema mirroring the `Pokemon` interface from the shared PokeAPI client:
+// type and ability names are strings, stats are { name, value } pairs.
+const pokemonSchema = z.object( {
+  name: z.string(),
+  types: z.array( z.string() ),
+  abilities: z.array( z.string() ),
+  stats: z.array( z.object( { name: z.string(), value: z.number() } ) )
+} );
+/**
+ * Step that generates the battle screenplay for two Pokemon via the
+ * `generate_screenplay@v1` prompt.
+ *
+ * Input: both Pokemon. Output: `{ screenplay }` holding the generated text.
+ */
+export const generateScreenplay = step( {
+  name: 'generate_screenplay',
+  description: 'Generate a dramatic 5-minute Pokemon battle screenplay',
+  inputSchema: z.object( {
+    pokemon1: pokemonSchema,
+    pokemon2: pokemonSchema
+  } ),
+  outputSchema: z.object( {
+    screenplay: z.string()
+  } ),
+  fn: async ( input: { pokemon1: Pokemon; pokemon2: Pokemon } ) => {
+    const { result } = await generateText( {
+      prompt: 'generate_screenplay@v1',
+      variables: {
+        pokemon1Name: input.pokemon1.name,
+        pokemon2Name: input.pokemon2.name,
+        // The prompt prints each stat sheet verbatim, so pass it as JSON text.
+        pokemon1Description: JSON.stringify( input.pokemon1 ),
+        pokemon2Description: JSON.stringify( input.pokemon2 )
+      }
+    } );
+    return { screenplay: result };
+  }
+} );

package/dist/templates/project/src/workflows/poke_battle/utils.ts.template ADDED Viewed

@@ -0,0 +1,7 @@
+export function createWorkflowOutput( screenplay: string, confidenceScore: number ) {
+  return {
+    screenplay,
+    confidenceScore,
+    summary: `Battle screenplay (${confidenceScore}% confidence)`
+  };
+}

package/dist/templates/project/src/workflows/poke_battle/workflow.ts.template ADDED Viewed

@@ -0,0 +1,47 @@
+import { workflow, z } from '@output.ai/core';
+import { lowercase } from '../../shared/utils/string.js';
+import { getPokemon } from '../../shared/clients/pokeapi.js';
+import { generateScreenplay } from './steps.js';
+import { evaluateBattleRealism } from './evaluators.js';
+import { createWorkflowOutput } from './utils.js';
+export default workflow( {
+  name: 'poke_battle',
+  description: '{{description}}',
+  inputSchema: z.object( {
+    pokemon1Name: z.string().describe( 'Name of the first Pokemon' ),
+    pokemon2Name: z.string().describe( 'Name of the second Pokemon' )
+  } ),
+  outputSchema: z.object( {
+    screenplay: z.string().describe( 'A 5-minute battle screenplay' ),
+    confidenceScore: z.number().describe( 'Realism confidence score 0-100' ),
+    summary: z.string().describe( 'Battle summary with confidence' )
+  } ),
+  fn: async input => {
+    // Use shared util to normalize names
+    const name1 = lowercase( input.pokemon1Name );
+    const name2 = lowercase( input.pokemon2Name );
+    // Use shared client to fetch Pokemon data
+    const pokemon1 = await getPokemon( name1 );
+    const pokemon2 = await getPokemon( name2 );
+    // Generate the battle screenplay
+    const { screenplay } = await generateScreenplay( { pokemon1, pokemon2 } );
+    // Evaluate realism
+    const evaluation = await evaluateBattleRealism( {
+      screenplay,
+      pokemon1,
+      pokemon2
+    } );
+    // Use local util to format output
+    return createWorkflowOutput( screenplay, evaluation.value );
+  },
+  options: {
+    retry: {
+      maximumAttempts: 3
+    }
+  }
+} );

package/dist/templates/workflow/README.md.template CHANGED Viewed

@@ -10,8 +10,50 @@ This workflow was generated using the Output SDK CLI. It provides a starting poi
 - `workflow.ts` - Main workflow definition with input/output schemas
 - `steps.ts` - Activity/step definitions with input/output schemas
-- `prompt@v1.prompt` - Example LLM prompt template
-- `.env` - Environment variables for API keys and configuration
+- `evaluators.ts` - Quality evaluators for workflow outputs
+- `prompts/` - LLM prompt templates
+## File Organization
+You can organize your workflow files in two ways:
+**Flat files:**
+```
+workflow/
+├── workflow.ts
+├── steps.ts
+├── evaluators.ts
+└── utils.ts
+```
+**Folder-based:**
+```
+workflow/
+├── workflow.ts
+├── steps/
+│   ├── fetch_data.ts
+│   └── process_data.ts
+├── evaluators/
+│   └── quality.ts
+└── utils/
+    └── helpers.ts
+```
+## Import Rules
+**Important:** Steps and evaluators are Temporal activities. Activities cannot call other activities.
+**Steps can import from:**
+- Local utilities (`./utils.ts`, `./utils/*.ts`)
+- Shared utilities (`../../shared/utils/*.ts`)
+- Shared clients (`../../shared/clients/*.ts`)
+**Steps cannot import from:**
+- Other steps or evaluators (activity isolation)
+- Workflow files
+**Workflows can import from:**
+- Steps, evaluators, and utilities (local and shared)
 ## Setup
@@ -63,121 +105,109 @@ Example:
 ### Workflow Structure
-The workflow follows the new Output SDK conventions:
+The workflow follows the Output SDK conventions:
 ```typescript
-import { workflow } from '@output.ai/core';
-import { myStep, anotherStep } from './steps.js';
-const inputSchema = {
-  type: 'object',
-  properties: {
-    // Define your input properties
-  }
-};
-const outputSchema = {
-  type: 'object',
-  properties: {
-    // Define your output properties
-  }
-};
+import { workflow, z } from '@output.ai/core';
+import { myStep } from './steps.js';
+import { evaluateQuality } from './evaluators.js';
 export default workflow( {
   name: 'workflowName',
   description: 'Workflow description',
-  inputSchema,
-  outputSchema,
+  inputSchema: z.object( { /* ... */ } ),
+  outputSchema: z.object( { /* ... */ } ),
   fn: async ( input ) => {
-    // Call steps directly
     const result = await myStep( input );
-    return result;
+    const { score } = await evaluateQuality( { input, output: result } );
+    return { result, qualityScore: score };
   }
 } );
 ```
 ### Adding New Steps
-1. Define new steps in `steps.ts` with schemas:
+Define steps in `steps.ts` with schemas:
 ```typescript
-import { step } from '@output.ai/core';
-const inputSchema = {
-  type: 'object',
-  properties: {
-    value: { type: 'number' }
-  },
-  required: ['value']
-};
-const outputSchema = {
-  type: 'object',
-  properties: {
-    result: { type: 'string' }
-  }
-};
+import { step, z } from '@output.ai/core';
 export const myStep = step( {
   name: 'myStep',
   description: 'Description of what this step does',
-  inputSchema,
-  outputSchema,
-  fn: async ( input: { value: number } ) => {
-    // Step implementation
+  inputSchema: z.object( {
+    value: z.number()
+  } ),
+  outputSchema: z.object( {
+    result: z.string()
+  } ),
+  fn: async ( input ) => {
     return { result: `Processed ${input.value}` };
   }
 } );
 ```
-2. Import and use the step in your workflow (`workflow.ts`):
+### Adding Evaluators
+Define evaluators in `evaluators.ts`:
 ```typescript
-import { myStep } from './steps.js';
+import { evaluator, z } from '@output.ai/core';
+import { generateText } from '@output.ai/llm';
-// Inside workflow fn:
-const result = await myStep( { value: 42 } );
+export const evaluateQuality = evaluator( {
+  name: 'evaluate_quality',
+  description: 'Evaluate output quality',
+  inputSchema: z.object( {
+    input: z.any(),
+    output: z.any()
+  } ),
+  outputSchema: z.object( {
+    score: z.number().min( 0 ).max( 100 )
+  } ),
+  fn: async ( data ) => {
+    const { result } = await generateText( {
+      prompt: 'evaluate@v1',
+      variables: { input: data.input, output: data.output }
+    } );
+    return { score: parseInt( result, 10 ) };
+  }
+} );
 ```
 ### Using LLM in Steps
-The template includes an example of using LLM with prompts:
 ```typescript
 import { generateText } from '@output.ai/llm';
 export const llmStep = step( {
   name: 'llmStep',
   description: 'Generate text using LLM',
-  inputSchema: {
-    type: 'object',
-    properties: {
-      userInput: { type: 'string' }
-    }
-  },
-  outputSchema: { type: 'string' },
-  fn: async ( input: { userInput: string } ) => {
-    const response = await generateText( {
+  inputSchema: z.object( {
+    userInput: z.string()
+  } ),
+  outputSchema: z.string(),
+  fn: async ( input ) => {
+    const { result } = await generateText( {
       prompt: 'prompt@v1',
       variables: { userInput: input.userInput }
     } );
-    return response;
+    return result;
   }
 } );
 ```
 ### Creating Prompt Templates
-Create new prompt files following the pattern:
+Create prompt files in `prompts/` following the pattern:
 - File naming: `promptName@v1.prompt`
-- Include YAML frontmatter with provider and model
+- Include YAML frontmatter with model
 - Use LiquidJS syntax for variables: `{{ variableName }}`
 Example prompt file:
 ```
 ---
-provider: anthropic
-model: claude-3-5-sonnet-latest
+model: anthropic/claude-sonnet-4-20250514
 ---
 {{ userInput }}
@@ -195,19 +225,10 @@ To test your workflow:
 Example execution:
 ```bash
-curl -X POST http://localhost:3001/workflow \
-  -H "Content-Type: application/json" \
-  -d '{
-    "workflowName": "{{workflowName}}",
-    "input": {
-      "prompt": "Tell me about workflows",
-      "data": { "value": 42, "type": "example" }
-    }
-  }'
+npx output workflow run {{workflowName}} --input '{"prompt": "Hello"}'
 ```
 ## Resources
-- [Output SDK Documentation](https://github.com/growthxai/output-sdk)
+- [Output SDK Documentation](https://docs.output.ai)
 - [Temporal Documentation](https://docs.temporal.io)
-- [AI SDK Documentation](https://sdk.vercel.ai/docs)

package/dist/templates/workflow/evaluators.ts.template ADDED Viewed

@@ -0,0 +1,23 @@
+import { evaluator, z } from '@output.ai/core';
+// Example evaluator - customize for your workflow
+export const evaluate{{WorkflowName}} = evaluator( {
+  name: 'evaluate_{{workflowName}}',
+  description: 'Evaluate the quality of {{workflowName}} output',
+  inputSchema: z.object( {
+    input: z.any(),
+    output: z.any()
+  } ),
+  outputSchema: z.object( {
+    score: z.number().min( 0 ).max( 100 ).describe( 'Quality score 0-100' ),
+    feedback: z.string().describe( 'Feedback on the output quality' )
+  } ),
+  fn: async () => {
+    // TODO: Implement evaluation logic
+    // Use LLM or custom logic to score the output quality
+    return {
+      score: 100,
+      feedback: 'Evaluation not yet implemented'
+    };
+  }
+} );

package/dist/utils/error_handler.js CHANGED Viewed

@@ -3,8 +3,24 @@ const DEFAULT_MESSAGES = {
     ECONNREFUSED: `Connection refused to ${config.apiUrl}. Is the API server running?`,
     401: 'Authentication failed. Check your API_AUTH_TOKEN.',
     404: 'Resource not found.',
+    500: 'Server error.',
     UNKNOWN: 'An unknown error occurred.'
 };
+/**
+ * Extract error type and message from API response data
+ */
+function extractApiErrorDetails(data) {
+    const errorData = data;
+    if (!errorData?.error && !errorData?.message) {
+        return null;
+    }
+    const errorType = errorData.error || 'Error';
+    const baseMsg = errorData.message || 'Unknown error';
+    const rootCauseLine = errorData.rootCause ?
+        `\n${errorData.rootCause.error}: ${errorData.rootCause.message}` :
+        '';
+    return { errorType, errorMsg: `${baseMsg}.${rootCauseLine}` };
+}
 /**
  * Extract detailed error information from fetch errors and their causes
  */
@@ -39,6 +55,12 @@ export function handleApiError(error, errorFn, overrides = {}) {
     }
     if (apiError.response?.status) {
         const status = apiError.response.status;
+        // Extract error details from response body
+        const apiErrorDetails = extractApiErrorDetails(apiError.response.data);
+        if (apiErrorDetails) {
+            const { errorType, errorMsg } = apiErrorDetails;
+            errorFn(`${errorType}: ${errorMsg}`, { exit: 1 });
+        }
         const message = errorMessages[status];
         if (message) {
             errorFn(message, { exit: 1 });

package/dist/utils/paths.d.ts CHANGED Viewed

@@ -9,7 +9,7 @@ export declare const TEMPLATE_DIRS: {
  * Default output directories
  */
 export declare const DEFAULT_OUTPUT_DIRS: {
-    readonly workflows: "src";
+    readonly workflows: "src/workflows";
 };
 /**
  * Resolve the output directory path

package/dist/utils/paths.js CHANGED Viewed

@@ -13,7 +13,7 @@ export const TEMPLATE_DIRS = {
  * Default output directories
  */
 export const DEFAULT_OUTPUT_DIRS = {
-    workflows: 'src'
+    workflows: 'src/workflows'
 };
 /**
  * Resolve the output directory path

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@output.ai/cli",
-  "version": "0.7.6",
+  "version": "0.7.8-dev.pr263-a59dd0e",
   "description": "CLI for Output.ai workflow generation",
   "type": "module",
   "main": "dist/index.js",

package/dist/templates/project/src/workflows/example_question/prompts/answer_question@v1.prompt.template DELETED Viewed

@@ -1,13 +0,0 @@
----
-provider: anthropic
-model: claude-opus-4-1-20250805
-temperature: 0.7
----
-<system>
-You are a helpful assistant. Answer the user's question concisely and clearly.
-</system>
-<user>
-Answer the following question: \{{ question }}
-</user>

package/dist/templates/project/src/workflows/example_question/scenarios/question_ada_lovelace.json.template DELETED Viewed

@@ -1,3 +0,0 @@
-{
-  "question": "who really is ada lovelace?"
-}

package/dist/templates/project/src/workflows/example_question/steps.ts.template DELETED Viewed

@@ -1,16 +0,0 @@
-import { step, z } from '@output.ai/core';
-import { generateText } from '@output.ai/llm';
-export const answerQuestion = step( {
-  name: 'answerQuestion',
-  description: 'Answer a question using an LLM',
-  inputSchema: z.string(),
-  outputSchema: z.string(),
-  fn: async question => {
-    const { result } = await generateText( {
-      prompt: 'answer_question@v1',
-      variables: { question }
-    } );
-    return result;
-  }
-} );

package/dist/templates/project/src/workflows/example_question/workflow.ts.template DELETED Viewed

@@ -1,22 +0,0 @@
-import { workflow, z } from '@output.ai/core';
-import { answerQuestion } from './steps.js';
-export default workflow( {
-  name: 'example_question',
-  description: '{{description}}',
-  inputSchema: z.object( {
-    question: z.string().describe( 'A question to answer' )
-  } ),
-  outputSchema: z.object( {
-    answer: z.string().describe( 'The answer to the question' )
-  } ),
-  fn: async input => {
-    const answer = await answerQuestion( input.question );
-    return { answer };
-  },
-  options: {
-    retry: {
-      maximumAttempts: 3
-    }
-  }
-} );