@bratsos/workflow-engine 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,535 @@
1
+ # Stage Definitions
2
+
3
+ Complete API reference for `defineStage` and `defineAsyncBatchStage`.
4
+
5
+ ## defineStage
6
+
7
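+ Creates a standard stage whose `execute` function runs to completion in a single pass and returns its result directly, without suspending. ("Synchronous" here means non-suspending: `execute` still returns a `Promise`.)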
8
+
9
+ ```typescript
10
+ import { defineStage } from "@bratsos/workflow-engine";
11
+ import { z } from "zod";
12
+
13
+ const myStage = defineStage({
14
+ // Required fields
15
+ id: string, // Unique identifier (used in workflow context)
16
+ name: string, // Human-readable name
17
+ schemas: {
18
+ input: ZodSchema | "none", // Input validation schema
19
+ output: ZodSchema, // Output validation schema
20
+ config: ZodSchema, // Configuration schema
21
+ },
22
+ execute: (ctx) => Promise<SimpleStageResult>,
23
+
24
+ // Optional fields
25
+ description?: string, // Stage description
26
+ dependencies?: string[], // Stage IDs that must complete first
27
+ estimateCost?: (input, config) => number, // Cost estimation
28
+ });
29
+ ```
30
+
31
+ ## defineAsyncBatchStage
32
+
33
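+ Creates an asynchronous batch stage that can suspend execution and resume later: `execute` may return a `SimpleSuspendedResult` to suspend, and `checkCompletion` then reports whether the batch is ready.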
34
+
35
+ ```typescript
36
+ import { defineAsyncBatchStage } from "@bratsos/workflow-engine";
37
+
38
+ const batchStage = defineAsyncBatchStage({
39
+ id: "batch-process",
40
+ name: "Batch Process",
41
+ mode: "async-batch", // Required marker
42
+
43
+ schemas: {
44
+ input: InputSchema,
45
+ output: OutputSchema,
46
+ config: ConfigSchema,
47
+ },
48
+
49
+ execute: (ctx) => Promise<SimpleStageResult | SimpleSuspendedResult>,
50
+ checkCompletion: (state, ctx) => Promise<CompletionCheckResult>,
51
+ });
52
+ ```
53
+
54
+ ## Schema Patterns
55
+
56
+ ### Input Schema
57
+
58
+ ```typescript
59
+ // Standard input schema
60
+ schemas: {
61
+ input: z.object({
62
+ documentId: z.string(),
63
+ options: z.object({
64
+ format: z.enum(["pdf", "docx"]).default("pdf"),
65
+ }).optional(),
66
+ }),
67
+ // ...
68
+ }
69
+
70
+ // No input (uses workflow context only)
71
+ schemas: {
72
+ input: "none", // or: input: NoInputSchema
73
+ // ...
74
+ }
75
+ ```
76
+
77
+ ### Output Schema
78
+
79
+ ```typescript
80
+ schemas: {
81
+ // ...
82
+ output: z.object({
83
+ processedData: z.array(z.string()),
84
+ metadata: z.object({
85
+ count: z.number(),
86
+ timestamp: z.string(),
87
+ }),
88
+ }),
89
+ }
90
+ ```
91
+
92
+ ### Config Schema with Defaults
93
+
94
+ ```typescript
95
+ schemas: {
96
+ // ...
97
+ config: z.object({
98
+ // Required config
99
+ apiKey: z.string(),
100
+
101
+ // Optional with default
102
+ maxRetries: z.number().default(3),
103
+ timeout: z.number().default(30000),
104
+
105
+ // Optional without default
106
+ customEndpoint: z.string().optional(),
107
+
108
+ // Nested config
109
+ ai: z.object({
110
+ model: z.string().default("gemini-2.5-flash"),
111
+ temperature: z.number().default(0.7),
112
+ }).default({}),
113
+ }),
114
+ }
115
+ ```
116
+
117
+ ## EnhancedStageContext
118
+
119
+ The context object passed to `execute()`:
120
+
121
+ ```typescript
122
+ interface EnhancedStageContext<TInput, TConfig, TContext> {
123
+ // Input and config
124
+ input: TInput; // Validated input data
125
+ config: TConfig; // Validated config
126
+ workflowContext: TContext; // All previous stage outputs
127
+
128
+ // Workflow metadata
129
+ workflowRunId: string; // Current run ID
130
+ stageId: string; // Current stage ID
131
+
132
+ // Services
133
+ log: LogFunction; // Async logging
134
+ storage: StageStorage; // Artifact storage
135
+
136
+ // Resume state (for async-batch stages)
137
+ resumeState?: SuspendedState; // Present when resuming
138
+
139
+ // Fluent helpers
140
+ require<K>(stageId: K): TContext[K]; // Get required output
141
+ optional<K>(stageId: K): TContext[K] | undefined; // Get optional output
142
+ }
143
+ ```
144
+
145
+ ### Using require() and optional()
146
+
147
+ ```typescript
148
+ async execute(ctx) {
149
+ // Throws if "data-extraction" output is missing
150
+ const extraction = ctx.require("data-extraction");
151
+
152
+ // Returns undefined if "optional-enrichment" didn't run
153
+ const enrichment = ctx.optional("optional-enrichment");
154
+
155
+ // Type-safe access to nested data
156
+ const items = extraction.items;
157
+
158
+ if (enrichment) {
159
+ // Use enrichment data
160
+ }
161
+ }
162
+ ```
163
+
164
+ ### Logging
165
+
166
+ ```typescript
167
+ async execute(ctx) {
168
+ await ctx.log("INFO", "Starting processing");
169
+ await ctx.log("DEBUG", "Input received", { count: ctx.input.items.length });
170
+
171
+ try {
172
+ // ... processing
173
+ await ctx.log("INFO", "Processing complete");
174
+ } catch (error) {
175
+ await ctx.log("ERROR", "Processing failed", { error: error.message });
176
+ throw error;
177
+ }
178
+ }
179
+ ```
180
+
181
+ ### Storage
182
+
183
+ ```typescript
184
+ async execute(ctx) {
185
+ // Save intermediate data
186
+ await ctx.storage.save("raw-data", rawData);
187
+
188
+ // Check if data exists
189
+ if (await ctx.storage.exists("cached-result")) {
190
+ return { output: await ctx.storage.load("cached-result") };
191
+ }
192
+
193
+ // Delete old data
194
+ await ctx.storage.delete("old-cache");
195
+
196
+ // Get stage-specific key
197
+ const key = ctx.storage.getStageKey(ctx.stageId, "output.json");
198
+ }
199
+ ```
200
+
201
+ ## SimpleStageResult
202
+
203
+ Return type for successful execution:
204
+
205
+ ```typescript
206
+ interface SimpleStageResult<TOutput> {
207
+ output: TOutput; // Required: validated output
208
+ customMetrics?: Record<string, number>; // Optional: custom metrics
209
+ artifacts?: Record<string, unknown>; // Optional: artifacts to store
210
+ }
211
+ ```
212
+
213
+ ### Examples
214
+
215
+ ```typescript
216
+ // Minimal return
217
+ return { output: { result: "processed" } };
218
+
219
+ // With metrics
220
+ return {
221
+ output: { items: processedItems },
222
+ customMetrics: {
223
+ itemsProcessed: processedItems.length,
224
+ duplicatesRemoved: 5,
225
+ },
226
+ };
227
+
228
+ // With artifacts
229
+ return {
230
+ output: { summary: "..." },
231
+ artifacts: {
232
+ rawData: originalData,
233
+ debugInfo: { steps: executionSteps },
234
+ },
235
+ };
236
+ ```
237
+
238
+ ## SimpleSuspendedResult
239
+
240
+ Return type for suspending execution (async-batch stages only):
241
+
242
+ ```typescript
243
+ interface SimpleSuspendedResult {
244
+ suspended: true; // Required marker
245
+ state: {
246
+ batchId: string; // Required: external job ID
247
+ submittedAt: string; // Required: ISO timestamp
248
+ pollInterval: number; // Required: ms between checks
249
+ maxWaitTime: number; // Required: max wait ms
250
+ metadata?: Record<string, unknown>; // Optional: custom data
251
+ apiKey?: string; // Optional: for resumption
252
+ };
253
+ pollConfig: {
254
+ pollInterval: number; // ms between polls
255
+ maxWaitTime: number; // max total wait
256
+ nextPollAt: Date; // first poll time
257
+ };
258
+ customMetrics?: Record<string, number>;
259
+ }
260
+ ```
261
+
262
+ ### Example
263
+
264
+ ```typescript
265
+ async execute(ctx) {
266
+ const batchId = await submitBatch(requests);
267
+
268
+ return {
269
+ suspended: true,
270
+ state: {
271
+ batchId,
272
+ submittedAt: new Date().toISOString(),
273
+ pollInterval: 60000,
274
+ maxWaitTime: 3600000,
275
+ metadata: { requestCount: requests.length },
276
+ },
277
+ pollConfig: {
278
+ pollInterval: 60000,
279
+ maxWaitTime: 3600000,
280
+ nextPollAt: new Date(Date.now() + 60000),
281
+ },
282
+ };
283
+ }
284
+ ```
285
+
286
+ ## CompletionCheckResult
287
+
288
+ Return type for `checkCompletion`:
289
+
290
+ ```typescript
291
+ interface CompletionCheckResult<TOutput> {
292
+ ready: boolean; // Is the batch complete?
293
+ output?: TOutput; // Output if ready=true
294
+ error?: string; // Error message if failed
295
+ nextCheckIn?: number; // ms until next check (if not ready)
296
+ metrics?: Record<string, number>;
297
+ embeddings?: unknown; // Optional embedding info
298
+ }
299
+ ```
300
+
301
+ ### Examples
302
+
303
+ ```typescript
304
+ // Not ready yet
305
+ return { ready: false, nextCheckIn: 30000 };
306
+
307
+ // Completed successfully
308
+ return {
309
+ ready: true,
310
+ output: { results: batchResults },
311
+ metrics: { itemsProcessed: batchResults.length },
312
+ };
313
+
314
+ // Failed
315
+ return {
316
+ ready: false,
317
+ error: "Batch processing failed: timeout exceeded",
318
+ };
319
+ ```
320
+
321
+ ## CheckCompletionContext
322
+
323
+ Context passed to `checkCompletion`:
324
+
325
+ ```typescript
326
+ interface CheckCompletionContext<TConfig> {
327
+ workflowRunId: string;
328
+ stageId: string;
329
+ stageRecordId: string; // For AI logging context
330
+ config: TConfig;
331
+ log: LogFunction;
332
+ onLog: LogFunction; // Alias for log
333
+ storage: StageStorage;
334
+ }
335
+ ```
336
+
337
+ ## Complete Examples
338
+
339
+ ### Data Extraction Stage
340
+
341
+ ```typescript
342
+ const extractionStage = defineStage({
343
+ id: "data-extraction",
344
+ name: "Data Extraction",
345
+ description: "Extracts structured data from documents",
346
+
347
+ schemas: {
348
+ input: z.object({
349
+ documentUrl: z.string().url(),
350
+ format: z.enum(["pdf", "docx", "html"]),
351
+ }),
352
+ output: z.object({
353
+ title: z.string(),
354
+ sections: z.array(z.object({
355
+ heading: z.string(),
356
+ content: z.string(),
357
+ })),
358
+ metadata: z.object({
359
+ pageCount: z.number(),
360
+ wordCount: z.number(),
361
+ }),
362
+ }),
363
+ config: z.object({
364
+ extractImages: z.boolean().default(false),
365
+ maxPages: z.number().default(100),
366
+ }),
367
+ },
368
+
369
+ async execute(ctx) {
370
+ await ctx.log("INFO", `Extracting from ${ctx.input.documentUrl}`);
371
+
372
+ const document = await fetchDocument(ctx.input.documentUrl);
373
+ const extracted = await extractContent(document, {
374
+ format: ctx.input.format,
375
+ extractImages: ctx.config.extractImages,
376
+ maxPages: ctx.config.maxPages,
377
+ });
378
+
379
+ return {
380
+ output: extracted,
381
+ customMetrics: {
382
+ pagesProcessed: extracted.metadata.pageCount,
383
+ sectionsFound: extracted.sections.length,
384
+ },
385
+ };
386
+ },
387
+ });
388
+ ```
389
+
390
+ ### AI Classification Stage
391
+
392
+ ```typescript
393
+ const classificationStage = defineStage({
394
+ id: "classification",
395
+ name: "Content Classification",
396
+ dependencies: ["data-extraction"],
397
+
398
+ schemas: {
399
+ input: "none",
400
+ output: z.object({
401
+ categories: z.array(z.string()),
402
+ confidence: z.number(),
403
+ reasoning: z.string(),
404
+ }),
405
+ config: z.object({
406
+ model: z.string().default("gemini-2.5-flash"),
407
+ minConfidence: z.number().default(0.8),
408
+ }),
409
+ },
410
+
411
+ async execute(ctx) {
412
+ const extraction = ctx.require("data-extraction");
413
+
414
+ const ai = createAIHelper("classification", aiLogger);
415
+ const { object } = await ai.generateObject(
416
+ ctx.config.model,
417
+ `Classify this document:\n\n${extraction.sections.map(s => s.content).join("\n")}`,
418
+ ctx.schemas.output
419
+ );
420
+
421
+ return { output: object };
422
+ },
423
+ });
424
+ ```
425
+
426
+ ### Batch Processing Stage
427
+
428
+ ```typescript
429
+ const batchEmbeddingStage = defineAsyncBatchStage({
430
+ id: "batch-embeddings",
431
+ name: "Batch Embeddings",
432
+ mode: "async-batch",
433
+ dependencies: ["data-extraction"],
434
+
435
+ schemas: {
436
+ input: "none",
437
+ output: z.object({
438
+ embeddings: z.array(z.object({
439
+ sectionId: z.number(),
440
+ vector: z.array(z.number()),
441
+ })),
442
+ }),
443
+ config: z.object({
444
+ model: z.string().default("text-embedding-004"),
445
+ }),
446
+ },
447
+
448
+ async execute(ctx) {
449
+ // Check for resume
450
+ if (ctx.resumeState) {
451
+ const cached = await ctx.storage.load("embeddings-result");
452
+ if (cached) return { output: cached };
453
+ }
454
+
455
+ const extraction = ctx.require("data-extraction");
456
+ const texts = extraction.sections.map(s => s.content);
457
+
458
+ // Submit batch
459
+ const ai = createAIHelper(`batch.${ctx.workflowRunId}`, aiLogger);
460
+ const batch = ai.batch(ctx.config.model, "google");
461
+ const handle = await batch.submit(
462
+ texts.map((text, i) => ({ id: `section-${i}`, prompt: text }))
463
+ );
464
+
465
+ return {
466
+ suspended: true,
467
+ state: {
468
+ batchId: handle.id,
469
+ submittedAt: new Date().toISOString(),
470
+ pollInterval: 30000,
471
+ maxWaitTime: 1800000,
472
+ metadata: { sectionCount: texts.length },
473
+ },
474
+ pollConfig: {
475
+ pollInterval: 30000,
476
+ maxWaitTime: 1800000,
477
+ nextPollAt: new Date(Date.now() + 30000),
478
+ },
479
+ };
480
+ },
481
+
482
+ async checkCompletion(state, ctx) {
483
+ const ai = createAIHelper(`batch.${ctx.workflowRunId}`, aiLogger);
484
+ const batch = ai.batch(ctx.config.model, "google");
485
+
486
+ const status = await batch.getStatus(state.batchId);
487
+
488
+ if (status.status === "completed") {
489
+ const results = await batch.getResults(state.batchId);
490
+ const embeddings = results.map((r, i) => ({
491
+ sectionId: i,
492
+ vector: r.result as number[],
493
+ }));
494
+
495
+ // Cache for resume
496
+ await ctx.storage.save("embeddings-result", { embeddings });
497
+
498
+ return { ready: true, output: { embeddings } };
499
+ }
500
+
501
+ if (status.status === "failed") {
502
+ return { ready: false, error: "Batch embedding failed" };
503
+ }
504
+
505
+ return { ready: false, nextCheckIn: 30000 };
506
+ },
507
+ });
508
+ ```
509
+
510
+ ## NoInputSchema
511
+
512
+ For stages that take no direct input and rely only on workflow context, pass `NoInputSchema` (equivalent to the `"none"` shorthand):
513
+
514
+ ```typescript
515
+ import { NoInputSchema } from "@bratsos/workflow-engine";
516
+
517
+ // These are equivalent:
518
+ schemas: { input: "none", ... }
519
+ schemas: { input: NoInputSchema, ... }
520
+ ```
521
+
522
+ ## Type Inference Utilities
523
+
524
+ ```typescript
525
+ import type {
526
+ InferStageInput,
527
+ InferStageOutput,
528
+ InferStageConfig,
529
+ } from "@bratsos/workflow-engine";
530
+
531
+ // Extract types from a stage
532
+ type Input = InferStageInput<typeof myStage>;
533
+ type Output = InferStageOutput<typeof myStage>;
534
+ type Config = InferStageConfig<typeof myStage>;
535
+ ```