npm - @bratsos/workflow-engine - Versions diffs - 0.1.0 → 0.2.0 - Mend

@bratsos/workflow-engine 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/README.md +270 -513
package/dist/chunk-HL3OJG7W.js +1033 -0
package/dist/chunk-HL3OJG7W.js.map +1 -0
package/dist/{chunk-7IITBLFY.js → chunk-NYKMT46J.js} +268 -25
package/dist/chunk-NYKMT46J.js.map +1 -0
package/dist/chunk-SPXBCZLB.js +17 -0
package/dist/chunk-SPXBCZLB.js.map +1 -0
package/dist/{client-5vz5Vv4A.d.ts → client-D4PoxADF.d.ts} +3 -143
package/dist/client.d.ts +3 -2
package/dist/{index-DmR3E8D7.d.ts → index-DAzCfO1R.d.ts} +20 -1
package/dist/index.d.ts +234 -601
package/dist/index.js +46 -2034
package/dist/index.js.map +1 -1
package/dist/{interface-Cv22wvLG.d.ts → interface-MMqhfQQK.d.ts} +69 -2
package/dist/kernel/index.d.ts +26 -0
package/dist/kernel/index.js +3 -0
package/dist/kernel/index.js.map +1 -0
package/dist/kernel/testing/index.d.ts +44 -0
package/dist/kernel/testing/index.js +85 -0
package/dist/kernel/testing/index.js.map +1 -0
package/dist/persistence/index.d.ts +2 -2
package/dist/persistence/index.js +2 -1
package/dist/persistence/prisma/index.d.ts +2 -2
package/dist/persistence/prisma/index.js +2 -1
package/dist/plugins-BCnDUwIc.d.ts +415 -0
package/dist/ports-tU3rzPXJ.d.ts +245 -0
package/dist/stage-BPw7m9Wx.d.ts +144 -0
package/dist/testing/index.d.ts +23 -1
package/dist/testing/index.js +156 -13
package/dist/testing/index.js.map +1 -1
package/package.json +11 -1
package/skills/workflow-engine/SKILL.md +234 -348
package/skills/workflow-engine/references/03-runtime-setup.md +111 -426
package/skills/workflow-engine/references/05-persistence-setup.md +32 -0
package/skills/workflow-engine/references/07-testing-patterns.md +141 -474
package/skills/workflow-engine/references/08-common-patterns.md +118 -431
package/dist/chunk-7IITBLFY.js.map +0 -1

package/skills/workflow-engine/references/03-runtime-setup.md CHANGED

@@ -1,497 +1,182 @@
-# Runtime Setup
+# Kernel & Host Setup
-Complete guide for configuring and running WorkflowRuntime.
+Complete guide for configuring the command kernel and choosing a host.
-## Creating a Runtime
+## Creating a Kernel
+The kernel is the core command dispatcher. It's environment-agnostic -- no timers, no signals, no global state.
 ```typescript
-import { createWorkflowRuntime } from "@bratsos/workflow-engine";
+import { createKernel } from "@bratsos/workflow-engine/kernel";
+import type {
+  Kernel,
+  KernelConfig,
+  Persistence,
+  BlobStore,
+  JobTransport,
+  EventSink,
+  Scheduler,
+  Clock,
+} from "@bratsos/workflow-engine/kernel";
 import {
   createPrismaWorkflowPersistence,
   createPrismaJobQueue,
-  createPrismaAICallLogger,
 } from "@bratsos/workflow-engine/persistence/prisma";
 import { PrismaClient } from "@prisma/client";
 const prisma = new PrismaClient();
-// PostgreSQL (default)
-const runtime = createWorkflowRuntime({
-  // Required
+const kernel = createKernel({
+  // Required: metadata storage (runs, stages, logs, outbox, idempotency)
   persistence: createPrismaWorkflowPersistence(prisma),
-  jobQueue: createPrismaJobQueue(prisma),
-  registry: {
-    getWorkflow: (id) => workflowMap[id] ?? null,
-  },
-  // Optional
-  aiCallLogger: createPrismaAICallLogger(prisma),
-  pollIntervalMs: 10000,
-  jobPollIntervalMs: 1000,
-  staleJobThresholdMs: 60000,
-  workerId: "worker-1",
-  getWorkflowPriority: (id) => priorityMap[id] ?? 5,
-});
-// SQLite - pass databaseType to persistence and job queue
-const runtime = createWorkflowRuntime({
-  persistence: createPrismaWorkflowPersistence(prisma, { databaseType: "sqlite" }),
-  jobQueue: createPrismaJobQueue(prisma, { databaseType: "sqlite" }),
-  registry: { getWorkflow: (id) => workflowMap[id] ?? null },
-  aiCallLogger: createPrismaAICallLogger(prisma),
-});
-```
-## WorkflowRuntimeConfig
-```typescript
-interface WorkflowRuntimeConfig {
-  /** Persistence implementation (required) */
-  persistence: WorkflowPersistence;
-  /** Job queue implementation (required) */
-  jobQueue: JobQueue;
-  /** Workflow registry (required) */
-  registry: WorkflowRegistry;
-  /** AI call logger for createAIHelper (optional) */
-  aiCallLogger?: AICallLogger;
-  /** Orchestration poll interval in ms (default: 10000) */
-  pollIntervalMs?: number;
-  /** Job dequeue interval in ms (default: 1000) */
-  jobPollIntervalMs?: number;
-  /** Worker ID (default: auto-generated) */
-  workerId?: string;
-  /** Stale job threshold in ms (default: 60000) */
-  staleJobThresholdMs?: number;
-  /** Function to determine workflow priority */
-  getWorkflowPriority?: (workflowId: string) => number;
-}
-```
-## WorkflowRegistry
-The registry maps workflow IDs to workflow definitions:
-```typescript
-interface WorkflowRegistry {
-  getWorkflow(workflowId: string): Workflow<any, any, any> | null;
-}
-// Simple implementation
-const registry: WorkflowRegistry = {
-  getWorkflow: (id) => {
-    const workflows = {
-      "document-analysis": documentAnalysisWorkflow,
-      "data-processing": dataProcessingWorkflow,
-    };
-    return workflows[id] ?? null;
-  },
-};
-// With type safety
-const workflowMap: Record<string, Workflow<any, any, any>> = {
-  "document-analysis": documentAnalysisWorkflow,
-  "data-processing": dataProcessingWorkflow,
-};
-const registry: WorkflowRegistry = {
-  getWorkflow: (id) => workflowMap[id] ?? null,
-};
-```
-## Lifecycle Methods
-### start()
-Start the runtime as a worker that processes jobs and polls for state changes.
+  // Required: large payload storage
+  blobStore: myBlobStore,
-```typescript
-await runtime.start();
-// Runtime is now:
-// - Polling for pending workflows
-// - Processing jobs from the queue
-// - Checking suspended stages
-// - Handling graceful shutdown on SIGTERM/SIGINT
-```
-### stop()
-Stop the runtime gracefully.
+  // Required: job queue
+  jobTransport: createPrismaJobQueue(prisma),
-```typescript
-runtime.stop();
-// Stops polling and job processing
-// Current job completes before stopping
-```
+  // Required: async event publishing
+  eventSink: myEventSink,
-## Creating and Running Workflows
+  // Required: deferred command triggers
+  scheduler: myScheduler,
-### createRun(options)
+  // Required: injectable time source
+  clock: { now: () => new Date() },
-Create a new workflow run. The runtime picks it up automatically on the next poll.
-```typescript
-interface CreateRunOptions {
-  workflowId: string;                    // Required
-  input: Record<string, unknown>;        // Required
-  config?: Record<string, unknown>;      // Optional
-  priority?: number;                     // Optional (default: 5)
-  metadata?: Record<string, unknown>;    // Optional domain-specific data
-}
-const { workflowRunId } = await runtime.createRun({
-  workflowId: "document-analysis",
-  input: { documentUrl: "https://example.com/doc.pdf" },
-  config: {
-    extract: { maxLength: 5000 },
-  },
-  priority: 8,  // Higher = more important
-  metadata: {
-    userId: "user-123",
-    requestId: "req-456",
+  // Required: workflow definition lookup
+  registry: {
+    getWorkflow: (id) => workflowMap.get(id),
   },
 });
 ```
-The method:
-1. Validates the workflow exists in the registry
-2. Validates input against the workflow's input schema
-3. Merges provided config with workflow defaults
-4. Validates merged config against all stage config schemas
-5. Creates a WorkflowRun record with status PENDING
-### transitionWorkflow(workflowRunId)
-Manually trigger workflow state transition (usually handled automatically).
+## Port Interfaces
-```typescript
-await runtime.transitionWorkflow(workflowRunId);
-```
+| Port | Interface | Purpose |
+|------|-----------|---------|
+| `persistence` | `Persistence` | CRUD for runs, stages, logs, outbox events, idempotency keys |
+| `blobStore` | `BlobStore` | `put(key, data)`, `get(key)`, `has(key)`, `delete(key)`, `list(prefix)` |
+| `jobTransport` | `JobTransport` | `enqueue`, `enqueueParallel`, `dequeue`, `complete`, `suspend`, `fail` |
+| `eventSink` | `EventSink` | `emit(event)` - async event publishing |
+| `scheduler` | `Scheduler` | `schedule(type, payload, runAt)`, `cancel(type, correlationId)` |
+| `clock` | `Clock` | `now()` - returns `Date` |
-### pollSuspendedStages()
+## Node Host
-Manually check suspended stages (usually handled automatically).
+For long-running worker processes (Node.js, Docker containers, etc.).
 ```typescript
-await runtime.pollSuspendedStages();
-```
-## AI Helper Integration
+import { createNodeHost } from "@bratsos/workflow-engine-host-node";
-### createAIHelper(topic, logContext?)
-Create an AIHelper bound to the runtime's logger.
-```typescript
-// Simple usage
-const ai = runtime.createAIHelper("my-task");
-// With log context (for batch operations)
-const logContext = runtime.createLogContext(workflowRunId, stageRecordId);
-const ai = runtime.createAIHelper(`workflow.${workflowRunId}`, logContext);
-```
-### createLogContext(workflowRunId, stageRecordId)
-Create a log context for AIHelper (enables batch logging to persistence).
-```typescript
-const logContext = runtime.createLogContext(workflowRunId, stageRecordId);
-// { workflowRunId, stageRecordId, createLog: fn }
-```
-## Complete Setup Example
-```typescript
-import {
-  createWorkflowRuntime,
-  WorkflowBuilder,
-  defineStage,
-} from "@bratsos/workflow-engine";
-import {
-  createPrismaWorkflowPersistence,
-  createPrismaJobQueue,
-  createPrismaAICallLogger,
-} from "@bratsos/workflow-engine/persistence/prisma";
-import { PrismaClient } from "@prisma/client";
-import { z } from "zod";
-// Initialize Prisma
-const prisma = new PrismaClient();
-// Define stages
-const helloStage = defineStage({
-  id: "hello",
-  name: "Hello Stage",
-  schemas: {
-    input: z.object({ name: z.string() }),
-    output: z.object({ greeting: z.string() }),
-    config: z.object({ prefix: z.string().default("Hello") }),
-  },
-  async execute(ctx) {
-    return {
-      output: { greeting: `${ctx.config.prefix}, ${ctx.input.name}!` },
-    };
-  },
-});
-// Build workflow
-const helloWorkflow = new WorkflowBuilder(
-  "hello-workflow",
-  "Hello Workflow",
-  "A simple greeting workflow",
-  z.object({ name: z.string() }),
-  z.object({ greeting: z.string() })
-)
-  .pipe(helloStage)
-  .build();
-// Create registry
-const registry = {
-  getWorkflow: (id: string) => {
-    if (id === "hello-workflow") return helloWorkflow;
-    return null;
-  },
-};
+const host = createNodeHost({
+  kernel,
+  jobTransport: createPrismaJobQueue(prisma),
+  workerId: "worker-1",
-// Create runtime
-const runtime = createWorkflowRuntime({
-  persistence: createPrismaWorkflowPersistence(prisma),
-  jobQueue: createPrismaJobQueue(prisma),
-  aiCallLogger: createPrismaAICallLogger(prisma),
-  registry,
-  pollIntervalMs: 5000,
-  jobPollIntervalMs: 500,
+  // Optional tuning
+  orchestrationIntervalMs: 10_000,    // Claim pending, poll suspended, reap stale, flush outbox
+  jobPollIntervalMs: 1_000,           // Dequeue and execute jobs
+  staleLeaseThresholdMs: 60_000,      // Release stale job leases
+  maxClaimsPerTick: 10,               // Max pending runs to claim per tick
+  maxSuspendedChecksPerTick: 10,      // Max suspended stages to poll per tick
+  maxOutboxFlushPerTick: 100,         // Max outbox events to flush per tick
 });
-// Start runtime
-async function main() {
-  console.log("Starting runtime...");
-  await runtime.start();
-  // Create a workflow run
-  const { workflowRunId } = await runtime.createRun({
-    workflowId: "hello-workflow",
-    input: { name: "World" },
-  });
-  console.log(`Created workflow run: ${workflowRunId}`);
-  // Runtime will automatically:
-  // 1. Pick up the pending workflow
-  // 2. Enqueue the first stage
-  // 3. Execute the stage
-  // 4. Mark workflow as completed
-}
-main().catch(console.error);
+// Start polling loops and register SIGTERM/SIGINT handlers
+await host.start();
 // Graceful shutdown
-process.on("SIGTERM", () => {
-  runtime.stop();
-  prisma.$disconnect();
-});
-```
+await host.stop();
-## Rerunning Workflows from a Specific Stage
-You can rerun a workflow starting from a specific stage, skipping earlier stages and using their persisted outputs. This is useful for:
-- Retrying after a stage failure (fix the bug, rerun from the failed stage)
-- Re-processing data with updated stage logic
-- Testing specific stages in isolation
-### Using WorkflowExecutor.execute() with fromStage
-```typescript
-import { WorkflowExecutor } from "@bratsos/workflow-engine";
-// Given: A workflow that has already been run (stages 1-4 completed)
-const executor = new WorkflowExecutor(
-  workflow,
-  workflowRunId,
-  workflowType,
-  { persistence, aiLogger }
-);
-// Rerun from stage 3 - skips stages 1-2, runs 3-4
-const result = await executor.execute(
-  input,    // Original input (not used when fromStage is set)
-  config,
-  { fromStage: "stage-3" }
-);
+// Runtime stats
+const stats = host.getStats();
+// { workerId, jobsProcessed, orchestrationTicks, isRunning, uptimeMs }
 ```
-### How It Works
-1. **Finds the execution group** containing the specified stage
-2. **Loads input** from the previous stage's persisted output (or workflow input if first stage)
-3. **Rebuilds workflowContext** from all completed stages before the target group
-4. **Deletes stage records** for the target stage and all subsequent stages (clean re-execution)
-5. **Executes** from the target stage forward
-### Requirements
-- **Previous stages must have been executed** - their outputs must be persisted
-- **Stage must exist** in the workflow definition
-### Error Handling
+### Worker Process Pattern
 ```typescript
-// Error: Stage doesn't exist
-await executor.execute(input, config, { fromStage: "non-existent" });
-// Throws: Stage "non-existent" not found in workflow "my-workflow"
-// Error: No prior execution
-await executor.execute(input, config, { fromStage: "stage-3" });
-// Throws: Cannot rerun from stage "stage-3": no completed stages found before execution group 3
-```
-### Common Use Cases
-**Retry After Failure:**
-```typescript
-// Stage 3 failed, you fixed the bug
-await executor.execute(input, config, { fromStage: "stage-3" });
-```
-**Re-process with Updated Logic:**
-```typescript
-// Updated stage-2 implementation, want to rerun from there
-await executor.execute(input, config, { fromStage: "stage-2" });
-```
-**Fresh Start from Beginning:**
-```typescript
-// Rerun entire workflow
-await executor.execute(input, config, { fromStage: "stage-1" });
-```
-### workflowContext Availability
+// worker.ts
+import { host } from "./setup";
-When rerunning from a stage, `ctx.workflowContext` contains outputs from all stages **before** the target group:
+process.on("SIGTERM", () => host.stop());
+process.on("SIGINT", () => host.stop());
-```typescript
-// Rerunning from stage-3 (group 3)
-// ctx.workflowContext contains:
-// - "stage-1": { ... }  // from group 1
-// - "stage-2": { ... }  // from group 2
-// - NOT "stage-3" or later
+console.log("Starting workflow worker...");
+await host.start();
 ```
-## Worker Deployment Patterns
+## Serverless Host
-### Single Worker
+For stateless environments (Cloudflare Workers, AWS Lambda, Vercel Edge, Deno Deploy).
 ```typescript
-// worker.ts
-const runtime = createWorkflowRuntime({ ... });
-await runtime.start();
-```
-### Multiple Workers (Horizontal Scaling)
+import { createServerlessHost } from "@bratsos/workflow-engine-host-serverless";
-```typescript
-// Each worker gets a unique ID
-const workerId = `worker-${process.env.POD_NAME || process.pid}`;
+const host = createServerlessHost({
+  kernel,
+  jobTransport,
+  workerId: "my-worker",
-const runtime = createWorkflowRuntime({
-  ...config,
-  workerId,
+  // Optional tuning (same as Node host)
+  staleLeaseThresholdMs: 60_000,
+  maxClaimsPerTick: 10,
+  maxSuspendedChecksPerTick: 10,
+  maxOutboxFlushPerTick: 100,
 });
-await runtime.start();
-// Workers compete for jobs using atomic dequeue
-// Each job is processed by exactly one worker
 ```
-### API Server + Separate Workers
+### Handle a Single Job
+When a queue message arrives (Cloudflare Queue, SQS, etc.):
 ```typescript
-// api-server.ts - Only creates runs, doesn't process
-const runtime = createWorkflowRuntime({ ...config });
-// Don't call runtime.start()
-app.post("/workflows/:id/runs", async (req, res) => {
-  const { workflowRunId } = await runtime.createRun({
-    workflowId: req.params.id,
-    input: req.body,
-  });
-  res.json({ workflowRunId });
+const result = await host.handleJob({
+  jobId: msg.id,
+  workflowRunId: msg.body.workflowRunId,
+  workflowId: msg.body.workflowId,
+  stageId: msg.body.stageId,
+  attempt: msg.body.attempt,
+  maxAttempts: msg.body.maxAttempts,
+  payload: msg.body.payload,
 });
-// worker.ts - Only processes, doesn't create
-const runtime = createWorkflowRuntime({ ...config });
-await runtime.start();
+if (result.outcome === "completed") msg.ack();
+else if (result.outcome === "suspended") msg.ack();
+else msg.retry();
 ```
-## Configuration Recommendations
+### Dequeue and Process Jobs
-### Development
+For environments that poll rather than receive:
 ```typescript
-const runtime = createWorkflowRuntime({
-  ...config,
-  pollIntervalMs: 2000,      // Fast polling for development
-  jobPollIntervalMs: 500,    // Quick job pickup
-  staleJobThresholdMs: 30000, // Short timeout
-});
+const result = await host.processAvailableJobs({ maxJobs: 5 });
+// { processed, succeeded, failed }
 ```
-### Production
-```typescript
-const runtime = createWorkflowRuntime({
-  ...config,
-  pollIntervalMs: 10000,      // Standard polling
-  jobPollIntervalMs: 1000,    // Balance between latency and DB load
-  staleJobThresholdMs: 60000, // Allow for longer processing
-  workerId: `worker-${process.env.HOSTNAME}`,
-});
-```
+### Maintenance Tick
-### High-Throughput
+Run from a cron trigger (Cloudflare Cron, EventBridge, etc.):
 ```typescript
-const runtime = createWorkflowRuntime({
-  ...config,
-  pollIntervalMs: 5000,       // More frequent orchestration
-  jobPollIntervalMs: 100,     // Aggressive job pickup
-  staleJobThresholdMs: 120000, // Longer timeout for long jobs
-});
+const tick = await host.runMaintenanceTick();
+// { claimed, suspendedChecked, staleReleased, eventsFlushed }
+// Resumed suspended stages are automatically followed by run.transition.
 ```
-## Monitoring
-The runtime logs key events to console:
+## Multi-Worker Setup
-```
-[Runtime] Starting worker worker-12345-hostname
-[Runtime] Poll interval: 10000ms, Job poll: 1000ms
-[Runtime] Created WorkflowRun abc123 for document-analysis
-[Runtime] Found 1 pending workflows
-[Runtime] Started workflow abc123
-[Runtime] Processing stage extract for workflow abc123
-[Runtime] Worker worker-12345-hostname: processed 10 jobs
-[Runtime] Workflow abc123 completed
-```
-For production monitoring, integrate with your observability stack:
+Multiple workers can share the same database. Each worker needs a unique `workerId`:
 ```typescript
-// Custom logging
-const originalLog = console.log;
-console.log = (...args) => {
-  if (args[0]?.includes("[Runtime]")) {
-    metrics.increment("workflow.runtime.log");
-    logger.info(args.join(" "));
-  }
-  originalLog(...args);
-};
+// Worker 1
+createNodeHost({ kernel, jobTransport, workerId: "worker-1" });
+// Worker 2
+createNodeHost({ kernel, jobTransport, workerId: "worker-2" });
 ```
+The `claimPendingRun` operation uses `FOR UPDATE SKIP LOCKED` in PostgreSQL to prevent race conditions.

package/skills/workflow-engine/references/05-persistence-setup.md CHANGED

@@ -39,6 +39,8 @@ The workflow engine uses three persistence interfaces:
 ```typescript
 interface WorkflowPersistence {
+  withTransaction<T>(fn: (tx: WorkflowPersistence) => Promise<T>): Promise<T>;
   // WorkflowRun operations
   createRun(data: CreateRunInput): Promise<WorkflowRunRecord>;
   updateRun(id: string, data: UpdateRunInput): Promise<void>;
@@ -74,6 +76,36 @@ interface WorkflowPersistence {
   // Stage output convenience
   saveStageOutput(runId, workflowType, stageId, output): Promise<string>;
+  // Outbox operations
+  appendOutboxEvents(events: CreateOutboxEventInput[]): Promise<void>;
+  getUnpublishedOutboxEvents(limit?: number): Promise<OutboxRecord[]>;
+  markOutboxEventsPublished(ids: string[]): Promise<void>;
+  incrementOutboxRetryCount(id: string): Promise<number>;
+  moveOutboxEventToDLQ(id: string): Promise<void>;
+  replayDLQEvents(maxEvents: number): Promise<number>;
+  // Idempotency operations
+  acquireIdempotencyKey(key: string, commandType: string): Promise<
+    | { status: "acquired" }
+    | { status: "replay"; result: unknown }
+    | { status: "in_progress" }
+  >;
+  completeIdempotencyKey(key: string, commandType: string, result: unknown): Promise<void>;
+  releaseIdempotencyKey(key: string, commandType: string): Promise<void>;
+}
+```
+```typescript
+interface DequeueResult {
+  jobId: string;
+  workflowRunId: string;
+  workflowId: string;
+  stageId: string;
+  priority: number;
+  attempt: number;
+  maxAttempts: number;
+  payload: Record<string, unknown>;
 }
 ```