@livingdata/pipex 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/dist/cli/index.js +126 -0
- package/dist/cli/pipeline-loader.js +87 -0
- package/dist/cli/pipeline-runner.js +193 -0
- package/dist/cli/reporter.js +78 -0
- package/dist/cli/state.js +89 -0
- package/dist/cli/types.js +1 -0
- package/dist/engine/docker-executor.js +96 -0
- package/dist/engine/docker-runtime.js +65 -0
- package/dist/engine/executor.js +16 -0
- package/dist/engine/index.js +3 -0
- package/dist/engine/runtime.js +2 -0
- package/dist/engine/types.js +1 -0
- package/dist/engine/workspace.js +264 -0
- package/dist/index.js +40 -0
- package/dist/reporter.js +13 -0
- package/package.json +40 -0
package/README.md
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
# Pipex
|
|
2
|
+
|
|
3
|
+
Execution engine for containerized steps via Docker CLI.
|
|
4
|
+
|
|
5
|
+
Runs containers with explicit volume mounts and manages artifacts through a staging/commit lifecycle. Designed to be driven by different orchestrators (CLI included, AI agent planned).
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install
|
|
11
|
+
cp .env.example .env
|
|
12
|
+
# Edit .env to set PIPEX_WORKDIR if needed (defaults to ./workdir)
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Prerequisites
|
|
16
|
+
|
|
17
|
+
- Node.js 24+
|
|
18
|
+
- Docker CLI installed and accessible
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
### Running a pipeline
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# Interactive mode (default)
|
|
26
|
+
npm start -- run pipeline.example.json
|
|
27
|
+
|
|
28
|
+
# With workspace name (enables caching)
|
|
29
|
+
npm start -- run pipeline.example.json --workspace my-build
|
|
30
|
+
|
|
31
|
+
# JSON mode (for CI/CD)
|
|
32
|
+
npm start -- run pipeline.example.json --json
|
|
33
|
+
|
|
34
|
+
# Custom workdir
|
|
35
|
+
npm start -- run pipeline.example.json --workdir /tmp/builds
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Managing workspaces
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# List workspaces (with artifact/cache counts)
|
|
42
|
+
npm start -- list
|
|
43
|
+
npm start -- ls --json
|
|
44
|
+
|
|
45
|
+
# Remove specific workspaces
|
|
46
|
+
npm start -- rm my-build other-build
|
|
47
|
+
|
|
48
|
+
# Remove all workspaces
|
|
49
|
+
npm start -- clean
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Via npx
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# Build first
|
|
56
|
+
npm run build
|
|
57
|
+
|
|
58
|
+
# Run locally via npx
|
|
59
|
+
npx . run example/pipeline.json --workspace my-build
|
|
60
|
+
npx . list
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Commands
|
|
64
|
+
|
|
65
|
+
| Command | Description |
|
|
66
|
+
|---------|-------------|
|
|
67
|
+
| `run <pipeline>` | Execute a pipeline |
|
|
68
|
+
| `list` (alias `ls`) | List workspaces |
|
|
69
|
+
| `rm <workspace...>` | Remove one or more workspaces |
|
|
70
|
+
| `clean` | Remove all workspaces |
|
|
71
|
+
|
|
72
|
+
### Global Options
|
|
73
|
+
|
|
74
|
+
| Option | Description |
|
|
75
|
+
|--------|-------------|
|
|
76
|
+
| `--workdir <path>` | Workspaces root directory (default: `./workdir`) |
|
|
77
|
+
| `--json` | Structured JSON logs instead of interactive UI |
|
|
78
|
+
|
|
79
|
+
### Run Options
|
|
80
|
+
|
|
81
|
+
| Option | Alias | Description |
|
|
82
|
+
|--------|-------|-------------|
|
|
83
|
+
| `--workspace <name>` | `-w` | Workspace name for caching |
|
|
84
|
+
| `--force [steps]` | `-f` | Skip cache for all steps, or a comma-separated list |
|
|
85
|
+
|
|
86
|
+
## Pipeline Format
|
|
87
|
+
|
|
88
|
+
Minimal example:
|
|
89
|
+
|
|
90
|
+
```json
|
|
91
|
+
{
|
|
92
|
+
"name": "my-pipeline",
|
|
93
|
+
"steps": [
|
|
94
|
+
{
|
|
95
|
+
"id": "download",
|
|
96
|
+
"image": "alpine:3.19",
|
|
97
|
+
"cmd": ["sh", "-c", "echo hello > /output/hello.txt"]
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"id": "process",
|
|
101
|
+
"image": "alpine:3.19",
|
|
102
|
+
"cmd": ["cat", "/input/download/hello.txt"],
|
|
103
|
+
"inputs": [{"step": "download"}]
|
|
104
|
+
}
|
|
105
|
+
]
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Step Options
|
|
110
|
+
|
|
111
|
+
| Field | Type | Description |
|
|
112
|
+
|-------|------|-------------|
|
|
113
|
+
| `id` | string | Step identifier (required) |
|
|
114
|
+
| `image` | string | Docker image (required) |
|
|
115
|
+
| `cmd` | string[] | Command to execute (required) |
|
|
116
|
+
| `inputs` | InputSpec[] | Previous steps to mount as read-only |
|
|
117
|
+
| `env` | Record<string, string> | Environment variables |
|
|
118
|
+
| `outputPath` | string | Output mount point (default: `/output`) |
|
|
119
|
+
| `mounts` | MountSpec[] | Host directories to bind mount (read-only) |
|
|
120
|
+
| `caches` | CacheSpec[] | Persistent caches to mount |
|
|
121
|
+
| `timeoutSec` | number | Execution timeout |
|
|
122
|
+
| `allowFailure` | boolean | Continue pipeline if step fails |
|
|
123
|
+
| `allowNetwork` | boolean | Enable network access |
|
|
124
|
+
|
|
125
|
+
### Inputs
|
|
126
|
+
|
|
127
|
+
Mount previous steps as read-only:
|
|
128
|
+
|
|
129
|
+
```json
|
|
130
|
+
"inputs": [
|
|
131
|
+
{"step": "step1"},
|
|
132
|
+
{"step": "step2", "copyToOutput": true}
|
|
133
|
+
]
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
- Mounted under `/input/{stepName}/`
|
|
137
|
+
- `copyToOutput: true` copies content to output before execution
|
|
138
|
+
|
|
139
|
+
### Host Mounts
|
|
140
|
+
|
|
141
|
+
Mount host directories into containers as **read-only**:
|
|
142
|
+
|
|
143
|
+
```json
|
|
144
|
+
"mounts": [
|
|
145
|
+
{"host": "src/app", "container": "/app"},
|
|
146
|
+
{"host": "config", "container": "/config"}
|
|
147
|
+
]
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
- `host` must be a **relative** path (resolved from the pipeline file's directory)
|
|
151
|
+
- `container` must be an **absolute** path
|
|
152
|
+
- Neither path can contain `..`
|
|
153
|
+
- Always mounted read-only -- containers cannot modify host files
|
|
154
|
+
|
|
155
|
+
This means a pipeline at `/project/ci/pipeline.json` can only mount subdirectories of `/project/ci/`. Use `/tmp` or `/output` inside the container for writes.
|
|
156
|
+
|
|
157
|
+
### Caches
|
|
158
|
+
|
|
159
|
+
Persistent read-write directories shared across steps and executions:
|
|
160
|
+
|
|
161
|
+
```json
|
|
162
|
+
"caches": [
|
|
163
|
+
{"name": "pnpm-store", "path": "/root/.local/share/pnpm/store"},
|
|
164
|
+
{"name": "build-cache", "path": "/tmp/cache"}
|
|
165
|
+
]
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
- **Persistent**: Caches survive across pipeline executions
|
|
169
|
+
- **Shared**: Multiple steps can use the same cache
|
|
170
|
+
- **Mutable**: Steps can read and write to caches
|
|
171
|
+
|
|
172
|
+
Common use cases:
|
|
173
|
+
- Package manager caches (pnpm, npm, cargo, maven)
|
|
174
|
+
- Build caches (gradle, ccache)
|
|
175
|
+
- Downloaded assets
|
|
176
|
+
|
|
177
|
+
**Note**: Caches are workspace-scoped (not global). Different workspaces have isolated caches.
|
|
178
|
+
|
|
179
|
+
## Example
|
|
180
|
+
|
|
181
|
+
The `example/` directory contains a multi-language pipeline that chains Node.js and Python steps:
|
|
182
|
+
|
|
183
|
+
```
|
|
184
|
+
example/
|
|
185
|
+
├── pipeline.json
|
|
186
|
+
└── scripts/
|
|
187
|
+
├── nodejs/ # lodash-based data analysis
|
|
188
|
+
│ ├── package.json
|
|
189
|
+
│ ├── analyze.js
|
|
190
|
+
│ └── transform.js
|
|
191
|
+
└── python/ # pyyaml-based enrichment
|
|
192
|
+
├── pyproject.toml
|
|
193
|
+
├── analyze.py
|
|
194
|
+
└── transform.py
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
The pipeline runs 4 steps: `node-analyze` → `node-transform` → `python-analyze` → `python-transform`. Each step mounts its scripts directory as read-only and passes artifacts to the next step via `/input`.
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
npm start -- run example/pipeline.json --workspace example-test
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Caching & Workspaces
|
|
204
|
+
|
|
205
|
+
Workspaces enable caching across runs. Name is determined by:
|
|
206
|
+
1. CLI flag `--workspace` (highest priority)
|
|
207
|
+
2. Config `"name"` field
|
|
208
|
+
3. Filename (e.g., `build.json` → `build`)
|
|
209
|
+
4. Auto-generated timestamp
|
|
210
|
+
|
|
211
|
+
**Cache behavior**: Steps are skipped if image, cmd, env, inputs, and mounts haven't changed. See code documentation for details.
|
|
212
|
+
|
|
213
|
+
## Troubleshooting
|
|
214
|
+
|
|
215
|
+
### Docker not found
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
# Verify Docker is accessible
|
|
219
|
+
docker --version
|
|
220
|
+
docker ps
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Permission denied (Linux)
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
sudo usermod -aG docker $USER
|
|
227
|
+
newgrp docker
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Workspace disk full
|
|
231
|
+
|
|
232
|
+
Clean old workspaces:
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
npm start -- list
|
|
236
|
+
npm start -- rm old-workspace-id
|
|
237
|
+
# Or remove all at once
|
|
238
|
+
npm start -- clean
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### Cached step with missing artifact
|
|
242
|
+
|
|
243
|
+
Force re-execution with the `--force` flag (`pipex run ... --force` or `--force step1,step2`), or reset the cached state manually:
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
rm $PIPEX_WORKDIR/{workspace-id}/state.json
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
## Development
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
npm run build
|
|
253
|
+
npm run lint
|
|
254
|
+
npm run lint:fix
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
## Architecture
|
|
258
|
+
|
|
259
|
+
For implementation details, see code documentation in:
|
|
260
|
+
- `src/engine/` - Low-level container execution (workspace, executor)
|
|
261
|
+
- `src/cli/` - Pipeline orchestration (runner, loader, state)
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import 'dotenv/config';
|
|
3
|
+
import process from 'node:process';
|
|
4
|
+
import { resolve } from 'node:path';
|
|
5
|
+
import chalk from 'chalk';
|
|
6
|
+
import { Command } from 'commander';
|
|
7
|
+
import { Workspace } from '../engine/workspace.js';
|
|
8
|
+
import { DockerCliExecutor } from '../engine/docker-executor.js';
|
|
9
|
+
import { PipelineLoader } from './pipeline-loader.js';
|
|
10
|
+
import { PipelineRunner } from './pipeline-runner.js';
|
|
11
|
+
import { ConsoleReporter, InteractiveReporter } from './reporter.js';
|
|
12
|
+
/**
 * Merge the options of the invoked subcommand with those declared on its
 * ancestor commands (the program-level `--workdir` / `--json` flags).
 * @param {object} cmd - The commander Command instance passed to an action.
 * @returns {object} The merged option values.
 */
function getGlobalOptions(cmd) {
    const merged = cmd.optsWithGlobals();
    return merged;
}
|
|
15
|
+
/**
 * Build the `pipex` CLI (run / list / rm / clean) and execute the requested
 * subcommand.
 *
 * Global flags (`--workdir`, `--json`) are declared on the root program and
 * read inside each action via getGlobalOptions().
 */
async function main() {
    const program = new Command();
    program
        .name('pipex')
        .description('Execution engine for containerized steps')
        // Keep in sync with package.json: the package is published as 0.0.1;
        // the previous hard-coded '0.1.0' did not match.
        .version('0.0.1')
        .option('--workdir <path>', 'Workspaces root directory', process.env.PIPEX_WORKDIR ?? './workdir')
        .option('--json', 'Output structured JSON logs');
    program
        .command('run')
        .description('Execute a pipeline')
        .argument('<pipeline>', 'Pipeline JSON file to execute')
        .option('-w, --workspace <name>', 'Workspace name (for caching)')
        .option('-f, --force [steps]', 'Skip cache for all steps, or a comma-separated list (e.g. --force step1,step2)')
        .action(async (pipelineFile, options, cmd) => {
            const { workdir, json } = getGlobalOptions(cmd);
            const workdirRoot = resolve(workdir);
            const loader = new PipelineLoader();
            const runtime = new DockerCliExecutor();
            // Interactive UI by default; flat structured logs under --json.
            const reporter = json ? new ConsoleReporter() : new InteractiveReporter();
            const runner = new PipelineRunner(loader, runtime, reporter, workdirRoot);
            try {
                // --force with no value -> true (skip cache everywhere);
                // --force a,b            -> ['a', 'b'];
                // absent                 -> undefined (normal caching).
                const force = options.force === true
                    ? true
                    : (typeof options.force === 'string' ? options.force.split(',') : undefined);
                await runner.run(pipelineFile, { workspace: options.workspace, force });
                if (json) {
                    console.log('Pipeline completed');
                }
            }
            catch (error) {
                // In JSON mode the reporter emits no human-readable failure
                // line, so print one here; always rethrow so the process
                // exits non-zero.
                if (json) {
                    console.error('Pipeline failed:', error instanceof Error ? error.message : error);
                }
                throw error;
            }
        });
    program
        .command('list')
        .alias('ls')
        .description('List workspaces')
        .action(async (_options, cmd) => {
            const { workdir, json } = getGlobalOptions(cmd);
            const workdirRoot = resolve(workdir);
            const names = await Workspace.list(workdirRoot);
            if (json) {
                console.log(JSON.stringify(names));
                return;
            }
            if (names.length === 0) {
                console.log(chalk.gray('No workspaces found.'));
                return;
            }
            // Gather per-workspace counts, then print an aligned table.
            const rows = [];
            for (const name of names) {
                const ws = await Workspace.open(workdirRoot, name);
                const artifacts = await ws.listArtifacts();
                const caches = await ws.listCaches();
                rows.push({ name, artifacts: artifacts.length, caches: caches.length });
            }
            const nameWidth = Math.max('WORKSPACE'.length, ...rows.map(r => r.name.length));
            const header = `${'WORKSPACE'.padEnd(nameWidth)} ARTIFACTS CACHES`;
            console.log(chalk.bold(header));
            for (const row of rows) {
                console.log(`${row.name.padEnd(nameWidth)} ${String(row.artifacts).padStart(9)} ${String(row.caches).padStart(6)}`);
            }
        });
    program
        .command('rm')
        .description('Remove one or more workspaces')
        .argument('<workspace...>', 'Workspace names to remove')
        .action(async (workspaces, _options, cmd) => {
            const { workdir } = getGlobalOptions(cmd);
            const workdirRoot = resolve(workdir);
            const existing = await Workspace.list(workdirRoot);
            // Validate every name before removing anything, so a typo does
            // not leave a partial removal behind.
            for (const name of workspaces) {
                if (!existing.includes(name)) {
                    console.error(chalk.red(`Workspace not found: ${name}`));
                    process.exitCode = 1;
                    return;
                }
            }
            for (const name of workspaces) {
                await Workspace.remove(workdirRoot, name);
                console.log(chalk.green(`Removed ${name}`));
            }
        });
    program
        .command('clean')
        .description('Remove all workspaces')
        .action(async (_options, cmd) => {
            const { workdir } = getGlobalOptions(cmd);
            const workdirRoot = resolve(workdir);
            const names = await Workspace.list(workdirRoot);
            if (names.length === 0) {
                console.log(chalk.gray('No workspaces to clean.'));
                return;
            }
            for (const name of names) {
                await Workspace.remove(workdirRoot, name);
            }
            console.log(chalk.green(`Removed ${names.length} workspace${names.length > 1 ? 's' : ''}.`));
        });
    await program.parseAsync();
}
|
|
120
|
+
// Entry point: run the CLI and convert any escaped error into a non-zero
// exit code. The previous version rethrew after logging, which made Node
// print the error a second time as an unhandled top-level rejection; logging
// once and setting process.exitCode gives the same non-zero exit without
// the duplicate output.
try {
    await main();
}
catch (error) {
    console.error('Fatal error:', error);
    process.exitCode = 1;
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
|
2
|
+
/**
 * Loads pipeline definitions from JSON files and validates their structure
 * before the runner touches them. All validation failures throw with a
 * message that names the offending step/field.
 */
export class PipelineLoader {
    /**
     * Load and validate a pipeline definition.
     * @param {string} filePath - Path to the pipeline JSON file.
     * @returns {Promise<object>} The parsed, validated pipeline config.
     * @throws {Error} If the file is not valid JSON or fails validation.
     */
    async load(filePath) {
        const content = await readFile(filePath, 'utf8');
        let config;
        try {
            config = JSON.parse(content);
        }
        catch (error) {
            // Name the offending file instead of surfacing a bare SyntaxError.
            throw new Error(`Invalid pipeline: ${filePath} is not valid JSON`, { cause: error });
        }
        if (!Array.isArray(config.steps) || config.steps.length === 0) {
            throw new Error('Invalid pipeline: steps must be a non-empty array');
        }
        // Duplicate step ids would silently shadow each other downstream
        // (artifacts are keyed by step id), so reject them here.
        const seenIds = new Set();
        for (const step of config.steps) {
            this.validateStep(step);
            if (seenIds.has(step.id)) {
                throw new Error(`Invalid pipeline: duplicate step id '${step.id}'`);
            }
            seenIds.add(step.id);
        }
        return config;
    }
    /**
     * Validate a single step definition (id, image, cmd, inputs, mounts,
     * caches).
     * @throws {Error|TypeError} On any missing or malformed field.
     */
    validateStep(step) {
        if (!step.id || typeof step.id !== 'string') {
            throw new Error('Invalid step: id is required');
        }
        this.validateIdentifier(step.id, 'step id');
        if (!step.image || typeof step.image !== 'string') {
            throw new Error(`Invalid step ${step.id}: image is required`);
        }
        if (!Array.isArray(step.cmd) || step.cmd.length === 0) {
            throw new Error(`Invalid step ${step.id}: cmd must be a non-empty array`);
        }
        if (step.inputs) {
            if (!Array.isArray(step.inputs)) {
                throw new TypeError(`Step ${step.id}: inputs must be an array`);
            }
            for (const input of step.inputs) {
                // Guard explicitly: the identifier regex would coerce a
                // missing value to the string "undefined", which matches
                // [\w-]+ and previously slipped through validation.
                if (!input || typeof input.step !== 'string') {
                    throw new Error(`Invalid step ${step.id}: each input must have a string 'step' field`);
                }
                this.validateIdentifier(input.step, `input step name in step ${step.id}`);
            }
        }
        if (step.mounts) {
            this.validateMounts(step.id, step.mounts);
        }
        if (step.caches) {
            this.validateCaches(step.id, step.caches);
        }
    }
    /**
     * Validate host bind mounts: host paths must be relative and free of
     * '..'; container paths must be absolute and free of '..'.
     * @throws {Error|TypeError} On any malformed mount entry.
     */
    validateMounts(stepId, mounts) {
        if (!Array.isArray(mounts)) {
            throw new TypeError(`Step ${stepId}: mounts must be an array`);
        }
        for (const mount of mounts) {
            if (!mount.host || typeof mount.host !== 'string') {
                throw new Error(`Step ${stepId}: mount.host is required and must be a string`);
            }
            if (mount.host.startsWith('/')) {
                throw new Error(`Step ${stepId}: mount.host '${mount.host}' must be a relative path`);
            }
            if (mount.host.includes('..')) {
                throw new Error(`Step ${stepId}: mount.host '${mount.host}' must not contain '..'`);
            }
            if (!mount.container || typeof mount.container !== 'string') {
                throw new Error(`Step ${stepId}: mount.container is required and must be a string`);
            }
            if (!mount.container.startsWith('/')) {
                throw new Error(`Step ${stepId}: mount.container '${mount.container}' must be an absolute path`);
            }
            if (mount.container.includes('..')) {
                throw new Error(`Step ${stepId}: mount.container '${mount.container}' must not contain '..'`);
            }
        }
    }
    /**
     * Validate cache declarations: names must be safe identifiers and
     * container paths must be absolute.
     * @throws {Error|TypeError} On any malformed cache entry.
     */
    validateCaches(stepId, caches) {
        if (!Array.isArray(caches)) {
            throw new TypeError(`Step ${stepId}: caches must be an array`);
        }
        for (const cache of caches) {
            if (!cache.name || typeof cache.name !== 'string') {
                throw new Error(`Step ${stepId}: cache.name is required and must be a string`);
            }
            this.validateIdentifier(cache.name, `cache name in step ${stepId}`);
            if (!cache.path || typeof cache.path !== 'string') {
                throw new Error(`Step ${stepId}: cache.path is required and must be a string`);
            }
            if (!cache.path.startsWith('/')) {
                throw new Error(`Step ${stepId}: cache.path '${cache.path}' must be an absolute path`);
            }
        }
    }
    /**
     * Ensure an identifier is a non-empty string of [A-Za-z0-9_-].
     * The character class already excludes '.', '/', and whitespace, so a
     * valid identifier can never contain '..' and is safe to use as a
     * directory name. Rejects non-strings explicitly because regex .test()
     * would otherwise coerce them (e.g. undefined -> "undefined").
     */
    validateIdentifier(id, context) {
        if (typeof id !== 'string' || !/^[\w-]+$/.test(id)) {
            throw new Error(`Invalid ${context}: '${id}' must contain only alphanumeric characters, underscore, and hyphen`);
        }
    }
}
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
import { cp } from 'node:fs/promises';
|
|
2
|
+
import { basename, dirname, resolve } from 'node:path';
|
|
3
|
+
import { Workspace } from '../engine/index.js';
|
|
4
|
+
import { StateManager } from './state.js';
|
|
5
|
+
/**
|
|
6
|
+
* Orchestrates pipeline execution with dependency resolution and caching.
|
|
7
|
+
*
|
|
8
|
+
* ## Workflow
|
|
9
|
+
*
|
|
10
|
+
* 1. **Workspace Resolution**: Determines workspace ID from CLI flag, config, or filename
|
|
11
|
+
* 2. **State Loading**: Loads cached fingerprints from state.json
|
|
12
|
+
* 3. **Step Execution**: For each step:
|
|
13
|
+
* a. Computes fingerprint (image + cmd + env + input artifact IDs)
|
|
14
|
+
* b. Checks cache (fingerprint match + artifact exists)
|
|
15
|
+
* c. If cached: skips execution
|
|
16
|
+
* d. If not cached: resolves inputs, prepares staging, executes container
|
|
17
|
+
* e. On success: commits artifact, saves state
|
|
18
|
+
* f. On failure: discards artifact, halts pipeline (unless allowFailure)
|
|
19
|
+
* 4. **Completion**: Reports final pipeline status
|
|
20
|
+
*
|
|
21
|
+
* ## Dependencies
|
|
22
|
+
*
|
|
23
|
+
* Steps declare dependencies via `inputs: [{step: "stepId"}]`.
|
|
24
|
+
* The runner:
|
|
25
|
+
* - Mounts input artifacts as read-only volumes
|
|
26
|
+
* - Optionally copies inputs to output staging (if `copyToOutput: true`)
|
|
27
|
+
* - Tracks execution order to resolve step names to artifact IDs
|
|
28
|
+
*
|
|
29
|
+
* ## Caching
|
|
30
|
+
*
|
|
31
|
+
* Cache invalidation is automatic:
|
|
32
|
+
* - Changing a step's configuration re-runs it
|
|
33
|
+
* - Re-running a step invalidates all dependent steps
|
|
34
|
+
*/
|
|
35
|
+
export class PipelineRunner {
    // Collaborators injected via the constructor.
    loader;
    runtime;
    reporter;
    workdirRoot;
    /**
     * @param loader - Parses and validates pipeline files (PipelineLoader).
     * @param runtime - Container executor (e.g. DockerCliExecutor); must
     *   expose check() and run().
     * @param reporter - Receives state/log/result events (console or
     *   interactive UI).
     * @param workdirRoot - Root directory under which workspaces live.
     */
    constructor(loader, runtime, reporter, workdirRoot) {
        this.loader = loader;
        this.runtime = runtime;
        this.reporter = reporter;
        this.workdirRoot = workdirRoot;
    }
    /**
     * Execute every step of the pipeline at `pipelineFilePath` in file order.
     *
     * @param {string} pipelineFilePath - Path to the pipeline JSON file.
     * @param {{workspace?: string, force?: true|string[]}} [options]
     *   `workspace` overrides the workspace name; `force` is true to skip
     *   the cache for all steps, or a list of step ids to force.
     * @throws {Error} When a step exits non-zero and allowFailure is not set.
     */
    async run(pipelineFilePath, options) {
        const { workspace: workspaceName, force } = options ?? {};
        const config = await this.loader.load(pipelineFilePath);
        // Relative host-mount paths resolve against the pipeline file's
        // directory, not the process CWD.
        const pipelineRoot = dirname(resolve(pipelineFilePath));
        // Workspace ID priority: CLI arg > config.name > filename
        const workspaceId = workspaceName
            ?? config.name
            ?? basename(pipelineFilePath, '.json').replaceAll(/[^\w-]/g, '-');
        let workspace;
        try {
            workspace = await Workspace.open(this.workdirRoot, workspaceId);
        }
        catch {
            // Open failed — presumably the workspace does not exist yet, so
            // create a fresh one. NOTE(review): confirm Workspace.open only
            // throws on absence; other failures are masked here.
            workspace = await Workspace.create(this.workdirRoot, workspaceId);
        }
        // Clear staging leftovers from a previous interrupted run.
        await workspace.cleanupStaging();
        // Fail fast if the container runtime is unavailable.
        await this.runtime.check();
        const state = new StateManager(workspace.root);
        await state.load();
        // Maps step id -> committed artifact id, filled in execution order.
        const stepArtifacts = new Map();
        this.reporter.state(workspace.id, 'PIPELINE_START');
        for (const step of config.steps) {
            // NOTE(review): inputs whose producing step has no artifact are
            // silently dropped from the fingerprint here, while
            // prepareStagingWithInputs() throws on them later.
            const inputArtifactIds = step.inputs
                ?.map(i => stepArtifacts.get(i.step))
                .filter((id) => id !== undefined);
            const resolvedMounts = step.mounts?.map(m => ({
                hostPath: resolve(pipelineRoot, m.host),
                containerPath: m.container
            }));
            // Fingerprint everything that affects the step's output; any
            // change invalidates the cached artifact.
            const currentFingerprint = StateManager.fingerprint({
                image: step.image,
                cmd: step.cmd,
                env: step.env,
                inputArtifactIds,
                mounts: resolvedMounts
            });
            // --force (all steps) or --force a,b (listed steps) bypasses cache.
            const skipCache = force === true || (Array.isArray(force) && force.includes(step.id));
            if (!skipCache && await this.tryUseCache({ workspace, state, step, currentFingerprint, stepArtifacts })) {
                continue;
            }
            this.reporter.state(workspace.id, 'STEP_STARTING', step.id);
            // Artifact lifecycle: prepare staging -> run container ->
            // commit on success / discard on failure.
            const artifactId = workspace.generateArtifactId();
            const stagingPath = await workspace.prepareArtifact(artifactId);
            await this.prepareStagingWithInputs(workspace, step, stagingPath, stepArtifacts);
            // Prepare caches
            if (step.caches) {
                for (const cache of step.caches) {
                    await workspace.prepareCache(cache.name);
                }
            }
            const { inputs, output, caches, mounts } = this.buildMounts(step, artifactId, stepArtifacts, pipelineRoot);
            const result = await this.runtime.run(workspace, {
                name: `pipex-${workspace.id}-${step.id}-${Date.now()}`,
                image: step.image,
                cmd: step.cmd,
                env: step.env,
                inputs,
                output,
                caches,
                mounts,
                // Network is disabled unless the step opts in.
                network: step.allowNetwork ? 'bridge' : 'none',
                timeoutSec: step.timeoutSec
            }, ({ stream, line }) => {
                // Stream container stdout/stderr lines to the reporter.
                this.reporter.log(workspace.id, step.id, stream, line);
            });
            this.reporter.result(workspace.id, step.id, result);
            if (result.exitCode === 0 || step.allowFailure) {
                // Success (or tolerated failure): promote staging to a
                // committed artifact and persist the fingerprint so the
                // step can be skipped next run.
                await workspace.commitArtifact(artifactId);
                stepArtifacts.set(step.id, artifactId);
                state.setStep(step.id, artifactId, currentFingerprint);
                await state.save();
                this.reporter.state(workspace.id, 'STEP_FINISHED', step.id, { artifactId });
            }
            else {
                // Failure: drop the partial artifact and halt the pipeline.
                await workspace.discardArtifact(artifactId);
                this.reporter.state(workspace.id, 'STEP_FAILED', step.id, { exitCode: result.exitCode });
                this.reporter.state(workspace.id, 'PIPELINE_FAILED');
                throw new Error(`Step ${step.id} failed with exit code ${result.exitCode}`);
            }
        }
        this.reporter.state(workspace.id, 'PIPELINE_FINISHED');
    }
    /**
     * Reuse a cached artifact for `step` when its stored fingerprint matches
     * `currentFingerprint` and the artifact still exists in the workspace.
     * On reuse, records the artifact in `stepArtifacts` and reports
     * STEP_SKIPPED.
     * @returns {Promise<boolean>} true if the step was skipped.
     */
    async tryUseCache({ workspace, state, step, currentFingerprint, stepArtifacts }) {
        const cached = state.getStep(step.id);
        if (cached?.fingerprint === currentFingerprint) {
            try {
                const artifacts = await workspace.listArtifacts();
                if (artifacts.includes(cached.artifactId)) {
                    stepArtifacts.set(step.id, cached.artifactId);
                    this.reporter.state(workspace.id, 'STEP_SKIPPED', step.id, { artifactId: cached.artifactId, reason: 'cached' });
                    return true;
                }
            }
            catch {
                // Artifact missing, proceed with execution
            }
        }
        return false;
    }
    /**
     * Copy input artifacts into the step's output staging directory for
     * inputs marked `copyToOutput: true`.
     * @throws {Error} If an input names a step with no committed artifact.
     */
    async prepareStagingWithInputs(workspace, step, stagingPath, stepArtifacts) {
        if (!step.inputs) {
            return;
        }
        for (const input of step.inputs) {
            const inputArtifactId = stepArtifacts.get(input.step);
            if (!inputArtifactId) {
                throw new Error(`Step ${step.id}: input step '${input.step}' not found or not yet executed`);
            }
            if (input.copyToOutput) {
                await cp(workspace.artifactPath(inputArtifactId), stagingPath, { recursive: true });
            }
        }
    }
    /**
     * Translate the step definition into the runtime's mount descriptors:
     * input artifact mounts under `/input/{step}`, the output staging mount
     * (default `/output`), named cache mounts, and host bind mounts resolved
     * relative to the pipeline file's directory. Read-only enforcement is
     * presumably applied by the executor — not visible here.
     */
    buildMounts(step, outputArtifactId, stepArtifacts, pipelineRoot) {
        const inputs = [];
        if (step.inputs) {
            for (const input of step.inputs) {
                const inputArtifactId = stepArtifacts.get(input.step);
                if (inputArtifactId) {
                    inputs.push({
                        artifactId: inputArtifactId,
                        containerPath: `/input/${input.step}`
                    });
                }
            }
        }
        const output = {
            stagingArtifactId: outputArtifactId,
            containerPath: step.outputPath ?? '/output'
        };
        // Build cache mounts
        let caches;
        if (step.caches) {
            caches = step.caches.map(c => ({
                name: c.name,
                containerPath: c.path
            }));
        }
        let mounts;
        if (step.mounts) {
            mounts = step.mounts.map(m => ({
                hostPath: resolve(pipelineRoot, m.host),
                containerPath: m.container
            }));
        }
        return { inputs, output, caches, mounts };
    }
}
|