@rip-lang/swarm 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -60
- package/package.json +1 -1
- package/swarm.rip +10 -3
package/README.md
CHANGED
@@ -4,16 +4,37 @@
 
 > **Parallel job runner with worker threads — setup once, swarm many**
 
-Swarm
-
-
-
-
+Swarm is a high-performance batch job engine for Rip. Give it a list of
+tasks and a function to process each one, and it fans out across worker
+threads with real-time progress bars, automatic retries, and a clean
+summary when done. No database, no message broker, no dependencies —
+just files, threads, and message passing.
+
+## Why This Approach?
+
+Most job queues add complexity: Redis, RabbitMQ, database-backed queues,
+distributed locks. Swarm takes the opposite approach:
+
+- **Tasks are files.** A directory listing *is* the queue. You can
+  inspect, add, or remove tasks with basic shell commands.
+- **State is a file move.** `todo/ → done/` is one atomic `rename`.
+  No transactions, no eventual consistency. If the process crashes,
+  unfinished tasks are still in `todo/` — restart and pick up where
+  you left off.
+- **Workers are threads.** Setup runs once in the main thread, context
+  is cloned to N workers via message passing. No shared mutable state,
+  no locks, no deadlocks.
+- **Progress is real-time.** The main thread owns the terminal — ANSI
+  progress bars update live with per-worker stats. Workers never touch
+  stdout.
+
+The result: ~330 lines of Rip, zero dependencies, and it handles
+thousands of tasks reliably. Boring infrastructure, rock solid.
 
 ## Quick Start
 
 ```bash
-bun add
+bun add @rip-lang/swarm   # add to your project
 ```
 
 Create a job script:
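Only the tail of this quick-start script is visible in the next hunk. For context, a minimal `jobs.rip` might look like the sketch below; it assumes the `init()` and `todo()` helpers shown in the Real-World Example further down, and the task names and counts are purely illustrative.

```coffee
# Sketch of a minimal jobs.rip (illustrative, not part of this diff).
import { swarm, init, todo } from '@rip-lang/swarm'

setup = ->
  init()                                  # assumed to (re)create the .swarm/ directories
  todo("task-#{i}") for i in [1..500]     # one file per task in .swarm/todo/
  { greeting: 'hello' }                   # optional context cloned to every worker

perform = (task, ctx) ->
  # task is the path of the file handed to this worker
  await Bun.sleep(Math.random() * 1000)
  throw new Error("boom") if Math.random() < 0.03

swarm { setup, perform }
```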
@@ -31,43 +52,61 @@ perform = (task, ctx) ->
   await Bun.sleep(Math.random() * 1000)
   throw new Error("boom") if Math.random() < 0.03
 
-swarm { setup, perform
+swarm { setup, perform }
 ```
 
 Run it:
 
 ```bash
-rip jobs.rip
-rip jobs.rip -w 10
+rip jobs.rip          # workers default to CPU count
+rip jobs.rip -w 10    # 10 workers
+rip jobs.rip -w 40    # 40 workers for I/O-heavy jobs
 ```
 
 ## How It Works
 
 ```
-
-│
-│
-│  Main Thread
-│
-│  setup()
-│
-│
-│
-│
-│
-│
-
+┌────────────────────────────────────────────────────┐
+│                 Single Bun Process                  │
+│                                                     │
+│  Main Thread              Worker Threads (N)        │
+│  ──────────               ──────────────────        │
+│  setup() runs once        each loads your script    │
+│  creates .swarm/todo/*    receives tasks via IPC    │
+│  dispatches tasks         calls perform(task)       │
+│  renders progress bars    reports done/failed       │
+│  moves files atomically   stays alive for more      │
+│                                                     │
+│  .swarm/todo/42 ──rename──→ .swarm/done/42          │
+│                 ──rename──→ .swarm/died/42          │
+└────────────────────────────────────────────────────┘
 ```
 
-1.
-   an optional context object
+1. **`setup()`** runs once in the main thread — creates task files and
+   returns an optional context object (auth tokens, config, paths)
 2. **N worker threads** are spawned — each loads your script and gets
-   the `perform` function
+   the `perform` function. Workers are long-lived and process many tasks
 3. Tasks are dispatched from `.swarm/todo/` to workers via message passing
-4. Workers call `perform(task, ctx)`
-
-
-
+4. Workers call `perform(task, ctx)` — on success the file moves to
+   `done/`, on failure it moves to `died/`
+5. ANSI progress bars update live — per-worker throughput and overall
+   completion. When done, per-worker stats are shown
+6. If tasks died, just run it again — `retry()` moves them back to
+   `todo/` and only those tasks are reprocessed
+
+## Task Lifecycle
+
+```
+.swarm/
+├── todo/   ← tasks waiting to be processed
+├── done/   ← completed successfully
+└── died/   ← failed (retryable)
+```
+
+Tasks are plain files. The filename identifies the task (e.g., `000315`,
+`2024-01-15`, `amazon.json`). Files can be empty (filename is the data)
+or contain a payload that `perform` reads. File moves use `renameSync`
+— atomic on the same filesystem, no partial states.
 
 ## API
 
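The Task Lifecycle notes above say a task file can be empty (the filename is the data) or carry a payload that `perform` reads. A sketch of the payload case, assuming Bun's file API; the JSON fields, and how the payload was written in the first place, are illustrative since the diff shows no payload-writing helper.

```coffee
# Sketch: a perform() that reads a JSON payload from its task file (illustrative).
perform = (task, ctx) ->
  raw = await Bun.file(task).text()     # task is the path of the file in .swarm/todo/
  { url } = JSON.parse(raw)             # illustrative payload field
  resp = await fetch url
  throw new Error("HTTP #{resp.status}") unless resp.ok
```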
@@ -89,12 +128,13 @@ swarm { setup, perform }
 swarm { setup, perform, workers: 8, bar: 30, char: '█' }
 ```
 
-
-
-
-
-
-
+| Option | Description | Default |
+|--------|-------------|---------|
+| **setup** | Runs once in main thread, returns optional context | — |
+| **perform** | `(taskPath, ctx)` — runs in worker threads | required |
+| **workers** | Number of worker threads | CPU count |
+| **bar** | Progress bar width in characters | 20 |
+| **char** | Character for progress bars | `•` |
 
 ### CLI Flags
 
@@ -107,39 +147,70 @@ Options:
 
 CLI flags override options passed to `swarm()`.
 
-
+### args()
 
-
-
-
-
-```
+Swarm also exports `args()` which returns `process.argv` with all
+swarm flags stripped — only your script's positional arguments remain:
+
+```coffee
+import { swarm, args } from '@rip-lang/swarm'
 
-
-
-File moves use `renameSync` for atomic operations.
+inputFile = args()[0]   # first non-swarm argument
+```
 
 ## Crash Recovery
 
 | Failure | What Happens | Recovery |
 |---------|-------------|----------|
-| `perform()` throws | Worker catches it, reports failed,
+| `perform()` throws | Worker catches it, reports failed, picks up next task | Automatic |
 | Unhandled rejection | Worker error handler fires, continues | Automatic |
 | Worker thread dies | Main thread detects exit, respawns worker | Automatic |
-
+| Process killed (Ctrl+C) | Unfinished tasks remain in `todo/`, cursor restored | Re-run to continue |
 
 Failed tasks land in `.swarm/died/`. Call `retry()` in your next
-`setup()` to move them back
-
-
-
-
-
-
-
-
-
-
-
-
+`setup()` to move them back for reprocessing — only the failed tasks
+run, not the entire batch.
+
+## Real-World Example
+
+Downloading 15,000 lab test definitions from an API with 40 workers:
+
+```coffee
+import { swarm, args, init, retry, todo } from '@rip-lang/swarm'
+import { isMainThread } from 'worker_threads'
+import { readFileSync, existsSync, mkdirSync } from 'fs'
+import { join, resolve } from 'path'
+
+TESTS_FILE = null
+if isMainThread
+  TESTS_FILE = args()[0]
+
+setup = ->
+  unless retry()
+    init()
+    lines = readFileSync(TESTS_FILE, 'utf-8').trim().split('\n')
+    for code in lines then todo(code.trim()) if code.trim()
+  outDir = resolve('../data/tests')
+  mkdirSync(outDir, { recursive: true })
+  auth = readFileSync(resolve('.auth'), 'utf-8')
+  xibm = auth.match(/^X-IBM-Client-Id=(.*)$/m)?[1]
+  cook = auth.match(/^lch-authorization_ACC=.*$/m)?[0]
+  { xibm, cook, outDir }
+
+perform = (task, ctx) ->
+  code = task.split('/').pop()
+  return if existsSync(join(ctx.outDir, "#{code}.json"))
+  resp = await fetch "https://api.example.com/tests/#{code}",
+    method: 'POST'
+    headers: { 'Cookie': ctx.cook }
+    body: JSON.stringify { testCode: code }
+  throw new Error("HTTP #{resp.status}") unless resp.ok
+  await Bun.write(join(ctx.outDir, "#{code}.json"), await resp.text())
+
+swarm { setup, perform }
+```
+
+```bash
+rip download-tests.rip tests.txt -w 40
+# 15,000 tests across 40 workers — finishes in minutes
+```
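The crash-recovery story in the new README comes down to a retry-or-init guard inside `setup()`. Isolated here as a sketch; the assumption that `retry()` returns truthy when it moved `died/` tasks back is inferred from the `unless retry()` guard in the example above, and the task names are illustrative.

```coffee
# Sketch of the retry-or-init idiom (inferred from the Real-World Example).
setup = ->
  unless retry()                       # assumed truthy when died/ tasks were moved back to todo/
    init()                             # fresh batch: build the queue from scratch
    todo(String(n)) for n in [1..1000]
  # tasks already in done/ stay done; only what sits in todo/ gets dispatched
```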
package/package.json
CHANGED
package/swarm.rip
CHANGED
@@ -160,6 +160,7 @@ export swarm = (opts = {}) ->
   barw = parseInt(findArg(args, '-b', '--bar')) or opts.bar or 20
   char = findArg(args, '-c', '--char') or opts.char or '•'
   doreset = args.includes('-r') or args.includes('--reset')
+  dosafe = args.includes('-s') or args.includes('--safe')
 
   if workers < 1
     console.error 'error: workers must be at least 1'
@@ -182,6 +183,7 @@ export swarm = (opts = {}) ->
   if typeof opts.setup is 'function'
     result = await opts.setup()
     context = result if result? and typeof result is 'object'
+  context.safe = dosafe
 
   # read task list
   unless existsSync(_todo)
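With this change the new `-s`/`--safe` flag reaches job scripts as `ctx.safe`. The diff only shows the plumbing, so the dry-run reading below is purely illustrative, and `archiveRecord` is a hypothetical helper.

```coffee
# Illustrative only: one way a job script could honor ctx.safe.
perform = (task, ctx) ->
  return if ctx.safe                 # treat --safe as a dry run (assumed semantics)
  await archiveRecord(task, ctx)     # hypothetical side-effecting work
```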
@@ -200,12 +202,17 @@ export swarm = (opts = {}) ->
   workerPath = join(dirname(new URL(import.meta.url).pathname), 'lib', 'worker.mjs')
   scriptPath = resolve(process.argv[1] or '')
 
-  # find rip-loader for workers
+  # find rip-loader for workers (check local, then global)
   loaderPath = null
   try
     loaderPath = join(dirname(require.resolve('rip-lang')), '..', 'rip-loader.js')
   catch
-
+    # fall back to global install
+    try
+      globalDir = join(process.env.HOME or '', '.bun', 'install', 'global', 'node_modules', 'rip-lang')
+      loaderPath = join(globalDir, 'rip-loader.js') if existsSync(join(globalDir, 'rip-loader.js'))
+    catch
+      null
 
   # state
   live = 0
@@ -339,7 +346,7 @@ export swarm = (opts = {}) ->
 # flags that swarm consumes (with value)
 _flagsWithValue = ['-w', '--workers', '-b', '--bar', '-c', '--char']
 # flags that swarm consumes (standalone)
-_flagsAlone = ['-r', '--reset', '-h', '--help', '-v', '--version']
+_flagsAlone = ['-r', '--reset', '-s', '--safe', '-h', '--help', '-v', '--version']
 
 findArg = (args, short, long) ->
   for arg, i in args