@rip-lang/swarm 1.0.2 → 1.0.4

Files changed (3)
  1. package/README.md +131 -60
  2. package/package.json +1 -1
  3. package/swarm.rip +10 -3
package/README.md CHANGED
@@ -4,16 +4,37 @@

  > **Parallel job runner with worker threads — setup once, swarm many**

- Swarm processes large batches of tasks in parallel using Bun's worker
- threads. Define a setup function (runs once) and a perform function
- (runs per task), and swarm handles the rest: worker pool management,
- file-based task lifecycle, ANSI progress bars, crash recovery, and a
- clean summary at the end.
+ Swarm is a high-performance batch job engine for Rip. Give it a list of
+ tasks and a function to process each one, and it fans out across worker
+ threads with real-time progress bars, automatic retries, and a clean
+ summary when done. No database, no message broker, no dependencies —
+ just files, threads, and message passing.
+
+ ## Why This Approach?
+
+ Most job queues add complexity: Redis, RabbitMQ, database-backed queues,
+ distributed locks. Swarm takes the opposite approach:
+
+ - **Tasks are files.** A directory listing *is* the queue. You can
+   inspect, add, or remove tasks with basic shell commands.
+ - **State is a file move.** `todo/ → done/` is one atomic `rename`.
+   No transactions, no eventual consistency. If the process crashes,
+   unfinished tasks are still in `todo/` — restart and pick up where
+   you left off.
+ - **Workers are threads.** Setup runs once in the main thread, context
+   is cloned to N workers via message passing. No shared mutable state,
+   no locks, no deadlocks.
+ - **Progress is real-time.** The main thread owns the terminal — ANSI
+   progress bars update live with per-worker stats. Workers never touch
+   stdout.
+
+ The result: ~330 lines of Rip, zero dependencies, and it handles
+ thousands of tasks reliably. Boring infrastructure, rock solid.

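To make the bullets above concrete, here is a minimal sketch (not swarm's actual source) of the file-as-queue idea, using only `fs` builtins:

```coffee
import { readdirSync, renameSync } from 'fs'
import { join } from 'path'

# the queue is nothing more than a directory listing
pending = readdirSync '.swarm/todo'

# finishing a task is one atomic rename; if the process crashes
# before this call, the task simply stays in todo/ for the next run
finish = (name) ->
  renameSync join('.swarm/todo', name), join('.swarm/done', name)
```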
  ## Quick Start

  ```bash
- bun add -g @rip-lang/swarm
+ bun add @rip-lang/swarm   # add to your project
  ```

  Create a job script:
@@ -31,43 +52,61 @@ perform = (task, ctx) ->
    await Bun.sleep(Math.random() * 1000)
    throw new Error("boom") if Math.random() < 0.03

- swarm { setup, perform, workers: 10 }
+ swarm { setup, perform }
  ```

  Run it:

  ```bash
- rip jobs.rip
- rip jobs.rip -w 10    # 10 workers (default: CPU count)
+ rip jobs.rip          # workers default to CPU count
+ rip jobs.rip -w 10    # 10 workers
+ rip jobs.rip -w 40    # 40 workers for I/O-heavy jobs
  ```

  ## How It Works

  ```
- ┌──────────────────────────────────────────────┐
- │              Single Bun Process              │
- │                                              │
- │  Main Thread          Worker Threads         │
- │  ───────────          ──────────────         │
- │  setup()              perform(task, ctx)     │
- │  task dispatch        perform(task, ctx)     │
- │  progress bars        perform(task, ctx)     │
- │  file lifecycle       ...                    │
- │                                              │
- │  .swarm/todo/ ──→ .swarm/done/               │
- │               └──→ .swarm/died/              │
- └──────────────────────────────────────────────┘
+ ┌──────────────────────────────────────────────────┐
+ │                Single Bun Process                │
+ │                                                  │
+ │  Main Thread              Worker Threads (N)     │
+ │  ──────────               ──────────────────     │
+ │  setup() runs once        each loads your script │
+ │  creates .swarm/todo/*    receives tasks via IPC │
+ │  dispatches tasks         calls perform(task)    │
+ │  renders progress bars    reports done/failed    │
+ │  moves files atomically   stays alive for more   │
+ │                                                  │
+ │  .swarm/todo/42 ──rename──→ .swarm/done/42       │
+ │                 └──rename──→ .swarm/died/42      │
+ └──────────────────────────────────────────────────┘
  ```

- 1. **setup()** runs once in the main thread — creates tasks and returns
-    an optional context object
+ 1. **`setup()`** runs once in the main thread — creates task files and
+    returns an optional context object (auth tokens, config, paths)
  2. **N worker threads** are spawned — each loads your script and gets
-    the `perform` function
+    the `perform` function. Workers are long-lived and process many tasks
  3. Tasks are dispatched from `.swarm/todo/` to workers via message passing
- 4. Workers call `perform(task, ctx)` and report done or failed
- 5. Main thread moves files to `.swarm/done/` or `.swarm/died/` and
-    updates the progress display
- 6. When all tasks complete, a summary is printed
+ 4. Workers call `perform(task, ctx)` — on success the file moves to
+    `done/`, on failure it moves to `died/`
+ 5. ANSI progress bars update live — per-worker throughput and overall
+    completion. When done, per-worker stats are shown
+ 6. If tasks died, just run it again — `retry()` moves them back to
+    `todo/` and only those tasks are reprocessed
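Steps 2 through 4 amount to a small message-passing loop on each worker. A simplified sketch of the worker side follows; the real `lib/worker.mjs` is not shown in this diff, so the message shape (`task`, `ok`) and the `workerData` fields here are assumptions:

```coffee
import { parentPort, workerData } from 'worker_threads'

# each worker loads the user's script once, then serves many tasks
{ perform } = await import(workerData.script)   # assumed workerData field
ctx = workerData.ctx                            # context cloned from setup()

parentPort.on 'message', ({ task }) ->
  try
    await perform task, ctx
    parentPort.postMessage { task, ok: true }   # main thread renames into done/
  catch err
    parentPort.postMessage { task, ok: false }  # main thread renames into died/
```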
+
+ ## Task Lifecycle
+
+ ```
+ .swarm/
+ ├── todo/   ← tasks waiting to be processed
+ ├── done/   ← completed successfully
+ └── died/   ← failed (retryable)
+ ```
+
+ Tasks are plain files. The filename identifies the task (e.g., `000315`,
+ `2024-01-15`, `amazon.json`). Files can be empty (filename is the data)
+ or contain a payload that `perform` reads. File moves use `renameSync`
+ — atomic on the same filesystem, no partial states.

  ## API

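Since `perform` receives the task's file path (see the options table in the next hunk), a payload-bearing task is just a file whose body the worker parses. A hypothetical sketch:

```coffee
import { writeFileSync, readFileSync } from 'fs'

# enqueue: the filename is the id, the body is the payload
writeFileSync '.swarm/todo/order-1042', JSON.stringify { sku: 'A7', qty: 3 }

# worker side: `task` arrives as the path to that file
perform = (task, ctx) ->
  { sku, qty } = JSON.parse readFileSync(task, 'utf-8')
  # ...process one order...
```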
@@ -89,12 +128,13 @@ swarm { setup, perform }
  swarm { setup, perform, workers: 8, bar: 30, char: '█' }
  ```

- Options:
- - **setup** — function, runs once in main thread, returns optional context
- - **perform** — function `(taskPath, ctx)`, runs in worker threads
- - **workers** — number of worker threads (default: CPU count)
- - **bar** — progress bar width in characters (default: 20)
- - **char** — character for progress bars (default: `•`)
+ | Option | Description | Default |
+ |--------|-------------|---------|
+ | **setup** | Runs once in main thread, returns optional context | — |
+ | **perform** | `(taskPath, ctx)` runs in worker threads | required |
+ | **workers** | Number of worker threads | CPU count |
+ | **bar** | Progress bar width in characters | 20 |
+ | **char** | Character for progress bars | `•` |

  ### CLI Flags

@@ -107,39 +147,70 @@ Options:

  CLI flags override options passed to `swarm()`.

- ## Task Lifecycle
+ ### args()

- ```
- .swarm/
- ├── todo/   ← tasks waiting to be processed
- ├── done/   ← successfully completed tasks
- └── died/   ← failed tasks (can be retried)
- ```
+ Swarm also exports `args()` which returns `process.argv` with all
+ swarm flags stripped — only your script's positional arguments remain:
+
+ ```coffee
+ import { swarm, args } from '@rip-lang/swarm'

- Tasks are plain files. The filename identifies the task. Files can be
- empty (filename is the data) or contain a payload (JSON, text, etc.).
- File moves use `renameSync` for atomic operations.
+ inputFile = args()[0]   # first non-swarm argument
+ ```
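For example, mixing a swarm flag with a positional argument of your own (the same pattern the Real-World Example below uses):

```bash
rip jobs.rip input.txt -w 10   # swarm consumes -w 10; args() yields ['input.txt']
```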
 
  ## Crash Recovery

  | Failure | What Happens | Recovery |
  |---------|-------------|----------|
- | `perform()` throws | Worker catches it, reports failed, continues | Automatic |
+ | `perform()` throws | Worker catches it, reports failed, picks up next task | Automatic |
  | Unhandled rejection | Worker error handler fires, continues | Automatic |
  | Worker thread dies | Main thread detects exit, respawns worker | Automatic |
- | Task timeout | (planned) AbortSignal kills task | Worker continues |
+ | Process killed (Ctrl+C) | Unfinished tasks remain in `todo/`, cursor restored | Re-run to continue |

  Failed tasks land in `.swarm/died/`. Call `retry()` in your next
- `setup()` to move them back to `.swarm/todo/` for reprocessing.
-
- ## Comparison with vete (Ruby)
-
- | Feature | vete (Ruby) | swarm (Rip/Bun) |
- |---------|------------|-----------------|
- | Parallelism | fork() per task | Worker threads (reused) |
- | Setup | Runs once (fork shares memory) | Runs once (context cloned) |
- | Per-task overhead | ~100μs (fork) | ~0 (message passing) |
- | Crash recovery | Process dies, slot freed | Exception caught, worker continues |
- | Timeout support | None | Planned (AbortSignal) |
- | Default workers | 1 | CPU count |
- | Dependencies | fileutils, optparse, thread | Zero (Bun builtins) |
+ `setup()` to move them back for reprocessing — only the failed tasks
+ run, not the entire batch.
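A retry-aware `setup()` follows the pattern of the Real-World Example below: only seed a fresh batch when `retry()` found nothing to re-queue. The task names here are hypothetical:

```coffee
import { swarm, init, retry, todo } from '@rip-lang/swarm'

setup = ->
  unless retry()          # re-queues anything in died/; truthy when it did
    init()                # otherwise start a fresh batch
    todo("#{n}") for n in [1..100]

perform = (task, ctx) ->
  await processOne task   # processOne is a hypothetical stand-in

swarm { setup, perform }
```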
+
+ ## Real-World Example
+
+ Downloading 15,000 lab test definitions from an API with 40 workers:
+
+ ```coffee
+ import { swarm, args, init, retry, todo } from '@rip-lang/swarm'
+ import { isMainThread } from 'worker_threads'
+ import { readFileSync, existsSync, mkdirSync } from 'fs'
+ import { join, resolve } from 'path'
+
+ TESTS_FILE = null
+ if isMainThread
+   TESTS_FILE = args()[0]
+
+ setup = ->
+   unless retry()
+     init()
+     lines = readFileSync(TESTS_FILE, 'utf-8').trim().split('\n')
+     for code in lines then todo(code.trim()) if code.trim()
+   outDir = resolve('../data/tests')
+   mkdirSync(outDir, { recursive: true })
+   auth = readFileSync(resolve('.auth'), 'utf-8')
+   xibm = auth.match(/^X-IBM-Client-Id=(.*)$/m)?[1]
+   cook = auth.match(/^lch-authorization_ACC=.*$/m)?[0]
+   { xibm, cook, outDir }
+
+ perform = (task, ctx) ->
+   code = task.split('/').pop()
+   return if existsSync(join(ctx.outDir, "#{code}.json"))
+   resp = await fetch "https://api.example.com/tests/#{code}",
+     method: 'POST'
+     headers: { 'Cookie': ctx.cook }
+     body: JSON.stringify { testCode: code }
+   throw new Error("HTTP #{resp.status}") unless resp.ok
+   await Bun.write(join(ctx.outDir, "#{code}.json"), await resp.text())
+
+ swarm { setup, perform }
+ ```
+
+ ```bash
+ rip download-tests.rip tests.txt -w 40
+ # 15,000 tests across 40 workers — finishes in minutes
+ ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@rip-lang/swarm",
-   "version": "1.0.2",
+   "version": "1.0.4",
    "description": "Parallel job runner with worker threads — setup once, swarm many",
    "type": "module",
    "main": "swarm.rip",
package/swarm.rip CHANGED
@@ -160,6 +160,7 @@ export swarm = (opts = {}) ->
    barw = parseInt(findArg(args, '-b', '--bar')) or opts.bar or 20
    char = findArg(args, '-c', '--char') or opts.char or '•'
    doreset = args.includes('-r') or args.includes('--reset')
+   dosafe = args.includes('-s') or args.includes('--safe')

    if workers < 1
      console.error 'error: workers must be at least 1'
@@ -182,6 +183,7 @@ export swarm = (opts = {}) ->
    if typeof opts.setup is 'function'
      result = await opts.setup()
      context = result if result? and typeof result is 'object'
+   context.safe = dosafe

    # read task list
    unless existsSync(_todo)
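The new `-s`/`--safe` flag is not documented in the README diff; these hunks only guarantee that it lands on the context as `ctx.safe`. A job script could presumably branch on it like this (hypothetical `perform` and helper):

```coffee
perform = (task, ctx) ->
  if ctx.safe
    console.log "safe mode: would process #{task}"   # assumed dry-run semantics
  else
    await doRealWork task                            # hypothetical helper
```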
@@ -200,12 +202,17 @@ export swarm = (opts = {}) ->
    workerPath = join(dirname(new URL(import.meta.url).pathname), 'lib', 'worker.mjs')
    scriptPath = resolve(process.argv[1] or '')

-   # find rip-loader for workers
+   # find rip-loader for workers (check local, then global)
    loaderPath = null
    try
      loaderPath = join(dirname(require.resolve('rip-lang')), '..', 'rip-loader.js')
    catch
-     null
+     # fall back to global install
+     try
+       globalDir = join(process.env.HOME or '', '.bun', 'install', 'global', 'node_modules', 'rip-lang')
+       loaderPath = join(globalDir, 'rip-loader.js') if existsSync(join(globalDir, 'rip-loader.js'))
+     catch
+       null

    # state
    live = 0
@@ -339,7 +346,7 @@ export swarm = (opts = {}) ->
    # flags that swarm consumes (with value)
    _flagsWithValue = ['-w', '--workers', '-b', '--bar', '-c', '--char']
    # flags that swarm consumes (standalone)
-   _flagsAlone = ['-r', '--reset', '-h', '--help', '-v', '--version']
+   _flagsAlone = ['-r', '--reset', '-s', '--safe', '-h', '--help', '-v', '--version']

    findArg = (args, short, long) ->
      for arg, i in args