@nxtedition/shared 3.0.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2022 nxtedition
3
+ Copyright (c) nxtedition
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
package/README.md CHANGED
@@ -1,50 +1,231 @@
1
- # shared
1
+ # @nxtedition/shared
2
2
 
3
- Ring Buffer for NodeJS cross Worker communication.
3
+ A high-performance, thread-safe ring buffer for inter-thread communication in Node.js using `SharedArrayBuffer`.
4
+
5
+ ## Why
6
+
7
+ Passing data between worker threads in Node.js typically involves structured cloning or transferring `ArrayBuffer` ownership. Structured cloning copies every byte — fine for occasional messages, but a bottleneck when streaming megabytes per second between threads. Transferable objects avoid the copy, but each transfer still requires allocating a new `ArrayBuffer`, serializing the transfer list, and coordinating ownership between threads — overhead that adds up quickly in high-throughput scenarios.
8
+
9
+ This ring buffer avoids these problems. A single `SharedArrayBuffer` is mapped into both threads. The writer appends messages by advancing a write pointer; the reader consumes them by advancing a read pointer. No copies, no ownership transfers, no cloning overhead. Because messages are stored inline in a contiguous buffer, message data lives directly alongside the protocol's bookkeeping state rather than scattered across separately allocated `ArrayBuffer`s — keeping access patterns cache-friendly. The pointers are coordinated with `Atomics` operations and cache-line-aligned to prevent false sharing between CPU cores.
10
+
11
+ Reads are zero-copy: the reader callback receives a `DataView` directly into the shared buffer, so parsing can happen in-place without allocating intermediate buffers. Writes are batched — the write pointer is only published to the reader after a high-water mark is reached or at the end of the current event loop tick, drastically reducing the frequency of expensive atomic stores.
12
+
13
+ ## Platform Assumptions
14
+
15
+ This library assumes that unaligned 32-bit reads and writes will not tear on the target platform. This holds true on x86/x64 and ARM64, which are the primary targets for Node.js.
4
16
 
5
17
  ## Install
6
18
 
7
- ```
8
- npm i @nxtedition/shared
19
+ ```sh
20
+ npm install @nxtedition/shared
9
21
  ```
10
22
 
11
- ## Quick Start
23
+ ## Usage
12
24
 
13
25
  ```js
14
- // index.js
26
+ import { alloc, reader, writer } from '@nxtedition/shared'
27
+
28
+ // Allocate shared memory (pass these buffers to a worker thread)
29
+ const { sharedState, sharedBuffer } = alloc(1024 * 1024) // 1 MB ring buffer
15
30
 
16
- import * as shared from '@nxtedition/shared'
17
- import tp from 'timers/promise'
31
+ // --- Writer side (e.g. main thread) ---
32
+ const w = writer({ sharedState, sharedBuffer })
18
33
 
19
- const writer = shared.alloc(16 * 1024 * 1024)
20
- const reader = shared.alloc(16 * 1024 * 1024)
34
+ const payload = Buffer.from('hello world')
35
+ w.writeSync(payload.length, (data) => {
36
+ payload.copy(data.buffer, data.offset)
37
+ return data.offset + payload.length
38
+ })
39
+
40
+ // --- Reader side (e.g. worker thread) ---
41
+ const r = reader({ sharedState, sharedBuffer })
21
42
 
22
- const worker = new Worker(new URL('worker.js', import.meta.url), {
23
- workerData: { reader, writer },
43
+ r.readSome((data) => {
44
+ const msg = data.buffer.subarray(data.offset, data.offset + data.length).toString()
45
+ console.log(msg) // 'hello world'
24
46
  })
47
+ ```
48
+
49
+ ### Batching writes with cork
25
50
 
26
- const writeToWorker = shared.writer(reader)
51
+ ```js
52
+ w.cork(() => {
53
+ for (const item of items) {
54
+ const buf = Buffer.from(JSON.stringify(item))
55
+ w.writeSync(buf.length, (data) => {
56
+ buf.copy(data.buffer, data.offset)
57
+ return data.offset + buf.length
58
+ })
59
+ }
60
+ })
61
+ // All writes flushed atomically when cork returns
62
+ ```
27
63
 
28
- writeToWorker(Buffer.from('ping'))
64
+ ### Non-blocking writes with tryWrite
29
65
 
30
- for await (const buffer of shared.reader(writer)) {
31
- console.log(`From worker ${buffer}`)
32
- await tp.setTimeout(1e3) // Backpressure
33
- writeToWorker(Buffer.from('pong'))
66
+ ```js
67
+ const buf = Buffer.from('data')
68
+ const ok = w.tryWrite(buf.length, (data) => {
69
+ buf.copy(data.buffer, data.offset)
70
+ return data.offset + buf.length
71
+ })
72
+ if (!ok) {
73
+ // Buffer is full — the reader hasn't caught up yet
34
74
  }
35
75
  ```
36
76
 
77
+ ### Cross-thread usage
78
+
37
79
  ```js
38
- // worker.js
80
+ // main.js
81
+ import { alloc, writer } from '@nxtedition/shared'
82
+ import { Worker } from 'node:worker_threads'
83
+
84
+ const { sharedState, sharedBuffer } = alloc(1024 * 1024)
85
+ const worker = new Worker('./reader-worker.js', {
86
+ workerData: { sharedState, sharedBuffer },
87
+ })
39
88
 
40
- import * as shared from '@nxtedition/shared'
41
- import tp from 'timers/promise'
89
+ const w = writer({ sharedState, sharedBuffer })
90
+ // ... write messages
91
+ ```
92
+
93
+ ```js
94
+ // reader-worker.js
95
+ import { reader } from '@nxtedition/shared'
96
+ import { workerData } from 'node:worker_threads'
42
97
 
43
- const writeToParent = shared.writer(workerData.writer)
98
+ const r = reader(workerData)
44
99
 
45
- for await (const buffer of shared.reader(workerData.reader)) {
46
- console.log(`From parent ${buffer}`)
47
- await tp.setTimeout(1e3) // Backpressure
48
- writeToWorker(Buffer.from('pong'))
100
+ function poll() {
101
+ const count = r.readSome((data) => {
102
+ // process data.buffer at data.offset..data.offset+data.length
103
+ })
104
+ setImmediate(poll)
49
105
  }
106
+ poll()
107
+ ```
108
+
109
+ ## API
110
+
111
+ ### `alloc(size: number): SharedBuffers`
112
+
113
+ Allocates the shared memory buffers for a ring buffer of the given byte size.
114
+
115
+ - **size** — Buffer capacity in bytes (must be a positive integer, max ~2 GB)
116
+ - Returns `{ sharedState: SharedArrayBuffer, sharedBuffer: SharedArrayBuffer }`
117
+
118
+ ### `reader(buffers: SharedBuffers): Reader`
119
+
120
+ Creates a reader for the ring buffer.
121
+
122
+ #### `reader.readSome(next): number`
123
+
124
+ Reads a batch of messages. Calls `next(data)` for each message, where `data` has:
125
+
126
+ - `buffer: Buffer` — The underlying shared buffer
127
+ - `view: DataView` — A DataView over the shared buffer
128
+ - `offset: number` — Start offset of the message payload
129
+ - `length: number` — Length of the message payload in bytes
130
+
131
+ Return `false` from the callback to stop reading early. Returns the number of messages processed.
132
+
133
+ Messages are batched: up to 1024 items or 256 KiB per call.
134
+
135
+ ### `writer(buffers: SharedBuffers, options?): Writer`
136
+
137
+ Creates a writer for the ring buffer.
138
+
139
+ **Options:**
140
+
141
+ - `yield?: () => void` — Called when the writer must wait for the reader to catch up. Useful to prevent deadlocks when the writer thread also drives the reader.
142
+ - `logger?: { warn(obj, msg): void }` — Logger for yield warnings (pino-compatible).
143
+
144
+ #### `writer.writeSync(len, fn, timeout?): void`
145
+
146
+ Synchronously writes a message. Blocks (via `Atomics.wait`) until buffer space is available.
147
+
148
+ - **len** — Maximum payload size in bytes. Writing beyond `len` bytes in the callback is undefined behavior.
149
+ - **fn(data) → number** — Write callback. Write payload into `data.buffer` starting at `data.offset`. **Must return the end position** (`data.offset + bytesWritten`), not the byte count.
150
+ - **timeout** — Max wait time in ms (default: 60000). Throws on timeout.
151
+
152
+ #### `writer.tryWrite(len, fn): boolean`
153
+
154
+ Non-blocking write attempt. Returns `false` if the buffer is full. The `fn` callback follows the same contract as `writeSync`.
155
+
156
+ #### `writer.cork(callback): T`
157
+
158
+ Batches multiple writes within the callback. The write pointer is only published to the reader when `cork` returns, reducing atomic operation overhead.
159
+
160
+ ## Benchmarks
161
+
162
+ Measured on Apple M3 Pro (3.51 GHz), Node.js 25.6.1, 8 MiB ring buffer.
163
+
164
+ Each benchmark writes batches of fixed-size messages from the main thread and
165
+ reads them in a worker thread. The shared ring buffer is compared against
166
+ Node.js `postMessage` (structured clone). Hardware performance counters were
167
+ collected with [`@mitata/counters`](https://github.com/evanwashere/mitata).
168
+
169
+ ### Throughput
170
+
171
+ | Size | shared (buffer) | shared (string) | postMessage (buffer) | postMessage (string) |
172
+ | -----: | --------------: | --------------: | -------------------: | -------------------: |
173
+ | 64 B | **1.07 GiB/s** | 793 MiB/s | 93 MiB/s | 117 MiB/s |
174
+ | 256 B | **2.98 GiB/s** | 2.56 GiB/s | 259 MiB/s | 391 MiB/s |
175
+ | 1 KiB | 4.65 GiB/s | **7.52 GiB/s** | 1.24 GiB/s | 1.68 GiB/s |
176
+ | 4 KiB | 4.94 GiB/s | **16.38 GiB/s** | 3.77 GiB/s | 4.84 GiB/s |
177
+ | 16 KiB | 5.25 GiB/s | **22.33 GiB/s** | 8.54 GiB/s | 9.65 GiB/s |
178
+ | 64 KiB | 5.53 GiB/s | **19.86 GiB/s** | 10.94 GiB/s | 12.25 GiB/s |
179
+
180
+ ### Message rate
181
+
182
+ | Size | shared (buffer) | shared (string) | postMessage (buffer) | postMessage (string) |
183
+ | -----: | --------------: | --------------: | -------------------: | -------------------: |
184
+ | 64 B | **17.99 M/s** | 12.99 M/s | 1.53 M/s | 1.92 M/s |
185
+ | 256 B | **12.50 M/s** | 10.73 M/s | 1.06 M/s | 1.60 M/s |
186
+ | 1 KiB | 4.87 M/s | **7.88 M/s** | 1.30 M/s | 1.76 M/s |
187
+ | 4 KiB | 1.30 M/s | **4.29 M/s** | 989 K/s | 1.27 M/s |
188
+ | 16 KiB | 344 K/s | **1.46 M/s** | 560 K/s | 632 K/s |
189
+ | 64 KiB | 91 K/s | **325 K/s** | 179 K/s | 201 K/s |
190
+
191
+ ### CPU efficiency (instructions per cycle)
192
+
193
+ | Size | shared (buffer) | shared (string) | postMessage (buffer) | postMessage (string) |
194
+ | -----: | --------------: | --------------: | -------------------: | -------------------: |
195
+ | 64 B | 4.80 | 5.79 | 3.91 | 3.37 |
196
+ | 256 B | 4.46 | 5.98 | 3.48 | 3.06 |
197
+ | 1 KiB | 4.17 | **6.29** | 3.63 | 3.15 |
198
+ | 4 KiB | 3.75 | **6.72** | 3.38 | 2.83 |
199
+ | 16 KiB | 3.80 | **6.03** | 2.74 | 2.86 |
200
+ | 64 KiB | 3.96 | **4.57** | 2.43 | 2.93 |
201
+
202
+ ### Key findings
203
+
204
+ - **Small messages (64-256 B):** The shared ring buffer with `Buffer.copy` delivers
205
+ up to **12x higher message rate** and **9x higher throughput** than `postMessage`.
206
+ Per-message overhead dominates at these sizes, and avoiding structured cloning makes
207
+ the biggest difference.
208
+
209
+ - **Large messages (1-64 KiB):** The shared ring buffer with string encoding
210
+ (`Buffer.write`) reaches up to **22 GiB/s** — roughly **2-4x faster** than
211
+ `postMessage`. V8's ASCII fast path for UTF-8 encoding is heavily vectorized
212
+ (6-7 IPC on Apple M3 Pro), which explains why string writes outperform raw
213
+ `Buffer.copy` at larger sizes.
214
+
215
+ - **CPU efficiency:** The shared ring buffer consistently achieves higher IPC
216
+ (4-7) compared to `postMessage` (2-4), indicating less time spent stalled on
217
+ memory or synchronization.
218
+
219
+ - **Caveat:** The string benchmark uses ASCII-only content. Multi-byte UTF-8
220
+ strings will not hit V8's vectorized fast path and will be significantly slower.
221
+
222
+ ### Running the benchmark
223
+
224
+ ```sh
225
+ # Hardware counters require elevated privileges on macOS
226
+ sudo node --allow-natives-syntax packages/shared/src/bench.mjs
50
227
  ```
228
+
229
+ ## License
230
+
231
+ MIT
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
package/lib/index.d.ts ADDED
@@ -0,0 +1,44 @@
1
+ export interface SharedBuffers {
2
+ sharedState: SharedArrayBuffer;
3
+ sharedBuffer: SharedArrayBuffer;
4
+ }
5
+ export interface BufferRegion {
6
+ buffer: Buffer;
7
+ view: DataView;
8
+ offset: number;
9
+ length: number;
10
+ byteOffset: number;
11
+ byteLength: number;
12
+ }
13
+ /**
14
+ * Allocates the shared memory buffers.
15
+ */
16
+ export declare function alloc(size: number): SharedBuffers;
17
+ export interface Reader {
18
+ readSome<U>(next: (data: BufferRegion, opaque: U) => void | boolean, opaque: U): number;
19
+ readSome(next: (data: BufferRegion) => void | boolean): number;
20
+ }
21
+ /**
22
+ * Creates a reader for the ring buffer.
23
+ */
24
+ export declare function reader({ sharedState, sharedBuffer }: SharedBuffers): Reader;
25
+ export interface WriterOptions {
26
+ yield?: () => void;
27
+ logger?: {
28
+ warn(obj: object, msg: string): void;
29
+ };
30
+ }
31
+ export interface Writer {
32
+ tryWrite(len: number, fn: (data: BufferRegion) => number): boolean;
33
+ tryWrite<U>(len: number, fn: (data: BufferRegion, opaque: U) => number, opaque: U): boolean;
34
+ writeSync(len: number, fn: (data: BufferRegion) => number): void;
35
+ writeSync<U>(len: number, fn: (data: BufferRegion, opaque: U) => number, opaque: U): void;
36
+ cork<T>(callback: () => T): T;
37
+ cork(): void;
38
+ uncork(): void;
39
+ flushSync(): void;
40
+ }
41
+ /**
42
+ * Creates a writer for the ring buffer.
43
+ */
44
+ export declare function writer({ sharedState, sharedBuffer }: SharedBuffers, { yield: onYield, logger }?: WriterOptions): Writer;