@nxtedition/shared 3.0.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2022 nxtedition
3
+ Copyright (c) nxtedition
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
package/README.md CHANGED
@@ -1,50 +1,231 @@
1
- # shared
1
+ # @nxtedition/shared
2
2
 
3
- Ring Buffer for NodeJS cross Worker communication.
3
+ A high-performance, thread-safe ring buffer for inter-thread communication in Node.js using `SharedArrayBuffer`.
4
+
5
+ ## Why
6
+
7
+ Passing data between worker threads in Node.js typically involves structured cloning or transferring `ArrayBuffer` ownership. Structured cloning copies every byte — fine for occasional messages, but a bottleneck when streaming megabytes per second between threads. Transferable objects avoid the copy, but each transfer still requires allocating a new `ArrayBuffer`, serializing the transfer list, and coordinating ownership between threads — overhead that adds up quickly in high-throughput scenarios.
8
+
9
+ This ring buffer avoids these problems. A single `SharedArrayBuffer` is mapped into both threads. The writer appends messages by advancing a write pointer; the reader consumes them by advancing a read pointer. No copies, no ownership transfers, no cloning overhead. Because messages are stored inline in a contiguous buffer, message data lives directly alongside the protocol's bookkeeping state rather than scattered across separately allocated `ArrayBuffer`s — keeping access patterns cache-friendly. The pointers are coordinated with `Atomics` operations and cache-line-aligned to prevent false sharing between CPU cores.
10
+
11
+ Reads are zero-copy: the reader callback receives a `DataView` directly into the shared buffer, so parsing can happen in-place without allocating intermediate buffers. Writes are batched — the write pointer is only published to the reader after a high-water mark is reached or at the end of the current event loop tick, drastically reducing the frequency of expensive atomic stores.
12
+
13
+ ## Platform Assumptions
14
+
15
+ This library assumes that unaligned 32-bit reads and writes will not tear on the target platform. This holds true on x86/x64 and ARM64, which are the primary targets for Node.js.
4
16
 
5
17
  ## Install
6
18
 
7
- ```
8
- npm i @nxtedition/shared
19
+ ```sh
20
+ npm install @nxtedition/shared
9
21
  ```
10
22
 
11
- ## Quick Start
23
+ ## Usage
12
24
 
13
25
  ```js
14
- // index.js
26
+ import { alloc, reader, writer } from '@nxtedition/shared'
27
+
28
+ // Allocate shared memory (pass these buffers to a worker thread)
29
+ const { sharedState, sharedBuffer } = alloc(1024 * 1024) // 1 MB ring buffer
15
30
 
16
- import * as shared from '@nxtedition/shared'
17
- import tp from 'timers/promise'
31
+ // --- Writer side (e.g. main thread) ---
32
+ const w = writer({ sharedState, sharedBuffer })
18
33
 
19
- const writer = shared.alloc(16 * 1024 * 1024)
20
- const reader = shared.alloc(16 * 1024 * 1024)
34
+ const payload = Buffer.from('hello world')
35
+ w.writeSync(payload.length, (data) => {
36
+ payload.copy(data.buffer, data.offset)
37
+ return data.offset + payload.length
38
+ })
39
+
40
+ // --- Reader side (e.g. worker thread) ---
41
+ const r = reader({ sharedState, sharedBuffer })
21
42
 
22
- const worker = new Worker(new URL('worker.js', import.meta.url), {
23
- workerData: { reader, writer },
43
+ r.readSome((data) => {
44
+ const msg = data.buffer.subarray(data.offset, data.offset + data.length).toString()
45
+ console.log(msg) // 'hello world'
24
46
  })
47
+ ```
48
+
49
+ ### Batching writes with cork
25
50
 
26
- const writeToWorker = shared.writer(reader)
51
+ ```js
52
+ w.cork(() => {
53
+ for (const item of items) {
54
+ const buf = Buffer.from(JSON.stringify(item))
55
+ w.writeSync(buf.length, (data) => {
56
+ buf.copy(data.buffer, data.offset)
57
+ return data.offset + buf.length
58
+ })
59
+ }
60
+ })
61
+ // All writes flushed atomically when cork returns
62
+ ```
27
63
 
28
- writeToWorker(Buffer.from('ping'))
64
+ ### Non-blocking writes with tryWrite
29
65
 
30
- for await (const buffer of shared.reader(writer)) {
31
- console.log(`From worker ${buffer}`)
32
- await tp.setTimeout(1e3) // Backpressure
33
- writeToWorker(Buffer.from('pong'))
66
+ ```js
67
+ const buf = Buffer.from('data')
68
+ const ok = w.tryWrite(buf.length, (data) => {
69
+ buf.copy(data.buffer, data.offset)
70
+ return data.offset + buf.length
71
+ })
72
+ if (!ok) {
73
+ // Buffer is full — the reader hasn't caught up yet
34
74
  }
35
75
  ```
36
76
 
77
+ ### Cross-thread usage
78
+
37
79
  ```js
38
- // worker.js
80
+ // main.js
81
+ import { alloc, writer } from '@nxtedition/shared'
82
+ import { Worker } from 'node:worker_threads'
83
+
84
+ const { sharedState, sharedBuffer } = alloc(1024 * 1024)
85
+ const worker = new Worker('./reader-worker.js', {
86
+ workerData: { sharedState, sharedBuffer },
87
+ })
39
88
 
40
- import * as shared from '@nxtedition/shared'
41
- import tp from 'timers/promise'
89
+ const w = writer({ sharedState, sharedBuffer })
90
+ // ... write messages
91
+ ```
92
+
93
+ ```js
94
+ // reader-worker.js
95
+ import { reader } from '@nxtedition/shared'
96
+ import { workerData } from 'node:worker_threads'
42
97
 
43
- const writeToParent = shared.writer(workerData.writer)
98
+ const r = reader(workerData)
44
99
 
45
- for await (const buffer of shared.reader(workerData.reader)) {
46
- console.log(`From parent ${buffer}`)
47
- await tp.setTimeout(1e3) // Backpressure
48
- writeToWorker(Buffer.from('pong'))
100
+ function poll() {
101
+ const count = r.readSome((data) => {
102
+ // process data.buffer at data.offset..data.offset+data.length
103
+ })
104
+ setImmediate(poll)
49
105
  }
106
+ poll()
107
+ ```
108
+
109
+ ## API
110
+
111
+ ### `alloc(size: number): SharedBuffers`
112
+
113
+ Allocates the shared memory buffers for a ring buffer of the given byte size.
114
+
115
+ - **size** — Buffer capacity in bytes (must be a positive integer, max ~2 GB)
116
+ - Returns `{ sharedState: SharedArrayBuffer, sharedBuffer: SharedArrayBuffer }`
117
+
118
+ ### `reader(buffers: SharedBuffers): Reader`
119
+
120
+ Creates a reader for the ring buffer.
121
+
122
+ #### `reader.readSome(next): number`
123
+
124
+ Reads a batch of messages. Calls `next(data)` for each message, where `data` has:
125
+
126
+ - `buffer: Buffer` — The underlying shared buffer
127
+ - `view: DataView` — A DataView over the shared buffer
128
+ - `offset: number` — Start offset of the message payload
129
+ - `length: number` — Length of the message payload in bytes
130
+
131
+ Return `false` from the callback to stop reading early. Returns the number of messages processed.
132
+
133
+ Messages are batched: up to 1024 items or 256 KiB per call.
134
+
135
+ ### `writer(buffers: SharedBuffers, options?): Writer`
136
+
137
+ Creates a writer for the ring buffer.
138
+
139
+ **Options:**
140
+
141
+ - `yield?: () => void` — Called when the writer must wait for the reader to catch up. Useful to prevent deadlocks when the writer thread also drives the reader.
142
+ - `logger?: { warn(obj, msg): void }` — Logger for yield warnings (pino-compatible).
143
+
144
+ #### `writer.writeSync(len, fn, timeout?): void`
145
+
146
+ Synchronously writes a message. Blocks (via `Atomics.wait`) until buffer space is available.
147
+
148
+ - **len** — Maximum payload size in bytes. Writing beyond `len` bytes in the callback is undefined behavior.
149
+ - **fn(data) → number** — Write callback. Write payload into `data.buffer` starting at `data.offset`. **Must return the end position** (`data.offset + bytesWritten`), not the byte count.
150
+ - **timeout** — Max wait time in ms (default: 60000). Throws on timeout.
151
+
152
+ #### `writer.tryWrite(len, fn): boolean`
153
+
154
+ Non-blocking write attempt. Returns `false` if the buffer is full. The `fn` callback follows the same contract as `writeSync`.
155
+
156
+ #### `writer.cork(callback): T`
157
+
158
+ Batches multiple writes within the callback. The write pointer is only published to the reader when `cork` returns, reducing atomic operation overhead.
159
+
160
+ ## Benchmarks
161
+
162
+ Measured on Apple M3 Pro (3.51 GHz), Node.js 25.6.1, 8 MiB ring buffer.
163
+
164
+ Each benchmark writes batches of fixed-size messages from the main thread and
165
+ reads them in a worker thread. The shared ring buffer is compared against
166
+ Node.js `postMessage` (structured clone). Hardware performance counters were
167
+ collected with [`@mitata/counters`](https://github.com/evanwashere/mitata).
168
+
169
+ ### Throughput
170
+
171
+ | Size | shared (buffer) | shared (string) | postMessage (buffer) | postMessage (string) |
172
+ | -----: | --------------: | --------------: | -------------------: | -------------------: |
173
+ | 64 B | **1.07 GiB/s** | 793 MiB/s | 93 MiB/s | 117 MiB/s |
174
+ | 256 B | **2.98 GiB/s** | 2.56 GiB/s | 259 MiB/s | 391 MiB/s |
175
+ | 1 KiB | 4.65 GiB/s | **7.52 GiB/s** | 1.24 GiB/s | 1.68 GiB/s |
176
+ | 4 KiB | 4.94 GiB/s | **16.38 GiB/s** | 3.77 GiB/s | 4.84 GiB/s |
177
+ | 16 KiB | 5.25 GiB/s | **22.33 GiB/s** | 8.54 GiB/s | 9.65 GiB/s |
178
+ | 64 KiB | 5.53 GiB/s | **19.86 GiB/s** | 10.94 GiB/s | 12.25 GiB/s |
179
+
180
+ ### Message rate
181
+
182
+ | Size | shared (buffer) | shared (string) | postMessage (buffer) | postMessage (string) |
183
+ | -----: | --------------: | --------------: | -------------------: | -------------------: |
184
+ | 64 B | **17.99 M/s** | 12.99 M/s | 1.53 M/s | 1.92 M/s |
185
+ | 256 B | **12.50 M/s** | 10.73 M/s | 1.06 M/s | 1.60 M/s |
186
+ | 1 KiB | 4.87 M/s | **7.88 M/s** | 1.30 M/s | 1.76 M/s |
187
+ | 4 KiB | 1.30 M/s | **4.29 M/s** | 989 K/s | 1.27 M/s |
188
+ | 16 KiB | 344 K/s | **1.46 M/s** | 560 K/s | 632 K/s |
189
+ | 64 KiB | 91 K/s | **325 K/s** | 179 K/s | 201 K/s |
190
+
191
+ ### CPU efficiency (instructions per cycle)
192
+
193
+ | Size | shared (buffer) | shared (string) | postMessage (buffer) | postMessage (string) |
194
+ | -----: | --------------: | --------------: | -------------------: | -------------------: |
195
+ | 64 B | 4.80 | 5.79 | 3.91 | 3.37 |
196
+ | 256 B | 4.46 | 5.98 | 3.48 | 3.06 |
197
+ | 1 KiB | 4.17 | **6.29** | 3.63 | 3.15 |
198
+ | 4 KiB | 3.75 | **6.72** | 3.38 | 2.83 |
199
+ | 16 KiB | 3.80 | **6.03** | 2.74 | 2.86 |
200
+ | 64 KiB | 3.96 | **4.57** | 2.43 | 2.93 |
201
+
202
+ ### Key findings
203
+
204
+ - **Small messages (64-256 B):** The shared ring buffer with `Buffer.copy` delivers
205
+ up to **12x higher message rate** and **9x higher throughput** than `postMessage`.
206
+ Per-message overhead dominates at these sizes, and avoiding structured cloning makes
207
+ the biggest difference.
208
+
209
+ - **Large messages (1-64 KiB):** The shared ring buffer with string encoding
210
+ (`Buffer.write`) reaches up to **22 GiB/s** — roughly **2-4x faster** than
211
+ `postMessage`. V8's ASCII fast path for UTF-8 encoding is heavily vectorized
212
+ (6-7 IPC on Apple M3 Pro), which explains why string writes outperform raw
213
+ `Buffer.copy` at larger sizes.
214
+
215
+ - **CPU efficiency:** The shared ring buffer consistently achieves higher IPC
216
+ (4-7) compared to `postMessage` (2-4), indicating less time spent stalled on
217
+ memory or synchronization.
218
+
219
+ - **Caveat:** The string benchmark uses ASCII-only content. Multi-byte UTF-8
220
+ strings will not hit V8's vectorized fast path and will be significantly slower.
221
+
222
+ ### Running the benchmark
223
+
224
+ ```sh
225
+ # Hardware counters require elevated privileges on macOS
226
+ sudo node --allow-natives-syntax packages/shared/src/bench.mjs
50
227
  ```
228
+
229
+ ## License
230
+
231
+ MIT
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
package/lib/index.d.ts ADDED
@@ -0,0 +1,44 @@
1
+ export interface SharedBuffers {
2
+ sharedState: SharedArrayBuffer;
3
+ sharedBuffer: SharedArrayBuffer;
4
+ }
5
+ export interface BufferRegion {
6
+ buffer: Buffer;
7
+ view: DataView;
8
+ offset: number;
9
+ length: number;
10
+ byteOffset: number;
11
+ byteLength: number;
12
+ }
13
+ /**
14
+ * Allocates the shared memory buffers.
15
+ */
16
+ export declare function alloc(size: number): SharedBuffers;
17
+ export interface Reader {
18
+ readSome<U>(next: (data: BufferRegion, opaque: U) => void | boolean, opaque: U): number;
19
+ readSome(next: (data: BufferRegion) => void | boolean): number;
20
+ }
21
+ /**
22
+ * Creates a reader for the ring buffer.
23
+ */
24
+ export declare function reader({ sharedState, sharedBuffer }: SharedBuffers): Reader;
25
+ export interface WriterOptions {
26
+ yield?: () => void;
27
+ logger?: {
28
+ warn(obj: object, msg: string): void;
29
+ };
30
+ }
31
+ export interface Writer {
32
+ tryWrite(len: number, fn: (data: BufferRegion) => number): boolean;
33
+ tryWrite<U>(len: number, fn: (data: BufferRegion, opaque: U) => number, opaque: U): boolean;
34
+ writeSync(len: number, fn: (data: BufferRegion) => number): void;
35
+ writeSync<U>(len: number, fn: (data: BufferRegion, opaque: U) => number, opaque: U): void;
36
+ cork<T>(callback: () => T): T;
37
+ cork(): void;
38
+ uncork(): void;
39
+ flushSync(): void;
40
+ }
41
+ /**
42
+ * Creates a writer for the ring buffer.
43
+ */
44
+ export declare function writer({ sharedState, sharedBuffer }: SharedBuffers, { yield: onYield, logger }?: WriterOptions): Writer;