npm - @nxtedition/shared - Versions diffs - 3.0.2 → 4.0.1 - Mend

@nxtedition/shared 3.0.2 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -12,7 +12,7 @@ Reads are zero-copy: the reader callback receives a `DataView` directly into the
 ## Platform Assumptions
-This library assumes that unaligned 32-bit reads and writes will not tear on the target platform. This holds true on x86/x64 and ARM64, which are the primary targets for Node.js.
+All messages are aligned on 4-byte boundaries. Message length headers are read and written via `Int32Array` indexing rather than `DataView`, avoiding per-access endianness checks on the hot path.
 ## Install
@@ -23,25 +23,25 @@ npm install @nxtedition/shared
 ## Usage
 ```js
-import { alloc, reader, writer } from '@nxtedition/shared'
+import { State, Reader, Writer } from '@nxtedition/shared'
-// Allocate shared memory (pass these buffers to a worker thread)
-const { sharedState, sharedBuffer } = alloc(1024 * 1024) // 1 MB ring buffer
+// Allocate shared memory (pass state.buffer to a worker thread)
+const state = new State(1024 * 1024) // 1 MB ring buffer
 // --- Writer side (e.g. main thread) ---
-const w = writer({ sharedState, sharedBuffer })
+const w = new Writer(state)
 const payload = Buffer.from('hello world')
 w.writeSync(payload.length, (data) => {
-  payload.copy(data.buffer, data.offset)
-  return data.offset + payload.length
+  payload.copy(data.buffer, data.byteOffset)
+  return data.byteOffset + payload.length
 })
 // --- Reader side (e.g. worker thread) ---
-const r = reader({ sharedState, sharedBuffer })
+const r = new Reader(state)
 r.readSome((data) => {
-  const msg = data.buffer.subarray(data.offset, data.offset + data.length).toString()
+  const msg = data.buffer.subarray(data.byteOffset, data.byteOffset + data.byteLength).toString()
   console.log(msg) // 'hello world'
 })
 ```
@@ -53,21 +53,30 @@ w.cork(() => {
   for (const item of items) {
     const buf = Buffer.from(JSON.stringify(item))
     w.writeSync(buf.length, (data) => {
-      buf.copy(data.buffer, data.offset)
-      return data.offset + buf.length
+      buf.copy(data.buffer, data.byteOffset)
+      return data.byteOffset + buf.length
     })
   }
 })
 // All writes flushed atomically when cork returns
 ```
+Or manually:
+```js
+w.cork()
+w.writeSync(buf1.length, writeFn, buf1)
+w.writeSync(buf2.length, writeFn, buf2)
+w.uncork() // publishes all writes to the reader
+```
 ### Non-blocking writes with tryWrite
 ```js
 const buf = Buffer.from('data')
 const ok = w.tryWrite(buf.length, (data) => {
-  buf.copy(data.buffer, data.offset)
-  return data.offset + buf.length
+  buf.copy(data.buffer, data.byteOffset)
+  return data.byteOffset + buf.length
 })
 if (!ok) {
   // Buffer is full — the reader hasn't caught up yet
@@ -78,28 +87,28 @@ if (!ok) {
 ```js
 // main.js
-import { alloc, writer } from '@nxtedition/shared'
+import { State, Writer } from '@nxtedition/shared'
 import { Worker } from 'node:worker_threads'
-const { sharedState, sharedBuffer } = alloc(1024 * 1024)
+const state = new State(1024 * 1024)
 const worker = new Worker('./reader-worker.js', {
-  workerData: { sharedState, sharedBuffer },
+  workerData: state.buffer,
 })
-const w = writer({ sharedState, sharedBuffer })
+const w = new Writer(state)
 // ... write messages
 ```
 ```js
 // reader-worker.js
-import { reader } from '@nxtedition/shared'
+import { Reader } from '@nxtedition/shared'
 import { workerData } from 'node:worker_threads'
-const r = reader(workerData)
+const r = new Reader(workerData)
 function poll() {
   const count = r.readSome((data) => {
-    // process data.buffer at data.offset..data.offset+data.length
+    // process data.buffer at data.byteOffset..data.byteOffset+data.byteLength
   })
   setImmediate(poll)
 }
@@ -108,113 +117,117 @@ poll()
 ## API
-### `alloc(size: number): SharedBuffers`
+### `new State(size: number)` / `new State(buffer: SharedArrayBuffer)`
+Allocates or wraps a shared memory buffer for the ring buffer. The first 128 bytes are reserved for state (read/write pointers); the rest is the data region.
-Allocates the shared memory buffers for a ring buffer of the given byte size.
+- **size** — Data capacity in bytes (must be a positive integer, max ~2 GB)
+- **buffer** — An existing `SharedArrayBuffer` to wrap
-- **size** — Buffer capacity in bytes (must be a positive integer, max ~2 GB)
-- Returns `{ sharedState: SharedArrayBuffer, sharedBuffer: SharedArrayBuffer }`
+#### `state.buffer`
-### `reader(buffers: SharedBuffers): Reader`
+The underlying `SharedArrayBuffer`. Pass this to a worker thread to share the ring buffer.
-Creates a reader for the ring buffer.
+### `new Reader(state: State | SharedArrayBuffer)`
-#### `reader.readSome(next): number`
+Creates a reader for the ring buffer. Accepts a `State` instance or a `SharedArrayBuffer` directly (shorthand for `new Reader(new State(buf))`).
-Reads a batch of messages. Calls `next(data)` for each message, where `data` has:
+#### `reader.readSome(next, opaque?)`
+Reads a batch of messages. Calls `next(data, opaque)` for each message, where `data` has:
 - `buffer: Buffer` — The underlying shared buffer
 - `view: DataView` — A DataView over the shared buffer
-- `offset: number` — Start offset of the message payload
-- `length: number` — Length of the message payload in bytes
+- `byteOffset: number` — Start offset of the message payload
+- `byteLength: number` — Length of the message payload in bytes
+- **opaque** — Optional user-provided context object passed through to the callback. Useful for avoiding closures on hot paths.
 Return `false` from the callback to stop reading early. Returns the number of messages processed.
 Messages are batched: up to 1024 items or 256 KiB per call.
-### `writer(buffers: SharedBuffers, options?): Writer`
+### `new Writer(state: State | SharedArrayBuffer, options?)`
-Creates a writer for the ring buffer.
+Creates a writer for the ring buffer. Accepts a `State` instance or a `SharedArrayBuffer` directly (shorthand for `new Writer(new State(buf))`).
 **Options:**
 - `yield?: () => void` — Called when the writer must wait for the reader to catch up. Useful to prevent deadlocks when the writer thread also drives the reader.
 - `logger?: { warn(obj, msg): void }` — Logger for yield warnings (pino-compatible).
-#### `writer.writeSync(len, fn, timeout?): void`
+#### `writer.writeSync(len, fn, opaque?)`
 Synchronously writes a message. Blocks (via `Atomics.wait`) until buffer space is available.
 - **len** — Maximum payload size in bytes. Writing beyond `len` bytes in the callback is undefined behavior.
-- **fn(data) → number** — Write callback. Write payload into `data.buffer` starting at `data.offset`. **Must return the end position** (`data.offset + bytesWritten`), not the byte count.
-- **timeout** — Max wait time in ms (default: 60000). Throws on timeout.
+- **fn(data, opaque) → number** — Write callback. Write payload into `data.buffer` starting at `data.byteOffset`. **Must return the end position** (`data.byteOffset + bytesWritten`), not the byte count.
+- **opaque** — Optional user-provided context object passed through to the callback. Useful for avoiding closures on hot paths.
+Throws on timeout (default: 60000 ms).
+#### `writer.tryWrite(len, fn, opaque?)`
-#### `writer.tryWrite(len, fn): boolean`
+Non-blocking write attempt. Returns `false` if the buffer is full. The `fn` and `opaque` parameters follow the same contract as `writeSync`.
-Non-blocking write attempt. Returns `false` if the buffer is full. The `fn` callback follows the same contract as `writeSync`.
+#### `writer.cork(callback?)`
-#### `writer.cork(callback): T`
+Batches multiple writes. The write pointer is only published to the reader when the cork is released, reducing atomic operation overhead.
-Batches multiple writes within the callback. The write pointer is only published to the reader when `cork` returns, reducing atomic operation overhead.
+When called with a callback, uncork is called automatically when the callback returns. When called without a callback, you must call `uncork()` manually.
+#### `writer.uncork()`
+Decrements the cork counter. When it reaches zero, publishes the pending write position to the reader. Safe to call when not corked (no-op).
+#### `writer.flushSync()`
+Immediately publishes the pending write position to the reader. Unlike `uncork`, this does not interact with the cork counter — it forces a flush regardless.
 ## Benchmarks
-Measured on Apple M3 Pro (3.51 GHz), Node.js 25.6.1, 8 MiB ring buffer.
+Measured on AMD EPYC 9355P (4.28 GHz), Node.js 25.6.0, 8 MiB ring buffer, Docker (x64-linux).
 Each benchmark writes batches of fixed-size messages from the main thread and
 reads them in a worker thread. The shared ring buffer is compared against
-Node.js `postMessage` (structured clone). Hardware performance counters were
-collected with [`@mitata/counters`](https://github.com/evanwashere/mitata).
+Node.js `postMessage` (structured clone).
 ### Throughput
 |   Size | shared (buffer) | shared (string) | postMessage (buffer) | postMessage (string) |
 | -----: | --------------: | --------------: | -------------------: | -------------------: |
-|   64 B |  **1.07 GiB/s** |       793 MiB/s |             93 MiB/s |            117 MiB/s |
-|  256 B |  **2.98 GiB/s** |      2.56 GiB/s |            259 MiB/s |            391 MiB/s |
-|  1 KiB |      4.65 GiB/s |  **7.52 GiB/s** |           1.24 GiB/s |           1.68 GiB/s |
-|  4 KiB |      4.94 GiB/s | **16.38 GiB/s** |           3.77 GiB/s |           4.84 GiB/s |
-| 16 KiB |      5.25 GiB/s | **22.33 GiB/s** |           8.54 GiB/s |           9.65 GiB/s |
-| 64 KiB |      5.53 GiB/s | **19.86 GiB/s** |          10.94 GiB/s |          12.25 GiB/s |
+|   64 B |   **901 MiB/s** |       410 MiB/s |             25 MiB/s |             42 MiB/s |
+|  256 B |  **2.67 GiB/s** |       896 MiB/s |             88 MiB/s |            158 MiB/s |
+|  1 KiB |  **4.88 GiB/s** |      1.26 GiB/s |            328 MiB/s |            498 MiB/s |
+|  4 KiB |  **9.22 GiB/s** |      1.50 GiB/s |           1.14 GiB/s |           1.70 GiB/s |
+| 16 KiB | **10.90 GiB/s** |      1.56 GiB/s |           4.29 GiB/s |           6.27 GiB/s |
+| 64 KiB |     13.03 GiB/s |      1.55 GiB/s |          10.10 GiB/s |      **15.18 GiB/s** |
 ### Message rate
 |   Size | shared (buffer) | shared (string) | postMessage (buffer) | postMessage (string) |
 | -----: | --------------: | --------------: | -------------------: | -------------------: |
-|   64 B |   **17.99 M/s** |       12.99 M/s |             1.53 M/s |             1.92 M/s |
-|  256 B |   **12.50 M/s** |       10.73 M/s |             1.06 M/s |             1.60 M/s |
-|  1 KiB |        4.87 M/s |    **7.88 M/s** |             1.30 M/s |             1.76 M/s |
-|  4 KiB |        1.30 M/s |    **4.29 M/s** |              989 K/s |             1.27 M/s |
-| 16 KiB |         344 K/s |    **1.46 M/s** |              560 K/s |              632 K/s |
-| 64 KiB |          91 K/s |     **325 K/s** |              179 K/s |              201 K/s |
-### CPU efficiency (instructions per cycle)
-|   Size | shared (buffer) | shared (string) | postMessage (buffer) | postMessage (string) |
-| -----: | --------------: | --------------: | -------------------: | -------------------: |
-|   64 B |            4.80 |            5.79 |                 3.91 |                 3.37 |
-|  256 B |            4.46 |            5.98 |                 3.48 |                 3.06 |
-|  1 KiB |            4.17 |        **6.29** |                 3.63 |                 3.15 |
-|  4 KiB |            3.75 |        **6.72** |                 3.38 |                 2.83 |
-| 16 KiB |            3.80 |        **6.03** |                 2.74 |                 2.86 |
-| 64 KiB |            3.96 |        **4.57** |                 2.43 |                 2.93 |
+|   64 B |   **14.76 M/s** |        6.72 M/s |              405 K/s |              688 K/s |
+|  256 B |   **11.20 M/s** |        3.67 M/s |              360 K/s |              648 K/s |
+|  1 KiB |    **5.12 M/s** |        1.32 M/s |              336 K/s |              510 K/s |
+|  4 KiB |    **2.42 M/s** |         394 K/s |              298 K/s |              445 K/s |
+| 16 KiB |     **714 K/s** |         102 K/s |              281 K/s |              411 K/s |
+| 64 KiB |         213 K/s |          25 K/s |              165 K/s |          **249 K/s** |
 ### Key findings
-- **Small messages (64-256 B):** The shared ring buffer with `Buffer.copy` delivers
-  up to **12x higher message rate** and **9x higher throughput** than `postMessage`.
-  Per-message overhead dominates at these sizes, and avoiding structured cloning makes
-  the biggest difference.
+- **Small messages (64–256 B):** The shared ring buffer with `Buffer.set` delivers
+  **14.8–11.2 M msg/s** — up to **36x faster** than `postMessage` (buffer) and
+  **21x faster** than `postMessage` (string). Per-message overhead dominates at
+  these sizes, and avoiding structured cloning makes the biggest difference.
-- **Large messages (1-64 KiB):** The shared ring buffer with string encoding
-  (`Buffer.write`) reaches up to **22 GiB/s** — roughly **2-4x faster** than
-  `postMessage`. V8's ASCII fast path for UTF-8 encoding is heavily vectorized
-  (6-7 IPC on Apple M3 Pro), which explains why string writes outperform raw
-  `Buffer.copy` at larger sizes.
+- **Medium to large messages (1–16 KiB):** `Buffer.set` via the ring buffer
+  maintains its lead, reaching **10.9 GiB/s** at 16 KiB — **1.7–10x faster**
+  than the best `postMessage` variant (about 10x at 1 KiB, narrowing to 1.7x at 16 KiB).
-- **CPU efficiency:** The shared ring buffer consistently achieves higher IPC
-  (4-7) compared to `postMessage` (2-4), indicating less time spent stalled on
-  memory or synchronization.
+- **Very large messages (64 KiB):** `postMessage` (string) overtakes the shared
+  buffer at **15.2 GiB/s** vs **13.0 GiB/s**. At this size, structured cloning
+  overhead is amortized and an optimized bulk `memcpy` dominates.
 - **Caveat:** The string benchmark uses ASCII-only content. Multi-byte UTF-8
   strings will not hit V8's vectorized fast path and will be significantly slower.
@@ -222,8 +235,7 @@ collected with [`@mitata/counters`](https://github.com/evanwashere/mitata).
 ### Running the benchmark
 ```sh
-# Hardware counters require elevated privileges on macOS
-sudo node --allow-natives-syntax packages/shared/src/bench.mjs
+node --allow-natives-syntax packages/shared/src/bench.mjs
 ```
 ## License

package/lib/index.d.ts CHANGED Viewed

@@ -1,44 +1,57 @@
-export interface SharedBuffers {
-    sharedState: SharedArrayBuffer;
-    sharedBuffer: SharedArrayBuffer;
-}
 export interface BufferRegion {
     buffer: Buffer;
     view: DataView;
-    offset: number;
-    length: number;
     byteOffset: number;
     byteLength: number;
 }
-/**
- * Allocates the shared memory buffers.
- */
-export declare function alloc(size: number): SharedBuffers;
-export interface Reader {
-    readSome<U>(next: (data: BufferRegion, opaque: U) => void | boolean, opaque: U): number;
-    readSome(next: (data: BufferRegion) => void | boolean): number;
-}
-/**
- * Creates a reader for the ring buffer.
- */
-export declare function reader({ sharedState, sharedBuffer }: SharedBuffers): Reader;
 export interface WriterOptions {
     yield?: () => void;
     logger?: {
         warn(obj: object, msg: string): void;
     };
 }
-export interface Writer {
-    tryWrite(len: number, fn: (data: BufferRegion) => number): boolean;
-    tryWrite<U>(len: number, fn: (data: BufferRegion, opaque: U) => number, opaque: U): boolean;
-    writeSync(len: number, fn: (data: BufferRegion) => number): void;
-    writeSync<U>(len: number, fn: (data: BufferRegion, opaque: U) => number, opaque: U): void;
-    cork<T>(callback: () => T): T;
-    cork(): void;
-    uncork(): void;
-    flushSync(): void;
+/**
+ * Shared ring buffer state. Allocates or wraps a SharedArrayBuffer where the
+ * first 128 bytes are reserved for read/write pointers and the rest is the
+ * data region.
+ */
+export declare class State {
+    readonly buffer: SharedArrayBuffer;
+    constructor(size: number);
+    constructor(buffer: SharedArrayBuffer);
+}
+/**
+ * Reader for the ring buffer.
+ */
+export declare class Reader {
+    #private;
+    constructor(state: State | SharedArrayBuffer);
+    readSome<U>(next: (data: BufferRegion, opaque?: U) => void | boolean, opaque?: U): number;
 }
 /**
- * Creates a writer for the ring buffer.
+ * Writer for the ring buffer.
  */
-export declare function writer({ sharedState, sharedBuffer }: SharedBuffers, { yield: onYield, logger }?: WriterOptions): Writer;
+export declare class Writer {
+    #private;
+    constructor(state: State | SharedArrayBuffer, { yield: onYield, logger }?: WriterOptions);
+    /**
+     * Synchronously writes a message. Blocks (via `Atomics.wait`) until buffer space is available.
+     * Writing more than "len" bytes in the callback will cause undefined behavior.
+     */
+    writeSync<U>(len: number, fn: (data: BufferRegion, opaque?: U) => number, opaque?: U): void;
+    /**
+     * Non-blocking write attempt. Returns `false` if the buffer is full.
+     * Writing more than "len" bytes in the callback will cause undefined behavior.
+     */
+    tryWrite<U>(len: number, fn: (data: BufferRegion, opaque?: U) => number, opaque?: U): boolean;
+    /**
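+     * Batches multiple writes. The write pointer is only published to the reader
+     * when the cork is released, reducing atomic operation overhead. When called
+     * with a callback, the cork is released automatically when the callback
+     * returns; when called without one, `uncork()` must be called manually.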
+     */
+    cork<T>(callback?: () => T): T | undefined;
+    /**
+     * Decrements the cork counter and, once it reaches zero, publishes the
+     * pending write position to the reader.
+     */
+    uncork(): void;
+    flushSync(): void;
+}

package/lib/index.js CHANGED Viewed

@@ -7,90 +7,106 @@
 const WRITE_INDEX = 0
 const READ_INDEX = 16
+// The first 128 bytes of the buffer are reserved for state (read/write pointers).
+// Data starts at byte offset 128.
+const STATE_BYTES = 128
 // High-Water Mark for batching operations to reduce the frequency
 // of expensive atomic writes.
 const HWM_BYTES = 256 * 1024 // 256 KiB
 const HWM_COUNT = 1024 // 1024 items
 /**
- * Allocates the shared memory buffers.
+ * Shared ring buffer state. Allocates or wraps a SharedArrayBuffer where the
+ * first 128 bytes are reserved for read/write pointers and the rest is the
+ * data region.
  */
-export function alloc(size        )                {
-  if (!Number.isInteger(size)) {
-    throw new TypeError('size must be a positive integer')
-  }
-  if (size <= 0) {
-    throw new RangeError('size must be a positive integer')
-  }
-  if (size >= 2 ** 31 - 8) {
-    throw new RangeError('size exceeds maximum of 2GB minus header size')
-  }
-  return {
-    // A small buffer for sharing state (read/write pointers).
-    sharedState: new SharedArrayBuffer(128),
-    // The main buffer for transferring data.
-    // We need another 8 bytes for entry headers.
-    sharedBuffer: new SharedArrayBuffer(size + 8),
+export class State {
+           buffer
+  constructor(sizeOrBuffer                            ) {
+    if (sizeOrBuffer instanceof SharedArrayBuffer) {
+      if (sizeOrBuffer.byteLength < STATE_BYTES + 8) {
+        throw new RangeError('SharedArrayBuffer too small for ring buffer state')
+      }
+      if (sizeOrBuffer.byteLength >= 2 ** 31) {
+        throw new RangeError('Shared buffer size exceeds maximum of 2GB')
+      }
+      this.buffer = sizeOrBuffer
+    } else {
+      const size = sizeOrBuffer
+      if (!Number.isInteger(size)) {
+        throw new TypeError('size must be a positive integer')
+      }
+      if (size <= 0) {
+        throw new RangeError('size must be a positive integer')
+      }
+      if (size >= 2 ** 31 - 11) {
+        throw new RangeError('size exceeds maximum of 2GB minus header size')
+      }
+      // 128 bytes for state + data region (rounded up to 4-byte boundary for Int32Array).
+      this.buffer = new SharedArrayBuffer(STATE_BYTES + ((size + 8 + 3) & ~3))
+    }
   }
 }
 /**
- * Creates a reader for the ring buffer.
+ * Reader for the ring buffer.
  */
-export function reader({ sharedState, sharedBuffer }               )         {
-  if (!(sharedState instanceof SharedArrayBuffer)) {
-    throw new TypeError('sharedState must be a SharedArrayBuffer')
-  }
-  if (!(sharedBuffer instanceof SharedArrayBuffer)) {
-    throw new TypeError('sharedBuffer must be a SharedArrayBuffer')
-  }
-  if (sharedBuffer.byteLength >= 2 ** 31) {
-    throw new RangeError('Shared buffer size exceeds maximum of 2GB')
+export class Reader {
+  #state
+  #size
+  #int32
+  #data
+  #readPos
+  constructor(state                           ) {
+    const sharedBuffer = state instanceof SharedArrayBuffer ? state : state.buffer
+    const size = sharedBuffer.byteLength - STATE_BYTES
+    this.#state = new Int32Array(sharedBuffer, 0, STATE_BYTES >> 2)
+    this.#size = size
+    this.#int32 = new Int32Array(sharedBuffer, STATE_BYTES)
+    // This object is reused to avoid creating new objects in a hot path.
+    // This helps V8 maintain a stable hidden class for the object,
+    // which is a key optimization (zero-copy read).
+    this.#data = {
+      buffer: Buffer.from(sharedBuffer, STATE_BYTES, size),
+      view: new DataView(sharedBuffer, STATE_BYTES, size),
+      byteOffset: 0,
+      byteLength: 0,
+    }
+    // Local copy of the pointer. The `| 0` is a hint to the V8 JIT
+    // compiler that this is a 32-bit integer, enabling optimizations.
+    this.#readPos = Atomics.load(this.#state, READ_INDEX) | 0
   }
-  const state = new Int32Array(sharedState)
-  const size = sharedBuffer.byteLength
-  const buffer = Buffer.from(sharedBuffer)
-  const view = new DataView(sharedBuffer)
-  // This object is reused to avoid creating new objects in a hot path.
-  // This helps V8 maintain a stable hidden class for the object,
-  // which is a key optimization (zero-copy read).
-  const data               = { buffer, view, offset: 0, length: 0, byteOffset: 0, byteLength: 0 }
-  // Local copies of the pointers. The `| 0` is a hint to the V8 JIT
-  // compiler that these are 32-bit integers, enabling optimizations.
-  let readPos = Atomics.load(state, READ_INDEX) | 0
-  let writePos = Atomics.load(state, WRITE_INDEX) | 0
-  function readSome   (
-    next                                                    ,
-    opaque    ,
-  )         {
+  readSome   (next                                                    , opaque    ) {
     let count = 0
     let bytes = 0
-    writePos = state[WRITE_INDEX] | 0
+    const state = this.#state
+    const int32 = this.#int32
+    const size = this.#size
+    const data = this.#data
+    let readPos = this.#readPos
+    let writePos = state[WRITE_INDEX] | 0
     // First, check if the local writePos matches the readPos.
     // If so, refresh it from shared memory in case the writer has added data.
@@ -100,8 +116,8 @@ export function reader({ sharedState, sharedBuffer }               )         {
     // Process messages in a batch to minimize loop and atomic operation overhead.
     while (count < HWM_COUNT && bytes < HWM_BYTES && readPos !== writePos) {
+      const dataLen = int32[readPos >> 2] | 0
       const dataPos = readPos + 4
-      const dataLen = view.getInt32(dataPos - 4, true) | 0
       bytes += 4
@@ -120,16 +136,16 @@ export function reader({ sharedState, sharedBuffer }               )         {
           throw new Error('Data exceeds buffer size')
         }
-        readPos += 4 + dataLen
+        // Advance by aligned length so next header is on a 4-byte boundary.
+        const alignedLen = (dataLen + 3) & ~3
+        readPos += 4 + alignedLen
-        bytes += dataLen
+        bytes += alignedLen
         count += 1
         // This is a "zero-copy" operation. We don't copy the data out.
         // Instead, we pass a "view" into the shared buffer.
-        data.offset = dataPos
         data.byteOffset = dataPos
-        data.length = dataLen
         data.byteLength = dataLen
         if (next(data, opaque) === false) {
@@ -138,6 +154,8 @@ export function reader({ sharedState, sharedBuffer }               )         {
       }
     }
+    this.#readPos = readPos
     // IMPORTANT: The reader only updates its shared `readPos` after a batch
     // is processed. This significantly reduces atomic operation overhead.
     if (bytes > 0) {
@@ -146,190 +164,169 @@ export function reader({ sharedState, sharedBuffer }               )         {
     return count
   }
-  return { readSome }
 }
 /**
- * Creates a writer for the ring buffer.
+ * Writer for the ring buffer.
  */
-export function writer(
-  { sharedState, sharedBuffer }               ,
-  { yield: onYield, logger }                = {},
-)         {
-  if (!(sharedState instanceof SharedArrayBuffer)) {
-    throw new TypeError('sharedState must be a SharedArrayBuffer')
-  }
-  if (!(sharedBuffer instanceof SharedArrayBuffer)) {
-    throw new TypeError('sharedBuffer must be a SharedArrayBuffer')
-  }
-  if (sharedBuffer.byteLength >= 2 ** 31) {
-    throw new RangeError('Shared buffer size exceeds maximum of 2GB')
-  }
-  const state = new Int32Array(sharedState)
-  const size = sharedBuffer.byteLength
-  const buffer = Buffer.from(sharedBuffer)
-  const view = new DataView(sharedBuffer)
+export class Writer {
+  #state
+  #size
+  #int32
+  #data
+  #readPos
+  #writePos
+  #yielding
+  #corked
+  #pending
+  #onYield
+  #logger
+  #uncorkBound
+  constructor(state                           , { yield: onYield, logger }                = {}) {
+    const sharedBuffer = state instanceof SharedArrayBuffer ? state : state.buffer
+    if (onYield != null && typeof onYield !== 'function') {
+      throw new TypeError('onYield must be a function')
+    }
-  // This object is reused to avoid creating new objects in a hot path.
-  // This helps V8 maintain a stable hidden class for the object,
-  // which is a key optimization (zero-copy read).
-  const data               = { buffer, view, offset: 0, length: 0, byteOffset: 0, byteLength: 0 }
+    const size = sharedBuffer.byteLength - STATE_BYTES
-  // Local copies of the pointers. The `| 0` is a hint to the V8 JIT
-  // compiler that these are 32-bit integers, enabling optimizations.
-  let readPos = Atomics.load(state, READ_INDEX) | 0
-  let writePos = Atomics.load(state, WRITE_INDEX) | 0
+    this.#state = new Int32Array(sharedBuffer, 0, STATE_BYTES >> 2)
+    this.#size = size
+    this.#int32 = new Int32Array(sharedBuffer, STATE_BYTES)
-  let yielding = 0
-  let corked = 0
-  let pending = 0
+    // This object is reused to avoid creating new objects in a hot path.
+    // This helps V8 maintain a stable hidden class for the object,
+    // which is a key optimization (zero-copy write).
+    this.#data = {
+      buffer: Buffer.from(sharedBuffer, STATE_BYTES, size),
+      view: new DataView(sharedBuffer, STATE_BYTES, size),
+      byteOffset: 0,
+      byteLength: 0,
+    }
-  if (onYield != null && typeof onYield !== 'function') {
-    throw new TypeError('onYield must be a function')
+    // Local copies of the pointers. The `| 0` is a hint to the V8 JIT
+    // compiler that these are 32-bit integers, enabling optimizations.
+    this.#readPos = Atomics.load(this.#state, READ_INDEX) | 0
+    this.#writePos = Atomics.load(this.#state, WRITE_INDEX) | 0
+    this.#yielding = 0
+    this.#corked = 0
+    this.#pending = 0
+    this.#onYield = onYield
+    this.#logger = logger
+    this.#uncorkBound = this.uncork.bind(this)
   }
   /**
    * Pauses the writer thread to wait for the reader to catch up.
    */
-  function _yield(delay        )       {
-    if (yielding > 128) {
+  #yield(delay        ) {
+    if (this.#yielding > 128) {
       throw new Error('Detected possible deadlock: writer yielding too many times')
     }
     // First, ensure the very latest write position is visible to the reader.
-    _flush()
+    this.flushSync()
-    if (onYield) {
-      yielding += 1
+    if (this.#onYield) {
+      this.#yielding += 1
       try {
         // Call the user-provided yield function, if any. This can be important
         // if the writer is waiting for the reader to process data which would
         // otherwise deadlock.
-        onYield()
+        this.#onYield()
       } finally {
-        yielding -= 1
+        this.#yielding -= 1
       }
     }
     // Atomics.wait is the most efficient way to pause. It puts the thread
     // to sleep, consuming no CPU, until the reader changes the READ_INDEX.
     if (delay > 0) {
-      Atomics.wait(state, READ_INDEX, readPos, delay)
+      Atomics.wait(this.#state, READ_INDEX, this.#readPos, delay)
     } else {
       // @ts-expect-error Atomics.pause is Stage 3, available in Node.js 25+
       Atomics.pause()
     }
     // After waking up, refresh the local view of the reader's position.
-    readPos = Atomics.load(state, READ_INDEX) | 0
+    this.#readPos = Atomics.load(this.#state, READ_INDEX) | 0
   }
   /**
    * Tries to acquire enough space in the buffer for a new message.
    */
-  function _acquire(len        )          {
-    // Total space required: payload + its 4-byte length header + a potential
+  #acquire(len        ) {
+    // Total space required: aligned payload + its 4-byte length header + a potential
     // 4-byte header for the *next* message (for wrap-around check).
-    const required = len + 4 + 4
+    const required = ((len + 3) & ~3) + 4 + 4
+    const size = this.#size
+    const state = this.#state
+    const int32 = this.#int32
-    if (writePos >= readPos) {
+    if (this.#writePos >= this.#readPos) {
       // Case 1: The writer is ahead of the reader. [ 0 - R ... W - size ]
       // There is free space from W to the end (s) and from 0 to R.
-      if (size - writePos >= required) {
+      if (size - this.#writePos >= required) {
         // Enough space at the end of the buffer.
         return true
       }
-      readPos = state[READ_INDEX] | 0
-      if (readPos === 0) {
-        _yield(0)
+      this.#readPos = state[READ_INDEX] | 0
+      if (this.#readPos === 0) {
+        this.#yield(0)
       }
       // Not enough space at the end. Check if there's space at the beginning.
-      if (readPos === 0) {
+      if (this.#readPos === 0) {
         // Reader is at the beginning, so no space to wrap around into.
         return false
       }
       // Mark the current position with a wrap-around signal (-1).
-      view.setInt32(writePos, -1, true)
+      int32[this.#writePos >> 2] = -1
       // Reset writer position to the beginning.
-      writePos = 0
+      this.#writePos = 0
-      if (writePos + 4 > size) {
+      if (this.#writePos + 4 > size) {
         // assertion
-        throw new Error(`Write position ${writePos} with next header exceeds buffer size ${size}`)
+        throw new Error(
+          `Write position ${this.#writePos} with next header exceeds buffer size ${size}`,
+        )
       }
-      if (writePos === readPos) {
+      if (this.#writePos === this.#readPos) {
         // assertion
-        throw new Error(`Write position ${writePos} cannot equal read position ${readPos}`)
+        throw new Error(
+          `Write position ${this.#writePos} cannot equal read position ${this.#readPos}`,
+        )
       }
-      Atomics.store(state, WRITE_INDEX, writePos)
+      Atomics.store(state, WRITE_INDEX, this.#writePos)
     }
     // Case 2: The writer has wrapped around. [ 0 ... W - R ... s ]
     // The only free space is between W and R.
-    readPos = state[READ_INDEX] | 0
-    if (readPos - writePos < required) {
-      _yield(0)
-    }
-    return readPos - writePos >= required
-  }
-  /**
-   * "Uncorks" the stream by publishing the pending write position.
-   * This is called from a microtask to batch atomic stores.
-   */
-  function _uncork()       {
-    corked -= 1
-    if (corked === 0) {
-      _flush()
+    this.#readPos = state[READ_INDEX] | 0
+    if (this.#readPos - this.#writePos < required) {
+      this.#yield(0)
     }
-  }
-  function _flush()       {
-    if (pending > 0) {
-      Atomics.store(state, WRITE_INDEX, writePos)
-      pending = 0
-    }
+    return this.#readPos - this.#writePos >= required
   }
   /**
    * Performs the actual write into the buffer after space has been acquired.
    */
-  function _write   (
-    dataCap        ,
-    fn                                            ,
-    opaque    ,
-  )       {
-    const dataPos = writePos + 4
-    data.offset = dataPos
+  #write   (dataCap        , fn                                            , opaque    ) {
+    const dataPos = this.#writePos + 4
+    const data = this.#data
     data.byteOffset = dataPos
-    data.length = dataCap
     data.byteLength = dataCap
     // The user-provided function writes the data and returns the final position.
@@ -347,56 +344,54 @@ export function writer(
       throw new RangeError(`"fn" returned a number ${dataLen} that exceeds capacity ${dataCap}`)
     }
+    const size = this.#size
     if (dataPos + dataLen > size) {
       // assertion
       throw new Error(`Data position ${dataPos} with length ${dataLen} exceeds buffer size ${size}`)
     }
-    const nextPos = writePos + 4 + dataLen
+    const alignedLen = (dataLen + 3) & ~3
+    const nextPos = this.#writePos + 4 + alignedLen
     if (nextPos + 4 > size) {
       // assertion
       throw new Error(`Write position ${nextPos} with next header exceeds buffer size ${size}`)
     }
-    if (nextPos === readPos) {
+    if (nextPos === this.#readPos) {
       // assertion
-      throw new Error(`Write position ${nextPos} cannot equal read position ${readPos}`)
+      throw new Error(`Write position ${nextPos} cannot equal read position ${this.#readPos}`)
     }
     // Write the actual length of the data into the 4-byte header.
-    view.setInt32(writePos, dataLen, true)
-    writePos += 4 + dataLen
-    pending += 4 + dataLen
+    this.#int32[this.#writePos >> 2] = dataLen
+    this.#writePos += 4 + alignedLen
+    this.#pending += 4 + alignedLen
     // This is the "corking" optimization. Instead of calling Atomics.store
     // on every write, we batch them. We either write when a certain
     // amount of data is pending (HWM_BYTES) or at the end of the current
     // event loop tick. This drastically reduces atomic operation overhead.
-    if (pending >= HWM_BYTES) {
-      Atomics.store(state, WRITE_INDEX, writePos)
-      pending = 0
-    } else if (corked === 0) {
-      corked += 1
-      setImmediate(_uncork)
+    if (this.#pending >= HWM_BYTES) {
+      Atomics.store(this.#state, WRITE_INDEX, this.#writePos)
+      this.#pending = 0
+    } else if (this.#corked === 0) {
+      this.#corked += 1
+      setImmediate(this.#uncorkBound)
     }
   }
   /**
-   * Public write method. Acquires space and synchronously writes data with a timeout. Will
-   * wait until space is available.
+   * Synchronously writes a message. Blocks (via `Atomics.wait`) until buffer space is available.
    * Writing more than "len" bytes in the callback will cause undefined behavior.
    */
-  function writeSync   (
-    len        ,
-    fn                                            ,
-    opaque    ,
-  )       {
+  writeSync   (len        , fn                                            , opaque    ) {
     if (typeof len !== 'number') {
       throw new TypeError('"len" must be a non-negative number')
     }
     if (len < 0) {
       throw new RangeError(`"len" ${len} is negative`)
     }
+    const size = this.#size
     if (len >= 2 ** 31 || len > size - 8) {
       throw new Error(`"len" ${len} exceeds maximum allowed size ${size - 8}`)
     }
@@ -404,48 +399,53 @@ export function writer(
       throw new TypeError('"fn" must be a function')
     }
-    if (!_acquire(len)) {
+    if (!this.#acquire(len)) {
       const startTime = performance.now()
       let yieldCount = 0
       let yieldTime = 0
-      for (let n = 0; !_acquire(len); n++) {
+      for (let n = 0; !this.#acquire(len); n++) {
         if (performance.now() - startTime > 60e3) {
           throw new Error('Timeout while waiting for space in the buffer')
         }
-        _yield(3)
+        this.#yield(3)
         yieldCount += 1
         yieldTime += 3
       }
       const elapsedTime = performance.now() - startTime
-      logger?.warn(
-        { yieldLength: len, readPos, writePos, elapsedTime, yieldCount, yieldTime },
+      this.#logger?.warn(
+        {
+          yieldLength: len,
+          readPos: this.#readPos,
+          writePos: this.#writePos,
+          elapsedTime,
+          yieldCount,
+          yieldTime,
+        },
         'yielded',
       )
     }
-    _write(len, fn, opaque)
+    this.#write(len, fn, opaque)
-    if (writePos === readPos) {
-      throw new Error(`Write position ${writePos} cannot equal read position ${readPos}`)
+    if (this.#writePos === this.#readPos) {
+      throw new Error(
+        `Write position ${this.#writePos} cannot equal read position ${this.#readPos}`,
+      )
     }
   }
   /**
-   * Public write method. Acquires space and tries to write data.
+   * Non-blocking write attempt. Returns `false` if the buffer is full.
    * Writing more than "len" bytes in the callback will cause undefined behavior.
    */
-  function tryWrite   (
-    len        ,
-    fn                                            ,
-    opaque    ,
-  )          {
+  tryWrite   (len        , fn                                            , opaque    ) {
     if (typeof len !== 'number') {
       throw new TypeError('"len" must be a non-negative number')
     }
     if (len < 0) {
       throw new RangeError(`"len" ${len} is negative`)
     }
+    const size = this.#size
     if (len >= 2 ** 31 || len > size - 8) {
       throw new Error(`"len" ${len} exceeds maximum allowed size ${size - 8}`)
     }
@@ -453,29 +453,54 @@ export function writer(
       throw new TypeError('"fn" must be a function')
     }
-    if (!_acquire(len)) {
+    if (!this.#acquire(len)) {
       return false
     }
-    _write(len, fn, opaque)
+    this.#write(len, fn, opaque)
-    if (writePos === readPos) {
-      throw new Error(`Write position ${writePos} cannot equal read position ${readPos}`)
+    if (this.#writePos === this.#readPos) {
+      throw new Error(
+        `Write position ${this.#writePos} cannot equal read position ${this.#readPos}`,
+      )
     }
     return true
   }
-  function cork   (callback          )                {
-    corked += 1
+  /**
+   * Batches multiple writes within the callback. The write pointer is only
+   * published to the reader when cork returns, reducing atomic operation overhead.
+   */
+  cork   (callback          ) {
+    this.#corked += 1
     if (callback != null) {
       try {
         return callback()
       } finally {
-        _uncork()
+        this.uncork()
       }
     }
   }
-  return { tryWrite, writeSync, cork, uncork: _uncork, flushSync: _flush }
+  /**
+   * Publishes the pending write position to the reader.
+   */
+  uncork() {
+    if (this.#corked === 0) {
+      return
+    }
+    this.#corked -= 1
+    if (this.#corked === 0) {
+      this.flushSync()
+    }
+  }
+  flushSync() {
+    if (this.#pending > 0) {
+      Atomics.store(this.#state, WRITE_INDEX, this.#writePos)
+      this.#pending = 0
+    }
+  }
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@nxtedition/shared",
-  "version": "3.0.2",
+  "version": "4.0.1",
   "type": "module",
   "main": "lib/index.js",
   "types": "lib/index.d.ts",
@@ -26,6 +26,5 @@
     "oxlint-tsgolint": "^0.13.0",
     "rimraf": "^6.1.3",
     "typescript": "^5.9.3"
-  },
-  "gitHead": "3648df9e97a19a6ebdf497afb1845a01b5301460"
+  }
 }