thread-stream 4.1.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(npx tap:*)",
5
+ "Bash(node:*)",
6
+ "Bash(for i in 1 2 3 4 5)",
7
+ "Bash(do)",
8
+ "Bash(echo:*)",
9
+ "Bash(done)",
10
+ "Bash(npm test:*)"
11
+ ],
12
+ "deny": [],
13
+ "ask": []
14
+ }
15
+ }
package/index.js CHANGED
@@ -8,18 +8,33 @@ const { pathToFileURL } = require('url')
8
8
  const { wait } = require('./lib/wait')
9
9
  const {
10
10
  WRITE_INDEX,
11
- READ_INDEX
11
+ READ_INDEX,
12
+ SEQ_INDEX
12
13
  } = require('./lib/indexes')
13
14
  const buffer = require('buffer')
14
15
  const assert = require('assert')
15
16
 
16
17
  const kImpl = Symbol('kImpl')
17
18
 
18
- // V8 limit for string size
19
+ // Maximum pending buffered data before forcing a synchronous drain
19
20
  const MAX_STRING = buffer.constants.MAX_STRING_LENGTH
20
21
 
21
22
  function noop () {}
22
23
 
24
+ function updateState (stream, fn) {
25
+ Atomics.add(stream[kImpl].state, SEQ_INDEX, 1)
26
+ fn()
27
+ Atomics.add(stream[kImpl].state, SEQ_INDEX, 1)
28
+ Atomics.notify(stream[kImpl].state, SEQ_INDEX)
29
+ }
30
+
31
+ function resetIndexes (stream) {
32
+ updateState(stream, () => {
33
+ Atomics.store(stream[kImpl].state, READ_INDEX, 0)
34
+ Atomics.store(stream[kImpl].state, WRITE_INDEX, 0)
35
+ })
36
+ }
37
+
23
38
  class FakeWeakRef {
24
39
  constructor (value) {
25
40
  this._value = value
@@ -93,66 +108,46 @@ function drain (stream) {
93
108
  }
94
109
 
95
110
  function nextFlush (stream) {
96
- const writeIndex = Atomics.load(stream[kImpl].state, WRITE_INDEX)
97
- let leftover = stream[kImpl].data.length - writeIndex
111
+ while (true) {
112
+ const writeIndex = Atomics.load(stream[kImpl].state, WRITE_INDEX)
113
+ const leftover = stream[kImpl].data.length - writeIndex
114
+
115
+ if (leftover > 0) {
116
+ if (stream[kImpl].bufLen === 0) {
117
+ stream[kImpl].flushing = false
98
118
 
99
- if (leftover > 0) {
100
- if (stream[kImpl].buf.length === 0) {
101
- stream[kImpl].flushing = false
119
+ if (stream[kImpl].ending) {
120
+ end(stream)
121
+ } else if (stream[kImpl].needDrain) {
122
+ process.nextTick(drain, stream)
123
+ }
102
124
 
103
- if (stream[kImpl].ending) {
104
- end(stream)
105
- } else if (stream[kImpl].needDrain) {
106
- process.nextTick(drain, stream)
125
+ return
107
126
  }
108
127
 
109
- return
128
+ write(stream, leftover, noop)
129
+ continue
110
130
  }
111
131
 
112
- let toWrite = stream[kImpl].buf.slice(0, leftover)
113
- let toWriteBytes = Buffer.byteLength(toWrite)
114
- if (toWriteBytes <= leftover) {
115
- stream[kImpl].buf = stream[kImpl].buf.slice(leftover)
116
- // process._rawDebug('writing ' + toWrite.length)
117
- write(stream, toWrite, nextFlush.bind(null, stream))
118
- } else {
119
- // multi-byte utf-8
132
+ if (leftover === 0) {
133
+ if (writeIndex === 0 && stream[kImpl].bufLen === 0) {
134
+ // we had a flushSync in the meanwhile
135
+ return
136
+ }
120
137
  waitForRead(stream, () => {
121
- // err is already handled in waitForRead()
122
138
  if (stream.destroyed) {
123
139
  return
124
140
  }
125
141
 
126
- Atomics.store(stream[kImpl].state, READ_INDEX, 0)
127
- Atomics.store(stream[kImpl].state, WRITE_INDEX, 0)
128
- Atomics.notify(stream[kImpl].state, READ_INDEX)
129
-
130
- // Find a toWrite length that fits the buffer
131
- // it must exists as the buffer is at least 4 bytes length
132
- // and the max utf-8 length for a char is 4 bytes.
133
- while (toWriteBytes > stream[kImpl].data.length) {
134
- leftover = leftover / 2
135
- toWrite = stream[kImpl].buf.slice(0, leftover)
136
- toWriteBytes = Buffer.byteLength(toWrite)
137
- }
138
- stream[kImpl].buf = stream[kImpl].buf.slice(leftover)
139
- write(stream, toWrite, nextFlush.bind(null, stream))
142
+ resetIndexes(stream)
143
+ nextFlush(stream)
140
144
  })
141
- }
142
- } else if (leftover === 0) {
143
- if (writeIndex === 0 && stream[kImpl].buf.length === 0) {
144
- // we had a flushSync in the meanwhile
145
145
  return
146
146
  }
147
- waitForRead(stream, () => {
148
- Atomics.store(stream[kImpl].state, READ_INDEX, 0)
149
- Atomics.store(stream[kImpl].state, WRITE_INDEX, 0)
150
- Atomics.notify(stream[kImpl].state, READ_INDEX)
151
- nextFlush(stream)
152
- })
153
- } else {
147
+
154
148
  // This should never happen
155
149
  destroy(stream, new Error('overwritten'))
150
+ return
156
151
  }
157
152
  }
158
153
 
@@ -248,7 +243,9 @@ class ThreadStream extends EventEmitter {
248
243
  this[kImpl].finished = false
249
244
  this[kImpl].errored = null
250
245
  this[kImpl].closed = false
251
- this[kImpl].buf = ''
246
+ this[kImpl].buf = []
247
+ this[kImpl].bufHead = 0
248
+ this[kImpl].bufLen = 0
252
249
  this[kImpl].flushCallbacks = new Map()
253
250
  this[kImpl].nextFlushId = 0
254
251
 
@@ -260,6 +257,7 @@ class ThreadStream extends EventEmitter {
260
257
  }
261
258
 
262
259
  write (data) {
260
+ const dataBuf = Buffer.isBuffer(data) ? data : Buffer.from(data)
263
261
  if (this[kImpl].destroyed) {
264
262
  error(this, new Error('the worker has exited'))
265
263
  return false
@@ -270,7 +268,7 @@ class ThreadStream extends EventEmitter {
270
268
  return false
271
269
  }
272
270
 
273
- if (this[kImpl].flushing && this[kImpl].buf.length + data.length >= MAX_STRING) {
271
+ if (this[kImpl].flushing && this[kImpl].bufLen + dataBuf.length >= MAX_STRING) {
274
272
  try {
275
273
  writeSync(this)
276
274
  this[kImpl].flushing = true
@@ -280,7 +278,8 @@ class ThreadStream extends EventEmitter {
280
278
  }
281
279
  }
282
280
 
283
- this[kImpl].buf += data
281
+ this[kImpl].buf.push(dataBuf)
282
+ this[kImpl].bufLen += dataBuf.length
284
283
 
285
284
  if (this[kImpl].sync) {
286
285
  try {
@@ -297,7 +296,7 @@ class ThreadStream extends EventEmitter {
297
296
  setImmediate(nextFlush, this)
298
297
  }
299
298
 
300
- this[kImpl].needDrain = this[kImpl].data.length - this[kImpl].buf.length - Atomics.load(this[kImpl].state, WRITE_INDEX) <= 0
299
+ this[kImpl].needDrain = this[kImpl].data.length - this[kImpl].bufLen - Atomics.load(this[kImpl].state, WRITE_INDEX) <= 0
301
300
  return !this[kImpl].needDrain
302
301
  }
303
302
 
@@ -383,7 +382,7 @@ function flushBuffer (stream, cb) {
383
382
  return
384
383
  }
385
384
 
386
- if (!stream[kImpl].sync && (stream[kImpl].flushing || stream[kImpl].buf.length > 0)) {
385
+ if (!stream[kImpl].sync && (stream[kImpl].flushing || stream[kImpl].bufLen > 0)) {
387
386
  setImmediate(flushBuffer, stream, cb)
388
387
  return
389
388
  }
@@ -497,13 +496,43 @@ function destroy (stream, err) {
497
496
  }
498
497
  }
499
498
 
500
- function write (stream, data, cb) {
499
+ function write (stream, maxBytes, cb) {
501
500
  // callers pass maxBytes no larger than the free space left in the shared buffer
502
501
  const current = Atomics.load(stream[kImpl].state, WRITE_INDEX)
503
- const length = Buffer.byteLength(data)
504
- stream[kImpl].data.write(data, current)
505
- Atomics.store(stream[kImpl].state, WRITE_INDEX, current + length)
506
- Atomics.notify(stream[kImpl].state, WRITE_INDEX)
502
+ let offset = current
503
+ let remaining = maxBytes
504
+
505
+ while (remaining > 0 && stream[kImpl].bufLen !== 0) {
506
+ const head = stream[kImpl].bufHead
507
+ const buf = stream[kImpl].buf[head]
508
+
509
+ if (buf.length <= remaining) {
510
+ buf.copy(stream[kImpl].data, offset)
511
+ offset += buf.length
512
+ remaining -= buf.length
513
+ stream[kImpl].bufLen -= buf.length
514
+ stream[kImpl].bufHead = head + 1
515
+
516
+ if (stream[kImpl].bufHead === stream[kImpl].buf.length) {
517
+ stream[kImpl].buf.length = 0
518
+ stream[kImpl].bufHead = 0
519
+ } else if (stream[kImpl].bufHead >= 1024 && stream[kImpl].bufHead * 2 >= stream[kImpl].buf.length) {
520
+ stream[kImpl].buf.splice(0, stream[kImpl].bufHead)
521
+ stream[kImpl].bufHead = 0
522
+ }
523
+ continue
524
+ }
525
+
526
+ buf.copy(stream[kImpl].data, offset, 0, remaining)
527
+ stream[kImpl].buf[head] = buf.subarray(remaining)
528
+ stream[kImpl].bufLen -= remaining
529
+ offset += remaining
530
+ remaining = 0
531
+ }
532
+
533
+ updateState(stream, () => {
534
+ Atomics.store(stream[kImpl].state, WRITE_INDEX, offset)
535
+ })
507
536
  cb()
508
537
  return true
509
538
  }
@@ -520,9 +549,10 @@ function end (stream) {
520
549
  let readIndex = Atomics.load(stream[kImpl].state, READ_INDEX)
521
550
 
522
551
  // process._rawDebug('writing index')
523
- Atomics.store(stream[kImpl].state, WRITE_INDEX, -1)
552
+ updateState(stream, () => {
553
+ Atomics.store(stream[kImpl].state, WRITE_INDEX, -1)
554
+ })
524
555
  // process._rawDebug(`(end) readIndex (${Atomics.load(stream.state, READ_INDEX)}) writeIndex (${Atomics.load(stream.state, WRITE_INDEX)})`)
525
- Atomics.notify(stream[kImpl].state, WRITE_INDEX)
526
556
 
527
557
  // Wait for the process to complete
528
558
  let spins = 0
@@ -562,44 +592,19 @@ function writeSync (stream) {
562
592
  }
563
593
  stream[kImpl].flushing = false
564
594
 
565
- while (stream[kImpl].buf.length !== 0) {
595
+ while (stream[kImpl].bufLen !== 0) {
566
596
  const writeIndex = Atomics.load(stream[kImpl].state, WRITE_INDEX)
567
- let leftover = stream[kImpl].data.length - writeIndex
597
+ const leftover = stream[kImpl].data.length - writeIndex
568
598
  if (leftover === 0) {
569
599
  flushSync(stream)
570
- Atomics.store(stream[kImpl].state, READ_INDEX, 0)
571
- Atomics.store(stream[kImpl].state, WRITE_INDEX, 0)
572
- Atomics.notify(stream[kImpl].state, READ_INDEX)
600
+ resetIndexes(stream)
573
601
  continue
574
602
  } else if (leftover < 0) {
575
603
  // this should never happen
576
604
  throw new Error('overwritten')
577
605
  }
578
606
 
579
- let toWrite = stream[kImpl].buf.slice(0, leftover)
580
- let toWriteBytes = Buffer.byteLength(toWrite)
581
- if (toWriteBytes <= leftover) {
582
- stream[kImpl].buf = stream[kImpl].buf.slice(leftover)
583
- // process._rawDebug('writing ' + toWrite.length)
584
- write(stream, toWrite, cb)
585
- } else {
586
- // multi-byte utf-8
587
- flushSync(stream)
588
- Atomics.store(stream[kImpl].state, READ_INDEX, 0)
589
- Atomics.store(stream[kImpl].state, WRITE_INDEX, 0)
590
- Atomics.notify(stream[kImpl].state, READ_INDEX)
591
-
592
- // Find a toWrite length that fits the buffer
593
- // it must exists as the buffer is at least 4 bytes length
594
- // and the max utf-8 length for a char is 4 bytes.
595
- while (toWriteBytes > stream[kImpl].buf.length) {
596
- leftover = leftover / 2
597
- toWrite = stream[kImpl].buf.slice(0, leftover)
598
- toWriteBytes = Buffer.byteLength(toWrite)
599
- }
600
- stream[kImpl].buf = stream[kImpl].buf.slice(leftover)
601
- write(stream, toWrite, cb)
602
- }
607
+ write(stream, leftover, cb)
603
608
  }
604
609
  }
605
610
 
package/lib/indexes.js CHANGED
@@ -1,9 +1,11 @@
1
1
  'use strict'
2
2
 
3
+ const SEQ_INDEX = 2
3
4
  const WRITE_INDEX = 4
4
5
  const READ_INDEX = 8
5
6
 
6
7
  module.exports = {
7
8
  WRITE_INDEX,
8
- READ_INDEX
9
+ READ_INDEX,
10
+ SEQ_INDEX
9
11
  }
package/lib/wait.js CHANGED
@@ -50,12 +50,21 @@ function waitDiff (state, index, expected, timeout, done) {
50
50
  return
51
51
  }
52
52
 
53
- // Wait for value to change from expected
53
+ // Wait for value to change from expected.
54
+ // If we are notified, resume immediately even if the value cycled back
55
+ // to the same number before we could re-read it.
54
56
  const remaining = max === Infinity ? WAIT_MS : Math.min(WAIT_MS, Math.max(1, max - Date.now()))
55
57
  const result = Atomics.waitAsync(state, index, expected, remaining)
56
58
 
57
59
  if (result.async) {
58
- result.value.then(check)
60
+ result.value.then((res) => {
61
+ if (res === 'ok') {
62
+ done(null, 'ok')
63
+ return
64
+ }
65
+
66
+ check()
67
+ })
59
68
  } else {
60
69
  // Value already changed (not-equal) - recheck on next tick
61
70
  setImmediate(check)
package/lib/worker.js CHANGED
@@ -2,7 +2,8 @@
2
2
 
3
3
  const { realImport, realRequire } = require('real-require')
4
4
  const { workerData, parentPort } = require('worker_threads')
5
- const { WRITE_INDEX, READ_INDEX } = require('./indexes')
5
+ const { StringDecoder } = require('string_decoder')
6
+ const { WRITE_INDEX, READ_INDEX, SEQ_INDEX } = require('./indexes')
6
7
  const { waitDiff } = require('./wait')
7
8
 
8
9
  const {
@@ -17,6 +18,7 @@ let flushing = false
17
18
 
18
19
  const state = new Int32Array(stateBuf)
19
20
  const data = Buffer.from(dataBuf)
21
+ const decoder = new StringDecoder('utf8')
20
22
 
21
23
  // Keep the event loop alive - Atomics.waitAsync promises don't prevent worker exit
22
24
  const keepAlive = setInterval(() => {}, 60 * 60 * 1000)
@@ -207,18 +209,30 @@ start().then(function () {
207
209
  process.nextTick(run)
208
210
  })
209
211
 
212
+ function readState () {
213
+ while (true) {
214
+ const seq = Atomics.load(state, SEQ_INDEX)
215
+
216
+ if ((seq & 1) !== 0) {
217
+ continue
218
+ }
219
+
220
+ const current = Atomics.load(state, READ_INDEX)
221
+ const end = Atomics.load(state, WRITE_INDEX)
222
+
223
+ if (seq === Atomics.load(state, SEQ_INDEX)) {
224
+ return { current, end, seq }
225
+ }
226
+ }
227
+ }
228
+
210
229
  function run () {
211
- const current = Atomics.load(state, READ_INDEX)
212
- const end = Atomics.load(state, WRITE_INDEX)
230
+ const { current, end, seq } = readState()
213
231
 
214
232
  // process._rawDebug(`pre state ${current} ${end}`)
215
233
 
216
234
  if (end === current) {
217
- if (end === data.length) {
218
- waitDiff(state, READ_INDEX, end, Infinity, run)
219
- } else {
220
- waitDiff(state, WRITE_INDEX, end, Infinity, run)
221
- }
235
+ waitDiff(state, SEQ_INDEX, seq, Infinity, run)
222
236
  return
223
237
  }
224
238
 
@@ -226,11 +240,15 @@ function run () {
226
240
 
227
241
  if (end === -1) {
228
242
  // process._rawDebug('end')
243
+ const remaining = decoder.end()
244
+ if (remaining.length > 0) {
245
+ destination.write(remaining)
246
+ }
229
247
  destination.end()
230
248
  return
231
249
  }
232
250
 
233
- const toWrite = data.toString('utf8', current, end)
251
+ const toWrite = decoder.write(data.subarray(current, end))
234
252
  // process._rawDebug('worker writing: ' + toWrite)
235
253
 
236
254
  const res = destination.write(toWrite)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thread-stream",
3
- "version": "4.1.0",
3
+ "version": "4.2.0",
4
4
  "description": "A streaming way to send data to a Node.js Worker Thread",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
@@ -18,7 +18,7 @@
18
18
  "eslint": "^9.39.1",
19
19
  "fastbench": "^1.0.1",
20
20
  "neostandard": "^0.13.0",
21
- "pino-elasticsearch": "^8.0.0",
21
+ "pino-elasticsearch": "^9.0.0",
22
22
  "sonic-boom": "^5.0.0",
23
23
  "ts-node": "^10.8.0",
24
24
  "typescript": "~5.7.3"
@@ -0,0 +1,33 @@
1
+ import { test } from 'node:test'
2
+ import assert from 'node:assert/strict'
3
+ import { readFile } from 'node:fs/promises'
4
+ import ThreadStream from '../index.js'
5
+ import { join } from 'desm'
6
+ import { file } from './helper.js'
7
+
8
+ test('preserves multibyte records that cross the buffer boundary', async () => {
9
+ const dest = file()
10
+ const stream = new ThreadStream({
11
+ bufferSize: 128,
12
+ filename: join(import.meta.url, 'to-file.js'),
13
+ workerData: { dest },
14
+ sync: false
15
+ })
16
+
17
+ let expected = ''
18
+
19
+ for (let i = 0; i < 1000; i++) {
20
+ const line = `{"idx":${i},"alert":"🚨"}\n`
21
+ expected += line
22
+ stream.write(line)
23
+ }
24
+
25
+ await new Promise((resolve, reject) => {
26
+ stream.once('error', reject)
27
+ stream.once('close', resolve)
28
+ stream.end()
29
+ })
30
+
31
+ const data = await readFile(dest, 'utf8')
32
+ assert.strictEqual(data, expected)
33
+ })