@naturalcycles/nodejs-lib 15.23.0 → 15.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
1
  import { type Transform } from 'node:stream';
2
+ import type { ReadableStream as WebReadableStream } from 'node:stream/web';
3
+ import { type ZlibOptions } from 'node:zlib';
2
4
  import type { AbortableAsyncMapper, AsyncIndexedMapper, AsyncPredicate, END, IndexedMapper, NonNegativeInteger, PositiveInteger, Predicate, SKIP } from '@naturalcycles/js-lib/types';
3
5
  import type { ReadableTyped, TransformOptions, TransformTyped, WritableTyped } from './stream.model.js';
4
6
  import { type TransformLogProgressOptions } from './transform/transformLogProgress.js';
@@ -13,15 +15,18 @@ export declare class Pipeline<T> {
13
15
  private transforms;
14
16
  private destination?;
15
17
  private readableLimit?;
18
+ private objectMode;
16
19
  private abortableSignal;
17
20
  private constructor();
18
21
  static from<T>(source: ReadableTyped<T>): Pipeline<T>;
22
+ static fromWeb<T>(webReadableStream: WebReadableStream<T>): Pipeline<T>;
19
23
  /**
20
24
  * Technically same as `fromIterable` (since Array is Iterable),
21
25
  * but named a bit friendlier.
22
26
  */
23
27
  static fromArray<T>(input: T[]): Pipeline<T>;
24
28
  static fromIterable<T>(input: Iterable<T> | AsyncIterable<T>): Pipeline<T>;
29
+ static fromFile(sourceFilePath: string): Pipeline<Uint8Array>;
25
30
  static fromNDJsonFile<T>(sourceFilePath: string): Pipeline<T>;
26
31
  /**
27
32
  * Limits the source Readable by using `.take(limit)` on it.
@@ -69,6 +74,17 @@ export declare class Pipeline<T> {
69
74
  * No runtime effect.
70
75
  */
71
76
  typeCastAs<TO>(): Pipeline<TO>;
77
+ setObjectMode(objectMode: boolean): this;
78
+ /**
79
+ * Transform the stream of Objects into a stream of JSON lines.
80
+ * Technically, it goes into objectMode=false, so it's a binary stream at the end.
81
+ */
82
+ toNDJson(): Pipeline<Uint8Array>;
83
+ parseNDJson<TO = unknown>(this: Pipeline<Uint8Array>): Pipeline<TO>;
84
+ splitOnNewline(this: Pipeline<Uint8Array>): Pipeline<Buffer>;
85
+ parseJson<TO = unknown>(this: Pipeline<Buffer> | Pipeline<Uint8Array> | Pipeline<string>): Pipeline<TO>;
86
+ gzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array>;
87
+ gunzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array>;
72
88
  toArray(opt?: TransformOptions): Promise<T[]>;
73
89
  toFile(outputFilePath: string): Promise<void>;
74
90
  toNDJsonFile(outputFilePath: string): Promise<void>;
@@ -1,9 +1,11 @@
1
1
  import { Readable } from 'node:stream';
2
2
  import { pipeline } from 'node:stream/promises';
3
+ import { createUnzip } from 'node:zlib';
3
4
  import { createGzip } from 'node:zlib';
4
5
  import { createAbortableSignal } from '@naturalcycles/js-lib';
5
6
  import { fs2 } from '../fs/fs2.js';
6
7
  import { createReadStreamAsNDJSON } from './ndjson/createReadStreamAsNDJSON.js';
8
+ import { transformJsonParse } from './ndjson/transformJsonParse.js';
7
9
  import { transformToNDJson } from './ndjson/transformToNDJson.js';
8
10
  import { PIPELINE_GRACEFUL_ABORT } from './stream.util.js';
9
11
  import { transformChunk } from './transform/transformChunk.js';
@@ -15,6 +17,7 @@ import { transformMap } from './transform/transformMap.js';
15
17
  import { transformMapSimple, } from './transform/transformMapSimple.js';
16
18
  import { transformMapSync } from './transform/transformMapSync.js';
17
19
  import { transformOffset } from './transform/transformOffset.js';
20
+ import { transformSplitOnNewline } from './transform/transformSplit.js';
18
21
  import { transformTap } from './transform/transformTap.js';
19
22
  import { transformThrottle } from './transform/transformThrottle.js';
20
23
  import { writablePushToArray } from './writable/writablePushToArray.js';
@@ -25,13 +28,19 @@ export class Pipeline {
25
28
  transforms = [];
26
29
  destination;
27
30
  readableLimit;
31
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
32
+ objectMode;
28
33
  abortableSignal = createAbortableSignal();
29
- constructor(source) {
34
+ constructor(source, objectMode = true) {
30
35
  this.source = source;
36
+ this.objectMode = objectMode;
31
37
  }
32
38
  static from(source) {
33
39
  return new Pipeline(source);
34
40
  }
41
+ static fromWeb(webReadableStream) {
42
+ return new Pipeline(Readable.fromWeb(webReadableStream));
43
+ }
35
44
  /**
36
45
  * Technically same as `fromIterable` (since Array is Iterable),
37
46
  * but named a bit friendlier.
@@ -42,6 +51,9 @@ export class Pipeline {
42
51
  static fromIterable(input) {
43
52
  return new Pipeline(Readable.from(input));
44
53
  }
54
+ static fromFile(sourceFilePath) {
55
+ return new Pipeline(fs2.createReadStream(sourceFilePath), false);
56
+ }
45
57
  static fromNDJsonFile(sourceFilePath) {
46
58
  return new Pipeline(createReadStreamAsNDJSON(sourceFilePath));
47
59
  }
@@ -156,6 +168,50 @@ export class Pipeline {
156
168
  typeCastAs() {
157
169
  return this;
158
170
  }
171
+ setObjectMode(objectMode) {
172
+ this.objectMode = objectMode;
173
+ return this;
174
+ }
175
+ /**
176
+ * Transform the stream of Objects into a stream of JSON lines.
177
+ * Technically, it goes into objectMode=false, so it's a binary stream at the end.
178
+ */
179
+ toNDJson() {
180
+ this.transforms.push(transformToNDJson());
181
+ this.objectMode = false;
182
+ return this;
183
+ }
184
+ parseNDJson() {
185
+ // It was said that transformJsonParse() separately is 10% or more slower than .map(line => JSON.parse(line))
186
+ // So, we can investigate a speedup
187
+ this.transforms.push(transformSplitOnNewline(), transformJsonParse());
188
+ this.objectMode = true;
189
+ return this;
190
+ }
191
+ splitOnNewline() {
192
+ // Input: objectMode=false - binary stream
193
+ // Output: objectMode=true - stream of Buffer objects, one per line (TODO: confirm whether consumers may treat them as strings)
194
+ this.transforms.push(transformSplitOnNewline());
195
+ this.objectMode = true;
196
+ return this;
197
+ }
198
+ parseJson() {
199
+ // Input: objectMode=false - takes a stream of strings one by one
200
+ // Output: objectMode=true - stream of json-parsed Objects
201
+ this.transforms.push(transformJsonParse());
202
+ this.objectMode = true;
203
+ return this;
204
+ }
205
+ gzip(opt) {
206
+ this.transforms.push(createGzip(opt));
207
+ this.objectMode = false;
208
+ return this;
209
+ }
210
+ gunzip(opt) {
211
+ this.transforms.push(createUnzip(opt));
212
+ this.objectMode = false;
213
+ return this;
214
+ }
159
215
  async toArray(opt) {
160
216
  const arr = [];
161
217
  this.destination = writablePushToArray(arr, opt);
@@ -11,8 +11,9 @@ import { Transform } from 'node:stream';
11
11
  export function transformSplitOnNewline() {
12
12
  let buffered;
13
13
  return new Transform({
14
- readableObjectMode: true,
14
+ writableObjectMode: false,
15
15
  writableHighWaterMark: 64 * 1024,
16
+ readableObjectMode: true,
16
17
  transform(buf, _enc, done) {
17
18
  let offset = 0;
18
19
  let lastMatch = 0;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@naturalcycles/nodejs-lib",
3
3
  "type": "module",
4
- "version": "15.23.0",
4
+ "version": "15.25.0",
5
5
  "dependencies": {
6
6
  "@naturalcycles/js-lib": "^15",
7
7
  "@types/js-yaml": "^4",
@@ -1,5 +1,7 @@
1
1
  import { Readable, type Transform } from 'node:stream'
2
2
  import { pipeline } from 'node:stream/promises'
3
+ import type { ReadableStream as WebReadableStream } from 'node:stream/web'
4
+ import { createUnzip, type ZlibOptions } from 'node:zlib'
3
5
  import { createGzip } from 'node:zlib'
4
6
  import { createAbortableSignal } from '@naturalcycles/js-lib'
5
7
  import type {
@@ -16,6 +18,7 @@ import type {
16
18
  } from '@naturalcycles/js-lib/types'
17
19
  import { fs2 } from '../fs/fs2.js'
18
20
  import { createReadStreamAsNDJSON } from './ndjson/createReadStreamAsNDJSON.js'
21
+ import { transformJsonParse } from './ndjson/transformJsonParse.js'
19
22
  import { transformToNDJson } from './ndjson/transformToNDJson.js'
20
23
  import type {
21
24
  ReadableTyped,
@@ -39,6 +42,7 @@ import {
39
42
  } from './transform/transformMapSimple.js'
40
43
  import { transformMapSync, type TransformMapSyncOptions } from './transform/transformMapSync.js'
41
44
  import { transformOffset, type TransformOffsetOptions } from './transform/transformOffset.js'
45
+ import { transformSplitOnNewline } from './transform/transformSplit.js'
42
46
  import { transformTap, type TransformTapOptions } from './transform/transformTap.js'
43
47
  import { transformThrottle, type TransformThrottleOptions } from './transform/transformThrottle.js'
44
48
  import { writablePushToArray } from './writable/writablePushToArray.js'
@@ -50,16 +54,23 @@ export class Pipeline<T> {
50
54
  private transforms: NodeJS.ReadWriteStream[] = []
51
55
  private destination?: NodeJS.WritableStream
52
56
  private readableLimit?: Integer
57
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
58
+ private objectMode: boolean
53
59
  private abortableSignal = createAbortableSignal()
54
60
 
55
- private constructor(source: ReadableTyped<T>) {
61
+ private constructor(source: ReadableTyped<T>, objectMode = true) {
56
62
  this.source = source
63
+ this.objectMode = objectMode
57
64
  }
58
65
 
59
66
  static from<T>(source: ReadableTyped<T>): Pipeline<T> {
60
67
  return new Pipeline(source)
61
68
  }
62
69
 
70
+ static fromWeb<T>(webReadableStream: WebReadableStream<T>): Pipeline<T> {
71
+ return new Pipeline(Readable.fromWeb(webReadableStream))
72
+ }
73
+
63
74
  /**
64
75
  * Technically same as `fromIterable` (since Array is Iterable),
65
76
  * but named a bit friendlier.
@@ -72,6 +83,10 @@ export class Pipeline<T> {
72
83
  return new Pipeline(Readable.from(input))
73
84
  }
74
85
 
86
+ static fromFile(sourceFilePath: string): Pipeline<Uint8Array> {
87
+ return new Pipeline(fs2.createReadStream(sourceFilePath), false)
88
+ }
89
+
75
90
  static fromNDJsonFile<T>(sourceFilePath: string): Pipeline<T> {
76
91
  return new Pipeline(createReadStreamAsNDJSON<T>(sourceFilePath))
77
92
  }
@@ -219,6 +234,59 @@ export class Pipeline<T> {
219
234
  return this as any
220
235
  }
221
236
 
237
+ setObjectMode(objectMode: boolean): this {
238
+ this.objectMode = objectMode
239
+ return this
240
+ }
241
+
242
+ /**
243
+ * Transform the stream of Objects into a stream of JSON lines.
244
+ * Technically, it goes into objectMode=false, so it's a binary stream at the end.
245
+ */
246
+ toNDJson(): Pipeline<Uint8Array> {
247
+ this.transforms.push(transformToNDJson())
248
+ this.objectMode = false
249
+ return this as any
250
+ }
251
+
252
+ parseNDJson<TO = unknown>(this: Pipeline<Uint8Array>): Pipeline<TO> {
253
+ // It was said that transformJsonParse() separately is 10% or more slower than .map(line => JSON.parse(line))
254
+ // So, we can investigate a speedup
255
+ this.transforms.push(transformSplitOnNewline(), transformJsonParse())
256
+ this.objectMode = true
257
+ return this as any
258
+ }
259
+
260
+ splitOnNewline(this: Pipeline<Uint8Array>): Pipeline<Buffer> {
261
+ // Input: objectMode=false - binary stream
262
+ // Output: objectMode=true - stream of Buffer objects, one per line (TODO: confirm whether consumers may treat them as strings)
263
+ this.transforms.push(transformSplitOnNewline())
264
+ this.objectMode = true
265
+ return this as any
266
+ }
267
+
268
+ parseJson<TO = unknown>(
269
+ this: Pipeline<Buffer> | Pipeline<Uint8Array> | Pipeline<string>,
270
+ ): Pipeline<TO> {
271
+ // Input: objectMode=false - takes a stream of strings one by one
272
+ // Output: objectMode=true - stream of json-parsed Objects
273
+ this.transforms.push(transformJsonParse())
274
+ this.objectMode = true
275
+ return this as any
276
+ }
277
+
278
+ gzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array> {
279
+ this.transforms.push(createGzip(opt))
280
+ this.objectMode = false
281
+ return this as any
282
+ }
283
+
284
+ gunzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array> {
285
+ this.transforms.push(createUnzip(opt))
286
+ this.objectMode = false
287
+ return this as any
288
+ }
289
+
222
290
  async toArray(opt?: TransformOptions): Promise<T[]> {
223
291
  const arr: T[] = []
224
292
  this.destination = writablePushToArray(arr, opt)
@@ -15,8 +15,9 @@ export function transformSplitOnNewline(): TransformTyped<Buffer, Buffer> {
15
15
  let buffered: Buffer | undefined
16
16
 
17
17
  return new Transform({
18
- readableObjectMode: true,
18
+ writableObjectMode: false,
19
19
  writableHighWaterMark: 64 * 1024,
20
+ readableObjectMode: true,
20
21
 
21
22
  transform(buf: Buffer, _enc, done) {
22
23
  let offset = 0