@naturalcycles/nodejs-lib 15.24.0 → 15.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/dist/stream/index.d.ts +0 -5
  2. package/dist/stream/index.js +0 -5
  3. package/dist/stream/ndjson/ndjsonMap.d.ts +2 -0
  4. package/dist/stream/ndjson/ndjsonMap.js +2 -0
  5. package/dist/stream/pipeline.d.ts +3 -1
  6. package/dist/stream/pipeline.js +36 -7
  7. package/dist/stream/writable/writableVoid.d.ts +1 -8
  8. package/dist/stream/writable/writableVoid.js +0 -1
  9. package/package.json +1 -1
  10. package/src/stream/index.ts +0 -5
  11. package/src/stream/ndjson/ndjsonMap.ts +2 -0
  12. package/src/stream/pipeline.ts +48 -7
  13. package/src/stream/writable/writableVoid.ts +1 -10
  14. package/dist/stream/ndjson/createReadStreamAsNDJSON.d.ts +0 -19
  15. package/dist/stream/ndjson/createReadStreamAsNDJSON.js +0 -38
  16. package/dist/stream/ndjson/createWriteStreamAsNDJSON.d.ts +0 -11
  17. package/dist/stream/ndjson/createWriteStreamAsNDJSON.js +0 -27
  18. package/dist/stream/ndjson/ndjsonStreamForEach.d.ts +0 -10
  19. package/dist/stream/ndjson/ndjsonStreamForEach.js +0 -15
  20. package/dist/stream/readable/readableToArray.d.ts +0 -9
  21. package/dist/stream/readable/readableToArray.js +0 -17
  22. package/dist/stream/writable/writableForEach.d.ts +0 -12
  23. package/dist/stream/writable/writableForEach.js +0 -15
  24. package/dist/stream/writable/writableLimit.d.ts +0 -8
  25. package/dist/stream/writable/writableLimit.js +0 -25
  26. package/src/stream/ndjson/createReadStreamAsNDJSON.ts +0 -46
  27. package/src/stream/ndjson/createWriteStreamAsNDJSON.ts +0 -30
  28. package/src/stream/ndjson/ndjsonStreamForEach.ts +0 -28
  29. package/src/stream/readable/readableToArray.ts +0 -19
  30. package/src/stream/writable/writableForEach.ts +0 -25
  31. package/src/stream/writable/writableLimit.ts +0 -29
package/dist/stream/index.d.ts CHANGED
@@ -1,8 +1,5 @@
- export * from './ndjson/createReadStreamAsNDJSON.js';
- export * from './ndjson/createWriteStreamAsNDJSON.js';
  export * from './ndjson/ndjson.model.js';
  export * from './ndjson/ndjsonMap.js';
- export * from './ndjson/ndjsonStreamForEach.js';
  export * from './ndjson/transformJsonParse.js';
  export * from './ndjson/transformToNDJson.js';
  export * from './pipeline.js';
@@ -10,7 +7,6 @@ export * from './progressLogger.js';
  export * from './readable/readableCombined.js';
  export * from './readable/readableCreate.js';
  export * from './readable/readableFromArray.js';
- export * from './readable/readableToArray.js';
  export * from './stream.model.js';
  export * from './transform/transformChunk.js';
  export * from './transform/transformFilter.js';
@@ -30,7 +26,6 @@ export * from './transform/transformToArray.js';
  export * from './transform/worker/baseWorkerClass.js';
  export * from './transform/worker/transformMultiThreaded.js';
  export * from './transform/worker/transformMultiThreaded.model.js';
- export * from './writable/writableForEach.js';
  export * from './writable/writableFork.js';
  export * from './writable/writablePushToArray.js';
  export * from './writable/writableVoid.js';
package/dist/stream/index.js CHANGED
@@ -1,8 +1,5 @@
- export * from './ndjson/createReadStreamAsNDJSON.js';
- export * from './ndjson/createWriteStreamAsNDJSON.js';
  export * from './ndjson/ndjson.model.js';
  export * from './ndjson/ndjsonMap.js';
- export * from './ndjson/ndjsonStreamForEach.js';
  export * from './ndjson/transformJsonParse.js';
  export * from './ndjson/transformToNDJson.js';
  export * from './pipeline.js';
@@ -10,7 +7,6 @@ export * from './progressLogger.js';
  export * from './readable/readableCombined.js';
  export * from './readable/readableCreate.js';
  export * from './readable/readableFromArray.js';
- export * from './readable/readableToArray.js';
  export * from './stream.model.js';
  export * from './transform/transformChunk.js';
  export * from './transform/transformFilter.js';
@@ -30,7 +26,6 @@ export * from './transform/transformToArray.js';
  export * from './transform/worker/baseWorkerClass.js';
  export * from './transform/worker/transformMultiThreaded.js';
  export * from './transform/worker/transformMultiThreaded.model.js';
- export * from './writable/writableForEach.js';
  export * from './writable/writableFork.js';
  export * from './writable/writablePushToArray.js';
  export * from './writable/writableVoid.js';
package/dist/stream/ndjson/ndjsonMap.d.ts CHANGED
@@ -13,5 +13,7 @@ export interface NDJSONMapOptions<IN = any, OUT = IN> extends TransformMapOption
  /**
   * Unzips input file automatically, if it ends with `.gz`.
   * Zips output file automatically, if it ends with `.gz`.
+  *
+  * @deprecated use Pipeline directly
   */
  export declare function ndjsonMap<IN = any, OUT = any>(mapper: AbortableAsyncMapper<IN, OUT>, opt: NDJSONMapOptions<IN, OUT>): Promise<void>;
package/dist/stream/ndjson/ndjsonMap.js CHANGED
@@ -3,6 +3,8 @@ import { Pipeline } from '../pipeline.js';
  /**
   * Unzips input file automatically, if it ends with `.gz`.
   * Zips output file automatically, if it ends with `.gz`.
+  *
+  * @deprecated use Pipeline directly
   */
  export async function ndjsonMap(mapper, opt) {
      const { inputFilePath, outputFilePath, logEveryOutput = 100_000, limitInput, limitOutput } = opt;
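Migration note: the deprecation points at Pipeline. A minimal sketch of the read-and-map half of such a migration, using only Pipeline methods visible in this diff (the `@naturalcycles/nodejs-lib/stream` import path and the optional second `.map()` argument are assumptions; the output-file half is omitted, since no file-writing method appears in this diff):

    import { Pipeline } from '@naturalcycles/nodejs-lib/stream'

    interface Row {
      id: string
    }

    // fromNDJsonFile gunzips automatically when the path ends with `.gz`
    const rows = await Pipeline.fromNDJsonFile<Row>('input.ndjson.gz')
      .map(async row => ({ ...row, id: row.id.toUpperCase() }))
      .toArray()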
package/dist/stream/pipeline.d.ts CHANGED
@@ -26,8 +26,8 @@ export declare class Pipeline<T> {
       */
      static fromArray<T>(input: T[]): Pipeline<T>;
      static fromIterable<T>(input: Iterable<T> | AsyncIterable<T>): Pipeline<T>;
-     static fromFile(sourceFilePath: string): Pipeline<Uint8Array>;
      static fromNDJsonFile<T>(sourceFilePath: string): Pipeline<T>;
+     static fromFile(sourceFilePath: string): Pipeline<Uint8Array>;
      /**
       * Limits the source Readable, but using `.take(limit)` on it.
       * This is THE preferred way of limiting the source.
@@ -81,6 +81,8 @@ export declare class Pipeline<T> {
       */
      toNDJson(): Pipeline<Uint8Array>;
      parseNDJson<TO = unknown>(this: Pipeline<Uint8Array>): Pipeline<TO>;
+     splitOnNewline(this: Pipeline<Uint8Array>): Pipeline<Buffer>;
+     parseJson<TO = unknown>(this: Pipeline<Buffer> | Pipeline<Uint8Array> | Pipeline<string>): Pipeline<TO>;
      gzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array>;
      gunzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array>;
      toArray(opt?: TransformOptions): Promise<T[]>;
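Usage note: splitOnNewline and parseJson expose the internals of fromNDJsonFile as composable steps. A sketch of the spelled-out equivalent for a gzipped NDJSON file (import path assumed):

    import { Pipeline } from '@naturalcycles/nodejs-lib/stream'

    // Equivalent of Pipeline.fromNDJsonFile<{ id: string }>('data.ndjson.gz'):
    const rows = await Pipeline.fromFile('data.ndjson.gz')
      .gunzip() // 64 KiB chunkSize by default, per the pipeline.js change below
      .parseJson<{ id: string }>()
      .toArray()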
package/dist/stream/pipeline.js CHANGED
@@ -4,7 +4,6 @@ import { createUnzip } from 'node:zlib';
  import { createGzip } from 'node:zlib';
  import { createAbortableSignal } from '@naturalcycles/js-lib';
  import { fs2 } from '../fs/fs2.js';
- import { createReadStreamAsNDJSON } from './ndjson/createReadStreamAsNDJSON.js';
  import { transformJsonParse } from './ndjson/transformJsonParse.js';
  import { transformToNDJson } from './ndjson/transformToNDJson.js';
  import { PIPELINE_GRACEFUL_ABORT } from './stream.util.js';
@@ -51,11 +50,21 @@ export class Pipeline {
      static fromIterable(input) {
          return new Pipeline(Readable.from(input));
      }
-     static fromFile(sourceFilePath) {
-         return new Pipeline(fs2.createReadStream(sourceFilePath), false);
-     }
      static fromNDJsonFile(sourceFilePath) {
-         return new Pipeline(createReadStreamAsNDJSON(sourceFilePath));
+         fs2.requireFileToExist(sourceFilePath);
+         const p = Pipeline.fromFile(sourceFilePath);
+         if (sourceFilePath.endsWith('.gz')) {
+             p.gunzip();
+         }
+         return p.parseJson();
+         // return stream.pipe(transformSplitOnNewline()).map(line => JSON.parse(line))
+         // For some crazy reason .map is much faster than transformJsonParse!
+         // ~5000 vs ~4000 rps !!!
+     }
+     static fromFile(sourceFilePath) {
+         return new Pipeline(fs2.createReadStream(sourceFilePath, {
+             highWaterMark: 64 * 1024, // no observed speedup
+         }), false);
      }
@@ -188,13 +197,33 @@ export class Pipeline {
          this.objectMode = true;
          return this;
      }
+     splitOnNewline() {
+         // Input: objectMode=false - binary stream
+         // Output: objectMode=true - stream of Buffer objects (which are also strings?)
+         this.transforms.push(transformSplitOnNewline());
+         this.objectMode = true;
+         return this;
+     }
+     parseJson() {
+         // Input: objectMode=false - takes a stream of strings one by one
+         // Output: objectMode=true - stream of json-parsed Objects
+         this.transforms.push(transformJsonParse());
+         this.objectMode = true;
+         return this;
+     }
      gzip(opt) {
-         this.transforms.push(createGzip(opt));
+         this.transforms.push(createGzip({
+             // chunkSize: 64 * 1024, // no observed speedup
+             ...opt,
+         }));
          this.objectMode = false;
          return this;
      }
      gunzip(opt) {
-         this.transforms.push(createUnzip(opt));
+         this.transforms.push(createUnzip({
+             chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
+             ...opt,
+         }));
          this.objectMode = false;
          return this;
      }
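The comments carried over from createReadStreamAsNDJSON record that a plain `.map(line => JSON.parse(line))` measured noticeably faster than transformJsonParse (~5000 vs ~4000 rps). A hedged sketch of that variant using the new splitOnNewline step (assuming `.map()` accepts a mapper the way ndjsonStreamForEach used it; import path assumed):

    import { Pipeline } from '@naturalcycles/nodejs-lib/stream'

    const rows = await Pipeline.fromFile('data.ndjson')
      .splitOnNewline()
      .map(async line => JSON.parse(line.toString()))
      .toArray()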
package/dist/stream/writable/writableVoid.d.ts CHANGED
@@ -1,15 +1,8 @@
  import { Writable } from 'node:stream';
- import type { DeferredPromise } from '@naturalcycles/js-lib/promise';
  import type { TransformOptions } from '../stream.model.js';
- export interface WritableVoidOptions extends TransformOptions {
-     /**
-      * If set - it will be Resolved when the Stream is done (after final.cb)
-      */
-     streamDone?: DeferredPromise;
- }
  /**
   * Use as a "null-terminator" of stream.pipeline.
   * It consumes the stream as quickly as possible without doing anything.
   * Put it in the end of your pipeline in case it ends with Transform that needs a consumer.
   */
- export declare function writableVoid(opt?: WritableVoidOptions): Writable;
+ export declare function writableVoid(opt?: TransformOptions): Writable;
package/dist/stream/writable/writableVoid.js CHANGED
@@ -13,7 +13,6 @@ export function writableVoid(opt = {}) {
          },
          final(cb) {
              cb();
-             opt.streamDone?.resolve();
          },
      });
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
      "name": "@naturalcycles/nodejs-lib",
      "type": "module",
-     "version": "15.24.0",
+     "version": "15.26.0",
      "dependencies": {
          "@naturalcycles/js-lib": "^15",
          "@types/js-yaml": "^4",
package/src/stream/index.ts CHANGED
@@ -1,8 +1,5 @@
- export * from './ndjson/createReadStreamAsNDJSON.js'
- export * from './ndjson/createWriteStreamAsNDJSON.js'
  export * from './ndjson/ndjson.model.js'
  export * from './ndjson/ndjsonMap.js'
- export * from './ndjson/ndjsonStreamForEach.js'
  export * from './ndjson/transformJsonParse.js'
  export * from './ndjson/transformToNDJson.js'
  export * from './pipeline.js'
@@ -10,7 +7,6 @@ export * from './progressLogger.js'
  export * from './readable/readableCombined.js'
  export * from './readable/readableCreate.js'
  export * from './readable/readableFromArray.js'
- export * from './readable/readableToArray.js'
  export * from './stream.model.js'
  export * from './transform/transformChunk.js'
  export * from './transform/transformFilter.js'
@@ -30,7 +26,6 @@ export * from './transform/transformToArray.js'
  export * from './transform/worker/baseWorkerClass.js'
  export * from './transform/worker/transformMultiThreaded.js'
  export * from './transform/worker/transformMultiThreaded.model.js'
- export * from './writable/writableForEach.js'
  export * from './writable/writableFork.js'
  export * from './writable/writablePushToArray.js'
  export * from './writable/writableVoid.js'
package/src/stream/ndjson/ndjsonMap.ts CHANGED
@@ -21,6 +21,8 @@ export interface NDJSONMapOptions<IN = any, OUT = IN>
  /**
   * Unzips input file automatically, if it ends with `.gz`.
   * Zips output file automatically, if it ends with `.gz`.
+  *
+  * @deprecated use Pipeline directly
   */
  export async function ndjsonMap<IN = any, OUT = any>(
    mapper: AbortableAsyncMapper<IN, OUT>,
package/src/stream/pipeline.ts CHANGED
@@ -17,7 +17,6 @@ import type {
    SKIP,
  } from '@naturalcycles/js-lib/types'
  import { fs2 } from '../fs/fs2.js'
- import { createReadStreamAsNDJSON } from './ndjson/createReadStreamAsNDJSON.js'
  import { transformJsonParse } from './ndjson/transformJsonParse.js'
  import { transformToNDJson } from './ndjson/transformToNDJson.js'
  import type {
@@ -83,12 +82,26 @@ export class Pipeline<T> {
      return new Pipeline(Readable.from(input))
    }

-   static fromFile(sourceFilePath: string): Pipeline<Uint8Array> {
-     return new Pipeline(fs2.createReadStream(sourceFilePath), false)
+   static fromNDJsonFile<T>(sourceFilePath: string): Pipeline<T> {
+     fs2.requireFileToExist(sourceFilePath)
+
+     const p = Pipeline.fromFile(sourceFilePath)
+     if (sourceFilePath.endsWith('.gz')) {
+       p.gunzip()
+     }
+     return p.parseJson()
+     // return stream.pipe(transformSplitOnNewline()).map(line => JSON.parse(line))
+     // For some crazy reason .map is much faster than transformJsonParse!
+     // ~5000 vs ~4000 rps !!!
    }

-   static fromNDJsonFile<T>(sourceFilePath: string): Pipeline<T> {
-     return new Pipeline(createReadStreamAsNDJSON<T>(sourceFilePath))
+   static fromFile(sourceFilePath: string): Pipeline<Uint8Array> {
+     return new Pipeline(
+       fs2.createReadStream(sourceFilePath, {
+         highWaterMark: 64 * 1024, // no observed speedup
+       }),
+       false,
+     )
    }

    /**
@@ -257,14 +270,42 @@ export class Pipeline<T> {
      return this as any
    }

+   splitOnNewline(this: Pipeline<Uint8Array>): Pipeline<Buffer> {
+     // Input: objectMode=false - binary stream
+     // Output: objectMode=true - stream of Buffer objects (which are also strings?)
+     this.transforms.push(transformSplitOnNewline())
+     this.objectMode = true
+     return this as any
+   }
+
+   parseJson<TO = unknown>(
+     this: Pipeline<Buffer> | Pipeline<Uint8Array> | Pipeline<string>,
+   ): Pipeline<TO> {
+     // Input: objectMode=false - takes a stream of strings one by one
+     // Output: objectMode=true - stream of json-parsed Objects
+     this.transforms.push(transformJsonParse())
+     this.objectMode = true
+     return this as any
+   }
+
    gzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array> {
-     this.transforms.push(createGzip(opt))
+     this.transforms.push(
+       createGzip({
+         // chunkSize: 64 * 1024, // no observed speedup
+         ...opt,
+       }),
+     )
      this.objectMode = false
      return this as any
    }

    gunzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array> {
-     this.transforms.push(createUnzip(opt))
+     this.transforms.push(
+       createUnzip({
+         chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
+         ...opt,
+       }),
+     )
      this.objectMode = false
      return this as any
    }
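Design note: in both gzip and gunzip the new defaults are spread before the caller's ZlibOptions, so a caller can still override them, e.g. (sketch):

    // Overrides the new 64 KiB gunzip default back to a smaller chunkSize:
    const p = Pipeline.fromFile('data.ndjson.gz').gunzip({ chunkSize: 16 * 1024 })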
package/src/stream/writable/writableVoid.ts CHANGED
@@ -1,20 +1,12 @@
  import { Writable } from 'node:stream'
- import type { DeferredPromise } from '@naturalcycles/js-lib/promise'
  import type { TransformOptions } from '../stream.model.js'

- export interface WritableVoidOptions extends TransformOptions {
-   /**
-    * If set - it will be Resolved when the Stream is done (after final.cb)
-    */
-   streamDone?: DeferredPromise
- }
-
  /**
   * Use as a "null-terminator" of stream.pipeline.
   * It consumes the stream as quickly as possible without doing anything.
   * Put it in the end of your pipeline in case it ends with Transform that needs a consumer.
   */
- export function writableVoid(opt: WritableVoidOptions = {}): Writable {
+ export function writableVoid(opt: TransformOptions = {}): Writable {
    return new Writable({
      objectMode: true,
      ...opt,
@@ -23,7 +15,6 @@ export function writableVoid(opt: WritableVoidOptions = {}): Writable {
      },
      final(cb) {
        cb()
-       opt.streamDone?.resolve()
      },
    })
  }
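Breaking change note: the streamDone DeferredPromise option is gone. Completion is better observed by awaiting the pipeline itself; a minimal sketch using Node's promisified pipeline (the writableVoid import path is an assumption):

    import { Readable } from 'node:stream'
    import { pipeline } from 'node:stream/promises'
    import { writableVoid } from '@naturalcycles/nodejs-lib/stream'

    // Resolves once the source is fully consumed - no DeferredPromise needed
    await pipeline(Readable.from([1, 2, 3]), writableVoid())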
package/dist/stream/ndjson/createReadStreamAsNDJSON.d.ts DELETED
@@ -1,19 +0,0 @@
- import type { ReadableTyped } from '../stream.model.js';
- /**
-  Returns a Readable of [already parsed] NDJSON objects.
- 
-  Replaces a list of operations:
-  - requireFileToExist(inputPath)
-  - fs.createReadStream
-  - createUnzip (only if path ends with '.gz')
-  - transformSplitOnNewline
-  - transformJsonParse
- 
-  To add a Limit or Offset: just add .take() or .drop(), example:
- 
-  _pipeline([
-    fs2.createReadStreamAsNDJSON().take(100),
-    transformX(),
-  ])
-  */
- export declare function createReadStreamAsNDJSON<ROW = any>(inputPath: string): ReadableTyped<ROW>;
package/dist/stream/ndjson/createReadStreamAsNDJSON.js DELETED
@@ -1,38 +0,0 @@
- import { createUnzip } from 'node:zlib';
- import { fs2 } from '../../fs/fs2.js';
- import { transformSplitOnNewline } from '../transform/transformSplit.js';
- /**
-  Returns a Readable of [already parsed] NDJSON objects.
- 
-  Replaces a list of operations:
-  - requireFileToExist(inputPath)
-  - fs.createReadStream
-  - createUnzip (only if path ends with '.gz')
-  - transformSplitOnNewline
-  - transformJsonParse
- 
-  To add a Limit or Offset: just add .take() or .drop(), example:
- 
-  _pipeline([
-    fs2.createReadStreamAsNDJSON().take(100),
-    transformX(),
-  ])
-  */
- export function createReadStreamAsNDJSON(inputPath) {
-     fs2.requireFileToExist(inputPath);
-     let stream = fs2
-         .createReadStream(inputPath, {
-             highWaterMark: 64 * 1024, // no observed speedup
-         })
-         .on('error', err => stream.emit('error', err));
-     if (inputPath.endsWith('.gz')) {
-         stream = stream.pipe(createUnzip({
-             chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
-         }));
-     }
-     return stream.pipe(transformSplitOnNewline()).map(line => JSON.parse(line));
-     // For some crazy reason .map is much faster than transformJsonParse!
-     // ~5000 vs ~4000 rps !!!
-     // .on('error', err => stream.emit('error', err))
-     // .pipe(transformJsonParse<ROW>())
- }
package/dist/stream/ndjson/createWriteStreamAsNDJSON.d.ts DELETED
@@ -1,11 +0,0 @@
- import type { TransformTyped } from '../stream.model.js';
- /**
-  Returns an array of Transforms, so that you can ...destructure them at
-  the end of the _pipeline.
- 
-  Replaces a list of operations:
-  - transformToNDJson
-  - createGzip (only if path ends with '.gz')
-  - fs.createWriteStream
-  */
- export declare function createWriteStreamAsNDJSON(outputPath: string): TransformTyped<any, any>[];
package/dist/stream/ndjson/createWriteStreamAsNDJSON.js DELETED
@@ -1,27 +0,0 @@
- import { createGzip } from 'node:zlib';
- import { _isTruthy } from '@naturalcycles/js-lib';
- import { fs2 } from '../../fs/fs2.js';
- import { transformToNDJson } from './transformToNDJson.js';
- /**
-  Returns an array of Transforms, so that you can ...destructure them at
-  the end of the _pipeline.
- 
-  Replaces a list of operations:
-  - transformToNDJson
-  - createGzip (only if path ends with '.gz')
-  - fs.createWriteStream
-  */
- export function createWriteStreamAsNDJSON(outputPath) {
-     fs2.ensureFile(outputPath);
-     return [
-         transformToNDJson(),
-         outputPath.endsWith('.gz')
-             ? createGzip({
-                 // chunkSize: 64 * 1024, // no observed speedup
-             })
-             : undefined,
-         fs2.createWriteStream(outputPath, {
-             // highWaterMark: 64 * 1024, // no observed speedup
-         }),
-     ].filter(_isTruthy);
- }
package/dist/stream/ndjson/ndjsonStreamForEach.d.ts DELETED
@@ -1,10 +0,0 @@
- import type { AbortableAsyncMapper } from '@naturalcycles/js-lib/types';
- import type { TransformLogProgressOptions } from '../transform/transformLogProgress.js';
- import type { TransformMapOptions } from '../transform/transformMap.js';
- export interface NDJSONStreamForEachOptions<IN = any> extends TransformMapOptions<IN, void>, TransformLogProgressOptions<IN> {
-     inputFilePath: string;
- }
- /**
-  * Convenience function to `forEach` through an ndjson file.
-  */
- export declare function ndjsonStreamForEach<T>(mapper: AbortableAsyncMapper<T, void>, opt: NDJSONStreamForEachOptions<T>): Promise<void>;
package/dist/stream/ndjson/ndjsonStreamForEach.js DELETED
@@ -1,15 +0,0 @@
- import { ErrorMode } from '@naturalcycles/js-lib/error/errorMode.js';
- import { Pipeline } from '../pipeline.js';
- /**
-  * Convenience function to `forEach` through an ndjson file.
-  */
- export async function ndjsonStreamForEach(mapper, opt) {
-     await Pipeline.fromNDJsonFile(opt.inputFilePath)
-         .map(mapper, {
-         errorMode: ErrorMode.THROW_AGGREGATED,
-         ...opt,
-         predicate: () => true, // to log progress properly
-     })
-         .logProgress(opt)
-         .run();
- }
package/dist/stream/readable/readableToArray.d.ts DELETED
@@ -1,9 +0,0 @@
- import type { ReadableTyped } from '../stream.model.js';
- /**
-  * Convenience function to read the whole Readable stream into Array (in-memory)
-  * and return that array.
-  *
-  * Native `await readable.toArray()` can be used instead.
-  * This helper is kept for type-safery support.
-  */
- export declare function readableToArray<T>(readable: ReadableTyped<T>): Promise<T[]>;
package/dist/stream/readable/readableToArray.js DELETED
@@ -1,17 +0,0 @@
- /**
-  * Convenience function to read the whole Readable stream into Array (in-memory)
-  * and return that array.
-  *
-  * Native `await readable.toArray()` can be used instead.
-  * This helper is kept for type-safery support.
-  */
- export async function readableToArray(readable) {
-     return await readable.toArray();
-     // const a: T[] = []
-     //
-     // for await (const item of readable) {
-     //   a.push(item)
-     // }
-     //
-     // return a
- }
package/dist/stream/writable/writableForEach.d.ts DELETED
@@ -1,12 +0,0 @@
- import type { AsyncIndexedMapper, IndexedMapper } from '@naturalcycles/js-lib/types';
- import type { WritableTyped } from '../stream.model.js';
- import { type TransformMapOptions } from '../transform/transformMap.js';
- import { type TransformMapSyncOptions } from '../transform/transformMapSync.js';
- /**
-  * Just an alias to transformMap that declares OUT as void.
-  */
- export declare function writableForEach<IN = any>(mapper: AsyncIndexedMapper<IN, void>, opt?: TransformMapOptions<IN, void>): WritableTyped<IN>;
- /**
-  * Just an alias to transformMap that declares OUT as void.
-  */
- export declare function writableForEachSync<IN = any>(mapper: IndexedMapper<IN, void>, opt?: TransformMapSyncOptions<IN, void>): WritableTyped<IN>;
package/dist/stream/writable/writableForEach.js DELETED
@@ -1,15 +0,0 @@
- import { _passNothingPredicate } from '@naturalcycles/js-lib/types';
- import { transformMap } from '../transform/transformMap.js';
- import { transformMapSync } from '../transform/transformMapSync.js';
- /**
-  * Just an alias to transformMap that declares OUT as void.
-  */
- export function writableForEach(mapper, opt = {}) {
-     return transformMap(mapper, { ...opt, predicate: _passNothingPredicate });
- }
- /**
-  * Just an alias to transformMap that declares OUT as void.
-  */
- export function writableForEachSync(mapper, opt = {}) {
-     return transformMapSync(mapper, { ...opt, predicate: _passNothingPredicate });
- }
package/dist/stream/writable/writableLimit.d.ts DELETED
@@ -1,8 +0,0 @@
- import type { Readable } from 'node:stream';
- import type { WritableTyped } from '../stream.model.js';
- /**
-  * Allows to stop the Readable stream after the pipeline has processed X number of rows.
-  * It counts OUTPUT rows (not input), because this Writable is always at the end of the Pipeline.
-  * It ensures that everything has been processed before issuing a STOP on the readable.
-  */
- export declare function writableLimit<T>(readable: Readable, limit: number): WritableTyped<T>;
package/dist/stream/writable/writableLimit.js DELETED
@@ -1,25 +0,0 @@
- import { Writable } from 'node:stream';
- /**
-  * Allows to stop the Readable stream after the pipeline has processed X number of rows.
-  * It counts OUTPUT rows (not input), because this Writable is always at the end of the Pipeline.
-  * It ensures that everything has been processed before issuing a STOP on the readable.
-  */
- export function writableLimit(readable, limit) {
-     let i = 0;
-     return new Writable({
-         objectMode: true,
-         write(_chunk, _, cb) {
-             if (limit === 0)
-                 return cb(); // no limit, just passthrough
-             i++;
-             if (i === limit) {
-                 console.log(`writableLimit of ${limit} reached`);
-                 readable.destroy();
-                 cb(); // do we need it?
-             }
-             else {
-                 cb(); // passthrough
-             }
-         },
-     });
- }
package/src/stream/ndjson/createReadStreamAsNDJSON.ts DELETED
@@ -1,46 +0,0 @@
- import { createUnzip } from 'node:zlib'
- import { fs2 } from '../../fs/fs2.js'
- import type { ReadableTyped } from '../stream.model.js'
- import { transformSplitOnNewline } from '../transform/transformSplit.js'
- 
- /**
-  Returns a Readable of [already parsed] NDJSON objects.
- 
-  Replaces a list of operations:
-  - requireFileToExist(inputPath)
-  - fs.createReadStream
-  - createUnzip (only if path ends with '.gz')
-  - transformSplitOnNewline
-  - transformJsonParse
- 
-  To add a Limit or Offset: just add .take() or .drop(), example:
- 
-  _pipeline([
-    fs2.createReadStreamAsNDJSON().take(100),
-    transformX(),
-  ])
-  */
- 
- export function createReadStreamAsNDJSON<ROW = any>(inputPath: string): ReadableTyped<ROW> {
-   fs2.requireFileToExist(inputPath)
- 
-   let stream: ReadableTyped<ROW> = fs2
-     .createReadStream(inputPath, {
-       highWaterMark: 64 * 1024, // no observed speedup
-     })
-     .on('error', err => stream.emit('error', err))
- 
-   if (inputPath.endsWith('.gz')) {
-     stream = stream.pipe(
-       createUnzip({
-         chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
-       }),
-     )
-   }
- 
-   return stream.pipe(transformSplitOnNewline()).map(line => JSON.parse(line))
-   // For some crazy reason .map is much faster than transformJsonParse!
-   // ~5000 vs ~4000 rps !!!
-   // .on('error', err => stream.emit('error', err))
-   // .pipe(transformJsonParse<ROW>())
- }
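Migration note: Pipeline.fromNDJsonFile (see the pipeline.ts hunk above) now performs the same steps this helper did: requireFileToExist, createReadStream, conditional gunzip, JSON parsing. A sketch (import path assumed):

    import { Pipeline } from '@naturalcycles/nodejs-lib/stream'

    // Replaces createReadStreamAsNDJSON<{ id: string }>('input.ndjson.gz'):
    const rows = await Pipeline.fromNDJsonFile<{ id: string }>('input.ndjson.gz').toArray()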
package/src/stream/ndjson/createWriteStreamAsNDJSON.ts DELETED
@@ -1,30 +0,0 @@
- import { createGzip } from 'node:zlib'
- import { _isTruthy } from '@naturalcycles/js-lib'
- import { fs2 } from '../../fs/fs2.js'
- import type { TransformTyped } from '../stream.model.js'
- import { transformToNDJson } from './transformToNDJson.js'
- 
- /**
-  Returns an array of Transforms, so that you can ...destructure them at
-  the end of the _pipeline.
- 
-  Replaces a list of operations:
-  - transformToNDJson
-  - createGzip (only if path ends with '.gz')
-  - fs.createWriteStream
-  */
- export function createWriteStreamAsNDJSON(outputPath: string): TransformTyped<any, any>[] {
-   fs2.ensureFile(outputPath)
- 
-   return [
-     transformToNDJson(),
-     outputPath.endsWith('.gz')
-       ? createGzip({
-           // chunkSize: 64 * 1024, // no observed speedup
-         })
-       : undefined,
-     fs2.createWriteStream(outputPath, {
-       // highWaterMark: 64 * 1024, // no observed speedup
-     }),
-   ].filter(_isTruthy) as TransformTyped<any, any>[]
- }
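Migration note: no Pipeline replacement for the write side is visible in this diff, but the removed helper's body can be inlined with Node primitives (transformToNDJson is still exported from the stream index; its import path here is an assumption):

    import { createWriteStream } from 'node:fs'
    import { Readable } from 'node:stream'
    import { pipeline } from 'node:stream/promises'
    import { createGzip } from 'node:zlib'
    import { transformToNDJson } from '@naturalcycles/nodejs-lib/stream'

    // Mirrors the removed helper: NDJSON-serialize, gzip (because the
    // path ends with '.gz'), then write to file.
    await pipeline(
      Readable.from([{ id: 'a' }, { id: 'b' }]),
      transformToNDJson(),
      createGzip(),
      createWriteStream('out.ndjson.gz'),
    )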
package/src/stream/ndjson/ndjsonStreamForEach.ts DELETED
@@ -1,28 +0,0 @@
- import { ErrorMode } from '@naturalcycles/js-lib/error/errorMode.js'
- import type { AbortableAsyncMapper } from '@naturalcycles/js-lib/types'
- import { Pipeline } from '../pipeline.js'
- import type { TransformLogProgressOptions } from '../transform/transformLogProgress.js'
- import type { TransformMapOptions } from '../transform/transformMap.js'
- 
- export interface NDJSONStreamForEachOptions<IN = any>
-   extends TransformMapOptions<IN, void>,
-     TransformLogProgressOptions<IN> {
-   inputFilePath: string
- }
- 
- /**
-  * Convenience function to `forEach` through an ndjson file.
-  */
- export async function ndjsonStreamForEach<T>(
-   mapper: AbortableAsyncMapper<T, void>,
-   opt: NDJSONStreamForEachOptions<T>,
- ): Promise<void> {
-   await Pipeline.fromNDJsonFile<T>(opt.inputFilePath)
-     .map(mapper, {
-       errorMode: ErrorMode.THROW_AGGREGATED,
-       ...opt,
-       predicate: () => true, // to log progress properly
-     })
-     .logProgress(opt)
-     .run()
- }
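Migration note: the removed function's body is its own migration guide; inlined at the call site it becomes roughly (sketch; import paths and the empty logProgress options object are assumptions):

    import { ErrorMode } from '@naturalcycles/js-lib/error/errorMode.js'
    import { Pipeline } from '@naturalcycles/nodejs-lib/stream'

    await Pipeline.fromNDJsonFile<{ id: string }>('input.ndjson')
      .map(async row => console.log(row.id), {
        errorMode: ErrorMode.THROW_AGGREGATED,
        predicate: () => true, // to log progress properly
      })
      .logProgress({})
      .run()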
package/src/stream/readable/readableToArray.ts DELETED
@@ -1,19 +0,0 @@
- import type { ReadableTyped } from '../stream.model.js'
- 
- /**
-  * Convenience function to read the whole Readable stream into Array (in-memory)
-  * and return that array.
-  *
-  * Native `await readable.toArray()` can be used instead.
-  * This helper is kept for type-safery support.
-  */
- export async function readableToArray<T>(readable: ReadableTyped<T>): Promise<T[]> {
-   return await readable.toArray()
-   // const a: T[] = []
-   //
-   // for await (const item of readable) {
-   //   a.push(item)
-   // }
-   //
-   // return a
- }
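As the removed doc comment itself notes, native `readable.toArray()` (available since Node 17.5) covers this case:

    import { Readable } from 'node:stream'

    const readable = Readable.from([1, 2, 3])
    const arr = await readable.toArray() // [1, 2, 3]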
package/src/stream/writable/writableForEach.ts DELETED
@@ -1,25 +0,0 @@
- import type { AsyncIndexedMapper, IndexedMapper } from '@naturalcycles/js-lib/types'
- import { _passNothingPredicate } from '@naturalcycles/js-lib/types'
- import type { WritableTyped } from '../stream.model.js'
- import { transformMap, type TransformMapOptions } from '../transform/transformMap.js'
- import { transformMapSync, type TransformMapSyncOptions } from '../transform/transformMapSync.js'
- 
- /**
-  * Just an alias to transformMap that declares OUT as void.
-  */
- export function writableForEach<IN = any>(
-   mapper: AsyncIndexedMapper<IN, void>,
-   opt: TransformMapOptions<IN, void> = {},
- ): WritableTyped<IN> {
-   return transformMap<IN, void>(mapper, { ...opt, predicate: _passNothingPredicate })
- }
- 
- /**
-  * Just an alias to transformMap that declares OUT as void.
-  */
- export function writableForEachSync<IN = any>(
-   mapper: IndexedMapper<IN, void>,
-   opt: TransformMapSyncOptions<IN, void> = {},
- ): WritableTyped<IN> {
-   return transformMapSync<IN, void>(mapper, { ...opt, predicate: _passNothingPredicate })
- }
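Migration note: both removed functions were one-line aliases, so callers can inline them (sketch; the transformMap export path is an assumption):

    import { _passNothingPredicate } from '@naturalcycles/js-lib/types'
    import { transformMap } from '@naturalcycles/nodejs-lib/stream'

    // Equivalent of the removed writableForEach(mapper):
    const sink = transformMap<string, void>(async row => console.log(row), {
      predicate: _passNothingPredicate,
    })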
package/src/stream/writable/writableLimit.ts DELETED
@@ -1,29 +0,0 @@
- import type { Readable } from 'node:stream'
- import { Writable } from 'node:stream'
- import type { WritableTyped } from '../stream.model.js'
- 
- /**
-  * Allows to stop the Readable stream after the pipeline has processed X number of rows.
-  * It counts OUTPUT rows (not input), because this Writable is always at the end of the Pipeline.
-  * It ensures that everything has been processed before issuing a STOP on the readable.
-  */
- export function writableLimit<T>(readable: Readable, limit: number): WritableTyped<T> {
-   let i = 0
- 
-   return new Writable({
-     objectMode: true,
-     write(_chunk, _, cb) {
-       if (limit === 0) return cb() // no limit, just passthrough
- 
-       i++
- 
-       if (i === limit) {
-         console.log(`writableLimit of ${limit} reached`)
-         readable.destroy()
-         cb() // do we need it?
-       } else {
-         cb() // passthrough
-       }
-     },
-   })
- }
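Migration note: the Pipeline docs above call limiting at the source "THE preferred way", via `.take(limit)` on the Readable (a Node 17.5+ stream operator), instead of destroying the source from a terminal Writable:

    import { Readable } from 'node:stream'

    // Ends the source cleanly after 3 items - no readable.destroy() needed
    const rows = await Readable.from([1, 2, 3, 4, 5]).take(3).toArray() // [1, 2, 3]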