@naturalcycles/nodejs-lib 15.24.0 → 15.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/stream/index.d.ts +0 -5
- package/dist/stream/index.js +0 -5
- package/dist/stream/ndjson/ndjsonMap.d.ts +2 -0
- package/dist/stream/ndjson/ndjsonMap.js +2 -0
- package/dist/stream/pipeline.d.ts +3 -1
- package/dist/stream/pipeline.js +36 -7
- package/dist/stream/writable/writableVoid.d.ts +1 -8
- package/dist/stream/writable/writableVoid.js +0 -1
- package/package.json +1 -1
- package/src/stream/index.ts +0 -5
- package/src/stream/ndjson/ndjsonMap.ts +2 -0
- package/src/stream/pipeline.ts +48 -7
- package/src/stream/writable/writableVoid.ts +1 -10
- package/dist/stream/ndjson/createReadStreamAsNDJSON.d.ts +0 -19
- package/dist/stream/ndjson/createReadStreamAsNDJSON.js +0 -38
- package/dist/stream/ndjson/createWriteStreamAsNDJSON.d.ts +0 -11
- package/dist/stream/ndjson/createWriteStreamAsNDJSON.js +0 -27
- package/dist/stream/ndjson/ndjsonStreamForEach.d.ts +0 -10
- package/dist/stream/ndjson/ndjsonStreamForEach.js +0 -15
- package/dist/stream/readable/readableToArray.d.ts +0 -9
- package/dist/stream/readable/readableToArray.js +0 -17
- package/dist/stream/writable/writableForEach.d.ts +0 -12
- package/dist/stream/writable/writableForEach.js +0 -15
- package/dist/stream/writable/writableLimit.d.ts +0 -8
- package/dist/stream/writable/writableLimit.js +0 -25
- package/src/stream/ndjson/createReadStreamAsNDJSON.ts +0 -46
- package/src/stream/ndjson/createWriteStreamAsNDJSON.ts +0 -30
- package/src/stream/ndjson/ndjsonStreamForEach.ts +0 -28
- package/src/stream/readable/readableToArray.ts +0 -19
- package/src/stream/writable/writableForEach.ts +0 -25
- package/src/stream/writable/writableLimit.ts +0 -29
package/dist/stream/index.d.ts
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
export * from './ndjson/createReadStreamAsNDJSON.js';
|
|
2
|
-
export * from './ndjson/createWriteStreamAsNDJSON.js';
|
|
3
1
|
export * from './ndjson/ndjson.model.js';
|
|
4
2
|
export * from './ndjson/ndjsonMap.js';
|
|
5
|
-
export * from './ndjson/ndjsonStreamForEach.js';
|
|
6
3
|
export * from './ndjson/transformJsonParse.js';
|
|
7
4
|
export * from './ndjson/transformToNDJson.js';
|
|
8
5
|
export * from './pipeline.js';
|
|
@@ -10,7 +7,6 @@ export * from './progressLogger.js';
|
|
|
10
7
|
export * from './readable/readableCombined.js';
|
|
11
8
|
export * from './readable/readableCreate.js';
|
|
12
9
|
export * from './readable/readableFromArray.js';
|
|
13
|
-
export * from './readable/readableToArray.js';
|
|
14
10
|
export * from './stream.model.js';
|
|
15
11
|
export * from './transform/transformChunk.js';
|
|
16
12
|
export * from './transform/transformFilter.js';
|
|
@@ -30,7 +26,6 @@ export * from './transform/transformToArray.js';
|
|
|
30
26
|
export * from './transform/worker/baseWorkerClass.js';
|
|
31
27
|
export * from './transform/worker/transformMultiThreaded.js';
|
|
32
28
|
export * from './transform/worker/transformMultiThreaded.model.js';
|
|
33
|
-
export * from './writable/writableForEach.js';
|
|
34
29
|
export * from './writable/writableFork.js';
|
|
35
30
|
export * from './writable/writablePushToArray.js';
|
|
36
31
|
export * from './writable/writableVoid.js';
|
package/dist/stream/index.js
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
export * from './ndjson/createReadStreamAsNDJSON.js';
|
|
2
|
-
export * from './ndjson/createWriteStreamAsNDJSON.js';
|
|
3
1
|
export * from './ndjson/ndjson.model.js';
|
|
4
2
|
export * from './ndjson/ndjsonMap.js';
|
|
5
|
-
export * from './ndjson/ndjsonStreamForEach.js';
|
|
6
3
|
export * from './ndjson/transformJsonParse.js';
|
|
7
4
|
export * from './ndjson/transformToNDJson.js';
|
|
8
5
|
export * from './pipeline.js';
|
|
@@ -10,7 +7,6 @@ export * from './progressLogger.js';
|
|
|
10
7
|
export * from './readable/readableCombined.js';
|
|
11
8
|
export * from './readable/readableCreate.js';
|
|
12
9
|
export * from './readable/readableFromArray.js';
|
|
13
|
-
export * from './readable/readableToArray.js';
|
|
14
10
|
export * from './stream.model.js';
|
|
15
11
|
export * from './transform/transformChunk.js';
|
|
16
12
|
export * from './transform/transformFilter.js';
|
|
@@ -30,7 +26,6 @@ export * from './transform/transformToArray.js';
|
|
|
30
26
|
export * from './transform/worker/baseWorkerClass.js';
|
|
31
27
|
export * from './transform/worker/transformMultiThreaded.js';
|
|
32
28
|
export * from './transform/worker/transformMultiThreaded.model.js';
|
|
33
|
-
export * from './writable/writableForEach.js';
|
|
34
29
|
export * from './writable/writableFork.js';
|
|
35
30
|
export * from './writable/writablePushToArray.js';
|
|
36
31
|
export * from './writable/writableVoid.js';
|
|
@@ -13,5 +13,7 @@ export interface NDJSONMapOptions<IN = any, OUT = IN> extends TransformMapOption
|
|
|
13
13
|
/**
|
|
14
14
|
* Unzips input file automatically, if it ends with `.gz`.
|
|
15
15
|
* Zips output file automatically, if it ends with `.gz`.
|
|
16
|
+
*
|
|
17
|
+
* @deprecated use Pipeline directly
|
|
16
18
|
*/
|
|
17
19
|
export declare function ndjsonMap<IN = any, OUT = any>(mapper: AbortableAsyncMapper<IN, OUT>, opt: NDJSONMapOptions<IN, OUT>): Promise<void>;
|
|
@@ -3,6 +3,8 @@ import { Pipeline } from '../pipeline.js';
|
|
|
3
3
|
/**
|
|
4
4
|
* Unzips input file automatically, if it ends with `.gz`.
|
|
5
5
|
* Zips output file automatically, if it ends with `.gz`.
|
|
6
|
+
*
|
|
7
|
+
* @deprecated use Pipeline directly
|
|
6
8
|
*/
|
|
7
9
|
export async function ndjsonMap(mapper, opt) {
|
|
8
10
|
const { inputFilePath, outputFilePath, logEveryOutput = 100_000, limitInput, limitOutput } = opt;
|
|
@@ -26,8 +26,8 @@ export declare class Pipeline<T> {
|
|
|
26
26
|
*/
|
|
27
27
|
static fromArray<T>(input: T[]): Pipeline<T>;
|
|
28
28
|
static fromIterable<T>(input: Iterable<T> | AsyncIterable<T>): Pipeline<T>;
|
|
29
|
-
static fromFile(sourceFilePath: string): Pipeline<Uint8Array>;
|
|
30
29
|
static fromNDJsonFile<T>(sourceFilePath: string): Pipeline<T>;
|
|
30
|
+
static fromFile(sourceFilePath: string): Pipeline<Uint8Array>;
|
|
31
31
|
/**
|
|
32
32
|
* Limits the source Readable, but using `.take(limit)` on it.
|
|
33
33
|
* This is THE preferred way of limiting the source.
|
|
@@ -81,6 +81,8 @@ export declare class Pipeline<T> {
|
|
|
81
81
|
*/
|
|
82
82
|
toNDJson(): Pipeline<Uint8Array>;
|
|
83
83
|
parseNDJson<TO = unknown>(this: Pipeline<Uint8Array>): Pipeline<TO>;
|
|
84
|
+
splitOnNewline(this: Pipeline<Uint8Array>): Pipeline<Buffer>;
|
|
85
|
+
parseJson<TO = unknown>(this: Pipeline<Buffer> | Pipeline<Uint8Array> | Pipeline<string>): Pipeline<TO>;
|
|
84
86
|
gzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array>;
|
|
85
87
|
gunzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array>;
|
|
86
88
|
toArray(opt?: TransformOptions): Promise<T[]>;
|
package/dist/stream/pipeline.js
CHANGED
|
@@ -4,7 +4,6 @@ import { createUnzip } from 'node:zlib';
|
|
|
4
4
|
import { createGzip } from 'node:zlib';
|
|
5
5
|
import { createAbortableSignal } from '@naturalcycles/js-lib';
|
|
6
6
|
import { fs2 } from '../fs/fs2.js';
|
|
7
|
-
import { createReadStreamAsNDJSON } from './ndjson/createReadStreamAsNDJSON.js';
|
|
8
7
|
import { transformJsonParse } from './ndjson/transformJsonParse.js';
|
|
9
8
|
import { transformToNDJson } from './ndjson/transformToNDJson.js';
|
|
10
9
|
import { PIPELINE_GRACEFUL_ABORT } from './stream.util.js';
|
|
@@ -51,11 +50,21 @@ export class Pipeline {
|
|
|
51
50
|
static fromIterable(input) {
|
|
52
51
|
return new Pipeline(Readable.from(input));
|
|
53
52
|
}
|
|
54
|
-
static fromFile(sourceFilePath) {
|
|
55
|
-
return new Pipeline(fs2.createReadStream(sourceFilePath), false);
|
|
56
|
-
}
|
|
57
53
|
static fromNDJsonFile(sourceFilePath) {
|
|
58
|
-
|
|
54
|
+
fs2.requireFileToExist(sourceFilePath);
|
|
55
|
+
const p = Pipeline.fromFile(sourceFilePath);
|
|
56
|
+
if (sourceFilePath.endsWith('.gz')) {
|
|
57
|
+
p.gunzip();
|
|
58
|
+
}
|
|
59
|
+
return p.parseJson();
|
|
60
|
+
// return stream.pipe(transformSplitOnNewline()).map(line => JSON.parse(line))
|
|
61
|
+
// For some crazy reason .map is much faster than transformJsonParse!
|
|
62
|
+
// ~5000 vs ~4000 rps !!!
|
|
63
|
+
}
|
|
64
|
+
static fromFile(sourceFilePath) {
|
|
65
|
+
return new Pipeline(fs2.createReadStream(sourceFilePath, {
|
|
66
|
+
highWaterMark: 64 * 1024, // no observed speedup
|
|
67
|
+
}), false);
|
|
59
68
|
}
|
|
60
69
|
/**
|
|
61
70
|
* Limits the source Readable, but using `.take(limit)` on it.
|
|
@@ -188,13 +197,33 @@ export class Pipeline {
|
|
|
188
197
|
this.objectMode = true;
|
|
189
198
|
return this;
|
|
190
199
|
}
|
|
200
|
+
splitOnNewline() {
|
|
201
|
+
// Input: objectMode=false - binary stream
|
|
202
|
+
// Output: objectMode=true - stream of Buffer objects (which are also strings?)
|
|
203
|
+
this.transforms.push(transformSplitOnNewline());
|
|
204
|
+
this.objectMode = true;
|
|
205
|
+
return this;
|
|
206
|
+
}
|
|
207
|
+
parseJson() {
|
|
208
|
+
// Input: objectMode=false - takes a stream of strings one by one
|
|
209
|
+
// Output: objectMode=true - stream of json-parsed Objects
|
|
210
|
+
this.transforms.push(transformJsonParse());
|
|
211
|
+
this.objectMode = true;
|
|
212
|
+
return this;
|
|
213
|
+
}
|
|
191
214
|
gzip(opt) {
|
|
192
|
-
this.transforms.push(createGzip(
|
|
215
|
+
this.transforms.push(createGzip({
|
|
216
|
+
// chunkSize: 64 * 1024, // no observed speedup
|
|
217
|
+
...opt,
|
|
218
|
+
}));
|
|
193
219
|
this.objectMode = false;
|
|
194
220
|
return this;
|
|
195
221
|
}
|
|
196
222
|
gunzip(opt) {
|
|
197
|
-
this.transforms.push(createUnzip(
|
|
223
|
+
this.transforms.push(createUnzip({
|
|
224
|
+
chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
|
|
225
|
+
...opt,
|
|
226
|
+
}));
|
|
198
227
|
this.objectMode = false;
|
|
199
228
|
return this;
|
|
200
229
|
}
|
|
@@ -1,15 +1,8 @@
|
|
|
1
1
|
import { Writable } from 'node:stream';
|
|
2
|
-
import type { DeferredPromise } from '@naturalcycles/js-lib/promise';
|
|
3
2
|
import type { TransformOptions } from '../stream.model.js';
|
|
4
|
-
export interface WritableVoidOptions extends TransformOptions {
|
|
5
|
-
/**
|
|
6
|
-
* If set - it will be Resolved when the Stream is done (after final.cb)
|
|
7
|
-
*/
|
|
8
|
-
streamDone?: DeferredPromise;
|
|
9
|
-
}
|
|
10
3
|
/**
|
|
11
4
|
* Use as a "null-terminator" of stream.pipeline.
|
|
12
5
|
* It consumes the stream as quickly as possible without doing anything.
|
|
13
6
|
* Put it in the end of your pipeline in case it ends with Transform that needs a consumer.
|
|
14
7
|
*/
|
|
15
|
-
export declare function writableVoid(opt?:
|
|
8
|
+
export declare function writableVoid(opt?: TransformOptions): Writable;
|
package/package.json
CHANGED
package/src/stream/index.ts
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
export * from './ndjson/createReadStreamAsNDJSON.js'
|
|
2
|
-
export * from './ndjson/createWriteStreamAsNDJSON.js'
|
|
3
1
|
export * from './ndjson/ndjson.model.js'
|
|
4
2
|
export * from './ndjson/ndjsonMap.js'
|
|
5
|
-
export * from './ndjson/ndjsonStreamForEach.js'
|
|
6
3
|
export * from './ndjson/transformJsonParse.js'
|
|
7
4
|
export * from './ndjson/transformToNDJson.js'
|
|
8
5
|
export * from './pipeline.js'
|
|
@@ -10,7 +7,6 @@ export * from './progressLogger.js'
|
|
|
10
7
|
export * from './readable/readableCombined.js'
|
|
11
8
|
export * from './readable/readableCreate.js'
|
|
12
9
|
export * from './readable/readableFromArray.js'
|
|
13
|
-
export * from './readable/readableToArray.js'
|
|
14
10
|
export * from './stream.model.js'
|
|
15
11
|
export * from './transform/transformChunk.js'
|
|
16
12
|
export * from './transform/transformFilter.js'
|
|
@@ -30,7 +26,6 @@ export * from './transform/transformToArray.js'
|
|
|
30
26
|
export * from './transform/worker/baseWorkerClass.js'
|
|
31
27
|
export * from './transform/worker/transformMultiThreaded.js'
|
|
32
28
|
export * from './transform/worker/transformMultiThreaded.model.js'
|
|
33
|
-
export * from './writable/writableForEach.js'
|
|
34
29
|
export * from './writable/writableFork.js'
|
|
35
30
|
export * from './writable/writablePushToArray.js'
|
|
36
31
|
export * from './writable/writableVoid.js'
|
|
@@ -21,6 +21,8 @@ export interface NDJSONMapOptions<IN = any, OUT = IN>
|
|
|
21
21
|
/**
|
|
22
22
|
* Unzips input file automatically, if it ends with `.gz`.
|
|
23
23
|
* Zips output file automatically, if it ends with `.gz`.
|
|
24
|
+
*
|
|
25
|
+
* @deprecated use Pipeline directly
|
|
24
26
|
*/
|
|
25
27
|
export async function ndjsonMap<IN = any, OUT = any>(
|
|
26
28
|
mapper: AbortableAsyncMapper<IN, OUT>,
|
package/src/stream/pipeline.ts
CHANGED
|
@@ -17,7 +17,6 @@ import type {
|
|
|
17
17
|
SKIP,
|
|
18
18
|
} from '@naturalcycles/js-lib/types'
|
|
19
19
|
import { fs2 } from '../fs/fs2.js'
|
|
20
|
-
import { createReadStreamAsNDJSON } from './ndjson/createReadStreamAsNDJSON.js'
|
|
21
20
|
import { transformJsonParse } from './ndjson/transformJsonParse.js'
|
|
22
21
|
import { transformToNDJson } from './ndjson/transformToNDJson.js'
|
|
23
22
|
import type {
|
|
@@ -83,12 +82,26 @@ export class Pipeline<T> {
|
|
|
83
82
|
return new Pipeline(Readable.from(input))
|
|
84
83
|
}
|
|
85
84
|
|
|
86
|
-
static
|
|
87
|
-
|
|
85
|
+
static fromNDJsonFile<T>(sourceFilePath: string): Pipeline<T> {
|
|
86
|
+
fs2.requireFileToExist(sourceFilePath)
|
|
87
|
+
|
|
88
|
+
const p = Pipeline.fromFile(sourceFilePath)
|
|
89
|
+
if (sourceFilePath.endsWith('.gz')) {
|
|
90
|
+
p.gunzip()
|
|
91
|
+
}
|
|
92
|
+
return p.parseJson()
|
|
93
|
+
// return stream.pipe(transformSplitOnNewline()).map(line => JSON.parse(line))
|
|
94
|
+
// For some crazy reason .map is much faster than transformJsonParse!
|
|
95
|
+
// ~5000 vs ~4000 rps !!!
|
|
88
96
|
}
|
|
89
97
|
|
|
90
|
-
static
|
|
91
|
-
return new Pipeline(
|
|
98
|
+
static fromFile(sourceFilePath: string): Pipeline<Uint8Array> {
|
|
99
|
+
return new Pipeline(
|
|
100
|
+
fs2.createReadStream(sourceFilePath, {
|
|
101
|
+
highWaterMark: 64 * 1024, // no observed speedup
|
|
102
|
+
}),
|
|
103
|
+
false,
|
|
104
|
+
)
|
|
92
105
|
}
|
|
93
106
|
|
|
94
107
|
/**
|
|
@@ -257,14 +270,42 @@ export class Pipeline<T> {
|
|
|
257
270
|
return this as any
|
|
258
271
|
}
|
|
259
272
|
|
|
273
|
+
splitOnNewline(this: Pipeline<Uint8Array>): Pipeline<Buffer> {
|
|
274
|
+
// Input: objectMode=false - binary stream
|
|
275
|
+
// Output: objectMode=true - stream of Buffer objects (which are also strings?)
|
|
276
|
+
this.transforms.push(transformSplitOnNewline())
|
|
277
|
+
this.objectMode = true
|
|
278
|
+
return this as any
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
parseJson<TO = unknown>(
|
|
282
|
+
this: Pipeline<Buffer> | Pipeline<Uint8Array> | Pipeline<string>,
|
|
283
|
+
): Pipeline<TO> {
|
|
284
|
+
// Input: objectMode=false - takes a stream of strings one by one
|
|
285
|
+
// Output: objectMode=true - stream of json-parsed Objects
|
|
286
|
+
this.transforms.push(transformJsonParse())
|
|
287
|
+
this.objectMode = true
|
|
288
|
+
return this as any
|
|
289
|
+
}
|
|
290
|
+
|
|
260
291
|
gzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array> {
|
|
261
|
-
this.transforms.push(
|
|
292
|
+
this.transforms.push(
|
|
293
|
+
createGzip({
|
|
294
|
+
// chunkSize: 64 * 1024, // no observed speedup
|
|
295
|
+
...opt,
|
|
296
|
+
}),
|
|
297
|
+
)
|
|
262
298
|
this.objectMode = false
|
|
263
299
|
return this as any
|
|
264
300
|
}
|
|
265
301
|
|
|
266
302
|
gunzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array> {
|
|
267
|
-
this.transforms.push(
|
|
303
|
+
this.transforms.push(
|
|
304
|
+
createUnzip({
|
|
305
|
+
chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
|
|
306
|
+
...opt,
|
|
307
|
+
}),
|
|
308
|
+
)
|
|
268
309
|
this.objectMode = false
|
|
269
310
|
return this as any
|
|
270
311
|
}
|
|
@@ -1,20 +1,12 @@
|
|
|
1
1
|
import { Writable } from 'node:stream'
|
|
2
|
-
import type { DeferredPromise } from '@naturalcycles/js-lib/promise'
|
|
3
2
|
import type { TransformOptions } from '../stream.model.js'
|
|
4
3
|
|
|
5
|
-
export interface WritableVoidOptions extends TransformOptions {
|
|
6
|
-
/**
|
|
7
|
-
* If set - it will be Resolved when the Stream is done (after final.cb)
|
|
8
|
-
*/
|
|
9
|
-
streamDone?: DeferredPromise
|
|
10
|
-
}
|
|
11
|
-
|
|
12
4
|
/**
|
|
13
5
|
* Use as a "null-terminator" of stream.pipeline.
|
|
14
6
|
* It consumes the stream as quickly as possible without doing anything.
|
|
15
7
|
* Put it in the end of your pipeline in case it ends with Transform that needs a consumer.
|
|
16
8
|
*/
|
|
17
|
-
export function writableVoid(opt:
|
|
9
|
+
export function writableVoid(opt: TransformOptions = {}): Writable {
|
|
18
10
|
return new Writable({
|
|
19
11
|
objectMode: true,
|
|
20
12
|
...opt,
|
|
@@ -23,7 +15,6 @@ export function writableVoid(opt: WritableVoidOptions = {}): Writable {
|
|
|
23
15
|
},
|
|
24
16
|
final(cb) {
|
|
25
17
|
cb()
|
|
26
|
-
opt.streamDone?.resolve()
|
|
27
18
|
},
|
|
28
19
|
})
|
|
29
20
|
}
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import type { ReadableTyped } from '../stream.model.js';
|
|
2
|
-
/**
|
|
3
|
-
Returns a Readable of [already parsed] NDJSON objects.
|
|
4
|
-
|
|
5
|
-
Replaces a list of operations:
|
|
6
|
-
- requireFileToExist(inputPath)
|
|
7
|
-
- fs.createReadStream
|
|
8
|
-
- createUnzip (only if path ends with '.gz')
|
|
9
|
-
- transformSplitOnNewline
|
|
10
|
-
- transformJsonParse
|
|
11
|
-
|
|
12
|
-
To add a Limit or Offset: just add .take() or .drop(), example:
|
|
13
|
-
|
|
14
|
-
_pipeline([
|
|
15
|
-
fs2.createReadStreamAsNDJSON().take(100),
|
|
16
|
-
transformX(),
|
|
17
|
-
])
|
|
18
|
-
*/
|
|
19
|
-
export declare function createReadStreamAsNDJSON<ROW = any>(inputPath: string): ReadableTyped<ROW>;
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import { createUnzip } from 'node:zlib';
|
|
2
|
-
import { fs2 } from '../../fs/fs2.js';
|
|
3
|
-
import { transformSplitOnNewline } from '../transform/transformSplit.js';
|
|
4
|
-
/**
|
|
5
|
-
Returns a Readable of [already parsed] NDJSON objects.
|
|
6
|
-
|
|
7
|
-
Replaces a list of operations:
|
|
8
|
-
- requireFileToExist(inputPath)
|
|
9
|
-
- fs.createReadStream
|
|
10
|
-
- createUnzip (only if path ends with '.gz')
|
|
11
|
-
- transformSplitOnNewline
|
|
12
|
-
- transformJsonParse
|
|
13
|
-
|
|
14
|
-
To add a Limit or Offset: just add .take() or .drop(), example:
|
|
15
|
-
|
|
16
|
-
_pipeline([
|
|
17
|
-
fs2.createReadStreamAsNDJSON().take(100),
|
|
18
|
-
transformX(),
|
|
19
|
-
])
|
|
20
|
-
*/
|
|
21
|
-
export function createReadStreamAsNDJSON(inputPath) {
|
|
22
|
-
fs2.requireFileToExist(inputPath);
|
|
23
|
-
let stream = fs2
|
|
24
|
-
.createReadStream(inputPath, {
|
|
25
|
-
highWaterMark: 64 * 1024, // no observed speedup
|
|
26
|
-
})
|
|
27
|
-
.on('error', err => stream.emit('error', err));
|
|
28
|
-
if (inputPath.endsWith('.gz')) {
|
|
29
|
-
stream = stream.pipe(createUnzip({
|
|
30
|
-
chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
|
|
31
|
-
}));
|
|
32
|
-
}
|
|
33
|
-
return stream.pipe(transformSplitOnNewline()).map(line => JSON.parse(line));
|
|
34
|
-
// For some crazy reason .map is much faster than transformJsonParse!
|
|
35
|
-
// ~5000 vs ~4000 rps !!!
|
|
36
|
-
// .on('error', err => stream.emit('error', err))
|
|
37
|
-
// .pipe(transformJsonParse<ROW>())
|
|
38
|
-
}
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import type { TransformTyped } from '../stream.model.js';
|
|
2
|
-
/**
|
|
3
|
-
Returns an array of Transforms, so that you can ...destructure them at
|
|
4
|
-
the end of the _pipeline.
|
|
5
|
-
|
|
6
|
-
Replaces a list of operations:
|
|
7
|
-
- transformToNDJson
|
|
8
|
-
- createGzip (only if path ends with '.gz')
|
|
9
|
-
- fs.createWriteStream
|
|
10
|
-
*/
|
|
11
|
-
export declare function createWriteStreamAsNDJSON(outputPath: string): TransformTyped<any, any>[];
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import { createGzip } from 'node:zlib';
|
|
2
|
-
import { _isTruthy } from '@naturalcycles/js-lib';
|
|
3
|
-
import { fs2 } from '../../fs/fs2.js';
|
|
4
|
-
import { transformToNDJson } from './transformToNDJson.js';
|
|
5
|
-
/**
|
|
6
|
-
Returns an array of Transforms, so that you can ...destructure them at
|
|
7
|
-
the end of the _pipeline.
|
|
8
|
-
|
|
9
|
-
Replaces a list of operations:
|
|
10
|
-
- transformToNDJson
|
|
11
|
-
- createGzip (only if path ends with '.gz')
|
|
12
|
-
- fs.createWriteStream
|
|
13
|
-
*/
|
|
14
|
-
export function createWriteStreamAsNDJSON(outputPath) {
|
|
15
|
-
fs2.ensureFile(outputPath);
|
|
16
|
-
return [
|
|
17
|
-
transformToNDJson(),
|
|
18
|
-
outputPath.endsWith('.gz')
|
|
19
|
-
? createGzip({
|
|
20
|
-
// chunkSize: 64 * 1024, // no observed speedup
|
|
21
|
-
})
|
|
22
|
-
: undefined,
|
|
23
|
-
fs2.createWriteStream(outputPath, {
|
|
24
|
-
// highWaterMark: 64 * 1024, // no observed speedup
|
|
25
|
-
}),
|
|
26
|
-
].filter(_isTruthy);
|
|
27
|
-
}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
import type { AbortableAsyncMapper } from '@naturalcycles/js-lib/types';
|
|
2
|
-
import type { TransformLogProgressOptions } from '../transform/transformLogProgress.js';
|
|
3
|
-
import type { TransformMapOptions } from '../transform/transformMap.js';
|
|
4
|
-
export interface NDJSONStreamForEachOptions<IN = any> extends TransformMapOptions<IN, void>, TransformLogProgressOptions<IN> {
|
|
5
|
-
inputFilePath: string;
|
|
6
|
-
}
|
|
7
|
-
/**
|
|
8
|
-
* Convenience function to `forEach` through an ndjson file.
|
|
9
|
-
*/
|
|
10
|
-
export declare function ndjsonStreamForEach<T>(mapper: AbortableAsyncMapper<T, void>, opt: NDJSONStreamForEachOptions<T>): Promise<void>;
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import { ErrorMode } from '@naturalcycles/js-lib/error/errorMode.js';
|
|
2
|
-
import { Pipeline } from '../pipeline.js';
|
|
3
|
-
/**
|
|
4
|
-
* Convenience function to `forEach` through an ndjson file.
|
|
5
|
-
*/
|
|
6
|
-
export async function ndjsonStreamForEach(mapper, opt) {
|
|
7
|
-
await Pipeline.fromNDJsonFile(opt.inputFilePath)
|
|
8
|
-
.map(mapper, {
|
|
9
|
-
errorMode: ErrorMode.THROW_AGGREGATED,
|
|
10
|
-
...opt,
|
|
11
|
-
predicate: () => true, // to log progress properly
|
|
12
|
-
})
|
|
13
|
-
.logProgress(opt)
|
|
14
|
-
.run();
|
|
15
|
-
}
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import type { ReadableTyped } from '../stream.model.js';
|
|
2
|
-
/**
|
|
3
|
-
* Convenience function to read the whole Readable stream into Array (in-memory)
|
|
4
|
-
* and return that array.
|
|
5
|
-
*
|
|
6
|
-
* Native `await readable.toArray()` can be used instead.
|
|
7
|
-
* This helper is kept for type-safery support.
|
|
8
|
-
*/
|
|
9
|
-
export declare function readableToArray<T>(readable: ReadableTyped<T>): Promise<T[]>;
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Convenience function to read the whole Readable stream into Array (in-memory)
|
|
3
|
-
* and return that array.
|
|
4
|
-
*
|
|
5
|
-
* Native `await readable.toArray()` can be used instead.
|
|
6
|
-
* This helper is kept for type-safery support.
|
|
7
|
-
*/
|
|
8
|
-
export async function readableToArray(readable) {
|
|
9
|
-
return await readable.toArray();
|
|
10
|
-
// const a: T[] = []
|
|
11
|
-
//
|
|
12
|
-
// for await (const item of readable) {
|
|
13
|
-
// a.push(item)
|
|
14
|
-
// }
|
|
15
|
-
//
|
|
16
|
-
// return a
|
|
17
|
-
}
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import type { AsyncIndexedMapper, IndexedMapper } from '@naturalcycles/js-lib/types';
|
|
2
|
-
import type { WritableTyped } from '../stream.model.js';
|
|
3
|
-
import { type TransformMapOptions } from '../transform/transformMap.js';
|
|
4
|
-
import { type TransformMapSyncOptions } from '../transform/transformMapSync.js';
|
|
5
|
-
/**
|
|
6
|
-
* Just an alias to transformMap that declares OUT as void.
|
|
7
|
-
*/
|
|
8
|
-
export declare function writableForEach<IN = any>(mapper: AsyncIndexedMapper<IN, void>, opt?: TransformMapOptions<IN, void>): WritableTyped<IN>;
|
|
9
|
-
/**
|
|
10
|
-
* Just an alias to transformMap that declares OUT as void.
|
|
11
|
-
*/
|
|
12
|
-
export declare function writableForEachSync<IN = any>(mapper: IndexedMapper<IN, void>, opt?: TransformMapSyncOptions<IN, void>): WritableTyped<IN>;
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import { _passNothingPredicate } from '@naturalcycles/js-lib/types';
|
|
2
|
-
import { transformMap } from '../transform/transformMap.js';
|
|
3
|
-
import { transformMapSync } from '../transform/transformMapSync.js';
|
|
4
|
-
/**
|
|
5
|
-
* Just an alias to transformMap that declares OUT as void.
|
|
6
|
-
*/
|
|
7
|
-
export function writableForEach(mapper, opt = {}) {
|
|
8
|
-
return transformMap(mapper, { ...opt, predicate: _passNothingPredicate });
|
|
9
|
-
}
|
|
10
|
-
/**
|
|
11
|
-
* Just an alias to transformMap that declares OUT as void.
|
|
12
|
-
*/
|
|
13
|
-
export function writableForEachSync(mapper, opt = {}) {
|
|
14
|
-
return transformMapSync(mapper, { ...opt, predicate: _passNothingPredicate });
|
|
15
|
-
}
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
import type { Readable } from 'node:stream';
|
|
2
|
-
import type { WritableTyped } from '../stream.model.js';
|
|
3
|
-
/**
|
|
4
|
-
* Allows to stop the Readable stream after the pipeline has processed X number of rows.
|
|
5
|
-
* It counts OUTPUT rows (not input), because this Writable is always at the end of the Pipeline.
|
|
6
|
-
* It ensures that everything has been processed before issuing a STOP on the readable.
|
|
7
|
-
*/
|
|
8
|
-
export declare function writableLimit<T>(readable: Readable, limit: number): WritableTyped<T>;
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import { Writable } from 'node:stream';
|
|
2
|
-
/**
|
|
3
|
-
* Allows to stop the Readable stream after the pipeline has processed X number of rows.
|
|
4
|
-
* It counts OUTPUT rows (not input), because this Writable is always at the end of the Pipeline.
|
|
5
|
-
* It ensures that everything has been processed before issuing a STOP on the readable.
|
|
6
|
-
*/
|
|
7
|
-
export function writableLimit(readable, limit) {
|
|
8
|
-
let i = 0;
|
|
9
|
-
return new Writable({
|
|
10
|
-
objectMode: true,
|
|
11
|
-
write(_chunk, _, cb) {
|
|
12
|
-
if (limit === 0)
|
|
13
|
-
return cb(); // no limit, just passthrough
|
|
14
|
-
i++;
|
|
15
|
-
if (i === limit) {
|
|
16
|
-
console.log(`writableLimit of ${limit} reached`);
|
|
17
|
-
readable.destroy();
|
|
18
|
-
cb(); // do we need it?
|
|
19
|
-
}
|
|
20
|
-
else {
|
|
21
|
-
cb(); // passthrough
|
|
22
|
-
}
|
|
23
|
-
},
|
|
24
|
-
});
|
|
25
|
-
}
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
import { createUnzip } from 'node:zlib'
|
|
2
|
-
import { fs2 } from '../../fs/fs2.js'
|
|
3
|
-
import type { ReadableTyped } from '../stream.model.js'
|
|
4
|
-
import { transformSplitOnNewline } from '../transform/transformSplit.js'
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
Returns a Readable of [already parsed] NDJSON objects.
|
|
8
|
-
|
|
9
|
-
Replaces a list of operations:
|
|
10
|
-
- requireFileToExist(inputPath)
|
|
11
|
-
- fs.createReadStream
|
|
12
|
-
- createUnzip (only if path ends with '.gz')
|
|
13
|
-
- transformSplitOnNewline
|
|
14
|
-
- transformJsonParse
|
|
15
|
-
|
|
16
|
-
To add a Limit or Offset: just add .take() or .drop(), example:
|
|
17
|
-
|
|
18
|
-
_pipeline([
|
|
19
|
-
fs2.createReadStreamAsNDJSON().take(100),
|
|
20
|
-
transformX(),
|
|
21
|
-
])
|
|
22
|
-
*/
|
|
23
|
-
|
|
24
|
-
export function createReadStreamAsNDJSON<ROW = any>(inputPath: string): ReadableTyped<ROW> {
|
|
25
|
-
fs2.requireFileToExist(inputPath)
|
|
26
|
-
|
|
27
|
-
let stream: ReadableTyped<ROW> = fs2
|
|
28
|
-
.createReadStream(inputPath, {
|
|
29
|
-
highWaterMark: 64 * 1024, // no observed speedup
|
|
30
|
-
})
|
|
31
|
-
.on('error', err => stream.emit('error', err))
|
|
32
|
-
|
|
33
|
-
if (inputPath.endsWith('.gz')) {
|
|
34
|
-
stream = stream.pipe(
|
|
35
|
-
createUnzip({
|
|
36
|
-
chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
|
|
37
|
-
}),
|
|
38
|
-
)
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
return stream.pipe(transformSplitOnNewline()).map(line => JSON.parse(line))
|
|
42
|
-
// For some crazy reason .map is much faster than transformJsonParse!
|
|
43
|
-
// ~5000 vs ~4000 rps !!!
|
|
44
|
-
// .on('error', err => stream.emit('error', err))
|
|
45
|
-
// .pipe(transformJsonParse<ROW>())
|
|
46
|
-
}
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import { createGzip } from 'node:zlib'
|
|
2
|
-
import { _isTruthy } from '@naturalcycles/js-lib'
|
|
3
|
-
import { fs2 } from '../../fs/fs2.js'
|
|
4
|
-
import type { TransformTyped } from '../stream.model.js'
|
|
5
|
-
import { transformToNDJson } from './transformToNDJson.js'
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
Returns an array of Transforms, so that you can ...destructure them at
|
|
9
|
-
the end of the _pipeline.
|
|
10
|
-
|
|
11
|
-
Replaces a list of operations:
|
|
12
|
-
- transformToNDJson
|
|
13
|
-
- createGzip (only if path ends with '.gz')
|
|
14
|
-
- fs.createWriteStream
|
|
15
|
-
*/
|
|
16
|
-
export function createWriteStreamAsNDJSON(outputPath: string): TransformTyped<any, any>[] {
|
|
17
|
-
fs2.ensureFile(outputPath)
|
|
18
|
-
|
|
19
|
-
return [
|
|
20
|
-
transformToNDJson(),
|
|
21
|
-
outputPath.endsWith('.gz')
|
|
22
|
-
? createGzip({
|
|
23
|
-
// chunkSize: 64 * 1024, // no observed speedup
|
|
24
|
-
})
|
|
25
|
-
: undefined,
|
|
26
|
-
fs2.createWriteStream(outputPath, {
|
|
27
|
-
// highWaterMark: 64 * 1024, // no observed speedup
|
|
28
|
-
}),
|
|
29
|
-
].filter(_isTruthy) as TransformTyped<any, any>[]
|
|
30
|
-
}
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import { ErrorMode } from '@naturalcycles/js-lib/error/errorMode.js'
|
|
2
|
-
import type { AbortableAsyncMapper } from '@naturalcycles/js-lib/types'
|
|
3
|
-
import { Pipeline } from '../pipeline.js'
|
|
4
|
-
import type { TransformLogProgressOptions } from '../transform/transformLogProgress.js'
|
|
5
|
-
import type { TransformMapOptions } from '../transform/transformMap.js'
|
|
6
|
-
|
|
7
|
-
export interface NDJSONStreamForEachOptions<IN = any>
|
|
8
|
-
extends TransformMapOptions<IN, void>,
|
|
9
|
-
TransformLogProgressOptions<IN> {
|
|
10
|
-
inputFilePath: string
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* Convenience function to `forEach` through an ndjson file.
|
|
15
|
-
*/
|
|
16
|
-
export async function ndjsonStreamForEach<T>(
|
|
17
|
-
mapper: AbortableAsyncMapper<T, void>,
|
|
18
|
-
opt: NDJSONStreamForEachOptions<T>,
|
|
19
|
-
): Promise<void> {
|
|
20
|
-
await Pipeline.fromNDJsonFile<T>(opt.inputFilePath)
|
|
21
|
-
.map(mapper, {
|
|
22
|
-
errorMode: ErrorMode.THROW_AGGREGATED,
|
|
23
|
-
...opt,
|
|
24
|
-
predicate: () => true, // to log progress properly
|
|
25
|
-
})
|
|
26
|
-
.logProgress(opt)
|
|
27
|
-
.run()
|
|
28
|
-
}
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import type { ReadableTyped } from '../stream.model.js'
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Convenience function to read the whole Readable stream into Array (in-memory)
|
|
5
|
-
* and return that array.
|
|
6
|
-
*
|
|
7
|
-
* Native `await readable.toArray()` can be used instead.
|
|
8
|
-
* This helper is kept for type-safery support.
|
|
9
|
-
*/
|
|
10
|
-
export async function readableToArray<T>(readable: ReadableTyped<T>): Promise<T[]> {
|
|
11
|
-
return await readable.toArray()
|
|
12
|
-
// const a: T[] = []
|
|
13
|
-
//
|
|
14
|
-
// for await (const item of readable) {
|
|
15
|
-
// a.push(item)
|
|
16
|
-
// }
|
|
17
|
-
//
|
|
18
|
-
// return a
|
|
19
|
-
}
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import type { AsyncIndexedMapper, IndexedMapper } from '@naturalcycles/js-lib/types'
|
|
2
|
-
import { _passNothingPredicate } from '@naturalcycles/js-lib/types'
|
|
3
|
-
import type { WritableTyped } from '../stream.model.js'
|
|
4
|
-
import { transformMap, type TransformMapOptions } from '../transform/transformMap.js'
|
|
5
|
-
import { transformMapSync, type TransformMapSyncOptions } from '../transform/transformMapSync.js'
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Just an alias to transformMap that declares OUT as void.
|
|
9
|
-
*/
|
|
10
|
-
export function writableForEach<IN = any>(
|
|
11
|
-
mapper: AsyncIndexedMapper<IN, void>,
|
|
12
|
-
opt: TransformMapOptions<IN, void> = {},
|
|
13
|
-
): WritableTyped<IN> {
|
|
14
|
-
return transformMap<IN, void>(mapper, { ...opt, predicate: _passNothingPredicate })
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
/**
|
|
18
|
-
* Just an alias to transformMap that declares OUT as void.
|
|
19
|
-
*/
|
|
20
|
-
export function writableForEachSync<IN = any>(
|
|
21
|
-
mapper: IndexedMapper<IN, void>,
|
|
22
|
-
opt: TransformMapSyncOptions<IN, void> = {},
|
|
23
|
-
): WritableTyped<IN> {
|
|
24
|
-
return transformMapSync<IN, void>(mapper, { ...opt, predicate: _passNothingPredicate })
|
|
25
|
-
}
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
import type { Readable } from 'node:stream'
|
|
2
|
-
import { Writable } from 'node:stream'
|
|
3
|
-
import type { WritableTyped } from '../stream.model.js'
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Allows to stop the Readable stream after the pipeline has processed X number of rows.
|
|
7
|
-
* It counts OUTPUT rows (not input), because this Writable is always at the end of the Pipeline.
|
|
8
|
-
* It ensures that everything has been processed before issuing a STOP on the readable.
|
|
9
|
-
*/
|
|
10
|
-
export function writableLimit<T>(readable: Readable, limit: number): WritableTyped<T> {
|
|
11
|
-
let i = 0
|
|
12
|
-
|
|
13
|
-
return new Writable({
|
|
14
|
-
objectMode: true,
|
|
15
|
-
write(_chunk, _, cb) {
|
|
16
|
-
if (limit === 0) return cb() // no limit, just passthrough
|
|
17
|
-
|
|
18
|
-
i++
|
|
19
|
-
|
|
20
|
-
if (i === limit) {
|
|
21
|
-
console.log(`writableLimit of ${limit} reached`)
|
|
22
|
-
readable.destroy()
|
|
23
|
-
cb() // do we need it?
|
|
24
|
-
} else {
|
|
25
|
-
cb() // passthrough
|
|
26
|
-
}
|
|
27
|
-
},
|
|
28
|
-
})
|
|
29
|
-
}
|