@naturalcycles/nodejs-lib 15.37.0 → 15.37.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,6 @@ export * from './transform/transformLogProgress.js';
  export * from './transform/transformMap.js';
  export * from './transform/transformMapSimple.js';
  export * from './transform/transformMapSync.js';
- export * from './transform/transformMultiFork.js';
  export * from './transform/transformNoOp.js';
  export * from './transform/transformOffset.js';
  export * from './transform/transformSplit.js';
@@ -18,7 +18,6 @@ export * from './transform/transformLogProgress.js';
  export * from './transform/transformMap.js';
  export * from './transform/transformMapSimple.js';
  export * from './transform/transformMapSync.js';
- export * from './transform/transformMultiFork.js';
  export * from './transform/transformNoOp.js';
  export * from './transform/transformOffset.js';
  export * from './transform/transformSplit.js';
@@ -1,15 +1,13 @@
- import type { Predicate } from '@naturalcycles/js-lib/types';
- import { type TransformOptions, type TransformTyped, type WritableTyped } from '../index.js';
- export interface WritableChunkOptions<T> extends TransformOptions {
- splitPredicate: Predicate<T>;
- transformFactories?: (() => TransformTyped<T, T>)[];
- writableFactory: (splitIndex: number) => WritableTyped<T>;
- }
+ import type { NonNegativeInteger, Predicate } from '@naturalcycles/js-lib/types';
+ import { Pipeline } from '../pipeline.js';
+ import type { TransformOptions, WritableTyped } from '../stream.model.js';
  /**
- * Allows to split the output to multiple files by splitting into chunks
- * based on `shouldSplitFn`.
- * `transformFactories` are used to create a chain of transforms for each chunk.
- * It was meant to be used with createGzip, which needs a proper start and end for each chunk
- * for the output file to be a valid gzip file.
+ * Allows to "split the stream" into chunks, and attach a new Pipeline to
+ * each of the chunks.
+ *
+ * Example use case: you want to write to Cloud Storage, 1000 rows per file,
+ * each file needs its own destination Pipeline.
+ *
+ * @experimental
  */
- export declare function writableChunk<T>(opt: WritableChunkOptions<T>): WritableTyped<T>;
+ export declare function writableChunk<T>(splitPredicate: Predicate<T>, fn: (pipeline: Pipeline<T>, splitIndex: NonNegativeInteger) => Promise<void>, opt?: TransformOptions): WritableTyped<T>;
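
For reference, a minimal usage sketch of the new writableChunk signature follows. It is not taken from the package; it assumes the '@naturalcycles/nodejs-lib/stream' subpath export and a hypothetical Row type, and how each per-chunk Pipeline is consumed depends on the Pipeline API, which this diff does not show:

// Minimal usage sketch (assumption: the 'stream' subpath export; Row is hypothetical)
import { writableChunk } from '@naturalcycles/nodejs-lib/stream'

interface Row {
  id: string
}

const writable = writableChunk<Row>(
  // splitPredicate: start a new chunk after every 1000 rows (index starts at 1)
  (_row, index) => index % 1000 === 0,
  // fn: called with a fresh Pipeline<Row> and the 0-based splitIndex for every chunk;
  // consume the chunk here, e.g. gzip it and upload it as file number `splitIndex`
  async (pipeline, splitIndex) => {
    // ... attach the per-chunk destination to `pipeline` (Pipeline API not shown in this diff)
  },
)

// `writable` is then used as the terminal Writable of the source stream,
// e.g. sourceReadable.pipe(writable)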
@@ -1,83 +1,74 @@
  import { Writable } from 'node:stream';
- import { _first, _last } from '@naturalcycles/js-lib/array';
  import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log';
- import { _deepCopy } from '@naturalcycles/js-lib/object';
- import { transformNoOp, } from '../index.js';
+ import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js';
+ import { Pipeline } from '../pipeline.js';
+ import { createReadable } from '../readable/createReadable.js';
  /**
- * Allows to split the output to multiple files by splitting into chunks
- * based on `shouldSplitFn`.
- * `transformFactories` are used to create a chain of transforms for each chunk.
- * It was meant to be used with createGzip, which needs a proper start and end for each chunk
- * for the output file to be a valid gzip file.
+ * Allows to "split the stream" into chunks, and attach a new Pipeline to
+ * each of the chunks.
+ *
+ * Example use case: you want to write to Cloud Storage, 1000 rows per file,
+ * each file needs its own destination Pipeline.
+ *
+ * @experimental
  */
- export function writableChunk(opt) {
- const { highWaterMark, splitPredicate, transformFactories = [], writableFactory } = opt;
+ export function writableChunk(splitPredicate, fn, opt = {}) {
+ const { objectMode = true, highWaterMark } = opt;
  const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel);
  let indexWritten = 0;
- let currentSplitIndex = 0;
- // We don't want to have an empty chain, so we add a no-op transform
- if (transformFactories.length === 0) {
- transformFactories.push((transformNoOp));
- }
- // Create the transforms as well as the Writable, and pipe them together
- let currentWritable = writableFactory(currentSplitIndex);
- let transforms = transformFactories.map(f => f());
- generateTuples(transforms).forEach(([t1, t2]) => t1.pipe(t2));
- _last(transforms).pipe(currentWritable);
- // We keep track of all the pending writables, so we can await them in the final method
- const writablesFinish = [awaitFinish(currentWritable)];
+ let splitIndex = 0;
+ let lock;
+ let fork = createNewFork();
  return new Writable({
- objectMode: true,
+ objectMode,
  highWaterMark,
- write(chunk, _, cb) {
- // pipe will take care of piping the data through the different streams correctly
- transforms[0].write(chunk, cb);
+ async write(chunk, _, cb) {
+ if (lock) {
+ // Forked pipeline is locked - let's wait for it to call _read
+ await lock;
+ // lock is undefined at this point
+ }
+ // pass to the "forked" pipeline
+ const shouldContinue = fork.push(chunk);
+ if (!shouldContinue && !lock) {
+ // Forked pipeline indicates that we should Pause
+ lock = pDefer();
+ logger.debug(`WritableChunk(${splitIndex}): pause`);
+ }
  if (splitPredicate(chunk, ++indexWritten)) {
- logger.log(`writableChunk: splitting at index ${currentSplitIndex}`);
- currentSplitIndex++;
- transforms[0].end();
- currentWritable = writableFactory(currentSplitIndex);
- transforms = transformFactories.map(f => f());
- generateTuples(transforms).forEach(([t1, t2]) => t1.pipe(t2));
- _last(transforms).pipe(currentWritable);
- writablesFinish.push(awaitFinish(currentWritable));
+ logger.log(`WritableChunk(${splitIndex}): splitting to ${splitIndex + 1}`);
+ splitIndex++;
+ fork.push(null);
+ lock?.resolve();
+ lock = undefined;
+ fork = createNewFork();
  }
+ // acknowledge that we've finished processing the input chunk
+ cb();
  },
  async final(cb) {
- try {
- transforms[0].end();
- await Promise.all(writablesFinish);
- logger.log('writableChunk: all writables are finished');
- cb();
- }
- catch (err) {
- cb(err);
- }
+ logger.log(`WritableChunk: final`);
+ // Pushing null "closes"/ends the secondary pipeline correctly
+ fork.push(null);
+ // Acknowledge that we've received `null` and passed it through to the fork
+ cb();
  },
  });
- }
- /**
- * This is a helper function to create a promise which resolves when the stream emits a 'finish'
- * event.
- * This is used to await all the writables in the final method of the writableChunk
- */
- async function awaitFinish(stream) {
- return await new Promise(resolve => {
- stream.on('finish', resolve);
- });
- }
- /**
- * Generates an array of [arr[i], arr[i+1]] tuples from the input array.
- * The resulting array will have a length of `arr.length - 1`.
- * ```ts
- * generateTuples([1, 2, 3, 4]) // [[1, 2], [2, 3], [3, 4]]
- * ```
- */
- function generateTuples(arr) {
- const tuples = [];
- const arrCopy = _deepCopy(arr);
- for (let i = 1; i < arrCopy.length; i++) {
- tuples.push([arrCopy[i - 1], arrCopy[i]]);
+ function createNewFork() {
+ const currentSplitIndex = splitIndex;
+ const readable = createReadable([], {}, () => {
+ // `_read` is called
+ if (!lock)
+ return;
+ // We had a lock - let's Resume
+ logger.debug(`WritableChunk(${currentSplitIndex}): resume`);
+ const lockCopy = lock;
+ lock = undefined;
+ lockCopy.resolve();
+ });
+ void fn(Pipeline.from(readable), currentSplitIndex).then(() => {
+ logger.log(`WritableChunk(${currentSplitIndex}): done`);
+ });
+ return readable;
  }
- return tuples;
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@naturalcycles/nodejs-lib",
  "type": "module",
- "version": "15.37.0",
+ "version": "15.37.2",
  "dependencies": {
  "@naturalcycles/js-lib": "^15",
  "@types/js-yaml": "^4",
@@ -18,7 +18,6 @@ export * from './transform/transformLogProgress.js'
  export * from './transform/transformMap.js'
  export * from './transform/transformMapSimple.js'
  export * from './transform/transformMapSync.js'
- export * from './transform/transformMultiFork.js'
  export * from './transform/transformNoOp.js'
  export * from './transform/transformOffset.js'
  export * from './transform/transformSplit.js'
@@ -1,104 +1,91 @@
  import { Writable } from 'node:stream'
- import { _first, _last } from '@naturalcycles/js-lib/array'
  import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log'
- import { _deepCopy } from '@naturalcycles/js-lib/object'
- import type { Predicate } from '@naturalcycles/js-lib/types'
- import {
- transformNoOp,
- type TransformOptions,
- type TransformTyped,
- type WritableTyped,
- } from '../index.js'
-
- export interface WritableChunkOptions<T> extends TransformOptions {
- splitPredicate: Predicate<T>
- transformFactories?: (() => TransformTyped<T, T>)[]
- writableFactory: (splitIndex: number) => WritableTyped<T>
- }
+ import { type DeferredPromise, pDefer } from '@naturalcycles/js-lib/promise/pDefer.js'
+ import type { NonNegativeInteger, Predicate } from '@naturalcycles/js-lib/types'
+ import { Pipeline } from '../pipeline.js'
+ import { createReadable } from '../readable/createReadable.js'
+ import type { ReadableTyped, TransformOptions, WritableTyped } from '../stream.model.js'

  /**
- * Allows to split the output to multiple files by splitting into chunks
- * based on `shouldSplitFn`.
- * `transformFactories` are used to create a chain of transforms for each chunk.
- * It was meant to be used with createGzip, which needs a proper start and end for each chunk
- * for the output file to be a valid gzip file.
+ * Allows to "split the stream" into chunks, and attach a new Pipeline to
+ * each of the chunks.
+ *
+ * Example use case: you want to write to Cloud Storage, 1000 rows per file,
+ * each file needs its own destination Pipeline.
+ *
+ * @experimental
  */
- export function writableChunk<T>(opt: WritableChunkOptions<T>): WritableTyped<T> {
- const { highWaterMark, splitPredicate, transformFactories = [], writableFactory } = opt
+ export function writableChunk<T>(
+ splitPredicate: Predicate<T>,
+ fn: (pipeline: Pipeline<T>, splitIndex: NonNegativeInteger) => Promise<void>,
+ opt: TransformOptions = {},
+ ): WritableTyped<T> {
+ const { objectMode = true, highWaterMark } = opt
  const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel)
-
  let indexWritten = 0
- let currentSplitIndex = 0
- // We don't want to have an empty chain, so we add a no-op transform
- if (transformFactories.length === 0) {
- transformFactories.push(transformNoOp<T>)
- }
-
- // Create the transforms as well as the Writable, and pipe them together
- let currentWritable = writableFactory(currentSplitIndex)
- let transforms = transformFactories.map(f => f())
- generateTuples(transforms).forEach(([t1, t2]) => t1.pipe(t2))
- _last(transforms).pipe(currentWritable)
+ let splitIndex = 0

- // We keep track of all the pending writables, so we can await them in the final method
- const writablesFinish: Promise<void>[] = [awaitFinish(currentWritable)]
+ let lock: DeferredPromise | undefined
+ let fork = createNewFork()

  return new Writable({
- objectMode: true,
+ objectMode,
  highWaterMark,
- write(chunk: T, _, cb) {
- // pipe will take care of piping the data through the different streams correctly
- transforms[0]!.write(chunk, cb)
-
- if (splitPredicate(chunk, ++indexWritten)) {
- logger.log(`writableChunk: splitting at index ${currentSplitIndex}`)
- currentSplitIndex++
- transforms[0]!.end()
+ async write(chunk: T, _, cb) {
+ if (lock) {
+ // Forked pipeline is locked - let's wait for it to call _read
+ await lock
+ // lock is undefined at this point
+ }

- currentWritable = writableFactory(currentSplitIndex)
- transforms = transformFactories.map(f => f())
- generateTuples(transforms).forEach(([t1, t2]) => t1.pipe(t2))
- _last(transforms).pipe(currentWritable)
+ // pass to the "forked" pipeline
+ const shouldContinue = fork.push(chunk)
+ if (!shouldContinue && !lock) {
+ // Forked pipeline indicates that we should Pause
+ lock = pDefer()
+ logger.debug(`WritableChunk(${splitIndex}): pause`)
+ }

- writablesFinish.push(awaitFinish(currentWritable))
+ if (splitPredicate(chunk, ++indexWritten)) {
+ logger.log(`WritableChunk(${splitIndex}): splitting to ${splitIndex + 1}`)
+ splitIndex++
+ fork.push(null)
+ lock?.resolve()
+ lock = undefined
+ fork = createNewFork()
  }
+
+ // acknowledge that we've finished processing the input chunk
+ cb()
  },
  async final(cb) {
- try {
- transforms[0]!.end()
- await Promise.all(writablesFinish)
- logger.log('writableChunk: all writables are finished')
- cb()
- } catch (err) {
- cb(err as Error)
- }
+ logger.log(`WritableChunk: final`)
+
+ // Pushing null "closes"/ends the secondary pipeline correctly
+ fork.push(null)
+
+ // Acknowledge that we've received `null` and passed it through to the fork
+ cb()
  },
  })
- }

- /**
- * This is a helper function to create a promise which resolves when the stream emits a 'finish'
- * event.
- * This is used to await all the writables in the final method of the writableChunk
- */
- async function awaitFinish(stream: Writable): Promise<void> {
- return await new Promise(resolve => {
- stream.on('finish', resolve)
- })
- }
+ function createNewFork(): ReadableTyped<T> {
+ const currentSplitIndex = splitIndex

- /**
- * Generates an array of [arr[i], arr[i+1]] tuples from the input array.
- * The resulting array will have a length of `arr.length - 1`.
- * ```ts
- * generateTuples([1, 2, 3, 4]) // [[1, 2], [2, 3], [3, 4]]
- * ```
- */
- function generateTuples<T>(arr: T[]): [T, T][] {
- const tuples: [T, T][] = []
- const arrCopy = _deepCopy(arr)
- for (let i = 1; i < arrCopy.length; i++) {
- tuples.push([arrCopy[i - 1]!, arrCopy[i]!])
+ const readable = createReadable<T>([], {}, () => {
+ // `_read` is called
+ if (!lock) return
+ // We had a lock - let's Resume
+ logger.debug(`WritableChunk(${currentSplitIndex}): resume`)
+ const lockCopy = lock
+ lock = undefined
+ lockCopy.resolve()
+ })
+
+ void fn(Pipeline.from<T>(readable), currentSplitIndex).then(() => {
+ logger.log(`WritableChunk(${currentSplitIndex}): done`)
+ })
+
+ return readable
  }
- return tuples
  }
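
The pause/resume handshake in the source above couples the Writable to its current fork through a deferred promise. The following self-contained sketch shows the same pattern using only node:stream, with a hand-rolled deferred() standing in for js-lib's pDefer and a plain Readable standing in for createReadable; it illustrates the mechanism, not the library's actual implementation:

import { Readable, Writable } from 'node:stream'

// Hand-rolled stand-in for pDefer: a promise with an externally callable resolve
function deferred(): { promise: Promise<void>; resolve: () => void } {
  let resolve!: () => void
  const promise = new Promise<void>(r => {
    resolve = () => r()
  })
  return { promise, resolve }
}

let lock: ReturnType<typeof deferred> | undefined

// The "fork": a Readable we push into manually; its _read signals downstream demand
const fork = new Readable({
  objectMode: true,
  read() {
    // downstream asked for more data - release the producer if it was paused
    lock?.resolve()
    lock = undefined
  },
})

const producer = new Writable({
  objectMode: true,
  async write(chunk, _enc, cb) {
    if (lock) await lock.promise // previous push hit backpressure - wait for _read
    if (!fork.push(chunk) && !lock) {
      lock = deferred() // fork's internal buffer is full - pause until _read fires
    }
    cb()
  },
  final(cb) {
    fork.push(null) // end the fork when the source ends
    cb()
  },
})

// e.g. sourceReadable.pipe(producer), while `fork` feeds the per-chunk consumer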
@@ -1,14 +0,0 @@
- import type { Predicate } from '@naturalcycles/js-lib/types';
- import { Pipeline } from '../pipeline.js';
- import type { TransformOptions, TransformTyped } from '../stream.model.js';
- /**
- * Like transformFork, but allows to fork multiple times,
- * aka "split the stream" into chunks, and attach a Pipeline to
- * each of the chunks.
- *
- * Example use case: you want to write to Cloud Storage, 1000 rows per file,
- * each file needs its own destination Pipeline.
- *
- * @experimental
- */
- export declare function transformMultiFork<T>(splitPredicate: Predicate<T>, fn: (pipeline: Pipeline<T>) => Promise<void>, opt?: TransformOptions): TransformTyped<T, T>;
@@ -1,79 +0,0 @@
- import { Transform } from 'node:stream';
- import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log';
- import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js';
- import { Pipeline } from '../pipeline.js';
- import { createReadable } from '../readable/createReadable.js';
- /**
- * Like transformFork, but allows to fork multiple times,
- * aka "split the stream" into chunks, and attach a Pipeline to
- * each of the chunks.
- *
- * Example use case: you want to write to Cloud Storage, 1000 rows per file,
- * each file needs its own destination Pipeline.
- *
- * @experimental
- */
- export function transformMultiFork(splitPredicate, fn, opt = {}) {
- const { objectMode = true, highWaterMark } = opt;
- const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel);
- let indexWritten = 0;
- let splitIndex = 0;
- let lock;
- let fork = createNewFork();
- return new Transform({
- objectMode,
- highWaterMark,
- async transform(chunk, _, cb) {
- // pass through to the "main" pipeline
- // Main pipeline should handle backpressure "automatically",
- // so, we're not maintaining a Lock for it
- this.push(chunk);
- if (lock) {
- // Forked pipeline is locked - let's wait for it to call _read
- await lock;
- // lock is undefined at this point
- }
- // pass to the "forked" pipeline
- const shouldContinue = fork.push(chunk);
- if (!shouldContinue && !lock) {
- // Forked pipeline indicates that we should Pause
- lock = pDefer();
- logger.debug(`TransformMultiFork(${splitIndex}): pause`);
- }
- if (splitPredicate(chunk, ++indexWritten)) {
- logger.log(`TransformMultiFork(${splitIndex}): splitting to ${splitIndex + 1}`);
- splitIndex++;
- fork.push(null);
- lock?.resolve();
- lock = undefined;
- fork = createNewFork();
- }
- // acknowledge that we've finished processing the input chunk
- cb();
- },
- async final(cb) {
- logger.log(`TransformMultiFork: final`);
- // Pushing null "closes"/ends the secondary pipeline correctly
- fork.push(null);
- // Acknowledge that we've received `null` and passed it through to the fork
- cb();
- },
- });
- function createNewFork() {
- const mySplitIndex = splitIndex;
- const readable = createReadable([], {}, () => {
- // `_read` is called
- if (!lock)
- return;
- // We had a lock - let's Resume
- logger.debug(`TransformMultiFork(${mySplitIndex}): resume`);
- const lockCopy = lock;
- lock = undefined;
- lockCopy.resolve();
- });
- void fn(Pipeline.from(readable)).then(() => {
- logger.log(`TransformMultiFork(${mySplitIndex}): done`);
- });
- return readable;
- }
- }
@@ -1,97 +0,0 @@
- import { Transform } from 'node:stream'
- import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log'
- import { type DeferredPromise, pDefer } from '@naturalcycles/js-lib/promise/pDefer.js'
- import type { Predicate } from '@naturalcycles/js-lib/types'
- import { Pipeline } from '../pipeline.js'
- import { createReadable } from '../readable/createReadable.js'
- import type { ReadableTyped, TransformOptions, TransformTyped } from '../stream.model.js'
-
- /**
- * Like transformFork, but allows to fork multiple times,
- * aka "split the stream" into chunks, and attach a Pipeline to
- * each of the chunks.
- *
- * Example use case: you want to write to Cloud Storage, 1000 rows per file,
- * each file needs its own destination Pipeline.
- *
- * @experimental
- */
- export function transformMultiFork<T>(
- splitPredicate: Predicate<T>,
- fn: (pipeline: Pipeline<T>) => Promise<void>,
- opt: TransformOptions = {},
- ): TransformTyped<T, T> {
- const { objectMode = true, highWaterMark } = opt
- const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel)
- let indexWritten = 0
- let splitIndex = 0
-
- let lock: DeferredPromise | undefined
- let fork = createNewFork()
-
- return new Transform({
- objectMode,
- highWaterMark,
- async transform(chunk: T, _, cb) {
- // pass through to the "main" pipeline
- // Main pipeline should handle backpressure "automatically",
- // so, we're not maintaining a Lock for it
- this.push(chunk)
-
- if (lock) {
- // Forked pipeline is locked - let's wait for it to call _read
- await lock
- // lock is undefined at this point
- }
-
- // pass to the "forked" pipeline
- const shouldContinue = fork.push(chunk)
- if (!shouldContinue && !lock) {
- // Forked pipeline indicates that we should Pause
- lock = pDefer()
- logger.debug(`TransformMultiFork(${splitIndex}): pause`)
- }
-
- if (splitPredicate(chunk, ++indexWritten)) {
- logger.log(`TransformMultiFork(${splitIndex}): splitting to ${splitIndex + 1}`)
- splitIndex++
- fork.push(null)
- lock?.resolve()
- lock = undefined
- fork = createNewFork()
- }
-
- // acknowledge that we've finished processing the input chunk
- cb()
- },
- async final(cb) {
- logger.log(`TransformMultiFork: final`)
-
- // Pushing null "closes"/ends the secondary pipeline correctly
- fork.push(null)
-
- // Acknowledge that we've received `null` and passed it through to the fork
- cb()
- },
- })
-
- function createNewFork(): ReadableTyped<T> {
- const mySplitIndex = splitIndex
-
- const readable = createReadable<T>([], {}, () => {
- // `_read` is called
- if (!lock) return
- // We had a lock - let's Resume
- logger.debug(`TransformMultiFork(${mySplitIndex}): resume`)
- const lockCopy = lock
- lock = undefined
- lockCopy.resolve()
- })
-
- void fn(Pipeline.from<T>(readable)).then(() => {
- logger.log(`TransformMultiFork(${mySplitIndex}): done`)
- })
-
- return readable
- }
- }
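
Net effect of 15.37.0 → 15.37.2: transformMultiFork is removed and its split/fork/backpressure logic moves into writableChunk. writableChunk changes from an options-object API (WritableChunkOptions with transformFactories/writableFactory) to positional (splitPredicate, fn, opt?) arguments, becomes a terminal Writable rather than a pass-through Transform (it no longer pushes chunks to a main pipeline), and additionally passes the splitIndex to fn.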