@naturalcycles/nodejs-lib 13.21.0 → 13.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/fs/fs2.d.ts +3 -0
- package/dist/fs/fs2.js +61 -0
- package/dist/index.d.ts +0 -6
- package/dist/index.js +0 -6
- package/dist/stream/ndjson/ndjsonMap.js +4 -13
- package/dist/stream/ndjson/ndjsonStreamForEach.js +1 -9
- package/dist/stream/ndjson/transformJsonParse.d.ts +1 -1
- package/dist/stream/transform/transformSplit.d.ts +13 -3
- package/dist/stream/transform/transformSplit.js +128 -8
- package/package.json +1 -2
- package/src/fs/fs2.ts +74 -0
- package/src/index.ts +0 -6
- package/src/stream/ndjson/ndjsonMap.ts +5 -19
- package/src/stream/ndjson/ndjsonStreamForEach.ts +2 -13
- package/src/stream/ndjson/transformJsonParse.ts +2 -2
- package/src/stream/transform/transformSplit.ts +134 -8
- package/dist/stream/ndjson/ndJsonFileRead.d.ts +0 -5
- package/dist/stream/ndjson/ndJsonFileRead.js +0 -14
- package/dist/stream/ndjson/ndJsonFileWrite.d.ts +0 -5
- package/dist/stream/ndjson/ndJsonFileWrite.js +0 -12
- package/dist/stream/ndjson/pipelineFromNDJsonFile.d.ts +0 -24
- package/dist/stream/ndjson/pipelineFromNDJsonFile.js +0 -37
- package/dist/stream/ndjson/pipelineToNDJsonFile.d.ts +0 -27
- package/dist/stream/ndjson/pipelineToNDJsonFile.js +0 -42
- package/dist/stream/ndjson/streamToNDJsonFile.d.ts +0 -3
- package/dist/stream/ndjson/streamToNDJsonFile.js +0 -8
- package/dist/stream/transform/transformToString.d.ts +0 -12
- package/dist/stream/transform/transformToString.js +0 -24
- package/src/stream/ndjson/ndJsonFileRead.ts +0 -15
- package/src/stream/ndjson/ndJsonFileWrite.ts +0 -12
- package/src/stream/ndjson/pipelineFromNDJsonFile.ts +0 -62
- package/src/stream/ndjson/pipelineToNDJsonFile.ts +0 -70
- package/src/stream/ndjson/streamToNDJsonFile.ts +0 -9
- package/src/stream/transform/transformToString.ts +0 -22
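
The removed files above are the old NDJSON convenience layer: ndJsonFileRead, ndJsonFileWrite, pipelineFromNDJsonFile, pipelineToNDJsonFile, streamToNDJsonFile and transformToString are superseded by two new fs2 methods, createReadStreamAsNDJSON and createWriteStreamAsNDJSON, which bundle the file-existence check, optional (un)gzip, newline splitting and JSON (de)serialization. A minimal migration sketch (hypothetical file paths; _pipeline and transformMap are existing exports of this package):

import { _pipeline, fs2, transformMap } from '@naturalcycles/nodejs-lib'

// Before (removed in this release):
// await pipelineFromNDJsonFile([transformMap(mapper)], { filePath: 'in.ndjson.gz', gzip: true })

// After: a '.gz' suffix toggles gunzip/gzip automatically on both ends
await _pipeline([
  fs2.createReadStreamAsNDJSON('in.ndjson.gz'), // emits already-parsed rows
  transformMap(async row => ({ ...row, migrated: true })),
  fs2.createWriteStreamAsNDJSON('out.ndjson.gz'), // serializes to NDJSON and writes
])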
package/dist/fs/fs2.d.ts
CHANGED
@@ -4,6 +4,7 @@
 import type { RmOptions } from 'node:fs';
 import fs from 'node:fs';
 import { DumpOptions } from 'js-yaml';
+import { ReadableTyped, WritableTyped } from '../stream/stream.model';
 /**
  * fs2 conveniently groups filesystem functions together.
  * Supposed to be almost a drop-in replacement for these things together:
@@ -75,6 +76,8 @@ declare class FS2 {
     readdirAsync: typeof fs.promises.readdir;
     createWriteStream: typeof fs.createWriteStream;
     createReadStream: typeof fs.createReadStream;
+    createReadStreamAsNDJSON<ROW = any>(inputPath: string): ReadableTyped<ROW>;
+    createWriteStreamAsNDJSON(outputPath: string): WritableTyped<any>;
 }
 export declare const fs2: FS2;
 export interface JsonOptions {
package/dist/fs/fs2.js
CHANGED
@@ -20,8 +20,12 @@ const tslib_1 = require("tslib");
 const node_fs_1 = tslib_1.__importDefault(require("node:fs"));
 const promises_1 = tslib_1.__importDefault(require("node:fs/promises"));
 const node_path_1 = tslib_1.__importDefault(require("node:path"));
+const node_zlib_1 = require("node:zlib");
 const js_lib_1 = require("@naturalcycles/js-lib");
 const js_yaml_1 = tslib_1.__importDefault(require("js-yaml"));
+const transformToNDJson_1 = require("../stream/ndjson/transformToNDJson");
+const transformSplit_1 = require("../stream/transform/transformSplit");
+const env_util_1 = require("../util/env.util");
 /**
  * fs2 conveniently groups filesystem functions together.
  * Supposed to be almost a drop-in replacement for these things together:
@@ -271,6 +275,63 @@ class FS2 {
         await this.copyPathAsync(src, dest, opt);
         await this.removePathAsync(src);
     }
+    /*
+    Returns a Readable of [already parsed] NDJSON objects.
+
+    Replaces a list of operations:
+    - requireFileToExist(inputPath)
+    - fs.createReadStream
+    - createUnzip (only if path ends with '.gz')
+    - transformSplitOnNewline
+    - transformJsonParse
+
+    To add a Limit or Offset: just add .take() or .drop(), example:
+
+    _pipeline([
+      fs2.createReadStreamAsNDJSON().take(100),
+      transformX(),
+    ])
+     */
+    createReadStreamAsNDJSON(inputPath) {
+        (0, env_util_1.requireFileToExist)(inputPath);
+        let stream = node_fs_1.default
+            .createReadStream(inputPath, {
+            highWaterMark: 64 * 1024, // no observed speedup
+        })
+            .on('error', err => stream.emit('error', err));
+        if (inputPath.endsWith('.gz')) {
+            stream = stream.pipe((0, node_zlib_1.createUnzip)({
+                chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
+            }));
+        }
+        return stream.pipe((0, transformSplit_1.transformSplitOnNewline)()).map(line => JSON.parse(line));
+        // For some crazy reason .map is much faster than transformJsonParse!
+        // ~5000 vs ~4000 rps !!!
+        // .on('error', err => stream.emit('error', err))
+        // .pipe(transformJsonParse<ROW>())
+    }
+    /*
+    Returns a Writable.
+
+    Replaces a list of operations:
+    - transformToNDJson
+    - createGzip (only if path ends with '.gz')
+    - fs.createWriteStream
+     */
+    createWriteStreamAsNDJSON(outputPath) {
+        this.ensureFile(outputPath);
+        const transform1 = (0, transformToNDJson_1.transformToNDJson)();
+        let transform = transform1;
+        if (outputPath.endsWith('.gz')) {
+            transform = transform.pipe((0, node_zlib_1.createGzip)({
+            // chunkSize: 64 * 1024, // no observed speedup
+            }));
+        }
+        transform.pipe(node_fs_1.default.createWriteStream(outputPath, {
+        // highWaterMark: 64 * 1024, // no observed speedup
+        }));
+        return transform1;
+    }
 }
 exports.fs2 = new FS2();
 function stringify(data, opt) {
package/dist/index.d.ts
CHANGED
@@ -18,13 +18,8 @@ export * from './log/log.util';
 export * from './slack/slack.service';
 export * from './slack/slack.service.model';
 export * from './stream/ndjson/ndjson.model';
-export * from './stream/ndjson/ndJsonFileRead';
-export * from './stream/ndjson/ndJsonFileWrite';
 export * from './stream/ndjson/ndjsonMap';
 export * from './stream/ndjson/ndjsonStreamForEach';
-export * from './stream/ndjson/pipelineFromNDJsonFile';
-export * from './stream/ndjson/pipelineToNDJsonFile';
-export * from './stream/ndjson/streamToNDJsonFile';
 export * from './stream/ndjson/transformJsonParse';
 export * from './stream/ndjson/transformToNDJson';
 export * from './stream/pipeline/pipeline';
@@ -46,7 +41,6 @@ export * from './stream/transform/transformMapSync';
 export * from './stream/transform/transformSplit';
 export * from './stream/transform/transformTap';
 export * from './stream/transform/transformToArray';
-export * from './stream/transform/transformToString';
 export * from './stream/transform/transformTee';
 export * from './stream/transform/worker/baseWorkerClass';
 export * from './stream/transform/worker/transformMultiThreaded';
package/dist/index.js
CHANGED
@@ -22,13 +22,8 @@ tslib_1.__exportStar(require("./log/log.util"), exports);
 tslib_1.__exportStar(require("./slack/slack.service"), exports);
 tslib_1.__exportStar(require("./slack/slack.service.model"), exports);
 tslib_1.__exportStar(require("./stream/ndjson/ndjson.model"), exports);
-tslib_1.__exportStar(require("./stream/ndjson/ndJsonFileRead"), exports);
-tslib_1.__exportStar(require("./stream/ndjson/ndJsonFileWrite"), exports);
 tslib_1.__exportStar(require("./stream/ndjson/ndjsonMap"), exports);
 tslib_1.__exportStar(require("./stream/ndjson/ndjsonStreamForEach"), exports);
-tslib_1.__exportStar(require("./stream/ndjson/pipelineFromNDJsonFile"), exports);
-tslib_1.__exportStar(require("./stream/ndjson/pipelineToNDJsonFile"), exports);
-tslib_1.__exportStar(require("./stream/ndjson/streamToNDJsonFile"), exports);
 tslib_1.__exportStar(require("./stream/ndjson/transformJsonParse"), exports);
 tslib_1.__exportStar(require("./stream/ndjson/transformToNDJson"), exports);
 tslib_1.__exportStar(require("./stream/pipeline/pipeline"), exports);
@@ -50,7 +45,6 @@ tslib_1.__exportStar(require("./stream/transform/transformMapSync"), exports);
 tslib_1.__exportStar(require("./stream/transform/transformSplit"), exports);
 tslib_1.__exportStar(require("./stream/transform/transformTap"), exports);
 tslib_1.__exportStar(require("./stream/transform/transformToArray"), exports);
-tslib_1.__exportStar(require("./stream/transform/transformToString"), exports);
 tslib_1.__exportStar(require("./stream/transform/transformTee"), exports);
 tslib_1.__exportStar(require("./stream/transform/worker/baseWorkerClass"), exports);
 tslib_1.__exportStar(require("./stream/transform/worker/transformMultiThreaded"), exports);
package/dist/stream/ndjson/ndjsonMap.js
CHANGED
@@ -1,8 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ndjsonMap = void 0;
-const node_fs_1 = require("node:fs");
-const node_zlib_1 = require("node:zlib");
 const js_lib_1 = require("@naturalcycles/js-lib");
 const __1 = require("../..");
 /**
@@ -11,20 +9,15 @@ const __1 = require("../..");
  */
 async function ndjsonMap(mapper, opt) {
     const { inputFilePath, outputFilePath, logEveryOutput = 100_000, limitInput, limitOutput } = opt;
-    (0, __1.requireFileToExist)(inputFilePath);
     console.log({
         inputFilePath,
         outputFilePath,
     });
-    const
-
-
+    const readable = __1.fs2
+        .createReadStreamAsNDJSON(inputFilePath)
+        .take(limitInput || Number.POSITIVE_INFINITY);
     await (0, __1._pipeline)([
         readable,
-        ...transformUnzip,
-        (0, __1.transformSplit)(), // splits by \n
-        (0, __1.transformJsonParse)(),
-        (0, __1.transformLimit)({ limit: limitInput, sourceReadable: readable }),
         (0, __1.transformLogProgress)({ metric: 'read', ...opt }),
         (0, __1.transformMap)(mapper, {
             flattenArrayOutput: true,
@@ -33,9 +26,7 @@ async function ndjsonMap(mapper, opt) {
         }),
         (0, __1.transformLimit)({ limit: limitOutput, sourceReadable: readable }),
         (0, __1.transformLogProgress)({ metric: 'saved', logEvery: logEveryOutput }),
-
-        ...transformZip,
-        (0, node_fs_1.createWriteStream)(outputFilePath),
+        __1.fs2.createWriteStreamAsNDJSON(outputFilePath),
     ]);
 }
 exports.ndjsonMap = ndjsonMap;
package/dist/stream/ndjson/ndjsonStreamForEach.js
CHANGED
@@ -1,22 +1,14 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ndjsonStreamForEach = void 0;
-const tslib_1 = require("tslib");
-const node_fs_1 = tslib_1.__importDefault(require("node:fs"));
-const node_zlib_1 = require("node:zlib");
 const js_lib_1 = require("@naturalcycles/js-lib");
 const __1 = require("../..");
 /**
  * Convenience function to `forEach` through an ndjson file.
  */
 async function ndjsonStreamForEach(mapper, opt) {
-    (0, __1.requireFileToExist)(opt.inputFilePath);
-    const transformUnzip = opt.inputFilePath.endsWith('.gz') ? [(0, node_zlib_1.createUnzip)()] : [];
     await (0, __1._pipeline)([
-
-        ...transformUnzip,
-        (0, __1.transformSplit)(),
-        (0, __1.transformJsonParse)(),
+        __1.fs2.createReadStreamAsNDJSON(opt.inputFilePath),
         (0, __1.transformMap)(mapper, {
             errorMode: js_lib_1.ErrorMode.THROW_AGGREGATED,
             ...opt,
package/dist/stream/ndjson/transformJsonParse.d.ts
CHANGED
@@ -24,5 +24,5 @@ export interface TransformJsonParseOptions {
  * consumeYourStream...
  * [)
  */
-export declare function transformJsonParse<
+export declare function transformJsonParse<ROW = any>(opt?: TransformJsonParseOptions): TransformTyped<string | Buffer, ROW>;
 export declare const bufferReviver: Reviver;
package/dist/stream/transform/transformSplit.d.ts
CHANGED
@@ -1,9 +1,19 @@
 /// <reference types="node" />
 import { TransformTyped } from '../stream.model';
+/**
+ * Transforms input Buffer/string stream into Buffer chunks (objectMode: true) split by newLine.
+ *
+ * Useful for reading NDJSON files from fs.
+ *
+ * Same as binarySplit, but optimized (hard-coded) to split on NEWLINE (aka `\n`).
+ * (+5-10% _pipeline speedup measured, compared to generic `binarySplit` on variable length delimiter)
+ */
+export declare function transformSplitOnNewline(): TransformTyped<Buffer, Buffer>;
 /**
  * Input: stream (objectMode=false) of arbitrary string|Buffer chunks, like when read from fs
- * Output: stream (objectMode=
+ * Output: stream (objectMode=true) or string|Buffer chunks split by `separator` (@default to `\n`)
  *
- *
+ * Please use slightly more optimized `transformSplitOnNewline` for NDJSON file parsing.
+ * (+5-10% _pipeline speedup measured!)
  */
-export declare function transformSplit(separator?: string): TransformTyped<
+export declare function transformSplit(separator?: string): TransformTyped<Buffer, Buffer>;
package/dist/stream/transform/transformSplit.js
CHANGED
@@ -1,17 +1,137 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.transformSplit = void 0;
-
-//
-
-
+exports.transformSplit = exports.transformSplitOnNewline = void 0;
+const node_stream_1 = require("node:stream");
+// The code below is carefully adopted from: https://github.com/max-mapper/binary-split
+/**
+ * Transforms input Buffer/string stream into Buffer chunks (objectMode: true) split by newLine.
+ *
+ * Useful for reading NDJSON files from fs.
+ *
+ * Same as binarySplit, but optimized (hard-coded) to split on NEWLINE (aka `\n`).
+ * (+5-10% _pipeline speedup measured, compared to generic `binarySplit` on variable length delimiter)
+ */
+function transformSplitOnNewline() {
+    let buffered;
+    return new node_stream_1.Transform({
+        readableObjectMode: true,
+        writableHighWaterMark: 64 * 1024,
+        transform(buf, enc, done) {
+            let offset = 0;
+            let lastMatch = 0;
+            if (buffered) {
+                buf = Buffer.concat([buffered, buf]);
+                offset = buffered.length;
+                buffered = undefined;
+            }
+            while (true) {
+                const idx = firstNewlineMatch(buf, offset);
+                if (idx !== -1 && idx < buf.length) {
+                    if (lastMatch !== idx) {
+                        this.push(buf.slice(lastMatch, idx));
+                    }
+                    offset = idx + 1;
+                    lastMatch = offset;
+                }
+                else {
+                    buffered = buf.slice(lastMatch);
+                    break;
+                }
+            }
+            done();
+        },
+        flush(done) {
+            if (buffered && buffered.length > 0)
+                this.push(buffered);
+            done();
+        },
+    });
+}
+exports.transformSplitOnNewline = transformSplitOnNewline;
 /**
  * Input: stream (objectMode=false) of arbitrary string|Buffer chunks, like when read from fs
- * Output: stream (objectMode=
+ * Output: stream (objectMode=true) or string|Buffer chunks split by `separator` (@default to `\n`)
  *
- *
+ * Please use slightly more optimized `transformSplitOnNewline` for NDJSON file parsing.
+ * (+5-10% _pipeline speedup measured!)
  */
 function transformSplit(separator = '\n') {
-
+    const matcher = Buffer.from(separator);
+    let buffered;
+    return new node_stream_1.Transform({
+        readableObjectMode: true,
+        writableHighWaterMark: 64 * 1024,
+        transform(buf, enc, done) {
+            let offset = 0;
+            let lastMatch = 0;
+            if (buffered) {
+                buf = Buffer.concat([buffered, buf]);
+                offset = buffered.length;
+                buffered = undefined;
+            }
+            while (true) {
+                const idx = firstMatch(buf, offset - matcher.length + 1, matcher);
+                if (idx !== -1 && idx < buf.length) {
+                    if (lastMatch !== idx) {
+                        this.push(buf.slice(lastMatch, idx));
+                    }
+                    offset = idx + matcher.length;
+                    lastMatch = offset;
+                }
+                else {
+                    buffered = buf.slice(lastMatch);
+                    break;
+                }
+            }
+            done();
+        },
+        flush(done) {
+            if (buffered && buffered.length > 0)
+                this.push(buffered);
+            done();
+        },
+    });
 }
 exports.transformSplit = transformSplit;
+// const NEWLINE = Buffer.from('\n')
+// const NEWLINE_CODE = NEWLINE[0]! // it is `10`
+const NEWLINE_CODE = 10;
+/**
+ * Same as firstMatch, but optimized (hard-coded) to find NEWLINE (aka `\n`).
+ */
+function firstNewlineMatch(buf, offset) {
+    const bufLength = buf.length;
+    if (offset >= bufLength)
+        return -1;
+    for (let i = offset; i < bufLength; i++) {
+        if (buf[i] === NEWLINE_CODE) {
+            return i;
+        }
+    }
+    return -1; // this code is unreachable, because i is guaranteed to be found in the loop above
+}
+function firstMatch(buf, offset, matcher) {
+    if (offset >= buf.length)
+        return -1;
+    let i;
+    for (i = offset; i < buf.length; i++) {
+        if (buf[i] === matcher[0]) {
+            if (matcher.length > 1) {
+                let fullMatch = true;
+                let j = i;
+                for (let k = 0; j < i + matcher.length; j++, k++) {
+                    if (buf[j] !== matcher[k]) {
+                        fullMatch = false;
+                        break;
+                    }
+                }
+                if (fullMatch)
+                    return j - matcher.length;
+            }
+            else {
+                break;
+            }
+        }
+    }
+    return i + matcher.length - 1;
+}
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@naturalcycles/nodejs-lib",
-  "version": "13.21.0",
+  "version": "13.23.0",
   "scripts": {
     "prepare": "husky",
     "docs-serve": "vuepress dev docs",
@@ -21,7 +21,6 @@
     "ajv": "^8.6.2",
     "ajv-formats": "^3.0.1",
     "ajv-keywords": "^5.0.0",
-    "binary-split": "^1.0.5",
     "chalk": "^4.0.0",
     "debug": "^4.1.1",
     "dotenv": "^16.0.0",
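
The binary-split dependency can be dropped here because its splitting logic is now vendored directly into transformSplit.ts below (the source comments it as "carefully adopted from" that repo), alongside the new newline-specialized variant.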
package/src/fs/fs2.ts
CHANGED
@@ -18,8 +18,13 @@ import type { RmOptions } from 'node:fs'
 import fs from 'node:fs'
 import fsp from 'node:fs/promises'
 import path from 'node:path'
+import { createGzip, createUnzip } from 'node:zlib'
 import { _jsonParse } from '@naturalcycles/js-lib'
 import yaml, { DumpOptions } from 'js-yaml'
+import { transformToNDJson } from '../stream/ndjson/transformToNDJson'
+import { ReadableTyped, WritableTyped } from '../stream/stream.model'
+import { transformSplitOnNewline } from '../stream/transform/transformSplit'
+import { requireFileToExist } from '../util/env.util'
 
 /**
  * fs2 conveniently groups filesystem functions together.
@@ -305,6 +310,75 @@ class FS2 {
   readdirAsync = fsp.readdir
   createWriteStream = fs.createWriteStream
   createReadStream = fs.createReadStream
+
+  /*
+  Returns a Readable of [already parsed] NDJSON objects.
+
+  Replaces a list of operations:
+  - requireFileToExist(inputPath)
+  - fs.createReadStream
+  - createUnzip (only if path ends with '.gz')
+  - transformSplitOnNewline
+  - transformJsonParse
+
+  To add a Limit or Offset: just add .take() or .drop(), example:
+
+  _pipeline([
+    fs2.createReadStreamAsNDJSON().take(100),
+    transformX(),
+  ])
+   */
+  createReadStreamAsNDJSON<ROW = any>(inputPath: string): ReadableTyped<ROW> {
+    requireFileToExist(inputPath)
+
+    let stream: ReadableTyped<ROW> = fs
+      .createReadStream(inputPath, {
+        highWaterMark: 64 * 1024, // no observed speedup
+      })
+      .on('error', err => stream.emit('error', err))
+
+    if (inputPath.endsWith('.gz')) {
+      stream = stream.pipe(
+        createUnzip({
+          chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
+        }),
+      )
+    }
+
+    return stream.pipe(transformSplitOnNewline()).map(line => JSON.parse(line))
+    // For some crazy reason .map is much faster than transformJsonParse!
+    // ~5000 vs ~4000 rps !!!
+    // .on('error', err => stream.emit('error', err))
+    // .pipe(transformJsonParse<ROW>())
+  }
+
+  /*
+  Returns a Writable.
+
+  Replaces a list of operations:
+  - transformToNDJson
+  - createGzip (only if path ends with '.gz')
+  - fs.createWriteStream
+   */
+  createWriteStreamAsNDJSON(outputPath: string): WritableTyped<any> {
+    this.ensureFile(outputPath)
+
+    const transform1 = transformToNDJson()
+    let transform = transform1
+    if (outputPath.endsWith('.gz')) {
+      transform = transform.pipe(
+        createGzip({
+          // chunkSize: 64 * 1024, // no observed speedup
+        }),
+      )
+    }
+    transform.pipe(
+      fs.createWriteStream(outputPath, {
+        // highWaterMark: 64 * 1024, // no observed speedup
+      }),
+    )
+    return transform1
+  }
 }
 
 export const fs2 = new FS2()
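
As the doc comment on createReadStreamAsNDJSON notes, limit and offset are now expressed with the Readable iterator helpers .take() and .drop(). A short sketch (assumes a Node.js version where these experimental Readable helpers are available):

import { _pipeline, fs2 } from '@naturalcycles/nodejs-lib'

// Skip the first 10 rows, then process at most 100:
await _pipeline([
  fs2.createReadStreamAsNDJSON('rows.ndjson').drop(10).take(100),
  fs2.createWriteStreamAsNDJSON('sample.ndjson'),
])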
package/src/index.ts
CHANGED
@@ -28,13 +28,8 @@ export * from './log/log.util'
 export * from './slack/slack.service'
 export * from './slack/slack.service.model'
 export * from './stream/ndjson/ndjson.model'
-export * from './stream/ndjson/ndJsonFileRead'
-export * from './stream/ndjson/ndJsonFileWrite'
 export * from './stream/ndjson/ndjsonMap'
 export * from './stream/ndjson/ndjsonStreamForEach'
-export * from './stream/ndjson/pipelineFromNDJsonFile'
-export * from './stream/ndjson/pipelineToNDJsonFile'
-export * from './stream/ndjson/streamToNDJsonFile'
 export * from './stream/ndjson/transformJsonParse'
 export * from './stream/ndjson/transformToNDJson'
 export * from './stream/pipeline/pipeline'
@@ -56,7 +51,6 @@ export * from './stream/transform/transformMapSync'
 export * from './stream/transform/transformSplit'
 export * from './stream/transform/transformTap'
 export * from './stream/transform/transformToArray'
-export * from './stream/transform/transformToString'
 export * from './stream/transform/transformTee'
 export * from './stream/transform/worker/baseWorkerClass'
 export * from './stream/transform/worker/transformMultiThreaded'
package/src/stream/ndjson/ndjsonMap.ts
CHANGED
@@ -1,17 +1,12 @@
-import { createReadStream, createWriteStream } from 'node:fs'
-import { createGzip, createUnzip } from 'node:zlib'
 import { AbortableAsyncMapper, ErrorMode } from '@naturalcycles/js-lib'
 import {
-  requireFileToExist,
-  transformJsonParse,
   transformLimit,
   transformLogProgress,
   transformMap,
   TransformMapOptions,
-  transformSplit,
-  transformToNDJson,
   _pipeline,
   TransformLogProgressOptions,
+  fs2,
 } from '../..'
 
 export interface NDJSONMapOptions<IN = any, OUT = IN>
@@ -46,24 +41,17 @@ export async function ndjsonMap<IN = any, OUT = any>(
 ): Promise<void> {
   const { inputFilePath, outputFilePath, logEveryOutput = 100_000, limitInput, limitOutput } = opt
 
-  requireFileToExist(inputFilePath)
-
   console.log({
     inputFilePath,
     outputFilePath,
   })
 
-  const
-
-
-  const readable = createReadStream(inputFilePath)
+  const readable = fs2
+    .createReadStreamAsNDJSON(inputFilePath)
+    .take(limitInput || Number.POSITIVE_INFINITY)
 
   await _pipeline([
     readable,
-    ...transformUnzip,
-    transformSplit(), // splits by \n
-    transformJsonParse(),
-    transformLimit({ limit: limitInput, sourceReadable: readable }),
     transformLogProgress({ metric: 'read', ...opt }),
     transformMap(mapper, {
       flattenArrayOutput: true,
@@ -72,8 +60,6 @@ export async function ndjsonMap<IN = any, OUT = any>(
     }),
     transformLimit({ limit: limitOutput, sourceReadable: readable }),
    transformLogProgress({ metric: 'saved', logEvery: logEveryOutput }),
-
-    ...transformZip,
-    createWriteStream(outputFilePath),
+    fs2.createWriteStreamAsNDJSON(outputFilePath),
  ])
 }
package/src/stream/ndjson/ndjsonStreamForEach.ts
CHANGED
@@ -1,16 +1,12 @@
-import fs from 'node:fs'
-import { createUnzip } from 'node:zlib'
 import { AbortableAsyncMapper, ErrorMode } from '@naturalcycles/js-lib'
 import {
-  requireFileToExist,
-  transformJsonParse,
   transformLogProgress,
   TransformLogProgressOptions,
   transformMap,
   TransformMapOptions,
-  transformSplit,
   writableVoid,
   _pipeline,
+  fs2,
 } from '../..'
 
 export interface NDJSONStreamForEachOptions<IN = any>
@@ -26,15 +22,8 @@ export async function ndjsonStreamForEach<T>(
   mapper: AbortableAsyncMapper<T, void>,
   opt: NDJSONStreamForEachOptions<T>,
 ): Promise<void> {
-  requireFileToExist(opt.inputFilePath)
-
-  const transformUnzip = opt.inputFilePath.endsWith('.gz') ? [createUnzip()] : []
-
   await _pipeline([
-
-    ...transformUnzip,
-    transformSplit(),
-    transformJsonParse(),
+    fs2.createReadStreamAsNDJSON(opt.inputFilePath),
     transformMap<T, any>(mapper, {
       errorMode: ErrorMode.THROW_AGGREGATED,
       ...opt,
package/src/stream/ndjson/transformJsonParse.ts
CHANGED
@@ -27,9 +27,9 @@ export interface TransformJsonParseOptions {
  * consumeYourStream...
  * [)
  */
-export function transformJsonParse<
+export function transformJsonParse<ROW = any>(
   opt: TransformJsonParseOptions = {},
-): TransformTyped<string | Buffer,
+): TransformTyped<string | Buffer, ROW> {
   const { strict = true, reviver } = opt
 
   return new Transform({
package/src/stream/transform/transformSplit.ts
CHANGED
@@ -1,16 +1,142 @@
+import { Transform } from 'node:stream'
 import { TransformTyped } from '../stream.model'
 
-// https://github.com/max-mapper/binary-split
-
-
-
+// The code below is carefully adopted from: https://github.com/max-mapper/binary-split
+
+/**
+ * Transforms input Buffer/string stream into Buffer chunks (objectMode: true) split by newLine.
+ *
+ * Useful for reading NDJSON files from fs.
+ *
+ * Same as binarySplit, but optimized (hard-coded) to split on NEWLINE (aka `\n`).
+ * (+5-10% _pipeline speedup measured, compared to generic `binarySplit` on variable length delimiter)
+ */
+export function transformSplitOnNewline(): TransformTyped<Buffer, Buffer> {
+  let buffered: Buffer | undefined
+
+  return new Transform({
+    readableObjectMode: true,
+    writableHighWaterMark: 64 * 1024,
+
+    transform(buf: Buffer, enc, done) {
+      let offset = 0
+      let lastMatch = 0
+      if (buffered) {
+        buf = Buffer.concat([buffered, buf])
+        offset = buffered.length
+        buffered = undefined
+      }
+
+      while (true) {
+        const idx = firstNewlineMatch(buf, offset)
+        if (idx !== -1 && idx < buf.length) {
+          if (lastMatch !== idx) {
+            this.push(buf.slice(lastMatch, idx))
+          }
+          offset = idx + 1
+          lastMatch = offset
+        } else {
+          buffered = buf.slice(lastMatch)
+          break
+        }
+      }
+
+      done()
+    },
+
+    flush(done) {
+      if (buffered && buffered.length > 0) this.push(buffered)
+      done()
+    },
+  })
+}
 
 /**
  * Input: stream (objectMode=false) of arbitrary string|Buffer chunks, like when read from fs
- * Output: stream (objectMode=
+ * Output: stream (objectMode=true) or string|Buffer chunks split by `separator` (@default to `\n`)
  *
- *
+ * Please use slightly more optimized `transformSplitOnNewline` for NDJSON file parsing.
+ * (+5-10% _pipeline speedup measured!)
+ */
+export function transformSplit(separator = '\n'): TransformTyped<Buffer, Buffer> {
+  const matcher = Buffer.from(separator)
+  let buffered: Buffer | undefined
+
+  return new Transform({
+    readableObjectMode: true,
+    writableHighWaterMark: 64 * 1024,
+
+    transform(buf: Buffer, enc, done) {
+      let offset = 0
+      let lastMatch = 0
+      if (buffered) {
+        buf = Buffer.concat([buffered, buf])
+        offset = buffered.length
+        buffered = undefined
+      }
+
+      while (true) {
+        const idx = firstMatch(buf, offset - matcher.length + 1, matcher)
+        if (idx !== -1 && idx < buf.length) {
+          if (lastMatch !== idx) {
+            this.push(buf.slice(lastMatch, idx))
+          }
+          offset = idx + matcher.length
+          lastMatch = offset
+        } else {
+          buffered = buf.slice(lastMatch)
+          break
+        }
+      }
+
+      done()
+    },
+
+    flush(done) {
+      if (buffered && buffered.length > 0) this.push(buffered)
+      done()
+    },
+  })
+}
+
+// const NEWLINE = Buffer.from('\n')
+// const NEWLINE_CODE = NEWLINE[0]! // it is `10`
+const NEWLINE_CODE = 10
+
+/**
+ * Same as firstMatch, but optimized (hard-coded) to find NEWLINE (aka `\n`).
  */
-
-
+function firstNewlineMatch(buf: Buffer, offset: number): number {
+  const bufLength = buf.length
+  if (offset >= bufLength) return -1
+  for (let i = offset; i < bufLength; i++) {
+    if (buf[i] === NEWLINE_CODE) {
+      return i
+    }
+  }
+  return -1 // this code is unreachable, because i is guaranteed to be found in the loop above
+}
+
+function firstMatch(buf: Buffer, offset: number, matcher: Buffer): number {
+  if (offset >= buf.length) return -1
+  let i
+  for (i = offset; i < buf.length; i++) {
+    if (buf[i] === matcher[0]) {
+      if (matcher.length > 1) {
+        let fullMatch = true
+        let j = i
+        for (let k = 0; j < i + matcher.length; j++, k++) {
+          if (buf[j] !== matcher[k]) {
+            fullMatch = false
+            break
+          }
+        }
+        if (fullMatch) return j - matcher.length
+      } else {
+        break
+      }
+    }
+  }
+
+  return i + matcher.length - 1
 }
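
The splitter buffers any partial line across chunk boundaries and emits one Buffer per complete line, so the chunking of the input does not affect the output. A self-contained sketch of the observable behavior (based on the implementation above):

import { Readable } from 'node:stream'
import { transformSplitOnNewline } from '@naturalcycles/nodejs-lib'

const lines: string[] = []
Readable.from([Buffer.from('{"a":1}\n{"b"'), Buffer.from(':2}\n')])
  .pipe(transformSplitOnNewline())
  .on('data', (buf: Buffer) => lines.push(buf.toString()))
  .on('end', () => console.log(lines)) // ['{"a":1}', '{"b":2}']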
package/dist/stream/ndjson/ndJsonFileRead.d.ts
DELETED
@@ -1,5 +0,0 @@
-import { PipelineFromNDJsonFileOptions } from './pipelineFromNDJsonFile';
-/**
- * Read whole NDJSON file into memory, resolve promise with resulting array of items.
- */
-export declare function ndJsonFileRead<OUT = any>(opt: PipelineFromNDJsonFileOptions): Promise<OUT[]>;
package/dist/stream/ndjson/ndJsonFileRead.js
DELETED
@@ -1,14 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.ndJsonFileRead = void 0;
-const __1 = require("../..");
-const pipelineFromNDJsonFile_1 = require("./pipelineFromNDJsonFile");
-/**
- * Read whole NDJSON file into memory, resolve promise with resulting array of items.
- */
-async function ndJsonFileRead(opt) {
-    const res = [];
-    await (0, pipelineFromNDJsonFile_1.pipelineFromNDJsonFile)([(0, __1.writablePushToArray)(res)], opt);
-    return res;
-}
-exports.ndJsonFileRead = ndJsonFileRead;
package/dist/stream/ndjson/ndJsonFileWrite.js
DELETED
@@ -1,12 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.ndJsonFileWrite = void 0;
-const readableFromArray_1 = require("../readable/readableFromArray");
-const pipelineToNDJsonFile_1 = require("./pipelineToNDJsonFile");
-/**
- * Write array of objects (in memory) into NDJSON file. Resolve when done.
- */
-async function ndJsonFileWrite(items, opt) {
-    await (0, pipelineToNDJsonFile_1.pipelineToNDJsonFile)([(0, readableFromArray_1.readableFromArray)(items)], opt);
-}
-exports.ndJsonFileWrite = ndJsonFileWrite;
package/dist/stream/ndjson/pipelineFromNDJsonFile.d.ts
DELETED
@@ -1,24 +0,0 @@
-/// <reference types="node" />
-/// <reference types="node" />
-import { ZlibOptions } from 'node:zlib';
-import { NDJsonStats } from './ndjson.model';
-import { TransformJsonParseOptions } from './transformJsonParse';
-export interface PipelineFromNDJsonFileOptions extends TransformJsonParseOptions {
-    filePath: string;
-    /**
-     * @default `\n`
-     */
-    separator?: string;
-    /**
-     * @default false
-     */
-    gzip?: boolean;
-    /**
-     * Only applicable if `gzip` is enabled
-     */
-    zlibOptions?: ZlibOptions;
-}
-/**
- * Convenience pipeline that starts from reading NDJSON file.
- */
-export declare function pipelineFromNDJsonFile(streams: NodeJS.WritableStream[], opt: PipelineFromNDJsonFileOptions): Promise<NDJsonStats>;
package/dist/stream/ndjson/pipelineFromNDJsonFile.js
DELETED
@@ -1,37 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.pipelineFromNDJsonFile = void 0;
-const tslib_1 = require("tslib");
-const node_fs_1 = tslib_1.__importDefault(require("node:fs"));
-const node_zlib_1 = require("node:zlib");
-const js_lib_1 = require("@naturalcycles/js-lib");
-const __1 = require("../..");
-const colors_1 = require("../../colors/colors");
-const ndjson_model_1 = require("./ndjson.model");
-const transformJsonParse_1 = require("./transformJsonParse");
-/**
- * Convenience pipeline that starts from reading NDJSON file.
- */
-async function pipelineFromNDJsonFile(streams, opt) {
-    const { filePath, gzip, separator } = opt;
-    const started = Date.now();
-    let rows = 0;
-    const { size: sizeBytes } = node_fs_1.default.statSync(filePath);
-    console.log(`<< ${(0, colors_1.grey)(filePath)} ${(0, colors_1.dimWhite)((0, js_lib_1._hb)(sizeBytes))} started...`);
-    await (0, __1._pipeline)([
-        node_fs_1.default.createReadStream(filePath),
-        ...(gzip ? [(0, node_zlib_1.createUnzip)(opt.zlibOptions)] : []),
-        (0, __1.transformSplit)(separator), // splits by separator
-        (0, transformJsonParse_1.transformJsonParse)(opt),
-        (0, __1.transformTap)(() => rows++),
-        ...streams,
-    ]);
-    const stats = ndjson_model_1.NDJsonStats.create({
-        tookMillis: Date.now() - started,
-        rows,
-        sizeBytes,
-    });
-    console.log(`<< ${(0, colors_1.grey)(filePath)}\n` + stats.toPretty());
-    return stats;
-}
-exports.pipelineFromNDJsonFile = pipelineFromNDJsonFile;
package/dist/stream/ndjson/pipelineToNDJsonFile.d.ts
DELETED
@@ -1,27 +0,0 @@
-/// <reference types="node" />
-/// <reference types="node" />
-import { ZlibOptions } from 'node:zlib';
-import { NDJsonStats } from './ndjson.model';
-import { TransformToNDJsonOptions } from './transformToNDJson';
-export interface PipelineToNDJsonFileOptions extends TransformToNDJsonOptions {
-    filePath: string;
-    /**
-     * @default false
-     * If true - will fail if output file already exists.
-     */
-    protectFromOverwrite?: boolean;
-    /**
-     * @default false
-     */
-    gzip?: boolean;
-    /**
-     * Only applicable if `gzip` is enabled
-     */
-    zlibOptions?: ZlibOptions;
-}
-/**
- * Convenience pipeline to transform stream of objects into a file in NDJSON format.
- *
- * Does fs.ensureFile() before starting, which will create all needed directories and truncate the file if it existed.
- */
-export declare function pipelineToNDJsonFile(streams: (NodeJS.ReadableStream | NodeJS.WritableStream)[], opt: PipelineToNDJsonFileOptions): Promise<NDJsonStats>;
package/dist/stream/ndjson/pipelineToNDJsonFile.js
DELETED
@@ -1,42 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.pipelineToNDJsonFile = void 0;
-const tslib_1 = require("tslib");
-const node_fs_1 = tslib_1.__importDefault(require("node:fs"));
-const node_zlib_1 = require("node:zlib");
-const js_lib_1 = require("@naturalcycles/js-lib");
-const __1 = require("../..");
-const colors_1 = require("../../colors/colors");
-const ndjson_model_1 = require("./ndjson.model");
-const transformToNDJson_1 = require("./transformToNDJson");
-/**
- * Convenience pipeline to transform stream of objects into a file in NDJSON format.
- *
- * Does fs.ensureFile() before starting, which will create all needed directories and truncate the file if it existed.
- */
-async function pipelineToNDJsonFile(streams, opt) {
-    const { filePath, gzip, protectFromOverwrite = false } = opt;
-    if (protectFromOverwrite && __1.fs2.pathExists(filePath)) {
-        throw new js_lib_1.AppError(`pipelineToNDJsonFile: output file exists: ${filePath}`);
-    }
-    const started = Date.now();
-    let rows = 0;
-    __1.fs2.ensureFile(filePath);
-    console.log(`>> ${(0, colors_1.grey)(filePath)} started...`);
-    await (0, __1._pipeline)([
-        ...streams,
-        (0, __1.transformTap)(() => rows++),
-        (0, transformToNDJson_1.transformToNDJson)(opt),
-        ...(gzip ? [(0, node_zlib_1.createGzip)(opt.zlibOptions)] : []), // optional gzip
-        node_fs_1.default.createWriteStream(filePath),
-    ]);
-    const { size: sizeBytes } = node_fs_1.default.statSync(filePath);
-    const stats = ndjson_model_1.NDJsonStats.create({
-        tookMillis: Date.now() - started,
-        rows,
-        sizeBytes,
-    });
-    console.log(`>> ${(0, colors_1.grey)(filePath)}\n` + stats.toPretty());
-    return stats;
-}
-exports.pipelineToNDJsonFile = pipelineToNDJsonFile;
package/dist/stream/ndjson/streamToNDJsonFile.js
DELETED
@@ -1,8 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.streamToNDJsonFile = void 0;
-const pipelineToNDJsonFile_1 = require("./pipelineToNDJsonFile");
-async function streamToNDJsonFile(stream, opt) {
-    await (0, pipelineToNDJsonFile_1.pipelineToNDJsonFile)([stream], opt);
-}
-exports.streamToNDJsonFile = streamToNDJsonFile;
package/dist/stream/transform/transformToString.d.ts
DELETED
@@ -1,12 +0,0 @@
-/// <reference types="node" />
-import { TransformTyped } from '../stream.model';
-/**
- * Transforms objectMode=false Buffers/strings into objectMode=true strings.
- *
- * Useful in this _pipeline:
- * fs.createReadStream(inputPath),
- * createUnzip(), // binary
- * transformSplit(), // string chunks, but objectMode==false
- * transformToString(), // string chunks, but objectMode==true
- */
-export declare function transformToString(): TransformTyped<Buffer, string>;
package/dist/stream/transform/transformToString.js
DELETED
@@ -1,24 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.transformToString = void 0;
-const node_stream_1 = require("node:stream");
-/**
- * Transforms objectMode=false Buffers/strings into objectMode=true strings.
- *
- * Useful in this _pipeline:
- * fs.createReadStream(inputPath),
- * createUnzip(), // binary
- * transformSplit(), // string chunks, but objectMode==false
- * transformToString(), // string chunks, but objectMode==true
- */
-function transformToString() {
-    return new node_stream_1.Transform({
-        objectMode: false,
-        readableObjectMode: true,
-        transform(chunk, _, cb) {
-            // console.log(`enc: ${_}`, chunk.toString())
-            cb(null, chunk.toString());
-        },
-    });
-}
-exports.transformToString = transformToString;
package/src/stream/ndjson/ndJsonFileRead.ts
DELETED
@@ -1,15 +0,0 @@
-import { writablePushToArray } from '../..'
-import { pipelineFromNDJsonFile, PipelineFromNDJsonFileOptions } from './pipelineFromNDJsonFile'
-
-/**
- * Read whole NDJSON file into memory, resolve promise with resulting array of items.
- */
-export async function ndJsonFileRead<OUT = any>(
-  opt: PipelineFromNDJsonFileOptions,
-): Promise<OUT[]> {
-  const res: OUT[] = []
-
-  await pipelineFromNDJsonFile([writablePushToArray(res)], opt)
-
-  return res
-}
package/src/stream/ndjson/ndJsonFileWrite.ts
DELETED
@@ -1,12 +0,0 @@
-import { readableFromArray } from '../readable/readableFromArray'
-import { pipelineToNDJsonFile, PipelineToNDJsonFileOptions } from './pipelineToNDJsonFile'
-
-/**
- * Write array of objects (in memory) into NDJSON file. Resolve when done.
- */
-export async function ndJsonFileWrite<IN = any>(
-  items: IN[],
-  opt: PipelineToNDJsonFileOptions,
-): Promise<void> {
-  await pipelineToNDJsonFile([readableFromArray(items)], opt)
-}
package/src/stream/ndjson/pipelineFromNDJsonFile.ts
DELETED
@@ -1,62 +0,0 @@
-import fs from 'node:fs'
-import { createUnzip, ZlibOptions } from 'node:zlib'
-import { _hb } from '@naturalcycles/js-lib'
-import { transformTap, _pipeline, transformSplit } from '../..'
-import { dimWhite, grey } from '../../colors/colors'
-import { NDJsonStats } from './ndjson.model'
-import { transformJsonParse, TransformJsonParseOptions } from './transformJsonParse'
-
-export interface PipelineFromNDJsonFileOptions extends TransformJsonParseOptions {
-  filePath: string
-
-  /**
-   * @default `\n`
-   */
-  separator?: string
-
-  /**
-   * @default false
-   */
-  gzip?: boolean
-
-  /**
-   * Only applicable if `gzip` is enabled
-   */
-  zlibOptions?: ZlibOptions
-}
-
-/**
- * Convenience pipeline that starts from reading NDJSON file.
- */
-export async function pipelineFromNDJsonFile(
-  streams: NodeJS.WritableStream[],
-  opt: PipelineFromNDJsonFileOptions,
-): Promise<NDJsonStats> {
-  const { filePath, gzip, separator } = opt
-
-  const started = Date.now()
-  let rows = 0
-
-  const { size: sizeBytes } = fs.statSync(filePath)
-
-  console.log(`<< ${grey(filePath)} ${dimWhite(_hb(sizeBytes))} started...`)
-
-  await _pipeline([
-    fs.createReadStream(filePath),
-    ...(gzip ? [createUnzip(opt.zlibOptions)] : []),
-    transformSplit(separator), // splits by separator
-    transformJsonParse(opt),
-    transformTap(() => rows++),
-    ...streams,
-  ])
-
-  const stats = NDJsonStats.create({
-    tookMillis: Date.now() - started,
-    rows,
-    sizeBytes,
-  })
-
-  console.log(`<< ${grey(filePath)}\n` + stats.toPretty())
-
-  return stats
-}
package/src/stream/ndjson/pipelineToNDJsonFile.ts
DELETED
@@ -1,70 +0,0 @@
-import fs from 'node:fs'
-import { createGzip, ZlibOptions } from 'node:zlib'
-import { AppError } from '@naturalcycles/js-lib'
-import { transformTap, _pipeline, fs2 } from '../..'
-import { grey } from '../../colors/colors'
-import { NDJsonStats } from './ndjson.model'
-import { transformToNDJson, TransformToNDJsonOptions } from './transformToNDJson'
-
-export interface PipelineToNDJsonFileOptions extends TransformToNDJsonOptions {
-  filePath: string
-
-  /**
-   * @default false
-   * If true - will fail if output file already exists.
-   */
-  protectFromOverwrite?: boolean
-
-  /**
-   * @default false
-   */
-  gzip?: boolean
-
-  /**
-   * Only applicable if `gzip` is enabled
-   */
-  zlibOptions?: ZlibOptions
-}
-
-/**
- * Convenience pipeline to transform stream of objects into a file in NDJSON format.
- *
- * Does fs.ensureFile() before starting, which will create all needed directories and truncate the file if it existed.
- */
-export async function pipelineToNDJsonFile(
-  streams: (NodeJS.ReadableStream | NodeJS.WritableStream)[],
-  opt: PipelineToNDJsonFileOptions,
-): Promise<NDJsonStats> {
-  const { filePath, gzip, protectFromOverwrite = false } = opt
-
-  if (protectFromOverwrite && fs2.pathExists(filePath)) {
-    throw new AppError(`pipelineToNDJsonFile: output file exists: ${filePath}`)
-  }
-
-  const started = Date.now()
-  let rows = 0
-
-  fs2.ensureFile(filePath)
-
-  console.log(`>> ${grey(filePath)} started...`)
-
-  await _pipeline([
-    ...streams,
-    transformTap(() => rows++),
-    transformToNDJson(opt),
-    ...(gzip ? [createGzip(opt.zlibOptions)] : []), // optional gzip
-    fs.createWriteStream(filePath),
-  ])
-
-  const { size: sizeBytes } = fs.statSync(filePath)
-
-  const stats = NDJsonStats.create({
-    tookMillis: Date.now() - started,
-    rows,
-    sizeBytes,
-  })
-
-  console.log(`>> ${grey(filePath)}\n` + stats.toPretty())
-
-  return stats
-}
package/src/stream/ndjson/streamToNDJsonFile.ts
DELETED
@@ -1,9 +0,0 @@
-import { ReadableTyped } from '../stream.model'
-import { pipelineToNDJsonFile, PipelineToNDJsonFileOptions } from './pipelineToNDJsonFile'
-
-export async function streamToNDJsonFile<IN>(
-  stream: ReadableTyped<IN>,
-  opt: PipelineToNDJsonFileOptions,
-): Promise<void> {
-  await pipelineToNDJsonFile([stream], opt)
-}
package/src/stream/transform/transformToString.ts
DELETED
@@ -1,22 +0,0 @@
-import { Transform } from 'node:stream'
-import { TransformTyped } from '../stream.model'
-
-/**
- * Transforms objectMode=false Buffers/strings into objectMode=true strings.
- *
- * Useful in this _pipeline:
- * fs.createReadStream(inputPath),
- * createUnzip(), // binary
- * transformSplit(), // string chunks, but objectMode==false
- * transformToString(), // string chunks, but objectMode==true
- */
-export function transformToString(): TransformTyped<Buffer, string> {
-  return new Transform({
-    objectMode: false,
-    readableObjectMode: true,
-    transform(chunk: Buffer, _, cb) {
-      // console.log(`enc: ${_}`, chunk.toString())
-      cb(null, chunk.toString())
-    },
-  })
-}