npm - taraskevizer - Version diff - 10.4.9 → 10.4.11 - Mend

taraskevizer 10.4.9 → 10.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/bin/index.js +121 -92
package/package.json +1 -1

package/dist/bin/index.js CHANGED Viewed

@@ -1,22 +1,33 @@
 #!/usr/bin/env node
 import { cpus } from 'node:os';
+import * as path from 'node:path';
+import { fileURLToPath } from 'node:url';
 import { Worker } from 'node:worker_threads';
 import { pipelines } from '../index.js';
 import { parseArgs } from './parse-args.js';
-const printWithPrefix = (msg) => {
-    process.stdout.write("[34m[taraskevizer][0m" + ' ' + msg + '\n');
+const getPrint = (stream) => (msg) => {
+    stream.write("[34m[taraskevizer][0m" + ' ' + msg);
 };
+const getPrintLn = (printFn) => (msg) => {
+    printFn(msg + '\n');
+};
+const print = getPrint(process.stdout);
+const printErr = getPrint(process.stderr);
+const printLn = getPrintLn(print);
+const printErrLn = getPrintLn(printErr);
+// AI-written, may need improvements
 const splitIntoChunks = (text, n) => {
     const size = Math.ceil(text.length / n);
     const chunks = [];
     let start = 0;
     for (let i = 0; i < n; i++) {
         let end = start + size;
+        // Adjust end to the nearest newline character
         if (end < text.length) {
             const forward = text.indexOf('\n', end);
             const backward = text.lastIndexOf('\n', end);
             if (forward === -1 && backward === -1) {
-                // no-op, use raw end
+                // no-op
             }
             else if (forward === -1) {
                 end = backward;
@@ -28,6 +39,17 @@ const splitIntoChunks = (text, n) => {
                 end = forward - end < end - backward ? forward : backward;
             }
         }
+        // Ensure we don't split inside HTML tags or special syntax tags
+        if (end < text.length) {
+            const lastOpen = text.lastIndexOf('<', end);
+            const lastClose = text.lastIndexOf('>', end);
+            if (lastOpen > lastClose) {
+                const nextClose = text.indexOf('>', end);
+                if (nextClose !== -1) {
+                    end = nextClose + 1;
+                }
+            }
+        }
         if (end > text.length)
             end = text.length;
         chunks.push(text.slice(start, end));
@@ -37,122 +59,129 @@ const splitIntoChunks = (text, n) => {
     }
     return chunks;
 };
+const getPrettyByteSize = (n) => n < 1024
+    ? `${n} B`
+    : n < 1024 * 1024
+        ? `${(n / 1024).toFixed(2)} KB`
+        : `${(n / (1024 * 1024)).toFixed(2)} MB`;
 process.argv.splice(0, 2);
 const firstArg = process.argv[0];
 if (firstArg) {
     if (firstArg === '-v' || firstArg === '--version') {
-        printWithPrefix("10.4.9");
+        printLn("10.4.11");
         process.exit(0);
     }
     if (firstArg === '-h' || firstArg === '--help') {
-        printWithPrefix(`Usage: [34mtarask[0m [options] text
-If text is not passed, interactive mode is enabled
-EXAMPLES
-Convert and latinize a word
-	[34mtarask[0m [35m--latin[0m 'планета'
-Prints "p[32ml[0ma[32mne[0mta"
-Read from one file and write converted text to another
-	[34mtarask[0m < ./cyr-text.txt > ./lat-text.txt
-Enter interactive mode
-	[34mtarask[0m
-Prints "[34m[taraskevizer][0m Enter the text:" and waits until you enter a new line
-OPTIONS
-[33mGeneral[0m:
-  [35m-h[0m [35m--help[0m
-  [35m-v[0m [35m--version[0m
-[33mAlphabet[0m:
-  [35m-l[0m  [35m--latin[0m
-  [35m-lj[0m [35m--latin-ji[0m
-  [35m-a[0m  [35m--arabic[0m
-[33mWhen to replace і(i) by й(j) after vowels[0m:
-  [35m-jr[0m [35m--jrandom[0m
-  [35m-ja[0m [35m--jalways[0m
-[33mReplace ґ(g) by г(h) in cyrillic alphabet[0m:
-  [35m--h[0m
-[33mVariations[0m:
-  [35m-nv[0m [35m--no-variations[0m
-  [35m-fv[0m [35m--first-variation[0m
-[33mMode (only one can be used)[0m:
-  [35m-html[0m [35m--html[0m
-  [35m-abc[0m  [35m--alphabet-only[0m
-[33mOther[0m:
-  [35m-nec[0m [35m--not-escape-caps[0m
-  [35m-nc[0m  [35m--no-color[0m
-  [35m-st[0m  [35m--single-thread[0m
-`);
+        printLn("Usage: \u001b[34mtarask\u001b[0m [options] text\nIf text is not passed, interactive mode is enabled\n\nEXAMPLES\n\nConvert and latinize a word\n\t\u001b[34mtarask\u001b[0m \u001b[35m--latin\u001b[0m 'планета'\nPrints \"p\u001b[32ml\u001b[0ma\u001b[32mne\u001b[0mta\"\n\nRead from one file and write converted text to another\n\t\u001b[34mtarask\u001b[0m < ./cyr-text.txt > ./lat-text.txt\n\nEnter interactive mode\n\t\u001b[34mtarask\u001b[0m\nPrints \"\u001b[34m[taraskevizer]\u001b[0m Enter the text:\" and waits until you enter a new line\n\nOPTIONS\n\n\u001b[33mGeneral\u001b[0m:\n  \u001b[35m-h\u001b[0m \u001b[35m--help\u001b[0m\n  \u001b[35m-v\u001b[0m \u001b[35m--version\u001b[0m\n\n\u001b[33mAlphabet\u001b[0m:\n  \u001b[35m-l\u001b[0m  \u001b[35m--latin\u001b[0m\n  \u001b[35m-lj\u001b[0m \u001b[35m--latin-ji\u001b[0m\n  \u001b[35m-a\u001b[0m  \u001b[35m--arabic\u001b[0m\n\n\u001b[33mWhen to replace і(i) by й(j) after vowels\u001b[0m:\n  \u001b[35m-jr\u001b[0m \u001b[35m--jrandom\u001b[0m\n  \u001b[35m-ja\u001b[0m \u001b[35m--jalways\u001b[0m\n\n\u001b[33mReplace ґ(g) by г(h) in cyrillic alphabet\u001b[0m:\n  \u001b[35m--h\u001b[0m\n\n\u001b[33mVariations\u001b[0m:\n  \u001b[35m-nv\u001b[0m \u001b[35m--no-variations\u001b[0m\n  \u001b[35m-fv\u001b[0m \u001b[35m--first-variation\u001b[0m\n\n\u001b[33mMode (only one can be used)\u001b[0m:\n  \u001b[35m-html\u001b[0m \u001b[35m--html\u001b[0m\n  \u001b[35m-abc\u001b[0m  \u001b[35m--alphabet-only\u001b[0m\n\n\u001b[33mOther\u001b[0m:\n  \u001b[35m-nec\u001b[0m \u001b[35m--not-escape-caps\u001b[0m\n  \u001b[35m-nc\u001b[0m  \u001b[35m--no-color\u001b[0m\n  \u001b[35m-st\u001b[0m  \u001b[35m--single-thread\u001b[0m\n");
         process.exit(0);
     }
 }
+const argv = process.argv.slice();
 const { mode, cfg, doForceSingleThread } = parseArgs(process.argv);
-let text = '';
+const workers = {
+    size: cpus()?.length || 1,
+    workers: null,
+    init() {
+        if (this.workers)
+            return;
+        process.stderr.write(`(Initializing ${this.size} workers... `);
+        const dirname = path.dirname(fileURLToPath(import.meta.url));
+        this.workers = Array.from({ length: this.size }, () => new Worker("const {\n\tparentPort,\n\tworkerData: { argv, dirname },\n} = require('node:worker_threads');\nconst { resolve } = require('node:path');\nconst { pipelines } = require(resolve(dirname, '..'));\nconst { parseArgs } = require(resolve(dirname, 'parse-args'));\n\nconst { mode, cfg } = parseArgs(argv);\n\nparentPort.on('message', (chunk) => {\n\tparentPort.postMessage(pipelines[mode](chunk, cfg));\n});\n", {
+            eval: true,
+            workerData: { argv, dirname },
+        }));
+        process.stderr.write('done.) ');
+    },
+    process(chunks) {
+        return Promise.all(chunks.map((chunk, i) => new Promise((resolve, reject) => {
+            const worker = this.workers[i % this.size];
+            worker.postMessage(chunk);
+            worker.once('message', resolve);
+            worker.once('error', reject);
+        })));
+    },
+};
+const processText = async (text) => {
+    let result = '';
+    if (!doForceSingleThread && workers.size > 1 && text.length > 50_000) {
+        workers.init();
+        const chunks = splitIntoChunks(text, workers.size);
+        const results = await workers.process(chunks);
+        result = results.join('');
+    }
+    else {
+        result = pipelines[mode](text, cfg);
+    }
+    if (!process.stdout.write(result)) {
+        process.stdout.once('drain', () => {
+            printErrLn('Drain event fired, exiting.');
+            process.exit(0);
+        });
+    }
+};
 if (process.argv.length) {
-    text = process.argv.reverse().join(' ');
+    printErrLn('Processing the rest of command-line arguments as text...');
+    await processText(process.argv.reverse().join(' '));
 }
 else {
     const chunks = [];
+    let value = '';
     let length = 0;
+    let byteLength = 0;
+    const MAX_BYTE_LENGTH = 64 * 1024 * 1024;
+    const getChunksString = () => Buffer.concat(chunks, length).toString();
     if (process.stdin.isTTY) {
-        printWithPrefix('Enter the text');
+        printErrLn('Enter the text');
         for await (const chunk of process.stdin) {
             chunks.push(chunk);
             length += chunk.length;
             if (chunk.includes('\n'))
                 break;
         }
+        await processText(getChunksString());
+        process.stdout.write('\n');
     }
     else {
+        printErrLn('Reading from stdin...');
+        const processTextWithLogs = async (value) => {
+            printErr(`Processing ${value.length
+                .toString()
+                .replace(/\B(?=(\d{3})+(?!\d))/g, ' ')} characters (${getPrettyByteSize(Buffer.byteLength(value))}) chunk... `);
+            const startTime = performance.now();
+            await processText(value);
+            process.stderr.write(`done in ${((performance.now() - startTime) / 1000).toFixed(2)} seconds.\n`);
+        };
         for await (const chunk of process.stdin) {
-            chunks.push(chunk);
+            byteLength += chunk.byteLength;
+            if (byteLength >= MAX_BYTE_LENGTH) {
+                value += getChunksString();
+                const lastNewlineIndex = value.lastIndexOf('\n');
+                if (lastNewlineIndex === -1) {
+                    printErrLn('\nInput exceeded maximum size of ' +
+                        MAX_BYTE_LENGTH +
+                        ' bytes without a newline. Stopping.');
+                    process.exit(1);
+                }
+                let valueForNextBatch = value.slice(lastNewlineIndex + 1);
+                value = value.slice(0, lastNewlineIndex + 1);
+                const lastOpeningTagIndex = value.lastIndexOf('<');
+                const lastClosingTagIndex = value.lastIndexOf('>');
+                if (lastOpeningTagIndex !== -1 &&
+                    lastOpeningTagIndex > lastClosingTagIndex) {
+                    const incompleteTag = value.slice(lastOpeningTagIndex);
+                    value = value.slice(0, lastOpeningTagIndex);
+                    valueForNextBatch = incompleteTag + valueForNextBatch;
+                }
+                await processTextWithLogs(value);
+                value = valueForNextBatch;
+                byteLength = chunk.byteLength;
+                length = 0;
+                chunks.length = 0;
+            }
             length += chunk.length;
+            chunks.push(chunk);
         }
+        await processTextWithLogs(value + getChunksString());
     }
-    text = Buffer.concat(chunks, length).toString();
-}
-let result = '';
-if (text.length > 50_000 && !doForceSingleThread) {
-    const cpuCount = Math.max(1, cpus()?.length || 1);
-    const chunks = splitIntoChunks(text, cpuCount);
-    const WORKER_CODE = `
-const { parentPort, workerData } = require('node:worker_threads');
-const { pipelines } = require('./dist');
-const { parseArgs } = require('./dist/bin/parse-args');
-const { argv, chunk } = workerData;
-const { mode, cfg } = parseArgs(argv);
-parentPort.postMessage(pipelines[mode](chunk, cfg));`;
-    const results = await Promise.all(chunks.map((chunk) => new Promise((resolve, reject) => {
-        const worker = new Worker(WORKER_CODE, {
-            eval: true,
-            workerData: { argv: process.argv, chunk },
-        });
-        worker.on('message', resolve);
-        worker.on('error', reject);
-        worker.on('exit', (code) => {
-            if (code !== 0)
-                reject(new Error('Worker exit code ' + code));
-        });
-    })));
-    result = results.join('\n') + '\n';
-}
-else {
-    result = pipelines[mode](text, cfg) + '\n';
-}
-if (process.stdout.write(result)) {
-    process.exit(0);
-}
-else {
-    process.stdout.once('drain', () => {
-        process.exit(0);
-    });
 }
+process.exit(0);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "taraskevizer",
-	"version": "10.4.9",
+	"version": "10.4.11",
 	"author": "GooseOb",
 	"repository": {
 		"type": "git",