taraskevizer 10.4.8 → 10.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,184 @@
1
+ #!/usr/bin/env node
2
+ import { cpus } from 'node:os';
3
+ import { Worker } from 'node:worker_threads';
4
+ import { pipelines } from '../index.js';
5
+ import { parseArgs } from './parse-args.js';
6
/**
 * Build a printer that writes a message to `stream`, prefixed with the
 * program tag. No newline is appended.
 *
 * @param {{ write: (text: string) => unknown }} stream - Destination stream.
 * @returns {(msg: string) => void} Function that writes the tagged message.
 */
const getPrint = (stream) => {
	const prefix = '[taraskevizer] ';
	return (msg) => {
		stream.write(prefix + msg);
	};
};

/**
 * Wrap a printer so that every message it receives ends with a newline.
 *
 * @param {(msg: string) => void} printFn - Printer to delegate to.
 * @returns {(msg: string) => void} Line-oriented variant of `printFn`.
 */
const getPrintLn = (printFn) => {
	const lineBreak = '\n';
	return (msg) => {
		printFn(msg + lineBreak);
	};
};

// Tagged printers for stdout and stderr, with and without a trailing newline.
const print = getPrint(process.stdout);
const printErr = getPrint(process.stderr);
const printLn = getPrintLn(print);
const printErrLn = getPrintLn(printErr);
16
/**
 * Split `text` into at most `n` consecutive chunks of roughly equal size.
 *
 * Cut points are moved to the nearest newline when one is available, and are
 * pushed past the next `>` when they would otherwise land inside a `<...>`
 * tag. Joining the returned chunks always reproduces `text` exactly: nothing
 * is dropped and nothing is duplicated.
 *
 * @param {string} text - Text to split.
 * @param {number} n - Desired number of chunks; values below 1 (or non-finite
 *   values) are treated as 1.
 * @returns {string[]} Between 1 and `n` chunks, in order.
 */
const splitIntoChunks = (text, n) => {
	const chunkCount = Number.isFinite(n) && n >= 1 ? Math.ceil(n) : 1;
	const size = Math.ceil(text.length / chunkCount);
	const chunks = [];
	let start = 0;
	for (let i = 0; i < chunkCount; i++) {
		// The last chunk always takes whatever is left, so that cut points
		// moved backward in earlier iterations can never cause a lost tail.
		const isLastChunk = i === chunkCount - 1;
		let end = isLastChunk ? text.length : start + size;
		// Adjust end to the nearest newline character
		if (end < text.length) {
			const forward = text.indexOf('\n', end);
			const newlineBefore = text.lastIndexOf('\n', end);
			// A newline at or before `start` would produce an empty chunk
			// and stall the loop, so it is not a usable cut point.
			const backward = newlineBefore > start ? newlineBefore : -1;
			if (forward !== -1 && backward !== -1) {
				end = forward - end < end - backward ? forward : backward;
			}
			else if (forward !== -1) {
				end = forward;
			}
			else if (backward !== -1) {
				end = backward;
			}
		}
		// Ensure we don't split inside HTML tags or special syntax tags.
		// The chunk covers [start, end), so only characters before `end`
		// decide whether a tag is still open at the cut point.
		if (end < text.length) {
			const lastOpen = text.lastIndexOf('<', end - 1);
			const lastClose = text.lastIndexOf('>', end - 1);
			if (lastOpen > lastClose) {
				const nextClose = text.indexOf('>', end);
				if (nextClose !== -1) {
					end = nextClose + 1;
				}
			}
		}
		if (end > text.length) {
			end = text.length;
		}
		chunks.push(text.slice(start, end));
		start = end;
		if (start >= text.length) {
			break;
		}
	}
	return chunks;
};
60
/**
 * Format a byte count for humans using binary units.
 *
 * @param {number} n - Size in bytes.
 * @returns {string} `"<n> B"` below 1024, otherwise the size in KB or MB
 *   with two decimals.
 */
const getPrettyByteSize = (n) => {
	const bytesPerKb = 1024;
	const bytesPerMb = bytesPerKb * bytesPerKb;
	if (n < bytesPerKb) {
		return `${n} B`;
	}
	if (n < bytesPerMb) {
		return `${(n / bytesPerKb).toFixed(2)} KB`;
	}
	return `${(n / bytesPerMb).toFixed(2)} MB`;
};
65
// Drop the first two argv entries (the Node.js executable and the script
// path), so that `process.argv` holds only the user-supplied arguments.
process.argv.splice(0, 2);
const firstArg = process.argv[0];
if (firstArg) {
	if (firstArg === '-v' || firstArg === '--version') {
		printLn("10.4.10");
		process.exit(0);
	}
	if (firstArg === '-h' || firstArg === '--help') {
		// The option names listed here must match the ones accepted by
		// parseArgs; the escape-caps flag is spelled `--no-escape-caps` there.
		printLn("Usage: \u001b[34mtarask\u001b[0m [options] text\nIf text is not passed, interactive mode is enabled\n\nEXAMPLES\n\nConvert and latinize a word\n\t\u001b[34mtarask\u001b[0m \u001b[35m--latin\u001b[0m 'планета'\nPrints \"p\u001b[32ml\u001b[0ma\u001b[32mne\u001b[0mta\"\n\nRead from one file and write converted text to another\n\t\u001b[34mtarask\u001b[0m < ./cyr-text.txt > ./lat-text.txt\n\nEnter interactive mode\n\t\u001b[34mtarask\u001b[0m\nPrints \"\u001b[34m[taraskevizer]\u001b[0m Enter the text:\" and waits until you enter a new line\n\nOPTIONS\n\n\u001b[33mGeneral\u001b[0m:\n \u001b[35m-h\u001b[0m \u001b[35m--help\u001b[0m\n \u001b[35m-v\u001b[0m \u001b[35m--version\u001b[0m\n\n\u001b[33mAlphabet\u001b[0m:\n \u001b[35m-l\u001b[0m \u001b[35m--latin\u001b[0m\n \u001b[35m-lj\u001b[0m \u001b[35m--latin-ji\u001b[0m\n \u001b[35m-a\u001b[0m \u001b[35m--arabic\u001b[0m\n\n\u001b[33mWhen to replace і(i) by й(j) after vowels\u001b[0m:\n \u001b[35m-jr\u001b[0m \u001b[35m--jrandom\u001b[0m\n \u001b[35m-ja\u001b[0m \u001b[35m--jalways\u001b[0m\n\n\u001b[33mReplace ґ(g) by г(h) in cyrillic alphabet\u001b[0m:\n \u001b[35m--h\u001b[0m\n\n\u001b[33mVariations\u001b[0m:\n \u001b[35m-nv\u001b[0m \u001b[35m--no-variations\u001b[0m\n \u001b[35m-fv\u001b[0m \u001b[35m--first-variation\u001b[0m\n\n\u001b[33mMode (only one can be used)\u001b[0m:\n \u001b[35m-html\u001b[0m \u001b[35m--html\u001b[0m\n \u001b[35m-abc\u001b[0m \u001b[35m--alphabet-only\u001b[0m\n\n\u001b[33mOther\u001b[0m:\n \u001b[35m-nec\u001b[0m \u001b[35m--no-escape-caps\u001b[0m\n \u001b[35m-nc\u001b[0m \u001b[35m--no-color\u001b[0m\n \u001b[35m-st\u001b[0m \u001b[35m--single-thread\u001b[0m\n");
		process.exit(0);
	}
}
// parseArgs consumes the leading options from the array it is given, so keep
// an untouched copy of the arguments for the worker threads, which parse the
// same options again on their side.
const argv = process.argv.slice();
const { mode, cfg, doForceSingleThread } = parseArgs(process.argv);
79
/**
 * Lazily created pool of worker threads that convert text chunks in parallel.
 *
 * - `size`: number of workers, one per CPU reported by the OS (at least 1).
 * - `workers`: the Worker instances, or `null` until `init()` has run.
 */
const workers = {
	size: cpus()?.length || 1,
	workers: null,
	/**
	 * Start the worker threads. Does nothing if they are already running.
	 * Progress is reported on stderr.
	 */
	init() {
		if (this.workers)
			return;
		process.stderr.write(`(Initializing ${this.size} workers... `);
		// A worker created with `eval: true` resolves relative `require`
		// paths against the current working directory, not against this
		// file. The package modules are therefore passed as absolute file
		// URLs, so the CLI works no matter where it is started from.
		const workerSource = [
			"const { parentPort, workerData } = require('node:worker_threads');",
			"const { fileURLToPath } = require('node:url');",
			'const { pipelines } = require(fileURLToPath(workerData.indexUrl));',
			'const { parseArgs } = require(fileURLToPath(workerData.parseArgsUrl));',
			'',
			'const { mode, cfg } = parseArgs(workerData.argv);',
			'',
			"parentPort.on('message', (chunk) => {",
			'\tparentPort.postMessage(pipelines[mode](chunk, cfg));',
			'});',
			'',
		].join('\n');
		const workerData = {
			argv,
			indexUrl: new URL('../index.js', import.meta.url).href,
			parseArgsUrl: new URL('./parse-args.js', import.meta.url).href,
		};
		this.workers = Array.from({ length: this.size }, () => new Worker(workerSource, {
			eval: true,
			workerData,
		}));
		process.stderr.write('done.) ');
	},
	/**
	 * Convert every chunk on a worker thread; chunk `i` is sent to worker
	 * `i % size`. `init()` must have been called first.
	 *
	 * Each worker answers messages in the order it receives them and replies
	 * are matched to the single pending request per worker, so callers should
	 * pass at most `size` chunks per call.
	 *
	 * @param {string[]} chunks - Pieces of text to convert.
	 * @returns {Promise<string[]>} Converted chunks in the same order;
	 *   rejects if a worker emits `error`.
	 */
	process(chunks) {
		return Promise.all(chunks.map((chunk, i) => new Promise((resolve, reject) => {
			const worker = this.workers[i % this.size];
			// Whichever event fires first removes the other listener, so
			// repeated calls do not pile up stale listeners on the worker.
			const onMessage = (converted) => {
				worker.off('error', onError);
				resolve(converted);
			};
			const onError = (error) => {
				worker.off('message', onMessage);
				reject(error);
			};
			worker.once('message', onMessage);
			worker.once('error', onError);
			worker.postMessage(chunk);
		})));
	},
};
101
/**
 * Convert `text` and write the result to stdout.
 *
 * Texts longer than 50 000 characters are split and converted on the worker
 * pool, unless single-thread mode was requested or only one worker is
 * available; everything else is converted on the main thread.
 *
 * The returned promise settles only once stdout has accepted the output:
 * when the stream reports backpressure, the function waits for `drain`
 * instead of ending the process, so callers can safely keep writing (or
 * exit) afterwards.
 *
 * @param {string} text - Text to convert.
 * @returns {Promise<void>}
 */
const processText = async (text) => {
	let result;
	if (!doForceSingleThread && workers.size > 1 && text.length > 50_000) {
		workers.init();
		const chunks = splitIntoChunks(text, workers.size);
		const results = await workers.process(chunks);
		result = results.join('');
	}
	else {
		result = pipelines[mode](text, cfg);
	}
	if (!process.stdout.write(result)) {
		// `write` returned false: the internal buffer is full. Wait until
		// it has been flushed before letting the caller continue.
		await new Promise((resolve) => {
			process.stdout.once('drain', resolve);
		});
	}
};
119
// Entry point: pick the input source (remaining command-line arguments,
// interactive terminal, or piped stdin), convert it and exit.
if (process.argv.length) {
	printErrLn('Processing the rest of command-line arguments as text...');
	// parseArgs leaves the remaining arguments in reversed order.
	await processText(process.argv.reverse().join(' '));
}
else {
	const chunks = [];
	let value = '';
	let length = 0;
	let byteLength = 0;
	const MAX_BYTE_LENGTH = 64 * 1024 * 1024;
	// A streaming decoder keeps the bytes of a multi-byte UTF-8 character
	// that is cut by a batch boundary and completes it with the next batch,
	// instead of emitting replacement characters on both sides of the cut.
	// `ignoreBOM: true` keeps a leading BOM in the output, like
	// `Buffer#toString()` does.
	const decoder = new TextDecoder('utf-8', { ignoreBOM: true });
	const getChunksString = () => decoder.decode(Buffer.concat(chunks, length), { stream: true });
	// Flushes whatever the decoder is still holding at the end of the input.
	const flushDecoder = () => decoder.decode();
	if (process.stdin.isTTY) {
		// Interactive mode: read until the first chunk containing a newline.
		printErrLn('Enter the text');
		for await (const chunk of process.stdin) {
			chunks.push(chunk);
			length += chunk.length;
			if (chunk.includes('\n'))
				break;
		}
		await processText(getChunksString() + flushDecoder());
		process.stdout.write('\n');
	}
	else {
		printErrLn('Reading from stdin...');
		// Converts one batch and reports its size and duration on stderr.
		const processTextWithLogs = async (value) => {
			printErr(`Processing ${value.length
				.toString()
				.replace(/\B(?=(\d{3})+(?!\d))/g, ' ')} characters (${getPrettyByteSize(Buffer.byteLength(value))}) chunk... `);
			const startTime = performance.now();
			await processText(value);
			process.stderr.write(`done in ${((performance.now() - startTime) / 1000).toFixed(2)} seconds.\n`);
		};
		for await (const chunk of process.stdin) {
			byteLength += chunk.byteLength;
			if (byteLength >= MAX_BYTE_LENGTH) {
				// The buffered chunks (without the current one) form a
				// batch. It is cut at its last newline; the rest is carried
				// over to the next batch.
				value += getChunksString();
				const lastNewlineIndex = value.lastIndexOf('\n');
				if (lastNewlineIndex === -1) {
					printErrLn('\nInput exceeded maximum size of ' +
						MAX_BYTE_LENGTH +
						' bytes without a newline. Stopping.');
					process.exit(1);
				}
				let valueForNextBatch = value.slice(lastNewlineIndex + 1);
				value = value.slice(0, lastNewlineIndex + 1);
				// If the batch ends inside an unclosed `<...` tag, move the
				// incomplete tag to the next batch as well.
				const lastOpeningTagIndex = value.lastIndexOf('<');
				const lastClosingTagIndex = value.lastIndexOf('>');
				if (lastOpeningTagIndex !== -1 &&
					lastOpeningTagIndex > lastClosingTagIndex) {
					const incompleteTag = value.slice(lastOpeningTagIndex);
					value = value.slice(0, lastOpeningTagIndex);
					valueForNextBatch = incompleteTag + valueForNextBatch;
				}
				await processTextWithLogs(value);
				value = valueForNextBatch;
				byteLength = chunk.byteLength;
				length = 0;
				chunks.length = 0;
			}
			length += chunk.length;
			chunks.push(chunk);
		}
		await processTextWithLogs(value + getChunksString() + flushDecoder());
	}
}
process.exit(0);
@@ -0,0 +1,6 @@
1
+ import { TaraskConfig } from '..';
2
/**
 * Parses the leading command-line options of `argv`.
 *
 * `mode` is the name of the pipeline selected by the options: the
 * implementation starts with `'tarask'` and switches to `'alphabetic'` or
 * `'phonetic'` when the corresponding option is passed, so all three values
 * are part of the declared type.
 */
export declare const parseArgs: (argv: string[]) => {
    cfg: Partial<TaraskConfig>;
    mode: "tarask" | "alphabetic" | "phonetic";
    doForceSingleThread: boolean;
};
@@ -0,0 +1,123 @@
1
+ import { dicts, htmlConfigOptions, TaraskConfig, wrappers, } from '../index.js';
2
/**
 * Flatten a list of `[aliases, callback]` pairs into a lookup table that
 * maps every alias to its callback.
 *
 * The table has no prototype, so membership tests such as `key in table`
 * only ever match the aliases that were registered and never inherited
 * names like `constructor` or `toString`.
 *
 * @param {Array<[string[], Function]>} dict - Alias groups and their callbacks.
 * @returns {Record<string, Function>} Alias-to-callback table.
 */
const toHashTable = (dict) => {
	const result = Object.create(null);
	for (const [options, callback] of dict) {
		for (const option of options) {
			result[option] = callback;
		}
	}
	return result;
};
9
/**
 * Parse the leading command-line options of `argv`.
 *
 * Options are consumed from the front of `argv` until the first argument
 * that is not a known option. `argv` is modified in place: on return it
 * holds only the remaining arguments, in REVERSED order (callers reverse it
 * again to restore the original order).
 *
 * @param {string[]} argv - Arguments without the executable and script path.
 * @returns {{ cfg: object, mode: string, doForceSingleThread: boolean }}
 *   `cfg` is a `TaraskConfig` built from the options, `mode` is the name of
 *   the selected pipeline (`'tarask'`, `'alphabetic'` or `'phonetic'`) and
 *   `doForceSingleThread` tells whether `--single-thread` was passed.
 */
export const parseArgs = (argv) => {
	let cfg = {
		g: true,
		variations: 'all',
		wrappers: wrappers.ansiColor,
	};
	let mode = 'tarask';
	let isHtml = false;
	let doForceSingleThread = false;
	const optionDict = toHashTable([
		[
			['--latin', '-l'],
			() => {
				cfg.abc = dicts.alphabets.latin;
			},
		],
		[
			['--latin-ji', '-lj'],
			() => {
				cfg.abc = dicts.alphabets.latinJi;
			},
		],
		[
			['--arabic', '-a'],
			() => {
				cfg.abc = dicts.alphabets.arabic;
			},
		],
		[
			['--jrandom', '-jr'],
			() => {
				cfg.j = 'random';
			},
		],
		[
			['--jalways', '-ja'],
			() => {
				cfg.j = 'always';
			},
		],
		[
			// `--not-escape-caps` is the spelling shown by `--help`; it is
			// accepted as an alias so the documented flag actually works.
			['--no-escape-caps', '--not-escape-caps', '-nec'],
			() => {
				cfg.doEscapeCapitalized = false;
			},
		],
		[
			['--h'],
			() => {
				cfg.g = false;
			},
		],
		[
			['--no-variations', '-nv'],
			() => {
				cfg.variations = 'no';
			},
		],
		[
			['--first-variation', '-fv'],
			() => {
				cfg.variations = 'first';
			},
		],
		[
			['--no-color', '-nc'],
			() => {
				cfg.wrappers = null;
			},
		],
		[
			['--html', '-html'],
			() => {
				isHtml = true;
				cfg.wrappers = htmlConfigOptions.wrappers;
			},
		],
		[
			['--alphabet-only', '-abc'],
			() => {
				mode = 'alphabetic';
			},
		],
		[
			['--phonetic', '-ph'],
			() => {
				mode = 'phonetic';
			},
		],
		[
			['--single-thread', '-st'],
			() => {
				doForceSingleThread = true;
			},
		],
	]);
	let currOption;
	// Reverse once so that options can be taken from the front with `pop`.
	argv.reverse();
	while ((currOption = argv.pop())) {
		// Own-property check: a text word such as "constructor" or
		// "toString" must not be mistaken for an option through the
		// prototype chain of the lookup table.
		if (Object.hasOwn(optionDict, currOption)) {
			optionDict[currOption]();
		}
		else {
			// First non-option argument: put it back and stop parsing.
			argv.push(currOption);
			break;
		}
	}
	// In HTML mode the HTML defaults are applied first, so explicitly passed
	// options still take precedence over them.
	cfg = new TaraskConfig(isHtml
		? {
			...htmlConfigOptions,
			...cfg,
		}
		: cfg);
	return { cfg, mode, doForceSingleThread };
};
package/dist/config.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import type { DeepReadonly } from './types';
2
1
  import type { Alphabet } from './dict/alphabets';
2
+ import type { DeepReadonly } from './types';
3
3
  import { type Wrappers } from './wrappers';
4
4
  export type Variation = 'no' | 'first' | 'all';
5
5
  export type OptionJ = 'never' | 'random' | 'always';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "taraskevizer",
3
- "version": "10.4.8",
3
+ "version": "10.4.10",
4
4
  "author": "GooseOb",
5
5
  "repository": {
6
6
  "type": "git",
@@ -22,7 +22,7 @@
22
22
  "typescript-eslint": "^8.42.0"
23
23
  },
24
24
  "bin": {
25
- "tarask": "dist/bin.js"
25
+ "tarask": "dist/bin/index.js"
26
26
  },
27
27
  "description": "Канвэртацыя акадэмічнага правапісу ў клясычны",
28
28
  "files": [
package/dist/bin.js DELETED
@@ -1,206 +0,0 @@
1
- #!/usr/bin/env node
2
- import { dicts, TaraskConfig, pipelines, htmlConfigOptions, wrappers } from './index.js';
3
- const printWithPrefix = (msg) => {
4
- process.stdout.write("[taraskevizer]" + ' ' + msg + '\n');
5
- };
6
- process.argv.splice(0, 2);
7
- const firstArg = process.argv[0];
8
- if (firstArg) {
9
- if (firstArg === '-v' || firstArg === '--version') {
10
- printWithPrefix("10.4.8");
11
- process.exit(0);
12
- }
13
- if (firstArg === '-h' || firstArg === '--help') {
14
- printWithPrefix(`Usage: tarask [options] text
15
- If text is not passed, interactive mode will be enabled
16
-
17
- EXAMPLES
18
-
19
- Convert and latinize a word
20
- tarask --latin 'планета'
21
- Will print "planeta"
22
-
23
- Read from one file and write converted text to another
24
- tarask < ./cyr-text.txt > ./lat-text.txt
25
-
26
- Enter interactive mode
27
- tarask
28
- Will print "[taraskevizer] Enter the text:" and wait until you press Enter
29
-
30
- OPTIONS
31
-
32
- General:
33
- -h --help
34
- -v --version
35
-
36
- Alphabet:
37
- -l --latin
38
- -lj --latin-ji
39
- -a --arabic
40
-
41
- When to replace і(i) by й(j) after vowels:
42
- -jr --jrandom
43
- -ja --jalways
44
-
45
- Replace ґ(g) by г(h) in cyrillic alphabet:
46
- --h
47
-
48
- Variations:
49
- -nv --no-variations
50
- -fv --first-variation
51
-
52
- Mode (only one can be used):
53
- -html --html
54
- -abc --alphabet-only
55
-
56
- Other:
57
- -nec --not-escape-caps
58
- -nc --no-color
59
- `);
60
- process.exit(0);
61
- }
62
- }
63
- let cfg = {
64
- g: true,
65
- variations: 'all',
66
- wrappers: wrappers.ansiColor,
67
- };
68
- let mode = 'tarask';
69
- const toHashTable = (dict) => {
70
- const result = {};
71
- for (const { 0: options, 1: callback } of dict)
72
- for (const option of options)
73
- result[option] = callback;
74
- return result;
75
- };
76
- let isHtml = false;
77
- const optionDict = toHashTable([
78
- [
79
- ['--latin', '-l'],
80
- () => {
81
- cfg.abc = dicts.alphabets.latin;
82
- },
83
- ],
84
- [
85
- ['--latin-ji', '-lj'],
86
- () => {
87
- cfg.abc = dicts.alphabets.latinJi;
88
- },
89
- ],
90
- [
91
- ['--arabic', '-a'],
92
- () => {
93
- cfg.abc = dicts.alphabets.arabic;
94
- },
95
- ],
96
- [
97
- ['--jrandom', '-jr'],
98
- () => {
99
- cfg.j = 'random';
100
- },
101
- ],
102
- [
103
- ['--jalways', '-ja'],
104
- () => {
105
- cfg.j = 'always';
106
- },
107
- ],
108
- [
109
- ['--no-escape-caps', '-nec'],
110
- () => {
111
- cfg.doEscapeCapitalized = false;
112
- },
113
- ],
114
- [
115
- ['--h'],
116
- () => {
117
- cfg.g = false;
118
- },
119
- ],
120
- [
121
- ['--no-variations', '-nv'],
122
- () => {
123
- cfg.variations = 'no';
124
- },
125
- ],
126
- [
127
- ['--first-variation', '-fv'],
128
- () => {
129
- cfg.variations = 'first';
130
- },
131
- ],
132
- [
133
- ['--no-color', '-nc'],
134
- () => {
135
- cfg.wrappers = null;
136
- },
137
- ],
138
- [
139
- ['--html', '-html'],
140
- () => {
141
- isHtml = true;
142
- cfg.wrappers = htmlConfigOptions.wrappers;
143
- },
144
- ],
145
- [
146
- ['--alphabet-only', '-abc'],
147
- () => {
148
- mode = 'alphabetic';
149
- },
150
- ],
151
- [
152
- ['--phonetic', '-ph'],
153
- () => {
154
- mode = 'phonetic';
155
- },
156
- ],
157
- ]);
158
- let currOption;
159
- process.argv.reverse();
160
- while ((currOption = process.argv.pop())) {
161
- if (currOption in optionDict) {
162
- optionDict[currOption]();
163
- }
164
- else {
165
- process.argv.push(currOption);
166
- break;
167
- }
168
- }
169
- let text = '';
170
- if (process.argv.length) {
171
- text = process.argv.reverse().join(' ');
172
- }
173
- else {
174
- const chunks = [];
175
- let length = 0;
176
- if (process.stdin.isTTY) {
177
- printWithPrefix('Enter the text');
178
- for await (const chunk of process.stdin) {
179
- chunks.push(chunk);
180
- length += chunk.length;
181
- if (chunk.includes('\n'))
182
- break;
183
- }
184
- }
185
- else {
186
- for await (const chunk of process.stdin) {
187
- chunks.push(chunk);
188
- length += chunk.length;
189
- }
190
- }
191
- text = Buffer.concat(chunks, length).toString();
192
- }
193
- cfg = new TaraskConfig(isHtml
194
- ? {
195
- ...htmlConfigOptions,
196
- ...cfg,
197
- }
198
- : cfg);
199
- if (process.stdout.write(pipelines[mode](text, cfg) + '\n')) {
200
- process.exit(0);
201
- }
202
- else {
203
- process.stdout.once('drain', () => {
204
- process.exit(0);
205
- });
206
- }
File without changes