@dev-pi2pie/word-counter 0.1.4 → 0.1.5-canary.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -0
- package/dist/cjs/detector.cjs +427 -0
- package/dist/cjs/detector.cjs.map +1 -0
- package/dist/cjs/index.cjs +10 -1257
- package/dist/cjs/index.cjs.map +1 -1
- package/dist/cjs/markdown.cjs +1318 -0
- package/dist/cjs/markdown.cjs.map +1 -0
- package/dist/esm/bin.mjs +966 -298
- package/dist/esm/bin.mjs.map +1 -1
- package/dist/esm/detector.d.mts +37 -0
- package/dist/esm/detector.mjs +412 -0
- package/dist/esm/detector.mjs.map +1 -0
- package/dist/esm/index.d.mts +1 -1
- package/dist/esm/index.mjs +2 -1248
- package/dist/esm/index.mjs.map +1 -1
- package/dist/esm/index2.d.mts +2 -0
- package/dist/esm/markdown.mjs +1229 -0
- package/dist/esm/markdown.mjs.map +1 -0
- package/dist/esm/worker/count-worker.mjs +412 -47
- package/dist/esm/worker/count-worker.mjs.map +1 -1
- package/dist/esm/worker-pool.mjs +6 -3
- package/dist/esm/worker-pool.mjs.map +1 -1
- package/dist/wasm-language-detector/LICENSE +21 -0
- package/dist/wasm-language-detector/language_detector.d.ts +4 -0
- package/dist/wasm-language-detector/language_detector.js +132 -0
- package/dist/wasm-language-detector/language_detector_bg.wasm +0 -0
- package/dist/wasm-language-detector/language_detector_bg.wasm.d.ts +8 -0
- package/dist/wasm-language-detector/package.json +17 -0
- package/package.json +18 -10
package/dist/esm/bin.mjs
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
+
import { createRequire } from "node:module";
|
|
3
4
|
import { Command, Option } from "commander";
|
|
4
5
|
import { closeSync, createWriteStream, existsSync, mkdirSync, openSync, readFileSync, statSync } from "node:fs";
|
|
5
6
|
import { basename, dirname, extname, join, relative, resolve, sep } from "node:path";
|
|
6
|
-
import { fileURLToPath } from "node:url";
|
|
7
7
|
import os from "node:os";
|
|
8
|
+
import { fileURLToPath } from "node:url";
|
|
8
9
|
import { parseDocument } from "yaml";
|
|
9
10
|
import { readFile, readdir, stat } from "node:fs/promises";
|
|
10
|
-
|
|
11
11
|
//#region \0rolldown/runtime.js
|
|
12
12
|
var __create = Object.create;
|
|
13
13
|
var __defProp = Object.defineProperty;
|
|
@@ -17,16 +17,12 @@ var __getProtoOf = Object.getPrototypeOf;
|
|
|
17
17
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
18
18
|
var __commonJSMin = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
|
|
19
19
|
var __copyProps = (to, from, except, desc) => {
|
|
20
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
|
|
27
|
-
});
|
|
28
|
-
}
|
|
29
|
-
}
|
|
20
|
+
if (from && typeof from === "object" || typeof from === "function") for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
|
|
21
|
+
key = keys[i];
|
|
22
|
+
if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, {
|
|
23
|
+
get: ((k) => from[k]).bind(null, key),
|
|
24
|
+
enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
|
|
25
|
+
});
|
|
30
26
|
}
|
|
31
27
|
return to;
|
|
32
28
|
};
|
|
@@ -34,7 +30,6 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
34
30
|
value: mod,
|
|
35
31
|
enumerable: true
|
|
36
32
|
}) : target, mod));
|
|
37
|
-
|
|
38
33
|
//#endregion
|
|
39
34
|
//#region src/cli/debug/channel.ts
|
|
40
35
|
const NOOP_CLOSE = async () => {};
|
|
@@ -144,7 +139,470 @@ function createDebugChannel(options) {
|
|
|
144
139
|
}
|
|
145
140
|
};
|
|
146
141
|
}
|
|
147
|
-
|
|
142
|
+
//#endregion
|
|
143
|
+
//#region src/cli/program/version-embedded.ts
|
|
144
|
+
// Bundled CommonJS colour helper, evaluated once through the bundler runtime wrappers.
var import_picocolors = /* @__PURE__ */ __toESM((/* @__PURE__ */ __commonJSMin(((exports, module) => {
  const proc = process || {};
  const args = proc.argv || [];
  const environment = proc.env || {};
  // Opt-out switches win over every opt-in signal.
  const colorDisabled = !!environment.NO_COLOR || args.includes("--no-color");
  const colorRequested = !!environment.FORCE_COLOR || args.includes("--color") || proc.platform === "win32" || (proc.stdout || {}).isTTY && environment.TERM !== "dumb" || !!environment.CI;
  const isColorSupported = !colorDisabled && colorRequested;
  // Re-opens the style after every nested occurrence of the closing sequence.
  const replaceClose = (text, close, replace, at) => {
    let out = "";
    let from = 0;
    do {
      out += text.substring(from, at) + replace;
      from = at + close.length;
      at = text.indexOf(close, from);
    } while (at !== -1);
    return out + text.substring(from);
  };
  const formatter = (open, close, replace = open) => (input) => {
    const text = "" + input;
    const at = text.indexOf(close, open.length);
    if (at === -1) return open + text + close;
    return open + replaceClose(text, close, replace, at) + close;
  };
  // [name, open, close, optional replacement]; order defines the key order of the result.
  const STYLES = [
    ["reset", "\x1B[0m", "\x1B[0m"],
    ["bold", "\x1B[1m", "\x1B[22m", "\x1B[22m\x1B[1m"],
    ["dim", "\x1B[2m", "\x1B[22m", "\x1B[22m\x1B[2m"],
    ["italic", "\x1B[3m", "\x1B[23m"],
    ["underline", "\x1B[4m", "\x1B[24m"],
    ["inverse", "\x1B[7m", "\x1B[27m"],
    ["hidden", "\x1B[8m", "\x1B[28m"],
    ["strikethrough", "\x1B[9m", "\x1B[29m"],
    ["black", "\x1B[30m", "\x1B[39m"],
    ["red", "\x1B[31m", "\x1B[39m"],
    ["green", "\x1B[32m", "\x1B[39m"],
    ["yellow", "\x1B[33m", "\x1B[39m"],
    ["blue", "\x1B[34m", "\x1B[39m"],
    ["magenta", "\x1B[35m", "\x1B[39m"],
    ["cyan", "\x1B[36m", "\x1B[39m"],
    ["white", "\x1B[37m", "\x1B[39m"],
    ["gray", "\x1B[90m", "\x1B[39m"],
    ["bgBlack", "\x1B[40m", "\x1B[49m"],
    ["bgRed", "\x1B[41m", "\x1B[49m"],
    ["bgGreen", "\x1B[42m", "\x1B[49m"],
    ["bgYellow", "\x1B[43m", "\x1B[49m"],
    ["bgBlue", "\x1B[44m", "\x1B[49m"],
    ["bgMagenta", "\x1B[45m", "\x1B[49m"],
    ["bgCyan", "\x1B[46m", "\x1B[49m"],
    ["bgWhite", "\x1B[47m", "\x1B[49m"],
    ["blackBright", "\x1B[90m", "\x1B[39m"],
    ["redBright", "\x1B[91m", "\x1B[39m"],
    ["greenBright", "\x1B[92m", "\x1B[39m"],
    ["yellowBright", "\x1B[93m", "\x1B[39m"],
    ["blueBright", "\x1B[94m", "\x1B[39m"],
    ["magentaBright", "\x1B[95m", "\x1B[39m"],
    ["cyanBright", "\x1B[96m", "\x1B[39m"],
    ["whiteBright", "\x1B[97m", "\x1B[39m"],
    ["bgBlackBright", "\x1B[100m", "\x1B[49m"],
    ["bgRedBright", "\x1B[101m", "\x1B[49m"],
    ["bgGreenBright", "\x1B[102m", "\x1B[49m"],
    ["bgYellowBright", "\x1B[103m", "\x1B[49m"],
    ["bgBlueBright", "\x1B[104m", "\x1B[49m"],
    ["bgMagentaBright", "\x1B[105m", "\x1B[49m"],
    ["bgCyanBright", "\x1B[106m", "\x1B[49m"],
    ["bgWhiteBright", "\x1B[107m", "\x1B[49m"]
  ];
  const createColors = (enabled = isColorSupported) => {
    // When disabled every style is plain String conversion.
    const make = enabled ? formatter : () => String;
    const colors = { isColorSupported: enabled };
    for (const [name, ...codes] of STYLES) colors[name] = make(...codes);
    return colors;
  };
  module.exports = createColors();
  module.exports.createColors = createColors;
})))(), 1);
|
|
210
|
+
//#endregion
|
|
211
|
+
//#region src/cli/batch/jobs/limits.ts
|
|
212
|
+
const DEFAULT_UV_THREADPOOL_SIZE = 4;
|
|
213
|
+
/**
 * Parse a base-10 integer that must be strictly positive.
 * @param {string | undefined} value - Raw text to parse.
 * @returns {number | undefined} The parsed integer, or undefined when the
 *   input is empty, not numeric, or not greater than zero.
 */
function parsePositiveInteger(value) {
  if (!value) return undefined;
  const candidate = Number.parseInt(value, 10);
  const usable = Number.isFinite(candidate) && candidate > 0;
  return usable ? candidate : undefined;
}
|
|
219
|
+
/**
 * Compute advisory concurrency limits for batch jobs on this host.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment
 *   to read `UV_THREADPOOL_SIZE` from.
 * @returns {{ suggestedMaxJobs: number, cpuLimit: number, uvThreadpool: number, ioLimit: number }}
 *   `suggestedMaxJobs` is the smaller of the CPU and I/O limits, never below 1.
 */
function resolveBatchJobsLimit(env = process.env) {
  // os.availableParallelism() does not exist on older Node releases; the doctor
  // report must still run there so it can flag the runtime as unsupported.
  const detectedParallelism = typeof os.availableParallelism === "function" ? os.availableParallelism() : os.cpus().length;
  const cpuLimit = Math.max(1, detectedParallelism);
  const uvThreadpool = parsePositiveInteger(env.UV_THREADPOOL_SIZE) ?? DEFAULT_UV_THREADPOOL_SIZE;
  const ioLimit = Math.max(1, uvThreadpool * 2);
  return {
    suggestedMaxJobs: Math.max(1, Math.min(cpuLimit, ioLimit)),
    cpuLimit,
    uvThreadpool,
    ioLimit
  };
}
|
|
230
|
+
/**
 * Clamp a requested job count into the range [1, limits.suggestedMaxJobs].
 * @param {number} requestedJobs - Job count asked for by the caller.
 * @param {{ suggestedMaxJobs: number }} limits - Host limits.
 * @returns {number} The effective job count.
 */
function clampRequestedJobs(requestedJobs, limits) {
  const ceiling = limits.suggestedMaxJobs;
  const capped = Math.min(requestedJobs, ceiling);
  return Math.max(1, capped);
}
|
|
233
|
+
/**
 * Build the single-line warning shown when the requested job count is capped.
 * @param {number} requestedJobs - Job count the user asked for.
 * @param {number} effectiveJobs - Job count that will actually be used.
 * @param {{ suggestedMaxJobs: number, cpuLimit: number, uvThreadpool: number, ioLimit: number }} limits
 * @returns {string} Three sentences joined by single spaces.
 */
function formatJobsAdvisoryWarning(requestedJobs, effectiveJobs, limits) {
  const headline = `Warning: requested --jobs=${requestedJobs} exceeds suggested host limit (${limits.suggestedMaxJobs}).`;
  const applied = `Running with --jobs=${effectiveJobs} as a safety cap.`;
  const details = `Host limits: cpuLimit=${limits.cpuLimit}, uvThreadpool=${limits.uvThreadpool}, ioLimit=${limits.ioLimit}.`;
  return `${headline} ${applied} ${details}`;
}
|
|
240
|
+
/**
 * Tell whether an error value carries the `EMFILE` or `ENFILE` code.
 * @param {unknown} error - Any thrown value.
 * @returns {boolean} True only for non-null objects whose `code` is one of the two.
 */
function isResourceLimitError(error) {
  if (error === null || typeof error !== "object") return false;
  if (!("code" in error)) return false;
  return ["EMFILE", "ENFILE"].includes(error.code);
}
|
|
245
|
+
/**
 * Wrap a file-descriptor exhaustion failure in an Error with remediation advice.
 *
 * @param {string} path - File being processed when the failure happened.
 * @param {unknown} error - The underlying failure: an Error, a plain
 *   `{ code, message }` object, or any other thrown value.
 * @param {number} requestedJobs - Job count the user asked for.
 * @param {{ suggestedMaxJobs: number }} limits - Host limits used in the advice.
 * @returns {Error} A new Error whose message names the path, code and cause text.
 */
function createResourceLimitError(path, error, requestedJobs, limits) {
  const isObjectLike = typeof error === "object" && error !== null;
  let message;
  if (error instanceof Error) {
    message = error.message;
  } else if (isObjectLike && typeof error.message === "string") {
    // Plain { code, message } objects would otherwise stringify to "[object Object]".
    message = error.message;
  } else {
    message = String(error);
  }
  const code = isObjectLike && "code" in error ? String(error.code) : "UNKNOWN";
  return new Error([
    `Resource limit reached while processing: ${path} (${code}: ${message}).`,
    `Requested --jobs=${requestedJobs}; suggested host limit is ${limits.suggestedMaxJobs}.`,
    "Reduce --jobs or raise OS file descriptor limits before retrying."
  ].join(" "));
}
|
|
254
|
+
//#endregion
|
|
255
|
+
//#region src/cli/batch/jobs/load-count-worker.ts
|
|
256
|
+
/**
 * Signals that the worker-thread route cannot be used.
 * Sets `name` so logs and `String(error)` identify the error type, and forwards
 * the standard Error options (for example `cause`).
 */
var WorkerRouteUnavailableError = class WorkerRouteUnavailableError extends Error {
  constructor(message, options) {
    super(message, options);
    this.name = "WorkerRouteUnavailableError";
  }
};
|
|
257
|
+
/**
 * Check whether `node:worker_threads` can be imported and exposes a `Worker` constructor.
 * @returns {Promise<boolean>} False when the import fails or `Worker` is not a function.
 */
async function resolveWorkerThreadsAvailability() {
  let workerThreads;
  try {
    workerThreads = await import("node:worker_threads");
  } catch {
    return false;
  }
  return typeof workerThreads.Worker === "function";
}
|
|
264
|
+
/**
 * Decide whether a worker failure matches one of the known worker-path or
 * module-resolution error codes, or the equivalent message fragments.
 * @param {unknown} error - Any thrown value.
 * @returns {boolean} True for a matching code or message; false for non-objects.
 */
function isFallbackFriendlyWorkerError(error) {
  if (error === null || typeof error !== "object") return false;
  const fallbackCodes = [
    "ERR_WORKER_PATH",
    "ERR_WORKER_UNSUPPORTED_EXTENSION",
    "ERR_UNKNOWN_FILE_EXTENSION",
    "ERR_MODULE_NOT_FOUND"
  ];
  const code = "code" in error ? String(error.code) : "";
  if (fallbackCodes.includes(code)) return true;
  const text = error instanceof Error ? error.message : String(error);
  return ["Unknown file extension", "Cannot find module"].some((needle) => text.includes(needle));
}
|
|
271
|
+
/**
 * Probe whether the worker route could be used, without running any job.
 *
 * Module loading and entry resolution are probed separately so that a failure
 * inside `resolveWorkerEntryUrl()` is reported as a missing entry rather than
 * as an unloadable worker-pool module.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment
 *   to read `WORD_COUNTER_DISABLE_WORKER_JOBS` from.
 * @returns {Promise<{ workerThreadsAvailable: boolean, workerRouteDisabledByEnv: boolean, disableWorkerJobsEnv: string | null, workerPoolModuleLoadable: boolean, workerEntryFound: boolean }>}
 */
async function resolveWorkerRoutePreflight(env = process.env) {
  const disableWorkerJobsEnv = env.WORD_COUNTER_DISABLE_WORKER_JOBS ?? null;
  const preflight = {
    workerThreadsAvailable: await resolveWorkerThreadsAvailability(),
    workerRouteDisabledByEnv: disableWorkerJobsEnv === "1",
    disableWorkerJobsEnv,
    workerPoolModuleLoadable: false,
    workerEntryFound: false
  };
  let workerPoolModule;
  try {
    workerPoolModule = await import("./worker-pool.mjs");
  } catch {
    return preflight;
  }
  preflight.workerPoolModuleLoadable = true;
  try {
    preflight.workerEntryFound = workerPoolModule.resolveWorkerEntryUrl() !== null;
  } catch {
    // Entry resolution failed: the module loaded, but no usable entry is known.
    preflight.workerEntryFound = false;
  }
  return preflight;
}
|
|
293
|
+
/**
 * Count batch inputs through the worker pool, translating pool failures into
 * the error types the CLI handles.
 *
 * @param {string[]} filePaths - Files to count.
 * @param {object} options - Forwarded to the worker pool: `jobs`, `section`,
 *   `detectorMode` (defaults to "regex"), `wcOptions`,
 *   `preserveCollectorSegments`, `onFileProcessed`.
 * @returns {Promise<unknown>} Whatever `countBatchInputsWithWorkerPool` resolves to.
 * @throws {WorkerRouteUnavailableError} When the route is disabled by
 *   environment, the pool module cannot be loaded, or the failure is one the
 *   caller can fall back from.
 * @throws {Error} For fatal task errors (resource-limit errors get remediation
 *   advice); the original error is attached as `cause`.
 */
async function countBatchInputsWithWorkerJobs(filePaths, options) {
  if (process.env.WORD_COUNTER_DISABLE_WORKER_JOBS === "1") throw new WorkerRouteUnavailableError("Worker route disabled by environment.");
  const describe = (error) => error instanceof Error ? error.message : String(error);
  let workerPoolModule;
  try {
    workerPoolModule = await import("./worker-pool.mjs");
  } catch (error) {
    throw new WorkerRouteUnavailableError(`Worker route unavailable: ${describe(error)}`, { cause: error });
  }
  try {
    return await workerPoolModule.countBatchInputsWithWorkerPool({
      filePaths,
      jobs: options.jobs,
      section: options.section,
      detectorMode: options.detectorMode ?? "regex",
      wcOptions: options.wcOptions,
      preserveCollectorSegments: options.preserveCollectorSegments,
      onFileProcessed: options.onFileProcessed
    });
  } catch (error) {
    if (error instanceof workerPoolModule.WorkerPoolTaskFatalError) {
      if (error.code === "EMFILE" || error.code === "ENFILE") {
        // Pass the error itself: a plain { code, message } copy is not an Error
        // instance and would be stringified as "[object Object]".
        throw createResourceLimitError(error.path, error, options.jobs, resolveBatchJobsLimit());
      }
      throw new Error(error.message, { cause: error });
    }
    if (error instanceof workerPoolModule.WorkerPoolUnavailableError || isFallbackFriendlyWorkerError(error)) {
      throw new WorkerRouteUnavailableError(`Worker route unavailable: ${describe(error)}`, { cause: error });
    }
    throw error;
  }
}
|
|
323
|
+
//#endregion
|
|
324
|
+
//#region src/cli/doctor/checks.ts
|
|
325
|
+
const REQUIRED_NODE_RANGE = ">=20";
|
|
326
|
+
const REQUIRED_NODE_MAJOR = 20;
|
|
327
|
+
const SAMPLE_TEXT = "Hello 世界";
|
|
328
|
+
/**
 * Trim a package version string, substituting "0.0.0" when nothing usable remains.
 * @param {string | null | undefined} value - Raw version text.
 * @returns {string} The trimmed version, or "0.0.0".
 */
function normalizePackageVersion(value) {
  const cleaned = value?.trim();
  if (!cleaned) return "0.0.0";
  return cleaned;
}
|
|
332
|
+
/**
 * Derive the release channel from a package version string.
 *
 * Build metadata (everything after `+`) is ignored, so a tag such as
 * "1.0.0+build.beta" is not mistaken for a prerelease.
 *
 * @param {string} packageVersion - Version string, e.g. "0.1.5-canary.2".
 * @returns {"alpha" | "beta" | "rc" | "canary" | "stable"} The lower-cased
 *   prerelease keyword when one appears as a dot/dash-delimited segment,
 *   otherwise "stable".
 */
function deriveBuildChannel(packageVersion) {
  const [versionWithoutBuild] = packageVersion.split("+", 1);
  const prereleaseMatch = /(?:^|[.-])(alpha|beta|rc|canary)(?:[.-]|$)/i.exec(versionWithoutBuild);
  // The capture group only admits the four channel names, so no further check is needed.
  return prereleaseMatch ? prereleaseMatch[1].toLowerCase() : "stable";
}
|
|
339
|
+
/**
 * Extract the major version number from a Node.js version string.
 * @param {string} version - Text such as "v20.11.1" (leading "v" optional).
 * @returns {number | null} The major version, or null when the text does not
 *   look like a version.
 */
function parseNodeMajor(version) {
  const pattern = /^v?(\d+)(?:\.\d+){0,2}(?:[-+].*)?$/;
  const found = pattern.exec(version.trim());
  if (found === null) return null;
  const major = Number.parseInt(found[1] ?? "", 10);
  if (!Number.isFinite(major)) return null;
  return major;
}
|
|
345
|
+
/**
 * Summarise the package and Node.js runtime for the doctor report.
 * @param {object} [overrides={}] - Optional `packageVersion`, `nodeVersion`,
 *   `platform` and `arch` values that replace the live process values.
 * @returns {object} Package version and channel, required and actual Node
 *   versions, whether the requirement is met, and platform/arch.
 */
function resolveRuntimeSummary(overrides = {}) {
  const rawVersion = overrides.packageVersion ?? "0.1.5-canary.2";
  const packageVersion = normalizePackageVersion(rawVersion);
  const nodeVersion = overrides.nodeVersion ?? process.version;
  const nodeMajor = parseNodeMajor(nodeVersion);
  const meetsProjectRequirement = nodeMajor !== null && nodeMajor >= REQUIRED_NODE_MAJOR;
  const buildChannel = deriveBuildChannel(packageVersion);
  const platform = overrides.platform ?? process.platform;
  const arch = overrides.arch ?? process.arch;
  return {
    packageVersion,
    buildChannel,
    requiredNodeRange: REQUIRED_NODE_RANGE,
    nodeVersion,
    meetsProjectRequirement,
    platform,
    arch
  };
}
|
|
359
|
+
/**
 * Probe `Intl.Segmenter` support.
 * @param {object} [overrides={}] - May carry an `intl` object to use instead of
 *   the global `Intl`.
 * @returns {{ available: boolean, wordGranularity: boolean, graphemeGranularity: boolean, sampleWordSegmentation: boolean }}
 *   Each flag is false when the corresponding probe threw or was not run.
 */
function resolveSegmenterHealth(overrides = {}) {
  const SegmenterCtor = (overrides.intl ?? Intl).Segmenter;
  const health = {
    available: typeof SegmenterCtor === "function",
    wordGranularity: false,
    graphemeGranularity: false,
    sampleWordSegmentation: false
  };
  if (!health.available) return health;

  // Word probe: construction proves the granularity, the first yielded segment
  // proves that segmentation produces output.
  try {
    const words = new SegmenterCtor("en", { granularity: "word" });
    health.wordGranularity = true;
    for (const _first of words.segment(SAMPLE_TEXT)) {
      health.sampleWordSegmentation = true;
      break;
    }
  } catch {
    health.wordGranularity = false;
    health.sampleWordSegmentation = false;
  }

  // Grapheme probe: construction only.
  try {
    new SegmenterCtor("en", { granularity: "grapheme" });
    health.graphemeGranularity = true;
  } catch {
    health.graphemeGranularity = false;
  }
  return health;
}
|
|
395
|
+
/**
 * Collect human-readable warnings from the doctor sub-reports.
 * @param {object} runtime - Runtime summary (`meetsProjectRequirement`, `nodeVersion`, `requiredNodeRange`).
 * @param {object} segmenter - Segmenter health flags.
 * @param {object} workerRoute - Worker route preflight flags.
 * @returns {string[]} Warnings in a fixed order: runtime, segmenter, worker route.
 */
function collectWarnings(runtime, segmenter, workerRoute) {
  const warnings = [];
  if (!runtime.meetsProjectRequirement) {
    warnings.push(`Node.js ${runtime.nodeVersion} is outside the supported range ${runtime.requiredNodeRange}.`);
  }
  if (segmenter.available) {
    const segmenterChecks = [
      [segmenter.wordGranularity, "Intl.Segmenter word granularity is unusable."],
      [segmenter.graphemeGranularity, "Intl.Segmenter grapheme granularity is unusable."],
      [segmenter.sampleWordSegmentation, "Intl.Segmenter sample segmentation failed."]
    ];
    for (const [passed, message] of segmenterChecks) {
      if (!passed) warnings.push(message);
    }
  } else {
    warnings.push("Intl.Segmenter is unavailable.");
  }
  if (!workerRoute.workerThreadsAvailable) warnings.push("Worker threads are unavailable on this runtime.");
  if (workerRoute.workerRouteDisabledByEnv) warnings.push("Worker route is disabled by environment.");
  // A missing entry is only reported when the module itself loaded.
  if (!workerRoute.workerPoolModuleLoadable) {
    warnings.push("Worker route preflight failed: worker-pool module could not be loaded.");
  } else if (!workerRoute.workerEntryFound) {
    warnings.push("Worker route preflight failed: count-worker entry file was not found.");
  }
  return warnings;
}
|
|
410
|
+
/**
 * Reduce the doctor findings to one status word.
 * @param {object} segmenter - Segmenter health flags.
 * @param {string[]} warnings - Collected warnings.
 * @returns {"fail" | "warn" | "ok"} "fail" when any segmenter flag is false,
 *   "warn" when there are warnings, otherwise "ok".
 */
function resolveStatus(segmenter, warnings) {
  const segmenterHealthy = segmenter.available && segmenter.wordGranularity && segmenter.graphemeGranularity && segmenter.sampleWordSegmentation;
  if (!segmenterHealthy) return "fail";
  return warnings.length > 0 ? "warn" : "ok";
}
|
|
415
|
+
/**
 * Assemble the full doctor report.
 * @param {object} [overrides={}] - Passed to the runtime and segmenter probes;
 *   `overrides.env` replaces `process.env` for the jobs and worker-route probes.
 * @returns {Promise<object>} `{ status, runtime, segmenter, jobs, workerRoute, warnings }`.
 */
async function createDoctorReport(overrides = {}) {
  const runtime = resolveRuntimeSummary(overrides);
  const segmenter = resolveSegmenterHealth(overrides);
  const env = overrides.env ?? process.env;
  const jobs = resolveBatchJobsLimit(env);
  const workerRoute = await resolveWorkerRoutePreflight(env);
  const warnings = collectWarnings(runtime, segmenter, workerRoute);
  const status = resolveStatus(segmenter, warnings);
  return { status, runtime, segmenter, jobs, workerRoute, warnings };
}
|
|
431
|
+
//#endregion
|
|
432
|
+
//#region src/cli/doctor/render.ts
|
|
433
|
+
/**
 * Render a status word in bold: green for "ok", yellow for "warn", red otherwise.
 * @param {string} status - Status word to colour.
 * @returns {string} The styled text.
 */
function colorStatus(status) {
  const colors = import_picocolors.default;
  let paint = colors.red;
  if (status === "ok") paint = colors.green;
  else if (status === "warn") paint = colors.yellow;
  return paint(colors.bold(status));
}
|
|
438
|
+
/**
 * Print a bold section title, one "- " bullet per line, then a blank line.
 * @param {string} title - Section heading.
 * @param {Iterable<string>} lines - Bullet contents.
 */
function renderSection(title, lines) {
  const heading = import_picocolors.default.bold(title);
  console.log(heading);
  for (const entry of lines) {
    console.log(`- ${entry}`);
  }
  console.log("");
}
|
|
443
|
+
/**
 * Colour a boolean as a word: green `yes` label when truthy, red `no` label otherwise.
 * @param {boolean} value - Flag to render.
 * @param {string} [yes="yes"] - Label for truthy values.
 * @param {string} [no="no"] - Label for falsy values.
 * @returns {string} The styled label.
 */
function colorBoolean(value, yes = "yes", no = "no") {
  const colors = import_picocolors.default;
  if (value) return colors.green(yes);
  return colors.red(no);
}
|
|
446
|
+
/**
 * Render a value as yellow text.
 * @param {unknown} value - Value to stringify and colour.
 * @returns {string} The styled text.
 */
function colorNumber(value) {
  const text = String(value);
  return import_picocolors.default.yellow(text);
}
|
|
449
|
+
/**
 * Render a flag as a green "ok" or a red "fail".
 * @param {boolean} value - Flag to render.
 * @returns {string} The styled word.
 */
function colorStatusWord(value) {
  const colors = import_picocolors.default;
  if (value) return colors.green("ok");
  return colors.red("fail");
}
|
|
452
|
+
/**
 * Print the doctor report as human-readable text: a status line, four
 * sections, and a warnings list when there are any.
 * @param {object} report - Report with `status`, `runtime`, `segmenter`,
 *   `jobs`, `workerRoute` and `warnings`.
 */
function renderStandardDoctorReport(report) {
  const { runtime, segmenter, jobs, workerRoute, warnings } = report;
  console.log(`Doctor: ${colorStatus(report.status)}`);
  console.log("");
  // Line builders are thunks so each section is formatted only when it is printed.
  const sections = [
    ["Runtime", () => [
      `package: ${runtime.packageVersion} (${runtime.buildChannel})`,
      `node: ${runtime.nodeVersion} (supported: ${colorBoolean(runtime.meetsProjectRequirement)}; required ${runtime.requiredNodeRange})`,
      `platform: ${runtime.platform} ${runtime.arch}`
    ]],
    ["Segmenter", () => [
      `Intl.Segmenter: ${colorBoolean(segmenter.available, "available", "missing")}`,
      `word granularity: ${colorStatusWord(segmenter.wordGranularity)}`,
      `grapheme granularity: ${colorStatusWord(segmenter.graphemeGranularity)}`,
      `sample segmentation: ${colorStatusWord(segmenter.sampleWordSegmentation)}`
    ]],
    ["Batch jobs", () => [
      `cpuLimit: ${colorNumber(jobs.cpuLimit)}`,
      `uvThreadpool: ${colorNumber(jobs.uvThreadpool)}`,
      `ioLimit: ${colorNumber(jobs.ioLimit)}`,
      `suggestedMaxJobs: ${colorNumber(jobs.suggestedMaxJobs)}`
    ]],
    ["Worker route", () => [
      `worker threads: ${colorBoolean(workerRoute.workerThreadsAvailable, "available", "missing")}`,
      `disabled by env: ${colorBoolean(workerRoute.workerRouteDisabledByEnv)}`,
      `disableWorkerJobsEnv: ${workerRoute.disableWorkerJobsEnv ?? "null"}`,
      `worker pool module: ${colorBoolean(workerRoute.workerPoolModuleLoadable, "loadable", "missing")}`,
      `worker entry: ${colorBoolean(workerRoute.workerEntryFound, "found", "missing")}`
    ]]
  ];
  for (const [title, buildLines] of sections) {
    renderSection(title, buildLines());
  }
  if (warnings.length > 0) {
    console.log(import_picocolors.default.bold("Warnings"));
    for (const warning of warnings) {
      console.log(import_picocolors.default.yellow(`- ${warning}`));
    }
  }
}
|
|
484
|
+
/**
 * Print the doctor report in the requested format.
 * @param {object} report - Report to print.
 * @param {{ format: string, pretty?: boolean }} options - "json" prints
 *   `JSON.stringify` output (2-space indent when `pretty`); any other format
 *   uses the text renderer.
 */
function renderDoctorReport(report, options) {
  if (options.format !== "json") {
    renderStandardDoctorReport(report);
    return;
  }
  const indent = options.pretty ? 2 : 0;
  console.log(JSON.stringify(report, null, indent));
}
|
|
491
|
+
//#endregion
|
|
492
|
+
//#region src/cli/doctor/run.ts
|
|
493
|
+
const DOCTOR_HELP_LINES = [
|
|
494
|
+
"Usage: word-counter doctor [options]",
|
|
495
|
+
"",
|
|
496
|
+
"report runtime diagnostics for this host",
|
|
497
|
+
"",
|
|
498
|
+
"Options:",
|
|
499
|
+
" --format <format> doctor output format (json)",
|
|
500
|
+
" --pretty pretty print doctor JSON output (default: false)",
|
|
501
|
+
" -h, --help display help for command"
|
|
502
|
+
];
|
|
503
|
+
/**
 * Map a raw `--format` value to a doctor output format.
 * @param {string | undefined} rawValue - Value given on the command line.
 * @returns {"standard" | "json" | null} "standard" when no value was given,
 *   "json" for "json", and null for anything else.
 */
function parseDoctorFormat(rawValue) {
  switch (rawValue) {
    case void 0:
      return "standard";
    case "json":
      return "json";
    default:
      return null;
  }
}
|
|
508
|
+
/**
 * Validate the tokens that follow `doctor` on the command line.
 * @param {string[]} argv - Full process argv; `doctor` is searched from index 2.
 * @returns {{ ok: true, help: true } | { ok: true, format: string, pretty: boolean } | { ok: false, message: string }}
 *   A help request, the parsed options, or a rejection with its message.
 */
function validateDoctorInvocation(argv) {
  const UNSUPPORTED_FORMAT = "`doctor` only supports default text output or `--format json`.";
  const NO_POSITIONALS = "`doctor` does not accept positional inputs.";
  const FORMAT_NEEDS_VALUE = "`--format` requires a value.";
  const FORMAT_PREFIX = "--format=";
  const reject = (message) => ({
    ok: false,
    message
  });

  const doctorIndex = argv.findIndex((token, index) => index >= 2 && token === "doctor");
  const tokens = doctorIndex >= 0 ? argv.slice(doctorIndex + 1) : [];
  let awaitingFormatValue = false;
  let format = "standard";
  let pretty = false;

  for (const token of tokens) {
    // Help wins over everything, even a pending `--format` value.
    if (token === "-h" || token === "--help") {
      return {
        ok: true,
        help: true
      };
    }
    if (awaitingFormatValue) {
      const parsed = parseDoctorFormat(token);
      if (parsed === null) return reject(UNSUPPORTED_FORMAT);
      format = parsed;
      awaitingFormatValue = false;
      continue;
    }
    if (token === "--") return reject(NO_POSITIONALS);
    if (token === "--format") {
      awaitingFormatValue = true;
      continue;
    }
    if (token.startsWith(FORMAT_PREFIX)) {
      const inlineValue = token.slice(FORMAT_PREFIX.length);
      if (inlineValue.length === 0) return reject(FORMAT_NEEDS_VALUE);
      const parsed = parseDoctorFormat(inlineValue);
      if (parsed === null) return reject(UNSUPPORTED_FORMAT);
      format = parsed;
      continue;
    }
    if (token === "--pretty") {
      pretty = true;
      continue;
    }
    // Anything left is either an unknown flag or a positional argument.
    return reject(token.startsWith("-") ? `\`${token}\` is not supported by \`doctor\`.` : NO_POSITIONALS);
  }

  if (awaitingFormatValue) return reject(FORMAT_NEEDS_VALUE);
  if (pretty && format !== "json") return reject("`--pretty` requires `--format json`.");
  return {
    ok: true,
    format,
    pretty
  };
}
|
|
578
|
+
/**
 * Tell whether argv is an explicit `doctor` invocation: `doctor` is the first
 * CLI argument and is followed by nothing, or by at least one dash-prefixed token.
 * @param {string[]} argv - Full process argv.
 * @returns {boolean}
 */
function isExplicitDoctorInvocation(argv) {
  const [, , command, ...rest] = argv;
  if (command !== "doctor") return false;
  // "--" also starts with "-", so a single prefix test covers both cases.
  return rest.length === 0 || rest.some((token) => token.startsWith("-"));
}
|
|
584
|
+
/** Print the static `doctor` help text, one line per `console.log` call. */
function printDoctorHelp() {
  DOCTOR_HELP_LINES.forEach((line) => {
    console.log(line);
  });
}
|
|
587
|
+
/**
 * Run the `doctor` subcommand and set `process.exitCode`:
 * 1 for an invalid invocation, 0 for help, 2 when the report status is "fail",
 * otherwise 0.
 * @param {{ argv: string[], runtime?: object }} params - Process argv and
 *   optional overrides passed to the report builder.
 * @returns {Promise<void>}
 */
async function executeDoctorCommand({ argv, runtime }) {
  const invocation = validateDoctorInvocation(argv);
  if (!invocation.ok) {
    const text = import_picocolors.default.red(`error: ${invocation.message}`);
    console.error(text);
    process.exitCode = 1;
    return;
  }
  if ("help" in invocation) {
    printDoctorHelp();
    process.exitCode = 0;
    return;
  }
  const { format, pretty } = invocation;
  const report = await createDoctorReport(runtime);
  renderDoctorReport(report, { format, pretty });
  process.exitCode = report.status === "fail" ? 2 : 0;
}
|
|
148
606
|
//#endregion
|
|
149
607
|
//#region src/cli/path/filter.ts
|
|
150
608
|
const DEFAULT_INCLUDE_EXTENSIONS = Object.freeze([
|
|
@@ -221,7 +679,6 @@ function shouldIncludeFromDirectoryRegex(relativePath, filter) {
|
|
|
221
679
|
if (!filter.regex) return true;
|
|
222
680
|
return filter.regex.test(relativePath);
|
|
223
681
|
}
|
|
224
|
-
|
|
225
682
|
//#endregion
|
|
226
683
|
//#region src/cli/total-of.ts
|
|
227
684
|
const TOTAL_OF_PARTS = Object.freeze([
|
|
@@ -316,7 +773,6 @@ function resolveTotalOfOverride(result, parts) {
|
|
|
316
773
|
function formatTotalOfParts(parts) {
|
|
317
774
|
return parts.join(", ");
|
|
318
775
|
}
|
|
319
|
-
|
|
320
776
|
//#endregion
|
|
321
777
|
//#region src/cli/program/options.ts
|
|
322
778
|
const MODE_CHOICES = [
|
|
@@ -326,6 +782,7 @@ const MODE_CHOICES = [
|
|
|
326
782
|
"char",
|
|
327
783
|
"char-collector"
|
|
328
784
|
];
|
|
785
|
+
const DETECTOR_CHOICES = ["regex", "wasm"];
|
|
329
786
|
const FORMAT_CHOICES = [
|
|
330
787
|
"standard",
|
|
331
788
|
"raw",
|
|
@@ -353,83 +810,8 @@ function parseJobsOption(value) {
|
|
|
353
810
|
return parsed;
|
|
354
811
|
}
|
|
355
812
|
function configureProgramOptions(program, parseMode) {
|
|
356
|
-
program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of 
--debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--jobs <n>", "batch jobs in --path mode (1=async main-thread, >1=worker load+count)", parseJobsOption, 1).option("--print-jobs-limit", "print host jobs-limit JSON and exit (must be used alone)").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-warnings", "suppress non-fatal warning diagnostics").option("--quiet-skips", "suppress debug skip output and per-file json skipped field").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
|
|
813
|
+
program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--detector <mode>", "locale detector mode").choices(DETECTOR_CHOICES).default("regex")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", 
"mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of --debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--jobs <n>", "batch jobs in --path mode (1=async main-thread, >1=worker load+count)", parseJobsOption, 1).option("--print-jobs-limit", "print host jobs-limit JSON and exit (must be used alone)").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-warnings", "suppress non-fatal warning diagnostics").option("--quiet-skips", "suppress debug skip output and per-file json skipped field").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
|
|
357
814
|
}
|
|
358
|
-
|
|
359
|
-
//#endregion
|
|
360
|
-
//#region node_modules/picocolors/picocolors.js
|
|
361
|
-
var require_picocolors = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
362
|
-
let p = process || {}, argv = p.argv || [], env = p.env || {};
|
|
363
|
-
let isColorSupported = !(!!env.NO_COLOR || argv.includes("--no-color")) && (!!env.FORCE_COLOR || argv.includes("--color") || p.platform === "win32" || (p.stdout || {}).isTTY && env.TERM !== "dumb" || !!env.CI);
|
|
364
|
-
let formatter = (open, close, replace = open) => (input) => {
|
|
365
|
-
let string = "" + input, index = string.indexOf(close, open.length);
|
|
366
|
-
return ~index ? open + replaceClose(string, close, replace, index) + close : open + string + close;
|
|
367
|
-
};
|
|
368
|
-
let replaceClose = (string, close, replace, index) => {
|
|
369
|
-
let result = "", cursor = 0;
|
|
370
|
-
do {
|
|
371
|
-
result += string.substring(cursor, index) + replace;
|
|
372
|
-
cursor = index + close.length;
|
|
373
|
-
index = string.indexOf(close, cursor);
|
|
374
|
-
} while (~index);
|
|
375
|
-
return result + string.substring(cursor);
|
|
376
|
-
};
|
|
377
|
-
let createColors = (enabled = isColorSupported) => {
|
|
378
|
-
let f = enabled ? formatter : () => String;
|
|
379
|
-
return {
|
|
380
|
-
isColorSupported: enabled,
|
|
381
|
-
reset: f("\x1B[0m", "\x1B[0m"),
|
|
382
|
-
bold: f("\x1B[1m", "\x1B[22m", "\x1B[22m\x1B[1m"),
|
|
383
|
-
dim: f("\x1B[2m", "\x1B[22m", "\x1B[22m\x1B[2m"),
|
|
384
|
-
italic: f("\x1B[3m", "\x1B[23m"),
|
|
385
|
-
underline: f("\x1B[4m", "\x1B[24m"),
|
|
386
|
-
inverse: f("\x1B[7m", "\x1B[27m"),
|
|
387
|
-
hidden: f("\x1B[8m", "\x1B[28m"),
|
|
388
|
-
strikethrough: f("\x1B[9m", "\x1B[29m"),
|
|
389
|
-
black: f("\x1B[30m", "\x1B[39m"),
|
|
390
|
-
red: f("\x1B[31m", "\x1B[39m"),
|
|
391
|
-
green: f("\x1B[32m", "\x1B[39m"),
|
|
392
|
-
yellow: f("\x1B[33m", "\x1B[39m"),
|
|
393
|
-
blue: f("\x1B[34m", "\x1B[39m"),
|
|
394
|
-
magenta: f("\x1B[35m", "\x1B[39m"),
|
|
395
|
-
cyan: f("\x1B[36m", "\x1B[39m"),
|
|
396
|
-
white: f("\x1B[37m", "\x1B[39m"),
|
|
397
|
-
gray: f("\x1B[90m", "\x1B[39m"),
|
|
398
|
-
bgBlack: f("\x1B[40m", "\x1B[49m"),
|
|
399
|
-
bgRed: f("\x1B[41m", "\x1B[49m"),
|
|
400
|
-
bgGreen: f("\x1B[42m", "\x1B[49m"),
|
|
401
|
-
bgYellow: f("\x1B[43m", "\x1B[49m"),
|
|
402
|
-
bgBlue: f("\x1B[44m", "\x1B[49m"),
|
|
403
|
-
bgMagenta: f("\x1B[45m", "\x1B[49m"),
|
|
404
|
-
bgCyan: f("\x1B[46m", "\x1B[49m"),
|
|
405
|
-
bgWhite: f("\x1B[47m", "\x1B[49m"),
|
|
406
|
-
blackBright: f("\x1B[90m", "\x1B[39m"),
|
|
407
|
-
redBright: f("\x1B[91m", "\x1B[39m"),
|
|
408
|
-
greenBright: f("\x1B[92m", "\x1B[39m"),
|
|
409
|
-
yellowBright: f("\x1B[93m", "\x1B[39m"),
|
|
410
|
-
blueBright: f("\x1B[94m", "\x1B[39m"),
|
|
411
|
-
magentaBright: f("\x1B[95m", "\x1B[39m"),
|
|
412
|
-
cyanBright: f("\x1B[96m", "\x1B[39m"),
|
|
413
|
-
whiteBright: f("\x1B[97m", "\x1B[39m"),
|
|
414
|
-
bgBlackBright: f("\x1B[100m", "\x1B[49m"),
|
|
415
|
-
bgRedBright: f("\x1B[101m", "\x1B[49m"),
|
|
416
|
-
bgGreenBright: f("\x1B[102m", "\x1B[49m"),
|
|
417
|
-
bgYellowBright: f("\x1B[103m", "\x1B[49m"),
|
|
418
|
-
bgBlueBright: f("\x1B[104m", "\x1B[49m"),
|
|
419
|
-
bgMagentaBright: f("\x1B[105m", "\x1B[49m"),
|
|
420
|
-
bgCyanBright: f("\x1B[106m", "\x1B[49m"),
|
|
421
|
-
bgWhiteBright: f("\x1B[107m", "\x1B[49m")
|
|
422
|
-
};
|
|
423
|
-
};
|
|
424
|
-
module.exports = createColors();
|
|
425
|
-
module.exports.createColors = createColors;
|
|
426
|
-
}));
|
|
427
|
-
|
|
428
|
-
//#endregion
|
|
429
|
-
//#region src/cli/program/version-embedded.ts
|
|
430
|
-
var import_picocolors = /* @__PURE__ */ __toESM(require_picocolors(), 1);
|
|
431
|
-
const EMBEDDED_PACKAGE_VERSION = "0.1.4";
|
|
432
|
-
|
|
433
815
|
//#endregion
|
|
434
816
|
//#region src/cli/program/version.ts
|
|
435
817
|
function* candidateSearchRoots() {
|
|
@@ -462,7 +844,7 @@ function normalizeVersion(value) {
|
|
|
462
844
|
return trimmed;
|
|
463
845
|
}
|
|
464
846
|
function resolvePackageVersion(options = {}) {
|
|
465
|
-
const embeddedVersion = normalizeVersion(options.embeddedVersion ??
|
|
847
|
+
const embeddedVersion = normalizeVersion(options.embeddedVersion ?? "0.1.5-canary.2");
|
|
466
848
|
if (embeddedVersion) return embeddedVersion;
|
|
467
849
|
const maxLevels = options.maxLevels ?? 8;
|
|
468
850
|
const resolveFromPath = options.resolveFromPath ?? resolveVersionFromPath;
|
|
@@ -480,58 +862,11 @@ function getFormattedVersionLabel() {
|
|
|
480
862
|
const version = resolvePackageVersion();
|
|
481
863
|
return import_picocolors.default.bgBlack(import_picocolors.default.bold(import_picocolors.default.italic(` word-counter ${import_picocolors.default.cyanBright(`ver.${version}`)} `)));
|
|
482
864
|
}
|
|
483
|
-
|
|
484
|
-
//#endregion
|
|
485
|
-
//#region src/cli/batch/jobs/limits.ts
|
|
486
|
-
const DEFAULT_UV_THREADPOOL_SIZE = 4;
|
|
487
|
-
function parsePositiveInteger(value) {
|
|
488
|
-
if (!value) return;
|
|
489
|
-
const parsed = Number.parseInt(value, 10);
|
|
490
|
-
if (!Number.isFinite(parsed) || parsed <= 0) return;
|
|
491
|
-
return parsed;
|
|
492
|
-
}
|
|
493
|
-
function resolveBatchJobsLimit(env = process.env) {
|
|
494
|
-
const cpuLimit = Math.max(1, os.availableParallelism());
|
|
495
|
-
const uvThreadpool = parsePositiveInteger(env.UV_THREADPOOL_SIZE) ?? DEFAULT_UV_THREADPOOL_SIZE;
|
|
496
|
-
const ioLimit = Math.max(1, uvThreadpool * 2);
|
|
497
|
-
return {
|
|
498
|
-
suggestedMaxJobs: Math.max(1, Math.min(cpuLimit, ioLimit)),
|
|
499
|
-
cpuLimit,
|
|
500
|
-
uvThreadpool,
|
|
501
|
-
ioLimit
|
|
502
|
-
};
|
|
503
|
-
}
|
|
504
|
-
function clampRequestedJobs(requestedJobs, limits) {
|
|
505
|
-
return Math.max(1, Math.min(requestedJobs, limits.suggestedMaxJobs));
|
|
506
|
-
}
|
|
507
|
-
function formatJobsAdvisoryWarning(requestedJobs, effectiveJobs, limits) {
|
|
508
|
-
return [
|
|
509
|
-
`Warning: requested --jobs=${requestedJobs} exceeds suggested host limit (${limits.suggestedMaxJobs}).`,
|
|
510
|
-
`Running with --jobs=${effectiveJobs} as a safety cap.`,
|
|
511
|
-
`Host limits: cpuLimit=${limits.cpuLimit}, uvThreadpool=${limits.uvThreadpool}, ioLimit=${limits.ioLimit}.`
|
|
512
|
-
].join(" ");
|
|
513
|
-
}
|
|
514
|
-
function isResourceLimitError(error) {
|
|
515
|
-
if (typeof error !== "object" || error === null) return false;
|
|
516
|
-
const code = "code" in error ? error.code : void 0;
|
|
517
|
-
return code === "EMFILE" || code === "ENFILE";
|
|
518
|
-
}
|
|
519
|
-
function createResourceLimitError(path, error, requestedJobs, limits) {
|
|
520
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
521
|
-
const code = typeof error === "object" && error !== null && "code" in error ? String(error.code) : "UNKNOWN";
|
|
522
|
-
return new Error([
|
|
523
|
-
`Resource limit reached while processing: ${path} (${code}: ${message}).`,
|
|
524
|
-
`Requested --jobs=${requestedJobs}; suggested host limit is ${limits.suggestedMaxJobs}.`,
|
|
525
|
-
"Reduce --jobs or raise OS file descriptor limits before retrying."
|
|
526
|
-
].join(" "));
|
|
527
|
-
}
|
|
528
|
-
|
|
529
865
|
//#endregion
|
|
530
866
|
//#region src/utils/append-all.ts
|
|
531
867
|
function appendAll(target, source) {
|
|
532
868
|
for (const item of source) target.push(item);
|
|
533
869
|
}
|
|
534
|
-
|
|
535
870
|
//#endregion
|
|
536
871
|
//#region src/markdown/toml/arrays.ts
|
|
537
872
|
function ensureArrayContainer(result, key) {
|
|
@@ -547,7 +882,6 @@ function flattenArrayTables(result) {
|
|
|
547
882
|
result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
|
|
548
883
|
}
|
|
549
884
|
}
|
|
550
|
-
|
|
551
885
|
//#endregion
|
|
552
886
|
//#region src/markdown/toml/keys.ts
|
|
553
887
|
function stripKeyQuotes(key) {
|
|
@@ -566,7 +900,6 @@ function normalizeKeyPath(key) {
|
|
|
566
900
|
if (segments.some((segment) => !segment)) return null;
|
|
567
901
|
return segments.join(".");
|
|
568
902
|
}
|
|
569
|
-
|
|
570
903
|
//#endregion
|
|
571
904
|
//#region src/markdown/toml/strings.ts
|
|
572
905
|
function stripInlineComment(line) {
|
|
@@ -615,7 +948,6 @@ function parseStringLiteral(value) {
|
|
|
615
948
|
if (value.startsWith("'") && value.endsWith("'")) return value.slice(1, -1);
|
|
616
949
|
return null;
|
|
617
950
|
}
|
|
618
|
-
|
|
619
951
|
//#endregion
|
|
620
952
|
//#region src/markdown/toml/values.ts
|
|
621
953
|
function parsePrimitive(raw) {
|
|
@@ -773,7 +1105,6 @@ function toPlainText(value) {
|
|
|
773
1105
|
if (Array.isArray(value)) return value.map((item) => String(item)).join(", ");
|
|
774
1106
|
return String(value);
|
|
775
1107
|
}
|
|
776
|
-
|
|
777
1108
|
//#endregion
|
|
778
1109
|
//#region src/markdown/toml/parse-frontmatter.ts
|
|
779
1110
|
function parseTomlFrontmatter(frontmatter) {
|
|
@@ -857,7 +1188,6 @@ function parseTomlFrontmatter(frontmatter) {
|
|
|
857
1188
|
flattenArrayTables(result);
|
|
858
1189
|
return result;
|
|
859
1190
|
}
|
|
860
|
-
|
|
861
1191
|
//#endregion
|
|
862
1192
|
//#region src/markdown/parse-markdown.ts
|
|
863
1193
|
const FENCE_TO_TYPE = {
|
|
@@ -992,7 +1322,6 @@ function parseMarkdown(input) {
|
|
|
992
1322
|
frontmatterType: openingType
|
|
993
1323
|
};
|
|
994
1324
|
}
|
|
995
|
-
|
|
996
1325
|
//#endregion
|
|
997
1326
|
//#region src/wc/segmenter.ts
|
|
998
1327
|
const segmenterCache = /* @__PURE__ */ new Map();
|
|
@@ -1021,7 +1350,6 @@ function countCharsForLocale(text, locale) {
|
|
|
1021
1350
|
for (const _segment of segmenter.segment(text)) count++;
|
|
1022
1351
|
return count;
|
|
1023
1352
|
}
|
|
1024
|
-
|
|
1025
1353
|
//#endregion
|
|
1026
1354
|
//#region src/wc/non-words.ts
|
|
1027
1355
|
const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
|
|
@@ -1135,7 +1463,6 @@ function createWhitespaceCounts() {
|
|
|
1135
1463
|
other: 0
|
|
1136
1464
|
};
|
|
1137
1465
|
}
|
|
1138
|
-
|
|
1139
1466
|
//#endregion
|
|
1140
1467
|
//#region src/wc/analyze.ts
|
|
1141
1468
|
function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
|
|
@@ -1235,7 +1562,6 @@ function aggregateByLocale(chunks) {
|
|
|
1235
1562
|
}
|
|
1236
1563
|
return order.map((locale) => map.get(locale));
|
|
1237
1564
|
}
|
|
1238
|
-
|
|
1239
1565
|
//#endregion
|
|
1240
1566
|
//#region src/wc/mode.ts
|
|
1241
1567
|
const MODE_ALIASES = {
|
|
@@ -1303,10 +1629,7 @@ function normalizeMode(input) {
|
|
|
1303
1629
|
function resolveMode(input, fallback = "chunk") {
|
|
1304
1630
|
return normalizeMode(input) ?? fallback;
|
|
1305
1631
|
}
|
|
1306
|
-
|
|
1307
|
-
//#endregion
|
|
1308
|
-
//#region src/wc/latin-hints.ts
|
|
1309
|
-
const DEFAULT_LATIN_HINT_RULES_SOURCE = [
|
|
1632
|
+
const DEFAULT_LATIN_HINT_RULES = Object.freeze([
|
|
1310
1633
|
{
|
|
1311
1634
|
tag: "de",
|
|
1312
1635
|
pattern: "[äöüÄÖÜß]"
|
|
@@ -1343,9 +1666,7 @@ const DEFAULT_LATIN_HINT_RULES_SOURCE = [
|
|
|
1343
1666
|
tag: "is",
|
|
1344
1667
|
pattern: "[ðÐþÞ]"
|
|
1345
1668
|
}
|
|
1346
|
-
];
|
|
1347
|
-
const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
|
|
1348
|
-
|
|
1669
|
+
].map((rule) => Object.freeze({ ...rule })));
|
|
1349
1670
|
//#endregion
|
|
1350
1671
|
//#region src/wc/locale-detect.ts
|
|
1351
1672
|
const DEFAULT_LOCALE = "und-Latn";
|
|
@@ -1465,18 +1786,17 @@ function detectLocaleForChar(char, previousLocale, options = {}, context = resol
|
|
|
1465
1786
|
if (regex.thai.test(char)) return "th";
|
|
1466
1787
|
if (regex.han.test(char)) {
|
|
1467
1788
|
if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
|
|
1468
|
-
return context.hanHint ??
|
|
1789
|
+
return context.hanHint ?? "und-Hani";
|
|
1469
1790
|
}
|
|
1470
1791
|
if (regex.latin.test(char)) {
|
|
1471
1792
|
const hintedLocale = detectLatinLocale(char, context);
|
|
1472
|
-
if (hintedLocale !==
|
|
1473
|
-
if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !==
|
|
1793
|
+
if (hintedLocale !== "und-Latn") return hintedLocale;
|
|
1794
|
+
if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== "und-Latn") return previousLocale;
|
|
1474
1795
|
if (context.latinHint) return context.latinHint;
|
|
1475
1796
|
return DEFAULT_LOCALE;
|
|
1476
1797
|
}
|
|
1477
1798
|
return null;
|
|
1478
1799
|
}
|
|
1479
|
-
|
|
1480
1800
|
//#endregion
|
|
1481
1801
|
//#region src/wc/segment.ts
|
|
1482
1802
|
const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
|
|
@@ -1513,7 +1833,7 @@ function segmentTextByLocale(text, options = {}) {
|
|
|
1513
1833
|
continue;
|
|
1514
1834
|
}
|
|
1515
1835
|
if (targetLocale !== currentLocale && detected !== null) {
|
|
1516
|
-
if (currentLocale ===
|
|
1836
|
+
if (currentLocale === "und-Latn" && isLatinLocale(targetLocale, context)) {
|
|
1517
1837
|
const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
|
|
1518
1838
|
if (promotionBreakIndex === -1) {
|
|
1519
1839
|
currentLocale = targetLocale;
|
|
@@ -1544,58 +1864,249 @@ function segmentTextByLocale(text, options = {}) {
|
|
|
1544
1864
|
updateCarryBoundaryState(detected, char);
|
|
1545
1865
|
continue;
|
|
1546
1866
|
}
|
|
1547
|
-
buffer += char;
|
|
1548
|
-
if (detected !== null) bufferHasScript = true;
|
|
1549
|
-
updateCarryBoundaryState(detected, char);
|
|
1867
|
+
buffer += char;
|
|
1868
|
+
if (detected !== null) bufferHasScript = true;
|
|
1869
|
+
updateCarryBoundaryState(detected, char);
|
|
1870
|
+
}
|
|
1871
|
+
if (buffer.length > 0) chunks.push({
|
|
1872
|
+
locale: currentLocale,
|
|
1873
|
+
text: buffer
|
|
1874
|
+
});
|
|
1875
|
+
return mergeAdjacentChunks(chunks);
|
|
1876
|
+
}
|
|
1877
|
+
function findLastLatinPromotionBreakIndex(buffer) {
|
|
1878
|
+
for (let index = buffer.length - 1; index >= 0; index -= 1) {
|
|
1879
|
+
const char = buffer[index];
|
|
1880
|
+
if (!char) continue;
|
|
1881
|
+
if (LATIN_PROMOTION_BREAK_REGEX.test(char)) return index;
|
|
1882
|
+
}
|
|
1883
|
+
return -1;
|
|
1884
|
+
}
|
|
1885
|
+
function mergeAdjacentChunks(chunks) {
|
|
1886
|
+
if (chunks.length === 0) return chunks;
|
|
1887
|
+
const merged = [];
|
|
1888
|
+
let last = chunks[0];
|
|
1889
|
+
for (let i = 1; i < chunks.length; i++) {
|
|
1890
|
+
const chunk = chunks[i];
|
|
1891
|
+
if (chunk.locale === last.locale) last = {
|
|
1892
|
+
locale: last.locale,
|
|
1893
|
+
text: last.text + chunk.text
|
|
1894
|
+
};
|
|
1895
|
+
else {
|
|
1896
|
+
merged.push(last);
|
|
1897
|
+
last = chunk;
|
|
1898
|
+
}
|
|
1899
|
+
}
|
|
1900
|
+
merged.push(last);
|
|
1901
|
+
return merged;
|
|
1902
|
+
}
|
|
1903
|
+
//#endregion
|
|
1904
|
+
//#region src/wc/wc.ts
|
|
1905
|
+
function wordCounter(text, options = {}) {
|
|
1906
|
+
const mode = resolveMode(options.mode, "chunk");
|
|
1907
|
+
const collectNonWords = Boolean(options.nonWords);
|
|
1908
|
+
const includeWhitespace = Boolean(options.includeWhitespace);
|
|
1909
|
+
const chunks = segmentTextByLocale(text, {
|
|
1910
|
+
latinLanguageHint: options.latinLanguageHint,
|
|
1911
|
+
latinTagHint: options.latinTagHint,
|
|
1912
|
+
latinLocaleHint: options.latinLocaleHint,
|
|
1913
|
+
latinHintRules: options.latinHintRules,
|
|
1914
|
+
useDefaultLatinHints: options.useDefaultLatinHints,
|
|
1915
|
+
hanLanguageHint: options.hanLanguageHint,
|
|
1916
|
+
hanTagHint: options.hanTagHint
|
|
1917
|
+
});
|
|
1918
|
+
if (mode === "char" || mode === "char-collector") {
|
|
1919
|
+
const analyzed = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
|
|
1920
|
+
const total = analyzed.reduce((sum, chunk) => sum + chunk.chars, 0);
|
|
1921
|
+
const counts = collectNonWords ? {
|
|
1922
|
+
words: analyzed.reduce((sum, chunk) => sum + chunk.wordChars, 0),
|
|
1923
|
+
nonWords: analyzed.reduce((sum, chunk) => sum + chunk.nonWordChars, 0),
|
|
1924
|
+
total
|
|
1925
|
+
} : void 0;
|
|
1926
|
+
if (mode === "char") return {
|
|
1927
|
+
total,
|
|
1928
|
+
counts,
|
|
1929
|
+
breakdown: {
|
|
1930
|
+
mode,
|
|
1931
|
+
items: analyzed.map((chunk) => ({
|
|
1932
|
+
locale: chunk.locale,
|
|
1933
|
+
text: chunk.text,
|
|
1934
|
+
chars: chunk.chars,
|
|
1935
|
+
nonWords: chunk.nonWords
|
|
1936
|
+
}))
|
|
1937
|
+
}
|
|
1938
|
+
};
|
|
1939
|
+
return {
|
|
1940
|
+
total,
|
|
1941
|
+
counts,
|
|
1942
|
+
breakdown: {
|
|
1943
|
+
mode,
|
|
1944
|
+
items: aggregateCharsByLocale(analyzed).map((chunk) => ({
|
|
1945
|
+
locale: chunk.locale,
|
|
1946
|
+
chars: chunk.chars,
|
|
1947
|
+
nonWords: chunk.nonWords
|
|
1948
|
+
}))
|
|
1949
|
+
}
|
|
1950
|
+
};
|
|
1951
|
+
}
|
|
1952
|
+
const analyzed = chunks.map((chunk) => analyzeChunk(chunk, collectNonWords, includeWhitespace));
|
|
1953
|
+
const wordsTotal = analyzed.reduce((sum, chunk) => sum + chunk.words, 0);
|
|
1954
|
+
const nonWordsTotal = collectNonWords ? analyzed.reduce((sum, chunk) => {
|
|
1955
|
+
if (!chunk.nonWords) return sum;
|
|
1956
|
+
return sum + getNonWordTotal$1(chunk.nonWords);
|
|
1957
|
+
}, 0) : 0;
|
|
1958
|
+
const total = analyzed.reduce((sum, chunk) => {
|
|
1959
|
+
let chunkTotal = chunk.words;
|
|
1960
|
+
if (collectNonWords && chunk.nonWords) chunkTotal += getNonWordTotal$1(chunk.nonWords);
|
|
1961
|
+
return sum + chunkTotal;
|
|
1962
|
+
}, 0);
|
|
1963
|
+
const counts = collectNonWords ? {
|
|
1964
|
+
words: wordsTotal,
|
|
1965
|
+
nonWords: nonWordsTotal,
|
|
1966
|
+
total
|
|
1967
|
+
} : void 0;
|
|
1968
|
+
if (mode === "segments") return {
|
|
1969
|
+
total,
|
|
1970
|
+
counts,
|
|
1971
|
+
breakdown: {
|
|
1972
|
+
mode,
|
|
1973
|
+
items: analyzed.map((chunk) => ({
|
|
1974
|
+
locale: chunk.locale,
|
|
1975
|
+
text: chunk.text,
|
|
1976
|
+
words: chunk.words,
|
|
1977
|
+
segments: chunk.segments,
|
|
1978
|
+
nonWords: chunk.nonWords
|
|
1979
|
+
}))
|
|
1980
|
+
}
|
|
1981
|
+
};
|
|
1982
|
+
if (mode === "collector") return {
|
|
1983
|
+
total,
|
|
1984
|
+
counts,
|
|
1985
|
+
breakdown: {
|
|
1986
|
+
mode,
|
|
1987
|
+
items: aggregateByLocale(analyzed),
|
|
1988
|
+
nonWords: collectNonWordsAggregate$1(analyzed, collectNonWords)
|
|
1989
|
+
}
|
|
1990
|
+
};
|
|
1991
|
+
return {
|
|
1992
|
+
total,
|
|
1993
|
+
counts,
|
|
1994
|
+
breakdown: {
|
|
1995
|
+
mode,
|
|
1996
|
+
items: analyzed.map((chunk) => ({
|
|
1997
|
+
locale: chunk.locale,
|
|
1998
|
+
text: chunk.text,
|
|
1999
|
+
words: chunk.words,
|
|
2000
|
+
nonWords: chunk.nonWords
|
|
2001
|
+
}))
|
|
2002
|
+
}
|
|
2003
|
+
};
|
|
2004
|
+
}
|
|
2005
|
+
function getNonWordTotal$1(nonWords) {
|
|
2006
|
+
return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
|
|
2007
|
+
}
|
|
2008
|
+
function collectNonWordsAggregate$1(analyzed, enabled) {
|
|
2009
|
+
if (!enabled) return;
|
|
2010
|
+
const collection = createNonWordCollection();
|
|
2011
|
+
for (const chunk of analyzed) {
|
|
2012
|
+
if (!chunk.nonWords) continue;
|
|
2013
|
+
mergeNonWordCollections(collection, chunk.nonWords);
|
|
1550
2014
|
}
|
|
1551
|
-
|
|
1552
|
-
locale: currentLocale,
|
|
1553
|
-
text: buffer
|
|
1554
|
-
});
|
|
1555
|
-
return mergeAdjacentChunks(chunks);
|
|
2015
|
+
return collection;
|
|
1556
2016
|
}
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
2017
|
+
//#endregion
|
|
2018
|
+
//#region src/wc/index.ts
|
|
2019
|
+
var wc_default = wordCounter;
|
|
2020
|
+
//#endregion
|
|
2021
|
+
//#region src/markdown/section-count.ts
|
|
2022
|
+
function normalizeText$1(value) {
|
|
2023
|
+
if (value == null) return "";
|
|
2024
|
+
if (typeof value === "string") return value;
|
|
2025
|
+
if (typeof value === "number" || typeof value === "boolean") return String(value);
|
|
2026
|
+
try {
|
|
2027
|
+
return JSON.stringify(value);
|
|
2028
|
+
} catch {
|
|
2029
|
+
return String(value);
|
|
1562
2030
|
}
|
|
1563
|
-
return -1;
|
|
1564
2031
|
}
|
|
1565
|
-
function
|
|
1566
|
-
if (
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
2032
|
+
function buildPerKeyItems$1(data, mode, options) {
|
|
2033
|
+
if (!data || typeof data !== "object" || Array.isArray(data)) return [];
|
|
2034
|
+
return Object.entries(data).map(([key, value]) => {
|
|
2035
|
+
const valueText = normalizeText$1(value);
|
|
2036
|
+
return {
|
|
2037
|
+
name: key,
|
|
2038
|
+
source: "frontmatter",
|
|
2039
|
+
result: wc_default(valueText ? `${key}: ${valueText}` : key, options)
|
|
2040
|
+
};
|
|
2041
|
+
});
|
|
2042
|
+
}
|
|
2043
|
+
function buildSingleItem$1(name, text, mode, options, source) {
|
|
2044
|
+
return [{
|
|
2045
|
+
name,
|
|
2046
|
+
source,
|
|
2047
|
+
result: wc_default(text, options)
|
|
2048
|
+
}];
|
|
2049
|
+
}
|
|
2050
|
+
function sumTotals$1(items) {
|
|
2051
|
+
return items.reduce((sum, item) => sum + item.result.total, 0);
|
|
2052
|
+
}
|
|
2053
|
+
function countSections(input, section, options = {}) {
|
|
2054
|
+
const mode = options.mode ?? "chunk";
|
|
2055
|
+
if (section === "all") {
|
|
2056
|
+
const result = wc_default(input, options);
|
|
2057
|
+
return {
|
|
2058
|
+
section,
|
|
2059
|
+
total: result.total,
|
|
2060
|
+
frontmatterType: null,
|
|
2061
|
+
items: [{
|
|
2062
|
+
name: "all",
|
|
2063
|
+
source: "content",
|
|
2064
|
+
result
|
|
2065
|
+
}]
|
|
1574
2066
|
};
|
|
1575
|
-
else {
|
|
1576
|
-
merged.push(last);
|
|
1577
|
-
last = chunk;
|
|
1578
|
-
}
|
|
1579
2067
|
}
|
|
1580
|
-
|
|
1581
|
-
|
|
2068
|
+
const parsed = parseMarkdown(input);
|
|
2069
|
+
const frontmatterText = parsed.frontmatter ?? "";
|
|
2070
|
+
const contentText = parsed.content ?? "";
|
|
2071
|
+
let items = [];
|
|
2072
|
+
if (section === "frontmatter") items = buildSingleItem$1("frontmatter", frontmatterText, mode, options, "frontmatter");
|
|
2073
|
+
else if (section === "content") items = buildSingleItem$1("content", contentText, mode, options, "content");
|
|
2074
|
+
else if (section === "split") items = [...buildSingleItem$1("frontmatter", frontmatterText, mode, options, "frontmatter"), ...buildSingleItem$1("content", contentText, mode, options, "content")];
|
|
2075
|
+
else if (section === "per-key") items = buildPerKeyItems$1(parsed.data, mode, options);
|
|
2076
|
+
else if (section === "split-per-key") items = [...buildPerKeyItems$1(parsed.data, mode, options), ...buildSingleItem$1("content", contentText, mode, options, "content")];
|
|
2077
|
+
return {
|
|
2078
|
+
section,
|
|
2079
|
+
total: sumTotals$1(items),
|
|
2080
|
+
frontmatterType: parsed.frontmatterType,
|
|
2081
|
+
items
|
|
2082
|
+
};
|
|
1582
2083
|
}
|
|
1583
|
-
|
|
1584
2084
|
//#endregion
|
|
1585
|
-
//#region src/
|
|
1586
|
-
function
|
|
2085
|
+
//#region src/detector/none.ts
|
|
2086
|
+
async function wordCounterWithRegexDetector(text, options = {}) {
|
|
2087
|
+
return wc_default(text, options);
|
|
2088
|
+
}
|
|
2089
|
+
async function countSectionsWithRegexDetector(input, section, options = {}) {
|
|
2090
|
+
return countSections(input, section, options);
|
|
2091
|
+
}
|
|
2092
|
+
//#endregion
|
|
2093
|
+
//#region src/detector/result-builder.ts
|
|
2094
|
+
function getNonWordTotal(nonWords) {
|
|
2095
|
+
return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
|
|
2096
|
+
}
|
|
2097
|
+
function collectNonWordsAggregate(analyzed, enabled) {
|
|
2098
|
+
if (!enabled) return;
|
|
2099
|
+
const collection = createNonWordCollection();
|
|
2100
|
+
for (const chunk of analyzed) {
|
|
2101
|
+
if (!chunk.nonWords) continue;
|
|
2102
|
+
mergeNonWordCollections(collection, chunk.nonWords);
|
|
2103
|
+
}
|
|
2104
|
+
return collection;
|
|
2105
|
+
}
|
|
2106
|
+
function buildWordCounterResultFromChunks(chunks, options = {}) {
|
|
1587
2107
|
const mode = resolveMode(options.mode, "chunk");
|
|
1588
2108
|
const collectNonWords = Boolean(options.nonWords);
|
|
1589
2109
|
const includeWhitespace = Boolean(options.includeWhitespace);
|
|
1590
|
-
const chunks = segmentTextByLocale(text, {
|
|
1591
|
-
latinLanguageHint: options.latinLanguageHint,
|
|
1592
|
-
latinTagHint: options.latinTagHint,
|
|
1593
|
-
latinLocaleHint: options.latinLocaleHint,
|
|
1594
|
-
latinHintRules: options.latinHintRules,
|
|
1595
|
-
useDefaultLatinHints: options.useDefaultLatinHints,
|
|
1596
|
-
hanLanguageHint: options.hanLanguageHint,
|
|
1597
|
-
hanTagHint: options.hanTagHint
|
|
1598
|
-
});
|
|
1599
2110
|
if (mode === "char" || mode === "char-collector") {
|
|
1600
2111
|
const analyzed = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
|
|
1601
2112
|
const total = analyzed.reduce((sum, chunk) => sum + chunk.chars, 0);
|
|
@@ -1683,25 +2194,8 @@ function wordCounter(text, options = {}) {
|
|
|
1683
2194
|
}
|
|
1684
2195
|
};
|
|
1685
2196
|
}
|
|
1686
|
-
function getNonWordTotal(nonWords) {
|
|
1687
|
-
return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
|
|
1688
|
-
}
|
|
1689
|
-
function collectNonWordsAggregate(analyzed, enabled) {
|
|
1690
|
-
if (!enabled) return;
|
|
1691
|
-
const collection = createNonWordCollection();
|
|
1692
|
-
for (const chunk of analyzed) {
|
|
1693
|
-
if (!chunk.nonWords) continue;
|
|
1694
|
-
mergeNonWordCollections(collection, chunk.nonWords);
|
|
1695
|
-
}
|
|
1696
|
-
return collection;
|
|
1697
|
-
}
|
|
1698
|
-
|
|
1699
|
-
//#endregion
|
|
1700
|
-
//#region src/wc/index.ts
|
|
1701
|
-
var wc_default = wordCounter;
|
|
1702
|
-
|
|
1703
2197
|
//#endregion
|
|
1704
|
-
//#region src/
|
|
2198
|
+
//#region src/detector/sections.ts
|
|
1705
2199
|
function normalizeText(value) {
|
|
1706
2200
|
if (value == null) return "";
|
|
1707
2201
|
if (typeof value === "string") return value;
|
|
@@ -1712,31 +2206,31 @@ function normalizeText(value) {
|
|
|
1712
2206
|
return String(value);
|
|
1713
2207
|
}
|
|
1714
2208
|
}
|
|
1715
|
-
function buildPerKeyItems(data,
|
|
2209
|
+
async function buildPerKeyItems(data, options) {
|
|
1716
2210
|
if (!data || typeof data !== "object" || Array.isArray(data)) return [];
|
|
1717
|
-
return Object.entries(data).map(([key, value]) => {
|
|
2211
|
+
return Promise.all(Object.entries(data).map(async ([key, value]) => {
|
|
1718
2212
|
const valueText = normalizeText(value);
|
|
1719
2213
|
return {
|
|
1720
2214
|
name: key,
|
|
1721
2215
|
source: "frontmatter",
|
|
1722
|
-
result:
|
|
2216
|
+
result: await wordCounterWithDetector(valueText ? `${key}: ${valueText}` : key, options)
|
|
1723
2217
|
};
|
|
1724
|
-
});
|
|
2218
|
+
}));
|
|
1725
2219
|
}
|
|
1726
|
-
function buildSingleItem(name, text,
|
|
2220
|
+
async function buildSingleItem(name, text, options, source) {
|
|
1727
2221
|
return [{
|
|
1728
2222
|
name,
|
|
1729
2223
|
source,
|
|
1730
|
-
result:
|
|
2224
|
+
result: await wordCounterWithDetector(text, options)
|
|
1731
2225
|
}];
|
|
1732
2226
|
}
|
|
1733
2227
|
function sumTotals(items) {
|
|
1734
2228
|
return items.reduce((sum, item) => sum + item.result.total, 0);
|
|
1735
2229
|
}
|
|
1736
|
-
function
|
|
1737
|
-
|
|
2230
|
+
async function countSectionsWithResolvedDetector(input, section, options = {}) {
|
|
2231
|
+
options.mode;
|
|
1738
2232
|
if (section === "all") {
|
|
1739
|
-
const result =
|
|
2233
|
+
const result = await wordCounterWithDetector(input, options);
|
|
1740
2234
|
return {
|
|
1741
2235
|
section,
|
|
1742
2236
|
total: result.total,
|
|
@@ -1752,11 +2246,11 @@ function countSections(input, section, options = {}) {
|
|
|
1752
2246
|
const frontmatterText = parsed.frontmatter ?? "";
|
|
1753
2247
|
const contentText = parsed.content ?? "";
|
|
1754
2248
|
let items = [];
|
|
1755
|
-
if (section === "frontmatter") items = buildSingleItem("frontmatter", frontmatterText,
|
|
1756
|
-
else if (section === "content") items = buildSingleItem("content", contentText,
|
|
1757
|
-
else if (section === "split") items = [...buildSingleItem("frontmatter", frontmatterText,
|
|
1758
|
-
else if (section === "per-key") items = buildPerKeyItems(parsed.data,
|
|
1759
|
-
else if (section === "split-per-key") items = [...buildPerKeyItems(parsed.data,
|
|
2249
|
+
if (section === "frontmatter") items = await buildSingleItem("frontmatter", frontmatterText, options, "frontmatter");
|
|
2250
|
+
else if (section === "content") items = await buildSingleItem("content", contentText, options, "content");
|
|
2251
|
+
else if (section === "split") items = [...await buildSingleItem("frontmatter", frontmatterText, options, "frontmatter"), ...await buildSingleItem("content", contentText, options, "content")];
|
|
2252
|
+
else if (section === "per-key") items = await buildPerKeyItems(parsed.data, options);
|
|
2253
|
+
else if (section === "split-per-key") items = [...await buildPerKeyItems(parsed.data, options), ...await buildSingleItem("content", contentText, options, "content")];
|
|
1760
2254
|
return {
|
|
1761
2255
|
section,
|
|
1762
2256
|
total: sumTotals(items),
|
|
@@ -1764,7 +2258,207 @@ function countSections(input, section, options = {}) {
|
|
|
1764
2258
|
items
|
|
1765
2259
|
};
|
|
1766
2260
|
}
|
|
1767
|
-
|
|
2261
|
+
const LATIN_WASM_MIN_CONFIDENCE = .75;
|
|
2262
|
+
const HANI_WASM_MIN_CONFIDENCE = .9;
|
|
2263
|
+
const LATIN_SCRIPT_REGEX = /\p{Script=Latin}/u;
|
|
2264
|
+
const HAN_SCRIPT_REGEX = /\p{Script=Han}/u;
|
|
2265
|
+
const DETECTOR_ROUTE_POLICIES = {
|
|
2266
|
+
[DEFAULT_LOCALE]: {
|
|
2267
|
+
routeTag: DEFAULT_LOCALE,
|
|
2268
|
+
minScriptChars: 24,
|
|
2269
|
+
minConfidence: LATIN_WASM_MIN_CONFIDENCE,
|
|
2270
|
+
requireReliable: true
|
|
2271
|
+
},
|
|
2272
|
+
[DEFAULT_HAN_TAG]: {
|
|
2273
|
+
routeTag: DEFAULT_HAN_TAG,
|
|
2274
|
+
minScriptChars: 12,
|
|
2275
|
+
minConfidence: HANI_WASM_MIN_CONFIDENCE,
|
|
2276
|
+
requireReliable: true
|
|
2277
|
+
}
|
|
2278
|
+
};
|
|
2279
|
+
function isAmbiguousDetectorRoute(locale) {
|
|
2280
|
+
return locale === "und-Latn" || locale === "und-Hani";
|
|
2281
|
+
}
|
|
2282
|
+
function countScriptBearingCharsForRoute(text, routeTag) {
|
|
2283
|
+
const matcher = routeTag === "und-Hani" ? HAN_SCRIPT_REGEX : LATIN_SCRIPT_REGEX;
|
|
2284
|
+
let count = 0;
|
|
2285
|
+
for (const char of text) if (matcher.test(char)) count += 1;
|
|
2286
|
+
return count;
|
|
2287
|
+
}
|
|
2288
|
+
function shouldRunWasmDetector(text, routeTag) {
|
|
2289
|
+
const policy = DETECTOR_ROUTE_POLICIES[routeTag];
|
|
2290
|
+
return countScriptBearingCharsForRoute(text, routeTag) >= policy.minScriptChars;
|
|
2291
|
+
}
|
|
2292
|
+
function normalizeDetectorSampleForRoute(text, routeTag) {
|
|
2293
|
+
const matcher = routeTag === "und-Hani" ? HAN_SCRIPT_REGEX : LATIN_SCRIPT_REGEX;
|
|
2294
|
+
return [...text].map((char) => {
|
|
2295
|
+
if (matcher.test(char)) return char;
|
|
2296
|
+
if (/\s/u.test(char)) return " ";
|
|
2297
|
+
return " ";
|
|
2298
|
+
}).join("").replace(/\s+/g, " ").trim();
|
|
2299
|
+
}
|
|
2300
|
+
//#endregion
|
|
2301
|
+
//#region src/detector/whatlang-wasm.ts
|
|
2302
|
+
const GENERATED_FOLDER_NAME = "wasm-language-detector";
|
|
2303
|
+
const GENERATED_MODULE_FILE = "language_detector.js";
|
|
2304
|
+
const MAX_SEARCH_DEPTH = 8;
|
|
2305
|
+
const requireFromHere = createRequire(import.meta.url);
|
|
2306
|
+
const WASM_DETECTOR_RUNTIME_UNAVAILABLE_MESSAGE = "WASM detector runtime is unavailable. Run `bun run build:wasm` to generate it.";
|
|
2307
|
+
let modulePromise = null;
|
|
2308
|
+
function resolveCandidateModulePaths() {
|
|
2309
|
+
const moduleDir = dirname(fileURLToPath(import.meta.url));
|
|
2310
|
+
const candidates = /* @__PURE__ */ new Set();
|
|
2311
|
+
let currentDir = moduleDir;
|
|
2312
|
+
for (let depth = 0; depth < MAX_SEARCH_DEPTH; depth += 1) {
|
|
2313
|
+
candidates.add(join(currentDir, GENERATED_FOLDER_NAME, GENERATED_MODULE_FILE));
|
|
2314
|
+
candidates.add(join(currentDir, "generated", GENERATED_FOLDER_NAME, GENERATED_MODULE_FILE));
|
|
2315
|
+
const parentDir = dirname(currentDir);
|
|
2316
|
+
if (parentDir === currentDir) break;
|
|
2317
|
+
currentDir = parentDir;
|
|
2318
|
+
}
|
|
2319
|
+
return [...candidates];
|
|
2320
|
+
}
|
|
2321
|
+
function resolveWhatlangWasmModulePath() {
|
|
2322
|
+
for (const candidate of resolveCandidateModulePaths()) if (existsSync(candidate)) return candidate;
|
|
2323
|
+
throw new Error(WASM_DETECTOR_RUNTIME_UNAVAILABLE_MESSAGE);
|
|
2324
|
+
}
|
|
2325
|
+
async function loadWhatlangWasmModule() {
|
|
2326
|
+
if (!modulePromise) modulePromise = (async () => {
|
|
2327
|
+
return requireFromHere(resolveWhatlangWasmModulePath());
|
|
2328
|
+
})();
|
|
2329
|
+
return modulePromise;
|
|
2330
|
+
}
|
|
2331
|
+
async function detectWithWhatlangWasm(text, routeTag) {
|
|
2332
|
+
return (await loadWhatlangWasmModule()).detect_language(text, routeTag);
|
|
2333
|
+
}
|
|
2334
|
+
//#endregion
|
|
2335
|
+
//#region src/detector/whatlang-map.ts
|
|
2336
|
+
const LATIN_LANGUAGE_TAGS = {
|
|
2337
|
+
cat: "ca",
|
|
2338
|
+
ces: "cs",
|
|
2339
|
+
dan: "da",
|
|
2340
|
+
deu: "de",
|
|
2341
|
+
eng: "en",
|
|
2342
|
+
fin: "fi",
|
|
2343
|
+
fra: "fr",
|
|
2344
|
+
hun: "hu",
|
|
2345
|
+
ita: "it",
|
|
2346
|
+
lat: "la",
|
|
2347
|
+
nld: "nl",
|
|
2348
|
+
pol: "pl",
|
|
2349
|
+
por: "pt",
|
|
2350
|
+
ron: "ro",
|
|
2351
|
+
spa: "es",
|
|
2352
|
+
swe: "sv",
|
|
2353
|
+
tur: "tr"
|
|
2354
|
+
};
|
|
2355
|
+
const HANI_LANGUAGE_TAGS = {
|
|
2356
|
+
cmn: "zh",
|
|
2357
|
+
jpn: "ja"
|
|
2358
|
+
};
|
|
2359
|
+
function hasSupportedScript(result, routeTag) {
|
|
2360
|
+
if (routeTag === "und-Latn") return result.script === "Latin";
|
|
2361
|
+
return result.script === "Mandarin";
|
|
2362
|
+
}
|
|
2363
|
+
function remapLanguageTag(lang, routeTag) {
|
|
2364
|
+
if (routeTag === "und-Latn") return LATIN_LANGUAGE_TAGS[lang];
|
|
2365
|
+
return HANI_LANGUAGE_TAGS[lang];
|
|
2366
|
+
}
|
|
2367
|
+
function remapWhatlangResult(result, routeTag) {
|
|
2368
|
+
if (!hasSupportedScript(result, routeTag)) return null;
|
|
2369
|
+
const tag = remapLanguageTag(result.lang, routeTag);
|
|
2370
|
+
if (!tag) return null;
|
|
2371
|
+
return {
|
|
2372
|
+
tag,
|
|
2373
|
+
confidence: result.confidence,
|
|
2374
|
+
reliable: result.reliable,
|
|
2375
|
+
source: "wasm"
|
|
2376
|
+
};
|
|
2377
|
+
}
|
|
2378
|
+
function getDetectorFallbackTag(routeTag) {
|
|
2379
|
+
return routeTag === "und-Hani" ? DEFAULT_HAN_TAG : DEFAULT_LOCALE;
|
|
2380
|
+
}
|
|
2381
|
+
//#endregion
|
|
2382
|
+
//#region src/detector/wasm.ts
|
|
2383
|
+
function shouldAcceptDetectorTag(routeTag, confidence, reliable) {
|
|
2384
|
+
const policy = DETECTOR_ROUTE_POLICIES[routeTag];
|
|
2385
|
+
if (policy.requireReliable && reliable !== true) return false;
|
|
2386
|
+
if (confidence === void 0) return false;
|
|
2387
|
+
return confidence >= policy.minConfidence;
|
|
2388
|
+
}
|
|
2389
|
+
function buildDetectorWindows(chunks) {
|
|
2390
|
+
const windows = [];
|
|
2391
|
+
for (let index = 0; index < chunks.length; index += 1) {
|
|
2392
|
+
const chunk = chunks[index];
|
|
2393
|
+
if (!chunk || !isAmbiguousDetectorRoute(chunk.locale)) continue;
|
|
2394
|
+
const previousWindow = windows[windows.length - 1];
|
|
2395
|
+
if (previousWindow && previousWindow.routeTag === chunk.locale && previousWindow.endIndex === index - 1) {
|
|
2396
|
+
previousWindow.endIndex = index;
|
|
2397
|
+
previousWindow.text += chunk.text;
|
|
2398
|
+
continue;
|
|
2399
|
+
}
|
|
2400
|
+
windows.push({
|
|
2401
|
+
routeTag: chunk.locale,
|
|
2402
|
+
startIndex: index,
|
|
2403
|
+
endIndex: index,
|
|
2404
|
+
text: chunk.text
|
|
2405
|
+
});
|
|
2406
|
+
}
|
|
2407
|
+
return windows;
|
|
2408
|
+
}
|
|
2409
|
+
async function resolveWindowLocale(window) {
|
|
2410
|
+
if (!shouldRunWasmDetector(window.text, window.routeTag)) return window.routeTag;
|
|
2411
|
+
const rawResult = await detectWithWhatlangWasm(window.text, window.routeTag);
|
|
2412
|
+
const rawRemapped = rawResult ? remapWhatlangResult(rawResult, window.routeTag) : null;
|
|
2413
|
+
const normalizedSample = normalizeDetectorSampleForRoute(window.text, window.routeTag);
|
|
2414
|
+
const normalizedResult = normalizedSample.length > 0 && normalizedSample !== window.text ? await detectWithWhatlangWasm(normalizedSample, window.routeTag) : null;
|
|
2415
|
+
const normalizedRemapped = normalizedResult ? remapWhatlangResult(normalizedResult, window.routeTag) : null;
|
|
2416
|
+
const candidates = [rawRemapped, normalizedRemapped].filter((value) => value !== null);
|
|
2417
|
+
if (candidates.length === 0) return getDetectorFallbackTag(window.routeTag);
|
|
2418
|
+
const strongestCandidate = candidates.reduce((best, current) => {
|
|
2419
|
+
if (!best) return current;
|
|
2420
|
+
return (current.confidence ?? 0) > (best.confidence ?? 0) ? current : best;
|
|
2421
|
+
}, candidates[0]);
|
|
2422
|
+
if (strongestCandidate && shouldAcceptDetectorTag(window.routeTag, strongestCandidate.confidence, strongestCandidate.reliable)) return strongestCandidate.tag;
|
|
2423
|
+
if (window.routeTag === "und-Latn" && rawRemapped && normalizedRemapped && rawRemapped.tag === normalizedRemapped.tag) {
|
|
2424
|
+
if (Math.max(rawRemapped.confidence ?? 0, normalizedRemapped.confidence ?? 0) >= .7) return rawRemapped.tag;
|
|
2425
|
+
}
|
|
2426
|
+
return getDetectorFallbackTag(window.routeTag);
|
|
2427
|
+
}
|
|
2428
|
+
async function segmentTextByLocaleWithWasmDetector(text, options = {}) {
|
|
2429
|
+
const chunks = segmentTextByLocale(text, options);
|
|
2430
|
+
const resolved = [...chunks];
|
|
2431
|
+
const windows = buildDetectorWindows(chunks);
|
|
2432
|
+
for (const window of windows) {
|
|
2433
|
+
const resolvedLocale = await resolveWindowLocale(window);
|
|
2434
|
+
for (let index = window.startIndex; index <= window.endIndex; index += 1) {
|
|
2435
|
+
const chunk = resolved[index];
|
|
2436
|
+
if (!chunk) continue;
|
|
2437
|
+
resolved[index] = {
|
|
2438
|
+
...chunk,
|
|
2439
|
+
locale: resolvedLocale
|
|
2440
|
+
};
|
|
2441
|
+
}
|
|
2442
|
+
}
|
|
2443
|
+
return resolved;
|
|
2444
|
+
}
|
|
2445
|
+
async function wordCounterWithWasmDetector(text, options = {}) {
|
|
2446
|
+
return buildWordCounterResultFromChunks(await segmentTextByLocaleWithWasmDetector(text, options), options);
|
|
2447
|
+
}
|
|
2448
|
+
async function countSectionsWithWasmDetector(input, section, options = {}) {
|
|
2449
|
+
return countSectionsWithResolvedDetector(input, section, options);
|
|
2450
|
+
}
|
|
2451
|
+
function resolveDetectorMode(mode) {
|
|
2452
|
+
return mode ?? "regex";
|
|
2453
|
+
}
|
|
2454
|
+
async function wordCounterWithDetector(text, options = {}) {
|
|
2455
|
+
if (resolveDetectorMode(options.detector) === "wasm") return wordCounterWithWasmDetector(text, options);
|
|
2456
|
+
return wordCounterWithRegexDetector(text, options);
|
|
2457
|
+
}
|
|
2458
|
+
async function countSectionsWithDetector(input, section, options = {}) {
|
|
2459
|
+
if (resolveDetectorMode(options.detector) === "wasm") return countSectionsWithWasmDetector(input, section, options);
|
|
2460
|
+
return countSectionsWithRegexDetector(input, section, options);
|
|
2461
|
+
}
|
|
1768
2462
|
//#endregion
|
|
1769
2463
|
//#region src/cli/batch/aggregate.ts
|
|
1770
2464
|
function mergeWordCounterResult(left, right, preserveCollectorSegments) {
|
|
@@ -1978,7 +2672,6 @@ function finalizeBatchSummaryFromFileResults(files, section, wcOptions, options
|
|
|
1978
2672
|
aggregate: section === "all" ? aggregateWordCounterResults(files.map((file) => file.result), preserveCollectorSegments) : aggregateSectionedResults(files.map((file) => file.result), preserveCollectorSegments)
|
|
1979
2673
|
};
|
|
1980
2674
|
}
|
|
1981
|
-
|
|
1982
2675
|
//#endregion
|
|
1983
2676
|
//#region src/cli/batch/jobs/queue.ts
|
|
1984
2677
|
async function runBoundedQueue(total, requestedJobs, worker) {
|
|
@@ -1998,7 +2691,6 @@ async function runBoundedQueue(total, requestedJobs, worker) {
|
|
|
1998
2691
|
await Promise.all(Array.from({ length: concurrency }, () => runWorker()));
|
|
1999
2692
|
return results;
|
|
2000
2693
|
}
|
|
2001
|
-
|
|
2002
2694
|
//#endregion
|
|
2003
2695
|
//#region src/cli/path/load.ts
|
|
2004
2696
|
function isProbablyBinary(buffer) {
|
|
@@ -2015,7 +2707,6 @@ function isProbablyBinary(buffer) {
|
|
|
2015
2707
|
}
|
|
2016
2708
|
return suspicious / sampleSize > .3;
|
|
2017
2709
|
}
|
|
2018
|
-
|
|
2019
2710
|
//#endregion
|
|
2020
2711
|
//#region src/cli/batch/jobs/read-input.ts
|
|
2021
2712
|
async function readBatchInput(path, options) {
|
|
@@ -2046,10 +2737,10 @@ async function readBatchInput(path, options) {
|
|
|
2046
2737
|
content: buffer.toString("utf8")
|
|
2047
2738
|
};
|
|
2048
2739
|
}
|
|
2049
|
-
|
|
2050
2740
|
//#endregion
|
|
2051
2741
|
//#region src/cli/batch/jobs/load-count.ts
|
|
2052
2742
|
async function countBatchInputsWithJobs(filePaths, options) {
|
|
2743
|
+
const detectorMode = options.detectorMode ?? "regex";
|
|
2053
2744
|
const limits = resolveBatchJobsLimit();
|
|
2054
2745
|
const total = filePaths.length;
|
|
2055
2746
|
let completed = 0;
|
|
@@ -2072,7 +2763,13 @@ async function countBatchInputsWithJobs(filePaths, options) {
|
|
|
2072
2763
|
}
|
|
2073
2764
|
};
|
|
2074
2765
|
}
|
|
2075
|
-
const result = options.section === "all" ? wc_default(loaded.content, options.wcOptions) : countSections(loaded.content, options.section, options.wcOptions)
|
|
2766
|
+
const result = detectorMode === "regex" ? options.section === "all" ? wc_default(loaded.content, options.wcOptions) : countSections(loaded.content, options.section, options.wcOptions) : options.section === "all" ? await wordCounterWithDetector(loaded.content, {
|
|
2767
|
+
...options.wcOptions,
|
|
2768
|
+
detector: detectorMode
|
|
2769
|
+
}) : await countSectionsWithDetector(loaded.content, options.section, {
|
|
2770
|
+
...options.wcOptions,
|
|
2771
|
+
detector: detectorMode
|
|
2772
|
+
});
|
|
2076
2773
|
if (!options.preserveCollectorSegments) compactCollectorSegmentsInCountResult(result);
|
|
2077
2774
|
completed += 1;
|
|
2078
2775
|
options.onFileProcessed?.({
|
|
@@ -2101,47 +2798,6 @@ async function countBatchInputsWithJobs(filePaths, options) {
|
|
|
2101
2798
|
skipped
|
|
2102
2799
|
};
|
|
2103
2800
|
}
|
|
2104
|
-
|
|
2105
|
-
//#endregion
|
|
2106
|
-
//#region src/cli/batch/jobs/load-count-worker.ts
|
|
2107
|
-
var WorkerRouteUnavailableError = class extends Error {};
|
|
2108
|
-
function isFallbackFriendlyWorkerError(error) {
|
|
2109
|
-
if (typeof error !== "object" || error === null) return false;
|
|
2110
|
-
const code = "code" in error ? String(error.code) : "";
|
|
2111
|
-
if (code === "ERR_WORKER_PATH" || code === "ERR_WORKER_UNSUPPORTED_EXTENSION" || code === "ERR_UNKNOWN_FILE_EXTENSION" || code === "ERR_MODULE_NOT_FOUND") return true;
|
|
2112
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
2113
|
-
return message.includes("Unknown file extension") || message.includes("Cannot find module");
|
|
2114
|
-
}
|
|
2115
|
-
async function countBatchInputsWithWorkerJobs(filePaths, options) {
|
|
2116
|
-
if (process.env.WORD_COUNTER_DISABLE_WORKER_JOBS === "1" || process.env.WORD_COUNTER_DISABLE_EXPERIMENTAL_WORKERS === "1") throw new WorkerRouteUnavailableError("Worker route disabled by environment.");
|
|
2117
|
-
let workerPoolModule;
|
|
2118
|
-
try {
|
|
2119
|
-
workerPoolModule = await import("./worker-pool.mjs");
|
|
2120
|
-
} catch (error) {
|
|
2121
|
-
throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`);
|
|
2122
|
-
}
|
|
2123
|
-
try {
|
|
2124
|
-
return await workerPoolModule.countBatchInputsWithWorkerPool({
|
|
2125
|
-
filePaths,
|
|
2126
|
-
jobs: options.jobs,
|
|
2127
|
-
section: options.section,
|
|
2128
|
-
wcOptions: options.wcOptions,
|
|
2129
|
-
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2130
|
-
onFileProcessed: options.onFileProcessed
|
|
2131
|
-
});
|
|
2132
|
-
} catch (error) {
|
|
2133
|
-
if (error instanceof workerPoolModule.WorkerPoolTaskFatalError) {
|
|
2134
|
-
if (error.code === "EMFILE" || error.code === "ENFILE") throw createResourceLimitError(error.path, {
|
|
2135
|
-
code: error.code,
|
|
2136
|
-
message: error.message
|
|
2137
|
-
}, options.jobs, resolveBatchJobsLimit());
|
|
2138
|
-
throw new Error(error.message);
|
|
2139
|
-
}
|
|
2140
|
-
if (error instanceof workerPoolModule.WorkerPoolUnavailableError || isFallbackFriendlyWorkerError(error)) throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`);
|
|
2141
|
-
throw error;
|
|
2142
|
-
}
|
|
2143
|
-
}
|
|
2144
|
-
|
|
2145
2801
|
//#endregion
|
|
2146
2802
|
//#region src/cli/batch/jobs/render.ts
|
|
2147
2803
|
function finalizeBatchJobsSummary(files, section, wcOptions, options = {}) {
|
|
@@ -2150,7 +2806,6 @@ function finalizeBatchJobsSummary(files, section, wcOptions, options = {}) {
|
|
|
2150
2806
|
preserveCollectorSegments: options.preserveCollectorSegments
|
|
2151
2807
|
});
|
|
2152
2808
|
}
|
|
2153
|
-
|
|
2154
2809
|
//#endregion
|
|
2155
2810
|
//#region src/cli/path/resolve.ts
|
|
2156
2811
|
async function expandDirectory(rootPath, directoryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats) {
|
|
@@ -2353,7 +3008,6 @@ async function resolveBatchFilePaths(pathInputs, options) {
|
|
|
2353
3008
|
skipped
|
|
2354
3009
|
};
|
|
2355
3010
|
}
|
|
2356
|
-
|
|
2357
3011
|
//#endregion
|
|
2358
3012
|
//#region src/cli/progress/reporter.ts
|
|
2359
3013
|
const PROGRESS_BAR_WIDTH = 20;
|
|
@@ -2457,7 +3111,6 @@ function createBatchProgressReporter(options) {
|
|
|
2457
3111
|
}
|
|
2458
3112
|
};
|
|
2459
3113
|
}
|
|
2460
|
-
|
|
2461
3114
|
//#endregion
|
|
2462
3115
|
//#region src/cli/batch/run.ts
|
|
2463
3116
|
async function runBatchCount(options) {
|
|
@@ -2521,6 +3174,7 @@ async function runBatchCount(options) {
|
|
|
2521
3174
|
counted = await countBatchInputsWithWorkerJobs(resolved.files, {
|
|
2522
3175
|
jobs: options.jobs,
|
|
2523
3176
|
section: options.section,
|
|
3177
|
+
detectorMode: options.wcOptions.detector ?? "regex",
|
|
2524
3178
|
wcOptions: options.wcOptions,
|
|
2525
3179
|
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2526
3180
|
onFileProcessed: (snapshot) => {
|
|
@@ -2544,6 +3198,7 @@ async function runBatchCount(options) {
|
|
|
2544
3198
|
counted = await countBatchInputsWithJobs(resolved.files, {
|
|
2545
3199
|
jobs: options.jobs,
|
|
2546
3200
|
section: options.section,
|
|
3201
|
+
detectorMode: options.wcOptions.detector ?? "regex",
|
|
2547
3202
|
wcOptions: options.wcOptions,
|
|
2548
3203
|
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2549
3204
|
onFileProcessed: (snapshot) => {
|
|
@@ -2555,6 +3210,7 @@ async function runBatchCount(options) {
|
|
|
2555
3210
|
counted = await countBatchInputsWithJobs(resolved.files, {
|
|
2556
3211
|
jobs: options.jobs,
|
|
2557
3212
|
section: options.section,
|
|
3213
|
+
detectorMode: options.wcOptions.detector ?? "regex",
|
|
2558
3214
|
wcOptions: options.wcOptions,
|
|
2559
3215
|
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2560
3216
|
onFileProcessed: (snapshot) => {
|
|
@@ -2609,19 +3265,16 @@ async function runBatchCount(options) {
|
|
|
2609
3265
|
});
|
|
2610
3266
|
return summary;
|
|
2611
3267
|
}
|
|
2612
|
-
|
|
2613
3268
|
//#endregion
|
|
2614
3269
|
//#region src/cli/batch/jobs/strategy.ts
|
|
2615
3270
|
function resolveBatchJobsStrategy(_jobs) {
|
|
2616
3271
|
return "load-count";
|
|
2617
3272
|
}
|
|
2618
|
-
|
|
2619
3273
|
//#endregion
|
|
2620
3274
|
//#region src/utils/show-singular-or-plural-word.ts
|
|
2621
3275
|
function showSingularOrPluralWord(count, word) {
|
|
2622
3276
|
return `${count} ${word}${count === 1 ? "" : "s"}`;
|
|
2623
3277
|
}
|
|
2624
|
-
|
|
2625
3278
|
//#endregion
|
|
2626
3279
|
//#region src/cli/output/render.ts
|
|
2627
3280
|
function getCountUnit(mode) {
|
|
@@ -2766,7 +3419,6 @@ function renderPerFileStandard(summary, labels, resolveTotalOfOverride) {
|
|
|
2766
3419
|
}
|
|
2767
3420
|
renderStandardResult(summary.aggregate, labels.overall, resolveTotalOfOverride?.(summary.aggregate));
|
|
2768
3421
|
}
|
|
2769
|
-
|
|
2770
3422
|
//#endregion
|
|
2771
3423
|
//#region src/cli/output/normalize-base.ts
|
|
2772
3424
|
function normalizeWordCounterResultBase(result) {
|
|
@@ -2805,7 +3457,6 @@ function normalizeBatchSummaryBase(summary) {
|
|
|
2805
3457
|
normalizeResultBase(summary.aggregate);
|
|
2806
3458
|
return summary;
|
|
2807
3459
|
}
|
|
2808
|
-
|
|
2809
3460
|
//#endregion
|
|
2810
3461
|
//#region src/cli/runtime/options.ts
|
|
2811
3462
|
function hasPathInput(pathValues) {
|
|
@@ -2904,6 +3555,7 @@ function resolveLatinHintRules(options) {
|
|
|
2904
3555
|
}
|
|
2905
3556
|
function resolveCountRunOptions(options) {
|
|
2906
3557
|
const useSection = options.section !== "all";
|
|
3558
|
+
const detectorMode = options.detector ?? "regex";
|
|
2907
3559
|
const totalOfParts = options.totalOf;
|
|
2908
3560
|
const requestedNonWords = Boolean(options.nonWords || options.includeWhitespace || options.misc);
|
|
2909
3561
|
const collectNonWordsForOverride = requiresNonWordCollection(totalOfParts);
|
|
@@ -2912,10 +3564,12 @@ function resolveCountRunOptions(options) {
|
|
|
2912
3564
|
const enableWhitespace = Boolean(options.includeWhitespace || options.misc || collectWhitespaceForOverride);
|
|
2913
3565
|
return {
|
|
2914
3566
|
useSection,
|
|
3567
|
+
detectorMode,
|
|
2915
3568
|
totalOfParts,
|
|
2916
3569
|
requestedNonWords,
|
|
2917
3570
|
shouldNormalizeBaseOutput: !requestedNonWords && enableNonWords,
|
|
2918
3571
|
wcOptions: {
|
|
3572
|
+
detector: detectorMode,
|
|
2919
3573
|
mode: options.mode,
|
|
2920
3574
|
latinLanguageHint: options.latinLanguage,
|
|
2921
3575
|
latinTagHint: options.latinTag,
|
|
@@ -2932,7 +3586,6 @@ function resolveCountRunOptions(options) {
|
|
|
2932
3586
|
function formatInputReadError(error) {
|
|
2933
3587
|
return `Failed to read input: ${error instanceof Error ? error.message : String(error)}`;
|
|
2934
3588
|
}
|
|
2935
|
-
|
|
2936
3589
|
//#endregion
|
|
2937
3590
|
//#region src/cli/runtime/batch.ts
|
|
2938
3591
|
async function executeBatchCount({ argv, options, runtime, resolved, debug, teeEnabled }) {
|
|
@@ -3070,7 +3723,6 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
|
|
|
3070
3723
|
}
|
|
3071
3724
|
renderStandardResult(summary.aggregate, labels.overall, aggregateTotalOfOverride);
|
|
3072
3725
|
}
|
|
3073
|
-
|
|
3074
3726
|
//#endregion
|
|
3075
3727
|
//#region src/cli/runtime/input.ts
|
|
3076
3728
|
async function readStdin() {
|
|
@@ -3088,7 +3740,6 @@ async function resolveInput(textTokens) {
|
|
|
3088
3740
|
if (textTokens.length > 0) return textTokens.join(" ");
|
|
3089
3741
|
return readStdin();
|
|
3090
3742
|
}
|
|
3091
|
-
|
|
3092
3743
|
//#endregion
|
|
3093
3744
|
//#region src/cli/runtime/single.ts
|
|
3094
3745
|
async function executeSingleCount({ textTokens, options, resolved }) {
|
|
@@ -3100,7 +3751,13 @@ async function executeSingleCount({ textTokens, options, resolved }) {
|
|
|
3100
3751
|
}
|
|
3101
3752
|
const trimmed = input.trim();
|
|
3102
3753
|
if (!trimmed) throw new Error("No input provided. Pass text, pipe stdin, or use --path.");
|
|
3103
|
-
const result = resolved.useSection ? countSections(trimmed, options.section, resolved.wcOptions) :
|
|
3754
|
+
const result = resolved.useSection ? resolved.detectorMode === "regex" ? countSections(trimmed, options.section, resolved.wcOptions) : await countSectionsWithDetector(trimmed, options.section, {
|
|
3755
|
+
...resolved.wcOptions,
|
|
3756
|
+
detector: resolved.detectorMode
|
|
3757
|
+
}) : resolved.detectorMode === "regex" ? wc_default(trimmed, resolved.wcOptions) : await wordCounterWithDetector(trimmed, {
|
|
3758
|
+
...resolved.wcOptions,
|
|
3759
|
+
detector: resolved.detectorMode
|
|
3760
|
+
});
|
|
3104
3761
|
const totalOfOverride = resolveTotalOfOverride(result, resolved.totalOfParts);
|
|
3105
3762
|
const displayResult = resolved.shouldNormalizeBaseOutput ? normalizeResultBase(result) : result;
|
|
3106
3763
|
if (options.format === "raw") {
|
|
@@ -3129,17 +3786,23 @@ async function executeSingleCount({ textTokens, options, resolved }) {
|
|
|
3129
3786
|
}
|
|
3130
3787
|
renderStandardResult(displayResult, labels.overall, totalOfOverride);
|
|
3131
3788
|
}
|
|
3132
|
-
|
|
3133
3789
|
//#endregion
|
|
3134
3790
|
//#region src/command.ts
|
|
3135
3791
|
async function runCli(argv = process.argv, runtime = {}) {
|
|
3792
|
+
if (isExplicitDoctorInvocation(argv)) {
|
|
3793
|
+
await executeDoctorCommand({
|
|
3794
|
+
argv,
|
|
3795
|
+
runtime: runtime.doctor
|
|
3796
|
+
});
|
|
3797
|
+
return;
|
|
3798
|
+
}
|
|
3136
3799
|
const program = new Command();
|
|
3137
3800
|
const parseMode = (value) => {
|
|
3138
3801
|
const normalized = normalizeMode(value);
|
|
3139
3802
|
if (!normalized) throw new Error(`Invalid mode: ${value}`);
|
|
3140
3803
|
return normalized;
|
|
3141
3804
|
};
|
|
3142
|
-
program.name("word-counter").description("Locale-aware word counting powered by Intl.Segmenter.").version(getFormattedVersionLabel(), "-v, --version", "output the version number");
|
|
3805
|
+
program.name("word-counter").description("Locale-aware word counting powered by Intl.Segmenter.").version(getFormattedVersionLabel(), "-v, --version", "output the version number").addHelpText("after", "\nCommands:\n doctor [options] report runtime diagnostics for this host");
|
|
3143
3806
|
configureProgramOptions(program, parseMode);
|
|
3144
3807
|
program.action(async (textTokens, options) => {
|
|
3145
3808
|
if (options.printJobsLimit) {
|
|
@@ -3219,14 +3882,19 @@ async function runCli(argv = process.argv, runtime = {}) {
|
|
|
3219
3882
|
program.error(import_picocolors.default.red(message));
|
|
3220
3883
|
return;
|
|
3221
3884
|
}
|
|
3885
|
+
if (message === "WASM detector runtime is unavailable. Run `bun run build:wasm` to generate it.") {
|
|
3886
|
+
console.error(import_picocolors.default.red(message));
|
|
3887
|
+
process.exitCode = 1;
|
|
3888
|
+
return;
|
|
3889
|
+
}
|
|
3222
3890
|
program.error(message);
|
|
3223
3891
|
} finally {
|
|
3224
3892
|
await debug.close();
|
|
3225
3893
|
}
|
|
3226
3894
|
});
|
|
3227
3895
|
await program.parseAsync(argv);
|
|
3896
|
+
if (process.exitCode === void 0) process.exitCode = 0;
|
|
3228
3897
|
}
|
|
3229
|
-
|
|
3230
3898
|
//#endregion
|
|
3231
3899
|
//#region src/bin.ts
|
|
3232
3900
|
runCli().catch((error) => {
|
|
@@ -3234,7 +3902,7 @@ runCli().catch((error) => {
|
|
|
3234
3902
|
console.error("Failed to run CLI:", message);
|
|
3235
3903
|
process.exitCode = 1;
|
|
3236
3904
|
});
|
|
3237
|
-
|
|
3238
3905
|
//#endregion
|
|
3239
|
-
export {
|
|
3906
|
+
export {};
|
|
3907
|
+
|
|
3240
3908
|
//# sourceMappingURL=bin.mjs.map
|