@dev-pi2pie/word-counter 0.1.4 → 0.1.5-canary.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/esm/bin.mjs CHANGED
@@ -1,13 +1,13 @@
1
1
  #!/usr/bin/env node
2
2
 
3
+ import { createRequire } from "node:module";
3
4
  import { Command, Option } from "commander";
4
5
  import { closeSync, createWriteStream, existsSync, mkdirSync, openSync, readFileSync, statSync } from "node:fs";
5
6
  import { basename, dirname, extname, join, relative, resolve, sep } from "node:path";
6
- import { fileURLToPath } from "node:url";
7
7
  import os from "node:os";
8
+ import { fileURLToPath } from "node:url";
8
9
  import { parseDocument } from "yaml";
9
10
  import { readFile, readdir, stat } from "node:fs/promises";
10
-
11
11
  //#region \0rolldown/runtime.js
12
12
  var __create = Object.create;
13
13
  var __defProp = Object.defineProperty;
@@ -17,16 +17,12 @@ var __getProtoOf = Object.getPrototypeOf;
17
17
  var __hasOwnProp = Object.prototype.hasOwnProperty;
18
18
  var __commonJSMin = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
19
19
  var __copyProps = (to, from, except, desc) => {
20
- if (from && typeof from === "object" || typeof from === "function") {
21
- for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
22
- key = keys[i];
23
- if (!__hasOwnProp.call(to, key) && key !== except) {
24
- __defProp(to, key, {
25
- get: ((k) => from[k]).bind(null, key),
26
- enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
27
- });
28
- }
29
- }
20
+ if (from && typeof from === "object" || typeof from === "function") for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
21
+ key = keys[i];
22
+ if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, {
23
+ get: ((k) => from[k]).bind(null, key),
24
+ enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
25
+ });
30
26
  }
31
27
  return to;
32
28
  };
@@ -34,7 +30,6 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
34
30
  value: mod,
35
31
  enumerable: true
36
32
  }) : target, mod));
37
-
38
33
  //#endregion
39
34
  //#region src/cli/debug/channel.ts
40
35
  const NOOP_CLOSE = async () => {};
@@ -144,7 +139,470 @@ function createDebugChannel(options) {
144
139
  }
145
140
  };
146
141
  }
147
-
142
+ //#endregion
143
+ //#region src/cli/program/version-embedded.ts
144
+ var import_picocolors = /* @__PURE__ */ __toESM((/* @__PURE__ */ __commonJSMin(((exports, module) => {
145
+ let p = process || {}, argv = p.argv || [], env = p.env || {};
146
+ let isColorSupported = !(!!env.NO_COLOR || argv.includes("--no-color")) && (!!env.FORCE_COLOR || argv.includes("--color") || p.platform === "win32" || (p.stdout || {}).isTTY && env.TERM !== "dumb" || !!env.CI);
147
+ let formatter = (open, close, replace = open) => (input) => {
148
+ let string = "" + input, index = string.indexOf(close, open.length);
149
+ return ~index ? open + replaceClose(string, close, replace, index) + close : open + string + close;
150
+ };
151
+ let replaceClose = (string, close, replace, index) => {
152
+ let result = "", cursor = 0;
153
+ do {
154
+ result += string.substring(cursor, index) + replace;
155
+ cursor = index + close.length;
156
+ index = string.indexOf(close, cursor);
157
+ } while (~index);
158
+ return result + string.substring(cursor);
159
+ };
160
+ let createColors = (enabled = isColorSupported) => {
161
+ let f = enabled ? formatter : () => String;
162
+ return {
163
+ isColorSupported: enabled,
164
+ reset: f("\x1B[0m", "\x1B[0m"),
165
+ bold: f("\x1B[1m", "\x1B[22m", "\x1B[22m\x1B[1m"),
166
+ dim: f("\x1B[2m", "\x1B[22m", "\x1B[22m\x1B[2m"),
167
+ italic: f("\x1B[3m", "\x1B[23m"),
168
+ underline: f("\x1B[4m", "\x1B[24m"),
169
+ inverse: f("\x1B[7m", "\x1B[27m"),
170
+ hidden: f("\x1B[8m", "\x1B[28m"),
171
+ strikethrough: f("\x1B[9m", "\x1B[29m"),
172
+ black: f("\x1B[30m", "\x1B[39m"),
173
+ red: f("\x1B[31m", "\x1B[39m"),
174
+ green: f("\x1B[32m", "\x1B[39m"),
175
+ yellow: f("\x1B[33m", "\x1B[39m"),
176
+ blue: f("\x1B[34m", "\x1B[39m"),
177
+ magenta: f("\x1B[35m", "\x1B[39m"),
178
+ cyan: f("\x1B[36m", "\x1B[39m"),
179
+ white: f("\x1B[37m", "\x1B[39m"),
180
+ gray: f("\x1B[90m", "\x1B[39m"),
181
+ bgBlack: f("\x1B[40m", "\x1B[49m"),
182
+ bgRed: f("\x1B[41m", "\x1B[49m"),
183
+ bgGreen: f("\x1B[42m", "\x1B[49m"),
184
+ bgYellow: f("\x1B[43m", "\x1B[49m"),
185
+ bgBlue: f("\x1B[44m", "\x1B[49m"),
186
+ bgMagenta: f("\x1B[45m", "\x1B[49m"),
187
+ bgCyan: f("\x1B[46m", "\x1B[49m"),
188
+ bgWhite: f("\x1B[47m", "\x1B[49m"),
189
+ blackBright: f("\x1B[90m", "\x1B[39m"),
190
+ redBright: f("\x1B[91m", "\x1B[39m"),
191
+ greenBright: f("\x1B[92m", "\x1B[39m"),
192
+ yellowBright: f("\x1B[93m", "\x1B[39m"),
193
+ blueBright: f("\x1B[94m", "\x1B[39m"),
194
+ magentaBright: f("\x1B[95m", "\x1B[39m"),
195
+ cyanBright: f("\x1B[96m", "\x1B[39m"),
196
+ whiteBright: f("\x1B[97m", "\x1B[39m"),
197
+ bgBlackBright: f("\x1B[100m", "\x1B[49m"),
198
+ bgRedBright: f("\x1B[101m", "\x1B[49m"),
199
+ bgGreenBright: f("\x1B[102m", "\x1B[49m"),
200
+ bgYellowBright: f("\x1B[103m", "\x1B[49m"),
201
+ bgBlueBright: f("\x1B[104m", "\x1B[49m"),
202
+ bgMagentaBright: f("\x1B[105m", "\x1B[49m"),
203
+ bgCyanBright: f("\x1B[106m", "\x1B[49m"),
204
+ bgWhiteBright: f("\x1B[107m", "\x1B[49m")
205
+ };
206
+ };
207
+ module.exports = createColors();
208
+ module.exports.createColors = createColors;
209
+ })))(), 1);
210
+ //#endregion
211
+ //#region src/cli/batch/jobs/limits.ts
212
+ const DEFAULT_UV_THREADPOOL_SIZE = 4;
213
+ function parsePositiveInteger(value) {
214
+ if (!value) return;
215
+ const parsed = Number.parseInt(value, 10);
216
+ if (!Number.isFinite(parsed) || parsed <= 0) return;
217
+ return parsed;
218
+ }
219
+ function resolveBatchJobsLimit(env = process.env) {
220
+ const cpuLimit = Math.max(1, os.availableParallelism());
221
+ const uvThreadpool = parsePositiveInteger(env.UV_THREADPOOL_SIZE) ?? DEFAULT_UV_THREADPOOL_SIZE;
222
+ const ioLimit = Math.max(1, uvThreadpool * 2);
223
+ return {
224
+ suggestedMaxJobs: Math.max(1, Math.min(cpuLimit, ioLimit)),
225
+ cpuLimit,
226
+ uvThreadpool,
227
+ ioLimit
228
+ };
229
+ }
230
+ function clampRequestedJobs(requestedJobs, limits) {
231
+ return Math.max(1, Math.min(requestedJobs, limits.suggestedMaxJobs));
232
+ }
233
+ function formatJobsAdvisoryWarning(requestedJobs, effectiveJobs, limits) {
234
+ return [
235
+ `Warning: requested --jobs=${requestedJobs} exceeds suggested host limit (${limits.suggestedMaxJobs}).`,
236
+ `Running with --jobs=${effectiveJobs} as a safety cap.`,
237
+ `Host limits: cpuLimit=${limits.cpuLimit}, uvThreadpool=${limits.uvThreadpool}, ioLimit=${limits.ioLimit}.`
238
+ ].join(" ");
239
+ }
240
+ function isResourceLimitError(error) {
241
+ if (typeof error !== "object" || error === null) return false;
242
+ const code = "code" in error ? error.code : void 0;
243
+ return code === "EMFILE" || code === "ENFILE";
244
+ }
245
+ function createResourceLimitError(path, error, requestedJobs, limits) {
246
+ const message = error instanceof Error ? error.message : String(error);
247
+ const code = typeof error === "object" && error !== null && "code" in error ? String(error.code) : "UNKNOWN";
248
+ return new Error([
249
+ `Resource limit reached while processing: ${path} (${code}: ${message}).`,
250
+ `Requested --jobs=${requestedJobs}; suggested host limit is ${limits.suggestedMaxJobs}.`,
251
+ "Reduce --jobs or raise OS file descriptor limits before retrying."
252
+ ].join(" "));
253
+ }
254
+ //#endregion
255
+ //#region src/cli/batch/jobs/load-count-worker.ts
256
+ var WorkerRouteUnavailableError = class extends Error {};
257
+ async function resolveWorkerThreadsAvailability() {
258
+ try {
259
+ return typeof (await import("node:worker_threads")).Worker === "function";
260
+ } catch {
261
+ return false;
262
+ }
263
+ }
264
+ function isFallbackFriendlyWorkerError(error) {
265
+ if (typeof error !== "object" || error === null) return false;
266
+ const code = "code" in error ? String(error.code) : "";
267
+ if (code === "ERR_WORKER_PATH" || code === "ERR_WORKER_UNSUPPORTED_EXTENSION" || code === "ERR_UNKNOWN_FILE_EXTENSION" || code === "ERR_MODULE_NOT_FOUND") return true;
268
+ const message = error instanceof Error ? error.message : String(error);
269
+ return message.includes("Unknown file extension") || message.includes("Cannot find module");
270
+ }
271
+ async function resolveWorkerRoutePreflight(env = process.env) {
272
+ const disableWorkerJobsEnv = env.WORD_COUNTER_DISABLE_WORKER_JOBS ?? null;
273
+ const workerRouteDisabledByEnv = disableWorkerJobsEnv === "1";
274
+ const workerThreadsAvailable = await resolveWorkerThreadsAvailability();
275
+ try {
276
+ return {
277
+ workerThreadsAvailable,
278
+ workerRouteDisabledByEnv,
279
+ disableWorkerJobsEnv,
280
+ workerPoolModuleLoadable: true,
281
+ workerEntryFound: (await import("./worker-pool.mjs")).resolveWorkerEntryUrl() !== null
282
+ };
283
+ } catch {
284
+ return {
285
+ workerThreadsAvailable,
286
+ workerRouteDisabledByEnv,
287
+ disableWorkerJobsEnv,
288
+ workerPoolModuleLoadable: false,
289
+ workerEntryFound: false
290
+ };
291
+ }
292
+ }
293
+ async function countBatchInputsWithWorkerJobs(filePaths, options) {
294
+ if (process.env.WORD_COUNTER_DISABLE_WORKER_JOBS === "1") throw new WorkerRouteUnavailableError("Worker route disabled by environment.");
295
+ let workerPoolModule;
296
+ try {
297
+ workerPoolModule = await import("./worker-pool.mjs");
298
+ } catch (error) {
299
+ throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`);
300
+ }
301
+ try {
302
+ return await workerPoolModule.countBatchInputsWithWorkerPool({
303
+ filePaths,
304
+ jobs: options.jobs,
305
+ section: options.section,
306
+ detectorMode: options.detectorMode ?? "regex",
307
+ wcOptions: options.wcOptions,
308
+ preserveCollectorSegments: options.preserveCollectorSegments,
309
+ onFileProcessed: options.onFileProcessed
310
+ });
311
+ } catch (error) {
312
+ if (error instanceof workerPoolModule.WorkerPoolTaskFatalError) {
313
+ if (error.code === "EMFILE" || error.code === "ENFILE") throw createResourceLimitError(error.path, {
314
+ code: error.code,
315
+ message: error.message
316
+ }, options.jobs, resolveBatchJobsLimit());
317
+ throw new Error(error.message);
318
+ }
319
+ if (error instanceof workerPoolModule.WorkerPoolUnavailableError || isFallbackFriendlyWorkerError(error)) throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`);
320
+ throw error;
321
+ }
322
+ }
323
+ //#endregion
324
+ //#region src/cli/doctor/checks.ts
325
+ const REQUIRED_NODE_RANGE = ">=20";
326
+ const REQUIRED_NODE_MAJOR = 20;
327
+ const SAMPLE_TEXT = "Hello 世界";
328
+ function normalizePackageVersion(value) {
329
+ const trimmed = value?.trim();
330
+ return trimmed && trimmed.length > 0 ? trimmed : "0.0.0";
331
+ }
332
+ function deriveBuildChannel(packageVersion) {
333
+ const prereleaseMatch = /(?:^|[.-])(alpha|beta|rc|canary)(?:[.-]|$)/i.exec(packageVersion);
334
+ if (!prereleaseMatch) return "stable";
335
+ const channel = prereleaseMatch[1]?.toLowerCase();
336
+ if (channel === "alpha" || channel === "beta" || channel === "rc" || channel === "canary") return channel;
337
+ return "stable";
338
+ }
339
+ function parseNodeMajor(version) {
340
+ const match = /^v?(\d+)(?:\.\d+){0,2}(?:[-+].*)?$/.exec(version.trim());
341
+ if (!match) return null;
342
+ const major = Number.parseInt(match[1] ?? "", 10);
343
+ return Number.isFinite(major) ? major : null;
344
+ }
345
+ function resolveRuntimeSummary(overrides = {}) {
346
+ const packageVersion = normalizePackageVersion(overrides.packageVersion ?? "0.1.5-canary.2");
347
+ const nodeVersion = overrides.nodeVersion ?? process.version;
348
+ const major = parseNodeMajor(nodeVersion);
349
+ return {
350
+ packageVersion,
351
+ buildChannel: deriveBuildChannel(packageVersion),
352
+ requiredNodeRange: REQUIRED_NODE_RANGE,
353
+ nodeVersion,
354
+ meetsProjectRequirement: major !== null && major >= REQUIRED_NODE_MAJOR,
355
+ platform: overrides.platform ?? process.platform,
356
+ arch: overrides.arch ?? process.arch
357
+ };
358
+ }
359
+ function resolveSegmenterHealth(overrides = {}) {
360
+ const Segmenter = (overrides.intl ?? Intl).Segmenter;
361
+ const available = typeof Segmenter === "function";
362
+ let wordGranularity = false;
363
+ let graphemeGranularity = false;
364
+ let sampleWordSegmentation = false;
365
+ if (!available) return {
366
+ available,
367
+ wordGranularity,
368
+ graphemeGranularity,
369
+ sampleWordSegmentation
370
+ };
371
+ try {
372
+ const wordSegmenter = new Segmenter("en", { granularity: "word" });
373
+ wordGranularity = true;
374
+ for (const _segment of wordSegmenter.segment(SAMPLE_TEXT)) {
375
+ sampleWordSegmentation = true;
376
+ break;
377
+ }
378
+ } catch {
379
+ wordGranularity = false;
380
+ sampleWordSegmentation = false;
381
+ }
382
+ try {
383
+ new Segmenter("en", { granularity: "grapheme" });
384
+ graphemeGranularity = true;
385
+ } catch {
386
+ graphemeGranularity = false;
387
+ }
388
+ return {
389
+ available,
390
+ wordGranularity,
391
+ graphemeGranularity,
392
+ sampleWordSegmentation
393
+ };
394
+ }
395
+ function collectWarnings(runtime, segmenter, workerRoute) {
396
+ const warnings = [];
397
+ if (!runtime.meetsProjectRequirement) warnings.push(`Node.js ${runtime.nodeVersion} is outside the supported range ${runtime.requiredNodeRange}.`);
398
+ if (!segmenter.available) warnings.push("Intl.Segmenter is unavailable.");
399
+ else {
400
+ if (!segmenter.wordGranularity) warnings.push("Intl.Segmenter word granularity is unusable.");
401
+ if (!segmenter.graphemeGranularity) warnings.push("Intl.Segmenter grapheme granularity is unusable.");
402
+ if (!segmenter.sampleWordSegmentation) warnings.push("Intl.Segmenter sample segmentation failed.");
403
+ }
404
+ if (!workerRoute.workerThreadsAvailable) warnings.push("Worker threads are unavailable on this runtime.");
405
+ if (workerRoute.workerRouteDisabledByEnv) warnings.push("Worker route is disabled by environment.");
406
+ if (!workerRoute.workerPoolModuleLoadable) warnings.push("Worker route preflight failed: worker-pool module could not be loaded.");
407
+ else if (!workerRoute.workerEntryFound) warnings.push("Worker route preflight failed: count-worker entry file was not found.");
408
+ return warnings;
409
+ }
410
+ function resolveStatus(segmenter, warnings) {
411
+ if (!segmenter.available || !segmenter.wordGranularity || !segmenter.graphemeGranularity || !segmenter.sampleWordSegmentation) return "fail";
412
+ if (warnings.length > 0) return "warn";
413
+ return "ok";
414
+ }
415
+ async function createDoctorReport(overrides = {}) {
416
+ const runtime = resolveRuntimeSummary(overrides);
417
+ const segmenter = resolveSegmenterHealth(overrides);
418
+ const env = overrides.env ?? process.env;
419
+ const jobs = resolveBatchJobsLimit(env);
420
+ const workerRoute = await resolveWorkerRoutePreflight(env);
421
+ const warnings = collectWarnings(runtime, segmenter, workerRoute);
422
+ return {
423
+ status: resolveStatus(segmenter, warnings),
424
+ runtime,
425
+ segmenter,
426
+ jobs,
427
+ workerRoute,
428
+ warnings
429
+ };
430
+ }
431
+ //#endregion
432
+ //#region src/cli/doctor/render.ts
433
+ function colorStatus(status) {
434
+ if (status === "ok") return import_picocolors.default.green(import_picocolors.default.bold(status));
435
+ if (status === "warn") return import_picocolors.default.yellow(import_picocolors.default.bold(status));
436
+ return import_picocolors.default.red(import_picocolors.default.bold(status));
437
+ }
438
+ function renderSection(title, lines) {
439
+ console.log(import_picocolors.default.bold(title));
440
+ for (const line of lines) console.log(`- ${line}`);
441
+ console.log("");
442
+ }
443
+ function colorBoolean(value, yes = "yes", no = "no") {
444
+ return value ? import_picocolors.default.green(yes) : import_picocolors.default.red(no);
445
+ }
446
+ function colorNumber(value) {
447
+ return import_picocolors.default.yellow(String(value));
448
+ }
449
+ function colorStatusWord(value) {
450
+ return value ? import_picocolors.default.green("ok") : import_picocolors.default.red("fail");
451
+ }
452
+ function renderStandardDoctorReport(report) {
453
+ console.log(`Doctor: ${colorStatus(report.status)}`);
454
+ console.log("");
455
+ renderSection("Runtime", [
456
+ `package: ${report.runtime.packageVersion} (${report.runtime.buildChannel})`,
457
+ `node: ${report.runtime.nodeVersion} (supported: ${colorBoolean(report.runtime.meetsProjectRequirement)}; required ${report.runtime.requiredNodeRange})`,
458
+ `platform: ${report.runtime.platform} ${report.runtime.arch}`
459
+ ]);
460
+ renderSection("Segmenter", [
461
+ `Intl.Segmenter: ${colorBoolean(report.segmenter.available, "available", "missing")}`,
462
+ `word granularity: ${colorStatusWord(report.segmenter.wordGranularity)}`,
463
+ `grapheme granularity: ${colorStatusWord(report.segmenter.graphemeGranularity)}`,
464
+ `sample segmentation: ${colorStatusWord(report.segmenter.sampleWordSegmentation)}`
465
+ ]);
466
+ renderSection("Batch jobs", [
467
+ `cpuLimit: ${colorNumber(report.jobs.cpuLimit)}`,
468
+ `uvThreadpool: ${colorNumber(report.jobs.uvThreadpool)}`,
469
+ `ioLimit: ${colorNumber(report.jobs.ioLimit)}`,
470
+ `suggestedMaxJobs: ${colorNumber(report.jobs.suggestedMaxJobs)}`
471
+ ]);
472
+ renderSection("Worker route", [
473
+ `worker threads: ${colorBoolean(report.workerRoute.workerThreadsAvailable, "available", "missing")}`,
474
+ `disabled by env: ${colorBoolean(report.workerRoute.workerRouteDisabledByEnv)}`,
475
+ `disableWorkerJobsEnv: ${report.workerRoute.disableWorkerJobsEnv ?? "null"}`,
476
+ `worker pool module: ${colorBoolean(report.workerRoute.workerPoolModuleLoadable, "loadable", "missing")}`,
477
+ `worker entry: ${colorBoolean(report.workerRoute.workerEntryFound, "found", "missing")}`
478
+ ]);
479
+ if (report.warnings.length > 0) {
480
+ console.log(import_picocolors.default.bold("Warnings"));
481
+ for (const warning of report.warnings) console.log(import_picocolors.default.yellow(`- ${warning}`));
482
+ }
483
+ }
484
+ function renderDoctorReport(report, options) {
485
+ if (options.format === "json") {
486
+ console.log(JSON.stringify(report, null, options.pretty ? 2 : 0));
487
+ return;
488
+ }
489
+ renderStandardDoctorReport(report);
490
+ }
491
+ //#endregion
492
+ //#region src/cli/doctor/run.ts
493
+ const DOCTOR_HELP_LINES = [
494
+ "Usage: word-counter doctor [options]",
495
+ "",
496
+ "report runtime diagnostics for this host",
497
+ "",
498
+ "Options:",
499
+ " --format <format> doctor output format (json)",
500
+ " --pretty pretty print doctor JSON output (default: false)",
501
+ " -h, --help display help for command"
502
+ ];
503
+ function parseDoctorFormat(rawValue) {
504
+ if (rawValue === void 0) return "standard";
505
+ if (rawValue === "json") return "json";
506
+ return null;
507
+ }
508
+ function validateDoctorInvocation(argv) {
509
+ const doctorIndex = argv.findIndex((token, index) => index >= 2 && token === "doctor");
510
+ const tokens = doctorIndex >= 0 ? argv.slice(doctorIndex + 1) : [];
511
+ let expectsFormatValue = false;
512
+ let format = "standard";
513
+ let pretty = false;
514
+ for (const token of tokens) {
515
+ if (token === "-h" || token === "--help") return {
516
+ ok: true,
517
+ help: true
518
+ };
519
+ if (expectsFormatValue) {
520
+ const parsedFormat = parseDoctorFormat(token);
521
+ if (parsedFormat === null) return {
522
+ ok: false,
523
+ message: "`doctor` only supports default text output or `--format json`."
524
+ };
525
+ format = parsedFormat;
526
+ expectsFormatValue = false;
527
+ continue;
528
+ }
529
+ if (token === "--") return {
530
+ ok: false,
531
+ message: "`doctor` does not accept positional inputs."
532
+ };
533
+ if (token === "--format") {
534
+ expectsFormatValue = true;
535
+ continue;
536
+ }
537
+ if (token.startsWith("--format=")) {
538
+ const rawValue = token.slice(9);
539
+ if (rawValue.length === 0) return {
540
+ ok: false,
541
+ message: "`--format` requires a value."
542
+ };
543
+ const parsedFormat = parseDoctorFormat(rawValue);
544
+ if (parsedFormat === null) return {
545
+ ok: false,
546
+ message: "`doctor` only supports default text output or `--format json`."
547
+ };
548
+ format = parsedFormat;
549
+ continue;
550
+ }
551
+ if (token === "--pretty") {
552
+ pretty = true;
553
+ continue;
554
+ }
555
+ if (token.startsWith("-")) return {
556
+ ok: false,
557
+ message: `\`${token}\` is not supported by \`doctor\`.`
558
+ };
559
+ return {
560
+ ok: false,
561
+ message: "`doctor` does not accept positional inputs."
562
+ };
563
+ }
564
+ if (expectsFormatValue) return {
565
+ ok: false,
566
+ message: "`--format` requires a value."
567
+ };
568
+ if (pretty && format !== "json") return {
569
+ ok: false,
570
+ message: "`--pretty` requires `--format json`."
571
+ };
572
+ return {
573
+ ok: true,
574
+ format,
575
+ pretty
576
+ };
577
+ }
578
+ function isExplicitDoctorInvocation(argv) {
579
+ if (argv[2] !== "doctor") return false;
580
+ const trailingTokens = argv.slice(3);
581
+ if (trailingTokens.length === 0) return true;
582
+ return trailingTokens.some((token) => token === "--" || token.startsWith("-"));
583
+ }
584
+ function printDoctorHelp() {
585
+ for (const line of DOCTOR_HELP_LINES) console.log(line);
586
+ }
587
+ async function executeDoctorCommand({ argv, runtime }) {
588
+ const validated = validateDoctorInvocation(argv);
589
+ if (!validated.ok) {
590
+ console.error(import_picocolors.default.red(`error: ${validated.message}`));
591
+ process.exitCode = 1;
592
+ return;
593
+ }
594
+ if ("help" in validated) {
595
+ printDoctorHelp();
596
+ process.exitCode = 0;
597
+ return;
598
+ }
599
+ const report = await createDoctorReport(runtime);
600
+ renderDoctorReport(report, {
601
+ format: validated.format,
602
+ pretty: validated.pretty
603
+ });
604
+ process.exitCode = report.status === "fail" ? 2 : 0;
605
+ }
148
606
  //#endregion
149
607
  //#region src/cli/path/filter.ts
150
608
  const DEFAULT_INCLUDE_EXTENSIONS = Object.freeze([
@@ -221,7 +679,6 @@ function shouldIncludeFromDirectoryRegex(relativePath, filter) {
221
679
  if (!filter.regex) return true;
222
680
  return filter.regex.test(relativePath);
223
681
  }
224
-
225
682
  //#endregion
226
683
  //#region src/cli/total-of.ts
227
684
  const TOTAL_OF_PARTS = Object.freeze([
@@ -316,7 +773,6 @@ function resolveTotalOfOverride(result, parts) {
316
773
  function formatTotalOfParts(parts) {
317
774
  return parts.join(", ");
318
775
  }
319
-
320
776
  //#endregion
321
777
  //#region src/cli/program/options.ts
322
778
  const MODE_CHOICES = [
@@ -326,6 +782,7 @@ const MODE_CHOICES = [
326
782
  "char",
327
783
  "char-collector"
328
784
  ];
785
+ const DETECTOR_CHOICES = ["regex", "wasm"];
329
786
  const FORMAT_CHOICES = [
330
787
  "standard",
331
788
  "raw",
@@ -353,83 +810,8 @@ function parseJobsOption(value) {
353
810
  return parsed;
354
811
  }
355
812
  function configureProgramOptions(program, parseMode) {
356
- program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of 
--debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--jobs <n>", "batch jobs in --path mode (1=async main-thread, >1=worker load+count)", parseJobsOption, 1).option("--print-jobs-limit", "print host jobs-limit JSON and exit (must be used alone)").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-warnings", "suppress non-fatal warning diagnostics").option("--quiet-skips", "suppress debug skip output and per-file json skipped field").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
813
+ program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--detector <mode>", "locale detector mode").choices(DETECTOR_CHOICES).default("regex")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", 
"mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of --debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--jobs <n>", "batch jobs in --path mode (1=async main-thread, >1=worker load+count)", parseJobsOption, 1).option("--print-jobs-limit", "print host jobs-limit JSON and exit (must be used alone)").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-warnings", "suppress non-fatal warning diagnostics").option("--quiet-skips", "suppress debug skip output and per-file json skipped field").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
357
814
  }
358
-
359
- //#endregion
360
- //#region node_modules/picocolors/picocolors.js
361
- var require_picocolors = /* @__PURE__ */ __commonJSMin(((exports, module) => {
362
- let p = process || {}, argv = p.argv || [], env = p.env || {};
363
- let isColorSupported = !(!!env.NO_COLOR || argv.includes("--no-color")) && (!!env.FORCE_COLOR || argv.includes("--color") || p.platform === "win32" || (p.stdout || {}).isTTY && env.TERM !== "dumb" || !!env.CI);
364
- let formatter = (open, close, replace = open) => (input) => {
365
- let string = "" + input, index = string.indexOf(close, open.length);
366
- return ~index ? open + replaceClose(string, close, replace, index) + close : open + string + close;
367
- };
368
- let replaceClose = (string, close, replace, index) => {
369
- let result = "", cursor = 0;
370
- do {
371
- result += string.substring(cursor, index) + replace;
372
- cursor = index + close.length;
373
- index = string.indexOf(close, cursor);
374
- } while (~index);
375
- return result + string.substring(cursor);
376
- };
377
- let createColors = (enabled = isColorSupported) => {
378
- let f = enabled ? formatter : () => String;
379
- return {
380
- isColorSupported: enabled,
381
- reset: f("\x1B[0m", "\x1B[0m"),
382
- bold: f("\x1B[1m", "\x1B[22m", "\x1B[22m\x1B[1m"),
383
- dim: f("\x1B[2m", "\x1B[22m", "\x1B[22m\x1B[2m"),
384
- italic: f("\x1B[3m", "\x1B[23m"),
385
- underline: f("\x1B[4m", "\x1B[24m"),
386
- inverse: f("\x1B[7m", "\x1B[27m"),
387
- hidden: f("\x1B[8m", "\x1B[28m"),
388
- strikethrough: f("\x1B[9m", "\x1B[29m"),
389
- black: f("\x1B[30m", "\x1B[39m"),
390
- red: f("\x1B[31m", "\x1B[39m"),
391
- green: f("\x1B[32m", "\x1B[39m"),
392
- yellow: f("\x1B[33m", "\x1B[39m"),
393
- blue: f("\x1B[34m", "\x1B[39m"),
394
- magenta: f("\x1B[35m", "\x1B[39m"),
395
- cyan: f("\x1B[36m", "\x1B[39m"),
396
- white: f("\x1B[37m", "\x1B[39m"),
397
- gray: f("\x1B[90m", "\x1B[39m"),
398
- bgBlack: f("\x1B[40m", "\x1B[49m"),
399
- bgRed: f("\x1B[41m", "\x1B[49m"),
400
- bgGreen: f("\x1B[42m", "\x1B[49m"),
401
- bgYellow: f("\x1B[43m", "\x1B[49m"),
402
- bgBlue: f("\x1B[44m", "\x1B[49m"),
403
- bgMagenta: f("\x1B[45m", "\x1B[49m"),
404
- bgCyan: f("\x1B[46m", "\x1B[49m"),
405
- bgWhite: f("\x1B[47m", "\x1B[49m"),
406
- blackBright: f("\x1B[90m", "\x1B[39m"),
407
- redBright: f("\x1B[91m", "\x1B[39m"),
408
- greenBright: f("\x1B[92m", "\x1B[39m"),
409
- yellowBright: f("\x1B[93m", "\x1B[39m"),
410
- blueBright: f("\x1B[94m", "\x1B[39m"),
411
- magentaBright: f("\x1B[95m", "\x1B[39m"),
412
- cyanBright: f("\x1B[96m", "\x1B[39m"),
413
- whiteBright: f("\x1B[97m", "\x1B[39m"),
414
- bgBlackBright: f("\x1B[100m", "\x1B[49m"),
415
- bgRedBright: f("\x1B[101m", "\x1B[49m"),
416
- bgGreenBright: f("\x1B[102m", "\x1B[49m"),
417
- bgYellowBright: f("\x1B[103m", "\x1B[49m"),
418
- bgBlueBright: f("\x1B[104m", "\x1B[49m"),
419
- bgMagentaBright: f("\x1B[105m", "\x1B[49m"),
420
- bgCyanBright: f("\x1B[106m", "\x1B[49m"),
421
- bgWhiteBright: f("\x1B[107m", "\x1B[49m")
422
- };
423
- };
424
- module.exports = createColors();
425
- module.exports.createColors = createColors;
426
- }));
427
-
428
- //#endregion
429
- //#region src/cli/program/version-embedded.ts
430
- var import_picocolors = /* @__PURE__ */ __toESM(require_picocolors(), 1);
431
- const EMBEDDED_PACKAGE_VERSION = "0.1.4";
432
-
433
815
  //#endregion
434
816
  //#region src/cli/program/version.ts
435
817
  function* candidateSearchRoots() {
@@ -462,7 +844,7 @@ function normalizeVersion(value) {
462
844
  return trimmed;
463
845
  }
464
846
  function resolvePackageVersion(options = {}) {
465
- const embeddedVersion = normalizeVersion(options.embeddedVersion ?? EMBEDDED_PACKAGE_VERSION);
847
+ const embeddedVersion = normalizeVersion(options.embeddedVersion ?? "0.1.5-canary.2");
466
848
  if (embeddedVersion) return embeddedVersion;
467
849
  const maxLevels = options.maxLevels ?? 8;
468
850
  const resolveFromPath = options.resolveFromPath ?? resolveVersionFromPath;
@@ -480,58 +862,11 @@ function getFormattedVersionLabel() {
480
862
  const version = resolvePackageVersion();
481
863
  return import_picocolors.default.bgBlack(import_picocolors.default.bold(import_picocolors.default.italic(` word-counter ${import_picocolors.default.cyanBright(`ver.${version}`)} `)));
482
864
  }
483
-
484
- //#endregion
485
- //#region src/cli/batch/jobs/limits.ts
486
- const DEFAULT_UV_THREADPOOL_SIZE = 4;
487
- function parsePositiveInteger(value) {
488
- if (!value) return;
489
- const parsed = Number.parseInt(value, 10);
490
- if (!Number.isFinite(parsed) || parsed <= 0) return;
491
- return parsed;
492
- }
493
- function resolveBatchJobsLimit(env = process.env) {
494
- const cpuLimit = Math.max(1, os.availableParallelism());
495
- const uvThreadpool = parsePositiveInteger(env.UV_THREADPOOL_SIZE) ?? DEFAULT_UV_THREADPOOL_SIZE;
496
- const ioLimit = Math.max(1, uvThreadpool * 2);
497
- return {
498
- suggestedMaxJobs: Math.max(1, Math.min(cpuLimit, ioLimit)),
499
- cpuLimit,
500
- uvThreadpool,
501
- ioLimit
502
- };
503
- }
504
- function clampRequestedJobs(requestedJobs, limits) {
505
- return Math.max(1, Math.min(requestedJobs, limits.suggestedMaxJobs));
506
- }
507
- function formatJobsAdvisoryWarning(requestedJobs, effectiveJobs, limits) {
508
- return [
509
- `Warning: requested --jobs=${requestedJobs} exceeds suggested host limit (${limits.suggestedMaxJobs}).`,
510
- `Running with --jobs=${effectiveJobs} as a safety cap.`,
511
- `Host limits: cpuLimit=${limits.cpuLimit}, uvThreadpool=${limits.uvThreadpool}, ioLimit=${limits.ioLimit}.`
512
- ].join(" ");
513
- }
514
- function isResourceLimitError(error) {
515
- if (typeof error !== "object" || error === null) return false;
516
- const code = "code" in error ? error.code : void 0;
517
- return code === "EMFILE" || code === "ENFILE";
518
- }
519
- function createResourceLimitError(path, error, requestedJobs, limits) {
520
- const message = error instanceof Error ? error.message : String(error);
521
- const code = typeof error === "object" && error !== null && "code" in error ? String(error.code) : "UNKNOWN";
522
- return new Error([
523
- `Resource limit reached while processing: ${path} (${code}: ${message}).`,
524
- `Requested --jobs=${requestedJobs}; suggested host limit is ${limits.suggestedMaxJobs}.`,
525
- "Reduce --jobs or raise OS file descriptor limits before retrying."
526
- ].join(" "));
527
- }
528
-
529
865
  //#endregion
530
866
  //#region src/utils/append-all.ts
531
867
  function appendAll(target, source) {
532
868
  for (const item of source) target.push(item);
533
869
  }
534
-
535
870
  //#endregion
536
871
  //#region src/markdown/toml/arrays.ts
537
872
  function ensureArrayContainer(result, key) {
@@ -547,7 +882,6 @@ function flattenArrayTables(result) {
547
882
  result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
548
883
  }
549
884
  }
550
-
551
885
  //#endregion
552
886
  //#region src/markdown/toml/keys.ts
553
887
  function stripKeyQuotes(key) {
@@ -566,7 +900,6 @@ function normalizeKeyPath(key) {
566
900
  if (segments.some((segment) => !segment)) return null;
567
901
  return segments.join(".");
568
902
  }
569
-
570
903
  //#endregion
571
904
  //#region src/markdown/toml/strings.ts
572
905
  function stripInlineComment(line) {
@@ -615,7 +948,6 @@ function parseStringLiteral(value) {
615
948
  if (value.startsWith("'") && value.endsWith("'")) return value.slice(1, -1);
616
949
  return null;
617
950
  }
618
-
619
951
  //#endregion
620
952
  //#region src/markdown/toml/values.ts
621
953
  function parsePrimitive(raw) {
@@ -773,7 +1105,6 @@ function toPlainText(value) {
773
1105
  if (Array.isArray(value)) return value.map((item) => String(item)).join(", ");
774
1106
  return String(value);
775
1107
  }
776
-
777
1108
  //#endregion
778
1109
  //#region src/markdown/toml/parse-frontmatter.ts
779
1110
  function parseTomlFrontmatter(frontmatter) {
@@ -857,7 +1188,6 @@ function parseTomlFrontmatter(frontmatter) {
857
1188
  flattenArrayTables(result);
858
1189
  return result;
859
1190
  }
860
-
861
1191
  //#endregion
862
1192
  //#region src/markdown/parse-markdown.ts
863
1193
  const FENCE_TO_TYPE = {
@@ -992,7 +1322,6 @@ function parseMarkdown(input) {
992
1322
  frontmatterType: openingType
993
1323
  };
994
1324
  }
995
-
996
1325
  //#endregion
997
1326
  //#region src/wc/segmenter.ts
998
1327
  const segmenterCache = /* @__PURE__ */ new Map();
@@ -1021,7 +1350,6 @@ function countCharsForLocale(text, locale) {
1021
1350
  for (const _segment of segmenter.segment(text)) count++;
1022
1351
  return count;
1023
1352
  }
1024
-
1025
1353
  //#endregion
1026
1354
  //#region src/wc/non-words.ts
1027
1355
  const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
@@ -1135,7 +1463,6 @@ function createWhitespaceCounts() {
1135
1463
  other: 0
1136
1464
  };
1137
1465
  }
1138
-
1139
1466
  //#endregion
1140
1467
  //#region src/wc/analyze.ts
1141
1468
  function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
@@ -1235,7 +1562,6 @@ function aggregateByLocale(chunks) {
1235
1562
  }
1236
1563
  return order.map((locale) => map.get(locale));
1237
1564
  }
1238
-
1239
1565
  //#endregion
1240
1566
  //#region src/wc/mode.ts
1241
1567
  const MODE_ALIASES = {
@@ -1303,10 +1629,7 @@ function normalizeMode(input) {
1303
1629
  function resolveMode(input, fallback = "chunk") {
1304
1630
  return normalizeMode(input) ?? fallback;
1305
1631
  }
1306
-
1307
- //#endregion
1308
- //#region src/wc/latin-hints.ts
1309
- const DEFAULT_LATIN_HINT_RULES_SOURCE = [
1632
+ const DEFAULT_LATIN_HINT_RULES = Object.freeze([
1310
1633
  {
1311
1634
  tag: "de",
1312
1635
  pattern: "[äöüÄÖÜß]"
@@ -1343,9 +1666,7 @@ const DEFAULT_LATIN_HINT_RULES_SOURCE = [
1343
1666
  tag: "is",
1344
1667
  pattern: "[ðÐþÞ]"
1345
1668
  }
1346
- ];
1347
- const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
1348
-
1669
+ ].map((rule) => Object.freeze({ ...rule })));
1349
1670
  //#endregion
1350
1671
  //#region src/wc/locale-detect.ts
1351
1672
  const DEFAULT_LOCALE = "und-Latn";
@@ -1465,18 +1786,17 @@ function detectLocaleForChar(char, previousLocale, options = {}, context = resol
1465
1786
  if (regex.thai.test(char)) return "th";
1466
1787
  if (regex.han.test(char)) {
1467
1788
  if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
1468
- return context.hanHint ?? DEFAULT_HAN_TAG;
1789
+ return context.hanHint ?? "und-Hani";
1469
1790
  }
1470
1791
  if (regex.latin.test(char)) {
1471
1792
  const hintedLocale = detectLatinLocale(char, context);
1472
- if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
1473
- if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
1793
+ if (hintedLocale !== "und-Latn") return hintedLocale;
1794
+ if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== "und-Latn") return previousLocale;
1474
1795
  if (context.latinHint) return context.latinHint;
1475
1796
  return DEFAULT_LOCALE;
1476
1797
  }
1477
1798
  return null;
1478
1799
  }
1479
-
1480
1800
  //#endregion
1481
1801
  //#region src/wc/segment.ts
1482
1802
  const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
@@ -1513,7 +1833,7 @@ function segmentTextByLocale(text, options = {}) {
1513
1833
  continue;
1514
1834
  }
1515
1835
  if (targetLocale !== currentLocale && detected !== null) {
1516
- if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
1836
+ if (currentLocale === "und-Latn" && isLatinLocale(targetLocale, context)) {
1517
1837
  const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
1518
1838
  if (promotionBreakIndex === -1) {
1519
1839
  currentLocale = targetLocale;
@@ -1544,58 +1864,249 @@ function segmentTextByLocale(text, options = {}) {
1544
1864
  updateCarryBoundaryState(detected, char);
1545
1865
  continue;
1546
1866
  }
1547
- buffer += char;
1548
- if (detected !== null) bufferHasScript = true;
1549
- updateCarryBoundaryState(detected, char);
1867
+ buffer += char;
1868
+ if (detected !== null) bufferHasScript = true;
1869
+ updateCarryBoundaryState(detected, char);
1870
+ }
1871
+ if (buffer.length > 0) chunks.push({
1872
+ locale: currentLocale,
1873
+ text: buffer
1874
+ });
1875
+ return mergeAdjacentChunks(chunks);
1876
+ }
1877
+ function findLastLatinPromotionBreakIndex(buffer) {
1878
+ for (let index = buffer.length - 1; index >= 0; index -= 1) {
1879
+ const char = buffer[index];
1880
+ if (!char) continue;
1881
+ if (LATIN_PROMOTION_BREAK_REGEX.test(char)) return index;
1882
+ }
1883
+ return -1;
1884
+ }
1885
+ function mergeAdjacentChunks(chunks) {
1886
+ if (chunks.length === 0) return chunks;
1887
+ const merged = [];
1888
+ let last = chunks[0];
1889
+ for (let i = 1; i < chunks.length; i++) {
1890
+ const chunk = chunks[i];
1891
+ if (chunk.locale === last.locale) last = {
1892
+ locale: last.locale,
1893
+ text: last.text + chunk.text
1894
+ };
1895
+ else {
1896
+ merged.push(last);
1897
+ last = chunk;
1898
+ }
1899
+ }
1900
+ merged.push(last);
1901
+ return merged;
1902
+ }
1903
+ //#endregion
1904
+ //#region src/wc/wc.ts
1905
+ function wordCounter(text, options = {}) {
1906
+ const mode = resolveMode(options.mode, "chunk");
1907
+ const collectNonWords = Boolean(options.nonWords);
1908
+ const includeWhitespace = Boolean(options.includeWhitespace);
1909
+ const chunks = segmentTextByLocale(text, {
1910
+ latinLanguageHint: options.latinLanguageHint,
1911
+ latinTagHint: options.latinTagHint,
1912
+ latinLocaleHint: options.latinLocaleHint,
1913
+ latinHintRules: options.latinHintRules,
1914
+ useDefaultLatinHints: options.useDefaultLatinHints,
1915
+ hanLanguageHint: options.hanLanguageHint,
1916
+ hanTagHint: options.hanTagHint
1917
+ });
1918
+ if (mode === "char" || mode === "char-collector") {
1919
+ const analyzed = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
1920
+ const total = analyzed.reduce((sum, chunk) => sum + chunk.chars, 0);
1921
+ const counts = collectNonWords ? {
1922
+ words: analyzed.reduce((sum, chunk) => sum + chunk.wordChars, 0),
1923
+ nonWords: analyzed.reduce((sum, chunk) => sum + chunk.nonWordChars, 0),
1924
+ total
1925
+ } : void 0;
1926
+ if (mode === "char") return {
1927
+ total,
1928
+ counts,
1929
+ breakdown: {
1930
+ mode,
1931
+ items: analyzed.map((chunk) => ({
1932
+ locale: chunk.locale,
1933
+ text: chunk.text,
1934
+ chars: chunk.chars,
1935
+ nonWords: chunk.nonWords
1936
+ }))
1937
+ }
1938
+ };
1939
+ return {
1940
+ total,
1941
+ counts,
1942
+ breakdown: {
1943
+ mode,
1944
+ items: aggregateCharsByLocale(analyzed).map((chunk) => ({
1945
+ locale: chunk.locale,
1946
+ chars: chunk.chars,
1947
+ nonWords: chunk.nonWords
1948
+ }))
1949
+ }
1950
+ };
1951
+ }
1952
+ const analyzed = chunks.map((chunk) => analyzeChunk(chunk, collectNonWords, includeWhitespace));
1953
+ const wordsTotal = analyzed.reduce((sum, chunk) => sum + chunk.words, 0);
1954
+ const nonWordsTotal = collectNonWords ? analyzed.reduce((sum, chunk) => {
1955
+ if (!chunk.nonWords) return sum;
1956
+ return sum + getNonWordTotal$1(chunk.nonWords);
1957
+ }, 0) : 0;
1958
+ const total = analyzed.reduce((sum, chunk) => {
1959
+ let chunkTotal = chunk.words;
1960
+ if (collectNonWords && chunk.nonWords) chunkTotal += getNonWordTotal$1(chunk.nonWords);
1961
+ return sum + chunkTotal;
1962
+ }, 0);
1963
+ const counts = collectNonWords ? {
1964
+ words: wordsTotal,
1965
+ nonWords: nonWordsTotal,
1966
+ total
1967
+ } : void 0;
1968
+ if (mode === "segments") return {
1969
+ total,
1970
+ counts,
1971
+ breakdown: {
1972
+ mode,
1973
+ items: analyzed.map((chunk) => ({
1974
+ locale: chunk.locale,
1975
+ text: chunk.text,
1976
+ words: chunk.words,
1977
+ segments: chunk.segments,
1978
+ nonWords: chunk.nonWords
1979
+ }))
1980
+ }
1981
+ };
1982
+ if (mode === "collector") return {
1983
+ total,
1984
+ counts,
1985
+ breakdown: {
1986
+ mode,
1987
+ items: aggregateByLocale(analyzed),
1988
+ nonWords: collectNonWordsAggregate$1(analyzed, collectNonWords)
1989
+ }
1990
+ };
1991
+ return {
1992
+ total,
1993
+ counts,
1994
+ breakdown: {
1995
+ mode,
1996
+ items: analyzed.map((chunk) => ({
1997
+ locale: chunk.locale,
1998
+ text: chunk.text,
1999
+ words: chunk.words,
2000
+ nonWords: chunk.nonWords
2001
+ }))
2002
+ }
2003
+ };
2004
+ }
2005
+ function getNonWordTotal$1(nonWords) {
2006
+ return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
2007
+ }
2008
+ function collectNonWordsAggregate$1(analyzed, enabled) {
2009
+ if (!enabled) return;
2010
+ const collection = createNonWordCollection();
2011
+ for (const chunk of analyzed) {
2012
+ if (!chunk.nonWords) continue;
2013
+ mergeNonWordCollections(collection, chunk.nonWords);
1550
2014
  }
1551
- if (buffer.length > 0) chunks.push({
1552
- locale: currentLocale,
1553
- text: buffer
1554
- });
1555
- return mergeAdjacentChunks(chunks);
2015
+ return collection;
1556
2016
  }
1557
- function findLastLatinPromotionBreakIndex(buffer) {
1558
- for (let index = buffer.length - 1; index >= 0; index -= 1) {
1559
- const char = buffer[index];
1560
- if (!char) continue;
1561
- if (LATIN_PROMOTION_BREAK_REGEX.test(char)) return index;
2017
+ //#endregion
2018
+ //#region src/wc/index.ts
2019
+ var wc_default = wordCounter;
2020
+ //#endregion
2021
+ //#region src/markdown/section-count.ts
2022
+ function normalizeText$1(value) {
2023
+ if (value == null) return "";
2024
+ if (typeof value === "string") return value;
2025
+ if (typeof value === "number" || typeof value === "boolean") return String(value);
2026
+ try {
2027
+ return JSON.stringify(value);
2028
+ } catch {
2029
+ return String(value);
1562
2030
  }
1563
- return -1;
1564
2031
  }
1565
- function mergeAdjacentChunks(chunks) {
1566
- if (chunks.length === 0) return chunks;
1567
- const merged = [];
1568
- let last = chunks[0];
1569
- for (let i = 1; i < chunks.length; i++) {
1570
- const chunk = chunks[i];
1571
- if (chunk.locale === last.locale) last = {
1572
- locale: last.locale,
1573
- text: last.text + chunk.text
2032
+ function buildPerKeyItems$1(data, mode, options) {
2033
+ if (!data || typeof data !== "object" || Array.isArray(data)) return [];
2034
+ return Object.entries(data).map(([key, value]) => {
2035
+ const valueText = normalizeText$1(value);
2036
+ return {
2037
+ name: key,
2038
+ source: "frontmatter",
2039
+ result: wc_default(valueText ? `${key}: ${valueText}` : key, options)
2040
+ };
2041
+ });
2042
+ }
2043
+ function buildSingleItem$1(name, text, mode, options, source) {
2044
+ return [{
2045
+ name,
2046
+ source,
2047
+ result: wc_default(text, options)
2048
+ }];
2049
+ }
2050
+ function sumTotals$1(items) {
2051
+ return items.reduce((sum, item) => sum + item.result.total, 0);
2052
+ }
2053
+ function countSections(input, section, options = {}) {
2054
+ const mode = options.mode ?? "chunk";
2055
+ if (section === "all") {
2056
+ const result = wc_default(input, options);
2057
+ return {
2058
+ section,
2059
+ total: result.total,
2060
+ frontmatterType: null,
2061
+ items: [{
2062
+ name: "all",
2063
+ source: "content",
2064
+ result
2065
+ }]
1574
2066
  };
1575
- else {
1576
- merged.push(last);
1577
- last = chunk;
1578
- }
1579
2067
  }
1580
- merged.push(last);
1581
- return merged;
2068
+ const parsed = parseMarkdown(input);
2069
+ const frontmatterText = parsed.frontmatter ?? "";
2070
+ const contentText = parsed.content ?? "";
2071
+ let items = [];
2072
+ if (section === "frontmatter") items = buildSingleItem$1("frontmatter", frontmatterText, mode, options, "frontmatter");
2073
+ else if (section === "content") items = buildSingleItem$1("content", contentText, mode, options, "content");
2074
+ else if (section === "split") items = [...buildSingleItem$1("frontmatter", frontmatterText, mode, options, "frontmatter"), ...buildSingleItem$1("content", contentText, mode, options, "content")];
2075
+ else if (section === "per-key") items = buildPerKeyItems$1(parsed.data, mode, options);
2076
+ else if (section === "split-per-key") items = [...buildPerKeyItems$1(parsed.data, mode, options), ...buildSingleItem$1("content", contentText, mode, options, "content")];
2077
+ return {
2078
+ section,
2079
+ total: sumTotals$1(items),
2080
+ frontmatterType: parsed.frontmatterType,
2081
+ items
2082
+ };
1582
2083
  }
1583
-
1584
2084
  //#endregion
1585
- //#region src/wc/wc.ts
1586
- function wordCounter(text, options = {}) {
2085
+ //#region src/detector/none.ts
2086
+ async function wordCounterWithRegexDetector(text, options = {}) {
2087
+ return wc_default(text, options);
2088
+ }
2089
+ async function countSectionsWithRegexDetector(input, section, options = {}) {
2090
+ return countSections(input, section, options);
2091
+ }
2092
+ //#endregion
2093
+ //#region src/detector/result-builder.ts
2094
+ function getNonWordTotal(nonWords) {
2095
+ return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
2096
+ }
2097
+ function collectNonWordsAggregate(analyzed, enabled) {
2098
+ if (!enabled) return;
2099
+ const collection = createNonWordCollection();
2100
+ for (const chunk of analyzed) {
2101
+ if (!chunk.nonWords) continue;
2102
+ mergeNonWordCollections(collection, chunk.nonWords);
2103
+ }
2104
+ return collection;
2105
+ }
2106
+ function buildWordCounterResultFromChunks(chunks, options = {}) {
1587
2107
  const mode = resolveMode(options.mode, "chunk");
1588
2108
  const collectNonWords = Boolean(options.nonWords);
1589
2109
  const includeWhitespace = Boolean(options.includeWhitespace);
1590
- const chunks = segmentTextByLocale(text, {
1591
- latinLanguageHint: options.latinLanguageHint,
1592
- latinTagHint: options.latinTagHint,
1593
- latinLocaleHint: options.latinLocaleHint,
1594
- latinHintRules: options.latinHintRules,
1595
- useDefaultLatinHints: options.useDefaultLatinHints,
1596
- hanLanguageHint: options.hanLanguageHint,
1597
- hanTagHint: options.hanTagHint
1598
- });
1599
2110
  if (mode === "char" || mode === "char-collector") {
1600
2111
  const analyzed = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
1601
2112
  const total = analyzed.reduce((sum, chunk) => sum + chunk.chars, 0);
@@ -1683,25 +2194,8 @@ function wordCounter(text, options = {}) {
1683
2194
  }
1684
2195
  };
1685
2196
  }
1686
- function getNonWordTotal(nonWords) {
1687
- return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
1688
- }
1689
- function collectNonWordsAggregate(analyzed, enabled) {
1690
- if (!enabled) return;
1691
- const collection = createNonWordCollection();
1692
- for (const chunk of analyzed) {
1693
- if (!chunk.nonWords) continue;
1694
- mergeNonWordCollections(collection, chunk.nonWords);
1695
- }
1696
- return collection;
1697
- }
1698
-
1699
- //#endregion
1700
- //#region src/wc/index.ts
1701
- var wc_default = wordCounter;
1702
-
1703
2197
  //#endregion
1704
- //#region src/markdown/section-count.ts
2198
+ //#region src/detector/sections.ts
1705
2199
  function normalizeText(value) {
1706
2200
  if (value == null) return "";
1707
2201
  if (typeof value === "string") return value;
@@ -1712,31 +2206,31 @@ function normalizeText(value) {
1712
2206
  return String(value);
1713
2207
  }
1714
2208
  }
1715
- function buildPerKeyItems(data, mode, options) {
2209
+ async function buildPerKeyItems(data, options) {
1716
2210
  if (!data || typeof data !== "object" || Array.isArray(data)) return [];
1717
- return Object.entries(data).map(([key, value]) => {
2211
+ return Promise.all(Object.entries(data).map(async ([key, value]) => {
1718
2212
  const valueText = normalizeText(value);
1719
2213
  return {
1720
2214
  name: key,
1721
2215
  source: "frontmatter",
1722
- result: wc_default(valueText ? `${key}: ${valueText}` : key, options)
2216
+ result: await wordCounterWithDetector(valueText ? `${key}: ${valueText}` : key, options)
1723
2217
  };
1724
- });
2218
+ }));
1725
2219
  }
1726
- function buildSingleItem(name, text, mode, options, source) {
2220
+ async function buildSingleItem(name, text, options, source) {
1727
2221
  return [{
1728
2222
  name,
1729
2223
  source,
1730
- result: wc_default(text, options)
2224
+ result: await wordCounterWithDetector(text, options)
1731
2225
  }];
1732
2226
  }
1733
2227
  function sumTotals(items) {
1734
2228
  return items.reduce((sum, item) => sum + item.result.total, 0);
1735
2229
  }
1736
- function countSections(input, section, options = {}) {
1737
- const mode = options.mode ?? "chunk";
2230
+ async function countSectionsWithResolvedDetector(input, section, options = {}) {
2231
+ options.mode;
1738
2232
  if (section === "all") {
1739
- const result = wc_default(input, options);
2233
+ const result = await wordCounterWithDetector(input, options);
1740
2234
  return {
1741
2235
  section,
1742
2236
  total: result.total,
@@ -1752,11 +2246,11 @@ function countSections(input, section, options = {}) {
1752
2246
  const frontmatterText = parsed.frontmatter ?? "";
1753
2247
  const contentText = parsed.content ?? "";
1754
2248
  let items = [];
1755
- if (section === "frontmatter") items = buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
1756
- else if (section === "content") items = buildSingleItem("content", contentText, mode, options, "content");
1757
- else if (section === "split") items = [...buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter"), ...buildSingleItem("content", contentText, mode, options, "content")];
1758
- else if (section === "per-key") items = buildPerKeyItems(parsed.data, mode, options);
1759
- else if (section === "split-per-key") items = [...buildPerKeyItems(parsed.data, mode, options), ...buildSingleItem("content", contentText, mode, options, "content")];
2249
+ if (section === "frontmatter") items = await buildSingleItem("frontmatter", frontmatterText, options, "frontmatter");
2250
+ else if (section === "content") items = await buildSingleItem("content", contentText, options, "content");
2251
+ else if (section === "split") items = [...await buildSingleItem("frontmatter", frontmatterText, options, "frontmatter"), ...await buildSingleItem("content", contentText, options, "content")];
2252
+ else if (section === "per-key") items = await buildPerKeyItems(parsed.data, options);
2253
+ else if (section === "split-per-key") items = [...await buildPerKeyItems(parsed.data, options), ...await buildSingleItem("content", contentText, options, "content")];
1760
2254
  return {
1761
2255
  section,
1762
2256
  total: sumTotals(items),
@@ -1764,7 +2258,207 @@ function countSections(input, section, options = {}) {
1764
2258
  items
1765
2259
  };
1766
2260
  }
1767
-
2261
+ const LATIN_WASM_MIN_CONFIDENCE = .75;
2262
+ const HANI_WASM_MIN_CONFIDENCE = .9;
2263
+ const LATIN_SCRIPT_REGEX = /\p{Script=Latin}/u;
2264
+ const HAN_SCRIPT_REGEX = /\p{Script=Han}/u;
2265
+ const DETECTOR_ROUTE_POLICIES = {
2266
+ [DEFAULT_LOCALE]: {
2267
+ routeTag: DEFAULT_LOCALE,
2268
+ minScriptChars: 24,
2269
+ minConfidence: LATIN_WASM_MIN_CONFIDENCE,
2270
+ requireReliable: true
2271
+ },
2272
+ [DEFAULT_HAN_TAG]: {
2273
+ routeTag: DEFAULT_HAN_TAG,
2274
+ minScriptChars: 12,
2275
+ minConfidence: HANI_WASM_MIN_CONFIDENCE,
2276
+ requireReliable: true
2277
+ }
2278
+ };
2279
+ function isAmbiguousDetectorRoute(locale) {
2280
+ return locale === "und-Latn" || locale === "und-Hani";
2281
+ }
2282
+ function countScriptBearingCharsForRoute(text, routeTag) {
2283
+ const matcher = routeTag === "und-Hani" ? HAN_SCRIPT_REGEX : LATIN_SCRIPT_REGEX;
2284
+ let count = 0;
2285
+ for (const char of text) if (matcher.test(char)) count += 1;
2286
+ return count;
2287
+ }
2288
+ function shouldRunWasmDetector(text, routeTag) {
2289
+ const policy = DETECTOR_ROUTE_POLICIES[routeTag];
2290
+ return countScriptBearingCharsForRoute(text, routeTag) >= policy.minScriptChars;
2291
+ }
2292
+ function normalizeDetectorSampleForRoute(text, routeTag) {
2293
+ const matcher = routeTag === "und-Hani" ? HAN_SCRIPT_REGEX : LATIN_SCRIPT_REGEX;
2294
+ return [...text].map((char) => {
2295
+ if (matcher.test(char)) return char;
2296
+ if (/\s/u.test(char)) return " ";
2297
+ return " ";
2298
+ }).join("").replace(/\s+/g, " ").trim();
2299
+ }
2300
+ //#endregion
2301
+ //#region src/detector/whatlang-wasm.ts
2302
+ const GENERATED_FOLDER_NAME = "wasm-language-detector";
2303
+ const GENERATED_MODULE_FILE = "language_detector.js";
2304
+ const MAX_SEARCH_DEPTH = 8;
2305
+ const requireFromHere = createRequire(import.meta.url);
2306
+ const WASM_DETECTOR_RUNTIME_UNAVAILABLE_MESSAGE = "WASM detector runtime is unavailable. Run `bun run build:wasm` to generate it.";
2307
+ let modulePromise = null;
2308
+ function resolveCandidateModulePaths() {
2309
+ const moduleDir = dirname(fileURLToPath(import.meta.url));
2310
+ const candidates = /* @__PURE__ */ new Set();
2311
+ let currentDir = moduleDir;
2312
+ for (let depth = 0; depth < MAX_SEARCH_DEPTH; depth += 1) {
2313
+ candidates.add(join(currentDir, GENERATED_FOLDER_NAME, GENERATED_MODULE_FILE));
2314
+ candidates.add(join(currentDir, "generated", GENERATED_FOLDER_NAME, GENERATED_MODULE_FILE));
2315
+ const parentDir = dirname(currentDir);
2316
+ if (parentDir === currentDir) break;
2317
+ currentDir = parentDir;
2318
+ }
2319
+ return [...candidates];
2320
+ }
2321
+ function resolveWhatlangWasmModulePath() {
2322
+ for (const candidate of resolveCandidateModulePaths()) if (existsSync(candidate)) return candidate;
2323
+ throw new Error(WASM_DETECTOR_RUNTIME_UNAVAILABLE_MESSAGE);
2324
+ }
2325
+ async function loadWhatlangWasmModule() {
2326
+ if (!modulePromise) modulePromise = (async () => {
2327
+ return requireFromHere(resolveWhatlangWasmModulePath());
2328
+ })();
2329
+ return modulePromise;
2330
+ }
2331
+ async function detectWithWhatlangWasm(text, routeTag) {
2332
+ return (await loadWhatlangWasmModule()).detect_language(text, routeTag);
2333
+ }
2334
+ //#endregion
2335
+ //#region src/detector/whatlang-map.ts
2336
+ const LATIN_LANGUAGE_TAGS = {
2337
+ cat: "ca",
2338
+ ces: "cs",
2339
+ dan: "da",
2340
+ deu: "de",
2341
+ eng: "en",
2342
+ fin: "fi",
2343
+ fra: "fr",
2344
+ hun: "hu",
2345
+ ita: "it",
2346
+ lat: "la",
2347
+ nld: "nl",
2348
+ pol: "pl",
2349
+ por: "pt",
2350
+ ron: "ro",
2351
+ spa: "es",
2352
+ swe: "sv",
2353
+ tur: "tr"
2354
+ };
2355
+ const HANI_LANGUAGE_TAGS = {
2356
+ cmn: "zh",
2357
+ jpn: "ja"
2358
+ };
2359
+ function hasSupportedScript(result, routeTag) {
2360
+ if (routeTag === "und-Latn") return result.script === "Latin";
2361
+ return result.script === "Mandarin";
2362
+ }
2363
+ function remapLanguageTag(lang, routeTag) {
2364
+ if (routeTag === "und-Latn") return LATIN_LANGUAGE_TAGS[lang];
2365
+ return HANI_LANGUAGE_TAGS[lang];
2366
+ }
2367
+ function remapWhatlangResult(result, routeTag) {
2368
+ if (!hasSupportedScript(result, routeTag)) return null;
2369
+ const tag = remapLanguageTag(result.lang, routeTag);
2370
+ if (!tag) return null;
2371
+ return {
2372
+ tag,
2373
+ confidence: result.confidence,
2374
+ reliable: result.reliable,
2375
+ source: "wasm"
2376
+ };
2377
+ }
2378
+ function getDetectorFallbackTag(routeTag) {
2379
+ return routeTag === "und-Hani" ? DEFAULT_HAN_TAG : DEFAULT_LOCALE;
2380
+ }
2381
+ //#endregion
2382
+ //#region src/detector/wasm.ts
2383
+ function shouldAcceptDetectorTag(routeTag, confidence, reliable) {
2384
+ const policy = DETECTOR_ROUTE_POLICIES[routeTag];
2385
+ if (policy.requireReliable && reliable !== true) return false;
2386
+ if (confidence === void 0) return false;
2387
+ return confidence >= policy.minConfidence;
2388
+ }
2389
+ function buildDetectorWindows(chunks) {
2390
+ const windows = [];
2391
+ for (let index = 0; index < chunks.length; index += 1) {
2392
+ const chunk = chunks[index];
2393
+ if (!chunk || !isAmbiguousDetectorRoute(chunk.locale)) continue;
2394
+ const previousWindow = windows[windows.length - 1];
2395
+ if (previousWindow && previousWindow.routeTag === chunk.locale && previousWindow.endIndex === index - 1) {
2396
+ previousWindow.endIndex = index;
2397
+ previousWindow.text += chunk.text;
2398
+ continue;
2399
+ }
2400
+ windows.push({
2401
+ routeTag: chunk.locale,
2402
+ startIndex: index,
2403
+ endIndex: index,
2404
+ text: chunk.text
2405
+ });
2406
+ }
2407
+ return windows;
2408
+ }
2409
+ async function resolveWindowLocale(window) {
2410
+ if (!shouldRunWasmDetector(window.text, window.routeTag)) return window.routeTag;
2411
+ const rawResult = await detectWithWhatlangWasm(window.text, window.routeTag);
2412
+ const rawRemapped = rawResult ? remapWhatlangResult(rawResult, window.routeTag) : null;
2413
+ const normalizedSample = normalizeDetectorSampleForRoute(window.text, window.routeTag);
2414
+ const normalizedResult = normalizedSample.length > 0 && normalizedSample !== window.text ? await detectWithWhatlangWasm(normalizedSample, window.routeTag) : null;
2415
+ const normalizedRemapped = normalizedResult ? remapWhatlangResult(normalizedResult, window.routeTag) : null;
2416
+ const candidates = [rawRemapped, normalizedRemapped].filter((value) => value !== null);
2417
+ if (candidates.length === 0) return getDetectorFallbackTag(window.routeTag);
2418
+ const strongestCandidate = candidates.reduce((best, current) => {
2419
+ if (!best) return current;
2420
+ return (current.confidence ?? 0) > (best.confidence ?? 0) ? current : best;
2421
+ }, candidates[0]);
2422
+ if (strongestCandidate && shouldAcceptDetectorTag(window.routeTag, strongestCandidate.confidence, strongestCandidate.reliable)) return strongestCandidate.tag;
2423
+ if (window.routeTag === "und-Latn" && rawRemapped && normalizedRemapped && rawRemapped.tag === normalizedRemapped.tag) {
2424
+ if (Math.max(rawRemapped.confidence ?? 0, normalizedRemapped.confidence ?? 0) >= .7) return rawRemapped.tag;
2425
+ }
2426
+ return getDetectorFallbackTag(window.routeTag);
2427
+ }
2428
+ async function segmentTextByLocaleWithWasmDetector(text, options = {}) {
2429
+ const chunks = segmentTextByLocale(text, options);
2430
+ const resolved = [...chunks];
2431
+ const windows = buildDetectorWindows(chunks);
2432
+ for (const window of windows) {
2433
+ const resolvedLocale = await resolveWindowLocale(window);
2434
+ for (let index = window.startIndex; index <= window.endIndex; index += 1) {
2435
+ const chunk = resolved[index];
2436
+ if (!chunk) continue;
2437
+ resolved[index] = {
2438
+ ...chunk,
2439
+ locale: resolvedLocale
2440
+ };
2441
+ }
2442
+ }
2443
+ return resolved;
2444
+ }
2445
+ async function wordCounterWithWasmDetector(text, options = {}) {
2446
+ return buildWordCounterResultFromChunks(await segmentTextByLocaleWithWasmDetector(text, options), options);
2447
+ }
2448
+ async function countSectionsWithWasmDetector(input, section, options = {}) {
2449
+ return countSectionsWithResolvedDetector(input, section, options);
2450
+ }
2451
+ function resolveDetectorMode(mode) {
2452
+ return mode ?? "regex";
2453
+ }
2454
+ async function wordCounterWithDetector(text, options = {}) {
2455
+ if (resolveDetectorMode(options.detector) === "wasm") return wordCounterWithWasmDetector(text, options);
2456
+ return wordCounterWithRegexDetector(text, options);
2457
+ }
2458
+ async function countSectionsWithDetector(input, section, options = {}) {
2459
+ if (resolveDetectorMode(options.detector) === "wasm") return countSectionsWithWasmDetector(input, section, options);
2460
+ return countSectionsWithRegexDetector(input, section, options);
2461
+ }
1768
2462
  //#endregion
1769
2463
  //#region src/cli/batch/aggregate.ts
1770
2464
  function mergeWordCounterResult(left, right, preserveCollectorSegments) {
@@ -1978,7 +2672,6 @@ function finalizeBatchSummaryFromFileResults(files, section, wcOptions, options
1978
2672
  aggregate: section === "all" ? aggregateWordCounterResults(files.map((file) => file.result), preserveCollectorSegments) : aggregateSectionedResults(files.map((file) => file.result), preserveCollectorSegments)
1979
2673
  };
1980
2674
  }
1981
-
1982
2675
  //#endregion
1983
2676
  //#region src/cli/batch/jobs/queue.ts
1984
2677
  async function runBoundedQueue(total, requestedJobs, worker) {
@@ -1998,7 +2691,6 @@ async function runBoundedQueue(total, requestedJobs, worker) {
1998
2691
  await Promise.all(Array.from({ length: concurrency }, () => runWorker()));
1999
2692
  return results;
2000
2693
  }
2001
-
2002
2694
  //#endregion
2003
2695
  //#region src/cli/path/load.ts
2004
2696
  function isProbablyBinary(buffer) {
@@ -2015,7 +2707,6 @@ function isProbablyBinary(buffer) {
2015
2707
  }
2016
2708
  return suspicious / sampleSize > .3;
2017
2709
  }
2018
-
2019
2710
  //#endregion
2020
2711
  //#region src/cli/batch/jobs/read-input.ts
2021
2712
  async function readBatchInput(path, options) {
@@ -2046,10 +2737,10 @@ async function readBatchInput(path, options) {
2046
2737
  content: buffer.toString("utf8")
2047
2738
  };
2048
2739
  }
2049
-
2050
2740
  //#endregion
2051
2741
  //#region src/cli/batch/jobs/load-count.ts
2052
2742
  async function countBatchInputsWithJobs(filePaths, options) {
2743
+ const detectorMode = options.detectorMode ?? "regex";
2053
2744
  const limits = resolveBatchJobsLimit();
2054
2745
  const total = filePaths.length;
2055
2746
  let completed = 0;
@@ -2072,7 +2763,13 @@ async function countBatchInputsWithJobs(filePaths, options) {
2072
2763
  }
2073
2764
  };
2074
2765
  }
2075
- const result = options.section === "all" ? wc_default(loaded.content, options.wcOptions) : countSections(loaded.content, options.section, options.wcOptions);
2766
+ const result = detectorMode === "regex" ? options.section === "all" ? wc_default(loaded.content, options.wcOptions) : countSections(loaded.content, options.section, options.wcOptions) : options.section === "all" ? await wordCounterWithDetector(loaded.content, {
2767
+ ...options.wcOptions,
2768
+ detector: detectorMode
2769
+ }) : await countSectionsWithDetector(loaded.content, options.section, {
2770
+ ...options.wcOptions,
2771
+ detector: detectorMode
2772
+ });
2076
2773
  if (!options.preserveCollectorSegments) compactCollectorSegmentsInCountResult(result);
2077
2774
  completed += 1;
2078
2775
  options.onFileProcessed?.({
@@ -2101,47 +2798,6 @@ async function countBatchInputsWithJobs(filePaths, options) {
2101
2798
  skipped
2102
2799
  };
2103
2800
  }
2104
-
2105
- //#endregion
2106
- //#region src/cli/batch/jobs/load-count-worker.ts
2107
/**
 * Signals that the worker-based batch route cannot be used.
 * Kept as a hoisted `var` binding, exactly as in the bundled output.
 */
var WorkerRouteUnavailableError = class extends Error {};

/**
 * Reports whether `error` is a worker/module-loading failure identified by
 * one of the known error codes or message fragments below.
 *
 * @param {unknown} error - Value caught from the worker route.
 * @returns {boolean} `true` for a known code or message; `false` otherwise,
 *   including for every non-object value.
 */
function isFallbackFriendlyWorkerError(error) {
  if (typeof error !== "object" || error === null) return false;
  const code = "code" in error ? String(error.code) : "";
  const fallbackCodes = [
    "ERR_WORKER_PATH",
    "ERR_WORKER_UNSUPPORTED_EXTENSION",
    "ERR_UNKNOWN_FILE_EXTENSION",
    "ERR_MODULE_NOT_FOUND"
  ];
  if (fallbackCodes.includes(code)) return true;
  const message = error instanceof Error ? error.message : String(error);
  return message.includes("Unknown file extension") || message.includes("Cannot find module");
}

/**
 * Counts batch inputs through the worker pool loaded from `./worker-pool.mjs`.
 *
 * Every wrapped error carries the original failure as `cause`, so the
 * underlying stack is not lost when the error is translated.
 *
 * @param {string[]} filePaths - Files handed to the worker pool.
 * @param {object} options - Reads `jobs`, `section`, `wcOptions`,
 *   `preserveCollectorSegments` and `onFileProcessed`.
 * @returns {Promise<*>} The value resolved by `countBatchInputsWithWorkerPool`.
 * @throws {WorkerRouteUnavailableError} When the route is disabled through the
 *   environment, the pool module fails to load, or the pool reports an
 *   unavailable / fallback-friendly failure.
 * @throws {Error} For fatal task errors: the result of
 *   `createResourceLimitError` for `EMFILE`/`ENFILE`, otherwise a plain
 *   `Error` with the same message. Any other error is rethrown unchanged.
 */
async function countBatchInputsWithWorkerJobs(filePaths, options) {
  const { env } = process;
  if (env.WORD_COUNTER_DISABLE_WORKER_JOBS === "1" || env.WORD_COUNTER_DISABLE_EXPERIMENTAL_WORKERS === "1") {
    throw new WorkerRouteUnavailableError("Worker route disabled by environment.");
  }
  let workerPoolModule;
  try {
    workerPoolModule = await import("./worker-pool.mjs");
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new WorkerRouteUnavailableError(`Worker route unavailable: ${detail}`, { cause: error });
  }
  try {
    return await workerPoolModule.countBatchInputsWithWorkerPool({
      filePaths,
      jobs: options.jobs,
      section: options.section,
      wcOptions: options.wcOptions,
      preserveCollectorSegments: options.preserveCollectorSegments,
      onFileProcessed: options.onFileProcessed
    });
  } catch (error) {
    if (error instanceof workerPoolModule.WorkerPoolTaskFatalError) {
      // File-descriptor exhaustion gets the dedicated resource-limit error.
      if (error.code === "EMFILE" || error.code === "ENFILE") {
        throw createResourceLimitError(error.path, {
          code: error.code,
          message: error.message
        }, options.jobs, resolveBatchJobsLimit());
      }
      throw new Error(error.message, { cause: error });
    }
    if (error instanceof workerPoolModule.WorkerPoolUnavailableError || isFallbackFriendlyWorkerError(error)) {
      const detail = error instanceof Error ? error.message : String(error);
      throw new WorkerRouteUnavailableError(`Worker route unavailable: ${detail}`, { cause: error });
    }
    throw error;
  }
}
2144
-
2145
2801
  //#endregion
2146
2802
  //#region src/cli/batch/jobs/render.ts
2147
2803
  function finalizeBatchJobsSummary(files, section, wcOptions, options = {}) {
@@ -2150,7 +2806,6 @@ function finalizeBatchJobsSummary(files, section, wcOptions, options = {}) {
2150
2806
  preserveCollectorSegments: options.preserveCollectorSegments
2151
2807
  });
2152
2808
  }
2153
-
2154
2809
  //#endregion
2155
2810
  //#region src/cli/path/resolve.ts
2156
2811
  async function expandDirectory(rootPath, directoryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats) {
@@ -2353,7 +3008,6 @@ async function resolveBatchFilePaths(pathInputs, options) {
2353
3008
  skipped
2354
3009
  };
2355
3010
  }
2356
-
2357
3011
  //#endregion
2358
3012
  //#region src/cli/progress/reporter.ts
2359
3013
  const PROGRESS_BAR_WIDTH = 20;
@@ -2457,7 +3111,6 @@ function createBatchProgressReporter(options) {
2457
3111
  }
2458
3112
  };
2459
3113
  }
2460
-
2461
3114
  //#endregion
2462
3115
  //#region src/cli/batch/run.ts
2463
3116
  async function runBatchCount(options) {
@@ -2521,6 +3174,7 @@ async function runBatchCount(options) {
2521
3174
  counted = await countBatchInputsWithWorkerJobs(resolved.files, {
2522
3175
  jobs: options.jobs,
2523
3176
  section: options.section,
3177
+ detectorMode: options.wcOptions.detector ?? "regex",
2524
3178
  wcOptions: options.wcOptions,
2525
3179
  preserveCollectorSegments: options.preserveCollectorSegments,
2526
3180
  onFileProcessed: (snapshot) => {
@@ -2544,6 +3198,7 @@ async function runBatchCount(options) {
2544
3198
  counted = await countBatchInputsWithJobs(resolved.files, {
2545
3199
  jobs: options.jobs,
2546
3200
  section: options.section,
3201
+ detectorMode: options.wcOptions.detector ?? "regex",
2547
3202
  wcOptions: options.wcOptions,
2548
3203
  preserveCollectorSegments: options.preserveCollectorSegments,
2549
3204
  onFileProcessed: (snapshot) => {
@@ -2555,6 +3210,7 @@ async function runBatchCount(options) {
2555
3210
  counted = await countBatchInputsWithJobs(resolved.files, {
2556
3211
  jobs: options.jobs,
2557
3212
  section: options.section,
3213
+ detectorMode: options.wcOptions.detector ?? "regex",
2558
3214
  wcOptions: options.wcOptions,
2559
3215
  preserveCollectorSegments: options.preserveCollectorSegments,
2560
3216
  onFileProcessed: (snapshot) => {
@@ -2609,19 +3265,16 @@ async function runBatchCount(options) {
2609
3265
  });
2610
3266
  return summary;
2611
3267
  }
2612
-
2613
3268
  //#endregion
2614
3269
  //#region src/cli/batch/jobs/strategy.ts
2615
3270
  function resolveBatchJobsStrategy(_jobs) {
2616
3271
  return "load-count";
2617
3272
  }
2618
-
2619
3273
  //#endregion
2620
3274
  //#region src/utils/show-singular-or-plural-word.ts
2621
3275
  function showSingularOrPluralWord(count, word) {
2622
3276
  return `${count} ${word}${count === 1 ? "" : "s"}`;
2623
3277
  }
2624
-
2625
3278
  //#endregion
2626
3279
  //#region src/cli/output/render.ts
2627
3280
  function getCountUnit(mode) {
@@ -2766,7 +3419,6 @@ function renderPerFileStandard(summary, labels, resolveTotalOfOverride) {
2766
3419
  }
2767
3420
  renderStandardResult(summary.aggregate, labels.overall, resolveTotalOfOverride?.(summary.aggregate));
2768
3421
  }
2769
-
2770
3422
  //#endregion
2771
3423
  //#region src/cli/output/normalize-base.ts
2772
3424
  function normalizeWordCounterResultBase(result) {
@@ -2805,7 +3457,6 @@ function normalizeBatchSummaryBase(summary) {
2805
3457
  normalizeResultBase(summary.aggregate);
2806
3458
  return summary;
2807
3459
  }
2808
-
2809
3460
  //#endregion
2810
3461
  //#region src/cli/runtime/options.ts
2811
3462
  function hasPathInput(pathValues) {
@@ -2904,6 +3555,7 @@ function resolveLatinHintRules(options) {
2904
3555
  }
2905
3556
  function resolveCountRunOptions(options) {
2906
3557
  const useSection = options.section !== "all";
3558
+ const detectorMode = options.detector ?? "regex";
2907
3559
  const totalOfParts = options.totalOf;
2908
3560
  const requestedNonWords = Boolean(options.nonWords || options.includeWhitespace || options.misc);
2909
3561
  const collectNonWordsForOverride = requiresNonWordCollection(totalOfParts);
@@ -2912,10 +3564,12 @@ function resolveCountRunOptions(options) {
2912
3564
  const enableWhitespace = Boolean(options.includeWhitespace || options.misc || collectWhitespaceForOverride);
2913
3565
  return {
2914
3566
  useSection,
3567
+ detectorMode,
2915
3568
  totalOfParts,
2916
3569
  requestedNonWords,
2917
3570
  shouldNormalizeBaseOutput: !requestedNonWords && enableNonWords,
2918
3571
  wcOptions: {
3572
+ detector: detectorMode,
2919
3573
  mode: options.mode,
2920
3574
  latinLanguageHint: options.latinLanguage,
2921
3575
  latinTagHint: options.latinTag,
@@ -2932,7 +3586,6 @@ function resolveCountRunOptions(options) {
2932
3586
  function formatInputReadError(error) {
2933
3587
  return `Failed to read input: ${error instanceof Error ? error.message : String(error)}`;
2934
3588
  }
2935
-
2936
3589
  //#endregion
2937
3590
  //#region src/cli/runtime/batch.ts
2938
3591
  async function executeBatchCount({ argv, options, runtime, resolved, debug, teeEnabled }) {
@@ -3070,7 +3723,6 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
3070
3723
  }
3071
3724
  renderStandardResult(summary.aggregate, labels.overall, aggregateTotalOfOverride);
3072
3725
  }
3073
-
3074
3726
  //#endregion
3075
3727
  //#region src/cli/runtime/input.ts
3076
3728
  async function readStdin() {
@@ -3088,7 +3740,6 @@ async function resolveInput(textTokens) {
3088
3740
  if (textTokens.length > 0) return textTokens.join(" ");
3089
3741
  return readStdin();
3090
3742
  }
3091
-
3092
3743
  //#endregion
3093
3744
  //#region src/cli/runtime/single.ts
3094
3745
  async function executeSingleCount({ textTokens, options, resolved }) {
@@ -3100,7 +3751,13 @@ async function executeSingleCount({ textTokens, options, resolved }) {
3100
3751
  }
3101
3752
  const trimmed = input.trim();
3102
3753
  if (!trimmed) throw new Error("No input provided. Pass text, pipe stdin, or use --path.");
3103
- const result = resolved.useSection ? countSections(trimmed, options.section, resolved.wcOptions) : wc_default(trimmed, resolved.wcOptions);
3754
+ const result = resolved.useSection ? resolved.detectorMode === "regex" ? countSections(trimmed, options.section, resolved.wcOptions) : await countSectionsWithDetector(trimmed, options.section, {
3755
+ ...resolved.wcOptions,
3756
+ detector: resolved.detectorMode
3757
+ }) : resolved.detectorMode === "regex" ? wc_default(trimmed, resolved.wcOptions) : await wordCounterWithDetector(trimmed, {
3758
+ ...resolved.wcOptions,
3759
+ detector: resolved.detectorMode
3760
+ });
3104
3761
  const totalOfOverride = resolveTotalOfOverride(result, resolved.totalOfParts);
3105
3762
  const displayResult = resolved.shouldNormalizeBaseOutput ? normalizeResultBase(result) : result;
3106
3763
  if (options.format === "raw") {
@@ -3129,17 +3786,23 @@ async function executeSingleCount({ textTokens, options, resolved }) {
3129
3786
  }
3130
3787
  renderStandardResult(displayResult, labels.overall, totalOfOverride);
3131
3788
  }
3132
-
3133
3789
  //#endregion
3134
3790
  //#region src/command.ts
3135
3791
  async function runCli(argv = process.argv, runtime = {}) {
3792
+ if (isExplicitDoctorInvocation(argv)) {
3793
+ await executeDoctorCommand({
3794
+ argv,
3795
+ runtime: runtime.doctor
3796
+ });
3797
+ return;
3798
+ }
3136
3799
  const program = new Command();
3137
3800
  const parseMode = (value) => {
3138
3801
  const normalized = normalizeMode(value);
3139
3802
  if (!normalized) throw new Error(`Invalid mode: ${value}`);
3140
3803
  return normalized;
3141
3804
  };
3142
- program.name("word-counter").description("Locale-aware word counting powered by Intl.Segmenter.").version(getFormattedVersionLabel(), "-v, --version", "output the version number");
3805
+ program.name("word-counter").description("Locale-aware word counting powered by Intl.Segmenter.").version(getFormattedVersionLabel(), "-v, --version", "output the version number").addHelpText("after", "\nCommands:\n doctor [options] report runtime diagnostics for this host");
3143
3806
  configureProgramOptions(program, parseMode);
3144
3807
  program.action(async (textTokens, options) => {
3145
3808
  if (options.printJobsLimit) {
@@ -3219,14 +3882,19 @@ async function runCli(argv = process.argv, runtime = {}) {
3219
3882
  program.error(import_picocolors.default.red(message));
3220
3883
  return;
3221
3884
  }
3885
+ if (message === "WASM detector runtime is unavailable. Run `bun run build:wasm` to generate it.") {
3886
+ console.error(import_picocolors.default.red(message));
3887
+ process.exitCode = 1;
3888
+ return;
3889
+ }
3222
3890
  program.error(message);
3223
3891
  } finally {
3224
3892
  await debug.close();
3225
3893
  }
3226
3894
  });
3227
3895
  await program.parseAsync(argv);
3896
+ if (process.exitCode === void 0) process.exitCode = 0;
3228
3897
  }
3229
-
3230
3898
  //#endregion
3231
3899
  //#region src/bin.ts
3232
3900
  runCli().catch((error) => {
@@ -3234,7 +3902,7 @@ runCli().catch((error) => {
3234
3902
  console.error("Failed to run CLI:", message);
3235
3903
  process.exitCode = 1;
3236
3904
  });
3237
-
3238
3905
  //#endregion
3239
- export { };
3906
+ export {};
3907
+
3240
3908
  //# sourceMappingURL=bin.mjs.map