@dev-pi2pie/word-counter 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -121,17 +121,25 @@ word-counter --path ./examples/test-case-multi-files-support --keep-progress
121
121
 
122
122
  Progress is transient by default, auto-disabled for single-input runs, and suppressed in `--format raw` and `--format json`.
123
123
 
124
- ### Stable Path Resolution Contract (`#26`)
124
+ ### Stable Path Resolution Contract
125
125
 
126
126
  - Repeated `--path` values are accepted as mixed inputs (file + directory).
127
127
  - In `--path-mode auto` (default), directory inputs are expanded to files (recursive unless `--no-recursive`).
128
- - In `--path-mode manual`, directory inputs are not expanded and are skipped as non-regular files.
129
- - Extension filters apply only to files discovered from directory expansion.
130
- - Direct file inputs are always considered regardless of `--include-ext` / `--exclude-ext`.
128
+ - In `--path-mode manual`, `--path` values are treated as literal file inputs; `--path <dir>` is not supported and is skipped as `not a regular file`.
129
+ - Extension and regex filters apply only to files discovered from directory expansion.
130
+ - Direct file inputs are always considered regardless of `--include-ext` / `--exclude-ext` / `--regex`.
131
131
  - Overlap dedupe is by resolved absolute file path.
132
132
  - If the same file is discovered multiple ways (repeated roots, nested roots, explicit file + directory), it is counted once.
133
133
  - Final processing order is deterministic: resolved files are sorted by absolute path ascending before load/count.
134
134
 
135
+ Path mode examples:
136
+
137
+ ```bash
138
+ word-counter --path ./examples/test-case-multi-files-support --path-mode auto
139
+ word-counter --path ./examples/test-case-multi-files-support --path-mode manual
140
+ word-counter --path ./examples/test-case-multi-files-support/a.md --path-mode manual
141
+ ```
142
+
135
143
  ### Extension Filters
136
144
 
137
145
  Use include/exclude filters for directory scans:
@@ -147,6 +155,28 @@ Direct file path example (filters do not block explicit file inputs):
147
155
  word-counter --path ./examples/test-case-multi-files-support/ignored.js --include-ext .md --exclude-ext .md
148
156
  ```
149
157
 
158
+ ### Regex Filter (`--regex`)
159
+
160
+ Use `--regex` to include only directory-scanned files whose root-relative path matches the given pattern:
161
+
162
+ ```bash
163
+ word-counter --path ./examples/test-case-multi-files-support --regex '^a\.md$'
163
+ word-counter --path ./examples/test-case-multi-files-support --regex '^nested/.*\.md$'
164
+ word-counter --path ./examples/test-case-multi-files-support --path ./examples --regex '\.md$'
166
+ ```
167
+
168
+ Regex behavior contract:
169
+
170
+ - `--regex` applies only to files discovered from `--path <dir>` expansion.
171
+ - Matching is performed against each file's path relative to the directory root it was discovered under.
172
+ - The same regex is applied across all provided directory roots.
173
+ - Direct file inputs are literal and are not blocked by regex filters.
174
+ - In `--path-mode manual`, directories are not expanded, so `--include-ext`, `--exclude-ext`, and `--regex` have no effect.
175
+ - `--regex` is single-use; repeated `--regex` flags fail fast with a misuse error.
176
+ - Empty or whitespace-only regex values are treated as no regex restriction.
177
+
178
+ For additional usage details and troubleshooting, see [`docs/regex-usage-guide.md`](docs/regex-usage-guide.md).
179
+
150
180
  ### Debugging Diagnostics (`--debug`)
151
181
 
152
182
  `--debug` remains the diagnostics gate and now defaults to `compact` event volume:
package/dist/esm/bin.mjs CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  import { Command, Option } from "commander";
4
4
  import { closeSync, createWriteStream, existsSync, mkdirSync, openSync, readFileSync, statSync } from "node:fs";
5
- import { basename, dirname, extname, join, relative, resolve } from "node:path";
5
+ import { basename, dirname, extname, join, relative, resolve, sep } from "node:path";
6
6
  import { fileURLToPath } from "node:url";
7
7
  import { readFile, readdir, stat } from "node:fs/promises";
8
8
  import { parseDocument } from "yaml";
@@ -191,6 +191,34 @@ function shouldIncludeFromDirectory(filePath, filter) {
191
191
  const extension = extname(filePath).toLowerCase();
192
192
  return filter.effectiveIncludeExtensions.has(extension);
193
193
  }
194
+ function buildDirectoryRegexFilter(pattern) {
// Builds the `{ sourcePattern, regex }` filter object from a raw pattern string.
// `regex` stays undefined when no restriction applies (pattern undefined or blank).
// Throws an Error whose message starts with "Invalid --regex pattern:" when the
// RegExp constructor rejects the pattern.
195
+ if (pattern === void 0) return {
196
+ sourcePattern: void 0,
197
+ regex: void 0
198
+ };
// Blank or whitespace-only input: keep the raw text but compile no regex.
199
+ if (pattern.trim().length === 0) return {
200
+ sourcePattern: pattern,
201
+ regex: void 0
202
+ };
// Compile without flags; a construction failure is rethrown with a --regex specific message.
203
+ try {
204
+ return {
205
+ sourcePattern: pattern,
206
+ regex: new RegExp(pattern)
207
+ };
208
+ } catch (error) {
209
+ const message = error instanceof Error ? error.message : String(error);
210
+ throw new Error(`Invalid --regex pattern: ${message}`);
211
+ }
212
+ }
213
+ function toDirectoryRelativePath(rootPath, filePath) {
// Returns `filePath` relative to `rootPath`, expressed with "/" separators.
// When the platform separator (`sep`) is already "/", the relative path is returned as is;
// otherwise every `sep` occurrence is replaced with "/".
214
+ const relativePath = relative(rootPath, filePath);
215
+ if (sep === "/") return relativePath;
216
+ return relativePath.split(sep).join("/");
217
+ }
218
+ function shouldIncludeFromDirectoryRegex(relativePath, filter) {
// Returns true when `filter.regex` is unset (no restriction); otherwise returns
// the result of testing `relativePath` against that regex.
219
+ if (!filter.regex) return true;
220
+ return filter.regex.test(relativePath);
221
+ }
194
222
 
195
223
  //#endregion
196
224
  //#region src/cli/total-of.ts
@@ -314,7 +342,7 @@ function collectPathValue(value, previous = []) {
314
342
  return [...previous, value];
315
343
  }
316
344
  function configureProgramOptions(program, parseMode) {
317
- program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies with --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of --debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory 
traversal").option("--quiet-skips", "hide skip diagnostics (applies when --debug is enabled)").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("-p, --path <path>", "read input from file or directory", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
345
+ program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies with --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of --debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in 
standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-skips", "hide skip diagnostics (applies when --debug is enabled)").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
318
346
  }
319
347
 
320
348
  //#endregion
@@ -387,8 +415,12 @@ var require_picocolors = /* @__PURE__ */ __commonJSMin(((exports, module) => {
387
415
  }));
388
416
 
389
417
  //#endregion
390
- //#region src/cli/program/version.ts
418
+ //#region src/cli/program/version-embedded.ts
391
419
  var import_picocolors = /* @__PURE__ */ __toESM(require_picocolors(), 1);
420
+ const EMBEDDED_PACKAGE_VERSION = "0.1.1";
421
+
422
+ //#endregion
423
+ //#region src/cli/program/version.ts
392
424
  function* candidateSearchRoots() {
393
425
  yield dirname(fileURLToPath(import.meta.url));
394
426
  const argvPath = process.argv[1];
@@ -412,13 +444,23 @@ function resolveVersionFromPath(start, maxLevels) {
412
444
  } catch {}
413
445
  return null;
414
446
  }
415
- function resolvePackageVersion() {
416
- const maxLevels = 8;
447
+ function normalizeVersion(value) {
// Returns `value` trimmed, or null when it is not a string or is empty after trimming.
448
+ if (typeof value !== "string") return null;
449
+ const trimmed = value.trim();
450
+ if (!trimmed) return null;
451
+ return trimmed;
452
+ }
453
+ function resolvePackageVersion(options = {}) {
454
+ const embeddedVersion = normalizeVersion(options.embeddedVersion ?? EMBEDDED_PACKAGE_VERSION);
455
+ if (embeddedVersion) return embeddedVersion;
456
+ const maxLevels = options.maxLevels ?? 8;
457
+ const resolveFromPath = options.resolveFromPath ?? resolveVersionFromPath;
458
+ const roots = options.candidateRoots ?? [...candidateSearchRoots()];
417
459
  const seen = /* @__PURE__ */ new Set();
418
- for (const root of candidateSearchRoots()) {
460
+ for (const root of roots) {
419
461
  if (seen.has(root)) continue;
420
462
  seen.add(root);
421
- const version = resolveVersionFromPath(root, maxLevels);
463
+ const version = normalizeVersion(resolveFromPath(root, maxLevels));
422
464
  if (version) return version;
423
465
  }
424
466
  return "0.0.0";
@@ -485,7 +527,7 @@ async function loadBatchInputs(filePaths) {
485
527
 
486
528
  //#endregion
487
529
  //#region src/cli/path/resolve.ts
488
- async function expandDirectory(directoryPath, recursive, filter, skipped, debug, stats) {
530
+ async function expandDirectory(rootPath, directoryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats) {
489
531
  let entries;
490
532
  try {
491
533
  entries = await readdir(directoryPath, {
@@ -514,7 +556,7 @@ async function expandDirectory(directoryPath, recursive, filter, skipped, debug,
514
556
  for (const entry of sortedEntries) {
515
557
  const entryPath = resolve(directoryPath, entry.name);
516
558
  if (entry.isFile()) {
517
- if (!shouldIncludeFromDirectory(entryPath, filter)) {
559
+ if (!shouldIncludeFromDirectory(entryPath, extensionFilter)) {
518
560
  skipped.push({
519
561
  path: entryPath,
520
562
  reason: "extension excluded"
@@ -526,6 +568,19 @@ async function expandDirectory(directoryPath, recursive, filter, skipped, debug,
526
568
  stats.filterExcluded += 1;
527
569
  continue;
528
570
  }
571
+ const relativePath = toDirectoryRelativePath(rootPath, entryPath);
572
+ if (!shouldIncludeFromDirectoryRegex(relativePath, regexFilter)) {
573
+ if (recordRegexExcluded(entryPath)) {
574
+ debug.emit("path.resolve.regex.excluded", {
575
+ path: entryPath,
576
+ relativePath,
577
+ pattern: regexFilter.sourcePattern,
578
+ reason: "regex excluded"
579
+ }, { verbosity: "verbose" });
580
+ stats.regexExcluded += 1;
581
+ }
582
+ continue;
583
+ }
529
584
  files.push(entryPath);
530
585
  stats.directoryIncluded += 1;
531
586
  debug.emit("path.resolve.expand.include", {
@@ -535,7 +590,7 @@ async function expandDirectory(directoryPath, recursive, filter, skipped, debug,
535
590
  continue;
536
591
  }
537
592
  if (!entry.isDirectory() || !recursive) continue;
538
- appendAll(files, await expandDirectory(entryPath, recursive, filter, skipped, debug, stats));
593
+ appendAll(files, await expandDirectory(rootPath, entryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats));
539
594
  }
540
595
  debug.emit("path.resolve.expand.complete", {
541
596
  directory: directoryPath,
@@ -545,14 +600,17 @@ async function expandDirectory(directoryPath, recursive, filter, skipped, debug,
545
600
  }
546
601
  async function resolveBatchFilePaths(pathInputs, options) {
547
602
  const skipped = [];
603
+ const regexExcludedPaths = /* @__PURE__ */ new Set();
548
604
  const resolvedFiles = /* @__PURE__ */ new Set();
549
605
  const stats = {
550
606
  dedupeAccepted: 0,
551
607
  dedupeDuplicates: 0,
552
608
  filterExcluded: 0,
609
+ regexExcluded: 0,
553
610
  directoryIncluded: 0
554
611
  };
555
612
  const extensionFilter = options.extensionFilter ?? buildDirectoryExtensionFilter(void 0, void 0);
613
+ let regexFilter;
556
614
  const debug = options.debug ?? {
557
615
  enabled: false,
558
616
  verbosity: "compact",
@@ -562,9 +620,11 @@ async function resolveBatchFilePaths(pathInputs, options) {
562
620
  debug.emit("path.resolve.inputs", {
563
621
  inputs: pathInputs.length,
564
622
  pathMode: options.pathMode,
565
- recursive: options.recursive
623
+ recursive: options.recursive,
624
+ hasRegex: Boolean(options.directoryRegexPattern)
566
625
  });
567
626
  const addResolvedFile = (filePath, details) => {
627
+ regexExcludedPaths.delete(filePath);
568
628
  if (resolvedFiles.has(filePath)) {
569
629
  stats.dedupeDuplicates += 1;
570
630
  debug.emit("path.resolve.dedupe.duplicate", {
@@ -582,6 +642,15 @@ async function resolveBatchFilePaths(pathInputs, options) {
582
642
  input: details.input
583
643
  }, { verbosity: "verbose" });
584
644
  };
645
+ const getRegexFilter = () => {
646
+ if (!regexFilter) regexFilter = buildDirectoryRegexFilter(options.directoryRegexPattern);
647
+ return regexFilter;
648
+ };
649
+ const recordRegexExcluded = (filePath) => {
650
+ if (resolvedFiles.has(filePath)) return false;
651
+ regexExcludedPaths.add(filePath);
652
+ return true;
653
+ };
585
654
  for (const rawPath of pathInputs) {
586
655
  const targetPath = resolve(rawPath);
587
656
  debug.emit("path.resolve.input", {
@@ -604,11 +673,13 @@ async function resolveBatchFilePaths(pathInputs, options) {
604
673
  continue;
605
674
  }
606
675
  if (metadata.isDirectory() && options.pathMode === "auto") {
676
+ const effectiveRegexFilter = getRegexFilter();
607
677
  debug.emit("path.resolve.root.expand", {
608
678
  root: targetPath,
609
- recursive: options.recursive
679
+ recursive: options.recursive,
680
+ regex: effectiveRegexFilter.sourcePattern ?? null
610
681
  });
611
- const files = await expandDirectory(targetPath, options.recursive, extensionFilter, skipped, debug, stats);
682
+ const files = await expandDirectory(targetPath, targetPath, options.recursive, extensionFilter, effectiveRegexFilter, skipped, recordRegexExcluded, debug, stats);
612
683
  for (const file of files) addResolvedFile(file, {
613
684
  source: "directory",
614
685
  input: targetPath
@@ -631,9 +702,15 @@ async function resolveBatchFilePaths(pathInputs, options) {
631
702
  input: targetPath
632
703
  });
633
704
  }
705
+ for (const path of regexExcludedPaths) skipped.push({
706
+ path,
707
+ reason: "regex excluded"
708
+ });
634
709
  const files = [...resolvedFiles].sort((left, right) => left.localeCompare(right));
635
710
  debug.emit("path.resolve.filter.summary", {
636
- excluded: stats.filterExcluded,
711
+ excluded: stats.filterExcluded + stats.regexExcluded,
712
+ extensionExcluded: stats.filterExcluded,
713
+ regexExcluded: stats.regexExcluded,
637
714
  included: stats.directoryIncluded
638
715
  });
639
716
  debug.emit("path.resolve.dedupe.summary", {
@@ -2086,6 +2163,7 @@ async function runBatchCount(options) {
2086
2163
  pathMode: options.batchOptions.pathMode,
2087
2164
  recursive: options.batchOptions.recursive,
2088
2165
  extensionFilter: options.extensionFilter,
2166
+ directoryRegexPattern: options.batchOptions.directoryRegexPattern,
2089
2167
  debug: options.debug
2090
2168
  });
2091
2169
  const resolveElapsedMs = Date.now() - resolveStartedAtMs;
@@ -2362,6 +2440,27 @@ function normalizeBatchSummaryBase(summary) {
2362
2440
  function hasPathInput(pathValues) {
2363
2441
  return Array.isArray(pathValues) && pathValues.length > 0;
2364
2442
  }
2443
+ function countLongOptionOccurrences(argv, optionName) {
// Counts how often `optionName` appears in `argv`, either as a standalone token
// or in the inline `optionName=value` form.
// NOTE(review): scanning starts at index 2 — presumably to skip the runtime and
// script entries of a process.argv-style array; confirm against callers.
2444
+ let count = 0;
2445
+ for (let index = 2; index < argv.length; index += 1) {
2446
+ const token = argv[index];
2447
+ if (!token) continue;
// A bare `--` token ends the scan; nothing after it is counted.
2448
+ if (token === "--") break;
// Standalone form: count it and advance one extra position, which skips the
// token that follows (presumably the option's value).
2449
+ if (token === optionName) {
2450
+ count += 1;
2451
+ index += 1;
2452
+ continue;
2453
+ }
// Inline form `optionName=value`: counted without skipping any following token.
2454
+ if (token.startsWith(`${optionName}=`)) {
2455
+ count += 1;
2456
+ continue;
2457
+ }
2458
+ }
2459
+ return count;
2460
+ }
2461
+ function validateSingleRegexOptionUsage(argv) {
// Throws an Error when `--regex` occurs more than once in `argv`, as counted by
// countLongOptionOccurrences; returns nothing otherwise.
2462
+ if (countLongOptionOccurrences(argv, "--regex") > 1) throw new Error("`--regex` can only be provided once.");
2463
+ }
2365
2464
  function resolveBatchScope(argv) {
2366
2465
  let scope = "merged";
2367
2466
  for (const token of argv) {
@@ -2413,7 +2512,8 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
2413
2512
  scope: resolveBatchScope(argv),
2414
2513
  pathMode: options.pathMode,
2415
2514
  recursive: options.recursive,
2416
- quietSkips: Boolean(options.quietSkips)
2515
+ quietSkips: Boolean(options.quietSkips),
2516
+ directoryRegexPattern: options.regex
2417
2517
  };
2418
2518
  const extensionFilter = buildDirectoryExtensionFilter(options.includeExt, options.excludeExt);
2419
2519
  const debugEnabled = Boolean(options.debug);
@@ -2601,6 +2701,13 @@ async function runCli(argv = process.argv, runtime = {}) {
2601
2701
  program.error(import_picocolors.default.red("`--debug-report-tee` (alias: `--debug-tee`) requires `--debug-report`."));
2602
2702
  return;
2603
2703
  }
2704
+ try {
2705
+ validateSingleRegexOptionUsage(argv);
2706
+ } catch (error) {
2707
+ const message = error instanceof Error ? error.message : String(error);
2708
+ program.error(import_picocolors.default.red(message));
2709
+ return;
2710
+ }
2604
2711
  let debug;
2605
2712
  try {
2606
2713
  debug = createDebugChannel({