@dev-pi2pie/word-counter 0.1.4 → 0.1.5-canary.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,6 @@
1
1
  import { readFile } from "node:fs/promises";
2
2
  import { parentPort, workerData } from "node:worker_threads";
3
3
  import { parseDocument } from "yaml";
4
-
5
4
  //#region src/markdown/toml/arrays.ts
6
5
  function ensureArrayContainer(result, key) {
7
6
  const existing = result[key];
@@ -16,7 +15,6 @@ function flattenArrayTables(result) {
16
15
  result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
17
16
  }
18
17
  }
19
-
20
18
  //#endregion
21
19
  //#region src/markdown/toml/keys.ts
22
20
  function stripKeyQuotes(key) {
@@ -35,7 +33,6 @@ function normalizeKeyPath(key) {
35
33
  if (segments.some((segment) => !segment)) return null;
36
34
  return segments.join(".");
37
35
  }
38
-
39
36
  //#endregion
40
37
  //#region src/markdown/toml/strings.ts
41
38
  function stripInlineComment(line) {
@@ -84,7 +81,6 @@ function parseStringLiteral(value) {
84
81
  if (value.startsWith("'") && value.endsWith("'")) return value.slice(1, -1);
85
82
  return null;
86
83
  }
87
-
88
84
  //#endregion
89
85
  //#region src/markdown/toml/values.ts
90
86
  function parsePrimitive(raw) {
@@ -242,7 +238,6 @@ function toPlainText(value) {
242
238
  if (Array.isArray(value)) return value.map((item) => String(item)).join(", ");
243
239
  return String(value);
244
240
  }
245
-
246
241
  //#endregion
247
242
  //#region src/markdown/toml/parse-frontmatter.ts
248
243
  function parseTomlFrontmatter(frontmatter) {
@@ -326,7 +321,6 @@ function parseTomlFrontmatter(frontmatter) {
326
321
  flattenArrayTables(result);
327
322
  return result;
328
323
  }
329
-
330
324
  //#endregion
331
325
  //#region src/markdown/parse-markdown.ts
332
326
  const FENCE_TO_TYPE = {
@@ -461,7 +455,6 @@ function parseMarkdown(input) {
461
455
  frontmatterType: openingType
462
456
  };
463
457
  }
464
-
465
458
  //#endregion
466
459
  //#region src/wc/segmenter.ts
467
460
  const segmenterCache = /* @__PURE__ */ new Map();
@@ -490,13 +483,11 @@ function countCharsForLocale(text, locale) {
490
483
  for (const _segment of segmenter.segment(text)) count++;
491
484
  return count;
492
485
  }
493
-
494
486
  //#endregion
495
487
  //#region src/utils/append-all.ts
496
488
  function appendAll(target, source) {
497
489
  for (const item of source) target.push(item);
498
490
  }
499
-
500
491
  //#endregion
501
492
  //#region src/wc/non-words.ts
502
493
  const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
@@ -610,7 +601,6 @@ function createWhitespaceCounts() {
610
601
  other: 0
611
602
  };
612
603
  }
613
-
614
604
  //#endregion
615
605
  //#region src/wc/analyze.ts
616
606
  function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
@@ -710,7 +700,6 @@ function aggregateByLocale(chunks) {
710
700
  }
711
701
  return order.map((locale) => map.get(locale));
712
702
  }
713
-
714
703
  //#endregion
715
704
  //#region src/wc/mode.ts
716
705
  const MODE_ALIASES = {
@@ -778,10 +767,7 @@ function normalizeMode(input) {
778
767
  function resolveMode(input, fallback = "chunk") {
779
768
  return normalizeMode(input) ?? fallback;
780
769
  }
781
-
782
- //#endregion
783
- //#region src/wc/latin-hints.ts
784
- const DEFAULT_LATIN_HINT_RULES_SOURCE = [
770
+ const DEFAULT_LATIN_HINT_RULES = Object.freeze([
785
771
  {
786
772
  tag: "de",
787
773
  pattern: "[äöüÄÖÜß]"
@@ -818,13 +804,10 @@ const DEFAULT_LATIN_HINT_RULES_SOURCE = [
818
804
  tag: "is",
819
805
  pattern: "[ðÐþÞ]"
820
806
  }
821
- ];
822
- const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
823
-
807
+ ].map((rule) => Object.freeze({ ...rule })));
824
808
  //#endregion
825
809
  //#region src/wc/locale-detect.ts
826
810
  const DEFAULT_LOCALE = "und-Latn";
827
- const DEFAULT_HAN_TAG = "und-Hani";
828
811
  const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
829
812
  const regex = {
830
813
  hiragana: /\p{Script=Hiragana}/u,
@@ -940,18 +923,17 @@ function detectLocaleForChar(char, previousLocale, options = {}, context = resol
940
923
  if (regex.thai.test(char)) return "th";
941
924
  if (regex.han.test(char)) {
942
925
  if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
943
- return context.hanHint ?? DEFAULT_HAN_TAG;
926
+ return context.hanHint ?? "und-Hani";
944
927
  }
945
928
  if (regex.latin.test(char)) {
946
929
  const hintedLocale = detectLatinLocale(char, context);
947
- if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
948
- if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
930
+ if (hintedLocale !== "und-Latn") return hintedLocale;
931
+ if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== "und-Latn") return previousLocale;
949
932
  if (context.latinHint) return context.latinHint;
950
933
  return DEFAULT_LOCALE;
951
934
  }
952
935
  return null;
953
936
  }
954
-
955
937
  //#endregion
956
938
  //#region src/wc/segment.ts
957
939
  const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
@@ -988,7 +970,7 @@ function segmentTextByLocale(text, options = {}) {
988
970
  continue;
989
971
  }
990
972
  if (targetLocale !== currentLocale && detected !== null) {
991
- if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
973
+ if (currentLocale === "und-Latn" && isLatinLocale(targetLocale, context)) {
992
974
  const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
993
975
  if (promotionBreakIndex === -1) {
994
976
  currentLocale = targetLocale;
@@ -1055,7 +1037,6 @@ function mergeAdjacentChunks(chunks) {
1055
1037
  merged.push(last);
1056
1038
  return merged;
1057
1039
  }
1058
-
1059
1040
  //#endregion
1060
1041
  //#region src/wc/wc.ts
1061
1042
  function wordCounter(text, options = {}) {
@@ -1170,11 +1151,9 @@ function collectNonWordsAggregate(analyzed, enabled) {
1170
1151
  }
1171
1152
  return collection;
1172
1153
  }
1173
-
1174
1154
  //#endregion
1175
1155
  //#region src/wc/index.ts
1176
1156
  var wc_default = wordCounter;
1177
-
1178
1157
  //#endregion
1179
1158
  //#region src/markdown/section-count.ts
1180
1159
  function normalizeText(value) {
@@ -1239,7 +1218,6 @@ function countSections(input, section, options = {}) {
1239
1218
  items
1240
1219
  };
1241
1220
  }
1242
-
1243
1221
  //#endregion
1244
1222
  //#region src/cli/batch/aggregate.ts
1245
1223
  function stripCollectorSegmentsFromWordCounterResult(result) {
@@ -1256,7 +1234,6 @@ function compactCollectorSegmentsInCountResult(result) {
1256
1234
  }
1257
1235
  stripCollectorSegmentsFromWordCounterResult(result);
1258
1236
  }
1259
-
1260
1237
  //#endregion
1261
1238
  //#region src/cli/path/load.ts
1262
1239
  function isProbablyBinary(buffer) {
@@ -1273,7 +1250,6 @@ function isProbablyBinary(buffer) {
1273
1250
  }
1274
1251
  return suspicious / sampleSize > .3;
1275
1252
  }
1276
-
1277
1253
  //#endregion
1278
1254
  //#region src/cli/batch/jobs/worker/count-worker.ts
1279
1255
  const config = workerData;
@@ -1364,7 +1340,7 @@ parentPort.on("message", async (message) => {
1364
1340
  parentPort?.postMessage(response);
1365
1341
  }
1366
1342
  });
1367
-
1368
1343
  //#endregion
1369
- export { };
1344
+ export {};
1345
+
1370
1346
  //# sourceMappingURL=count-worker.mjs.map