@dev-pi2pie/word-counter 0.1.4 → 0.1.5-canary.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/cjs/index.cjs +7 -30
- package/dist/cjs/index.cjs.map +1 -1
- package/dist/esm/bin.mjs +487 -226
- package/dist/esm/bin.mjs.map +1 -1
- package/dist/esm/index.mjs +7 -29
- package/dist/esm/index.mjs.map +1 -1
- package/dist/esm/worker/count-worker.mjs +8 -32
- package/dist/esm/worker/count-worker.mjs.map +1 -1
- package/dist/esm/worker-pool.mjs +5 -3
- package/dist/esm/worker-pool.mjs.map +1 -1
- package/package.json +5 -5
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { readFile } from "node:fs/promises";
|
|
2
2
|
import { parentPort, workerData } from "node:worker_threads";
|
|
3
3
|
import { parseDocument } from "yaml";
|
|
4
|
-
|
|
5
4
|
//#region src/markdown/toml/arrays.ts
|
|
6
5
|
function ensureArrayContainer(result, key) {
|
|
7
6
|
const existing = result[key];
|
|
@@ -16,7 +15,6 @@ function flattenArrayTables(result) {
|
|
|
16
15
|
result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
|
|
17
16
|
}
|
|
18
17
|
}
|
|
19
|
-
|
|
20
18
|
//#endregion
|
|
21
19
|
//#region src/markdown/toml/keys.ts
|
|
22
20
|
function stripKeyQuotes(key) {
|
|
@@ -35,7 +33,6 @@ function normalizeKeyPath(key) {
|
|
|
35
33
|
if (segments.some((segment) => !segment)) return null;
|
|
36
34
|
return segments.join(".");
|
|
37
35
|
}
|
|
38
|
-
|
|
39
36
|
//#endregion
|
|
40
37
|
//#region src/markdown/toml/strings.ts
|
|
41
38
|
function stripInlineComment(line) {
|
|
@@ -84,7 +81,6 @@ function parseStringLiteral(value) {
|
|
|
84
81
|
if (value.startsWith("'") && value.endsWith("'")) return value.slice(1, -1);
|
|
85
82
|
return null;
|
|
86
83
|
}
|
|
87
|
-
|
|
88
84
|
//#endregion
|
|
89
85
|
//#region src/markdown/toml/values.ts
|
|
90
86
|
function parsePrimitive(raw) {
|
|
@@ -242,7 +238,6 @@ function toPlainText(value) {
|
|
|
242
238
|
if (Array.isArray(value)) return value.map((item) => String(item)).join(", ");
|
|
243
239
|
return String(value);
|
|
244
240
|
}
|
|
245
|
-
|
|
246
241
|
//#endregion
|
|
247
242
|
//#region src/markdown/toml/parse-frontmatter.ts
|
|
248
243
|
function parseTomlFrontmatter(frontmatter) {
|
|
@@ -326,7 +321,6 @@ function parseTomlFrontmatter(frontmatter) {
|
|
|
326
321
|
flattenArrayTables(result);
|
|
327
322
|
return result;
|
|
328
323
|
}
|
|
329
|
-
|
|
330
324
|
//#endregion
|
|
331
325
|
//#region src/markdown/parse-markdown.ts
|
|
332
326
|
const FENCE_TO_TYPE = {
|
|
@@ -461,7 +455,6 @@ function parseMarkdown(input) {
|
|
|
461
455
|
frontmatterType: openingType
|
|
462
456
|
};
|
|
463
457
|
}
|
|
464
|
-
|
|
465
458
|
//#endregion
|
|
466
459
|
//#region src/wc/segmenter.ts
|
|
467
460
|
const segmenterCache = /* @__PURE__ */ new Map();
|
|
@@ -490,13 +483,11 @@ function countCharsForLocale(text, locale) {
|
|
|
490
483
|
for (const _segment of segmenter.segment(text)) count++;
|
|
491
484
|
return count;
|
|
492
485
|
}
|
|
493
|
-
|
|
494
486
|
//#endregion
|
|
495
487
|
//#region src/utils/append-all.ts
|
|
496
488
|
function appendAll(target, source) {
|
|
497
489
|
for (const item of source) target.push(item);
|
|
498
490
|
}
|
|
499
|
-
|
|
500
491
|
//#endregion
|
|
501
492
|
//#region src/wc/non-words.ts
|
|
502
493
|
const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
|
|
@@ -610,7 +601,6 @@ function createWhitespaceCounts() {
|
|
|
610
601
|
other: 0
|
|
611
602
|
};
|
|
612
603
|
}
|
|
613
|
-
|
|
614
604
|
//#endregion
|
|
615
605
|
//#region src/wc/analyze.ts
|
|
616
606
|
function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
|
|
@@ -710,7 +700,6 @@ function aggregateByLocale(chunks) {
|
|
|
710
700
|
}
|
|
711
701
|
return order.map((locale) => map.get(locale));
|
|
712
702
|
}
|
|
713
|
-
|
|
714
703
|
//#endregion
|
|
715
704
|
//#region src/wc/mode.ts
|
|
716
705
|
const MODE_ALIASES = {
|
|
@@ -778,10 +767,7 @@ function normalizeMode(input) {
|
|
|
778
767
|
function resolveMode(input, fallback = "chunk") {
|
|
779
768
|
return normalizeMode(input) ?? fallback;
|
|
780
769
|
}
|
|
781
|
-
|
|
782
|
-
//#endregion
|
|
783
|
-
//#region src/wc/latin-hints.ts
|
|
784
|
-
const DEFAULT_LATIN_HINT_RULES_SOURCE = [
|
|
770
|
+
const DEFAULT_LATIN_HINT_RULES = Object.freeze([
|
|
785
771
|
{
|
|
786
772
|
tag: "de",
|
|
787
773
|
pattern: "[äöüÄÖÜß]"
|
|
@@ -818,13 +804,10 @@ const DEFAULT_LATIN_HINT_RULES_SOURCE = [
|
|
|
818
804
|
tag: "is",
|
|
819
805
|
pattern: "[ðÐþÞ]"
|
|
820
806
|
}
|
|
821
|
-
];
|
|
822
|
-
const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
|
|
823
|
-
|
|
807
|
+
].map((rule) => Object.freeze({ ...rule })));
|
|
824
808
|
//#endregion
|
|
825
809
|
//#region src/wc/locale-detect.ts
|
|
826
810
|
const DEFAULT_LOCALE = "und-Latn";
|
|
827
|
-
const DEFAULT_HAN_TAG = "und-Hani";
|
|
828
811
|
const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
|
|
829
812
|
const regex = {
|
|
830
813
|
hiragana: /\p{Script=Hiragana}/u,
|
|
@@ -940,18 +923,17 @@ function detectLocaleForChar(char, previousLocale, options = {}, context = resol
|
|
|
940
923
|
if (regex.thai.test(char)) return "th";
|
|
941
924
|
if (regex.han.test(char)) {
|
|
942
925
|
if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
|
|
943
|
-
return context.hanHint ?? DEFAULT_HAN_TAG;
|
|
926
|
+
return context.hanHint ?? "und-Hani";
|
|
944
927
|
}
|
|
945
928
|
if (regex.latin.test(char)) {
|
|
946
929
|
const hintedLocale = detectLatinLocale(char, context);
|
|
947
|
-
if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
|
|
948
|
-
if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
|
|
930
|
+
if (hintedLocale !== "und-Latn") return hintedLocale;
|
|
931
|
+
if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== "und-Latn") return previousLocale;
|
|
949
932
|
if (context.latinHint) return context.latinHint;
|
|
950
933
|
return DEFAULT_LOCALE;
|
|
951
934
|
}
|
|
952
935
|
return null;
|
|
953
936
|
}
|
|
954
|
-
|
|
955
937
|
//#endregion
|
|
956
938
|
//#region src/wc/segment.ts
|
|
957
939
|
const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
|
|
@@ -988,7 +970,7 @@ function segmentTextByLocale(text, options = {}) {
|
|
|
988
970
|
continue;
|
|
989
971
|
}
|
|
990
972
|
if (targetLocale !== currentLocale && detected !== null) {
|
|
991
|
-
if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
|
|
973
|
+
if (currentLocale === "und-Latn" && isLatinLocale(targetLocale, context)) {
|
|
992
974
|
const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
|
|
993
975
|
if (promotionBreakIndex === -1) {
|
|
994
976
|
currentLocale = targetLocale;
|
|
@@ -1055,7 +1037,6 @@ function mergeAdjacentChunks(chunks) {
|
|
|
1055
1037
|
merged.push(last);
|
|
1056
1038
|
return merged;
|
|
1057
1039
|
}
|
|
1058
|
-
|
|
1059
1040
|
//#endregion
|
|
1060
1041
|
//#region src/wc/wc.ts
|
|
1061
1042
|
function wordCounter(text, options = {}) {
|
|
@@ -1170,11 +1151,9 @@ function collectNonWordsAggregate(analyzed, enabled) {
|
|
|
1170
1151
|
}
|
|
1171
1152
|
return collection;
|
|
1172
1153
|
}
|
|
1173
|
-
|
|
1174
1154
|
//#endregion
|
|
1175
1155
|
//#region src/wc/index.ts
|
|
1176
1156
|
var wc_default = wordCounter;
|
|
1177
|
-
|
|
1178
1157
|
//#endregion
|
|
1179
1158
|
//#region src/markdown/section-count.ts
|
|
1180
1159
|
function normalizeText(value) {
|
|
@@ -1239,7 +1218,6 @@ function countSections(input, section, options = {}) {
|
|
|
1239
1218
|
items
|
|
1240
1219
|
};
|
|
1241
1220
|
}
|
|
1242
|
-
|
|
1243
1221
|
//#endregion
|
|
1244
1222
|
//#region src/cli/batch/aggregate.ts
|
|
1245
1223
|
function stripCollectorSegmentsFromWordCounterResult(result) {
|
|
@@ -1256,7 +1234,6 @@ function compactCollectorSegmentsInCountResult(result) {
|
|
|
1256
1234
|
}
|
|
1257
1235
|
stripCollectorSegmentsFromWordCounterResult(result);
|
|
1258
1236
|
}
|
|
1259
|
-
|
|
1260
1237
|
//#endregion
|
|
1261
1238
|
//#region src/cli/path/load.ts
|
|
1262
1239
|
function isProbablyBinary(buffer) {
|
|
@@ -1273,7 +1250,6 @@ function isProbablyBinary(buffer) {
|
|
|
1273
1250
|
}
|
|
1274
1251
|
return suspicious / sampleSize > .3;
|
|
1275
1252
|
}
|
|
1276
|
-
|
|
1277
1253
|
//#endregion
|
|
1278
1254
|
//#region src/cli/batch/jobs/worker/count-worker.ts
|
|
1279
1255
|
const config = workerData;
|
|
@@ -1364,7 +1340,7 @@ parentPort.on("message", async (message) => {
|
|
|
1364
1340
|
parentPort?.postMessage(response);
|
|
1365
1341
|
}
|
|
1366
1342
|
});
|
|
1367
|
-
|
|
1368
1343
|
//#endregion
|
|
1369
|
-
export {
|
|
1344
|
+
export {};
|
|
1345
|
+
|
|
1370
1346
|
//# sourceMappingURL=count-worker.mjs.map
|