khmer-segment 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -26
- package/dist/dictionary/index.cjs +26 -8
- package/dist/dictionary/index.d.cts +1 -1
- package/dist/dictionary/index.d.ts +1 -1
- package/dist/dictionary/index.js +26 -8
- package/dist/index.cjs +7 -3
- package/dist/index.d.cts +4 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.js +7 -3
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
1
|
# khmer-segment
|
|
4
2
|
|
|
5
3
|
A framework-agnostic Khmer text processing library for JavaScript and TypeScript.
|
|
@@ -23,6 +21,7 @@ npm install khmer-segment
|
|
|
23
21
|
```ts
|
|
24
22
|
import {
|
|
25
23
|
containsKhmer,
|
|
24
|
+
isKhmerText,
|
|
26
25
|
normalizeKhmer,
|
|
27
26
|
splitClusters,
|
|
28
27
|
countClusters,
|
|
@@ -59,41 +58,33 @@ console.log(result.tokens);
|
|
|
59
58
|
|
|
60
59
|
### Detection
|
|
61
60
|
|
|
62
|
-
|
|
63
61
|
| Function | Description |
|
|
64
62
|
| --------------------- | --------------------------------------------------------- |
|
|
65
63
|
| `isKhmerChar(char)` | Returns `true` if the character is a Khmer code point |
|
|
66
64
|
| `containsKhmer(text)` | Returns `true` if the text contains any Khmer characters |
|
|
67
65
|
| `isKhmerText(text)` | Returns `true` if all non-whitespace characters are Khmer |
|
|
68
66
|
|
|
69
|
-
|
|
70
67
|
### Normalization
|
|
71
68
|
|
|
72
|
-
|
|
73
69
|
| Function | Description |
|
|
74
70
|
| -------------------------------- | ------------------------------------------------------------------------------------------ |
|
|
75
71
|
| `normalizeKhmer(text)` | Reorders Khmer characters into canonical order (base → coeng → shift signs → vowel → sign) |
|
|
76
72
|
| `normalizeKhmerCluster(cluster)` | Normalizes a single cluster |
|
|
77
73
|
|
|
78
|
-
|
|
79
74
|
### Cluster Utilities
|
|
80
75
|
|
|
81
|
-
|
|
82
76
|
| Function | Description |
|
|
83
77
|
| ---------------------------- | ------------------------------------------------- |
|
|
84
78
|
| `splitClusters(text)` | Splits text into Khmer-safe grapheme clusters |
|
|
85
79
|
| `countClusters(text)` | Returns the number of clusters in the text |
|
|
86
80
|
| `getClusterBoundaries(text)` | Returns `{ start, end }` offsets for each cluster |
|
|
87
81
|
|
|
88
|
-
|
|
89
82
|
### Segmentation
|
|
90
83
|
|
|
91
|
-
|
|
92
84
|
| Function | Description |
|
|
93
85
|
| ------------------------------ | -------------------------------------------------------------- |
|
|
94
86
|
| `segmentWords(text, options?)` | Segments text into word tokens using dictionary-based matching |
|
|
95
87
|
|
|
96
|
-
|
|
97
88
|
#### `SegmentOptions`
|
|
98
89
|
|
|
99
90
|
```ts
|
|
@@ -115,27 +106,27 @@ interface SegmentResult {
|
|
|
115
106
|
|
|
116
107
|
interface SegmentToken {
|
|
117
108
|
value: string;
|
|
118
|
-
start: number;
|
|
119
|
-
end: number;
|
|
109
|
+
start: number; // zero-based offset into result.normalized
|
|
110
|
+
end: number; // exclusive offset into result.normalized
|
|
120
111
|
isKnown: boolean;
|
|
121
112
|
}
|
|
122
113
|
```
|
|
123
114
|
|
|
124
|
-
|
|
115
|
+
When normalization is enabled, token offsets always refer to `result.normalized`. Invisible characters such as ZWS/ZWJ/BOM may be stripped during normalization, so offsets may not line up with the original input string.
|
|
125
116
|
|
|
117
|
+
### Dictionary
|
|
126
118
|
|
|
127
119
|
| Function | Description |
|
|
128
120
|
| --------------------------------------- | ------------------------------------------------ |
|
|
129
121
|
| `createDictionary(words, frequencies?)` | Creates an in-memory dictionary from a word list |
|
|
130
122
|
|
|
131
|
-
|
|
132
123
|
```ts
|
|
133
124
|
const dict = createDictionary(['សួស្តី', 'អ្នក', 'ខ្មែរ']);
|
|
134
125
|
|
|
135
126
|
dict.has('សួស្តី'); // true
|
|
136
127
|
dict.hasPrefix!('សួ'); // true (trie-based O(k) lookup)
|
|
137
128
|
dict.hasSuffix!('ី'); // true
|
|
138
|
-
dict.size; // 3
|
|
129
|
+
dict.size; // 3 unique words
|
|
139
130
|
```
|
|
140
131
|
|
|
141
132
|
#### `KhmerDictionary` interface
|
|
@@ -175,7 +166,9 @@ console.log(freqData.words.length); // 49113
|
|
|
175
166
|
console.log(freqData.frequencies.get('ជា')); // 701541
|
|
176
167
|
```
|
|
177
168
|
|
|
178
|
-
This is a **separate import** — the core `khmer-segment` package stays small (~
|
|
169
|
+
This is a **separate import** — the core `khmer-segment` package stays small (~11KB). The dictionary module is ~3.9MB. Only import the dictionary when you need it.
|
|
170
|
+
|
|
171
|
+
`loadFrequencyDictionary()` builds its return value from cached dictionary data, but each call returns fresh arrays and a fresh `Map`. You can safely extend or mutate the returned data without affecting later calls.
|
|
179
172
|
|
|
180
173
|
---
|
|
181
174
|
|
|
@@ -242,7 +235,7 @@ const result = segmentWords('កខគ');
|
|
|
242
235
|
|
|
243
236
|
## Dictionary Strategy
|
|
244
237
|
|
|
245
|
-
The library ships a **separate optional dictionary** via `khmer-segment/dictionary` with 49,113 Khmer words. This keeps the core package small (~
|
|
238
|
+
The library ships a **separate optional dictionary** via `khmer-segment/dictionary` with 49,113 Khmer words. This keeps the core package small (~11KB).
|
|
246
239
|
|
|
247
240
|
Options:
|
|
248
241
|
|
|
@@ -272,7 +265,6 @@ const dict = createDictionary([...words, 'custom_word'], frequencies);
|
|
|
272
265
|
|
|
273
266
|
## Framework Compatibility
|
|
274
267
|
|
|
275
|
-
|
|
276
268
|
| Environment | Support |
|
|
277
269
|
| ------------------- | ------- |
|
|
278
270
|
| Node.js (ESM + CJS) | Yes |
|
|
@@ -282,7 +274,6 @@ const dict = createDictionary([...words, 'custom_word'], frequencies);
|
|
|
282
274
|
| Angular | Yes |
|
|
283
275
|
| Vue | Yes |
|
|
284
276
|
|
|
285
|
-
|
|
286
277
|
No framework-specific code in the core. Tree-shakeable with `sideEffects: false`.
|
|
287
278
|
|
|
288
279
|
---
|
|
@@ -308,16 +299,22 @@ No framework-specific code in the core. Tree-shakeable with `sideEffects: false`
|
|
|
308
299
|
- `segmentWords` with FMM
|
|
309
300
|
- Default dictionary (34K+ words, separate import)
|
|
310
301
|
|
|
311
|
-
### v0.2.0
|
|
302
|
+
### v0.2.1
|
|
312
303
|
|
|
313
304
|
- BMM (Backward Maximum Matching) algorithm
|
|
314
305
|
- BiMM (Bidirectional Maximum Matching) algorithm
|
|
315
306
|
- Digit grouping (consecutive Khmer digits merged into single tokens)
|
|
316
307
|
- Fixed normalization for MUUSIKATOAN (៉) and TRIISAP (៊) — shift signs now placed before vowels
|
|
317
308
|
- Fixed Unicode range constants (NIKAHIT, REAHMUK, YUUKEALAKHMOU are signs, not vowels)
|
|
318
|
-
-
|
|
319
|
-
|
|
320
|
-
|
|
309
|
+
- Rebuilt dictionary with 49,113 words (merged from 10 sources)
|
|
310
|
+
|
|
311
|
+
### v0.2.2 (current)
|
|
312
|
+
|
|
313
|
+
- Clarified that token offsets are measured against `result.normalized`
|
|
314
|
+
- Expanded Vitest coverage across normalization, dictionary, and segmentation behavior
|
|
315
|
+
- Made `loadFrequencyDictionary()` safe to reuse across calls without shared-state pollution
|
|
316
|
+
- Corrected custom dictionary `size` to report unique non-empty words
|
|
317
|
+
- Added changelog, CI checks, and stricter prepublish formatting verification
|
|
321
318
|
|
|
322
319
|
### v0.3.0
|
|
323
320
|
|
|
@@ -341,6 +338,7 @@ No framework-specific code in the core. Tree-shakeable with `sideEffects: false`
|
|
|
341
338
|
npm install # install dependencies
|
|
342
339
|
npm run build # build with tsup (ESM + CJS + types)
|
|
343
340
|
npm test # run vitest
|
|
341
|
+
npm run test:perf # optional performance-focused checks
|
|
344
342
|
npm run test:watch # watch mode
|
|
345
343
|
npm run lint # TypeScript type check
|
|
346
344
|
```
|
|
@@ -352,7 +350,8 @@ npm run lint # TypeScript type check
|
|
|
352
350
|
### Automated Tests
|
|
353
351
|
|
|
354
352
|
```bash
|
|
355
|
-
npm test # run
|
|
353
|
+
npm test # run the main Vitest correctness suite
|
|
354
|
+
npm run test:perf # optional performance-focused checks
|
|
356
355
|
npm run test:watch # watch mode — re-runs on changes
|
|
357
356
|
npm run lint # TypeScript type check
|
|
358
357
|
```
|
|
@@ -385,10 +384,10 @@ Features:
|
|
|
385
384
|
- **[Word Segmentation of Khmer Text Using Conditional Random Fields](https://medium.com/@phylypo/segmentation-of-khmer-text-using-conditional-random-fields-3a2d4d73956a)** — Phylypo Tum (2019). Comprehensive overview of Khmer segmentation approaches from dictionary-based to CRF, achieving 99.7% accuracy with Linear Chain CRF.
|
|
386
385
|
- **[Khmer Word Segmentation Using Conditional Random Fields](https://www.niptict.edu.kh/khmer-word-segmentation-tool/)** — Vichea Chea, Ye Kyaw Thu, et al. (2015). The prior state-of-the-art CRF model for Khmer segmentation (98.5% accuracy, 5-tag system).
|
|
387
386
|
- **[Benchmark dataset and Python notebooks](https://github.com/phylypo/segmentation-crf-khmer)** — 10K+ segmented Khmer news articles useful for evaluating segmentation quality.
|
|
388
|
-
- **[khmerlbdict](https://github.com/silnrsi/khmerlbdict)** — Source of the default dictionary used by this library (MIT license,
|
|
387
|
+
- **[khmerlbdict](https://github.com/silnrsi/khmerlbdict)** — Source of the default dictionary used by this library (MIT license). Merged with Royal Academy of Cambodia's Khmer Dictionary for a total of 49,113 words.
|
|
389
388
|
|
|
390
389
|
---
|
|
391
390
|
|
|
392
391
|
## License
|
|
393
392
|
|
|
394
|
-
MIT
|
|
393
|
+
MIT
|
|
@@ -87,15 +87,19 @@ var MemoryDictionary = class {
|
|
|
87
87
|
this.trie = new Trie();
|
|
88
88
|
this.reverseTrie = new Trie();
|
|
89
89
|
this.freqMap = frequencies ?? /* @__PURE__ */ new Map();
|
|
90
|
-
|
|
90
|
+
const uniqueWords = /* @__PURE__ */ new Set();
|
|
91
91
|
for (const word of words) {
|
|
92
|
+
if (word.length > 0) {
|
|
93
|
+
uniqueWords.add(word);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
for (const word of uniqueWords) {
|
|
92
97
|
if (word.length > 0) {
|
|
93
98
|
this.trie.insert(word);
|
|
94
99
|
this.reverseTrie.insert([...word].reverse().join(""));
|
|
95
|
-
count++;
|
|
96
100
|
}
|
|
97
101
|
}
|
|
98
|
-
this.size =
|
|
102
|
+
this.size = uniqueWords.size;
|
|
99
103
|
}
|
|
100
104
|
has(word) {
|
|
101
105
|
return this.trie.has(word);
|
|
@@ -196590,14 +196594,28 @@ function getDefaultDictionary() {
|
|
|
196590
196594
|
var cached = null;
|
|
196591
196595
|
function loadFrequencyDictionary() {
|
|
196592
196596
|
if (!cached) {
|
|
196593
|
-
const entries =
|
|
196594
|
-
|
|
196597
|
+
const entries = Object.freeze(
|
|
196598
|
+
khmer_words_default.map(
|
|
196599
|
+
(entry) => Object.freeze({
|
|
196600
|
+
word: entry.word,
|
|
196601
|
+
freq: entry.freq
|
|
196602
|
+
})
|
|
196603
|
+
)
|
|
196604
|
+
);
|
|
196605
|
+
const words = Object.freeze(entries.map((entry) => entry.word));
|
|
196595
196606
|
const frequencies = new Map(
|
|
196596
|
-
entries.map((
|
|
196607
|
+
entries.map((entry) => [entry.word, entry.freq])
|
|
196597
196608
|
);
|
|
196598
|
-
cached = { words, entries, frequencies };
|
|
196609
|
+
cached = Object.freeze({ words, entries, frequencies });
|
|
196599
196610
|
}
|
|
196600
|
-
return
|
|
196611
|
+
return {
|
|
196612
|
+
words: [...cached.words],
|
|
196613
|
+
entries: cached.entries.map((entry) => ({
|
|
196614
|
+
word: entry.word,
|
|
196615
|
+
freq: entry.freq
|
|
196616
|
+
})),
|
|
196617
|
+
frequencies: new Map(cached.frequencies)
|
|
196618
|
+
};
|
|
196601
196619
|
}
|
|
196602
196620
|
// Annotate the CommonJS export names for ESM import in node:
|
|
196603
196621
|
0 && (module.exports = {
|
|
@@ -19,6 +19,6 @@ interface FrequencyDictionary {
|
|
|
19
19
|
}
|
|
20
20
|
declare function loadFrequencyDictionary(): FrequencyDictionary;
|
|
21
21
|
|
|
22
|
-
declare function createDictionary(words: string[], frequencies?:
|
|
22
|
+
declare function createDictionary(words: string[], frequencies?: ReadonlyMap<string, number>): KhmerDictionary;
|
|
23
23
|
|
|
24
24
|
export { type DictionaryEntry, type FrequencyDictionary, type KhmerDictionary, createDictionary, getDefaultDictionary, loadFrequencyDictionary };
|
|
@@ -19,6 +19,6 @@ interface FrequencyDictionary {
|
|
|
19
19
|
}
|
|
20
20
|
declare function loadFrequencyDictionary(): FrequencyDictionary;
|
|
21
21
|
|
|
22
|
-
declare function createDictionary(words: string[], frequencies?:
|
|
22
|
+
declare function createDictionary(words: string[], frequencies?: ReadonlyMap<string, number>): KhmerDictionary;
|
|
23
23
|
|
|
24
24
|
export { type DictionaryEntry, type FrequencyDictionary, type KhmerDictionary, createDictionary, getDefaultDictionary, loadFrequencyDictionary };
|
package/dist/dictionary/index.js
CHANGED
|
@@ -59,15 +59,19 @@ var MemoryDictionary = class {
|
|
|
59
59
|
this.trie = new Trie();
|
|
60
60
|
this.reverseTrie = new Trie();
|
|
61
61
|
this.freqMap = frequencies ?? /* @__PURE__ */ new Map();
|
|
62
|
-
|
|
62
|
+
const uniqueWords = /* @__PURE__ */ new Set();
|
|
63
63
|
for (const word of words) {
|
|
64
|
+
if (word.length > 0) {
|
|
65
|
+
uniqueWords.add(word);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
for (const word of uniqueWords) {
|
|
64
69
|
if (word.length > 0) {
|
|
65
70
|
this.trie.insert(word);
|
|
66
71
|
this.reverseTrie.insert([...word].reverse().join(""));
|
|
67
|
-
count++;
|
|
68
72
|
}
|
|
69
73
|
}
|
|
70
|
-
this.size =
|
|
74
|
+
this.size = uniqueWords.size;
|
|
71
75
|
}
|
|
72
76
|
has(word) {
|
|
73
77
|
return this.trie.has(word);
|
|
@@ -196562,14 +196566,28 @@ function getDefaultDictionary() {
|
|
|
196562
196566
|
var cached = null;
|
|
196563
196567
|
function loadFrequencyDictionary() {
|
|
196564
196568
|
if (!cached) {
|
|
196565
|
-
const entries =
|
|
196566
|
-
|
|
196569
|
+
const entries = Object.freeze(
|
|
196570
|
+
khmer_words_default.map(
|
|
196571
|
+
(entry) => Object.freeze({
|
|
196572
|
+
word: entry.word,
|
|
196573
|
+
freq: entry.freq
|
|
196574
|
+
})
|
|
196575
|
+
)
|
|
196576
|
+
);
|
|
196577
|
+
const words = Object.freeze(entries.map((entry) => entry.word));
|
|
196567
196578
|
const frequencies = new Map(
|
|
196568
|
-
entries.map((
|
|
196579
|
+
entries.map((entry) => [entry.word, entry.freq])
|
|
196569
196580
|
);
|
|
196570
|
-
cached = { words, entries, frequencies };
|
|
196581
|
+
cached = Object.freeze({ words, entries, frequencies });
|
|
196571
196582
|
}
|
|
196572
|
-
return
|
|
196583
|
+
return {
|
|
196584
|
+
words: [...cached.words],
|
|
196585
|
+
entries: cached.entries.map((entry) => ({
|
|
196586
|
+
word: entry.word,
|
|
196587
|
+
freq: entry.freq
|
|
196588
|
+
})),
|
|
196589
|
+
frequencies: new Map(cached.frequencies)
|
|
196590
|
+
};
|
|
196573
196591
|
}
|
|
196574
196592
|
export {
|
|
196575
196593
|
createDictionary,
|
package/dist/index.cjs
CHANGED
|
@@ -447,15 +447,19 @@ var MemoryDictionary = class {
|
|
|
447
447
|
this.trie = new Trie();
|
|
448
448
|
this.reverseTrie = new Trie();
|
|
449
449
|
this.freqMap = frequencies ?? /* @__PURE__ */ new Map();
|
|
450
|
-
|
|
450
|
+
const uniqueWords = /* @__PURE__ */ new Set();
|
|
451
451
|
for (const word of words) {
|
|
452
|
+
if (word.length > 0) {
|
|
453
|
+
uniqueWords.add(word);
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
for (const word of uniqueWords) {
|
|
452
457
|
if (word.length > 0) {
|
|
453
458
|
this.trie.insert(word);
|
|
454
459
|
this.reverseTrie.insert([...word].reverse().join(""));
|
|
455
|
-
count++;
|
|
456
460
|
}
|
|
457
461
|
}
|
|
458
|
-
this.size =
|
|
462
|
+
this.size = uniqueWords.size;
|
|
459
463
|
}
|
|
460
464
|
has(word) {
|
|
461
465
|
return this.trie.has(word);
|
package/dist/index.d.cts
CHANGED
|
@@ -14,7 +14,9 @@ declare function getClusterBoundaries(text: string): Array<{
|
|
|
14
14
|
|
|
15
15
|
interface SegmentToken {
|
|
16
16
|
value: string;
|
|
17
|
+
/** Zero-based start offset into `SegmentResult.normalized`. */
|
|
17
18
|
start: number;
|
|
19
|
+
/** Zero-based exclusive end offset into `SegmentResult.normalized`. */
|
|
18
20
|
end: number;
|
|
19
21
|
isKnown: boolean;
|
|
20
22
|
}
|
|
@@ -25,6 +27,7 @@ interface SegmentOptions {
|
|
|
25
27
|
}
|
|
26
28
|
interface SegmentResult {
|
|
27
29
|
original: string;
|
|
30
|
+
/** Normalized text used to compute token boundaries and offsets. */
|
|
28
31
|
normalized: string;
|
|
29
32
|
tokens: SegmentToken[];
|
|
30
33
|
}
|
|
@@ -49,6 +52,6 @@ interface KhmerDictionary {
|
|
|
49
52
|
|
|
50
53
|
declare function segmentWords(text: string, options?: SegmentOptions): SegmentResult;
|
|
51
54
|
|
|
52
|
-
declare function createDictionary(words: string[], frequencies?:
|
|
55
|
+
declare function createDictionary(words: string[], frequencies?: ReadonlyMap<string, number>): KhmerDictionary;
|
|
53
56
|
|
|
54
57
|
export { type KhmerDictionary, type SegmentOptions, type SegmentResult, type SegmentToken, type TypingComparisonResult, containsKhmer, countClusters, createDictionary, getClusterBoundaries, isKhmerChar, isKhmerText, normalizeKhmer, normalizeKhmerCluster, segmentWords, splitClusters };
|
package/dist/index.d.ts
CHANGED
|
@@ -14,7 +14,9 @@ declare function getClusterBoundaries(text: string): Array<{
|
|
|
14
14
|
|
|
15
15
|
interface SegmentToken {
|
|
16
16
|
value: string;
|
|
17
|
+
/** Zero-based start offset into `SegmentResult.normalized`. */
|
|
17
18
|
start: number;
|
|
19
|
+
/** Zero-based exclusive end offset into `SegmentResult.normalized`. */
|
|
18
20
|
end: number;
|
|
19
21
|
isKnown: boolean;
|
|
20
22
|
}
|
|
@@ -25,6 +27,7 @@ interface SegmentOptions {
|
|
|
25
27
|
}
|
|
26
28
|
interface SegmentResult {
|
|
27
29
|
original: string;
|
|
30
|
+
/** Normalized text used to compute token boundaries and offsets. */
|
|
28
31
|
normalized: string;
|
|
29
32
|
tokens: SegmentToken[];
|
|
30
33
|
}
|
|
@@ -49,6 +52,6 @@ interface KhmerDictionary {
|
|
|
49
52
|
|
|
50
53
|
declare function segmentWords(text: string, options?: SegmentOptions): SegmentResult;
|
|
51
54
|
|
|
52
|
-
declare function createDictionary(words: string[], frequencies?:
|
|
55
|
+
declare function createDictionary(words: string[], frequencies?: ReadonlyMap<string, number>): KhmerDictionary;
|
|
53
56
|
|
|
54
57
|
export { type KhmerDictionary, type SegmentOptions, type SegmentResult, type SegmentToken, type TypingComparisonResult, containsKhmer, countClusters, createDictionary, getClusterBoundaries, isKhmerChar, isKhmerText, normalizeKhmer, normalizeKhmerCluster, segmentWords, splitClusters };
|
package/dist/index.js
CHANGED
|
@@ -412,15 +412,19 @@ var MemoryDictionary = class {
|
|
|
412
412
|
this.trie = new Trie();
|
|
413
413
|
this.reverseTrie = new Trie();
|
|
414
414
|
this.freqMap = frequencies ?? /* @__PURE__ */ new Map();
|
|
415
|
-
|
|
415
|
+
const uniqueWords = /* @__PURE__ */ new Set();
|
|
416
416
|
for (const word of words) {
|
|
417
|
+
if (word.length > 0) {
|
|
418
|
+
uniqueWords.add(word);
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
for (const word of uniqueWords) {
|
|
417
422
|
if (word.length > 0) {
|
|
418
423
|
this.trie.insert(word);
|
|
419
424
|
this.reverseTrie.insert([...word].reverse().join(""));
|
|
420
|
-
count++;
|
|
421
425
|
}
|
|
422
426
|
}
|
|
423
|
-
this.size =
|
|
427
|
+
this.size = uniqueWords.size;
|
|
424
428
|
}
|
|
425
429
|
has(word) {
|
|
426
430
|
return this.trie.has(word);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "khmer-segment",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"description": "Khmer text segmentation, normalization, and cluster utilities for JavaScript and TypeScript.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -27,11 +27,12 @@
|
|
|
27
27
|
"build": "tsup",
|
|
28
28
|
"dev": "tsup --watch",
|
|
29
29
|
"test": "vitest run",
|
|
30
|
+
"test:perf": "vitest run --config vitest.perf.config.ts",
|
|
30
31
|
"test:watch": "vitest",
|
|
31
32
|
"lint": "tsc --noEmit",
|
|
32
33
|
"format": "prettier --write .",
|
|
33
34
|
"format:check": "prettier --check .",
|
|
34
|
-
"prepublishOnly": "npm run build && npm run test && npm run lint",
|
|
35
|
+
"prepublishOnly": "npm run build && npm run test && npm run lint && npm run format:check",
|
|
35
36
|
"playground:dev": "npm run dev --prefix playground",
|
|
36
37
|
"playground:build": "npm run build --prefix playground"
|
|
37
38
|
},
|