@indodev/toolkit 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -183
- package/dist/compare-B1MKSOWV.d.cts +938 -0
- package/dist/compare-B1MKSOWV.d.ts +938 -0
- package/dist/index.cjs +908 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +896 -1
- package/dist/index.js.map +1 -1
- package/dist/text/index.cjs +915 -0
- package/dist/text/index.cjs.map +1 -0
- package/dist/text/index.d.cts +284 -0
- package/dist/text/index.d.ts +284 -0
- package/dist/text/index.js +898 -0
- package/dist/text/index.js.map +1 -0
- package/package.json +19 -2
|
@@ -0,0 +1,938 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Options for title case conversion
|
|
3
|
+
*/
|
|
4
|
+
interface TitleCaseOptions {
|
|
5
|
+
/**
|
|
6
|
+
* Preserve known acronyms in uppercase (default: true)
|
|
7
|
+
* @example PT, CV, TNI, POLRI
|
|
8
|
+
*/
|
|
9
|
+
preserveAcronyms?: boolean;
|
|
10
|
+
/**
|
|
11
|
+
* Strict mode forces lowercase before capitalizing (default: false)
|
|
12
|
+
*/
|
|
13
|
+
strict?: boolean;
|
|
14
|
+
/**
|
|
15
|
+
* Additional words to keep lowercase (extends default list)
|
|
16
|
+
*/
|
|
17
|
+
exceptions?: string[];
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Options for abbreviation expansion
|
|
21
|
+
*/
|
|
22
|
+
interface ExpandOptions {
|
|
23
|
+
/**
|
|
24
|
+
* Filter abbreviations by category
|
|
25
|
+
* - 'all': Expand all abbreviations (default)
|
|
26
|
+
* - 'address': Only expand address abbreviations
|
|
27
|
+
* - 'title': Only expand title abbreviations
|
|
28
|
+
* - 'org': Only expand organization abbreviations
|
|
29
|
+
*/
|
|
30
|
+
mode?: 'all' | 'address' | 'title' | 'org';
|
|
31
|
+
/**
|
|
32
|
+
* Custom abbreviation mappings (overrides built-in)
|
|
33
|
+
*/
|
|
34
|
+
customMap?: Record<string, string>;
|
|
35
|
+
/**
|
|
36
|
+
* Preserve original case of expanded text (default: false)
|
|
37
|
+
*/
|
|
38
|
+
preserveCase?: boolean;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Options for slug generation
|
|
42
|
+
*/
|
|
43
|
+
interface SlugifyOptions {
|
|
44
|
+
/**
|
|
45
|
+
* Separator character (default: '-')
|
|
46
|
+
*/
|
|
47
|
+
separator?: string;
|
|
48
|
+
/**
|
|
49
|
+
* Convert to lowercase (default: true)
|
|
50
|
+
*/
|
|
51
|
+
lowercase?: boolean;
|
|
52
|
+
/**
|
|
53
|
+
* Custom character replacements
|
|
54
|
+
*/
|
|
55
|
+
replacements?: Record<string, string>;
|
|
56
|
+
/**
|
|
57
|
+
* Trim leading/trailing separators (default: true)
|
|
58
|
+
*/
|
|
59
|
+
trim?: boolean;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Options for text sanitization
|
|
63
|
+
*/
|
|
64
|
+
interface SanitizeOptions {
|
|
65
|
+
/**
|
|
66
|
+
* Remove newline characters (default: false)
|
|
67
|
+
*/
|
|
68
|
+
removeNewlines?: boolean;
|
|
69
|
+
/**
|
|
70
|
+
* Remove extra spaces (default: true)
|
|
71
|
+
*/
|
|
72
|
+
removeExtraSpaces?: boolean;
|
|
73
|
+
/**
|
|
74
|
+
* Remove all punctuation (default: false)
|
|
75
|
+
*/
|
|
76
|
+
removePunctuation?: boolean;
|
|
77
|
+
/**
|
|
78
|
+
* Only allow specific characters (regex pattern)
|
|
79
|
+
*/
|
|
80
|
+
allowedChars?: string;
|
|
81
|
+
/**
|
|
82
|
+
* Trim leading/trailing whitespace (default: true)
|
|
83
|
+
*/
|
|
84
|
+
trim?: boolean;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Options for string comparison
|
|
88
|
+
*/
|
|
89
|
+
interface CompareOptions {
|
|
90
|
+
/**
|
|
91
|
+
* Case-sensitive comparison (default: false)
|
|
92
|
+
*/
|
|
93
|
+
caseSensitive?: boolean;
|
|
94
|
+
/**
|
|
95
|
+
* Ignore whitespace differences (default: false)
|
|
96
|
+
*/
|
|
97
|
+
ignoreWhitespace?: boolean;
|
|
98
|
+
/**
|
|
99
|
+
* Ignore accent/diacritic marks (default: false)
|
|
100
|
+
*/
|
|
101
|
+
ignoreAccents?: boolean;
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Options for word extraction
|
|
105
|
+
*/
|
|
106
|
+
interface ExtractOptions {
|
|
107
|
+
/**
|
|
108
|
+
* Minimum word length to include
|
|
109
|
+
*/
|
|
110
|
+
minLength?: number;
|
|
111
|
+
/**
|
|
112
|
+
* Treat hyphenated words as single word (default: true)
|
|
113
|
+
* @example 'anak-anak' is one word
|
|
114
|
+
*/
|
|
115
|
+
includeHyphenated?: boolean;
|
|
116
|
+
/**
|
|
117
|
+
* Convert extracted words to lowercase (default: false)
|
|
118
|
+
*/
|
|
119
|
+
lowercase?: boolean;
|
|
120
|
+
}
|
|
121
|
+
/**
|
|
122
|
+
* Options for text truncation
|
|
123
|
+
*/
|
|
124
|
+
interface TruncateOptions {
|
|
125
|
+
/**
|
|
126
|
+
* Ellipsis string (default: '...')
|
|
127
|
+
*/
|
|
128
|
+
ellipsis?: string;
|
|
129
|
+
/**
|
|
130
|
+
* Truncate at word boundary (default: true)
|
|
131
|
+
*/
|
|
132
|
+
wordBoundary?: boolean;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Capitalize the first letter of a string and lowercase the rest
|
|
137
|
+
*
|
|
138
|
+
* This function converts the first character to uppercase and all remaining
|
|
139
|
+
* characters to lowercase. It handles empty strings, Unicode characters,
|
|
140
|
+
* and multi-word strings (only the very first character is capitalized; all later characters, including later words, are lowercased).
|
|
141
|
+
*
|
|
142
|
+
* @param text - The text to capitalize
|
|
143
|
+
* @returns The capitalized text
|
|
144
|
+
*
|
|
145
|
+
* @example
|
|
146
|
+
* Basic usage:
|
|
147
|
+
* ```typescript
|
|
148
|
+
* capitalize('joko') // → 'Joko'
|
|
149
|
+
* capitalize('JOKO') // → 'Joko'
|
|
150
|
+
* capitalize('jOKO') // → 'Joko'
|
|
151
|
+
* ```
|
|
152
|
+
*
|
|
153
|
+
* @example
|
|
154
|
+
* Multi-word strings (only first word capitalized):
|
|
155
|
+
* ```typescript
|
|
156
|
+
* capitalize('joko widodo') // → 'Joko widodo'
|
|
157
|
+
* capitalize('JOKO WIDODO') // → 'Joko widodo'
|
|
158
|
+
* ```
|
|
159
|
+
*
|
|
160
|
+
* @example
|
|
161
|
+
* Edge cases:
|
|
162
|
+
* ```typescript
|
|
163
|
+
* capitalize('') // → ''
|
|
164
|
+
* capitalize('a') // → 'A'
|
|
165
|
+
* capitalize('123abc') // → '123abc'
|
|
166
|
+
* ```
|
|
167
|
+
*
|
|
168
|
+
* @public
|
|
169
|
+
*/
|
|
170
|
+
declare function capitalize(text: string): string;
|
|
171
|
+
/**
|
|
172
|
+
* Convert text to title case following Indonesian grammar rules
|
|
173
|
+
*
|
|
174
|
+
* This function capitalizes the first letter of each word while respecting
|
|
175
|
+
* Indonesian language conventions:
|
|
176
|
+
* - Keeps particles lowercase (di, ke, dari, untuk, dan, etc.)
|
|
177
|
+
* - Preserves known acronyms in uppercase (PT, CV, TNI, DKI, etc.)
|
|
178
|
+
* - Handles hyphenated words correctly (anak-anak → Anak-Anak)
|
|
179
|
+
* - Normalizes whitespace automatically
|
|
180
|
+
*
|
|
181
|
+
* @param text - The text to convert to title case
|
|
182
|
+
* @param options - Optional configuration
|
|
183
|
+
* @returns The title-cased text with proper Indonesian grammar
|
|
184
|
+
*
|
|
185
|
+
* @example
|
|
186
|
+
* Basic usage:
|
|
187
|
+
* ```typescript
|
|
188
|
+
* toTitleCase('joko widodo')
|
|
189
|
+
* // → 'Joko Widodo'
|
|
190
|
+
*
|
|
191
|
+
* toTitleCase('JOKO WIDODO')
|
|
192
|
+
* // → 'Joko Widodo'
|
|
193
|
+
* ```
|
|
194
|
+
*
|
|
195
|
+
* @example
|
|
196
|
+
* Indonesian particles (kept lowercase):
|
|
197
|
+
* ```typescript
|
|
198
|
+
* toTitleCase('buku untuk anak dan orang tua')
|
|
199
|
+
* // → 'Buku untuk Anak dan Orang Tua'
|
|
200
|
+
*
|
|
201
|
+
* toTitleCase('dari jakarta ke bandung')
|
|
202
|
+
* // → 'Dari Jakarta ke Bandung'
|
|
203
|
+
* // (first word always capitalized)
|
|
204
|
+
* ```
|
|
205
|
+
*
|
|
206
|
+
* @example
|
|
207
|
+
* Acronyms (preserved in uppercase):
|
|
208
|
+
* ```typescript
|
|
209
|
+
* toTitleCase('pt bank bca tbk')
|
|
210
|
+
* // → 'PT Bank BCA Tbk'
|
|
211
|
+
*
|
|
212
|
+
* toTitleCase('dki jakarta')
|
|
213
|
+
* // → 'DKI Jakarta'
|
|
214
|
+
*
|
|
215
|
+
* toTitleCase('tni angkatan darat')
|
|
216
|
+
* // → 'TNI Angkatan Darat'
|
|
217
|
+
* ```
|
|
218
|
+
*
|
|
219
|
+
* @example
|
|
220
|
+
* Hyphenated words:
|
|
221
|
+
* ```typescript
|
|
222
|
+
* toTitleCase('anak-anak bermain')
|
|
223
|
+
* // → 'Anak-Anak Bermain'
|
|
224
|
+
*
|
|
225
|
+
* toTitleCase('makan-makan di rumah')
|
|
226
|
+
* // → 'Makan-Makan di Rumah'
|
|
227
|
+
* ```
|
|
228
|
+
*
|
|
229
|
+
* @example
|
|
230
|
+
* With options:
|
|
231
|
+
* ```typescript
|
|
232
|
+
* toTitleCase('PT BCA', { preserveAcronyms: false })
|
|
233
|
+
* // → 'Pt Bca'
|
|
234
|
+
*
|
|
235
|
+
* toTitleCase('mobil dari jepang', { exceptions: ['jepang'] })
|
|
236
|
+
* // → 'Mobil dari jepang'
|
|
237
|
+
*
|
|
238
|
+
* toTitleCase('HELLO WORLD', { strict: true })
|
|
239
|
+
* // → 'Hello World'
|
|
240
|
+
* ```
|
|
241
|
+
*
|
|
242
|
+
* @public
|
|
243
|
+
*/
|
|
244
|
+
declare function toTitleCase(text: string, options?: TitleCaseOptions): string;
|
|
245
|
+
/**
|
|
246
|
+
* Convert text to sentence case (capitalize first letter of sentences only)
|
|
247
|
+
*
|
|
248
|
+
* This function capitalizes the first character of the text and the first
|
|
249
|
+
* character after sentence-ending punctuation (. ! ?), while keeping
|
|
250
|
+
* everything else in lowercase.
|
|
251
|
+
*
|
|
252
|
+
* **Sentence Detection Rules:**
|
|
253
|
+
* - Period (.), exclamation (!), question mark (?) mark sentence endings
|
|
254
|
+
* - Next letter after punctuation + space is capitalized
|
|
255
|
+
* - Handles multiple spaces and newlines
|
|
256
|
+
* - Does NOT treat abbreviations as sentence endings (e.g., "Dr. Smith")
|
|
257
|
+
*
|
|
258
|
+
* @param text - The text to convert to sentence case
|
|
259
|
+
* @returns The sentence-cased text
|
|
260
|
+
*
|
|
261
|
+
* @example
|
|
262
|
+
* Basic usage:
|
|
263
|
+
* ```typescript
|
|
264
|
+
* toSentenceCase('JOKO WIDODO ADALAH PRESIDEN')
|
|
265
|
+
* // → 'Joko widodo adalah presiden'
|
|
266
|
+
*
|
|
267
|
+
* toSentenceCase('joko widodo adalah presiden')
|
|
268
|
+
* // → 'Joko widodo adalah presiden'
|
|
269
|
+
* ```
|
|
270
|
+
*
|
|
271
|
+
* @example
|
|
272
|
+
* Multiple sentences:
|
|
273
|
+
* ```typescript
|
|
274
|
+
* toSentenceCase('halo, apa kabar? baik-baik saja.')
|
|
275
|
+
* // → 'Halo, apa kabar? Baik-baik saja.'
|
|
276
|
+
*
|
|
277
|
+
* toSentenceCase('jakarta. surabaya. bandung.')
|
|
278
|
+
* // → 'Jakarta. Surabaya. Bandung.'
|
|
279
|
+
* ```
|
|
280
|
+
*
|
|
281
|
+
* @example
|
|
282
|
+
* Different punctuation:
|
|
283
|
+
* ```typescript
|
|
284
|
+
* toSentenceCase('wow! amazing! fantastic!')
|
|
285
|
+
* // → 'Wow! Amazing! Fantastic!'
|
|
286
|
+
*
|
|
287
|
+
* toSentenceCase('siapa nama anda? saya joko.')
|
|
288
|
+
* // → 'Siapa nama anda? Saya joko.'
|
|
289
|
+
* ```
|
|
290
|
+
*
|
|
291
|
+
* @example
|
|
292
|
+
* Edge cases:
|
|
293
|
+
* ```typescript
|
|
294
|
+
* toSentenceCase('')
|
|
295
|
+
* // → ''
|
|
296
|
+
*
|
|
297
|
+
* toSentenceCase('hello')
|
|
298
|
+
* // → 'Hello'
|
|
299
|
+
*
|
|
300
|
+
* toSentenceCase(' hello. world. ')
|
|
301
|
+
* // → 'Hello. World.'
|
|
302
|
+
* ```
|
|
303
|
+
*
|
|
304
|
+
* @public
|
|
305
|
+
*/
|
|
306
|
+
declare function toSentenceCase(text: string): string;
|
|
307
|
+
|
|
308
|
+
/**
|
|
309
|
+
* Generate URL-safe slugs with Indonesian language support
|
|
310
|
+
*
|
|
311
|
+
* This function converts text into URL-friendly slugs by:
|
|
312
|
+
* - Converting to lowercase (configurable)
|
|
313
|
+
* - Replacing spaces with separators (default: hyphen)
|
|
314
|
+
* - Replacing symbols with Indonesian conjunctions (& → dan, / → atau)
|
|
315
|
+
* - Removing special characters
|
|
316
|
+
* - Collapsing multiple separators
|
|
317
|
+
* - Trimming leading/trailing separators
|
|
318
|
+
*
|
|
319
|
+
* **Character Handling:**
|
|
320
|
+
* - Alphanumeric (a-z, A-Z, 0-9): Preserved
|
|
321
|
+
* - Spaces: Replaced with separator
|
|
322
|
+
* - Ampersand (&): Replaced with "dan"
|
|
323
|
+
* - Slash (/): Replaced with "atau"
|
|
324
|
+
* - Hyphens (-): Preserved as separators
|
|
325
|
+
* - Other special chars: Removed
|
|
326
|
+
*
|
|
327
|
+
* @param text - The text to convert to slug
|
|
328
|
+
* @param options - Optional configuration
|
|
329
|
+
* @returns The URL-safe slug
|
|
330
|
+
*
|
|
331
|
+
* @example
|
|
332
|
+
* Basic usage:
|
|
333
|
+
* ```typescript
|
|
334
|
+
* slugify('Cara Mudah Belajar TypeScript')
|
|
335
|
+
* // → 'cara-mudah-belajar-typescript'
|
|
336
|
+
*
|
|
337
|
+
* slugify('HELLO WORLD')
|
|
338
|
+
* // → 'hello-world'
|
|
339
|
+
* ```
|
|
340
|
+
*
|
|
341
|
+
* @example
|
|
342
|
+
* Indonesian conjunctions:
|
|
343
|
+
* ```typescript
|
|
344
|
+
* slugify('Ibu & Anak: Tips Kesehatan')
|
|
345
|
+
* // → 'ibu-dan-anak-tips-kesehatan'
|
|
346
|
+
*
|
|
347
|
+
* slugify('Baju Pria/Wanita')
|
|
348
|
+
* // → 'baju-pria-atau-wanita'
|
|
349
|
+
*
|
|
350
|
+
* slugify('A & B / C')
|
|
351
|
+
* // → 'a-dan-b-atau-c'
|
|
352
|
+
* ```
|
|
353
|
+
*
|
|
354
|
+
* @example
|
|
355
|
+
* Special characters removed:
|
|
356
|
+
* ```typescript
|
|
357
|
+
* slugify('Harga Rp 100.000 (Diskon 20%)')
|
|
358
|
+
* // → 'harga-rp-100000-diskon-20'
|
|
359
|
+
*
|
|
360
|
+
* slugify('Email: test@example.com')
|
|
361
|
+
* // → 'email-testexamplecom'
|
|
362
|
+
* ```
|
|
363
|
+
*
|
|
364
|
+
* @example
|
|
365
|
+
* Multiple spaces/separators collapsed:
|
|
366
|
+
* ```typescript
|
|
367
|
+
* slugify('Produk Terbaru - - - 2024')
|
|
368
|
+
* // → 'produk-terbaru-2024'
|
|
369
|
+
*
|
|
370
|
+
* slugify(' Hello World ')
|
|
371
|
+
* // → 'hello-world'
|
|
372
|
+
* ```
|
|
373
|
+
*
|
|
374
|
+
* @example
|
|
375
|
+
* With options:
|
|
376
|
+
* ```typescript
|
|
377
|
+
* slugify('Hello World', { separator: '_' })
|
|
378
|
+
* // → 'hello_world'
|
|
379
|
+
*
|
|
380
|
+
* slugify('Hello World', { lowercase: false })
|
|
381
|
+
* // → 'Hello-World'
|
|
382
|
+
*
|
|
383
|
+
* slugify('C++ Programming', {
|
|
384
|
+
* replacements: { 'C++': 'cpp' }
|
|
385
|
+
* })
|
|
386
|
+
* // → 'cpp-programming'
|
|
387
|
+
*
|
|
388
|
+
* slugify('Hello-World', { trim: false })
|
|
389
|
+
* // → 'hello-world' (unchanged here; with trim: false, leading/trailing separators are not trimmed)
|
|
390
|
+
* ```
|
|
391
|
+
*
|
|
392
|
+
* @public
|
|
393
|
+
*/
|
|
394
|
+
declare function slugify(text: string, options?: SlugifyOptions): string;
|
|
395
|
+
|
|
396
|
+
/**
|
|
397
|
+
* Normalize all whitespace characters to single spaces
|
|
398
|
+
*
|
|
399
|
+
* This function:
|
|
400
|
+
* - Collapses multiple spaces into one
|
|
401
|
+
* - Converts tabs, newlines, and other whitespace to a single space
|
|
402
|
+
* - Trims leading and trailing whitespace
|
|
403
|
+
* - Handles Unicode whitespace characters
|
|
404
|
+
*
|
|
405
|
+
* **Whitespace Characters Normalized:**
|
|
406
|
+
* - Space (` `)
|
|
407
|
+
* - Tab (`\t`)
|
|
408
|
+
* - Newline (`\n`)
|
|
409
|
+
* - Carriage return (`\r`)
|
|
410
|
+
* - Form feed (`\f`)
|
|
411
|
+
* - Vertical tab (`\v`)
|
|
412
|
+
* - Non-breaking space (`\u00A0`)
|
|
413
|
+
* - Other Unicode spaces
|
|
414
|
+
*
|
|
415
|
+
* @param text - The text to normalize
|
|
416
|
+
* @returns Text with normalized whitespace
|
|
417
|
+
*
|
|
418
|
+
* @example
|
|
419
|
+
* Basic usage:
|
|
420
|
+
* ```typescript
|
|
421
|
+
* normalizeWhitespace('hello world')
|
|
422
|
+
* // → 'hello world'
|
|
423
|
+
*
|
|
424
|
+
* normalizeWhitespace('hello\tworld')
|
|
425
|
+
* // → 'hello world'
|
|
426
|
+
* ```
|
|
427
|
+
*
|
|
428
|
+
* @example
|
|
429
|
+
* Multiple types of whitespace:
|
|
430
|
+
* ```typescript
|
|
431
|
+
* normalizeWhitespace('hello\n\nworld')
|
|
432
|
+
* // → 'hello world'
|
|
433
|
+
*
|
|
434
|
+
* normalizeWhitespace('hello\r\nworld')
|
|
435
|
+
* // → 'hello world'
|
|
436
|
+
*
|
|
437
|
+
* normalizeWhitespace('line1\n\nline2\tword')
|
|
438
|
+
* // → 'line1 line2 word'
|
|
439
|
+
* ```
|
|
440
|
+
*
|
|
441
|
+
* @example
|
|
442
|
+
* Leading and trailing whitespace:
|
|
443
|
+
* ```typescript
|
|
444
|
+
* normalizeWhitespace(' hello world ')
|
|
445
|
+
* // → 'hello world'
|
|
446
|
+
*
|
|
447
|
+
* normalizeWhitespace('\n\thello\t\n')
|
|
448
|
+
* // → 'hello'
|
|
449
|
+
* ```
|
|
450
|
+
*
|
|
451
|
+
* @example
|
|
452
|
+
* Edge cases:
|
|
453
|
+
* ```typescript
|
|
454
|
+
* normalizeWhitespace('')
|
|
455
|
+
* // → ''
|
|
456
|
+
*
|
|
457
|
+
* normalizeWhitespace(' ')
|
|
458
|
+
* // → ''
|
|
459
|
+
*
|
|
460
|
+
* normalizeWhitespace('hello')
|
|
461
|
+
* // → 'hello'
|
|
462
|
+
* ```
|
|
463
|
+
*
|
|
464
|
+
* @public
|
|
465
|
+
*/
|
|
466
|
+
declare function normalizeWhitespace(text: string): string;
|
|
467
|
+
/**
|
|
468
|
+
* Remove or replace unwanted characters from text
|
|
469
|
+
*
|
|
470
|
+
* This function provides flexible text sanitization with options to:
|
|
471
|
+
* - Remove newlines
|
|
472
|
+
* - Remove extra spaces
|
|
473
|
+
* - Remove punctuation
|
|
474
|
+
* - Keep only allowed characters
|
|
475
|
+
* - Trim leading/trailing whitespace
|
|
476
|
+
*
|
|
477
|
+
* @param text - The text to sanitize
|
|
478
|
+
* @param options - Sanitization options
|
|
479
|
+
* @returns The sanitized text
|
|
480
|
+
*
|
|
481
|
+
* @example
|
|
482
|
+
* Remove extra spaces (default):
|
|
483
|
+
* ```typescript
|
|
484
|
+
* sanitize('hello world')
|
|
485
|
+
* // → 'hello world'
|
|
486
|
+
* ```
|
|
487
|
+
*
|
|
488
|
+
* @example
|
|
489
|
+
* Remove newlines:
|
|
490
|
+
* ```typescript
|
|
491
|
+
* sanitize('line1\nline2\nline3', { removeNewlines: true })
|
|
492
|
+
* // → 'line1 line2 line3'
|
|
493
|
+
* ```
|
|
494
|
+
*
|
|
495
|
+
* @example
|
|
496
|
+
* Remove punctuation:
|
|
497
|
+
* ```typescript
|
|
498
|
+
* sanitize('Hello, World!', { removePunctuation: true })
|
|
499
|
+
* // → 'Hello World'
|
|
500
|
+
* ```
|
|
501
|
+
*
|
|
502
|
+
* @example
|
|
503
|
+
* Allow only specific characters:
|
|
504
|
+
* ```typescript
|
|
505
|
+
* sanitize('ABC123!@#', { allowedChars: 'A-Za-z0-9' })
|
|
506
|
+
* // → 'ABC123'
|
|
507
|
+
*
|
|
508
|
+
* sanitize('Hello123!@#', { allowedChars: 'a-z' })
|
|
509
|
+
* // → 'ello'
|
|
510
|
+
* ```
|
|
511
|
+
*
|
|
512
|
+
* @example
|
|
513
|
+
* Combined options:
|
|
514
|
+
* ```typescript
|
|
515
|
+
* sanitize(' Hello,\n World! ', {
|
|
516
|
+
* removeNewlines: true,
|
|
517
|
+
* removePunctuation: true,
|
|
518
|
+
* removeExtraSpaces: true,
|
|
519
|
+
* trim: true
|
|
520
|
+
* })
|
|
521
|
+
* // → 'Hello World'
|
|
522
|
+
* ```
|
|
523
|
+
*
|
|
524
|
+
* @public
|
|
525
|
+
*/
|
|
526
|
+
declare function sanitize(text: string, options?: SanitizeOptions): string;
|
|
527
|
+
/**
|
|
528
|
+
* Remove diacritical marks (accents) from characters
|
|
529
|
+
*
|
|
530
|
+
* Converts accented characters to their base form:
|
|
531
|
+
* - é → e
|
|
532
|
+
* - ñ → n
|
|
533
|
+
* - ü → u
|
|
534
|
+
* - etc.
|
|
535
|
+
*
|
|
536
|
+
* Useful for:
|
|
537
|
+
* - Search normalization
|
|
538
|
+
* - Sorting/comparison
|
|
539
|
+
* - URL generation
|
|
540
|
+
* - Database queries
|
|
541
|
+
*
|
|
542
|
+
* @param text - The text to remove accents from
|
|
543
|
+
* @returns Text with accents removed
|
|
544
|
+
*
|
|
545
|
+
* @example
|
|
546
|
+
* Basic usage:
|
|
547
|
+
* ```typescript
|
|
548
|
+
* removeAccents('café')
|
|
549
|
+
* // → 'cafe'
|
|
550
|
+
*
|
|
551
|
+
* removeAccents('résumé')
|
|
552
|
+
* // → 'resume'
|
|
553
|
+
* ```
|
|
554
|
+
*
|
|
555
|
+
* @example
|
|
556
|
+
* Various accents:
|
|
557
|
+
* ```typescript
|
|
558
|
+
* removeAccents('naïve')
|
|
559
|
+
* // → 'naive'
|
|
560
|
+
*
|
|
561
|
+
* removeAccents('Zürich')
|
|
562
|
+
* // → 'Zurich'
|
|
563
|
+
*
|
|
564
|
+
* removeAccents('São Paulo')
|
|
565
|
+
* // → 'Sao Paulo'
|
|
566
|
+
* ```
|
|
567
|
+
*
|
|
568
|
+
* @example
|
|
569
|
+
* Mixed text:
|
|
570
|
+
* ```typescript
|
|
571
|
+
* removeAccents('École française à Montréal')
|
|
572
|
+
* // → 'Ecole francaise a Montreal'
|
|
573
|
+
* ```
|
|
574
|
+
*
|
|
575
|
+
* @public
|
|
576
|
+
*/
|
|
577
|
+
declare function removeAccents(text: string): string;
|
|
578
|
+
|
|
579
|
+
/**
|
|
580
|
+
* Expand Indonesian abbreviations to their full form
|
|
581
|
+
*
|
|
582
|
+
* This function expands common Indonesian abbreviations like:
|
|
583
|
+
* - Address: Jl. → Jalan, Kec. → Kecamatan
|
|
584
|
+
* - Titles: Dr. → Doktor, S.H. → Sarjana Hukum
|
|
585
|
+
* - Honorifics: Bpk. → Bapak, Yth. → Yang Terhormat
|
|
586
|
+
* - Organizations: PT. → Perseroan Terbatas
|
|
587
|
+
* - Common: dll. → dan lain-lain
|
|
588
|
+
*
|
|
589
|
+
* **Features:**
|
|
590
|
+
* - Case-insensitive matching (Jl. = jl. = JL.)
|
|
591
|
+
* - Mode filtering (all, address, title, org)
|
|
592
|
+
* - Custom mapping support
|
|
593
|
+
* - Preserves surrounding text
|
|
594
|
+
* - Multiple abbreviations in one string
|
|
595
|
+
*
|
|
596
|
+
* @param text - The text containing abbreviations to expand
|
|
597
|
+
* @param options - Optional configuration
|
|
598
|
+
* @returns Text with abbreviations expanded
|
|
599
|
+
*
|
|
600
|
+
* @example
|
|
601
|
+
* Basic usage:
|
|
602
|
+
* ```typescript
|
|
603
|
+
* expandAbbreviation('Jl. Sudirman No. 123')
|
|
604
|
+
* // → 'Jalan Sudirman Nomor 123'
|
|
605
|
+
*
|
|
606
|
+
* expandAbbreviation('Dr. Joko Widodo, S.H.')
|
|
607
|
+
* // → 'Doktor Joko Widodo, Sarjana Hukum'
|
|
608
|
+
* ```
|
|
609
|
+
*
|
|
610
|
+
* @example
|
|
611
|
+
* Address abbreviations:
|
|
612
|
+
* ```typescript
|
|
613
|
+
* expandAbbreviation('Kab. Bogor, Kec. Ciawi')
|
|
614
|
+
* // → 'Kabupaten Bogor, Kecamatan Ciawi'
|
|
615
|
+
*
|
|
616
|
+
* expandAbbreviation('Jl. Merdeka Gg. 5 No. 10')
|
|
617
|
+
* // → 'Jalan Merdeka Gang 5 Nomor 10'
|
|
618
|
+
* ```
|
|
619
|
+
*
|
|
620
|
+
* @example
|
|
621
|
+
* Academic titles:
|
|
622
|
+
* ```typescript
|
|
623
|
+
* expandAbbreviation('Prof. Dr. Ir. Ahmad')
|
|
624
|
+
* // → 'Profesor Doktor Insinyur Ahmad'
|
|
625
|
+
*
|
|
626
|
+
* expandAbbreviation('Saya lulusan S.T. dari ITB')
|
|
627
|
+
* // → 'Saya lulusan Sarjana Teknik dari ITB'
|
|
628
|
+
* ```
|
|
629
|
+
*
|
|
630
|
+
* @example
|
|
631
|
+
* Honorifics:
|
|
632
|
+
* ```typescript
|
|
633
|
+
* expandAbbreviation('Yth. Bpk. H. Ahmad')
|
|
634
|
+
* // → 'Yang Terhormat Bapak Haji Ahmad'
|
|
635
|
+
* ```
|
|
636
|
+
*
|
|
637
|
+
* @example
|
|
638
|
+
* Organizations:
|
|
639
|
+
* ```typescript
|
|
640
|
+
* expandAbbreviation('PT. Maju Jaya Tbk.')
|
|
641
|
+
* // → 'Perseroan Terbatas Maju Jaya Terbuka'
|
|
642
|
+
* ```
|
|
643
|
+
*
|
|
644
|
+
* @example
|
|
645
|
+
* Mode filtering:
|
|
646
|
+
* ```typescript
|
|
647
|
+
* expandAbbreviation('Dr. Joko di Jl. Sudirman', { mode: 'address' })
|
|
648
|
+
* // → 'Dr. Joko di Jalan Sudirman'
|
|
649
|
+
* // Only expands address abbreviations
|
|
650
|
+
*
|
|
651
|
+
* expandAbbreviation('Prof. Dr. di Jl. Sudirman', { mode: 'title' })
|
|
652
|
+
* // → 'Profesor Doktor di Jl. Sudirman'
|
|
653
|
+
* // Only expands title abbreviations
|
|
654
|
+
* ```
|
|
655
|
+
*
|
|
656
|
+
* @example
|
|
657
|
+
* Custom mappings:
|
|
658
|
+
* ```typescript
|
|
659
|
+
* expandAbbreviation('BUMN adalah perusahaan negara', {
|
|
660
|
+
* customMap: { 'BUMN': 'Badan Usaha Milik Negara' }
|
|
661
|
+
* })
|
|
662
|
+
* // → 'Badan Usaha Milik Negara adalah perusahaan negara'
|
|
663
|
+
* ```
|
|
664
|
+
*
|
|
665
|
+
* @example
|
|
666
|
+
* Case sensitivity:
|
|
667
|
+
* ```typescript
|
|
668
|
+
* expandAbbreviation('jl. sudirman')
|
|
669
|
+
* // → 'Jalan sudirman' (default: preserves surrounding case)
|
|
670
|
+
*
|
|
671
|
+
* expandAbbreviation('JL. SUDIRMAN')
|
|
672
|
+
* // → 'Jalan SUDIRMAN'
|
|
673
|
+
* ```
|
|
674
|
+
*
|
|
675
|
+
* @public
|
|
676
|
+
*/
|
|
677
|
+
declare function expandAbbreviation(text: string, options?: ExpandOptions): string;
|
|
678
|
+
/**
|
|
679
|
+
* Contract full forms to abbreviations (the reverse of expandAbbreviation)
|
|
680
|
+
*
|
|
681
|
+
* @param text - The text containing full forms to contract
|
|
682
|
+
* @param options - Optional configuration
|
|
683
|
+
* @returns Text with full forms contracted
|
|
684
|
+
*
|
|
685
|
+
* @example
|
|
686
|
+
* ```typescript
|
|
687
|
+
* contractAbbreviation('Jalan Sudirman Nomor 123')
|
|
688
|
+
* // → 'Jl. Sudirman No. 123'
|
|
689
|
+
*
|
|
690
|
+
* contractAbbreviation('Doktor Ahmad, Sarjana Hukum')
|
|
691
|
+
* // → 'Dr. Ahmad, S.H.'
|
|
692
|
+
* ```
|
|
693
|
+
*
|
|
694
|
+
* @public
|
|
695
|
+
*/
|
|
696
|
+
declare function contractAbbreviation(text: string, options?: {
|
|
697
|
+
mode?: 'all' | 'address' | 'title' | 'org';
|
|
698
|
+
}): string;
|
|
699
|
+
|
|
700
|
+
/**
|
|
701
|
+
* Truncate text to specified length, word-aware
|
|
702
|
+
*
|
|
703
|
+
* This function shortens text to a maximum length while:
|
|
704
|
+
* - Respecting word boundaries (don't cut words in half)
|
|
705
|
+
* - Adding ellipsis to indicate truncation
|
|
706
|
+
* - Preserving original text if already short enough
|
|
707
|
+
* - Accounting for ellipsis length in total character count
|
|
708
|
+
*
|
|
709
|
+
* **Features:**
|
|
710
|
+
* - Smart word boundary detection
|
|
711
|
+
* - Customizable ellipsis
|
|
712
|
+
* - No truncation for short text
|
|
713
|
+
* - Handles edge cases gracefully
|
|
714
|
+
*
|
|
715
|
+
* @param text - The text to truncate
|
|
716
|
+
* @param maxLength - Maximum length of output (including ellipsis)
|
|
717
|
+
* @param options - Optional configuration
|
|
718
|
+
* @returns The truncated text with ellipsis if needed
|
|
719
|
+
*
|
|
720
|
+
* @example
|
|
721
|
+
* Basic usage:
|
|
722
|
+
* ```typescript
|
|
723
|
+
* truncate('Ini adalah contoh text yang panjang', 20)
|
|
724
|
+
* // → 'Ini adalah contoh...'
|
|
725
|
+
*
|
|
726
|
+
* truncate('Short text', 20)
|
|
727
|
+
* // → 'Short text' (no truncation needed)
|
|
728
|
+
* ```
|
|
729
|
+
*
|
|
730
|
+
* @example
|
|
731
|
+
* Word boundary handling:
|
|
732
|
+
* ```typescript
|
|
733
|
+
* truncate('Ini adalah contoh text yang panjang', 20, { wordBoundary: true })
|
|
734
|
+
* // → 'Ini adalah contoh...' (stops at word)
|
|
735
|
+
*
|
|
736
|
+
* truncate('Ini adalah contoh text yang panjang', 20, { wordBoundary: false })
|
|
737
|
+
* // → 'Ini adalah contoh t...' (cuts mid-word)
|
|
738
|
+
* ```
|
|
739
|
+
*
|
|
740
|
+
* @example
|
|
741
|
+
* Custom ellipsis:
|
|
742
|
+
* ```typescript
|
|
743
|
+
* truncate('Ini adalah contoh text yang panjang', 20, { ellipsis: '…' })
|
|
744
|
+
* // → 'Ini adalah contoh…'
|
|
745
|
+
*
|
|
746
|
+
* truncate('Ini adalah contoh text yang panjang', 20, { ellipsis: ' [...]' })
|
|
747
|
+
* // → 'Ini adalah [...]'
|
|
748
|
+
* ```
|
|
749
|
+
*
|
|
750
|
+
* @example
|
|
751
|
+
* Edge cases:
|
|
752
|
+
* ```typescript
|
|
753
|
+
* truncate('', 10)
|
|
754
|
+
* // → ''
|
|
755
|
+
*
|
|
756
|
+
* truncate('Hello', 10)
|
|
757
|
+
* // → 'Hello'
|
|
758
|
+
*
|
|
759
|
+
* truncate('Hello World', 11)
|
|
760
|
+
* // → 'Hello World' (exact length, no ellipsis)
|
|
761
|
+
* ```
|
|
762
|
+
*
|
|
763
|
+
* @public
|
|
764
|
+
*/
|
|
765
|
+
declare function truncate(text: string, maxLength: number, options?: TruncateOptions): string;
|
|
766
|
+
/**
|
|
767
|
+
* Extract words from text, respecting Indonesian language rules
|
|
768
|
+
*
|
|
769
|
+
* This function splits text into individual words while:
|
|
770
|
+
* - Respecting hyphenated words (anak-anak as single word)
|
|
771
|
+
* - Filtering by minimum length
|
|
772
|
+
* - Optional lowercase conversion
|
|
773
|
+
* - Removing punctuation and special characters
|
|
774
|
+
*
|
|
775
|
+
* **Features:**
|
|
776
|
+
* - Indonesian hyphenation support (anak-anak, buku-buku)
|
|
777
|
+
* - Minimum word length filtering
|
|
778
|
+
* - Case normalization
|
|
779
|
+
* - Handles punctuation gracefully
|
|
780
|
+
*
|
|
781
|
+
* @param text - The text to extract words from
|
|
782
|
+
* @param options - Optional configuration
|
|
783
|
+
* @returns Array of extracted words
|
|
784
|
+
*
|
|
785
|
+
* @example
|
|
786
|
+
* Basic usage:
|
|
787
|
+
* ```typescript
|
|
788
|
+
* extractWords('Anak-anak bermain di taman')
|
|
789
|
+
* // → ['Anak-anak', 'bermain', 'di', 'taman']
|
|
790
|
+
*
|
|
791
|
+
* extractWords('Hello, World! How are you?')
|
|
792
|
+
* // → ['Hello', 'World', 'How', 'are', 'you']
|
|
793
|
+
* ```
|
|
794
|
+
*
|
|
795
|
+
* @example
|
|
796
|
+
* Hyphenated word handling:
|
|
797
|
+
* ```typescript
|
|
798
|
+
* extractWords('Anak-anak bermain di taman', { includeHyphenated: true })
|
|
799
|
+
* // → ['Anak-anak', 'bermain', 'di', 'taman']
|
|
800
|
+
*
|
|
801
|
+
* extractWords('Anak-anak bermain di taman', { includeHyphenated: false })
|
|
802
|
+
* // → ['Anak', 'anak', 'bermain', 'di', 'taman']
|
|
803
|
+
* ```
|
|
804
|
+
*
|
|
805
|
+
* @example
|
|
806
|
+
* Minimum length filtering:
|
|
807
|
+
* ```typescript
|
|
808
|
+
* extractWords('Di rumah ada 3 kucing', { minLength: 3 })
|
|
809
|
+
* // → ['rumah', 'ada', 'kucing']
|
|
810
|
+
* // 'Di' (2 chars) and '3' (1 char) filtered out
|
|
811
|
+
*
|
|
812
|
+
* extractWords('a b cd def ghij', { minLength: 3 })
|
|
813
|
+
* // → ['def', 'ghij']
|
|
814
|
+
* ```
|
|
815
|
+
*
|
|
816
|
+
* @example
|
|
817
|
+
* Lowercase conversion:
|
|
818
|
+
* ```typescript
|
|
819
|
+
* extractWords('Hello WORLD', { lowercase: true })
|
|
820
|
+
* // → ['hello', 'world']
|
|
821
|
+
*
|
|
822
|
+
* extractWords('Hello WORLD', { lowercase: false })
|
|
823
|
+
* // → ['Hello', 'WORLD']
|
|
824
|
+
* ```
|
|
825
|
+
*
|
|
826
|
+
* @example
|
|
827
|
+
* Combined options:
|
|
828
|
+
* ```typescript
|
|
829
|
+
* extractWords('Anak-Anak BERMAIN di Taman', {
|
|
830
|
+
* includeHyphenated: true,
|
|
831
|
+
* minLength: 3,
|
|
832
|
+
* lowercase: true
|
|
833
|
+
* })
|
|
834
|
+
* // → ['anak-anak', 'bermain', 'taman']
|
|
835
|
+
* // 'di' filtered out (< 3 chars)
|
|
836
|
+
* ```
|
|
837
|
+
*
|
|
838
|
+
* @example
|
|
839
|
+
* Edge cases:
|
|
840
|
+
* ```typescript
|
|
841
|
+
* extractWords('')
|
|
842
|
+
* // → []
|
|
843
|
+
*
|
|
844
|
+
* extractWords(' ')
|
|
845
|
+
* // → []
|
|
846
|
+
*
|
|
847
|
+
* extractWords('!!!@@##')
|
|
848
|
+
* // → []
|
|
849
|
+
* ```
|
|
850
|
+
*
|
|
851
|
+
* @public
|
|
852
|
+
*/
|
|
853
|
+
declare function extractWords(text: string, options?: ExtractOptions): string[];
|
|
854
|
+
|
|
855
|
+
/**
|
|
856
|
+
* Compare strings with Indonesian-aware normalization
|
|
857
|
+
*
|
|
858
|
+
* This function allows flexible string comparison with options to ignore
|
|
859
|
+
* case, whitespace, and accents. Useful for search, filtering, and
|
|
860
|
+
* validation.
|
|
861
|
+
*
|
|
862
|
+
* **Features:**
|
|
863
|
+
* - Case-insensitive comparison (default: false)
|
|
864
|
+
* - Whitespace normalization (ignore extra spaces)
|
|
865
|
+
* - Accent removal (café == cafe)
|
|
866
|
+
* - Null-safe (handles empty strings)
|
|
867
|
+
*
|
|
868
|
+
* @param str1 - First string to compare
|
|
869
|
+
* @param str2 - Second string to compare
|
|
870
|
+
* @param options - Comparison options
|
|
871
|
+
* @returns True if strings match according to options
|
|
872
|
+
*
|
|
873
|
+
* @example
|
|
874
|
+
* Basic matching:
|
|
875
|
+
* ```typescript
|
|
876
|
+
* compareStrings('Hello', 'Hello') // → true
|
|
877
|
+
* compareStrings('Hello', 'hello') // → false
|
|
878
|
+
* ```
|
|
879
|
+
*
|
|
880
|
+
* @example
|
|
881
|
+
* Case insensitive:
|
|
882
|
+
* ```typescript
|
|
883
|
+
* compareStrings('Hello', 'hello', { caseSensitive: false }) // → true
|
|
884
|
+
* // Note: pass caseSensitive explicitly whenever letter case matters.
|
|
885
|
+
* // The default is documented inconsistently: the CompareOptions comment says
|
|
886
|
+
* // false, while the basic example above implies true. Verify against the implementation.
|
|
887
|
+
* ```
|
|
888
|
+
*
|
|
889
|
+
* @example
|
|
890
|
+
* Ignore whitespace:
|
|
891
|
+
* ```typescript
|
|
892
|
+
* compareStrings(' Hello World ', 'Hello World', { ignoreWhitespace: true })
|
|
893
|
+
* // → true
|
|
894
|
+
* ```
|
|
895
|
+
*
|
|
896
|
+
* @example
|
|
897
|
+
* Ignore accents:
|
|
898
|
+
* ```typescript
|
|
899
|
+
* compareStrings('café', 'cafe', { ignoreAccents: true })
|
|
900
|
+
* // → true
|
|
901
|
+
* ```
|
|
902
|
+
*
|
|
903
|
+
* @public
|
|
904
|
+
*/
|
|
905
|
+
declare function compareStrings(str1: string, str2: string, options?: CompareOptions): boolean;
|
|
906
|
+
/**
|
|
907
|
+
* Calculate similarity score between two strings (0-1) using Levenshtein distance
|
|
908
|
+
*
|
|
909
|
+
* This function measures the difference between two strings and returns a score
|
|
910
|
+
* where 1.0 means identical and 0.0 means completely different.
|
|
911
|
+
*
|
|
912
|
+
* **Algorithm:**
|
|
913
|
+
* Uses Levenshtein distance to calculate the minimum number of single-character
|
|
914
|
+
* edits (insertions, deletions, substitutions) required to change one string
|
|
915
|
+
* into the other.
|
|
916
|
+
*
|
|
917
|
+
* @param str1 - First string
|
|
918
|
+
* @param str2 - Second string
|
|
919
|
+
* @returns Similarity score between 0.0 and 1.0
|
|
920
|
+
*
|
|
921
|
+
* @example
|
|
922
|
+
* Basic Usage:
|
|
923
|
+
* ```typescript
|
|
924
|
+
* similarity('hello', 'hello') // → 1.0 (identical)
|
|
925
|
+
* similarity('hello', 'hallo') // → 0.8 (1 edit over length 5: 1 − 1/5)
|
|
926
|
+
* similarity('hello', 'world') // → 0.2 (4 edits over length 5: 1 − 4/5)
|
|
927
|
+
* ```
|
|
928
|
+
*
|
|
929
|
+
* @example
|
|
930
|
+
* Case sensitivity:
|
|
931
|
+
* Note: This function is case-sensitive. Lowercase both inputs yourself if you
|
|
932
|
+
* need case-insensitive similarity (compareStrings only returns a boolean match, not a score).
|
|
933
|
+
*
|
|
934
|
+
* @public
|
|
935
|
+
*/
|
|
936
|
+
declare function similarity(str1: string, str2: string): number;
|
|
937
|
+
|
|
938
|
+
export { type CompareOptions as C, type ExtractOptions as E, type SlugifyOptions as S, type TitleCaseOptions as T, toSentenceCase as a, sanitize as b, capitalize as c, contractAbbreviation as d, expandAbbreviation as e, truncate as f, extractWords as g, compareStrings as h, similarity as i, type SanitizeOptions as j, type TruncateOptions as k, normalizeWhitespace as n, removeAccents as r, slugify as s, toTitleCase as t };
|