@indodev/toolkit 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,915 @@
1
+ 'use strict';
2
+
3
+ // src/text/constants.ts
4
/**
 * Words kept in lowercase by title casing unless they are the first word.
 * Entries are stored in lowercase and grouped by category.
 */
var LOWERCASE_WORDS = [
  // Indonesian prepositions (kata depan)
  "di", "ke", "dari", "pada", "dalam", "untuk", "dengan", "oleh", "kepada",
  "terhadap", "tentang", "tanpa", "hingga", "sampai", "sejak", "menuju", "melalui",

  // Indonesian conjunctions (kata hubung)
  "dan", "atau", "tetapi", "namun", "serta", "maupun", "melainkan", "sedangkan",

  // Indonesian articles/particles
  "yang", "sebagai", "adalah", "ialah", "yaitu", "bahwa", "akan", "telah", "sudah", "belum",

  // English articles
  "a", "an", "the",

  // English conjunctions
  "and", "or", "but", "nor", "for", "yet", "so", "as",

  // English prepositions (short ones, < 5 letters)
  "at", "by", "in", "of", "on", "to", "up", "via", "per", "off", "out"

  // Longer English prepositions are intentionally left out; some style guides
  // capitalize these:
  //   'about', 'above', 'across', 'after', 'among', 'below', 'under', 'until', 'with'
];
79
/**
 * Acronyms and abbreviations whose canonical spelling should be preserved by
 * title casing. Most entries are all-caps; a few are mixed case
 * ("Tbk", "PPh", "WiFi", "SaaS", degree titles such as "S.Kom").
 *
 * Each entry appears exactly once. "UI" covers both Universitas Indonesia and
 * User Interface, and "ATM" is listed only under common services.
 */
var ACRONYMS = [
  // Indonesian government & military
  "DKI", // Daerah Khusus Ibukota
  "DIY", // Daerah Istimewa Yogyakarta
  "TNI", // Tentara Nasional Indonesia
  "POLRI", // Kepolisian Republik Indonesia
  "ABRI", // Angkatan Bersenjata Republik Indonesia
  "MPR", // Majelis Permusyawaratan Rakyat
  "DPR", // Dewan Perwakilan Rakyat
  "KPK", // Komisi Pemberantasan Korupsi
  "BIN", // Badan Intelijen Negara

  // Indonesian business entities
  "PT", // Perseroan Terbatas
  "CV", // Commanditaire Vennootschap
  "UD", // Usaha Dagang
  "PD", // Perusahaan Daerah
  "Tbk", // Terbuka (publicly traded)
  "BUMN", // Badan Usaha Milik Negara
  "BUMD", // Badan Usaha Milik Daerah

  // Indonesian banks
  "BCA", // Bank Central Asia
  "BRI", // Bank Rakyat Indonesia
  "BNI", // Bank Negara Indonesia
  "BTN", // Bank Tabungan Negara
  "BSI", // Bank Syariah Indonesia
  "BPD", // Bank Pembangunan Daerah

  // Indonesian government services
  "KTP", // Kartu Tanda Penduduk
  "NIK", // Nomor Induk Kependudukan
  "NPWP", // Nomor Pokok Wajib Pajak
  "SIM", // Surat Izin Mengemudi
  "STNK", // Surat Tanda Nomor Kendaraan
  "BPJS", // Badan Penyelenggara Jaminan Sosial
  "KIS", // Kartu Indonesia Sehat
  "KIP", // Kartu Indonesia Pintar
  "PKH", // Program Keluarga Harapan

  // Indonesian utilities & infrastructure
  "PLN", // Perusahaan Listrik Negara
  "PDAM", // Perusahaan Daerah Air Minum
  "PGN", // Perusahaan Gas Negara
  "KAI", // Kereta Api Indonesia
  "MRT", // Mass Rapid Transit
  "LRT", // Light Rail Transit

  // Indonesian taxes & fees
  "PBB", // Pajak Bumi dan Bangunan
  "PPh", // Pajak Penghasilan
  "PPN", // Pajak Pertambahan Nilai
  "BPHTB", // Bea Perolehan Hak atas Tanah dan Bangunan

  // Indonesian education
  "UI", // Universitas Indonesia (also User Interface)
  "ITB", // Institut Teknologi Bandung
  "UGM", // Universitas Gadjah Mada
  "IPB", // Institut Pertanian Bogor
  "ITS", // Institut Teknologi Sepuluh Nopember
  "UNPAD", // Universitas Padjadjaran
  "UNDIP", // Universitas Diponegoro
  "UNAIR", // Universitas Airlangga
  "UNS", // Universitas Sebelas Maret

  // Indonesian degrees (gelar)
  "S.Pd", // Sarjana Pendidikan
  "S.H", // Sarjana Hukum
  "S.E", // Sarjana Ekonomi
  "S.T", // Sarjana Teknik
  "S.Kom", // Sarjana Komputer
  "S.Si", // Sarjana Sains
  "S.Sos", // Sarjana Sosial
  "M.Pd", // Magister Pendidikan
  "M.M", // Magister Manajemen
  "M.T", // Magister Teknik
  "M.Kom", // Magister Komputer

  // Common services
  "ATM", // Automated Teller Machine
  "POS", // Point of Sale
  "SMS", // Short Message Service
  "GPS", // Global Positioning System
  "WiFi", // Wireless Fidelity (technically Wi-Fi)
  "USB", // Universal Serial Bus
  "PIN", // Personal Identification Number
  "OTP", // One Time Password
  "QR", // Quick Response

  // Technology & IT
  "IT", // Information Technology
  "AI", // Artificial Intelligence
  "ML", // Machine Learning
  "API", // Application Programming Interface
  "UX", // User Experience
  "SEO", // Search Engine Optimization
  "SaaS", // Software as a Service
  "CRM", // Customer Relationship Management
  "ERP", // Enterprise Resource Planning

  // Business titles
  "CEO", // Chief Executive Officer
  "CFO", // Chief Financial Officer
  "CTO", // Chief Technology Officer
  "COO", // Chief Operating Officer
  "CMO", // Chief Marketing Officer
  "HR", // Human Resources
  "PR", // Public Relations
  "VP", // Vice President
  "GM", // General Manager

  // International organizations
  "UN", // United Nations
  "WHO", // World Health Organization
  "UNESCO", // United Nations Educational, Scientific and Cultural Organization
  "NATO", // North Atlantic Treaty Organization
  "ASEAN", // Association of Southeast Asian Nations
  "APEC", // Asia-Pacific Economic Cooperation
  "WTO", // World Trade Organization
  "IMF", // International Monetary Fund

  // Medical
  "ICU", // Intensive Care Unit
  "ER", // Emergency Room
  "MRI", // Magnetic Resonance Imaging
  "CT", // Computed Tomography
  "DNA", // Deoxyribonucleic Acid
  "RNA", // Ribonucleic Acid
  "HIV", // Human Immunodeficiency Virus
  "AIDS", // Acquired Immunodeficiency Syndrome
  "COVID", // Coronavirus Disease

  // Measurements & units
  "KM", // Kilometer
  "CM", // Centimeter
  "MM", // Millimeter
  "KG", // Kilogram
  "RPM", // Revolutions Per Minute
  "MPH", // Miles Per Hour
  "KPH", // Kilometers Per Hour

  // Finance
  "IPO", // Initial Public Offering
  "ROI", // Return on Investment
  "GDP", // Gross Domestic Product
  "VAT" // Value Added Tax
];
332
/**
 * Abbreviation -> full-form lookup table.
 *
 * Key insertion order is significant: consumers iterate the entries in order
 * and use a stable sort on key length. A handful of entries map a word to
 * itself (e.g. "Kota", "Blok", "Ibu").
 */
var ABBREVIATIONS = {
  // ---------- Address abbreviations ----------
  "Jl.": "Jalan",
  "Gg.": "Gang",
  "No.": "Nomor",
  "Kp.": "Kampung",
  "Ds.": "Desa",
  "Kel.": "Kelurahan",
  "Kec.": "Kecamatan",
  "Kab.": "Kabupaten",
  "Kota": "Kota",
  "Prov.": "Provinsi",
  "Prop.": "Provinsi",
  "Rt.": "Rukun Tetangga",
  "Rw.": "Rukun Warga",
  "Blok": "Blok",
  "Komp.": "Kompleks",
  "Perumahan": "Perumahan",
  "Perum.": "Perumahan",

  // ---------- Academic titles ----------
  "Dr.": "Doktor",
  "Ir.": "Insinyur",
  "Prof.": "Profesor",
  "Drs.": "Doktorandus",
  "Dra.": "Doktoranda",

  // Bachelor degrees
  "S.Pd.": "Sarjana Pendidikan",
  "S.H.": "Sarjana Hukum",
  "S.E.": "Sarjana Ekonomi",
  "S.T.": "Sarjana Teknik",
  "S.Kom.": "Sarjana Komputer",
  "S.Si.": "Sarjana Sains",
  "S.Sos.": "Sarjana Sosial",
  "S.I.Kom.": "Sarjana Ilmu Komunikasi",
  "S.S.": "Sarjana Sastra",
  "S.Psi.": "Sarjana Psikologi",
  "S.Farm.": "Sarjana Farmasi",
  "S.Ked.": "Sarjana Kedokteran",

  // Master degrees
  "M.Sc.": "Master of Science",
  "M.M.": "Magister Manajemen",
  "M.Pd.": "Magister Pendidikan",
  "M.T.": "Magister Teknik",
  "M.Kom.": "Magister Komputer",
  "M.Si.": "Magister Sains",
  "M.H.": "Magister Hukum",
  "M.A.": "Master of Arts",
  "MBA": "Master of Business Administration",

  // ---------- Honorifics ----------
  "Bpk.": "Bapak",
  "Ibu": "Ibu",
  "Sdr.": "Saudara",
  "Sdri.": "Saudari",
  "Yth.": "Yang Terhormat",
  "H.": "Haji",
  "Hj.": "Hajjah",
  "Tn.": "Tuan",
  "Ny.": "Nyonya",
  "Nn.": "Nona",

  // ---------- Organizations ----------
  "PT.": "Perseroan Terbatas",
  "CV.": "Commanditaire Vennootschap",
  "UD.": "Usaha Dagang",
  "PD.": "Perusahaan Daerah",
  "Tbk.": "Terbuka",
  "Koperasi": "Koperasi",
  "Yayasan": "Yayasan",

  // ---------- Common abbreviations ----------
  "dst.": "dan seterusnya",
  "dsb.": "dan sebagainya",
  "dll.": "dan lain-lain",
  "dkk.": "dan kawan-kawan",
  "a.n.": "atas nama",
  "u.p.": "untuk perhatian",
  "u.b.": "untuk beliau",
  "c.q.": "casu quo",
  "hlm.": "halaman",
  "tgl.": "tanggal",
  "bln.": "bulan",
  "thn.": "tahun",
  "ttd.": "tertanda",

  // ---------- Contact information ----------
  "Tlp.": "Telepon",
  "Telp.": "Telepon",
  "HP.": "Handphone",
  "Fax": "Faksimile",
  "Email": "Email",
  "Website": "Website",

  // ---------- Days (Indonesian) ----------
  "Sen.": "Senin",
  "Sel.": "Selasa",
  "Rab.": "Rabu",
  "Kam.": "Kamis",
  "Jum.": "Jumat",
  "Sab.": "Sabtu",
  "Min.": "Minggu",

  // ---------- Months (Indonesian) ----------
  "Jan.": "Januari",
  "Feb.": "Februari",
  "Mar.": "Maret",
  "Apr.": "April",
  "Mei": "Mei",
  "Jun.": "Juni",
  "Jul.": "Juli",
  "Agt.": "Agustus",
  "Sep.": "September",
  "Okt.": "Oktober",
  "Nov.": "November",
  "Des.": "Desember",

  // ---------- Units & measurements ----------
  "kg.": "kilogram",
  "gr.": "gram",
  "lt.": "liter",
  "ml.": "mililiter",
  "km.": "kilometer",
  "cm.": "sentimeter",
  "mm.": "milimeter",
  "m2.": "meter persegi",
  "m3.": "meter kubik",
  "ha.": "hektar"
};
453
+
454
+ // src/text/capitalization.ts
455
/**
 * Upper-cases the first character of `text` and lower-cases everything after it.
 *
 * @param text - String to transform; falsy values are returned unchanged.
 * @returns The transformed string, or the original falsy value.
 */
function capitalize(text) {
  if (!text) {
    return text;
  }
  const head = text.charAt(0);
  const tail = text.slice(1);
  return `${head.toUpperCase()}${tail.toLowerCase()}`;
}
459
/**
 * Converts `text` to title case.
 *
 * Whitespace is trimmed and collapsed first, then each space-separated word is
 * cased by `processWord` (or `processHyphenatedWord` when it contains a "-").
 *
 * @param text - Input string; falsy values are returned unchanged.
 * @param options - Optional settings:
 *   - `preserveAcronyms` (default `true`): pass-through flag for acronym handling.
 *   - `strict` (default `false`): forwarded to the word processors.
 *   - `exceptions` (default `[]`): extra words to keep lowercase when they are
 *     not the first word. Matched case-insensitively.
 * @returns The title-cased string.
 */
function toTitleCase(text, options) {
  if (!text) return text;
  const {
    preserveAcronyms = true,
    strict = false,
    exceptions = []
  } = options || {};
  // Words are looked up in lowercase, so caller-supplied exceptions must be
  // lowercased too; otherwise an exception such as "Versus" could never match.
  const lowercaseSet = /* @__PURE__ */ new Set([
    ...LOWERCASE_WORDS,
    ...exceptions.map((word) => word.toLowerCase())
  ]);
  const acronymSet = new Set(ACRONYMS);
  const context = { lowercaseSet, acronymSet, preserveAcronyms, strict };
  const words = normalizeSpaces(text).split(" ");
  return words.map((word, index) => {
    if (!word) return word;
    const isFirstWord = index === 0;
    return word.includes("-")
      ? processHyphenatedWord(word, isFirstWord, context)
      : processWord(word, isFirstWord, context);
  }).join(" ");
}
488
/**
 * Trims `text` and collapses every run of whitespace into a single space.
 *
 * @param text - String to normalize.
 * @returns The normalized string.
 */
function normalizeSpaces(text) {
  const tokens = text.trim().split(/\s+/);
  return tokens.join(" ");
}
491
// Per-Set cache of "UPPERCASED acronym" -> canonical spelling, so the lookup
// table is built once per acronym set rather than once per word.
var canonicalAcronymCache = new WeakMap();

/**
 * Finds the canonical spelling of an acronym, ignoring case.
 *
 * @param acronymSet - Set of canonical acronym spellings.
 * @param upperWord - The candidate word, already upper-cased.
 * @returns The canonical spelling, or `undefined` when there is no match.
 */
function findCanonicalAcronym(acronymSet, upperWord) {
  let byUpper = canonicalAcronymCache.get(acronymSet);
  if (!byUpper) {
    byUpper = new Map();
    for (const acronym of acronymSet) {
      const key = acronym.toUpperCase();
      // First spelling wins when two entries differ only by case.
      if (!byUpper.has(key)) byUpper.set(key, acronym);
    }
    canonicalAcronymCache.set(acronymSet, byUpper);
  }
  return byUpper.get(upperWord);
}

/**
 * Applies title-case rules to a single word.
 *
 * Rules, in order:
 *  1. With `preserveAcronyms`, a word matching an entry of `acronymSet`
 *     (case-insensitively) is returned in that entry's canonical spelling.
 *     This includes mixed-case entries such as "Tbk" or "WiFi", which an
 *     upper-case-only lookup can never match.
 *  2. A non-first word found in `lowercaseSet` is returned in lowercase.
 *  3. Anything else is lower-cased with its first character upper-cased.
 *
 * `context.strict` is accepted for compatibility but has no effect here: both
 * code paths it used to select produced the same result.
 *
 * @param word - The word to transform.
 * @param isFirstWord - Whether the word is the first one of the text.
 * @param context - `{ lowercaseSet, acronymSet, preserveAcronyms, strict }`.
 * @returns The cased word.
 */
function processWord(word, isFirstWord, context) {
  const { lowercaseSet, acronymSet, preserveAcronyms } = context;
  const lowerWord = word.toLowerCase();
  if (preserveAcronyms) {
    const upperWord = word.toUpperCase();
    // An exact upper-case entry keeps priority, as before.
    if (acronymSet.has(upperWord)) {
      return upperWord;
    }
    const canonical = findCanonicalAcronym(acronymSet, upperWord);
    if (canonical !== undefined) {
      return canonical;
    }
  }
  if (!isFirstWord && lowercaseSet.has(lowerWord)) {
    return lowerWord;
  }
  // Capitalize inline so this function does not depend on other helpers.
  return lowerWord.charAt(0).toUpperCase() + lowerWord.slice(1);
}
506
/**
 * Title-cases a hyphenated word by running each "-"-separated segment through
 * `processWord`. Only the first segment can be treated as the first word.
 *
 * @param word - Word containing one or more hyphens.
 * @param isFirstWord - Whether the whole word is the first word of the text.
 * @param context - Casing context forwarded unchanged to `processWord`.
 * @returns The segments re-joined with "-".
 */
function processHyphenatedWord(word, isFirstWord, context) {
  const segments = word.split("-");
  const cased = [];
  for (let position = 0; position < segments.length; position++) {
    const leadsText = isFirstWord && position === 0;
    cased.push(processWord(segments[position], leadsText, context));
  }
  return cased.join("-");
}
511
/**
 * Upper-cases the first character of `word` and leaves the rest untouched.
 *
 * @param word - Word to transform; falsy values are returned unchanged.
 * @returns The word with its first character upper-cased.
 */
function capitalizeFirstLetter(word) {
  if (!word) {
    return word;
  }
  const initial = word.charAt(0).toUpperCase();
  return `${initial}${word.slice(1)}`;
}
515
/**
 * Converts `text` to sentence case: whitespace is trimmed and collapsed, every
 * character is lower-cased, and the first letter of each sentence is
 * upper-cased.
 *
 * A sentence ends at ".", "!" or "?". A "." followed directly by a character
 * other than a space or another ".", "!" or "?" (as in "3.14" or
 * "example.com") does not start a new sentence.
 *
 * @param text - Input string; falsy values are returned unchanged.
 * @returns The sentence-cased string.
 */
function toSentenceCase(text) {
  if (!text) return text;
  // Work on UTF-16 code units, mirroring plain index-based iteration.
  const units = text.trim().replace(/\s+/g, " ").split("");
  const pieces = [];
  let atSentenceStart = true;
  units.forEach((unit, position) => {
    const startsSentence = atSentenceStart && /[a-zA-ZÀ-ÿ]/.test(unit);
    pieces.push(startsSentence ? unit.toUpperCase() : unit.toLowerCase());
    if (startsSentence) {
      atSentenceStart = false;
    }
    if (unit === "!" || unit === "?") {
      atSentenceStart = true;
    } else if (unit === ".") {
      const following = units[position + 1];
      // A period glued to the next character is not a sentence break.
      atSentenceStart = following === undefined || following === " " || /[.!?]/.test(following);
    }
  });
  return pieces.join("");
}
540
/**
 * Tells whether `char` is a sentence-terminating punctuation mark.
 *
 * @param char - A single-character string.
 * @returns `true` for ".", "!" or "?", otherwise `false`.
 */
function isSentenceEnd(char) {
  const terminators = [".", "!", "?"];
  return terminators.includes(char);
}
543
+
544
+ // src/text/slug.ts
545
/**
 * Builds a URL-friendly slug from `text`.
 *
 * Steps, in order: apply `replacements` (literal search strings), turn "&"
 * into " dan " and "/" into " atau ", optionally lowercase, drop a fixed set
 * of characters (".", "'", "@", some lowercase accented letters, "™", "®",
 * "©"), replace every other run of non-word characters and every run of
 * whitespace with `separator`, and convert "-" to `separator` when the
 * separator is not "-". With `trim`, repeated separators are collapsed and
 * leading/trailing separators are removed.
 *
 * @param text - Input string; falsy input yields "".
 * @param options - Optional settings:
 *   - `separator` (default "-"): string placed between words.
 *   - `lowercase` (default `true`): lowercase the result.
 *   - `replacements` (default `{}`): map of literal search -> replacement,
 *     applied before anything else.
 *   - `trim` (default `true`): collapse and strip separators.
 * @returns The slug.
 */
function slugify(text, options) {
  if (!text) return "";
  const {
    separator = "-",
    lowercase = true,
    replacements = {},
    trim = true
  } = options || {};
  let result = text;
  for (const [search, replace] of Object.entries(replacements)) {
    result = result.replace(new RegExp(escapeRegex(search), "g"), replace);
  }
  result = result.replace(/&/g, " dan ");
  result = result.replace(/\//g, " atau ");
  if (lowercase) {
    result = result.toLowerCase();
  }
  result = result.replace(/[.'@éèêëàâäôöûüùïîçñ™®©]/g, "");
  result = result.replace(/[^\w\s-]+/g, separator);
  result = result.replace(/\s+/g, separator);
  if (separator !== "-") {
    result = result.replace(/-/g, separator);
  }
  // An empty separator leaves nothing to collapse or strip.
  if (trim && separator) {
    // Escape the whole separator and group it. Prefixing a lone backslash
    // turned separators such as "d", "w" or "s" into regex character classes
    // (and "b" into a syntax error), and only quantified the last character of
    // a multi-character separator.
    const sep = `(?:${escapeRegex(separator)})`;
    result = result.replace(new RegExp(`${sep}+`, "g"), separator);
    result = result.replace(new RegExp(`^${sep}+|${sep}+$`, "g"), "");
  }
  return result;
}

/**
 * Escapes regular-expression metacharacters in `str` so it matches literally.
 *
 * @param str - Raw string.
 * @returns The string with each metacharacter prefixed by a backslash.
 */
function escapeRegex(str) {
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
579
+
580
+ // src/text/sanitize.ts
581
/**
 * Trims `text` and collapses each run of whitespace (including newlines and
 * tabs) into a single space.
 *
 * @param text - Input string; falsy values are returned unchanged.
 * @returns The normalized string.
 */
function normalizeWhitespace(text) {
  if (!text) {
    return text;
  }
  return text.trim().split(/\s+/).join(" ");
}
585
/**
 * Cleans up `text` according to `options`.
 *
 * Steps, in order:
 *  1. `removeNewlines`: each "\n" / "\r" becomes a space.
 *  2. `removePunctuation`: ASCII punctuation is deleted.
 *  3. `allowedChars`: every character NOT in this set is deleted. The value is
 *     inserted verbatim into a regex character class, so ranges such as
 *     "a-z0-9" work and regex-special characters must be escaped by the caller.
 *  4. `removeExtraSpaces`: runs of spaces/tabs (all whitespace when
 *     `removeNewlines` is set) collapse to one space. With `trim: false` the
 *     leading and trailing space/tab padding is kept as-is.
 *  5. `trim`: surrounding whitespace is removed.
 *
 * @param text - Input string; falsy values are returned unchanged.
 * @param options - Optional settings: `removeNewlines` (default `false`),
 *   `removeExtraSpaces` (default `true`), `removePunctuation` (default
 *   `false`), `allowedChars` (no default), `trim` (default `true`).
 * @returns The sanitized string.
 */
function sanitize(text, options) {
  if (!text) return text;
  const {
    removeNewlines = false,
    removeExtraSpaces = true,
    removePunctuation = false,
    allowedChars,
    trim = true
  } = options || {};
  let result = text;
  if (removeNewlines) {
    result = result.replace(/[\n\r]/g, " ");
  }
  if (removePunctuation) {
    result = result.replace(/[!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]/g, "");
  }
  if (allowedChars) {
    const allowedRegex = new RegExp(`[^${allowedChars}]`, "g");
    result = result.replace(allowedRegex, "");
  }
  if (removeExtraSpaces) {
    const collapse = (value) => removeNewlines ? value.replace(/\s+/g, " ") : value.replace(/[ \t]+/g, " ");
    if (trim) {
      result = collapse(result);
    } else {
      // Peel the leading padding off first, then take the trailing padding from
      // what is left. Matching both against the full string made an input
      // consisting only of spaces/tabs count as leading AND trailing padding,
      // so it was emitted twice.
      const leading = (result.match(/^[ \t]*/) || [""])[0];
      const rest = result.slice(leading.length);
      const trailing = (rest.match(/[ \t]*$/) || [""])[0];
      const middle = rest.slice(0, rest.length - trailing.length);
      result = leading + collapse(middle) + trailing;
    }
  }
  if (trim) {
    result = result.trim();
  }
  return result;
}
630
/**
 * Replaces accented and special Latin letters with plain ASCII equivalents.
 *
 * Letters that Unicode decomposition does not split into a base letter plus
 * combining marks (Ø, Æ, Đ, Ł, Þ, ß, ...) are mapped through an explicit
 * table. The result is then NFD-normalized and the combining marks
 * U+0300-U+036F are stripped.
 *
 * @param text - Input string; falsy values are returned unchanged.
 * @returns The string without accents.
 */
function removeAccents(text) {
  if (!text) return text;
  const transliterations = new Map([
    ["\u00D8", "O"],
    ["\u00F8", "o"],
    ["\u00C6", "AE"],
    ["\u00E6", "ae"],
    ["\u00C5", "A"],
    ["\u00E5", "a"],
    ["\u0110", "D"],
    ["\u0111", "d"],
    ["\u0141", "L"],
    ["\u0142", "l"],
    ["\u00DE", "TH"],
    ["\u00FE", "th"],
    ["\u00DF", "ss"]
  ]);
  // One pass over the table's characters; every replacement is pure ASCII, so
  // the order in which the letters are substituted does not matter.
  const mapped = text.replace(
    /[\u00D8\u00F8\u00C6\u00E6\u00C5\u00E5\u0110\u0111\u0141\u0142\u00DE\u00FE\u00DF]/g,
    (letter) => transliterations.get(letter)
  );
  const decomposed = mapped.normalize("NFD");
  return decomposed.replace(/[\u0300-\u036f]/g, "");
}
653
+
654
+ // src/text/abbreviation.ts
655
/**
 * Replaces known abbreviations in `text` with their full forms.
 *
 * Abbreviations come from `getAbbreviationsByMode(mode)` merged with
 * `customMap` (custom entries win). They are tried longest first and matched
 * case-insensitively. A word boundary is required on a side only when the
 * abbreviation starts or ends with a word character on that side.
 *
 * @param text - Input string; falsy values are returned unchanged.
 * @param options - Optional settings:
 *   - `mode` (default "all"): which abbreviation group to use.
 *   - `customMap` (default `{}`): extra abbreviation -> expansion entries.
 *   - `preserveCase` (default `false`): adapt the expansion's casing to the
 *     matched text via `matchCase`.
 * @returns The text with abbreviations expanded.
 */
function expandAbbreviation(text, options) {
  if (!text) return text;
  const { mode = "all", customMap = {}, preserveCase = false } = options || {};
  const lookup = { ...getAbbreviationsByMode(mode), ...customMap };
  // Longest first, so "S.Kom." is handled before anything shorter it contains.
  const longestFirst = Object.keys(lookup).sort(
    (left, right) => right.length - left.length
  );
  return longestFirst.reduce((current, abbrev) => {
    const prefix = /^\w/.test(abbrev) ? "\\b" : "";
    const suffix = /\w$/.test(abbrev) ? "\\b" : "";
    const pattern = new RegExp(prefix + escapeRegex2(abbrev) + suffix, "gi");
    const expansion = lookup[abbrev];
    return current.replace(
      pattern,
      (found) => preserveCase ? matchCase(found, expansion) : expansion
    );
  }, text);
}
683
/**
 * Selects the abbreviation table for a given mode.
 *
 * @param mode - "all" returns the full `ABBREVIATIONS` object itself;
 *   "address", "title" and "org" return a new object holding only that
 *   group's entries, in `ABBREVIATIONS` order; any other value yields `{}`.
 * @returns An abbreviation -> expansion object.
 */
function getAbbreviationsByMode(mode) {
  if (mode === "all") {
    return ABBREVIATIONS;
  }
  const keysByMode = new Map([
    ["address", [
      "Jl.", "Gg.", "No.", "Kp.", "Ds.", "Kel.", "Kec.", "Kab.", "Kota",
      "Prov.", "Prop.", "Rt.", "Rw.", "Blok", "Komp.", "Perumahan", "Perum."
    ]],
    ["title", [
      "Dr.", "Ir.", "Prof.", "Drs.", "Dra.",
      "S.Pd.", "S.H.", "S.E.", "S.T.", "S.Kom.", "S.Si.", "S.Sos.", "S.I.Kom.",
      "S.S.", "S.Psi.", "S.Farm.", "S.Ked.",
      "M.Sc.", "M.M.", "M.Pd.", "M.T.", "M.Kom.", "M.Si.", "M.H.", "M.A.", "MBA"
    ]],
    ["org", ["PT.", "CV.", "UD.", "PD.", "Tbk.", "Koperasi", "Yayasan"]]
  ]);
  const wanted = keysByMode.get(mode);
  if (!wanted) {
    return {};
  }
  return Object.fromEntries(
    Object.entries(ABBREVIATIONS).filter(([abbrev]) => wanted.includes(abbrev))
  );
}
755
/**
 * Escapes regular-expression metacharacters in `str` so that it can be
 * embedded in a pattern and matched literally.
 *
 * @param str - Raw string.
 * @returns The string with each metacharacter prefixed by a backslash.
 */
function escapeRegex2(str) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaCharacters, (symbol) => "\\" + symbol);
}
758
/**
 * Adapts the casing of `replacement` to the casing pattern of `original`.
 *
 * - `original` equal to its upper-cased form -> replacement upper-cased.
 * - `original` equal to its lower-cased form -> replacement lower-cased.
 * - `original` starting with an upper-case character -> first character of
 *   the replacement upper-cased, the rest lower-cased.
 * - otherwise the replacement is returned unchanged.
 *
 * @param original - The matched source text.
 * @param replacement - The text to re-case.
 * @returns The re-cased replacement.
 */
function matchCase(original, replacement) {
  const isAllUpper = original === original.toUpperCase();
  if (isAllUpper) {
    return replacement.toUpperCase();
  }
  const isAllLower = original === original.toLowerCase();
  if (isAllLower) {
    return replacement.toLowerCase();
  }
  const firstChar = original.charAt(0);
  const startsUpper = firstChar === firstChar.toUpperCase();
  return startsUpper
    ? replacement.charAt(0).toUpperCase() + replacement.slice(1).toLowerCase()
    : replacement;
}
770
/**
 * Replaces full forms in `text` with their abbreviations; the inverse of
 * expanding.
 *
 * The table from `getAbbreviationsByMode(mode)` is inverted. When several
 * abbreviations share one expansion, the one listed last wins. Expansions are
 * tried longest first and matched case-insensitively on word boundaries.
 *
 * @param text - Input string; falsy values are returned unchanged.
 * @param options - Optional settings: `mode` (default "all").
 * @returns The text with full forms contracted.
 */
function contractAbbreviation(text, options) {
  if (!text) return text;
  const { mode = "all" } = options || {};
  const abbrevByExpansion = Object.entries(getAbbreviationsByMode(mode)).reduce(
    (inverted, [abbrev, expansion]) => {
      inverted[expansion] = abbrev;
      return inverted;
    },
    {}
  );
  const longestFirst = Object.keys(abbrevByExpansion).sort(
    (left, right) => right.length - left.length
  );
  return longestFirst.reduce((current, expansion) => {
    const pattern = new RegExp(`\\b${escapeRegex2(expansion)}\\b`, "gi");
    return current.replace(pattern, abbrevByExpansion[expansion]);
  }, text);
}
789
+
790
+ // src/text/extract.ts
791
/**
 * Shortens `text` so that the result, ellipsis included, is at most
 * `maxLength` characters long.
 *
 * @param text - Input string; falsy input yields "".
 * @param maxLength - Maximum result length; values <= 0 yield "".
 * @param options - Optional settings:
 *   - `ellipsis` (default "..."): suffix appended to shortened text.
 *   - `wordBoundary` (default `true`): cut at the last space inside the kept
 *     part instead of in the middle of a word, when such a space exists past
 *     index 0.
 * @returns `text` unchanged when it already fits; otherwise the shortened text
 *   plus the ellipsis, or just a prefix of the ellipsis when the ellipsis
 *   alone fills `maxLength`.
 */
function truncate(text, maxLength, options) {
  if (!text || maxLength <= 0) {
    return "";
  }
  const { ellipsis = "...", wordBoundary = true } = options || {};
  if (text.length <= maxLength) {
    return text;
  }
  const budget = maxLength - ellipsis.length;
  if (budget <= 0) {
    return ellipsis.slice(0, maxLength);
  }
  const head = text.slice(0, budget);
  const cutAt = wordBoundary ? head.lastIndexOf(" ") : -1;
  const kept = cutAt > 0 ? head.slice(0, cutAt) : head;
  return kept.trimEnd() + ellipsis;
}
813
/**
 * Splits `text` into words.
 *
 * Every character that is not a word character (`\w`), whitespace or, with
 * `includeHyphenated`, a hyphen is treated as a separator. Tokens consisting
 * only of hyphens are dropped.
 *
 * @param text - Input string; falsy or whitespace-only input yields `[]`.
 * @param options - Optional settings:
 *   - `minLength` (default 0): drop words shorter than this when > 0.
 *   - `includeHyphenated` (default `true`): keep hyphens inside words.
 *   - `lowercase` (default `false`): lowercase each word.
 * @returns The list of words in order of appearance.
 */
function extractWords(text, options) {
  if (!text || !text.trim()) {
    return [];
  }
  const {
    minLength = 0,
    includeHyphenated = true,
    lowercase = false
  } = options || {};
  const separators = includeHyphenated ? /[^\w\s-]/g : /[^\w\s]/g;
  const tokens = text.replace(separators, " ").match(/\S+/g) || [];
  const collected = [];
  for (const token of tokens) {
    if (/^-+$/.test(token)) continue;
    if (minLength > 0 && token.length < minLength) continue;
    collected.push(lowercase ? token.toLowerCase() : token);
  }
  return collected;
}
838
+
839
+ // src/text/compare.ts
840
/**
 * Compares two strings for equality after optional normalization.
 *
 * Strictly identical inputs are equal immediately. Otherwise falsy inputs are
 * treated as "", and both sides go through the same pipeline: whitespace
 * normalization (`ignoreWhitespace`), accent removal (`ignoreAccents`), then
 * lowercasing unless `caseSensitive` is set.
 *
 * @param str1 - First string.
 * @param str2 - Second string.
 * @param options - Optional settings: `caseSensitive`, `ignoreWhitespace`,
 *   `ignoreAccents` (all default `false`).
 * @returns `true` when the normalized strings are identical.
 */
function compareStrings(str1, str2, options) {
  if (str1 === str2) {
    return true;
  }
  const {
    caseSensitive = false,
    ignoreWhitespace = false,
    ignoreAccents = false
  } = options || {};
  const prepare = (value) => {
    let prepared = value || "";
    if (ignoreWhitespace) {
      prepared = normalizeWhitespace(prepared);
    }
    if (ignoreAccents) {
      prepared = removeAccents(prepared);
    }
    return caseSensitive ? prepared : prepared.toLowerCase();
  };
  return prepare(str1) === prepare(str2);
}
867
/**
 * Scores how alike two strings are, from 0 to 1, as
 * `1 - editDistance / max(length1, length2)`, where the edit distance counts
 * single-character insertions, deletions and substitutions.
 *
 * @param str1 - First string.
 * @param str2 - Second string.
 * @returns 1 for identical strings, 0 when exactly one of them is empty,
 *   otherwise the normalized score.
 */
function similarity(str1, str2) {
  if (str1 === str2) return 1;
  if (str1.length === 0) return str2.length === 0 ? 1 : 0;
  if (str2.length === 0) return 0;
  const rowCount = str1.length;
  const colCount = str2.length;
  // Row for the empty prefix of str1: distance j to the first j chars of str2.
  let previous = Array.from({ length: colCount + 1 }, (_, j) => j);
  for (let i = 1; i <= rowCount; i++) {
    const current = [i];
    for (let j = 1; j <= colCount; j++) {
      const insertion = current[j - 1] + 1;
      const deletion = previous[j] + 1;
      const substitution = previous[j - 1] + (str1[i - 1] === str2[j - 1] ? 0 : 1);
      current.push(Math.min(insertion, deletion, substitution));
    }
    previous = current;
  }
  const distance = previous[colCount];
  return 1 - distance / Math.max(rowCount, colCount);
}
897
+
898
+ exports.ABBREVIATIONS = ABBREVIATIONS;
899
+ exports.ACRONYMS = ACRONYMS;
900
+ exports.LOWERCASE_WORDS = LOWERCASE_WORDS;
901
+ exports.capitalize = capitalize;
902
+ exports.compareStrings = compareStrings;
903
+ exports.contractAbbreviation = contractAbbreviation;
904
+ exports.expandAbbreviation = expandAbbreviation;
905
+ exports.extractWords = extractWords;
906
+ exports.normalizeWhitespace = normalizeWhitespace;
907
+ exports.removeAccents = removeAccents;
908
+ exports.sanitize = sanitize;
909
+ exports.similarity = similarity;
910
+ exports.slugify = slugify;
911
+ exports.toSentenceCase = toSentenceCase;
912
+ exports.toTitleCase = toTitleCase;
913
+ exports.truncate = truncate;
914
+ //# sourceMappingURL=index.cjs.map
915
+ //# sourceMappingURL=index.cjs.map