@indodev/toolkit 0.1.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1705 @@
1
+ // src/text/constants.ts
2
/**
 * Words that title-casing keeps in lower case unless they open the title.
 *
 * Only short English prepositions (fewer than five letters) are listed;
 * longer ones such as "about", "above", "across", "after", "among", "below",
 * "under", "until" and "with" are deliberately left out because some style
 * guides capitalize them.
 */
var LOWERCASE_WORDS = [
  // Indonesian prepositions (kata depan)
  "di", "ke", "dari", "pada", "dalam", "untuk", "dengan", "oleh", "kepada",
  "terhadap", "tentang", "tanpa", "hingga", "sampai", "sejak", "menuju",
  "melalui",
  // Indonesian conjunctions (kata hubung)
  "dan", "atau", "tetapi", "namun", "serta", "maupun", "melainkan",
  "sedangkan",
  // Indonesian articles and particles
  "yang", "sebagai", "adalah", "ialah", "yaitu", "bahwa", "akan", "telah",
  "sudah", "belum",
  // English articles
  "a", "an", "the",
  // English conjunctions
  "and", "or", "but", "nor", "for", "yet", "so", "as",
  // English prepositions (short ones only)
  "at", "by", "in", "of", "on", "to", "up", "via", "per", "off", "out"
];
77
/**
 * Acronyms and initialisms whose canonical spelling should be kept by the
 * text helpers. Most entries are all caps, but several are intentionally
 * mixed case ("Tbk", "PPh", "WiFi", "SaaS" and the academic degrees).
 *
 * Every entry appears exactly once: "UI" (Universitas Indonesia / User
 * Interface) and "ATM" were previously listed twice.
 */
var ACRONYMS = [
  // Indonesian government & military
  "DKI", // Daerah Khusus Ibukota
  "DIY", // Daerah Istimewa Yogyakarta
  "TNI", // Tentara Nasional Indonesia
  "POLRI", // Kepolisian Republik Indonesia
  "ABRI", // Angkatan Bersenjata Republik Indonesia
  "MPR", // Majelis Permusyawaratan Rakyat
  "DPR", // Dewan Perwakilan Rakyat
  "KPK", // Komisi Pemberantasan Korupsi
  "BIN", // Badan Intelijen Negara
  // Indonesian business entities
  "PT", // Perseroan Terbatas
  "CV", // Commanditaire Vennootschap
  "UD", // Usaha Dagang
  "PD", // Perusahaan Daerah
  "Tbk", // Terbuka (publicly traded)
  "BUMN", // Badan Usaha Milik Negara
  "BUMD", // Badan Usaha Milik Daerah
  // Indonesian banks
  "BCA", // Bank Central Asia
  "BRI", // Bank Rakyat Indonesia
  "BNI", // Bank Negara Indonesia
  "BTN", // Bank Tabungan Negara
  "BSI", // Bank Syariah Indonesia
  "BPD", // Bank Pembangunan Daerah
  // Indonesian government services
  "KTP", // Kartu Tanda Penduduk
  "NIK", // Nomor Induk Kependudukan
  "NPWP", // Nomor Pokok Wajib Pajak
  "SIM", // Surat Izin Mengemudi
  "STNK", // Surat Tanda Nomor Kendaraan
  "BPJS", // Badan Penyelenggara Jaminan Sosial
  "KIS", // Kartu Indonesia Sehat
  "KIP", // Kartu Indonesia Pintar
  "PKH", // Program Keluarga Harapan
  // Indonesian utilities & infrastructure
  "PLN", // Perusahaan Listrik Negara
  "PDAM", // Perusahaan Daerah Air Minum
  "PGN", // Perusahaan Gas Negara
  "KAI", // Kereta Api Indonesia
  "MRT", // Mass Rapid Transit
  "LRT", // Light Rail Transit
  // Indonesian taxes & fees
  "PBB", // Pajak Bumi dan Bangunan
  "PPh", // Pajak Penghasilan
  "PPN", // Pajak Pertambahan Nilai
  "BPHTB", // Bea Perolehan Hak atas Tanah dan Bangunan
  // Indonesian education
  "UI", // Universitas Indonesia (also: User Interface)
  "ITB", // Institut Teknologi Bandung
  "UGM", // Universitas Gadjah Mada
  "IPB", // Institut Pertanian Bogor
  "ITS", // Institut Teknologi Sepuluh Nopember
  "UNPAD", // Universitas Padjadjaran
  "UNDIP", // Universitas Diponegoro
  "UNAIR", // Universitas Airlangga
  "UNS", // Universitas Sebelas Maret
  // Indonesian degrees (gelar)
  "S.Pd", // Sarjana Pendidikan
  "S.H", // Sarjana Hukum
  "S.E", // Sarjana Ekonomi
  "S.T", // Sarjana Teknik
  "S.Kom", // Sarjana Komputer
  "S.Si", // Sarjana Sains
  "S.Sos", // Sarjana Sosial
  "M.Pd", // Magister Pendidikan
  "M.M", // Magister Manajemen
  "M.T", // Magister Teknik
  "M.Kom", // Magister Komputer
  // Common services
  "ATM", // Automated Teller Machine
  "POS", // Point of Sale
  "SMS", // Short Message Service
  "GPS", // Global Positioning System
  "WiFi", // Wireless Fidelity (technically Wi-Fi)
  "USB", // Universal Serial Bus
  "PIN", // Personal Identification Number
  "OTP", // One Time Password
  "QR", // Quick Response
  // Technology & IT
  "IT", // Information Technology
  "AI", // Artificial Intelligence
  "ML", // Machine Learning
  "API", // Application Programming Interface
  "UX", // User Experience
  "SEO", // Search Engine Optimization
  "SaaS", // Software as a Service
  "CRM", // Customer Relationship Management
  "ERP", // Enterprise Resource Planning
  // Business titles
  "CEO", // Chief Executive Officer
  "CFO", // Chief Financial Officer
  "CTO", // Chief Technology Officer
  "COO", // Chief Operating Officer
  "CMO", // Chief Marketing Officer
  "HR", // Human Resources
  "PR", // Public Relations
  "VP", // Vice President
  "GM", // General Manager
  // International organizations
  "UN", // United Nations
  "WHO", // World Health Organization
  "UNESCO", // United Nations Educational, Scientific and Cultural Organization
  "NATO", // North Atlantic Treaty Organization
  "ASEAN", // Association of Southeast Asian Nations
  "APEC", // Asia-Pacific Economic Cooperation
  "WTO", // World Trade Organization
  "IMF", // International Monetary Fund
  // Medical
  "ICU", // Intensive Care Unit
  "ER", // Emergency Room
  "MRI", // Magnetic Resonance Imaging
  "CT", // Computed Tomography
  "DNA", // Deoxyribonucleic Acid
  "RNA", // Ribonucleic Acid
  "HIV", // Human Immunodeficiency Virus
  "AIDS", // Acquired Immunodeficiency Syndrome
  "COVID", // Coronavirus Disease
  // Measurements & units
  "KM", // Kilometer
  "CM", // Centimeter
  "MM", // Millimeter
  "KG", // Kilogram
  "RPM", // Revolutions Per Minute
  "MPH", // Miles Per Hour
  "KPH", // Kilometers Per Hour
  // Finance
  "IPO", // Initial Public Offering
  "ROI", // Return on Investment
  "GDP", // Gross Domestic Product
  "VAT" // Value Added Tax
];
330
/**
 * Maps an Indonesian abbreviation to its expanded form.
 *
 * Some keys map to themselves ("Kota", "Blok", "Ibu", "Mei", ...); expanding
 * them is a no-op. Several abbreviations share one expansion ("Prov."/"Prop.",
 * "Tlp."/"Telp.", "Perumahan"/"Perum."). Key order is significant to callers
 * that iterate the map, so it is kept as declared.
 */
var ABBREVIATIONS = {
  // ---------- Address ----------
  "Jl.": "Jalan",
  "Gg.": "Gang",
  "No.": "Nomor",
  "Kp.": "Kampung",
  "Ds.": "Desa",
  "Kel.": "Kelurahan",
  "Kec.": "Kecamatan",
  "Kab.": "Kabupaten",
  "Kota": "Kota",
  "Prov.": "Provinsi",
  "Prop.": "Provinsi",
  "Rt.": "Rukun Tetangga",
  "Rw.": "Rukun Warga",
  "Blok": "Blok",
  "Komp.": "Kompleks",
  "Perumahan": "Perumahan",
  "Perum.": "Perumahan",

  // ---------- Academic titles ----------
  "Dr.": "Doktor",
  "Ir.": "Insinyur",
  "Prof.": "Profesor",
  "Drs.": "Doktorandus",
  "Dra.": "Doktoranda",
  // Bachelor degrees
  "S.Pd.": "Sarjana Pendidikan",
  "S.H.": "Sarjana Hukum",
  "S.E.": "Sarjana Ekonomi",
  "S.T.": "Sarjana Teknik",
  "S.Kom.": "Sarjana Komputer",
  "S.Si.": "Sarjana Sains",
  "S.Sos.": "Sarjana Sosial",
  "S.I.Kom.": "Sarjana Ilmu Komunikasi",
  "S.S.": "Sarjana Sastra",
  "S.Psi.": "Sarjana Psikologi",
  "S.Farm.": "Sarjana Farmasi",
  "S.Ked.": "Sarjana Kedokteran",
  // Master degrees
  "M.Sc.": "Master of Science",
  "M.M.": "Magister Manajemen",
  "M.Pd.": "Magister Pendidikan",
  "M.T.": "Magister Teknik",
  "M.Kom.": "Magister Komputer",
  "M.Si.": "Magister Sains",
  "M.H.": "Magister Hukum",
  "M.A.": "Master of Arts",
  "MBA": "Master of Business Administration",

  // ---------- Honorifics ----------
  "Bpk.": "Bapak",
  "Ibu": "Ibu",
  "Sdr.": "Saudara",
  "Sdri.": "Saudari",
  "Yth.": "Yang Terhormat",
  "H.": "Haji",
  "Hj.": "Hajjah",
  "Tn.": "Tuan",
  "Ny.": "Nyonya",
  "Nn.": "Nona",

  // ---------- Organizations ----------
  "PT.": "Perseroan Terbatas",
  "CV.": "Commanditaire Vennootschap",
  "UD.": "Usaha Dagang",
  "PD.": "Perusahaan Daerah",
  "Tbk.": "Terbuka",
  "Koperasi": "Koperasi",
  "Yayasan": "Yayasan",

  // ---------- Common abbreviations ----------
  "dst.": "dan seterusnya",
  "dsb.": "dan sebagainya",
  "dll.": "dan lain-lain",
  "dkk.": "dan kawan-kawan",
  "a.n.": "atas nama",
  "u.p.": "untuk perhatian",
  "u.b.": "untuk beliau",
  "c.q.": "casu quo",
  "hlm.": "halaman",
  "tgl.": "tanggal",
  "bln.": "bulan",
  "thn.": "tahun",
  "ttd.": "tertanda",

  // ---------- Contact information ----------
  "Tlp.": "Telepon",
  "Telp.": "Telepon",
  "HP.": "Handphone",
  "Fax": "Faksimile",
  "Email": "Email",
  "Website": "Website",

  // ---------- Days (Indonesian) ----------
  "Sen.": "Senin",
  "Sel.": "Selasa",
  "Rab.": "Rabu",
  "Kam.": "Kamis",
  "Jum.": "Jumat",
  "Sab.": "Sabtu",
  "Min.": "Minggu",

  // ---------- Months (Indonesian) ----------
  "Jan.": "Januari",
  "Feb.": "Februari",
  "Mar.": "Maret",
  "Apr.": "April",
  "Mei": "Mei",
  "Jun.": "Juni",
  "Jul.": "Juli",
  "Agt.": "Agustus",
  "Sep.": "September",
  "Okt.": "Oktober",
  "Nov.": "November",
  "Des.": "Desember",

  // ---------- Units & measurements ----------
  "kg.": "kilogram",
  "gr.": "gram",
  "lt.": "liter",
  "ml.": "mililiter",
  "km.": "kilometer",
  "cm.": "sentimeter",
  "mm.": "milimeter",
  "m2.": "meter persegi",
  "m3.": "meter kubik",
  "ha.": "hektar"
};
451
/**
 * Lower-case list of Indonesian profane words and slang variants.
 */
var PROFANITY = [
  "anjing", "babi", "bangsat", "bajingan", "brengsek", "goblok", "tolol",
  "idiot", "perek", "jablay", "kontol", "memek", "ngewe", "puki",
  "jembut", "asu", "itil", "lanjiao", "pantek", "anying", "anjrit"
];
474
/**
 * Lower-case Indonesian stopwords.
 *
 * Entries keep their original relative order; the list is only roughly
 * alphabetical. Each word appears once: the repeated "menurutnya", "pernah",
 * "sedikit", "sedikitnya", "seperti", "tadi" and "tadinya" entries were
 * removed. A few entries contain a space or hyphen ("menurut mereka",
 * "seperti-itu", "terdiri-dari") and are kept verbatim.
 */
var STOPWORDS = [
  // a
  "ada", "adalah", "adanya", "adapun", "agak", "agaknya", "agar", "akan",
  "akankah", "akhir", "akhiri", "akhirnya", "aku", "akulah", "amat",
  "amatlah", "anda", "andalah", "antar", "antara", "antaranya", "apa",
  "apaan", "apabila", "apakah", "apalagi", "apatah", "artinya", "asal",
  "asalkan", "atas", "atau", "ataukah", "ataupun", "awal", "awalnya",
  // b
  "bagai", "bagaikan", "bagaimana", "bagaimanakah", "bagaimanapun", "bagi",
  "bagian", "bahkan", "bahwa", "bahwasanya", "baik", "bakal", "bakalan",
  "balik", "banyak", "bapak", "baru", "bawah", "beberapa", "begini",
  "beginian", "beginikah", "beginilah", "begitu", "begitukah", "begitulah",
  "begitupun", "bekerja", "belakang", "belakangan", "belum", "belumlah",
  "benar", "benarkah", "benarlah", "berada", "berakhir", "berakhirlah",
  "berakhirnya", "berapa", "berapakah", "berapalah", "berapapun", "berarti",
  "berawal", "berbagai", "berikut", "berikutnya", "berjumlah",
  "berkali-kali", "berkata", "berkeinginan", "berkenaan", "berlainan",
  "berlalu", "berlangsung", "berlebihan", "bermacam", "bermacam-macam",
  "bermaksud", "bermula", "bersama", "bersama-sama", "bersiap",
  "bersiap-siap", "bertanya", "bertanya-tanya", "berturut", "berturut-turut",
  "bertutur", "berujar", "berupa", "besar", "betul", "betulkah", "biasa",
  "biasanya", "bila", "bilakah", "bisa", "bisakah", "boleh", "bolehkah",
  "bolehlah", "buat", "bukan", "bukankah", "bukanlah", "bukannya", "bulan",
  "bung",
  // c
  "cara", "caranya", "cukup", "cukupkah", "cukuplah", "cuma",
  // d
  "dahulu", "dalam", "dan", "dapat", "dari", "daripada", "datang", "dekat",
  "demi", "demikian", "demikianlah", "dengan", "depan", "di", "dia",
  "diakhiri", "diakhirinya", "dialah", "diantara", "diantaranya", "diberi",
  "diberikan", "diberikannya", "dibuat", "dibuatnya", "didapat",
  "didatangkan", "digunakan", "diibaratkan", "diingat", "diingatkan",
  "diinginkan", "dijawab", "dijelaskan", "dijelaskannya", "dikarenakan",
  "dikatakan", "dikatakannya", "dikerjakan", "diketahui", "diketahuinya",
  "dikira", "dilakukan", "dilalui", "dilihat", "dimaksud", "dimaksudkan",
  "dimaksudkannya", "dimana", "dimanalah", "dimulai", "dimulailah",
  "dimulainya", "diminta", "dimintai", "dimisalkan", "dimungkinkan", "dini",
  "dipastikan", "diperbuat", "diperbuatnya", "dipergunakan", "diperkirakan",
  "diperlihatkan", "diperlukan", "diperlukannya", "dipersoalkan",
  "dipertanyakan", "dipunyai", "diri", "dirinya", "disampaikan", "disebut",
  "disebutkan", "disebutkannya", "disini", "disinilah", "disitulah",
  "diterangkan", "diterangkannya", "diteruskan", "ditujukan", "ditunjuk",
  "ditunjuki", "ditunjukkan", "ditunjukkannya", "ditunjuknya", "dituturkan",
  "dituturkannya", "diucapkan", "diucapkannya", "diungkapkan", "dua", "dulu",
  // e
  "empat", "enggak", "enggaknya", "entah", "entahlah",
  // g
  "guna", "gunakan",
  // h
  "hal", "hampir", "hanya", "hanyalah", "hari", "harus", "haruslah",
  "harusnya", "hendak", "hendaklah", "hendaknya", "hingga",
  // i
  "ia", "ialah", "ibarat", "ibaratkan", "ibaratnya", "ibu", "ikut", "ingat",
  "ingat-ingat", "ingin", "inginkah", "inginkan", "ini", "inikah", "inilah",
  "itu", "itukah", "itulah",
  // j
  "jadi", "jadilah", "jadinya", "jangan", "jangankan", "janganlah", "jauh",
  "jawab", "jawaban", "jawabnya", "jelas", "jelaskan", "jelaslah",
  "jelasnya", "jika", "jikalau", "juga", "jumlah", "jumlahnya", "justru",
  // k
  "kala", "kalau", "kalaulah", "kalaupun", "kali", "kalian", "kami",
  "kamilah", "kamu", "kamulah", "kan", "kapan", "kapankah", "kapanpun",
  "karena", "karenanya", "ke", "keadaan", "kebetulan", "kecil", "kedua",
  "keduanya", "keinginan", "kelak", "kelihatan", "kelihatannya", "kelima",
  "keluar", "kembali", "kemudian", "kemungkinan", "kemungkinannya", "kenapa",
  "kepada", "kepadanya", "kesampaian", "keseluruhan", "keseluruhannya",
  "keterlaluan", "ketika", "khususnya", "kini", "kinilah", "kira",
  "kira-kira", "kiranya", "kita", "kitalah", "kok", "kurang",
  // l
  "lagi", "lagian", "lah", "lain", "lainnya", "lalu", "lama", "lamanya",
  "lanjut", "lanjutnya", "lebih", "lewat", "luar",
  // m
  "macam", "maka", "makanya", "makin", "malah", "malahan", "mampu",
  "mampukah", "mana", "manakala", "manalagi", "masih", "masihkah", "masing",
  "masing-masing", "mau", "maupun", "melainkan", "melakukan", "melalui",
  "melihat", "melihatnya", "memang", "memastikan", "memberi", "memberikan",
  "membuat", "memerlukan", "memihak", "meminta", "memisalkan", "memperbuat",
  "mempergunakan", "memperkirakan", "memperlihatkan", "mempersiapkan",
  "mempersoalkan", "mempertanyakan", "mempunyai", "memulai", "memungkinkan",
  "memutuskan", "menanti", "menanti-nanti", "menantikan", "menunjuk",
  "menunjuknya", "menuju", "menurut", "menurutnya", "menurutmu", "menurutku",
  "menurut mereka", "menyampaikan", "menyebut", "menyebutkan", "menjelaskan",
  "menjadi", "menjadikan", "menjalani", "menjelang", "menjawab",
  "menunjukkan", "menuangkan", "menulis", "menyatakan", "merupakan",
  "mereka", "merekalah", "meski", "meskipun", "mula", "mulai", "mulailah",
  "mulanya", "mungkin", "mungkinkah",
  // n
  "nah", "naik", "namun", "nanti", "nantinya", "nyaris",
  // o
  "oleh", "olehnya", "orang",
  // p
  "pada", "padahal", "padanya", "pakai", "paling", "panjang", "pantas",
  "para", "pasti", "pastilah", "pagi", "per", "pernah", "persoalan",
  "pertama", "pertama-tama", "perlu", "perlukah", "perlulah", "pihak",
  "pihaknya", "pukul", "pula", "pun", "punya",
  // r
  "rasa", "rasanya", "rata", "rupanya",
  // s
  "saat", "saatnya", "saja", "sajalah", "salam", "saling", "sama",
  "sama-sama", "sambil", "sampai", "sampai-sampai", "sampaikan", "sana",
  "sangat", "sangatlah", "satu", "saya", "sayalah", "sayang", "seperti",
  "seperti-itu", "sepura", "sebab", "sebabnya", "sebagai", "sebagaimana",
  "sebagainya", "sebagian", "sebaik", "sebaik-baiknya", "sebaiknya",
  "sebaliknya", "sebanyak", "sebegini", "sebegitu", "sebelum", "sebelumnya",
  "sebenarnya", "seberapa", "sebesar", "sebetulnya", "sebisanya", "sebuah",
  "sebut", "sebutkan", "sebutnya", "secara", "secukupnya", "sedang",
  "sedangkan", "sedikit", "sedikitnya", "sedemikian", "sediakala", "segala",
  "segalanya", "segera", "seharusnya", "sehingga", "seingat", "sejak",
  "sejauh", "sejenak", "sejumlah", "sekali", "sekali-kali", "sekalian",
  "sekaligus", "sekalipun", "sekarang", "sekaranglah", "sekecil", "seketika",
  "sekiranya", "sekitar", "sekitarnya", "sekurang", "sekurangnya", "sela",
  "selalu", "selama", "selama-lamanya", "selamanya", "selanjutnya",
  "seluruh", "seluruhnya", "semacam", "semakin", "semampu", "semampunya",
  "semasa", "semata", "semata-mata", "semaunya", "sementara", "semisal",
  "semisalnya", "sempat", "semua", "semuanya", "semula", "sendiri",
  "sendirinya", "seolah", "seolah-olah", "seorang", "sepanjang",
  "sepantasnya", "sepantasnyalah", "seperempat", "sepertinya", "sepihak",
  "sepuluh", "seratus", "seribu", "sering", "seringnya", "serta", "serupa",
  "sesaat", "sesama", "sesampai", "sesampainya", "sesegera", "sesekali",
  "seseorang", "sesuatu", "sesuatunya", "sesudah", "sesudahnya", "setelah",
  "setempat", "setengah", "seterusnya", "setiap", "setidaknya", "setinggi",
  "seusai", "sewaktu", "siap", "siapa", "siapakah", "siapapun", "sini",
  "sinilah", "situ", "situlah", "suatu", "sudah", "sudahkah", "sudahlah",
  "supaya",
  // t
  "tadi", "tadinya", "tahu", "tak", "tambah", "tambahnya", "tampak",
  "tampaknya", "tandas", "tandasnya", "tanpa", "tanya", "tanyakan",
  "tanyanya", "tapi", "tegas", "tegasnya", "telah", "tempat", "tengah",
  "tentang", "tentu", "tentulah", "tentunya", "tepat", "terakhir", "terasa",
  "terbanyak", "terdahulu", "terdapat", "terdiri", "terdiri-dari",
  "terhadap", "terhadapnya", "teringat", "teringat-ingat", "terjadi",
  "terjadilah", "terjadinya", "terkira", "terlalu", "terlebih", "terlihat",
  "termasuk", "ternyata", "tersampaikan", "tersebut", "tersebutlah",
  "tertentu", "tertuju", "terus", "terutama", "tetap", "tetapi", "tiap",
  "tiba", "tiba-tiba", "tidak", "tidakkah", "tidaklah", "tiga", "tinggi",
  "toh", "tuju", "tunjuk", "turut", "tutur", "tuturnya",
  // u
  "ucap", "ucapnya", "ujar", "ujarnya", "umumnya", "ungkap", "ungkapnya",
  "untuk", "untaian", "usai", "usah",
  // w
  "waduh", "wah", "wahai", "walau", "walaupun", "wong",
  // y
  "yaitu", "yakin", "yakni", "yang"
];
1186
+
1187
+ // src/text/capitalization.ts
1188
/**
 * Upper-cases the first character of `text` and lower-cases everything
 * after it.
 *
 * @param {string} text - Input string.
 * @returns {string} The re-cased string; falsy input is returned unchanged.
 */
function capitalize(text) {
  if (!text) {
    return text;
  }
  const head = text.charAt(0);
  const tail = text.slice(1);
  return `${head.toUpperCase()}${tail.toLowerCase()}`;
}
1192
/**
 * Converts `text` to title case.
 *
 * Whitespace is normalized first, then each space-separated token is handed
 * to `processWord`, or to `processHyphenatedWord` when it contains a hyphen.
 * Words in `LOWERCASE_WORDS` plus any caller-supplied `exceptions` form the
 * lower-case set; `ACRONYMS` forms the acronym set.
 *
 * @param {string} text - Text to convert.
 * @param {object} [options]
 * @param {boolean} [options.preserveAcronyms=true] - Passed through to the
 *   word processors to enable acronym handling.
 * @param {boolean} [options.strict=false] - Passed through to the word
 *   processors.
 * @param {string[]} [options.exceptions=[]] - Extra words for the lower-case
 *   set.
 * @returns {string} The title-cased text; falsy input is returned unchanged.
 */
function toTitleCase(text, options) {
  if (!text) {
    return text;
  }
  const {
    preserveAcronyms = true,
    strict = false,
    exceptions = []
  } = options || {};

  const lowercaseSet = new Set([...LOWERCASE_WORDS, ...exceptions]);
  const acronymSet = new Set(ACRONYMS);
  const tokens = normalizeSpaces(text).split(" ");

  const titled = [];
  for (let position = 0; position < tokens.length; position++) {
    const token = tokens[position];
    if (!token) {
      // Whitespace-only input normalizes to a single empty token; keep it.
      titled.push(token);
      continue;
    }
    const handler = token.includes("-") ? processHyphenatedWord : processWord;
    titled.push(
      handler(token, position === 0, {
        lowercaseSet,
        acronymSet,
        preserveAcronyms,
        strict
      })
    );
  }
  return titled.join(" ");
}
1221
/**
 * Strips leading/trailing whitespace and collapses each internal whitespace
 * run into one space.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} The normalized text.
 */
function normalizeSpaces(text) {
  const trimmed = text.trim();
  const collapsed = trimmed.replace(/\s+/g, " ");
  return collapsed;
}
1224
// Case-insensitive acronym lookups (UPPERCASE -> canonical spelling), cached
// per acronym Set so one title-casing pass builds the lookup only once. The
// lookup is derived on first use; a Set mutated afterwards is not re-read.
var ACRONYM_LOOKUP_CACHE = new WeakMap();

/**
 * Returns a Map from the upper-cased form of each acronym to the spelling
 * stored in `acronymSet` (first spelling wins on a clash).
 *
 * @param {Set<string>} acronymSet - Canonical acronym spellings.
 * @returns {Map<string, string>} Upper-case key to canonical spelling.
 */
function getAcronymLookup(acronymSet) {
  let lookup = ACRONYM_LOOKUP_CACHE.get(acronymSet);
  if (!lookup) {
    lookup = new Map();
    for (const acronym of acronymSet) {
      const key = acronym.toUpperCase();
      if (!lookup.has(key)) {
        lookup.set(key, acronym);
      }
    }
    ACRONYM_LOOKUP_CACHE.set(acronymSet, lookup);
  }
  return lookup;
}

/**
 * Title-cases a single word.
 *
 * Order of precedence:
 *   1. With `preserveAcronyms`, a word that matches an acronym regardless of
 *      case is returned in the acronym's canonical spelling ("wifi" ->
 *      "WiFi", "ktp" -> "KTP"). Matching used to compare the upper-cased
 *      word against the raw set, so mixed-case acronyms never matched.
 *   2. A word in `lowercaseSet` that is not the first word is lower-cased.
 *   3. Anything else is lower-cased and given a leading capital.
 *
 * `context.strict` is accepted but has no effect: the strict and non-strict
 * paths have always produced the same result.
 *
 * @param {string} word - The word to convert.
 * @param {boolean} isFirstWord - True when the word opens the title.
 * @param {{lowercaseSet: Set<string>, acronymSet: Set<string>,
 *   preserveAcronyms: boolean, strict: boolean}} context - Shared settings.
 * @returns {string} The converted word.
 */
function processWord(word, isFirstWord, context) {
  const { lowercaseSet, acronymSet, preserveAcronyms } = context;
  if (preserveAcronyms) {
    const canonical = getAcronymLookup(acronymSet).get(word.toUpperCase());
    if (canonical !== undefined) {
      return canonical;
    }
  }
  const lowerWord = word.toLowerCase();
  if (!isFirstWord && lowercaseSet.has(lowerWord)) {
    return lowerWord;
  }
  return capitalizeFirstLetter(lowerWord);
}

/**
 * Title-cases every hyphen-separated part of `word` independently. Only the
 * first part of the first word counts as "first" for the lower-case rule.
 *
 * @param {string} word - A word containing one or more hyphens.
 * @param {boolean} isFirstWord - True when the word opens the title.
 * @param {object} context - Same context object `processWord` receives.
 * @returns {string} The converted parts re-joined with hyphens.
 */
function processHyphenatedWord(word, isFirstWord, context) {
  return word.split("-").map(
    (part, index) => processWord(part, isFirstWord && index === 0, context)
  ).join("-");
}

/**
 * Upper-cases the first character of `word` and leaves the rest untouched.
 *
 * @param {string} word - Word to capitalize.
 * @returns {string} The capitalized word; falsy input is returned unchanged.
 */
function capitalizeFirstLetter(word) {
  if (!word) return word;
  return word.charAt(0).toUpperCase() + word.slice(1);
}
1248
/**
 * Converts `text` to sentence case: whitespace is normalized, everything is
 * lower-cased, and the first letter of the text and of each sentence is
 * upper-cased.
 *
 * A sentence ends at ".", "!" or "?". A "." that is immediately followed by
 * a character other than a space or another terminator (as in "1.5" or
 * "a.b") does not start a new sentence.
 *
 * Letters are detected with the Unicode `Letter` property and the text is
 * walked by code point, so sentences may start with any script's letters
 * (the previous `[a-zA-ZÀ-ÿ]` test missed letters such as "š" and treated
 * "×" and "÷" as letters).
 *
 * @param {string} text - Text to convert.
 * @returns {string} The sentence-cased text; falsy input is returned
 *   unchanged.
 */
function toSentenceCase(text) {
  if (!text) return text;
  const chars = Array.from(text.trim().replace(/\s+/g, " "));
  const letter = /\p{L}/u;
  let result = "";
  let shouldCapitalize = true;
  for (let i = 0; i < chars.length; i++) {
    const char = chars[i];
    if (shouldCapitalize && letter.test(char)) {
      result += char.toUpperCase();
      shouldCapitalize = false;
    } else {
      result += char.toLowerCase();
    }
    if (isSentenceEnd(char)) {
      const nextChar = chars[i + 1];
      // A period glued to the next character (decimal, dotted token) is not
      // a sentence boundary.
      const gluedPeriod =
        char === "." &&
        nextChar !== undefined &&
        nextChar !== " " &&
        !isSentenceEnd(nextChar);
      shouldCapitalize = !gluedPeriod;
    }
  }
  return result;
}

/**
 * Tells whether `char` is a sentence terminator.
 *
 * @param {string} char - A single character.
 * @returns {boolean} True for ".", "!" or "?".
 */
function isSentenceEnd(char) {
  return char === "." || char === "!" || char === "?";
}
1276
+
1277
+ // src/text/slug.ts
1278
/**
 * Builds a URL-friendly slug from `text`.
 *
 * Steps, in order: apply caller `replacements`; turn "&" into " dan " and
 * "/" into " atau "; optionally lower-case; drop periods, apostrophes, "@",
 * trademark-style symbols and a fixed set of lower-case accented letters;
 * replace every other run of non-word characters, every whitespace run and
 * (when the separator is not "-") every hyphen with `separator`; finally,
 * when `trim` is set, collapse repeated separators and strip them from both
 * ends.
 *
 * NOTE(review): the listed accented letters are removed rather than
 * transliterated ("café" -> "caf"); kept as-is for compatibility.
 *
 * @param {string} text - Text to slugify.
 * @param {object} [options]
 * @param {string} [options.separator="-"] - Word separator. Any string is
 *   treated literally, including multi-character and alphanumeric ones.
 * @param {boolean} [options.lowercase=true] - Lower-case the slug.
 * @param {Object<string, string>} [options.replacements={}] - Literal
 *   search -> replacement pairs applied before anything else. Both sides are
 *   literal text: no regex syntax, no `$`-patterns.
 * @param {boolean} [options.trim=true] - Collapse and strip separators.
 * @returns {string} The slug; "" for falsy input.
 */
function slugify(text, options) {
  if (!text) return "";
  const {
    separator = "-",
    lowercase = true,
    replacements = {},
    trim = true
  } = options || {};
  // Function replacers insert text verbatim, so "$&", "$`" and friends in a
  // separator or replacement are never interpreted as patterns.
  const insertSeparator = () => separator;
  let result = text;
  for (const [search, replace] of Object.entries(replacements)) {
    result = result.replace(new RegExp(escapeRegex(search), "g"), () => replace);
  }
  result = result.replace(/&/g, " dan ");
  result = result.replace(/\//g, " atau ");
  if (lowercase) {
    result = result.toLowerCase();
  }
  result = result.replace(/[.'@éèêëàâäôöûüùïîçñ™®©]/g, "");
  result = result.replace(/[^\w\s-]+/g, insertSeparator);
  result = result.replace(/\s+/g, insertSeparator);
  if (separator !== "-") {
    result = result.replace(/-/g, insertSeparator);
  }
  // An empty separator leaves nothing to collapse or strip.
  if (trim && separator !== "") {
    // Escape the whole separator and group it so the quantifier applies to
    // the full string; a bare backslash prefix would turn "w", "d", "s", ...
    // into regex character classes.
    const separatorRun = `(?:${escapeRegex(separator)})+`;
    result = result.replace(new RegExp(separatorRun, "g"), insertSeparator);
    result = result.replace(
      new RegExp(`^${separatorRun}|${separatorRun}$`, "g"),
      ""
    );
  }
  return result;
}

/**
 * Escapes regular-expression metacharacters in `str` so it can be embedded
 * in a pattern as literal text.
 *
 * @param {string} str - Literal text.
 * @returns {string} The escaped text.
 */
function escapeRegex(str) {
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
1312
+
1313
+ // src/text/sanitize.ts
1314
/**
 * Trims `text` and squeezes every run of whitespace (spaces, tabs, line
 * breaks) down to a single space.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} The normalized text; falsy input is returned unchanged.
 */
function normalizeWhitespace(text) {
  if (text) {
    const withoutEdges = text.trim();
    return withoutEdges.replace(/\s+/g, " ");
  }
  return text;
}
1318
/**
 * Cleans up `text` according to `options`.
 *
 * Steps, in order: replace each newline / carriage return with a space
 * (`removeNewlines`); strip ASCII punctuation (`removePunctuation`); drop
 * every character not matched by `allowedChars`; collapse whitespace runs
 * (`removeExtraSpaces`); trim (`trim`).
 *
 * Whitespace collapsing squeezes runs of spaces and tabs — and line breaks
 * too when `removeNewlines` is set — into one space. With `trim: false` the
 * leading and trailing runs of spaces/tabs are preserved exactly and only
 * the text between them is collapsed. The trailing run is measured on what
 * remains after the leading run, so a string made only of spaces/tabs is
 * returned as-is instead of being duplicated.
 *
 * @param {string} text - Text to clean.
 * @param {object} [options]
 * @param {boolean} [options.removeNewlines=false]
 * @param {boolean} [options.removeExtraSpaces=true]
 * @param {boolean} [options.removePunctuation=false]
 * @param {string} [options.allowedChars] - Body of a regex character class
 *   listing the characters to keep, e.g. "a-z0-9 ". It is interpolated
 *   verbatim, so "]", "\" and "^" must be escaped by the caller.
 * @param {boolean} [options.trim=true]
 * @returns {string} The cleaned text; falsy input is returned unchanged.
 */
function sanitize(text, options) {
  if (!text) return text;
  const {
    removeNewlines = false,
    removeExtraSpaces = true,
    removePunctuation = false,
    allowedChars,
    trim = true
  } = options || {};
  let result = text;
  if (removeNewlines) {
    result = result.replace(/[\n\r]/g, " ");
  }
  if (removePunctuation) {
    result = result.replace(/[!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]/g, "");
  }
  if (allowedChars) {
    const allowedRegex = new RegExp(`[^${allowedChars}]`, "g");
    result = result.replace(allowedRegex, "");
  }
  if (removeExtraSpaces) {
    const spaceRun = removeNewlines ? /\s+/g : /[ \t]+/g;
    if (trim) {
      result = result.replace(spaceRun, " ");
    } else {
      const leading = result.match(/^[ \t]*/)[0];
      // Measure the trailing run on the remainder so an all-blank string is
      // not counted as both leading and trailing padding.
      const rest = result.slice(leading.length);
      const trailing = rest.match(/[ \t]*$/)[0];
      const middle = rest.slice(0, rest.length - trailing.length);
      result = leading + middle.replace(spaceRun, " ") + trailing;
    }
  }
  if (trim) {
    result = result.trim();
  }
  return result;
}
1363
/**
 * Strips diacritics from `text`.
 *
 * Letters that do not decompose into a base letter plus combining mark are
 * transliterated from a fixed table first; the text is then normalized to
 * NFD and the combining marks U+0300–U+036F are removed.
 *
 * @param {string} text - Text to process.
 * @returns {string} The unaccented text; falsy input is returned unchanged.
 */
function removeAccents(text) {
  if (!text) {
    return text;
  }
  const transliterations = [
    ["\u00D8", "O"],
    ["\u00F8", "o"],
    ["\u00C6", "AE"],
    ["\u00E6", "ae"],
    ["\u00C5", "A"],
    ["\u00E5", "a"],
    ["\u0110", "D"],
    ["\u0111", "d"],
    ["\u0141", "L"],
    ["\u0142", "l"],
    ["\u00DE", "TH"],
    ["\u00FE", "th"],
    ["\u00DF", "ss"]
  ];
  const transliterated = transliterations.reduce(
    (current, [special, plain]) => current.split(special).join(plain),
    text
  );
  const decomposed = transliterated.normalize("NFD");
  return decomposed.replace(/[\u0300-\u036f]/g, "");
}
1386
+
1387
+ // src/text/abbreviation.ts
1388
/**
 * Replaces known abbreviations in `text` with their expansions.
 *
 * The lookup table is the set selected by `mode` overlaid with `customMap`.
 * Abbreviations are applied longest first, matched case-insensitively, and
 * anchored with a word boundary on whichever side starts or ends with a word
 * character.
 *
 * @param {string} text - Text to expand.
 * @param {object} [options]
 * @param {string} [options.mode="all"] - Abbreviation group passed to
 *   `getAbbreviationsByMode`.
 * @param {Object<string, string>} [options.customMap={}] - Extra or
 *   overriding abbreviation -> expansion pairs.
 * @param {boolean} [options.preserveCase=false] - When true, the expansion
 *   is re-cased through `matchCase` to follow the matched text.
 * @returns {string} The expanded text; falsy input is returned unchanged.
 */
function expandAbbreviation(text, options) {
  if (!text) {
    return text;
  }
  const { mode = "all", customMap = {}, preserveCase = false } = options || {};
  const lookup = { ...getAbbreviationsByMode(mode), ...customMap };
  const longestFirst = Object.keys(lookup).sort(
    (left, right) => right.length - left.length
  );
  return longestFirst.reduce((current, abbrev) => {
    const expansion = lookup[abbrev];
    const prefix = /^\w/.test(abbrev) ? "\\b" : "";
    const suffix = /\w$/.test(abbrev) ? "\\b" : "";
    const pattern = new RegExp(prefix + escapeRegex2(abbrev) + suffix, "gi");
    return current.replace(pattern, (found) =>
      preserveCase ? matchCase(found, expansion) : expansion
    );
  }, text);
}
1416
/**
 * Selects the abbreviation table for `mode`.
 *
 * "all" returns the shared `ABBREVIATIONS` object itself (not a copy).
 * "address", "title" and "org" return a new object holding only that
 * group's entries, in `ABBREVIATIONS` order. Any other mode yields an empty
 * object.
 *
 * @param {string} mode - "all", "address", "title" or "org".
 * @returns {Object<string, string>} Abbreviation -> expansion map.
 */
function getAbbreviationsByMode(mode) {
  if (mode === "all") {
    return ABBREVIATIONS;
  }
  let group;
  switch (mode) {
    case "address":
      group = [
        "Jl.", "Gg.", "No.", "Kp.", "Ds.", "Kel.", "Kec.", "Kab.", "Kota",
        "Prov.", "Prop.", "Rt.", "Rw.", "Blok", "Komp.", "Perumahan", "Perum."
      ];
      break;
    case "title":
      group = [
        "Dr.", "Ir.", "Prof.", "Drs.", "Dra.",
        "S.Pd.", "S.H.", "S.E.", "S.T.", "S.Kom.", "S.Si.", "S.Sos.",
        "S.I.Kom.", "S.S.", "S.Psi.", "S.Farm.", "S.Ked.",
        "M.Sc.", "M.M.", "M.Pd.", "M.T.", "M.Kom.", "M.Si.", "M.H.", "M.A.",
        "MBA"
      ];
      break;
    case "org":
      group = ["PT.", "CV.", "UD.", "PD.", "Tbk.", "Koperasi", "Yayasan"];
      break;
    default:
      return {};
  }
  const selected = {};
  for (const abbrev of Object.keys(ABBREVIATIONS)) {
    if (group.includes(abbrev)) {
      selected[abbrev] = ABBREVIATIONS[abbrev];
    }
  }
  return selected;
}
1488
/**
 * Escape every regular-expression metacharacter in `str` so the result can
 * be embedded in a RegExp source and match the original text literally.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text with each of . * + ? ^ $ { } ( ) | [ ] \ prefixed by a backslash.
 */
function escapeRegex2(str) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaCharacters, (ch) => `\\${ch}`);
}
1491
/**
 * Re-case `replacement` to mirror the casing pattern of `original`.
 *
 * Rules are tried in order:
 *  1. `original` equals its upper-cased form -> upper-case replacement;
 *  2. `original` equals its lower-cased form -> lower-case replacement;
 *  3. first character of `original` equals its upper-cased form ->
 *     first letter upper, remainder lower;
 *  4. otherwise the replacement is returned untouched.
 *
 * @param {string} original - Text that was matched.
 * @param {string} replacement - Text to be inserted.
 * @returns {string} The re-cased replacement.
 */
function matchCase(original, replacement) {
  const isAllUpper = original === original.toUpperCase();
  if (isAllUpper) {
    return replacement.toUpperCase();
  }
  const isAllLower = original === original.toLowerCase();
  if (isAllLower) {
    return replacement.toLowerCase();
  }
  const head = original.charAt(0);
  return head === head.toUpperCase()
    ? replacement.charAt(0).toUpperCase() + replacement.slice(1).toLowerCase()
    : replacement;
}
1503
/**
 * Replace expanded phrases in `text` with their abbreviation, i.e. the
 * inverse of `expandAbbreviation` over the table selected by `options.mode`
 * (default "all").
 *
 * Matching is case-insensitive and anchored on word boundaries. Longer
 * phrases are handled first. When several abbreviations share one
 * expansion, the one appearing last in the table is used.
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @param {{mode?: string}} [options]
 * @returns {string} Text with known expansions contracted.
 */
function contractAbbreviation(text, options) {
  if (!text) return text;
  const { mode = "all" } = options || {};
  const longToShort = {};
  Object.entries(getAbbreviationsByMode(mode)).forEach(([short, long]) => {
    longToShort[long] = short;
  });
  const phrases = Object.keys(longToShort).sort(
    (left, right) => right.length - left.length
  );
  return phrases.reduce((current, phrase) => {
    const pattern = new RegExp("\\b" + escapeRegex2(phrase) + "\\b", "gi");
    return current.replace(pattern, longToShort[phrase]);
  }, text);
}
1522
+
1523
+ // src/text/filter.ts
1524
/**
 * Mask every whole-word, case-insensitive occurrence of a word from the
 * global PROFANITY list.
 *
 * Each hit is replaced by `mask` repeated once per character of the listed
 * word. List entries are treated as literal text: regex metacharacters in
 * an entry are escaped before the pattern is built, so an entry such as
 * "b.go" only matches "b.go" and not "bXgo".
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @param {string} [mask="*"] - String repeated to cover each masked word.
 * @returns {string} The censored text.
 */
function profanityFilter(text, mask = "*") {
  if (!text) return text;
  let filtered = text;
  for (const word of PROFANITY) {
    // Escape so the dictionary entry is matched literally, never as a pattern.
    const literal = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const regex = new RegExp(`\\b${literal}\\b`, "gi");
    const replacement = mask.repeat(word.length);
    // Function replacer keeps "$" sequences inside a custom mask literal.
    filtered = filtered.replace(regex, () => replacement);
  }
  return filtered;
}
1532
/**
 * Drop every whitespace-separated token whose lower-cased form appears in
 * the global STOPWORDS list, and re-join the survivors with single spaces.
 *
 * Tokens are compared as-is, so a token with attached punctuation
 * (e.g. "dan,") is not considered a stopword.
 *
 * @param {string} text - Input text.
 * @returns {string} Text without stopwords.
 */
function removeStopwords(text) {
  const kept = [];
  for (const token of text.split(/\s+/)) {
    if (STOPWORDS.includes(token.toLowerCase())) {
      continue;
    }
    kept.push(token);
  }
  return kept.join(" ");
}
1539
+
1540
+ // src/text/normalization.ts
1541
// Informal / slang Indonesian word -> formal equivalent (all keys lower-case).
var INFORMAL_MAP = {
  gw: "saya",
  gua: "saya",
  lu: "kamu",
  lo: "kamu",
  elo: "kamu",
  lagi: "sedang",
  gue: "saya",
  gwe: "saya",
  gak: "tidak",
  ga: "tidak",
  nggak: "tidak",
  kalo: "kalau",
  karna: "karena",
  tapi: "tetapi",
  udah: "sudah",
  dah: "sudah",
  aja: "saja",
  banget: "sekali",
  emang: "memang",
  pake: "pakai",
  bikin: "membuat",
  kasih: "memberi",
  dapet: "dapat",
  liat: "lihat",
  ngasih: "memberi",
  nyari: "mencari",
  nanya: "bertanya",
  bilang: "berkata"
};
/**
 * Replace informal Indonesian words with their formal equivalents.
 *
 * The text is split on whitespace and re-joined with single spaces. For each
 * token, leading and trailing non-word characters (punctuation) are set
 * aside, the remaining core is looked up case-insensitively in INFORMAL_MAP
 * (ignoring any non-word characters inside it), and the punctuation is put
 * back around the replacement. If the core starts with an upper-case letter
 * the replacement is capitalised.
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @returns {string} Text with informal words formalised.
 */
function toFormal(text) {
  if (!text) return text;
  const hasOwn = Object.prototype.hasOwnProperty;
  const formalized = text.split(/\s+/).map((word) => {
    const start = word.search(/\w/);
    if (start === -1) {
      // Empty token or punctuation only: nothing to look up.
      return word;
    }
    let end = word.length;
    while (end > start && !/\w/.test(word[end - 1])) {
      end--;
    }
    const core = word.slice(start, end);
    const key = core.toLowerCase().replace(/[^\w]/g, "");
    // Own-property check: inherited names such as "constructor" or
    // "toString" must not be mistaken for dictionary entries.
    if (!hasOwn.call(INFORMAL_MAP, key)) {
      return word;
    }
    const formal = INFORMAL_MAP[key];
    const first = core[0];
    const isCapitalised = first !== first.toLowerCase();
    const cased = isCapitalised
      ? formal.charAt(0).toUpperCase() + formal.slice(1)
      : formal;
    return word.slice(0, start) + cased + word.slice(end);
  });
  return formalized.join(" ");
}
1586
/**
 * Heuristically detect "alay" (stylised Indonesian internet slang) writing.
 *
 * Returns true when at least one of these signals is present:
 *  - alternating letter case within three consecutive letters ("aKu", "AkU");
 *  - a token that mixes letters with one of the digits 0 1 2 3 4 5 7
 *    ("4ku", "s3m4ng4t"); tokens made only of digits, such as years or
 *    quantities, are not counted;
 *  - the text contains "q" but no "u" anywhere ("aq");
 *  - any character repeated three or more times in a row ("bangeeet").
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} true if the text looks alay; false for falsy input.
 */
function isAlay(text) {
  if (!text) return false;
  const alternatingCaps = /([a-z][A-Z][a-z]|[A-Z][a-z][A-Z])/.test(text);
  // The lookahead demands a letter in the same token, so plain numbers
  // ("1975", "12") no longer count as letter substitution.
  const numberSub = /\b(?=\w*[a-zA-Z])\w*[0431572]\w*\b/.test(text);
  const qSub = /q/i.test(text) && !/u/i.test(text);
  const excessiveChars = /(.)\1{2,}/.test(text);
  return alternatingCaps || numberSub || qSub || excessiveChars;
}
1594
+
1595
+ // src/text/extract.ts
1596
/**
 * Shorten `text` so that the result, including the ellipsis, is at most
 * `maxLength` UTF-16 code units long.
 *
 * @param {string} text - Input text.
 * @param {number} maxLength - Maximum length of the returned string.
 * @param {{ellipsis?: string, wordBoundary?: boolean}} [options]
 *   `ellipsis` (default "...") is appended when text is cut.
 *   `wordBoundary` (default true) cuts back to the last space inside the
 *   kept part, when there is one past index 0, so a word is not split.
 * @returns {string} "" for falsy text or non-positive maxLength; the text
 *   itself when it already fits; the (possibly shortened) ellipsis alone when
 *   there is no room for any text; otherwise the cut text plus ellipsis.
 */
function truncate(text, maxLength, options) {
  if (!text || maxLength <= 0) {
    return "";
  }
  const { ellipsis = "...", wordBoundary = true } = options || {};
  if (text.length <= maxLength) {
    return text;
  }
  const availableLength = maxLength - ellipsis.length;
  if (availableLength <= 0) {
    return ellipsis.slice(0, maxLength);
  }
  let truncated = text.slice(0, availableLength);
  // If the cut fell between the two halves of a surrogate pair (e.g. an
  // emoji), drop the dangling high surrogate instead of emitting a lone,
  // invalid code unit.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  const nextUnit = text.charCodeAt(truncated.length);
  const splitsPair =
    lastUnit >= 0xd800 && lastUnit <= 0xdbff &&
    nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
  if (splitsPair) {
    truncated = truncated.slice(0, -1);
  }
  if (wordBoundary) {
    const lastSpaceIndex = truncated.lastIndexOf(" ");
    if (lastSpaceIndex > 0) {
      truncated = truncated.slice(0, lastSpaceIndex);
    }
  }
  truncated = truncated.trimEnd();
  return truncated + ellipsis;
}
1618
/**
 * Split `text` into words, discarding punctuation.
 *
 * Every character that is not a word character (`\w`), whitespace or,
 * when `includeHyphenated` is true, a hyphen is turned into a space before
 * splitting. Tokens consisting only of hyphens are discarded.
 *
 * @param {string} text - Input text.
 * @param {{minLength?: number, includeHyphenated?: boolean, lowercase?: boolean}} [options]
 *   `minLength` (default 0) drops shorter words; `includeHyphenated`
 *   (default true) keeps "anak-anak" as one word; `lowercase` (default
 *   false) lower-cases the output.
 * @returns {string[]} The extracted words; [] for empty or blank input.
 */
function extractWords(text, options) {
  if (!text || !text.trim()) {
    return [];
  }
  const {
    minLength = 0,
    includeHyphenated = true,
    lowercase = false
  } = options || {};
  const stripPattern = includeHyphenated ? /[^\w\s-]/g : /[^\w\s]/g;
  const tokens = text.replace(stripPattern, " ").match(/\S+/g) || [];
  const collected = [];
  for (const token of tokens) {
    if (/^-+$/.test(token)) {
      continue;
    }
    if (minLength > 0 && !(token.length >= minLength)) {
      continue;
    }
    collected.push(lowercase ? token.toLowerCase() : token);
  }
  return collected;
}
1643
+
1644
+ // src/text/compare.ts
1645
/**
 * Compare two strings for equality after optional normalisation.
 *
 * Identical inputs (`===`) are equal immediately. Otherwise falsy inputs
 * are treated as "", and each side is normalised in this order: whitespace
 * (via `normalizeWhitespace`, if `ignoreWhitespace`), accents (via
 * `removeAccents`, if `ignoreAccents`), then lower-casing unless
 * `caseSensitive` is set.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @param {{caseSensitive?: boolean, ignoreWhitespace?: boolean, ignoreAccents?: boolean}} [options]
 *   All three flags default to false.
 * @returns {boolean} true when the normalised strings are identical.
 */
function compareStrings(str1, str2, options) {
  if (str1 === str2) {
    return true;
  }
  const {
    caseSensitive = false,
    ignoreWhitespace = false,
    ignoreAccents = false
  } = options || {};
  const prepare = (value) => {
    let prepared = value || "";
    if (ignoreWhitespace) {
      prepared = normalizeWhitespace(prepared);
    }
    if (ignoreAccents) {
      prepared = removeAccents(prepared);
    }
    return caseSensitive ? prepared : prepared.toLowerCase();
  };
  return prepare(str1) === prepare(str2);
}
1672
/**
 * Similarity score between two strings, derived from their Levenshtein
 * edit distance: `1 - distance / max(length1, length2)`.
 *
 * Comparison is per UTF-16 code unit and case-sensitive.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} 1 for identical strings (including two empty strings),
 *   0 when exactly one is empty, otherwise a value between 0 and 1.
 */
function similarity(str1, str2) {
  if (str1 === str2) return 1;
  const rows = str1.length;
  const cols = str2.length;
  if (rows === 0) return cols === 0 ? 1 : 0;
  if (cols === 0) return 0;
  // One rolling row of the edit-distance table; row[j] starts out as the
  // distance from the empty prefix of str1 to the first j units of str2.
  const row = Array.from({ length: cols + 1 }, (_, j) => j);
  for (let i = 1; i <= rows; i++) {
    // `diagonal` holds the previous row's value at column j - 1.
    let diagonal = row[0];
    row[0] = i;
    for (let j = 1; j <= cols; j++) {
      const above = row[j];
      const substitution = diagonal + (str1[i - 1] === str2[j - 1] ? 0 : 1);
      const insertion = row[j - 1] + 1;
      const deletion = above + 1;
      row[j] = Math.min(insertion, deletion, substitution);
      diagonal = above;
    }
  }
  const distance = row[cols];
  return 1 - distance / Math.max(rows, cols);
}
1702
+
1703
+ export { ABBREVIATIONS, ACRONYMS, LOWERCASE_WORDS, capitalize, compareStrings, contractAbbreviation, expandAbbreviation, extractWords, isAlay, normalizeWhitespace, profanityFilter, removeAccents, removeStopwords, sanitize, similarity, slugify, toFormal, toSentenceCase, toTitleCase, truncate };
1704
+ //# sourceMappingURL=index.js.map
1705
+ //# sourceMappingURL=index.js.map