@indodev/toolkit 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1150,7 +1150,907 @@ function capitalize(str) {
1150
1150
  return str.charAt(0).toUpperCase() + str.slice(1);
1151
1151
  }
1152
1152
 
1153
+ // src/text/constants.ts
1154
// Words that toTitleCase keeps in lowercase when they are not the first word
// of the text. Entries are stored in lowercase because lookups are done with
// the lower-cased form of each word.
var LOWERCASE_WORDS = [
  // Indonesian prepositions (kata depan)
  "di",
  "ke",
  "dari",
  "pada",
  "dalam",
  "untuk",
  "dengan",
  "oleh",
  "kepada",
  "terhadap",
  "tentang",
  "tanpa",
  "hingga",
  "sampai",
  "sejak",
  "menuju",
  "melalui",
  // Indonesian conjunctions (kata hubung)
  "dan",
  "atau",
  "tetapi",
  "namun",
  "serta",
  "maupun",
  "melainkan",
  "sedangkan",
  // Indonesian articles/particles
  "yang",
  "sebagai",
  "adalah",
  "ialah",
  "yaitu",
  "bahwa",
  "akan",
  "telah",
  "sudah",
  "belum",
  // English articles
  "a",
  "an",
  "the",
  // English conjunctions
  "and",
  "or",
  "but",
  "nor",
  "for",
  "yet",
  "so",
  "as",
  // English prepositions (short ones, < 5 letters)
  "at",
  "by",
  "in",
  "of",
  "on",
  "to",
  "up",
  "via",
  "per",
  "off",
  "out"
  // English prepositions (5+ letters - optional, some style guides capitalize these).
  // Intentionally left disabled; callers can add them through the `exceptions`
  // option of toTitleCase.
  // 'about',
  // 'above',
  // 'across',
  // 'after',
  // 'among',
  // 'below',
  // 'under',
  // 'until',
  // 'with',
];
1229
// Known acronyms and short forms, in their canonical spelling. toTitleCase
// loads this list into a Set and consults it when `preserveAcronyms` is
// enabled. A few entries appear twice (see the notes below); duplicates are
// harmless because the list is only used through a Set.
var ACRONYMS = [
  // Indonesian government & military
  "DKI", // Daerah Khusus Ibukota
  "DIY", // Daerah Istimewa Yogyakarta
  "TNI", // Tentara Nasional Indonesia
  "POLRI", // Kepolisian Republik Indonesia
  "ABRI", // Angkatan Bersenjata Republik Indonesia
  "MPR", // Majelis Permusyawaratan Rakyat
  "DPR", // Dewan Perwakilan Rakyat
  "KPK", // Komisi Pemberantasan Korupsi
  "BIN", // Badan Intelijen Negara
  // Indonesian business entities
  "PT", // Perseroan Terbatas
  "CV", // Commanditaire Vennootschap
  "UD", // Usaha Dagang
  "PD", // Perusahaan Daerah
  "Tbk", // Terbuka (publicly traded)
  "BUMN", // Badan Usaha Milik Negara
  "BUMD", // Badan Usaha Milik Daerah
  // Indonesian banks
  "BCA", // Bank Central Asia
  "BRI", // Bank Rakyat Indonesia
  "BNI", // Bank Negara Indonesia
  "BTN", // Bank Tabungan Negara
  "BSI", // Bank Syariah Indonesia
  "BPD", // Bank Pembangunan Daerah
  // Indonesian government services
  "KTP", // Kartu Tanda Penduduk
  "NIK", // Nomor Induk Kependudukan
  "NPWP", // Nomor Pokok Wajib Pajak
  "SIM", // Surat Izin Mengemudi
  "STNK", // Surat Tanda Nomor Kendaraan
  "BPJS", // Badan Penyelenggara Jaminan Sosial
  "KIS", // Kartu Indonesia Sehat
  "KIP", // Kartu Indonesia Pintar
  "PKH", // Program Keluarga Harapan
  // Indonesian utilities & infrastructure
  "PLN", // Perusahaan Listrik Negara
  "PDAM", // Perusahaan Daerah Air Minum
  "PGN", // Perusahaan Gas Negara
  "KAI", // Kereta Api Indonesia
  "MRT", // Mass Rapid Transit
  "LRT", // Light Rail Transit
  // Indonesian taxes & fees
  "PBB", // Pajak Bumi dan Bangunan
  "PPh", // Pajak Penghasilan
  "PPN", // Pajak Pertambahan Nilai
  "BPHTB", // Bea Perolehan Hak atas Tanah dan Bangunan
  // Indonesian education
  "UI", // Universitas Indonesia
  "ITB", // Institut Teknologi Bandung
  "UGM", // Universitas Gadjah Mada
  "IPB", // Institut Pertanian Bogor
  "ITS", // Institut Teknologi Sepuluh Nopember
  "UNPAD", // Universitas Padjadjaran
  "UNDIP", // Universitas Diponegoro
  "UNAIR", // Universitas Airlangga
  "UNS", // Universitas Sebelas Maret
  // Indonesian degrees (gelar)
  "S.Pd", // Sarjana Pendidikan
  "S.H", // Sarjana Hukum
  "S.E", // Sarjana Ekonomi
  "S.T", // Sarjana Teknik
  "S.Kom", // Sarjana Komputer
  "S.Si", // Sarjana Sains
  "S.Sos", // Sarjana Sosial
  "M.Pd", // Magister Pendidikan
  "M.M", // Magister Manajemen
  "M.T", // Magister Teknik
  "M.Kom", // Magister Komputer
  // Common services
  "ATM", // Automated Teller Machine
  "POS", // Point of Sale
  "SMS", // Short Message Service
  "GPS", // Global Positioning System
  "WiFi", // Wireless Fidelity (technically Wi-Fi)
  "USB", // Universal Serial Bus
  "PIN", // Personal Identification Number
  "OTP", // One Time Password
  "QR", // Quick Response
  // Technology & IT
  "IT", // Information Technology
  "AI", // Artificial Intelligence
  "ML", // Machine Learning
  "API", // Application Programming Interface
  "UI", // User Interface (duplicate with Universitas Indonesia, context matters)
  "UX", // User Experience
  "SEO", // Search Engine Optimization
  "SaaS", // Software as a Service
  "CRM", // Customer Relationship Management
  "ERP", // Enterprise Resource Planning
  // Business titles
  "CEO", // Chief Executive Officer
  "CFO", // Chief Financial Officer
  "CTO", // Chief Technology Officer
  "COO", // Chief Operating Officer
  "CMO", // Chief Marketing Officer
  "HR", // Human Resources
  "PR", // Public Relations
  "VP", // Vice President
  "GM", // General Manager
  // International organizations
  "UN", // United Nations
  "WHO", // World Health Organization
  "UNESCO", // United Nations Educational, Scientific and Cultural Organization
  "NATO", // North Atlantic Treaty Organization
  "ASEAN", // Association of Southeast Asian Nations
  "APEC", // Asia-Pacific Economic Cooperation
  "WTO", // World Trade Organization
  "IMF", // International Monetary Fund
  // Medical
  "ICU", // Intensive Care Unit
  "ER", // Emergency Room
  "MRI", // Magnetic Resonance Imaging
  "CT", // Computed Tomography
  "DNA", // Deoxyribonucleic Acid
  "RNA", // Ribonucleic Acid
  "HIV", // Human Immunodeficiency Virus
  "AIDS", // Acquired Immunodeficiency Syndrome
  "COVID", // Coronavirus Disease
  // Measurements & units
  "KM", // Kilometer
  "CM", // Centimeter
  "MM", // Millimeter
  "KG", // Kilogram
  "RPM", // Revolutions Per Minute
  "MPH", // Miles Per Hour
  "KPH", // Kilometers Per Hour
  // Finance
  "IPO", // Initial Public Offering
  "ATM", // Automated Teller Machine (duplicate)
  "ROI", // Return on Investment
  "GDP", // Gross Domestic Product
  "VAT" // Value Added Tax
];
1482
// Abbreviation -> expansion table used by expandAbbreviation and (inverted)
// by contractAbbreviation. Some keys map to themselves (e.g. Kota, Blok) and
// some expansions are shared by several keys (e.g. "Prov."/"Prop."); when the
// table is inverted, the key that appears later here wins for that expansion.
var ABBREVIATIONS = {
  // ========== Address Abbreviations ==========
  "Jl.": "Jalan",
  "Gg.": "Gang",
  "No.": "Nomor",
  "Kp.": "Kampung",
  "Ds.": "Desa",
  "Kel.": "Kelurahan",
  "Kec.": "Kecamatan",
  "Kab.": "Kabupaten",
  Kota: "Kota",
  "Prov.": "Provinsi",
  "Prop.": "Provinsi",
  "Rt.": "Rukun Tetangga",
  "Rw.": "Rukun Warga",
  Blok: "Blok",
  "Komp.": "Kompleks",
  Perumahan: "Perumahan",
  "Perum.": "Perumahan",
  // ========== Academic Titles ==========
  "Dr.": "Doktor",
  "Ir.": "Insinyur",
  "Prof.": "Profesor",
  "Drs.": "Doktorandus",
  "Dra.": "Doktoranda",
  // Bachelor degrees
  "S.Pd.": "Sarjana Pendidikan",
  "S.H.": "Sarjana Hukum",
  "S.E.": "Sarjana Ekonomi",
  "S.T.": "Sarjana Teknik",
  "S.Kom.": "Sarjana Komputer",
  "S.Si.": "Sarjana Sains",
  "S.Sos.": "Sarjana Sosial",
  "S.I.Kom.": "Sarjana Ilmu Komunikasi",
  "S.S.": "Sarjana Sastra",
  "S.Psi.": "Sarjana Psikologi",
  "S.Farm.": "Sarjana Farmasi",
  "S.Ked.": "Sarjana Kedokteran",
  // Master degrees
  "M.Sc.": "Master of Science",
  "M.M.": "Magister Manajemen",
  "M.Pd.": "Magister Pendidikan",
  "M.T.": "Magister Teknik",
  "M.Kom.": "Magister Komputer",
  "M.Si.": "Magister Sains",
  "M.H.": "Magister Hukum",
  "M.A.": "Master of Arts",
  MBA: "Master of Business Administration",
  // ========== Honorifics ==========
  "Bpk.": "Bapak",
  Ibu: "Ibu",
  "Sdr.": "Saudara",
  "Sdri.": "Saudari",
  "Yth.": "Yang Terhormat",
  "H.": "Haji",
  "Hj.": "Hajjah",
  "Tn.": "Tuan",
  "Ny.": "Nyonya",
  "Nn.": "Nona",
  // ========== Organizations ==========
  "PT.": "Perseroan Terbatas",
  "CV.": "Commanditaire Vennootschap",
  "UD.": "Usaha Dagang",
  "PD.": "Perusahaan Daerah",
  "Tbk.": "Terbuka",
  Koperasi: "Koperasi",
  Yayasan: "Yayasan",
  // ========== Common Abbreviations ==========
  "dst.": "dan seterusnya",
  "dsb.": "dan sebagainya",
  "dll.": "dan lain-lain",
  "dkk.": "dan kawan-kawan",
  "a.n.": "atas nama",
  "u.p.": "untuk perhatian",
  "u.b.": "untuk beliau",
  "c.q.": "casu quo",
  "hlm.": "halaman",
  "tgl.": "tanggal",
  "bln.": "bulan",
  "thn.": "tahun",
  "ttd.": "tertanda",
  // ========== Contact Information ==========
  "Tlp.": "Telepon",
  "Telp.": "Telepon",
  "HP.": "Handphone",
  Fax: "Faksimile",
  Email: "Email",
  Website: "Website",
  // ========== Days (Indonesian) ==========
  "Sen.": "Senin",
  "Sel.": "Selasa",
  "Rab.": "Rabu",
  "Kam.": "Kamis",
  "Jum.": "Jumat",
  "Sab.": "Sabtu",
  "Min.": "Minggu",
  // ========== Months (Indonesian) ==========
  "Jan.": "Januari",
  "Feb.": "Februari",
  "Mar.": "Maret",
  "Apr.": "April",
  Mei: "Mei",
  "Jun.": "Juni",
  "Jul.": "Juli",
  "Agt.": "Agustus",
  "Sep.": "September",
  "Okt.": "Oktober",
  "Nov.": "November",
  "Des.": "Desember",
  // ========== Units & Measurements ==========
  "kg.": "kilogram",
  "gr.": "gram",
  "lt.": "liter",
  "ml.": "mililiter",
  "km.": "kilometer",
  "cm.": "sentimeter",
  "mm.": "milimeter",
  "m2.": "meter persegi",
  "m3.": "meter kubik",
  "ha.": "hektar"
};
1603
+
1604
+ // src/text/capitalization.ts
1605
/**
 * Upper-cases the first character of `text` and lower-cases everything after it.
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @returns {string} The re-cased text.
 */
function capitalize2(text) {
  if (!text) {
    return text;
  }
  const head = text.charAt(0);
  const tail = text.slice(1);
  return `${head.toUpperCase()}${tail.toLowerCase()}`;
}
1609
/**
 * Converts `text` to title case.
 *
 * Whitespace is normalized first (trimmed, runs collapsed to one space), then
 * every space-separated word is handed to processWord, or to
 * processHyphenatedWord when it contains a hyphen.
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @param {object} [options]
 * @param {boolean} [options.preserveAcronyms=true] - Consult ACRONYMS when casing words.
 * @param {boolean} [options.strict=false] - Forwarded to the word helpers.
 * @param {string[]} [options.exceptions=[]] - Extra words to keep lowercase
 *   (in addition to LOWERCASE_WORDS) when they are not the first word.
 * @returns {string} The title-cased text.
 */
function toTitleCase(text, options) {
  if (!text) return text;
  const {
    preserveAcronyms = true,
    strict = false,
    exceptions = []
  } = options || {};
  // Lookups are performed with the lower-cased word, so exceptions must be
  // stored lower-cased too; otherwise an entry such as "Dari" never matches.
  const lowercaseSet = new Set([
    ...LOWERCASE_WORDS,
    ...exceptions.map((entry) => String(entry).toLowerCase())
  ]);
  const acronymSet = new Set(ACRONYMS);
  // The helpers only read from the context, so one object serves every word.
  const context = { lowercaseSet, acronymSet, preserveAcronyms, strict };
  return normalizeSpaces(text)
    .split(" ")
    .map((word, index) => {
      if (!word) return word;
      const isFirstWord = index === 0;
      return word.includes("-")
        ? processHyphenatedWord(word, isFirstWord, context)
        : processWord(word, isFirstWord, context);
    })
    .join(" ");
}
1638
/**
 * Trims `text` and collapses every run of whitespace into a single space.
 *
 * @param {string} text - Input text.
 * @returns {string} The normalized text.
 */
function normalizeSpaces(text) {
  const pieces = text.trim().split(/\s+/);
  return pieces.join(" ");
}
1641
/**
 * Returns the canonical spelling of `upperWord` from `acronymSet`, or
 * `undefined` when the word is not a known acronym.
 *
 * The comparison is case-insensitive so that mixed-case entries such as
 * "Tbk", "PPh" or "WiFi" are recognised and restored to their listed form.
 */
function findCanonicalAcronym(acronymSet, upperWord) {
  // Fast path: entries that are already fully upper-case.
  if (acronymSet.has(upperWord)) return upperWord;
  // Slow path: linear scan for mixed-case entries (the set is small).
  for (const acronym of acronymSet) {
    if (acronym.toUpperCase() === upperWord) return acronym;
  }
  return undefined;
}

/**
 * Applies title-case rules to a single word.
 *
 * Order of precedence:
 *   1. known acronym (when `preserveAcronyms` is on) -> canonical acronym spelling
 *   2. word in `lowercaseSet` and not the first word -> all lowercase
 *   3. otherwise -> lower-cased with the first letter capitalized
 *
 * @param {string} word - The word to convert.
 * @param {boolean} isFirstWord - True when this is the first word of the text.
 * @param {object} context
 * @param {Set<string>} context.lowercaseSet - Lower-cased words to keep lowercase.
 * @param {Set<string>} context.acronymSet - Acronyms in canonical spelling.
 * @param {boolean} context.preserveAcronyms - Whether rule 1 is applied.
 * @param {boolean} [context.strict] - Accepted for interface compatibility;
 *   it does not change the result (both code paths were already identical).
 * @returns {string} The converted word.
 */
function processWord(word, isFirstWord, context) {
  const { lowercaseSet, acronymSet, preserveAcronyms } = context;
  const lowerWord = word.toLowerCase();
  if (preserveAcronyms) {
    const acronym = findCanonicalAcronym(acronymSet, word.toUpperCase());
    if (acronym !== undefined) {
      return acronym;
    }
  }
  if (!isFirstWord && lowercaseSet.has(lowerWord)) {
    return lowerWord;
  }
  return capitalizeFirstLetter(lowerWord);
}

/**
 * Applies processWord to every hyphen-separated part of `word` and re-joins
 * the parts with "-". Only the first part can count as the first word.
 *
 * @param {string} word - A word containing one or more hyphens.
 * @param {boolean} isFirstWord - True when `word` is the first word of the text.
 * @param {object} context - Same context object processWord expects.
 * @returns {string} The converted word.
 */
function processHyphenatedWord(word, isFirstWord, context) {
  return word
    .split("-")
    .map((part, index) => processWord(part, isFirstWord && index === 0, context))
    .join("-");
}

/**
 * Upper-cases the first character of `word`, leaving the rest untouched.
 *
 * @param {string} word - Input word; falsy values are returned unchanged.
 * @returns {string} The word with its first character upper-cased.
 */
function capitalizeFirstLetter(word) {
  if (!word) return word;
  return word.charAt(0).toUpperCase() + word.slice(1);
}
1665
/**
 * Converts `text` to sentence case: the first letter of each sentence is
 * upper-cased and every other character is lower-cased.
 *
 * Whitespace is trimmed and collapsed first. A sentence ends at ".", "!" or
 * "?". A "." that is immediately followed by anything other than a space or
 * another ".", "!" or "?" (for example in "3.14") does not start a new sentence.
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @returns {string} The sentence-cased text.
 */
function toSentenceCase(text) {
  if (!text) return text;
  const collapsed = text.trim().replace(/\s+/g, " ");
  // Split by UTF-16 code unit so positions line up with string indices.
  const units = collapsed.split("");
  const letterPattern = /[a-zA-ZÀ-ÿ]/;
  const pieces = [];
  let atSentenceStart = true;
  for (const [position, unit] of units.entries()) {
    if (atSentenceStart && letterPattern.test(unit)) {
      pieces.push(unit.toUpperCase());
      atSentenceStart = false;
    } else {
      pieces.push(unit.toLowerCase());
    }
    if (isSentenceEnd(unit)) {
      const following = units[position + 1];
      // A period glued to the next character is part of a token, not a stop.
      const insideToken =
        unit === "." &&
        following !== undefined &&
        following !== " " &&
        !/[.!?]/.test(following);
      atSentenceStart = !insideToken;
    }
  }
  return pieces.join("");
}

/**
 * Tells whether `char` is one of the sentence terminators ".", "!" or "?".
 *
 * @param {string} char - A single character.
 * @returns {boolean} True for a sentence terminator.
 */
function isSentenceEnd(char) {
  return [".", "!", "?"].includes(char);
}
1693
+
1694
+ // src/text/slug.ts
1695
/**
 * Builds a URL-friendly slug from `text`.
 *
 * Steps, in order: apply custom `replacements`; turn "&" into " dan " and "/"
 * into " atau "; optionally lower-case; delete a fixed set of punctuation,
 * accented letters and trademark symbols; turn remaining non-word characters
 * and whitespace runs into `separator`; turn "-" into `separator` when a
 * custom separator is used; and, when `trim` is on, collapse repeated
 * separators and strip them from both ends.
 *
 * NOTE(review): the listed accented letters are deleted, not transliterated
 * ("café" becomes "caf"). Left as-is to keep existing slugs stable.
 *
 * @param {string} text - Input text; falsy values produce "".
 * @param {object} [options]
 * @param {string} [options.separator="-"] - String placed between words.
 * @param {boolean} [options.lowercase=true] - Lower-case the result.
 * @param {Object<string,string>} [options.replacements={}] - Literal
 *   search -> replacement pairs applied before anything else.
 * @param {boolean} [options.trim=true] - Collapse and trim separators.
 * @returns {string} The slug.
 */
function slugify(text, options) {
  if (!text) return "";
  const {
    separator = "-",
    lowercase = true,
    replacements = {},
    trim = true
  } = options || {};
  let result = text;
  for (const [search, replace] of Object.entries(replacements)) {
    result = result.replace(new RegExp(escapeRegex(search), "g"), replace);
  }
  result = result.replace(/&/g, " dan ");
  result = result.replace(/\//g, " atau ");
  if (lowercase) {
    result = result.toLowerCase();
  }
  result = result.replace(/[.'@éèêëàâäôöûüùïîçñ™®©]/g, "");
  // Replacer functions keep the separator literal even if it contains "$".
  const insertSeparator = () => separator;
  result = result.replace(/[^\w\s-]+/g, insertSeparator);
  result = result.replace(/\s+/g, insertSeparator);
  if (separator !== "-") {
    result = result.replace(/-/g, insertSeparator);
  }
  // An empty separator leaves nothing to collapse or trim.
  if (trim && separator !== "") {
    // Escape the whole separator and group it, so that separators such as
    // "d", "w" or "--" are matched literally and as a unit rather than being
    // read as regex syntax (\d, \w, ...).
    const unit = `(?:${escapeRegex(separator)})`;
    result = result.replace(new RegExp(`${unit}+`, "g"), insertSeparator);
    result = result.replace(new RegExp(`^${unit}+|${unit}+$`, "g"), "");
  }
  return result;
}

/**
 * Escapes every regular-expression metacharacter in `str` so it can be
 * embedded in a RegExp and matched literally.
 *
 * @param {string} str - Raw text.
 * @returns {string} The escaped text.
 */
function escapeRegex(str) {
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
1729
+
1730
+ // src/text/sanitize.ts
1731
/**
 * Trims `text` and collapses every run of whitespace (including newlines and
 * tabs) into a single space.
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @returns {string} The normalized text.
 */
function normalizeWhitespace(text) {
  if (!text) {
    return text;
  }
  const tokens = text.trim().split(/\s+/);
  return tokens.join(" ");
}
1735
/**
 * Cleans up `text` according to `options`.
 *
 * Steps, in order: newlines -> spaces (`removeNewlines`); ASCII punctuation
 * removed (`removePunctuation`); characters outside `allowedChars` removed;
 * runs of spaces/tabs collapsed to one space (`removeExtraSpaces`; runs of
 * any whitespace when `removeNewlines` is also set); and finally the result
 * is trimmed (`trim`). With `trim: false`, leading and trailing spaces/tabs
 * are preserved exactly and only the interior is collapsed.
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @param {object} [options]
 * @param {boolean} [options.removeNewlines=false]
 * @param {boolean} [options.removeExtraSpaces=true]
 * @param {boolean} [options.removePunctuation=false]
 * @param {string} [options.allowedChars] - Body of a regex character class
 *   (e.g. "a-z0-9 "); it is interpolated into a RegExp unescaped.
 * @param {boolean} [options.trim=true]
 * @returns {string} The sanitized text.
 */
function sanitize(text, options) {
  if (!text) return text;
  const {
    removeNewlines = false,
    removeExtraSpaces = true,
    removePunctuation = false,
    allowedChars,
    trim = true
  } = options || {};
  let result = text;
  if (removeNewlines) {
    result = result.replace(/[\n\r]/g, " ");
  }
  if (removePunctuation) {
    result = result.replace(/[!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]/g, "");
  }
  if (allowedChars) {
    result = result.replace(new RegExp(`[^${allowedChars}]`, "g"), "");
  }
  if (removeExtraSpaces) {
    // Newlines survive unless the caller asked for them to be removed.
    const whitespaceRuns = removeNewlines ? /\s+/g : /[ \t]+/g;
    if (trim) {
      result = result.replace(whitespaceRuns, " ");
    } else {
      const leading = result.match(/^[ \t]*/)[0];
      // Measure the trailing run on what is left AFTER the leading run, so an
      // all-whitespace string is not counted (and emitted) twice.
      const remainder = result.slice(leading.length);
      const trailing = remainder.match(/[ \t]*$/)[0];
      const middle = remainder.slice(0, remainder.length - trailing.length);
      result = leading + middle.replace(whitespaceRuns, " ") + trailing;
    }
  }
  return trim ? result.trim() : result;
}
1780
/**
 * Strips diacritics from `text`.
 *
 * Letters that do not decompose under Unicode NFD (such as "Ø", "Æ", "Đ",
 * "Ł", "Þ", "ß") are first mapped to ASCII through a lookup table; the text
 * is then NFD-normalized and all combining marks (U+0300-U+036F) are removed.
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @returns {string} The text without accents.
 */
function removeAccents(text) {
  if (!text) {
    return text;
  }
  const asciiFor = {
    "\u00D8": "O",
    "\u00F8": "o",
    "\u00C6": "AE",
    "\u00E6": "ae",
    "\u00C5": "A",
    "\u00E5": "a",
    "\u0110": "D",
    "\u0111": "d",
    "\u0141": "L",
    "\u0142": "l",
    "\u00DE": "TH",
    "\u00FE": "th",
    "\u00DF": "ss"
  };
  // One pass over the text; the class lists exactly the keys of the table.
  const mapped = text.replace(
    /[\u00D8\u00F8\u00C6\u00E6\u00C5\u00E5\u0110\u0111\u0141\u0142\u00DE\u00FE\u00DF]/g,
    (symbol) => asciiFor[symbol]
  );
  const decomposed = mapped.normalize("NFD");
  return decomposed.replace(/[\u0300-\u036f]/g, "");
}
1803
+
1804
+ // src/text/abbreviation.ts
1805
/**
 * Replaces known abbreviations in `text` with their expansions.
 *
 * The table comes from getAbbreviationsByMode(mode), overlaid with
 * `customMap` (custom entries win). Longer abbreviations are applied first so
 * that e.g. "S.H." is handled before "H.". Matching is case-insensitive, and
 * a word boundary is required on whichever side of the abbreviation starts or
 * ends with a word character.
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @param {object} [options]
 * @param {string} [options.mode="all"] - Abbreviation group to use.
 * @param {Object<string,string>} [options.customMap={}] - Extra or overriding entries.
 * @param {boolean} [options.preserveCase=false] - Shape the expansion's
 *   casing after the matched text (see matchCase).
 * @returns {string} The text with abbreviations expanded.
 */
function expandAbbreviation(text, options) {
  if (!text) return text;
  const { mode = "all", customMap = {}, preserveCase = false } = options || {};
  const lookup = Object.assign({}, getAbbreviationsByMode(mode), customMap);
  const longestFirst = Object.keys(lookup).sort(
    (left, right) => right.length - left.length
  );
  return longestFirst.reduce((current, key) => {
    const fullForm = lookup[key];
    const leadingEdge = /^\w/.test(key) ? "\\b" : "";
    const trailingEdge = /\w$/.test(key) ? "\\b" : "";
    const pattern = new RegExp(
      [leadingEdge, escapeRegex2(key), trailingEdge].join(""),
      "gi"
    );
    return current.replace(pattern, (hit) =>
      preserveCase ? matchCase(hit, fullForm) : fullForm
    );
  }, text);
}
1833
/**
 * Returns the abbreviation table for `mode`.
 *
 * "all" returns the ABBREVIATIONS object itself (not a copy). "address",
 * "title" and "org" return a new object holding only that group's entries,
 * in ABBREVIATIONS order. Any other mode yields an empty object.
 *
 * @param {string} mode - "all", "address", "title" or "org".
 * @returns {Object<string,string>} Abbreviation -> expansion map.
 */
function getAbbreviationsByMode(mode) {
  if (mode === "all") {
    return ABBREVIATIONS;
  }
  let members;
  switch (mode) {
    case "address":
      members = new Set([
        "Jl.",
        "Gg.",
        "No.",
        "Kp.",
        "Ds.",
        "Kel.",
        "Kec.",
        "Kab.",
        "Kota",
        "Prov.",
        "Prop.",
        "Rt.",
        "Rw.",
        "Blok",
        "Komp.",
        "Perumahan",
        "Perum."
      ]);
      break;
    case "title":
      members = new Set([
        "Dr.",
        "Ir.",
        "Prof.",
        "Drs.",
        "Dra.",
        "S.Pd.",
        "S.H.",
        "S.E.",
        "S.T.",
        "S.Kom.",
        "S.Si.",
        "S.Sos.",
        "S.I.Kom.",
        "S.S.",
        "S.Psi.",
        "S.Farm.",
        "S.Ked.",
        "M.Sc.",
        "M.M.",
        "M.Pd.",
        "M.T.",
        "M.Kom.",
        "M.Si.",
        "M.H.",
        "M.A.",
        "MBA"
      ]);
      break;
    case "org":
      members = new Set([
        "PT.",
        "CV.",
        "UD.",
        "PD.",
        "Tbk.",
        "Koperasi",
        "Yayasan"
      ]);
      break;
    default:
      // Unknown modes select nothing.
      members = new Set();
  }
  const subset = {};
  // Walk the master table so the result keeps its key order.
  for (const key of Object.keys(ABBREVIATIONS)) {
    if (members.has(key)) {
      subset[key] = ABBREVIATIONS[key];
    }
  }
  return subset;
}
1905
/**
 * Backslash-escapes every regular-expression metacharacter in `str` so the
 * result matches `str` literally when used inside a RegExp.
 *
 * @param {string} str - Raw text.
 * @returns {string} The escaped text.
 */
function escapeRegex2(str) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metacharacters, (symbol) => `\\${symbol}`);
}
1908
/**
 * Re-cases `replacement` to follow the casing of `original`.
 *
 * - `original` entirely upper-case -> `replacement` upper-cased
 * - `original` entirely lower-case -> `replacement` lower-cased
 * - `original` starts with a character that is its own upper-case form ->
 *   `replacement` with first character upper-cased and the rest lower-cased
 * - otherwise -> `replacement` unchanged
 *
 * @param {string} original - The text whose casing is the model.
 * @param {string} replacement - The text to re-case.
 * @returns {string} The re-cased replacement.
 */
function matchCase(original, replacement) {
  const shouted = original.toUpperCase();
  if (shouted === original) {
    return replacement.toUpperCase();
  }
  const whispered = original.toLowerCase();
  if (whispered === original) {
    return replacement.toLowerCase();
  }
  const initial = original.charAt(0);
  if (initial !== initial.toUpperCase()) {
    return replacement;
  }
  const head = replacement.charAt(0).toUpperCase();
  const tail = replacement.slice(1).toLowerCase();
  return head + tail;
}
1920
/**
 * Replaces full forms in `text` with their abbreviations; the inverse of
 * expandAbbreviation.
 *
 * The table from getAbbreviationsByMode(mode) is inverted (when several
 * abbreviations share one expansion, the last one listed wins). Longer
 * expansions are applied first, matching is case-insensitive, and word
 * boundaries are required on both sides.
 *
 * @param {string} text - Input text; falsy values are returned unchanged.
 * @param {object} [options]
 * @param {string} [options.mode="all"] - Abbreviation group to use.
 * @returns {string} The text with full forms contracted.
 */
function contractAbbreviation(text, options) {
  if (!text) return text;
  const { mode = "all" } = options || {};
  const shortFormFor = {};
  Object.entries(getAbbreviationsByMode(mode)).forEach(([shortForm, fullForm]) => {
    shortFormFor[fullForm] = shortForm;
  });
  const longestFirst = Object.keys(shortFormFor).sort(
    (left, right) => right.length - left.length
  );
  return longestFirst.reduce((current, fullForm) => {
    const pattern = new RegExp(`\\b${escapeRegex2(fullForm)}\\b`, "gi");
    return current.replace(pattern, shortFormFor[fullForm]);
  }, text);
}
1939
+
1940
+ // src/text/extract.ts
1941
/**
 * Shortens `text` to at most `maxLength` characters, ellipsis included.
 *
 * Text that already fits is returned unchanged. Otherwise the text is cut so
 * that text plus `ellipsis` fits in `maxLength`. In word-boundary mode a cut
 * that would land inside a word is moved back to the previous space; a cut
 * that already falls exactly at the end of a word is kept, so a complete word
 * that fits is not discarded. If there is no room for any text, the ellipsis
 * itself is cut down to `maxLength`.
 *
 * @param {string} text - Input text; falsy values produce "".
 * @param {number} maxLength - Maximum result length; values <= 0 produce "".
 * @param {object} [options]
 * @param {string} [options.ellipsis="..."] - Suffix appended to cut text.
 * @param {boolean} [options.wordBoundary=true] - Avoid cutting inside a word.
 * @returns {string} The truncated text.
 */
function truncate(text, maxLength, options) {
  if (!text || maxLength <= 0) {
    return "";
  }
  const { ellipsis = "...", wordBoundary = true } = options || {};
  if (text.length <= maxLength) {
    return text;
  }
  const availableLength = maxLength - ellipsis.length;
  if (availableLength <= 0) {
    return ellipsis.slice(0, maxLength);
  }
  let truncated = text.slice(0, availableLength);
  if (wordBoundary) {
    // The cut splits a word only when non-whitespace sits on BOTH sides of it.
    const cutsThroughWord =
      /\S$/.test(truncated) && /\S/.test(text.charAt(availableLength));
    if (cutsThroughWord) {
      const lastSpaceIndex = truncated.lastIndexOf(" ");
      // With no earlier space there is no boundary to fall back to; keep the
      // hard cut rather than returning only the ellipsis.
      if (lastSpaceIndex > 0) {
        truncated = truncated.slice(0, lastSpaceIndex);
      }
    }
  }
  return truncated.trimEnd() + ellipsis;
}
1963
/**
 * Splits `text` into words.
 *
 * Every character that is not a word character (`\w`), whitespace or, when
 * `includeHyphenated` is on, a hyphen is treated as a separator. Tokens
 * consisting only of hyphens are discarded.
 *
 * @param {string} text - Input text; empty or whitespace-only input yields [].
 * @param {object} [options]
 * @param {number} [options.minLength=0] - Drop words shorter than this.
 * @param {boolean} [options.includeHyphenated=true] - Keep "anak-anak" as one word.
 * @param {boolean} [options.lowercase=false] - Lower-case the returned words.
 * @returns {string[]} The extracted words, in order of appearance.
 */
function extractWords(text, options) {
  if (!text || !text.trim()) {
    return [];
  }
  const {
    minLength = 0,
    includeHyphenated = true,
    lowercase = false
  } = options || {};
  const separators = includeHyphenated ? /[^\w\s-]/g : /[^\w\s]/g;
  const collected = [];
  for (const token of text.replace(separators, " ").split(/\s+/)) {
    const candidate = token.trim();
    if (candidate.length === 0 || /^-+$/.test(candidate)) {
      continue;
    }
    if (minLength > 0 && candidate.length < minLength) {
      continue;
    }
    collected.push(lowercase ? candidate.toLowerCase() : candidate);
  }
  return collected;
}
1988
+
1989
+ // src/text/compare.ts
1990
/**
 * Compares two strings for equality after optional normalization.
 *
 * Identical arguments compare equal immediately. Otherwise falsy arguments
 * are treated as "", and each side is normalized: whitespace is trimmed and
 * collapsed (`ignoreWhitespace`), accents are removed (`ignoreAccents`), and
 * the text is lower-cased unless `caseSensitive` is set.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @param {object} [options]
 * @param {boolean} [options.caseSensitive=false]
 * @param {boolean} [options.ignoreWhitespace=false]
 * @param {boolean} [options.ignoreAccents=false]
 * @returns {boolean} True when the normalized strings are equal.
 */
function compareStrings(str1, str2, options) {
  if (str1 === str2) {
    return true;
  }
  const {
    caseSensitive = false,
    ignoreWhitespace = false,
    ignoreAccents = false
  } = options || {};
  // Each side is normalized independently with the same rules.
  const prepare = (value) => {
    let prepared = value || "";
    if (ignoreWhitespace) {
      prepared = normalizeWhitespace(prepared);
    }
    if (ignoreAccents) {
      prepared = removeAccents(prepared);
    }
    return caseSensitive ? prepared : prepared.toLowerCase();
  };
  return prepare(str1) === prepare(str2);
}
2017
/**
 * Returns a similarity score between 0 and 1 for two strings, computed as
 * `1 - levenshteinDistance / max(length1, length2)`.
 *
 * Identical inputs score 1. `null`/`undefined` are treated as "" (matching
 * compareStrings) and other non-string values are converted with String(),
 * so the function does not throw on missing input. Distances are counted in
 * UTF-16 code units.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} 1 for identical strings, 0 for nothing in common.
 */
function similarity(str1, str2) {
  if (str1 === str2) return 1;
  const first = str1 == null ? "" : String(str1);
  const second = str2 == null ? "" : String(str2);
  // Re-check after coercion (e.g. null vs undefined, 5 vs "5").
  if (first === second) return 1;
  if (first.length === 0 || second.length === 0) return 0;
  const len1 = first.length;
  const len2 = second.length;
  // Two-row dynamic programme: prevRow is row i-1, currentRow is row i.
  let prevRow = Array.from({ length: len2 + 1 }, (_, j) => j);
  let currentRow = Array(len2 + 1).fill(0);
  for (let i = 1; i <= len1; i++) {
    currentRow[0] = i;
    for (let j = 1; j <= len2; j++) {
      const cost = first[i - 1] === second[j - 1] ? 0 : 1;
      currentRow[j] = Math.min(
        currentRow[j - 1] + 1, // Insertion
        prevRow[j] + 1, // Deletion
        prevRow[j - 1] + cost // Substitution
      );
    }
    // Swap the rows; after the swap the finished row is in prevRow.
    [prevRow, currentRow] = [currentRow, prevRow];
  }
  const distance = prevRow[len2];
  return 1 - distance / Math.max(len1, len2);
}
2047
+
2048
+ exports.capitalize = capitalize2;
1153
2049
  exports.cleanPhoneNumber = cleanPhoneNumber;
2050
+ exports.compareStrings = compareStrings;
2051
+ exports.contractAbbreviation = contractAbbreviation;
2052
+ exports.expandAbbreviation = expandAbbreviation;
2053
+ exports.extractWords = extractWords;
1154
2054
  exports.formatCompact = formatCompact;
1155
2055
  exports.formatNIK = formatNIK;
1156
2056
  exports.formatPhoneNumber = formatPhoneNumber;
@@ -1160,13 +2060,21 @@ exports.isLandlineNumber = isLandlineNumber;
1160
2060
  exports.isMobileNumber = isMobileNumber;
1161
2061
  exports.maskNIK = maskNIK;
1162
2062
  exports.maskPhoneNumber = maskPhoneNumber;
2063
+ exports.normalizeWhitespace = normalizeWhitespace;
1163
2064
  exports.parseNIK = parseNIK;
1164
2065
  exports.parsePhoneNumber = parsePhoneNumber;
1165
2066
  exports.parseRupiah = parseRupiah;
2067
+ exports.removeAccents = removeAccents;
2068
+ exports.sanitize = sanitize;
2069
+ exports.similarity = similarity;
2070
+ exports.slugify = slugify;
1166
2071
  exports.toE164 = toE164;
1167
2072
  exports.toInternational = toInternational;
1168
2073
  exports.toNational = toNational;
2074
+ exports.toSentenceCase = toSentenceCase;
2075
+ exports.toTitleCase = toTitleCase;
1169
2076
  exports.toWords = toWords;
2077
+ exports.truncate = truncate;
1170
2078
  exports.validateNIK = validateNIK;
1171
2079
  exports.validatePhoneNumber = validatePhoneNumber;
1172
2080
  //# sourceMappingURL=index.cjs.map