@indodev/toolkit 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -183
- package/dist/compare-B1MKSOWV.d.cts +938 -0
- package/dist/compare-B1MKSOWV.d.ts +938 -0
- package/dist/index.cjs +908 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +896 -1
- package/dist/index.js.map +1 -1
- package/dist/text/index.cjs +915 -0
- package/dist/text/index.cjs.map +1 -0
- package/dist/text/index.d.cts +284 -0
- package/dist/text/index.d.ts +284 -0
- package/dist/text/index.js +898 -0
- package/dist/text/index.js.map +1 -0
- package/package.json +18 -1
package/dist/index.js
CHANGED
|
@@ -1148,6 +1148,901 @@ function capitalize(str) {
|
|
|
1148
1148
|
return str.charAt(0).toUpperCase() + str.slice(1);
|
|
1149
1149
|
}
|
|
1150
1150
|
|
|
1151
|
-
|
|
1151
|
+
// src/text/constants.ts
|
|
1152
|
+
// Words that title-casing keeps in lower case unless they are the first word
// of the text. toTitleCase loads this list into a Set and looks words up by
// their lower-cased form, so every entry here is written in lower case.
var LOWERCASE_WORDS = [
  // Indonesian prepositions (kata depan)
  "di",
  "ke",
  "dari",
  "pada",
  "dalam",
  "untuk",
  "dengan",
  "oleh",
  "kepada",
  "terhadap",
  "tentang",
  "tanpa",
  "hingga",
  "sampai",
  "sejak",
  "menuju",
  "melalui",
  // Indonesian conjunctions (kata hubung)
  "dan",
  "atau",
  "tetapi",
  "namun",
  "serta",
  "maupun",
  "melainkan",
  "sedangkan",
  // Indonesian articles/particles
  "yang",
  "sebagai",
  "adalah",
  "ialah",
  "yaitu",
  "bahwa",
  "akan",
  "telah",
  "sudah",
  "belum",
  // English articles
  "a",
  "an",
  "the",
  // English conjunctions
  "and",
  "or",
  "but",
  "nor",
  "for",
  "yet",
  "so",
  "as",
  // English prepositions (short ones, < 5 letters)
  "at",
  "by",
  "in",
  "of",
  "on",
  "to",
  "up",
  "via",
  "per",
  "off",
  "out"
  // English prepositions (5+ letters - optional, some style guides capitalize these).
  // Deliberately left out of the list; kept here only as a record of the candidates.
  // 'about',
  // 'above',
  // 'across',
  // 'after',
  // 'among',
  // 'below',
  // 'under',
  // 'until',
  // 'with',
];
|
|
1227
|
+
// Acronyms and initialisms recognised during title-casing. toTitleCase loads
// this list into a Set, so the repeated entries ("UI", "ATM") are harmless.
// Most entries are all-caps; a few use mixed case ("Tbk", "PPh", "WiFi",
// "SaaS" and the degree abbreviations such as "S.Kom").
var ACRONYMS = [
  // Indonesian government & military
  "DKI", // Daerah Khusus Ibukota
  "DIY", // Daerah Istimewa Yogyakarta
  "TNI", // Tentara Nasional Indonesia
  "POLRI", // Kepolisian Republik Indonesia
  "ABRI", // Angkatan Bersenjata Republik Indonesia
  "MPR", // Majelis Permusyawaratan Rakyat
  "DPR", // Dewan Perwakilan Rakyat
  "KPK", // Komisi Pemberantasan Korupsi
  "BIN", // Badan Intelijen Negara
  // Indonesian business entities
  "PT", // Perseroan Terbatas
  "CV", // Commanditaire Vennootschap
  "UD", // Usaha Dagang
  "PD", // Perusahaan Daerah
  "Tbk", // Terbuka (publicly traded)
  "BUMN", // Badan Usaha Milik Negara
  "BUMD", // Badan Usaha Milik Daerah
  // Indonesian banks
  "BCA", // Bank Central Asia
  "BRI", // Bank Rakyat Indonesia
  "BNI", // Bank Negara Indonesia
  "BTN", // Bank Tabungan Negara
  "BSI", // Bank Syariah Indonesia
  "BPD", // Bank Pembangunan Daerah
  // Indonesian government services
  "KTP", // Kartu Tanda Penduduk
  "NIK", // Nomor Induk Kependudukan
  "NPWP", // Nomor Pokok Wajib Pajak
  "SIM", // Surat Izin Mengemudi
  "STNK", // Surat Tanda Nomor Kendaraan
  "BPJS", // Badan Penyelenggara Jaminan Sosial
  "KIS", // Kartu Indonesia Sehat
  "KIP", // Kartu Indonesia Pintar
  "PKH", // Program Keluarga Harapan
  // Indonesian utilities & infrastructure
  "PLN", // Perusahaan Listrik Negara
  "PDAM", // Perusahaan Daerah Air Minum
  "PGN", // Perusahaan Gas Negara
  "KAI", // Kereta Api Indonesia
  "MRT", // Mass Rapid Transit
  "LRT", // Light Rail Transit
  // Indonesian taxes & fees
  "PBB", // Pajak Bumi dan Bangunan
  "PPh", // Pajak Penghasilan
  "PPN", // Pajak Pertambahan Nilai
  "BPHTB", // Bea Perolehan Hak atas Tanah dan Bangunan
  // Indonesian education
  "UI", // Universitas Indonesia
  "ITB", // Institut Teknologi Bandung
  "UGM", // Universitas Gadjah Mada
  "IPB", // Institut Pertanian Bogor
  "ITS", // Institut Teknologi Sepuluh Nopember
  "UNPAD", // Universitas Padjadjaran
  "UNDIP", // Universitas Diponegoro
  "UNAIR", // Universitas Airlangga
  "UNS", // Universitas Sebelas Maret
  // Indonesian degrees (gelar)
  "S.Pd", // Sarjana Pendidikan
  "S.H", // Sarjana Hukum
  "S.E", // Sarjana Ekonomi
  "S.T", // Sarjana Teknik
  "S.Kom", // Sarjana Komputer
  "S.Si", // Sarjana Sains
  "S.Sos", // Sarjana Sosial
  "M.Pd", // Magister Pendidikan
  "M.M", // Magister Manajemen
  "M.T", // Magister Teknik
  "M.Kom", // Magister Komputer
  // Common services
  "ATM", // Automated Teller Machine
  "POS", // Point of Sale
  "SMS", // Short Message Service
  "GPS", // Global Positioning System
  "WiFi", // Wireless Fidelity (technically Wi-Fi)
  "USB", // Universal Serial Bus
  "PIN", // Personal Identification Number
  "OTP", // One Time Password
  "QR", // Quick Response
  // Technology & IT
  "IT", // Information Technology
  "AI", // Artificial Intelligence
  "ML", // Machine Learning
  "API", // Application Programming Interface
  "UI", // User Interface (duplicate with Universitas Indonesia, context matters)
  "UX", // User Experience
  "SEO", // Search Engine Optimization
  "SaaS", // Software as a Service
  "CRM", // Customer Relationship Management
  "ERP", // Enterprise Resource Planning
  // Business titles
  "CEO", // Chief Executive Officer
  "CFO", // Chief Financial Officer
  "CTO", // Chief Technology Officer
  "COO", // Chief Operating Officer
  "CMO", // Chief Marketing Officer
  "HR", // Human Resources
  "PR", // Public Relations
  "VP", // Vice President
  "GM", // General Manager
  // International organizations
  "UN", // United Nations
  "WHO", // World Health Organization
  "UNESCO", // United Nations Educational, Scientific and Cultural Organization
  "NATO", // North Atlantic Treaty Organization
  "ASEAN", // Association of Southeast Asian Nations
  "APEC", // Asia-Pacific Economic Cooperation
  "WTO", // World Trade Organization
  "IMF", // International Monetary Fund
  // Medical
  "ICU", // Intensive Care Unit
  "ER", // Emergency Room
  "MRI", // Magnetic Resonance Imaging
  "CT", // Computed Tomography
  "DNA", // Deoxyribonucleic Acid
  "RNA", // Ribonucleic Acid
  "HIV", // Human Immunodeficiency Virus
  "AIDS", // Acquired Immunodeficiency Syndrome
  "COVID", // Coronavirus Disease
  // Measurements & units
  "KM", // Kilometer
  "CM", // Centimeter
  "MM", // Millimeter
  "KG", // Kilogram
  "RPM", // Revolutions Per Minute
  "MPH", // Miles Per Hour
  "KPH", // Kilometers Per Hour
  // Finance
  "IPO", // Initial Public Offering
  "ATM", // Automated Teller Machine (duplicate)
  "ROI", // Return on Investment
  "GDP", // Gross Domestic Product
  "VAT" // Value Added Tax
];
|
|
1480
|
+
// Abbreviation -> expansion table used by expandAbbreviation and (reversed)
// by contractAbbreviation. Keys are written the way the abbreviation appears
// in text, usually with a trailing period. A few keys map to themselves
// (Kota, Blok, Perumahan, Ibu, Koperasi, Yayasan, Email, Website, Mei), and
// several expansions are shared by more than one key ("Provinsi",
// "Perumahan", "Telepon").
var ABBREVIATIONS = {
  // ========== Address Abbreviations ==========
  "Jl.": "Jalan",
  "Gg.": "Gang",
  "No.": "Nomor",
  "Kp.": "Kampung",
  "Ds.": "Desa",
  "Kel.": "Kelurahan",
  "Kec.": "Kecamatan",
  "Kab.": "Kabupaten",
  Kota: "Kota",
  "Prov.": "Provinsi",
  "Prop.": "Provinsi",
  "Rt.": "Rukun Tetangga",
  "Rw.": "Rukun Warga",
  Blok: "Blok",
  "Komp.": "Kompleks",
  Perumahan: "Perumahan",
  "Perum.": "Perumahan",
  // ========== Academic Titles ==========
  "Dr.": "Doktor",
  "Ir.": "Insinyur",
  "Prof.": "Profesor",
  "Drs.": "Doktorandus",
  "Dra.": "Doktoranda",
  // Bachelor degrees
  "S.Pd.": "Sarjana Pendidikan",
  "S.H.": "Sarjana Hukum",
  "S.E.": "Sarjana Ekonomi",
  "S.T.": "Sarjana Teknik",
  "S.Kom.": "Sarjana Komputer",
  "S.Si.": "Sarjana Sains",
  "S.Sos.": "Sarjana Sosial",
  "S.I.Kom.": "Sarjana Ilmu Komunikasi",
  "S.S.": "Sarjana Sastra",
  "S.Psi.": "Sarjana Psikologi",
  "S.Farm.": "Sarjana Farmasi",
  "S.Ked.": "Sarjana Kedokteran",
  // Master degrees
  "M.Sc.": "Master of Science",
  "M.M.": "Magister Manajemen",
  "M.Pd.": "Magister Pendidikan",
  "M.T.": "Magister Teknik",
  "M.Kom.": "Magister Komputer",
  "M.Si.": "Magister Sains",
  "M.H.": "Magister Hukum",
  "M.A.": "Master of Arts",
  MBA: "Master of Business Administration",
  // ========== Honorifics ==========
  "Bpk.": "Bapak",
  Ibu: "Ibu",
  "Sdr.": "Saudara",
  "Sdri.": "Saudari",
  "Yth.": "Yang Terhormat",
  "H.": "Haji",
  "Hj.": "Hajjah",
  "Tn.": "Tuan",
  "Ny.": "Nyonya",
  "Nn.": "Nona",
  // ========== Organizations ==========
  "PT.": "Perseroan Terbatas",
  "CV.": "Commanditaire Vennootschap",
  "UD.": "Usaha Dagang",
  "PD.": "Perusahaan Daerah",
  "Tbk.": "Terbuka",
  Koperasi: "Koperasi",
  Yayasan: "Yayasan",
  // ========== Common Abbreviations ==========
  "dst.": "dan seterusnya",
  "dsb.": "dan sebagainya",
  "dll.": "dan lain-lain",
  "dkk.": "dan kawan-kawan",
  "a.n.": "atas nama",
  "u.p.": "untuk perhatian",
  "u.b.": "untuk beliau",
  "c.q.": "casu quo",
  "hlm.": "halaman",
  "tgl.": "tanggal",
  "bln.": "bulan",
  "thn.": "tahun",
  "ttd.": "tertanda",
  // ========== Contact Information ==========
  "Tlp.": "Telepon",
  "Telp.": "Telepon",
  "HP.": "Handphone",
  Fax: "Faksimile",
  Email: "Email",
  Website: "Website",
  // ========== Days (Indonesian) ==========
  "Sen.": "Senin",
  "Sel.": "Selasa",
  "Rab.": "Rabu",
  "Kam.": "Kamis",
  "Jum.": "Jumat",
  "Sab.": "Sabtu",
  "Min.": "Minggu",
  // ========== Months (Indonesian) ==========
  "Jan.": "Januari",
  "Feb.": "Februari",
  "Mar.": "Maret",
  "Apr.": "April",
  Mei: "Mei",
  "Jun.": "Juni",
  "Jul.": "Juli",
  "Agt.": "Agustus",
  "Sep.": "September",
  "Okt.": "Oktober",
  "Nov.": "November",
  "Des.": "Desember",
  // ========== Units & Measurements ==========
  "kg.": "kilogram",
  "gr.": "gram",
  "lt.": "liter",
  "ml.": "mililiter",
  "km.": "kilometer",
  "cm.": "sentimeter",
  "mm.": "milimeter",
  "m2.": "meter persegi",
  "m3.": "meter kubik",
  "ha.": "hektar"
};
|
|
1601
|
+
|
|
1602
|
+
// src/text/capitalization.ts
|
|
1603
|
+
/**
 * Upper-cases the first character of `text` and lower-cases everything after it.
 *
 * @param {string} text - Text to capitalize.
 * @returns {string} The capitalized text; falsy input (empty string, null,
 *   undefined) is handed back unchanged.
 */
function capitalize2(text) {
  if (text) {
    const head = text.charAt(0);
    const tail = text.slice(1);
    return `${head.toUpperCase()}${tail.toLowerCase()}`;
  }
  return text;
}
|
|
1607
|
+
/**
 * Converts `text` to title case.
 *
 * Whitespace is collapsed first, then each space-separated word is converted
 * by processWord (or processHyphenatedWord when the word contains "-").
 *
 * @param {string} text - Text to convert. Falsy input is returned unchanged.
 * @param {object} [options]
 * @param {boolean} [options.preserveAcronyms=true] - Forwarded to the word
 *   processors together with a Set built from ACRONYMS.
 * @param {boolean} [options.strict=false] - Forwarded to the word processors.
 * @param {Iterable<string>} [options.exceptions=[]] - Extra words to keep in
 *   lower case, in addition to LOWERCASE_WORDS. Matching is case-insensitive.
 * @returns {string} The title-cased text.
 */
function toTitleCase(text, options) {
  if (!text) return text;
  const { preserveAcronyms = true, strict = false, exceptions = [] } = options || {};

  // Words are looked up by their lower-cased form, so caller-supplied
  // exceptions must be lower-cased too; otherwise an entry such as "Versus"
  // could never match.
  const lowercaseSet = new Set([
    ...LOWERCASE_WORDS,
    ...Array.from(exceptions, (word) => String(word).toLowerCase())
  ]);
  const context = {
    lowercaseSet,
    acronymSet: new Set(ACRONYMS),
    preserveAcronyms,
    strict
  };

  return normalizeSpaces(text)
    .split(" ")
    .map((word, index) => {
      if (!word) return word;
      const isFirstWord = index === 0;
      return word.includes("-")
        ? processHyphenatedWord(word, isFirstWord, context)
        : processWord(word, isFirstWord, context);
    })
    .join(" ");
}
|
|
1636
|
+
/**
 * Strips leading/trailing whitespace and squeezes every internal run of
 * whitespace down to a single space.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} The normalized text.
 */
function normalizeSpaces(text) {
  const chunks = text.split(/\s+/);
  // Leading/trailing whitespace leaves empty chunks at the edges; drop them.
  return chunks.filter((chunk) => chunk.length > 0).join(" ");
}
|
|
1639
|
+
// Cache of "UPPER-CASED acronym -> spelling used in the acronym list", keyed
// by the Set that was passed in. Assumes the Set is not mutated after it has
// first been used here.
const canonicalAcronymCache = new WeakMap();

/**
 * Returns (building it on first use) the upper-case -> canonical-spelling map
 * for `acronymSet`.
 */
function getCanonicalAcronyms(acronymSet) {
  let canonical = canonicalAcronymCache.get(acronymSet);
  if (!canonical) {
    canonical = new Map();
    for (const acronym of acronymSet) {
      const key = acronym.toUpperCase();
      // An all-caps entry wins over a mixed-case one with the same letters.
      if (!canonical.has(key) || acronym === key) {
        canonical.set(key, acronym);
      }
    }
    canonicalAcronymCache.set(acronymSet, canonical);
  }
  return canonical;
}

/**
 * Title-cases a single word.
 *
 * Order of precedence:
 *   1. a known acronym is returned in the spelling used by the acronym list
 *      (matched case-insensitively, so "wifi" -> "WiFi", "pt" -> "PT");
 *   2. a word in `lowercaseSet` is lower-cased, unless it is the first word;
 *   3. anything else is lower-cased and gets its first letter capitalized.
 *
 * @param {string} word - The word to convert.
 * @param {boolean} isFirstWord - True when the word opens the text.
 * @param {object} context
 * @param {Set<string>} context.lowercaseSet - Lower-case words to keep lower-cased.
 * @param {Set<string>} context.acronymSet - Known acronyms.
 * @param {boolean} context.preserveAcronyms - Enables step 1.
 * @param {boolean} context.strict - Accepted for compatibility; both settings
 *   produce the same result.
 * @returns {string} The converted word.
 */
function processWord(word, isFirstWord, context) {
  const { lowercaseSet, acronymSet, preserveAcronyms } = context;
  const lowerWord = word.toLowerCase();

  if (preserveAcronyms) {
    const canonical = getCanonicalAcronyms(acronymSet).get(word.toUpperCase());
    if (canonical !== undefined) {
      return canonical;
    }
  }
  if (!isFirstWord && lowercaseSet.has(lowerWord)) {
    return lowerWord;
  }
  return capitalizeFirstLetter(lowerWord);
}
|
|
1654
|
+
/**
 * Title-cases a hyphenated word by running each hyphen-separated segment
 * through processWord and gluing the results back together with "-".
 * Only the very first segment can count as the first word of the text.
 *
 * @param {string} word - Word containing one or more hyphens.
 * @param {boolean} isFirstWord - True when the word opens the text.
 * @param {object} context - Passed through to processWord untouched.
 * @returns {string} The converted word.
 */
function processHyphenatedWord(word, isFirstWord, context) {
  const converted = [];
  for (const [position, segment] of word.split("-").entries()) {
    const opensText = isFirstWord && position === 0;
    converted.push(processWord(segment, opensText, context));
  }
  return converted.join("-");
}
|
|
1659
|
+
/**
 * Upper-cases the first character of `word`, leaving the rest as it is.
 *
 * @param {string} word - Word to capitalize.
 * @returns {string} The capitalized word; falsy input is returned unchanged.
 */
function capitalizeFirstLetter(word) {
  return word ? `${word.charAt(0).toUpperCase()}${word.slice(1)}` : word;
}
|
|
1663
|
+
/**
 * Converts `text` to sentence case: whitespace is trimmed and collapsed,
 * everything is lower-cased, and the first letter of the text and of each
 * sentence is upper-cased.
 *
 * A sentence ends at ".", "!" or "?". A "." that is directly followed by
 * something other than a space or another ".", "!" or "?" (as in "3.14" or
 * "example.com") does not start a new sentence.
 *
 * @param {string} text - Text to convert. Falsy input is returned unchanged.
 * @returns {string} The sentence-cased text.
 */
function toSentenceCase(text) {
  if (!text) return text;

  const collapsed = text.trim().replace(/\s+/g, " ");
  const letter = /[a-zA-ZÀ-ÿ]/;
  const pieces = [];
  let capitalizeNext = true;

  for (let pos = 0; pos < collapsed.length; pos += 1) {
    const ch = collapsed[pos];

    if (capitalizeNext && letter.test(ch)) {
      pieces.push(ch.toUpperCase());
      capitalizeNext = false;
    } else {
      pieces.push(ch.toLowerCase());
    }

    if (ch === "!" || ch === "?") {
      capitalizeNext = true;
    } else if (ch === ".") {
      // A period only ends the sentence when it closes the text or is
      // followed by a space or further sentence punctuation.
      const following = collapsed[pos + 1];
      capitalizeNext =
        following === undefined || following === " " || /[.!?]/.test(following);
    }
  }

  return pieces.join("");
}
|
|
1688
|
+
/**
 * Tells whether `char` is one of the sentence-ending marks ".", "!" or "?".
 *
 * @param {string} char - Value to test.
 * @returns {boolean} True for exactly ".", "!" or "?".
 */
function isSentenceEnd(char) {
  switch (char) {
    case ".":
    case "!":
    case "?":
      return true;
    default:
      return false;
  }
}
|
|
1691
|
+
|
|
1692
|
+
// src/text/slug.ts
|
|
1693
|
+
/**
 * Builds a URL-friendly slug from `text`.
 *
 * Steps, in order: apply caller `replacements`; turn "&" into " dan " and "/"
 * into " atau "; optionally lower-case; delete ".", "'", "@", a fixed set of
 * lower-case accented letters and the TM/registered/copyright signs (they are
 * removed, not transliterated); replace every other run of characters that is
 * not a word character, whitespace or "-" with the separator; replace
 * whitespace runs and (for a non-"-" separator) hyphens with the separator;
 * finally, when `trim` is on, collapse repeated separators and strip them
 * from both ends.
 *
 * @param {string} text - Text to slugify. Falsy input yields "".
 * @param {object} [options]
 * @param {string} [options.separator="-"] - Word separator. Any string is
 *   treated literally, including regex metacharacters, letters and
 *   multi-character separators.
 * @param {boolean} [options.lowercase=true] - Lower-case the result.
 * @param {Object<string, string>} [options.replacements={}] - Literal
 *   search -> replacement pairs applied before anything else.
 * @param {boolean} [options.trim=true] - Collapse and strip separators.
 * @returns {string} The slug.
 */
function slugify(text, options) {
  if (!text) return "";
  const {
    separator = "-",
    lowercase = true,
    replacements = {},
    trim = true
  } = options || {};
  // Function replacer: inserts the separator verbatim even if it contains
  // "$" sequences that String.prototype.replace would otherwise interpret.
  const insertSeparator = () => separator;

  let result = text;
  for (const [search, replace] of Object.entries(replacements)) {
    result = result.replace(new RegExp(escapeRegex(search), "g"), replace);
  }
  result = result.replace(/&/g, " dan ");
  result = result.replace(/\//g, " atau ");
  if (lowercase) {
    result = result.toLowerCase();
  }
  result = result.replace(/[.'@éèêëàâäôöûüùïîçñ™®©]/g, "");
  result = result.replace(/[^\w\s-]+/g, insertSeparator);
  result = result.replace(/\s+/g, insertSeparator);
  if (separator !== "-") {
    result = result.replace(/-/g, insertSeparator);
  }
  if (trim && separator !== "") {
    // Escape the separator and group it so the quantifier applies to the
    // whole separator rather than to its last character, and so separators
    // like "b", "w" or "d" are not read as regex escapes.
    const sep = `(?:${escapeRegex(separator)})`;
    result = result.replace(new RegExp(`${sep}+`, "g"), insertSeparator);
    result = result.replace(new RegExp(`^${sep}+|${sep}+$`, "g"), "");
  }
  return result;
}
|
|
1724
|
+
/**
 * Escapes regular-expression metacharacters in `str` so it can be embedded
 * in a RegExp source and matched literally.
 *
 * @param {string} str - Raw text.
 * @returns {string} `str` with each metacharacter prefixed by a backslash.
 */
function escapeRegex(str) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metacharacters, (ch) => `\\${ch}`);
}
|
|
1727
|
+
|
|
1728
|
+
// src/text/sanitize.ts
|
|
1729
|
+
/**
 * Trims `text` and reduces every internal whitespace run (spaces, tabs,
 * newlines, ...) to one space.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} The normalized text; falsy input is returned unchanged.
 */
function normalizeWhitespace(text) {
  if (!text) {
    return text;
  }
  const words = text.split(/\s+/).filter((piece) => piece !== "");
  return words.join(" ");
}
|
|
1733
|
+
/**
 * Cleans up `text` according to `options`.
 *
 * Steps, in order: newlines -> spaces (`removeNewlines`); ASCII punctuation
 * removed (`removePunctuation`); every character outside `allowedChars`
 * removed; whitespace runs collapsed to one space (`removeExtraSpaces`);
 * result trimmed (`trim`).
 *
 * @param {string} text - Text to sanitize. Falsy input is returned unchanged.
 * @param {object} [options]
 * @param {boolean} [options.removeNewlines=false] - Replace "\n" and "\r"
 *   with a space. When off, collapsing only touches spaces and tabs, so
 *   newlines inside the text survive.
 * @param {boolean} [options.removeExtraSpaces=true] - Collapse whitespace runs.
 * @param {boolean} [options.removePunctuation=false] - Drop ASCII punctuation.
 * @param {string} [options.allowedChars] - Body of a regex character class
 *   (e.g. "a-z0-9 "); it is inserted into the pattern as-is, so any regex
 *   syntax in it is live.
 * @param {boolean} [options.trim=true] - Trim the result. When off, leading
 *   and trailing spaces/tabs are preserved exactly.
 * @returns {string} The sanitized text.
 */
function sanitize(text, options) {
  if (!text) return text;
  const {
    removeNewlines = false,
    removeExtraSpaces = true,
    removePunctuation = false,
    allowedChars,
    trim = true
  } = options || {};

  let result = text;
  if (removeNewlines) {
    result = result.replace(/[\n\r]/g, " ");
  }
  if (removePunctuation) {
    result = result.replace(/[!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]/g, "");
  }
  if (allowedChars) {
    result = result.replace(new RegExp(`[^${allowedChars}]`, "g"), "");
  }
  if (removeExtraSpaces) {
    const whitespaceRuns = removeNewlines ? /\s+/g : /[ \t]+/g;
    if (trim) {
      result = result.replace(whitespaceRuns, " ");
    } else {
      // Keep the outer padding untouched and collapse only what lies between.
      // The trailing padding is measured on what remains after the leading
      // padding, so a whitespace-only string is not counted twice.
      const leading = result.match(/^[ \t]*/)[0];
      const rest = result.slice(leading.length);
      const trailing = rest.match(/[ \t]*$/)[0];
      const middle = rest.slice(0, rest.length - trailing.length);
      result = leading + middle.replace(whitespaceRuns, " ") + trailing;
    }
  }
  return trim ? result.trim() : result;
}
|
|
1778
|
+
/**
 * Strips diacritics from `text`.
 *
 * Letters in a fixed table (such as Ø, Æ, Đ, Ł, Þ, ß) are mapped to ASCII
 * equivalents first. The text is then decomposed with NFD and all combining
 * marks in U+0300-U+036F are dropped.
 *
 * @param {string} text - Text to clean. Falsy input is returned unchanged.
 * @returns {string} The text without accents.
 */
function removeAccents(text) {
  if (!text) return text;

  const transliterations = new Map([
    ["\u00D8", "O"],
    ["\u00F8", "o"],
    ["\u00C6", "AE"],
    ["\u00E6", "ae"],
    ["\u00C5", "A"],
    ["\u00E5", "a"],
    ["\u0110", "D"],
    ["\u0111", "d"],
    ["\u0141", "L"],
    ["\u0142", "l"],
    ["\u00DE", "TH"],
    ["\u00FE", "th"],
    ["\u00DF", "ss"]
  ]);
  const specialLetters =
    /[\u00D8\u00F8\u00C6\u00E6\u00C5\u00E5\u0110\u0111\u0141\u0142\u00DE\u00FE\u00DF]/g;

  const substituted = text.replace(specialLetters, (ch) => transliterations.get(ch));
  return substituted.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
}
|
|
1801
|
+
|
|
1802
|
+
// src/text/abbreviation.ts
|
|
1803
|
+
/**
 * Replaces known abbreviations in `text` with their expansions.
 *
 * The abbreviation table comes from getAbbreviationsByMode(mode), with
 * `customMap` entries layered on top. Abbreviations are processed longest
 * first and matched case-insensitively; a word boundary is required on each
 * side of the abbreviation that starts or ends with a word character.
 *
 * @param {string} text - Text to process. Falsy input is returned unchanged.
 * @param {object} [options]
 * @param {string} [options.mode="all"] - Passed to getAbbreviationsByMode.
 * @param {Object<string, string>} [options.customMap={}] - Extra or
 *   overriding abbreviation -> expansion pairs.
 * @param {boolean} [options.preserveCase=false] - When true, the expansion is
 *   passed through matchCase together with the matched text.
 * @returns {string} The text with abbreviations expanded.
 */
function expandAbbreviation(text, options) {
  if (!text) return text;
  const { mode = "all", customMap = {}, preserveCase = false } = options || {};

  const lookup = Object.assign({}, getAbbreviationsByMode(mode), customMap);
  const longestFirst = Object.keys(lookup).sort(
    (left, right) => right.length - left.length
  );

  return longestFirst.reduce((current, abbrev) => {
    const full = lookup[abbrev];
    const prefix = /^\w/.test(abbrev) ? "\\b" : "";
    const suffix = /\w$/.test(abbrev) ? "\\b" : "";
    const pattern = new RegExp(prefix + escapeRegex2(abbrev) + suffix, "gi");
    return current.replace(pattern, (found) =>
      preserveCase ? matchCase(found, full) : full
    );
  }, text);
}
|
|
1831
|
+
/**
 * Selects the abbreviation table for `mode`.
 *
 * @param {string} mode - "all" returns the ABBREVIATIONS object itself;
 *   "address", "title" and "org" return a new object holding only the
 *   ABBREVIATIONS entries whose key is listed for that mode; any other value
 *   returns an empty object.
 * @returns {Object<string, string>} Abbreviation -> expansion pairs.
 */
function getAbbreviationsByMode(mode) {
  if (mode === "all") {
    return ABBREVIATIONS;
  }

  const keysByMode = new Map([
    [
      "address",
      [
        "Jl.",
        "Gg.",
        "No.",
        "Kp.",
        "Ds.",
        "Kel.",
        "Kec.",
        "Kab.",
        "Kota",
        "Prov.",
        "Prop.",
        "Rt.",
        "Rw.",
        "Blok",
        "Komp.",
        "Perumahan",
        "Perum."
      ]
    ],
    [
      "title",
      [
        "Dr.",
        "Ir.",
        "Prof.",
        "Drs.",
        "Dra.",
        "S.Pd.",
        "S.H.",
        "S.E.",
        "S.T.",
        "S.Kom.",
        "S.Si.",
        "S.Sos.",
        "S.I.Kom.",
        "S.S.",
        "S.Psi.",
        "S.Farm.",
        "S.Ked.",
        "M.Sc.",
        "M.M.",
        "M.Pd.",
        "M.T.",
        "M.Kom.",
        "M.Si.",
        "M.H.",
        "M.A.",
        "MBA"
      ]
    ],
    ["org", ["PT.", "CV.", "UD.", "PD.", "Tbk.", "Koperasi", "Yayasan"]]
  ]);

  // Unknown modes select nothing.
  const wanted = new Set(keysByMode.get(mode) || []);
  const selected = Object.entries(ABBREVIATIONS).filter(([abbrev]) =>
    wanted.has(abbrev)
  );
  return Object.fromEntries(selected);
}
|
|
1903
|
+
/**
 * Backslash-escapes every regular-expression metacharacter in `str`, making
 * the result safe to embed in a RegExp source as a literal.
 *
 * @param {string} str - Raw text.
 * @returns {string} The escaped text.
 */
function escapeRegex2(str) {
  return str.replace(/[.*+?^${}()|[\]\\]/g, (special) => "\\" + special);
}
|
|
1906
|
+
/**
 * Re-cases `replacement` to mirror the casing of `original`.
 *
 * - `original` has no lower-case letters -> `replacement` upper-cased.
 * - `original` has no upper-case letters -> `replacement` lower-cased.
 * - `original` starts with an upper-case character -> first character of
 *   `replacement` upper-cased, the rest lower-cased.
 * - otherwise -> `replacement` unchanged.
 *
 * @param {string} original - Text whose casing is the model.
 * @param {string} replacement - Text to re-case.
 * @returns {string} The re-cased replacement.
 */
function matchCase(original, replacement) {
  const shouted = original.toUpperCase();
  if (original === shouted) {
    return replacement.toUpperCase();
  }

  const whispered = original.toLowerCase();
  if (original === whispered) {
    return replacement.toLowerCase();
  }

  const initial = original.charAt(0);
  const startsUpper = initial === initial.toUpperCase();
  return startsUpper
    ? `${replacement.charAt(0).toUpperCase()}${replacement.slice(1).toLowerCase()}`
    : replacement;
}
|
|
1918
|
+
/**
 * Replaces full forms in `text` with their abbreviations — the inverse of
 * expandAbbreviation.
 *
 * The table from getAbbreviationsByMode(mode) is inverted; when several
 * abbreviations share one expansion, the one listed last wins. Expansions
 * are processed longest first and matched case-insensitively on word
 * boundaries.
 *
 * @param {string} text - Text to process. Falsy input is returned unchanged.
 * @param {object} [options]
 * @param {string} [options.mode="all"] - Passed to getAbbreviationsByMode.
 * @returns {string} The text with expansions contracted.
 */
function contractAbbreviation(text, options) {
  if (!text) return text;
  const { mode = "all" } = options || {};

  const expansionToAbbrev = Object.fromEntries(
    Object.entries(getAbbreviationsByMode(mode)).map(
      ([abbrev, expansion]) => [expansion, abbrev]
    )
  );
  const longestFirst = Object.keys(expansionToAbbrev).sort(
    (x, y) => y.length - x.length
  );

  return longestFirst.reduce((current, expansion) => {
    const pattern = new RegExp("\\b" + escapeRegex2(expansion) + "\\b", "gi");
    return current.replace(pattern, expansionToAbbrev[expansion]);
  }, text);
}
|
|
1937
|
+
|
|
1938
|
+
// src/text/extract.ts
|
|
1939
|
+
/**
 * Shortens `text` so that the result, including the ellipsis, is at most
 * `maxLength` UTF-16 code units long.
 *
 * @param {string} text - Text to shorten. Falsy input yields "".
 * @param {number} maxLength - Maximum length of the result. Values <= 0 yield "".
 * @param {object} [options]
 * @param {string} [options.ellipsis="..."] - Appended when text is cut. If it
 *   does not fit in `maxLength` on its own, a prefix of the ellipsis is returned.
 * @param {boolean} [options.wordBoundary=true] - Cut back to the last space
 *   inside the kept part (when there is one past index 0) so no word is split.
 * @returns {string} `text` unchanged when it already fits, otherwise the
 *   shortened text followed by the ellipsis.
 */
function truncate(text, maxLength, options) {
  if (!text || maxLength <= 0) {
    return "";
  }
  const { ellipsis = "...", wordBoundary = true } = options || {};
  if (text.length <= maxLength) {
    return text;
  }
  const availableLength = maxLength - ellipsis.length;
  if (availableLength <= 0) {
    return ellipsis.slice(0, maxLength);
  }

  let truncated = text.slice(0, availableLength);

  // slice() counts UTF-16 code units, so the cut can land in the middle of a
  // surrogate pair (e.g. an emoji). Drop the dangling high surrogate rather
  // than emit a broken character.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }

  if (wordBoundary) {
    const lastSpaceIndex = truncated.lastIndexOf(" ");
    if (lastSpaceIndex > 0) {
      truncated = truncated.slice(0, lastSpaceIndex);
    }
  }
  return truncated.trimEnd() + ellipsis;
}
|
|
1961
|
+
/**
 * Splits `text` into words.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace (or hyphen, when `includeHyphenated` is on) is treated as a
 * separator. Letters are recognised in any script, so accented words such as
 * "café" stay in one piece. Tokens made only of hyphens are discarded.
 *
 * @param {string} text - Text to split. Falsy or whitespace-only input yields [].
 * @param {object} [options]
 * @param {number} [options.minLength=0] - Drop words shorter than this.
 * @param {boolean} [options.includeHyphenated=true] - Keep hyphenated words
 *   ("anak-anak") together instead of splitting them at the hyphen.
 * @param {boolean} [options.lowercase=false] - Lower-case the returned words.
 * @returns {string[]} The words, in order of appearance.
 */
function extractWords(text, options) {
  if (!text || !text.trim()) {
    return [];
  }
  const {
    minLength = 0,
    includeHyphenated = true,
    lowercase = false
  } = options || {};

  // \p{L}\p{M}\p{N}_ is the Unicode-aware counterpart of \w, which only
  // covers ASCII and would cut words at every accented letter.
  const separators = includeHyphenated
    ? /[^\p{L}\p{M}\p{N}_\s\-]/gu
    : /[^\p{L}\p{M}\p{N}_\s]/gu;

  let words = text
    .replace(separators, " ")
    .split(/\s+/)
    .filter((word) => word.length > 0 && !/^-+$/.test(word));

  if (minLength > 0) {
    words = words.filter((word) => word.length >= minLength);
  }
  if (lowercase) {
    words = words.map((word) => word.toLowerCase());
  }
  return words;
}
|
|
1986
|
+
|
|
1987
|
+
// src/text/compare.ts
|
|
1988
|
+
/**
 * Compares two strings for equality after optional normalization.
 *
 * Identical inputs (`===`) are equal outright. Otherwise falsy inputs are
 * treated as "", and both sides go through the same steps, in this order:
 * whitespace normalization (`ignoreWhitespace`), accent removal
 * (`ignoreAccents`), lower-casing (unless `caseSensitive`).
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @param {object} [options]
 * @param {boolean} [options.caseSensitive=false] - Keep case significant.
 * @param {boolean} [options.ignoreWhitespace=false] - Apply normalizeWhitespace.
 * @param {boolean} [options.ignoreAccents=false] - Apply removeAccents.
 * @returns {boolean} True when the normalized strings are identical.
 */
function compareStrings(str1, str2, options) {
  if (str1 === str2) {
    return true;
  }
  const {
    caseSensitive = false,
    ignoreWhitespace = false,
    ignoreAccents = false
  } = options || {};

  // Assemble the normalization pipeline once, then run both inputs through it.
  const steps = [];
  if (ignoreWhitespace) steps.push((value) => normalizeWhitespace(value));
  if (ignoreAccents) steps.push((value) => removeAccents(value));
  if (!caseSensitive) steps.push((value) => value.toLowerCase());

  const prepare = (value) => steps.reduce((acc, step) => step(acc), value || "");
  return prepare(str1) === prepare(str2);
}
|
|
2015
|
+
/**
 * Scores how alike two strings are, from 0 (nothing in common) to 1
 * (identical), as `1 - editDistance / longerLength`, where the edit distance
 * counts single-unit insertions, deletions and substitutions (Levenshtein)
 * over UTF-16 code units.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} Similarity score in the range [0, 1].
 */
function similarity(str1, str2) {
  if (str1 === str2) return 1;

  const rows = str1.length;
  const cols = str2.length;
  if (rows === 0) return cols === 0 ? 1 : 0;
  if (cols === 0) return 0;

  // One rolling row of the distance table; `diagonal` carries the value from
  // the previous row, one column to the left.
  const distances = Array.from({ length: cols + 1 }, (_, column) => column);
  for (let i = 1; i <= rows; i += 1) {
    let diagonal = distances[0];
    distances[0] = i;
    for (let j = 1; j <= cols; j += 1) {
      const above = distances[j];
      const substitution = diagonal + (str1[i - 1] === str2[j - 1] ? 0 : 1);
      distances[j] = Math.min(distances[j - 1] + 1, above + 1, substitution);
      diagonal = above;
    }
  }

  return 1 - distances[cols] / Math.max(rows, cols);
}
|
|
2045
|
+
|
|
2046
|
+
export { capitalize2 as capitalize, cleanPhoneNumber, compareStrings, contractAbbreviation, expandAbbreviation, extractWords, formatCompact, formatNIK, formatPhoneNumber, formatRupiah, getOperator, isLandlineNumber, isMobileNumber, maskNIK, maskPhoneNumber, normalizeWhitespace, parseNIK, parsePhoneNumber, parseRupiah, removeAccents, sanitize, similarity, slugify, toE164, toInternational, toNational, toSentenceCase, toTitleCase, toWords, truncate, validateNIK, validatePhoneNumber };
|
|
1152
2047
|
//# sourceMappingURL=index.js.map
|
|
1153
2048
|
//# sourceMappingURL=index.js.map
|