@indodev/toolkit 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -183
- package/dist/compare-B1MKSOWV.d.cts +938 -0
- package/dist/compare-B1MKSOWV.d.ts +938 -0
- package/dist/index.cjs +908 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +896 -1
- package/dist/index.js.map +1 -1
- package/dist/text/index.cjs +915 -0
- package/dist/text/index.cjs.map +1 -0
- package/dist/text/index.d.cts +284 -0
- package/dist/text/index.d.ts +284 -0
- package/dist/text/index.js +898 -0
- package/dist/text/index.js.map +1 -0
- package/package.json +18 -1
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
export { C as CompareOptions, E as ExtractOptions, j as SanitizeOptions, S as SlugifyOptions, T as TitleCaseOptions, k as TruncateOptions, c as capitalize, h as compareStrings, d as contractAbbreviation, e as expandAbbreviation, g as extractWords, n as normalizeWhitespace, r as removeAccents, b as sanitize, i as similarity, s as slugify, a as toSentenceCase, t as toTitleCase, f as truncate } from '../compare-B1MKSOWV.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* ============================================================================
|
|
5
|
+
* INDONESIAN TEXT UTILITIES - CONSTANTS
|
|
6
|
+
* ============================================================================
|
|
7
|
+
*
|
|
8
|
+
* This file contains constants for Indonesian and English text processing:
|
|
9
|
+
* - LOWERCASE_WORDS: Particles that stay lowercase in title case
|
|
10
|
+
* - ACRONYMS: Abbreviations that stay UPPERCASE in title case
|
|
11
|
+
* - ABBREVIATIONS: Full expansions of common Indonesian abbreviations
|
|
12
|
+
*
|
|
13
|
+
* ============================================================================
|
|
14
|
+
* MAINTENANCE GUIDE
|
|
15
|
+
* ============================================================================
|
|
16
|
+
*
|
|
17
|
+
* ## How to Add New Entries
|
|
18
|
+
*
|
|
19
|
+
* ### 1. LOWERCASE_WORDS (Particles)
|
|
20
|
+
*
|
|
21
|
+
* Add words that should remain lowercase in title case (except when first word).
|
|
22
|
+
*
|
|
23
|
+
* **Indonesian Grammar Rules (PUEBI):**
|
|
24
|
+
* - Prepositions: di, ke, dari, untuk, dengan, pada, dalam, etc.
|
|
25
|
+
* - Conjunctions: dan, atau, tetapi, serta, maupun, etc.
|
|
26
|
+
* - Articles/particles: yang, sebagai, adalah, akan, telah, etc.
|
|
27
|
+
*
|
|
28
|
+
* **English Grammar Rules (Chicago Manual of Style):**
|
|
29
|
+
* - Articles: a, an, the
|
|
30
|
+
* - Conjunctions: and, or, but, nor, for, yet, so
|
|
31
|
+
* - Short prepositions (<5 letters): at, by, in, of, on, to, up, etc.
|
|
32
|
+
*
|
|
33
|
+
* **Example Addition:**
|
|
34
|
+
* ```typescript
|
|
35
|
+
* export const LOWERCASE_WORDS = [
|
|
36
|
+
* // ... existing entries
|
|
37
|
+
* 'bagi', // Indonesian: for/to (preposition)
|
|
38
|
+
* 'antara', // Indonesian: between (preposition)
|
|
39
|
+
* 'into', // English: preposition
|
|
40
|
+
* ] as const;
|
|
41
|
+
* ```
|
|
42
|
+
*
|
|
43
|
+
* **Testing:** Add test case in `toTitleCase.test.ts`:
|
|
44
|
+
* ```typescript
|
|
45
|
+
* it('keeps "bagi" lowercase in middle', () => {
|
|
46
|
+
* expect(toTitleCase('buku bagi pemula')).toBe('Buku bagi Pemula');
|
|
47
|
+
* });
|
|
48
|
+
* ```
|
|
49
|
+
*
|
|
50
|
+
* ### 2. ACRONYMS (Always Uppercase)
|
|
51
|
+
*
|
|
52
|
+
* Add abbreviations that should always appear in UPPERCASE.
|
|
53
|
+
*
|
|
54
|
+
* **Categories:**
|
|
55
|
+
* - Government & Military: TNI, POLRI, KPK, DPR, etc.
|
|
56
|
+
* - Business Entities: PT, CV, BUMN, etc.
|
|
57
|
+
* - Banks: BCA, BRI, BNI, etc.
|
|
58
|
+
* - Services: BPJS, PLN, KTP, SIM, etc.
|
|
59
|
+
* - Technology: IT, AI, API, SEO, etc.
|
|
60
|
+
* - Education: UI, ITB, UGM, etc.
|
|
61
|
+
* - International: UN, WHO, NATO, ASEAN, etc.
|
|
62
|
+
*
|
|
63
|
+
* **Validation Checklist:**
|
|
64
|
+
* ✅ Is it commonly written in ALL CAPS?
|
|
65
|
+
* ✅ Is it an official acronym (not just a shortened word)?
|
|
66
|
+
* ✅ Will it look wrong if title-cased (e.g., "Pt" instead of "PT")?
|
|
67
|
+
*
|
|
68
|
+
* **Example Addition:**
|
|
69
|
+
* ```typescript
|
|
70
|
+
* export const ACRONYMS = [
|
|
71
|
+
* // ... existing entries
|
|
72
|
+
* 'OJK', // Otoritas Jasa Keuangan
|
|
73
|
+
* 'BI', // Bank Indonesia
|
|
74
|
+
* 'NASA', // National Aeronautics and Space Administration
|
|
75
|
+
* ] as const;
|
|
76
|
+
* ```
|
|
77
|
+
*
|
|
78
|
+
* **Testing:** Add test case in `toTitleCase.test.ts`:
|
|
79
|
+
* ```typescript
|
|
80
|
+
* it('preserves OJK uppercase', () => {
|
|
81
|
+
* expect(toTitleCase('ojk indonesia')).toBe('OJK Indonesia');
|
|
82
|
+
* });
|
|
83
|
+
* ```
|
|
84
|
+
*
|
|
85
|
+
* ### 3. ABBREVIATIONS (Expansion Mapping)
|
|
86
|
+
*
|
|
87
|
+
* Add abbreviation → full form mappings for `expandAbbreviation()` function.
|
|
88
|
+
*
|
|
89
|
+
* **Categories (use comment headers):**
|
|
90
|
+
* - Address: Jl., Gg., Kec., Kab., etc.
|
|
91
|
+
* - Academic Titles: Dr., Ir., Prof., S.H., M.M., etc.
|
|
92
|
+
* - Honorifics: Bpk., Yth., H., Hj., etc.
|
|
93
|
+
* - Organizations: PT., CV., UD., etc.
|
|
94
|
+
* - Common: dst., dll., a.n., etc.
|
|
95
|
+
* - Contact Info: Tlp., HP., Fax, etc.
|
|
96
|
+
* - Days/Months: Sen., Jan., Feb., etc.
|
|
97
|
+
* - Units: kg., km., lt., etc.
|
|
98
|
+
*
|
|
99
|
+
* **Key Format Rules:**
|
|
100
|
+
* - Include period if commonly written: `'Jl.'` not `'Jl'`
|
|
101
|
+
* - Use proper capitalization: `'Jalan'` not `'jalan'`
|
|
102
|
+
* - Keep it concise: Full form only, no explanations
|
|
103
|
+
*
|
|
104
|
+
* **Example Addition:**
|
|
105
|
+
* ```typescript
|
|
106
|
+
* export const ABBREVIATIONS: Record<string, string> = {
|
|
107
|
+
* // ... existing entries
|
|
108
|
+
*
|
|
109
|
+
* // ========== New Category Example ==========
|
|
110
|
+
* 'Apt.': 'Apartemen',
|
|
111
|
+
* 'Ruko': 'Rumah Toko',
|
|
112
|
+
* 'Rukan': 'Rumah Kantor',
|
|
113
|
+
* };
|
|
114
|
+
* ```
|
|
115
|
+
*
|
|
116
|
+
* **Testing:** Add test case in `abbreviation.test.ts`:
|
|
117
|
+
* ```typescript
|
|
118
|
+
* it('expands Apt. to Apartemen', () => {
|
|
119
|
+
* expect(expandAbbreviation('Apt. Sudirman'))
|
|
120
|
+
* .toBe('Apartemen Sudirman');
|
|
121
|
+
* });
|
|
122
|
+
* ```
|
|
123
|
+
*
|
|
124
|
+
* ============================================================================
|
|
125
|
+
* DATA SOURCES & REFERENCES
|
|
126
|
+
* ============================================================================
|
|
127
|
+
*
|
|
128
|
+
* When adding new entries, refer to these authoritative sources:
|
|
129
|
+
*
|
|
130
|
+
* **Indonesian Language:**
|
|
131
|
+
* - PUEBI (Pedoman Umum Ejaan Bahasa Indonesia)
|
|
132
|
+
* https://puebi.js.org/
|
|
133
|
+
*
|
|
134
|
+
* - KBBI (Kamus Besar Bahasa Indonesia)
|
|
135
|
+
* https://kbbi.kemdikbud.go.id/
|
|
136
|
+
*
|
|
137
|
+
* - Wikipedia Indonesia - Daftar Singkatan
|
|
138
|
+
* https://id.wikipedia.org/wiki/Daftar_singkatan_di_Indonesia
|
|
139
|
+
*
|
|
140
|
+
* **English Language:**
|
|
141
|
+
* - Chicago Manual of Style (Title Case Rules)
|
|
142
|
+
* https://www.chicagomanualofstyle.org/
|
|
143
|
+
*
|
|
144
|
+
* - AP Stylebook
|
|
145
|
+
* https://www.apstylebook.com/
|
|
146
|
+
*
|
|
147
|
+
* **Government & Official:**
|
|
148
|
+
* - Kemendagri (addresses, administrative divisions)
|
|
149
|
+
* https://www.kemendagri.go.id/
|
|
150
|
+
*
|
|
151
|
+
* - Kemenkumham (business entities)
|
|
152
|
+
* https://www.kemenkumham.go.id/
|
|
153
|
+
*
|
|
154
|
+
* - Kemendikbud (education, degrees)
|
|
155
|
+
* https://www.kemdikbud.go.id/
|
|
156
|
+
*
|
|
157
|
+
* ============================================================================
|
|
158
|
+
* CONTRIBUTION GUIDELINES
|
|
159
|
+
* ============================================================================
|
|
160
|
+
*
|
|
161
|
+
* **Before Adding:**
|
|
162
|
+
* 1. ✅ Check if entry already exists (Ctrl+F)
|
|
163
|
+
* 2. ✅ Verify spelling from official sources
|
|
164
|
+
* 3. ✅ Ensure it's commonly used (not obscure)
|
|
165
|
+
* 4. ✅ Choose correct category/section
|
|
166
|
+
*
|
|
167
|
+
* **After Adding:**
|
|
168
|
+
* 1. ✅ Add corresponding test case
|
|
169
|
+
* 2. ✅ Run tests: `npm test constants`
|
|
170
|
+
* 3. ✅ Update this file's documentation if needed
|
|
171
|
+
* 4. ✅ Add source reference in PR description
|
|
172
|
+
*
|
|
173
|
+
* **PR Template:**
|
|
174
|
+
* ```
|
|
175
|
+
* ### Added Constants
|
|
176
|
+
*
|
|
177
|
+
* **Type:** [LOWERCASE_WORDS | ACRONYMS | ABBREVIATIONS]
|
|
178
|
+
*
|
|
179
|
+
* **Entries:**
|
|
180
|
+
* - `OJK` - Otoritas Jasa Keuangan
|
|
181
|
+
* - `BI` - Bank Indonesia
|
|
182
|
+
*
|
|
183
|
+
* **Source:** https://www.ojk.go.id/
|
|
184
|
+
*
|
|
185
|
+
* **Test Coverage:** ✅ Added in toTitleCase.test.ts line 245
|
|
186
|
+
*
|
|
187
|
+
* **Rationale:**
|
|
188
|
+
* Commonly used financial regulatory bodies in Indonesian context.
|
|
189
|
+
* ```
|
|
190
|
+
*
|
|
191
|
+
* ============================================================================
|
|
192
|
+
* COMMON PITFALLS TO AVOID
|
|
193
|
+
* ============================================================================
|
|
194
|
+
*
|
|
195
|
+
* ❌ **Don't add brand-specific styling** (e.g., "iPhone" → keep user control)
|
|
196
|
+
* ❌ **Don't add regional dialects** (stick to standard Indonesian/English)
|
|
197
|
+
* ❌ **Don't add context-dependent acronyms** (e.g., "UI" = both User Interface & Universitas Indonesia)
|
|
198
|
+
* ❌ **Don't add very rare/obscure terms** (focus on common usage)
|
|
199
|
+
* ❌ **Don't forget the period** in ABBREVIATIONS keys when the abbreviation is commonly written with one (e.g., use `'Dr.'` not `'Dr'`)
|
|
200
|
+
* ❌ **Don't mix singular/plural** in ABBREVIATIONS (choose one consistently)
|
|
201
|
+
*
|
|
202
|
+
* ✅ **Do keep entries alphabetically sorted** within categories
|
|
203
|
+
* ✅ **Do use proper capitalization** in expanded forms
|
|
204
|
+
* ✅ **Do add comments** for non-obvious entries
|
|
205
|
+
* ✅ **Do verify against official sources** before adding
|
|
206
|
+
* ✅ **Do write test cases** for new additions
|
|
207
|
+
*
|
|
208
|
+
* ============================================================================
|
|
209
|
+
* FUTURE EXTENSIBILITY
|
|
210
|
+
* ============================================================================
|
|
211
|
+
*
|
|
212
|
+
* **Planned Enhancements:**
|
|
213
|
+
*
|
|
214
|
+
* 1. **External Data Source Support:**
|
|
215
|
+
* ```typescript
|
|
216
|
+
* import customAcronyms from './data/custom-acronyms.json';
|
|
217
|
+
* export const ACRONYMS = [...DEFAULT_ACRONYMS, ...customAcronyms];
|
|
218
|
+
* ```
|
|
219
|
+
*
|
|
220
|
+
* 2. **Context-Aware Acronyms:**
|
|
221
|
+
* ```typescript
|
|
222
|
+
* export const CONTEXT_ACRONYMS = {
|
|
223
|
+
* 'UI': {
|
|
224
|
+
* tech: 'UI', // User Interface
|
|
225
|
+
* education: 'UI', // Universitas Indonesia
|
|
226
|
+
* }
|
|
227
|
+
* };
|
|
228
|
+
* ```
|
|
229
|
+
*
|
|
230
|
+
* 3. **Locale-Specific Sets:**
|
|
231
|
+
* ```typescript
|
|
232
|
+
* export const LOWERCASE_WORDS = {
|
|
233
|
+
* id: [...], // Indonesian
|
|
234
|
+
* en: [...], // English
|
|
235
|
+
* mixed: [...], // Combined (default)
|
|
236
|
+
* };
|
|
237
|
+
* ```
|
|
238
|
+
*
|
|
239
|
+
* 4. **Dynamic Loading:**
|
|
240
|
+
* ```typescript
|
|
241
|
+
* // Load additional acronyms from user config
|
|
242
|
+
* export async function loadCustomConstants(url: string) {
|
|
243
|
+
* const data = await fetch(url).then(r => r.json());
|
|
244
|
+
* return [...ACRONYMS, ...data.acronyms];
|
|
245
|
+
* }
|
|
246
|
+
* ```
|
|
247
|
+
*
|
|
248
|
+
* ============================================================================
|
|
249
|
+
* VERSIONING & CHANGELOG
|
|
250
|
+
* ============================================================================
|
|
251
|
+
*
|
|
252
|
+
* Track major additions here:
|
|
253
|
+
*
|
|
254
|
+
* - v0.2.0 (2024-12-18): Initial comprehensive dataset
|
|
255
|
+
* - 50+ lowercase particles (Indonesian + English)
|
|
256
|
+
* - 110+ acronyms (Indonesian + International)
|
|
257
|
+
* - 80+ abbreviation mappings
|
|
258
|
+
*
|
|
259
|
+
* - v0.2.1 (TBD): Add financial sector acronyms (OJK, BI, etc.)
|
|
260
|
+
* - v0.2.2 (TBD): Add technology company acronyms
|
|
261
|
+
*
|
|
262
|
+
* ============================================================================
|
|
263
|
+
*/
|
|
264
|
+
/**
|
|
265
|
+
* Indonesian and English lowercase particles
|
|
266
|
+
* These words remain lowercase in title case (except when first word)
|
|
267
|
+
*
|
|
268
|
+
* Based on:
|
|
269
|
+
* - Indonesian grammar (PUEBI)
|
|
270
|
+
* - English title case rules (Chicago Manual of Style)
|
|
271
|
+
*/
|
|
272
|
+
// Ordered, readonly tuple of string-literal types. Element order and count are
// part of the declared type, so rows below must not be reordered or merged.
// Row labels are a reading aid that follows the categories named in the
// maintenance guide; they carry no meaning for the type itself.
declare const LOWERCASE_WORDS: readonly [
  // Indonesian prepositions
  'di', 'ke', 'dari', 'pada', 'dalam', 'untuk', 'dengan', 'oleh', 'kepada',
  'terhadap', 'tentang', 'tanpa', 'hingga', 'sampai', 'sejak', 'menuju', 'melalui',
  // Indonesian conjunctions
  'dan', 'atau', 'tetapi', 'namun', 'serta', 'maupun', 'melainkan', 'sedangkan',
  // Indonesian particles
  'yang', 'sebagai', 'adalah', 'ialah', 'yaitu', 'bahwa', 'akan', 'telah', 'sudah', 'belum',
  // English articles
  'a', 'an', 'the',
  // English conjunctions
  'and', 'or', 'but', 'nor', 'for', 'yet', 'so',
  // English short prepositions and similar short words
  'as', 'at', 'by', 'in', 'of', 'on', 'to', 'up', 'via', 'per', 'off', 'out'
];
|
|
273
|
+
/**
|
|
274
|
+
* Indonesian and international acronyms
|
|
275
|
+
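* These are exempt from normal title-casing; entries are listed in their canonical form — mostly UPPERCASE, with mixed-case exceptions such as "Tbk", "PPh", "WiFi", and "SaaS"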
|
|
276
|
+
*/
|
|
277
|
+
// Ordered, readonly tuple of string-literal types (118 elements). Element
// order and count are part of the declared type, so nothing may be dropped or
// reordered here. Row labels are only a reading aid.
// NOTE(review): 'UI' and 'ATM' each appear twice below. They are kept so this
// declaration keeps the same tuple shape; de-duplicate in the source list, not here.
// NOTE(review): not every entry is all-caps ('Tbk', 'PPh', 'WiFi', 'SaaS', 'S.Pd', ...).
declare const ACRONYMS: readonly [
  // Regions, government & military
  'DKI', 'DIY', 'TNI', 'POLRI', 'ABRI', 'MPR', 'DPR', 'KPK', 'BIN',
  // Business entities
  'PT', 'CV', 'UD', 'PD', 'Tbk', 'BUMN', 'BUMD',
  // Banks
  'BCA', 'BRI', 'BNI', 'BTN', 'BSI', 'BPD',
  // Identity documents & public programs
  'KTP', 'NIK', 'NPWP', 'SIM', 'STNK', 'BPJS', 'KIS', 'KIP', 'PKH',
  // Utilities & transport
  'PLN', 'PDAM', 'PGN', 'KAI', 'MRT', 'LRT',
  // Taxes
  'PBB', 'PPh', 'PPN', 'BPHTB',
  // Universities (first 'UI')
  'UI', 'ITB', 'UGM', 'IPB', 'ITS', 'UNPAD', 'UNDIP', 'UNAIR', 'UNS',
  // Academic degrees
  'S.Pd', 'S.H', 'S.E', 'S.T', 'S.Kom', 'S.Si', 'S.Sos', 'M.Pd', 'M.M', 'M.T', 'M.Kom',
  // Everyday services & devices (first 'ATM')
  'ATM', 'POS', 'SMS', 'GPS', 'WiFi', 'USB', 'PIN', 'OTP', 'QR',
  // Technology (second 'UI')
  'IT', 'AI', 'ML', 'API', 'UI', 'UX', 'SEO', 'SaaS', 'CRM', 'ERP',
  // Business roles
  'CEO', 'CFO', 'CTO', 'COO', 'CMO', 'HR', 'PR', 'VP', 'GM',
  // International organizations
  'UN', 'WHO', 'UNESCO', 'NATO', 'ASEAN', 'APEC', 'WTO', 'IMF',
  // Medical
  'ICU', 'ER', 'MRI', 'CT', 'DNA', 'RNA', 'HIV', 'AIDS', 'COVID',
  // Units
  'KM', 'CM', 'MM', 'KG', 'RPM', 'MPH', 'KPH',
  // Finance (second 'ATM')
  'IPO', 'ATM', 'ROI', 'GDP', 'VAT'
];
|
|
278
|
+
/**
|
|
279
|
+
* Indonesian abbreviations mapping
|
|
280
|
+
* Organized by category for maintainability
|
|
281
|
+
*/
|
|
282
|
+
// String-to-string lookup table: any string key is accepted by the type and
// every value is a string. This declaration does not enumerate the entries.
declare const ABBREVIATIONS: { [abbreviation: string]: string };
|
|
283
|
+
|
|
284
|
+
export { ABBREVIATIONS, ACRONYMS, LOWERCASE_WORDS };
|