@indodev/toolkit 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,284 @@
1
+ export { C as CompareOptions, E as ExtractOptions, j as SanitizeOptions, S as SlugifyOptions, T as TitleCaseOptions, k as TruncateOptions, c as capitalize, h as compareStrings, d as contractAbbreviation, e as expandAbbreviation, g as extractWords, n as normalizeWhitespace, r as removeAccents, b as sanitize, i as similarity, s as slugify, a as toSentenceCase, t as toTitleCase, f as truncate } from '../compare-B1MKSOWV.js';
2
+
3
+ /**
4
+ * ============================================================================
5
+ * INDONESIAN TEXT UTILITIES - CONSTANTS
6
+ * ============================================================================
7
+ *
8
+ * This file contains constants for Indonesian and English text processing:
9
+ * - LOWERCASE_WORDS: Particles that stay lowercase in title case
10
+ * - ACRONYMS: Abbreviations that stay UPPERCASE in title case
11
+ * - ABBREVIATIONS: Full expansions of common Indonesian abbreviations
12
+ *
13
+ * ============================================================================
14
+ * MAINTENANCE GUIDE
15
+ * ============================================================================
16
+ *
17
+ * ## How to Add New Entries
18
+ *
19
+ * ### 1. LOWERCASE_WORDS (Particles)
20
+ *
21
+ * Add words that should remain lowercase in title case (except when first word).
22
+ *
23
+ * **Indonesian Grammar Rules (PUEBI):**
24
+ * - Prepositions: di, ke, dari, untuk, dengan, pada, dalam, etc.
25
+ * - Conjunctions: dan, atau, tetapi, serta, maupun, etc.
26
+ * - Articles/particles: yang, sebagai, adalah, akan, telah, etc.
27
+ *
28
+ * **English Grammar Rules (Chicago Manual of Style):**
29
+ * - Articles: a, an, the
30
+ * - Conjunctions: and, or, but, nor, for, yet, so
31
+ * - Short prepositions (<5 letters): at, by, in, of, on, to, up, etc.
32
+ *
33
+ * **Example Addition:**
34
+ * ```typescript
35
+ * export const LOWERCASE_WORDS = [
36
+ * // ... existing entries
37
+ * 'bagi', // Indonesian: for/to (preposition)
38
+ * 'antara', // Indonesian: between (preposition)
39
+ * 'into', // English: preposition
40
+ * ] as const;
41
+ * ```
42
+ *
43
+ * **Testing:** Add test case in `toTitleCase.test.ts`:
44
+ * ```typescript
45
+ * it('keeps "bagi" lowercase in middle', () => {
46
+ * expect(toTitleCase('buku bagi pemula')).toBe('Buku bagi Pemula');
47
+ * });
48
+ * ```
49
+ *
50
+ * ### 2. ACRONYMS (Always Uppercase)
51
+ *
52
+ * Add abbreviations that should always appear in UPPERCASE.
53
+ *
54
+ * **Categories:**
55
+ * - Government & Military: TNI, POLRI, KPK, DPR, etc.
56
+ * - Business Entities: PT, CV, BUMN, etc.
57
+ * - Banks: BCA, BRI, BNI, etc.
58
+ * - Services: BPJS, PLN, KTP, SIM, etc.
59
+ * - Technology: IT, AI, API, SEO, etc.
60
+ * - Education: UI, ITB, UGM, etc.
61
+ * - International: UN, WHO, NATO, ASEAN, etc.
62
+ *
63
+ * **Validation Checklist:**
64
+ * ✅ Is it commonly written in ALL CAPS?
65
+ * ✅ Is it an official acronym (not just a shortened word)?
66
+ * ✅ Will it look wrong if title-cased (e.g., "Pt" instead of "PT")?
67
+ *
68
+ * **Example Addition:**
69
+ * ```typescript
70
+ * export const ACRONYMS = [
71
+ * // ... existing entries
72
+ * 'OJK', // Otoritas Jasa Keuangan
73
+ * 'BI', // Bank Indonesia
74
+ * 'NASA', // National Aeronautics and Space Administration
75
+ * ] as const;
76
+ * ```
77
+ *
78
+ * **Testing:** Add test case in `toTitleCase.test.ts`:
79
+ * ```typescript
80
+ * it('preserves OJK uppercase', () => {
81
+ * expect(toTitleCase('ojk indonesia')).toBe('OJK Indonesia');
82
+ * });
83
+ * ```
84
+ *
85
+ * ### 3. ABBREVIATIONS (Expansion Mapping)
86
+ *
87
+ * Add abbreviation → full form mappings for `expandAbbreviation()` function.
88
+ *
89
+ * **Categories (use comment headers):**
90
+ * - Address: Jl., Gg., Kec., Kab., etc.
91
+ * - Academic Titles: Dr., Ir., Prof., S.H., M.M., etc.
92
+ * - Honorifics: Bpk., Yth., H., Hj., etc.
93
+ * - Organizations: PT., CV., UD., etc.
94
+ * - Common: dst., dll., a.n., etc.
95
+ * - Contact Info: Tlp., HP., Fax, etc.
96
+ * - Days/Months: Sen., Jan., Feb., etc.
97
+ * - Units: kg., km., lt., etc.
98
+ *
99
+ * **Key Format Rules:**
100
+ * - Include period if commonly written: `'Jl.'` not `'Jl'`
101
+ * - Use proper capitalization: `'Jalan'` not `'jalan'`
102
+ * - Keep it concise: Full form only, no explanations
103
+ *
104
+ * **Example Addition:**
105
+ * ```typescript
106
+ * export const ABBREVIATIONS: Record<string, string> = {
107
+ * // ... existing entries
108
+ *
109
+ * // ========== New Category Example ==========
110
+ * 'Apt.': 'Apartemen',
111
+ * 'Ruko': 'Rumah Toko',
112
+ * 'Rukan': 'Rumah Kantor',
113
+ * };
114
+ * ```
115
+ *
116
+ * **Testing:** Add test case in `abbreviation.test.ts`:
117
+ * ```typescript
118
+ * it('expands Apt. to Apartemen', () => {
119
+ * expect(expandAbbreviation('Apt. Sudirman'))
120
+ * .toBe('Apartemen Sudirman');
121
+ * });
122
+ * ```
123
+ *
124
+ * ============================================================================
125
+ * DATA SOURCES & REFERENCES
126
+ * ============================================================================
127
+ *
128
+ * When adding new entries, refer to these authoritative sources:
129
+ *
130
+ * **Indonesian Language:**
131
+ * - PUEBI (Pedoman Umum Ejaan Bahasa Indonesia)
132
+ * https://puebi.js.org/
133
+ *
134
+ * - KBBI (Kamus Besar Bahasa Indonesia)
135
+ * https://kbbi.kemdikbud.go.id/
136
+ *
137
+ * - Wikipedia Indonesia - Daftar Singkatan
138
+ * https://id.wikipedia.org/wiki/Daftar_singkatan_di_Indonesia
139
+ *
140
+ * **English Language:**
141
+ * - Chicago Manual of Style (Title Case Rules)
142
+ * https://www.chicagomanualofstyle.org/
143
+ *
144
+ * - AP Stylebook
145
+ * https://www.apstylebook.com/
146
+ *
147
+ * **Government & Official:**
148
+ * - Kemendagri (addresses, administrative divisions)
149
+ * https://www.kemendagri.go.id/
150
+ *
151
+ * - Kemenkumham (business entities)
152
+ * https://www.kemenkumham.go.id/
153
+ *
154
+ * - Kemendikbud (education, degrees)
155
+ * https://www.kemdikbud.go.id/
156
+ *
157
+ * ============================================================================
158
+ * CONTRIBUTION GUIDELINES
159
+ * ============================================================================
160
+ *
161
+ * **Before Adding:**
162
+ * 1. ✅ Check if entry already exists (Ctrl+F)
163
+ * 2. ✅ Verify spelling from official sources
164
+ * 3. ✅ Ensure it's commonly used (not obscure)
165
+ * 4. ✅ Choose correct category/section
166
+ *
167
+ * **After Adding:**
168
+ * 1. ✅ Add corresponding test case
169
+ * 2. ✅ Run tests: `npm test constants`
170
+ * 3. ✅ Update this file's documentation if needed
171
+ * 4. ✅ Add source reference in PR description
172
+ *
173
+ * **PR Template:**
174
+ * ```
175
+ * ### Added Constants
176
+ *
177
+ * **Type:** [LOWERCASE_WORDS | ACRONYMS | ABBREVIATIONS]
178
+ *
179
+ * **Entries:**
180
+ * - `OJK` - Otoritas Jasa Keuangan
181
+ * - `BI` - Bank Indonesia
182
+ *
183
+ * **Source:** https://www.ojk.go.id/
184
+ *
185
+ * **Test Coverage:** ✅ Added in toTitleCase.test.ts line 245
186
+ *
187
+ * **Rationale:**
188
+ * Commonly used financial regulatory bodies in Indonesian context.
189
+ * ```
190
+ *
191
+ * ============================================================================
192
+ * COMMON PITFALLS TO AVOID
193
+ * ============================================================================
194
+ *
195
+ * ❌ **Don't add brand-specific styling** (e.g., "iPhone" → keep user control)
196
+ * ❌ **Don't add regional dialects** (stick to standard Indonesian/English)
197
+ * ❌ **Don't add context-dependent acronyms** (e.g., "UI" = both User Interface & Universitas Indonesia)
198
+ * ❌ **Don't add very rare/obscure terms** (focus on common usage)
199
+ * ❌ **Don't forget the period** in ABBREVIATIONS (e.g., use `'Dr.'` not `'Dr'`)
200
+ * ❌ **Don't mix singular/plural** in ABBREVIATIONS (choose one consistently)
201
+ *
202
+ * ✅ **Do keep entries alphabetically sorted** within categories
203
+ * ✅ **Do use proper capitalization** in expanded forms
204
+ * ✅ **Do add comments** for non-obvious entries
205
+ * ✅ **Do verify against official sources** before adding
206
+ * ✅ **Do write test cases** for new additions
207
+ *
208
+ * ============================================================================
209
+ * FUTURE EXTENSIBILITY
210
+ * ============================================================================
211
+ *
212
+ * **Planned Enhancements:**
213
+ *
214
+ * 1. **External Data Source Support:**
215
+ * ```typescript
216
+ * import customAcronyms from './data/custom-acronyms.json';
217
+ * export const ACRONYMS = [...DEFAULT_ACRONYMS, ...customAcronyms];
218
+ * ```
219
+ *
220
+ * 2. **Context-Aware Acronyms:**
221
+ * ```typescript
222
+ * export const CONTEXT_ACRONYMS = {
223
+ * 'UI': {
224
+ * tech: 'UI', // User Interface
225
+ * education: 'UI', // Universitas Indonesia
226
+ * }
227
+ * };
228
+ * ```
229
+ *
230
+ * 3. **Locale-Specific Sets:**
231
+ * ```typescript
232
+ * export const LOWERCASE_WORDS = {
233
+ * id: [...], // Indonesian
234
+ * en: [...], // English
235
+ * mixed: [...], // Combined (default)
236
+ * };
237
+ * ```
238
+ *
239
+ * 4. **Dynamic Loading:**
240
+ * ```typescript
241
+ * // Load additional acronyms from user config
242
+ * export async function loadCustomConstants(url: string) {
243
+ * const data = await fetch(url).then(r => r.json());
244
+ * return [...ACRONYMS, ...data.acronyms];
245
+ * }
246
+ * ```
247
+ *
248
+ * ============================================================================
249
+ * VERSIONING & CHANGELOG
250
+ * ============================================================================
251
+ *
252
+ * Track major additions here:
253
+ *
254
+ * - v0.2.0 (2024-12-18): Initial comprehensive dataset
255
+ * - 50+ lowercase particles (Indonesian + English)
256
+ * - 110+ acronyms (Indonesian + International)
257
+ * - 80+ abbreviation mappings
258
+ *
259
+ * - v0.2.1 (TBD): Add financial sector acronyms (OJK, BI, etc.)
260
+ * - v0.2.2 (TBD): Add technology company acronyms
261
+ *
262
+ * ============================================================================
263
+ */
264
+ /**
265
+ * Indonesian and English lowercase particles, declared as a readonly tuple of string literals.
266
+ * These words remain lowercase in title case (except when they are the first word).
267
+ *
268
+ * Based on:
269
+ * - Indonesian grammar (PUEBI)
270
+ * - English title case rules (Chicago Manual of Style)
271
+ */
272
+ declare const LOWERCASE_WORDS: readonly ["di", "ke", "dari", "pada", "dalam", "untuk", "dengan", "oleh", "kepada", "terhadap", "tentang", "tanpa", "hingga", "sampai", "sejak", "menuju", "melalui", "dan", "atau", "tetapi", "namun", "serta", "maupun", "melainkan", "sedangkan", "yang", "sebagai", "adalah", "ialah", "yaitu", "bahwa", "akan", "telah", "sudah", "belum", "a", "an", "the", "and", "or", "but", "nor", "for", "yet", "so", "as", "at", "by", "in", "of", "on", "to", "up", "via", "per", "off", "out"];
273
+ /**
274
+ * Indonesian and international acronyms, declared as a readonly tuple of string literals.
275
+ * These are preserved rather than title-cased; most are UPPERCASE, but some entries are mixed case (e.g. "Tbk", "PPh", "WiFi", "SaaS", "S.Pd") — presumably emitted exactly as listed, TODO confirm. NOTE(review): "UI" and "ATM" each appear twice in this tuple.
276
+ */
277
+ declare const ACRONYMS: readonly ["DKI", "DIY", "TNI", "POLRI", "ABRI", "MPR", "DPR", "KPK", "BIN", "PT", "CV", "UD", "PD", "Tbk", "BUMN", "BUMD", "BCA", "BRI", "BNI", "BTN", "BSI", "BPD", "KTP", "NIK", "NPWP", "SIM", "STNK", "BPJS", "KIS", "KIP", "PKH", "PLN", "PDAM", "PGN", "KAI", "MRT", "LRT", "PBB", "PPh", "PPN", "BPHTB", "UI", "ITB", "UGM", "IPB", "ITS", "UNPAD", "UNDIP", "UNAIR", "UNS", "S.Pd", "S.H", "S.E", "S.T", "S.Kom", "S.Si", "S.Sos", "M.Pd", "M.M", "M.T", "M.Kom", "ATM", "POS", "SMS", "GPS", "WiFi", "USB", "PIN", "OTP", "QR", "IT", "AI", "ML", "API", "UI", "UX", "SEO", "SaaS", "CRM", "ERP", "CEO", "CFO", "CTO", "COO", "CMO", "HR", "PR", "VP", "GM", "UN", "WHO", "UNESCO", "NATO", "ASEAN", "APEC", "WTO", "IMF", "ICU", "ER", "MRI", "CT", "DNA", "RNA", "HIV", "AIDS", "COVID", "KM", "CM", "MM", "KG", "RPM", "MPH", "KPH", "IPO", "ATM", "ROI", "GDP", "VAT"];
278
+ /**
279
+ * Indonesian abbreviations mapping (abbreviation → full form), typed as Record<string, string>.
280
+ * Organized by category for maintainability; the individual entries are not visible in this declaration.
281
+ */
282
+ declare const ABBREVIATIONS: Record<string, string>;
283
+
284
+ export { ABBREVIATIONS, ACRONYMS, LOWERCASE_WORDS };