georgian-hyphenation 2.2.2 → 2.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +311 -53
- package/package.json +24 -29
- package/src/javascript/index.js +60 -22
package/README.md
CHANGED
|
@@ -3,138 +3,396 @@
|
|
|
3
3
|
[](https://www.npmjs.com/package/georgian-hyphenation)
|
|
4
4
|
[](https://opensource.org/licenses/MIT)
|
|
5
5
|
[](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules)
|
|
6
|
+
[](https://www.npmjs.com/package/georgian-hyphenation)
|
|
6
7
|
|
|
7
|
-
**Version 2.2.
|
|
8
|
+
**Version 2.2.4** - Browser + Node.js Compatible with Dictionary Support
|
|
8
9
|
|
|
9
|
-
ქართული ენის სრული დამარცვლის ბიბლიოთეკა. ვერსია 2.2.
|
|
10
|
+
ქართული ენის სრული დამარცვლის ბიბლიოთეკა. ვერსია 2.2.4 სრულად თავსებადია როგორც Browser, ისე Node.js გარემოსთან.
|
|
10
11
|
|
|
11
12
|
---
|
|
12
13
|
|
|
13
|
-
## ✨ New in v2.2.
|
|
14
|
+
## ✨ New in v2.2.4
|
|
14
15
|
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
-
|
|
18
|
-
-
|
|
16
|
+
- 🌐 **Full Browser Support**: CDN URL fixed for reliable dictionary loading in browsers
|
|
17
|
+
- 📦 **NPM Package Files**: Added `data/` folder to published package
|
|
18
|
+
- 🔧 **Improved Error Handling**: Better fallback when dictionary is unavailable
|
|
19
|
+
- 📝 **Documentation**: Corrected examples (removed non-existent Georgian words)
|
|
19
20
|
|
|
20
21
|
---
|
|
21
22
|
|
|
22
|
-
##
|
|
23
|
+
## ✨ Features from v2.2.2
|
|
24
|
+
|
|
25
|
+
- 🧹 **Automatic Sanitization**: Strips existing soft-hyphens before processing to prevent double-hyphenation
|
|
26
|
+
- 📚 **Dictionary Support**: 150+ exception words for edge cases
|
|
27
|
+
- ⚡ **Performance Boost**: Harmonic cluster lookups optimized using `Set` (O(1) complexity)
|
|
28
|
+
- 📦 **Modern ESM Support**: Native `import/export` syntax
|
|
29
|
+
- 🎯 **Hybrid Engine**: Dictionary-first, Algorithm fallback
|
|
30
|
+
|
|
31
|
+
---
|
|
23
32
|
|
|
33
|
+
## 📦 Installation
|
|
24
34
|
```bash
|
|
25
35
|
npm install georgian-hyphenation
|
|
26
|
-
|
|
27
36
|
```
|
|
28
37
|
|
|
29
38
|
---
|
|
30
39
|
|
|
31
|
-
##
|
|
40
|
+
## 🚀 Quick Start
|
|
41
|
+
|
|
42
|
+
### Browser (CDN)
|
|
43
|
+
```html
|
|
44
|
+
<!DOCTYPE html>
|
|
45
|
+
<html>
|
|
46
|
+
<head>
|
|
47
|
+
<meta charset="UTF-8">
|
|
48
|
+
<style>
|
|
49
|
+
.hyphenated {
|
|
50
|
+
hyphens: manual;
|
|
51
|
+
-webkit-hyphens: manual;
|
|
52
|
+
text-align: justify;
|
|
53
|
+
}
|
|
54
|
+
</style>
|
|
55
|
+
</head>
|
|
56
|
+
<body>
|
|
57
|
+
<div class="hyphenated" id="content"></div>
|
|
58
|
+
|
|
59
|
+
<script type="module">
|
|
60
|
+
import GeorgianHyphenator from 'https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.4/src/javascript/index.js';
|
|
61
|
+
|
|
62
|
+
async function initialize() {
|
|
63
|
+
const hyphenator = new GeorgianHyphenator('\u00AD'); // Soft hyphen
|
|
64
|
+
|
|
65
|
+
// Load dictionary (optional, but recommended)
|
|
66
|
+
await hyphenator.loadDefaultLibrary();
|
|
67
|
+
|
|
68
|
+
const text = "საქართველო არის ძალიან ლამაზი ქვეყანა, სადაც ბევრი ისტორიული ძეგლია.";
|
|
69
|
+
document.getElementById('content').textContent = hyphenator.hyphenateText(text);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
initialize();
|
|
73
|
+
</script>
|
|
74
|
+
</body>
|
|
75
|
+
</html>
|
|
76
|
+
```
|
|
32
77
|
|
|
33
|
-
|
|
78
|
+
---
|
|
34
79
|
|
|
80
|
+
### Node.js (ESM)
|
|
35
81
|
```javascript
|
|
36
82
|
import GeorgianHyphenator from 'georgian-hyphenation';
|
|
37
83
|
|
|
38
|
-
const hyphenator = new GeorgianHyphenator('-'); //
|
|
84
|
+
const hyphenator = new GeorgianHyphenator('-'); // Visible hyphen
|
|
39
85
|
|
|
40
|
-
//
|
|
86
|
+
// Hyphenate a word
|
|
41
87
|
console.log(hyphenator.hyphenate('საქართველო'));
|
|
42
88
|
// Output: "სა-ქარ-თვე-ლო"
|
|
43
89
|
|
|
44
|
-
//
|
|
45
|
-
|
|
46
|
-
const messyWord = 'სა-ქარ-თვე-ლო';
|
|
47
|
-
console.log(hyphenator.hyphenate(messyWord));
|
|
48
|
-
// Output: "სა-ქარ-თვე-ლო" (Correctly re-processed)
|
|
90
|
+
// Load dictionary (optional)
|
|
91
|
+
await hyphenator.loadDefaultLibrary();
|
|
49
92
|
|
|
93
|
+
// Hyphenate text
|
|
94
|
+
const text = "გამარჯობა, საქართველო მშვენიერი ქვეყანაა!";
|
|
95
|
+
console.log(hyphenator.hyphenateText(text));
|
|
96
|
+
// Output: "გა-მარ-ჯო-ბა, სა-ქარ-თვე-ლო მშვე-ნი-ე-რი ქვე-ყა-ნა-ა!"
|
|
50
97
|
```
|
|
51
98
|
|
|
52
|
-
|
|
99
|
+
---
|
|
53
100
|
|
|
101
|
+
### Node.js (CommonJS)
|
|
54
102
|
```javascript
|
|
55
|
-
|
|
56
|
-
await hyphenator.loadDefaultLibrary();
|
|
103
|
+
const GeorgianHyphenator = require('georgian-hyphenation');
|
|
57
104
|
|
|
58
|
-
|
|
105
|
+
const hyphenator = new GeorgianHyphenator('-');
|
|
106
|
+
console.log(hyphenator.hyphenate('საქართველო'));
|
|
107
|
+
```
|
|
59
108
|
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## 📖 API Reference
|
|
112
|
+
|
|
113
|
+
### **Constructor**
|
|
114
|
+
```javascript
|
|
115
|
+
new GeorgianHyphenator(hyphenChar = '\u00AD')
|
|
60
116
|
```
|
|
61
117
|
|
|
62
|
-
|
|
118
|
+
**Parameters:**
|
|
119
|
+
- `hyphenChar` (string): Character to use for hyphenation
|
|
120
|
+
- `'\u00AD'` - Soft hyphen (invisible, default)
|
|
121
|
+
- `'-'` - Regular hyphen (visible)
|
|
122
|
+
- `'·'` - Middle dot
|
|
123
|
+
- Any custom character
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
### **Methods**
|
|
63
128
|
|
|
129
|
+
#### `hyphenate(word)`
|
|
130
|
+
|
|
131
|
+
Hyphenates a single Georgian word.
|
|
132
|
+
|
|
133
|
+
**Features:**
|
|
134
|
+
- Automatically strips existing hyphens (sanitization)
|
|
135
|
+
- Checks dictionary first (if loaded)
|
|
136
|
+
- Falls back to algorithm
|
|
64
137
|
```javascript
|
|
65
|
-
|
|
66
|
-
|
|
138
|
+
hyphenator.hyphenate('საქართველო'); // → "სა-ქარ-თვე-ლო"
|
|
139
|
+
hyphenator.hyphenate('ბლოკი'); // → "ბლო-კი" (harmonic cluster)
|
|
140
|
+
```
|
|
67
141
|
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
#### `hyphenateText(text)`
|
|
145
|
+
|
|
146
|
+
Hyphenates entire text while preserving:
|
|
147
|
+
- Punctuation
|
|
148
|
+
- Numbers
|
|
149
|
+
- Latin characters
|
|
150
|
+
- Whitespace
|
|
151
|
+
```javascript
|
|
152
|
+
const text = "საქართველო არის ლამაზი ქვეყანა.";
|
|
153
|
+
hyphenator.hyphenateText(text);
|
|
154
|
+
// → "სა-ქარ-თვე-ლო არის ლა-მა-ზი ქვე-ყა-ნა."
|
|
68
155
|
```
|
|
69
156
|
|
|
70
157
|
---
|
|
71
158
|
|
|
72
|
-
|
|
159
|
+
#### `getSyllables(word)`
|
|
160
|
+
|
|
161
|
+
Returns syllables as an array.
|
|
162
|
+
```javascript
|
|
163
|
+
hyphenator.getSyllables('საქართველო');
|
|
164
|
+
// → ['სა', 'ქარ', 'თვე', 'ლო']
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
#### `loadDefaultLibrary()` (Async)
|
|
170
|
+
|
|
171
|
+
Loads the default exception dictionary (150+ words).
|
|
172
|
+
|
|
173
|
+
**Browser:** Fetches from CDN (`jsdelivr`)
|
|
174
|
+
**Node.js:** Loads from local `data/exceptions.json`
|
|
175
|
+
```javascript
|
|
176
|
+
await hyphenator.loadDefaultLibrary();
|
|
177
|
+
console.log('Dictionary loaded!');
|
|
178
|
+
```
|
|
73
179
|
|
|
74
|
-
|
|
180
|
+
---
|
|
75
181
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
182
|
+
#### `loadLibrary(data)`
|
|
183
|
+
|
|
184
|
+
Load custom dictionary.
|
|
185
|
+
```javascript
|
|
186
|
+
hyphenator.loadLibrary({
|
|
187
|
+
'სპეციალური': 'სპე-ცი-ა-ლუ-რი',
|
|
188
|
+
'კომპიუტერი': 'კომ-პიუ-ტე-რი'
|
|
189
|
+
});
|
|
190
|
+
```
|
|
80
191
|
|
|
81
192
|
---
|
|
82
193
|
|
|
83
|
-
##
|
|
194
|
+
## 🧠 Algorithm Logic
|
|
195
|
+
|
|
196
|
+
The v2.2 algorithm uses **phonological distance analysis** with these rules:
|
|
197
|
+
|
|
198
|
+
### 1. **Vowel Distance Analysis**
|
|
199
|
+
```
|
|
200
|
+
საქართველო → vowels at: [1, 3, 7, 9]
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### 2. **Consonant Cluster Rules**
|
|
204
|
+
|
|
205
|
+
- **V-V (0 consonants)**: Split between vowels
|
|
206
|
+
```javascript
|
|
207
|
+
'გააკეთა' → 'გა-ა-კე-თა'
|
|
208
|
+
```
|
|
84
209
|
|
|
85
|
-
|
|
210
|
+
- **V-C-V (1 consonant)**: Split after first vowel
|
|
211
|
+
```javascript
|
|
212
|
+
'მამა' → 'მა-მა'
|
|
213
|
+
```
|
|
86
214
|
|
|
87
|
-
|
|
215
|
+
- **V-CC-V (2+ consonants)**:
|
|
216
|
+
1. Check for double consonants (gemination) - rare in Georgian
|
|
217
|
+
2. Check for harmonic clusters (ბლ, გლ, კრ, etc.) - keep together
|
|
218
|
+
3. Default: split after first consonant
|
|
88
219
|
|
|
89
|
-
###
|
|
220
|
+
### 3. **Harmonic Clusters (67 clusters)**
|
|
221
|
+
|
|
222
|
+
These consonant pairs stay together:
|
|
223
|
+
```
|
|
224
|
+
ბლ, ბრ, ბღ, ბზ, გდ, გლ, გმ, გნ, გვ, გზ, გრ, დრ, თლ, თრ, თღ,
|
|
225
|
+
კლ, კმ, კნ, კრ, კვ, მტ, პლ, პრ, ჟღ, რგ, რლ, რმ, სწ, სხ, ტკ,
|
|
226
|
+
ტპ, ტრ, ფლ, ფრ, ფქ, ფშ, ქლ, ქნ, ქვ, ქრ, ღლ, ღრ, ყლ, ყრ, შთ,
|
|
227
|
+
შპ, ჩქ, ჩრ, ცლ, ცნ, ცრ, ცვ, ძგ, ძვ, ძღ, წლ, წრ, წნ, წკ, ჭკ,
|
|
228
|
+
ჭრ, ჭყ, ხლ, ხმ, ხნ, ხვ, ჯგ
|
|
229
|
+
```
|
|
90
230
|
|
|
91
|
-
|
|
231
|
+
### 4. **Anti-Orphan Protection**
|
|
92
232
|
|
|
93
|
-
|
|
233
|
+
Minimum 2 characters on each side:
|
|
234
|
+
```javascript
|
|
235
|
+
'არა' → 'არა' // Not split (would create 1-letter syllable)
|
|
236
|
+
'არაა' → 'არაა' // Not split either (every possible break would leave a single letter at the word edge)
|
|
237
|
+
```
|
|
94
238
|
|
|
95
|
-
|
|
239
|
+
---
|
|
96
240
|
|
|
97
|
-
|
|
241
|
+
## 🎨 Examples
|
|
98
242
|
|
|
99
|
-
|
|
243
|
+
### Basic Words
|
|
244
|
+
```javascript
|
|
245
|
+
hyphenate('საქართველო') // → სა-ქარ-თვე-ლო
|
|
246
|
+
hyphenate('მთავრობა') // → მთავ-რო-ბა
|
|
247
|
+
hyphenate('დედაქალაქი') // → დე-და-ქა-ლა-ქი
|
|
248
|
+
hyphenate('პარლამენტი') // → პარ-ლა-მენ-ტი
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### Harmonic Clusters
|
|
252
|
+
```javascript
|
|
253
|
+
hyphenate('ბლოკი') // → ბლო-კი (ბლ stays together)
|
|
254
|
+
hyphenate('კრემი') // → კრე-მი (კრ stays together)
|
|
255
|
+
hyphenate('გლეხი') // → გლე-ხი (გლ stays together)
|
|
256
|
+
hyphenate('პროგრამა') // → პროგ-რა-მა (პრ stays together; გ-რ is split, as in the dictionary entry)
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### V-V Split
|
|
260
|
+
```javascript
|
|
261
|
+
hyphenate('გააკეთა') // → გა-ა-კე-თა
|
|
262
|
+
hyphenate('გაიარა') // → გა-ი-ა-რა
|
|
263
|
+
hyphenate('გაანალიზა') // → გა-ა-ნა-ლი-ზა
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
### Text Processing
|
|
267
|
+
```javascript
|
|
268
|
+
hyphenateText('საქართველო არის ლამაზი ქვეყანა')
|
|
269
|
+
// → 'საქართველო არის ლამაზი ქვეყანა' (with soft hyphens)
|
|
270
|
+
|
|
271
|
+
// Preserves punctuation
|
|
272
|
+
hyphenateText('მთავრობა, პარლამენტი და სასამართლო.')
|
|
273
|
+
// → 'მთავრობა, პარლამენტი და სასამართლო.'
|
|
274
|
+
```
|
|
100
275
|
|
|
101
276
|
---
|
|
102
277
|
|
|
103
278
|
## 🧪 Testing
|
|
104
279
|
|
|
105
|
-
|
|
106
|
-
|
|
280
|
+
Run the test suite:
|
|
107
281
|
```bash
|
|
108
282
|
npm test
|
|
283
|
+
```
|
|
109
284
|
|
|
285
|
+
Expected output:
|
|
286
|
+
```
|
|
287
|
+
✅ Test 1: საქართველო → სა-ქარ-თვე-ლო
|
|
288
|
+
✅ Test 2: მთავრობა → მთავ-რო-ბა
|
|
289
|
+
...
|
|
290
|
+
📊 Test Results: 13 passed, 0 failed
|
|
291
|
+
🎉 All tests passed!
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
## 📊 Dictionary
|
|
297
|
+
|
|
298
|
+
The library includes `data/exceptions.json` with 150+ Georgian words:
|
|
299
|
+
```json
|
|
300
|
+
{
|
|
301
|
+
"საქართველო": "სა-ქარ-თვე-ლო",
|
|
302
|
+
"კომპიუტერი": "კომ-პიუ-ტე-რი",
|
|
303
|
+
"პროგრამა": "პროგ-რა-მა",
|
|
304
|
+
"ინტერნეტი": "ინ-ტერ-ნე-ტი"
|
|
305
|
+
}
|
|
110
306
|
```
|
|
111
307
|
|
|
112
308
|
---
|
|
113
309
|
|
|
114
310
|
## 📝 Changelog
|
|
115
311
|
|
|
116
|
-
### Version 2.2.
|
|
312
|
+
### Version 2.2.4 (2026-01-27)
|
|
313
|
+
|
|
314
|
+
* 🌐 **Browser Fix**: Fixed CDN URL for reliable dictionary loading
|
|
315
|
+
* 📦 **NPM Files**: Added `data/` folder to published package (`files` whitelist)
|
|
316
|
+
* 🔧 **Error Handling**: Improved fallback when dictionary unavailable
|
|
317
|
+
* 📝 **Documentation**: Corrected examples, removed non-existent words
|
|
318
|
+
|
|
319
|
+
### Version 2.2.1 (2026-01-26)
|
|
320
|
+
|
|
321
|
+
* 🧹 **Sanitization**: Added `_stripHyphens` for automatic input cleaning
|
|
322
|
+
* ⚡ **Performance**: Converted `harmonicClusters` to `Set` (O(1) lookup)
|
|
323
|
+
* 📦 **ESM**: Full ES Modules support
|
|
324
|
+
* 📚 **Dictionary**: Added `loadDefaultLibrary()` method
|
|
325
|
+
|
|
326
|
+
### Version 2.0.1 (2026-01-22)
|
|
327
|
+
|
|
328
|
+
* 🎓 **Academic Rewrite**: Phonological distance analysis
|
|
329
|
+
* 🛡️ **Anti-Orphan**: Minimum 2 characters on each side
|
|
330
|
+
* 🎼 **Harmonic Clusters**: Georgian-specific consonant groups
|
|
331
|
+
|
|
332
|
+
---
|
|
333
|
+
|
|
334
|
+
## 🤝 Contributing
|
|
335
|
+
|
|
336
|
+
Contributions welcome! Please:
|
|
337
|
+
|
|
338
|
+
1. Fork the repository
|
|
339
|
+
2. Create a feature branch
|
|
340
|
+
3. Run tests: `npm test`
|
|
341
|
+
4. Submit a Pull Request
|
|
117
342
|
|
|
118
|
-
|
|
119
|
-
* Converted `harmonicClusters` to `Set` for high-performance processing.
|
|
120
|
-
* Switched to **ES Modules (ESM)** as default.
|
|
121
|
-
* Added `loadDefaultLibrary` for browser/node dictionary fetching.
|
|
343
|
+
---
|
|
122
344
|
|
|
123
|
-
|
|
345
|
+
## 🐛 Bug Reports
|
|
124
346
|
|
|
125
|
-
|
|
126
|
-
* Phonological distance analysis.
|
|
347
|
+
Found a bug? [Open an issue](https://github.com/guramzhgamadze/georgian-hyphenation/issues)
|
|
127
348
|
|
|
128
349
|
---
|
|
129
350
|
|
|
130
351
|
## 📄 License
|
|
131
352
|
|
|
132
|
-
MIT License
|
|
353
|
+
MIT License
|
|
354
|
+
|
|
355
|
+
Copyright (c) 2025 Guram Zhgamadze
|
|
356
|
+
|
|
357
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
358
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
359
|
+
in the Software without restriction, including without limitation the rights
|
|
360
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
361
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
362
|
+
furnished to do so, subject to the following conditions:
|
|
363
|
+
|
|
364
|
+
The above copyright notice and this permission notice shall be included in all
|
|
365
|
+
copies or substantial portions of the Software.
|
|
366
|
+
|
|
367
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
368
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
369
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
370
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
371
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
372
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
373
|
+
SOFTWARE.
|
|
133
374
|
|
|
134
375
|
---
|
|
135
376
|
|
|
136
377
|
## 📧 Contact
|
|
137
378
|
|
|
138
|
-
**Guram Zhgamadze**
|
|
379
|
+
**Guram Zhgamadze**
|
|
380
|
+
|
|
381
|
+
- 🐙 GitHub: [@guramzhgamadze](https://github.com/guramzhgamadze)
|
|
382
|
+
- 📧 Email: guramzhgamadze@gmail.com
|
|
383
|
+
- 📦 NPM: [georgian-hyphenation](https://www.npmjs.com/package/georgian-hyphenation)
|
|
384
|
+
|
|
385
|
+
---
|
|
386
|
+
|
|
387
|
+
## 🔗 Links
|
|
388
|
+
|
|
389
|
+
- **NPM Package:** https://www.npmjs.com/package/georgian-hyphenation
|
|
390
|
+
- **GitHub Repository:** https://github.com/guramzhgamadze/georgian-hyphenation
|
|
391
|
+
- **Demo:** https://guramzhgamadze.github.io/georgian-hyphenation/
|
|
392
|
+
- **PyPI (Python):** https://pypi.org/project/georgian-hyphenation/
|
|
393
|
+
|
|
394
|
+
---
|
|
395
|
+
|
|
396
|
+
**Made with ❤️ for the Georgian language community**
|
|
139
397
|
|
|
140
|
-
|
|
398
|
+
🇬🇪 **ქართული ენის ციფრული განვითარებისთვის**
|
package/package.json
CHANGED
|
@@ -1,48 +1,43 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "georgian-hyphenation",
|
|
3
|
-
"version": "2.2.
|
|
4
|
-
"description": "Georgian Language Hyphenation Library
|
|
5
|
-
"type": "module",
|
|
3
|
+
"version": "2.2.4",
|
|
4
|
+
"description": "Georgian Language Hyphenation Library - Browser + Node.js compatible",
|
|
6
5
|
"main": "src/javascript/index.js",
|
|
7
|
-
"
|
|
6
|
+
"module": "src/javascript/index.js",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./src/javascript/index.js",
|
|
11
|
+
"require": "./src/javascript/index.js"
|
|
12
|
+
},
|
|
13
|
+
"./data/exceptions.json": "./data/exceptions.json"
|
|
14
|
+
},
|
|
8
15
|
"files": [
|
|
9
|
-
"src/javascript",
|
|
10
|
-
"data/
|
|
16
|
+
"src/javascript/",
|
|
17
|
+
"data/",
|
|
11
18
|
"README.md",
|
|
12
|
-
"LICENSE
|
|
19
|
+
"LICENSE"
|
|
13
20
|
],
|
|
14
|
-
"exports": {
|
|
15
|
-
".": "./src/javascript/index.js",
|
|
16
|
-
"./data/*": "./data/*"
|
|
17
|
-
},
|
|
18
21
|
"scripts": {
|
|
19
|
-
"test": "node
|
|
20
|
-
},
|
|
21
|
-
"repository": {
|
|
22
|
-
"type": "git",
|
|
23
|
-
"url": "git+https://github.com/guramzhgamadze/georgian-hyphenation.git"
|
|
22
|
+
"test": "node test_javascript.js"
|
|
24
23
|
},
|
|
25
24
|
"keywords": [
|
|
26
25
|
"georgian",
|
|
27
|
-
"kartuli",
|
|
28
|
-
"ქართული",
|
|
29
26
|
"hyphenation",
|
|
30
27
|
"syllabification",
|
|
28
|
+
"kartuli",
|
|
31
29
|
"nlp",
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
"text-processing",
|
|
35
|
-
"i18n",
|
|
36
|
-
"localization",
|
|
37
|
-
"sanitization"
|
|
30
|
+
"browser",
|
|
31
|
+
"nodejs"
|
|
38
32
|
],
|
|
39
33
|
"author": "Guram Zhgamadze <guramzhgamadze@gmail.com>",
|
|
40
34
|
"license": "MIT",
|
|
41
|
-
"
|
|
42
|
-
"
|
|
35
|
+
"repository": {
|
|
36
|
+
"type": "git",
|
|
37
|
+
"url": "https://github.com/guramzhgamadze/georgian-hyphenation.git"
|
|
43
38
|
},
|
|
44
39
|
"homepage": "https://github.com/guramzhgamadze/georgian-hyphenation#readme",
|
|
45
|
-
"
|
|
46
|
-
"
|
|
40
|
+
"bugs": {
|
|
41
|
+
"url": "https://github.com/guramzhgamadze/georgian-hyphenation/issues"
|
|
47
42
|
}
|
|
48
|
-
}
|
|
43
|
+
}
|
package/src/javascript/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Georgian Hyphenation Library v2.2.
|
|
3
|
-
*
|
|
2
|
+
* Georgian Hyphenation Library v2.2.4
|
|
3
|
+
* Browser + Node.js Compatible
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
export default class GeorgianHyphenator {
|
|
@@ -21,6 +21,7 @@ export default class GeorgianHyphenator {
|
|
|
21
21
|
]);
|
|
22
22
|
|
|
23
23
|
this.dictionary = new Map();
|
|
24
|
+
this.dictionaryLoaded = false;
|
|
24
25
|
}
|
|
25
26
|
|
|
26
27
|
/**
|
|
@@ -28,12 +29,13 @@ export default class GeorgianHyphenator {
|
|
|
28
29
|
*/
|
|
29
30
|
_stripHyphens(text) {
|
|
30
31
|
if (!text) return '';
|
|
31
|
-
//
|
|
32
|
-
|
|
33
|
-
const regex = new RegExp(`[\u00AD${escapedChar}]`, 'g');
|
|
34
|
-
return text.replace(regex, '');
|
|
32
|
+
// Remove soft hyphens, regular hyphens, and custom hyphen char
|
|
33
|
+
return text.replace(/[\u00AD\-]/g, '').replace(new RegExp(this.hyphenChar, 'g'), '');
|
|
35
34
|
}
|
|
36
35
|
|
|
36
|
+
/**
|
|
37
|
+
* ტვირთავს მომხმარებლის dictionary-ს
|
|
38
|
+
*/
|
|
37
39
|
loadLibrary(data) {
|
|
38
40
|
if (data && typeof data === 'object') {
|
|
39
41
|
Object.entries(data).forEach(([word, hyphenated]) => {
|
|
@@ -42,43 +44,65 @@ export default class GeorgianHyphenator {
|
|
|
42
44
|
}
|
|
43
45
|
}
|
|
44
46
|
|
|
47
|
+
/**
|
|
48
|
+
* ✅ ტვირთავს default dictionary-ს (Browser + Node.js compatible)
|
|
49
|
+
*/
|
|
45
50
|
async loadDefaultLibrary() {
|
|
46
|
-
|
|
51
|
+
if (this.dictionaryLoaded) return;
|
|
52
|
+
|
|
53
|
+
// Browser Environment
|
|
47
54
|
if (typeof window !== 'undefined' && typeof fetch !== 'undefined') {
|
|
48
55
|
try {
|
|
49
|
-
|
|
50
|
-
|
|
56
|
+
// ✅ სწორი CDN URL - jsdelivr უფრო სანდოა unpkg-ზე
|
|
57
|
+
const response = await fetch('https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.4/data/exceptions.json');
|
|
58
|
+
|
|
59
|
+
if (!response.ok) {
|
|
60
|
+
throw new Error(`HTTP ${response.status}`);
|
|
61
|
+
}
|
|
62
|
+
|
|
51
63
|
const data = await response.json();
|
|
52
64
|
this.loadLibrary(data);
|
|
65
|
+
this.dictionaryLoaded = true;
|
|
66
|
+
|
|
67
|
+
console.log(`Georgian Hyphenation v2.2.4: Dictionary loaded (${this.dictionary.size} words)`);
|
|
53
68
|
} catch (error) {
|
|
54
|
-
console.warn(
|
|
69
|
+
console.warn('Georgian Hyphenation v2.2.4: Dictionary not available, using algorithm only');
|
|
70
|
+
console.warn('Error:', error.message);
|
|
55
71
|
}
|
|
56
|
-
}
|
|
57
|
-
//
|
|
58
|
-
else if (typeof process !== 'undefined') {
|
|
72
|
+
}
|
|
73
|
+
// Node.js Environment (Dynamic Import)
|
|
74
|
+
else if (typeof process !== 'undefined' && typeof require !== 'undefined') {
|
|
59
75
|
try {
|
|
60
|
-
//
|
|
61
|
-
const
|
|
62
|
-
assert: { type: 'json' }
|
|
63
|
-
});
|
|
76
|
+
// Try CommonJS require first
|
|
77
|
+
const data = require('../../data/exceptions.json');
|
|
64
78
|
this.loadLibrary(data);
|
|
79
|
+
this.dictionaryLoaded = true;
|
|
80
|
+
console.log(`Georgian Hyphenation v2.2.4: Dictionary loaded (${this.dictionary.size} words)`);
|
|
65
81
|
} catch (error) {
|
|
66
|
-
console.warn(
|
|
82
|
+
console.warn('Georgian Hyphenation v2.2.4: Local dictionary not found, using algorithm only');
|
|
67
83
|
}
|
|
68
84
|
}
|
|
69
85
|
}
|
|
70
86
|
|
|
87
|
+
/**
|
|
88
|
+
* ამარცვლებს ერთ სიტყვას
|
|
89
|
+
*/
|
|
71
90
|
hyphenate(word) {
|
|
72
91
|
const sanitizedWord = this._stripHyphens(word);
|
|
73
92
|
const cleanWord = sanitizedWord.replace(/[.,/#!$%^&*;:{}=\-_`~()]/g, "");
|
|
74
93
|
|
|
94
|
+
// Dictionary check
|
|
75
95
|
if (this.dictionary.has(cleanWord)) {
|
|
76
96
|
return this.dictionary.get(cleanWord).replace(/-/g, this.hyphenChar);
|
|
77
97
|
}
|
|
78
98
|
|
|
99
|
+
// Algorithm fallback
|
|
79
100
|
return this.applyAlgorithm(sanitizedWord);
|
|
80
101
|
}
|
|
81
102
|
|
|
103
|
+
/**
|
|
104
|
+
* ალგორითმის გამოყენება
|
|
105
|
+
*/
|
|
82
106
|
applyAlgorithm(word) {
|
|
83
107
|
if (word.length < (this.leftMin + this.rightMin)) return word;
|
|
84
108
|
|
|
@@ -101,6 +125,7 @@ export default class GeorgianHyphenator {
|
|
|
101
125
|
if (distance === 0 || distance === 1) {
|
|
102
126
|
candidatePos = v1 + 1;
|
|
103
127
|
} else {
|
|
128
|
+
// Gemination check
|
|
104
129
|
let doubleConsonantIndex = -1;
|
|
105
130
|
for (let j = 0; j < betweenSubstring.length - 1; j++) {
|
|
106
131
|
if (betweenSubstring[j] === betweenSubstring[j + 1]) {
|
|
@@ -112,6 +137,7 @@ export default class GeorgianHyphenator {
|
|
|
112
137
|
if (doubleConsonantIndex !== -1) {
|
|
113
138
|
candidatePos = v1 + 1 + doubleConsonantIndex + 1;
|
|
114
139
|
} else {
|
|
140
|
+
// Harmonic cluster check
|
|
115
141
|
let breakIndex = -1;
|
|
116
142
|
if (distance >= 2) {
|
|
117
143
|
const lastTwo = betweenSubstring.substring(distance - 2, distance);
|
|
@@ -123,6 +149,7 @@ export default class GeorgianHyphenator {
|
|
|
123
149
|
}
|
|
124
150
|
}
|
|
125
151
|
|
|
152
|
+
// Anti-orphan protection
|
|
126
153
|
if (candidatePos >= this.leftMin && (word.length - candidatePos) >= this.rightMin) {
|
|
127
154
|
insertPoints.push(candidatePos);
|
|
128
155
|
}
|
|
@@ -135,10 +162,16 @@ export default class GeorgianHyphenator {
|
|
|
135
162
|
return result.join('');
|
|
136
163
|
}
|
|
137
164
|
|
|
165
|
+
/**
|
|
166
|
+
* მარცვლების მიღება მასივის სახით
|
|
167
|
+
*/
|
|
138
168
|
getSyllables(word) {
|
|
139
169
|
return this.hyphenate(word).split(this.hyphenChar);
|
|
140
170
|
}
|
|
141
171
|
|
|
172
|
+
/**
|
|
173
|
+
* მთელი ტექსტის დამარცვლა
|
|
174
|
+
*/
|
|
142
175
|
hyphenateText(text) {
|
|
143
176
|
if (!text) return '';
|
|
144
177
|
const sanitizedText = this._stripHyphens(text);
|
|
@@ -153,15 +186,20 @@ export default class GeorgianHyphenator {
|
|
|
153
186
|
}
|
|
154
187
|
}
|
|
155
188
|
|
|
156
|
-
/**
|
|
189
|
+
/**
|
|
190
|
+
* ✅ კროს-პლატფორმული Export
|
|
157
191
|
*/
|
|
158
|
-
|
|
192
|
+
|
|
193
|
+
// 1. ES Module (Modern)
|
|
194
|
+
// export default GeorgianHyphenator; (already at top)
|
|
195
|
+
|
|
196
|
+
// 2. Browser Global (for <script> tag without type="module")
|
|
159
197
|
if (typeof window !== 'undefined') {
|
|
160
198
|
window.GeorgianHyphenator = GeorgianHyphenator;
|
|
161
199
|
}
|
|
162
200
|
|
|
163
|
-
//
|
|
164
|
-
// (მხოლოდ თუ module.exports არსებობს)
|
|
201
|
+
// 3. Node.js CommonJS (for require())
|
|
165
202
|
if (typeof module !== 'undefined' && module.exports) {
|
|
166
203
|
module.exports = GeorgianHyphenator;
|
|
204
|
+
module.exports.default = GeorgianHyphenator;
|
|
167
205
|
}
|