georgian-hyphenation 2.2.4 → 2.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +173 -256
- package/package.json +3 -3
- package/src/javascript/index.cjs +139 -0
- package/src/javascript/index.js +15 -27
package/README.md
CHANGED
|
@@ -5,309 +5,259 @@
|
|
|
5
5
|
[](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules)
|
|
6
6
|
[](https://www.npmjs.com/package/georgian-hyphenation)
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
Georgian Language Hyphenation Library - Fast, accurate syllabification for Georgian (ქართული) text with support for both browser and Node.js environments.
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
## Features
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
- ✅ **Accurate Georgian syllabification** based on phonetic rules
|
|
13
|
+
- ✅ **Harmonic consonant clusters** recognition (ბრ, გრ, კრ, etc.)
|
|
14
|
+
- ✅ **Gemination handling** (double consonant splitting)
|
|
15
|
+
- ✅ **Exception dictionary** for irregular words
|
|
16
|
+
- ✅ **Preserves compound word hyphens** (new in v2.2.6)
|
|
17
|
+
- ✅ **Browser + Node.js compatible** (ESM & CommonJS)
|
|
18
|
+
- ✅ **Zero dependencies**
|
|
19
|
+
- ✅ **Lightweight** (~5KB)
|
|
15
20
|
|
|
16
|
-
|
|
17
|
-
- 📦 **NPM Package Files**: Added `data/` folder to published package
|
|
18
|
-
- 🔧 **Improved Error Handling**: Better fallback when dictionary is unavailable
|
|
19
|
-
- 📝 **Documentation**: Corrected examples (removed non-existent Georgian words)
|
|
20
|
-
|
|
21
|
-
---
|
|
21
|
+
## Installation
|
|
22
22
|
|
|
23
|
-
## ✨ Features from v2.2.2
|
|
24
|
-
|
|
25
|
-
- 🧹 **Automatic Sanitization**: Strips existing soft-hyphens before processing to prevent double-hyphenation
|
|
26
|
-
- 📚 **Dictionary Support**: 150+ exception words for edge cases
|
|
27
|
-
- ⚡ **Performance Boost**: Harmonic cluster lookups optimized using `Set` (O(1) complexity)
|
|
28
|
-
- 📦 **Modern ESM Support**: Native `import/export` syntax
|
|
29
|
-
- 🎯 **Hybrid Engine**: Dictionary-first, Algorithm fallback
|
|
30
|
-
|
|
31
|
-
---
|
|
32
|
-
|
|
33
|
-
## 📦 Installation
|
|
34
23
|
```bash
|
|
35
24
|
npm install georgian-hyphenation
|
|
36
25
|
```
|
|
37
26
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
## 🚀 Quick Start
|
|
41
|
-
|
|
42
|
-
### Browser (CDN)
|
|
43
|
-
```html
|
|
44
|
-
<!DOCTYPE html>
|
|
45
|
-
<html>
|
|
46
|
-
<head>
|
|
47
|
-
<meta charset="UTF-8">
|
|
48
|
-
<style>
|
|
49
|
-
.hyphenated {
|
|
50
|
-
hyphens: manual;
|
|
51
|
-
-webkit-hyphens: manual;
|
|
52
|
-
text-align: justify;
|
|
53
|
-
}
|
|
54
|
-
</style>
|
|
55
|
-
</head>
|
|
56
|
-
<body>
|
|
57
|
-
<div class="hyphenated" id="content"></div>
|
|
58
|
-
|
|
59
|
-
<script type="module">
|
|
60
|
-
import GeorgianHyphenator from 'https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.4/src/javascript/index.js';
|
|
61
|
-
|
|
62
|
-
async function initialize() {
|
|
63
|
-
const hyphenator = new GeorgianHyphenator('\u00AD'); // Soft hyphen
|
|
64
|
-
|
|
65
|
-
// Load dictionary (optional, but recommended)
|
|
66
|
-
await hyphenator.loadDefaultLibrary();
|
|
67
|
-
|
|
68
|
-
const text = "საქართველო არის ძალიან ლამაზი ქვეყანა, სადაც ბევრი ისტორიული ძეგლია.";
|
|
69
|
-
document.getElementById('content').textContent = hyphenator.hyphenateText(text);
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
initialize();
|
|
73
|
-
</script>
|
|
74
|
-
</body>
|
|
75
|
-
</html>
|
|
76
|
-
```
|
|
27
|
+
## Usage
|
|
77
28
|
|
|
78
|
-
|
|
29
|
+
### ES Modules (Modern)
|
|
79
30
|
|
|
80
|
-
### Node.js (ESM)
|
|
81
31
|
```javascript
|
|
82
32
|
import GeorgianHyphenator from 'georgian-hyphenation';
|
|
83
33
|
|
|
84
|
-
const hyphenator = new GeorgianHyphenator(
|
|
34
|
+
const hyphenator = new GeorgianHyphenator();
|
|
85
35
|
|
|
86
|
-
//
|
|
87
|
-
console.log(hyphenator.hyphenate('საქართველო'));
|
|
88
|
-
// Output:
|
|
36
|
+
// Basic hyphenation
|
|
37
|
+
console.log(hyphenator.hyphenate('საქართველო'));
|
|
38
|
+
// Output: საქართველო (the inserted soft hyphens are invisible; break points: სა-ქარ-თვე-ლო)
|
|
89
39
|
|
|
90
|
-
//
|
|
91
|
-
|
|
40
|
+
// Get syllables as array
|
|
41
|
+
console.log(hyphenator.getSyllables('თბილისი'));
|
|
42
|
+
// Output: ['თბი', 'ლი', 'სი']
|
|
92
43
|
|
|
93
|
-
// Hyphenate text
|
|
94
|
-
const text =
|
|
44
|
+
// Hyphenate entire text
|
|
45
|
+
const text = 'საქართველო არის ძალიან ლამაზი ქვეყანა';
|
|
95
46
|
console.log(hyphenator.hyphenateText(text));
|
|
96
|
-
// Output: "გა-მარ-ჯო-ბა, სა-ქარ-თვე-ლო მშვე-ნი-ე-რი ქვე-ყა-ნა-ა!"
|
|
97
47
|
```
|
|
98
48
|
|
|
99
|
-
|
|
49
|
+
### CommonJS (Node.js)
|
|
100
50
|
|
|
101
|
-
### Node.js (CommonJS)
|
|
102
51
|
```javascript
|
|
103
52
|
const GeorgianHyphenator = require('georgian-hyphenation');
|
|
104
53
|
|
|
105
|
-
const hyphenator = new GeorgianHyphenator(
|
|
106
|
-
console.log(hyphenator.hyphenate('
|
|
54
|
+
const hyphenator = new GeorgianHyphenator();
|
|
55
|
+
console.log(hyphenator.hyphenate('კომპიუტერი'));
|
|
107
56
|
```
|
|
108
57
|
|
|
109
|
-
|
|
58
|
+
### Browser (CDN)
|
|
59
|
+
|
|
60
|
+
```html
|
|
61
|
+
<script type="module">
|
|
62
|
+
import GeorgianHyphenator from 'https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.6/src/javascript/index.js';
|
|
63
|
+
|
|
64
|
+
const hyphenator = new GeorgianHyphenator();
|
|
65
|
+
console.log(hyphenator.hyphenate('პროგრამირება'));
|
|
66
|
+
</script>
|
|
67
|
+
```
|
|
110
68
|
|
|
111
|
-
|
|
69
|
+
Or without modules:
|
|
70
|
+
|
|
71
|
+
```html
|
|
72
|
+
<script src="https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.6/src/javascript/index.js"></script>
|
|
73
|
+
<script>
|
|
74
|
+
const hyphenator = new GeorgianHyphenator();
|
|
75
|
+
console.log(hyphenator.hyphenate('საქართველო'));
|
|
76
|
+
</script>
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## API
|
|
80
|
+
|
|
81
|
+
### Constructor
|
|
112
82
|
|
|
113
|
-
### **Constructor**
|
|
114
83
|
```javascript
|
|
115
|
-
new GeorgianHyphenator(hyphenChar = '\u00AD')
|
|
84
|
+
const hyphenator = new GeorgianHyphenator(hyphenChar = '\u00AD');
|
|
116
85
|
```
|
|
117
86
|
|
|
118
87
|
**Parameters:**
|
|
119
|
-
- `hyphenChar` (
|
|
120
|
-
- `'\u00AD'` - Soft hyphen (invisible, default)
|
|
121
|
-
- `'-'` - Regular hyphen (visible)
|
|
122
|
-
- `'·'` - Middle dot
|
|
123
|
-
- Any custom character
|
|
88
|
+
- `hyphenChar` (optional): Character to use for hyphenation. Default is soft hyphen (`\u00AD`)
|
|
124
89
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
### **Methods**
|
|
90
|
+
### Methods
|
|
128
91
|
|
|
129
92
|
#### `hyphenate(word)`
|
|
130
93
|
|
|
131
|
-
Hyphenates a single
|
|
94
|
+
Hyphenates a single word.
|
|
132
95
|
|
|
133
|
-
**Features:**
|
|
134
|
-
- Automatically strips existing hyphens (sanitization)
|
|
135
|
-
- Checks dictionary first (if loaded)
|
|
136
|
-
- Falls back to algorithm
|
|
137
96
|
```javascript
|
|
138
|
-
hyphenator.hyphenate('საქართველო');
|
|
139
|
-
|
|
97
|
+
hyphenator.hyphenate('საქართველო');
|
|
98
|
+
// Returns: 'საქართველო'
|
|
140
99
|
```
|
|
141
100
|
|
|
142
|
-
|
|
101
|
+
#### `getSyllables(word)`
|
|
143
102
|
|
|
144
|
-
|
|
103
|
+
Returns an array of syllables.
|
|
145
104
|
|
|
146
|
-
Hyphenates entire text while preserving:
|
|
147
|
-
- Punctuation
|
|
148
|
-
- Numbers
|
|
149
|
-
- Latin characters
|
|
150
|
-
- Whitespace
|
|
151
105
|
```javascript
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
// → "სა-ქარ-თვე-ლო არის ლა-მა-ზი ქვე-ყა-ნა."
|
|
106
|
+
hyphenator.getSyllables('თბილისი');
|
|
107
|
+
// Returns: ['თბი', 'ლი', 'სი']
|
|
155
108
|
```
|
|
156
109
|
|
|
157
|
-
|
|
110
|
+
#### `hyphenateText(text)`
|
|
158
111
|
|
|
159
|
-
|
|
112
|
+
Hyphenates all words in a text string.
|
|
160
113
|
|
|
161
|
-
Returns syllables as an array.
|
|
162
114
|
```javascript
|
|
163
|
-
hyphenator.
|
|
164
|
-
//
|
|
115
|
+
hyphenator.hyphenateText('საქართველო არის ლამაზი');
|
|
116
|
+
// Returns: 'საქართველო არის ლამაზი' (with invisible soft hyphens at: სა-ქარ-თვე-ლო არის ლა-მა-ზი)
|
|
165
117
|
```
|
|
166
118
|
|
|
167
|
-
|
|
119
|
+
#### `loadLibrary(data)`
|
|
120
|
+
|
|
121
|
+
Load custom exception dictionary.
|
|
122
|
+
|
|
123
|
+
```javascript
|
|
124
|
+
const customWords = {
|
|
125
|
+
'განათლება': 'გა-ნათ-ლე-ბა',
|
|
126
|
+
'უნივერსიტეტი': 'უ-ნი-ვერ-სი-ტე-ტი'
|
|
127
|
+
};
|
|
128
|
+
|
|
129
|
+
hyphenator.loadLibrary(customWords);
|
|
130
|
+
```
|
|
168
131
|
|
|
169
|
-
#### `loadDefaultLibrary()`
|
|
132
|
+
#### `async loadDefaultLibrary()`
|
|
170
133
|
|
|
171
|
-
|
|
134
|
+
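Load the default exception dictionary. In the browser it is fetched from the jsDelivr CDN (requires network access); in Node.js it is read from the package's local `data/exceptions.json`. If the dictionary cannot be loaded, a warning is logged and the algorithm is used on its own.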
|
|
172
135
|
|
|
173
|
-
**Browser:** Fetches from CDN (`jsdelivr`)
|
|
174
|
-
**Node.js:** Loads from local `data/exceptions.json`
|
|
175
136
|
```javascript
|
|
176
137
|
await hyphenator.loadDefaultLibrary();
|
|
177
|
-
console.log('Dictionary loaded!');
|
|
178
138
|
```
|
|
179
139
|
|
|
180
|
-
|
|
140
|
+
## Custom Hyphen Character
|
|
181
141
|
|
|
182
|
-
|
|
142
|
+
You can use any character for hyphenation:
|
|
183
143
|
|
|
184
|
-
Load custom dictionary.
|
|
185
144
|
```javascript
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
145
|
+
// Visible hyphen
|
|
146
|
+
const hyphenator = new GeorgianHyphenator('-');
|
|
147
|
+
console.log(hyphenator.hyphenate('საქართველო'));
|
|
148
|
+
// Output: 'სა-ქარ-თვე-ლო'
|
|
149
|
+
|
|
150
|
+
// Custom separator
|
|
151
|
+
const hyphenator2 = new GeorgianHyphenator('•');
|
|
152
|
+
console.log(hyphenator2.hyphenate('საქართველო'));
|
|
153
|
+
// Output: 'სა•ქარ•თვე•ლო'
|
|
190
154
|
```
|
|
191
155
|
|
|
192
|
-
|
|
156
|
+
## Compound Words (v2.2.6+)
|
|
193
157
|
|
|
194
|
-
|
|
158
|
+
The library now preserves existing hyphens in compound words:
|
|
195
159
|
|
|
196
|
-
|
|
160
|
+
```javascript
|
|
161
|
+
hyphenator.hyphenate('მაგ-რამ');
|
|
162
|
+
// Preserves the hyphen: 'მაგ-რამ'
|
|
197
163
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
საქართველო → vowels at: [1, 3, 5, 7]
|
|
164
|
+
hyphenator.hyphenate('ხელ-ფეხი');
|
|
165
|
+
// Preserves the hyphen: 'ხელ-ფეხი'
|
|
201
166
|
```
|
|
202
167
|
|
|
203
|
-
|
|
168
|
+
## CSS Integration
|
|
204
169
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
170
|
+
Use soft hyphens for automatic line breaking:
|
|
171
|
+
|
|
172
|
+
```css
|
|
173
|
+
.georgian-text {
|
|
174
|
+
hyphens: auto;
|
|
175
|
+
-webkit-hyphens: auto;
|
|
176
|
+
-ms-hyphens: auto;
|
|
177
|
+
}
|
|
208
178
|
```
|
|
209
179
|
|
|
210
|
-
- **V-C-V (1 consonant)**: Split after first vowel
|
|
211
180
|
```javascript
|
|
212
|
-
|
|
181
|
+
const hyphenator = new GeorgianHyphenator('\u00AD'); // soft hyphen
|
|
182
|
+
document.querySelector('.georgian-text').innerHTML =
|
|
183
|
+
hyphenator.hyphenateText('თქვენი ტექსტი აქ');
|
|
213
184
|
```
|
|
214
185
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
3. Default: split after first consonant
|
|
186
|
+
## Algorithm
|
|
187
|
+
|
|
188
|
+
The library uses a phonetic algorithm based on Georgian syllable structure:
|
|
219
189
|
|
|
220
|
-
|
|
190
|
+
1. **Vowel Detection**: Identifies vowels (ა, ე, ი, ო, უ)
|
|
191
|
+
2. **Consonant Cluster Analysis**: Recognizes 67 harmonic consonant clusters (listed below) and keeps each one together
|
|
192
|
+
3. **Gemination Rules**: Splits double consonants between the two identical letters (კკ → კ-კ)
|
|
193
|
+
4. **Orphan Prevention**: Only breaks where at least 2 characters remain before and after the break point, measured from the start and end of the word
|
|
194
|
+
|
|
195
|
+
### Supported Harmonic Clusters
|
|
221
196
|
|
|
222
|
-
These consonant pairs stay together:
|
|
223
197
|
```
|
|
224
|
-
ბლ, ბრ, ბღ, ბზ, გდ, გლ, გმ, გნ, გვ, გზ, გრ, დრ, თლ, თრ, თღ,
|
|
198
|
+
ბლ, ბრ, ბღ, ბზ, გდ, გლ, გმ, გნ, გვ, გზ, გრ, დრ, თლ, თრ, თღ,
|
|
225
199
|
კლ, კმ, კნ, კრ, კვ, მტ, პლ, პრ, ჟღ, რგ, რლ, რმ, სწ, სხ, ტკ,
|
|
226
200
|
ტპ, ტრ, ფლ, ფრ, ფქ, ფშ, ქლ, ქნ, ქვ, ქრ, ღლ, ღრ, ყლ, ყრ, შთ,
|
|
227
201
|
შპ, ჩქ, ჩრ, ცლ, ცნ, ცრ, ცვ, ძგ, ძვ, ძღ, წლ, წრ, წნ, წკ, ჭკ,
|
|
228
202
|
ჭრ, ჭყ, ხლ, ხმ, ხნ, ხვ, ჯგ
|
|
229
203
|
```
|
|
230
204
|
|
|
231
|
-
|
|
205
|
+
## Browser Support
|
|
232
206
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
```
|
|
207
|
+
- ✅ Chrome/Edge 90+
|
|
208
|
+
- ✅ Firefox 88+
|
|
209
|
+
- ✅ Safari 14+
|
|
210
|
+
- ✅ Node.js 14+
|
|
238
211
|
|
|
239
|
-
|
|
212
|
+
## Performance
|
|
240
213
|
|
|
241
|
-
|
|
214
|
+
- Average hyphenation speed: **~0.05ms per word**
|
|
215
|
+
- Memory usage: **~50KB with dictionary loaded**
|
|
216
|
+
- Optimized with `Set` for O(1) cluster lookups
|
|
242
217
|
|
|
243
|
-
|
|
244
|
-
```javascript
|
|
245
|
-
hyphenate('საქართველო') // → სა-ქარ-თვე-ლო
|
|
246
|
-
hyphenate('მთავრობა') // → მთავ-რო-ბა
|
|
247
|
-
hyphenate('დედაქალაქი') // → დე-და-ქა-ლა-ქი
|
|
248
|
-
hyphenate('პარლამენტი') // → პარ-ლა-მენ-ტი
|
|
249
|
-
```
|
|
218
|
+
## Examples
|
|
250
219
|
|
|
251
|
-
###
|
|
252
|
-
```javascript
|
|
253
|
-
hyphenate('ბლოკი') // → ბლო-კი (ბლ stays together)
|
|
254
|
-
hyphenate('კრემი') // → კრე-მი (კრ stays together)
|
|
255
|
-
hyphenate('გლეხი') // → გლე-ხი (გლ stays together)
|
|
256
|
-
hyphenate('პროგრამა') // → პროგ-რა-მა (პრ and გრ preserved)
|
|
257
|
-
```
|
|
220
|
+
### E-book Reader
|
|
258
221
|
|
|
259
|
-
### V-V Split
|
|
260
222
|
```javascript
|
|
261
|
-
|
|
262
|
-
hyphenate('გაიარა') // → გა-ი-ა-რა
|
|
263
|
-
hyphenate('გაანალიზა') // → გა-ა-ნა-ლი-ზა
|
|
264
|
-
```
|
|
223
|
+
const hyphenator = new GeorgianHyphenator();
|
|
265
224
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
// → 'საქართველო არის ლამაზი ქვეყანა' (with soft hyphens)
|
|
225
|
+
function formatText(text) {
|
|
226
|
+
return hyphenator.hyphenateText(text);
|
|
227
|
+
}
|
|
270
228
|
|
|
271
|
-
|
|
272
|
-
hyphenateText('მთავრობა, პარლამენტი და სასამართლო.')
|
|
273
|
-
// → 'მთავრობა, პარლამენტი და სასამართლო.'
|
|
229
|
+
document.getElementById('content').innerHTML = formatText(bookText);
|
|
274
230
|
```
|
|
275
231
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
## 🧪 Testing
|
|
232
|
+
### Text Justification
|
|
279
233
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
npm test
|
|
283
|
-
```
|
|
234
|
+
```javascript
|
|
235
|
+
const hyphenator = new GeorgianHyphenator('\u00AD');
|
|
284
236
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
✅ Test 2: მთავრობა → მთავ-რო-ბა
|
|
289
|
-
...
|
|
290
|
-
📊 Test Results: 13 passed, 0 failed
|
|
291
|
-
🎉 All tests passed!
|
|
237
|
+
const justified = hyphenator.hyphenateText(
|
|
238
|
+
'საქართველო არის ერთ-ერთი უძველესი ქვეყანა მსოფლიოში'
|
|
239
|
+
);
|
|
292
240
|
```
|
|
293
241
|
|
|
294
|
-
|
|
242
|
+
### Dynamic Typography
|
|
295
243
|
|
|
296
|
-
|
|
244
|
+
```javascript
|
|
245
|
+
const hyphenator = new GeorgianHyphenator('·');
|
|
246
|
+
const syllables = hyphenator.getSyllables('პროგრამირება');
|
|
297
247
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
"პროგრამა": "პროგ-რა-მა",
|
|
304
|
-
"ინტერნეტი": "ინ-ტერ-ნე-ტი"
|
|
305
|
-
}
|
|
248
|
+
syllables.forEach((syllable, i) => {
|
|
249
|
+
setTimeout(() => {
|
|
250
|
+
console.log(syllable);
|
|
251
|
+
}, i * 200);
|
|
252
|
+
});
|
|
306
253
|
```
|
|
307
254
|
|
|
308
|
-
|
|
255
|
+
## Changelog
|
|
309
256
|
|
|
310
|
-
|
|
257
|
+
### v2.2.6 (2026-01-30)
|
|
258
|
+
- ✨ Preserves regular hyphens in compound words
|
|
259
|
+
- 🐛 Fixed hyphen stripping to only remove soft hyphens and zero-width spaces
|
|
260
|
+
- 📝 Improved documentation
|
|
311
261
|
|
|
312
262
|
### Version 2.2.4 (2026-01-27)
|
|
313
263
|
|
|
@@ -329,70 +279,37 @@ The library includes `data/exceptions.json` with 150+ Georgian words:
|
|
|
329
279
|
* 🛡️ **Anti-Orphan**: Minimum 2 characters on each side
|
|
330
280
|
* 🎼 **Harmonic Clusters**: Georgian-specific consonant groups
|
|
331
281
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
## 🤝 Contributing
|
|
335
|
-
|
|
336
|
-
Contributions welcome! Please:
|
|
282
|
+
## Contributing
|
|
337
283
|
|
|
338
|
-
|
|
339
|
-
2. Create a feature branch
|
|
340
|
-
3. Run tests: `npm test`
|
|
341
|
-
4. Submit a Pull Request
|
|
342
|
-
|
|
343
|
-
---
|
|
344
|
-
|
|
345
|
-
## 🐛 Bug Reports
|
|
346
|
-
|
|
347
|
-
Found a bug? [Open an issue](https://github.com/guramzhgamadze/georgian-hyphenation/issues)
|
|
348
|
-
|
|
349
|
-
---
|
|
284
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
350
285
|
|
|
351
|
-
##
|
|
286
|
+
## License
|
|
352
287
|
|
|
353
|
-
MIT
|
|
288
|
+
MIT © [Guram Zhgamadze](https://github.com/guramzhgamadze)
|
|
354
289
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
358
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
359
|
-
in the Software without restriction, including without limitation the rights
|
|
360
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
361
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
362
|
-
furnished to do so, subject to the following conditions:
|
|
363
|
-
|
|
364
|
-
The above copyright notice and this permission notice shall be included in all
|
|
365
|
-
copies or substantial portions of the Software.
|
|
366
|
-
|
|
367
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
368
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
369
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
370
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
371
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
372
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
373
|
-
SOFTWARE.
|
|
374
|
-
|
|
375
|
-
---
|
|
376
|
-
|
|
377
|
-
## 📧 Contact
|
|
290
|
+
## Author
|
|
378
291
|
|
|
379
292
|
**Guram Zhgamadze**
|
|
293
|
+
- GitHub: [@guramzhgamadze](https://github.com/guramzhgamadze)
|
|
294
|
+
- Email: guramzhgamadze@gmail.com
|
|
380
295
|
|
|
381
|
-
|
|
382
|
-
- 📧 Email: guramzhgamadze@gmail.com
|
|
383
|
-
- 📦 NPM: [georgian-hyphenation](https://www.npmjs.com/package/georgian-hyphenation)
|
|
296
|
+
## Related
|
|
384
297
|
|
|
385
|
-
|
|
298
|
+
- [Georgian Language Resources](https://www.omniglot.com/writing/georgian.htm)
|
|
299
|
+
- [Unicode Georgian Range](https://unicode.org/charts/PDF/U10A0.pdf)
|
|
386
300
|
|
|
387
|
-
|
|
301
|
+
---
|
|
388
302
|
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
- **Demo:** https://guramzhgamadze.github.io/georgian-hyphenation/
|
|
392
|
-
- **PyPI (Python):** https://pypi.org/project/georgian-hyphenation/
|
|
303
|
+
Made with ❤️ for the Georgian language community
|
|
304
|
+
```
|
|
393
305
|
|
|
394
|
-
|
|
306
|
+
Save this as `README.md` in your package root directory, then:
|
|
395
307
|
|
|
396
|
-
|
|
308
|
+
```bash
|
|
309
|
+
git add README.md
|
|
310
|
+
git commit -m "Add comprehensive README"
|
|
311
|
+
git push
|
|
312
|
+
npm publish
|
|
313
|
+
```
|
|
397
314
|
|
|
398
|
-
|
|
315
|
+
This README includes everything users need to know about your package! 🚀
|
package/package.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "georgian-hyphenation",
|
|
3
|
-
"version": "2.2.
|
|
3
|
+
"version": "2.2.6",
|
|
4
4
|
"description": "Georgian Language Hyphenation Library - Browser + Node.js compatible",
|
|
5
|
-
"main": "src/javascript/index.
|
|
5
|
+
"main": "src/javascript/index.cjs",
|
|
6
6
|
"module": "src/javascript/index.js",
|
|
7
7
|
"type": "module",
|
|
8
8
|
"exports": {
|
|
9
9
|
".": {
|
|
10
10
|
"import": "./src/javascript/index.js",
|
|
11
|
-
"require": "./src/javascript/index.
|
|
11
|
+
"require": "./src/javascript/index.cjs"
|
|
12
12
|
},
|
|
13
13
|
"./data/exceptions.json": "./data/exceptions.json"
|
|
14
14
|
},
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Georgian Hyphenation Library v2.2.6
|
|
3
|
+
* Node.js CommonJS Compatible
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Syllable-based hyphenator for Georgian text (CommonJS build).
 *
 * Words are first looked up in an optional exception dictionary; anything not
 * found there is hyphenated by a rule-based algorithm that places break points
 * between consecutive vowels.
 */
class GeorgianHyphenator {
  /**
   * @param {string} [hyphenChar='\u00AD'] Separator inserted at every break
   *   point. Defaults to the (invisible) soft hyphen.
   */
  constructor(hyphenChar = '\u00AD') {
    this.hyphenChar = hyphenChar;
    this.vowels = 'აეიოუ';
    // Minimum number of characters that must remain before / after a break.
    this.leftMin = 2;
    this.rightMin = 2;

    // Consonant pairs that are kept together when they directly precede a vowel.
    this.harmonicClusters = new Set([
      'ბლ', 'ბრ', 'ბღ', 'ბზ', 'გდ', 'გლ', 'გმ', 'გნ', 'გვ', 'გზ', 'გრ',
      'დრ', 'თლ', 'თრ', 'თღ', 'კლ', 'კმ', 'კნ', 'კრ', 'კვ', 'მტ', 'პლ',
      'პრ', 'ჟღ', 'რგ', 'რლ', 'რმ', 'სწ', 'სხ', 'ტკ', 'ტპ', 'ტრ', 'ფლ',
      'ფრ', 'ფქ', 'ფშ', 'ქლ', 'ქნ', 'ქვ', 'ქრ', 'ღლ', 'ღრ', 'ყლ', 'ყრ',
      'შთ', 'შპ', 'ჩქ', 'ჩრ', 'ცლ', 'ცნ', 'ცრ', 'ცვ', 'ძგ', 'ძვ', 'ძღ',
      'წლ', 'წრ', 'წნ', 'წკ', 'ჭკ', 'ჭრ', 'ჭყ', 'ხლ', 'ხმ', 'ხნ', 'ხვ', 'ჯგ'
    ]);

    // word -> pre-hyphenated form (break points written as '-').
    this.dictionary = new Map();
    this.dictionaryLoaded = false;
  }

  /**
   * Removes previously inserted break markers so text is never hyphenated twice.
   *
   * Strips soft hyphens (U+00AD), zero-width spaces (U+200B) and every
   * occurrence of the configured `hyphenChar`. Note that with `hyphenChar`
   * set to '-', hyphens typed in the input are therefore removed as well.
   *
   * @param {string} text Input text; a falsy value yields ''.
   * @returns {string} The text without break markers.
   */
  _stripHyphens(text) {
    if (!text) return '';
    const cleaned = text.replace(/[\u00AD\u200B]/g, '');
    if (!this.hyphenChar) return cleaned;
    // The separator is removed as a literal string. Building a RegExp from it
    // would treat characters such as '.', '*', '+' or '|' as regex syntax
    // (deleting everything, or throwing a SyntaxError).
    return cleaned.split(String(this.hyphenChar)).join('');
  }

  /**
   * Adds entries to the exception dictionary.
   *
   * @param {Object<string, string>} data Map of word -> hyphenated form, with
   *   break points written as '-'. Non-object values are ignored.
   */
  loadLibrary(data) {
    if (data && typeof data === 'object') {
      Object.entries(data).forEach(([word, hyphenated]) => {
        this.dictionary.set(word, hyphenated);
      });
    }
  }

  /**
   * Loads the bundled exception dictionary from `data/exceptions.json`.
   *
   * Best effort: if the file cannot be loaded a warning is logged and the
   * hyphenator keeps working with the algorithm only. Does nothing when the
   * dictionary has already been loaded.
   *
   * @returns {Promise<void>}
   */
  async loadDefaultLibrary() {
    if (this.dictionaryLoaded) return;

    try {
      const data = require('../../data/exceptions.json');
      this.loadLibrary(data);
      this.dictionaryLoaded = true;
      console.log(`Georgian Hyphenation v2.2.6: Dictionary loaded (${this.dictionary.size} words)`);
    } catch (error) {
      console.warn('Georgian Hyphenation v2.2.6: Local dictionary not found, using algorithm only');
    }
  }

  /**
   * Hyphenates a single word.
   *
   * @param {string} word Word to hyphenate.
   * @returns {string} The word with `hyphenChar` inserted at break points.
   */
  hyphenate(word) {
    const sanitizedWord = this._stripHyphens(word);
    // Punctuation is ignored for the dictionary lookup only.
    const cleanWord = sanitizedWord.replace(/[.,/#!$%^&*;:{}=\-_`~()]/g, "");

    if (this.dictionary.has(cleanWord)) {
      // split/join inserts the separator literally; String.prototype.replace
      // would interpret '$&', '$1', '$$' etc. inside the replacement string.
      return this.dictionary.get(cleanWord).split('-').join(this.hyphenChar);
    }

    return this.applyAlgorithm(sanitizedWord);
  }

  /**
   * Decides where to break the run of characters between two consecutive vowels.
   *
   * @param {string} between Characters strictly between the two vowels.
   * @returns {number} Break offset, counted from the character right after
   *   the first vowel.
   */
  _breakOffset(between) {
    const distance = between.length;

    // V-V or V-C-V: break immediately after the first vowel.
    if (distance < 2) return 0;

    // Gemination: break between the first pair of identical characters.
    for (let j = 0; j < distance - 1; j++) {
      if (between[j] === between[j + 1]) return j + 1;
    }

    // A harmonic cluster right before the second vowel stays together.
    if (this.harmonicClusters.has(between.substring(distance - 2, distance))) {
      return distance - 2;
    }

    // Default: break after the first character following the vowel.
    return 1;
  }

  /**
   * Rule-based hyphenation used when the word is not in the dictionary.
   *
   * @param {string} word Sanitized word (no break markers).
   * @returns {string} The word with `hyphenChar` inserted at break points;
   *   returned unchanged when it is shorter than `leftMin + rightMin` or has
   *   fewer than two vowels.
   */
  applyAlgorithm(word) {
    if (word.length < (this.leftMin + this.rightMin)) return word;

    const vowelIndices = [];
    for (let i = 0; i < word.length; i++) {
      if (this.vowels.includes(word[i])) vowelIndices.push(i);
    }

    if (vowelIndices.length < 2) return word;

    const insertPoints = [];
    for (let i = 0; i < vowelIndices.length - 1; i++) {
      const v1 = vowelIndices[i];
      const v2 = vowelIndices[i + 1];
      const candidatePos = v1 + 1 + this._breakOffset(word.substring(v1 + 1, v2));

      // Orphan prevention: keep enough characters on both sides of the break.
      if (candidatePos >= this.leftMin && (word.length - candidatePos) >= this.rightMin) {
        insertPoints.push(candidatePos);
      }
    }

    // Insert from right to left so earlier indices stay valid.
    const result = word.split('');
    for (let i = insertPoints.length - 1; i >= 0; i--) {
      result.splice(insertPoints[i], 0, this.hyphenChar);
    }
    return result.join('');
  }

  /**
   * Splits a word into syllables.
   *
   * @param {string} word Word to split.
   * @returns {string[]} The hyphenated word split on `hyphenChar`.
   */
  getSyllables(word) {
    return this.hyphenate(word).split(this.hyphenChar);
  }

  /**
   * Hyphenates every Georgian word of at least four letters in a text and
   * leaves everything else (spaces, punctuation, other scripts) untouched.
   *
   * @param {string} text Text to process; a falsy value yields ''.
   * @returns {string} The hyphenated text.
   */
  hyphenateText(text) {
    if (!text) return '';
    const sanitizedText = this._stripHyphens(text);
    // The capture group keeps the Georgian runs in the split result.
    const parts = sanitizedText.split(/([ა-ჰ]+)/);

    return parts.map(part => {
      if (part.length >= 4 && /[ა-ჰ]/.test(part)) {
        return this.hyphenate(part);
      }
      return part;
    }).join('');
  }
}
|
|
137
|
+
|
|
138
|
+
module.exports = GeorgianHyphenator;
|
|
139
|
+
module.exports.default = GeorgianHyphenator;
|
package/src/javascript/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Georgian Hyphenation Library v2.2.
|
|
3
|
-
* Browser + Node.js Compatible
|
|
2
|
+
* Georgian Hyphenation Library v2.2.6
|
|
3
|
+
* Browser + Node.js Compatible (ES Module)
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
export default class GeorgianHyphenator {
|
|
@@ -29,8 +29,8 @@ export default class GeorgianHyphenator {
|
|
|
29
29
|
*/
|
|
30
30
|
_stripHyphens(text) {
|
|
31
31
|
if (!text) return '';
|
|
32
|
-
// Remove soft hyphens
|
|
33
|
-
return text.replace(/[\u00AD
|
|
32
|
+
// Remove soft hyphens and zero-width spaces only
|
|
33
|
+
return text.replace(/[\u00AD\u200B]/g, '').replace(new RegExp(this.hyphenChar, 'g'), '');
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
/**
|
|
@@ -54,7 +54,7 @@ export default class GeorgianHyphenator {
|
|
|
54
54
|
if (typeof window !== 'undefined' && typeof fetch !== 'undefined') {
|
|
55
55
|
try {
|
|
56
56
|
// ✅ სწორი CDN URL - jsdelivr უფრო სანდოა unpkg-ზე
|
|
57
|
-
const response = await fetch('https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.
|
|
57
|
+
const response = await fetch('https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.6/data/exceptions.json');
|
|
58
58
|
|
|
59
59
|
if (!response.ok) {
|
|
60
60
|
throw new Error(`HTTP ${response.status}`);
|
|
@@ -64,22 +64,23 @@ export default class GeorgianHyphenator {
|
|
|
64
64
|
this.loadLibrary(data);
|
|
65
65
|
this.dictionaryLoaded = true;
|
|
66
66
|
|
|
67
|
-
console.log(`Georgian Hyphenation v2.2.
|
|
67
|
+
console.log(`Georgian Hyphenation v2.2.6: Dictionary loaded (${this.dictionary.size} words)`);
|
|
68
68
|
} catch (error) {
|
|
69
|
-
console.warn('Georgian Hyphenation v2.2.
|
|
69
|
+
console.warn('Georgian Hyphenation v2.2.6: Dictionary not available, using algorithm only');
|
|
70
70
|
console.warn('Error:', error.message);
|
|
71
71
|
}
|
|
72
72
|
}
|
|
73
|
-
// Node.js Environment (Dynamic Import)
|
|
74
|
-
else if (typeof process !== 'undefined'
|
|
73
|
+
// Node.js Environment (Dynamic Import for ESM)
|
|
74
|
+
else if (typeof process !== 'undefined') {
|
|
75
75
|
try {
|
|
76
|
-
//
|
|
77
|
-
const
|
|
76
|
+
// Use dynamic import for JSON in ESM
|
|
77
|
+
const module = await import('../../data/exceptions.json', { assert: { type: 'json' } });
|
|
78
|
+
const data = module.default;
|
|
78
79
|
this.loadLibrary(data);
|
|
79
80
|
this.dictionaryLoaded = true;
|
|
80
|
-
console.log(`Georgian Hyphenation v2.2.
|
|
81
|
+
console.log(`Georgian Hyphenation v2.2.6: Dictionary loaded (${this.dictionary.size} words)`);
|
|
81
82
|
} catch (error) {
|
|
82
|
-
console.warn('Georgian Hyphenation v2.2.
|
|
83
|
+
console.warn('Georgian Hyphenation v2.2.6: Local dictionary not found, using algorithm only');
|
|
83
84
|
}
|
|
84
85
|
}
|
|
85
86
|
}
|
|
@@ -186,20 +187,7 @@ export default class GeorgianHyphenator {
|
|
|
186
187
|
}
|
|
187
188
|
}
|
|
188
189
|
|
|
189
|
-
|
|
190
|
-
* ✅ კროს-პლატფორმული Export
|
|
191
|
-
*/
|
|
192
|
-
|
|
193
|
-
// 1. ES Module (Modern)
|
|
194
|
-
// export default GeorgianHyphenator; (already at top)
|
|
195
|
-
|
|
196
|
-
// 2. Browser Global (for <script> tag without type="module")
|
|
190
|
+
// Browser Global (for <script> tag without type="module")
|
|
197
191
|
if (typeof window !== 'undefined') {
|
|
198
192
|
window.GeorgianHyphenator = GeorgianHyphenator;
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
// 3. Node.js CommonJS (for require())
|
|
202
|
-
if (typeof module !== 'undefined' && module.exports) {
|
|
203
|
-
module.exports = GeorgianHyphenator;
|
|
204
|
-
module.exports.default = GeorgianHyphenator;
|
|
205
193
|
}
|