georgian-hyphenation 2.2.5 → 2.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +384 -541
- package/package.json +7 -3
- package/src/javascript/index.cjs +273 -12
- package/src/javascript/index.js +214 -7
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "georgian-hyphenation",
|
|
3
|
-
"version": "2.2.5",
|
|
4
|
-
"description": "Georgian Language Hyphenation Library - Browser + Node.js compatible",
|
|
3
|
+
"version": "2.2.7",
|
|
4
|
+
"description": "Georgian Language Hyphenation Library with 17+ utility functions - Browser + Node.js compatible",
|
|
5
5
|
"main": "src/javascript/index.cjs",
|
|
6
6
|
"module": "src/javascript/index.js",
|
|
7
7
|
"type": "module",
|
|
@@ -28,7 +28,11 @@
|
|
|
28
28
|
"kartuli",
|
|
29
29
|
"nlp",
|
|
30
30
|
"browser",
|
|
31
|
-
"nodejs"
|
|
31
|
+
"nodejs",
|
|
32
|
+
"syllables",
|
|
33
|
+
"typography",
|
|
34
|
+
"i18n",
|
|
35
|
+
"html-hyphenation"
|
|
32
36
|
],
|
|
33
37
|
"author": "Guram Zhgamadze <guramzhgamadze@gmail.com>",
|
|
34
38
|
"license": "MIT",
|
package/src/javascript/index.cjs
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Georgian Hyphenation Library v2.2.
|
|
3
|
-
* Node.js
|
|
2
|
+
* Georgian Hyphenation Library v2.2.7
|
|
3
|
+
* Browser + Node.js Compatible (ES Module)
|
|
4
|
+
* Enhanced with additional utility functions
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
|
-
class GeorgianHyphenator {
|
|
7
|
+
export default class GeorgianHyphenator {
|
|
7
8
|
constructor(hyphenChar = '\u00AD') {
|
|
8
9
|
this.hyphenChar = hyphenChar;
|
|
9
10
|
this.vowels = 'აეიოუ';
|
|
10
11
|
this.leftMin = 2;
|
|
11
12
|
this.rightMin = 2;
|
|
12
13
|
|
|
14
|
+
// ოპტიმიზაცია: გამოყენებულია Set სწრაფი ძებნისთვის (O(1))
|
|
13
15
|
this.harmonicClusters = new Set([
|
|
14
16
|
'ბლ', 'ბრ', 'ბღ', 'ბზ', 'გდ', 'გლ', 'გმ', 'გნ', 'გვ', 'გზ', 'გრ',
|
|
15
17
|
'დრ', 'თლ', 'თრ', 'თღ', 'კლ', 'კმ', 'კნ', 'კრ', 'კვ', 'მტ', 'პლ',
|
|
@@ -23,12 +25,18 @@ class GeorgianHyphenator {
|
|
|
23
25
|
this.dictionaryLoaded = false;
|
|
24
26
|
}
|
|
25
27
|
|
|
28
|
+
/**
|
|
29
|
+
* შლის არსებულ დამარცვლის სიმბოლოებს (Sanitization)
|
|
30
|
+
*/
|
|
26
31
|
_stripHyphens(text) {
|
|
27
32
|
if (!text) return '';
|
|
28
33
|
// Remove soft hyphens and zero-width spaces only
|
|
29
34
|
return text.replace(/[\u00AD\u200B]/g, '').replace(new RegExp(this.hyphenChar, 'g'), '');
|
|
30
35
|
}
|
|
31
36
|
|
|
37
|
+
/**
|
|
38
|
+
* ტვირთავს მომხმარებლის dictionary-ს
|
|
39
|
+
*/
|
|
32
40
|
loadLibrary(data) {
|
|
33
41
|
if (data && typeof data === 'object') {
|
|
34
42
|
Object.entries(data).forEach(([word, hyphenated]) => {
|
|
@@ -37,30 +45,66 @@ class GeorgianHyphenator {
|
|
|
37
45
|
}
|
|
38
46
|
}
|
|
39
47
|
|
|
48
|
+
/**
|
|
49
|
+
* ✅ ტვირთავს default dictionary-ს (Browser + Node.js compatible)
|
|
50
|
+
*/
|
|
40
51
|
async loadDefaultLibrary() {
|
|
41
52
|
if (this.dictionaryLoaded) return;
|
|
42
53
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
54
|
+
// Browser Environment
|
|
55
|
+
if (typeof window !== 'undefined' && typeof fetch !== 'undefined') {
|
|
56
|
+
try {
|
|
57
|
+
// ✅ სწორი CDN URL - jsdelivr უფრო სანდოა unpkg-ზე
|
|
58
|
+
const response = await fetch('https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.7/data/exceptions.json');
|
|
59
|
+
|
|
60
|
+
if (!response.ok) {
|
|
61
|
+
throw new Error(`HTTP ${response.status}`);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
const data = await response.json();
|
|
65
|
+
this.loadLibrary(data);
|
|
66
|
+
this.dictionaryLoaded = true;
|
|
67
|
+
|
|
68
|
+
console.log(`Georgian Hyphenation v2.2.7: Dictionary loaded (${this.dictionary.size} words)`);
|
|
69
|
+
} catch (error) {
|
|
70
|
+
console.warn('Georgian Hyphenation v2.2.7: Dictionary not available, using algorithm only');
|
|
71
|
+
console.warn('Error:', error.message);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
// Node.js Environment (Dynamic Import for ESM)
|
|
75
|
+
else if (typeof process !== 'undefined') {
|
|
76
|
+
try {
|
|
77
|
+
// Import from ../../data/exceptions.json (from src/javascript/ to data/)
|
|
78
|
+
const module = await import('../../data/exceptions.json', { assert: { type: 'json' } });
|
|
79
|
+
const data = module.default;
|
|
80
|
+
this.loadLibrary(data);
|
|
81
|
+
this.dictionaryLoaded = true;
|
|
82
|
+
console.log(`Georgian Hyphenation v2.2.7: Dictionary loaded (${this.dictionary.size} words)`);
|
|
83
|
+
} catch (error) {
|
|
84
|
+
console.warn('Georgian Hyphenation v2.2.7: Local dictionary not found, using algorithm only');
|
|
85
|
+
}
|
|
50
86
|
}
|
|
51
87
|
}
|
|
52
88
|
|
|
89
|
+
/**
|
|
90
|
+
* ამარცვლებს ერთ სიტყვას
|
|
91
|
+
*/
|
|
53
92
|
hyphenate(word) {
|
|
54
93
|
const sanitizedWord = this._stripHyphens(word);
|
|
55
94
|
const cleanWord = sanitizedWord.replace(/[.,/#!$%^&*;:{}=\-_`~()]/g, "");
|
|
56
95
|
|
|
96
|
+
// Dictionary check
|
|
57
97
|
if (this.dictionary.has(cleanWord)) {
|
|
58
98
|
return this.dictionary.get(cleanWord).replace(/-/g, this.hyphenChar);
|
|
59
99
|
}
|
|
60
100
|
|
|
101
|
+
// Algorithm fallback
|
|
61
102
|
return this.applyAlgorithm(sanitizedWord);
|
|
62
103
|
}
|
|
63
104
|
|
|
105
|
+
/**
|
|
106
|
+
* ალგორითმის გამოყენება
|
|
107
|
+
*/
|
|
64
108
|
applyAlgorithm(word) {
|
|
65
109
|
if (word.length < (this.leftMin + this.rightMin)) return word;
|
|
66
110
|
|
|
@@ -83,6 +127,7 @@ class GeorgianHyphenator {
|
|
|
83
127
|
if (distance === 0 || distance === 1) {
|
|
84
128
|
candidatePos = v1 + 1;
|
|
85
129
|
} else {
|
|
130
|
+
// Gemination check
|
|
86
131
|
let doubleConsonantIndex = -1;
|
|
87
132
|
for (let j = 0; j < betweenSubstring.length - 1; j++) {
|
|
88
133
|
if (betweenSubstring[j] === betweenSubstring[j + 1]) {
|
|
@@ -94,6 +139,7 @@ class GeorgianHyphenator {
|
|
|
94
139
|
if (doubleConsonantIndex !== -1) {
|
|
95
140
|
candidatePos = v1 + 1 + doubleConsonantIndex + 1;
|
|
96
141
|
} else {
|
|
142
|
+
// Harmonic cluster check
|
|
97
143
|
let breakIndex = -1;
|
|
98
144
|
if (distance >= 2) {
|
|
99
145
|
const lastTwo = betweenSubstring.substring(distance - 2, distance);
|
|
@@ -105,6 +151,7 @@ class GeorgianHyphenator {
|
|
|
105
151
|
}
|
|
106
152
|
}
|
|
107
153
|
|
|
154
|
+
// Anti-orphan protection
|
|
108
155
|
if (candidatePos >= this.leftMin && (word.length - candidatePos) >= this.rightMin) {
|
|
109
156
|
insertPoints.push(candidatePos);
|
|
110
157
|
}
|
|
@@ -117,10 +164,16 @@ class GeorgianHyphenator {
|
|
|
117
164
|
return result.join('');
|
|
118
165
|
}
|
|
119
166
|
|
|
167
|
+
/**
|
|
168
|
+
* მარცვლების მიღება მასივის სახით
|
|
169
|
+
*/
|
|
120
170
|
getSyllables(word) {
|
|
121
171
|
return this.hyphenate(word).split(this.hyphenChar);
|
|
122
172
|
}
|
|
123
173
|
|
|
174
|
+
/**
|
|
175
|
+
* მთელი ტექსტის დამარცვლა
|
|
176
|
+
*/
|
|
124
177
|
hyphenateText(text) {
|
|
125
178
|
if (!text) return '';
|
|
126
179
|
const sanitizedText = this._stripHyphens(text);
|
|
@@ -133,7 +186,215 @@ class GeorgianHyphenator {
|
|
|
133
186
|
return part;
|
|
134
187
|
}).join('');
|
|
135
188
|
}
|
|
189
|
+
|
|
190
|
+
// ========================================
|
|
191
|
+
// NEW UTILITY FUNCTIONS (v2.2.7)
|
|
192
|
+
// ========================================
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Removes all hyphenation from text (public method)
|
|
196
|
+
* @param {string} text - Text with hyphens to remove
|
|
197
|
+
* @returns {string} Text without hyphens
|
|
198
|
+
*/
|
|
199
|
+
unhyphenate(text) {
|
|
200
|
+
return this._stripHyphens(text);
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Counts syllables in a word
|
|
205
|
+
* @param {string} word - Word to count syllables
|
|
206
|
+
* @returns {number} Number of syllables
|
|
207
|
+
*/
|
|
208
|
+
countSyllables(word) {
|
|
209
|
+
return this.getSyllables(word).length;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
/**
|
|
213
|
+
* Gets the number of hyphenation points in a word
|
|
214
|
+
* @param {string} word - Word to analyze
|
|
215
|
+
* @returns {number} Number of hyphenation points
|
|
216
|
+
*/
|
|
217
|
+
getHyphenationPoints(word) {
|
|
218
|
+
const hyphenated = this.hyphenate(word);
|
|
219
|
+
const matches = hyphenated.match(new RegExp(this.hyphenChar, 'g'));
|
|
220
|
+
return matches ? matches.length : 0;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
/**
|
|
224
|
+
* Checks if text contains only Georgian characters
|
|
225
|
+
* @param {string} text - Text to validate
|
|
226
|
+
* @returns {boolean} True if only Georgian characters
|
|
227
|
+
*/
|
|
228
|
+
isGeorgian(text) {
|
|
229
|
+
if (!text) return false;
|
|
230
|
+
return /^[ა-ჰ]+$/.test(text);
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
/**
|
|
234
|
+
* Checks if a word can be hyphenated (meets minimum length)
|
|
235
|
+
* @param {string} word - Word to check
|
|
236
|
+
* @returns {boolean} True if word can be hyphenated
|
|
237
|
+
*/
|
|
238
|
+
canHyphenate(word) {
|
|
239
|
+
if (!word) return false;
|
|
240
|
+
return word.length >= (this.leftMin + this.rightMin);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Hyphenates multiple words at once
|
|
245
|
+
* @param {string[]} words - Array of words to hyphenate
|
|
246
|
+
* @returns {string[]} Array of hyphenated words
|
|
247
|
+
*/
|
|
248
|
+
hyphenateWords(words) {
|
|
249
|
+
return words.map(word => this.hyphenate(word));
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Hyphenates HTML content while preserving tags
|
|
254
|
+
* Skips <script>, <style>, <code>, <pre> tags
|
|
255
|
+
* @param {string} html - HTML content to hyphenate
|
|
256
|
+
* @returns {string} Hyphenated HTML
|
|
257
|
+
*/
|
|
258
|
+
hyphenateHTML(html) {
|
|
259
|
+
if (!html) return '';
|
|
260
|
+
|
|
261
|
+
// Tags to skip entirely
|
|
262
|
+
const skipTags = ['script', 'style', 'code', 'pre', 'textarea'];
|
|
263
|
+
const skipPattern = new RegExp(`<(${skipTags.join('|')})[^>]*>.*?</\\1>`, 'gis');
|
|
264
|
+
|
|
265
|
+
// Store skipped content
|
|
266
|
+
const skipped = [];
|
|
267
|
+
let placeholder = html.replace(skipPattern, (match) => {
|
|
268
|
+
skipped.push(match);
|
|
269
|
+
return `___SKIP_${skipped.length - 1}___`;
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
// Split by tags to preserve HTML structure
|
|
273
|
+
const parts = placeholder.split(/(<[^>]+>)/);
|
|
274
|
+
|
|
275
|
+
const processed = parts.map(part => {
|
|
276
|
+
// Skip HTML tags themselves
|
|
277
|
+
if (part.startsWith('<')) {
|
|
278
|
+
return part;
|
|
279
|
+
}
|
|
280
|
+
// Process text content
|
|
281
|
+
return this.hyphenateText(part);
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
// Restore skipped content
|
|
285
|
+
let result = processed.join('');
|
|
286
|
+
skipped.forEach((content, index) => {
|
|
287
|
+
result = result.replace(`___SKIP_${index}___`, content);
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
return result;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
/**
|
|
294
|
+
* Sets the minimum characters before first hyphen
|
|
295
|
+
* @param {number} value - Minimum left characters (default: 2)
|
|
296
|
+
* @returns {GeorgianHyphenator} Returns this for method chaining
|
|
297
|
+
*/
|
|
298
|
+
setLeftMin(value) {
|
|
299
|
+
if (typeof value === 'number' && value >= 1) {
|
|
300
|
+
this.leftMin = value;
|
|
301
|
+
}
|
|
302
|
+
return this;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
/**
|
|
306
|
+
* Sets the minimum characters after last hyphen
|
|
307
|
+
* @param {number} value - Minimum right characters (default: 2)
|
|
308
|
+
* @returns {GeorgianHyphenator} Returns this for method chaining
|
|
309
|
+
*/
|
|
310
|
+
setRightMin(value) {
|
|
311
|
+
if (typeof value === 'number' && value >= 1) {
|
|
312
|
+
this.rightMin = value;
|
|
313
|
+
}
|
|
314
|
+
return this;
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
/**
|
|
318
|
+
* Changes the hyphen character
|
|
319
|
+
* @param {string} char - New hyphen character
|
|
320
|
+
* @returns {GeorgianHyphenator} Returns this for method chaining
|
|
321
|
+
*/
|
|
322
|
+
setHyphenChar(char) {
|
|
323
|
+
if (typeof char === 'string' && char.length > 0) {
|
|
324
|
+
this.hyphenChar = char;
|
|
325
|
+
}
|
|
326
|
+
return this;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
/**
|
|
330
|
+
* Adds a single hyphenation exception to dictionary
|
|
331
|
+
* @param {string} word - Original word
|
|
332
|
+
* @param {string} hyphenated - Hyphenated version (use '-' for breaks)
|
|
333
|
+
* @returns {GeorgianHyphenator} Returns this for method chaining
|
|
334
|
+
*/
|
|
335
|
+
addException(word, hyphenated) {
|
|
336
|
+
if (word && hyphenated) {
|
|
337
|
+
this.dictionary.set(word, hyphenated);
|
|
338
|
+
}
|
|
339
|
+
return this;
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
/**
|
|
343
|
+
* Removes a hyphenation exception from dictionary
|
|
344
|
+
* @param {string} word - Word to remove
|
|
345
|
+
* @returns {boolean} True if word was removed
|
|
346
|
+
*/
|
|
347
|
+
removeException(word) {
|
|
348
|
+
return this.dictionary.delete(word);
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
/**
|
|
352
|
+
* Exports the current dictionary as a plain object
|
|
353
|
+
* @returns {Object} Dictionary as key-value pairs
|
|
354
|
+
*/
|
|
355
|
+
exportDictionary() {
|
|
356
|
+
return Object.fromEntries(this.dictionary);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
/**
|
|
360
|
+
* Gets the current dictionary size
|
|
361
|
+
* @returns {number} Number of words in dictionary
|
|
362
|
+
*/
|
|
363
|
+
getDictionarySize() {
|
|
364
|
+
return this.dictionary.size;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
/**
|
|
368
|
+
* Adds a custom harmonic cluster
|
|
369
|
+
* @param {string} cluster - Two-character cluster (e.g., 'ბრ')
|
|
370
|
+
* @returns {GeorgianHyphenator} Returns this for method chaining
|
|
371
|
+
*/
|
|
372
|
+
addHarmonicCluster(cluster) {
|
|
373
|
+
if (typeof cluster === 'string' && cluster.length === 2) {
|
|
374
|
+
this.harmonicClusters.add(cluster);
|
|
375
|
+
}
|
|
376
|
+
return this;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
/**
|
|
380
|
+
* Removes a harmonic cluster
|
|
381
|
+
* @param {string} cluster - Cluster to remove
|
|
382
|
+
* @returns {boolean} True if cluster was removed
|
|
383
|
+
*/
|
|
384
|
+
removeHarmonicCluster(cluster) {
|
|
385
|
+
return this.harmonicClusters.delete(cluster);
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
/**
|
|
389
|
+
* Gets all harmonic clusters
|
|
390
|
+
* @returns {string[]} Array of harmonic clusters
|
|
391
|
+
*/
|
|
392
|
+
getHarmonicClusters() {
|
|
393
|
+
return Array.from(this.harmonicClusters);
|
|
394
|
+
}
|
|
136
395
|
}
|
|
137
396
|
|
|
138
|
-
|
|
139
|
-
|
|
397
|
+
// Expose the class as a browser global so that a classic <script> include
// (without type="module") can reach it.
if (typeof window !== 'undefined') {
  Object.assign(window, { GeorgianHyphenator });
}
|
package/src/javascript/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Georgian Hyphenation Library v2.2.
|
|
2
|
+
* Georgian Hyphenation Library v2.2.7
|
|
3
3
|
* Browser + Node.js Compatible (ES Module)
|
|
4
|
+
* Enhanced with additional utility functions
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
7
|
export default class GeorgianHyphenator {
|
|
@@ -54,7 +55,7 @@ export default class GeorgianHyphenator {
|
|
|
54
55
|
if (typeof window !== 'undefined' && typeof fetch !== 'undefined') {
|
|
55
56
|
try {
|
|
56
57
|
// ✅ სწორი CDN URL - jsdelivr უფრო სანდოა unpkg-ზე
|
|
57
|
-
const response = await fetch('https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.
|
|
58
|
+
const response = await fetch('https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.7/data/exceptions.json');
|
|
58
59
|
|
|
59
60
|
if (!response.ok) {
|
|
60
61
|
throw new Error(`HTTP ${response.status}`);
|
|
@@ -64,23 +65,23 @@ export default class GeorgianHyphenator {
|
|
|
64
65
|
this.loadLibrary(data);
|
|
65
66
|
this.dictionaryLoaded = true;
|
|
66
67
|
|
|
67
|
-
console.log(`Georgian Hyphenation v2.2.
|
|
68
|
+
console.log(`Georgian Hyphenation v2.2.7: Dictionary loaded (${this.dictionary.size} words)`);
|
|
68
69
|
} catch (error) {
|
|
69
|
-
console.warn('Georgian Hyphenation v2.2.
|
|
70
|
+
console.warn('Georgian Hyphenation v2.2.7: Dictionary not available, using algorithm only');
|
|
70
71
|
console.warn('Error:', error.message);
|
|
71
72
|
}
|
|
72
73
|
}
|
|
73
74
|
// Node.js Environment (Dynamic Import for ESM)
|
|
74
75
|
else if (typeof process !== 'undefined') {
|
|
75
76
|
try {
|
|
76
|
-
//
|
|
77
|
+
// Import from ../../data/exceptions.json (from src/javascript/ to data/)
|
|
77
78
|
const module = await import('../../data/exceptions.json', { assert: { type: 'json' } });
|
|
78
79
|
const data = module.default;
|
|
79
80
|
this.loadLibrary(data);
|
|
80
81
|
this.dictionaryLoaded = true;
|
|
81
|
-
console.log(`Georgian Hyphenation v2.2.
|
|
82
|
+
console.log(`Georgian Hyphenation v2.2.7: Dictionary loaded (${this.dictionary.size} words)`);
|
|
82
83
|
} catch (error) {
|
|
83
|
-
console.warn('Georgian Hyphenation v2.2.
|
|
84
|
+
console.warn('Georgian Hyphenation v2.2.7: Local dictionary not found, using algorithm only');
|
|
84
85
|
}
|
|
85
86
|
}
|
|
86
87
|
}
|
|
@@ -185,6 +186,212 @@ export default class GeorgianHyphenator {
|
|
|
185
186
|
return part;
|
|
186
187
|
}).join('');
|
|
187
188
|
}
|
|
189
|
+
|
|
190
|
+
// ========================================
|
|
191
|
+
// NEW UTILITY FUNCTIONS (v2.2.7)
|
|
192
|
+
// ========================================
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Removes all hyphenation from text (public method)
|
|
196
|
+
* @param {string} text - Text with hyphens to remove
|
|
197
|
+
* @returns {string} Text without hyphens
|
|
198
|
+
*/
|
|
199
|
+
unhyphenate(text) {
|
|
200
|
+
return this._stripHyphens(text);
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Counts syllables in a word
|
|
205
|
+
* @param {string} word - Word to count syllables
|
|
206
|
+
* @returns {number} Number of syllables
|
|
207
|
+
*/
|
|
208
|
+
countSyllables(word) {
|
|
209
|
+
return this.getSyllables(word).length;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
/**
|
|
213
|
+
* Gets the number of hyphenation points in a word
|
|
214
|
+
* @param {string} word - Word to analyze
|
|
215
|
+
* @returns {number} Number of hyphenation points
|
|
216
|
+
*/
|
|
217
|
+
getHyphenationPoints(word) {
|
|
218
|
+
const hyphenated = this.hyphenate(word);
|
|
219
|
+
const matches = hyphenated.match(new RegExp(this.hyphenChar, 'g'));
|
|
220
|
+
return matches ? matches.length : 0;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
/**
|
|
224
|
+
* Checks if text contains only Georgian characters
|
|
225
|
+
* @param {string} text - Text to validate
|
|
226
|
+
* @returns {boolean} True if only Georgian characters
|
|
227
|
+
*/
|
|
228
|
+
isGeorgian(text) {
|
|
229
|
+
if (!text) return false;
|
|
230
|
+
return /^[ა-ჰ]+$/.test(text);
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
/**
|
|
234
|
+
* Checks if a word can be hyphenated (meets minimum length)
|
|
235
|
+
* @param {string} word - Word to check
|
|
236
|
+
* @returns {boolean} True if word can be hyphenated
|
|
237
|
+
*/
|
|
238
|
+
canHyphenate(word) {
|
|
239
|
+
if (!word) return false;
|
|
240
|
+
return word.length >= (this.leftMin + this.rightMin);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Hyphenates multiple words at once
|
|
245
|
+
* @param {string[]} words - Array of words to hyphenate
|
|
246
|
+
* @returns {string[]} Array of hyphenated words
|
|
247
|
+
*/
|
|
248
|
+
hyphenateWords(words) {
|
|
249
|
+
return words.map(word => this.hyphenate(word));
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Hyphenates HTML content while preserving tags
|
|
254
|
+
* Skips <script>, <style>, <code>, <pre> tags
|
|
255
|
+
* @param {string} html - HTML content to hyphenate
|
|
256
|
+
* @returns {string} Hyphenated HTML
|
|
257
|
+
*/
|
|
258
|
+
hyphenateHTML(html) {
|
|
259
|
+
if (!html) return '';
|
|
260
|
+
|
|
261
|
+
// Tags to skip entirely
|
|
262
|
+
const skipTags = ['script', 'style', 'code', 'pre', 'textarea'];
|
|
263
|
+
const skipPattern = new RegExp(`<(${skipTags.join('|')})[^>]*>.*?</\\1>`, 'gis');
|
|
264
|
+
|
|
265
|
+
// Store skipped content
|
|
266
|
+
const skipped = [];
|
|
267
|
+
let placeholder = html.replace(skipPattern, (match) => {
|
|
268
|
+
skipped.push(match);
|
|
269
|
+
return `___SKIP_${skipped.length - 1}___`;
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
// Split by tags to preserve HTML structure
|
|
273
|
+
const parts = placeholder.split(/(<[^>]+>)/);
|
|
274
|
+
|
|
275
|
+
const processed = parts.map(part => {
|
|
276
|
+
// Skip HTML tags themselves
|
|
277
|
+
if (part.startsWith('<')) {
|
|
278
|
+
return part;
|
|
279
|
+
}
|
|
280
|
+
// Process text content
|
|
281
|
+
return this.hyphenateText(part);
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
// Restore skipped content
|
|
285
|
+
let result = processed.join('');
|
|
286
|
+
skipped.forEach((content, index) => {
|
|
287
|
+
result = result.replace(`___SKIP_${index}___`, content);
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
return result;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
/**
|
|
294
|
+
* Sets the minimum characters before first hyphen
|
|
295
|
+
* @param {number} value - Minimum left characters (default: 2)
|
|
296
|
+
* @returns {GeorgianHyphenator} Returns this for method chaining
|
|
297
|
+
*/
|
|
298
|
+
setLeftMin(value) {
|
|
299
|
+
if (typeof value === 'number' && value >= 1) {
|
|
300
|
+
this.leftMin = value;
|
|
301
|
+
}
|
|
302
|
+
return this;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
/**
|
|
306
|
+
* Sets the minimum characters after last hyphen
|
|
307
|
+
* @param {number} value - Minimum right characters (default: 2)
|
|
308
|
+
* @returns {GeorgianHyphenator} Returns this for method chaining
|
|
309
|
+
*/
|
|
310
|
+
setRightMin(value) {
|
|
311
|
+
if (typeof value === 'number' && value >= 1) {
|
|
312
|
+
this.rightMin = value;
|
|
313
|
+
}
|
|
314
|
+
return this;
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
/**
|
|
318
|
+
* Changes the hyphen character
|
|
319
|
+
* @param {string} char - New hyphen character
|
|
320
|
+
* @returns {GeorgianHyphenator} Returns this for method chaining
|
|
321
|
+
*/
|
|
322
|
+
setHyphenChar(char) {
|
|
323
|
+
if (typeof char === 'string' && char.length > 0) {
|
|
324
|
+
this.hyphenChar = char;
|
|
325
|
+
}
|
|
326
|
+
return this;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
/**
|
|
330
|
+
* Adds a single hyphenation exception to dictionary
|
|
331
|
+
* @param {string} word - Original word
|
|
332
|
+
* @param {string} hyphenated - Hyphenated version (use '-' for breaks)
|
|
333
|
+
* @returns {GeorgianHyphenator} Returns this for method chaining
|
|
334
|
+
*/
|
|
335
|
+
addException(word, hyphenated) {
|
|
336
|
+
if (word && hyphenated) {
|
|
337
|
+
this.dictionary.set(word, hyphenated);
|
|
338
|
+
}
|
|
339
|
+
return this;
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
/**
|
|
343
|
+
* Removes a hyphenation exception from dictionary
|
|
344
|
+
* @param {string} word - Word to remove
|
|
345
|
+
* @returns {boolean} True if word was removed
|
|
346
|
+
*/
|
|
347
|
+
removeException(word) {
|
|
348
|
+
return this.dictionary.delete(word);
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
/**
|
|
352
|
+
* Exports the current dictionary as a plain object
|
|
353
|
+
* @returns {Object} Dictionary as key-value pairs
|
|
354
|
+
*/
|
|
355
|
+
exportDictionary() {
|
|
356
|
+
return Object.fromEntries(this.dictionary);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
/**
|
|
360
|
+
* Gets the current dictionary size
|
|
361
|
+
* @returns {number} Number of words in dictionary
|
|
362
|
+
*/
|
|
363
|
+
getDictionarySize() {
|
|
364
|
+
return this.dictionary.size;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
/**
|
|
368
|
+
* Adds a custom harmonic cluster
|
|
369
|
+
* @param {string} cluster - Two-character cluster (e.g., 'ბრ')
|
|
370
|
+
* @returns {GeorgianHyphenator} Returns this for method chaining
|
|
371
|
+
*/
|
|
372
|
+
addHarmonicCluster(cluster) {
|
|
373
|
+
if (typeof cluster === 'string' && cluster.length === 2) {
|
|
374
|
+
this.harmonicClusters.add(cluster);
|
|
375
|
+
}
|
|
376
|
+
return this;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
/**
|
|
380
|
+
* Removes a harmonic cluster
|
|
381
|
+
* @param {string} cluster - Cluster to remove
|
|
382
|
+
* @returns {boolean} True if cluster was removed
|
|
383
|
+
*/
|
|
384
|
+
removeHarmonicCluster(cluster) {
|
|
385
|
+
return this.harmonicClusters.delete(cluster);
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
/**
|
|
389
|
+
* Gets all harmonic clusters
|
|
390
|
+
* @returns {string[]} Array of harmonic clusters
|
|
391
|
+
*/
|
|
392
|
+
getHarmonicClusters() {
|
|
393
|
+
return Array.from(this.harmonicClusters);
|
|
394
|
+
}
|
|
188
395
|
}
|
|
189
396
|
|
|
190
397
|
// Browser Global (for <script> tag without type="module")
|