georgian-hyphenation 2.2.2 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +211 -67
- package/package.json +24 -29
- package/src/georgian_hyphenation/__init__.py +26 -0
- package/src/georgian_hyphenation/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/georgian_hyphenation/__pycache__/hyphenator.cpython-313.pyc +0 -0
- package/src/georgian_hyphenation/hyphenator.py +358 -0
- package/src/georgian_hyphenation/hyphenator.py.backup +312 -0
- package/src/georgian_hyphenation.egg-info/PKG-INFO +657 -0
- package/src/georgian_hyphenation.egg-info/SOURCES.txt +14 -0
- package/src/georgian_hyphenation.egg-info/dependency_links.txt +1 -0
- package/src/georgian_hyphenation.egg-info/requires.txt +3 -0
- package/src/georgian_hyphenation.egg-info/top_level.txt +2 -0
- package/src/javascript/index.js +60 -22
package/src/javascript/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Georgian Hyphenation Library v2.2.
|
|
3
|
-
*
|
|
2
|
+
* Georgian Hyphenation Library v2.2.3
|
|
3
|
+
* Browser + Node.js Compatible
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
export default class GeorgianHyphenator {
|
|
@@ -21,6 +21,7 @@ export default class GeorgianHyphenator {
|
|
|
21
21
|
]);
|
|
22
22
|
|
|
23
23
|
this.dictionary = new Map();
|
|
24
|
+
this.dictionaryLoaded = false;
|
|
24
25
|
}
|
|
25
26
|
|
|
26
27
|
/**
|
|
@@ -28,12 +29,13 @@ export default class GeorgianHyphenator {
|
|
|
28
29
|
*/
|
|
29
30
|
_stripHyphens(text) {
|
|
30
31
|
if (!text) return '';
|
|
31
|
-
//
|
|
32
|
-
|
|
33
|
-
const regex = new RegExp(`[\u00AD${escapedChar}]`, 'g');
|
|
34
|
-
return text.replace(regex, '');
|
|
32
|
+
// Remove soft hyphens, regular hyphens, and custom hyphen char
|
|
33
|
+
return text.replace(/[\u00AD\-]/g, '').replace(new RegExp(this.hyphenChar, 'g'), '');
|
|
35
34
|
}
|
|
36
35
|
|
|
36
|
+
/**
|
|
37
|
+
* ტვირთავს მომხმარებლის dictionary-ს
|
|
38
|
+
*/
|
|
37
39
|
loadLibrary(data) {
|
|
38
40
|
if (data && typeof data === 'object') {
|
|
39
41
|
Object.entries(data).forEach(([word, hyphenated]) => {
|
|
@@ -42,43 +44,65 @@ export default class GeorgianHyphenator {
|
|
|
42
44
|
}
|
|
43
45
|
}
|
|
44
46
|
|
|
47
|
+
/**
|
|
48
|
+
* ✅ ტვირთავს default dictionary-ს (Browser + Node.js compatible)
|
|
49
|
+
*/
|
|
45
50
|
async loadDefaultLibrary() {
|
|
46
|
-
|
|
51
|
+
if (this.dictionaryLoaded) return;
|
|
52
|
+
|
|
53
|
+
// Browser Environment
|
|
47
54
|
if (typeof window !== 'undefined' && typeof fetch !== 'undefined') {
|
|
48
55
|
try {
|
|
49
|
-
|
|
50
|
-
|
|
56
|
+
// ✅ სწორი CDN URL - jsdelivr უფრო სანდოა unpkg-ზე
|
|
57
|
+
const response = await fetch('https://cdn.jsdelivr.net/npm/georgian-hyphenation@2.2.3/data/exceptions.json');
|
|
58
|
+
|
|
59
|
+
if (!response.ok) {
|
|
60
|
+
throw new Error(`HTTP ${response.status}`);
|
|
61
|
+
}
|
|
62
|
+
|
|
51
63
|
const data = await response.json();
|
|
52
64
|
this.loadLibrary(data);
|
|
65
|
+
this.dictionaryLoaded = true;
|
|
66
|
+
|
|
67
|
+
console.log(`Georgian Hyphenation v2.2.3: Dictionary loaded (${this.dictionary.size} words)`);
|
|
53
68
|
} catch (error) {
|
|
54
|
-
console.warn(
|
|
69
|
+
console.warn('Georgian Hyphenation v2.2.3: Dictionary not available, using algorithm only');
|
|
70
|
+
console.warn('Error:', error.message);
|
|
55
71
|
}
|
|
56
|
-
}
|
|
57
|
-
//
|
|
58
|
-
else if (typeof process !== 'undefined') {
|
|
72
|
+
}
|
|
73
|
+
// Node.js Environment (Dynamic Import)
|
|
74
|
+
else if (typeof process !== 'undefined' && typeof require !== 'undefined') {
|
|
59
75
|
try {
|
|
60
|
-
//
|
|
61
|
-
const
|
|
62
|
-
assert: { type: 'json' }
|
|
63
|
-
});
|
|
76
|
+
// Try CommonJS require first
|
|
77
|
+
const data = require('../../data/exceptions.json');
|
|
64
78
|
this.loadLibrary(data);
|
|
79
|
+
this.dictionaryLoaded = true;
|
|
80
|
+
console.log(`Georgian Hyphenation v2.2.3: Dictionary loaded (${this.dictionary.size} words)`);
|
|
65
81
|
} catch (error) {
|
|
66
|
-
console.warn(
|
|
82
|
+
console.warn('Georgian Hyphenation v2.2.3: Local dictionary not found, using algorithm only');
|
|
67
83
|
}
|
|
68
84
|
}
|
|
69
85
|
}
|
|
70
86
|
|
|
87
|
+
/**
|
|
88
|
+
* ამარცვლებს ერთ სიტყვას
|
|
89
|
+
*/
|
|
71
90
|
hyphenate(word) {
|
|
72
91
|
const sanitizedWord = this._stripHyphens(word);
|
|
73
92
|
const cleanWord = sanitizedWord.replace(/[.,/#!$%^&*;:{}=\-_`~()]/g, "");
|
|
74
93
|
|
|
94
|
+
// Dictionary check
|
|
75
95
|
if (this.dictionary.has(cleanWord)) {
|
|
76
96
|
return this.dictionary.get(cleanWord).replace(/-/g, this.hyphenChar);
|
|
77
97
|
}
|
|
78
98
|
|
|
99
|
+
// Algorithm fallback
|
|
79
100
|
return this.applyAlgorithm(sanitizedWord);
|
|
80
101
|
}
|
|
81
102
|
|
|
103
|
+
/**
|
|
104
|
+
* ალგორითმის გამოყენება
|
|
105
|
+
*/
|
|
82
106
|
applyAlgorithm(word) {
|
|
83
107
|
if (word.length < (this.leftMin + this.rightMin)) return word;
|
|
84
108
|
|
|
@@ -101,6 +125,7 @@ export default class GeorgianHyphenator {
|
|
|
101
125
|
if (distance === 0 || distance === 1) {
|
|
102
126
|
candidatePos = v1 + 1;
|
|
103
127
|
} else {
|
|
128
|
+
// Gemination check
|
|
104
129
|
let doubleConsonantIndex = -1;
|
|
105
130
|
for (let j = 0; j < betweenSubstring.length - 1; j++) {
|
|
106
131
|
if (betweenSubstring[j] === betweenSubstring[j + 1]) {
|
|
@@ -112,6 +137,7 @@ export default class GeorgianHyphenator {
|
|
|
112
137
|
if (doubleConsonantIndex !== -1) {
|
|
113
138
|
candidatePos = v1 + 1 + doubleConsonantIndex + 1;
|
|
114
139
|
} else {
|
|
140
|
+
// Harmonic cluster check
|
|
115
141
|
let breakIndex = -1;
|
|
116
142
|
if (distance >= 2) {
|
|
117
143
|
const lastTwo = betweenSubstring.substring(distance - 2, distance);
|
|
@@ -123,6 +149,7 @@ export default class GeorgianHyphenator {
|
|
|
123
149
|
}
|
|
124
150
|
}
|
|
125
151
|
|
|
152
|
+
// Anti-orphan protection
|
|
126
153
|
if (candidatePos >= this.leftMin && (word.length - candidatePos) >= this.rightMin) {
|
|
127
154
|
insertPoints.push(candidatePos);
|
|
128
155
|
}
|
|
@@ -135,10 +162,16 @@ export default class GeorgianHyphenator {
|
|
|
135
162
|
return result.join('');
|
|
136
163
|
}
|
|
137
164
|
|
|
165
|
+
/**
|
|
166
|
+
* მარცვლების მიღება მასივის სახით
|
|
167
|
+
*/
|
|
138
168
|
getSyllables(word) {
|
|
139
169
|
return this.hyphenate(word).split(this.hyphenChar);
|
|
140
170
|
}
|
|
141
171
|
|
|
172
|
+
/**
|
|
173
|
+
* მთელი ტექსტის დამარცვლა
|
|
174
|
+
*/
|
|
142
175
|
hyphenateText(text) {
|
|
143
176
|
if (!text) return '';
|
|
144
177
|
const sanitizedText = this._stripHyphens(text);
|
|
@@ -153,15 +186,20 @@ export default class GeorgianHyphenator {
|
|
|
153
186
|
}
|
|
154
187
|
}
|
|
155
188
|
|
|
156
|
-
/**
|
|
189
|
+
/**
 * Cross-platform export.
 *
 * 1. ES module consumers use the `export default` on the class declaration.
 * 2. Classic <script> tags (without type="module") get the class as a
 *    `GeorgianHyphenator` property on `window`.
 * 3. CommonJS consumers get the class as `module.exports`, with a `default`
 *    property that points at the same class.
 */

// Browser global: only when a `window` object exists.
if (typeof window !== 'undefined') {
  Object.assign(window, { GeorgianHyphenator });
}

// CommonJS: only when `module.exports` exists.
if (typeof module !== 'undefined' && module.exports) {
  GeorgianHyphenator.default = GeorgianHyphenator;
  module.exports = GeorgianHyphenator;
}
|