georgian-hyphenation 2.2.1 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,358 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Georgian Hyphenation Library v2.2.1
4
+ ქართული ენის დამარცვლის ბიბლიოთეკა
5
+
6
+ Modernized & Optimized
7
+ - Hybrid Engine: Algorithm + Dictionary
8
+ - Harmonic Clusters Support
9
+ - Gemination Handling
10
+ - O(1) Cluster Lookup with Set
11
+
12
+ Author: Guram Zhgamadze
13
+ """
14
+
15
+ import json
16
+ import os
17
+ import re
18
+ from typing import List, Dict, Set
19
+
20
+
21
class GeorgianHyphenator:
    """
    Hybrid hyphenator for Georgian words and text.

    A word is first looked up in an optional exceptions dictionary; when
    it is not found, a rule-based algorithm places break points between
    consecutive vowels, taking doubled consonants and harmonic consonant
    clusters into account.

    Attributes:
        hyphen_char: Separator inserted at every break point.
        vowels: The five Georgian vowel letters.
        left_min: Minimum number of characters kept before the first break.
        right_min: Minimum number of characters kept after the last break.
        harmonic_clusters: Two-consonant clusters that are never split.
        dictionary: Exception words mapped to their '-'-separated form.
    """

    # Punctuation that may surround a word without preventing a dictionary hit.
    _PUNCTUATION = '.,/#!$%^&*;:{}=-_`~()'

    # Maximal runs of Georgian letters; the capture group makes re.split
    # keep the runs in its result.
    _GEORGIAN_RUN = re.compile(r'([ა-ჰ]+)')

    _SOFT_HYPHEN = '\u00AD'

    def __init__(self, hyphen_char: str = '\u00AD'):
        """
        Create a hyphenator.

        Args:
            hyphen_char: Character to use for hyphenation
                (default: soft hyphen U+00AD)
        """
        self.hyphen_char = hyphen_char
        self.vowels = 'აეიოუ'
        self.left_min = 2
        self.right_min = 2

        # Stored as a set so membership tests are constant time.
        self.harmonic_clusters: Set[str] = {
            'ბლ', 'ბრ', 'ბღ', 'ბზ', 'გდ', 'გლ', 'გმ', 'გნ', 'გვ', 'გზ', 'გრ',
            'დრ', 'თლ', 'თრ', 'თღ', 'კლ', 'კმ', 'კნ', 'კრ', 'კვ', 'მტ', 'პლ',
            'პრ', 'ჟღ', 'რგ', 'რლ', 'რმ', 'სწ', 'სხ', 'ტკ', 'ტპ', 'ტრ', 'ფლ',
            'ფრ', 'ფქ', 'ფშ', 'ქლ', 'ქნ', 'ქვ', 'ქრ', 'ღლ', 'ღრ', 'ყლ', 'ყრ',
            'შთ', 'შპ', 'ჩქ', 'ჩრ', 'ცლ', 'ცნ', 'ცრ', 'ცვ', 'ძგ', 'ძვ', 'ძღ',
            'წლ', 'წრ', 'წნ', 'წკ', 'ჭკ', 'ჭრ', 'ჭყ', 'ხლ', 'ხმ', 'ხნ', 'ხვ', 'ჯგ'
        }

        # Exception words; empty until load_library() or
        # load_default_library() is called.
        self.dictionary: Dict[str, str] = {}

    def _strip_hyphens(self, text: str) -> str:
        """
        Remove existing hyphenation marks from a single word.

        Soft hyphens, the configured hyphen character and the visible
        hyphen '-' are all removed.

        Args:
            text: Input word (may be empty or None)

        Returns:
            The word without any hyphens; '' for empty input
        """
        if not text:
            return ''
        return (
            text.replace(self._SOFT_HYPHEN, '')
            .replace(self.hyphen_char, '')
            .replace('-', '')
        )

    def load_library(self, data: Dict[str, str]) -> None:
        """
        Merge custom exception words into the dictionary.

        Anything that is not a non-empty dict is ignored.

        Args:
            data: Dictionary mapping words to their hyphenation
                Example: {"საქართველო": "სა-ქარ-თვე-ლო"}
        """
        if data and isinstance(data, dict):
            self.dictionary.update(data)

    def load_default_library(self) -> None:
        """
        Load the bundled exceptions dictionary (data/exceptions.json).

        Several candidate locations relative to this module are tried and
        the first existing file wins. Loading is best effort: any failure
        is reported on stdout and the hyphenator keeps working with the
        algorithm only.
        """
        try:
            package_dir = os.path.dirname(__file__)

            locations = [
                # Development mode (root data/ folder)
                os.path.join(package_dir, '..', '..', 'data', 'exceptions.json'),
                # Installed via pip (data/ copied to site-packages)
                os.path.join(os.path.dirname(package_dir), 'data', 'exceptions.json'),
                # Alternative installed location
                os.path.join(package_dir, 'data', 'exceptions.json'),
            ]

            # isfile() rather than exists(): a directory that happens to be
            # called exceptions.json must not shadow a real file further
            # down the list.
            data_file = next(
                (path for path in map(os.path.abspath, locations)
                 if os.path.isfile(path)),
                None,
            )

            if data_file:
                with open(data_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                self.load_library(data)
                print(f"Georgian Hyphenation v2.2.1: Dictionary loaded ({len(self.dictionary)} words)")
            else:
                print("Georgian Hyphenation v2.2.1: Dictionary not found, using algorithm only")

        except Exception as e:
            # Deliberately broad: a missing or corrupt dictionary must never
            # break hyphenation itself.
            print(f"Georgian Hyphenation v2.2.1: Could not load dictionary ({e}), using algorithm only")

    def hyphenate(self, word: str) -> str:
        """
        Hyphenate a single Georgian word.

        Existing hyphens are always stripped first, so a previously
        (possibly wrongly) hyphenated word is re-hyphenated from scratch.
        The exceptions dictionary takes precedence over the algorithm.
        Punctuation before or after the word does not prevent a dictionary
        hit and is kept in the result.

        Args:
            word: Georgian word to hyphenate

        Returns:
            Hyphenated word with the configured hyphen character
        """
        sanitized_word = self._strip_hyphens(word)

        # Look up the word without its surrounding punctuation, then put
        # the punctuation back so no input character is lost.
        core = sanitized_word.strip(self._PUNCTUATION)
        entry = self.dictionary.get(core) if core else None
        if entry is not None:
            lead_len = len(sanitized_word) - len(sanitized_word.lstrip(self._PUNCTUATION))
            lead = sanitized_word[:lead_len]
            trail = sanitized_word[lead_len + len(core):]
            return lead + entry.replace('-', self.hyphen_char) + trail

        # Fallback to algorithm
        return self.apply_algorithm(sanitized_word)

    def _split_position(self, word: str, v1: int, v2: int) -> int:
        """
        Return the break index for the stretch between two adjacent vowels.

        Args:
            word: Word being hyphenated
            v1: Index of the left vowel
            v2: Index of the next vowel to the right

        Returns:
            Index in ``word`` before which the hyphen would be inserted
        """
        between = word[v1 + 1:v2]

        # V-V and V-C-V: break right after the left vowel (მა-მა).
        if len(between) <= 1:
            return v1 + 1

        # Gemination: break between the first pair of identical
        # consonants (კლას-სი).
        for offset in range(len(between) - 1):
            if between[offset] == between[offset + 1]:
                return v1 + 1 + offset + 1

        # Harmonic cluster directly before the right vowel: keep it whole
        # and break in front of it (ას-ტრო-ნო-მი-ა).
        if between[-2:] in self.harmonic_clusters:
            return v2 - 2

        # Default: break after the first consonant (ბარ-ბა-რე).
        return v1 + 2

    def apply_algorithm(self, word: str) -> str:
        """
        Hyphenate a word with the rule-based algorithm only.

        One candidate break is computed for every pair of adjacent vowels.
        A candidate is discarded when it would leave fewer than
        ``left_min`` characters before it or fewer than ``right_min``
        characters after it.

        Args:
            word: Word to hyphenate (already stripped of hyphens)

        Returns:
            Hyphenated word
        """
        # Skip short words
        if len(word) < (self.left_min + self.right_min):
            return word

        vowel_indices = [i for i, char in enumerate(word) if char in self.vowels]

        # Need at least 2 vowels for hyphenation
        if len(vowel_indices) < 2:
            return word

        # Break positions come out strictly increasing (each one lies after
        # its left vowel and not beyond its right vowel), so the word can
        # be sliced left to right.
        pieces = []
        previous = 0
        for v1, v2 in zip(vowel_indices, vowel_indices[1:]):
            pos = self._split_position(word, v1, v2)
            if pos >= self.left_min and (len(word) - pos) >= self.right_min:
                pieces.append(word[previous:pos])
                previous = pos
        pieces.append(word[previous:])

        return self.hyphen_char.join(pieces)

    def get_syllables(self, word: str) -> List[str]:
        """
        Get the syllables of a word as a list.

        Args:
            word: Word to split into syllables

        Returns:
            List of syllables without hyphen characters
        """
        hyphenated = self.hyphenate(word)
        return hyphenated.split(self.hyphen_char)

    def hyphenate_text(self, text: str) -> str:
        """
        Hyphenate every Georgian word in a text.

        Only runs of Georgian letters with four or more characters are
        hyphenated; everything else (punctuation, whitespace, digits,
        other scripts) is copied through unchanged.

        Stale hyphenation marks - soft hyphens and the configured hyphen
        character - are removed from the text first. A literal '-' is only
        removed when it is the configured hyphen character, so ranges such
        as "2020-2024" and hyphenated compounds keep their hyphen.

        Args:
            text: Text to hyphenate (can contain multiple words)

        Returns:
            Hyphenated text; '' for empty input
        """
        if not text:
            return ''

        sanitized_text = text.replace(self._SOFT_HYPHEN, '').replace(self.hyphen_char, '')

        # Because of the capture group, re.split puts the Georgian runs at
        # the odd indices and the text between them at the even indices.
        parts = self._GEORGIAN_RUN.split(sanitized_text)

        result = []
        for index, part in enumerate(parts):
            if index % 2 == 1 and len(part) >= 4:
                result.append(self.hyphenate(part))
            else:
                result.append(part)

        return ''.join(result)
281
+
282
+
283
+ # Convenience functions for backward compatibility and quick usage
284
+
285
def hyphenate(word: str, hyphen_char: str = '\u00AD') -> str:
    """
    Hyphenate one Georgian word with a throwaway hyphenator.

    Args:
        word: Georgian word
        hyphen_char: Separator to insert at the break points

    Returns:
        The word with ``hyphen_char`` inserted at every break point
    """
    return GeorgianHyphenator(hyphen_char).hyphenate(word)
298
+
299
+
300
def get_syllables(word: str) -> List[str]:
    """
    Split a Georgian word into its syllables.

    A hyphenator configured with '-' as separator does the work.

    Args:
        word: Georgian word

    Returns:
        List of syllables
    """
    splitter = GeorgianHyphenator('-')
    syllables = splitter.get_syllables(word)
    return syllables
312
+
313
+
314
def hyphenate_text(text: str, hyphen_char: str = '\u00AD') -> str:
    """
    Hyphenate all Georgian words of a text with a throwaway hyphenator.

    Args:
        text: Text containing Georgian words
        hyphen_char: Separator to insert at the break points

    Returns:
        The hyphenated text
    """
    return GeorgianHyphenator(hyphen_char).hyphenate_text(text)
327
+
328
+
329
+ # Export format converters (v2.0 compatibility)
330
+
331
def to_tex_pattern(word: str) -> str:
    """
    Render a word as a TeX hyphenation pattern.

    The syllables are joined with '1' and the result is wrapped in dots.

    Args:
        word: Georgian word

    Returns:
        TeX pattern (e.g., ".სა1ქარ1თვე1ლო.")
    """
    joined = '1'.join(GeorgianHyphenator('-').get_syllables(word))
    return '.' + joined + '.'
344
+
345
+
346
def to_hunspell_format(word: str) -> str:
    """
    Render a word in Hunspell hyphenation format.

    The word is hyphenated with '-' and every '-' is then turned into '='.

    Args:
        word: Georgian word

    Returns:
        Hunspell format (e.g., "სა=ქარ=თვე=ლო")
    """
    return GeorgianHyphenator('-').hyphenate(word).replace('-', '=')
@@ -0,0 +1,312 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Georgian Language Hyphenation Library
4
+ ქართული ენის დამარცვლის ბიბლიოთეკა
5
+
6
+ Supports multiple output formats:
7
+ - Soft hyphens for web/documents
8
+ - TeX hyphenation patterns
9
+ - Hunspell dictionary format
10
+ - Plain syllable lists
11
+ """
12
+
13
+ import re
14
+ from functools import reduce
15
+ from typing import List, Dict, Optional
16
+ import json
17
+
18
+
19
class GeorgianHyphenator:
    """
    Regex-based hyphenator for the Georgian language.

    Attributes:
        hyphen_char: Separator inserted at hyphenation points.
        C: Regex character class matching one Georgian consonant.
        V: Regex character class matching one Georgian vowel.
        char: Regex character class matching any Georgian letter.
    """

    def __init__(self, hyphen_char: str = '\u00AD'):
        """
        Initialize hyphenator with specified hyphen character

        Args:
            hyphen_char: Character to use for hyphenation points
                Default is soft hyphen (U+00AD)
        """
        self.hyphen_char = hyphen_char
        self.C = '[ბგდვზთკლმნპჟრსტფქღყშჩცძწჭხჯჰ]'  # Consonants
        self.V = '[აეიოუ]'  # Vowels
        self.char = '[ა-ჰ]'  # All Georgian letters

    def count_vowels(self, word: str) -> int:
        """Count the Georgian vowels in a word."""
        return sum(word.count(vowel) for vowel in "აეიოუ")

    def hyphenate(self, word: str) -> str:
        """
        Hyphenate a single Georgian word

        The rule set is applied four times: once anchored at the real word
        boundaries and then with already inserted hyphens acting as the
        start and/or end boundary, so that the fragments created by earlier
        passes are processed as well.

        Args:
            word: Georgian word to hyphenate

        Returns:
            Word with hyphenation points inserted
        """
        # Don't hyphenate words with 0-1 vowels
        if self.count_vowels(word) <= 1:
            return word

        softhpn = self.hyphen_char

        # The separator becomes part of regex patterns below. Escape it so
        # that characters such as '|', '.' or '+' are matched literally
        # instead of being interpreted as regex syntax.
        marker = re.escape(softhpn)

        result = word
        for startchar, endchar in (
            ('^', '$'),
            ('^', marker),
            (marker, '$'),
            (marker, marker),
        ):
            result = self._apply_rules(result, softhpn, startchar, endchar)

        # Collapse runs of consecutive separators into one. The group makes
        # the '+' apply to a whole multi-character separator, and the
        # callable replacement inserts the separator verbatim. An empty
        # separator has nothing to collapse.
        if softhpn:
            result = re.sub(f"(?:{marker})+", lambda _match: softhpn, result, flags=re.U)

        return result

    def _apply_rules(self, w: str, softhpn: str, startchar: str, endchar: str) -> str:
        """
        Apply the twelve hyphenation regex rules to ``w``, in order.

        Args:
            w: Text to process
            softhpn: Separator to insert (literal text)
            startchar: Regex fragment marking the start boundary
            endchar: Regex fragment marking the end boundary

        Returns:
            The text after all rules have been applied

        NOTE(review): when ``startchar``/``endchar`` match an actual
        separator, that separator is part of the match but not of the
        replacement, so it is removed from the text. This is how the rules
        have always behaved and it is kept as is.
        """
        C, V, char = self.C, self.V, self.char

        # Inside a replacement template only the backslash is special.
        separator = softhpn.replace('\\', '\\\\')

        # (pattern, groups emitted before the separator, total groups)
        rules = [
            # Rule 1: V+C+C++V → VC|CV
            (f"({V})({C})({C}+)({V})", 2, 4),
            # Rule 2: V+C+V+C+V → VCV|CV
            (f"({V})({C})({V})({C})({V})", 3, 5),
            # Rule 3: C+V+C+V → CV|CV
            (f"({C})({V})({C})({V})", 2, 4),
            # Rule 4: V+V+V → VV|V
            (f"({V})({V})({V})", 2, 3),
            # Rule 5: Word start - ^VCVCV
            (f"{startchar}({V})({C})({V})({C})({V})", 3, 5),
            # Rule 6: Word start - ^VCVCchar
            (f"{startchar}({V})({C})({V})({C})({char})", 3, 5),
            # Rule 7: Word start - ^C++CVCV
            (f"{startchar}({C}+)({V})({C})({V})", 2, 4),
            # Rule 8: Word start - ^C++VVchar
            (f"{startchar}({C}+)({V})({V})({char})", 2, 4),
            # Rule 9: Word end - charVVC++$
            (f"({char})({V})({V})({C}+){endchar}", 2, 4),
            # Rule 10: Word end - charVCV$
            (f"({char})({V})({C})({V}){endchar}", 2, 4),
            # Rule 11: Word end - VCC++VC++$
            (f"({V})({C})({C}+)({V})({C}+){endchar}", 2, 5),
            # Rule 12: Word end - charVCVC++$
            (f"({char})({V})({C})({V}+)({C}+){endchar}", 2, 5),
        ]

        t = w
        for pattern, head, total in rules:
            # \g<n> references stay unambiguous even when the separator
            # starts with a digit (a plain \2 followed by '1' would be
            # read as group 21).
            before = ''.join(f"\\g<{n}>" for n in range(1, head + 1))
            after = ''.join(f"\\g<{n}>" for n in range(head + 1, total + 1))
            t = re.sub(pattern, before + separator + after, t, flags=re.U)

        return t

    def get_syllables(self, word: str) -> List[str]:
        """
        Get list of syllables for a word

        Args:
            word: Georgian word

        Returns:
            List of syllables
        """
        hyphenated = self.hyphenate(word)
        return hyphenated.split(self.hyphen_char)

    def hyphenate_text(self, text: str) -> str:
        """
        Hyphenate entire text

        The text is split on single spaces and every piece is hyphenated
        on its own.

        Args:
            text: Georgian text

        Returns:
            Hyphenated text
        """
        words = text.split(' ')
        hyphenated_words = [self.hyphenate(w) for w in words]
        return ' '.join(hyphenated_words)
150
+
151
+
152
class TeXPatternGenerator:
    """Turn hyphenated Georgian words into TeX hyphenation patterns."""

    def __init__(self, hyphenator: GeorgianHyphenator):
        self.hyphenator = hyphenator

    def word_to_pattern(self, word: str) -> str:
        """
        Build the TeX pattern for one word.

        The syllables are joined with '1' and prefixed with a dot; a word
        that is not split at all is returned as-is behind the dot.

        Example: საქართველო → .სა1ქარ1თვე1ლო
        """
        parts = self.hyphenator.get_syllables(word)
        body = "1".join(parts) if len(parts) > 1 else word
        return "." + body

    def generate_patterns_file(self, words: List[str], output_file: str):
        """
        Write a TeX patterns file for the given words.

        Duplicate patterns are written once and the patterns are sorted.

        Args:
            words: List of Georgian words
            output_file: Path to output .tex file
        """
        # All patterns are computed before the file is opened.
        unique_patterns = sorted({self.word_to_pattern(w) for w in words})

        with open(output_file, 'w', encoding='utf-8') as out:
            out.write("% Georgian hyphenation patterns\n")
            out.write("% ქართული დამარცვლის პატერნები\n")
            out.write("% Generated automatically\n\n")
            out.write("\\patterns{\n")
            out.writelines(f" {pattern}\n" for pattern in unique_patterns)
            out.write("}\n")

        print(f"TeX patterns saved to {output_file}")
189
+
190
+
191
class HunspellDictionaryGenerator:
    """Write hyphenated words in Hunspell dictionary format."""

    def __init__(self, hyphenator: GeorgianHyphenator):
        self.hyphenator = hyphenator

    def word_to_hunspell(self, word: str) -> str:
        """
        Join the syllables of a word with '='.

        Example: საქართველო → სა=ქარ=თვე=ლო
        """
        return "=".join(self.hyphenator.get_syllables(word))

    def generate_dictionary(self, words: List[str], output_prefix: str):
        """
        Write the words to ``<output_prefix>.dic``.

        The file starts with a "UTF-8" line and a line holding the number
        of words, followed by one converted word per line.

        Args:
            words: List of Georgian words
            output_prefix: Prefix for output files (e.g., 'hyph_ka_GE')
        """
        dic_file = f"{output_prefix}.dic"

        with open(dic_file, 'w', encoding='utf-8') as out:
            out.write(f"UTF-8\n{len(words)}\n")
            # The generator converts each word only when its line is written.
            out.writelines(self.word_to_hunspell(entry) + "\n" for entry in words)

        print(f"Hunspell dictionary saved to {dic_file}")
223
+
224
+
225
class HyphenationExporter:
    """Write hyphenation results to JSON and CSV files."""

    def __init__(self, hyphenator: GeorgianHyphenator):
        self.hyphenator = hyphenator

    def export_json(self, words: List[str], output_file: str):
        """
        Export the words as a JSON object.

        Every word maps to an object with its "syllables" list and its
        "hyphenated" form.
        """
        payload = {
            entry: {
                "syllables": self.hyphenator.get_syllables(entry),
                "hyphenated": self.hyphenator.hyphenate(entry),
            }
            for entry in words
        }

        with open(output_file, 'w', encoding='utf-8') as out:
            json.dump(payload, out, ensure_ascii=False, indent=2)

        print(f"JSON export saved to {output_file}")

    def export_csv(self, words: List[str], output_file: str):
        """
        Export the words as CSV.

        Columns: the word, its syllables joined with '-', and the number
        of syllables.
        """
        import csv

        with open(output_file, 'w', encoding='utf-8', newline='') as out:
            rows = csv.writer(out)
            rows.writerow(['word', 'syllables', 'syllable_count'])

            for entry in words:
                parts = self.hyphenator.get_syllables(entry)
                rows.writerow([entry, '-'.join(parts), len(parts)])

        print(f"CSV export saved to {output_file}")
258
+
259
+
260
+ # Test and demonstration
261
if __name__ == "__main__":
    # Horizontal rule used by every banner below.
    banner = "=" * 60

    # Sample words for the demonstration.
    test_words = [
        "საქართველო",
        "მთავრობა",
        "დედაქალაქი",
        "ტელევიზორი",
        "კომპიუტერი",
        "უნივერსიტეტი",
        "პარლამენტი",
        "დამოუკიდებლობა",
        "განათლება",
        "ეკონომიკა"
    ]

    # Default hyphenator (soft hyphen) used for the export files.
    hyphenator = GeorgianHyphenator()

    print(banner)
    print("Georgian Hyphenation Examples")
    print("ქართული დამარცვლის მაგალითები")
    print(banner)
    print()

    # Show every sample word with visible hyphens.
    visible_hyphenator = GeorgianHyphenator('-')
    for word in test_words:
        syllables = visible_hyphenator.get_syllables(word)
        hyphenated = visible_hyphenator.hyphenate(word)
        print(f"{word:20} → {hyphenated:25} [{len(syllables)} syllables]")

    print("\n" + banner)
    print("Generating export files...")
    print(banner)
    print()

    # TeX patterns
    tex_gen = TeXPatternGenerator(hyphenator)
    tex_gen.generate_patterns_file(test_words, "hyph-ka.tex")

    # Hunspell dictionary
    hunspell_gen = HunspellDictionaryGenerator(hyphenator)
    hunspell_gen.generate_dictionary(test_words, "hyph_ka_GE")

    # JSON and CSV exports
    exporter = HyphenationExporter(hyphenator)
    exporter.export_json(test_words, "georgian_hyphenation.json")
    exporter.export_csv(test_words, "georgian_hyphenation.csv")

    print("\n" + banner)
    print("All files generated successfully!")
    print(banner)