greek-name-correction 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -4
- package/index.js +3 -556
- package/package.json +53 -16
- package/src/cases.js +322 -0
- package/src/constants.js +326 -0
- package/src/index.js +251 -0
- package/src/transliteration.js +64 -0
- package/src/utils.js +233 -0
- package/src/validation.js +60 -0
- package/text.js +0 -113
package/README.md
CHANGED
|
@@ -494,7 +494,7 @@ Contributions are welcome! Please feel free to submit a Pull Request. For major
|
|
|
494
494
|
### Development
|
|
495
495
|
```bash
|
|
496
496
|
# Clone the repository
|
|
497
|
-
git clone https://github.com/
|
|
497
|
+
git clone https://github.com/sraftopo/greek-name-correction.git
|
|
498
498
|
|
|
499
499
|
# Install dependencies (none currently!)
|
|
500
500
|
npm install
|
|
@@ -522,7 +522,13 @@ The test suite covers:
|
|
|
522
522
|
|
|
523
523
|
## Changelog
|
|
524
524
|
|
|
525
|
-
### Version 2.0.0 (Current)
|
|
525
|
+
### Version 2.1.0 (Current)
|
|
526
|
+
- 🏗️ **Modular Architecture** - Complete codebase refactoring into logical modules
|
|
527
|
+
- 📦 **Improved Structure** - Separated into `transliteration.js`, `cases.js`, `validation.js`, `utils.js`, `constants.js`
|
|
528
|
+
- 🔧 **Better Maintainability** - Clean separation of concerns for easier testing and development
|
|
529
|
+
- ✨ **Enhanced Code Organization** - Each module has a single, clear responsibility
|
|
530
|
+
|
|
531
|
+
### Version 2.0.0
|
|
526
532
|
- ✨ Added transliteration support (Greeklish ↔ Greek ↔ Latin)
|
|
527
533
|
- ✨ Added genitive case conversion
|
|
528
534
|
- ✨ Added diminutive detection
|
|
@@ -548,7 +554,7 @@ MIT © Stavros
|
|
|
548
554
|
|
|
549
555
|
## Support
|
|
550
556
|
|
|
551
|
-
For bugs, questions, and discussions please use the [GitHub Issues](https://github.com/
|
|
557
|
+
For bugs, questions, and discussions please use the [GitHub Issues](https://github.com/sraftopo/greek-name-correction/issues).
|
|
552
558
|
|
|
553
559
|
## Acknowledgments
|
|
554
560
|
|
|
@@ -565,4 +571,4 @@ Special thanks to all contributors and users who help improve this library.
|
|
|
565
571
|
|
|
566
572
|
**Made in Greece 🇬🇷**
|
|
567
573
|
|
|
568
|
-
If you find this library helpful, please consider giving it a ⭐️ on GitHub!
|
|
574
|
+
If you find this library helpful, please consider giving it a ⭐️ on GitHub!
|
package/index.js
CHANGED
|
@@ -1,557 +1,4 @@
|
|
|
1
|
-
// index.js
|
|
2
|
-
|
|
1
|
+
// index.js - Backward compatibility re-export
|
|
2
|
+
"use strict";
|
|
3
3
|
|
|
4
|
-
/**
|
|
5
|
-
* GreekNameCorrection - A zero-dependency library for correcting and formatting Greek names
|
|
6
|
-
* @param {string|string[]|Object|Object[]} input - Name(s) to correct
|
|
7
|
-
* @param {Object} options - Configuration options
|
|
8
|
-
* @returns {string|string[]|Object|Object[]} Corrected name(s) in same format as input
|
|
9
|
-
*/
|
|
10
|
-
function GreekNameCorrection(input, options = {}) {
|
|
11
|
-
const defaults = {
|
|
12
|
-
jsonKey: 'fullname',
|
|
13
|
-
preserveOriginal: false,
|
|
14
|
-
outputKey: 'correctedFullname',
|
|
15
|
-
splitNames: true,
|
|
16
|
-
detectGender: false,
|
|
17
|
-
normalizeTonotics: true,
|
|
18
|
-
handleDiacritics: true,
|
|
19
|
-
strictMode: false,
|
|
20
|
-
removeExtraSpaces: true,
|
|
21
|
-
handleParticles: true,
|
|
22
|
-
// New features
|
|
23
|
-
convertToGenitive: false,
|
|
24
|
-
transliterate: null, // 'greeklish-to-greek', 'greek-to-latin', 'greek-to-greeklish'
|
|
25
|
-
detectDiminutive: false,
|
|
26
|
-
handleTitles: true,
|
|
27
|
-
suggestCorrections: false,
|
|
28
|
-
recognizeKatharevousa: false,
|
|
29
|
-
databaseSafe: false,
|
|
30
|
-
generateSortKey: false,
|
|
31
|
-
statistics: false
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
const config = { ...defaults, ...options };
|
|
35
|
-
|
|
36
|
-
// Transliteration maps
|
|
37
|
-
const greeklishToGreekMap = {
|
|
38
|
-
'a': 'α', 'b': 'β', 'g': 'γ', 'd': 'δ', 'e': 'ε', 'z': 'ζ',
|
|
39
|
-
'h': 'η', 'th': 'θ', 'i': 'ι', 'k': 'κ', 'l': 'λ', 'm': 'μ',
|
|
40
|
-
'n': 'ν', 'x': 'ξ', 'o': 'ο', 'p': 'π', 'r': 'ρ', 's': 'σ',
|
|
41
|
-
't': 'τ', 'y': 'υ', 'f': 'φ', 'ch': 'χ', 'ps': 'ψ', 'w': 'ω',
|
|
42
|
-
'ks': 'ξ', 'ou': 'ου', 'ai': 'αι', 'ei': 'ει', 'oi': 'οι',
|
|
43
|
-
'ui': 'υι', 'au': 'αυ', 'eu': 'ευ', 'ou': 'ου', 'iu': 'ιου',
|
|
44
|
-
'mp': 'μπ', 'nt': 'ντ', 'gk': 'γκ', 'gg': 'γγ', 'ts': 'τσ',
|
|
45
|
-
'tz': 'τζ', 'dz': 'ντζ'
|
|
46
|
-
};
|
|
47
|
-
|
|
48
|
-
const greekToLatinMap = {
|
|
49
|
-
'α': 'a', 'ά': 'a', 'β': 'v', 'γ': 'g', 'δ': 'd', 'ε': 'e', 'έ': 'e',
|
|
50
|
-
'ζ': 'z', 'η': 'i', 'ή': 'i', 'θ': 'th', 'ι': 'i', 'ί': 'i', 'ϊ': 'i', 'ΐ': 'i',
|
|
51
|
-
'κ': 'k', 'λ': 'l', 'μ': 'm', 'ν': 'n', 'ξ': 'x', 'ο': 'o', 'ό': 'o',
|
|
52
|
-
'π': 'p', 'ρ': 'r', 'σ': 's', 'ς': 's', 'τ': 't', 'υ': 'y', 'ύ': 'y', 'ϋ': 'y', 'ΰ': 'y',
|
|
53
|
-
'φ': 'f', 'χ': 'ch', 'ψ': 'ps', 'ω': 'o', 'ώ': 'o',
|
|
54
|
-
'Α': 'A', 'Ά': 'A', 'Β': 'V', 'Γ': 'G', 'Δ': 'D', 'Ε': 'E', 'Έ': 'E',
|
|
55
|
-
'Ζ': 'Z', 'Η': 'I', 'Ή': 'I', 'Θ': 'Th', 'Ι': 'I', 'Ί': 'I', 'Ϊ': 'I',
|
|
56
|
-
'Κ': 'K', 'Λ': 'L', 'Μ': 'M', 'Ν': 'N', 'Ξ': 'X', 'Ο': 'O', 'Ό': 'O',
|
|
57
|
-
'Π': 'P', 'Ρ': 'R', 'Σ': 'S', 'Τ': 'T', 'Υ': 'Y', 'Ύ': 'Y', 'Ϋ': 'Y',
|
|
58
|
-
'Φ': 'F', 'Χ': 'Ch', 'Ψ': 'Ps', 'Ω': 'O', 'Ώ': 'O'
|
|
59
|
-
};
|
|
60
|
-
|
|
61
|
-
const greekToGreeklishMap = {
|
|
62
|
-
'α': 'a', 'ά': 'a', 'β': 'v', 'γ': 'g', 'δ': 'd', 'ε': 'e', 'έ': 'e',
|
|
63
|
-
'ζ': 'z', 'η': 'i', 'ή': 'i', 'θ': 'th', 'ι': 'i', 'ί': 'i', 'ϊ': 'i', 'ΐ': 'i',
|
|
64
|
-
'κ': 'k', 'λ': 'l', 'μ': 'm', 'ν': 'n', 'ξ': 'ks', 'ο': 'o', 'ό': 'o',
|
|
65
|
-
'π': 'p', 'ρ': 'r', 'σ': 's', 'ς': 's', 'τ': 't', 'υ': 'u', 'ύ': 'u', 'ϋ': 'u', 'ΰ': 'u',
|
|
66
|
-
'φ': 'f', 'χ': 'x', 'ψ': 'ps', 'ω': 'w', 'ώ': 'w',
|
|
67
|
-
'Α': 'A', 'Ά': 'A', 'Β': 'V', 'Γ': 'G', 'Δ': 'D', 'Ε': 'E', 'Έ': 'E',
|
|
68
|
-
'Ζ': 'Z', 'Η': 'I', 'Ή': 'I', 'Θ': 'Th', 'Ι': 'I', 'Ί': 'I', 'Ϊ': 'I',
|
|
69
|
-
'Κ': 'K', 'Λ': 'L', 'Μ': 'M', 'Ν': 'N', 'Ξ': 'Ks', 'Ο': 'O', 'Ό': 'O',
|
|
70
|
-
'Π': 'P', 'Ρ': 'R', 'Σ': 'S', 'Τ': 'T', 'Υ': 'U', 'Ύ': 'U', 'Ϋ': 'U',
|
|
71
|
-
'Φ': 'F', 'Χ': 'X', 'Ψ': 'Ps', 'Ω': 'W', 'Ώ': 'W'
|
|
72
|
-
};
|
|
73
|
-
|
|
74
|
-
// Diminutive patterns
|
|
75
|
-
const diminutivePatterns = {
|
|
76
|
-
'άκης': 'ας/ης', 'ούλης': 'ος', 'ίτσα': 'α', 'ούλα': 'α',
|
|
77
|
-
'άκι': 'ο', 'ούλι': 'ο', 'ίτσας': 'ας', 'ούλας': 'ας',
|
|
78
|
-
'ίκος': 'ος', 'ούκος': 'ος', 'ίκη': 'η', 'ούκα': 'α'
|
|
79
|
-
};
|
|
80
|
-
|
|
81
|
-
// Common Greek titles
|
|
82
|
-
const titles = [
|
|
83
|
-
'Κος', 'Κα', 'Δις', 'Κυρ', 'Κυρία', 'Κύριος', 'Δεσποινίς',
|
|
84
|
-
'Δρ', 'Καθ', 'Καθηγητής', 'Καθηγήτρια', 'Πρωθυπουργός',
|
|
85
|
-
'Υπουργός', 'Βουλευτής', 'Δήμαρχος', 'Περιφερειάρχης',
|
|
86
|
-
'Αρχιεπίσκοπος', 'Μητροπολίτης', 'Επίσκοπος', 'Πατήρ',
|
|
87
|
-
'Στρατηγός', 'Ταξίαρχος', 'Συνταγματάρχης', 'Αντισυνταγματάρχης'
|
|
88
|
-
];
|
|
89
|
-
|
|
90
|
-
// Katharevousa endings to modern Greek
|
|
91
|
-
const katharevousaMap = {
|
|
92
|
-
'ιον': 'ιο', 'ειον': 'ειο', 'αιον': 'αιο',
|
|
93
|
-
'ου': 'ου', 'ων': 'ων', 'ας': 'ας', 'ης': 'ης'
|
|
94
|
-
};
|
|
95
|
-
|
|
96
|
-
// Common name misspellings
|
|
97
|
-
const commonCorrections = {
|
|
98
|
-
'γιοργος': 'γιώργος', 'γεωργιος': 'γεώργιος',
|
|
99
|
-
'δημητρης': 'δημήτρης', 'δημητριος': 'δημήτριος',
|
|
100
|
-
'νικος': 'νίκος', 'νικολαος': 'νικόλαος',
|
|
101
|
-
'μαρια': 'μαρία', 'ελενη': 'ελένη',
|
|
102
|
-
'κωνσταντινος': 'κωνσταντίνος', 'κωστας': 'κώστας',
|
|
103
|
-
'ιωαννης': 'ιωάννης', 'γιαννης': 'γιάννης',
|
|
104
|
-
'αναστασια': 'αναστασία', 'σοφια': 'σοφία',
|
|
105
|
-
'παναγιωτης': 'παναγιώτης', 'παναγιωτα': 'παναγιώτα'
|
|
106
|
-
};
|
|
107
|
-
|
|
108
|
-
// Main processing function
|
|
109
|
-
function processName(name) {
|
|
110
|
-
if (typeof name !== 'string' || name.trim() === '') {
|
|
111
|
-
return config.strictMode ? null : name;
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
let processed = name;
|
|
115
|
-
let extractedTitle = null;
|
|
116
|
-
|
|
117
|
-
// Extract and handle titles
|
|
118
|
-
if (config.handleTitles) {
|
|
119
|
-
const titleResult = extractTitle(processed);
|
|
120
|
-
extractedTitle = titleResult.title;
|
|
121
|
-
processed = titleResult.name;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
// Transliteration
|
|
125
|
-
if (config.transliterate) {
|
|
126
|
-
processed = transliterate(processed, config.transliterate);
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
// Remove extra spaces
|
|
130
|
-
if (config.removeExtraSpaces) {
|
|
131
|
-
processed = processed.replace(/\s+/g, ' ').trim();
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
// Recognize and convert Katharevousa forms
|
|
135
|
-
if (config.recognizeKatharevousa) {
|
|
136
|
-
processed = convertKatharevousa(processed);
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
// Suggest corrections for common misspellings
|
|
140
|
-
let suggestion = null;
|
|
141
|
-
if (config.suggestCorrections) {
|
|
142
|
-
suggestion = suggestCorrection(processed);
|
|
143
|
-
if (suggestion) {
|
|
144
|
-
processed = suggestion;
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
// Normalize Greek tonetics
|
|
149
|
-
if (config.normalizeTonotics) {
|
|
150
|
-
processed = normalizeGreekTonotics(processed);
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
// Handle diacritics properly
|
|
154
|
-
if (config.handleDiacritics) {
|
|
155
|
-
processed = handleGreekDiacritics(processed);
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
// Split and capitalize each part
|
|
159
|
-
if (config.splitNames) {
|
|
160
|
-
const parts = processed.split(/\s+/);
|
|
161
|
-
processed = parts.map((part, index) => {
|
|
162
|
-
// Handle Greek name particles
|
|
163
|
-
if (config.handleParticles && isGreekParticle(part.toLowerCase())) {
|
|
164
|
-
return part.toLowerCase();
|
|
165
|
-
}
|
|
166
|
-
return capitalizeGreekName(part);
|
|
167
|
-
}).join(' ');
|
|
168
|
-
} else {
|
|
169
|
-
processed = capitalizeGreekName(processed);
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
// Convert to genitive case
|
|
173
|
-
let genitiveForm = null;
|
|
174
|
-
if (config.convertToGenitive) {
|
|
175
|
-
genitiveForm = convertToGenitive(processed);
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
// Database-safe output
|
|
179
|
-
if (config.databaseSafe) {
|
|
180
|
-
processed = makeDatabaseSafe(processed);
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
// Re-attach title if it was extracted
|
|
184
|
-
if (extractedTitle) {
|
|
185
|
-
processed = extractedTitle + ' ' + processed;
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
const result = {
|
|
189
|
-
corrected: processed,
|
|
190
|
-
original: name,
|
|
191
|
-
isValid: validateGreekName(processed, config.strictMode)
|
|
192
|
-
};
|
|
193
|
-
|
|
194
|
-
if (extractedTitle) {
|
|
195
|
-
result.title = extractedTitle;
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
if (config.detectGender) {
|
|
199
|
-
result.gender = detectGender(processed);
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
if (config.splitNames) {
|
|
203
|
-
result.parts = splitNameParts(processed);
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
if (config.detectDiminutive) {
|
|
207
|
-
result.diminutive = detectDiminutive(processed);
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
if (genitiveForm) {
|
|
211
|
-
result.genitive = genitiveForm;
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
if (config.generateSortKey) {
|
|
215
|
-
result.sortKey = generateSortKey(processed);
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
if (config.statistics) {
|
|
219
|
-
result.statistics = generateStatistics(processed, name);
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
if (suggestion && config.suggestCorrections) {
|
|
223
|
-
result.wasCorrected = true;
|
|
224
|
-
result.suggestedCorrection = suggestion;
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
return config.preserveOriginal ? result : processed;
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
// Extract title from name
|
|
231
|
-
function extractTitle(name) {
|
|
232
|
-
const words = name.trim().split(/\s+/);
|
|
233
|
-
const firstWord = words[0];
|
|
234
|
-
|
|
235
|
-
// Check if first word is a title (case-insensitive)
|
|
236
|
-
const foundTitle = titles.find(t =>
|
|
237
|
-
firstWord.toLowerCase() === t.toLowerCase() ||
|
|
238
|
-
firstWord.toLowerCase() + '.' === t.toLowerCase() ||
|
|
239
|
-
firstWord.toLowerCase() === t.toLowerCase() + '.'
|
|
240
|
-
);
|
|
241
|
-
|
|
242
|
-
if (foundTitle) {
|
|
243
|
-
return {
|
|
244
|
-
title: capitalizeGreekName(foundTitle),
|
|
245
|
-
name: words.slice(1).join(' ')
|
|
246
|
-
};
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
return { title: null, name: name };
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
// Transliteration function
|
|
253
|
-
function transliterate(text, mode) {
|
|
254
|
-
if (mode === 'greeklish-to-greek') {
|
|
255
|
-
return greeklishToGreek(text);
|
|
256
|
-
} else if (mode === 'greek-to-latin') {
|
|
257
|
-
return greekToLatin(text);
|
|
258
|
-
} else if (mode === 'greek-to-greeklish') {
|
|
259
|
-
return greekToGreeklish(text);
|
|
260
|
-
}
|
|
261
|
-
return text;
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
// Greeklish to Greek conversion
|
|
265
|
-
function greeklishToGreek(text) {
|
|
266
|
-
let result = text.toLowerCase();
|
|
267
|
-
|
|
268
|
-
// Sort by length (longest first) to handle multi-character combinations
|
|
269
|
-
const sortedKeys = Object.keys(greeklishToGreekMap).sort((a, b) => b.length - a.length);
|
|
270
|
-
|
|
271
|
-
for (const key of sortedKeys) {
|
|
272
|
-
const regex = new RegExp(key, 'gi');
|
|
273
|
-
result = result.replace(regex, greeklishToGreekMap[key]);
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
return result;
|
|
277
|
-
}
|
|
278
|
-
|
|
279
|
-
// Greek to Latin conversion
|
|
280
|
-
function greekToLatin(text) {
|
|
281
|
-
let result = '';
|
|
282
|
-
for (let i = 0; i < text.length; i++) {
|
|
283
|
-
const char = text[i];
|
|
284
|
-
result += greekToLatinMap[char] || char;
|
|
285
|
-
}
|
|
286
|
-
return result;
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
// Greek to Greeklish conversion
|
|
290
|
-
function greekToGreeklish(text) {
|
|
291
|
-
let result = '';
|
|
292
|
-
for (let i = 0; i < text.length; i++) {
|
|
293
|
-
const char = text[i];
|
|
294
|
-
result += greekToGreeklishMap[char] || char;
|
|
295
|
-
}
|
|
296
|
-
return result;
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
// Convert Katharevousa to modern Greek
|
|
300
|
-
function convertKatharevousa(text) {
|
|
301
|
-
let result = text;
|
|
302
|
-
|
|
303
|
-
for (const [old, modern] of Object.entries(katharevousaMap)) {
|
|
304
|
-
const regex = new RegExp(old + '$', 'i');
|
|
305
|
-
result = result.replace(regex, modern);
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
return result;
|
|
309
|
-
}
|
|
310
|
-
|
|
311
|
-
// Suggest correction for common misspellings
|
|
312
|
-
function suggestCorrection(text) {
|
|
313
|
-
const words = text.toLowerCase().split(/\s+/);
|
|
314
|
-
let corrected = false;
|
|
315
|
-
|
|
316
|
-
const correctedWords = words.map(word => {
|
|
317
|
-
if (commonCorrections[word]) {
|
|
318
|
-
corrected = true;
|
|
319
|
-
return commonCorrections[word];
|
|
320
|
-
}
|
|
321
|
-
return word;
|
|
322
|
-
});
|
|
323
|
-
|
|
324
|
-
return corrected ? correctedWords.join(' ') : null;
|
|
325
|
-
}
|
|
326
|
-
|
|
327
|
-
// Convert to genitive case
|
|
328
|
-
function convertToGenitive(name) {
|
|
329
|
-
const parts = name.split(/\s+/);
|
|
330
|
-
const lastName = parts[parts.length - 1];
|
|
331
|
-
const lowerLast = lastName.toLowerCase();
|
|
332
|
-
|
|
333
|
-
let genitive = lastName;
|
|
334
|
-
|
|
335
|
-
// Male genitive rules
|
|
336
|
-
if (lowerLast.endsWith('ος')) {
|
|
337
|
-
genitive = lastName.slice(0, -2) + 'ου';
|
|
338
|
-
} else if (lowerLast.endsWith('ας')) {
|
|
339
|
-
genitive = lastName.slice(0, -2) + 'α';
|
|
340
|
-
} else if (lowerLast.endsWith('ης')) {
|
|
341
|
-
genitive = lastName.slice(0, -2) + 'η';
|
|
342
|
-
}
|
|
343
|
-
// Female names often stay the same or change differently
|
|
344
|
-
else if (lowerLast.endsWith('ου')) {
|
|
345
|
-
genitive = lastName; // Already genitive
|
|
346
|
-
} else if (lowerLast.endsWith('α')) {
|
|
347
|
-
genitive = lastName.slice(0, -1) + 'ας';
|
|
348
|
-
} else if (lowerLast.endsWith('η')) {
|
|
349
|
-
genitive = lastName.slice(0, -1) + 'ης';
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
parts[parts.length - 1] = genitive;
|
|
353
|
-
return parts.join(' ');
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
// Detect diminutive forms
|
|
357
|
-
function detectDiminutive(name) {
|
|
358
|
-
const parts = name.toLowerCase().split(/\s+/);
|
|
359
|
-
const results = [];
|
|
360
|
-
|
|
361
|
-
for (const part of parts) {
|
|
362
|
-
for (const [ending, baseEnding] of Object.entries(diminutivePatterns)) {
|
|
363
|
-
if (part.endsWith(ending)) {
|
|
364
|
-
results.push({
|
|
365
|
-
word: capitalizeGreekName(part),
|
|
366
|
-
isDiminutive: true,
|
|
367
|
-
possibleBase: part.slice(0, -ending.length) + baseEnding,
|
|
368
|
-
diminutiveType: ending
|
|
369
|
-
});
|
|
370
|
-
break;
|
|
371
|
-
}
|
|
372
|
-
}
|
|
373
|
-
}
|
|
374
|
-
|
|
375
|
-
return results.length > 0 ? results : null;
|
|
376
|
-
}
|
|
377
|
-
|
|
378
|
-
// Make database-safe
|
|
379
|
-
function makeDatabaseSafe(text) {
|
|
380
|
-
// Remove potentially problematic characters for databases
|
|
381
|
-
return text
|
|
382
|
-
.replace(/[^\u0370-\u03FF\u1F00-\u1FFF\w\s\-']/g, '')
|
|
383
|
-
.trim()
|
|
384
|
-
.replace(/\s+/g, ' ');
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
// Generate sorting key (remove accents for sorting)
|
|
388
|
-
function generateSortKey(text) {
|
|
389
|
-
const accentMap = {
|
|
390
|
-
'ά': 'α', 'έ': 'ε', 'ή': 'η', 'ί': 'ι', 'ό': 'ο', 'ύ': 'υ', 'ώ': 'ω',
|
|
391
|
-
'Ά': 'Α', 'Έ': 'Ε', 'Ή': 'Η', 'Ί': 'Ι', 'Ό': 'Ο', 'Ύ': 'Υ', 'Ώ': 'Ω',
|
|
392
|
-
'ϊ': 'ι', 'ϋ': 'υ', 'ΐ': 'ι', 'ΰ': 'υ', 'Ϊ': 'Ι', 'Ϋ': 'Υ',
|
|
393
|
-
'ς': 'σ'
|
|
394
|
-
};
|
|
395
|
-
|
|
396
|
-
let sortKey = text.toLowerCase();
|
|
397
|
-
for (const [accented, plain] of Object.entries(accentMap)) {
|
|
398
|
-
sortKey = sortKey.replace(new RegExp(accented, 'g'), plain);
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
return sortKey;
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
// Generate statistics
|
|
405
|
-
function generateStatistics(processed, original) {
|
|
406
|
-
const words = processed.split(/\s+/).filter(w => !isGreekParticle(w.toLowerCase()));
|
|
407
|
-
|
|
408
|
-
const stats = {
|
|
409
|
-
length: processed.length,
|
|
410
|
-
originalLength: original.length,
|
|
411
|
-
wordCount: words.length,
|
|
412
|
-
hasParticles: processed.split(/\s+/).some(w => isGreekParticle(w.toLowerCase())),
|
|
413
|
-
hasAccents: /[άέήίόύώΆΈΉΊΌΎΏ]/.test(processed),
|
|
414
|
-
hasDiaeresis: /[ϊϋΐΰ]/.test(processed),
|
|
415
|
-
isAllCaps: original === original.toUpperCase() && /[Α-Ωα-ω]/.test(original),
|
|
416
|
-
isAllLower: original === original.toLowerCase() && /[Α-Ωα-ω]/.test(original),
|
|
417
|
-
hasNumbers: /\d/.test(processed),
|
|
418
|
-
hasSpecialChars: /[^Α-Ωα-ωίϊΐόάέύϋΰήώ\s\-']/.test(processed)
|
|
419
|
-
};
|
|
420
|
-
|
|
421
|
-
return stats;
|
|
422
|
-
}
|
|
423
|
-
|
|
424
|
-
// Capitalize Greek names properly
|
|
425
|
-
function capitalizeGreekName(str) {
|
|
426
|
-
if (!str || str.length === 0) return str;
|
|
427
|
-
|
|
428
|
-
const first = str.charAt(0).toUpperCase();
|
|
429
|
-
const rest = str.slice(1).toLowerCase();
|
|
430
|
-
|
|
431
|
-
return first + rest;
|
|
432
|
-
}
|
|
433
|
-
|
|
434
|
-
// Normalize Greek tonetics
|
|
435
|
-
function normalizeGreekTonotics(str) {
|
|
436
|
-
const tonoticMap = {
|
|
437
|
-
'ά': 'ά', 'έ': 'έ', 'ή': 'ή', 'ί': 'ί', 'ό': 'ό', 'ύ': 'ύ', 'ώ': 'ώ',
|
|
438
|
-
'Ά': 'Ά', 'Έ': 'Έ', 'Ή': 'Ή', 'Ί': 'Ί', 'Ό': 'Ό', 'Ύ': 'Ύ', 'Ώ': 'Ώ',
|
|
439
|
-
'ϊ': 'ϊ', 'ϋ': 'ϋ', 'ΐ': 'ΐ', 'ΰ': 'ΰ',
|
|
440
|
-
'Ϊ': 'Ϊ', 'Ϋ': 'Ϋ'
|
|
441
|
-
};
|
|
442
|
-
|
|
443
|
-
return str.split('').map(char => tonoticMap[char] || char).join('');
|
|
444
|
-
}
|
|
445
|
-
|
|
446
|
-
// Handle Greek diacritics
|
|
447
|
-
function handleGreekDiacritics(str) {
|
|
448
|
-
return str.normalize('NFC');
|
|
449
|
-
}
|
|
450
|
-
|
|
451
|
-
// Check if word is a Greek particle
|
|
452
|
-
function isGreekParticle(word) {
|
|
453
|
-
const particles = [
|
|
454
|
-
'του', 'της', 'των', 'τον', 'την', 'τα', 'το',
|
|
455
|
-
'και', 'ή', 'ειδών', 'εκ', 'εξ'
|
|
456
|
-
];
|
|
457
|
-
return particles.includes(word);
|
|
458
|
-
}
|
|
459
|
-
|
|
460
|
-
// Validate Greek name
|
|
461
|
-
function validateGreekName(name, strict = false) {
|
|
462
|
-
if (!name || name.trim() === '') return false;
|
|
463
|
-
|
|
464
|
-
const greekPattern = /^[Α-Ωα-ωίϊΐόάέύϋΰήώ\s\-']+$/;
|
|
465
|
-
|
|
466
|
-
if (strict) {
|
|
467
|
-
return greekPattern.test(name);
|
|
468
|
-
}
|
|
469
|
-
|
|
470
|
-
const hasGreek = /[Α-Ωα-ω]/.test(name);
|
|
471
|
-
const validChars = /^[A-Za-zΑ-Ωα-ωίϊΐόάέύϋΰήώ\s\-']+$/.test(name);
|
|
472
|
-
|
|
473
|
-
return hasGreek && validChars;
|
|
474
|
-
}
|
|
475
|
-
|
|
476
|
-
// Detect gender from name endings
|
|
477
|
-
function detectGender(fullname) {
|
|
478
|
-
// Remove title if present
|
|
479
|
-
const parts = fullname.split(/\s+/).filter(w => !titles.includes(w));
|
|
480
|
-
const lastName = parts[parts.length - 1]?.toLowerCase();
|
|
481
|
-
|
|
482
|
-
if (!lastName) return 'unknown';
|
|
483
|
-
|
|
484
|
-
const maleEndings = ['ος', 'ης', 'ας', 'ούς'];
|
|
485
|
-
const femaleEndings = ['ου', 'α', 'η'];
|
|
486
|
-
|
|
487
|
-
for (const ending of maleEndings) {
|
|
488
|
-
if (lastName.endsWith(ending)) return 'male';
|
|
489
|
-
}
|
|
490
|
-
|
|
491
|
-
for (const ending of femaleEndings) {
|
|
492
|
-
if (lastName.endsWith(ending)) return 'female';
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
return 'unknown';
|
|
496
|
-
}
|
|
497
|
-
|
|
498
|
-
// Split name into parts
|
|
499
|
-
function splitNameParts(fullname) {
|
|
500
|
-
const parts = fullname.split(/\s+/).filter(p =>
|
|
501
|
-
!isGreekParticle(p.toLowerCase()) && !titles.includes(p)
|
|
502
|
-
);
|
|
503
|
-
|
|
504
|
-
if (parts.length === 0) return { firstName: '', lastName: '' };
|
|
505
|
-
if (parts.length === 1) return { firstName: parts[0], lastName: '' };
|
|
506
|
-
if (parts.length === 2) return { firstName: parts[0], lastName: parts[1] };
|
|
507
|
-
|
|
508
|
-
return {
|
|
509
|
-
firstName: parts[0],
|
|
510
|
-
middleName: parts.slice(1, -1).join(' '),
|
|
511
|
-
lastName: parts[parts.length - 1]
|
|
512
|
-
};
|
|
513
|
-
}
|
|
514
|
-
|
|
515
|
-
// Handle different input types
|
|
516
|
-
if (typeof input === 'string') {
|
|
517
|
-
return processName(input);
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
if (Array.isArray(input)) {
|
|
521
|
-
return input.map(item => {
|
|
522
|
-
if (typeof item === 'string') {
|
|
523
|
-
return processName(item);
|
|
524
|
-
}
|
|
525
|
-
if (typeof item === 'object' && item !== null) {
|
|
526
|
-
const result = { ...item };
|
|
527
|
-
if (item[config.jsonKey]) {
|
|
528
|
-
const processed = processName(item[config.jsonKey]);
|
|
529
|
-
if (config.preserveOriginal && typeof processed === 'object') {
|
|
530
|
-
Object.assign(result, processed);
|
|
531
|
-
} else {
|
|
532
|
-
result[config.outputKey] = processed;
|
|
533
|
-
}
|
|
534
|
-
}
|
|
535
|
-
return result;
|
|
536
|
-
}
|
|
537
|
-
return item;
|
|
538
|
-
});
|
|
539
|
-
}
|
|
540
|
-
|
|
541
|
-
if (typeof input === 'object' && input !== null) {
|
|
542
|
-
const result = { ...input };
|
|
543
|
-
if (input[config.jsonKey]) {
|
|
544
|
-
const processed = processName(input[config.jsonKey]);
|
|
545
|
-
if (config.preserveOriginal && typeof processed === 'object') {
|
|
546
|
-
Object.assign(result, processed);
|
|
547
|
-
} else {
|
|
548
|
-
result[config.outputKey] = processed;
|
|
549
|
-
}
|
|
550
|
-
}
|
|
551
|
-
return result;
|
|
552
|
-
}
|
|
553
|
-
|
|
554
|
-
return input;
|
|
555
|
-
}
|
|
556
|
-
|
|
557
|
-
module.exports = GreekNameCorrection;
|
|
4
|
+
module.exports = require("./src/index");
|
package/package.json
CHANGED
|
@@ -1,27 +1,64 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "greek-name-correction",
|
|
3
|
-
"version": "2.0.0",
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "A zero-dependency Node.js library for correcting and formatting Greek names with transliteration, genitive conversion, and advanced features",
|
|
5
|
-
"main": "index.js",
|
|
5
|
+
"main": "src/index.js",
|
|
6
6
|
"scripts": {
|
|
7
|
-
|
|
7
|
+
"test": "jest",
|
|
8
|
+
"test:watch": "jest --watch",
|
|
9
|
+
"test:coverage": "jest --coverage"
|
|
8
10
|
},
|
|
9
11
|
"keywords": [
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
12
|
+
"greek",
|
|
13
|
+
"names",
|
|
14
|
+
"correction",
|
|
15
|
+
"formatting",
|
|
16
|
+
"validation",
|
|
17
|
+
"normalization",
|
|
18
|
+
"transliteration",
|
|
19
|
+
"greeklish",
|
|
20
|
+
"genitive",
|
|
21
|
+
"greek-language",
|
|
22
|
+
"katharevousa"
|
|
21
23
|
],
|
|
22
24
|
"author": "Stavros",
|
|
23
25
|
"license": "MIT",
|
|
26
|
+
"repository": {
|
|
27
|
+
"type": "git",
|
|
28
|
+
"url": "https://github.com/sraftopo/greek-name-correction.git"
|
|
29
|
+
},
|
|
30
|
+
"bugs": {
|
|
31
|
+
"url": "https://github.com/sraftopo/greek-name-correction/issues"
|
|
32
|
+
},
|
|
33
|
+
"homepage": "https://github.com/sraftopo/greek-name-correction#readme",
|
|
24
34
|
"engines": {
|
|
25
|
-
|
|
35
|
+
"node": ">=12.0.0"
|
|
36
|
+
},
|
|
37
|
+
"files": [
|
|
38
|
+
"src/",
|
|
39
|
+
"index.js",
|
|
40
|
+
"README.md",
|
|
41
|
+
"LICENSE"
|
|
42
|
+
],
|
|
43
|
+
"devDependencies": {
|
|
44
|
+
"jest": "^29.7.0"
|
|
45
|
+
},
|
|
46
|
+
"jest": {
|
|
47
|
+
"testEnvironment": "node",
|
|
48
|
+
"collectCoverageFrom": [
|
|
49
|
+
"src/**/*.js"
|
|
50
|
+
],
|
|
51
|
+
"coveragePathIgnorePatterns": [
|
|
52
|
+
"/node_modules/",
|
|
53
|
+
"/test/"
|
|
54
|
+
],
|
|
55
|
+
"coverageThreshold": {
|
|
56
|
+
"global": {
|
|
57
|
+
"branches": 70,
|
|
58
|
+
"functions": 70,
|
|
59
|
+
"lines": 70,
|
|
60
|
+
"statements": 70
|
|
61
|
+
}
|
|
62
|
+
}
|
|
26
63
|
}
|
|
27
|
-
|
|
64
|
+
}
|