surname-transliterator 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/lib/surname/transliterator/version.rb +1 -1
- data/lib/surname/transliterator.rb +23 -10
- metadata +5 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3b89068379dd610abf4b713b922fb2a10859f0bdbd9212bd62df1cf78a6de22e
|
|
4
|
+
data.tar.gz: 96f02a045635b271baa5b95dd5d731f0165d1634de1c4be5e7d2d74fb9e88c16
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a4767ed01232c158e6090e04744ea0156d2c87191622b7b1e626eac916285f4529f0281f3cb374978cc7f757c5c358713323fe39c4c11f40c812bdc504dffcda
|
|
7
|
+
data.tar.gz: c68d19e198eccc518e86998d057e45b7ffc35034de5f71c303d7677940acdfcebac0e5d227af35e6099c04b32c1068e93d115cc095853b353b5900fbe6e0a750
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.4.0] - 2025-01-01
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Support for additional polonization mappings: 'ak' → 'akas', 'cki' → 'ckis'/'ckas', 'owski' → 'ovicius'
|
|
12
|
+
- Polish digraph handling in transliteration: 'sz' → 'š', 'cz' → 'č', 'rz' → 'ž'
|
|
13
|
+
- W/V interchange variants for genealogical matching
|
|
14
|
+
- Expanded test suite with more FN examples and edge cases
|
|
15
|
+
- MFA requirement in gemspec for security
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
- Improved transform_ending to handle multiple overlapping suffixes
|
|
19
|
+
- Updated normalize_surname to include original transliterated forms
|
|
20
|
+
- Enhanced gemspec metadata for better compliance
|
|
21
|
+
|
|
8
22
|
## [0.3.0] - 2025-01-01
|
|
9
23
|
|
|
10
24
|
### Changed
|
data/lib/surname/transliterator.rb
CHANGED
|
@@ -88,22 +88,26 @@ module Surname
|
|
|
88
88
|
POLONIZATION_MAPPINGS = {
|
|
89
89
|
'polish_to_lithuanian' => {
|
|
90
90
|
'owicz' => ['avičius'],
|
|
91
|
-
'owski' => %w[ovskis ovskas],
|
|
91
|
+
'owski' => %w[ovskis ovskas ovicius],
|
|
92
92
|
'ewski' => %w[evskis evskas],
|
|
93
93
|
'icki' => ['ickis'],
|
|
94
|
+
'ak' => ['akas'],
|
|
94
95
|
'ski' => %w[skis skas],
|
|
95
|
-
'cki' => [
|
|
96
|
+
'cki' => %w[ckis ckas]
|
|
96
97
|
},
|
|
97
98
|
'lithuanian_to_polish' => {
|
|
98
99
|
'avičius' => ['owicz'],
|
|
99
100
|
'ovskis' => ['owski'],
|
|
100
101
|
'ovskas' => ['owski'],
|
|
102
|
+
'ovicius' => ['owski'],
|
|
101
103
|
'evskis' => ['ewski'],
|
|
102
104
|
'evskas' => ['ewski'],
|
|
103
105
|
'ickis' => ['icki'],
|
|
106
|
+
'akas' => ['ak'],
|
|
104
107
|
'skis' => ['ski'],
|
|
105
108
|
'skas' => ['ski'],
|
|
106
109
|
'ckis' => ['cki'],
|
|
110
|
+
'ckas' => ['cki'],
|
|
107
111
|
'onis' => ['owicz'], # e.g., Jonas → Janowicz
|
|
108
112
|
'aitis' => ['owicz'] # rarer, e.g., Kazlauskas variations
|
|
109
113
|
},
|
|
@@ -133,7 +137,7 @@ module Surname
|
|
|
133
137
|
end
|
|
134
138
|
|
|
135
139
|
# Handle Polish digraphs
|
|
136
|
-
normalized = normalized.gsub('sz', '
|
|
140
|
+
normalized = normalized.gsub('sz', 'š').gsub('cz', 'č').gsub('rz', 'ž') if from_lang == 'polish'
|
|
137
141
|
|
|
138
142
|
normalized.capitalize
|
|
139
143
|
end
|
|
@@ -147,13 +151,17 @@ module Surname
|
|
|
147
151
|
|
|
148
152
|
normalized = surname.downcase
|
|
149
153
|
variants = []
|
|
150
|
-
endings
|
|
154
|
+
# Sort endings by length descending to match longest first
|
|
155
|
+
sorted_endings = endings.sort_by { |k, v| -k.length }
|
|
156
|
+
sorted_endings.each do |from_ending, to_endings|
|
|
151
157
|
next unless normalized.end_with?(from_ending)
|
|
152
158
|
|
|
153
159
|
Array(to_endings).each do |to_ending|
|
|
154
160
|
transformed = normalized.sub(/#{from_ending}$/, to_ending)
|
|
155
161
|
variants << transformed.capitalize
|
|
156
162
|
end
|
|
163
|
+
# Break after first match to avoid overlapping
|
|
164
|
+
break
|
|
157
165
|
end
|
|
158
166
|
|
|
159
167
|
variants.uniq
|
|
@@ -161,12 +169,17 @@ module Surname
|
|
|
161
169
|
|
|
162
170
|
# Full cross-language surname normalization
|
|
163
171
|
def self.normalize_surname(surname, from_lang, to_lang)
|
|
164
|
-
# First,
|
|
165
|
-
|
|
166
|
-
# Then,
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
172
|
+
# First, transform endings if applicable
|
|
173
|
+
transformed_variants = transform_ending(surname, from_lang, to_lang)
|
|
174
|
+
# Then, transliterate each variant to remove diacritics and handle digraphs
|
|
175
|
+
variants = transformed_variants.map { |v| transliterate(v, from_lang) }
|
|
176
|
+
|
|
177
|
+
# If no transformation, add the transliterated original
|
|
178
|
+
if transformed_variants == [surname]
|
|
179
|
+
# Already included
|
|
180
|
+
else
|
|
181
|
+
variants << transliterate(surname, from_lang)
|
|
182
|
+
end
|
|
170
183
|
|
|
171
184
|
# Add W/V interchange variants for genealogical matching
|
|
172
185
|
additional = []
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: surname-transliterator
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.4.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Justyna Wojtczak
|
|
@@ -26,13 +26,14 @@ files:
|
|
|
26
26
|
- lib/surname/transliterator.rb
|
|
27
27
|
- lib/surname/transliterator/version.rb
|
|
28
28
|
- sig/surname/transliterator.rbs
|
|
29
|
-
homepage: https://github.com/
|
|
29
|
+
homepage: https://github.com/justi-blue/surname-transliterator
|
|
30
30
|
licenses:
|
|
31
31
|
- MIT
|
|
32
32
|
metadata:
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
source_code_uri: https://github.com/justi-blue/surname-transliterator/tree/main
|
|
34
|
+
homepage_uri: https://github.com/justi-blue/surname-transliterator
|
|
35
35
|
changelog_uri: https://github.com/justi-blue/surname-transliterator/blob/main/CHANGELOG.md
|
|
36
|
+
rubygems_mfa_required: 'true'
|
|
36
37
|
rdoc_options: []
|
|
37
38
|
require_paths:
|
|
38
39
|
- lib
|