surname-transliterator 0.3.0 → 0.4.0
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +0 -1
- data/README.md +2 -2
- data/lib/surname/transliterator/version.rb +1 -1
- data/lib/surname/transliterator.rb +46 -25
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c6d5ceefee22590bad215a0c6d5a1a8749f8d244f3ca6f734bb4ad6911de6c48
|
|
4
|
+
data.tar.gz: f17c9186d02d35238754bf6a01a8e8e8dad8a003b2fa269b45363b4e3f6f2695
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 16143d3c5c7e15bcede3372209f74264f594203d4d086d42e277de5babe044f95e11a52dfbf0b72f37141d835238e09762474f23c480b8304bfd20df46ca9433
|
|
7
|
+
data.tar.gz: a3051c7dc46b3739d967b99a3fef1f15d0c80d4a8283478b73d2331913d49fa4a5bae8a1dfd7df6ed2bf234eaa43037040de534a395bf115953c53c03dceb009
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -13,13 +13,13 @@ TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_O
|
|
|
13
13
|
Install the gem and add to the application's Gemfile by executing:
|
|
14
14
|
|
|
15
15
|
```bash
|
|
16
|
-
bundle add
|
|
16
|
+
bundle add surname-transliterator
|
|
17
17
|
```
|
|
18
18
|
|
|
19
19
|
If bundler is not being used to manage dependencies, install the gem by executing:
|
|
20
20
|
|
|
21
21
|
```bash
|
|
22
|
-
gem install
|
|
22
|
+
gem install surname-transliterator
|
|
23
23
|
```
|
|
24
24
|
|
|
25
25
|
## Usage
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative
|
|
3
|
+
require_relative 'transliterator/version'
|
|
4
4
|
|
|
5
5
|
module Surname
|
|
6
6
|
module Transliterator
|
|
@@ -87,22 +87,25 @@ module Surname
|
|
|
87
87
|
# Polonization/de-polonization mappings for specific pairs (based on genealogical sources)
|
|
88
88
|
POLONIZATION_MAPPINGS = {
|
|
89
89
|
'polish_to_lithuanian' => {
|
|
90
|
-
'owicz' => 'avičius',
|
|
91
|
-
'owski' =>
|
|
92
|
-
'ewski' =>
|
|
93
|
-
'icki' => 'ickis',
|
|
94
|
-
'ski' =>
|
|
95
|
-
'cki' => 'ckis'
|
|
90
|
+
'owicz' => ['avičius'],
|
|
91
|
+
'owski' => %w[ovskis ovskas],
|
|
92
|
+
'ewski' => %w[evskis evskas],
|
|
93
|
+
'icki' => ['ickis'],
|
|
94
|
+
'ski' => %w[skis skas],
|
|
95
|
+
'cki' => ['ckis']
|
|
96
96
|
},
|
|
97
97
|
'lithuanian_to_polish' => {
|
|
98
|
-
'avičius' => 'owicz',
|
|
99
|
-
'
|
|
100
|
-
'
|
|
101
|
-
'
|
|
102
|
-
'
|
|
103
|
-
'
|
|
104
|
-
'
|
|
105
|
-
'
|
|
98
|
+
'avičius' => ['owicz'],
|
|
99
|
+
'ovskis' => ['owski'],
|
|
100
|
+
'ovskas' => ['owski'],
|
|
101
|
+
'evskis' => ['ewski'],
|
|
102
|
+
'evskas' => ['ewski'],
|
|
103
|
+
'ickis' => ['icki'],
|
|
104
|
+
'skis' => ['ski'],
|
|
105
|
+
'skas' => ['ski'],
|
|
106
|
+
'ckis' => ['cki'],
|
|
107
|
+
'onis' => ['owicz'], # e.g., Jonas → Janowicz
|
|
108
|
+
'aitis' => ['owicz'] # rarer, e.g., Kazlauskas variations
|
|
106
109
|
},
|
|
107
110
|
'polish_to_russian' => {
|
|
108
111
|
'ski' => 'skii',
|
|
@@ -129,24 +132,31 @@ module Surname
|
|
|
129
132
|
normalized = normalized.gsub(accented, base)
|
|
130
133
|
end
|
|
131
134
|
|
|
135
|
+
# Handle Polish digraphs
|
|
136
|
+
normalized = normalized.gsub('sz', 's') if from_lang == 'polish'
|
|
137
|
+
|
|
132
138
|
normalized.capitalize
|
|
133
139
|
end
|
|
134
140
|
|
|
135
141
|
# Polonization/de-polonization between languages
|
|
136
142
|
def self.transform_ending(surname, from_lang, to_lang)
|
|
137
|
-
return surname if surname.nil? || surname.empty?
|
|
143
|
+
return [surname] if surname.nil? || surname.empty?
|
|
138
144
|
|
|
139
145
|
key = "#{from_lang}_to_#{to_lang}"
|
|
140
146
|
endings = POLONIZATION_MAPPINGS[key] || {}
|
|
141
147
|
|
|
142
148
|
normalized = surname.downcase
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
149
|
+
variants = []
|
|
150
|
+
endings.each do |from_ending, to_endings|
|
|
151
|
+
next unless normalized.end_with?(from_ending)
|
|
152
|
+
|
|
153
|
+
Array(to_endings).each do |to_ending|
|
|
154
|
+
transformed = normalized.sub(/#{from_ending}$/, to_ending)
|
|
155
|
+
variants << transformed.capitalize
|
|
146
156
|
end
|
|
147
157
|
end
|
|
148
158
|
|
|
149
|
-
|
|
159
|
+
variants.uniq
|
|
150
160
|
end
|
|
151
161
|
|
|
152
162
|
# Full cross-language surname normalization
|
|
@@ -154,11 +164,22 @@ module Surname
|
|
|
154
164
|
# First, transliterate to remove diacritics
|
|
155
165
|
transliterated = transliterate(surname, from_lang)
|
|
156
166
|
# Then, transform endings if applicable
|
|
157
|
-
|
|
158
|
-
# Return variants:
|
|
159
|
-
variants = [
|
|
160
|
-
|
|
161
|
-
variants
|
|
167
|
+
transformed_variants = transform_ending(transliterated, from_lang, to_lang)
|
|
168
|
+
# Return variants: transliterated plus transformed variants
|
|
169
|
+
variants = [transliterated] + transformed_variants
|
|
170
|
+
|
|
171
|
+
# Add W/V interchange variants for genealogical matching
|
|
172
|
+
additional = []
|
|
173
|
+
variants.each do |v|
|
|
174
|
+
if v.start_with?('W')
|
|
175
|
+
additional << v.sub(/^W/, 'V')
|
|
176
|
+
elsif v.start_with?('V')
|
|
177
|
+
additional << v.sub(/^V/, 'W')
|
|
178
|
+
end
|
|
179
|
+
end
|
|
180
|
+
variants.concat(additional)
|
|
181
|
+
|
|
182
|
+
variants.uniq.reject { |v| v.nil? || v.empty? }
|
|
162
183
|
end
|
|
163
184
|
|
|
164
185
|
# Convenience methods
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: surname-transliterator
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Justyna Wojtczak
|
|
@@ -31,7 +31,7 @@ licenses:
|
|
|
31
31
|
- MIT
|
|
32
32
|
metadata:
|
|
33
33
|
homepage_uri: https://github.com/justine84/surname-transliterator
|
|
34
|
-
source_code_uri: https://github.com/justine84/surname-transliterator
|
|
34
|
+
source_code_uri: https://github.com/justine84/surname-transliterator/tree/main
|
|
35
35
|
changelog_uri: https://github.com/justi-blue/surname-transliterator/blob/main/CHANGELOG.md
|
|
36
36
|
rdoc_options: []
|
|
37
37
|
require_paths:
|