surname-transliterator 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -1
- data/README.md +2 -2
- data/lib/surname/transliterator/version.rb +1 -1
- data/lib/surname/transliterator.rb +62 -28
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ea26f1b796400d4bd4f0fc9ee9cfbe7116e0520eb97769a47aa7e57c190bceb0
|
|
4
|
+
data.tar.gz: 94b1ec91e13d670fe22e1e95d91d6d08f5f7cae9fd5c393881ae9c72dd35eb15
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 40b4a8c3b1c88c34365e4e0631f93f738a93f58702adc2ad9f2423adf57aeb9cb9dcafd1cb2e602c9dbf11ffa7d233d2cb378d942cfb515e84d51a24f6d3c72b
|
|
7
|
+
data.tar.gz: 06d92b4c47e4b9d50daf524cda06b29e92ac2af1e8b03f6bea69c4ebc537690f981e37ef838a35241af926d342a637efe9198113ba846f564d1719d366804eac
|
data/CHANGELOG.md
CHANGED
|
@@ -5,10 +5,23 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.4.0] - 2025-01-01
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Support for additional polonization mappings: 'ak' → 'akas', 'cki' → 'ckis'/'ckas', 'owski' → 'ovicius'
|
|
12
|
+
- Polish digraph handling in transliteration: 'sz' → 'š', 'cz' → 'č', 'rz' → 'ž'
|
|
13
|
+
- W/V interchange variants for genealogical matching
|
|
14
|
+
- Expanded test suite with more FN examples and edge cases
|
|
15
|
+
- MFA requirement in gemspec for security
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
- Improved transform_ending to handle multiple overlapping suffixes
|
|
19
|
+
- Updated normalize_surname to include original transliterated forms
|
|
20
|
+
- Enhanced gemspec metadata for better compliance
|
|
21
|
+
|
|
8
22
|
## [0.3.0] - 2025-01-01
|
|
9
23
|
|
|
10
24
|
### Changed
|
|
11
|
-
- Moved gem to independent directory for standalone publishing
|
|
12
25
|
- Require Ruby 3.1+ for compatibility
|
|
13
26
|
|
|
14
27
|
## [0.2.0] - 2025-01-01
|
data/README.md
CHANGED
|
@@ -13,13 +13,13 @@ TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_O
|
|
|
13
13
|
Install the gem and add to the application's Gemfile by executing:
|
|
14
14
|
|
|
15
15
|
```bash
|
|
16
|
-
bundle add
|
|
16
|
+
bundle add surname-transliterator
|
|
17
17
|
```
|
|
18
18
|
|
|
19
19
|
If bundler is not being used to manage dependencies, install the gem by executing:
|
|
20
20
|
|
|
21
21
|
```bash
|
|
22
|
-
gem install
|
|
22
|
+
gem install surname-transliterator
|
|
23
23
|
```
|
|
24
24
|
|
|
25
25
|
## Usage
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative
|
|
3
|
+
require_relative 'transliterator/version'
|
|
4
4
|
|
|
5
5
|
module Surname
|
|
6
6
|
module Transliterator
|
|
@@ -87,22 +87,29 @@ module Surname
|
|
|
87
87
|
# Polonization/de-polonization mappings for specific pairs (based on genealogical sources)
|
|
88
88
|
POLONIZATION_MAPPINGS = {
|
|
89
89
|
'polish_to_lithuanian' => {
|
|
90
|
-
'owicz' => 'avičius',
|
|
91
|
-
'owski' =>
|
|
92
|
-
'ewski' =>
|
|
93
|
-
'icki' => 'ickis',
|
|
94
|
-
'
|
|
95
|
-
'
|
|
90
|
+
'owicz' => ['avičius'],
|
|
91
|
+
'owski' => %w[ovskis ovskas ovicius],
|
|
92
|
+
'ewski' => %w[evskis evskas],
|
|
93
|
+
'icki' => ['ickis'],
|
|
94
|
+
'ak' => ['akas'],
|
|
95
|
+
'ski' => %w[skis skas],
|
|
96
|
+
'cki' => %w[ckis ckas]
|
|
96
97
|
},
|
|
97
98
|
'lithuanian_to_polish' => {
|
|
98
|
-
'avičius' => 'owicz',
|
|
99
|
-
'
|
|
100
|
-
'
|
|
101
|
-
'
|
|
102
|
-
'
|
|
103
|
-
'
|
|
104
|
-
'
|
|
105
|
-
'
|
|
99
|
+
'avičius' => ['owicz'],
|
|
100
|
+
'ovskis' => ['owski'],
|
|
101
|
+
'ovskas' => ['owski'],
|
|
102
|
+
'ovicius' => ['owski'],
|
|
103
|
+
'evskis' => ['ewski'],
|
|
104
|
+
'evskas' => ['ewski'],
|
|
105
|
+
'ickis' => ['icki'],
|
|
106
|
+
'akas' => ['ak'],
|
|
107
|
+
'skis' => ['ski'],
|
|
108
|
+
'skas' => ['ski'],
|
|
109
|
+
'ckis' => ['cki'],
|
|
110
|
+
'ckas' => ['cki'],
|
|
111
|
+
'onis' => ['owicz'], # e.g., Jonas → Janowicz
|
|
112
|
+
'aitis' => ['owicz'] # rarer, e.g., Kazlauskas variations
|
|
106
113
|
},
|
|
107
114
|
'polish_to_russian' => {
|
|
108
115
|
'ski' => 'skii',
|
|
@@ -129,36 +136,63 @@ module Surname
|
|
|
129
136
|
normalized = normalized.gsub(accented, base)
|
|
130
137
|
end
|
|
131
138
|
|
|
139
|
+
# Handle Polish digraphs
|
|
140
|
+
normalized = normalized.gsub('sz', 'š').gsub('cz', 'č').gsub('rz', 'ž') if from_lang == 'polish'
|
|
141
|
+
|
|
132
142
|
normalized.capitalize
|
|
133
143
|
end
|
|
134
144
|
|
|
135
145
|
# Polonization/de-polonization between languages
|
|
136
146
|
def self.transform_ending(surname, from_lang, to_lang)
|
|
137
|
-
return surname if surname.nil? || surname.empty?
|
|
147
|
+
return [surname] if surname.nil? || surname.empty?
|
|
138
148
|
|
|
139
149
|
key = "#{from_lang}_to_#{to_lang}"
|
|
140
150
|
endings = POLONIZATION_MAPPINGS[key] || {}
|
|
141
151
|
|
|
142
152
|
normalized = surname.downcase
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
153
|
+
variants = []
|
|
154
|
+
# Sort endings by length descending to match longest first
|
|
155
|
+
sorted_endings = endings.sort_by { |k, v| -k.length }
|
|
156
|
+
sorted_endings.each do |from_ending, to_endings|
|
|
157
|
+
next unless normalized.end_with?(from_ending)
|
|
158
|
+
|
|
159
|
+
Array(to_endings).each do |to_ending|
|
|
160
|
+
transformed = normalized.sub(/#{from_ending}$/, to_ending)
|
|
161
|
+
variants << transformed.capitalize
|
|
146
162
|
end
|
|
163
|
+
# Break after first match to avoid overlapping
|
|
164
|
+
break
|
|
147
165
|
end
|
|
148
166
|
|
|
149
|
-
|
|
167
|
+
variants.uniq
|
|
150
168
|
end
|
|
151
169
|
|
|
152
170
|
# Full cross-language surname normalization
|
|
153
171
|
def self.normalize_surname(surname, from_lang, to_lang)
|
|
154
|
-
# First,
|
|
155
|
-
|
|
156
|
-
# Then,
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
172
|
+
# First, transform endings if applicable
|
|
173
|
+
transformed_variants = transform_ending(surname, from_lang, to_lang)
|
|
174
|
+
# Then, transliterate each variant to remove diacritics and handle digraphs
|
|
175
|
+
variants = transformed_variants.map { |v| transliterate(v, from_lang) }
|
|
176
|
+
|
|
177
|
+
# If no transformation, add the transliterated original
|
|
178
|
+
if transformed_variants == [surname]
|
|
179
|
+
# Already included
|
|
180
|
+
else
|
|
181
|
+
variants << transliterate(surname, from_lang)
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
# Add W/V interchange variants for genealogical matching
|
|
185
|
+
additional = []
|
|
186
|
+
variants.each do |v|
|
|
187
|
+
if v.start_with?('W')
|
|
188
|
+
additional << v.sub(/^W/, 'V')
|
|
189
|
+
elsif v.start_with?('V')
|
|
190
|
+
additional << v.sub(/^V/, 'W')
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
variants.concat(additional)
|
|
194
|
+
|
|
195
|
+
variants.uniq.reject { |v| v.nil? || v.empty? }
|
|
162
196
|
end
|
|
163
197
|
|
|
164
198
|
# Convenience methods
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: surname-transliterator
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Justyna Wojtczak
|
|
@@ -30,9 +30,10 @@ homepage: https://github.com/justine84/surname-transliterator
|
|
|
30
30
|
licenses:
|
|
31
31
|
- MIT
|
|
32
32
|
metadata:
|
|
33
|
+
source_code_uri: https://github.com/justine84/surname-transliterator/tree/main
|
|
33
34
|
homepage_uri: https://github.com/justine84/surname-transliterator
|
|
34
|
-
source_code_uri: https://github.com/justine84/surname-transliterator
|
|
35
35
|
changelog_uri: https://github.com/justi-blue/surname-transliterator/blob/main/CHANGELOG.md
|
|
36
|
+
rubygems_mfa_required: 'true'
|
|
36
37
|
rdoc_options: []
|
|
37
38
|
require_paths:
|
|
38
39
|
- lib
|