surname-transliterator 0.3.0 → 0.4.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4df1e6b0864ab368780804751aa166c6b100be381011942fa0073d1cb4350d71
4
- data.tar.gz: cb1118f3560a93dcd5e3bc0982bf0cca6a59680763dcf924c2a4bdaf98752643
3
+ metadata.gz: ea26f1b796400d4bd4f0fc9ee9cfbe7116e0520eb97769a47aa7e57c190bceb0
4
+ data.tar.gz: 94b1ec91e13d670fe22e1e95d91d6d08f5f7cae9fd5c393881ae9c72dd35eb15
5
5
  SHA512:
6
- metadata.gz: 3d04bfb1abd3f73f08227dad6ce1e3a781fac2965e689ef1423a111e36e09519c23037d1c6db1e6e1f7a3aa57909c5992104735ec6b842577ac4fa15e5a1c29e
7
- data.tar.gz: abb7da9ec167ada8b393fd86269baedc927384f87e2ed761299fb55f5a200c5bdda2397bb7d1821141912b7b51974e101e168028496f7d11e8fa4891a54057fe
6
+ metadata.gz: 40b4a8c3b1c88c34365e4e0631f93f738a93f58702adc2ad9f2423adf57aeb9cb9dcafd1cb2e602c9dbf11ffa7d233d2cb378d942cfb515e84d51a24f6d3c72b
7
+ data.tar.gz: 06d92b4c47e4b9d50daf524cda06b29e92ac2af1e8b03f6bea69c4ebc537690f981e37ef838a35241af926d342a637efe9198113ba846f564d1719d366804eac
data/CHANGELOG.md CHANGED
@@ -5,10 +5,23 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.4.0] - 2025-01-01
9
+
10
+ ### Added
11
+ - Support for additional polonization mappings: 'ak' → 'akas', 'cki' → 'ckis'/'ckas', 'owski' → 'ovicius'
12
+ - Polish digraph handling in transliteration: 'sz' → 'š', 'cz' → 'č', 'rz' → 'ž'
13
+ - W/V interchange variants for genealogical matching
14
+ - Expanded test suite with more FN examples and edge cases
15
+ - MFA requirement in gemspec for security
16
+
17
+ ### Changed
18
+ - Improved transform_ending to handle multiple overlapping suffixes
19
+ - Updated normalize_surname to include original transliterated forms
20
+ - Enhanced gemspec metadata for better compliance
21
+
8
22
  ## [0.3.0] - 2025-01-01
9
23
 
10
24
  ### Changed
11
- - Moved gem to independent directory for standalone publishing
12
25
  - Require Ruby 3.1+ for compatibility
13
26
 
14
27
  ## [0.2.0] - 2025-01-01
data/README.md CHANGED
@@ -13,13 +13,13 @@ TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_O
13
13
  Install the gem and add to the application's Gemfile by executing:
14
14
 
15
15
  ```bash
16
- bundle add UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG
16
+ bundle add surname-transliterator
17
17
  ```
18
18
 
19
19
  If bundler is not being used to manage dependencies, install the gem by executing:
20
20
 
21
21
  ```bash
22
- gem install UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG
22
+ gem install surname-transliterator
23
23
  ```
24
24
 
25
25
  ## Usage
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Surname
4
4
  module Transliterator
5
- VERSION = '0.3.0'
5
+ VERSION = '0.4.1'
6
6
  end
7
7
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "transliterator/version"
3
+ require_relative 'transliterator/version'
4
4
 
5
5
  module Surname
6
6
  module Transliterator
@@ -87,22 +87,29 @@ module Surname
87
87
  # Polonization/de-polonization mappings for specific pairs (based on genealogical sources)
88
88
  POLONIZATION_MAPPINGS = {
89
89
  'polish_to_lithuanian' => {
90
- 'owicz' => 'avičius',
91
- 'owski' => 'auskas',
92
- 'ewski' => 'evskis',
93
- 'icki' => 'ickis',
94
- 'ski' => 'skis',
95
- 'cki' => 'ckis'
90
+ 'owicz' => ['avičius'],
91
+ 'owski' => %w[ovskis ovskas ovicius],
92
+ 'ewski' => %w[evskis evskas],
93
+ 'icki' => ['ickis'],
94
+ 'ak' => ['akas'],
95
+ 'ski' => %w[skis skas],
96
+ 'cki' => %w[ckis ckas]
96
97
  },
97
98
  'lithuanian_to_polish' => {
98
- 'avičius' => 'owicz',
99
- 'auskas' => 'owski',
100
- 'evskis' => 'ewski',
101
- 'ickis' => 'icki',
102
- 'skis' => 'ski',
103
- 'ckis' => 'cki',
104
- 'onis' => 'owicz', # e.g., Jonas → Janowicz
105
- 'aitis' => 'owicz' # rarer, e.g., Kazlauskas variations
99
+ 'avičius' => ['owicz'],
100
+ 'ovskis' => ['owski'],
101
+ 'ovskas' => ['owski'],
102
+ 'ovicius' => ['owski'],
103
+ 'evskis' => ['ewski'],
104
+ 'evskas' => ['ewski'],
105
+ 'ickis' => ['icki'],
106
+ 'akas' => ['ak'],
107
+ 'skis' => ['ski'],
108
+ 'skas' => ['ski'],
109
+ 'ckis' => ['cki'],
110
+ 'ckas' => ['cki'],
111
+ 'onis' => ['owicz'], # e.g., Jonas → Janowicz
112
+ 'aitis' => ['owicz'] # rarer, e.g., Kazlauskas variations
106
113
  },
107
114
  'polish_to_russian' => {
108
115
  'ski' => 'skii',
@@ -129,36 +136,63 @@ module Surname
129
136
  normalized = normalized.gsub(accented, base)
130
137
  end
131
138
 
139
+ # Handle Polish digraphs
140
+ normalized = normalized.gsub('sz', 'š').gsub('cz', 'č').gsub('rz', 'ž') if from_lang == 'polish'
141
+
132
142
  normalized.capitalize
133
143
  end
134
144
 
135
145
  # Polonization/de-polonization between languages
136
146
  def self.transform_ending(surname, from_lang, to_lang)
137
- return surname if surname.nil? || surname.empty?
147
+ return [surname] if surname.nil? || surname.empty?
138
148
 
139
149
  key = "#{from_lang}_to_#{to_lang}"
140
150
  endings = POLONIZATION_MAPPINGS[key] || {}
141
151
 
142
152
  normalized = surname.downcase
143
- endings.each do |from_ending, to_ending|
144
- if normalized.end_with?(from_ending)
145
- return normalized.sub(/#{from_ending}$/, to_ending).capitalize
153
+ variants = []
154
+ # Sort endings by length descending to match longest first
155
+ sorted_endings = endings.sort_by { |k, v| -k.length }
156
+ sorted_endings.each do |from_ending, to_endings|
157
+ next unless normalized.end_with?(from_ending)
158
+
159
+ Array(to_endings).each do |to_ending|
160
+ transformed = normalized.sub(/#{from_ending}$/, to_ending)
161
+ variants << transformed.capitalize
146
162
  end
163
+ # Break after first match to avoid overlapping
164
+ break
147
165
  end
148
166
 
149
- surname
167
+ variants.uniq
150
168
  end
151
169
 
152
170
  # Full cross-language surname normalization
153
171
  def self.normalize_surname(surname, from_lang, to_lang)
154
- # First, transliterate to remove diacritics
155
- transliterated = transliterate(surname, from_lang)
156
- # Then, transform endings if applicable
157
- transformed = transform_ending(transliterated, from_lang, to_lang)
158
- # Return variants: only add transformed if different from transliterated
159
- variants = [ transliterated ]
160
- variants << transformed if transformed != transliterated
161
- variants.compact
172
+ # First, transform endings if applicable
173
+ transformed_variants = transform_ending(surname, from_lang, to_lang)
174
+ # Then, transliterate each variant to remove diacritics and handle digraphs
175
+ variants = transformed_variants.map { |v| transliterate(v, from_lang) }
176
+
177
+ # If no transformation, add the transliterated original
178
+ if transformed_variants == [surname]
179
+ # Already included
180
+ else
181
+ variants << transliterate(surname, from_lang)
182
+ end
183
+
184
+ # Add W/V interchange variants for genealogical matching
185
+ additional = []
186
+ variants.each do |v|
187
+ if v.start_with?('W')
188
+ additional << v.sub(/^W/, 'V')
189
+ elsif v.start_with?('V')
190
+ additional << v.sub(/^V/, 'W')
191
+ end
192
+ end
193
+ variants.concat(additional)
194
+
195
+ variants.uniq.reject { |v| v.nil? || v.empty? }
162
196
  end
163
197
 
164
198
  # Convenience methods
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: surname-transliterator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justyna Wojtczak
@@ -30,9 +30,10 @@ homepage: https://github.com/justine84/surname-transliterator
30
30
  licenses:
31
31
  - MIT
32
32
  metadata:
33
+ source_code_uri: https://github.com/justine84/surname-transliterator/tree/main
33
34
  homepage_uri: https://github.com/justine84/surname-transliterator
34
- source_code_uri: https://github.com/justine84/surname-transliterator
35
35
  changelog_uri: https://github.com/justi-blue/surname-transliterator/blob/main/CHANGELOG.md
36
+ rubygems_mfa_required: 'true'
36
37
  rdoc_options: []
37
38
  require_paths:
38
39
  - lib