surname-transliterator 0.4.0 → 0.4.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c6d5ceefee22590bad215a0c6d5a1a8749f8d244f3ca6f734bb4ad6911de6c48
4
- data.tar.gz: f17c9186d02d35238754bf6a01a8e8e8dad8a003b2fa269b45363b4e3f6f2695
3
+ metadata.gz: ea26f1b796400d4bd4f0fc9ee9cfbe7116e0520eb97769a47aa7e57c190bceb0
4
+ data.tar.gz: 94b1ec91e13d670fe22e1e95d91d6d08f5f7cae9fd5c393881ae9c72dd35eb15
5
5
  SHA512:
6
- metadata.gz: 16143d3c5c7e15bcede3372209f74264f594203d4d086d42e277de5babe044f95e11a52dfbf0b72f37141d835238e09762474f23c480b8304bfd20df46ca9433
7
- data.tar.gz: a3051c7dc46b3739d967b99a3fef1f15d0c80d4a8283478b73d2331913d49fa4a5bae8a1dfd7df6ed2bf234eaa43037040de534a395bf115953c53c03dceb009
6
+ metadata.gz: 40b4a8c3b1c88c34365e4e0631f93f738a93f58702adc2ad9f2423adf57aeb9cb9dcafd1cb2e602c9dbf11ffa7d233d2cb378d942cfb515e84d51a24f6d3c72b
7
+ data.tar.gz: 06d92b4c47e4b9d50daf524cda06b29e92ac2af1e8b03f6bea69c4ebc537690f981e37ef838a35241af926d342a637efe9198113ba846f564d1719d366804eac
data/CHANGELOG.md CHANGED
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.4.0] - 2025-01-01
9
+
10
+ ### Added
11
+ - Support for additional polonization mappings: 'ak' → 'akas', 'cki' → 'ckis'/'ckas', 'owski' → 'ovicius'
12
+ - Polish digraph handling in transliteration: 'sz' → 'š', 'cz' → 'č', 'rz' → 'ž'
13
+ - W/V interchange variants for genealogical matching
14
+ - Expanded test suite with more FN examples and edge cases
15
+ - MFA requirement in gemspec for security
16
+
17
+ ### Changed
18
+ - Improved transform_ending to handle multiple overlapping suffixes
19
+ - Updated normalize_surname to include original transliterated forms
20
+ - Enhanced gemspec metadata for better compliance
21
+
8
22
  ## [0.3.0] - 2025-01-01
9
23
 
10
24
  ### Changed
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Surname
4
4
  module Transliterator
5
- VERSION = '0.4.0'
5
+ VERSION = '0.4.1'
6
6
  end
7
7
  end
@@ -88,22 +88,26 @@ module Surname
88
88
  POLONIZATION_MAPPINGS = {
89
89
  'polish_to_lithuanian' => {
90
90
  'owicz' => ['avičius'],
91
- 'owski' => %w[ovskis ovskas],
91
+ 'owski' => %w[ovskis ovskas ovicius],
92
92
  'ewski' => %w[evskis evskas],
93
93
  'icki' => ['ickis'],
94
+ 'ak' => ['akas'],
94
95
  'ski' => %w[skis skas],
95
- 'cki' => ['ckis']
96
+ 'cki' => %w[ckis ckas]
96
97
  },
97
98
  'lithuanian_to_polish' => {
98
99
  'avičius' => ['owicz'],
99
100
  'ovskis' => ['owski'],
100
101
  'ovskas' => ['owski'],
102
+ 'ovicius' => ['owski'],
101
103
  'evskis' => ['ewski'],
102
104
  'evskas' => ['ewski'],
103
105
  'ickis' => ['icki'],
106
+ 'akas' => ['ak'],
104
107
  'skis' => ['ski'],
105
108
  'skas' => ['ski'],
106
109
  'ckis' => ['cki'],
110
+ 'ckas' => ['cki'],
107
111
  'onis' => ['owicz'], # e.g., Jonas → Janowicz
108
112
  'aitis' => ['owicz'] # rarer, e.g., Kazlauskas variations
109
113
  },
@@ -133,7 +137,7 @@ module Surname
133
137
  end
134
138
 
135
139
  # Handle Polish digraphs
136
- normalized = normalized.gsub('sz', 's') if from_lang == 'polish'
140
+ normalized = normalized.gsub('sz', 'š').gsub('cz', 'č').gsub('rz', 'ž') if from_lang == 'polish'
137
141
 
138
142
  normalized.capitalize
139
143
  end
@@ -147,13 +151,17 @@ module Surname
147
151
 
148
152
  normalized = surname.downcase
149
153
  variants = []
150
- endings.each do |from_ending, to_endings|
154
+ # Sort endings by length descending to match longest first
155
+ sorted_endings = endings.sort_by { |k, v| -k.length }
156
+ sorted_endings.each do |from_ending, to_endings|
151
157
  next unless normalized.end_with?(from_ending)
152
158
 
153
159
  Array(to_endings).each do |to_ending|
154
160
  transformed = normalized.sub(/#{from_ending}$/, to_ending)
155
161
  variants << transformed.capitalize
156
162
  end
163
+ # Break after first match to avoid overlapping
164
+ break
157
165
  end
158
166
 
159
167
  variants.uniq
@@ -161,12 +169,17 @@ module Surname
161
169
 
162
170
  # Full cross-language surname normalization
163
171
  def self.normalize_surname(surname, from_lang, to_lang)
164
- # First, transliterate to remove diacritics
165
- transliterated = transliterate(surname, from_lang)
166
- # Then, transform endings if applicable
167
- transformed_variants = transform_ending(transliterated, from_lang, to_lang)
168
- # Return variants: transliterated plus transformed variants
169
- variants = [transliterated] + transformed_variants
172
+ # First, transform endings if applicable
173
+ transformed_variants = transform_ending(surname, from_lang, to_lang)
174
+ # Then, transliterate each variant to remove diacritics and handle digraphs
175
+ variants = transformed_variants.map { |v| transliterate(v, from_lang) }
176
+
177
+ # If no transformation, add the transliterated original
178
+ if transformed_variants == [surname]
179
+ # Already included
180
+ else
181
+ variants << transliterate(surname, from_lang)
182
+ end
170
183
 
171
184
  # Add W/V interchange variants for genealogical matching
172
185
  additional = []
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: surname-transliterator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justyna Wojtczak
@@ -30,9 +30,10 @@ homepage: https://github.com/justine84/surname-transliterator
30
30
  licenses:
31
31
  - MIT
32
32
  metadata:
33
- homepage_uri: https://github.com/justine84/surname-transliterator
34
33
  source_code_uri: https://github.com/justine84/surname-transliterator/tree/main
34
+ homepage_uri: https://github.com/justine84/surname-transliterator
35
35
  changelog_uri: https://github.com/justi-blue/surname-transliterator/blob/main/CHANGELOG.md
36
+ rubygems_mfa_required: 'true'
36
37
  rdoc_options: []
37
38
  require_paths:
38
39
  - lib