greeklish_iso843 0.3.0 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -1
- data/README.md +8 -2
- data/lib/greeklish_iso843/greek_text.rb +95 -38
- data/lib/greeklish_iso843/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b5d2ef8c9b648af399cd8cbdae6371173a871ae5107031fc9f00d7cff26b81b1
|
4
|
+
data.tar.gz: 051d809e9efac4ed60176798ce46412f63bdfbb8e8b3c4d8ddb7ef721cdde690
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77fdf6af41076c632a35cb0bce2b1397c14cf50d6961959ea13b45eb6d351219a9b8a1be0f293f56b2d1a99735af8fbc2f5fa713704ac518fcb70fdc202bd050
|
7
|
+
data.tar.gz: 4bad1dea32c72a9014ff8a077cd052bd9ec2a0b520ad1a9733e93ce0999ddc875b7d2711150a053d4c14df5909ed0b5d2952fb5af15b317c229e43a7bab7901e
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,44 @@ The format is based on [Keep a Changelog][] and this project adheres to
|
|
7
7
|
|
8
8
|
## [Unreleased][]
|
9
9
|
|
10
|
+
## [0.4.3][] - 2022-01-21
|
11
|
+
|
12
|
+
- [#3][]: Fix `ευς`, `αυς` and `ηυς` throwing `UnhandledCaseError`
|
13
|
+
([@cherouvim][])
|
14
|
+
|
15
|
+
[#3]: https://github.com/agorf/greeklish_iso843/issues/3
|
16
|
+
[@cherouvim]: https://github.com/cherouvim
|
17
|
+
|
18
|
+
## [0.4.2][] - 2021-05-29
|
19
|
+
|
20
|
+
### Changed
|
21
|
+
|
22
|
+
- Do not raise `UnhandledCaseError` if next character is a symbol when
|
23
|
+
converting a pair that ends with `υ` to `v` or `f`. For example, `άνευ` would
|
24
|
+
correctly transliterate to `anef` but `άνευ.` would raise due to `.` at the
|
25
|
+
end.
|
26
|
+
- Fix case for leading `Θ`, `Χ` and `Ψ` when the word is capitalized. For
|
27
|
+
example, `ψάθα` would correctly transliterate to `psatha`, but `Ψάθα` would
|
28
|
+
incorrectly transliterate to `PSatha`. Now it is transliterated to `Psatha`.
|
29
|
+
|
30
|
+
## [0.4.1][] - 2021-04-29
|
31
|
+
|
32
|
+
### Changed
|
33
|
+
|
34
|
+
- Do not raise `UnhandledCaseError` if next character is whitespace when
|
35
|
+
converting a pair that ends with `υ` to `v` or `f`. For example, `άνευ` would
|
36
|
+
correctly transliterate to `anef` but `άνευ λόγου` would raise due to the
|
37
|
+
whitespace after `ευ`.
|
38
|
+
|
39
|
+
## [0.4.0][] - 2021-04-28
|
40
|
+
|
41
|
+
### Changed
|
42
|
+
|
43
|
+
- Return match as-is if it cannot be handled instead of crashing
|
44
|
+
- Raise `UnhandledCaseError` in case of an unhandled case when converting pair
|
45
|
+
for υ or φ (should never happen)
|
46
|
+
- Rewrite a significant part of the code to make it more readable
|
47
|
+
|
10
48
|
## [0.3.0][] - 2021-03-06
|
11
49
|
|
12
50
|
### Changed
|
@@ -34,4 +72,8 @@ Initial release.
|
|
34
72
|
[Semantic Versioning]: http://semver.org/spec/v2.0.0.html
|
35
73
|
[0.2.0]: https://github.com/agorf/greeklish_iso843/compare/0.1.0...0.2.0
|
36
74
|
[0.3.0]: https://github.com/agorf/greeklish_iso843/compare/0.2.0...0.3.0
|
37
|
-
[Unreleased]: https://github.com/agorf/greeklish_iso843/compare/0.3.0...HEAD
|
75
|
+
[0.4.0]: https://github.com/agorf/greeklish_iso843/compare/0.3.0...0.4.0
|
76
|
+
[0.4.1]: https://github.com/agorf/greeklish_iso843/compare/0.4.0...0.4.1
|
77
|
+
[0.4.2]: https://github.com/agorf/greeklish_iso843/compare/0.4.1...0.4.2
|
78
|
+
[0.4.3]: https://github.com/agorf/greeklish_iso843/compare/0.4.2...0.4.3
|
79
|
+
[Unreleased]: https://github.com/agorf/greeklish_iso843/compare/0.4.3...HEAD
|
data/README.md
CHANGED
@@ -1,12 +1,17 @@
|
|
1
1
|
# greeklish_iso843 [![Gem](https://img.shields.io/gem/v/greeklish_iso843?color=blue)](https://rubygems.org/gems/greeklish_iso843/) [![RuboCop](https://github.com/agorf/greeklish_iso843/actions/workflows/rubocop.yml/badge.svg)](https://github.com/agorf/greeklish_iso843/actions/workflows/rubocop.yml) [![Tests](https://github.com/agorf/greeklish_iso843/actions/workflows/tests.yml/badge.svg)](https://github.com/agorf/greeklish_iso843/actions/workflows/tests.yml)
|
2
2
|
|
3
3
|
A [Ruby][] library that converts Greek text to [Greeklish][], conforming to the
|
4
|
-
[ISO 843][] (ELOT 743) standard.
|
4
|
+
[ISO 843][] standard (based on the Greek standard ELOT 743 or ΕΛΟΤ 743) used by
|
5
|
+
the Greek state.
|
5
6
|
|
6
7
|
[Ruby]: https://www.ruby-lang.org/en/
|
7
8
|
[Greeklish]: https://en.wikipedia.org/wiki/Greeklish
|
8
9
|
[ISO 843]: https://www.iso.org/standard/5215.html
|
9
10
|
|
11
|
+
## Demo
|
12
|
+
|
13
|
+
An online service that uses this Gem is available at <https://greeklish.xyz>
|
14
|
+
|
10
15
|
## Installation
|
11
16
|
|
12
17
|
As a [Gem][]:
|
@@ -66,7 +71,8 @@ verbose output.
|
|
66
71
|
|
67
72
|
## Acknowledgements
|
68
73
|
|
69
|
-
This library was based on
|
74
|
+
This library was initially based on an official, buggy [implementation][js] in
|
75
|
+
JavaScript by the Hellenic Police.
|
70
76
|
|
71
77
|
[js]: http://www.passport.gov.gr/passports/GrElotConverter/GrElotConverter.html
|
72
78
|
|
data/lib/greeklish_iso843/greek_text.rb
CHANGED
@@ -5,8 +5,12 @@ class GreeklishIso843::GreekText
|
|
5
5
|
|
6
6
|
GREEK_VOWELS = 'αάεέηήιίϊΐοόυύϋΰωώ'.freeze
|
7
7
|
|
8
|
+
PAIRS_FOR_V_OR_F = %w[αυ αύ ευ εύ ηυ ηύ].freeze
|
9
|
+
|
8
10
|
GREEK_LETTERS_AFTER_V = "#{GREEK_VOWELS}βγδζλμνρ".freeze
|
9
11
|
|
12
|
+
GREEK_LETTERS_AFTER_F = 'θκξπσςτφχψ'.freeze
|
13
|
+
|
10
14
|
REPLACEMENTS = {
|
11
15
|
'αι' => 'ai',
|
12
16
|
'αί' => 'ai',
|
@@ -68,7 +72,11 @@ class GreeklishIso843::GreekText
|
|
68
72
|
'ώ' => 'o'
|
69
73
|
}.freeze
|
70
74
|
|
71
|
-
|
75
|
+
REPLACEMENT_KEYS_REGEXP = /#{REPLACEMENTS.keys.join('|')}/i.freeze
|
76
|
+
|
77
|
+
class Error < StandardError; end
|
78
|
+
|
79
|
+
class UnhandledCaseError < Error; end
|
72
80
|
|
73
81
|
attr_reader :text
|
74
82
|
|
@@ -81,66 +89,115 @@ class GreeklishIso843::GreekText
|
|
81
89
|
end
|
82
90
|
|
83
91
|
def to_greeklish
|
84
|
-
text.gsub(
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
greek = match + next_char.to_s
|
92
|
-
else
|
93
|
-
greeklish = convert_to_greeklish(match, match_data, next_char)
|
94
|
-
end
|
95
|
-
|
96
|
-
fix_case(greeklish, greek)
|
92
|
+
text.gsub(REPLACEMENT_KEYS_REGEXP) do |match|
|
93
|
+
greeklish = REPLACEMENTS[match.downcase] ||
|
94
|
+
convert_pair(match, Regexp.last_match)
|
95
|
+
|
96
|
+
next match if greeklish.nil? # Unhandled case. Return as-is.
|
97
|
+
|
98
|
+
fix_case(greeklish, match, Regexp.last_match)
|
97
99
|
end
|
98
100
|
end
|
99
101
|
|
100
|
-
private def
|
101
|
-
|
102
|
+
private def convert_pair(match, match_data)
|
103
|
+
return 'ts' if match.casecmp?('τς')
|
104
|
+
|
105
|
+
next_char = match_data.post_match[0]&.downcase
|
106
|
+
|
107
|
+
if match.casecmp?('μπ')
|
108
|
+
prev_char = match_data.pre_match[-1]&.downcase
|
109
|
+
return convert_mp_or_b(prev_char, next_char)
|
110
|
+
end
|
111
|
+
|
112
|
+
if PAIRS_FOR_V_OR_F.any? { |pair| match.casecmp?(pair) }
|
113
|
+
return convert_pair_for_v_or_f(match, next_char)
|
114
|
+
end
|
115
|
+
|
116
|
+
match # Unhandled case. Return as-is.
|
102
117
|
end
|
103
118
|
|
104
|
-
private def
|
105
|
-
|
119
|
+
private def fix_case(greeklish, match, match_data)
|
120
|
+
if !uppercase?(match[0])
|
121
|
+
return greeklish
|
122
|
+
end
|
123
|
+
|
124
|
+
if match.size == 1
|
125
|
+
return fix_case_single_letter_match(greeklish, match_data)
|
126
|
+
end
|
127
|
+
|
128
|
+
if match.size == 2
|
129
|
+
return fix_case_two_letter_match(greeklish, match)
|
130
|
+
end
|
131
|
+
|
132
|
+
raise UnhandledCaseError
|
106
133
|
end
|
107
134
|
|
108
|
-
private def
|
109
|
-
|
135
|
+
private def fix_case_single_letter_match(greeklish, match_data)
|
136
|
+
if greeklish.size == 1
|
137
|
+
return greeklish.upcase
|
138
|
+
end
|
139
|
+
|
140
|
+
if greeklish.size == 2 # match is one of Θ, Χ, Ψ
|
141
|
+
return fix_case_th_ch_ps(greeklish, match_data)
|
142
|
+
end
|
143
|
+
|
144
|
+
raise UnhandledCaseError
|
145
|
+
end
|
146
|
+
|
147
|
+
private def fix_case_two_letter_match(greeklish, match)
|
148
|
+
if uppercase?(match[1])
|
149
|
+
return greeklish.upcase
|
150
|
+
end
|
151
|
+
|
152
|
+
greeklish[0].upcase + greeklish[1].to_s
|
153
|
+
end
|
110
154
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
155
|
+
private def fix_case_th_ch_ps(greeklish, match_data)
|
156
|
+
next_char = match_data.post_match[0]
|
157
|
+
|
158
|
+
if next_char.nil? ||
|
159
|
+
next_char !~ REPLACEMENT_KEYS_REGEXP ||
|
160
|
+
!uppercase?(next_char)
|
161
|
+
return greeklish[0].upcase + greeklish[1].to_s
|
115
162
|
end
|
163
|
+
|
164
|
+
greeklish.upcase
|
116
165
|
end
|
117
166
|
|
118
167
|
private def convert_mp_or_b(prev_char, next_char)
|
119
|
-
if prev_char &&
|
120
|
-
next_char &&
|
168
|
+
if prev_char && lowercase?(prev_char) && # *μπ
|
169
|
+
next_char && lowercase?(next_char) # and μπ*
|
121
170
|
'mp'
|
122
171
|
else # μπ* or *μπ
|
123
172
|
'b'
|
124
173
|
end
|
125
174
|
end
|
126
175
|
|
127
|
-
private def
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
176
|
+
private def convert_pair_for_v_or_f(match, next_char)
|
177
|
+
v_or_f = convert_v_or_f(next_char)
|
178
|
+
|
179
|
+
if v_or_f.nil?
|
180
|
+
raise UnhandledCaseError # Should never happen
|
132
181
|
end
|
182
|
+
|
183
|
+
REPLACEMENTS[match[0].downcase] + v_or_f
|
133
184
|
end
|
134
185
|
|
135
|
-
private def
|
136
|
-
|
186
|
+
private def uppercase?(char)
|
187
|
+
GREEK_UPPER[char]
|
188
|
+
end
|
137
189
|
|
138
|
-
|
139
|
-
|
140
|
-
|
190
|
+
private def lowercase?(char)
|
191
|
+
GREEK_LOWER[char]
|
192
|
+
end
|
193
|
+
|
194
|
+
private def convert_v_or_f(next_char)
|
195
|
+
if next_char.nil? ||
|
196
|
+
GREEK_LETTERS_AFTER_F[next_char] ||
|
197
|
+
next_char !~ REPLACEMENT_KEYS_REGEXP
|
198
|
+
return 'f'
|
141
199
|
end
|
142
200
|
|
143
|
-
|
144
|
-
REPLACEMENTS[match[0].downcase] + convert_v_or_f(next_char)
|
201
|
+
'v' if GREEK_LETTERS_AFTER_V[next_char]
|
145
202
|
end
|
146
203
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: greeklish_iso843
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Angelos Orfanakos
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-06 00:00:00.000000000 Z
|
11
|
+
date: 2022-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: rubygems@angelos.dev
|
@@ -28,7 +28,8 @@ files:
|
|
28
28
|
homepage: https://github.com/agorf/greeklish_iso843
|
29
29
|
licenses:
|
30
30
|
- MIT
|
31
|
-
metadata: {}
|
31
|
+
metadata:
|
32
|
+
rubygems_mfa_required: 'true'
|
32
33
|
post_install_message:
|
33
34
|
rdoc_options: []
|
34
35
|
require_paths:
|