alphabets 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/alphabets/alphabets.rb +75 -25
- data/lib/alphabets/reader.rb +5 -3
- data/lib/alphabets/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a69d1fcd1c0e8e8cf4484f7952d856f061a9ba9
|
4
|
+
data.tar.gz: 2f82d1fce623b31121988574e23be05ff89f8642
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ac66a5766a45ad1bd3db4e48808a52ac3ae16a43871f563a20c7f70c61f9d4376011455dc016172113646acf5e112dff6ab099f449cdea2b134f3ea5668b9834
|
7
|
+
data.tar.gz: e322e27d4193b7744e84a046a1712e2fdc6ad63cce698a49a1a99b411bd9db0e0d67ab53ec28e5be22ed103a3f1c5652699c87a71773679683ea3a0e09fa4499
|
data/lib/alphabets/alphabets.rb
CHANGED
@@ -5,50 +5,56 @@ class Alphabet
|
|
5
5
|
UNACCENT = Reader.parse( <<TXT )
|
6
6
|
Ä A ä a
|
7
7
|
Á A á a
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
À A à a
|
9
|
+
à A ã a
|
10
|
+
 A â a
|
11
11
|
Å A å a
|
12
12
|
Æ AE æ ae # ae ligature
|
13
13
|
ā a
|
14
14
|
ă a
|
15
|
-
|
15
|
+
Ą A ą a # ą - U+0105 (261) - LATIN SMALL LETTER A WITH OGONEK
|
16
16
|
|
17
17
|
Ç C ç c # ç - U+00E7 (231) - LATIN SMALL LETTER C WITH CEDILLA
|
18
|
-
|
18
|
+
Ć C ć c
|
19
19
|
Č C č c
|
20
20
|
|
21
|
+
Ď D ď d
|
22
|
+
Ð D ð d # iceland - d
|
23
|
+
|
21
24
|
É E é e
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
+
È E è e
|
26
|
+
Ê E ê e
|
27
|
+
Ë E ë e
|
25
28
|
ė e
|
26
|
-
|
29
|
+
Ę E ę e
|
30
|
+
Ě E ě e
|
27
31
|
|
28
32
|
ğ g
|
29
33
|
|
30
34
|
İ I
|
31
35
|
Í I í i
|
32
|
-
|
36
|
+
Ì I ì i
|
37
|
+
Î I î i
|
33
38
|
ī i
|
34
39
|
ı i # ı - U+0131 (305) - LATIN SMALL LETTER DOTLESS I
|
40
|
+
Ï I ï i
|
35
41
|
|
36
42
|
Ł L ł l
|
37
43
|
|
38
|
-
|
39
|
-
|
40
|
-
|
44
|
+
Ñ N ñ n
|
45
|
+
Ń N ń n
|
46
|
+
Ň N ň n
|
41
47
|
|
42
48
|
Ö O ö o
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
49
|
+
Ő OE ő oe # hungarian - just use O/o - why? (it's not a ligature) why not?
|
50
|
+
Ó O ó o
|
51
|
+
Ò O ò o
|
52
|
+
Õ O õ o
|
53
|
+
Ô O ô o
|
47
54
|
ø o
|
48
|
-
ő o
|
49
55
|
Œ OE œ oe # oe ligature
|
50
56
|
|
51
|
-
|
57
|
+
Ř R ř r
|
52
58
|
|
53
59
|
Ś S ś s
|
54
60
|
Ş S ş s # ş - U+015F (351) - LATIN SMALL LETTER S WITH CEDILLA
|
@@ -56,20 +62,25 @@ UNACCENT = Reader.parse( <<TXT )
|
|
56
62
|
Š S š s
|
57
63
|
ß ss # ß - U+00DF (223) - LATIN SMALL LETTER SHARP S
|
58
64
|
|
59
|
-
Ţ
|
60
|
-
Ț
|
65
|
+
Ţ T ţ t # ţ - U+0163 (355) - LATIN SMALL LETTER T WITH CEDILLA
|
66
|
+
Ț T ț t # ț - U+021B (539) - LATIN SMALL LETTER T WITH COMMA BELOW
|
67
|
+
Ť T ť t
|
61
68
|
|
62
|
-
|
69
|
+
Þ P þ p # þ - U+00FE (254) - LATIN SMALL LETTER THORN
|
63
70
|
#### fix/check!!!! icelandic - use p is p or th - why? why not?
|
64
71
|
|
65
72
|
Ü U ü u
|
66
73
|
Ú U ú u
|
74
|
+
Ù U ù u
|
67
75
|
ū u
|
76
|
+
Ů U ů u
|
77
|
+
Û U û u
|
68
78
|
|
69
|
-
|
79
|
+
Ý Y ý y
|
80
|
+
Ÿ Y ÿ y
|
70
81
|
|
71
|
-
|
72
|
-
|
82
|
+
Ź Z ź z
|
83
|
+
Ż Z ż z
|
73
84
|
Ž Z ž z
|
74
85
|
TXT
|
75
86
|
|
@@ -99,21 +110,51 @@ DOWNCASE = %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z].reduce({}) do
|
|
99
110
|
end.merge( Reader.parse( <<TXT ) )
|
100
111
|
Ä ä
|
101
112
|
Á á
|
113
|
+
À à
|
114
|
+
 â
|
102
115
|
Å å
|
103
116
|
Æ æ # LATIN LETTER AE - ae ligature
|
117
|
+
Ą ą
|
118
|
+
à ã
|
104
119
|
|
105
120
|
Ç ç # LATIN LETTER C WITH CEDILLA
|
106
121
|
Č č
|
122
|
+
Ć ć
|
123
|
+
|
124
|
+
Ď ď
|
125
|
+
|
126
|
+
Ð ð # iceland - d
|
107
127
|
|
108
128
|
É é
|
129
|
+
È è
|
130
|
+
Ë ë
|
131
|
+
Ê ê
|
132
|
+
Ę ę
|
133
|
+
Ě ě
|
109
134
|
|
110
135
|
İ i
|
111
136
|
Í í
|
137
|
+
Ì ì
|
138
|
+
Ï ï
|
139
|
+
Î î
|
112
140
|
|
113
141
|
Ł ł
|
114
142
|
|
143
|
+
Ń ń
|
144
|
+
Ň ň
|
145
|
+
Ñ ñ
|
146
|
+
|
115
147
|
Ö ö
|
148
|
+
Ő ő
|
116
149
|
Œ œ # LATIN LIGATURE OE
|
150
|
+
Ó ó
|
151
|
+
Ò ò
|
152
|
+
Ô ô
|
153
|
+
Õ õ
|
154
|
+
|
155
|
+
Þ þ # iceland - p
|
156
|
+
|
157
|
+
Ř ř
|
117
158
|
|
118
159
|
Ś ś
|
119
160
|
Ş ş # LATIN LETTER S WITH CEDILLA
|
@@ -122,11 +163,20 @@ DOWNCASE = %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z].reduce({}) do
|
|
122
163
|
|
123
164
|
Ţ ţ # LATIN LETTER T WITH CEDILLA
|
124
165
|
Ț ț # LATIN LETTER T WITH COMMA BELOW
|
166
|
+
Ť ť
|
125
167
|
|
126
168
|
Ü ü
|
127
169
|
Ú ú
|
170
|
+
Ù ù
|
171
|
+
Ů ů
|
172
|
+
Û û
|
173
|
+
|
174
|
+
Ý ý
|
175
|
+
Ÿ ÿ
|
128
176
|
|
129
177
|
Ž ž
|
178
|
+
Ż ż
|
179
|
+
Ź ź
|
130
180
|
TXT
|
131
181
|
|
132
182
|
end # class Alphabet
|
data/lib/alphabets/reader.rb
CHANGED
@@ -10,7 +10,9 @@ class Reader ## todo/check: rename to CharReader or something - why? why not?
|
|
10
10
|
def self.parse( txt )
|
11
11
|
h = {} ## char(acter) table mappings
|
12
12
|
|
13
|
+
lineno = 0
|
13
14
|
txt.each_line do |line|
|
15
|
+
lineno += 1
|
14
16
|
line = line.strip
|
15
17
|
|
16
18
|
next if line.empty?
|
@@ -27,7 +29,7 @@ class Reader ## todo/check: rename to CharReader or something - why? why not?
|
|
27
29
|
|
28
30
|
## check - must be even - a multiple of two
|
29
31
|
if values.size % 2 != 0
|
30
|
-
puts "** !!! ERROR !!! - missing mapping pair - mappings must be even (a multiple of two):"
|
32
|
+
puts "** !!! ERROR !!! - [line:#{lineno}] missing mapping pair - mappings must be even (a multiple of two):"
|
31
33
|
pp values
|
32
34
|
exit 1
|
33
35
|
end
|
@@ -40,14 +42,14 @@ class Reader ## todo/check: rename to CharReader or something - why? why not?
|
|
40
42
|
|
41
43
|
## check - key must be a single-character/letter in unicode
|
42
44
|
if key.size != 1
|
43
|
-
puts "** !!! ERROR !!! - mapping character must be a single-character, size is #{key.size}"
|
45
|
+
puts "** !!! ERROR !!! - [line:#{lineno}] mapping character must be a single-character, size is #{key.size}"
|
44
46
|
pp slice
|
45
47
|
exit 1
|
46
48
|
end
|
47
49
|
|
48
50
|
## check - check for duplicates
|
49
51
|
if h[ key ]
|
50
|
-
puts "** !!! ERROR !!! - duplicate mapping character; key already present"
|
52
|
+
puts "** !!! ERROR !!! - [line:#{lineno}] duplicate mapping character; key already present"
|
51
53
|
pp slice
|
52
54
|
exit 1
|
53
55
|
else
|
data/lib/alphabets/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: alphabets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01-
|
11
|
+
date: 2020-01-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|