alphabets 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/alphabets/alphabets.rb +75 -25
- data/lib/alphabets/reader.rb +5 -3
- data/lib/alphabets/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a69d1fcd1c0e8e8cf4484f7952d856f061a9ba9
|
4
|
+
data.tar.gz: 2f82d1fce623b31121988574e23be05ff89f8642
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ac66a5766a45ad1bd3db4e48808a52ac3ae16a43871f563a20c7f70c61f9d4376011455dc016172113646acf5e112dff6ab099f449cdea2b134f3ea5668b9834
|
7
|
+
data.tar.gz: e322e27d4193b7744e84a046a1712e2fdc6ad63cce698a49a1a99b411bd9db0e0d67ab53ec28e5be22ed103a3f1c5652699c87a71773679683ea3a0e09fa4499
|
data/lib/alphabets/alphabets.rb
CHANGED
@@ -5,50 +5,56 @@ class Alphabet
|
|
5
5
|
UNACCENT = Reader.parse( <<TXT )
|
6
6
|
Ä A ä a
|
7
7
|
Á A á a
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
À A à a
|
9
|
+
à A ã a
|
10
|
+
 A â a
|
11
11
|
Å A å a
|
12
12
|
Æ AE æ ae # ae ligature
|
13
13
|
ā a
|
14
14
|
ă a
|
15
|
-
|
15
|
+
Ą A ą a # ą - U+0105 (261) - LATIN SMALL LETTER A WITH OGONEK
|
16
16
|
|
17
17
|
Ç C ç c # ç - U+00E7 (231) - LATIN SMALL LETTER C WITH CEDILLA
|
18
|
-
|
18
|
+
Ć C ć c
|
19
19
|
Č C č c
|
20
20
|
|
21
|
+
Ď D ď d
|
22
|
+
Ð D ð d # iceland - d
|
23
|
+
|
21
24
|
É E é e
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
+
È E è e
|
26
|
+
Ê E ê e
|
27
|
+
Ë E ë e
|
25
28
|
ė e
|
26
|
-
|
29
|
+
Ę E ę e
|
30
|
+
Ě E ě e
|
27
31
|
|
28
32
|
ğ g
|
29
33
|
|
30
34
|
İ I
|
31
35
|
Í I í i
|
32
|
-
|
36
|
+
Ì I ì i
|
37
|
+
Î I î i
|
33
38
|
ī i
|
34
39
|
ı i # ı - U+0131 (305) - LATIN SMALL LETTER DOTLESS I
|
40
|
+
Ï I ï i
|
35
41
|
|
36
42
|
Ł L ł l
|
37
43
|
|
38
|
-
|
39
|
-
|
40
|
-
|
44
|
+
Ñ N ñ n
|
45
|
+
Ń N ń n
|
46
|
+
Ň N ň n
|
41
47
|
|
42
48
|
Ö O ö o
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
49
|
+
Ő OE ő oe # hungarian - just use O/o - why? (it's not a ligature) why not?
|
50
|
+
Ó O ó o
|
51
|
+
Ò O ò o
|
52
|
+
Õ O õ o
|
53
|
+
Ô O ô o
|
47
54
|
ø o
|
48
|
-
ő o
|
49
55
|
Œ OE œ oe # oe ligature
|
50
56
|
|
51
|
-
|
57
|
+
Ř R ř r
|
52
58
|
|
53
59
|
Ś S ś s
|
54
60
|
Ş S ş s # ş - U+015F (351) - LATIN SMALL LETTER S WITH CEDILLA
|
@@ -56,20 +62,25 @@ UNACCENT = Reader.parse( <<TXT )
|
|
56
62
|
Š S š s
|
57
63
|
ß ss # ß - U+00DF (223) - LATIN SMALL LETTER SHARP S
|
58
64
|
|
59
|
-
Ţ
|
60
|
-
Ț
|
65
|
+
Ţ T ţ t # ţ - U+0163 (355) - LATIN SMALL LETTER T WITH CEDILLA
|
66
|
+
Ț T ț t # ț - U+021B (539) - LATIN SMALL LETTER T WITH COMMA BELOW
|
67
|
+
Ť T ť t
|
61
68
|
|
62
|
-
|
69
|
+
Þ P þ p # þ - U+00FE (254) - LATIN SMALL LETTER THORN
|
63
70
|
#### fix/check!!!! icelandic - use p is p or th - why? why not?
|
64
71
|
|
65
72
|
Ü U ü u
|
66
73
|
Ú U ú u
|
74
|
+
Ù U ù u
|
67
75
|
ū u
|
76
|
+
Ů U ů u
|
77
|
+
Û U û u
|
68
78
|
|
69
|
-
|
79
|
+
Ý Y ý y
|
80
|
+
Ÿ Y ÿ y
|
70
81
|
|
71
|
-
|
72
|
-
|
82
|
+
Ź Z ź z
|
83
|
+
Ż Z ż z
|
73
84
|
Ž Z ž z
|
74
85
|
TXT
|
75
86
|
|
@@ -99,21 +110,51 @@ DOWNCASE = %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z].reduce({}) do
|
|
99
110
|
end.merge( Reader.parse( <<TXT ) )
|
100
111
|
Ä ä
|
101
112
|
Á á
|
113
|
+
À à
|
114
|
+
 â
|
102
115
|
Å å
|
103
116
|
Æ æ # LATIN LETTER AE - ae ligature
|
117
|
+
Ą ą
|
118
|
+
à ã
|
104
119
|
|
105
120
|
Ç ç # LATIN LETTER C WITH CEDILLA
|
106
121
|
Č č
|
122
|
+
Ć ć
|
123
|
+
|
124
|
+
Ď ď
|
125
|
+
|
126
|
+
Ð ð # iceland - d
|
107
127
|
|
108
128
|
É é
|
129
|
+
È è
|
130
|
+
Ë ë
|
131
|
+
Ê ê
|
132
|
+
Ę ę
|
133
|
+
Ě ě
|
109
134
|
|
110
135
|
İ i
|
111
136
|
Í í
|
137
|
+
Ì ì
|
138
|
+
Ï ï
|
139
|
+
Î î
|
112
140
|
|
113
141
|
Ł ł
|
114
142
|
|
143
|
+
Ń ń
|
144
|
+
Ň ň
|
145
|
+
Ñ ñ
|
146
|
+
|
115
147
|
Ö ö
|
148
|
+
Ő ő
|
116
149
|
Œ œ # LATIN LIGATURE OE
|
150
|
+
Ó ó
|
151
|
+
Ò ò
|
152
|
+
Ô ô
|
153
|
+
Õ õ
|
154
|
+
|
155
|
+
Þ þ # iceland - p
|
156
|
+
|
157
|
+
Ř ř
|
117
158
|
|
118
159
|
Ś ś
|
119
160
|
Ş ş # LATIN LETTER S WITH CEDILLA
|
@@ -122,11 +163,20 @@ DOWNCASE = %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z].reduce({}) do
|
|
122
163
|
|
123
164
|
Ţ ţ # LATIN LETTER T WITH CEDILLA
|
124
165
|
Ț ț # LATIN LETTER T WITH COMMA BELOW
|
166
|
+
Ť ť
|
125
167
|
|
126
168
|
Ü ü
|
127
169
|
Ú ú
|
170
|
+
Ù ù
|
171
|
+
Ů ů
|
172
|
+
Û û
|
173
|
+
|
174
|
+
Ý ý
|
175
|
+
Ÿ ÿ
|
128
176
|
|
129
177
|
Ž ž
|
178
|
+
Ż ż
|
179
|
+
Ź ź
|
130
180
|
TXT
|
131
181
|
|
132
182
|
end # class Alphabet
|
data/lib/alphabets/reader.rb
CHANGED
@@ -10,7 +10,9 @@ class Reader ## todo/check: rename to CharReader or something - why? why not?
|
|
10
10
|
def self.parse( txt )
|
11
11
|
h = {} ## char(acter) table mappings
|
12
12
|
|
13
|
+
lineno = 0
|
13
14
|
txt.each_line do |line|
|
15
|
+
lineno += 1
|
14
16
|
line = line.strip
|
15
17
|
|
16
18
|
next if line.empty?
|
@@ -27,7 +29,7 @@ class Reader ## todo/check: rename to CharReader or something - why? why not?
|
|
27
29
|
|
28
30
|
## check - must be even - a multiple of two
|
29
31
|
if values.size % 2 != 0
|
30
|
-
puts "** !!! ERROR !!! - missing mapping pair - mappings must be even (a multiple of two):"
|
32
|
+
puts "** !!! ERROR !!! - [line:#{lineno}] missing mapping pair - mappings must be even (a multiple of two):"
|
31
33
|
pp values
|
32
34
|
exit 1
|
33
35
|
end
|
@@ -40,14 +42,14 @@ class Reader ## todo/check: rename to CharReader or something - why? why not?
|
|
40
42
|
|
41
43
|
## check - key must be a single-character/letter in unicode
|
42
44
|
if key.size != 1
|
43
|
-
puts "** !!! ERROR !!! - mapping character must be a single-character, size is #{key.size}"
|
45
|
+
puts "** !!! ERROR !!! - [line:#{lineno}] mapping character must be a single-character, size is #{key.size}"
|
44
46
|
pp slice
|
45
47
|
exit 1
|
46
48
|
end
|
47
49
|
|
48
50
|
## check - check for duplicates
|
49
51
|
if h[ key ]
|
50
|
-
puts "** !!! ERROR !!! - duplicate mapping character; key already present"
|
52
|
+
puts "** !!! ERROR !!! - [line:#{lineno}] duplicate mapping character; key already present"
|
51
53
|
pp slice
|
52
54
|
exit 1
|
53
55
|
else
|
data/lib/alphabets/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: alphabets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01-
|
11
|
+
date: 2020-01-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|