prose 0.2.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/prose.rb +27 -13
- data/lib/prose/prose.yaml +299 -58
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c15589cc293ce2d9947b1715304955f5f300cefbe206364219c26d44a340c5c
|
4
|
+
data.tar.gz: d329f7a84087b23da56b4d65f8c52dfba94e89cd400d5299c0a045c6eada7d54
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 499d18c8c48eb2c540accb2e8a833780cddcf08ff3b08111e96952778f2a4020869ab56ffcbaab0291fcab9726ed74c2cded0f3aa4cc9f6f17153910607a1050
|
7
|
+
data.tar.gz: 9582290253e756222526c892fc11532daa72569d6dec0343924297816874a03f5da17f6f6bc3537dc5e942fdd242d2bbf88e50770ab8681595fc47e7bbce1cd8
|
data/lib/prose.rb
CHANGED
@@ -5,39 +5,53 @@ require 'yaml'
|
|
5
5
|
|
6
6
|
# Ruby string class
|
7
7
|
class String
|
8
|
-
|
8
|
+
CHAR_CODES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
|
9
9
|
|
10
10
|
def prose
|
11
11
|
find_origins_in(self)
|
12
12
|
end
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
LAN_RANGES.invert.keys.each do |language|
|
17
|
-
language_name = language.split('-').first
|
18
|
-
method_name = "#{language_name}?"
|
14
|
+
CHAR_CODES['languages'].each do |language, ranges|
|
15
|
+
method_name = "#{language}?"
|
19
16
|
|
20
17
|
define_method(method_name) do
|
21
|
-
|
18
|
+
chars.map { |char| char_belongs_to_language?(char, ranges) }.include? true
|
22
19
|
end
|
23
20
|
|
24
21
|
define_method("pure_#{method_name}") do
|
25
|
-
(
|
22
|
+
chars.map { |char| char_belongs_to_language?(char, ranges) }.uniq == [true]
|
26
23
|
end
|
27
24
|
end
|
28
25
|
|
29
|
-
def
|
26
|
+
def char_in_range?(ordinal, range)
|
27
|
+
min_range, max_range = range.split('-')
|
28
|
+
|
30
29
|
(min_range.to_i(16) < ordinal) && (max_range.to_i(16) > ordinal)
|
31
30
|
end
|
32
31
|
|
32
|
+
def char_belongs_to_language?(char, language_ranges)
|
33
|
+
return true if char == ' '
|
34
|
+
|
35
|
+
language_ranges.map { |range| char_in_range?(char.ord, range) }.include? true
|
36
|
+
end
|
37
|
+
|
38
|
+
def percentge_of(language)
|
39
|
+
total_languages = find_origins_in(self)
|
40
|
+
|
41
|
+
occurance_of_language = total_languages.count(language).to_f
|
42
|
+
|
43
|
+
((occurance_of_language / total_languages.count.to_f) * 100.0).to_i
|
44
|
+
end
|
45
|
+
|
33
46
|
def languages_of(letter)
|
34
|
-
|
35
|
-
|
36
|
-
|
47
|
+
ranges = CHAR_CODES['ranges']
|
48
|
+
|
49
|
+
ranges.keys.map do |key|
|
50
|
+
ranges[key] if char_in_range?(letter.ord, key)
|
37
51
|
end
|
38
52
|
end
|
39
53
|
|
40
54
|
def find_origins_in(word)
|
41
|
-
word.
|
55
|
+
word.chars.map { |letter| languages_of(letter) unless letter.empty? }.flatten.compact.uniq
|
42
56
|
end
|
43
57
|
end
|
data/lib/prose/prose.yaml
CHANGED
@@ -1,31 +1,279 @@
|
|
1
|
-
|
1
|
+
languages:
|
2
|
+
CJK:
|
3
|
+
- 4E00–9FD5
|
4
|
+
hebrew:
|
5
|
+
- 0590-05FF
|
6
|
+
- FB00–FB4F
|
7
|
+
malayalam:
|
8
|
+
- 00D00-0D7F
|
9
|
+
armenian:
|
10
|
+
- 0530-058F
|
11
|
+
coptic:
|
12
|
+
- 2C80-2CFF
|
13
|
+
cypriot:
|
14
|
+
- 10800-1083F
|
15
|
+
cyrillic:
|
16
|
+
- 0400-04FF
|
17
|
+
- 0500-052F
|
18
|
+
- 2DE0-2DFF
|
19
|
+
- A640-A69F
|
20
|
+
georgian:
|
21
|
+
- 10A0-10FF
|
22
|
+
- 2D00-2D2F
|
23
|
+
glagolithic:
|
24
|
+
- 2C00-2C5F
|
25
|
+
gothic:
|
26
|
+
- 10330-1034F
|
27
|
+
greek:
|
28
|
+
- 0370-03FF
|
29
|
+
- 1F00-1FFF
|
30
|
+
latin:
|
31
|
+
- 0000-007F
|
32
|
+
- 0080-00FF
|
33
|
+
- 0100-017F
|
34
|
+
- 0180-024F
|
35
|
+
- 2C60-2C7F
|
36
|
+
- A720-A7FF
|
37
|
+
- 1E00-1EFF
|
38
|
+
- FB00-FB4F
|
39
|
+
- FB00-FB4F
|
40
|
+
- FF00-FFEF
|
41
|
+
ogham:
|
42
|
+
- 1680-169F
|
43
|
+
old_italics:
|
44
|
+
- 10300-1032F
|
45
|
+
phaistos:
|
46
|
+
- 101D0-101FF
|
47
|
+
runic:
|
48
|
+
- 16A0-16FF
|
49
|
+
shavian:
|
50
|
+
- 10450-1047F
|
51
|
+
bamum:
|
52
|
+
- A6A0-A6FF
|
53
|
+
- 16800-16A3F
|
54
|
+
egyptian_hieroglyphs:
|
55
|
+
- 13000-1342F
|
56
|
+
ethiopic:
|
57
|
+
- 1200-137F
|
58
|
+
- 1380-139F
|
59
|
+
- 2D80-2DDF
|
60
|
+
- AB00-AB2F
|
61
|
+
meroitic_cursive:
|
62
|
+
- 109A0-109FF
|
63
|
+
meroitic_hieroglyphs:
|
64
|
+
- 10980-1099F
|
65
|
+
nko:
|
66
|
+
- 07C0-07FF
|
67
|
+
osmanya:
|
68
|
+
- 10480-104AF
|
69
|
+
tifinagh:
|
70
|
+
- 2D30-2D7F
|
71
|
+
vai:
|
72
|
+
- A500-A63F
|
73
|
+
arabic:
|
74
|
+
- 0600-06FF
|
75
|
+
- 0750-077F
|
76
|
+
- 08A0-08FF
|
77
|
+
- FB50-FDFF
|
78
|
+
- FE70-FEFF
|
79
|
+
aramic:
|
80
|
+
- 10840-1085F
|
81
|
+
avestan:
|
82
|
+
- 10B00-10B3F
|
83
|
+
carian:
|
84
|
+
- 102A0-102DF
|
85
|
+
cuniform:
|
86
|
+
- 12000-123FF
|
87
|
+
cuniform_numbers_punctuation:
|
88
|
+
- 12400-1247F
|
89
|
+
lycian:
|
90
|
+
- 10280-1029F
|
91
|
+
mongolian:
|
92
|
+
- 1800-18AF
|
93
|
+
tibetan:
|
94
|
+
- 0F00-0FFF
|
95
|
+
bengali_assamese:
|
96
|
+
- 0980-09FF
|
97
|
+
gujarati:
|
98
|
+
- 0A80-0AFF
|
99
|
+
kannada:
|
100
|
+
- 0C80-0CFF
|
101
|
+
oriya:
|
102
|
+
- 0B00-0B7F
|
103
|
+
tamil:
|
104
|
+
- 0B80-0BFF
|
105
|
+
telugu:
|
106
|
+
- 0C00-0C7F
|
107
|
+
brahmi:
|
108
|
+
- 11000-1107F
|
109
|
+
devanagari:
|
110
|
+
- 0900-097F
|
111
|
+
- A8E0-A8FF
|
112
|
+
old_persian:
|
113
|
+
- 103A0-103DF
|
114
|
+
ugaritic:
|
115
|
+
- 10380-1039F
|
116
|
+
lydian:
|
117
|
+
- 10920-1093F
|
118
|
+
mandaic:
|
119
|
+
- 0840-085F
|
120
|
+
old_south_arabian:
|
121
|
+
- 10A60-10A7F
|
122
|
+
pahlavi:
|
123
|
+
- 10B60-10B7F
|
124
|
+
parthian:
|
125
|
+
- 10B40-10B5F
|
126
|
+
phoenician:
|
127
|
+
- 10900-1091F
|
128
|
+
samaritan:
|
129
|
+
- 0800-083F
|
130
|
+
syriac:
|
131
|
+
- 0700-074F
|
132
|
+
old_turkic:
|
133
|
+
- 10C00-10C4F
|
134
|
+
phags_pa:
|
135
|
+
- A840-A87F
|
136
|
+
chakma:
|
137
|
+
- 11100-1114F
|
138
|
+
gurmukhi:
|
139
|
+
- 0A00-0A7F
|
140
|
+
kaithi:
|
141
|
+
- 11080-110CF
|
142
|
+
kharoshthi:
|
143
|
+
- 10A00-10A5F
|
144
|
+
lepcha:
|
145
|
+
- 1C00-1C4F
|
146
|
+
limbu:
|
147
|
+
- 1900-194F
|
148
|
+
meetei_mayek:
|
149
|
+
- ABC0-ABFF
|
150
|
+
- AAE0-AAFF
|
151
|
+
ol_chiki:
|
152
|
+
- 1C50-1C7F
|
153
|
+
saurashtra:
|
154
|
+
- A880-A8DF
|
155
|
+
sharada:
|
156
|
+
- 11180-111DF
|
157
|
+
sinhala:
|
158
|
+
- 0D80-0DFF
|
159
|
+
sora_sompeng:
|
160
|
+
- 110D0-110FF
|
161
|
+
syloti_nagri:
|
162
|
+
- A800-A82F
|
163
|
+
takri:
|
164
|
+
- 11680-116CF
|
165
|
+
thaana:
|
166
|
+
- 0780-07BF
|
167
|
+
vedic:
|
168
|
+
- 1CD0-1CFF
|
169
|
+
balinese:
|
170
|
+
- 1B00-1B7F
|
171
|
+
batak:
|
172
|
+
- 1BC0-1BFF
|
173
|
+
buginese:
|
174
|
+
- 1A00-1A1F
|
175
|
+
cham:
|
176
|
+
- AA00-AA5F
|
177
|
+
javanese:
|
178
|
+
- A980-A9DF
|
179
|
+
kayah_li:
|
180
|
+
- A900-A92F
|
181
|
+
khmer:
|
182
|
+
- 1780-17FF
|
183
|
+
- 19E0-19FF
|
184
|
+
lao:
|
185
|
+
- 0E80-0EFF
|
186
|
+
myanmar:
|
187
|
+
- 1000-109F
|
188
|
+
- AA60-AA7F
|
189
|
+
new_tai_lue:
|
190
|
+
- 1980-19DF
|
191
|
+
rejang:
|
192
|
+
- A930-A95F
|
193
|
+
sudanese:
|
194
|
+
- 1B80-1BBF
|
195
|
+
- 1CC0-1CCF
|
196
|
+
tai_le:
|
197
|
+
- 1950-197F
|
198
|
+
tai_tham:
|
199
|
+
- 1A20-1AAF
|
200
|
+
tai_viet:
|
201
|
+
- AA80-AADF
|
202
|
+
thai:
|
203
|
+
- 0E00-0E7F
|
204
|
+
buhid:
|
205
|
+
- 1740-175F
|
206
|
+
hanunoo:
|
207
|
+
- 1720-173F
|
208
|
+
tagalog:
|
209
|
+
- 1700-171F
|
210
|
+
tagbanwa:
|
211
|
+
- 1760-177F
|
212
|
+
bopomofo:
|
213
|
+
- 3100-312F
|
214
|
+
- 31A0-31BF
|
215
|
+
hangul_jamo:
|
216
|
+
- 1100-11FF
|
217
|
+
- A960-A97F
|
218
|
+
- D7B0-D7FF
|
219
|
+
- 3130-318F
|
220
|
+
- FF00-FFEF
|
221
|
+
hangul:
|
222
|
+
- AC00-D7AF
|
223
|
+
hiragana:
|
224
|
+
- 3040-309F
|
225
|
+
katakana:
|
226
|
+
- 30A0-30FF
|
227
|
+
- 31F0-31FF
|
228
|
+
- FF00-FFEF
|
229
|
+
kana:
|
230
|
+
- 1B000-1B0FF
|
231
|
+
kanbun:
|
232
|
+
- 3190-319F
|
233
|
+
lisu:
|
234
|
+
- A4D0-A4FF
|
235
|
+
miao:
|
236
|
+
- 16F00-16F9F
|
237
|
+
yi:
|
238
|
+
- A000-A48F
|
239
|
+
- A490-A4CF
|
240
|
+
cherokee:
|
241
|
+
- 13A0-13FF
|
242
|
+
deseret:
|
243
|
+
- 10400-1044F
|
244
|
+
united_canadian_aborginal:
|
245
|
+
- 1400-167F
|
246
|
+
- 18B0-18FF
|
247
|
+
|
248
|
+
# Reverse of languages data
|
249
|
+
ranges:
|
2
250
|
4E00–9FD5: CJK
|
3
|
-
0590-05FF: hebrew
|
4
|
-
FB00–FB4F: hebrew
|
251
|
+
0590-05FF: hebrew
|
252
|
+
FB00–FB4F: hebrew
|
5
253
|
00D00-0D7F: malayalam
|
6
254
|
0530-058F: armenian
|
7
255
|
2C80-2CFF: coptic
|
8
256
|
10800-1083F: cypriot
|
9
|
-
0400-04FF: cyrillic
|
10
|
-
0500-052F: cyrillic
|
11
|
-
2DE0-2DFF: cyrillic
|
12
|
-
A640-A69F: cyrillic
|
13
|
-
10A0-10FF: georgian
|
14
|
-
2D00-2D2F: georgian
|
257
|
+
0400-04FF: cyrillic
|
258
|
+
0500-052F: cyrillic
|
259
|
+
2DE0-2DFF: cyrillic
|
260
|
+
A640-A69F: cyrillic
|
261
|
+
10A0-10FF: georgian
|
262
|
+
2D00-2D2F: georgian
|
15
263
|
2C00-2C5F: glagolithic
|
16
264
|
10330-1034F: gothic
|
17
|
-
0370-03FF: greek
|
18
|
-
1F00-1FFF: greek
|
19
|
-
0000-007F: latin
|
20
|
-
0080-00FF: latin
|
21
|
-
0100-017F: latin
|
22
|
-
0180-024F: latin
|
23
|
-
2C60-2C7F: latin
|
24
|
-
A720-A7FF: latin
|
25
|
-
1E00-1EFF: latin
|
26
|
-
FB00-FB4F: latin
|
27
|
-
FB00-FB4F: latin
|
28
|
-
FF00-FFEF: latin
|
265
|
+
0370-03FF: greek
|
266
|
+
1F00-1FFF: greek
|
267
|
+
0000-007F: latin
|
268
|
+
0080-00FF: latin
|
269
|
+
0100-017F: latin
|
270
|
+
0180-024F: latin
|
271
|
+
2C60-2C7F: latin
|
272
|
+
A720-A7FF: latin
|
273
|
+
1E00-1EFF: latin
|
274
|
+
FB00-FB4F: latin
|
275
|
+
FB00-FB4F: latin
|
276
|
+
FF00-FFEF: latin
|
29
277
|
1680-169F: ogham
|
30
278
|
10300-1032F: old_italics
|
31
279
|
101D0-101FF: phaistos
|
@@ -34,9 +282,9 @@
|
|
34
282
|
A6A0-A6FF: bamum
|
35
283
|
16800-16A3F: bamum
|
36
284
|
13000-1342F: egyptian_hieroglyphs
|
37
|
-
1200-137F: ethiopic
|
38
|
-
1380-139F: ethiopic
|
39
|
-
2D80-2DDF: ethiopic
|
285
|
+
1200-137F: ethiopic
|
286
|
+
1380-139F: ethiopic
|
287
|
+
2D80-2DDF: ethiopic
|
40
288
|
AB00-AB2F: ethiopic
|
41
289
|
109A0-109FF: meroitic_cursive
|
42
290
|
10980-1099F: meroitic_hieroglyphs
|
@@ -44,11 +292,11 @@
|
|
44
292
|
10480-104AF: osmanya
|
45
293
|
2D30-2D7F: tifinagh
|
46
294
|
A500-A63F: vai
|
47
|
-
0600-06FF: arabic
|
48
|
-
0750-077F: arabic
|
49
|
-
08A0-08FF: arabic
|
50
|
-
FB50-FDFF: arabic
|
51
|
-
FE70-FEFF: arabic
|
295
|
+
0600-06FF: arabic
|
296
|
+
0750-077F: arabic
|
297
|
+
08A0-08FF: arabic
|
298
|
+
FB50-FDFF: arabic
|
299
|
+
FE70-FEFF: arabic
|
52
300
|
10840-1085F: aramic
|
53
301
|
10B00-10B3F: avestan
|
54
302
|
102A0-102DF: carian
|
@@ -64,8 +312,8 @@
|
|
64
312
|
0B80-0BFF: tamil
|
65
313
|
0C00-0C7F: telugu
|
66
314
|
11000-1107F: brahmi
|
67
|
-
0900-097F: devanagari
|
68
|
-
A8E0-A8FF: devanagari
|
315
|
+
0900-097F: devanagari
|
316
|
+
A8E0-A8FF: devanagari
|
69
317
|
103A0-103DF: old_persian
|
70
318
|
10380-1039F: ugaritic
|
71
319
|
10920-1093F: lydian
|
@@ -84,8 +332,8 @@
|
|
84
332
|
10A00-10A5F: kharoshthi
|
85
333
|
1C00-1C4F: lepcha
|
86
334
|
1900-194F: limbu
|
87
|
-
ABC0-ABFF: meetei_mayek
|
88
|
-
AAE0-AAFF: meetei_mayek
|
335
|
+
ABC0-ABFF: meetei_mayek
|
336
|
+
AAE0-AAFF: meetei_mayek
|
89
337
|
1C50-1C7F: ol_chiki
|
90
338
|
A880-A8DF: saurashtra
|
91
339
|
11180-111DF: sharada
|
@@ -101,15 +349,15 @@
|
|
101
349
|
AA00-AA5F: cham
|
102
350
|
A980-A9DF: javanese
|
103
351
|
A900-A92F: kayah_li
|
104
|
-
1780-17FF: khmer
|
105
|
-
19E0-19FF: khmer
|
352
|
+
1780-17FF: khmer
|
353
|
+
19E0-19FF: khmer
|
106
354
|
0E80-0EFF: lao
|
107
|
-
1000-109F: myanmar
|
108
|
-
AA60-AA7F: myanmar
|
355
|
+
1000-109F: myanmar
|
356
|
+
AA60-AA7F: myanmar
|
109
357
|
1980-19DF: new_tai_lue
|
110
358
|
A930-A95F: rejang
|
111
|
-
1B80-1BBF: sudanese
|
112
|
-
1CC0-1CCF: sudanese
|
359
|
+
1B80-1BBF: sudanese
|
360
|
+
1CC0-1CCF: sudanese
|
113
361
|
1950-197F: tai_le
|
114
362
|
1A20-1AAF: tai_tham
|
115
363
|
AA80-AADF: tai_viet
|
@@ -118,18 +366,18 @@
|
|
118
366
|
1720-173F: hanunoo
|
119
367
|
1700-171F: tagalog
|
120
368
|
1760-177F: tagbanwa
|
121
|
-
3100-312F: bopomofo
|
122
|
-
31A0-31BF: bopomofo
|
123
|
-
1100-11FF: hangul_jamo
|
124
|
-
A960-A97F: hangul_jamo
|
125
|
-
D7B0-D7FF: hangul_jamo
|
126
|
-
3130-318F: hangul_jamo
|
127
|
-
FF00-FFEF: hangul_jamo
|
369
|
+
3100-312F: bopomofo
|
370
|
+
31A0-31BF: bopomofo
|
371
|
+
1100-11FF: hangul_jamo
|
372
|
+
A960-A97F: hangul_jamo
|
373
|
+
D7B0-D7FF: hangul_jamo
|
374
|
+
3130-318F: hangul_jamo
|
375
|
+
FF00-FFEF: hangul_jamo
|
128
376
|
AC00-D7AF: hangul
|
129
377
|
3040-309F: hiragana
|
130
|
-
30A0-30FF: katakana
|
131
|
-
31F0-31FF: katakana
|
132
|
-
FF00-FFEF: katakana
|
378
|
+
30A0-30FF: katakana
|
379
|
+
31F0-31FF: katakana
|
380
|
+
FF00-FFEF: katakana
|
133
381
|
1B000-1B0FF: kana
|
134
382
|
3190-319F: kanbun
|
135
383
|
A4D0-A4FF: lisu
|
@@ -138,12 +386,5 @@
|
|
138
386
|
A490-A4CF: yi
|
139
387
|
13A0-13FF: cherokee
|
140
388
|
10400-1044F: deseret
|
141
|
-
1400-167F: united_canadian_aborginal
|
142
|
-
18B0-18FF: united_canadian_aborginal
|
143
|
-
|
144
|
-
#0000-007F: ASCII
|
145
|
-
|
146
|
-
# languages:
|
147
|
-
# #Future. if there is any
|
148
|
-
# hebrew:
|
149
|
-
# - hebrew
|
389
|
+
1400-167F: united_canadian_aborginal
|
390
|
+
18B0-18FF: united_canadian_aborginal
|