prose 0.2.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/prose.rb +27 -13
  3. data/lib/prose/prose.yaml +299 -58
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01b32c2b8bb846c0777b88c0de14e269555a54bec00b2b8e4db9a0ffacf6a15d
4
- data.tar.gz: 0513bd4bbfd8e31d9a2767aa4442003aba9cc58fa80938490ed7d203b96c3621
3
+ metadata.gz: 0c15589cc293ce2d9947b1715304955f5f300cefbe206364219c26d44a340c5c
4
+ data.tar.gz: d329f7a84087b23da56b4d65f8c52dfba94e89cd400d5299c0a045c6eada7d54
5
5
  SHA512:
6
- metadata.gz: 4b54b532728556f72e95f53a0106706f45b27eaa70c71b81a897fa6aea4a54b886909d812029a689a64789f3a9d6f532fc7e740d6e11ca9dbf398359e5c28f95
7
- data.tar.gz: 694676c05117cbd73da1aa76eff537d654420eb87104208fa719d5da3fef0b6696c35b19eaedf778b77671c13725b8eff45b7e97473a176691eb8c357381c314
6
+ metadata.gz: 499d18c8c48eb2c540accb2e8a833780cddcf08ff3b08111e96952778f2a4020869ab56ffcbaab0291fcab9726ed74c2cded0f3aa4cc9f6f17153910607a1050
7
+ data.tar.gz: 9582290253e756222526c892fc11532daa72569d6dec0343924297816874a03f5da17f6f6bc3537dc5e942fdd242d2bbf88e50770ab8681595fc47e7bbce1cd8
@@ -5,39 +5,53 @@ require 'yaml'
5
5
 
6
6
  # Ruby string class
7
7
  class String
8
- LAN_RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
8
+ CHAR_CODES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
9
9
 
10
10
  def prose
11
11
  find_origins_in(self)
12
12
  end
13
13
 
14
- # Refactor this so that the dynamic methods no more use find_origins_in instead only check
15
- # the ranges for the specific language only
16
- LAN_RANGES.invert.keys.each do |language|
17
- language_name = language.split('-').first
18
- method_name = "#{language_name}?"
14
+ CHAR_CODES['languages'].each do |language, ranges|
15
+ method_name = "#{language}?"
19
16
 
20
17
  define_method(method_name) do
21
- find_origins_in(self).include? language_name
18
+ chars.map { |char| char_belongs_to_language?(char, ranges) }.include? true
22
19
  end
23
20
 
24
21
  define_method("pure_#{method_name}") do
25
- (find_origins_in(self) - [language_name]).empty?
22
+ chars.map { |char| char_belongs_to_language?(char, ranges) }.uniq == [true]
26
23
  end
27
24
  end
28
25
 
29
- def language_of(ordinal, min_range, max_range)
26
+ def char_in_range?(ordinal, range)
27
+ min_range, max_range = range.split('-')
28
+
30
29
  (min_range.to_i(16) < ordinal) && (max_range.to_i(16) > ordinal)
31
30
  end
32
31
 
32
+ def char_belongs_to_language?(char, language_ranges)
33
+ return true if char == ' '
34
+
35
+ language_ranges.map { |range| char_in_range?(char.ord, range) }.include? true
36
+ end
37
+
38
+ def percentge_of(language)
39
+ total_languages = find_origins_in(self)
40
+
41
+ occurance_of_language = total_languages.count(language).to_f
42
+
43
+ ((occurance_of_language / total_languages.count.to_f) * 100.0).to_i
44
+ end
45
+
33
46
  def languages_of(letter)
34
- LAN_RANGES.keys.map do |key|
35
- min, max = key.split('-')
36
- LAN_RANGES[key].split('-').first if language_of(letter.ord, min, max)
47
+ ranges = CHAR_CODES['ranges']
48
+
49
+ ranges.keys.map do |key|
50
+ ranges[key] if char_in_range?(letter.ord, key)
37
51
  end
38
52
  end
39
53
 
40
54
  def find_origins_in(word)
41
- word.split('').map { |letter| languages_of(letter) unless letter.empty? }.flatten.compact.uniq
55
+ word.chars.map { |letter| languages_of(letter) unless letter.empty? }.flatten.compact.uniq
42
56
  end
43
57
  end
@@ -1,31 +1,279 @@
1
- # ranges:
1
+ languages:
2
+ CJK:
3
+ - 4E00–9FD5
4
+ hebrew:
5
+ - 0590-05FF
6
+ - FB00–FB4F
7
+ malayalam:
8
+ - 00D00-0D7F
9
+ armenian:
10
+ - 0530-058F
11
+ coptic:
12
+ - 2C80-2CFF
13
+ cypriot:
14
+ - 10800-1083F
15
+ cyrillic:
16
+ - 0400-04FF
17
+ - 0500-052F
18
+ - 2DE0-2DFF
19
+ - A640-A69F
20
+ georgian:
21
+ - 10A0-10FF
22
+ - 2D00-2D2F
23
+ glagolithic:
24
+ - 2C00-2C5F
25
+ gothic:
26
+ - 10330-1034F
27
+ greek:
28
+ - 0370-03FF
29
+ - 1F00-1FFF
30
+ latin:
31
+ - 0000-007F
32
+ - 0080-00FF
33
+ - 0100-017F
34
+ - 0180-024F
35
+ - 2C60-2C7F
36
+ - A720-A7FF
37
+ - 1E00-1EFF
38
+ - FB00-FB4F
39
+ - FB00-FB4F
40
+ - FF00-FFEF
41
+ ogham:
42
+ - 1680-169F
43
+ old_italics:
44
+ - 10300-1032F
45
+ phaistos:
46
+ - 101D0-101FF
47
+ runic:
48
+ - 16A0-16FF
49
+ shavian:
50
+ - 10450-1047F
51
+ bamum:
52
+ - A6A0-A6FF
53
+ - 16800-16A3F
54
+ egyptian_hieroglyphs:
55
+ - 13000-1342F
56
+ ethiopic:
57
+ - 1200-137F
58
+ - 1380-139F
59
+ - 2D80-2DDF
60
+ - AB00-AB2F
61
+ meroitic_cursive:
62
+ - 109A0-109FF
63
+ meroitic_hieroglyphs:
64
+ - 10980-1099F
65
+ nko:
66
+ - 07C0-07FF
67
+ osmanya:
68
+ - 10480-104AF
69
+ tifinagh:
70
+ - 2D30-2D7F
71
+ vai:
72
+ - A500-A63F
73
+ arabic:
74
+ - 0600-06FF
75
+ - 0750-077F
76
+ - 08A0-08FF
77
+ - FB50-FDFF
78
+ - FE70-FEFF
79
+ aramic:
80
+ - 10840-1085F
81
+ avestan:
82
+ - 10B00-10B3F
83
+ carian:
84
+ - 102A0-102DF
85
+ cuniform:
86
+ - 12000-123FF
87
+ cuniform_numbers_punctuation:
88
+ - 12400-1247F
89
+ lycian:
90
+ - 10280-1029F
91
+ mongolian:
92
+ - 1800-18AF
93
+ tibetan:
94
+ - 0F00-0FFF
95
+ bengali_assamese:
96
+ - 0980-09FF
97
+ gujarati:
98
+ - 0A80-0AFF
99
+ kannada:
100
+ - 0C80-0CFF
101
+ oriya:
102
+ - 0B00-0B7F
103
+ tamil:
104
+ - 0B80-0BFF
105
+ telugu:
106
+ - 0C00-0C7F
107
+ brahmi:
108
+ - 11000-1107F
109
+ devanagari:
110
+ - 0900-097F
111
+ - A8E0-A8FF
112
+ old_persian:
113
+ - 103A0-103DF
114
+ ugaritic:
115
+ - 10380-1039F
116
+ lydian:
117
+ - 10920-1093F
118
+ mandaic:
119
+ - 0840-085F
120
+ old_south_arabian:
121
+ - 10A60-10A7F
122
+ pahlavi:
123
+ - 10B60-10B7F
124
+ parthian:
125
+ - 10B40-10B5F
126
+ phoenician:
127
+ - 10900-1091F
128
+ samaritan:
129
+ - 0800-083F
130
+ syriac:
131
+ - 0700-074F
132
+ old_turkic:
133
+ - 10C00-10C4F
134
+ phags_pa:
135
+ - A840-A87F
136
+ chakma:
137
+ - 11100-1114F
138
+ gurmukhi:
139
+ - 0A00-0A7F
140
+ kaithi:
141
+ - 11080-110CF
142
+ kharoshthi:
143
+ - 10A00-10A5F
144
+ lepcha:
145
+ - 1C00-1C4F
146
+ limbu:
147
+ - 1900-194F
148
+ meetei_mayek:
149
+ - ABC0-ABFF
150
+ - AAE0-AAFF
151
+ ol_chiki:
152
+ - 1C50-1C7F
153
+ saurashtra:
154
+ - A880-A8DF
155
+ sharada:
156
+ - 11180-111DF
157
+ sinhala:
158
+ - 0D80-0DFF
159
+ sora_sompeng:
160
+ - 110D0-110FF
161
+ syloti_nagri:
162
+ - A800-A82F
163
+ takri:
164
+ - 11680-116CF
165
+ thaana:
166
+ - 0780-07BF
167
+ vedic:
168
+ - 1CD0-1CFF
169
+ balinese:
170
+ - 1B00-1B7F
171
+ batak:
172
+ - 1BC0-1BFF
173
+ buginese:
174
+ - 1A00-1A1F
175
+ cham:
176
+ - AA00-AA5F
177
+ javanese:
178
+ - A980-A9DF
179
+ kayah_li:
180
+ - A900-A92F
181
+ khmer:
182
+ - 1780-17FF
183
+ - 19E0-19FF
184
+ lao:
185
+ - 0E80-0EFF
186
+ myanmar:
187
+ - 1000-109F
188
+ - AA60-AA7F
189
+ new_tai_lue:
190
+ - 1980-19DF
191
+ rejang:
192
+ - A930-A95F
193
+ sudanese:
194
+ - 1B80-1BBF
195
+ - 1CC0-1CCF
196
+ tai_le:
197
+ - 1950-197F
198
+ tai_tham:
199
+ - 1A20-1AAF
200
+ tai_viet:
201
+ - AA80-AADF
202
+ thai:
203
+ - 0E00-0E7F
204
+ buhid:
205
+ - 1740-175F
206
+ hanunoo:
207
+ - 1720-173F
208
+ tagalog:
209
+ - 1700-171F
210
+ tagbanwa:
211
+ - 1760-177F
212
+ bopomofo:
213
+ - 3100-312F
214
+ - 31A0-31BF
215
+ hangul_jamo:
216
+ - 1100-11FF
217
+ - A960-A97F
218
+ - D7B0-D7FF
219
+ - 3130-318F
220
+ - FF00-FFEF
221
+ hangul:
222
+ - AC00-D7AF
223
+ hiragana:
224
+ - 3040-309F
225
+ katakana:
226
+ - 30A0-30FF
227
+ - 31F0-31FF
228
+ - FF00-FFEF
229
+ kana:
230
+ - 1B000-1B0FF
231
+ kanbun:
232
+ - 3190-319F
233
+ lisu:
234
+ - A4D0-A4FF
235
+ miao:
236
+ - 16F00-16F9F
237
+ yi:
238
+ - A000-A48F
239
+ - A490-A4CF
240
+ cherokee:
241
+ - 13A0-13FF
242
+ deseret:
243
+ - 10400-1044F
244
+ united_canadian_aborginal:
245
+ - 1400-167F
246
+ - 18B0-18FF
247
+
248
+ # Reverse of languages data
249
+ ranges:
2
250
  4E00–9FD5: CJK
3
- 0590-05FF: hebrew-1
4
- FB00–FB4F: hebrew-2
251
+ 0590-05FF: hebrew
252
+ FB00–FB4F: hebrew
5
253
  00D00-0D7F: malayalam
6
254
  0530-058F: armenian
7
255
  2C80-2CFF: coptic
8
256
  10800-1083F: cypriot
9
- 0400-04FF: cyrillic-1
10
- 0500-052F: cyrillic-2
11
- 2DE0-2DFF: cyrillic-3
12
- A640-A69F: cyrillic-4
13
- 10A0-10FF: georgian-1
14
- 2D00-2D2F: georgian-2
257
+ 0400-04FF: cyrillic
258
+ 0500-052F: cyrillic
259
+ 2DE0-2DFF: cyrillic
260
+ A640-A69F: cyrillic
261
+ 10A0-10FF: georgian
262
+ 2D00-2D2F: georgian
15
263
  2C00-2C5F: glagolithic
16
264
  10330-1034F: gothic
17
- 0370-03FF: greek-1
18
- 1F00-1FFF: greek-2
19
- 0000-007F: latin-1
20
- 0080-00FF: latin-2
21
- 0100-017F: latin-3
22
- 0180-024F: latin-4
23
- 2C60-2C7F: latin-5
24
- A720-A7FF: latin-6
25
- 1E00-1EFF: latin-7
26
- FB00-FB4F: latin-8
27
- FB00-FB4F: latin-9
28
- FF00-FFEF: latin-10
265
+ 0370-03FF: greek
266
+ 1F00-1FFF: greek
267
+ 0000-007F: latin
268
+ 0080-00FF: latin
269
+ 0100-017F: latin
270
+ 0180-024F: latin
271
+ 2C60-2C7F: latin
272
+ A720-A7FF: latin
273
+ 1E00-1EFF: latin
274
+ FB00-FB4F: latin
275
+ FB00-FB4F: latin
276
+ FF00-FFEF: latin
29
277
  1680-169F: ogham
30
278
  10300-1032F: old_italics
31
279
  101D0-101FF: phaistos
@@ -34,9 +282,9 @@
34
282
  A6A0-A6FF: bamum
35
283
  16800-16A3F: bamum
36
284
  13000-1342F: egyptian_hieroglyphs
37
- 1200-137F: ethiopic-1
38
- 1380-139F: ethiopic-2
39
- 2D80-2DDF: ethiopic-3
285
+ 1200-137F: ethiopic
286
+ 1380-139F: ethiopic
287
+ 2D80-2DDF: ethiopic
40
288
  AB00-AB2F: ethiopic
41
289
  109A0-109FF: meroitic_cursive
42
290
  10980-1099F: meroitic_hieroglyphs
@@ -44,11 +292,11 @@
44
292
  10480-104AF: osmanya
45
293
  2D30-2D7F: tifinagh
46
294
  A500-A63F: vai
47
- 0600-06FF: arabic-1
48
- 0750-077F: arabic-2
49
- 08A0-08FF: arabic-3
50
- FB50-FDFF: arabic-4
51
- FE70-FEFF: arabic-5
295
+ 0600-06FF: arabic
296
+ 0750-077F: arabic
297
+ 08A0-08FF: arabic
298
+ FB50-FDFF: arabic
299
+ FE70-FEFF: arabic
52
300
  10840-1085F: aramic
53
301
  10B00-10B3F: avestan
54
302
  102A0-102DF: carian
@@ -64,8 +312,8 @@
64
312
  0B80-0BFF: tamil
65
313
  0C00-0C7F: telugu
66
314
  11000-1107F: brahmi
67
- 0900-097F: devanagari-1
68
- A8E0-A8FF: devanagari-2
315
+ 0900-097F: devanagari
316
+ A8E0-A8FF: devanagari
69
317
  103A0-103DF: old_persian
70
318
  10380-1039F: ugaritic
71
319
  10920-1093F: lydian
@@ -84,8 +332,8 @@
84
332
  10A00-10A5F: kharoshthi
85
333
  1C00-1C4F: lepcha
86
334
  1900-194F: limbu
87
- ABC0-ABFF: meetei_mayek-1
88
- AAE0-AAFF: meetei_mayek-2
335
+ ABC0-ABFF: meetei_mayek
336
+ AAE0-AAFF: meetei_mayek
89
337
  1C50-1C7F: ol_chiki
90
338
  A880-A8DF: saurashtra
91
339
  11180-111DF: sharada
@@ -101,15 +349,15 @@
101
349
  AA00-AA5F: cham
102
350
  A980-A9DF: javanese
103
351
  A900-A92F: kayah_li
104
- 1780-17FF: khmer-1
105
- 19E0-19FF: khmer-2
352
+ 1780-17FF: khmer
353
+ 19E0-19FF: khmer
106
354
  0E80-0EFF: lao
107
- 1000-109F: myanmar-1
108
- AA60-AA7F: myanmar-2
355
+ 1000-109F: myanmar
356
+ AA60-AA7F: myanmar
109
357
  1980-19DF: new_tai_lue
110
358
  A930-A95F: rejang
111
- 1B80-1BBF: sudanese-1
112
- 1CC0-1CCF: sudanese-2
359
+ 1B80-1BBF: sudanese
360
+ 1CC0-1CCF: sudanese
113
361
  1950-197F: tai_le
114
362
  1A20-1AAF: tai_tham
115
363
  AA80-AADF: tai_viet
@@ -118,18 +366,18 @@
118
366
  1720-173F: hanunoo
119
367
  1700-171F: tagalog
120
368
  1760-177F: tagbanwa
121
- 3100-312F: bopomofo-1
122
- 31A0-31BF: bopomofo-2
123
- 1100-11FF: hangul_jamo-1
124
- A960-A97F: hangul_jamo-2
125
- D7B0-D7FF: hangul_jamo-3
126
- 3130-318F: hangul_jamo-4
127
- FF00-FFEF: hangul_jamo-5
369
+ 3100-312F: bopomofo
370
+ 31A0-31BF: bopomofo
371
+ 1100-11FF: hangul_jamo
372
+ A960-A97F: hangul_jamo
373
+ D7B0-D7FF: hangul_jamo
374
+ 3130-318F: hangul_jamo
375
+ FF00-FFEF: hangul_jamo
128
376
  AC00-D7AF: hangul
129
377
  3040-309F: hiragana
130
- 30A0-30FF: katakana-1
131
- 31F0-31FF: katakana-2
132
- FF00-FFEF: katakana-3
378
+ 30A0-30FF: katakana
379
+ 31F0-31FF: katakana
380
+ FF00-FFEF: katakana
133
381
  1B000-1B0FF: kana
134
382
  3190-319F: kanbun
135
383
  A4D0-A4FF: lisu
@@ -138,12 +386,5 @@
138
386
  A490-A4CF: yi
139
387
  13A0-13FF: cherokee
140
388
  10400-1044F: deseret
141
- 1400-167F: united_canadian_aborginal-1
142
- 18B0-18FF: united_canadian_aborginal-2
143
-
144
- #0000-007F: ASCII
145
-
146
- # languages:
147
- # #Future. if there is any
148
- # hebrew:
149
- # - hebrew
389
+ 1400-167F: united_canadian_aborginal
390
+ 18B0-18FF: united_canadian_aborginal
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prose
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edwin Rozario