prose 0.2.3 → 1.0.0

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry it was published to. It is provided for informational purposes only.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/prose.rb +27 -13
  3. data/lib/prose/prose.yaml +299 -58
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01b32c2b8bb846c0777b88c0de14e269555a54bec00b2b8e4db9a0ffacf6a15d
4
- data.tar.gz: 0513bd4bbfd8e31d9a2767aa4442003aba9cc58fa80938490ed7d203b96c3621
3
+ metadata.gz: 0c15589cc293ce2d9947b1715304955f5f300cefbe206364219c26d44a340c5c
4
+ data.tar.gz: d329f7a84087b23da56b4d65f8c52dfba94e89cd400d5299c0a045c6eada7d54
5
5
  SHA512:
6
- metadata.gz: 4b54b532728556f72e95f53a0106706f45b27eaa70c71b81a897fa6aea4a54b886909d812029a689a64789f3a9d6f532fc7e740d6e11ca9dbf398359e5c28f95
7
- data.tar.gz: 694676c05117cbd73da1aa76eff537d654420eb87104208fa719d5da3fef0b6696c35b19eaedf778b77671c13725b8eff45b7e97473a176691eb8c357381c314
6
+ metadata.gz: 499d18c8c48eb2c540accb2e8a833780cddcf08ff3b08111e96952778f2a4020869ab56ffcbaab0291fcab9726ed74c2cded0f3aa4cc9f6f17153910607a1050
7
+ data.tar.gz: 9582290253e756222526c892fc11532daa72569d6dec0343924297816874a03f5da17f6f6bc3537dc5e942fdd242d2bbf88e50770ab8681595fc47e7bbce1cd8
data/lib/prose.rb CHANGED
@@ -5,39 +5,53 @@ require 'yaml'
5
5
 
6
6
  # Ruby string class
7
7
  class String
8
- LAN_RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
8
+ CHAR_CODES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
9
9
 
10
10
  def prose
11
11
  find_origins_in(self)
12
12
  end
13
13
 
14
- # Refactor this so that the dynamic methods no more use find_origins_in instead only check
15
- # the ranges for the specific language only
16
- LAN_RANGES.invert.keys.each do |language|
17
- language_name = language.split('-').first
18
- method_name = "#{language_name}?"
14
+ CHAR_CODES['languages'].each do |language, ranges|
15
+ method_name = "#{language}?"
19
16
 
20
17
  define_method(method_name) do
21
- find_origins_in(self).include? language_name
18
+ chars.map { |char| char_belongs_to_language?(char, ranges) }.include? true
22
19
  end
23
20
 
24
21
  define_method("pure_#{method_name}") do
25
- (find_origins_in(self) - [language_name]).empty?
22
+ chars.map { |char| char_belongs_to_language?(char, ranges) }.uniq == [true]
26
23
  end
27
24
  end
28
25
 
29
- def language_of(ordinal, min_range, max_range)
26
+ def char_in_range?(ordinal, range)
27
+ min_range, max_range = range.split('-')
28
+
30
29
  (min_range.to_i(16) < ordinal) && (max_range.to_i(16) > ordinal)
31
30
  end
32
31
 
32
+ def char_belongs_to_language?(char, language_ranges)
33
+ return true if char == ' '
34
+
35
+ language_ranges.map { |range| char_in_range?(char.ord, range) }.include? true
36
+ end
37
+
38
+ def percentge_of(language)
39
+ total_languages = find_origins_in(self)
40
+
41
+ occurance_of_language = total_languages.count(language).to_f
42
+
43
+ ((occurance_of_language / total_languages.count.to_f) * 100.0).to_i
44
+ end
45
+
33
46
  def languages_of(letter)
34
- LAN_RANGES.keys.map do |key|
35
- min, max = key.split('-')
36
- LAN_RANGES[key].split('-').first if language_of(letter.ord, min, max)
47
+ ranges = CHAR_CODES['ranges']
48
+
49
+ ranges.keys.map do |key|
50
+ ranges[key] if char_in_range?(letter.ord, key)
37
51
  end
38
52
  end
39
53
 
40
54
  def find_origins_in(word)
41
- word.split('').map { |letter| languages_of(letter) unless letter.empty? }.flatten.compact.uniq
55
+ word.chars.map { |letter| languages_of(letter) unless letter.empty? }.flatten.compact.uniq
42
56
  end
43
57
  end
data/lib/prose/prose.yaml CHANGED
@@ -1,31 +1,279 @@
1
- # ranges:
1
+ languages:
2
+ CJK:
3
+ - 4E00–9FD5
4
+ hebrew:
5
+ - 0590-05FF
6
+ - FB00–FB4F
7
+ malayalam:
8
+ - 00D00-0D7F
9
+ armenian:
10
+ - 0530-058F
11
+ coptic:
12
+ - 2C80-2CFF
13
+ cypriot:
14
+ - 10800-1083F
15
+ cyrillic:
16
+ - 0400-04FF
17
+ - 0500-052F
18
+ - 2DE0-2DFF
19
+ - A640-A69F
20
+ georgian:
21
+ - 10A0-10FF
22
+ - 2D00-2D2F
23
+ glagolithic:
24
+ - 2C00-2C5F
25
+ gothic:
26
+ - 10330-1034F
27
+ greek:
28
+ - 0370-03FF
29
+ - 1F00-1FFF
30
+ latin:
31
+ - 0000-007F
32
+ - 0080-00FF
33
+ - 0100-017F
34
+ - 0180-024F
35
+ - 2C60-2C7F
36
+ - A720-A7FF
37
+ - 1E00-1EFF
38
+ - FB00-FB4F
39
+ - FB00-FB4F
40
+ - FF00-FFEF
41
+ ogham:
42
+ - 1680-169F
43
+ old_italics:
44
+ - 10300-1032F
45
+ phaistos:
46
+ - 101D0-101FF
47
+ runic:
48
+ - 16A0-16FF
49
+ shavian:
50
+ - 10450-1047F
51
+ bamum:
52
+ - A6A0-A6FF
53
+ - 16800-16A3F
54
+ egyptian_hieroglyphs:
55
+ - 13000-1342F
56
+ ethiopic:
57
+ - 1200-137F
58
+ - 1380-139F
59
+ - 2D80-2DDF
60
+ - AB00-AB2F
61
+ meroitic_cursive:
62
+ - 109A0-109FF
63
+ meroitic_hieroglyphs:
64
+ - 10980-1099F
65
+ nko:
66
+ - 07C0-07FF
67
+ osmanya:
68
+ - 10480-104AF
69
+ tifinagh:
70
+ - 2D30-2D7F
71
+ vai:
72
+ - A500-A63F
73
+ arabic:
74
+ - 0600-06FF
75
+ - 0750-077F
76
+ - 08A0-08FF
77
+ - FB50-FDFF
78
+ - FE70-FEFF
79
+ aramic:
80
+ - 10840-1085F
81
+ avestan:
82
+ - 10B00-10B3F
83
+ carian:
84
+ - 102A0-102DF
85
+ cuniform:
86
+ - 12000-123FF
87
+ cuniform_numbers_punctuation:
88
+ - 12400-1247F
89
+ lycian:
90
+ - 10280-1029F
91
+ mongolian:
92
+ - 1800-18AF
93
+ tibetan:
94
+ - 0F00-0FFF
95
+ bengali_assamese:
96
+ - 0980-09FF
97
+ gujarati:
98
+ - 0A80-0AFF
99
+ kannada:
100
+ - 0C80-0CFF
101
+ oriya:
102
+ - 0B00-0B7F
103
+ tamil:
104
+ - 0B80-0BFF
105
+ telugu:
106
+ - 0C00-0C7F
107
+ brahmi:
108
+ - 11000-1107F
109
+ devanagari:
110
+ - 0900-097F
111
+ - A8E0-A8FF
112
+ old_persian:
113
+ - 103A0-103DF
114
+ ugaritic:
115
+ - 10380-1039F
116
+ lydian:
117
+ - 10920-1093F
118
+ mandaic:
119
+ - 0840-085F
120
+ old_south_arabian:
121
+ - 10A60-10A7F
122
+ pahlavi:
123
+ - 10B60-10B7F
124
+ parthian:
125
+ - 10B40-10B5F
126
+ phoenician:
127
+ - 10900-1091F
128
+ samaritan:
129
+ - 0800-083F
130
+ syriac:
131
+ - 0700-074F
132
+ old_turkic:
133
+ - 10C00-10C4F
134
+ phags_pa:
135
+ - A840-A87F
136
+ chakma:
137
+ - 11100-1114F
138
+ gurmukhi:
139
+ - 0A00-0A7F
140
+ kaithi:
141
+ - 11080-110CF
142
+ kharoshthi:
143
+ - 10A00-10A5F
144
+ lepcha:
145
+ - 1C00-1C4F
146
+ limbu:
147
+ - 1900-194F
148
+ meetei_mayek:
149
+ - ABC0-ABFF
150
+ - AAE0-AAFF
151
+ ol_chiki:
152
+ - 1C50-1C7F
153
+ saurashtra:
154
+ - A880-A8DF
155
+ sharada:
156
+ - 11180-111DF
157
+ sinhala:
158
+ - 0D80-0DFF
159
+ sora_sompeng:
160
+ - 110D0-110FF
161
+ syloti_nagri:
162
+ - A800-A82F
163
+ takri:
164
+ - 11680-116CF
165
+ thaana:
166
+ - 0780-07BF
167
+ vedic:
168
+ - 1CD0-1CFF
169
+ balinese:
170
+ - 1B00-1B7F
171
+ batak:
172
+ - 1BC0-1BFF
173
+ buginese:
174
+ - 1A00-1A1F
175
+ cham:
176
+ - AA00-AA5F
177
+ javanese:
178
+ - A980-A9DF
179
+ kayah_li:
180
+ - A900-A92F
181
+ khmer:
182
+ - 1780-17FF
183
+ - 19E0-19FF
184
+ lao:
185
+ - 0E80-0EFF
186
+ myanmar:
187
+ - 1000-109F
188
+ - AA60-AA7F
189
+ new_tai_lue:
190
+ - 1980-19DF
191
+ rejang:
192
+ - A930-A95F
193
+ sudanese:
194
+ - 1B80-1BBF
195
+ - 1CC0-1CCF
196
+ tai_le:
197
+ - 1950-197F
198
+ tai_tham:
199
+ - 1A20-1AAF
200
+ tai_viet:
201
+ - AA80-AADF
202
+ thai:
203
+ - 0E00-0E7F
204
+ buhid:
205
+ - 1740-175F
206
+ hanunoo:
207
+ - 1720-173F
208
+ tagalog:
209
+ - 1700-171F
210
+ tagbanwa:
211
+ - 1760-177F
212
+ bopomofo:
213
+ - 3100-312F
214
+ - 31A0-31BF
215
+ hangul_jamo:
216
+ - 1100-11FF
217
+ - A960-A97F
218
+ - D7B0-D7FF
219
+ - 3130-318F
220
+ - FF00-FFEF
221
+ hangul:
222
+ - AC00-D7AF
223
+ hiragana:
224
+ - 3040-309F
225
+ katakana:
226
+ - 30A0-30FF
227
+ - 31F0-31FF
228
+ - FF00-FFEF
229
+ kana:
230
+ - 1B000-1B0FF
231
+ kanbun:
232
+ - 3190-319F
233
+ lisu:
234
+ - A4D0-A4FF
235
+ miao:
236
+ - 16F00-16F9F
237
+ yi:
238
+ - A000-A48F
239
+ - A490-A4CF
240
+ cherokee:
241
+ - 13A0-13FF
242
+ deseret:
243
+ - 10400-1044F
244
+ united_canadian_aborginal:
245
+ - 1400-167F
246
+ - 18B0-18FF
247
+
248
+ # Reverse of languages data
249
+ ranges:
2
250
  4E00–9FD5: CJK
3
- 0590-05FF: hebrew-1
4
- FB00–FB4F: hebrew-2
251
+ 0590-05FF: hebrew
252
+ FB00–FB4F: hebrew
5
253
  00D00-0D7F: malayalam
6
254
  0530-058F: armenian
7
255
  2C80-2CFF: coptic
8
256
  10800-1083F: cypriot
9
- 0400-04FF: cyrillic-1
10
- 0500-052F: cyrillic-2
11
- 2DE0-2DFF: cyrillic-3
12
- A640-A69F: cyrillic-4
13
- 10A0-10FF: georgian-1
14
- 2D00-2D2F: georgian-2
257
+ 0400-04FF: cyrillic
258
+ 0500-052F: cyrillic
259
+ 2DE0-2DFF: cyrillic
260
+ A640-A69F: cyrillic
261
+ 10A0-10FF: georgian
262
+ 2D00-2D2F: georgian
15
263
  2C00-2C5F: glagolithic
16
264
  10330-1034F: gothic
17
- 0370-03FF: greek-1
18
- 1F00-1FFF: greek-2
19
- 0000-007F: latin-1
20
- 0080-00FF: latin-2
21
- 0100-017F: latin-3
22
- 0180-024F: latin-4
23
- 2C60-2C7F: latin-5
24
- A720-A7FF: latin-6
25
- 1E00-1EFF: latin-7
26
- FB00-FB4F: latin-8
27
- FB00-FB4F: latin-9
28
- FF00-FFEF: latin-10
265
+ 0370-03FF: greek
266
+ 1F00-1FFF: greek
267
+ 0000-007F: latin
268
+ 0080-00FF: latin
269
+ 0100-017F: latin
270
+ 0180-024F: latin
271
+ 2C60-2C7F: latin
272
+ A720-A7FF: latin
273
+ 1E00-1EFF: latin
274
+ FB00-FB4F: latin
275
+ FB00-FB4F: latin
276
+ FF00-FFEF: latin
29
277
  1680-169F: ogham
30
278
  10300-1032F: old_italics
31
279
  101D0-101FF: phaistos
@@ -34,9 +282,9 @@
34
282
  A6A0-A6FF: bamum
35
283
  16800-16A3F: bamum
36
284
  13000-1342F: egyptian_hieroglyphs
37
- 1200-137F: ethiopic-1
38
- 1380-139F: ethiopic-2
39
- 2D80-2DDF: ethiopic-3
285
+ 1200-137F: ethiopic
286
+ 1380-139F: ethiopic
287
+ 2D80-2DDF: ethiopic
40
288
  AB00-AB2F: ethiopic
41
289
  109A0-109FF: meroitic_cursive
42
290
  10980-1099F: meroitic_hieroglyphs
@@ -44,11 +292,11 @@
44
292
  10480-104AF: osmanya
45
293
  2D30-2D7F: tifinagh
46
294
  A500-A63F: vai
47
- 0600-06FF: arabic-1
48
- 0750-077F: arabic-2
49
- 08A0-08FF: arabic-3
50
- FB50-FDFF: arabic-4
51
- FE70-FEFF: arabic-5
295
+ 0600-06FF: arabic
296
+ 0750-077F: arabic
297
+ 08A0-08FF: arabic
298
+ FB50-FDFF: arabic
299
+ FE70-FEFF: arabic
52
300
  10840-1085F: aramic
53
301
  10B00-10B3F: avestan
54
302
  102A0-102DF: carian
@@ -64,8 +312,8 @@
64
312
  0B80-0BFF: tamil
65
313
  0C00-0C7F: telugu
66
314
  11000-1107F: brahmi
67
- 0900-097F: devanagari-1
68
- A8E0-A8FF: devanagari-2
315
+ 0900-097F: devanagari
316
+ A8E0-A8FF: devanagari
69
317
  103A0-103DF: old_persian
70
318
  10380-1039F: ugaritic
71
319
  10920-1093F: lydian
@@ -84,8 +332,8 @@
84
332
  10A00-10A5F: kharoshthi
85
333
  1C00-1C4F: lepcha
86
334
  1900-194F: limbu
87
- ABC0-ABFF: meetei_mayek-1
88
- AAE0-AAFF: meetei_mayek-2
335
+ ABC0-ABFF: meetei_mayek
336
+ AAE0-AAFF: meetei_mayek
89
337
  1C50-1C7F: ol_chiki
90
338
  A880-A8DF: saurashtra
91
339
  11180-111DF: sharada
@@ -101,15 +349,15 @@
101
349
  AA00-AA5F: cham
102
350
  A980-A9DF: javanese
103
351
  A900-A92F: kayah_li
104
- 1780-17FF: khmer-1
105
- 19E0-19FF: khmer-2
352
+ 1780-17FF: khmer
353
+ 19E0-19FF: khmer
106
354
  0E80-0EFF: lao
107
- 1000-109F: myanmar-1
108
- AA60-AA7F: myanmar-2
355
+ 1000-109F: myanmar
356
+ AA60-AA7F: myanmar
109
357
  1980-19DF: new_tai_lue
110
358
  A930-A95F: rejang
111
- 1B80-1BBF: sudanese-1
112
- 1CC0-1CCF: sudanese-2
359
+ 1B80-1BBF: sudanese
360
+ 1CC0-1CCF: sudanese
113
361
  1950-197F: tai_le
114
362
  1A20-1AAF: tai_tham
115
363
  AA80-AADF: tai_viet
@@ -118,18 +366,18 @@
118
366
  1720-173F: hanunoo
119
367
  1700-171F: tagalog
120
368
  1760-177F: tagbanwa
121
- 3100-312F: bopomofo-1
122
- 31A0-31BF: bopomofo-2
123
- 1100-11FF: hangul_jamo-1
124
- A960-A97F: hangul_jamo-2
125
- D7B0-D7FF: hangul_jamo-3
126
- 3130-318F: hangul_jamo-4
127
- FF00-FFEF: hangul_jamo-5
369
+ 3100-312F: bopomofo
370
+ 31A0-31BF: bopomofo
371
+ 1100-11FF: hangul_jamo
372
+ A960-A97F: hangul_jamo
373
+ D7B0-D7FF: hangul_jamo
374
+ 3130-318F: hangul_jamo
375
+ FF00-FFEF: hangul_jamo
128
376
  AC00-D7AF: hangul
129
377
  3040-309F: hiragana
130
- 30A0-30FF: katakana-1
131
- 31F0-31FF: katakana-2
132
- FF00-FFEF: katakana-3
378
+ 30A0-30FF: katakana
379
+ 31F0-31FF: katakana
380
+ FF00-FFEF: katakana
133
381
  1B000-1B0FF: kana
134
382
  3190-319F: kanbun
135
383
  A4D0-A4FF: lisu
@@ -138,12 +386,5 @@
138
386
  A490-A4CF: yi
139
387
  13A0-13FF: cherokee
140
388
  10400-1044F: deseret
141
- 1400-167F: united_canadian_aborginal-1
142
- 18B0-18FF: united_canadian_aborginal-2
143
-
144
- #0000-007F: ASCII
145
-
146
- # languages:
147
- # #Future. if there is any
148
- # hebrew:
149
- # - hebrew
389
+ 1400-167F: united_canadian_aborginal
390
+ 18B0-18FF: united_canadian_aborginal
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prose
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edwin Rozario