prose 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +5 -13
  2. data/lib/prose.rb +26 -35
  3. data/lib/prose/prose.yaml +52 -50
  4. metadata +4 -5
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- NmNlMGFlNDk5YjMwNTJjYWY0YjJjMTYxM2Q2OTYzN2JmNTc4NGNmOQ==
5
- data.tar.gz: !binary |-
6
- Y2E1OGYwNTlkYzFmMjhiMjFmYmJkMTYyOTM1MjcxZDY4N2YzNWViYQ==
2
+ SHA256:
3
+ metadata.gz: 01b32c2b8bb846c0777b88c0de14e269555a54bec00b2b8e4db9a0ffacf6a15d
4
+ data.tar.gz: 0513bd4bbfd8e31d9a2767aa4442003aba9cc58fa80938490ed7d203b96c3621
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- ZmZkMmU0MDczMzBhMWM2ZDNkMzg5NTVkMzYwOWU4ODg0ZDEyYmZlZDk1ZTA2
10
- YmYzZGU2NDgwZjE5YWViYjQ0Yzc5OWEwZGJlNGIyOTFmMWRkZDM3ZTVhY2Q1
11
- YjhlOTAyZjk2MGY5MGJmNmYwYzI5OTU1NWFhZTlhYzQxZDNiMTk=
12
- data.tar.gz: !binary |-
13
- NWRjZDAxMWZmNjM0NGZiMTcxYmEyOWVmNTVmNTNmMWQwYzk4Yjk1OWMyMDE0
14
- NTMyNTA4MmU1YjI1NGI0NmNmYTg4MTU3YWFlYTg0OWQ4MTJhZjRiYjYxMGZk
15
- NGJmZWQzYTI4MzFkYjc1M2I0NmNmODkzYjczMzBmZDEwYjA3Nzk=
6
+ metadata.gz: 4b54b532728556f72e95f53a0106706f45b27eaa70c71b81a897fa6aea4a54b886909d812029a689a64789f3a9d6f532fc7e740d6e11ca9dbf398359e5c28f95
7
+ data.tar.gz: 694676c05117cbd73da1aa76eff537d654420eb87104208fa719d5da3fef0b6696c35b19eaedf778b77671c13725b8eff45b7e97473a176691eb8c357381c314
@@ -1,52 +1,43 @@
1
1
  # -*- coding: utf-8 -*-
2
+ # frozen_string_literal: true
3
+
2
4
  require 'yaml'
3
5
 
6
+ # Ruby string class
4
7
  class String
5
-
6
- RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
7
- LANGUAGES ||= RANGES.invert
8
+ LAN_RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
8
9
 
9
10
  def prose
10
- find_languages_in(self) # rename find_origin_of
11
+ find_origins_in(self)
11
12
  end
12
13
 
13
- # define_method "#{language}?" do
14
- # self.is_language?
15
- # end
16
-
17
- # __method__ cannot individually identify each method defined dynamically with define_method
18
- # Since this clumsy fix
19
- LANGUAGES.keys.each do |language|
20
- eval <<-EOM
21
- def #{language.split('-').first}?(pure = false)
22
- language = __method__.to_s.gsub("?", "")
23
- result = find_languages_in(self)
24
- pure ? ((result - [language]).empty?) : (result.include? language)
25
- end
26
- EOM
27
- end
14
+ # TODO: refactor so that the dynamic methods no longer use find_origins_in and instead check
15
+ # only the ranges for the specific language
16
+ LAN_RANGES.invert.keys.each do |language|
17
+ language_name = language.split('-').first
18
+ method_name = "#{language_name}?"
28
19
 
29
- def language_of ordinal, min_range, max_range
30
- (min_range.to_i(16) < ordinal) and (max_range.to_i(16) > ordinal)
31
- end
20
+ define_method(method_name) do
21
+ find_origins_in(self).include? language_name
22
+ end
32
23
 
33
- def languages_of letter
34
- result = []
35
- int_ordinal = letter.ord
36
- RANGES.keys.each do |key|
37
- min, max = key.split("-")
38
- ordinal_in_range = language_of(int_ordinal, min, max)
39
- result << RANGES[key].split("-").first if ordinal_in_range #language_of(int_ordinal, min, max) #(min.to_i(16) < int_ordinal) and (max.to_i(16) > int_ordinal)
24
+ define_method("pure_#{method_name}") do
25
+ (find_origins_in(self) - [language_name]).empty?
40
26
  end
41
- return result
42
27
  end
43
28
 
44
- def find_languages_in word
45
- result = []
46
- word.split('').each do |letter|
47
- result += languages_of(letter) if (letter != " ")
29
+ def language_of(ordinal, min_range, max_range)
30
+ (min_range.to_i(16) < ordinal) && (max_range.to_i(16) > ordinal)
31
+ end
32
+
33
+ def languages_of(letter)
34
+ LAN_RANGES.keys.map do |key|
35
+ min, max = key.split('-')
36
+ LAN_RANGES[key].split('-').first if language_of(letter.ord, min, max)
48
37
  end
49
- return result.uniq
50
38
  end
51
39
 
40
+ def find_origins_in(word)
41
+ word.split('').map { |letter| languages_of(letter) unless letter.empty? }.flatten.compact.uniq
42
+ end
52
43
  end
@@ -1,30 +1,31 @@
1
1
  # ranges:
2
- 0590-05FF: "hebrew-1"
3
- FB00–FB4F: "hebrew-2"
2
+ 4E00-9FD5: CJK
3
+ 0590-05FF: hebrew-1
4
+ FB00–FB4F: hebrew-2
4
5
  00D00-0D7F: malayalam
5
6
  0530-058F: armenian
6
7
  2C80-2CFF: coptic
7
8
  10800-1083F: cypriot
8
- 0400-04FF: "cyrillic-1"
9
- 0500-052F: "cyrillic-2"
10
- 2DE0-2DFF: "cyrillic-3"
11
- A640-A69F: "cyrillic-4"
12
- 10A0-10FF: "georgian-1"
13
- 2D00-2D2F: "georgian-2"
9
+ 0400-04FF: cyrillic-1
10
+ 0500-052F: cyrillic-2
11
+ 2DE0-2DFF: cyrillic-3
12
+ A640-A69F: cyrillic-4
13
+ 10A0-10FF: georgian-1
14
+ 2D00-2D2F: georgian-2
14
15
  2C00-2C5F: glagolithic
15
16
  10330-1034F: gothic
16
- 0370-03FF: "greek-1"
17
- 1F00-1FFF: "greek-2"
18
- 0000-007F: "latin-1"
19
- 0080-00FF: "latin-2"
20
- 0100-017F: "latin-3"
21
- 0180-024F: "latin-4"
22
- 2C60-2C7F: "latin-5"
23
- A720-A7FF: "latin-6"
24
- 1E00-1EFF: "latin-7"
25
- FB00-FB4F: "latin-8"
26
- FB00-FB4F: "latin-9"
27
- FF00-FFEF: "latin-10"
17
+ 0370-03FF: greek-1
18
+ 1F00-1FFF: greek-2
19
+ 0000-007F: latin-1
20
+ 0080-00FF: latin-2
21
+ 0100-017F: latin-3
22
+ 0180-024F: latin-4
23
+ 2C60-2C7F: latin-5
24
+ A720-A7FF: latin-6
25
+ 1E00-1EFF: latin-7
26
+ FB00-FB4F: latin-8
27
+ FB00-FB4F: latin-9
28
+ FF00-FFEF: latin-10
28
29
  1680-169F: ogham
29
30
  10300-1032F: old_italics
30
31
  101D0-101FF: phaistos
@@ -33,9 +34,9 @@
33
34
  A6A0-A6FF: bamum
34
35
  16800-16A3F: bamum
35
36
  13000-1342F: egyptian_hieroglyphs
36
- 1200-137F: "ethiopic-1"
37
- 1380-139F: "ethiopic-2"
38
- 2D80-2DDF: "ethiopic-3"
37
+ 1200-137F: ethiopic-1
38
+ 1380-139F: ethiopic-2
39
+ 2D80-2DDF: ethiopic-3
39
40
  AB00-AB2F: ethiopic
40
41
  109A0-109FF: meroitic_cursive
41
42
  10980-1099F: meroitic_hieroglyphs
@@ -43,11 +44,11 @@
43
44
  10480-104AF: osmanya
44
45
  2D30-2D7F: tifinagh
45
46
  A500-A63F: vai
46
- 0600-06FF: "arabic-1"
47
- 0750-077F: "arabic-2"
48
- 08A0-08FF: "arabic-3"
49
- FB50-FDFF: "arabic-4"
50
- FE70-FEFF: "arabic-5"
47
+ 0600-06FF: arabic-1
48
+ 0750-077F: arabic-2
49
+ 08A0-08FF: arabic-3
50
+ FB50-FDFF: arabic-4
51
+ FE70-FEFF: arabic-5
51
52
  10840-1085F: aramic
52
53
  10B00-10B3F: avestan
53
54
  102A0-102DF: carian
@@ -63,8 +64,8 @@
63
64
  0B80-0BFF: tamil
64
65
  0C00-0C7F: telugu
65
66
  11000-1107F: brahmi
66
- 0900-097F: "devanagari-1"
67
- A8E0-A8FF: "devanagari-2"
67
+ 0900-097F: devanagari-1
68
+ A8E0-A8FF: devanagari-2
68
69
  103A0-103DF: old_persian
69
70
  10380-1039F: ugaritic
70
71
  10920-1093F: lydian
@@ -83,8 +84,8 @@
83
84
  10A00-10A5F: kharoshthi
84
85
  1C00-1C4F: lepcha
85
86
  1900-194F: limbu
86
- ABC0-ABFF: "meetei_mayek-1"
87
- AAE0-AAFF: "meetei_mayek-2"
87
+ ABC0-ABFF: meetei_mayek-1
88
+ AAE0-AAFF: meetei_mayek-2
88
89
  1C50-1C7F: ol_chiki
89
90
  A880-A8DF: saurashtra
90
91
  11180-111DF: sharada
@@ -100,15 +101,15 @@
100
101
  AA00-AA5F: cham
101
102
  A980-A9DF: javanese
102
103
  A900-A92F: kayah_li
103
- 1780-17FF: "khmer-1"
104
- 19E0-19FF: "khmer-2"
104
+ 1780-17FF: khmer-1
105
+ 19E0-19FF: khmer-2
105
106
  0E80-0EFF: lao
106
- 1000-109F: "myanmar-1"
107
- AA60-AA7F: "myanmar-2"
107
+ 1000-109F: myanmar-1
108
+ AA60-AA7F: myanmar-2
108
109
  1980-19DF: new_tai_lue
109
110
  A930-A95F: rejang
110
- 1B80-1BBF: "sudanese-1"
111
- 1CC0-1CCF: "sudanese-2"
111
+ 1B80-1BBF: sudanese-1
112
+ 1CC0-1CCF: sudanese-2
112
113
  1950-197F: tai_le
113
114
  1A20-1AAF: tai_tham
114
115
  AA80-AADF: tai_viet
@@ -117,18 +118,18 @@
117
118
  1720-173F: hanunoo
118
119
  1700-171F: tagalog
119
120
  1760-177F: tagbanwa
120
- 3100-312F: "bopomofo-1"
121
- 31A0-31BF: "bopomofo-2"
122
- 1100-11FF: "hangul_jamo-1"
123
- A960-A97F: "hangul_jamo-2"
124
- D7B0-D7FF: "hangul_jamo-3"
125
- 3130-318F: "hangul_jamo-4"
126
- FF00-FFEF: "hangul_jamo-5"
121
+ 3100-312F: bopomofo-1
122
+ 31A0-31BF: bopomofo-2
123
+ 1100-11FF: hangul_jamo-1
124
+ A960-A97F: hangul_jamo-2
125
+ D7B0-D7FF: hangul_jamo-3
126
+ 3130-318F: hangul_jamo-4
127
+ FF00-FFEF: hangul_jamo-5
127
128
  AC00-D7AF: hangul
128
129
  3040-309F: hiragana
129
- 30A0-30FF: "katakana-1"
130
- 31F0-31FF: "katakana-2"
131
- FF00-FFEF: "katakana-3"
130
+ 30A0-30FF: katakana-1
131
+ 31F0-31FF: katakana-2
132
+ FF00-FFEF: katakana-3
132
133
  1B000-1B0FF: kana
133
134
  3190-319F: kanbun
134
135
  A4D0-A4FF: lisu
@@ -137,8 +138,9 @@
137
138
  A490-A4CF: yi
138
139
  13A0-13FF: cherokee
139
140
  10400-1044F: deseret
140
- 1400-167F: "united_canadian_aborginal-1"
141
- 18B0-18FF: "united_canadian_aborginal-2"
141
+ 1400-167F: united_canadian_aborginal-1
142
+ 18B0-18FF: united_canadian_aborginal-2
143
+
142
144
  #0000-007F: ASCII
143
145
 
144
146
  # languages:
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prose
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edwin Rozario
@@ -28,17 +28,16 @@ require_paths:
28
28
  - lib
29
29
  required_ruby_version: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ! '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  required_rubygems_version: !ruby/object:Gem::Requirement
35
35
  requirements:
36
- - - ! '>='
36
+ - - ">="
37
37
  - !ruby/object:Gem::Version
38
38
  version: '0'
39
39
  requirements: []
40
- rubyforge_project:
41
- rubygems_version: 2.4.1
40
+ rubygems_version: 3.0.3
42
41
  signing_key:
43
42
  specification_version: 4
44
43
  summary: Language detector