prose 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +5 -13
  2. data/lib/prose.rb +26 -35
  3. data/lib/prose/prose.yaml +52 -50
  4. metadata +4 -5
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- NmNlMGFlNDk5YjMwNTJjYWY0YjJjMTYxM2Q2OTYzN2JmNTc4NGNmOQ==
5
- data.tar.gz: !binary |-
6
- Y2E1OGYwNTlkYzFmMjhiMjFmYmJkMTYyOTM1MjcxZDY4N2YzNWViYQ==
2
+ SHA256:
3
+ metadata.gz: 01b32c2b8bb846c0777b88c0de14e269555a54bec00b2b8e4db9a0ffacf6a15d
4
+ data.tar.gz: 0513bd4bbfd8e31d9a2767aa4442003aba9cc58fa80938490ed7d203b96c3621
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- ZmZkMmU0MDczMzBhMWM2ZDNkMzg5NTVkMzYwOWU4ODg0ZDEyYmZlZDk1ZTA2
10
- YmYzZGU2NDgwZjE5YWViYjQ0Yzc5OWEwZGJlNGIyOTFmMWRkZDM3ZTVhY2Q1
11
- YjhlOTAyZjk2MGY5MGJmNmYwYzI5OTU1NWFhZTlhYzQxZDNiMTk=
12
- data.tar.gz: !binary |-
13
- NWRjZDAxMWZmNjM0NGZiMTcxYmEyOWVmNTVmNTNmMWQwYzk4Yjk1OWMyMDE0
14
- NTMyNTA4MmU1YjI1NGI0NmNmYTg4MTU3YWFlYTg0OWQ4MTJhZjRiYjYxMGZk
15
- NGJmZWQzYTI4MzFkYjc1M2I0NmNmODkzYjczMzBmZDEwYjA3Nzk=
6
+ metadata.gz: 4b54b532728556f72e95f53a0106706f45b27eaa70c71b81a897fa6aea4a54b886909d812029a689a64789f3a9d6f532fc7e740d6e11ca9dbf398359e5c28f95
7
+ data.tar.gz: 694676c05117cbd73da1aa76eff537d654420eb87104208fa719d5da3fef0b6696c35b19eaedf778b77671c13725b8eff45b7e97473a176691eb8c357381c314
@@ -1,52 +1,43 @@
1
1
  # -*- coding: utf-8 -*-
2
+ # frozen_string_literal: true
3
+
2
4
  require 'yaml'
3
5
 
6
+ # Ruby string class
4
7
  class String
5
-
6
- RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
7
- LANGUAGES ||= RANGES.invert
8
+ LAN_RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
8
9
 
9
10
  def prose
10
- find_languages_in(self) # rename find_origin_of
11
+ find_origins_in(self)
11
12
  end
12
13
 
13
- # define_method "#{language}?" do
14
- # self.is_language?
15
- # end
16
-
17
- # __method__ cannot individually identify each method defined dynamically with define_method
18
- # Since this clumsy fix
19
- LANGUAGES.keys.each do |language|
20
- eval <<-EOM
21
- def #{language.split('-').first}?(pure = false)
22
- language = __method__.to_s.gsub("?", "")
23
- result = find_languages_in(self)
24
- pure ? ((result - [language]).empty?) : (result.include? language)
25
- end
26
- EOM
27
- end
14
+ # TODO: Refactor so that the dynamic methods no longer call find_origins_in and instead
15
+ # check only the ranges for the specific language.
16
+ LAN_RANGES.invert.keys.each do |language|
17
+ language_name = language.split('-').first
18
+ method_name = "#{language_name}?"
28
19
 
29
- def language_of ordinal, min_range, max_range
30
- (min_range.to_i(16) < ordinal) and (max_range.to_i(16) > ordinal)
31
- end
20
+ define_method(method_name) do
21
+ find_origins_in(self).include? language_name
22
+ end
32
23
 
33
- def languages_of letter
34
- result = []
35
- int_ordinal = letter.ord
36
- RANGES.keys.each do |key|
37
- min, max = key.split("-")
38
- ordinal_in_range = language_of(int_ordinal, min, max)
39
- result << RANGES[key].split("-").first if ordinal_in_range #language_of(int_ordinal, min, max) #(min.to_i(16) < int_ordinal) and (max.to_i(16) > int_ordinal)
24
+ define_method("pure_#{method_name}") do
25
+ (find_origins_in(self) - [language_name]).empty?
40
26
  end
41
- return result
42
27
  end
43
28
 
44
- def find_languages_in word
45
- result = []
46
- word.split('').each do |letter|
47
- result += languages_of(letter) if (letter != " ")
29
+ def language_of(ordinal, min_range, max_range)
30
+ (min_range.to_i(16) < ordinal) && (max_range.to_i(16) > ordinal)
31
+ end
32
+
33
+ def languages_of(letter)
34
+ LAN_RANGES.keys.map do |key|
35
+ min, max = key.split('-')
36
+ LAN_RANGES[key].split('-').first if language_of(letter.ord, min, max)
48
37
  end
49
- return result.uniq
50
38
  end
51
39
 
40
+ def find_origins_in(word)
41
+ word.split('').map { |letter| languages_of(letter) unless letter.empty? }.flatten.compact.uniq
42
+ end
52
43
  end
@@ -1,30 +1,31 @@
1
1
  # ranges:
2
- 0590-05FF: "hebrew-1"
3
- FB00–FB4F: "hebrew-2"
2
+ 4E00–9FD5: CJK
3
+ 0590-05FF: hebrew-1
4
+ FB00–FB4F: hebrew-2
4
5
  00D00-0D7F: malayalam
5
6
  0530-058F: armenian
6
7
  2C80-2CFF: coptic
7
8
  10800-1083F: cypriot
8
- 0400-04FF: "cyrillic-1"
9
- 0500-052F: "cyrillic-2"
10
- 2DE0-2DFF: "cyrillic-3"
11
- A640-A69F: "cyrillic-4"
12
- 10A0-10FF: "georgian-1"
13
- 2D00-2D2F: "georgian-2"
9
+ 0400-04FF: cyrillic-1
10
+ 0500-052F: cyrillic-2
11
+ 2DE0-2DFF: cyrillic-3
12
+ A640-A69F: cyrillic-4
13
+ 10A0-10FF: georgian-1
14
+ 2D00-2D2F: georgian-2
14
15
  2C00-2C5F: glagolithic
15
16
  10330-1034F: gothic
16
- 0370-03FF: "greek-1"
17
- 1F00-1FFF: "greek-2"
18
- 0000-007F: "latin-1"
19
- 0080-00FF: "latin-2"
20
- 0100-017F: "latin-3"
21
- 0180-024F: "latin-4"
22
- 2C60-2C7F: "latin-5"
23
- A720-A7FF: "latin-6"
24
- 1E00-1EFF: "latin-7"
25
- FB00-FB4F: "latin-8"
26
- FB00-FB4F: "latin-9"
27
- FF00-FFEF: "latin-10"
17
+ 0370-03FF: greek-1
18
+ 1F00-1FFF: greek-2
19
+ 0000-007F: latin-1
20
+ 0080-00FF: latin-2
21
+ 0100-017F: latin-3
22
+ 0180-024F: latin-4
23
+ 2C60-2C7F: latin-5
24
+ A720-A7FF: latin-6
25
+ 1E00-1EFF: latin-7
26
+ FB00-FB4F: latin-8
27
+ FB00-FB4F: latin-9
28
+ FF00-FFEF: latin-10
28
29
  1680-169F: ogham
29
30
  10300-1032F: old_italics
30
31
  101D0-101FF: phaistos
@@ -33,9 +34,9 @@
33
34
  A6A0-A6FF: bamum
34
35
  16800-16A3F: bamum
35
36
  13000-1342F: egyptian_hieroglyphs
36
- 1200-137F: "ethiopic-1"
37
- 1380-139F: "ethiopic-2"
38
- 2D80-2DDF: "ethiopic-3"
37
+ 1200-137F: ethiopic-1
38
+ 1380-139F: ethiopic-2
39
+ 2D80-2DDF: ethiopic-3
39
40
  AB00-AB2F: ethiopic
40
41
  109A0-109FF: meroitic_cursive
41
42
  10980-1099F: meroitic_hieroglyphs
@@ -43,11 +44,11 @@
43
44
  10480-104AF: osmanya
44
45
  2D30-2D7F: tifinagh
45
46
  A500-A63F: vai
46
- 0600-06FF: "arabic-1"
47
- 0750-077F: "arabic-2"
48
- 08A0-08FF: "arabic-3"
49
- FB50-FDFF: "arabic-4"
50
- FE70-FEFF: "arabic-5"
47
+ 0600-06FF: arabic-1
48
+ 0750-077F: arabic-2
49
+ 08A0-08FF: arabic-3
50
+ FB50-FDFF: arabic-4
51
+ FE70-FEFF: arabic-5
51
52
  10840-1085F: aramic
52
53
  10B00-10B3F: avestan
53
54
  102A0-102DF: carian
@@ -63,8 +64,8 @@
63
64
  0B80-0BFF: tamil
64
65
  0C00-0C7F: telugu
65
66
  11000-1107F: brahmi
66
- 0900-097F: "devanagari-1"
67
- A8E0-A8FF: "devanagari-2"
67
+ 0900-097F: devanagari-1
68
+ A8E0-A8FF: devanagari-2
68
69
  103A0-103DF: old_persian
69
70
  10380-1039F: ugaritic
70
71
  10920-1093F: lydian
@@ -83,8 +84,8 @@
83
84
  10A00-10A5F: kharoshthi
84
85
  1C00-1C4F: lepcha
85
86
  1900-194F: limbu
86
- ABC0-ABFF: "meetei_mayek-1"
87
- AAE0-AAFF: "meetei_mayek-2"
87
+ ABC0-ABFF: meetei_mayek-1
88
+ AAE0-AAFF: meetei_mayek-2
88
89
  1C50-1C7F: ol_chiki
89
90
  A880-A8DF: saurashtra
90
91
  11180-111DF: sharada
@@ -100,15 +101,15 @@
100
101
  AA00-AA5F: cham
101
102
  A980-A9DF: javanese
102
103
  A900-A92F: kayah_li
103
- 1780-17FF: "khmer-1"
104
- 19E0-19FF: "khmer-2"
104
+ 1780-17FF: khmer-1
105
+ 19E0-19FF: khmer-2
105
106
  0E80-0EFF: lao
106
- 1000-109F: "myanmar-1"
107
- AA60-AA7F: "myanmar-2"
107
+ 1000-109F: myanmar-1
108
+ AA60-AA7F: myanmar-2
108
109
  1980-19DF: new_tai_lue
109
110
  A930-A95F: rejang
110
- 1B80-1BBF: "sudanese-1"
111
- 1CC0-1CCF: "sudanese-2"
111
+ 1B80-1BBF: sudanese-1
112
+ 1CC0-1CCF: sudanese-2
112
113
  1950-197F: tai_le
113
114
  1A20-1AAF: tai_tham
114
115
  AA80-AADF: tai_viet
@@ -117,18 +118,18 @@
117
118
  1720-173F: hanunoo
118
119
  1700-171F: tagalog
119
120
  1760-177F: tagbanwa
120
- 3100-312F: "bopomofo-1"
121
- 31A0-31BF: "bopomofo-2"
122
- 1100-11FF: "hangul_jamo-1"
123
- A960-A97F: "hangul_jamo-2"
124
- D7B0-D7FF: "hangul_jamo-3"
125
- 3130-318F: "hangul_jamo-4"
126
- FF00-FFEF: "hangul_jamo-5"
121
+ 3100-312F: bopomofo-1
122
+ 31A0-31BF: bopomofo-2
123
+ 1100-11FF: hangul_jamo-1
124
+ A960-A97F: hangul_jamo-2
125
+ D7B0-D7FF: hangul_jamo-3
126
+ 3130-318F: hangul_jamo-4
127
+ FF00-FFEF: hangul_jamo-5
127
128
  AC00-D7AF: hangul
128
129
  3040-309F: hiragana
129
- 30A0-30FF: "katakana-1"
130
- 31F0-31FF: "katakana-2"
131
- FF00-FFEF: "katakana-3"
130
+ 30A0-30FF: katakana-1
131
+ 31F0-31FF: katakana-2
132
+ FF00-FFEF: katakana-3
132
133
  1B000-1B0FF: kana
133
134
  3190-319F: kanbun
134
135
  A4D0-A4FF: lisu
@@ -137,8 +138,9 @@
137
138
  A490-A4CF: yi
138
139
  13A0-13FF: cherokee
139
140
  10400-1044F: deseret
140
- 1400-167F: "united_canadian_aborginal-1"
141
- 18B0-18FF: "united_canadian_aborginal-2"
141
+ 1400-167F: united_canadian_aborginal-1
142
+ 18B0-18FF: united_canadian_aborginal-2
143
+
142
144
  #0000-007F: ASCII
143
145
 
144
146
  # languages:
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prose
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edwin Rozario
@@ -28,17 +28,16 @@ require_paths:
28
28
  - lib
29
29
  required_ruby_version: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ! '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  required_rubygems_version: !ruby/object:Gem::Requirement
35
35
  requirements:
36
- - - ! '>='
36
+ - - ">="
37
37
  - !ruby/object:Gem::Version
38
38
  version: '0'
39
39
  requirements: []
40
- rubyforge_project:
41
- rubygems_version: 2.4.1
40
+ rubygems_version: 3.0.3
42
41
  signing_key:
43
42
  specification_version: 4
44
43
  summary: Language detector