prose 0.0.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +13 -5
  2. data/lib/prose/prose.yaml +147 -141
  3. data/lib/prose.rb +29 -11
  4. metadata +7 -7
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 09b137dde0e95e9308ecdc067bf4fd562a9b1526
4
- data.tar.gz: 8cf9634b26218339d84eb677dfaea9432fdd9c36
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ Y2MzMzgzYzhmNjVkNjA1OWRmYjFkMWIxMDNhYmFiYTI0OGIzZThhMg==
5
+ data.tar.gz: !binary |-
6
+ YjM1NjNhYmRkYjM2YzdhNTU2OTFlYzcyZjc5ZTg1MDQ3OThlMjAzNQ==
5
7
  SHA512:
6
- metadata.gz: be87ec95d9bc12c36caa5d69d2192f88b9f02ed041f97fb186142c71e5a9d1f371d9ec60980ee520f8e928339182706e467aafbdbec6112c2cce433e20f74c03
7
- data.tar.gz: 220d5d89073e35e652d91e89d6a71845166d5679c5ac35777af0f80713c56d727be5a6516eb33c89515a2c095142c0945f22282b61b1fb04cb20806a4af15197
8
+ metadata.gz: !binary |-
9
+ NDMyM2MyMDgxOTgzNWUwMmRkOWRkZWI1N2YyMGFmNDkwNTk2NmVhNTIyNzM3
10
+ N2U2ZjZiZDE4Njk1YmFjZmQ2MGY3ZWIzMTA5OTdhMDg3ZGE2ZmI4OTdmZDcx
11
+ MWY3ZDY5Mzg2MWYxOTk5MzcxNzQwMWQzMmFhNmQxZjgwNzEzZTk=
12
+ data.tar.gz: !binary |-
13
+ YWU2YTdjOWZkMWFjODNiZmRlMjRlMWMxZWIwY2ExZGQ3MTQ1Y2RhYmZhNGNl
14
+ ODhmNDZjMDI4OWU2YTJlMTVjN2I1MWUzZmNlNzdhMGZmZWFmYjVlYzRiNjM3
15
+ ZWRiMDQyMWY3MGNlZDNiYzNkMzc0YjdlZDk3MGI2NGE4MTZmNDk=
data/lib/prose/prose.yaml CHANGED
@@ -1,141 +1,147 @@
1
- 0590-05FF: hebrew
2
- FB00–FB4F: hebrew
3
- 00D00-0D7F: malayalam
4
- 0530-058F: armenian
5
- 2C80-2CFF: coptic
6
- 10800-1083F: cypriot
7
- 0400-04FF: cyrillic
8
- 0500-052F: cyrillic
9
- 2DE0-2DFF: cyrillic
10
- A640-A69F: cyrillic
11
- 10A0-10FF: georgian
12
- 2D00-2D2F: georgian
13
- 2C00-2C5F: glagolithic
14
- 10330-1034F: gothic
15
- 0370-03FF: greek
16
- 1F00-1FFF: greek
17
- 0000-007F: latin
18
- 0080-00FF: latin
19
- 0100-017F: latin
20
- 0180-024F: latin
21
- 2C60-2C7F: latin
22
- A720-A7FF: latin
23
- 1E00-1EFF: latin
24
- FB00-FB4F: latin
25
- FB00-FB4F: latin
26
- FF00-FFEF: latin
27
- 1680-169F: ogham
28
- 10300-1032F: old_italics
29
- 101D0-101FF: phaistos
30
- 16A0-16FF: runic
31
- 10450-1047F: shavian
32
- A6A0-A6FF: bamum
33
- 16800-16A3F: bamum
34
- 13000-1342F: egyptian_hieroglyphs
35
- 1200-137F: ethiopic
36
- 1380-139F: ethiopic
37
- 2D80-2DDF: ethiopic
38
- AB00-AB2F: ethiopic
39
- 109A0-109FF: meroitic_cursive
40
- 10980-1099F: meroitic_hieroglyphs
41
- 07C0-07FF: nko
42
- 10480-104AF: osmanya
43
- 2D30-2D7F: tifinagh
44
- A500-A63F: vai
45
- 0600-06FF: arabic
46
- 0750-077F: arabic
47
- 08A0-08FF: arabic
48
- FB50-FDFF: arabic
49
- FE70-FEFF: arabic
50
- 10840-1085F: aramic
51
- 10B00-10B3F: avestan
52
- 102A0-102DF: carian
53
- 12000-123FF: cuniform
54
- 12400-1247F: cuniform_numbers_punctuation
55
- 10280-1029F: lycian
56
- 1800-18AF: mongolian
57
- 0F00-0FFF: tibetan
58
- 0980-09FF: bengali_assamese
59
- 0A80-0AFF: gujarati
60
- 0C80-0CFF: kannada
61
- 0B00-0B7F: oriya
62
- 0B80-0BFF: tamil
63
- 0C00-0C7F: telugu
64
- 11000-1107F: brahmi
65
- 0900-097F: devanagari
66
- A8E0-A8FF: devanagari
67
- 103A0-103DF: old_persian
68
- 10380-1039F: ugaritic
69
- 10920-1093F: lydian
70
- 0840-085F: mandaic
71
- 10A60-10A7F: old_south_arabian
72
- 10B60-10B7F: pahlavi
73
- 10B40-10B5F: parthian
74
- 10900-1091F: phoenician
75
- 0800-083F: samaritan
76
- 0700-074F: syriac
77
- 10C00-10C4F: old_turkic
78
- A840-A87F: phags_pa
79
- 11100-1114F: chakma
80
- 0A00-0A7F: gurmukhi
81
- 11080-110CF: kaithi
82
- 10A00-10A5F: kharoshthi
83
- 1C00-1C4F: lepcha
84
- 1900-194F: limbu
85
- ABC0-ABFF: meetei_mayek
86
- AAE0-AAFF: meetei_mayek
87
- 1C50-1C7F: ol_chiki
88
- A880-A8DF: saurashtra
89
- 11180-111DF: sharada
90
- 0D80-0DFF: sinhala
91
- 110D0-110FF: sora_sompeng
92
- A800-A82F: syloti_nagri
93
- 11680-116CF: takri
94
- 0780-07BF: thaana
95
- 1CD0-1CFF: vedic
96
- 1B00-1B7F: balinese
97
- 1BC0-1BFF: batak
98
- 1A00-1A1F: buginese
99
- AA00-AA5F: cham
100
- A980-A9DF: javanese
101
- A900-A92F: kayah_li
102
- 1780-17FF: khmer
103
- 19E0-19FF: khmer
104
- 0E80-0EFF: lao
105
- 1000-109F: myanmar
106
- AA60-AA7F: myanmar
107
- 1980-19DF: new_tai_lue
108
- A930-A95F: rejang
109
- 1B80-1BBF: sudanese
110
- 1CC0-1CCF: sudanese
111
- 1950-197F: tai_le
112
- 1A20-1AAF: tai_tham
113
- AA80-AADF: tai_viet
114
- 0E00-0E7F: thai
115
- 1740-175F: buhid
116
- 1720-173F: hanunoo
117
- 1700-171F: tagalog
118
- 1760-177F: tagbanwa
119
- 3100-312F: bopomofo
120
- 31A0-31BF: bopomofo
121
- 1100-11FF: hangul_jamo
122
- A960-A97F: hangul_jamo
123
- D7B0-D7FF: hangul_jamo
124
- 3130-318F: hangul_jamo
125
- FF00-FFEF: hangul_jamo
126
- AC00-D7AF: hangul
127
- 3040-309F: hiragana
128
- 30A0-30FF: katakana
129
- 31F0-31FF: katakana
130
- FF00-FFEF: katakana
131
- 1B000-1B0FF: kana
132
- 3190-319F: kanbun
133
- A4D0-A4FF: lisu
134
- 16F00-16F9F: miao
135
- A000-A48F: yi
136
- A490-A4CF: yi
137
- 13A0-13FF: cherokee
138
- 10400-1044F: deseret
139
- 1400-167F: united_canadian_aborginal
140
- 18B0-18FF: united_canadian_aborginal
141
- #0000-007F: ASCII
1
+ # ranges:
2
+ 0590-05FF: "hebrew-1"
3
+ FB00–FB4F: "hebrew-2"
4
+ 00D00-0D7F: malayalam
5
+ 0530-058F: armenian
6
+ 2C80-2CFF: coptic
7
+ 10800-1083F: cypriot
8
+ 0400-04FF: "cyrillic-1"
9
+ 0500-052F: "cyrillic-2"
10
+ 2DE0-2DFF: "cyrillic-3"
11
+ A640-A69F: "cyrillic-4"
12
+ 10A0-10FF: "georgian-1"
13
+ 2D00-2D2F: "georgian-2"
14
+ 2C00-2C5F: glagolithic
15
+ 10330-1034F: gothic
16
+ 0370-03FF: "greek-1"
17
+ 1F00-1FFF: "greek-2"
18
+ 0000-007F: "latin-1"
19
+ 0080-00FF: "latin-2"
20
+ 0100-017F: "latin-3"
21
+ 0180-024F: "latin-4"
22
+ 2C60-2C7F: "latin-5"
23
+ A720-A7FF: "latin-6"
24
+ 1E00-1EFF: "latin-7"
25
+ FB00-FB4F: "latin-8"
26
+ FB00-FB4F: "latin-9"
27
+ FF00-FFEF: "latin-10"
28
+ 1680-169F: ogham
29
+ 10300-1032F: old_italics
30
+ 101D0-101FF: phaistos
31
+ 16A0-16FF: runic
32
+ 10450-1047F: shavian
33
+ A6A0-A6FF: bamum
34
+ 16800-16A3F: bamum
35
+ 13000-1342F: egyptian_hieroglyphs
36
+ 1200-137F: "ethiopic-1"
37
+ 1380-139F: "ethiopic-2"
38
+ 2D80-2DDF: "ethiopic-3"
39
+ AB00-AB2F: ethiopic
40
+ 109A0-109FF: meroitic_cursive
41
+ 10980-1099F: meroitic_hieroglyphs
42
+ 07C0-07FF: nko
43
+ 10480-104AF: osmanya
44
+ 2D30-2D7F: tifinagh
45
+ A500-A63F: vai
46
+ 0600-06FF: "arabic-1"
47
+ 0750-077F: "arabic-2"
48
+ 08A0-08FF: "arabic-3"
49
+ FB50-FDFF: "arabic-4"
50
+ FE70-FEFF: "arabic-5"
51
+ 10840-1085F: aramic
52
+ 10B00-10B3F: avestan
53
+ 102A0-102DF: carian
54
+ 12000-123FF: cuniform
55
+ 12400-1247F: cuniform_numbers_punctuation
56
+ 10280-1029F: lycian
57
+ 1800-18AF: mongolian
58
+ 0F00-0FFF: tibetan
59
+ 0980-09FF: bengali_assamese
60
+ 0A80-0AFF: gujarati
61
+ 0C80-0CFF: kannada
62
+ 0B00-0B7F: oriya
63
+ 0B80-0BFF: tamil
64
+ 0C00-0C7F: telugu
65
+ 11000-1107F: brahmi
66
+ 0900-097F: "devanagari-1"
67
+ A8E0-A8FF: "devanagari-2"
68
+ 103A0-103DF: old_persian
69
+ 10380-1039F: ugaritic
70
+ 10920-1093F: lydian
71
+ 0840-085F: mandaic
72
+ 10A60-10A7F: old_south_arabian
73
+ 10B60-10B7F: pahlavi
74
+ 10B40-10B5F: parthian
75
+ 10900-1091F: phoenician
76
+ 0800-083F: samaritan
77
+ 0700-074F: syriac
78
+ 10C00-10C4F: old_turkic
79
+ A840-A87F: phags_pa
80
+ 11100-1114F: chakma
81
+ 0A00-0A7F: gurmukhi
82
+ 11080-110CF: kaithi
83
+ 10A00-10A5F: kharoshthi
84
+ 1C00-1C4F: lepcha
85
+ 1900-194F: limbu
86
+ ABC0-ABFF: "meetei_mayek-1"
87
+ AAE0-AAFF: "meetei_mayek-2"
88
+ 1C50-1C7F: ol_chiki
89
+ A880-A8DF: saurashtra
90
+ 11180-111DF: sharada
91
+ 0D80-0DFF: sinhala
92
+ 110D0-110FF: sora_sompeng
93
+ A800-A82F: syloti_nagri
94
+ 11680-116CF: takri
95
+ 0780-07BF: thaana
96
+ 1CD0-1CFF: vedic
97
+ 1B00-1B7F: balinese
98
+ 1BC0-1BFF: batak
99
+ 1A00-1A1F: buginese
100
+ AA00-AA5F: cham
101
+ A980-A9DF: javanese
102
+ A900-A92F: kayah_li
103
+ 1780-17FF: "khmer-1"
104
+ 19E0-19FF: "khmer-2"
105
+ 0E80-0EFF: lao
106
+ 1000-109F: "myanmar-1"
107
+ AA60-AA7F: "myanmar-2"
108
+ 1980-19DF: new_tai_lue
109
+ A930-A95F: rejang
110
+ 1B80-1BBF: "sudanese-1"
111
+ 1CC0-1CCF: "sudanese-2"
112
+ 1950-197F: tai_le
113
+ 1A20-1AAF: tai_tham
114
+ AA80-AADF: tai_viet
115
+ 0E00-0E7F: thai
116
+ 1740-175F: buhid
117
+ 1720-173F: hanunoo
118
+ 1700-171F: tagalog
119
+ 1760-177F: tagbanwa
120
+ 3100-312F: "bopomofo-1"
121
+ 31A0-31BF: "bopomofo-2"
122
+ 1100-11FF: "hangul_jamo-1"
123
+ A960-A97F: "hangul_jamo-2"
124
+ D7B0-D7FF: "hangul_jamo-3"
125
+ 3130-318F: "hangul_jamo-4"
126
+ FF00-FFEF: "hangul_jamo-5"
127
+ AC00-D7AF: hangul
128
+ 3040-309F: hiragana
129
+ 30A0-30FF: "katakana-1"
130
+ 31F0-31FF: "katakana-2"
131
+ FF00-FFEF: "katakana-3"
132
+ 1B000-1B0FF: kana
133
+ 3190-319F: kanbun
134
+ A4D0-A4FF: lisu
135
+ 16F00-16F9F: miao
136
+ A000-A48F: yi
137
+ A490-A4CF: yi
138
+ 13A0-13FF: cherokee
139
+ 10400-1044F: deseret
140
+ 1400-167F: "united_canadian_aborginal-1"
141
+ 18B0-18FF: "united_canadian_aborginal-2"
142
+ #0000-007F: ASCII
143
+
144
+ # languages:
145
+ # #Future. if there is any
146
+ # hebrew:
147
+ # - hebrew
data/lib/prose.rb CHANGED
@@ -1,25 +1,43 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  require 'yaml'
3
+ require 'pry'
3
4
 
4
5
  class String
5
6
 
6
- def prose?
7
- find_languages_in(self)
8
- end
7
+ RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
8
+ LANGUAGES ||= RANGES.invert
9
9
 
10
- private
10
+ def prose
11
+ find_languages_in(self) # rename find_origin_of
12
+ end
11
13
 
14
+ # define_method "#{language}?" do
15
+ # self.is_language?
16
+ # end
17
+
18
+ # __method__ cannot individually identify each method defined dynamically with define_method
19
+ # Since this clumsy fix
20
+ LANGUAGES.keys.each do |language|
21
+ eval <<-EOM
22
+ def #{language.split('-').first}?(pure = false)
23
+ language = __method__.to_s.gsub("?", "")
24
+ result = find_languages_in(self)
25
+ pure ? ((result - [language]).empty?) : (result.include? language)
26
+ end
27
+ EOM
28
+ end
12
29
 
13
- def unicode_ranges
14
- @ranges ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
30
+ def language_of ordinal, min_range, max_range
31
+ (min_range.to_i(16) < ordinal) and (max_range.to_i(16) > ordinal)
15
32
  end
16
33
 
17
- def language_of letter
34
+ def languages_of letter
18
35
  result = []
19
36
  int_ordinal = letter.ord
20
- unicode_ranges.keys.each do |key|
37
+ RANGES.keys.each do |key|
21
38
  min, max = key.split("-")
22
- result << unicode_ranges[key] if (min.to_i(16) < int_ordinal) and (max.to_i(16) > int_ordinal)
39
+ ordinal_in_range = language_of(int_ordinal, min, max)
40
+ result << RANGES[key].split("-").first if ordinal_in_range #language_of(int_ordinal, min, max) #(min.to_i(16) < int_ordinal) and (max.to_i(16) > int_ordinal)
23
41
  end
24
42
  return result
25
43
  end
@@ -27,9 +45,9 @@ class String
27
45
  def find_languages_in word
28
46
  result = []
29
47
  word.split('').each do |letter|
30
- result += language_of(letter) if not letter == " "
48
+ result += languages_of(letter) if (letter != " ")
31
49
  end
32
50
  return result.uniq
33
51
  end
34
52
 
35
- end
53
+ end
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prose
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edwin Rozario
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-01 00:00:00.000000000 Z
11
+ date: 2014-08-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: Identifies language of alphabets in a string
13
+ description: Language detector
14
14
  email:
15
15
  - rozarioed@gmail.com
16
16
  executables: []
@@ -28,18 +28,18 @@ require_paths:
28
28
  - lib
29
29
  required_ruby_version: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ! '>='
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  required_rubygems_version: !ruby/object:Gem::Requirement
35
35
  requirements:
36
- - - '>='
36
+ - - ! '>='
37
37
  - !ruby/object:Gem::Version
38
38
  version: '0'
39
39
  requirements: []
40
40
  rubyforge_project:
41
- rubygems_version: 2.2.0
41
+ rubygems_version: 2.4.1
42
42
  signing_key:
43
43
  specification_version: 4
44
- summary: Identify language string
44
+ summary: Language detector
45
45
  test_files: []