prose 0.0.1 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +13 -5
- data/lib/prose/prose.yaml +147 -141
- data/lib/prose.rb +29 -11
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,15 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
Y2MzMzgzYzhmNjVkNjA1OWRmYjFkMWIxMDNhYmFiYTI0OGIzZThhMg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YjM1NjNhYmRkYjM2YzdhNTU2OTFlYzcyZjc5ZTg1MDQ3OThlMjAzNQ==
|
5
7
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
NDMyM2MyMDgxOTgzNWUwMmRkOWRkZWI1N2YyMGFmNDkwNTk2NmVhNTIyNzM3
|
10
|
+
N2U2ZjZiZDE4Njk1YmFjZmQ2MGY3ZWIzMTA5OTdhMDg3ZGE2ZmI4OTdmZDcx
|
11
|
+
MWY3ZDY5Mzg2MWYxOTk5MzcxNzQwMWQzMmFhNmQxZjgwNzEzZTk=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YWU2YTdjOWZkMWFjODNiZmRlMjRlMWMxZWIwY2ExZGQ3MTQ1Y2RhYmZhNGNl
|
14
|
+
ODhmNDZjMDI4OWU2YTJlMTVjN2I1MWUzZmNlNzdhMGZmZWFmYjVlYzRiNjM3
|
15
|
+
ZWRiMDQyMWY3MGNlZDNiYzNkMzc0YjdlZDk3MGI2NGE4MTZmNDk=
|
data/lib/prose/prose.yaml
CHANGED
@@ -1,141 +1,147 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
FB00-FB4F: latin
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
1
|
+
# ranges:
|
2
|
+
0590-05FF: "hebrew-1"
|
3
|
+
FB00–FB4F: "hebrew-2"
|
4
|
+
00D00-0D7F: malayalam
|
5
|
+
0530-058F: armenian
|
6
|
+
2C80-2CFF: coptic
|
7
|
+
10800-1083F: cypriot
|
8
|
+
0400-04FF: "cyrillic-1"
|
9
|
+
0500-052F: "cyrillic-2"
|
10
|
+
2DE0-2DFF: "cyrillic-3"
|
11
|
+
A640-A69F: "cyrillic-4"
|
12
|
+
10A0-10FF: "georgian-1"
|
13
|
+
2D00-2D2F: "georgian-2"
|
14
|
+
2C00-2C5F: glagolithic
|
15
|
+
10330-1034F: gothic
|
16
|
+
0370-03FF: "greek-1"
|
17
|
+
1F00-1FFF: "greek-2"
|
18
|
+
0000-007F: "latin-1"
|
19
|
+
0080-00FF: "latin-2"
|
20
|
+
0100-017F: "latin-3"
|
21
|
+
0180-024F: "latin-4"
|
22
|
+
2C60-2C7F: "latin-5"
|
23
|
+
A720-A7FF: "latin-6"
|
24
|
+
1E00-1EFF: "latin-7"
|
25
|
+
FB00-FB4F: "latin-8"
|
26
|
+
FB00-FB4F: "latin-9"
|
27
|
+
FF00-FFEF: "latin-10"
|
28
|
+
1680-169F: ogham
|
29
|
+
10300-1032F: old_italics
|
30
|
+
101D0-101FF: phaistos
|
31
|
+
16A0-16FF: runic
|
32
|
+
10450-1047F: shavian
|
33
|
+
A6A0-A6FF: bamum
|
34
|
+
16800-16A3F: bamum
|
35
|
+
13000-1342F: egyptian_hieroglyphs
|
36
|
+
1200-137F: "ethiopic-1"
|
37
|
+
1380-139F: "ethiopic-2"
|
38
|
+
2D80-2DDF: "ethiopic-3"
|
39
|
+
AB00-AB2F: ethiopic
|
40
|
+
109A0-109FF: meroitic_cursive
|
41
|
+
10980-1099F: meroitic_hieroglyphs
|
42
|
+
07C0-07FF: nko
|
43
|
+
10480-104AF: osmanya
|
44
|
+
2D30-2D7F: tifinagh
|
45
|
+
A500-A63F: vai
|
46
|
+
0600-06FF: "arabic-1"
|
47
|
+
0750-077F: "arabic-2"
|
48
|
+
08A0-08FF: "arabic-3"
|
49
|
+
FB50-FDFF: "arabic-4"
|
50
|
+
FE70-FEFF: "arabic-5"
|
51
|
+
10840-1085F: aramic
|
52
|
+
10B00-10B3F: avestan
|
53
|
+
102A0-102DF: carian
|
54
|
+
12000-123FF: cuniform
|
55
|
+
12400-1247F: cuniform_numbers_punctuation
|
56
|
+
10280-1029F: lycian
|
57
|
+
1800-18AF: mongolian
|
58
|
+
0F00-0FFF: tibetan
|
59
|
+
0980-09FF: bengali_assamese
|
60
|
+
0A80-0AFF: gujarati
|
61
|
+
0C80-0CFF: kannada
|
62
|
+
0B00-0B7F: oriya
|
63
|
+
0B80-0BFF: tamil
|
64
|
+
0C00-0C7F: telugu
|
65
|
+
11000-1107F: brahmi
|
66
|
+
0900-097F: "devanagari-1"
|
67
|
+
A8E0-A8FF: "devanagari-2"
|
68
|
+
103A0-103DF: old_persian
|
69
|
+
10380-1039F: ugaritic
|
70
|
+
10920-1093F: lydian
|
71
|
+
0840-085F: mandaic
|
72
|
+
10A60-10A7F: old_south_arabian
|
73
|
+
10B60-10B7F: pahlavi
|
74
|
+
10B40-10B5F: parthian
|
75
|
+
10900-1091F: phoenician
|
76
|
+
0800-083F: samaritan
|
77
|
+
0700-074F: syriac
|
78
|
+
10C00-10C4F: old_turkic
|
79
|
+
A840-A87F: phags_pa
|
80
|
+
11100-1114F: chakma
|
81
|
+
0A00-0A7F: gurmukhi
|
82
|
+
11080-110CF: kaithi
|
83
|
+
10A00-10A5F: kharoshthi
|
84
|
+
1C00-1C4F: lepcha
|
85
|
+
1900-194F: limbu
|
86
|
+
ABC0-ABFF: "meetei_mayek-1"
|
87
|
+
AAE0-AAFF: "meetei_mayek-2"
|
88
|
+
1C50-1C7F: ol_chiki
|
89
|
+
A880-A8DF: saurashtra
|
90
|
+
11180-111DF: sharada
|
91
|
+
0D80-0DFF: sinhala
|
92
|
+
110D0-110FF: sora_sompeng
|
93
|
+
A800-A82F: syloti_nagri
|
94
|
+
11680-116CF: takri
|
95
|
+
0780-07BF: thaana
|
96
|
+
1CD0-1CFF: vedic
|
97
|
+
1B00-1B7F: balinese
|
98
|
+
1BC0-1BFF: batak
|
99
|
+
1A00-1A1F: buginese
|
100
|
+
AA00-AA5F: cham
|
101
|
+
A980-A9DF: javanese
|
102
|
+
A900-A92F: kayah_li
|
103
|
+
1780-17FF: "khmer-1"
|
104
|
+
19E0-19FF: "khmer-2"
|
105
|
+
0E80-0EFF: lao
|
106
|
+
1000-109F: "myanmar-1"
|
107
|
+
AA60-AA7F: "myanmar-2"
|
108
|
+
1980-19DF: new_tai_lue
|
109
|
+
A930-A95F: rejang
|
110
|
+
1B80-1BBF: "sudanese-1"
|
111
|
+
1CC0-1CCF: "sudanese-2"
|
112
|
+
1950-197F: tai_le
|
113
|
+
1A20-1AAF: tai_tham
|
114
|
+
AA80-AADF: tai_viet
|
115
|
+
0E00-0E7F: thai
|
116
|
+
1740-175F: buhid
|
117
|
+
1720-173F: hanunoo
|
118
|
+
1700-171F: tagalog
|
119
|
+
1760-177F: tagbanwa
|
120
|
+
3100-312F: "bopomofo-1"
|
121
|
+
31A0-31BF: "bopomofo-2"
|
122
|
+
1100-11FF: "hangul_jamo-1"
|
123
|
+
A960-A97F: "hangul_jamo-2"
|
124
|
+
D7B0-D7FF: "hangul_jamo-3"
|
125
|
+
3130-318F: "hangul_jamo-4"
|
126
|
+
FF00-FFEF: "hangul_jamo-5"
|
127
|
+
AC00-D7AF: hangul
|
128
|
+
3040-309F: hiragana
|
129
|
+
30A0-30FF: "katakana-1"
|
130
|
+
31F0-31FF: "katakana-2"
|
131
|
+
FF00-FFEF: "katakana-3"
|
132
|
+
1B000-1B0FF: kana
|
133
|
+
3190-319F: kanbun
|
134
|
+
A4D0-A4FF: lisu
|
135
|
+
16F00-16F9F: miao
|
136
|
+
A000-A48F: yi
|
137
|
+
A490-A4CF: yi
|
138
|
+
13A0-13FF: cherokee
|
139
|
+
10400-1044F: deseret
|
140
|
+
1400-167F: "united_canadian_aborginal-1"
|
141
|
+
18B0-18FF: "united_canadian_aborginal-2"
|
142
|
+
#0000-007F: ASCII
|
143
|
+
|
144
|
+
# languages:
|
145
|
+
# # Reserved for future use, if any
|
146
|
+
# hebrew:
|
147
|
+
# - hebrew
|
data/lib/prose.rb
CHANGED
@@ -1,25 +1,43 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
require 'yaml'
|
3
|
+
require 'pry'
|
3
4
|
|
4
5
|
class String
|
5
6
|
|
6
|
-
|
7
|
-
|
8
|
-
end
|
7
|
+
RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
|
8
|
+
LANGUAGES ||= RANGES.invert
|
9
9
|
|
10
|
-
|
10
|
+
def prose
|
11
|
+
find_languages_in(self) # rename find_origin_of
|
12
|
+
end
|
11
13
|
|
14
|
+
# define_method "#{language}?" do
|
15
|
+
# self.is_language?
|
16
|
+
# end
|
17
|
+
|
18
|
+
# __method__ cannot individually identify each method defined dynamically with define_method
|
19
|
+
# Hence this clumsy eval-based workaround.
|
20
|
+
LANGUAGES.keys.each do |language|
|
21
|
+
eval <<-EOM
|
22
|
+
def #{language.split('-').first}?(pure = false)
|
23
|
+
language = __method__.to_s.gsub("?", "")
|
24
|
+
result = find_languages_in(self)
|
25
|
+
pure ? ((result - [language]).empty?) : (result.include? language)
|
26
|
+
end
|
27
|
+
EOM
|
28
|
+
end
|
12
29
|
|
13
|
-
def
|
14
|
-
|
30
|
+
def language_of ordinal, min_range, max_range
|
31
|
+
(min_range.to_i(16) < ordinal) and (max_range.to_i(16) > ordinal)
|
15
32
|
end
|
16
33
|
|
17
|
-
def
|
34
|
+
def languages_of letter
|
18
35
|
result = []
|
19
36
|
int_ordinal = letter.ord
|
20
|
-
|
37
|
+
RANGES.keys.each do |key|
|
21
38
|
min, max = key.split("-")
|
22
|
-
|
39
|
+
ordinal_in_range = language_of(int_ordinal, min, max)
|
40
|
+
result << RANGES[key].split("-").first if ordinal_in_range #language_of(int_ordinal, min, max) #(min.to_i(16) < int_ordinal) and (max.to_i(16) > int_ordinal)
|
23
41
|
end
|
24
42
|
return result
|
25
43
|
end
|
@@ -27,9 +45,9 @@ class String
|
|
27
45
|
def find_languages_in word
|
28
46
|
result = []
|
29
47
|
word.split('').each do |letter|
|
30
|
-
result +=
|
48
|
+
result += languages_of(letter) if (letter != " ")
|
31
49
|
end
|
32
50
|
return result.uniq
|
33
51
|
end
|
34
52
|
|
35
|
-
end
|
53
|
+
end
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prose
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Edwin Rozario
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description:
|
13
|
+
description: Language detector
|
14
14
|
email:
|
15
15
|
- rozarioed@gmail.com
|
16
16
|
executables: []
|
@@ -28,18 +28,18 @@ require_paths:
|
|
28
28
|
- lib
|
29
29
|
required_ruby_version: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - '>='
|
31
|
+
- - ! '>='
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
35
|
requirements:
|
36
|
-
- - '>='
|
36
|
+
- - ! '>='
|
37
37
|
- !ruby/object:Gem::Version
|
38
38
|
version: '0'
|
39
39
|
requirements: []
|
40
40
|
rubyforge_project:
|
41
|
-
rubygems_version: 2.
|
41
|
+
rubygems_version: 2.4.1
|
42
42
|
signing_key:
|
43
43
|
specification_version: 4
|
44
|
-
summary:
|
44
|
+
summary: Language detector
|
45
45
|
test_files: []
|