prose 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -13
- data/lib/prose.rb +26 -35
- data/lib/prose/prose.yaml +52 -50
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
Y2E1OGYwNTlkYzFmMjhiMjFmYmJkMTYyOTM1MjcxZDY4N2YzNWViYQ==
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 01b32c2b8bb846c0777b88c0de14e269555a54bec00b2b8e4db9a0ffacf6a15d
|
4
|
+
data.tar.gz: 0513bd4bbfd8e31d9a2767aa4442003aba9cc58fa80938490ed7d203b96c3621
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
YmYzZGU2NDgwZjE5YWViYjQ0Yzc5OWEwZGJlNGIyOTFmMWRkZDM3ZTVhY2Q1
|
11
|
-
YjhlOTAyZjk2MGY5MGJmNmYwYzI5OTU1NWFhZTlhYzQxZDNiMTk=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
NWRjZDAxMWZmNjM0NGZiMTcxYmEyOWVmNTVmNTNmMWQwYzk4Yjk1OWMyMDE0
|
14
|
-
NTMyNTA4MmU1YjI1NGI0NmNmYTg4MTU3YWFlYTg0OWQ4MTJhZjRiYjYxMGZk
|
15
|
-
NGJmZWQzYTI4MzFkYjc1M2I0NmNmODkzYjczMzBmZDEwYjA3Nzk=
|
6
|
+
metadata.gz: 4b54b532728556f72e95f53a0106706f45b27eaa70c71b81a897fa6aea4a54b886909d812029a689a64789f3a9d6f532fc7e740d6e11ca9dbf398359e5c28f95
|
7
|
+
data.tar.gz: 694676c05117cbd73da1aa76eff537d654420eb87104208fa719d5da3fef0b6696c35b19eaedf778b77671c13725b8eff45b7e97473a176691eb8c357381c314
|
data/lib/prose.rb
CHANGED
@@ -1,52 +1,43 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
require 'yaml'
|
3
5
|
|
6
|
+
# Reopens Ruby's core String class to add script/language detection helpers
|
4
7
|
class String
|
5
|
-
|
6
|
-
RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
|
7
|
-
LANGUAGES ||= RANGES.invert
|
8
|
+
LAN_RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
|
8
9
|
|
9
10
|
def prose
|
10
|
-
|
11
|
+
find_origins_in(self)
|
11
12
|
end
|
12
13
|
|
13
|
-
#
|
14
|
-
#
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
# Since this clumsy fix
|
19
|
-
LANGUAGES.keys.each do |language|
|
20
|
-
eval <<-EOM
|
21
|
-
def #{language.split('-').first}?(pure = false)
|
22
|
-
language = __method__.to_s.gsub("?", "")
|
23
|
-
result = find_languages_in(self)
|
24
|
-
pure ? ((result - [language]).empty?) : (result.include? language)
|
25
|
-
end
|
26
|
-
EOM
|
27
|
-
end
|
14
|
+
# TODO: Refactor this so that the dynamic methods no longer use find_origins_in and instead check
|
15
|
+
# the ranges for the specific language only
|
16
|
+
LAN_RANGES.invert.keys.each do |language|
|
17
|
+
language_name = language.split('-').first
|
18
|
+
method_name = "#{language_name}?"
|
28
19
|
|
29
|
-
|
30
|
-
|
31
|
-
|
20
|
+
define_method(method_name) do
|
21
|
+
find_origins_in(self).include? language_name
|
22
|
+
end
|
32
23
|
|
33
|
-
|
34
|
-
|
35
|
-
int_ordinal = letter.ord
|
36
|
-
RANGES.keys.each do |key|
|
37
|
-
min, max = key.split("-")
|
38
|
-
ordinal_in_range = language_of(int_ordinal, min, max)
|
39
|
-
result << RANGES[key].split("-").first if ordinal_in_range #language_of(int_ordinal, min, max) #(min.to_i(16) < int_ordinal) and (max.to_i(16) > int_ordinal)
|
24
|
+
define_method("pure_#{method_name}") do
|
25
|
+
(find_origins_in(self) - [language_name]).empty?
|
40
26
|
end
|
41
|
-
return result
|
42
27
|
end
|
43
28
|
|
44
|
-
def
|
45
|
-
|
46
|
-
|
47
|
-
|
29
|
+
def language_of(ordinal, min_range, max_range)
|
30
|
+
(min_range.to_i(16) < ordinal) && (max_range.to_i(16) > ordinal)
|
31
|
+
end
|
32
|
+
|
33
|
+
def languages_of(letter)
|
34
|
+
LAN_RANGES.keys.map do |key|
|
35
|
+
min, max = key.split('-')
|
36
|
+
LAN_RANGES[key].split('-').first if language_of(letter.ord, min, max)
|
48
37
|
end
|
49
|
-
return result.uniq
|
50
38
|
end
|
51
39
|
|
40
|
+
def find_origins_in(word)
|
41
|
+
word.split('').map { |letter| languages_of(letter) unless letter.empty? }.flatten.compact.uniq
|
42
|
+
end
|
52
43
|
end
|
data/lib/prose/prose.yaml
CHANGED
@@ -1,30 +1,31 @@
|
|
1
1
|
# ranges:
|
2
|
-
|
3
|
-
|
2
|
+
4E00–9FD5: CJK
|
3
|
+
0590-05FF: hebrew-1
|
4
|
+
FB00–FB4F: hebrew-2
|
4
5
|
00D00-0D7F: malayalam
|
5
6
|
0530-058F: armenian
|
6
7
|
2C80-2CFF: coptic
|
7
8
|
10800-1083F: cypriot
|
8
|
-
0400-04FF:
|
9
|
-
0500-052F:
|
10
|
-
2DE0-2DFF:
|
11
|
-
A640-A69F:
|
12
|
-
10A0-10FF:
|
13
|
-
2D00-2D2F:
|
9
|
+
0400-04FF: cyrillic-1
|
10
|
+
0500-052F: cyrillic-2
|
11
|
+
2DE0-2DFF: cyrillic-3
|
12
|
+
A640-A69F: cyrillic-4
|
13
|
+
10A0-10FF: georgian-1
|
14
|
+
2D00-2D2F: georgian-2
|
14
15
|
2C00-2C5F: glagolithic
|
15
16
|
10330-1034F: gothic
|
16
|
-
0370-03FF:
|
17
|
-
1F00-1FFF:
|
18
|
-
0000-007F:
|
19
|
-
0080-00FF:
|
20
|
-
0100-017F:
|
21
|
-
0180-024F:
|
22
|
-
2C60-2C7F:
|
23
|
-
A720-A7FF:
|
24
|
-
1E00-1EFF:
|
25
|
-
FB00-FB4F:
|
26
|
-
FB00-FB4F:
|
27
|
-
FF00-FFEF:
|
17
|
+
0370-03FF: greek-1
|
18
|
+
1F00-1FFF: greek-2
|
19
|
+
0000-007F: latin-1
|
20
|
+
0080-00FF: latin-2
|
21
|
+
0100-017F: latin-3
|
22
|
+
0180-024F: latin-4
|
23
|
+
2C60-2C7F: latin-5
|
24
|
+
A720-A7FF: latin-6
|
25
|
+
1E00-1EFF: latin-7
|
26
|
+
FB00-FB4F: latin-8
|
27
|
+
FB00-FB4F: latin-9
|
28
|
+
FF00-FFEF: latin-10
|
28
29
|
1680-169F: ogham
|
29
30
|
10300-1032F: old_italics
|
30
31
|
101D0-101FF: phaistos
|
@@ -33,9 +34,9 @@
|
|
33
34
|
A6A0-A6FF: bamum
|
34
35
|
16800-16A3F: bamum
|
35
36
|
13000-1342F: egyptian_hieroglyphs
|
36
|
-
1200-137F:
|
37
|
-
1380-139F:
|
38
|
-
2D80-2DDF:
|
37
|
+
1200-137F: ethiopic-1
|
38
|
+
1380-139F: ethiopic-2
|
39
|
+
2D80-2DDF: ethiopic-3
|
39
40
|
AB00-AB2F: ethiopic
|
40
41
|
109A0-109FF: meroitic_cursive
|
41
42
|
10980-1099F: meroitic_hieroglyphs
|
@@ -43,11 +44,11 @@
|
|
43
44
|
10480-104AF: osmanya
|
44
45
|
2D30-2D7F: tifinagh
|
45
46
|
A500-A63F: vai
|
46
|
-
0600-06FF:
|
47
|
-
0750-077F:
|
48
|
-
08A0-08FF:
|
49
|
-
FB50-FDFF:
|
50
|
-
FE70-FEFF:
|
47
|
+
0600-06FF: arabic-1
|
48
|
+
0750-077F: arabic-2
|
49
|
+
08A0-08FF: arabic-3
|
50
|
+
FB50-FDFF: arabic-4
|
51
|
+
FE70-FEFF: arabic-5
|
51
52
|
10840-1085F: aramic
|
52
53
|
10B00-10B3F: avestan
|
53
54
|
102A0-102DF: carian
|
@@ -63,8 +64,8 @@
|
|
63
64
|
0B80-0BFF: tamil
|
64
65
|
0C00-0C7F: telugu
|
65
66
|
11000-1107F: brahmi
|
66
|
-
0900-097F:
|
67
|
-
A8E0-A8FF:
|
67
|
+
0900-097F: devanagari-1
|
68
|
+
A8E0-A8FF: devanagari-2
|
68
69
|
103A0-103DF: old_persian
|
69
70
|
10380-1039F: ugaritic
|
70
71
|
10920-1093F: lydian
|
@@ -83,8 +84,8 @@
|
|
83
84
|
10A00-10A5F: kharoshthi
|
84
85
|
1C00-1C4F: lepcha
|
85
86
|
1900-194F: limbu
|
86
|
-
ABC0-ABFF:
|
87
|
-
AAE0-AAFF:
|
87
|
+
ABC0-ABFF: meetei_mayek-1
|
88
|
+
AAE0-AAFF: meetei_mayek-2
|
88
89
|
1C50-1C7F: ol_chiki
|
89
90
|
A880-A8DF: saurashtra
|
90
91
|
11180-111DF: sharada
|
@@ -100,15 +101,15 @@
|
|
100
101
|
AA00-AA5F: cham
|
101
102
|
A980-A9DF: javanese
|
102
103
|
A900-A92F: kayah_li
|
103
|
-
1780-17FF:
|
104
|
-
19E0-19FF:
|
104
|
+
1780-17FF: khmer-1
|
105
|
+
19E0-19FF: khmer-2
|
105
106
|
0E80-0EFF: lao
|
106
|
-
1000-109F:
|
107
|
-
AA60-AA7F:
|
107
|
+
1000-109F: myanmar-1
|
108
|
+
AA60-AA7F: myanmar-2
|
108
109
|
1980-19DF: new_tai_lue
|
109
110
|
A930-A95F: rejang
|
110
|
-
1B80-1BBF:
|
111
|
-
1CC0-1CCF:
|
111
|
+
1B80-1BBF: sudanese-1
|
112
|
+
1CC0-1CCF: sudanese-2
|
112
113
|
1950-197F: tai_le
|
113
114
|
1A20-1AAF: tai_tham
|
114
115
|
AA80-AADF: tai_viet
|
@@ -117,18 +118,18 @@
|
|
117
118
|
1720-173F: hanunoo
|
118
119
|
1700-171F: tagalog
|
119
120
|
1760-177F: tagbanwa
|
120
|
-
3100-312F:
|
121
|
-
31A0-31BF:
|
122
|
-
1100-11FF:
|
123
|
-
A960-A97F:
|
124
|
-
D7B0-D7FF:
|
125
|
-
3130-318F:
|
126
|
-
FF00-FFEF:
|
121
|
+
3100-312F: bopomofo-1
|
122
|
+
31A0-31BF: bopomofo-2
|
123
|
+
1100-11FF: hangul_jamo-1
|
124
|
+
A960-A97F: hangul_jamo-2
|
125
|
+
D7B0-D7FF: hangul_jamo-3
|
126
|
+
3130-318F: hangul_jamo-4
|
127
|
+
FF00-FFEF: hangul_jamo-5
|
127
128
|
AC00-D7AF: hangul
|
128
129
|
3040-309F: hiragana
|
129
|
-
30A0-30FF:
|
130
|
-
31F0-31FF:
|
131
|
-
FF00-FFEF:
|
130
|
+
30A0-30FF: katakana-1
|
131
|
+
31F0-31FF: katakana-2
|
132
|
+
FF00-FFEF: katakana-3
|
132
133
|
1B000-1B0FF: kana
|
133
134
|
3190-319F: kanbun
|
134
135
|
A4D0-A4FF: lisu
|
@@ -137,8 +138,9 @@
|
|
137
138
|
A490-A4CF: yi
|
138
139
|
13A0-13FF: cherokee
|
139
140
|
10400-1044F: deseret
|
140
|
-
1400-167F:
|
141
|
-
18B0-18FF:
|
141
|
+
1400-167F: united_canadian_aborginal-1
|
142
|
+
18B0-18FF: united_canadian_aborginal-2
|
143
|
+
|
142
144
|
#0000-007F: ASCII
|
143
145
|
|
144
146
|
# languages:
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prose
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Edwin Rozario
|
@@ -28,17 +28,16 @@ require_paths:
|
|
28
28
|
- lib
|
29
29
|
required_ruby_version: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
35
|
requirements:
|
36
|
-
- -
|
36
|
+
- - ">="
|
37
37
|
- !ruby/object:Gem::Version
|
38
38
|
version: '0'
|
39
39
|
requirements: []
|
40
|
-
|
41
|
-
rubygems_version: 2.4.1
|
40
|
+
rubygems_version: 3.0.3
|
42
41
|
signing_key:
|
43
42
|
specification_version: 4
|
44
43
|
summary: Language detector
|