prose 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/lib/prose.rb +26 -35
- data/lib/prose/prose.yaml +52 -50
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
Y2E1OGYwNTlkYzFmMjhiMjFmYmJkMTYyOTM1MjcxZDY4N2YzNWViYQ==
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 01b32c2b8bb846c0777b88c0de14e269555a54bec00b2b8e4db9a0ffacf6a15d
|
4
|
+
data.tar.gz: 0513bd4bbfd8e31d9a2767aa4442003aba9cc58fa80938490ed7d203b96c3621
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
YmYzZGU2NDgwZjE5YWViYjQ0Yzc5OWEwZGJlNGIyOTFmMWRkZDM3ZTVhY2Q1
|
11
|
-
YjhlOTAyZjk2MGY5MGJmNmYwYzI5OTU1NWFhZTlhYzQxZDNiMTk=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
NWRjZDAxMWZmNjM0NGZiMTcxYmEyOWVmNTVmNTNmMWQwYzk4Yjk1OWMyMDE0
|
14
|
-
NTMyNTA4MmU1YjI1NGI0NmNmYTg4MTU3YWFlYTg0OWQ4MTJhZjRiYjYxMGZk
|
15
|
-
NGJmZWQzYTI4MzFkYjc1M2I0NmNmODkzYjczMzBmZDEwYjA3Nzk=
|
6
|
+
metadata.gz: 4b54b532728556f72e95f53a0106706f45b27eaa70c71b81a897fa6aea4a54b886909d812029a689a64789f3a9d6f532fc7e740d6e11ca9dbf398359e5c28f95
|
7
|
+
data.tar.gz: 694676c05117cbd73da1aa76eff537d654420eb87104208fa719d5da3fef0b6696c35b19eaedf778b77671c13725b8eff45b7e97473a176691eb8c357381c314
|
data/lib/prose.rb
CHANGED
@@ -1,52 +1,43 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
require 'yaml'
|
3
5
|
|
6
|
+
# Ruby string class
|
4
7
|
class String
|
5
|
-
|
6
|
-
RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
|
7
|
-
LANGUAGES ||= RANGES.invert
|
8
|
+
LAN_RANGES ||= YAML::load( File.open( "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml" ) )
|
8
9
|
|
9
10
|
def prose
|
10
|
-
|
11
|
+
find_origins_in(self)
|
11
12
|
end
|
12
13
|
|
13
|
-
#
|
14
|
-
#
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
# Since this clumsy fix
|
19
|
-
LANGUAGES.keys.each do |language|
|
20
|
-
eval <<-EOM
|
21
|
-
def #{language.split('-').first}?(pure = false)
|
22
|
-
language = __method__.to_s.gsub("?", "")
|
23
|
-
result = find_languages_in(self)
|
24
|
-
pure ? ((result - [language]).empty?) : (result.include? language)
|
25
|
-
end
|
26
|
-
EOM
|
27
|
-
end
|
14
|
+
# Refactor this so that the dynamic methods no more use find_origins_in instead only check
|
15
|
+
# the ranges for the specific language only
|
16
|
+
LAN_RANGES.invert.keys.each do |language|
|
17
|
+
language_name = language.split('-').first
|
18
|
+
method_name = "#{language_name}?"
|
28
19
|
|
29
|
-
|
30
|
-
|
31
|
-
|
20
|
+
define_method(method_name) do
|
21
|
+
find_origins_in(self).include? language_name
|
22
|
+
end
|
32
23
|
|
33
|
-
|
34
|
-
|
35
|
-
int_ordinal = letter.ord
|
36
|
-
RANGES.keys.each do |key|
|
37
|
-
min, max = key.split("-")
|
38
|
-
ordinal_in_range = language_of(int_ordinal, min, max)
|
39
|
-
result << RANGES[key].split("-").first if ordinal_in_range #language_of(int_ordinal, min, max) #(min.to_i(16) < int_ordinal) and (max.to_i(16) > int_ordinal)
|
24
|
+
define_method("pure_#{method_name}") do
|
25
|
+
(find_origins_in(self) - [language_name]).empty?
|
40
26
|
end
|
41
|
-
return result
|
42
27
|
end
|
43
28
|
|
44
|
-
def
|
45
|
-
|
46
|
-
|
47
|
-
|
29
|
+
def language_of(ordinal, min_range, max_range)
|
30
|
+
(min_range.to_i(16) < ordinal) && (max_range.to_i(16) > ordinal)
|
31
|
+
end
|
32
|
+
|
33
|
+
def languages_of(letter)
|
34
|
+
LAN_RANGES.keys.map do |key|
|
35
|
+
min, max = key.split('-')
|
36
|
+
LAN_RANGES[key].split('-').first if language_of(letter.ord, min, max)
|
48
37
|
end
|
49
|
-
return result.uniq
|
50
38
|
end
|
51
39
|
|
40
|
+
def find_origins_in(word)
|
41
|
+
word.split('').map { |letter| languages_of(letter) unless letter.empty? }.flatten.compact.uniq
|
42
|
+
end
|
52
43
|
end
|
data/lib/prose/prose.yaml
CHANGED
@@ -1,30 +1,31 @@
|
|
1
1
|
# ranges:
|
2
|
-
|
3
|
-
|
2
|
+
4E00–9FD5: CJK
|
3
|
+
0590-05FF: hebrew-1
|
4
|
+
FB00–FB4F: hebrew-2
|
4
5
|
00D00-0D7F: malayalam
|
5
6
|
0530-058F: armenian
|
6
7
|
2C80-2CFF: coptic
|
7
8
|
10800-1083F: cypriot
|
8
|
-
0400-04FF:
|
9
|
-
0500-052F:
|
10
|
-
2DE0-2DFF:
|
11
|
-
A640-A69F:
|
12
|
-
10A0-10FF:
|
13
|
-
2D00-2D2F:
|
9
|
+
0400-04FF: cyrillic-1
|
10
|
+
0500-052F: cyrillic-2
|
11
|
+
2DE0-2DFF: cyrillic-3
|
12
|
+
A640-A69F: cyrillic-4
|
13
|
+
10A0-10FF: georgian-1
|
14
|
+
2D00-2D2F: georgian-2
|
14
15
|
2C00-2C5F: glagolithic
|
15
16
|
10330-1034F: gothic
|
16
|
-
0370-03FF:
|
17
|
-
1F00-1FFF:
|
18
|
-
0000-007F:
|
19
|
-
0080-00FF:
|
20
|
-
0100-017F:
|
21
|
-
0180-024F:
|
22
|
-
2C60-2C7F:
|
23
|
-
A720-A7FF:
|
24
|
-
1E00-1EFF:
|
25
|
-
FB00-FB4F:
|
26
|
-
FB00-FB4F:
|
27
|
-
FF00-FFEF:
|
17
|
+
0370-03FF: greek-1
|
18
|
+
1F00-1FFF: greek-2
|
19
|
+
0000-007F: latin-1
|
20
|
+
0080-00FF: latin-2
|
21
|
+
0100-017F: latin-3
|
22
|
+
0180-024F: latin-4
|
23
|
+
2C60-2C7F: latin-5
|
24
|
+
A720-A7FF: latin-6
|
25
|
+
1E00-1EFF: latin-7
|
26
|
+
FB00-FB4F: latin-8
|
27
|
+
FB00-FB4F: latin-9
|
28
|
+
FF00-FFEF: latin-10
|
28
29
|
1680-169F: ogham
|
29
30
|
10300-1032F: old_italics
|
30
31
|
101D0-101FF: phaistos
|
@@ -33,9 +34,9 @@
|
|
33
34
|
A6A0-A6FF: bamum
|
34
35
|
16800-16A3F: bamum
|
35
36
|
13000-1342F: egyptian_hieroglyphs
|
36
|
-
1200-137F:
|
37
|
-
1380-139F:
|
38
|
-
2D80-2DDF:
|
37
|
+
1200-137F: ethiopic-1
|
38
|
+
1380-139F: ethiopic-2
|
39
|
+
2D80-2DDF: ethiopic-3
|
39
40
|
AB00-AB2F: ethiopic
|
40
41
|
109A0-109FF: meroitic_cursive
|
41
42
|
10980-1099F: meroitic_hieroglyphs
|
@@ -43,11 +44,11 @@
|
|
43
44
|
10480-104AF: osmanya
|
44
45
|
2D30-2D7F: tifinagh
|
45
46
|
A500-A63F: vai
|
46
|
-
0600-06FF:
|
47
|
-
0750-077F:
|
48
|
-
08A0-08FF:
|
49
|
-
FB50-FDFF:
|
50
|
-
FE70-FEFF:
|
47
|
+
0600-06FF: arabic-1
|
48
|
+
0750-077F: arabic-2
|
49
|
+
08A0-08FF: arabic-3
|
50
|
+
FB50-FDFF: arabic-4
|
51
|
+
FE70-FEFF: arabic-5
|
51
52
|
10840-1085F: aramic
|
52
53
|
10B00-10B3F: avestan
|
53
54
|
102A0-102DF: carian
|
@@ -63,8 +64,8 @@
|
|
63
64
|
0B80-0BFF: tamil
|
64
65
|
0C00-0C7F: telugu
|
65
66
|
11000-1107F: brahmi
|
66
|
-
0900-097F:
|
67
|
-
A8E0-A8FF:
|
67
|
+
0900-097F: devanagari-1
|
68
|
+
A8E0-A8FF: devanagari-2
|
68
69
|
103A0-103DF: old_persian
|
69
70
|
10380-1039F: ugaritic
|
70
71
|
10920-1093F: lydian
|
@@ -83,8 +84,8 @@
|
|
83
84
|
10A00-10A5F: kharoshthi
|
84
85
|
1C00-1C4F: lepcha
|
85
86
|
1900-194F: limbu
|
86
|
-
ABC0-ABFF:
|
87
|
-
AAE0-AAFF:
|
87
|
+
ABC0-ABFF: meetei_mayek-1
|
88
|
+
AAE0-AAFF: meetei_mayek-2
|
88
89
|
1C50-1C7F: ol_chiki
|
89
90
|
A880-A8DF: saurashtra
|
90
91
|
11180-111DF: sharada
|
@@ -100,15 +101,15 @@
|
|
100
101
|
AA00-AA5F: cham
|
101
102
|
A980-A9DF: javanese
|
102
103
|
A900-A92F: kayah_li
|
103
|
-
1780-17FF:
|
104
|
-
19E0-19FF:
|
104
|
+
1780-17FF: khmer-1
|
105
|
+
19E0-19FF: khmer-2
|
105
106
|
0E80-0EFF: lao
|
106
|
-
1000-109F:
|
107
|
-
AA60-AA7F:
|
107
|
+
1000-109F: myanmar-1
|
108
|
+
AA60-AA7F: myanmar-2
|
108
109
|
1980-19DF: new_tai_lue
|
109
110
|
A930-A95F: rejang
|
110
|
-
1B80-1BBF:
|
111
|
-
1CC0-1CCF:
|
111
|
+
1B80-1BBF: sudanese-1
|
112
|
+
1CC0-1CCF: sudanese-2
|
112
113
|
1950-197F: tai_le
|
113
114
|
1A20-1AAF: tai_tham
|
114
115
|
AA80-AADF: tai_viet
|
@@ -117,18 +118,18 @@
|
|
117
118
|
1720-173F: hanunoo
|
118
119
|
1700-171F: tagalog
|
119
120
|
1760-177F: tagbanwa
|
120
|
-
3100-312F:
|
121
|
-
31A0-31BF:
|
122
|
-
1100-11FF:
|
123
|
-
A960-A97F:
|
124
|
-
D7B0-D7FF:
|
125
|
-
3130-318F:
|
126
|
-
FF00-FFEF:
|
121
|
+
3100-312F: bopomofo-1
|
122
|
+
31A0-31BF: bopomofo-2
|
123
|
+
1100-11FF: hangul_jamo-1
|
124
|
+
A960-A97F: hangul_jamo-2
|
125
|
+
D7B0-D7FF: hangul_jamo-3
|
126
|
+
3130-318F: hangul_jamo-4
|
127
|
+
FF00-FFEF: hangul_jamo-5
|
127
128
|
AC00-D7AF: hangul
|
128
129
|
3040-309F: hiragana
|
129
|
-
30A0-30FF:
|
130
|
-
31F0-31FF:
|
131
|
-
FF00-FFEF:
|
130
|
+
30A0-30FF: katakana-1
|
131
|
+
31F0-31FF: katakana-2
|
132
|
+
FF00-FFEF: katakana-3
|
132
133
|
1B000-1B0FF: kana
|
133
134
|
3190-319F: kanbun
|
134
135
|
A4D0-A4FF: lisu
|
@@ -137,8 +138,9 @@
|
|
137
138
|
A490-A4CF: yi
|
138
139
|
13A0-13FF: cherokee
|
139
140
|
10400-1044F: deseret
|
140
|
-
1400-167F:
|
141
|
-
18B0-18FF:
|
141
|
+
1400-167F: united_canadian_aborginal-1
|
142
|
+
18B0-18FF: united_canadian_aborginal-2
|
143
|
+
|
142
144
|
#0000-007F: ASCII
|
143
145
|
|
144
146
|
# languages:
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: prose
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Edwin Rozario
|
@@ -28,17 +28,16 @@ require_paths:
|
|
28
28
|
- lib
|
29
29
|
required_ruby_version: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
35
|
requirements:
|
36
|
-
- -
|
36
|
+
- - ">="
|
37
37
|
- !ruby/object:Gem::Version
|
38
38
|
version: '0'
|
39
39
|
requirements: []
|
40
|
-
|
41
|
-
rubygems_version: 2.4.1
|
40
|
+
rubygems_version: 3.0.3
|
42
41
|
signing_key:
|
43
42
|
specification_version: 4
|
44
43
|
summary: Language detector
|