prose 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/prose/prose.yaml +141 -0
- data/lib/prose.rb +35 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 09b137dde0e95e9308ecdc067bf4fd562a9b1526
|
4
|
+
data.tar.gz: 8cf9634b26218339d84eb677dfaea9432fdd9c36
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: be87ec95d9bc12c36caa5d69d2192f88b9f02ed041f97fb186142c71e5a9d1f371d9ec60980ee520f8e928339182706e467aafbdbec6112c2cce433e20f74c03
|
7
|
+
data.tar.gz: 220d5d89073e35e652d91e89d6a71845166d5679c5ac35777af0f80713c56d727be5a6516eb33c89515a2c095142c0945f22282b61b1fb04cb20806a4af15197
|
@@ -0,0 +1,141 @@
|
|
1
|
+
0590-05FF: hebrew
|
2
|
+
FB1D-FB4F: hebrew
|
3
|
+
0D00-0D7F: malayalam
|
4
|
+
0530-058F: armenian
|
5
|
+
2C80-2CFF: coptic
|
6
|
+
10800-1083F: cypriot
|
7
|
+
0400-04FF: cyrillic
|
8
|
+
0500-052F: cyrillic
|
9
|
+
2DE0-2DFF: cyrillic
|
10
|
+
A640-A69F: cyrillic
|
11
|
+
10A0-10FF: georgian
|
12
|
+
2D00-2D2F: georgian
|
13
|
+
2C00-2C5F: glagolitic
|
14
|
+
10330-1034F: gothic
|
15
|
+
0370-03FF: greek
|
16
|
+
1F00-1FFF: greek
|
17
|
+
0000-007F: latin
|
18
|
+
0080-00FF: latin
|
19
|
+
0100-017F: latin
|
20
|
+
0180-024F: latin
|
21
|
+
2C60-2C7F: latin
|
22
|
+
A720-A7FF: latin
|
23
|
+
1E00-1EFF: latin
|
24
|
+
FB00-FB4F: latin
|
25
|
+
FB00-FB4F: latin
|
26
|
+
FF01-FF5E: latin
|
27
|
+
1680-169F: ogham
|
28
|
+
10300-1032F: old_italics
|
29
|
+
101D0-101FF: phaistos
|
30
|
+
16A0-16FF: runic
|
31
|
+
10450-1047F: shavian
|
32
|
+
A6A0-A6FF: bamum
|
33
|
+
16800-16A3F: bamum
|
34
|
+
13000-1342F: egyptian_hieroglyphs
|
35
|
+
1200-137F: ethiopic
|
36
|
+
1380-139F: ethiopic
|
37
|
+
2D80-2DDF: ethiopic
|
38
|
+
AB00-AB2F: ethiopic
|
39
|
+
109A0-109FF: meroitic_cursive
|
40
|
+
10980-1099F: meroitic_hieroglyphs
|
41
|
+
07C0-07FF: nko
|
42
|
+
10480-104AF: osmanya
|
43
|
+
2D30-2D7F: tifinagh
|
44
|
+
A500-A63F: vai
|
45
|
+
0600-06FF: arabic
|
46
|
+
0750-077F: arabic
|
47
|
+
08A0-08FF: arabic
|
48
|
+
FB50-FDFF: arabic
|
49
|
+
FE70-FEFF: arabic
|
50
|
+
10840-1085F: aramaic
|
51
|
+
10B00-10B3F: avestan
|
52
|
+
102A0-102DF: carian
|
53
|
+
12000-123FF: cuneiform
|
54
|
+
12400-1247F: cuneiform_numbers_punctuation
|
55
|
+
10280-1029F: lycian
|
56
|
+
1800-18AF: mongolian
|
57
|
+
0F00-0FFF: tibetan
|
58
|
+
0980-09FF: bengali_assamese
|
59
|
+
0A80-0AFF: gujarati
|
60
|
+
0C80-0CFF: kannada
|
61
|
+
0B00-0B7F: oriya
|
62
|
+
0B80-0BFF: tamil
|
63
|
+
0C00-0C7F: telugu
|
64
|
+
11000-1107F: brahmi
|
65
|
+
0900-097F: devanagari
|
66
|
+
A8E0-A8FF: devanagari
|
67
|
+
103A0-103DF: old_persian
|
68
|
+
10380-1039F: ugaritic
|
69
|
+
10920-1093F: lydian
|
70
|
+
0840-085F: mandaic
|
71
|
+
10A60-10A7F: old_south_arabian
|
72
|
+
10B60-10B7F: pahlavi
|
73
|
+
10B40-10B5F: parthian
|
74
|
+
10900-1091F: phoenician
|
75
|
+
0800-083F: samaritan
|
76
|
+
0700-074F: syriac
|
77
|
+
10C00-10C4F: old_turkic
|
78
|
+
A840-A87F: phags_pa
|
79
|
+
11100-1114F: chakma
|
80
|
+
0A00-0A7F: gurmukhi
|
81
|
+
11080-110CF: kaithi
|
82
|
+
10A00-10A5F: kharoshthi
|
83
|
+
1C00-1C4F: lepcha
|
84
|
+
1900-194F: limbu
|
85
|
+
ABC0-ABFF: meetei_mayek
|
86
|
+
AAE0-AAFF: meetei_mayek
|
87
|
+
1C50-1C7F: ol_chiki
|
88
|
+
A880-A8DF: saurashtra
|
89
|
+
11180-111DF: sharada
|
90
|
+
0D80-0DFF: sinhala
|
91
|
+
110D0-110FF: sora_sompeng
|
92
|
+
A800-A82F: syloti_nagri
|
93
|
+
11680-116CF: takri
|
94
|
+
0780-07BF: thaana
|
95
|
+
1CD0-1CFF: vedic
|
96
|
+
1B00-1B7F: balinese
|
97
|
+
1BC0-1BFF: batak
|
98
|
+
1A00-1A1F: buginese
|
99
|
+
AA00-AA5F: cham
|
100
|
+
A980-A9DF: javanese
|
101
|
+
A900-A92F: kayah_li
|
102
|
+
1780-17FF: khmer
|
103
|
+
19E0-19FF: khmer
|
104
|
+
0E80-0EFF: lao
|
105
|
+
1000-109F: myanmar
|
106
|
+
AA60-AA7F: myanmar
|
107
|
+
1980-19DF: new_tai_lue
|
108
|
+
A930-A95F: rejang
|
109
|
+
1B80-1BBF: sundanese
|
110
|
+
1CC0-1CCF: sundanese
|
111
|
+
1950-197F: tai_le
|
112
|
+
1A20-1AAF: tai_tham
|
113
|
+
AA80-AADF: tai_viet
|
114
|
+
0E00-0E7F: thai
|
115
|
+
1740-175F: buhid
|
116
|
+
1720-173F: hanunoo
|
117
|
+
1700-171F: tagalog
|
118
|
+
1760-177F: tagbanwa
|
119
|
+
3100-312F: bopomofo
|
120
|
+
31A0-31BF: bopomofo
|
121
|
+
1100-11FF: hangul_jamo
|
122
|
+
A960-A97F: hangul_jamo
|
123
|
+
D7B0-D7FF: hangul_jamo
|
124
|
+
3130-318F: hangul_jamo
|
125
|
+
FFA0-FFDC: hangul_jamo
|
126
|
+
AC00-D7AF: hangul
|
127
|
+
3040-309F: hiragana
|
128
|
+
30A0-30FF: katakana
|
129
|
+
31F0-31FF: katakana
|
130
|
+
FF65-FF9F: katakana
|
131
|
+
1B000-1B0FF: kana
|
132
|
+
3190-319F: kanbun
|
133
|
+
A4D0-A4FF: lisu
|
134
|
+
16F00-16F9F: miao
|
135
|
+
A000-A48F: yi
|
136
|
+
A490-A4CF: yi
|
137
|
+
13A0-13FF: cherokee
|
138
|
+
10400-1044F: deseret
|
139
|
+
1400-167F: unified_canadian_aboriginal
|
140
|
+
18B0-18FF: unified_canadian_aboriginal
|
141
|
+
#0000-007F: ASCII
|
data/lib/prose.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
class String
  class << self
    # Internal: the raw "LOW-HIGH" => script-name table read from the
    # prose/prose.yaml file that sits next to this source file.
    #
    # The table is cached on the String class itself, so the file is read
    # once per process instead of once per string, and no instance variable
    # is ever set on a (possibly frozen) receiver string.
    #
    # @return [Hash{String => String}]
    def prose_unicode_ranges
      @prose_unicode_ranges ||= begin
        path = File.join(File.expand_path(File.dirname(__FILE__)), 'prose', 'prose.yaml')
        # load_file opens and closes the file itself, so no handle is leaked.
        YAML.load_file(path) || {}
      end
    end

    # Internal: the same table pre-parsed into [Range, script-name] pairs so
    # the hexadecimal bounds are converted only once.
    #
    # Keys may separate the bounds with a hyphen or an en dash; a key that
    # does not contain two bounds is skipped rather than raising later.
    #
    # @return [Array<Array(Range, String)>]
    def prose_script_ranges
      @prose_script_ranges ||= prose_unicode_ranges.each_with_object([]) do |(key, script), table|
        low, high = key.to_s.split(/\s*[-\u2013]\s*/, 2)
        next if low.nil? || high.nil?

        table << [(low.to_i(16)..high.to_i(16)), script]
      end
    end
  end

  # Lists the scripts whose Unicode ranges contain the characters of this
  # string. Space characters are ignored.
  #
  # Despite the predicate-style name, this returns an Array (kept for
  # backward compatibility), which is always truthy in Ruby.
  #
  # @return [Array<String>] unique script names, in order of first appearance
  def prose?
    find_languages_in(self)
  end

  private

  # The raw range table, keyed by "LOW-HIGH" hexadecimal strings.
  #
  # @return [Hash{String => String}]
  def unicode_ranges
    String.prose_unicode_ranges
  end

  # Finds every script whose range contains the given character.
  # Both ends of a range are inclusive, so the first and last code point of
  # a block are matched too.
  #
  # @param letter [String] a single character
  # @return [Array<String>] matching script names (may be empty)
  def language_of(letter)
    codepoint = letter.ord
    String.prose_script_ranges.each_with_object([]) do |(range, script), found|
      found << script if range.cover?(codepoint)
    end
  end

  # Collects the scripts of every non-space character in +word+.
  #
  # @param word [String]
  # @return [Array<String>] unique script names, in order of first appearance
  def find_languages_in(word)
    word.each_char
        .reject { |letter| letter == ' ' }
        .flat_map { |letter| language_of(letter) }
        .uniq
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: prose
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Edwin Rozario
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-02-01 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Identifies language of alphabets in a string
|
14
|
+
email:
|
15
|
+
- rozarioed@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/prose.rb
|
21
|
+
- lib/prose/prose.yaml
|
22
|
+
homepage: https://github.com/EdwinRozario/Prose
|
23
|
+
licenses: []
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 2.2.0
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: Identify language string
|
45
|
+
test_files: []
|