prose 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/prose/prose.yaml +141 -0
  3. data/lib/prose.rb +35 -0
  4. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 09b137dde0e95e9308ecdc067bf4fd562a9b1526
4
+ data.tar.gz: 8cf9634b26218339d84eb677dfaea9432fdd9c36
5
+ SHA512:
6
+ metadata.gz: be87ec95d9bc12c36caa5d69d2192f88b9f02ed041f97fb186142c71e5a9d1f371d9ec60980ee520f8e928339182706e467aafbdbec6112c2cce433e20f74c03
7
+ data.tar.gz: 220d5d89073e35e652d91e89d6a71845166d5679c5ac35777af0f80713c56d727be5a6516eb33c89515a2c095142c0945f22282b61b1fb04cb20806a4af15197
@@ -0,0 +1,141 @@
1
+ 0590-05FF: hebrew
2
+ FB00-FB4F: hebrew
3
+ 0D00-0D7F: malayalam
4
+ 0530-058F: armenian
5
+ 2C80-2CFF: coptic
6
+ 10800-1083F: cypriot
7
+ 0400-04FF: cyrillic
8
+ 0500-052F: cyrillic
9
+ 2DE0-2DFF: cyrillic
10
+ A640-A69F: cyrillic
11
+ 10A0-10FF: georgian
12
+ 2D00-2D2F: georgian
13
+ 2C00-2C5F: glagolithic
14
+ 10330-1034F: gothic
15
+ 0370-03FF: greek
16
+ 1F00-1FFF: greek
17
+ 0000-007F: latin
18
+ 0080-00FF: latin
19
+ 0100-017F: latin
20
+ 0180-024F: latin
21
+ 2C60-2C7F: latin
22
+ A720-A7FF: latin
23
+ 1E00-1EFF: latin
24
+ FB00-FB4F: latin
25
+ FB00-FB4F: latin
26
+ FF00-FFEF: latin
27
+ 1680-169F: ogham
28
+ 10300-1032F: old_italics
29
+ 101D0-101FF: phaistos
30
+ 16A0-16FF: runic
31
+ 10450-1047F: shavian
32
+ A6A0-A6FF: bamum
33
+ 16800-16A3F: bamum
34
+ 13000-1342F: egyptian_hieroglyphs
35
+ 1200-137F: ethiopic
36
+ 1380-139F: ethiopic
37
+ 2D80-2DDF: ethiopic
38
+ AB00-AB2F: ethiopic
39
+ 109A0-109FF: meroitic_cursive
40
+ 10980-1099F: meroitic_hieroglyphs
41
+ 07C0-07FF: nko
42
+ 10480-104AF: osmanya
43
+ 2D30-2D7F: tifinagh
44
+ A500-A63F: vai
45
+ 0600-06FF: arabic
46
+ 0750-077F: arabic
47
+ 08A0-08FF: arabic
48
+ FB50-FDFF: arabic
49
+ FE70-FEFF: arabic
50
+ 10840-1085F: aramic
51
+ 10B00-10B3F: avestan
52
+ 102A0-102DF: carian
53
+ 12000-123FF: cuniform
54
+ 12400-1247F: cuniform_numbers_punctuation
55
+ 10280-1029F: lycian
56
+ 1800-18AF: mongolian
57
+ 0F00-0FFF: tibetan
58
+ 0980-09FF: bengali_assamese
59
+ 0A80-0AFF: gujarati
60
+ 0C80-0CFF: kannada
61
+ 0B00-0B7F: oriya
62
+ 0B80-0BFF: tamil
63
+ 0C00-0C7F: telugu
64
+ 11000-1107F: brahmi
65
+ 0900-097F: devanagari
66
+ A8E0-A8FF: devanagari
67
+ 103A0-103DF: old_persian
68
+ 10380-1039F: ugaritic
69
+ 10920-1093F: lydian
70
+ 0840-085F: mandaic
71
+ 10A60-10A7F: old_south_arabian
72
+ 10B60-10B7F: pahlavi
73
+ 10B40-10B5F: parthian
74
+ 10900-1091F: phoenician
75
+ 0800-083F: samaritan
76
+ 0700-074F: syriac
77
+ 10C00-10C4F: old_turkic
78
+ A840-A87F: phags_pa
79
+ 11100-1114F: chakma
80
+ 0A00-0A7F: gurmukhi
81
+ 11080-110CF: kaithi
82
+ 10A00-10A5F: kharoshthi
83
+ 1C00-1C4F: lepcha
84
+ 1900-194F: limbu
85
+ ABC0-ABFF: meetei_mayek
86
+ AAE0-AAFF: meetei_mayek
87
+ 1C50-1C7F: ol_chiki
88
+ A880-A8DF: saurashtra
89
+ 11180-111DF: sharada
90
+ 0D80-0DFF: sinhala
91
+ 110D0-110FF: sora_sompeng
92
+ A800-A82F: syloti_nagri
93
+ 11680-116CF: takri
94
+ 0780-07BF: thaana
95
+ 1CD0-1CFF: vedic
96
+ 1B00-1B7F: balinese
97
+ 1BC0-1BFF: batak
98
+ 1A00-1A1F: buginese
99
+ AA00-AA5F: cham
100
+ A980-A9DF: javanese
101
+ A900-A92F: kayah_li
102
+ 1780-17FF: khmer
103
+ 19E0-19FF: khmer
104
+ 0E80-0EFF: lao
105
+ 1000-109F: myanmar
106
+ AA60-AA7F: myanmar
107
+ 1980-19DF: new_tai_lue
108
+ A930-A95F: rejang
109
+ 1B80-1BBF: sudanese
110
+ 1CC0-1CCF: sudanese
111
+ 1950-197F: tai_le
112
+ 1A20-1AAF: tai_tham
113
+ AA80-AADF: tai_viet
114
+ 0E00-0E7F: thai
115
+ 1740-175F: buhid
116
+ 1720-173F: hanunoo
117
+ 1700-171F: tagalog
118
+ 1760-177F: tagbanwa
119
+ 3100-312F: bopomofo
120
+ 31A0-31BF: bopomofo
121
+ 1100-11FF: hangul_jamo
122
+ A960-A97F: hangul_jamo
123
+ D7B0-D7FF: hangul_jamo
124
+ 3130-318F: hangul_jamo
125
+ FF00-FFEF: hangul_jamo
126
+ AC00-D7AF: hangul
127
+ 3040-309F: hiragana
128
+ 30A0-30FF: katakana
129
+ 31F0-31FF: katakana
130
+ FF00-FFEF: katakana
131
+ 1B000-1B0FF: kana
132
+ 3190-319F: kanbun
133
+ A4D0-A4FF: lisu
134
+ 16F00-16F9F: miao
135
+ A000-A48F: yi
136
+ A490-A4CF: yi
137
+ 13A0-13FF: cherokee
138
+ 10400-1044F: deseret
139
+ 1400-167F: united_canadian_aborginal
140
+ 18B0-18FF: united_canadian_aborginal
141
+ #0000-007F: ASCII
data/lib/prose.rb ADDED
@@ -0,0 +1,35 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'yaml'
3
+
4
# Extends String with script detection driven by the Unicode block table in
# prose/prose.yaml (keys are "HEX-HEX" code point ranges, values are script
# names).
class String
  class << self
    # Raw mapping loaded from prose/prose.yaml, read once per process and
    # cached on the String class itself. Caching on the class (instead of on
    # each string instance) avoids re-reading the file for every string and
    # keeps #prose? usable on frozen strings.
    #
    # NOTE(review): mapping keys are unique once parsed, so range keys that
    # appear more than once in the YAML file collapse to a single entry.
    #
    # @return [Hash] range key => script name
    def prose_unicode_ranges
      @prose_unicode_ranges ||= begin
        path = File.join(File.expand_path(File.dirname(__FILE__)), 'prose', 'prose.yaml')
        # load_file opens and closes the file itself; an empty file yields a
        # falsy value, which is treated as "no ranges".
        YAML.load_file(path) || {}
      end
    end

    # Parsed form of .prose_unicode_ranges: an array of [Range, script name]
    # pairs in file order. Bounds are inclusive. Keys may separate the bounds
    # with an ASCII hyphen or an en dash; keys that do not look like
    # "HEX-HEX" are skipped instead of raising.
    #
    # @return [Array<Array(Range, Object)>]
    def prose_range_table
      @prose_range_table ||= prose_unicode_ranges.each_with_object([]) do |(key, language), table|
        bounds = /\A\s*(\h+)\s*[-\u2013]\s*(\h+)\s*\z/.match(key.to_s)
        next unless bounds

        table << [(bounds[1].to_i(16)..bounds[2].to_i(16)), language]
      end
    end
  end

  # Lists the scripts the characters of this string belong to.
  #
  # Despite the predicate-style name, this returns an Array (kept for
  # backward compatibility). An empty Array is still truthy in Ruby, so test
  # the result with #empty? rather than using it directly as a condition.
  #
  # @return [Array] unique script names in order of first appearance
  def prose?
    find_languages_in(self)
  end

  private

  # @return [Hash] the raw range table (see String.prose_unicode_ranges)
  def unicode_ranges
    String.prose_unicode_ranges
  end

  # Finds every script whose code point range contains +letter+.
  #
  # @param letter [String] a single character
  # @return [Array] matching script names, in table order
  def language_of(letter)
    code_point = letter.ord
    String.prose_range_table.each_with_object([]) do |(range, language), found|
      # Block boundaries belong to the block, so the check is inclusive.
      found << language if range.cover?(code_point)
    end
  end

  # Collects the scripts of every character in +word+, ignoring plain spaces.
  #
  # @param word [String]
  # @return [Array] unique script names in order of first appearance
  def find_languages_in(word)
    word.each_char
        .reject { |letter| letter == ' ' }
        .flat_map { |letter| language_of(letter) }
        .uniq
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: prose
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Edwin Rozario
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-01 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Identifies language of alphabets in a string
14
+ email:
15
+ - rozarioed@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/prose.rb
21
+ - lib/prose/prose.yaml
22
+ homepage: https://github.com/EdwinRozario/Prose
23
+ licenses: []
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.2.0
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: Identify language string
45
+ test_files: []