prose 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/prose/prose.yaml +141 -0
  3. data/lib/prose.rb +35 -0
  4. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 09b137dde0e95e9308ecdc067bf4fd562a9b1526
4
+ data.tar.gz: 8cf9634b26218339d84eb677dfaea9432fdd9c36
5
+ SHA512:
6
+ metadata.gz: be87ec95d9bc12c36caa5d69d2192f88b9f02ed041f97fb186142c71e5a9d1f371d9ec60980ee520f8e928339182706e467aafbdbec6112c2cce433e20f74c03
7
+ data.tar.gz: 220d5d89073e35e652d91e89d6a71845166d5679c5ac35777af0f80713c56d727be5a6516eb33c89515a2c095142c0945f22282b61b1fb04cb20806a4af15197
data/lib/prose/prose.yaml ADDED
@@ -0,0 +1,141 @@
1
+ 0590-05FF: hebrew
2
+ FB00–FB4F: hebrew
3
+ 00D00-0D7F: malayalam
4
+ 0530-058F: armenian
5
+ 2C80-2CFF: coptic
6
+ 10800-1083F: cypriot
7
+ 0400-04FF: cyrillic
8
+ 0500-052F: cyrillic
9
+ 2DE0-2DFF: cyrillic
10
+ A640-A69F: cyrillic
11
+ 10A0-10FF: georgian
12
+ 2D00-2D2F: georgian
13
+ 2C00-2C5F: glagolithic
14
+ 10330-1034F: gothic
15
+ 0370-03FF: greek
16
+ 1F00-1FFF: greek
17
+ 0000-007F: latin
18
+ 0080-00FF: latin
19
+ 0100-017F: latin
20
+ 0180-024F: latin
21
+ 2C60-2C7F: latin
22
+ A720-A7FF: latin
23
+ 1E00-1EFF: latin
24
+ FB00-FB4F: latin
25
+ FB00-FB4F: latin
26
+ FF00-FFEF: latin
27
+ 1680-169F: ogham
28
+ 10300-1032F: old_italics
29
+ 101D0-101FF: phaistos
30
+ 16A0-16FF: runic
31
+ 10450-1047F: shavian
32
+ A6A0-A6FF: bamum
33
+ 16800-16A3F: bamum
34
+ 13000-1342F: egyptian_hieroglyphs
35
+ 1200-137F: ethiopic
36
+ 1380-139F: ethiopic
37
+ 2D80-2DDF: ethiopic
38
+ AB00-AB2F: ethiopic
39
+ 109A0-109FF: meroitic_cursive
40
+ 10980-1099F: meroitic_hieroglyphs
41
+ 07C0-07FF: nko
42
+ 10480-104AF: osmanya
43
+ 2D30-2D7F: tifinagh
44
+ A500-A63F: vai
45
+ 0600-06FF: arabic
46
+ 0750-077F: arabic
47
+ 08A0-08FF: arabic
48
+ FB50-FDFF: arabic
49
+ FE70-FEFF: arabic
50
+ 10840-1085F: aramic
51
+ 10B00-10B3F: avestan
52
+ 102A0-102DF: carian
53
+ 12000-123FF: cuniform
54
+ 12400-1247F: cuniform_numbers_punctuation
55
+ 10280-1029F: lycian
56
+ 1800-18AF: mongolian
57
+ 0F00-0FFF: tibetan
58
+ 0980-09FF: bengali_assamese
59
+ 0A80-0AFF: gujarati
60
+ 0C80-0CFF: kannada
61
+ 0B00-0B7F: oriya
62
+ 0B80-0BFF: tamil
63
+ 0C00-0C7F: telugu
64
+ 11000-1107F: brahmi
65
+ 0900-097F: devanagari
66
+ A8E0-A8FF: devanagari
67
+ 103A0-103DF: old_persian
68
+ 10380-1039F: ugaritic
69
+ 10920-1093F: lydian
70
+ 0840-085F: mandaic
71
+ 10A60-10A7F: old_south_arabian
72
+ 10B60-10B7F: pahlavi
73
+ 10B40-10B5F: parthian
74
+ 10900-1091F: phoenician
75
+ 0800-083F: samaritan
76
+ 0700-074F: syriac
77
+ 10C00-10C4F: old_turkic
78
+ A840-A87F: phags_pa
79
+ 11100-1114F: chakma
80
+ 0A00-0A7F: gurmukhi
81
+ 11080-110CF: kaithi
82
+ 10A00-10A5F: kharoshthi
83
+ 1C00-1C4F: lepcha
84
+ 1900-194F: limbu
85
+ ABC0-ABFF: meetei_mayek
86
+ AAE0-AAFF: meetei_mayek
87
+ 1C50-1C7F: ol_chiki
88
+ A880-A8DF: saurashtra
89
+ 11180-111DF: sharada
90
+ 0D80-0DFF: sinhala
91
+ 110D0-110FF: sora_sompeng
92
+ A800-A82F: syloti_nagri
93
+ 11680-116CF: takri
94
+ 0780-07BF: thaana
95
+ 1CD0-1CFF: vedic
96
+ 1B00-1B7F: balinese
97
+ 1BC0-1BFF: batak
98
+ 1A00-1A1F: buginese
99
+ AA00-AA5F: cham
100
+ A980-A9DF: javanese
101
+ A900-A92F: kayah_li
102
+ 1780-17FF: khmer
103
+ 19E0-19FF: khmer
104
+ 0E80-0EFF: lao
105
+ 1000-109F: myanmar
106
+ AA60-AA7F: myanmar
107
+ 1980-19DF: new_tai_lue
108
+ A930-A95F: rejang
109
+ 1B80-1BBF: sudanese
110
+ 1CC0-1CCF: sudanese
111
+ 1950-197F: tai_le
112
+ 1A20-1AAF: tai_tham
113
+ AA80-AADF: tai_viet
114
+ 0E00-0E7F: thai
115
+ 1740-175F: buhid
116
+ 1720-173F: hanunoo
117
+ 1700-171F: tagalog
118
+ 1760-177F: tagbanwa
119
+ 3100-312F: bopomofo
120
+ 31A0-31BF: bopomofo
121
+ 1100-11FF: hangul_jamo
122
+ A960-A97F: hangul_jamo
123
+ D7B0-D7FF: hangul_jamo
124
+ 3130-318F: hangul_jamo
125
+ FF00-FFEF: hangul_jamo
126
+ AC00-D7AF: hangul
127
+ 3040-309F: hiragana
128
+ 30A0-30FF: katakana
129
+ 31F0-31FF: katakana
130
+ FF00-FFEF: katakana
131
+ 1B000-1B0FF: kana
132
+ 3190-319F: kanbun
133
+ A4D0-A4FF: lisu
134
+ 16F00-16F9F: miao
135
+ A000-A48F: yi
136
+ A490-A4CF: yi
137
+ 13A0-13FF: cherokee
138
+ 10400-1044F: deseret
139
+ 1400-167F: united_canadian_aborginal
140
+ 18B0-18FF: united_canadian_aborginal
141
+ #0000-007F: ASCII
data/lib/prose.rb ADDED
@@ -0,0 +1,35 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'yaml'
3
+
4
# Core extension that reports which scripts ("languages") the characters of a
# string belong to, based on the code-point table shipped next to this file in
# prose/prose.yaml. Each entry of that table maps a hexadecimal code-point
# range ("0400-04FF") to a script name ("cyrillic").
class String
  class << self
    # Raw contents of prose/prose.yaml as a Hash of "LOW-HIGH" => name.
    #
    # The table is memoized on the String class rather than on each receiver,
    # so it is read once per process and frozen strings can be inspected too.
    # The file is read as UTF-8 and the handle is closed by File.read.
    #
    # @return [Hash] frozen mapping of range keys to script names
    def prose_unicode_ranges
      @prose_unicode_ranges ||= begin
        path = "#{File.expand_path File.dirname(__FILE__)}/prose/prose.yaml"
        (YAML.safe_load(File.read(path, encoding: 'UTF-8')) || {}).freeze
      end
    end

    # Parsed form of the table: [Range, name] pairs in file order, with both
    # bounds converted from hexadecimal once instead of once per character.
    #
    # A key may separate its bounds with a plain hyphen or with an en dash
    # (U+2013); keys that do not look like "HEX-HEX" are skipped.
    #
    # @return [Array<Array>] frozen list of [Range, name] pairs
    def prose_script_table
      @prose_script_table ||= begin
        pairs = prose_unicode_ranges.each_with_object([]) do |(key, name), table|
          bounds = /\A\s*(\h+)\s*[-\u2013]\s*(\h+)\s*\z/.match(key.to_s)
          next unless bounds

          table << [(bounds[1].to_i(16)..bounds[2].to_i(16)), name]
        end
        pairs.freeze
      end
    end
  end

  # Lists the scripts used by the characters of this string.
  #
  # Despite the trailing "?", the result is an Array, not a boolean: it holds
  # each matching script name once, in order of first appearance. Space
  # characters (" ") are ignored.
  #
  # @return [Array] unique script names; empty when nothing matches
  def prose?
    find_languages_in(self)
  end

  private

  # @return [Hash] the raw "LOW-HIGH" => name table (shared, frozen)
  def unicode_ranges
    String.prose_unicode_ranges
  end

  # Finds every table entry whose range contains the character's code point.
  # Both ends of a range are inclusive, so the first and last code point of a
  # block are matched as well.
  #
  # @param letter [String] a single character
  # @return [Array] names of all matching entries, in table order
  def language_of(letter)
    code_point = letter.ord
    String.prose_script_table
          .select { |range, _name| range.cover?(code_point) }
          .map(&:last)
  end

  # Collects the script names of every character in +word+ except " ".
  #
  # @param word [String] text to inspect
  # @return [Array] unique script names in order of first appearance
  def find_languages_in(word)
    word.each_char
        .reject { |letter| letter == ' ' }
        .flat_map { |letter| language_of(letter) }
        .uniq
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: prose
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Edwin Rozario
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-01 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Identifies language of alphabets in a string
14
+ email:
15
+ - rozarioed@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/prose.rb
21
+ - lib/prose/prose.yaml
22
+ homepage: https://github.com/EdwinRozario/Prose
23
+ licenses: []
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.2.0
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: Identify language string
45
+ test_files: []