script_detector 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,48 @@
1
+ # encoding: UTF-8
2
+
3
+ require File.dirname(__FILE__) + '/chinese_detector.rb'
4
+
5
# Adds script-detection predicates to String for telling Chinese, Japanese
# and Korean text apart.
#
# NOTE(review): traditional_chinese_regex and simplified_chinese_regex are not
# defined in this class; presumably they come from the included
# ChineseDetector module -- confirm against chinese_detector.rb.
class String
  include ChineseDetector

  # Returns true if the string contains Chinese (Han) characters _and_ no
  # Japanese or Korean characters.
  def chinese?
    look_for_chars_in(/\p{Han}/) && !japanese? && !korean?
  end

  # Returns true if the string contains traditional Chinese characters (繁體字).
  def traditional_chinese?
    look_for_chars_in(traditional_chinese_regex)
  end

  # Returns true if the string contains simplified Chinese characters (简体字).
  def simplified_chinese?
    look_for_chars_in(simplified_chinese_regex)
  end

  # Returns true if the string contains specifically Japanese (hiragana or
  # katakana) characters.
  def japanese?
    # Parenthesised so the parser cannot read the leading `/` as division.
    look_for_chars_in(/(\p{Katakana}|\p{Hiragana})/)
  end

  # Returns true if the string contains specifically Korean (hangul) characters.
  def korean?
    look_for_chars_in(/\p{Hangul}/)
  end

  # Tries to detect the script and returns one of "Japanese", "Korean",
  # "Traditional Chinese", "Simplified Chinese", "Ambiguous Chinese" or
  # "Unknown".
  #
  # The checks run in the order listed and the first match wins, so a string
  # containing kana is reported as "Japanese" even if it also contains Han
  # characters.
  def identify_script
    return "Japanese" if japanese?
    return "Korean" if korean?
    return "Traditional Chinese" if traditional_chinese?
    return "Simplified Chinese" if simplified_chinese?
    return "Ambiguous Chinese" if chinese?

    "Unknown"
  end

  private

  # Returns true if +regex+ matches anywhere in the string, false otherwise.
  # `=~` yields a match index or nil; the double negation turns that into a
  # strict boolean.
  def look_for_chars_in(regex)
    !!(self =~ regex)
  end
end
data/spec/helper.rb ADDED
@@ -0,0 +1,21 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
# Activate the gems from the :default and :development groups. If Bundler
# cannot do so, print its message plus a hint and exit with the status code
# carried by the error.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

# Start SimpleCov, filtering out the listed paths and writing the report
# under target/reports/coverage.
require 'simplecov'
SimpleCov.start do
  %w[config spec vendor].each { |filtered_path| add_filter filtered_path }
  coverage_dir "target/reports/coverage"
end

# Put ../lib and this directory at the front of the load path (this
# directory ends up first because it is unshifted last).
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)
21
+ require 'script_detector'
@@ -0,0 +1,132 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'helper'
4
+
5
# Checks the String script predicates and #identify_script against one sample
# text per script, plus a non-CJK control string.
describe 'Script detection for' do
  describe 'Simplified Chinese text' do
    let(:chinese_simpl) { ' 我的气垫船充满了鳝鱼.' }

    it 'is Chinese' do
      chinese_simpl.should be_chinese
    end

    it 'is simplified Chinese' do
      chinese_simpl.should be_simplified_chinese
    end

    it 'is not traditional Chinese' do
      chinese_simpl.should_not be_traditional_chinese
    end

    it 'is not Japanese' do
      chinese_simpl.should_not be_japanese
    end

    it 'is identified as Simplified Chinese' do
      chinese_simpl.identify_script.should == "Simplified Chinese"
    end
  end

  describe 'Traditional Chinese text' do
    let(:chinese_trad) { ' 我的氣墊船充滿了鱔魚.' }

    it 'is Chinese' do
      chinese_trad.should be_chinese
    end

    it 'is traditional Chinese' do
      chinese_trad.should be_traditional_chinese
    end

    it 'is not simplified Chinese' do
      chinese_trad.should_not be_simplified_chinese
    end

    it 'is not Japanese' do
      chinese_trad.should_not be_japanese
    end

    it 'is identified as traditional Chinese' do
      chinese_trad.identify_script.should == "Traditional Chinese"
    end
  end

  describe 'Ambiguous Chinese text' do
    let(:chinese_amb) { '你好.' }

    it 'is Chinese' do
      chinese_amb.should be_chinese
    end

    it 'is neither traditional nor simplified Chinese' do
      chinese_amb.should_not be_simplified_chinese
      chinese_amb.should_not be_traditional_chinese
    end

    it 'is identified as ambiguous Chinese' do
      chinese_amb.identify_script.should == "Ambiguous Chinese"
    end
  end

  describe 'Japanese text' do
    let(:japanese) { ' 私のホバークラフトは鰻でいっぱいです.' }

    it 'is Japanese' do
      japanese.should be_japanese
    end

    it 'is not Chinese or Korean' do
      japanese.should_not be_chinese
      japanese.should_not be_korean
    end

    it 'is identified as Japanese' do
      japanese.identify_script.should == "Japanese"
    end
  end

  describe 'Korean text' do
    let(:korean) { ' 내 호버크라프트는 장어로 가득 차 있어요.' }

    it 'is Korean' do
      korean.should be_korean
    end

    it 'is not Chinese or Japanese' do
      korean.should_not be_chinese
      korean.should_not be_japanese
    end

    it 'is identified as Korean' do
      korean.identify_script.should == "Korean"
    end
  end

  describe 'Non-CJK text' do
    let(:unknown) { 'Hello world.' }

    it 'is none of the above' do
      unknown.should_not be_chinese
      unknown.should_not be_simplified_chinese
      unknown.should_not be_traditional_chinese
      unknown.should_not be_japanese
      unknown.should_not be_korean
    end

    it 'is identified as unknown' do
      unknown.identify_script.should == "Unknown"
    end
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --colour --format progress
metadata ADDED
@@ -0,0 +1,176 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: script_detector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jani Patokallio
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-30 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: shoulda
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdoc
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '3.12'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '3.12'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec-rails
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: simplecov
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: nokogiri
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: bundler
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: 1.1.0
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: 1.1.0
110
+ - !ruby/object:Gem::Dependency
111
+ name: jeweler
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: 1.8.3
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ~>
124
+ - !ruby/object:Gem::Version
125
+ version: 1.8.3
126
+ description: Utility library for determining if string is traditional Chinese, simplified
127
+ Chinese, Japanese or Korean
128
+ email: jpatokal@iki.fi
129
+ executables: []
130
+ extensions: []
131
+ extra_rdoc_files:
132
+ - LICENSE.txt
133
+ - README.rdoc
134
+ files:
135
+ - .document
136
+ - Gemfile
137
+ - Gemfile.lock
138
+ - LICENSE.txt
139
+ - README.rdoc
140
+ - Rakefile
141
+ - VERSION
142
+ - lib/chinese_detector.rb
143
+ - lib/script_detector.rb
144
+ - spec/helper.rb
145
+ - spec/script_detector_spec.rb
146
+ - spec/spec.opts
147
+ homepage: http://github.com/jpatokal/script_detector
148
+ licenses:
149
+ - MIT
150
+ post_install_message:
151
+ rdoc_options: []
152
+ require_paths:
153
+ - lib
154
+ required_ruby_version: !ruby/object:Gem::Requirement
155
+ none: false
156
+ requirements:
157
+ - - ! '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ segments:
161
+ - 0
162
+ hash: -1438461098142701375
163
+ required_rubygems_version: !ruby/object:Gem::Requirement
164
+ none: false
165
+ requirements:
166
+ - - ! '>='
167
+ - !ruby/object:Gem::Version
168
+ version: '0'
169
+ requirements: []
170
+ rubyforge_project:
171
+ rubygems_version: 1.8.21
172
+ signing_key:
173
+ specification_version: 3
174
+ summary: Utility library for determining if string is traditional Chinese, simplified
175
+ Chinese, Japanese or Korean
176
+ test_files: []