jelegante 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/Gemfile +1 -0
  2. data/VERSION +1 -1
  3. data/jelegante.gemspec +5 -2
  4. data/lib/jelegante.rb +23 -21
  5. metadata +22 -11
data/Gemfile CHANGED
@@ -2,6 +2,7 @@ source "http://rubygems.org"
2
2
  # Add dependencies required to use your gem here.
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
+ gem "unicode-block", ">= 1.1.0"
5
6
 
6
7
  # Add dependencies to develop your gem here.
7
8
  # Include everything needed to run rake, tests, features, etc.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.1
1
+ 1.1.0
data/jelegante.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "jelegante"
8
- s.version = "1.0.1"
8
+ s.version = "1.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["lpm11"]
12
- s.date = "2011-10-03"
12
+ s.date = "2011-10-17"
13
13
  s.description = "Guess whether text is japanese or not heuristically."
14
14
  s.email = "lpm11r@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -40,17 +40,20 @@ Gem::Specification.new do |s|
40
40
  s.specification_version = 3
41
41
 
42
42
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
43
+ s.add_runtime_dependency(%q<unicode-block>, [">= 1.1.0"])
43
44
  s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
44
45
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
45
46
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
46
47
  s.add_development_dependency(%q<rcov>, [">= 0"])
47
48
  else
49
+ s.add_dependency(%q<unicode-block>, [">= 1.1.0"])
48
50
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
49
51
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
50
52
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
51
53
  s.add_dependency(%q<rcov>, [">= 0"])
52
54
  end
53
55
  else
56
+ s.add_dependency(%q<unicode-block>, [">= 1.1.0"])
54
57
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
55
58
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
56
59
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
data/lib/jelegante.rb CHANGED
@@ -1,8 +1,10 @@
1
1
  #!/bin/env ruby
2
2
  #-*- coding: utf-8 -*-
3
+ require("rubygems");
4
+ require("unicode-block");
3
5
 
4
6
  class Jelegante
5
- KANJI_JYOUYOU = %w[
7
+ JYOUYOU_KANJI_LIST = %w[
6
8
  亜 哀 愛 悪 握 圧 扱 安 暗 案 以 位 依 偉 囲 委 威 尉 意 慰 易 為 異 移 維
7
9
  緯 胃 衣 違 遺 医 井 域 育 一 壱 逸 稲 芋 印 員 因 姻 引 飲 院 陰 隠 韻 右
8
10
  宇 羽 雨 渦 浦 運 雲 営 影 映 栄 永 泳 英 衛 詠 鋭 液 疫 益 駅 悦 謁 越 閲
@@ -83,7 +85,7 @@ class Jelegante
83
85
  路 露 労 廊 朗 楼 浪 漏 老 郎 六 録 論 和 話 賄 惑 枠 湾 腕
84
86
  ].sort();
85
87
 
86
- KANJI_JINMEI = %w[
88
+ JINMEI_KANJI_LIST = %w[
87
89
  丑 丞 串 乃 之 乎 也 云 亘 亙 些 亦 亥 亨 亮 仔 伊 伎 伍 伽 佃 佑 伶 侃 侑
88
90
  俄 俠 俣 俐 侶 倭 俺 俱 倦 倖 偲 僅 傭 儲 允 兎 兜 其 冥 冴 冶 凄 凌 凜 凛
89
91
  凧 凪 凰 凱 函 刹 劉 劫 勁 勃 勾 匂 勿 匡 廿 卜 卯 卿 厨 厩 叉 叡 叢 叶 只
@@ -126,7 +128,7 @@ class Jelegante
126
128
  類 禮 曆 歷 練 鍊 郞 朗 廊 錄
127
129
  ].sort();
128
130
 
129
- KANJI_HYOUGAI = %w[
131
+ HYOUGAI_KANJI_LIST = %w[
130
132
  啞 唖 蛙 鴉 埃 挨 曖 靄 軋 斡 按 庵 鞍 闇 已 夷 畏 韋 帷 萎 椅 葦 彙 飴 謂
131
133
  閾 溢 鰯 尹 咽 殷 淫 隕 蔭 于 迂 盂 烏 鬱 云 暈 穢 曳 洩 裔 穎 頴 嬰 翳 腋
132
134
  曰 奄 宛 怨 俺 冤 袁 婉 焉 堰 淵 焰 筵 厭 鳶 燕 閻 嚥 嗚 凰 嘔 鴨 甕 襖 謳
@@ -173,33 +175,33 @@ class Jelegante
173
175
  幷 桝 枡 麺 麵 沪 濾 芦 蘆 蝋 蠟 弯 彎
174
176
  ].sort();
175
177
 
176
- CHARACTERS_HIRAGANA = "\\p{Hiragana}";
177
- CHARACTERS_KATAKANA = "\\p{Katakana}";
178
- CHARACTERS_KANJI = "\\u{4E00}-\\u{9FFF}\\u{3400}-\\u{4DBF}\\u{20000}-\\u{2A6DF}\\u{2A700}-\\u{2B73F}\\u{F900}-\\u{FAFF}"
179
- CHARACTERS_JYOUYOU = "#{KANJI_JYOUYOU.join('')}";
180
- CHARACTERS_JINMEI = "#{KANJI_JINMEI.join('')}";
181
- CHARACTERS_HYOUGAI = "#{KANJI_HYOUGAI.join('')}";
182
- CHARACTERS_POPULAR_KANJI = "#{CHARACTERS_JYOUYOU}#{CHARACTERS_JINMEI}#{CHARACTERS_HYOUGAI}"
178
+ HIRAGANA_PROP = "\\p{Hiragana}";
179
+ KATAKANA_PROP = "\\p{Katakana}";
180
+ KANJI_PROP = "#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_PROP}#{UnicodeBlock::CJK_COMPATIBILITY_IDEOGRAPHS_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_PROP}#{UnicodeBlock::CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_PROP}";
181
+ JYOUYOU_PROP = "#{JYOUYOU_KANJI_LIST.join('')}";
182
+ JINMEI_PROP = "#{JINMEI_KANJI_LIST.join('')}";
183
+ HYOUGAI_PROP = "#{HYOUGAI_KANJI_LIST.join('')}";
184
+ POPULAR_KANJI_PROP = "#{JYOUYOU_PROP}#{JINMEI_PROP}#{HYOUGAI_PROP}"
183
185
 
184
- REGEX_HIRAGANA = /[#{CHARACTERS_HIRAGANA}]/;
185
- REGEX_KATAKANA = /[#{CHARACTERS_KATAKANA}]/;
186
- REGEX_KANJI = /[#{CHARACTERS_KANJI}]/;
187
- REGEX_POPULAR_KANJI = /[#{CHARACTERS_POPULAR_KANJI}]/;
186
+ HIRAGANA_REGEX = /\p{Hiragana}/;
187
+ KATAKANA_REGEX = /\p{Katakana}/;
188
+ KANJI_REGEX = /[#{KANJI_PROP}]/
189
+ POPULAR_KANJI_REGEX = /[#{POPULAR_KANJI_PROP}]/;
188
190
 
189
- REGEX_NO_KANJI = /[^#{CHARACTERS_KANJI}]/;
190
- REGEX_NO_POPULAR_KANJI = /[^#{CHARACTERS_POPULAR_KANJI}]/;
191
+ NO_KANJI_REGEX = /[^#{KANJI_PROP}]/;
192
+ NO_POPULAR_KANJI_REGEX = /[^#{POPULAR_KANJI_PROP}]/;
191
193
 
192
194
  def self.include_hiragana?(text)
193
- return !(REGEX_HIRAGANA.match(text).nil?);
195
+ return !(HIRAGANA_REGEX.match(text).nil?);
194
196
  end
195
197
  def self.include_katakana?(text)
196
- return !(REGEX_KATAKANA.match(text).nil?);
198
+ return !(KATAKANA_REGEX.match(text).nil?);
197
199
  end
198
200
  def self.include_kanji?(text)
199
- return !(REGEX_KANJI.match(text).nil?);
201
+ return !(KANJI_REGEX.match(text).nil?);
200
202
  end
201
203
  def self.include_popular_kanji?(text)
202
- return !(REGEX_POPULAR_KANJI.match(text).nil?);
204
+ return !(POPULAR_KANJI_REGEX.match(text).nil?);
203
205
  end
204
206
 
205
207
  # 簡易日本語判定
@@ -207,7 +209,7 @@ class Jelegante
207
209
  # ・漢字を含むが、常用漢字・人名漢字・表外漢字以外の漢字を含まない
208
210
  def self.japanese?(text)
209
211
  return true if (self.include_hiragana?(text) || self.include_katakana?(text));
210
- return true if (text.gsub(REGEX_NO_KANJI,"") =~ /^[#{CHARACTERS_POPULAR_KANJI}]+$/);
212
+ return true if (text.gsub(NO_KANJI_REGEX,"") =~ /^[#{POPULAR_KANJI_PROP}]+$/);
211
213
  return false;
212
214
  end
213
215
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: jelegante
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.1
5
+ version: 1.1.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - lpm11
@@ -10,11 +10,22 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-10-03 00:00:00 Z
13
+ date: 2011-10-17 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: rspec
16
+ name: unicode-block
17
17
  requirement: &id001 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.1.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: rspec
28
+ requirement: &id002 !ruby/object:Gem::Requirement
18
29
  none: false
19
30
  requirements:
20
31
  - - ~>
@@ -22,10 +33,10 @@ dependencies:
22
33
  version: 2.3.0
23
34
  type: :development
24
35
  prerelease: false
25
- version_requirements: *id001
36
+ version_requirements: *id002
26
37
  - !ruby/object:Gem::Dependency
27
38
  name: bundler
28
- requirement: &id002 !ruby/object:Gem::Requirement
39
+ requirement: &id003 !ruby/object:Gem::Requirement
29
40
  none: false
30
41
  requirements:
31
42
  - - ~>
@@ -33,10 +44,10 @@ dependencies:
33
44
  version: 1.0.0
34
45
  type: :development
35
46
  prerelease: false
36
- version_requirements: *id002
47
+ version_requirements: *id003
37
48
  - !ruby/object:Gem::Dependency
38
49
  name: jeweler
39
- requirement: &id003 !ruby/object:Gem::Requirement
50
+ requirement: &id004 !ruby/object:Gem::Requirement
40
51
  none: false
41
52
  requirements:
42
53
  - - ~>
@@ -44,10 +55,10 @@ dependencies:
44
55
  version: 1.6.4
45
56
  type: :development
46
57
  prerelease: false
47
- version_requirements: *id003
58
+ version_requirements: *id004
48
59
  - !ruby/object:Gem::Dependency
49
60
  name: rcov
50
- requirement: &id004 !ruby/object:Gem::Requirement
61
+ requirement: &id005 !ruby/object:Gem::Requirement
51
62
  none: false
52
63
  requirements:
53
64
  - - ">="
@@ -55,7 +66,7 @@ dependencies:
55
66
  version: "0"
56
67
  type: :development
57
68
  prerelease: false
58
- version_requirements: *id004
69
+ version_requirements: *id005
59
70
  description: Guess whether text is japanese or not heuristically.
60
71
  email: lpm11r@gmail.com
61
72
  executables: []
@@ -91,7 +102,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
91
102
  requirements:
92
103
  - - ">="
93
104
  - !ruby/object:Gem::Version
94
- hash: -2642319369464897638
105
+ hash: 127203842517963343
95
106
  segments:
96
107
  - 0
97
108
  version: "0"