jelegante 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5)
  1. data/Gemfile +1 -0
  2. data/VERSION +1 -1
  3. data/jelegante.gemspec +5 -2
  4. data/lib/jelegante.rb +23 -21
  5. metadata +22 -11
data/Gemfile CHANGED
@@ -2,6 +2,7 @@ source "http://rubygems.org"
2
2
  # Add dependencies required to use your gem here.
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
+ gem "unicode-block", ">= 1.1.0"
5
6
 
6
7
  # Add dependencies to develop your gem here.
7
8
  # Include everything needed to run rake, tests, features, etc.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.1
1
+ 1.1.0
data/jelegante.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "jelegante"
8
- s.version = "1.0.1"
8
+ s.version = "1.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["lpm11"]
12
- s.date = "2011-10-03"
12
+ s.date = "2011-10-17"
13
13
  s.description = "Guess whether text is japanese or not heuristically."
14
14
  s.email = "lpm11r@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -40,17 +40,20 @@ Gem::Specification.new do |s|
40
40
  s.specification_version = 3
41
41
 
42
42
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
43
+ s.add_runtime_dependency(%q<unicode-block>, [">= 1.1.0"])
43
44
  s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
44
45
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
45
46
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
46
47
  s.add_development_dependency(%q<rcov>, [">= 0"])
47
48
  else
49
+ s.add_dependency(%q<unicode-block>, [">= 1.1.0"])
48
50
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
49
51
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
50
52
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
51
53
  s.add_dependency(%q<rcov>, [">= 0"])
52
54
  end
53
55
  else
56
+ s.add_dependency(%q<unicode-block>, [">= 1.1.0"])
54
57
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
55
58
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
56
59
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
data/lib/jelegante.rb CHANGED
@@ -1,8 +1,10 @@
1
1
  #!/bin/env ruby
2
2
  #-*- coding: utf-8 -*-
3
+ require("rubygems");
4
+ require("unicode-block");
3
5
 
4
6
  class Jelegante
5
- KANJI_JYOUYOU = %w[
7
+ JYOUYOU_KANJI_LIST = %w[
6
8
  亜 哀 愛 悪 握 圧 扱 安 暗 案 以 位 依 偉 囲 委 威 尉 意 慰 易 為 異 移 維
7
9
  緯 胃 衣 違 遺 医 井 域 育 一 壱 逸 稲 芋 印 員 因 姻 引 飲 院 陰 隠 韻 右
8
10
  宇 羽 雨 渦 浦 運 雲 営 影 映 栄 永 泳 英 衛 詠 鋭 液 疫 益 駅 悦 謁 越 閲
@@ -83,7 +85,7 @@ class Jelegante
83
85
  路 露 労 廊 朗 楼 浪 漏 老 郎 六 録 論 和 話 賄 惑 枠 湾 腕
84
86
  ].sort();
85
87
 
86
- KANJI_JINMEI = %w[
88
+ JINMEI_KANJI_LIST = %w[
87
89
  丑 丞 串 乃 之 乎 也 云 亘 亙 些 亦 亥 亨 亮 仔 伊 伎 伍 伽 佃 佑 伶 侃 侑
88
90
  俄 俠 俣 俐 侶 倭 俺 俱 倦 倖 偲 僅 傭 儲 允 兎 兜 其 冥 冴 冶 凄 凌 凜 凛
89
91
  凧 凪 凰 凱 函 刹 劉 劫 勁 勃 勾 匂 勿 匡 廿 卜 卯 卿 厨 厩 叉 叡 叢 叶 只
@@ -126,7 +128,7 @@ class Jelegante
126
128
  類 禮 曆 歷 練 鍊 郞 朗 廊 錄
127
129
  ].sort();
128
130
 
129
- KANJI_HYOUGAI = %w[
131
+ HYOUGAI_KANJI_LIST = %w[
130
132
  啞 唖 蛙 鴉 埃 挨 曖 靄 軋 斡 按 庵 鞍 闇 已 夷 畏 韋 帷 萎 椅 葦 彙 飴 謂
131
133
  閾 溢 鰯 尹 咽 殷 淫 隕 蔭 于 迂 盂 烏 鬱 云 暈 穢 曳 洩 裔 穎 頴 嬰 翳 腋
132
134
  曰 奄 宛 怨 俺 冤 袁 婉 焉 堰 淵 焰 筵 厭 鳶 燕 閻 嚥 嗚 凰 嘔 鴨 甕 襖 謳
@@ -173,33 +175,33 @@ class Jelegante
173
175
  幷 桝 枡 麺 麵 沪 濾 芦 蘆 蝋 蠟 弯 彎
174
176
  ].sort();
175
177
 
176
- CHARACTERS_HIRAGANA = "\\p{Hiragana}";
177
- CHARACTERS_KATAKANA = "\\p{Katakana}";
178
- CHARACTERS_KANJI = "\\u{4E00}-\\u{9FFF}\\u{3400}-\\u{4DBF}\\u{20000}-\\u{2A6DF}\\u{2A700}-\\u{2B73F}\\u{F900}-\\u{FAFF}"
179
- CHARACTERS_JYOUYOU = "#{KANJI_JYOUYOU.join('')}";
180
- CHARACTERS_JINMEI = "#{KANJI_JINMEI.join('')}";
181
- CHARACTERS_HYOUGAI = "#{KANJI_HYOUGAI.join('')}";
182
- CHARACTERS_POPULAR_KANJI = "#{CHARACTERS_JYOUYOU}#{CHARACTERS_JINMEI}#{CHARACTERS_HYOUGAI}"
178
+ HIRAGANA_PROP = "\\p{Hiragana}";
179
+ KATAKANA_PROP = "\\p{Katakana}";
180
+ KANJI_PROP = "#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_PROP}#{UnicodeBlock::CJK_COMPATIBILITY_IDEOGRAPHS_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_PROP}#{UnicodeBlock::CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_PROP}";
181
+ JYOUYOU_PROP = "#{JYOUYOU_KANJI_LIST.join('')}";
182
+ JINMEI_PROP = "#{JINMEI_KANJI_LIST.join('')}";
183
+ HYOUGAI_PROP = "#{HYOUGAI_KANJI_LIST.join('')}";
184
+ POPULAR_KANJI_PROP = "#{JYOUYOU_PROP}#{JINMEI_PROP}#{HYOUGAI_PROP}"
183
185
 
184
- REGEX_HIRAGANA = /[#{CHARACTERS_HIRAGANA}]/;
185
- REGEX_KATAKANA = /[#{CHARACTERS_KATAKANA}]/;
186
- REGEX_KANJI = /[#{CHARACTERS_KANJI}]/;
187
- REGEX_POPULAR_KANJI = /[#{CHARACTERS_POPULAR_KANJI}]/;
186
+ HIRAGANA_REGEX = /\p{Hiragana}/;
187
+ KATAKANA_REGEX = /\p{Katakana}/;
188
+ KANJI_REGEX = /[#{KANJI_PROP}]/
189
+ POPULAR_KANJI_REGEX = /[#{POPULAR_KANJI_PROP}]/;
188
190
 
189
- REGEX_NO_KANJI = /[^#{CHARACTERS_KANJI}]/;
190
- REGEX_NO_POPULAR_KANJI = /[^#{CHARACTERS_POPULAR_KANJI}]/;
191
+ NO_KANJI_REGEX = /[^#{KANJI_PROP}]/;
192
+ NO_POPULAR_KANJI_REGEX = /[^#{POPULAR_KANJI_PROP}]/;
191
193
 
192
194
  def self.include_hiragana?(text)
193
- return !(REGEX_HIRAGANA.match(text).nil?);
195
+ return !(HIRAGANA_REGEX.match(text).nil?);
194
196
  end
195
197
  def self.include_katakana?(text)
196
- return !(REGEX_KATAKANA.match(text).nil?);
198
+ return !(KATAKANA_REGEX.match(text).nil?);
197
199
  end
198
200
  def self.include_kanji?(text)
199
- return !(REGEX_KANJI.match(text).nil?);
201
+ return !(KANJI_REGEX.match(text).nil?);
200
202
  end
201
203
  def self.include_popular_kanji?(text)
202
- return !(REGEX_POPULAR_KANJI.match(text).nil?);
204
+ return !(POPULAR_KANJI_REGEX.match(text).nil?);
203
205
  end
204
206
 
205
207
  # 簡易日本語判定
@@ -207,7 +209,7 @@ class Jelegante
207
209
  # ・漢字を含むが、常用漢字・人名漢字・表外漢字以外の漢字を含まない
208
210
  def self.japanese?(text)
209
211
  return true if (self.include_hiragana?(text) || self.include_katakana?(text));
210
- return true if (text.gsub(REGEX_NO_KANJI,"") =~ /^[#{CHARACTERS_POPULAR_KANJI}]+$/);
212
+ return true if (text.gsub(NO_KANJI_REGEX,"") =~ /^[#{POPULAR_KANJI_PROP}]+$/);
211
213
  return false;
212
214
  end
213
215
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: jelegante
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.1
5
+ version: 1.1.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - lpm11
@@ -10,11 +10,22 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-10-03 00:00:00 Z
13
+ date: 2011-10-17 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: rspec
16
+ name: unicode-block
17
17
  requirement: &id001 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.1.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: rspec
28
+ requirement: &id002 !ruby/object:Gem::Requirement
18
29
  none: false
19
30
  requirements:
20
31
  - - ~>
@@ -22,10 +33,10 @@ dependencies:
22
33
  version: 2.3.0
23
34
  type: :development
24
35
  prerelease: false
25
- version_requirements: *id001
36
+ version_requirements: *id002
26
37
  - !ruby/object:Gem::Dependency
27
38
  name: bundler
28
- requirement: &id002 !ruby/object:Gem::Requirement
39
+ requirement: &id003 !ruby/object:Gem::Requirement
29
40
  none: false
30
41
  requirements:
31
42
  - - ~>
@@ -33,10 +44,10 @@ dependencies:
33
44
  version: 1.0.0
34
45
  type: :development
35
46
  prerelease: false
36
- version_requirements: *id002
47
+ version_requirements: *id003
37
48
  - !ruby/object:Gem::Dependency
38
49
  name: jeweler
39
- requirement: &id003 !ruby/object:Gem::Requirement
50
+ requirement: &id004 !ruby/object:Gem::Requirement
40
51
  none: false
41
52
  requirements:
42
53
  - - ~>
@@ -44,10 +55,10 @@ dependencies:
44
55
  version: 1.6.4
45
56
  type: :development
46
57
  prerelease: false
47
- version_requirements: *id003
58
+ version_requirements: *id004
48
59
  - !ruby/object:Gem::Dependency
49
60
  name: rcov
50
- requirement: &id004 !ruby/object:Gem::Requirement
61
+ requirement: &id005 !ruby/object:Gem::Requirement
51
62
  none: false
52
63
  requirements:
53
64
  - - ">="
@@ -55,7 +66,7 @@ dependencies:
55
66
  version: "0"
56
67
  type: :development
57
68
  prerelease: false
58
- version_requirements: *id004
69
+ version_requirements: *id005
59
70
  description: Guess whether text is japanese or not heuristically.
60
71
  email: lpm11r@gmail.com
61
72
  executables: []
@@ -91,7 +102,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
91
102
  requirements:
92
103
  - - ">="
93
104
  - !ruby/object:Gem::Version
94
- hash: -2642319369464897638
105
+ hash: 127203842517963343
95
106
  segments:
96
107
  - 0
97
108
  version: "0"