jelegante 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/VERSION +1 -1
- data/jelegante.gemspec +5 -2
- data/lib/jelegante.rb +23 -21
- metadata +22 -11
data/Gemfile
CHANGED
@@ -2,6 +2,7 @@ source "http://rubygems.org"
|
|
2
2
|
# Add dependencies required to use your gem here.
|
3
3
|
# Example:
|
4
4
|
# gem "activesupport", ">= 2.3.5"
|
5
|
+
gem "unicode-block", ">= 1.1.0"
|
5
6
|
|
6
7
|
# Add dependencies to develop your gem here.
|
7
8
|
# Include everything needed to run rake, tests, features, etc.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.1
|
1
|
+
1.1.0
|
data/jelegante.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "jelegante"
|
8
|
-
s.version = "1.0.1"
|
8
|
+
s.version = "1.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["lpm11"]
|
12
|
-
s.date = "2011-10-
|
12
|
+
s.date = "2011-10-17"
|
13
13
|
s.description = "Guess whether text is japanese or not heuristically."
|
14
14
|
s.email = "lpm11r@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -40,17 +40,20 @@ Gem::Specification.new do |s|
|
|
40
40
|
s.specification_version = 3
|
41
41
|
|
42
42
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
43
|
+
s.add_runtime_dependency(%q<unicode-block>, [">= 1.1.0"])
|
43
44
|
s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
|
44
45
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
45
46
|
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
46
47
|
s.add_development_dependency(%q<rcov>, [">= 0"])
|
47
48
|
else
|
49
|
+
s.add_dependency(%q<unicode-block>, [">= 1.1.0"])
|
48
50
|
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
49
51
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
50
52
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
51
53
|
s.add_dependency(%q<rcov>, [">= 0"])
|
52
54
|
end
|
53
55
|
else
|
56
|
+
s.add_dependency(%q<unicode-block>, [">= 1.1.0"])
|
54
57
|
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
55
58
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
56
59
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
data/lib/jelegante.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
#!/bin/env ruby
|
2
2
|
#-*- coding: utf-8 -*-
|
3
|
+
require("rubygems");
|
4
|
+
require("unicode-block");
|
3
5
|
|
4
6
|
class Jelegante
|
5
|
-
|
7
|
+
JYOUYOU_KANJI_LIST = %w[
|
6
8
|
亜 哀 愛 悪 握 圧 扱 安 暗 案 以 位 依 偉 囲 委 威 尉 意 慰 易 為 異 移 維
|
7
9
|
緯 胃 衣 違 遺 医 井 域 育 一 壱 逸 稲 芋 印 員 因 姻 引 飲 院 陰 隠 韻 右
|
8
10
|
宇 羽 雨 渦 浦 運 雲 営 影 映 栄 永 泳 英 衛 詠 鋭 液 疫 益 駅 悦 謁 越 閲
|
@@ -83,7 +85,7 @@ class Jelegante
|
|
83
85
|
路 露 労 廊 朗 楼 浪 漏 老 郎 六 録 論 和 話 賄 惑 枠 湾 腕
|
84
86
|
].sort();
|
85
87
|
|
86
|
-
|
88
|
+
JINMEI_KANJI_LIST = %w[
|
87
89
|
丑 丞 串 乃 之 乎 也 云 亘 亙 些 亦 亥 亨 亮 仔 伊 伎 伍 伽 佃 佑 伶 侃 侑
|
88
90
|
俄 俠 俣 俐 侶 倭 俺 俱 倦 倖 偲 僅 傭 儲 允 兎 兜 其 冥 冴 冶 凄 凌 凜 凛
|
89
91
|
凧 凪 凰 凱 函 刹 劉 劫 勁 勃 勾 匂 勿 匡 廿 卜 卯 卿 厨 厩 叉 叡 叢 叶 只
|
@@ -126,7 +128,7 @@ class Jelegante
|
|
126
128
|
類 禮 曆 歷 練 鍊 郞 朗 廊 錄
|
127
129
|
].sort();
|
128
130
|
|
129
|
-
|
131
|
+
HYOUGAI_KANJI_LIST = %w[
|
130
132
|
啞 唖 蛙 鴉 埃 挨 曖 靄 軋 斡 按 庵 鞍 闇 已 夷 畏 韋 帷 萎 椅 葦 彙 飴 謂
|
131
133
|
閾 溢 鰯 尹 咽 殷 淫 隕 蔭 于 迂 盂 烏 鬱 云 暈 穢 曳 洩 裔 穎 頴 嬰 翳 腋
|
132
134
|
曰 奄 宛 怨 俺 冤 袁 婉 焉 堰 淵 焰 筵 厭 鳶 燕 閻 嚥 嗚 凰 嘔 鴨 甕 襖 謳
|
@@ -173,33 +175,33 @@ class Jelegante
|
|
173
175
|
幷 桝 枡 麺 麵 沪 濾 芦 蘆 蝋 蠟 弯 彎
|
174
176
|
].sort();
|
175
177
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
178
|
+
HIRAGANA_PROP = "\\p{Hiragana}";
|
179
|
+
KATAKANA_PROP = "\\p{Katakana}";
|
180
|
+
KANJI_PROP = "#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_PROP}#{UnicodeBlock::CJK_COMPATIBILITY_IDEOGRAPHS_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_PROP}#{UnicodeBlock::CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_PROP}";
|
181
|
+
JYOUYOU_PROP = "#{JYOUYOU_KANJI_LIST.join('')}";
|
182
|
+
JINMEI_PROP = "#{JINMEI_KANJI_LIST.join('')}";
|
183
|
+
HYOUGAI_PROP = "#{HYOUGAI_KANJI_LIST.join('')}";
|
184
|
+
POPULAR_KANJI_PROP = "#{JYOUYOU_PROP}#{JINMEI_PROP}#{HYOUGAI_PROP}"
|
183
185
|
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
186
|
+
HIRAGANA_REGEX = /\p{Hiragana}/;
|
187
|
+
KATAKANA_REGEX = /\p{Katakana}/;
|
188
|
+
KANJI_REGEX = /[#{KANJI_PROP}]/
|
189
|
+
POPULAR_KANJI_REGEX = /[#{POPULAR_KANJI_PROP}]/;
|
188
190
|
|
189
|
-
|
190
|
-
|
191
|
+
NO_KANJI_REGEX = /[^#{KANJI_PROP}]/;
|
192
|
+
NO_POPULAR_KANJI_REGEX = /[^#{POPULAR_KANJI_PROP}]/;
|
191
193
|
|
192
194
|
def self.include_hiragana?(text)
|
193
|
-
return !(
|
195
|
+
return !(HIRAGANA_REGEX.match(text).nil?);
|
194
196
|
end
|
195
197
|
def self.include_katakana?(text)
|
196
|
-
return !(
|
198
|
+
return !(KATAKANA_REGEX.match(text).nil?);
|
197
199
|
end
|
198
200
|
def self.include_kanji?(text)
|
199
|
-
return !(
|
201
|
+
return !(KANJI_REGEX.match(text).nil?);
|
200
202
|
end
|
201
203
|
def self.include_popular_kanji?(text)
|
202
|
-
return !(
|
204
|
+
return !(POPULAR_KANJI_REGEX.match(text).nil?);
|
203
205
|
end
|
204
206
|
|
205
207
|
# 簡易日本語判定
|
@@ -207,7 +209,7 @@ class Jelegante
|
|
207
209
|
# ・漢字を含むが、常用漢字・人名漢字・表外漢字以外の漢字を含まない
|
208
210
|
def self.japanese?(text)
|
209
211
|
return true if (self.include_hiragana?(text) || self.include_katakana?(text));
|
210
|
-
return true if (text.gsub(
|
212
|
+
return true if (text.gsub(NO_KANJI_REGEX,"") =~ /^[#{POPULAR_KANJI_PROP}]+$/);
|
211
213
|
return false;
|
212
214
|
end
|
213
215
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: jelegante
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.0.1
|
5
|
+
version: 1.1.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- lpm11
|
@@ -10,11 +10,22 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-10-
|
13
|
+
date: 2011-10-17 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
16
|
+
name: unicode-block
|
17
17
|
requirement: &id001 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.1.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *id001
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rspec
|
28
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
18
29
|
none: false
|
19
30
|
requirements:
|
20
31
|
- - ~>
|
@@ -22,10 +33,10 @@ dependencies:
|
|
22
33
|
version: 2.3.0
|
23
34
|
type: :development
|
24
35
|
prerelease: false
|
25
|
-
version_requirements: *
|
36
|
+
version_requirements: *id002
|
26
37
|
- !ruby/object:Gem::Dependency
|
27
38
|
name: bundler
|
28
|
-
requirement: &
|
39
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
29
40
|
none: false
|
30
41
|
requirements:
|
31
42
|
- - ~>
|
@@ -33,10 +44,10 @@ dependencies:
|
|
33
44
|
version: 1.0.0
|
34
45
|
type: :development
|
35
46
|
prerelease: false
|
36
|
-
version_requirements: *
|
47
|
+
version_requirements: *id003
|
37
48
|
- !ruby/object:Gem::Dependency
|
38
49
|
name: jeweler
|
39
|
-
requirement: &
|
50
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
40
51
|
none: false
|
41
52
|
requirements:
|
42
53
|
- - ~>
|
@@ -44,10 +55,10 @@ dependencies:
|
|
44
55
|
version: 1.6.4
|
45
56
|
type: :development
|
46
57
|
prerelease: false
|
47
|
-
version_requirements: *
|
58
|
+
version_requirements: *id004
|
48
59
|
- !ruby/object:Gem::Dependency
|
49
60
|
name: rcov
|
50
|
-
requirement: &
|
61
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
51
62
|
none: false
|
52
63
|
requirements:
|
53
64
|
- - ">="
|
@@ -55,7 +66,7 @@ dependencies:
|
|
55
66
|
version: "0"
|
56
67
|
type: :development
|
57
68
|
prerelease: false
|
58
|
-
version_requirements: *
|
69
|
+
version_requirements: *id005
|
59
70
|
description: Guess whether text is japanese or not heuristically.
|
60
71
|
email: lpm11r@gmail.com
|
61
72
|
executables: []
|
@@ -91,7 +102,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
91
102
|
requirements:
|
92
103
|
- - ">="
|
93
104
|
- !ruby/object:Gem::Version
|
94
|
-
hash:
|
105
|
+
hash: 127203842517963343
|
95
106
|
segments:
|
96
107
|
- 0
|
97
108
|
version: "0"
|