jelegante 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/VERSION +1 -1
- data/jelegante.gemspec +5 -2
- data/lib/jelegante.rb +23 -21
- metadata +22 -11
data/Gemfile
CHANGED
@@ -2,6 +2,7 @@ source "http://rubygems.org"
|
|
2
2
|
# Add dependencies required to use your gem here.
|
3
3
|
# Example:
|
4
4
|
# gem "activesupport", ">= 2.3.5"
|
5
|
+
gem "unicode-block", ">= 1.1.0"
|
5
6
|
|
6
7
|
# Add dependencies to develop your gem here.
|
7
8
|
# Include everything needed to run rake, tests, features, etc.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.1
|
1
|
+
1.1.0
|
data/jelegante.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "jelegante"
|
8
|
-
s.version = "1.0.1"
|
8
|
+
s.version = "1.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["lpm11"]
|
12
|
-
s.date = "2011-10-
|
12
|
+
s.date = "2011-10-17"
|
13
13
|
s.description = "Guess whether text is japanese or not heuristically."
|
14
14
|
s.email = "lpm11r@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -40,17 +40,20 @@ Gem::Specification.new do |s|
|
|
40
40
|
s.specification_version = 3
|
41
41
|
|
42
42
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
43
|
+
s.add_runtime_dependency(%q<unicode-block>, [">= 1.1.0"])
|
43
44
|
s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
|
44
45
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
45
46
|
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
46
47
|
s.add_development_dependency(%q<rcov>, [">= 0"])
|
47
48
|
else
|
49
|
+
s.add_dependency(%q<unicode-block>, [">= 1.1.0"])
|
48
50
|
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
49
51
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
50
52
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
51
53
|
s.add_dependency(%q<rcov>, [">= 0"])
|
52
54
|
end
|
53
55
|
else
|
56
|
+
s.add_dependency(%q<unicode-block>, [">= 1.1.0"])
|
54
57
|
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
55
58
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
56
59
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
data/lib/jelegante.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
#!/bin/env ruby
|
2
2
|
#-*- coding: utf-8 -*-
|
3
|
+
require("rubygems");
|
4
|
+
require("unicode-block");
|
3
5
|
|
4
6
|
class Jelegante
|
5
|
-
|
7
|
+
JYOUYOU_KANJI_LIST = %w[
|
6
8
|
亜 哀 愛 悪 握 圧 扱 安 暗 案 以 位 依 偉 囲 委 威 尉 意 慰 易 為 異 移 維
|
7
9
|
緯 胃 衣 違 遺 医 井 域 育 一 壱 逸 稲 芋 印 員 因 姻 引 飲 院 陰 隠 韻 右
|
8
10
|
宇 羽 雨 渦 浦 運 雲 営 影 映 栄 永 泳 英 衛 詠 鋭 液 疫 益 駅 悦 謁 越 閲
|
@@ -83,7 +85,7 @@ class Jelegante
|
|
83
85
|
路 露 労 廊 朗 楼 浪 漏 老 郎 六 録 論 和 話 賄 惑 枠 湾 腕
|
84
86
|
].sort();
|
85
87
|
|
86
|
-
|
88
|
+
JINMEI_KANJI_LIST = %w[
|
87
89
|
丑 丞 串 乃 之 乎 也 云 亘 亙 些 亦 亥 亨 亮 仔 伊 伎 伍 伽 佃 佑 伶 侃 侑
|
88
90
|
俄 俠 俣 俐 侶 倭 俺 俱 倦 倖 偲 僅 傭 儲 允 兎 兜 其 冥 冴 冶 凄 凌 凜 凛
|
89
91
|
凧 凪 凰 凱 函 刹 劉 劫 勁 勃 勾 匂 勿 匡 廿 卜 卯 卿 厨 厩 叉 叡 叢 叶 只
|
@@ -126,7 +128,7 @@ class Jelegante
|
|
126
128
|
類 禮 曆 歷 練 鍊 郞 朗 廊 錄
|
127
129
|
].sort();
|
128
130
|
|
129
|
-
|
131
|
+
HYOUGAI_KANJI_LIST = %w[
|
130
132
|
啞 唖 蛙 鴉 埃 挨 曖 靄 軋 斡 按 庵 鞍 闇 已 夷 畏 韋 帷 萎 椅 葦 彙 飴 謂
|
131
133
|
閾 溢 鰯 尹 咽 殷 淫 隕 蔭 于 迂 盂 烏 鬱 云 暈 穢 曳 洩 裔 穎 頴 嬰 翳 腋
|
132
134
|
曰 奄 宛 怨 俺 冤 袁 婉 焉 堰 淵 焰 筵 厭 鳶 燕 閻 嚥 嗚 凰 嘔 鴨 甕 襖 謳
|
@@ -173,33 +175,33 @@ class Jelegante
|
|
173
175
|
幷 桝 枡 麺 麵 沪 濾 芦 蘆 蝋 蠟 弯 彎
|
174
176
|
].sort();
|
175
177
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
178
|
+
HIRAGANA_PROP = "\\p{Hiragana}";
|
179
|
+
KATAKANA_PROP = "\\p{Katakana}";
|
180
|
+
KANJI_PROP = "#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_PROP}#{UnicodeBlock::CJK_COMPATIBILITY_IDEOGRAPHS_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_PROP}#{UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_PROP}#{UnicodeBlock::CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_PROP}";
|
181
|
+
JYOUYOU_PROP = "#{JYOUYOU_KANJI_LIST.join('')}";
|
182
|
+
JINMEI_PROP = "#{JINMEI_KANJI_LIST.join('')}";
|
183
|
+
HYOUGAI_PROP = "#{HYOUGAI_KANJI_LIST.join('')}";
|
184
|
+
POPULAR_KANJI_PROP = "#{JYOUYOU_PROP}#{JINMEI_PROP}#{HYOUGAI_PROP}"
|
183
185
|
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
186
|
+
HIRAGANA_REGEX = /\p{Hiragana}/;
|
187
|
+
KATAKANA_REGEX = /\p{Katakana}/;
|
188
|
+
KANJI_REGEX = /[#{KANJI_PROP}]/
|
189
|
+
POPULAR_KANJI_REGEX = /[#{POPULAR_KANJI_PROP}]/;
|
188
190
|
|
189
|
-
|
190
|
-
|
191
|
+
NO_KANJI_REGEX = /[^#{KANJI_PROP}]/;
|
192
|
+
NO_POPULAR_KANJI_REGEX = /[^#{POPULAR_KANJI_PROP}]/;
|
191
193
|
|
192
194
|
def self.include_hiragana?(text)
|
193
|
-
return !(
|
195
|
+
return !(HIRAGANA_REGEX.match(text).nil?);
|
194
196
|
end
|
195
197
|
def self.include_katakana?(text)
|
196
|
-
return !(
|
198
|
+
return !(KATAKANA_REGEX.match(text).nil?);
|
197
199
|
end
|
198
200
|
def self.include_kanji?(text)
|
199
|
-
return !(
|
201
|
+
return !(KANJI_REGEX.match(text).nil?);
|
200
202
|
end
|
201
203
|
def self.include_popular_kanji?(text)
|
202
|
-
return !(
|
204
|
+
return !(POPULAR_KANJI_REGEX.match(text).nil?);
|
203
205
|
end
|
204
206
|
|
205
207
|
# 簡易日本語判定
|
@@ -207,7 +209,7 @@ class Jelegante
|
|
207
209
|
# ・漢字を含むが、常用漢字・人名漢字・表外漢字以外の漢字を含まない
|
208
210
|
def self.japanese?(text)
|
209
211
|
return true if (self.include_hiragana?(text) || self.include_katakana?(text));
|
210
|
-
return true if (text.gsub(
|
212
|
+
return true if (text.gsub(NO_KANJI_REGEX,"") =~ /^[#{POPULAR_KANJI_PROP}]+$/);
|
211
213
|
return false;
|
212
214
|
end
|
213
215
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: jelegante
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.0.1
|
5
|
+
version: 1.1.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- lpm11
|
@@ -10,11 +10,22 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-10-
|
13
|
+
date: 2011-10-17 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
16
|
+
name: unicode-block
|
17
17
|
requirement: &id001 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.1.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *id001
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rspec
|
28
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
18
29
|
none: false
|
19
30
|
requirements:
|
20
31
|
- - ~>
|
@@ -22,10 +33,10 @@ dependencies:
|
|
22
33
|
version: 2.3.0
|
23
34
|
type: :development
|
24
35
|
prerelease: false
|
25
|
-
version_requirements: *
|
36
|
+
version_requirements: *id002
|
26
37
|
- !ruby/object:Gem::Dependency
|
27
38
|
name: bundler
|
28
|
-
requirement: &
|
39
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
29
40
|
none: false
|
30
41
|
requirements:
|
31
42
|
- - ~>
|
@@ -33,10 +44,10 @@ dependencies:
|
|
33
44
|
version: 1.0.0
|
34
45
|
type: :development
|
35
46
|
prerelease: false
|
36
|
-
version_requirements: *
|
47
|
+
version_requirements: *id003
|
37
48
|
- !ruby/object:Gem::Dependency
|
38
49
|
name: jeweler
|
39
|
-
requirement: &
|
50
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
40
51
|
none: false
|
41
52
|
requirements:
|
42
53
|
- - ~>
|
@@ -44,10 +55,10 @@ dependencies:
|
|
44
55
|
version: 1.6.4
|
45
56
|
type: :development
|
46
57
|
prerelease: false
|
47
|
-
version_requirements: *
|
58
|
+
version_requirements: *id004
|
48
59
|
- !ruby/object:Gem::Dependency
|
49
60
|
name: rcov
|
50
|
-
requirement: &
|
61
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
51
62
|
none: false
|
52
63
|
requirements:
|
53
64
|
- - ">="
|
@@ -55,7 +66,7 @@ dependencies:
|
|
55
66
|
version: "0"
|
56
67
|
type: :development
|
57
68
|
prerelease: false
|
58
|
-
version_requirements: *
|
69
|
+
version_requirements: *id005
|
59
70
|
description: Guess whether text is japanese or not heuristically.
|
60
71
|
email: lpm11r@gmail.com
|
61
72
|
executables: []
|
@@ -91,7 +102,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
91
102
|
requirements:
|
92
103
|
- - ">="
|
93
104
|
- !ruby/object:Gem::Version
|
94
|
-
hash:
|
105
|
+
hash: 127203842517963343
|
95
106
|
segments:
|
96
107
|
- 0
|
97
108
|
version: "0"
|