rseg 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.5
1
+ 0.1.6
@@ -1,49 +1,51 @@
1
- class Dict < Engine
2
- @@root = nil
3
- @@dict_path = File.join(File.dirname(__FILE__), '../../dict/dict.hash')
1
+ module RsegEngine
2
+ class Dict < Engine
3
+ @@root = nil
4
+ @@dict_path = File.join(File.dirname(__FILE__), '../../dict/dict.hash')
4
5
 
5
- class << self
6
- def dict_path=(path)
7
- @@dict_path = path
8
- end
6
+ class << self
7
+ def dict_path=(path)
8
+ @@dict_path = path
9
+ end
9
10
 
10
- def dict_path
11
- @@dict_path
11
+ def dict_path
12
+ @@dict_path
13
+ end
12
14
  end
13
- end
14
15
 
15
- def initialize
16
- @@root ||= load_dict(@@dict_path)
17
- @word = ''
18
- @node = @@root
19
- super
20
- end
16
+ def initialize
17
+ @@root ||= load_dict(@@dict_path)
18
+ @word = ''
19
+ @node = @@root
20
+ super
21
+ end
21
22
 
22
- def process(char)
23
- match = false
24
- word = nil
23
+ def process(char)
24
+ match = false
25
+ word = nil
25
26
 
26
- if @node[char]
27
- @word << char
28
- @node = @node[char]
29
- match = true
30
- else
31
- if @node[:end] || @word.chars.to_a.length == 1
32
- word = @word
27
+ if @node[char]
28
+ @word << char
29
+ @node = @node[char]
30
+ match = true
33
31
  else
34
- word = @word.chars.to_a
35
- end
32
+ if @node[:end] || @word.chars.to_a.length == 1
33
+ word = @word
34
+ else
35
+ word = @word.chars.to_a
36
+ end
36
37
 
37
- @node = @@root
38
- @word = ''
39
- match = false
40
- end
38
+ @node = @@root
39
+ @word = ''
40
+ match = false
41
+ end
41
42
 
42
- [match, word]
43
- end
43
+ [match, word]
44
+ end
44
45
 
45
- private
46
- def load_dict(path)
47
- File.open(path, "rb") {|io| Marshal.load(io)}
46
+ private
47
+ def load_dict(path)
48
+ File.open(path, "rb") {|io| Marshal.load(io)}
49
+ end
48
50
  end
49
51
  end
@@ -1,17 +1,19 @@
1
- class Engine
2
- def initialize
3
- @running = true
4
- end
1
+ module RsegEngine
2
+ class Engine
3
+ def initialize
4
+ @running = true
5
+ end
5
6
 
6
- def stop
7
- @running = false
8
- end
7
+ def stop
8
+ @running = false
9
+ end
9
10
 
10
- def run
11
- @running = true
12
- end
11
+ def run
12
+ @running = true
13
+ end
13
14
 
14
- def running?
15
- @running
15
+ def running?
16
+ @running
17
+ end
16
18
  end
17
19
  end
@@ -1,24 +1,26 @@
1
- LETTER_SYMBOLS = ('a'..'z').to_a + ('A'..'Z').to_a
1
+ module RsegEngine
2
+ LETTER_SYMBOLS = ('a'..'z').to_a + ('A'..'Z').to_a
2
3
 
3
- class English < Engine
4
- def initialize
5
- @word = ''
6
- super
7
- end
8
-
9
- def process(char)
10
- match = false
11
- word = nil
12
-
13
- if LETTER_SYMBOLS.include?(char)
14
- @word << char
15
- match = true
16
- else
17
- word = @word
4
+ class English < Engine
5
+ def initialize
18
6
  @word = ''
19
- match = false
7
+ super
20
8
  end
9
+
10
+ def process(char)
11
+ match = false
12
+ word = nil
21
13
 
22
- [match, word]
23
- end
14
+ if LETTER_SYMBOLS.include?(char)
15
+ @word << char
16
+ match = true
17
+ else
18
+ word = @word
19
+ @word = ''
20
+ match = false
21
+ end
22
+
23
+ [match, word]
24
+ end
25
+ end
24
26
  end
@@ -1,49 +1,51 @@
1
- class Name < Engine
2
- @@last_names = %W(丁 卜 刁 七 弓 干 于 王 尤 孔 方 申 白 甘 田 包 石 左 平 司 皮 史 池 艾 年 匡 充 江 印
3
-
4
-
5
- 羿
6
- 晏 桑 高 凌 桂 容 姬 劳 桑 桂 袁 时 夏 凌 洪 翁 家 芮 乌 祖 索 贡
7
- 宿
8
- 邴 焦 班 甯 钭 景 邰 劳 茹 寇 荆
9
-
10
- 寿
11
- 广
12
- 衡 融 蒯 逯
13
-
14
-
15
- 怀 饶 顾 苏 龚 边 栾 权) #:nodoc:
1
+ module RsegEngine
2
+ class Name < Engine
3
+ @@last_names = %W(丁
4
+
5
+ 岳 东 林
6
+ 羿
7
+ 桂 袁 时 祝 席 徐 高 夏 凌 洪 翁 家 芮 乌 祖 索 贡
8
+ 宿
9
+ 钭 景 邰 劳 茹 寇 荆
10
+ 詹 闻 逄 雍 訾 郎 农 路 骆 虞 经 裘 郁 滑 靳 闻 逄 雍
11
+ 寿
12
+ 广
13
+
14
+ 应 储 糜 隗 历 蒲 慕 蔚 隆 鞠 关
15
+
16
+ 郑 严 蓟 薄 谭 罗 买 蓝 蓬 怀 党 饶 顾 苏 龚 边 栾 权) #:nodoc:
16
17
 
17
- @@first_names = %W(文 铭 菁 郁 怡 智 德 祥 志 华 孟 庆 雅 佩 晓 蓉 明 仁 宇 青 慧 豪 琪 安
18
- 惠 宗 信 盈 君 秀 敏 伶 佳 国 荣 忠 宏 育 丽 圣 淑 彦 龙 冠 后 静 娟 子
19
- 嘉 瑞 柏 弘 芳 正 玮 贞 如 凯 元 士 伟 杰 颍 霖 玲 仪 珮 英 建 政 真 珍
20
- 美 世 立 秋 婷 贤 瑜 中 玉 维 莹 翔 家 芬 昌 裕 雯 萍 永 成 宜 鸿 珊 民
21
- 欣 哲 良 伦 燕 梦 磊 丹 元 一 昌 红 健) #:nodoc:
22
- def initialize
23
- @word = ''
24
- @last = false
25
- super
26
- end
27
-
28
- def process(char)
29
- match = false
30
- word = nil
31
-
32
- if !@last && @@last_names.include?(char)
33
- @word << char
34
- match = true
35
- @last = true
36
- elsif @last && @word.chars.to_a.length < 3 && @@first_names.include?(char)
37
- @word << char
38
- match = true
39
- @unit = true
40
- else
41
- word = @word
18
+ @@first_names = %W(文 铭 菁 郁 怡 智 德 祥 志 华 孟 庆 雅 佩 晓 蓉 明 仁 宇 青 慧 豪 琪 安
19
+ 惠 宗 信 盈 君 秀 敏 伶 佳 国 荣 忠 宏 育 丽 圣 淑 彦 龙 冠 后 静 娟 子
20
+ 嘉 瑞 柏 弘 芳 正 玮 贞 如 凯 元 士 伟 杰 颍 霖 玲 仪 珮 英 建 政 真 珍
21
+ 美 世 立 秋 婷 贤 瑜 中 玉 维 莹 翔 家 芬 昌 裕 雯 萍 永 成 宜 鸿 珊 民
22
+ 欣 哲 良 伦 燕 梦 磊 丹 元 一 昌 红 健) #:nodoc:
23
+ def initialize
42
24
  @word = ''
43
25
  @last = false
44
- match = false
26
+ super
45
27
  end
28
+
29
+ def process(char)
30
+ match = false
31
+ word = nil
32
+
33
+ if !@last && @@last_names.include?(char)
34
+ @word << char
35
+ match = true
36
+ @last = true
37
+ elsif @last && @word.chars.to_a.length < 3 && @@first_names.include?(char)
38
+ @word << char
39
+ match = true
40
+ @unit = true
41
+ else
42
+ word = @word
43
+ @word = ''
44
+ @last = false
45
+ match = false
46
+ end
46
47
 
47
- [match, word]
48
+ [match, word]
49
+ end
48
50
  end
49
51
  end
@@ -1,59 +1,61 @@
1
- class Number < Engine
2
- @@number_symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
3
- '', '', '', '', '', '', '', '', '', '',
4
- '', '', '', '', '', '', '', '', '', '',
5
- '', '', '', '%', '¥', '', '$', '.', '', '', '每']
6
- @@subunit_symbols = ['', '', '', '', '', '', '', '', '', '',
7
- '', '', '', '', '', '', '', '', '', '',
8
- '', '', '', '', '', '', '', '亿', '', '']
9
- @@unit_symbols = ['', '', '', '', '', '', '', '', '', '',
10
- '', '', '', '', '', '', '', '', '', '',
11
- '', '', '', '', '', '', '', '', '', '',
12
- '', '', '', '', '', '', '', '', '', '',
13
- '', '', '', '', '', '', '', '', '', '亿',
14
- '', '', '', '', '', '', '', '', '', '',
15
- '', '', '', '']
1
+ module RsegEngine
2
+ class Number < Engine
3
+ @@number_symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
4
+ '', '', '', '', '', '', '', '', '', '',
5
+ '', '', '', '', '', '', '', '', '', '',
6
+ '', '', '', '%', '¥', '', '$', '.', '', '', '每']
7
+ @@subunit_symbols = ['', '', '', '', '', '', '', '', '', '',
8
+ '', '', '', '', '', '', '', '', '', '',
9
+ '', '', '', '', '', '', '', '亿', '', '']
10
+ @@unit_symbols = ['', '', '', '', '', '', '', '', '', '',
11
+ '', '', '', '', '', '', '', '', '', '',
12
+ '', '', '', '', '', '', '', '', '', '',
13
+ '', '', '', '', '', '', '', '', '', '',
14
+ '', '', '', '', '', '', '', '', '', '亿',
15
+ '', '', '', '', '岁', '家', '所', '期', '场', '投',
16
+ '中', '辆', '只', '头']
16
17
 
17
- def initialize
18
- @word = ''
19
- @number = ''
20
- @unit = false
21
- @subunit = false
22
- super
23
- end
24
-
25
- def process(char)
26
- match = false
27
- word = nil
28
-
29
- if (!@subunit || @unit) && @@number_symbols.include?(char)
30
- @number << char
31
- match = true
32
- @unit = false
33
- @subunit = false
34
- elsif (@number != '' || @unit) && @@subunit_symbols.include?(char)
35
- @number << char
36
- match = true
37
- @subunit = true
38
- end
39
-
40
- if (@number != '' || @subunit) && @@unit_symbols.include?(char)
41
- @word << @number
42
- @word << char if !match
43
- @number = ''
44
- @unit = true
45
- match = true
46
- end
47
-
48
- if !match
49
- word = (@word != '') ? @word : @number
18
+ def initialize
50
19
  @word = ''
51
20
  @number = ''
52
- match = false
53
21
  @unit = false
54
22
  @subunit = false
23
+ super
55
24
  end
25
+
26
+ def process(char)
27
+ match = false
28
+ word = nil
56
29
 
57
- [match, word]
30
+ if (!@subunit || @unit) && @@number_symbols.include?(char)
31
+ @number << char
32
+ match = true
33
+ @unit = false
34
+ @subunit = false
35
+ elsif (@number != '' || @unit) && @@subunit_symbols.include?(char)
36
+ @number << char
37
+ match = true
38
+ @subunit = true
39
+ end
40
+
41
+ if (@number != '' || @subunit) && @@unit_symbols.include?(char)
42
+ @word << @number
43
+ @word << char if !match
44
+ @number = ''
45
+ @unit = true
46
+ match = true
47
+ end
48
+
49
+ if !match
50
+ word = (@word != '') ? @word : @number
51
+ @word = ''
52
+ @number = ''
53
+ match = false
54
+ @unit = false
55
+ @subunit = false
56
+ end
57
+
58
+ [match, word]
59
+ end
58
60
  end
59
61
  end
@@ -1,7 +1,9 @@
1
- class Conjunction
2
- @@conjunctions = %W(给 的 说 对 在 和 是 被 最 所 那 由 这 有 将 你 会 与 他 为 不 没 很 了 啊 哦 呵 把 去 从)
1
+ module RsegFilter
2
+ class Conjunction
3
+ @@conjunctions = %W(给 的 说 对 在 和 是 被 最 所 那 由 这 有 将 你 会 与 他 为 不 没 很 了 啊 哦 呵 把 去 从)
3
4
 
4
- def self.filter(char)
5
- @@conjunctions.include?(char) ? :conjunction : char
5
+ def self.filter(char)
6
+ @@conjunctions.include?(char) ? :conjunction : char
7
+ end
6
8
  end
7
9
  end
@@ -1,17 +1,19 @@
1
- class Fullwidth
2
- @@fullwidth_chars = {'１' => '1', '２' => '2', '３' => '3', '４' => '4', '５' => '5', '６' => '6', '７' => '7', '８' => '8',
3
- '９' => '9', '０' => '0', 'ａ' => 'a', 'ｂ' => 'b', 'ｃ' => 'c', 'ｄ' => 'd', 'ｅ' => 'e', 'ｆ' => 'f',
4
- 'ｇ' => 'g', 'ｈ' => 'h', 'ｉ' => 'i', 'ｊ' => 'j', 'ｋ' => 'k', 'ｌ' => 'l', 'ｍ' => 'm', 'ｎ' => 'n',
5
- 'ｏ' => 'o', 'ｐ' => 'p', 'ｑ' => 'q', 'ｒ' => 'r', 'ｓ' => 's', 'ｔ' => 't', 'ｕ' => 'u', 'ｖ' => 'v',
6
- 'ｗ' => 'w', 'ｘ' => 'x', 'ｙ' => 'y', 'ｚ' => 'z', 'Ａ' => 'A', 'Ｂ' => 'B', 'Ｃ' => 'C', 'Ｄ' => 'D',
7
- 'Ｅ' => 'E', 'Ｆ' => 'F', 'Ｇ' => 'G', 'Ｈ' => 'H', 'Ｉ' => 'I', 'Ｊ' => 'J', 'Ｋ' => 'K', 'Ｌ' => 'L',
8
- 'Ｍ' => 'M', 'Ｎ' => 'N', 'Ｏ' => 'O', 'Ｐ' => 'P', 'Ｑ' => 'Q', 'Ｒ' => 'R', 'Ｓ' => 'S', 'Ｔ' => 'T',
9
- 'Ｕ' => 'U', 'Ｖ' => 'V', 'Ｗ' => 'W', 'Ｘ' => 'X', 'Ｙ' => 'Y', 'Ｚ' => 'Z', '－' => '-', '＋' => '+',
10
- '—' => '-', '，' => ',', '／' => '/', '·' => '.'}
1
+ module RsegFilter
2
+ class Fullwidth
3
+ @@fullwidth_chars = {'１' => '1', '２' => '2', '３' => '3', '４' => '4', '５' => '5', '６' => '6', '７' => '7', '８' => '8',
4
+ '９' => '9', '０' => '0', 'ａ' => 'a', 'ｂ' => 'b', 'ｃ' => 'c', 'ｄ' => 'd', 'ｅ' => 'e', 'ｆ' => 'f',
5
+ 'ｇ' => 'g', 'ｈ' => 'h', 'ｉ' => 'i', 'ｊ' => 'j', 'ｋ' => 'k', 'ｌ' => 'l', 'ｍ' => 'm', 'ｎ' => 'n',
6
+ 'ｏ' => 'o', 'ｐ' => 'p', 'ｑ' => 'q', 'ｒ' => 'r', 'ｓ' => 's', 'ｔ' => 't', 'ｕ' => 'u', 'ｖ' => 'v',
7
+ 'ｗ' => 'w', 'ｘ' => 'x', 'ｙ' => 'y', 'ｚ' => 'z', 'Ａ' => 'A', 'Ｂ' => 'B', 'Ｃ' => 'C', 'Ｄ' => 'D',
8
+ 'Ｅ' => 'E', 'Ｆ' => 'F', 'Ｇ' => 'G', 'Ｈ' => 'H', 'Ｉ' => 'I', 'Ｊ' => 'J', 'Ｋ' => 'K', 'Ｌ' => 'L',
9
+ 'Ｍ' => 'M', 'Ｎ' => 'N', 'Ｏ' => 'O', 'Ｐ' => 'P', 'Ｑ' => 'Q', 'Ｒ' => 'R', 'Ｓ' => 'S', 'Ｔ' => 'T',
10
+ 'Ｕ' => 'U', 'Ｖ' => 'V', 'Ｗ' => 'W', 'Ｘ' => 'X', 'Ｙ' => 'Y', 'Ｚ' => 'Z', '－' => '-', '＋' => '+',
11
+ '—' => '-', '，' => ',', '／' => '/', '·' => '.'}
11
12
 
12
- class << self
13
- def filter(char)
14
- @@fullwidth_chars[char].nil? ? char : @@fullwidth_chars[char]
13
+ class << self
14
+ def filter(char)
15
+ @@fullwidth_chars[char].nil? ? char : @@fullwidth_chars[char]
16
+ end
15
17
  end
16
18
  end
17
19
  end
@@ -1,12 +1,14 @@
1
- class Symbol
2
- @@separators = ['`', '[', ']', '、', '=', '‘', ';', '。', '|', '?', '》',
3
- '', '', '', '', '', '', '', '', '', '', '',
4
- '', '', '', '', '', '', '', '', '', '', '', '〗',
5
- '', '', '', '', '`', '~', '!', '@', '#', '^',
6
- '&', '*', '\\', '(', ')', '=', '{', '}', '[', ']',
7
- '|', ';', ':', "'", '<', '>', '?', "\n", "\t", "\r",
8
- ' ', '-', '/', '+', ',', ' ']
9
- def self.filter(char)
10
- @@separators.include?(char) ? :symbol : char
1
+ module RsegFilter
2
+ class Symbol
3
+ @@separators = ['', '', '', '', '', '', '', '', '', '', '',
4
+ '', '', '', '', '', '', '', '', '', '', '',
5
+ '', '', '', '', '', '', '', '', '', '', '〖', '〗',
6
+ '', '', '', '', '`', '~', '!', '@', '#', '^',
7
+ '&', '*', '\\', '(', ')', '=', '{', '}', '[', ']',
8
+ '|', ';', ':', "'", '<', '>', '?', "\n", "\t", "\r",
9
+ ' ', '-', '/', '+', ',', ' ']
10
+ def self.filter(char)
11
+ @@separators.include?(char) ? :symbol : char
12
+ end
11
13
  end
12
14
  end
@@ -15,8 +15,14 @@ require File.join(File.dirname(__FILE__), 'filters/conjunction')
15
15
 
16
16
  class Rseg
17
17
  include Singleton
18
+ include RsegEngine
19
+ include RsegFilter
18
20
 
19
21
  class << self
22
+ def dict_path=(path)
23
+ RsegEngine::Dict.dict_path = path
24
+ end
25
+
20
26
  def segment(input)
21
27
  Rseg.instance.input = input
22
28
  Rseg.instance.segment
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rseg
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 5
10
- version: 0.1.5
9
+ - 6
10
+ version: 0.1.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - Yuanyi Zhang