rseg 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.5
1
+ 0.1.6
@@ -1,49 +1,51 @@
1
- class Dict < Engine
2
- @@root = nil
3
- @@dict_path = File.join(File.dirname(__FILE__), '../../dict/dict.hash')
1
+ module RsegEngine
2
+ class Dict < Engine
3
+ @@root = nil
4
+ @@dict_path = File.join(File.dirname(__FILE__), '../../dict/dict.hash')
4
5
 
5
- class << self
6
- def dict_path=(path)
7
- @@dict_path = path
8
- end
6
+ class << self
7
+ def dict_path=(path)
8
+ @@dict_path = path
9
+ end
9
10
 
10
- def dict_path
11
- @@dict_path
11
+ def dict_path
12
+ @@dict_path
13
+ end
12
14
  end
13
- end
14
15
 
15
- def initialize
16
- @@root ||= load_dict(@@dict_path)
17
- @word = ''
18
- @node = @@root
19
- super
20
- end
16
+ def initialize
17
+ @@root ||= load_dict(@@dict_path)
18
+ @word = ''
19
+ @node = @@root
20
+ super
21
+ end
21
22
 
22
- def process(char)
23
- match = false
24
- word = nil
23
+ def process(char)
24
+ match = false
25
+ word = nil
25
26
 
26
- if @node[char]
27
- @word << char
28
- @node = @node[char]
29
- match = true
30
- else
31
- if @node[:end] || @word.chars.to_a.length == 1
32
- word = @word
27
+ if @node[char]
28
+ @word << char
29
+ @node = @node[char]
30
+ match = true
33
31
  else
34
- word = @word.chars.to_a
35
- end
32
+ if @node[:end] || @word.chars.to_a.length == 1
33
+ word = @word
34
+ else
35
+ word = @word.chars.to_a
36
+ end
36
37
 
37
- @node = @@root
38
- @word = ''
39
- match = false
40
- end
38
+ @node = @@root
39
+ @word = ''
40
+ match = false
41
+ end
41
42
 
42
- [match, word]
43
- end
43
+ [match, word]
44
+ end
44
45
 
45
- private
46
- def load_dict(path)
47
- File.open(path, "rb") {|io| Marshal.load(io)}
46
+ private
47
+ def load_dict(path)
48
+ File.open(path, "rb") {|io| Marshal.load(io)}
49
+ end
48
50
  end
49
51
  end
@@ -1,17 +1,19 @@
1
- class Engine
2
- def initialize
3
- @running = true
4
- end
1
+ module RsegEngine
2
+ class Engine
3
+ def initialize
4
+ @running = true
5
+ end
5
6
 
6
- def stop
7
- @running = false
8
- end
7
+ def stop
8
+ @running = false
9
+ end
9
10
 
10
- def run
11
- @running = true
12
- end
11
+ def run
12
+ @running = true
13
+ end
13
14
 
14
- def running?
15
- @running
15
+ def running?
16
+ @running
17
+ end
16
18
  end
17
19
  end
@@ -1,24 +1,26 @@
1
- LETTER_SYMBOLS = ('a'..'z').to_a + ('A'..'Z').to_a
1
+ module RsegEngine
2
+ LETTER_SYMBOLS = ('a'..'z').to_a + ('A'..'Z').to_a
2
3
 
3
- class English < Engine
4
- def initialize
5
- @word = ''
6
- super
7
- end
8
-
9
- def process(char)
10
- match = false
11
- word = nil
12
-
13
- if LETTER_SYMBOLS.include?(char)
14
- @word << char
15
- match = true
16
- else
17
- word = @word
4
+ class English < Engine
5
+ def initialize
18
6
  @word = ''
19
- match = false
7
+ super
20
8
  end
9
+
10
+ def process(char)
11
+ match = false
12
+ word = nil
21
13
 
22
- [match, word]
23
- end
14
+ if LETTER_SYMBOLS.include?(char)
15
+ @word << char
16
+ match = true
17
+ else
18
+ word = @word
19
+ @word = ''
20
+ match = false
21
+ end
22
+
23
+ [match, word]
24
+ end
25
+ end
24
26
  end
@@ -1,49 +1,51 @@
1
- class Name < Engine
2
- @@last_names = %W(丁 卜 刁 七 弓 干 于 王 尤 孔 方 申 白 甘 田 包 石 左 平 司 皮 史 池 艾 年 匡 充 江 印
3
-
4
-
5
- 羿
6
- 晏 桑 高 凌 桂 容 姬 劳 桑 桂 袁 时 夏 凌 洪 翁 家 芮 乌 祖 索 贡
7
- 宿
8
- 邴 焦 班 甯 钭 景 邰 劳 茹 寇 荆
9
-
10
- 寿
11
- 广
12
- 衡 融 蒯 逯
13
-
14
-
15
- 怀 饶 顾 苏 龚 边 栾 权) #:nodoc:
1
+ module RsegEngine
2
+ class Name < Engine
3
+ @@last_names = %W(丁
4
+
5
+ 岳 东 林
6
+ 羿
7
+ 桂 袁 时 祝 席 徐 高 夏 凌 洪 翁 家 芮 乌 祖 索 贡
8
+ 宿
9
+ 钭 景 邰 劳 茹 寇 荆
10
+ 詹 闻 逄 雍 訾 郎 农 路 骆 虞 经 裘 郁 滑 靳 闻 逄 雍
11
+ 寿
12
+ 广
13
+
14
+ 应 储 糜 隗 历 蒲 慕 蔚 隆 鞠 关
15
+
16
+ 郑 严 蓟 薄 谭 罗 买 蓝 蓬 怀 党 饶 顾 苏 龚 边 栾 权) #:nodoc:
16
17
 
17
- @@first_names = %W(文 铭 菁 郁 怡 智 德 祥 志 华 孟 庆 雅 佩 晓 蓉 明 仁 宇 青 慧 豪 琪 安
18
- 惠 宗 信 盈 君 秀 敏 伶 佳 国 荣 忠 宏 育 丽 圣 淑 彦 龙 冠 后 静 娟 子
19
- 嘉 瑞 柏 弘 芳 正 玮 贞 如 凯 元 士 伟 杰 颍 霖 玲 仪 珮 英 建 政 真 珍
20
- 美 世 立 秋 婷 贤 瑜 中 玉 维 莹 翔 家 芬 昌 裕 雯 萍 永 成 宜 鸿 珊 民
21
- 欣 哲 良 伦 燕 梦 磊 丹 元 一 昌 红 健) #:nodoc:
22
- def initialize
23
- @word = ''
24
- @last = false
25
- super
26
- end
27
-
28
- def process(char)
29
- match = false
30
- word = nil
31
-
32
- if !@last && @@last_names.include?(char)
33
- @word << char
34
- match = true
35
- @last = true
36
- elsif @last && @word.chars.to_a.length < 3 && @@first_names.include?(char)
37
- @word << char
38
- match = true
39
- @unit = true
40
- else
41
- word = @word
18
+ @@first_names = %W(文 铭 菁 郁 怡 智 德 祥 志 华 孟 庆 雅 佩 晓 蓉 明 仁 宇 青 慧 豪 琪 安
19
+ 惠 宗 信 盈 君 秀 敏 伶 佳 国 荣 忠 宏 育 丽 圣 淑 彦 龙 冠 后 静 娟 子
20
+ 嘉 瑞 柏 弘 芳 正 玮 贞 如 凯 元 士 伟 杰 颍 霖 玲 仪 珮 英 建 政 真 珍
21
+ 美 世 立 秋 婷 贤 瑜 中 玉 维 莹 翔 家 芬 昌 裕 雯 萍 永 成 宜 鸿 珊 民
22
+ 欣 哲 良 伦 燕 梦 磊 丹 元 一 昌 红 健) #:nodoc:
23
+ def initialize
42
24
  @word = ''
43
25
  @last = false
44
- match = false
26
+ super
45
27
  end
28
+
29
+ def process(char)
30
+ match = false
31
+ word = nil
32
+
33
+ if !@last && @@last_names.include?(char)
34
+ @word << char
35
+ match = true
36
+ @last = true
37
+ elsif @last && @word.chars.to_a.length < 3 && @@first_names.include?(char)
38
+ @word << char
39
+ match = true
40
+ @unit = true
41
+ else
42
+ word = @word
43
+ @word = ''
44
+ @last = false
45
+ match = false
46
+ end
46
47
 
47
- [match, word]
48
+ [match, word]
49
+ end
48
50
  end
49
51
  end
@@ -1,59 +1,61 @@
1
- class Number < Engine
2
- @@number_symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
3
- '', '', '', '', '', '', '', '', '', '',
4
- '', '', '', '', '', '', '', '', '', '',
5
- '', '', '', '%', '¥', '', '$', '.', '', '', '每']
6
- @@subunit_symbols = ['', '', '', '', '', '', '', '', '', '',
7
- '', '', '', '', '', '', '', '', '', '',
8
- '', '', '', '', '', '', '', '亿', '', '']
9
- @@unit_symbols = ['', '', '', '', '', '', '', '', '', '',
10
- '', '', '', '', '', '', '', '', '', '',
11
- '', '', '', '', '', '', '', '', '', '',
12
- '', '', '', '', '', '', '', '', '', '',
13
- '', '', '', '', '', '', '', '', '', '亿',
14
- '', '', '', '', '', '', '', '', '', '',
15
- '', '', '', '']
1
+ module RsegEngine
2
+ class Number < Engine
3
+ @@number_symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
4
+ '', '', '', '', '', '', '', '', '', '',
5
+ '', '', '', '', '', '', '', '', '', '',
6
+ '', '', '', '%', '¥', '', '$', '.', '', '', '每']
7
+ @@subunit_symbols = ['', '', '', '', '', '', '', '', '', '',
8
+ '', '', '', '', '', '', '', '', '', '',
9
+ '', '', '', '', '', '', '', '亿', '', '']
10
+ @@unit_symbols = ['', '', '', '', '', '', '', '', '', '',
11
+ '', '', '', '', '', '', '', '', '', '',
12
+ '', '', '', '', '', '', '', '', '', '',
13
+ '', '', '', '', '', '', '', '', '', '',
14
+ '', '', '', '', '', '', '', '', '', '亿',
15
+ '', '', '', '', '岁', '家', '所', '期', '场', '投',
16
+ '中', '辆', '只', '头']
16
17
 
17
- def initialize
18
- @word = ''
19
- @number = ''
20
- @unit = false
21
- @subunit = false
22
- super
23
- end
24
-
25
- def process(char)
26
- match = false
27
- word = nil
28
-
29
- if (!@subunit || @unit) && @@number_symbols.include?(char)
30
- @number << char
31
- match = true
32
- @unit = false
33
- @subunit = false
34
- elsif (@number != '' || @unit) && @@subunit_symbols.include?(char)
35
- @number << char
36
- match = true
37
- @subunit = true
38
- end
39
-
40
- if (@number != '' || @subunit) && @@unit_symbols.include?(char)
41
- @word << @number
42
- @word << char if !match
43
- @number = ''
44
- @unit = true
45
- match = true
46
- end
47
-
48
- if !match
49
- word = (@word != '') ? @word : @number
18
+ def initialize
50
19
  @word = ''
51
20
  @number = ''
52
- match = false
53
21
  @unit = false
54
22
  @subunit = false
23
+ super
55
24
  end
25
+
26
+ def process(char)
27
+ match = false
28
+ word = nil
56
29
 
57
- [match, word]
30
+ if (!@subunit || @unit) && @@number_symbols.include?(char)
31
+ @number << char
32
+ match = true
33
+ @unit = false
34
+ @subunit = false
35
+ elsif (@number != '' || @unit) && @@subunit_symbols.include?(char)
36
+ @number << char
37
+ match = true
38
+ @subunit = true
39
+ end
40
+
41
+ if (@number != '' || @subunit) && @@unit_symbols.include?(char)
42
+ @word << @number
43
+ @word << char if !match
44
+ @number = ''
45
+ @unit = true
46
+ match = true
47
+ end
48
+
49
+ if !match
50
+ word = (@word != '') ? @word : @number
51
+ @word = ''
52
+ @number = ''
53
+ match = false
54
+ @unit = false
55
+ @subunit = false
56
+ end
57
+
58
+ [match, word]
59
+ end
58
60
  end
59
61
  end
@@ -1,7 +1,9 @@
1
- class Conjunction
2
- @@conjunctions = %W(给 的 说 对 在 和 是 被 最 所 那 由 这 有 将 你 会 与 他 为 不 没 很 了 啊 哦 呵 把 去 从)
1
+ module RsegFilter
2
+ class Conjunction
3
+ @@conjunctions = %W(给 的 说 对 在 和 是 被 最 所 那 由 这 有 将 你 会 与 他 为 不 没 很 了 啊 哦 呵 把 去 从)
3
4
 
4
- def self.filter(char)
5
- @@conjunctions.include?(char) ? :conjunction : char
5
+ def self.filter(char)
6
+ @@conjunctions.include?(char) ? :conjunction : char
7
+ end
6
8
  end
7
9
  end
@@ -1,17 +1,19 @@
1
- class Fullwidth
2
- @@fullwidth_chars = {'１' => '1', '２' => '2', '３' => '3', '４' => '4', '５' => '5', '６' => '6', '７' => '7', '８' => '8',
3
- '９' => '9', '０' => '0', 'ａ' => 'a', 'ｂ' => 'b', 'ｃ' => 'c', 'ｄ' => 'd', 'ｅ' => 'e', 'ｆ' => 'f',
4
- 'ｇ' => 'g', 'ｈ' => 'h', 'ｉ' => 'i', 'ｊ' => 'j', 'ｋ' => 'k', 'ｌ' => 'l', 'ｍ' => 'm', 'ｎ' => 'n',
5
- 'ｏ' => 'o', 'ｐ' => 'p', 'ｑ' => 'q', 'ｒ' => 'r', 'ｓ' => 's', 'ｔ' => 't', 'ｕ' => 'u', 'ｖ' => 'v',
6
- 'ｗ' => 'w', 'ｘ' => 'x', 'ｙ' => 'y', 'ｚ' => 'z', 'Ａ' => 'A', 'Ｂ' => 'B', 'Ｃ' => 'C', 'Ｄ' => 'D',
7
- 'Ｅ' => 'E', 'Ｆ' => 'F', 'Ｇ' => 'G', 'Ｈ' => 'H', 'Ｉ' => 'I', 'Ｊ' => 'J', 'Ｋ' => 'K', 'Ｌ' => 'L',
8
- 'Ｍ' => 'M', 'Ｎ' => 'N', 'Ｏ' => 'O', 'Ｐ' => 'P', 'Ｑ' => 'Q', 'Ｒ' => 'R', 'Ｓ' => 'S', 'Ｔ' => 'T',
9
- 'Ｕ' => 'U', 'Ｖ' => 'V', 'Ｗ' => 'W', 'Ｘ' => 'X', 'Ｙ' => 'Y', 'Ｚ' => 'Z', '－' => '-', '＋' => '+',
10
- '—' => '-', '，' => ',', '／' => '/', '·' => '.'}
1
+ module RsegFilter
2
+ class Fullwidth
3
+ @@fullwidth_chars = {'１' => '1', '２' => '2', '３' => '3', '４' => '4', '５' => '5', '６' => '6', '７' => '7', '８' => '8',
4
+ '９' => '9', '０' => '0', 'ａ' => 'a', 'ｂ' => 'b', 'ｃ' => 'c', 'ｄ' => 'd', 'ｅ' => 'e', 'ｆ' => 'f',
5
+ 'ｇ' => 'g', 'ｈ' => 'h', 'ｉ' => 'i', 'ｊ' => 'j', 'ｋ' => 'k', 'ｌ' => 'l', 'ｍ' => 'm', 'ｎ' => 'n',
6
+ 'ｏ' => 'o', 'ｐ' => 'p', 'ｑ' => 'q', 'ｒ' => 'r', 'ｓ' => 's', 'ｔ' => 't', 'ｕ' => 'u', 'ｖ' => 'v',
7
+ 'ｗ' => 'w', 'ｘ' => 'x', 'ｙ' => 'y', 'ｚ' => 'z', 'Ａ' => 'A', 'Ｂ' => 'B', 'Ｃ' => 'C', 'Ｄ' => 'D',
8
+ 'Ｅ' => 'E', 'Ｆ' => 'F', 'Ｇ' => 'G', 'Ｈ' => 'H', 'Ｉ' => 'I', 'Ｊ' => 'J', 'Ｋ' => 'K', 'Ｌ' => 'L',
9
+ 'Ｍ' => 'M', 'Ｎ' => 'N', 'Ｏ' => 'O', 'Ｐ' => 'P', 'Ｑ' => 'Q', 'Ｒ' => 'R', 'Ｓ' => 'S', 'Ｔ' => 'T',
10
+ 'Ｕ' => 'U', 'Ｖ' => 'V', 'Ｗ' => 'W', 'Ｘ' => 'X', 'Ｙ' => 'Y', 'Ｚ' => 'Z', '－' => '-', '＋' => '+',
11
+ '—' => '-', '，' => ',', '／' => '/', '·' => '.'}
11
12
 
12
- class << self
13
- def filter(char)
14
- @@fullwidth_chars[char].nil? ? char : @@fullwidth_chars[char]
13
+ class << self
14
+ def filter(char)
15
+ @@fullwidth_chars[char].nil? ? char : @@fullwidth_chars[char]
16
+ end
15
17
  end
16
18
  end
17
19
  end
@@ -1,12 +1,14 @@
1
- class Symbol
2
- @@separators = ['`', '[', ']', '、', '=', '‘', ';', '。', '|', '?', '》',
3
- '', '', '', '', '', '', '', '', '', '', '',
4
- '', '', '', '', '', '', '', '', '', '', '', '〗',
5
- '', '', '', '', '`', '~', '!', '@', '#', '^',
6
- '&', '*', '\\', '(', ')', '=', '{', '}', '[', ']',
7
- '|', ';', ':', "'", '<', '>', '?', "\n", "\t", "\r",
8
- ' ', '-', '/', '+', ',', ' ']
9
- def self.filter(char)
10
- @@separators.include?(char) ? :symbol : char
1
+ module RsegFilter
2
+ class Symbol
3
+ @@separators = ['', '', '', '', '', '', '', '', '', '', '',
4
+ '', '', '', '', '', '', '', '', '', '', '',
5
+ '', '', '', '', '', '', '', '', '', '', '〖', '〗',
6
+ '', '', '', '', '`', '~', '!', '@', '#', '^',
7
+ '&', '*', '\\', '(', ')', '=', '{', '}', '[', ']',
8
+ '|', ';', ':', "'", '<', '>', '?', "\n", "\t", "\r",
9
+ ' ', '-', '/', '+', ',', ' ']
10
+ def self.filter(char)
11
+ @@separators.include?(char) ? :symbol : char
12
+ end
11
13
  end
12
14
  end
@@ -15,8 +15,14 @@ require File.join(File.dirname(__FILE__), 'filters/conjunction')
15
15
 
16
16
  class Rseg
17
17
  include Singleton
18
+ include RsegEngine
19
+ include RsegFilter
18
20
 
19
21
  class << self
22
+ def dict_path=(path)
23
+ RsegEngine::Dict.dict_path = path
24
+ end
25
+
20
26
  def segment(input)
21
27
  Rseg.instance.input = input
22
28
  Rseg.instance.segment
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rseg
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 5
10
- version: 0.1.5
9
+ - 6
10
+ version: 0.1.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - Yuanyi Zhang