bimyou_segmenter 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/bimyou_segmenter CHANGED
@@ -68,7 +68,9 @@ if (ARGV.size == 0)
68
68
  s = NKF.nkf(nkf_option, s)
69
69
  end
70
70
  begin
71
- puts s
71
+ unless (s.empty?)
72
+ puts s
73
+ end
72
74
  rescue Errno::EPIPE
73
75
  exit 0
74
76
  end
@@ -84,7 +86,9 @@ else
84
86
  if (nkf_option)
85
87
  s = NKF.nkf(nkf_option, s)
86
88
  end
87
- puts s
89
+ unless (s.empty?)
90
+ puts s
91
+ end
88
92
  end
89
93
  end
90
94
  end
data/lib/bimyou_segmenter.rb CHANGED
@@ -16,6 +16,7 @@ module BimyouSegmenter
16
16
  end
17
17
 
18
18
  white_space = options[:white_space].nil? ? false : options[:white_space]
19
+ symbol = options[:symbol].nil? ? true : options[:symbol]
19
20
 
20
21
  wakachi = []
21
22
  text_chars = text.chars.to_a
@@ -155,17 +156,23 @@ module BimyouSegmenter
155
156
  end
156
157
  end
157
158
  wakachi << word
158
- if (white_space)
159
- wakachi
160
- else
161
- wakachi.reject{|v| v.match(WHITE_SPACE) }
159
+ unless (white_space)
160
+ wakachi = wakachi.reject{|v| v.match(WHITE_SPACE) }
162
161
  end
162
+ unless (symbol)
163
+ wakachi = wakachi.reject{|v| v.match(SYMBOL) }
164
+ end
165
+ wakachi
163
166
  end
164
167
 
165
168
  private
166
169
  KANJI = Regexp.new('[々〇ヵヶ' + [0x3400].pack('U') + '-' + [0x9FFF].pack('U') +
167
170
  [0xF900].pack('U') + '-' + [0xFAFF].pack('U') +
168
171
  [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') + ']')
172
+ SYMBOL = Regexp.new('^[^々〇' + [0x3400].pack('U') + '-' + [0x9FFF].pack('U') +
173
+ [0xF900].pack('U') + '-' + [0xFAFF].pack('U') +
174
+ [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') +
175
+ '\s ぁ-ゞァ-ヾa-zA-Za-zA-Z0-90-9]+$')
169
176
  WHITE_SPACE = /[\s ]/
170
177
 
171
178
  def self.char_types(chars)
data/lib/bimyou_segmenter/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  module BimyouSegmenter
3
- VERSION = "1.1.1"
3
+ VERSION = "1.2.0"
4
4
  end
data/test/bimyou_segmenter_test.rb CHANGED
@@ -33,6 +33,18 @@ class BimyouSegmenterTest < Test::Unit::TestCase
33
33
  assert_equal tokens[2], "\r"
34
34
  assert_equal tokens[3], "\n"
35
35
  assert_equal tokens[4], " "
36
+
37
+ tokens = BimyouSegmenter.segment("「私はトマトです。」", :symbol => false)
38
+ assert_equal tokens.size, 4
39
+ assert_equal tokens[0], "私"
40
+ assert_equal tokens[1], "は"
41
+ assert_equal tokens[2], "トマト"
42
+ assert_equal tokens[3], "です"
43
+
44
+ tokens = BimyouSegmenter.segment("hello. world? = !", :symbol => false)
45
+ assert_equal tokens.size, 2
46
+ assert_equal tokens[0], "hello"
47
+ assert_equal tokens[1], "world"
36
48
 
37
49
  assert_equal BimyouSegmenter.segment("").size, 0
38
50
  assert_equal BimyouSegmenter.segment(nil).size, 0
metadata CHANGED
@@ -1,44 +1,35 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bimyou_segmenter
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 1
7
- - 1
8
- - 1
9
- version: 1.1.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.2.0
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - nagadomi
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2012-05-28 00:00:00 +09:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-06-01 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: test-unit
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- segments:
28
- - 0
29
- version: "0"
16
+ requirement: &5715420 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
30
22
  type: :development
31
- version_requirements: *id001
23
+ prerelease: false
24
+ version_requirements: *5715420
32
25
  description: Japanese Word Segmenter
33
- email:
26
+ email:
34
27
  - nagadomi@nurs.or.jp
35
- executables:
28
+ executables:
36
29
  - bimyou_segmenter
37
30
  extensions: []
38
-
39
31
  extra_rdoc_files: []
40
-
41
- files:
32
+ files:
42
33
  - .gitignore
43
34
  - Changelog
44
35
  - Gemfile
@@ -52,36 +43,30 @@ files:
52
43
  - lib/bimyou_segmenter/version.rb
53
44
  - test/bimyou_segmenter_test.rb
54
45
  - test/test_helper.rb
55
- has_rdoc: true
56
46
  homepage: http://github.com/nagadomi/bimyou_segmenter
57
47
  licenses: []
58
-
59
48
  post_install_message:
60
49
  rdoc_options: []
61
-
62
- require_paths:
50
+ require_paths:
63
51
  - lib
64
- required_ruby_version: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- segments:
69
- - 0
70
- version: "0"
71
- required_rubygems_version: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- segments:
76
- - 0
77
- version: "0"
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
78
64
  requirements: []
79
-
80
65
  rubyforge_project:
81
- rubygems_version: 1.3.6
66
+ rubygems_version: 1.8.10
82
67
  signing_key:
83
68
  specification_version: 3
84
69
  summary: Japanese Word Segmenter
85
- test_files:
70
+ test_files:
86
71
  - test/bimyou_segmenter_test.rb
87
72
  - test/test_helper.rb