bimyou_segmenter 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/bimyou_segmenter CHANGED
@@ -68,7 +68,9 @@ if (ARGV.size == 0)
68
68
  s = NKF.nkf(nkf_option, s)
69
69
  end
70
70
  begin
71
- puts s
71
+ unless (s.empty?)
72
+ puts s
73
+ end
72
74
  rescue Errno::EPIPE
73
75
  exit 0
74
76
  end
@@ -84,7 +86,9 @@ else
84
86
  if (nkf_option)
85
87
  s = NKF.nkf(nkf_option, s)
86
88
  end
87
- puts s
89
+ unless (s.empty?)
90
+ puts s
91
+ end
88
92
  end
89
93
  end
90
94
  end
@@ -16,6 +16,7 @@ module BimyouSegmenter
16
16
  end
17
17
 
18
18
  white_space = options[:white_space].nil? ? false : options[:white_space]
19
+ symbol = options[:symbol].nil? ? true : options[:symbol]
19
20
 
20
21
  wakachi = []
21
22
  text_chars = text.chars.to_a
@@ -155,17 +156,23 @@ module BimyouSegmenter
155
156
  end
156
157
  end
157
158
  wakachi << word
158
- if (white_space)
159
- wakachi
160
- else
161
- wakachi.reject{|v| v.match(WHITE_SPACE) }
159
+ unless (white_space)
160
+ wakachi = wakachi.reject{|v| v.match(WHITE_SPACE) }
162
161
  end
162
+ unless (symbol)
163
+ wakachi = wakachi.reject{|v| v.match(SYMBOL) }
164
+ end
165
+ wakachi
163
166
  end
164
167
 
165
168
  private
166
169
  KANJI = Regexp.new('[々〇ヵヶ' + [0x3400].pack('U') + '-' + [0x9FFF].pack('U') +
167
170
  [0xF900].pack('U') + '-' + [0xFAFF].pack('U') +
168
171
  [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') + ']')
172
+ SYMBOL = Regexp.new('^[^々〇' + [0x3400].pack('U') + '-' + [0x9FFF].pack('U') +
173
+ [0xF900].pack('U') + '-' + [0xFAFF].pack('U') +
174
+ [0x20000].pack('U') + '-' + [0x2FFFF].pack('U') +
175
+ '\s ぁ-ゞァ-ヾa-zA-Za-zA-Z0-90-9]+$')
169
176
  WHITE_SPACE = /[\s ]/
170
177
 
171
178
  def self.char_types(chars)
@@ -1,4 +1,4 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  module BimyouSegmenter
3
- VERSION = "1.1.1"
3
+ VERSION = "1.2.0"
4
4
  end
@@ -33,6 +33,18 @@ class BimyouSegmenterTest < Test::Unit::TestCase
33
33
  assert_equal tokens[2], "\r"
34
34
  assert_equal tokens[3], "\n"
35
35
  assert_equal tokens[4], " "
36
+
37
+ tokens = BimyouSegmenter.segment("「私はトマトです。」", :symbol => false)
38
+ assert_equal tokens.size, 4
39
+ assert_equal tokens[0], "私"
40
+ assert_equal tokens[1], "は"
41
+ assert_equal tokens[2], "トマト"
42
+ assert_equal tokens[3], "です"
43
+
44
+ tokens = BimyouSegmenter.segment("hello. world? = !", :symbol => false)
45
+ assert_equal tokens.size, 2
46
+ assert_equal tokens[0], "hello"
47
+ assert_equal tokens[1], "world"
36
48
 
37
49
  assert_equal BimyouSegmenter.segment("").size, 0
38
50
  assert_equal BimyouSegmenter.segment(nil).size, 0
metadata CHANGED
@@ -1,44 +1,35 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bimyou_segmenter
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 1
7
- - 1
8
- - 1
9
- version: 1.1.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.2.0
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - nagadomi
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2012-05-28 00:00:00 +09:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-06-01 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: test-unit
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- segments:
28
- - 0
29
- version: "0"
16
+ requirement: &5715420 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
30
22
  type: :development
31
- version_requirements: *id001
23
+ prerelease: false
24
+ version_requirements: *5715420
32
25
  description: Japanese Word Segmenter
33
- email:
26
+ email:
34
27
  - nagadomi@nurs.or.jp
35
- executables:
28
+ executables:
36
29
  - bimyou_segmenter
37
30
  extensions: []
38
-
39
31
  extra_rdoc_files: []
40
-
41
- files:
32
+ files:
42
33
  - .gitignore
43
34
  - Changelog
44
35
  - Gemfile
@@ -52,36 +43,30 @@ files:
52
43
  - lib/bimyou_segmenter/version.rb
53
44
  - test/bimyou_segmenter_test.rb
54
45
  - test/test_helper.rb
55
- has_rdoc: true
56
46
  homepage: http://github.com/nagadomi/bimyou_segmenter
57
47
  licenses: []
58
-
59
48
  post_install_message:
60
49
  rdoc_options: []
61
-
62
- require_paths:
50
+ require_paths:
63
51
  - lib
64
- required_ruby_version: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- segments:
69
- - 0
70
- version: "0"
71
- required_rubygems_version: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- segments:
76
- - 0
77
- version: "0"
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
78
64
  requirements: []
79
-
80
65
  rubyforge_project:
81
- rubygems_version: 1.3.6
66
+ rubygems_version: 1.8.10
82
67
  signing_key:
83
68
  specification_version: 3
84
69
  summary: Japanese Word Segmenter
85
- test_files:
70
+ test_files:
86
71
  - test/bimyou_segmenter_test.rb
87
72
  - test/test_helper.rb