bimyou_segmenter 1.0.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Changelog ADDED
@@ -0,0 +1,5 @@
1
+ 2012-05-28 nagadomi <nagadomi@nurs.or.jp>
2
+ * bin/bimyou_segmenter: 文字コード変換に対応 (1.1.0)
3
+
4
+ 2012-05-27 nagadomi <nagadomi@nurs.or.jp>
5
+ * release
data/README.md CHANGED
@@ -32,3 +32,11 @@ segmentは、分かち書きした結果を文字列の配列として返しま
32
32
  :white_space => trueを付けると空白文字や改行コードも含みます。この場合は、返した文字列配列を順に結合すると元の文字列と一致します。
33
33
 
34
34
  Ruby1.8.7の場合、$KCODE='u'にしないと動きません。
35
+
36
+ % bimyou_segmenter -d "|" -e ""
37
+ 昼飯のスパゲティナポリタンを眺めながら、積年の疑問を考えていた。それは「なぜナポリタンは赤いのだろうか」という問いである。
38
+ 昼飯|の|スパゲティナポリタン|を|眺め|ながら|、|積年|の|疑問|を|考え|て|い|た|。|それ|は|「|なぜ|ナポリタン|は|赤い|の|だろ|う|か|」|という|問い|で|ある|。
39
+
40
+ % bimyou_segmenter --help
41
+
42
+ コマンドラインツールもあります。
@@ -4,8 +4,8 @@ require File.expand_path('../lib/bimyou_segmenter/version', __FILE__)
4
4
  Gem::Specification.new do |gem|
5
5
  gem.authors = ["nagadomi"]
6
6
  gem.email = ["nagadomi@nurs.or.jp"]
7
- gem.description = %q{Japanese word segmenter.}
8
- gem.summary = %q{Japanese word segmenter.}
7
+ gem.description = %q{Japanese Word Segmenter}
8
+ gem.summary = %q{Japanese Word Segmenter}
9
9
  gem.homepage = "http://github.com/nagadomi/bimyou_segmenter"
10
10
 
11
11
  gem.files = `git ls-files`.split($\)
data/bin/bimyou_segmenter CHANGED
@@ -3,7 +3,16 @@
3
3
  if (RUBY_VERSION < "1.9.0")
4
4
  require 'rubygems'
5
5
  $KCODE= 'u'
6
+ # jruby not work...
7
+ if (RUBY_PLATFORM.downcase =~ /mswin(?!ce)|mingw|cygwin|bccwin/)
8
+ nkf_option = '-s'
9
+ else
10
+ nkf_option = '-w'
11
+ end
12
+ else
13
+ nkf_option = nil
6
14
  end
15
+ require 'nkf'
7
16
  require 'bimyou_segmenter'
8
17
  require 'optparse'
9
18
 
@@ -21,6 +30,27 @@ opt = OptionParser.new do |opt|
21
30
  opt.on('-e STR', '--eos STR', 'EOS string') do |v|
22
31
  eos = v
23
32
  end
33
+ opt.on('-c ENCODING', '--output-encoding ENCODING', 'output encoding (euc-jp, sjis, utf-8)') do |v|
34
+ encoding = nil
35
+ option = nil
36
+ case v.downcase
37
+ when /(sjis)|(shift_jis)/
38
+ encoding = 'Shift_JIS'
39
+ option = '-s'
40
+ when /utf\-?8/
41
+ encoding = 'UTF-8'
42
+ option = '-w'
43
+ when /euc/
44
+ encoding = 'EUC-JP'
45
+ option = '-e'
46
+ end
47
+ if (encoding && $stdout.respond_to?(:set_encoding))
48
+ $stdout.set_encoding(encoding, 'UTF-8')
49
+ nkf_option = nil
50
+ else
51
+ nkf_option = option
52
+ end
53
+ end
24
54
  end
25
55
  opt.version = BimyouSegmenter::VERSION
26
56
  opt.banner = "Usage: bimyou_segmenter [options] [files]"
@@ -31,10 +61,14 @@ if (ARGV.size == 0)
31
61
  until $stdin.eof?
32
62
  line = $stdin.readline
33
63
  if (line)
34
- line = line.chomp
64
+ line = NKF.nkf('-w', line).chomp
65
+ s = [BimyouSegmenter.segment(line, :white_space => white_space), eos
66
+ ].flatten(1).reject{|v| v.size == 0}.join(delimiter)
67
+ if (nkf_option)
68
+ s = NKF.nkf(nkf_option, s)
69
+ end
35
70
  begin
36
- puts [BimyouSegmenter.segment(line, :white_space => white_space), eos
37
- ].flatten(1).reject{|v| v.size == 0}.join(delimiter)
71
+ puts s
38
72
  rescue Errno::EPIPE
39
73
  exit 0
40
74
  end
@@ -44,9 +78,13 @@ else
44
78
  ARGV.each do |file|
45
79
  File.open(file) do |f|
46
80
  until f.eof?
47
- line = f.readline
48
- puts [BimyouSegmenter.segment(line, :white_space => white_space), eos
49
- ].flatten(1).reject{|v| v.size == 0}.join(delimiter)
81
+ line = NKF.nkf('-w', f.readline)
82
+ s = [BimyouSegmenter.segment(line, :white_space => white_space), eos
83
+ ].flatten(1).reject{|v| v.size == 0}.join(delimiter)
84
+ if (nkf_option)
85
+ s = NKF.nkf(nkf_option, s)
86
+ end
87
+ puts s
50
88
  end
51
89
  end
52
90
  end
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  module BimyouSegmenter
2
- VERSION = "1.0.1"
3
+ VERSION = "1.1.1"
3
4
  end
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  require 'bimyou_segmenter/version'
2
3
  require 'bimyou_segmenter/aozora_model'
3
4
 
metadata CHANGED
@@ -1,36 +1,46 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bimyou_segmenter
3
- version: !ruby/object:Gem::Version
4
- version: 1.0.1
5
- prerelease:
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 1
8
+ - 1
9
+ version: 1.1.1
6
10
  platform: ruby
7
- authors:
11
+ authors:
8
12
  - nagadomi
9
13
  autorequire:
10
14
  bindir: bin
11
15
  cert_chain: []
12
- date: 2012-05-27 00:00:00.000000000Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
16
+
17
+ date: 2012-05-28 00:00:00 +09:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
15
21
  name: test-unit
16
- requirement: &18169520 !ruby/object:Gem::Requirement
17
- none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
21
- version: '0'
22
- type: :development
23
22
  prerelease: false
24
- version_requirements: *18169520
25
- description: Japanese word segmenter.
26
- email:
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ type: :development
31
+ version_requirements: *id001
32
+ description: Japanese Word Segmenter
33
+ email:
27
34
  - nagadomi@nurs.or.jp
28
- executables:
35
+ executables:
29
36
  - bimyou_segmenter
30
37
  extensions: []
38
+
31
39
  extra_rdoc_files: []
32
- files:
40
+
41
+ files:
33
42
  - .gitignore
43
+ - Changelog
34
44
  - Gemfile
35
45
  - LICENSE
36
46
  - README.md
@@ -42,30 +52,36 @@ files:
42
52
  - lib/bimyou_segmenter/version.rb
43
53
  - test/bimyou_segmenter_test.rb
44
54
  - test/test_helper.rb
55
+ has_rdoc: true
45
56
  homepage: http://github.com/nagadomi/bimyou_segmenter
46
57
  licenses: []
58
+
47
59
  post_install_message:
48
60
  rdoc_options: []
49
- require_paths:
61
+
62
+ require_paths:
50
63
  - lib
51
- required_ruby_version: !ruby/object:Gem::Requirement
52
- none: false
53
- requirements:
54
- - - ! '>='
55
- - !ruby/object:Gem::Version
56
- version: '0'
57
- required_rubygems_version: !ruby/object:Gem::Requirement
58
- none: false
59
- requirements:
60
- - - ! '>='
61
- - !ruby/object:Gem::Version
62
- version: '0'
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ segments:
69
+ - 0
70
+ version: "0"
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ segments:
76
+ - 0
77
+ version: "0"
63
78
  requirements: []
79
+
64
80
  rubyforge_project:
65
- rubygems_version: 1.8.10
81
+ rubygems_version: 1.3.6
66
82
  signing_key:
67
83
  specification_version: 3
68
- summary: Japanese word segmenter.
69
- test_files:
84
+ summary: Japanese Word Segmenter
85
+ test_files:
70
86
  - test/bimyou_segmenter_test.rb
71
87
  - test/test_helper.rb