bimyou_segmenter 1.0.1 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Changelog ADDED
@@ -0,0 +1,5 @@
1
+ 2012-05-28 nagadomi <nagadomi@nurs.or.jp>
2
+ * bin/bimyou_segmenter: 文字コード変換に対応 (1.1.0)
3
+
4
+ 2012-05-27 nagadomi <nagadomi@nurs.or.jp>
5
+ * release
data/README.md CHANGED
@@ -32,3 +32,11 @@ segmentは、分かち書きした結果を文字列の配列として返しま
32
32
  :white_space => trueを付けると空白文字や改行コードも含みます。この場合は、返した文字列配列を順に結合すると元の文字列と一致します。
33
33
 
34
34
  Ruby1.8.7の場合、$KCODE='u'にしないと動きません。
35
+
36
+ % bimyou_segmenter -d "|" -e ""
37
+ 昼飯のスパゲティナポリタンを眺めながら、積年の疑問を考えていた。それは「なぜナポリタンは赤いのだろうか」という問いである。
38
+ 昼飯|の|スパゲティナポリタン|を|眺め|ながら|、|積年|の|疑問|を|考え|て|い|た|。|それ|は|「|なぜ|ナポリタン|は|赤い|の|だろ|う|か|」|という|問い|で|ある|。
39
+
40
+ % bimyou_segmenter --help
41
+
42
+ コマンドラインツールもあります。
@@ -4,8 +4,8 @@ require File.expand_path('../lib/bimyou_segmenter/version', __FILE__)
4
4
  Gem::Specification.new do |gem|
5
5
  gem.authors = ["nagadomi"]
6
6
  gem.email = ["nagadomi@nurs.or.jp"]
7
- gem.description = %q{Japanese word segmenter.}
8
- gem.summary = %q{Japanese word segmenter.}
7
+ gem.description = %q{Japanese Word Segmenter}
8
+ gem.summary = %q{Japanese Word Segmenter}
9
9
  gem.homepage = "http://github.com/nagadomi/bimyou_segmenter"
10
10
 
11
11
  gem.files = `git ls-files`.split($\)
data/bin/bimyou_segmenter CHANGED
@@ -3,7 +3,16 @@
3
3
  if (RUBY_VERSION < "1.9.0")
4
4
  require 'rubygems'
5
5
  $KCODE= 'u'
6
+ # NOTE: this does not work on JRuby.
7
+ if (RUBY_PLATFORM.downcase =~ /mswin(?!ce)|mingw|cygwin|bccwin/)
8
+ nkf_option = '-s'
9
+ else
10
+ nkf_option = '-w'
11
+ end
12
+ else
13
+ nkf_option = nil
6
14
  end
15
+ require 'nkf'
7
16
  require 'bimyou_segmenter'
8
17
  require 'optparse'
9
18
 
@@ -21,6 +30,27 @@ opt = OptionParser.new do |opt|
21
30
  opt.on('-e STR', '--eos STR', 'EOS string') do |v|
22
31
  eos = v
23
32
  end
33
+ opt.on('-c ENCODING', '--output-encoding ENCODING', 'output encoding (euc-jp, sjis, utf-8)') do |v|
34
+ encoding = nil
35
+ option = nil
36
+ case v.downcase
37
+ when /(sjis)|(shift_jis)/
38
+ encoding = 'Shift_JIS'
39
+ option = '-s'
40
+ when /utf\-?8/
41
+ encoding = 'UTF-8'
42
+ option = '-w'
43
+ when /euc/
44
+ encoding = 'EUC-JP'
45
+ option = '-e'
46
+ end
47
+ if (encoding && $stdout.respond_to?(:set_encoding))
48
+ $stdout.set_encoding(encoding, 'UTF-8')
49
+ nkf_option = nil
50
+ else
51
+ nkf_option = option
52
+ end
53
+ end
24
54
  end
25
55
  opt.version = BimyouSegmenter::VERSION
26
56
  opt.banner = "Usage: bimyou_segmenter [options] [files]"
@@ -31,10 +61,14 @@ if (ARGV.size == 0)
31
61
  until $stdin.eof?
32
62
  line = $stdin.readline
33
63
  if (line)
34
- line = line.chomp
64
+ line = NKF.nkf('-w', line).chomp
65
+ s = [BimyouSegmenter.segment(line, :white_space => white_space), eos
66
+ ].flatten(1).reject{|v| v.size == 0}.join(delimiter)
67
+ if (nkf_option)
68
+ s = NKF.nkf(nkf_option, s)
69
+ end
35
70
  begin
36
- puts [BimyouSegmenter.segment(line, :white_space => white_space), eos
37
- ].flatten(1).reject{|v| v.size == 0}.join(delimiter)
71
+ puts s
38
72
  rescue Errno::EPIPE
39
73
  exit 0
40
74
  end
@@ -44,9 +78,13 @@ else
44
78
  ARGV.each do |file|
45
79
  File.open(file) do |f|
46
80
  until f.eof?
47
- line = f.readline
48
- puts [BimyouSegmenter.segment(line, :white_space => white_space), eos
49
- ].flatten(1).reject{|v| v.size == 0}.join(delimiter)
81
+ line = NKF.nkf('-w', f.readline)
82
+ s = [BimyouSegmenter.segment(line, :white_space => white_space), eos
83
+ ].flatten(1).reject{|v| v.size == 0}.join(delimiter)
84
+ if (nkf_option)
85
+ s = NKF.nkf(nkf_option, s)
86
+ end
87
+ puts s
50
88
  end
51
89
  end
52
90
  end
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  module BimyouSegmenter
2
- VERSION = "1.0.1"
3
+ VERSION = "1.1.1"
3
4
  end
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  require 'bimyou_segmenter/version'
2
3
  require 'bimyou_segmenter/aozora_model'
3
4
 
metadata CHANGED
@@ -1,36 +1,46 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bimyou_segmenter
3
- version: !ruby/object:Gem::Version
4
- version: 1.0.1
5
- prerelease:
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 1
8
+ - 1
9
+ version: 1.1.1
6
10
  platform: ruby
7
- authors:
11
+ authors:
8
12
  - nagadomi
9
13
  autorequire:
10
14
  bindir: bin
11
15
  cert_chain: []
12
- date: 2012-05-27 00:00:00.000000000Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
16
+
17
+ date: 2012-05-28 00:00:00 +09:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
15
21
  name: test-unit
16
- requirement: &18169520 !ruby/object:Gem::Requirement
17
- none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
21
- version: '0'
22
- type: :development
23
22
  prerelease: false
24
- version_requirements: *18169520
25
- description: Japanese word segmenter.
26
- email:
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ type: :development
31
+ version_requirements: *id001
32
+ description: Japanese Word Segmenter
33
+ email:
27
34
  - nagadomi@nurs.or.jp
28
- executables:
35
+ executables:
29
36
  - bimyou_segmenter
30
37
  extensions: []
38
+
31
39
  extra_rdoc_files: []
32
- files:
40
+
41
+ files:
33
42
  - .gitignore
43
+ - Changelog
34
44
  - Gemfile
35
45
  - LICENSE
36
46
  - README.md
@@ -42,30 +52,36 @@ files:
42
52
  - lib/bimyou_segmenter/version.rb
43
53
  - test/bimyou_segmenter_test.rb
44
54
  - test/test_helper.rb
55
+ has_rdoc: true
45
56
  homepage: http://github.com/nagadomi/bimyou_segmenter
46
57
  licenses: []
58
+
47
59
  post_install_message:
48
60
  rdoc_options: []
49
- require_paths:
61
+
62
+ require_paths:
50
63
  - lib
51
- required_ruby_version: !ruby/object:Gem::Requirement
52
- none: false
53
- requirements:
54
- - - ! '>='
55
- - !ruby/object:Gem::Version
56
- version: '0'
57
- required_rubygems_version: !ruby/object:Gem::Requirement
58
- none: false
59
- requirements:
60
- - - ! '>='
61
- - !ruby/object:Gem::Version
62
- version: '0'
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ segments:
69
+ - 0
70
+ version: "0"
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ segments:
76
+ - 0
77
+ version: "0"
63
78
  requirements: []
79
+
64
80
  rubyforge_project:
65
- rubygems_version: 1.8.10
81
+ rubygems_version: 1.3.6
66
82
  signing_key:
67
83
  specification_version: 3
68
- summary: Japanese word segmenter.
69
- test_files:
84
+ summary: Japanese Word Segmenter
85
+ test_files:
70
86
  - test/bimyou_segmenter_test.rb
71
87
  - test/test_helper.rb