bimyou_segmenter 1.0.1 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Changelog +5 -0
- data/README.md +8 -0
- data/bimyou_segmenter.gemspec +2 -2
- data/bin/bimyou_segmenter +44 -6
- data/lib/bimyou_segmenter/version.rb +2 -1
- data/lib/bimyou_segmenter.rb +1 -0
- metadata +52 -36
data/Changelog
ADDED
data/README.md
CHANGED
@@ -32,3 +32,11 @@ segmentは、分かち書きした結果を文字列の配列として返しま
|
|
32
32
|
:white_space => trueを付けると空白文字や改行コードも含みます。この場合は、返した文字列配列を順に結合すると元の文字列と一致します。
|
33
33
|
|
34
34
|
Ruby1.8.7の場合、$KCODE='u'にしないと動きません。
|
35
|
+
|
36
|
+
% bimyou_segmenter -d "|" -e ""
|
37
|
+
昼飯のスパゲティナポリタンを眺めながら、積年の疑問を考えていた。それは「なぜナポリタンは赤いのだろうか」という問いである。
|
38
|
+
昼飯|の|スパゲティナポリタン|を|眺め|ながら|、|積年|の|疑問|を|考え|て|い|た|。|それ|は|「|なぜ|ナポリタン|は|赤い|の|だろ|う|か|」|という|問い|で|ある|。
|
39
|
+
|
40
|
+
% bimyou_segmenter --help
|
41
|
+
|
42
|
+
コマンドラインツールもあります。
|
data/bimyou_segmenter.gemspec
CHANGED
@@ -4,8 +4,8 @@ require File.expand_path('../lib/bimyou_segmenter/version', __FILE__)
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.authors = ["nagadomi"]
|
6
6
|
gem.email = ["nagadomi@nurs.or.jp"]
|
7
|
-
gem.description = %q{Japanese
|
8
|
-
gem.summary = %q{Japanese
|
7
|
+
gem.description = %q{Japanese Word Segmenter}
|
8
|
+
gem.summary = %q{Japanese Word Segmenter}
|
9
9
|
gem.homepage = "http://github.com/nagadomi/bimyou_segmenter"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split($\)
|
data/bin/bimyou_segmenter
CHANGED
@@ -3,7 +3,16 @@
|
|
3
3
|
if (RUBY_VERSION < "1.9.0")
|
4
4
|
require 'rubygems'
|
5
5
|
$KCODE= 'u'
|
6
|
+
# jruby not work...
|
7
|
+
if (RUBY_PLATFORM.downcase =~ /mswin(?!ce)|mingw|cygwin|bccwin/)
|
8
|
+
nkf_option = '-s'
|
9
|
+
else
|
10
|
+
nkf_option = '-w'
|
11
|
+
end
|
12
|
+
else
|
13
|
+
nkf_option = nil
|
6
14
|
end
|
15
|
+
require 'nkf'
|
7
16
|
require 'bimyou_segmenter'
|
8
17
|
require 'optparse'
|
9
18
|
|
@@ -21,6 +30,27 @@ opt = OptionParser.new do |opt|
|
|
21
30
|
opt.on('-e STR', '--eos STR', 'EOS string') do |v|
|
22
31
|
eos = v
|
23
32
|
end
|
33
|
+
opt.on('-c ENCODING', '--output-encoding ENCODING', 'output encoding (euc-jp, sjis, utf-8)') do |v|
|
34
|
+
encoding = nil
|
35
|
+
option = nil
|
36
|
+
case v.downcase
|
37
|
+
when /(sjis)|(shift_jis)/
|
38
|
+
encoding = 'Shift_JIS'
|
39
|
+
option = '-s'
|
40
|
+
when /utf\-?8/
|
41
|
+
encoding = 'UTF-8'
|
42
|
+
option = '-w'
|
43
|
+
when /euc/
|
44
|
+
encoding = 'EUC-JP'
|
45
|
+
option = '-e'
|
46
|
+
end
|
47
|
+
if (encoding && $stdout.respond_to?(:set_encoding))
|
48
|
+
$stdout.set_encoding(encoding, 'UTF-8')
|
49
|
+
nkf_option = nil
|
50
|
+
else
|
51
|
+
nkf_option = option
|
52
|
+
end
|
53
|
+
end
|
24
54
|
end
|
25
55
|
opt.version = BimyouSegmenter::VERSION
|
26
56
|
opt.banner = "Usage: bimyou_segmenter [options] [files]"
|
@@ -31,10 +61,14 @@ if (ARGV.size == 0)
|
|
31
61
|
until $stdin.eof?
|
32
62
|
line = $stdin.readline
|
33
63
|
if (line)
|
34
|
-
line = line.chomp
|
64
|
+
line = NKF.nkf('-w', line).chomp
|
65
|
+
s = [BimyouSegmenter.segment(line, :white_space => white_space), eos
|
66
|
+
].flatten(1).reject{|v| v.size == 0}.join(delimiter)
|
67
|
+
if (nkf_option)
|
68
|
+
s = NKF.nkf(nkf_option, s)
|
69
|
+
end
|
35
70
|
begin
|
36
|
-
puts
|
37
|
-
].flatten(1).reject{|v| v.size == 0}.join(delimiter)
|
71
|
+
puts s
|
38
72
|
rescue Errno::EPIPE
|
39
73
|
exit 0
|
40
74
|
end
|
@@ -44,9 +78,13 @@ else
|
|
44
78
|
ARGV.each do |file|
|
45
79
|
File.open(file) do |f|
|
46
80
|
until f.eof?
|
47
|
-
line = f.readline
|
48
|
-
|
49
|
-
|
81
|
+
line = NKF.nkf('-w', f.readline)
|
82
|
+
s = [BimyouSegmenter.segment(line, :white_space => white_space), eos
|
83
|
+
].flatten(1).reject{|v| v.size == 0}.join(delimiter)
|
84
|
+
if (nkf_option)
|
85
|
+
s = NKF.nkf(nkf_option, s)
|
86
|
+
end
|
87
|
+
puts s
|
50
88
|
end
|
51
89
|
end
|
52
90
|
end
|
data/lib/bimyou_segmenter.rb
CHANGED
metadata
CHANGED
@@ -1,36 +1,46 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bimyou_segmenter
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
version: 1.1.1
|
6
10
|
platform: ruby
|
7
|
-
authors:
|
11
|
+
authors:
|
8
12
|
- nagadomi
|
9
13
|
autorequire:
|
10
14
|
bindir: bin
|
11
15
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
16
|
+
|
17
|
+
date: 2012-05-28 00:00:00 +09:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
15
21
|
name: test-unit
|
16
|
-
requirement: &18169520 !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ! '>='
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: '0'
|
22
|
-
type: :development
|
23
22
|
prerelease: false
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
version: "0"
|
30
|
+
type: :development
|
31
|
+
version_requirements: *id001
|
32
|
+
description: Japanese Word Segmenter
|
33
|
+
email:
|
27
34
|
- nagadomi@nurs.or.jp
|
28
|
-
executables:
|
35
|
+
executables:
|
29
36
|
- bimyou_segmenter
|
30
37
|
extensions: []
|
38
|
+
|
31
39
|
extra_rdoc_files: []
|
32
|
-
|
40
|
+
|
41
|
+
files:
|
33
42
|
- .gitignore
|
43
|
+
- Changelog
|
34
44
|
- Gemfile
|
35
45
|
- LICENSE
|
36
46
|
- README.md
|
@@ -42,30 +52,36 @@ files:
|
|
42
52
|
- lib/bimyou_segmenter/version.rb
|
43
53
|
- test/bimyou_segmenter_test.rb
|
44
54
|
- test/test_helper.rb
|
55
|
+
has_rdoc: true
|
45
56
|
homepage: http://github.com/nagadomi/bimyou_segmenter
|
46
57
|
licenses: []
|
58
|
+
|
47
59
|
post_install_message:
|
48
60
|
rdoc_options: []
|
49
|
-
|
61
|
+
|
62
|
+
require_paths:
|
50
63
|
- lib
|
51
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
requirements:
|
60
|
-
- -
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
segments:
|
76
|
+
- 0
|
77
|
+
version: "0"
|
63
78
|
requirements: []
|
79
|
+
|
64
80
|
rubyforge_project:
|
65
|
-
rubygems_version: 1.
|
81
|
+
rubygems_version: 1.3.6
|
66
82
|
signing_key:
|
67
83
|
specification_version: 3
|
68
|
-
summary: Japanese
|
69
|
-
test_files:
|
84
|
+
summary: Japanese Word Segmenter
|
85
|
+
test_files:
|
70
86
|
- test/bimyou_segmenter_test.rb
|
71
87
|
- test/test_helper.rb
|