bimyou_segmenter 1.0.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Changelog +5 -0
- data/README.md +8 -0
- data/bimyou_segmenter.gemspec +2 -2
- data/bin/bimyou_segmenter +44 -6
- data/lib/bimyou_segmenter/version.rb +2 -1
- data/lib/bimyou_segmenter.rb +1 -0
- metadata +52 -36
data/Changelog
ADDED
data/README.md
CHANGED
@@ -32,3 +32,11 @@ segmentは、分かち書きした結果を文字列の配列として返しま
|
|
32
32
|
:white_space => trueを付けると空白文字や改行コードも含みます。この場合は、返した文字列配列を順に結合すると元の文字列と一致します。
|
33
33
|
|
34
34
|
Ruby1.8.7の場合、$KCODE='u'にしないと動きません。
|
35
|
+
|
36
|
+
% bimyou_segmenter -d "|" -e ""
|
37
|
+
昼飯のスパゲティナポリタンを眺めながら、積年の疑問を考えていた。それは「なぜナポリタンは赤いのだろうか」という問いである。
|
38
|
+
昼飯|の|スパゲティナポリタン|を|眺め|ながら|、|積年|の|疑問|を|考え|て|い|た|。|それ|は|「|なぜ|ナポリタン|は|赤い|の|だろ|う|か|」|という|問い|で|ある|。
|
39
|
+
|
40
|
+
% bimyou_segmenter --help
|
41
|
+
|
42
|
+
コマンドラインツールもあります。
|
data/bimyou_segmenter.gemspec
CHANGED
@@ -4,8 +4,8 @@ require File.expand_path('../lib/bimyou_segmenter/version', __FILE__)
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.authors = ["nagadomi"]
|
6
6
|
gem.email = ["nagadomi@nurs.or.jp"]
|
7
|
-
gem.description = %q{Japanese
|
8
|
-
gem.summary = %q{Japanese
|
7
|
+
gem.description = %q{Japanese Word Segmenter}
|
8
|
+
gem.summary = %q{Japanese Word Segmenter}
|
9
9
|
gem.homepage = "http://github.com/nagadomi/bimyou_segmenter"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split($\)
|
data/bin/bimyou_segmenter
CHANGED
@@ -3,7 +3,16 @@
|
|
3
3
|
if (RUBY_VERSION < "1.9.0")
|
4
4
|
require 'rubygems'
|
5
5
|
$KCODE= 'u'
|
6
|
+
# jruby not work...
|
7
|
+
if (RUBY_PLATFORM.downcase =~ /mswin(?!ce)|mingw|cygwin|bccwin/)
|
8
|
+
nkf_option = '-s'
|
9
|
+
else
|
10
|
+
nkf_option = '-w'
|
11
|
+
end
|
12
|
+
else
|
13
|
+
nkf_option = nil
|
6
14
|
end
|
15
|
+
require 'nkf'
|
7
16
|
require 'bimyou_segmenter'
|
8
17
|
require 'optparse'
|
9
18
|
|
@@ -21,6 +30,27 @@ opt = OptionParser.new do |opt|
|
|
21
30
|
opt.on('-e STR', '--eos STR', 'EOS string') do |v|
|
22
31
|
eos = v
|
23
32
|
end
|
33
|
+
opt.on('-c ENCODING', '--output-encoding ENCODING', 'output encoding (euc-jp, sjis, utf-8)') do |v|
|
34
|
+
encoding = nil
|
35
|
+
option = nil
|
36
|
+
case v.downcase
|
37
|
+
when /(sjis)|(shift_jis)/
|
38
|
+
encoding = 'Shift_JIS'
|
39
|
+
option = '-s'
|
40
|
+
when /utf\-?8/
|
41
|
+
encoding = 'UTF-8'
|
42
|
+
option = '-w'
|
43
|
+
when /euc/
|
44
|
+
encoding = 'EUC-JP'
|
45
|
+
option = '-e'
|
46
|
+
end
|
47
|
+
if (encoding && $stdout.respond_to?(:set_encoding))
|
48
|
+
$stdout.set_encoding(encoding, 'UTF-8')
|
49
|
+
nkf_option = nil
|
50
|
+
else
|
51
|
+
nkf_option = option
|
52
|
+
end
|
53
|
+
end
|
24
54
|
end
|
25
55
|
opt.version = BimyouSegmenter::VERSION
|
26
56
|
opt.banner = "Usage: bimyou_segmenter [options] [files]"
|
@@ -31,10 +61,14 @@ if (ARGV.size == 0)
|
|
31
61
|
until $stdin.eof?
|
32
62
|
line = $stdin.readline
|
33
63
|
if (line)
|
34
|
-
line = line.chomp
|
64
|
+
line = NKF.nkf('-w', line).chomp
|
65
|
+
s = [BimyouSegmenter.segment(line, :white_space => white_space), eos
|
66
|
+
].flatten(1).reject{|v| v.size == 0}.join(delimiter)
|
67
|
+
if (nkf_option)
|
68
|
+
s = NKF.nkf(nkf_option, s)
|
69
|
+
end
|
35
70
|
begin
|
36
|
-
puts
|
37
|
-
].flatten(1).reject{|v| v.size == 0}.join(delimiter)
|
71
|
+
puts s
|
38
72
|
rescue Errno::EPIPE
|
39
73
|
exit 0
|
40
74
|
end
|
@@ -44,9 +78,13 @@ else
|
|
44
78
|
ARGV.each do |file|
|
45
79
|
File.open(file) do |f|
|
46
80
|
until f.eof?
|
47
|
-
line = f.readline
|
48
|
-
|
49
|
-
|
81
|
+
line = NKF.nkf('-w', f.readline)
|
82
|
+
s = [BimyouSegmenter.segment(line, :white_space => white_space), eos
|
83
|
+
].flatten(1).reject{|v| v.size == 0}.join(delimiter)
|
84
|
+
if (nkf_option)
|
85
|
+
s = NKF.nkf(nkf_option, s)
|
86
|
+
end
|
87
|
+
puts s
|
50
88
|
end
|
51
89
|
end
|
52
90
|
end
|
data/lib/bimyou_segmenter.rb
CHANGED
metadata
CHANGED
@@ -1,36 +1,46 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bimyou_segmenter
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
version: 1.1.1
|
6
10
|
platform: ruby
|
7
|
-
authors:
|
11
|
+
authors:
|
8
12
|
- nagadomi
|
9
13
|
autorequire:
|
10
14
|
bindir: bin
|
11
15
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
16
|
+
|
17
|
+
date: 2012-05-28 00:00:00 +09:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
15
21
|
name: test-unit
|
16
|
-
requirement: &18169520 !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ! '>='
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: '0'
|
22
|
-
type: :development
|
23
22
|
prerelease: false
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
version: "0"
|
30
|
+
type: :development
|
31
|
+
version_requirements: *id001
|
32
|
+
description: Japanese Word Segmenter
|
33
|
+
email:
|
27
34
|
- nagadomi@nurs.or.jp
|
28
|
-
executables:
|
35
|
+
executables:
|
29
36
|
- bimyou_segmenter
|
30
37
|
extensions: []
|
38
|
+
|
31
39
|
extra_rdoc_files: []
|
32
|
-
|
40
|
+
|
41
|
+
files:
|
33
42
|
- .gitignore
|
43
|
+
- Changelog
|
34
44
|
- Gemfile
|
35
45
|
- LICENSE
|
36
46
|
- README.md
|
@@ -42,30 +52,36 @@ files:
|
|
42
52
|
- lib/bimyou_segmenter/version.rb
|
43
53
|
- test/bimyou_segmenter_test.rb
|
44
54
|
- test/test_helper.rb
|
55
|
+
has_rdoc: true
|
45
56
|
homepage: http://github.com/nagadomi/bimyou_segmenter
|
46
57
|
licenses: []
|
58
|
+
|
47
59
|
post_install_message:
|
48
60
|
rdoc_options: []
|
49
|
-
|
61
|
+
|
62
|
+
require_paths:
|
50
63
|
- lib
|
51
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
requirements:
|
60
|
-
- -
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
segments:
|
76
|
+
- 0
|
77
|
+
version: "0"
|
63
78
|
requirements: []
|
79
|
+
|
64
80
|
rubyforge_project:
|
65
|
-
rubygems_version: 1.
|
81
|
+
rubygems_version: 1.3.6
|
66
82
|
signing_key:
|
67
83
|
specification_version: 3
|
68
|
-
summary: Japanese
|
69
|
-
test_files:
|
84
|
+
summary: Japanese Word Segmenter
|
85
|
+
test_files:
|
70
86
|
- test/bimyou_segmenter_test.rb
|
71
87
|
- test/test_helper.rb
|