fluent-plugin-mecab 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 34b35b359184b25c580c59cf4c1a9c3cee2d2c54
4
+ data.tar.gz: 2c2a0485e441be79e87ab55f0943eff8e3eb5867
5
+ SHA512:
6
+ metadata.gz: 23f323c3d4673c50a8913ee6c2415a749544662ec6199982bead0ac44ab91438bf1dbf19944e714493fa373a17a8cc850d63b49880f62f3c34df505a56cf03d5
7
+ data.tar.gz: 10f3be2d08002527f847dc3718c2ad6d9e8411996ba66f4f9b92e2dc42b86b06eb0c483f4fa37a46cabcfd7cc7f9bd4b101cdc322854db5b2916f08d94466a8a
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fluent-plugin-mecab.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 MATSUMOTO Katsuyoshi
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,42 @@
1
+ # Fluent::Plugin::MeCab, a plugin for [Fluentd](http://fluentd.org)
2
+
3
+ fluentd plugin for [MeCab](http://mecab.googlecode.com).
4
+
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'fluent-plugin-mecab', git: 'git://github.com/katsyoshi/fluent-plugin-mecab.git'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install fluent-plugin-mecab
19
+
20
+ ## Usage
21
+
22
+ Configuration file:
23
+
24
+ <match japanese.string>
25
+ type mecab
26
+ parse_type default # default: default
27
+ key text # comma-separated record key(s) to parse
28
+ tag mecab.parse # default: mecab
29
+ </match>
30
+
31
+ ## TODO
32
+
33
+ 1. Implement other MeCab parse types, for example: wakati, chasen, and so on.
34
+ 1. gem
35
+
36
+ ## Contributing
37
+
38
+ 1. Fork it ( http://github.com/<my-github-username>/fluent-plugin-mecab/fork )
39
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
40
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
41
+ 4. Push to the branch (`git push origin my-new-feature`)
42
+ 5. Create new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'fluent/plugin/mecab/version'
5
+
6
# Gem specification for fluent-plugin-mecab: identity, packaged files and
# runtime/development dependencies.
Gem::Specification.new do |spec|
  spec.name          = "fluent-plugin-mecab"
  spec.version       = Fluent::Plugin::Mecab::VERSION
  spec.authors       = ["MATSUMOTO Katsuyoshi"]
  spec.email         = ["matsumoto.katsuyoshi+github@gmail.com"]
  spec.summary       = %q{fluentd plugin for MeCab}
  spec.description   = %q{fluentd plugin for MeCab}
  spec.homepage      = "http://github.com/katsyoshi/fluent-plugin-mecab"
  # Use the SPDX identifier rather than a free-form license name so that
  # RubyGems tooling can recognize it.
  # NOTE(review): the LICENSE.txt shipped with this gem contains the MIT
  # text -- confirm which license is actually intended.
  spec.license       = "Apache-2.0"

  # Package every file tracked by git (NUL-separated to survive odd names).
  spec.files         = `git ls-files -z`.split("\x0")
  # Executables are the basenames of tracked files under bin/.
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  # Test files are the tracked files under test/, spec/ or features/.
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "fluentd"
  spec.add_dependency "natto"
  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "pry"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "simplecov"
end
@@ -0,0 +1,46 @@
1
+ # coding: utf-8
2
+ require 'natto'
3
+
4
# Wrapper around Natto::MeCab that parses selected fields of a record and
# converts MeCab's textual output into arrays of hashes.
class MeCab
  # Layout of one line of MeCab's default output:
  #   surface form\tpart of speech,POS subclass 1,POS subclass 2,POS subclass 3,
  #   conjugation form,conjugation type,base form,reading,pronunciation
  # The last two fields are absent on some lines (see #default).
  BASE_PATTERN = "(?<word>.+)\t(?<part_of_speech>.+),(?<part_of_speech_subclassification1>.+),(?<part_of_speech_subclassification2>.+),(?<part_of_speech_subclassification3>.+),(?<inflected_forms>.+),(?<utilizing_types>.+),(?<original_word>.*)".freeze

  # Line without reading/pronunciation (7 feature fields).
  SHORT_LINE = /#{BASE_PATTERN}/
  # Line with reading and pronunciation (9 feature fields).
  FULL_LINE = /#{BASE_PATTERN},(?<reading>.*),(?<pronunciation>.*)/

  private_constant :BASE_PATTERN, :SHORT_LINE, :FULL_LINE

  # @param type [String] parse type; anything matching "default"
  #   (case-insensitively) uses MeCab's default output, any other value is
  #   passed to Natto as :output_format_type
  # @param keys [Array<String>] record keys whose values are parsed
  def initialize(type, keys)
    @type = type
    opt = {}
    # Case-insensitive so this agrees with the check in #result_format.
    opt[:output_format_type] = @type unless type =~ /default/i
    @mecab = Natto::MeCab.new(opt)
    @keys = keys
  end

  # Parses every configured key present in the record.
  #
  # @param record [Hash] record whose values are looked up by the configured keys
  # @return [Array<Hash>] one single-entry hash ({key => formatted result})
  #   per configured key found in the record; keys whose value is nil are
  #   skipped instead of handing nil to MeCab
  def parse(record)
    @keys.each_with_object([]) do |key, parsed|
      text = record[key]
      next if text.nil?

      parsed << { key => result_format(@mecab.parse(text.to_s)) }
    end
  end

  # Converts raw MeCab output into structured data according to the parse type.
  #
  # @param result [String] raw text returned by MeCab
  # @return [Array<Hash>, nil] for the default type, one hash of named fields
  #   per line that #default can match; nil for types that are not implemented
  def result_format(result)
    case @type
    when /default/i
      lines = result.split(/\n/)
      lines.pop # drop the last line (MeCab's trailing "EOS" marker)
      lines.map do |line|
        m = default(line)
        m ? Hash[m.names.zip(m.captures)] : nil
      end.compact
    when /chasen/i
      # TODO
    end
  end

  # Matches one line of MeCab's default output.
  #
  # The full 9-field layout is tried first, so a line that carries reading and
  # pronunciation is always split into the right fields, whatever character it
  # starts with. The 7-field layout is accepted only for lines that start with
  # an ASCII word character; other short lines yield nil.
  #
  # @param str [String] a single output line
  # @return [MatchData, nil] match with named captures, or nil when the line
  #   fits neither layout
  def default(str)
    FULL_LINE.match(str) || (str =~ /^\w/ ? SHORT_LINE.match(str) : nil)
  end

  # Placeholder for ChaSen-format parsing (not implemented). The method name
  # is kept as-is so existing callers are unaffected.
  def chansen(str)
    # TODO
  end
end
@@ -0,0 +1,7 @@
1
+ module Fluent
2
+ module Plugin
3
+ module Mecab
4
+ VERSION = "0.0.1"
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,39 @@
1
+ # coding: utf-8
2
module Fluent
  # Output plugin registered as "mecab". For each incoming record it runs the
  # configured key(s) through MeCab and re-emits every parse result under the
  # configured tag.
  class MeCabOutput < Output
    Plugin.register_output('mecab', self)

    # Parse type handed to the MeCab wrapper.
    config_param :parse_type, :string, default: "default"
    # Tag used for the re-emitted events.
    config_param :tag, :string, default: "mecab"
    # Comma-separated list of record keys to parse (required).
    config_param :key, :string

    def initialize
      super
      # Loaded at instantiation time rather than at file load.
      require 'fluent/plugin/mecab'
    end

    # Builds the MeCab wrapper from the parse type and the key list; the key
    # string is split on commas and each entry is stripped of whitespace.
    def configure(config)
      super
      @mecab = MeCab.new(@parse_type, @key.split(',').map { |name| name.strip })
    end

    def start
      super
    end

    def shutdown
      super
    end

    # Parses every record of the event stream and emits one event per parse
    # result, keeping the original event time.
    #
    # @param tag [String] tag of the incoming events (not used for output)
    # @param es event stream yielding (time, record) pairs
    # @param chain output chain supplied by Fluentd
    def emit(tag, es, chain)
      # Non-buffered outputs must advance the chain themselves; without this,
      # outputs chained after this one (e.g. via the copy plugin) never run.
      chain.next
      es.each do |time, record|
        parse(record).each do |mecab|
          Engine.emit(@tag, time, mecab)
        end
      end
    end

    # Delegates to the MeCab wrapper built in #configure.
    #
    # @param record [Hash] event record
    # @return [Array<Hash>] whatever MeCab#parse returns for the record
    def parse(record)
      @mecab.parse(record)
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # coding: utf-8
2
+ require File.expand_path(__dir__ + '/spec_helper')
3
+
4
+ describe Fluent::MeCabOutput do
5
+ let(:fluentd) { Fluent::Test::TestDriver.new(described_class) }
6
+ let(:config){ %[
7
+ type mecab
8
+ key text
9
+ ]
10
+ }
11
+ let(:conf){fluentd.configure(config).instance}
12
+
13
+ describe '#configure' do
14
+ context 'success' do
15
+ it 'parse type' do
16
+ expect(conf.parse_type).to eq("default")
17
+ end
18
+
19
+ it 'key' do
20
+ expect(conf.key).to eq('text')
21
+ end
22
+ end
23
+
24
+ context 'fail' do
25
+ let(:config){
26
+ %[
27
+ type mecab
28
+ parse_type hoge
29
+ key hoge
30
+ ]
31
+ }
32
+
33
+ it 'cannot run this parse type' do
34
+ expect{conf}.to raise_error(Natto::MeCabError)
35
+ end
36
+ end
37
+ end
38
+
39
+ describe '#parse' do
40
+ let(:jp) {
41
+ {'text' => "こんにちは世界", 'not keys' => 'さようなら人生'}
42
+ }
43
+ let(:en) {
44
+ {'text' => 'hello, world!?@#$%^&*()','not key' => 'good night, world!!'}
45
+ }
46
+
47
+ let(:mecab){ fluentd.configure(config).instance }
48
+ context 'parse japanese' do
49
+ let(:result_jp){
50
+ [{"text" => [{"word"=>"こんにちは","part_of_speech"=>"感動詞","part_of_speech_subclassification1"=>"*","part_of_speech_subclassification2"=>"*","part_of_speech_subclassification3"=>"*","inflected_forms"=>"*","utilizing_types"=>"*","original_word"=>"こんにちは","reading"=>"コンニチハ","pronunciation"=>"コンニチワ"},{"word"=>"世界","part_of_speech"=>"名詞","part_of_speech_subclassification1"=>"一般","part_of_speech_subclassification2"=>"*","part_of_speech_subclassification3"=>"*","inflected_forms"=>"*","utilizing_types"=>"*","original_word"=>"世界","reading"=>"セカイ","pronunciation"=>"セカイ"}]}]
51
+ }
52
+
53
+ it 'single key' do
54
+ expect(mecab.parse(jp)).to eq(result_jp)
55
+ end
56
+ end
57
+
58
+ context 'parse english' do
59
+ let(:result_en){
60
+ [{"text"=>[{"word"=>"hello", "part_of_speech"=>"名詞", "part_of_speech_subclassification1"=>"固有名詞", "part_of_speech_subclassification2"=>"組織", "part_of_speech_subclassification3"=>"*", "inflected_forms"=>"*", "utilizing_types"=>"*", "original_word"=>"*"}, {"word"=>"world", "part_of_speech"=>"名詞", "part_of_speech_subclassification1"=>"一般", "part_of_speech_subclassification2"=>"*", "part_of_speech_subclassification3"=>"*", "inflected_forms"=>"*", "utilizing_types"=>"*", "original_word"=>"*"}]}]
61
+ }
62
+ it 'single key' do
63
+ expect(mecab.parse(en)).to eq(result_en)
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,4 @@
1
+ $:.unshift File.expand_path(File.join(__dir__, '..', 'lib'))
2
+ require 'fluent/test'
3
+ require 'fluent/plugin/out_mecab'
4
+
metadata ADDED
@@ -0,0 +1,155 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-mecab
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - MATSUMOTO Katsuyoshi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fluentd
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: natto
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.5'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.5'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: simplecov
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: fluentd plugin for MeCab
112
+ email:
113
+ - matsumoto.katsuyoshi+github@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - Gemfile
120
+ - LICENSE.txt
121
+ - README.md
122
+ - Rakefile
123
+ - fluent-plugin-mecab.gemspec
124
+ - lib/fluent/plugin/mecab.rb
125
+ - lib/fluent/plugin/mecab/version.rb
126
+ - lib/fluent/plugin/out_mecab.rb
127
+ - spec/out_mecab_spec.rb
128
+ - spec/spec_helper.rb
129
+ homepage: http://github.com/katsyoshi/fluent-plugin-mecab
130
+ licenses:
131
+ - Apache License, Version 2.0
132
+ metadata: {}
133
+ post_install_message:
134
+ rdoc_options: []
135
+ require_paths:
136
+ - lib
137
+ required_ruby_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ required_rubygems_version: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - ">="
145
+ - !ruby/object:Gem::Version
146
+ version: '0'
147
+ requirements: []
148
+ rubyforge_project:
149
+ rubygems_version: 2.2.0
150
+ signing_key:
151
+ specification_version: 4
152
+ summary: fluentd plugin for MeCab
153
+ test_files:
154
+ - spec/out_mecab_spec.rb
155
+ - spec/spec_helper.rb