fluent-plugin-kuromoji 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 02f4aa94cb85ca45457cdb3f263dc5a0ea8504d6
4
+ data.tar.gz: 24f2acecfa81b4e2a73194d42ba3f7e90ad1d9eb
5
+ SHA512:
6
+ metadata.gz: 55347f310d7f2467072864bbd1e7aef4feaf9cb08f785d5c912b58675611575619c42f9da91cb3402c39f8ee27b2f4a4527569a0aa3ba5d946b2f38b405f3dc8
7
+ data.tar.gz: 1481afbb3dd6e2d2219bc4b794112fb63c28ea31226c1cfb7727bf581d342188f0725f27b469174ac458e80f73486d8f7f63eeb6a2cd0a55238ec976bed6c585
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /*.gem
2
+ ~*
3
+ #*
4
+ *~
5
+ .bundle
6
+ Gemfile.lock
7
+ .rbenv-version
8
+ vendor
9
+ doc/*
10
+ tmp/*
11
+ coverage
12
+ .yardoc
13
+ pkg/
14
+ .ruby-version
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --colour
2
+ --format documentation
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ rvm:
2
+ - 1.9.2
3
+ - 1.9.3
4
+ - 2.0.0
5
+ - 2.1.0
6
+ gemfile:
7
+ - Gemfile
8
+ script: "rake spec"
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.0.1 (2014/06/21)
2
+
3
+ First release
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "http://rubygems.org"
2
+
3
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Hiroshi Toyama
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,100 @@
1
+
2
+ # fluent-plugin-kuromoji
3
+
4
+ Output kuromoji analysis Plugin for fluentd
5
+
6
+ Requires Java.
7
+
8
+ ## Installation
9
+
10
+ ### td-agent(Linux)
11
+
12
+ /usr/lib64/fluent/ruby/bin/fluent-gem install fluent-plugin-kuromoji
13
+
14
+ ### td-agent(Mac)
15
+
16
+ sudo /usr/local/Cellar/td-agent/X.X.XX/bin/fluent-gem install fluent-plugin-kuromoji
17
+
18
+ ### fluentd only(bundler)
19
+
20
+ ```ruby:Gemfile
21
+ gem 'fluent-plugin-kuromoji'
22
+ ```
23
+
24
+ ## quick start
25
+
26
+ ```bash
27
+ $ git clone https://github.com/toyama0919/fluent-plugin-kuromoji
28
+ $ cd fluent-plugin-kuromoji
29
+ $ bundle install
30
+ $ bundle exec fluentd -c sample/fluent.conf -p lib/fluent/plugin/
31
+ $ echo '{ "message":"私はサッカーが大好きです" }' | fluent-cat kuromoji.input
32
+ ```
33
+
34
+ ## parameter
35
+
36
+ param | value
37
+ --------|------
38
+ target_key|key of the record field to analyze (required)
39
+ add_tag_prefix|prefix added to the output tag (required)
40
+ dictionary_path|kuromoji dictionary path (required)
41
+
42
+ ## setting
43
+ ```
44
+ <match kuromoji.input>
45
+ type kuromoji
46
+ target_key message
47
+ add_tag_prefix analyzed
48
+ dictionary_path sample/userdict.txt
49
+ </match>
50
+
51
+ <match analyzed.kuromoji.input>
52
+ type stdout
53
+ </match>
54
+ ```
55
+
56
+ ## usage
57
+
58
+ ```bash
59
+ echo '{ "message":"私は朝青龍が大好きです" }' | fluent-cat kuromoji.input
60
+
61
+ # 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"名詞,代名詞,一般,*","reading":"ワタシ","base_form":"私","surface_form":"私","position":0,"is_known":true,"is_user":false}
62
+ # 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"助詞,係助詞,*,*","reading":"ハ","base_form":"は","surface_form":"は","position":1,"is_known":true,"is_user":false}
63
+ # 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"カスタム名詞","reading":"アサショウリュウ","base_form":null,"surface_form":"朝青龍","position":2,"is_known":false,"is_user":true}
64
+ # 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"助詞,格助詞,一般,*","reading":"ガ","base_form":"が","surface_form":"が","position":5,"is_known":true,"is_user":false}
65
+ # 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"名詞,形容動詞語幹,*,*","reading":"ダイスキ","base_form":"大好き","surface_form":"大好き","position":6,"is_known":true,"is_user":false}
66
+ # 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"助動詞,*,*,*","reading":"デス","base_form":"です","surface_form":"です","position":9,"is_known":true,"is_user":false}
67
+ ```
68
+
69
+ ## dictionary example
70
+ ```csv
71
+ ##
72
+ ## This file should use UTF-8 encoding
73
+ ##
74
+ ## User dictionary format:
75
+ ## <text>,<token1> <token2> ... <tokenn>,<reading1> <reading2> ... <readingn>,<part-of-speech>
76
+ ##
77
+
78
+ # Custom segmentation for long entries
79
+ 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
80
+ 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,テスト名詞
81
+
82
+ # Custom reading for former sumo wrestler Asashoryu
83
+ 朝青龍,朝青龍,アサショウリュウ,カスタム名詞
84
+ ```
85
+
86
+ ## ChangeLog
87
+
88
+ See [CHANGELOG.md](CHANGELOG.md) for details.
89
+
90
+ ## Contributing
91
+
92
+ 1. Fork it
93
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
94
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
95
+ 4. Push to the branch (`git push origin my-new-feature`)
96
+ 5. Create new [Pull Request](../../pull/new/master)
97
+
98
+ ## Copyright
99
+
100
+ Copyright (c) 2014 Hiroshi Toyama. See [LICENSE](LICENSE) for details.
data/Rakefile ADDED
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rspec/core'
5
+ require 'rspec/core/rake_task'
6
+ RSpec::Core::RakeTask.new(:spec) do |spec|
7
+ spec.pattern = FileList['spec/**/*_spec.rb']
8
+ end
9
+ task :default => :spec
10
+
11
+ desc 'Open an irb session preloaded with the gem library'
12
+ task :console do
13
+ sh 'irb -rubygems -I lib'
14
+ end
15
+ task :c => :console
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+ $:.push File.expand_path('../lib', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.name = "fluent-plugin-kuromoji"
6
+ gem.version = "0.0.1"
7
+ gem.authors = ["Hiroshi Toyama"]
8
+ gem.email = "toyama0919@gmail.com"
9
+ gem.homepage = "https://github.com/toyama0919/fluent-plugin-kuromoji"
10
+ gem.description = "Output kuromoji analysis Plugin for fluentd"
11
+ gem.summary = "Output kuromoji analysis Plugin for fluentd"
12
+ gem.licenses = ["MIT"]
13
+ gem.has_rdoc = false
14
+
15
+ gem.files = `git ls-files`.split("\n")
16
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
17
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
+ gem.require_paths = ['lib']
19
+
20
+ gem.add_runtime_dependency "fluentd"
21
+ gem.add_runtime_dependency "kuromoji-ruby"
22
+ gem.add_development_dependency "bundler"
23
+ gem.add_development_dependency "rake"
24
+ gem.add_development_dependency "spork"
25
+ gem.add_development_dependency "pry"
26
+ gem.add_development_dependency "fluent-plugin-twitter"
27
+
28
+ end
@@ -0,0 +1,30 @@
1
+ # coding: UTF-8
2
+ module Fluent
3
+ class KuromojiOutput < Output
4
+ Fluent::Plugin.register_output('kuromoji', self)
5
+
6
+ config_param :target_key, :string
7
+ config_param :add_tag_prefix, :string
8
+ config_param :dictionary_path, :string
9
+
10
+ def initialize
11
+ super
12
+ require 'kuromoji'
13
+ end
14
+
15
+ def configure(conf)
16
+ super
17
+ @core = Kuromoji::Core.new(@dictionary_path)
18
+ end
19
+
20
+ def emit(tag, es, chain)
21
+ es.each do |time, record|
22
+ tokens = @core.tokenize_with_hash(record[@target_key])
23
+ tokens.each do |token|
24
+ Fluent::Engine.emit(@add_tag_prefix + '.' + tag, time, token)
25
+ end
26
+ end
27
+ chain.next
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,26 @@
1
+ <source>
2
+ type forward
3
+ port 24224
4
+ </source>
5
+
6
+ #<source>
7
+ # type twitter
8
+ # consumer_key "#{ENV['TWITTER_CONSUMER_KEY']}" # Required
9
+ # consumer_secret "#{ENV['TWITTER_CONSUMER_SECRET']}" # Required
10
+ # oauth_token "#{ENV['OAUTH_TOKEN']}" # Required
11
+ # oauth_token_secret "#{ENV['OAUTH_TOKEN_SECRET']}" # Required
12
+ # tag kuromoji.input # Required
13
+ # timeline userstream # Required (sampling or userstream)
14
+ # output_format nest
15
+ #</source>
16
+
17
+ <match kuromoji.input>
18
+ type kuromoji
19
+ target_key message
20
+ add_tag_prefix analyzed
21
+ dictionary_path sample/userdict.txt
22
+ </match>
23
+
24
+ <match analyzed.kuromoji.input>
25
+ type stdout
26
+ </match>
@@ -0,0 +1,13 @@
1
+ ##
2
+ ## This file should use UTF-8 encoding
3
+ ##
4
+ ## User dictionary format:
5
+ ## <text>,<token1> <token2> ... <tokenn>,<reading1> <reading2> ... <readingn>,<part-of-speech>
6
+ ##
7
+
8
+ # Custom segmentation for long entries
9
+ 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
10
+ 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,テスト名詞
11
+
12
+ # Custom reading for former sumo wrestler Asashoryu
13
+ 朝青龍,朝青龍,アサショウリュウ,カスタム名詞
@@ -0,0 +1,22 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ class KuromojiOutputTest < Test::Unit::TestCase
5
+ def setup
6
+ Fluent::Test.setup
7
+ end
8
+
9
+ def create_driver(conf = CONFIG, tag = 'test')
10
+ d = Fluent::Test::BufferedOutputTestDriver.new(Fluent::KuromojiOutput, tag).configure(conf)
11
+ d
12
+ end
13
+
14
+ def test_configure_error
15
+ assert_raise(Fluent::ConfigError) do
16
+ d = create_driver %[
17
+ target_key message
18
+ ]
19
+ end
20
+ end
21
+
22
+ end
@@ -0,0 +1,28 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts 'Run `bundle install` to install missing gems'
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+
12
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
13
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
14
+ require 'fluent/test'
15
+ unless ENV.key?('VERBOSE')
16
+ nulllogger = Object.new
17
+ nulllogger.instance_eval do|obj|
18
+ def method_missing(method, *args)
19
+ # pass
20
+ end
21
+ end
22
+ $log = nulllogger
23
+ end
24
+
25
+ require 'fluent/plugin/out_kuromoji'
26
+
27
+ class Test::Unit::TestCase
28
+ end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-kuromoji
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Hiroshi Toyama
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fluentd
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: kuromoji-ruby
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: spork
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: fluent-plugin-twitter
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Output kuromoji analysis Plugin for fluentd
112
+ email: toyama0919@gmail.com
113
+ executables: []
114
+ extensions: []
115
+ extra_rdoc_files: []
116
+ files:
117
+ - ".gitignore"
118
+ - ".rspec"
119
+ - ".travis.yml"
120
+ - CHANGELOG.md
121
+ - Gemfile
122
+ - LICENSE
123
+ - README.md
124
+ - Rakefile
125
+ - fluent-plugin-kuromoji.gemspec
126
+ - lib/fluent/plugin/out_kuromoji.rb
127
+ - sample/fluent.conf
128
+ - sample/userdict.txt
129
+ - spec/out_kuromoji_spec.rb
130
+ - spec/spec_helper.rb
131
+ homepage: https://github.com/toyama0919/fluent-plugin-kuromoji
132
+ licenses:
133
+ - MIT
134
+ metadata: {}
135
+ post_install_message:
136
+ rdoc_options: []
137
+ require_paths:
138
+ - lib
139
+ required_ruby_version: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ required_rubygems_version: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: '0'
149
+ requirements: []
150
+ rubyforge_project:
151
+ rubygems_version: 2.2.2
152
+ signing_key:
153
+ specification_version: 4
154
+ summary: Output kuromoji analysis Plugin for fluentd
155
+ test_files:
156
+ - spec/out_kuromoji_spec.rb
157
+ - spec/spec_helper.rb