fluent-plugin-kuromoji 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.travis.yml +8 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +3 -0
- data/LICENSE +22 -0
- data/README.md +100 -0
- data/Rakefile +15 -0
- data/fluent-plugin-kuromoji.gemspec +28 -0
- data/lib/fluent/plugin/out_kuromoji.rb +30 -0
- data/sample/fluent.conf +26 -0
- data/sample/userdict.txt +13 -0
- data/spec/out_kuromoji_spec.rb +22 -0
- data/spec/spec_helper.rb +28 -0
- metadata +157 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 02f4aa94cb85ca45457cdb3f263dc5a0ea8504d6
|
4
|
+
data.tar.gz: 24f2acecfa81b4e2a73194d42ba3f7e90ad1d9eb
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 55347f310d7f2467072864bbd1e7aef4feaf9cb08f785d5c912b58675611575619c42f9da91cb3402c39f8ee27b2f4a4527569a0aa3ba5d946b2f38b405f3dc8
|
7
|
+
data.tar.gz: 1481afbb3dd6e2d2219bc4b794112fb63c28ea31226c1cfb7727bf581d342188f0725f27b469174ac458e80f73486d8f7f63eeb6a2cd0a55238ec976bed6c585
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Hiroshi Toyama
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
|
2
|
+
# fluent-plugin-kuromoji
|
3
|
+
|
4
|
+
Output kuromoji analysis Plugin for fluentd
|
5
|
+
|
6
|
+
Requires Java!
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
### td-agent(Linux)
|
11
|
+
|
12
|
+
/usr/lib64/fluent/ruby/bin/fluent-gem install fluent-plugin-kuromoji
|
13
|
+
|
14
|
+
### td-agent(Mac)
|
15
|
+
|
16
|
+
sudo /usr/local/Cellar/td-agent/X.X.XX/bin/fluent-gem install fluent-plugin-kuromoji
|
17
|
+
|
18
|
+
### fluentd only(bundler)
|
19
|
+
|
20
|
+
```ruby:Gemfile
|
21
|
+
gem 'fluent-plugin-kuromoji'
|
22
|
+
```
|
23
|
+
|
24
|
+
## quick start
|
25
|
+
|
26
|
+
```bash
|
27
|
+
$ git clone https://github.com/toyama0919/fluent-plugin-kuromoji
|
28
|
+
$ cd fluent-plugin-kuromoji
|
29
|
+
$ bundle install
|
30
|
+
$ bundle exec fluentd -c sample/fluent.conf -p lib/fluent/plugin/
|
31
|
+
$ echo '{ "message":"私はサッカーが大好きです" }' | fluent-cat kuromoji.input
|
32
|
+
```
|
33
|
+
|
34
|
+
## parameter
|
35
|
+
|
36
|
+
param | value
|
37
|
+
--------|------
|
38
|
+
target_key|analyze target key (required)
|
39
|
+
add_tag_prefix|output tag prefix (required)
|
40
|
+
dictionary_path|kuromoji dictionary path
|
41
|
+
|
42
|
+
## setting
|
43
|
+
```
|
44
|
+
<match kuromoji.input>
|
45
|
+
type kuromoji
|
46
|
+
target_key message
|
47
|
+
add_tag_prefix analyzed
|
48
|
+
dictionary_path sample/userdict.txt
|
49
|
+
</match>
|
50
|
+
|
51
|
+
<match analyzed.kuromoji.input>
|
52
|
+
type stdout
|
53
|
+
</match>
|
54
|
+
```
|
55
|
+
|
56
|
+
## usage
|
57
|
+
|
58
|
+
```bash
|
59
|
+
echo '{ "message":"私は朝青龍が大好きです" }' | fluent-cat kuromoji.input
|
60
|
+
|
61
|
+
# 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"名詞,代名詞,一般,*","reading":"ワタシ","base_form":"私","surface_form":"私","position":0,"is_known":true,"is_user":false}
|
62
|
+
# 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"助詞,係助詞,*,*","reading":"ハ","base_form":"は","surface_form":"は","position":1,"is_known":true,"is_user":false}
|
63
|
+
# 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"カスタム名詞","reading":"アサショウリュウ","base_form":null,"surface_form":"朝青龍","position":2,"is_known":false,"is_user":true}
|
64
|
+
# 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"助詞,格助詞,一般,*","reading":"ガ","base_form":"が","surface_form":"が","position":5,"is_known":true,"is_user":false}
|
65
|
+
# 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"名詞,形容動詞語幹,*,*","reading":"ダイスキ","base_form":"大好き","surface_form":"大好き","position":6,"is_known":true,"is_user":false}
|
66
|
+
# 2014-06-21 19:20:00 +0900 analyzed.kuromoji.input: {"parts_of_speech":"助動詞,*,*,*","reading":"デス","base_form":"です","surface_form":"です","position":9,"is_known":true,"is_user":false}
|
67
|
+
```
|
68
|
+
|
69
|
+
## dictionary example
|
70
|
+
```csv
|
71
|
+
##
|
72
|
+
## This file should use UTF-8 encoding
|
73
|
+
##
|
74
|
+
## User dictionary format:
|
75
|
+
## <text>,<token1> <token2> ... <tokenn>,<reading1> <reading2> ... <readingn>,<part-of-speech>
|
76
|
+
##
|
77
|
+
|
78
|
+
# Custom segmentation for long entries
|
79
|
+
日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
|
80
|
+
関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,テスト名詞
|
81
|
+
|
82
|
+
# Custom reading for former sumo wrestler Asashoryu
|
83
|
+
朝青龍,朝青龍,アサショウリュウ,カスタム名詞
|
84
|
+
```
|
85
|
+
|
86
|
+
## ChangeLog
|
87
|
+
|
88
|
+
See [CHANGELOG.md](CHANGELOG.md) for details.
|
89
|
+
|
90
|
+
## Contributing
|
91
|
+
|
92
|
+
1. Fork it
|
93
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
94
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
95
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
96
|
+
5. Create new [Pull Request](../../pull/new/master)
|
97
|
+
|
98
|
+
## Copyright
|
99
|
+
|
100
|
+
Copyright (c) 2014 Hiroshi Toyama. See [LICENSE](LICENSE) for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
|
4
|
+
require 'rspec/core'
|
5
|
+
require 'rspec/core/rake_task'
|
6
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
7
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
8
|
+
end
|
9
|
+
task :default => :spec
|
10
|
+
|
11
|
+
desc 'Open an irb session preloaded with the gem library'
|
12
|
+
task :console do
|
13
|
+
sh 'irb -rubygems -I lib'
|
14
|
+
end
|
15
|
+
task :c => :console
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
$:.push File.expand_path('../lib', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |gem|
|
5
|
+
gem.name = "fluent-plugin-kuromoji"
|
6
|
+
gem.version = "0.0.1"
|
7
|
+
gem.authors = ["Hiroshi Toyama"]
|
8
|
+
gem.email = "toyama0919@gmail.com"
|
9
|
+
gem.homepage = "https://github.com/toyama0919/fluent-plugin-kuromoji"
|
10
|
+
gem.description = "Output kuromoji analysis Plugin for fluentd"
|
11
|
+
gem.summary = "Output kuromoji analysis Plugin for fluentd"
|
12
|
+
gem.licenses = ["MIT"]
|
13
|
+
gem.has_rdoc = false
|
14
|
+
|
15
|
+
gem.files = `git ls-files`.split("\n")
|
16
|
+
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
|
+
gem.require_paths = ['lib']
|
19
|
+
|
20
|
+
gem.add_runtime_dependency "fluentd"
|
21
|
+
gem.add_runtime_dependency "kuromoji-ruby"
|
22
|
+
gem.add_development_dependency "bundler"
|
23
|
+
gem.add_development_dependency "rake"
|
24
|
+
gem.add_development_dependency "spork"
|
25
|
+
gem.add_development_dependency "pry"
|
26
|
+
gem.add_development_dependency "fluent-plugin-twitter"
|
27
|
+
|
28
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
module Fluent
|
3
|
+
class KuromojiOutput < Output
|
4
|
+
Fluent::Plugin.register_output('kuromoji', self)
|
5
|
+
|
6
|
+
config_param :target_key, :string
|
7
|
+
config_param :add_tag_prefix, :string
|
8
|
+
config_param :dictionary_path, :string
|
9
|
+
|
10
|
+
def initialize
|
11
|
+
super
|
12
|
+
require 'kuromoji'
|
13
|
+
end
|
14
|
+
|
15
|
+
def configure(conf)
|
16
|
+
super
|
17
|
+
@core = Kuromoji::Core.new(@dictionary_path)
|
18
|
+
end
|
19
|
+
|
20
|
+
def emit(tag, es, chain)
|
21
|
+
es.each do |time, record|
|
22
|
+
tokens = @core.tokenize_with_hash(record[@target_key])
|
23
|
+
tokens.each do |token|
|
24
|
+
Fluent::Engine.emit(@add_tag_prefix + '.' + tag, time, token)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
chain.next
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/sample/fluent.conf
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
<source>
|
2
|
+
type forward
|
3
|
+
port 24224
|
4
|
+
</source>
|
5
|
+
|
6
|
+
#<source>
|
7
|
+
# type twitter
|
8
|
+
# consumer_key "#{ENV['TWITTER_CONSUMER_KEY']}" # Required
|
9
|
+
# consumer_secret "#{ENV['TWITTER_CONSUMER_SECRET']}" # Required
|
10
|
+
# oauth_token "#{ENV['OAUTH_TOKEN']}" # Required
|
11
|
+
# oauth_token_secret "#{ENV['OAUTH_TOKEN_SECRET']}" # Required
|
12
|
+
# tag kuromoji.input # Required
|
13
|
+
# timeline userstream # Required (sampling or userstream)
|
14
|
+
# output_format nest
|
15
|
+
#</source>
|
16
|
+
|
17
|
+
<match kuromoji.input>
|
18
|
+
type kuromoji
|
19
|
+
target_key message
|
20
|
+
add_tag_prefix analyzed
|
21
|
+
dictionary_path sample/userdict.txt
|
22
|
+
</match>
|
23
|
+
|
24
|
+
<match analyzed.kuromoji.input>
|
25
|
+
type stdout
|
26
|
+
</match>
|
data/sample/userdict.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
##
|
2
|
+
## This file should use UTF-8 encoding
|
3
|
+
##
|
4
|
+
## User dictionary format:
|
5
|
+
## <text>,<token1> <token2> ... <tokenn>,<reading1> <reading2> ... <readingn>,<part-of-speech>
|
6
|
+
##
|
7
|
+
|
8
|
+
# Custom segmentation for long entries
|
9
|
+
日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
|
10
|
+
関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,テスト名詞
|
11
|
+
|
12
|
+
# Custom reading for former sumo wrestler Asashoryu
|
13
|
+
朝青龍,朝青龍,アサショウリュウ,カスタム名詞
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
class KuromojiOutputTest < Test::Unit::TestCase
|
5
|
+
def setup
|
6
|
+
Fluent::Test.setup
|
7
|
+
end
|
8
|
+
|
9
|
+
def create_driver(conf = CONFIG, tag = 'test')
|
10
|
+
d = Fluent::Test::BufferedOutputTestDriver.new(Fluent::KuromojiOutput, tag).configure(conf)
|
11
|
+
d
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_configure_error
|
15
|
+
assert_raise(Fluent::ConfigError) do
|
16
|
+
d = create_driver %[
|
17
|
+
target_key message
|
18
|
+
]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts 'Run `bundle install` to install missing gems'
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'test/unit'
|
11
|
+
|
12
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
13
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
|
+
require 'fluent/test'
|
15
|
+
unless ENV.key?('VERBOSE')
|
16
|
+
nulllogger = Object.new
|
17
|
+
nulllogger.instance_eval do|obj|
|
18
|
+
def method_missing(method, *args)
|
19
|
+
# pass
|
20
|
+
end
|
21
|
+
end
|
22
|
+
$log = nulllogger
|
23
|
+
end
|
24
|
+
|
25
|
+
require 'fluent/plugin/out_kuromoji'
|
26
|
+
|
27
|
+
class Test::Unit::TestCase
|
28
|
+
end
|
metadata
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fluent-plugin-kuromoji
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Hiroshi Toyama
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-06-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: fluentd
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: kuromoji-ruby
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: spork
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: fluent-plugin-twitter
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: Output kuromoji analysis Plugin for fluentd
|
112
|
+
email: toyama0919@gmail.com
|
113
|
+
executables: []
|
114
|
+
extensions: []
|
115
|
+
extra_rdoc_files: []
|
116
|
+
files:
|
117
|
+
- ".gitignore"
|
118
|
+
- ".rspec"
|
119
|
+
- ".travis.yml"
|
120
|
+
- CHANGELOG.md
|
121
|
+
- Gemfile
|
122
|
+
- LICENSE
|
123
|
+
- README.md
|
124
|
+
- Rakefile
|
125
|
+
- fluent-plugin-kuromoji.gemspec
|
126
|
+
- lib/fluent/plugin/out_kuromoji.rb
|
127
|
+
- sample/fluent.conf
|
128
|
+
- sample/userdict.txt
|
129
|
+
- spec/out_kuromoji_spec.rb
|
130
|
+
- spec/spec_helper.rb
|
131
|
+
homepage: https://github.com/toyama0919/fluent-plugin-kuromoji
|
132
|
+
licenses:
|
133
|
+
- MIT
|
134
|
+
metadata: {}
|
135
|
+
post_install_message:
|
136
|
+
rdoc_options: []
|
137
|
+
require_paths:
|
138
|
+
- lib
|
139
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
140
|
+
requirements:
|
141
|
+
- - ">="
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
144
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
145
|
+
requirements:
|
146
|
+
- - ">="
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
version: '0'
|
149
|
+
requirements: []
|
150
|
+
rubyforge_project:
|
151
|
+
rubygems_version: 2.2.2
|
152
|
+
signing_key:
|
153
|
+
specification_version: 4
|
154
|
+
summary: Output kuromoji analysis Plugin for fluentd
|
155
|
+
test_files:
|
156
|
+
- spec/out_kuromoji_spec.rb
|
157
|
+
- spec/spec_helper.rb
|