fluent-plugin-bigobject_avro 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 326049b14af5b95c7d140d443e3d1f3a965612d3
4
+ data.tar.gz: be90f9fe1309aba2ea90ccd10071cb265081ac7d
5
+ SHA512:
6
+ metadata.gz: 5be4be8a7093e6d91fd17e2e95d1551530c2a801c2d9e7231a660136b399f3d1a4c8d694511500b8da2e4cb5a6414af3eae7b4ce990ec89d15c4ea317e621533
7
+ data.tar.gz: 19145ab4285e976a6abbf12a9a7c2abc588bd1d61bb2ea91f52590135c22de451caa71590cef08bc48df9933ce3ca62ecb40f82cb1811ebc1312da4567da85ed
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fluent-plugin-bigobject_avro.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 BigObject
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # fluent-plugin-bigobject_avro
2
+
3
+ Fluentd output plugin for inserting data into BigObject using Avro.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'fluent-plugin-bigobject_avro'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install fluent-plugin-bigobject_avro
20
+
21
+ ## Usage
22
+
23
+ Configure the BigObject host and port, then add one `<table>` section per table with the tag pattern to match and the Avro schema file to use:
24
+
25
+ ```apache
26
+
27
+ # send data to BigObject using avro by providing schema_file in each table
28
+ # BigObject will create the table if it does not already exist.
29
+
30
+ <match bo.insert_avro.*>
31
+ type bigobject_avro
32
+
33
+ log_level info
34
+
35
+ # specify the bigobject host/port to connect to
36
+ bigobject_hostname 192.168.59.103
37
+ bigobject_port 9091
38
+
39
+ remove_tag_prefix bo.insert_avro.
40
+ flush_interval 60s
41
+
42
+ <table>
43
+ pattern customer
44
+ schema_file /fluentd/input/avsc/Customer_binary.avsc
45
+ </table>
46
+ </match>
47
+
48
+ ```
49
+
50
+
51
+ ## License
52
+
53
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
54
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ Gem::Specification.new do |gem| # packaging metadata for the fluent-plugin-bigobject_avro gem
3
+   gem.name = "fluent-plugin-bigobject_avro"
4
+   gem.version = "0.0.1"
5
+   gem.authors = ["Andrea Sung"]
6
+   gem.email = ["andrea@bigobject.io"]
7
+   gem.description = %q{Fluentd output plugin to insert BIGOBJECT }
8
+   gem.summary = %q{Fluentd output plugin to insert BIGOBJECT}
9
+   gem.homepage = "https://github.com/bigobject-inc/fluent-plugin-bigobject_avro"
10
+   gem.license = "MIT"
11
+
12
+   gem.files = `git ls-files`.split($\) # file list comes from git, so building needs a git checkout; $\ defaults to nil, so this splits on whitespace
13
+   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) } # basenames of the packaged files under bin/
14
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) # packaged files under test/, spec/ or features/
15
+   gem.require_paths = ["lib"]
16
+
17
+   gem.add_runtime_dependency "fluentd" # none of the dependencies below carries a version constraint
18
+   gem.add_runtime_dependency "json"
19
+   gem.add_runtime_dependency "avro"
20
+   gem.add_development_dependency "rake"
21
+ end
@@ -0,0 +1,187 @@
1
+ # Fluentd buffered output plugin that serializes events as Avro data files
2
+ # and writes them to a BigObject server over a plain TCP socket.
3
+ #
4
+ # Every <table> section of the plugin configuration becomes a TableElement;
5
+ # a buffer chunk is handed to the first table whose pattern matches its key.
6
+ class Fluent::BigObjectOutput_AVRO < Fluent::BufferedOutput
7
+
8
+   Fluent::Plugin.register_output('bigobject_avro', self)
9
+
10
+   include Fluent::SetTimeKeyMixin
11
+   include Fluent::SetTagKeyMixin
12
+
13
+   config_param :bigobject_hostname, :string
14
+   config_param :bigobject_port, :integer
15
+   config_param :remove_tag_prefix, :string, :default => nil
16
+   # Typed as :bool so the declared type agrees with the boolean default.
17
+   # NOTE: nothing in this class reads this option yet.
18
+   config_param :send_unknown_chunks, :bool, :default => true
19
+
20
+   attr_accessor :tables
21
+
22
+   # Fall back to the global $log when the base class does not define #log.
23
+   unless method_defined?(:log)
24
+     define_method(:log) { $log }
25
+   end
26
+
27
+   # One <table> section: holds the Avro schema, the optional column mapping
28
+   # and the host/port used to ship matching chunks.
29
+   class TableElement
30
+     include Fluent::Configurable
31
+
32
+     config_param :column_mapping, :string, :default => nil
33
+     config_param :pattern, :string, :default => nil
34
+     config_param :schema_file, :string
35
+
36
+     attr_reader :mpattern
37
+
38
+     # log         - logger receiving info/debug/error messages
39
+     # bo_hostname - BigObject host to connect to
40
+     # bo_port     - BigObject TCP port
41
+     def initialize(log, bo_hostname, bo_port)
42
+       super()
43
+       @log = log
44
+       @bo_hostname = bo_hostname
45
+       @bo_port = bo_port
46
+     end
47
+
48
+     # Parses the Avro schema named by schema_file and builds the tag matcher
49
+     # and the optional column mapping.
50
+     def configure(conf)
51
+       super
52
+
53
+       # File.binread closes the file again; File.open(...).read left the
54
+       # handle open until garbage collection.
55
+       @avro_schema = Avro::Schema.parse(File.binread(@schema_file))
56
+       @avro_writer = Avro::IO::DatumWriter.new(@avro_schema)
57
+
58
+       @mpattern = Fluent::MatchPattern.create(pattern)
59
+       @mapping = @column_mapping.nil? ? nil : parse_column_mapping(@column_mapping)
60
+       @log.info("column mapping for #{pattern} - #{@mapping}")
61
+     end
62
+
63
+     # Serializes every event of the chunk into one Avro data file and writes
64
+     # it to BigObject over TCP. Raises RuntimeError when the transfer fails.
65
+     def send_binary(chunk)
66
+       buffer = StringIO.new
67
+       dw = Avro::DataFile::Writer.new(buffer, @avro_writer, @avro_schema)
68
+       rows = 0
69
+       chunk.msgpack_each { |_tag, _time, data|
70
+         dw << map_record(data)
71
+         rows += 1
72
+       }
73
+       dw.flush
74
+
75
+       begin
76
+         socket = TCPSocket.open(@bo_hostname, @bo_port)
77
+         begin
78
+           # linger for up to 60 seconds on close
79
+           opt = [1, 60].pack('I!I!') # { int l_onoff; int l_linger; }
80
+           socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
81
+
82
+           # 60 second send timeout
83
+           opt = [60, 0].pack('L!L!') # struct timeval
84
+           socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
85
+           socket.write(buffer.string)
86
+         ensure
87
+           socket.close
88
+         end
89
+       rescue StandardError => e
90
+         # StandardError, not Exception: signals and SystemExit must not be
91
+         # swallowed and re-raised as a send failure.
92
+         @log.error(e.message)
93
+         raise "Failed to send_binary: #{e.message}"
94
+       end
95
+       @log.debug("sending #{rows} rows to bigobject via avro")
96
+     end
97
+
98
+     def to_s
99
+       "pattern:#{pattern}, column_mapping:#{column_mapping}"
100
+     end
101
+
102
+     private
103
+
104
+     # Turns "key:column,other" into { "key" => "column", "other" => "other" }.
105
+     # Blank entries are skipped and whitespace around names is stripped.
106
+     def parse_column_mapping(column_mapping_conf)
107
+       column_mapping_conf.split(',').each_with_object({}) { |entry, mapping|
108
+         key, column = entry.split(':', 2).map(&:strip)
109
+         next if key.nil? || key.empty?
110
+         mapping[key] = column.nil? ? key : column
111
+       }
112
+     end
113
+
114
+     # Renames record keys according to the column mapping; the record is
115
+     # returned unchanged when no mapping is configured.
116
+     def map_record(record)
117
+       return record if @mapping.nil?
118
+       @mapping.each_with_object({}) { |(key, column), mapped|
119
+         mapped[column] = record[key]
120
+       }
121
+     end
122
+
123
+   end #end class
124
+
125
+   def initialize
126
+     super
127
+     require 'avro'
128
+     # TCPSocket and StringIO are used by TableElement#send_binary.
129
+     require 'socket'
130
+     require 'stringio'
131
+     log.info("bigobject_avro initialize")
132
+   end
133
+
134
+   # Compiles the tag-prefix pattern and configures one TableElement per
135
+   # <table> section.
136
+   def configure(conf)
137
+     super
138
+
139
+     prefix = conf['remove_tag_prefix']
140
+     # \A anchors at the very start of the tag, so only a leading prefix goes.
141
+     @remove_tag_prefix = Regexp.new('\A' + Regexp.escape(prefix)) if prefix
142
+
143
+     @tables = conf.elements.select { |e| e.name == 'table' }.map { |e|
144
+       te = TableElement.new(log, @bigobject_hostname, @bigobject_port)
145
+       te.configure(e)
146
+       te
147
+     }
148
+   end
149
+
150
+   def start
151
+     super
152
+     log.info("bigobject_avro start")
153
+   end
154
+
155
+   def shutdown
156
+     super
157
+   end
158
+
159
+   # This method is called when an event reaches to Fluentd.
160
+   def format(tag, time, record)
161
+     [tag, time, record].to_msgpack
162
+   end
163
+
164
+   # This method is called every flush interval. Sends the chunk through the
165
+   # first table whose pattern matches the chunk key; chunks that match no
166
+   # table are only logged.
167
+   # 'chunk' is a buffer chunk that includes multiple formatted events.
168
+   def write(chunk)
169
+     table = @tables.find { |t| t.mpattern.match(chunk.key) }
170
+     return table.send_binary(chunk) if table
171
+
172
+     log.warn("unknown chunk #{chunk.key}")
173
+   end
174
+
175
+   # Returns the tag with the configured prefix removed, or the tag itself
176
+   # when no prefix is configured.
177
+   def format_tag(tag)
178
+     return tag unless @remove_tag_prefix
179
+     tag.sub(@remove_tag_prefix, '')
180
+   end
181
+
182
+   # Passes the prefix-stripped tag to the base class as the buffer key.
183
+   # NOTE(review): presumably this is the value #write later sees as chunk.key.
184
+   def emit(tag, es, chain)
185
+     super(tag, es, chain, format_tag(tag))
186
+   end
187
+ end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-bigobject_avro
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Andrea Sung
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-11-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fluentd
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: avro
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: 'Fluentd output plugin to insert BIGOBJECT '
70
+ email:
71
+ - andrea@bigobject.io
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - Gemfile
77
+ - LICENSE.txt
78
+ - README.md
79
+ - Rakefile
80
+ - fluent-plugin-bigobject_avro.gemspec
81
+ - lib/fluent/plugin/out_bigobject_avro.rb
82
+ homepage: https://github.com/bigobject-inc/fluent-plugin-bigobject_avro
83
+ licenses:
84
+ - MIT
85
+ metadata: {}
86
+ post_install_message:
87
+ rdoc_options: []
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ requirements: []
101
+ rubyforge_project:
102
+ rubygems_version: 2.2.2
103
+ signing_key:
104
+ specification_version: 4
105
+ summary: Fluentd output plugin to insert BIGOBJECT
106
+ test_files: []