fluent-plugin-bigobject 0.0.10 → 0.0.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +43 -23
- data/fluent-plugin-bigobject.gemspec +4 -5
- data/lib/fluent/plugin/out_bigobject.rb +79 -81
- metadata +5 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 476a311048599f15ef608f470019aee3755f480a
|
4
|
+
data.tar.gz: b5ed61454e7d2aed2bff4d7aef43707b7b3b7d1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0f3e2399d792c2fc2b92ba74c15a8cfa2dca4e67734553006c3c9730f3da84f53b46f0954d4f51fc40587f72c24b51f7c6e731451e703a6465a5b316938847d
|
7
|
+
data.tar.gz: a5ff36959b8f31f02e28c2be9b464e18eee2d35557599c9da6a13d53ad35f1b04552d6d910c37fc6708c2b7a17662091817a2c6ff09f2d1ddb2a7eecd12c2d1c
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -24,27 +24,46 @@ Configure BigObject URL and the table/column to be mapped in BigObject
|
|
24
24
|
|
25
25
|
```apache
|
26
26
|
|
27
|
-
#
|
28
|
-
<
|
29
|
-
type
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
27
|
+
#sample source to read csv file
|
28
|
+
<source>
|
29
|
+
type tail
|
30
|
+
|
31
|
+
#path- where you placed your input data
|
32
|
+
path ./input/Customer.csv
|
33
|
+
|
34
|
+
# pos_file where you record file position
|
35
|
+
pos_file ./log/customer.log.pos
|
36
|
+
|
37
|
+
# for bigobject output plugin, use tag bigobject.${table_pattern}.${event}.${primary_key}
|
38
|
+
# ${primary_key} is not needed for insert
|
39
|
+
tag bigobject.cust.insert
|
40
|
+
|
41
|
+
#input file format
|
42
|
+
format csv
|
43
|
+
|
44
|
+
# keys - columns in csv file
|
45
|
+
keys id,name,language,state,company,gender,age
|
46
|
+
|
47
|
+
#types - string/bool/integer/float/time/array
|
48
|
+
types age:integer
|
49
|
+
|
50
|
+
</source>
|
51
|
+
|
52
|
+
# Send data to BigObject using the RESTful API. Tables need to be created in advance in BigObject.
|
53
|
+
# Depending on the event in the received tag, data is sent to BigObject as an insert, update, or delete.
|
54
|
+
#
|
55
|
+
# Tag for each event - bigobject.${table_pattern}.${event}.${primary_key}.
|
56
|
+
# ${table_pattern} : matched against the <pattern> in the <table> section of the bigobject output plugin
|
57
|
+
# ${event} : the event type; valid values are insert, update, and delete.
|
58
|
+
# ${primary_key} : the primary key for table, optional for insert event.
|
59
|
+
# if primary_key is integer type in BigObject, set bo_primary_key_is_int to true
|
60
|
+
#
|
61
|
+
# Eg:
|
62
|
+
# tag bigobject.cust.insert ==> INSERT INTO <table> VALUES ...
|
63
|
+
# tag bigobject.cust.delete.id ==> DELETE FROM <table> WHERE id=...
|
64
|
+
# tag bigobject.cust.update.id ==> UPDATE <table> SET ... WHERE id=...
|
65
|
+
|
66
|
+
<match bigobject.**>
|
48
67
|
type bigobject
|
49
68
|
|
50
69
|
log_level info
|
@@ -53,14 +72,15 @@ Configure BigObject URL and the table/column to be mapped in BigObject
|
|
53
72
|
bigobject_hostname 192.168.59.103
|
54
73
|
bigobject_port 9090
|
55
74
|
|
56
|
-
remove_tag_prefix
|
75
|
+
remove_tag_prefix bigobject.
|
57
76
|
flush_interval 60s
|
58
77
|
|
59
78
|
<table>
|
60
79
|
table Customer
|
61
|
-
pattern
|
80
|
+
pattern cust
|
62
81
|
|
63
82
|
#optional-
|
83
|
+
#bo_primary_key_is_int true #defaults to false
|
64
84
|
#column_mapping id,name,language,state,company,gender,age
|
65
85
|
#bo_workspace
|
66
86
|
#bo_opts
|
@@ -1,12 +1,12 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-bigobject"
|
4
|
-
gem.version = "0.0.
|
4
|
+
gem.version = "0.0.12"
|
5
5
|
gem.authors = ["Andrea Sung"]
|
6
6
|
gem.email = ["andrea@bigobject.io"]
|
7
|
-
gem.description = %q{Fluentd output plugin to insert BIGOBJECT }
|
8
|
-
gem.summary = %q{Fluentd output plugin to insert BIGOBJECT}
|
9
|
-
gem.homepage = "https://github.com/
|
7
|
+
gem.description = %q{Fluentd output plugin to insert/update/delete data in BIGOBJECT }
|
8
|
+
gem.summary = %q{Fluentd output plugin to insert/update/delete data in BIGOBJECT}
|
9
|
+
gem.homepage = "https://github.com/bigobject-inc/fluent-plugin-bigobject"
|
10
10
|
gem.license = "MIT"
|
11
11
|
|
12
12
|
gem.files = `git ls-files`.split($\)
|
@@ -17,6 +17,5 @@ Gem::Specification.new do |gem|
|
|
17
17
|
gem.add_runtime_dependency "fluentd"
|
18
18
|
gem.add_runtime_dependency "rest-client"
|
19
19
|
gem.add_runtime_dependency "json"
|
20
|
-
gem.add_runtime_dependency "avro"
|
21
20
|
gem.add_development_dependency "rake"
|
22
21
|
end
|
@@ -8,7 +8,10 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
|
|
8
8
|
config_param :bigobject_hostname, :string
|
9
9
|
config_param :bigobject_port, :integer
|
10
10
|
config_param :remove_tag_prefix, :string, :default => nil
|
11
|
-
config_param :
|
11
|
+
config_param :tag_format, :string, :default => nil
|
12
|
+
|
13
|
+
# DEFAULT_TAG_FORMAT = /(?<table_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$/
|
14
|
+
DEFAULT_TAG_FORMAT = /^(?<table_name>[^\.]+)\.(?<event>[^\.]+)(\.(?<primary_key>[^\.]+))*/
|
12
15
|
|
13
16
|
attr_accessor :tables
|
14
17
|
|
@@ -19,35 +22,26 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
|
|
19
22
|
class TableElement
|
20
23
|
include Fluent::Configurable
|
21
24
|
|
22
|
-
config_param :table, :string
|
25
|
+
config_param :table, :string
|
23
26
|
config_param :column_mapping, :string, :default=>nil
|
24
27
|
config_param :pattern, :string, :default=>nil
|
25
28
|
config_param :bo_workspace, :string, :default=>nil
|
26
29
|
config_param :bo_opts, :string, :default=>nil
|
27
|
-
config_param :
|
30
|
+
config_param :bo_primary_key_is_int, :bool, :default=>false
|
28
31
|
|
29
32
|
attr_reader :mpattern
|
30
33
|
|
31
|
-
def initialize(log, bo_hostname, bo_port)
|
34
|
+
def initialize(log, bo_hostname, bo_port, tag_format)
|
32
35
|
super()
|
33
36
|
@log = log
|
34
37
|
@bo_hostname = bo_hostname
|
35
38
|
@bo_port = bo_port
|
36
39
|
@bo_url="http://#{@bo_hostname}:#{@bo_port}/cmd"
|
40
|
+
@tag_format = tag_format
|
37
41
|
end
|
38
42
|
|
39
43
|
def configure(conf)
|
40
44
|
super
|
41
|
-
if (@table==nil)&&(@schema_file==nil)
|
42
|
-
raise "Table name and schema_file cannot be both nil. Please specify <schema_file> if using avro input or <table> is using restful api."
|
43
|
-
end
|
44
|
-
if (isBinary)
|
45
|
-
@avro_schema = Avro::Schema.parse(File.open(@schema_file, "rb").read)
|
46
|
-
@avro_writer = Avro::IO::DatumWriter.new(@avro_schema)
|
47
|
-
else
|
48
|
-
@avro_schema = nil
|
49
|
-
@avro_writer = nil
|
50
|
-
end
|
51
45
|
|
52
46
|
@mpattern = Fluent::MatchPattern.create(pattern)
|
53
47
|
@mapping = (@column_mapping==nil)? nil:parse_column_mapping(@column_mapping)
|
@@ -65,82 +59,71 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
|
|
65
59
|
}
|
66
60
|
end
|
67
61
|
|
68
|
-
def
|
69
|
-
|
62
|
+
def getPkeyValue(value)
|
63
|
+
if (@bo_primary_key_is_int)
|
64
|
+
return value
|
65
|
+
else
|
66
|
+
return"\"#{value}\""
|
67
|
+
end
|
70
68
|
end
|
71
|
-
|
69
|
+
|
72
70
|
#Send Data to Bigobject using Restful API
|
73
|
-
def
|
74
|
-
|
75
|
-
|
71
|
+
def send(chunk)
|
72
|
+
insertStmts = Array.new
|
73
|
+
deleteStmts = Array.new
|
74
|
+
|
76
75
|
columns = nil
|
77
76
|
chunk.msgpack_each { |tag, time, data|
|
77
|
+
tag_parts = tag.match(@tag_format)
|
78
|
+
target_event = tag_parts['event']
|
79
|
+
id_key = tag_parts['primary_key']
|
80
|
+
|
78
81
|
keys = Array.new
|
79
82
|
values = Array.new
|
80
83
|
data = @format_proc.call(data)
|
81
84
|
data.keys.sort.each do |key|
|
82
|
-
|
83
|
-
|
85
|
+
keys << key
|
86
|
+
values << data[key].to_json
|
84
87
|
end
|
85
|
-
|
86
|
-
|
88
|
+
|
89
|
+
if (target_event=='insert')
|
90
|
+
if columns.to_s.empty?
|
91
|
+
columns = "(#{keys.join(",")})"
|
92
|
+
end
|
93
|
+
insertStmts.push("(#{values.join(",")})")
|
94
|
+
elsif (target_event=='update')
|
95
|
+
pkey=""
|
96
|
+
updates = Array.new
|
97
|
+
keys.zip(values) { |key, value|
|
98
|
+
if (key==id_key)
|
99
|
+
pkey = getPkeyValue(value)
|
100
|
+
else
|
101
|
+
updates.push("#{key}=#{value}")
|
102
|
+
end
|
103
|
+
}
|
104
|
+
sendStmt = "UPDATE #{table} SET #{updates.join(",")} WHERE #{id_key}=#{pkey}"
|
105
|
+
sendBO(@bo_url, sendStmt)
|
106
|
+
elsif (target_event=='delete')
|
107
|
+
keys.zip(values) { |key, value|
|
108
|
+
if (key==id_key)
|
109
|
+
pkey = getPkeyValue(value)
|
110
|
+
end
|
111
|
+
deleteStmts.push("#{id_key}=#{pkey}")
|
112
|
+
}
|
87
113
|
end
|
88
|
-
stmts.push("(#{values.join(",")})")
|
89
|
-
#stmts.push("(\"#{values.join("\",\"")}\")")
|
90
|
-
i+=1
|
91
114
|
}
|
92
115
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
@log.error("[BigObject] #{err}")
|
99
|
-
end
|
100
|
-
@log.debug("bigobject insert #{i} rows")
|
101
|
-
|
102
|
-
end
|
103
|
-
|
104
|
-
#Send data to Bigobject using binary AVRO
|
105
|
-
def send_binary(chunk)
|
116
|
+
if insertStmts.length>0
|
117
|
+
sendStmt = "INSERT INTO #{@table} #{columns} VALUES " + insertStmts.join(",")
|
118
|
+
sendBO(@bo_url, sendStmt)
|
119
|
+
@log.debug("sending #{insertStmts.length} rows to bigobject for insert via Restful API")
|
120
|
+
end
|
106
121
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
data = @format_proc.call(data)
|
112
|
-
dw<<data
|
113
|
-
i+=1
|
114
|
-
}
|
115
|
-
dw.flush
|
116
|
-
|
117
|
-
begin
|
118
|
-
socket = TCPSocket.open(@bo_hostname, @bo_port)
|
119
|
-
begin
|
120
|
-
#timeout=60
|
121
|
-
opt = [1, 60].pack('I!I!') # { int l_onoff; int l_linger; }
|
122
|
-
socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
|
123
|
-
|
124
|
-
opt = [60, 0].pack('L!L!') # struct timeval
|
125
|
-
socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
|
126
|
-
socket.write(buffer.string)
|
127
|
-
ensure
|
128
|
-
socket.close
|
129
|
-
end
|
130
|
-
|
131
|
-
rescue Exception => e
|
132
|
-
@log.error(e.message)
|
133
|
-
raise "Failed to send_binary: #{e.message}"
|
122
|
+
if deleteStmts.length>0
|
123
|
+
sendStmt = "DELETE FROM #{@table} WHERE " + deleteStmts.join(" or ")
|
124
|
+
sendBO(@bo_url, sendStmt)
|
125
|
+
@log.debug("sending #{deleteStmts.length} rows to bigobject for delete via Restful API")
|
134
126
|
end
|
135
|
-
@log.debug("bigobject send #{i} rows")
|
136
|
-
end
|
137
|
-
|
138
|
-
def send(chunk)
|
139
|
-
if (isBinary)
|
140
|
-
send_binary(chunk)
|
141
|
-
else
|
142
|
-
send_rest(chunk)
|
143
|
-
end
|
144
127
|
end
|
145
128
|
|
146
129
|
def to_s
|
@@ -181,7 +164,12 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
|
|
181
164
|
raise "Failed to sendBO: #{e.message}"
|
182
165
|
end
|
183
166
|
|
184
|
-
|
167
|
+
parsed = JSON.parse(resp)
|
168
|
+
err = parsed['Err']
|
169
|
+
if (err.to_s!='')
|
170
|
+
@log.error("[BigObject] #{err}")
|
171
|
+
end
|
172
|
+
|
185
173
|
end
|
186
174
|
|
187
175
|
end #end class
|
@@ -190,7 +178,6 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
|
|
190
178
|
super
|
191
179
|
require 'rest-client'
|
192
180
|
require 'json'
|
193
|
-
require 'avro'
|
194
181
|
log.info("bigobject initialize")
|
195
182
|
end
|
196
183
|
|
@@ -201,13 +188,19 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
|
|
201
188
|
@remove_tag_prefix = Regexp.new('^' + Regexp.escape(remove_tag_prefix))
|
202
189
|
end
|
203
190
|
|
191
|
+
if @tag_format.nil? || @tag_format == DEFAULT_TAG_FORMAT
|
192
|
+
@tag_format = DEFAULT_TAG_FORMAT
|
193
|
+
else
|
194
|
+
@tag_format = Regexp.new(conf['tag_format'])
|
195
|
+
end
|
196
|
+
|
204
197
|
@tables = []
|
205
198
|
@default_table = nil
|
206
199
|
|
207
200
|
conf.elements.select { |e|
|
208
201
|
e.name == 'table'
|
209
202
|
}.each { |e|
|
210
|
-
te = TableElement.new(log, @bigobject_hostname, @bigobject_port)
|
203
|
+
te = TableElement.new(log, @bigobject_hostname, @bigobject_port, @tag_format)
|
211
204
|
te.configure(e)
|
212
205
|
@tables << te
|
213
206
|
}
|
@@ -234,8 +227,12 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
|
|
234
227
|
# 'chunk' is a buffer chunk that includes multiple formatted events.
|
235
228
|
def write(chunk)
|
236
229
|
unknownChunks = []
|
230
|
+
tag = chunk.key
|
231
|
+
tag_parts = tag.match(@tag_format)
|
232
|
+
target_table = tag_parts['table_name']
|
233
|
+
|
237
234
|
@tables.each { |table|
|
238
|
-
if table.mpattern.match(
|
235
|
+
if table.mpattern.match(target_table)
|
239
236
|
return table.send(chunk)
|
240
237
|
end
|
241
238
|
}
|
@@ -253,6 +250,7 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
|
|
253
250
|
end
|
254
251
|
|
255
252
|
def emit(tag, es, chain)
|
256
|
-
|
253
|
+
nt = format_tag(tag)
|
254
|
+
super(nt, es, chain, nt)
|
257
255
|
end
|
258
256
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-bigobject
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrea Sung
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fluentd
|
@@ -52,20 +52,6 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: avro
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
56
|
name: rake
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,7 +66,7 @@ dependencies:
|
|
80
66
|
- - ">="
|
81
67
|
- !ruby/object:Gem::Version
|
82
68
|
version: '0'
|
83
|
-
description: 'Fluentd output plugin to insert BIGOBJECT '
|
69
|
+
description: 'Fluentd output plugin to insert/update/delete data in BIGOBJECT '
|
84
70
|
email:
|
85
71
|
- andrea@bigobject.io
|
86
72
|
executables: []
|
@@ -94,7 +80,7 @@ files:
|
|
94
80
|
- Rakefile
|
95
81
|
- fluent-plugin-bigobject.gemspec
|
96
82
|
- lib/fluent/plugin/out_bigobject.rb
|
97
|
-
homepage: https://github.com/
|
83
|
+
homepage: https://github.com/bigobject-inc/fluent-plugin-bigobject
|
98
84
|
licenses:
|
99
85
|
- MIT
|
100
86
|
metadata: {}
|
@@ -117,5 +103,5 @@ rubyforge_project:
|
|
117
103
|
rubygems_version: 2.2.2
|
118
104
|
signing_key:
|
119
105
|
specification_version: 4
|
120
|
-
summary: Fluentd output plugin to insert BIGOBJECT
|
106
|
+
summary: Fluentd output plugin to insert/update/delete data in BIGOBJECT
|
121
107
|
test_files: []
|