fluent-plugin-bigobject 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +43 -23
- data/fluent-plugin-bigobject.gemspec +4 -5
- data/lib/fluent/plugin/out_bigobject.rb +79 -81
- metadata +5 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 476a311048599f15ef608f470019aee3755f480a
+  data.tar.gz: b5ed61454e7d2aed2bff4d7aef43707b7b3b7d1a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c0f3e2399d792c2fc2b92ba74c15a8cfa2dca4e67734553006c3c9730f3da84f53b46f0954d4f51fc40587f72c24b51f7c6e731451e703a6465a5b316938847d
+  data.tar.gz: a5ff36959b8f31f02e28c2be9b464e18eee2d35557599c9da6a13d53ad35f1b04552d6d910c37fc6708c2b7a17662091817a2c6ff09f2d1ddb2a7eecd12c2d1c
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -24,27 +24,46 @@ Configure BigObject URL and the table/column to be mapped in BigObject
 
 ```apache
 
-#
-<
-type
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+#sample source to read csv file
+<source>
+type tail
+
+#path- where you placed your input data
+path ./input/Customer.csv
+
+# pos_file where you record file position
+pos_file ./log/customer.log.pos
+
+# for bigobject output plugin, use tag bigobject.${table_pattern}.${event}.${primary_key}
+# ${primary_key} is not needed for insert
+tag bigobject.cust.insert
+
+#input file format
+format csv
+
+# keys - columns in csv file
+keys id,name,language,state,company,gender,age
+
+#types - string/bool/integer/float/time/array
+types age1:integer
+
+</source>
+
+# Send data to BigObject using Restful API. Tables need to be created in advance in BigObject.
+# depending on the event in tag received, will send data to BigObject for insert/update/delete.
+#
+# Tag for each event - bigobject.${table_pattern}.${event}.${primary_key}.
+# ${table_pattern} : will match to the <pattern> in <table> section of bigobject output plugin
+# ${event} : valid event type by insert/update/delete.
+# ${primary_key} : the primary key for table, optional for insert event.
+# if primary_key is integer type in BigObject, set bo_primary_key_is_int to true
+#
+# Eg:
+# tag bigobject.cust.insert ==> INSERT INTO <table> VALUES ...
+# tag bigobject.cust.delete.id ==> DELETE FROM <table> WHERE id=...
+# tag bigobject.cust.update.id ==> UPDATE <table> SET ... WHERE id=...
+
+<match bigobject.**>
 type bigobject
 
 log_level info
@@ -53,14 +72,15 @@ Configure BigObject URL and the table/column to be mapped in BigObject
 bigobject_hostname 192.168.59.103
 bigobject_port 9090
 
-remove_tag_prefix
+remove_tag_prefix bigobject.
 flush_interval 60s
 
 <table>
 table Customer
-pattern
+pattern cust
 
 #optional-
+#bo_primary_key_is_int true #defualts to false
 #column_mapping id,name,language,state,company,gender,age
 #bo_workspace
 #bo_opts
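The example tags above map directly onto the statements the output plugin builds. A minimal, runnable sketch of that mapping, assuming a hypothetical record { "id" => 1, "name" => "joe" } destined for the Customer table (sample data only, not part of the package):

```ruby
require 'json'

record = { "id" => 1, "name" => "joe" }        # hypothetical event payload
keys   = record.keys.sort
values = keys.map { |k| record[k].to_json }    # values are JSON-encoded, as in the plugin

# tag bigobject.cust.insert
puts "INSERT INTO Customer (#{keys.join(',')}) VALUES (#{values.join(',')})"
#=> INSERT INTO Customer (id,name) VALUES (1,"joe")

# tag bigobject.cust.update.id (with bo_primary_key_is_int true)
puts "UPDATE Customer SET name=#{record['name'].to_json} WHERE id=#{record['id']}"
#=> UPDATE Customer SET name="joe" WHERE id=1

# tag bigobject.cust.delete.id (with bo_primary_key_is_int true)
puts "DELETE FROM Customer WHERE id=#{record['id']}"
#=> DELETE FROM Customer WHERE id=1
```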
data/fluent-plugin-bigobject.gemspec
CHANGED
@@ -1,12 +1,12 @@
 # -*- encoding: utf-8 -*-
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-bigobject"
-  gem.version = "0.0.10"
+  gem.version = "0.0.12"
   gem.authors = ["Andrea Sung"]
   gem.email = ["andrea@bigobject.io"]
-  gem.description = %q{Fluentd output plugin to insert BIGOBJECT }
-  gem.summary = %q{Fluentd output plugin to insert BIGOBJECT}
-  gem.homepage = "https://github.com/
+  gem.description = %q{Fluentd output plugin to insert/update/delete data in BIGOBJECT }
+  gem.summary = %q{Fluentd output plugin to insert/update/delete data in BIGOBJECT}
+  gem.homepage = "https://github.com/bigobject-inc/fluent-plugin-bigobject"
   gem.license = "MIT"
 
   gem.files = `git ls-files`.split($\)
@@ -17,6 +17,5 @@ Gem::Specification.new do |gem|
   gem.add_runtime_dependency "fluentd"
   gem.add_runtime_dependency "rest-client"
   gem.add_runtime_dependency "json"
-  gem.add_runtime_dependency "avro"
   gem.add_development_dependency "rake"
 end
data/lib/fluent/plugin/out_bigobject.rb
CHANGED
@@ -8,7 +8,10 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   config_param :bigobject_hostname, :string
   config_param :bigobject_port, :integer
   config_param :remove_tag_prefix, :string, :default => nil
-  config_param :
+  config_param :tag_format, :string, :default => nil
+
+  # DEFAULT_TAG_FORMAT = /(?<table_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$/
+  DEFAULT_TAG_FORMAT = /^(?<table_name>[^\.]+)\.(?<event>[^\.]+)(\.(?<primary_key>[^\.]+))*/
 
   attr_accessor :tables
 
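The new tag_format parameter defaults to DEFAULT_TAG_FORMAT above, and its named captures drive the insert/update/delete dispatch later in the file. A minimal, runnable sketch of how a tag is parsed (the sample tag is hypothetical and is shown after remove_tag_prefix has already stripped the bigobject. prefix):

```ruby
# Same pattern as DEFAULT_TAG_FORMAT in the plugin.
TAG_FORMAT = /^(?<table_name>[^\.]+)\.(?<event>[^\.]+)(\.(?<primary_key>[^\.]+))*/

parts = "cust.update.id".match(TAG_FORMAT)

parts['table_name']   #=> "cust"    matched against each <table> section's pattern
parts['event']        #=> "update"  one of insert / update / delete
parts['primary_key']  #=> "id"      nil for an insert-only tag such as "cust.insert"
```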
@@ -19,35 +22,26 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   class TableElement
     include Fluent::Configurable
 
-    config_param :table, :string
+    config_param :table, :string
     config_param :column_mapping, :string, :default=>nil
     config_param :pattern, :string, :default=>nil
     config_param :bo_workspace, :string, :default=>nil
     config_param :bo_opts, :string, :default=>nil
-    config_param :
+    config_param :bo_primary_key_is_int, :bool, :default=>false
 
     attr_reader :mpattern
 
-    def initialize(log, bo_hostname, bo_port)
+    def initialize(log, bo_hostname, bo_port, tag_format)
       super()
       @log = log
       @bo_hostname = bo_hostname
       @bo_port = bo_port
       @bo_url="http://#{@bo_hostname}:#{@bo_port}/cmd"
+      @tag_format = tag_format
     end
 
     def configure(conf)
       super
-      if (@table==nil)&&(@schema_file==nil)
-        raise "Table name and schema_file cannot be both nil. Please specify <schema_file> if using avro input or <table> is using restful api."
-      end
-      if (isBinary)
-        @avro_schema = Avro::Schema.parse(File.open(@schema_file, "rb").read)
-        @avro_writer = Avro::IO::DatumWriter.new(@avro_schema)
-      else
-        @avro_schema = nil
-        @avro_writer = nil
-      end
 
       @mpattern = Fluent::MatchPattern.create(pattern)
       @mapping = (@column_mapping==nil)? nil:parse_column_mapping(@column_mapping)
@@ -65,82 +59,71 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
       }
     end
 
-    def
-
+    def getPkeyValue(value)
+      if (@bo_primary_key_is_int)
+        return value
+      else
+        return"\"#{value}\""
+      end
     end
-
+
     #Send Data to Bigobject using Restful API
-    def
-
-
+    def send(chunk)
+      insertStmts = Array.new
+      deleteStmts = Array.new
+
       columns = nil
       chunk.msgpack_each { |tag, time, data|
+        tag_parts = tag.match(@tag_format)
+        target_event = tag_parts['event']
+        id_key = tag_parts['primary_key']
+
         keys = Array.new
         values = Array.new
        data = @format_proc.call(data)
        data.keys.sort.each do |key|
-
-
+          keys << key
+          values << data[key].to_json
        end
-
-
+
+        if (target_event=='insert')
+          if columns.to_s.empty?
+            columns = "(#{keys.join(",")})"
+          end
+          insertStmts.push("(#{values.join(",")})")
+        elsif (target_event=='update')
+          pkey=""
+          updates = Array.new
+          keys.zip(values) { |key, value|
+            if (key==id_key)
+              pkey = getPkeyValue(value)
+            else
+              updates.push("#{key}=#{value}")
+            end
+          }
+          sendStmt = "UPDATE #{table} SET #{updates.join(",")} WHERE #{id_key}=#{pkey}"
+          sendBO(@bo_url, sendStmt)
+        elsif (target_event=='delete')
+          keys.zip(values) { |key, value|
+            if (key==id_key)
+              pkey = getPkeyValue(value)
+            end
+            deleteStmts.push("#{id_key}=#{pkey}")
+          }
        end
-        stmts.push("(#{values.join(",")})")
-        #stmts.push("(\"#{values.join("\",\"")}\")")
-        i+=1
       }
 
-
-
-
-
-
-        @log.error("[BigObject] #{err}")
-      end
-      @log.debug("bigobject insert #{i} rows")
-
-    end
-
-    #Send data to Bigobject using binary AVRO
-    def send_binary(chunk)
+      if insertStmts.length>0
+        sendStmt = "INSERT INTO #{@table} #{columns} VALUES " + insertStmts.join(",")
+        sendBO(@bo_url, sendStmt)
+        @log.debug("sending #{insertStmts.length} rows to bigobject for insert via Restful API")
+      end
 
-
-
-
-
-      data = @format_proc.call(data)
-      dw<<data
-      i+=1
-    }
-    dw.flush
-
-    begin
-      socket = TCPSocket.open(@bo_hostname, @bo_port)
-      begin
-        #timeout=60
-        opt = [1, 60].pack('I!I!') # { int l_onoff; int l_linger; }
-        socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
-
-        opt = [60, 0].pack('L!L!') # struct timeval
-        socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
-        socket.write(buffer.string)
-      ensure
-        socket.close
-      end
-
-    rescue Exception => e
-      @log.error(e.message)
-      raise "Failed to send_binary: #{e.message}"
+      if deleteStmts.length>0
+        sendStmt = "DELETE FROM #{@table} WHERE " + deleteStmts.join(" or ")
+        sendBO(@bo_url, sendStmt)
+        @log.debug("sending #{deleteStmts.length} rows to bigobject for delete via Restful API")
       end
-      @log.debug("bigobject send #{i} rows")
-    end
-
-    def send(chunk)
-      if (isBinary)
-        send_binary(chunk)
-      else
-        send_rest(chunk)
-      end
     end
 
     def to_s
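Note the asymmetry in the new send(chunk): update statements go to BigObject one event at a time inside the msgpack_each loop, while inserts and deletes are accumulated and flushed once per buffer chunk. A minimal, runnable sketch of that per-chunk batching (the tuples and predicate below are hypothetical sample data):

```ruby
# Mirrors the statement assembly at the end of send(chunk); sample data only.
insertStmts = ['(1,"joe")', '(2,"amy")']   # one "(v1,v2,...)" tuple per insert event
deleteStmts = ['id=3']                     # one "pk=value" predicate per delete event
columns     = "(id,name)"

puts "INSERT INTO Customer #{columns} VALUES " + insertStmts.join(",")
#=> INSERT INTO Customer (id,name) VALUES (1,"joe"),(2,"amy")

puts "DELETE FROM Customer WHERE " + deleteStmts.join(" or ")
#=> DELETE FROM Customer WHERE id=3
```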
@@ -181,7 +164,12 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
         raise "Failed to sendBO: #{e.message}"
       end
 
-
+      parsed = JSON.parse(resp)
+      err = parsed['Err']
+      if (err.to_s!='')
+        @log.error("[BigObject] #{err}")
+      end
+
     end
 
   end #end class
@@ -190,7 +178,6 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
     super
     require 'rest-client'
     require 'json'
-    require 'avro'
     log.info("bigobject initialize")
   end
 
@@ -201,13 +188,19 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
       @remove_tag_prefix = Regexp.new('^' + Regexp.escape(remove_tag_prefix))
     end
 
+    if @tag_format.nil? || @tag_format == DEFAULT_TAG_FORMAT
+      @tag_format = DEFAULT_TAG_FORMAT
+    else
+      @tag_format = Regexp.new(conf['tag_format'])
+    end
+
     @tables = []
     @default_table = nil
 
     conf.elements.select { |e|
       e.name == 'table'
     }.each { |e|
-      te = TableElement.new(log, @bigobject_hostname, @bigobject_port)
+      te = TableElement.new(log, @bigobject_hostname, @bigobject_port, @tag_format)
       te.configure(e)
       @tables << te
     }
@@ -234,8 +227,12 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   # 'chunk' is a buffer chunk that includes multiple formatted events.
   def write(chunk)
     unknownChunks = []
+    tag = chunk.key
+    tag_parts = tag.match(@tag_format)
+    target_table = tag_parts['table_name']
+
     @tables.each { |table|
-      if table.mpattern.match(
+      if table.mpattern.match(target_table)
         return table.send(chunk)
       end
     }
@@ -253,6 +250,7 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   end
 
   def emit(tag, es, chain)
-
+    nt = format_tag(tag)
+    super(nt, es, chain, nt)
   end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigobject
 version: !ruby/object:Gem::Version
-  version: 0.0.10
+  version: 0.0.12
 platform: ruby
 authors:
 - Andrea Sung
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-
+date: 2015-11-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -52,20 +52,6 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-- !ruby/object:Gem::Dependency
-  name: avro
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: '0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: '0'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -80,7 +66,7 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-description: 'Fluentd output plugin to insert BIGOBJECT '
+description: 'Fluentd output plugin to insert/update/delete data in BIGOBJECT '
 email:
 - andrea@bigobject.io
 executables: []
@@ -94,7 +80,7 @@ files:
 - Rakefile
 - fluent-plugin-bigobject.gemspec
 - lib/fluent/plugin/out_bigobject.rb
-homepage: https://github.com/
+homepage: https://github.com/bigobject-inc/fluent-plugin-bigobject
 licenses:
 - MIT
 metadata: {}
@@ -117,5 +103,5 @@ rubyforge_project:
 rubygems_version: 2.2.2
 signing_key:
 specification_version: 4
-summary: Fluentd output plugin to insert BIGOBJECT
+summary: Fluentd output plugin to insert/update/delete data in BIGOBJECT
 test_files: []