fluent-plugin-bigobject 0.0.10 → 0.0.12

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a45409dcfa7c2d66087c327e66fb4d5aa6855434
-  data.tar.gz: aa29ec1f4e23c6f2e37526d6cb39013a0fefd536
+  metadata.gz: 476a311048599f15ef608f470019aee3755f480a
+  data.tar.gz: b5ed61454e7d2aed2bff4d7aef43707b7b3b7d1a
 SHA512:
-  metadata.gz: 63d9945a6f7db0e106eef8d2bbdebdb4316e8b47b833f952c81f5ac1533c0b050adc65a9ee065a1fcab21dbd3c3376cf2c8e585ce4b662401f4f21075375c773
-  data.tar.gz: 5d89f98e21cd45b2b5cd74add1aac255ffca1bbef80dd0df42d78a5db81eb4dd219795003047cd799381f3cc5a08508b1184edbc9e94c49ab57e5ae68c947e93
+  metadata.gz: c0f3e2399d792c2fc2b92ba74c15a8cfa2dca4e67734553006c3c9730f3da84f53b46f0954d4f51fc40587f72c24b51f7c6e731451e703a6465a5b316938847d
+  data.tar.gz: a5ff36959b8f31f02e28c2be9b464e18eee2d35557599c9da6a13d53ad35f1b04552d6d910c37fc6708c2b7a17662091817a2c6ff09f2d1ddb2a7eecd12c2d1c
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2015 MacroData
+Copyright (c) 2015 BigObject
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -24,27 +24,46 @@ Configure BigObject URL and the table/column to be mapped in BigObject
 
 ```apache
 
-# send data to BigObject using avro by providing schema_file in each table
-<match bo.insert_avro.*>
-  type bigobject
-
-  log_level info
-
-  # specify the bigobject host/port to connect to
-  bigobject_hostname 192.168.59.103
-  bigobject_port 9091
-
-  remove_tag_prefix bo.insert_avro.
-  flush_interval 60s
-
-  <table>
-    pattern customer
-    schema_file /fluentd/input/avsc/Customer_binary.avsc
-  </table>
-</match>
-
-# send data to BigObject using Restful API. Tables need to be created in advance in BigObject.
-<match bo.insert_rest.*>
+# sample source to read a csv file
+<source>
+  type tail
+
+  # path - where you placed your input data
+  path ./input/Customer.csv
+
+  # pos_file - where you record the file position
+  pos_file ./log/customer.log.pos
+
+  # for the bigobject output plugin, use tag bigobject.${table_pattern}.${event}.${primary_key}
+  # ${primary_key} is not needed for insert
+  tag bigobject.cust.insert
+
+  # input file format
+  format csv
+
+  # keys - columns in the csv file
+  keys id,name,language,state,company,gender,age
+
+  # types - string/bool/integer/float/time/array
+  types age:integer
+
+</source>
+
+# Send data to BigObject using the Restful API. Tables need to be created in advance in BigObject.
+# Depending on the event in the received tag, data is sent to BigObject for insert/update/delete.
+#
+# Tag for each event - bigobject.${table_pattern}.${event}.${primary_key}
+#   ${table_pattern} : matched against the <pattern> in the <table> section of the bigobject output plugin
+#   ${event}         : a valid event type - insert/update/delete
+#   ${primary_key}   : the primary key of the table, optional for the insert event.
+#                      If the primary key is an integer type in BigObject, set bo_primary_key_is_int to true.
+#
+# E.g.:
+#   tag bigobject.cust.insert    ==> INSERT INTO <table> VALUES ...
+#   tag bigobject.cust.delete.id ==> DELETE FROM <table> WHERE id=...
+#   tag bigobject.cust.update.id ==> UPDATE <table> SET ... WHERE id=...
+
+<match bigobject.**>
   type bigobject
 
   log_level info
@@ -53,14 +72,15 @@ Configure BigObject URL and the table/column to be mapped in BigObject
   bigobject_hostname 192.168.59.103
   bigobject_port 9090
 
-  remove_tag_prefix bo.insert_rest.
+  remove_tag_prefix bigobject.
   flush_interval 60s
 
   <table>
     table Customer
-    pattern customer
+    pattern cust
 
    #optional-
+    #bo_primary_key_is_int true  #defaults to false
    #column_mapping id,name,language,state,company,gender,age
    #bo_workspace
    #bo_opts
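
A side note on the `bo_primary_key_is_int` option introduced in this release: it only controls whether the primary-key value is quoted when the plugin builds UPDATE/DELETE statements. A minimal sketch of that behavior, with a hypothetical `pkey_literal` helper standing in for the plugin's `getPkeyValue` (see the code diff below):

```ruby
# Hypothetical stand-in for TableElement#getPkeyValue: integer primary keys
# are emitted bare, everything else is wrapped in double quotes.
def pkey_literal(value, is_int)
  is_int ? value.to_s : "\"#{value}\""
end

puts "DELETE FROM Customer WHERE id=#{pkey_literal(123, true)}"
#=> DELETE FROM Customer WHERE id=123
puts "DELETE FROM Customer WHERE id=#{pkey_literal('abc', false)}"
#=> DELETE FROM Customer WHERE id="abc"
```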
fluent-plugin-bigobject.gemspec CHANGED
@@ -1,12 +1,12 @@
 # -*- encoding: utf-8 -*-
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-bigobject"
-  gem.version = "0.0.10"
+  gem.version = "0.0.12"
   gem.authors = ["Andrea Sung"]
   gem.email = ["andrea@bigobject.io"]
-  gem.description = %q{Fluentd output plugin to insert BIGOBJECT }
-  gem.summary = %q{Fluentd output plugin to insert BIGOBJECT}
-  gem.homepage = "https://github.com/macrodatalab/fluent-plugin-bigobject"
+  gem.description = %q{Fluentd output plugin to insert/update/delete data in BIGOBJECT }
+  gem.summary = %q{Fluentd output plugin to insert/update/delete data in BIGOBJECT}
+  gem.homepage = "https://github.com/bigobject-inc/fluent-plugin-bigobject"
   gem.license = "MIT"
 
   gem.files = `git ls-files`.split($\)
@@ -17,6 +17,5 @@ Gem::Specification.new do |gem|
   gem.add_runtime_dependency "fluentd"
   gem.add_runtime_dependency "rest-client"
   gem.add_runtime_dependency "json"
-  gem.add_runtime_dependency "avro"
   gem.add_development_dependency "rake"
 end
lib/fluent/plugin/out_bigobject.rb CHANGED
@@ -8,7 +8,10 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   config_param :bigobject_hostname, :string
   config_param :bigobject_port, :integer
   config_param :remove_tag_prefix, :string, :default => nil
-  config_param :send_unknown_chunks, :string, :default=>true
+  config_param :tag_format, :string, :default => nil
+
+  # DEFAULT_TAG_FORMAT = /(?<table_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$/
+  DEFAULT_TAG_FORMAT = /^(?<table_name>[^\.]+)\.(?<event>[^\.]+)(\.(?<primary_key>[^\.]+))*/
 
   attr_accessor :tables
 
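
For orientation, here is how the new `DEFAULT_TAG_FORMAT` splits a tag once `remove_tag_prefix` has stripped the leading `bigobject.` — a standalone sketch, with sample tags taken from the README above:

```ruby
TAG_FORMAT = /^(?<table_name>[^\.]+)\.(?<event>[^\.]+)(\.(?<primary_key>[^\.]+))*/

%w[cust.insert cust.update.id cust.delete.id].each do |tag|
  m = tag.match(TAG_FORMAT)
  puts "#{tag} => table=#{m['table_name']} event=#{m['event']} pkey=#{m['primary_key'].inspect}"
end
# cust.insert    => table=cust event=insert pkey=nil
# cust.update.id => table=cust event=update pkey="id"
# cust.delete.id => table=cust event=delete pkey="id"
```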
@@ -19,35 +22,26 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   class TableElement
     include Fluent::Configurable
 
-    config_param :table, :string, :default=>nil
+    config_param :table, :string
     config_param :column_mapping, :string, :default=>nil
     config_param :pattern, :string, :default=>nil
     config_param :bo_workspace, :string, :default=>nil
     config_param :bo_opts, :string, :default=>nil
-    config_param :schema_file, :string, :default => nil
+    config_param :bo_primary_key_is_int, :bool, :default=>false
 
     attr_reader :mpattern
 
-    def initialize(log, bo_hostname, bo_port)
+    def initialize(log, bo_hostname, bo_port, tag_format)
       super()
       @log = log
       @bo_hostname = bo_hostname
       @bo_port = bo_port
       @bo_url="http://#{@bo_hostname}:#{@bo_port}/cmd"
+      @tag_format = tag_format
     end
 
     def configure(conf)
       super
-      if (@table==nil)&&(@schema_file==nil)
-        raise "Table name and schema_file cannot be both nil. Please specify <schema_file> if using avro input or <table> is using restful api."
-      end
-      if (isBinary)
-        @avro_schema = Avro::Schema.parse(File.open(@schema_file, "rb").read)
-        @avro_writer = Avro::IO::DatumWriter.new(@avro_schema)
-      else
-        @avro_schema = nil
-        @avro_writer = nil
-      end
 
       @mpattern = Fluent::MatchPattern.create(pattern)
       @mapping = (@column_mapping==nil)? nil:parse_column_mapping(@column_mapping)
@@ -65,82 +59,71 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
       }
     end
 
-    def isBinary()
-      return !(@schema_file.to_s.empty?)
+    def getPkeyValue(value)
+      if (@bo_primary_key_is_int)
+        return value
+      else
+        return "\"#{value}\""
+      end
     end
-
+
     #Send Data to Bigobject using Restful API
-    def send_rest(chunk)
-      stmts = Array.new
-      i=0
+    def send(chunk)
+      insertStmts = Array.new
+      deleteStmts = Array.new
+
       columns = nil
       chunk.msgpack_each { |tag, time, data|
+        tag_parts = tag.match(@tag_format)
+        target_event = tag_parts['event']
+        id_key = tag_parts['primary_key']
+
         keys = Array.new
         values = Array.new
         data = @format_proc.call(data)
         data.keys.sort.each do |key|
-          keys << key
-          values << data[key].to_json
+          keys << key
+          values << data[key].to_json
         end
-        if columns.to_s.empty?
-          columns = "(#{keys.join(",")})"
+
+        if (target_event=='insert')
+          if columns.to_s.empty?
+            columns = "(#{keys.join(",")})"
+          end
+          insertStmts.push("(#{values.join(",")})")
+        elsif (target_event=='update')
+          pkey=""
+          updates = Array.new
+          keys.zip(values) { |key, value|
+            if (key==id_key)
+              pkey = getPkeyValue(value)
+            else
+              updates.push("#{key}=#{value}")
+            end
+          }
+          sendStmt = "UPDATE #{table} SET #{updates.join(",")} WHERE #{id_key}=#{pkey}"
+          sendBO(@bo_url, sendStmt)
+        elsif (target_event=='delete')
+          keys.zip(values) { |key, value|
+            if (key==id_key)
+              pkey = getPkeyValue(value)
+            end
+            deleteStmts.push("#{id_key}=#{pkey}")
+          }
         end
-        stmts.push("(#{values.join(",")})")
-        #stmts.push("(\"#{values.join("\",\"")}\")")
-        i+=1
       }
 
-      sendStmt = "INSERT INTO #{@table} #{columns} VALUES" + stmts.join(",")
-      resp = sendBO(@bo_url, sendStmt)
-      parsed = JSON.parse(resp)
-      err = parsed['Err']
-      if (err.to_s!='')
-        @log.error("[BigObject] #{err}")
-      end
-      @log.debug("bigobject insert #{i} rows")
-
-    end
-
-    #Send data to Bigobject using binary AVRO
-    def send_binary(chunk)
+      if insertStmts.length>0
+        sendStmt = "INSERT INTO #{@table} #{columns} VALUES " + insertStmts.join(",")
+        sendBO(@bo_url, sendStmt)
+        @log.debug("sending #{insertStmts.length} rows to bigobject for insert via Restful API")
+      end
 
-      buffer = StringIO.new()
-      dw = Avro::DataFile::Writer.new(buffer, @avro_writer, @avro_schema)
-      i=0
-      chunk.msgpack_each { |tag, time, data|
-        data = @format_proc.call(data)
-        dw<<data
-        i+=1
-      }
-      dw.flush
-
-      begin
-        socket = TCPSocket.open(@bo_hostname, @bo_port)
-        begin
-          #timeout=60
-          opt = [1, 60].pack('I!I!') # { int l_onoff; int l_linger; }
-          socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
-
-          opt = [60, 0].pack('L!L!') # struct timeval
-          socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
-          socket.write(buffer.string)
-        ensure
-          socket.close
-        end
-
-      rescue Exception => e
-        @log.error(e.message)
-        raise "Failed to send_binary: #{e.message}"
+      if deleteStmts.length>0
+        sendStmt = "DELETE FROM #{@table} WHERE " + deleteStmts.join(" or ")
+        sendBO(@bo_url, sendStmt)
+        @log.debug("sending #{deleteStmts.length} rows to bigobject for delete via Restful API")
       end
-      @log.debug("bigobject send #{i} rows")
-    end
-
-    def send(chunk)
-      if (isBinary)
-        send_binary(chunk)
-      else
-        send_rest(chunk)
-      end
     end
 
     def to_s
@@ -181,7 +164,12 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
       raise "Failed to sendBO: #{e.message}"
     end
 
-    return resp
+    parsed = JSON.parse(resp)
+    err = parsed['Err']
+    if (err.to_s!='')
+      @log.error("[BigObject] #{err}")
+    end
+
   end
 
 end #end class
@@ -190,7 +178,6 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   super
   require 'rest-client'
   require 'json'
-  require 'avro'
   log.info("bigobject initialize")
 end
 
@@ -201,13 +188,19 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
     @remove_tag_prefix = Regexp.new('^' + Regexp.escape(remove_tag_prefix))
   end
 
+  if @tag_format.nil? || @tag_format == DEFAULT_TAG_FORMAT
+    @tag_format = DEFAULT_TAG_FORMAT
+  else
+    @tag_format = Regexp.new(conf['tag_format'])
+  end
+
   @tables = []
   @default_table = nil
 
   conf.elements.select { |e|
     e.name == 'table'
   }.each { |e|
-    te = TableElement.new(log, @bigobject_hostname, @bigobject_port)
+    te = TableElement.new(log, @bigobject_hostname, @bigobject_port, @tag_format)
    te.configure(e)
    @tables << te
  }
@@ -234,8 +227,12 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
 # 'chunk' is a buffer chunk that includes multiple formatted events.
 def write(chunk)
   unknownChunks = []
+  tag = chunk.key
+  tag_parts = tag.match(@tag_format)
+  target_table = tag_parts['table_name']
+
   @tables.each { |table|
-    if table.mpattern.match(chunk.key)
+    if table.mpattern.match(target_table)
       return table.send(chunk)
     end
   }
@@ -253,6 +250,7 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   end
 
   def emit(tag, es, chain)
-    super(tag, es, chain, format_tag(tag))
+    nt = format_tag(tag)
+    super(nt, es, chain, nt)
   end
 end
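
A rough illustration of what the reworked `send` batches per buffer chunk, using assumed sample rows rather than the plugin's own code: insert events collapse into one multi-row INSERT, delete events are OR-ed into a single DELETE, and each update is issued as its own UPDATE.

```ruby
require 'json'

# Two hypothetical insert events destined for the Customer table.
rows = [{ 'id' => 1, 'name' => 'ann' }, { 'id' => 2, 'name' => 'bob' }]

# Column list comes from the sorted keys; values are JSON-encoded like the plugin does.
columns = "(#{rows.first.keys.sort.join(',')})"
values  = rows.map { |r| "(#{r.keys.sort.map { |k| r[k].to_json }.join(',')})" }

puts "INSERT INTO Customer #{columns} VALUES " + values.join(',')
#=> INSERT INTO Customer (id,name) VALUES (1,"ann"),(2,"bob")

puts "DELETE FROM Customer WHERE " + ['id=1', 'id=2'].join(' or ')
#=> DELETE FROM Customer WHERE id=1 or id=2
```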
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigobject
 version: !ruby/object:Gem::Version
-  version: 0.0.10
+  version: 0.0.12
 platform: ruby
 authors:
 - Andrea Sung
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-07 00:00:00.000000000 Z
+date: 2015-11-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -52,20 +52,6 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-- !ruby/object:Gem::Dependency
-  name: avro
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: '0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: '0'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -80,7 +66,7 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-description: 'Fluentd output plugin to insert BIGOBJECT '
+description: 'Fluentd output plugin to insert/update/delete data in BIGOBJECT '
 email:
 - andrea@bigobject.io
 executables: []
@@ -94,7 +80,7 @@ files:
 - Rakefile
 - fluent-plugin-bigobject.gemspec
 - lib/fluent/plugin/out_bigobject.rb
-homepage: https://github.com/macrodatalab/fluent-plugin-bigobject
+homepage: https://github.com/bigobject-inc/fluent-plugin-bigobject
 licenses:
 - MIT
 metadata: {}
@@ -117,5 +103,5 @@ rubyforge_project:
 rubygems_version: 2.2.2
 signing_key:
 specification_version: 4
-summary: Fluentd output plugin to insert BIGOBJECT
+summary: Fluentd output plugin to insert/update/delete data in BIGOBJECT
 test_files: []