fluent-plugin-bigobject 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a45409dcfa7c2d66087c327e66fb4d5aa6855434
-  data.tar.gz: aa29ec1f4e23c6f2e37526d6cb39013a0fefd536
+  metadata.gz: 476a311048599f15ef608f470019aee3755f480a
+  data.tar.gz: b5ed61454e7d2aed2bff4d7aef43707b7b3b7d1a
 SHA512:
-  metadata.gz: 63d9945a6f7db0e106eef8d2bbdebdb4316e8b47b833f952c81f5ac1533c0b050adc65a9ee065a1fcab21dbd3c3376cf2c8e585ce4b662401f4f21075375c773
-  data.tar.gz: 5d89f98e21cd45b2b5cd74add1aac255ffca1bbef80dd0df42d78a5db81eb4dd219795003047cd799381f3cc5a08508b1184edbc9e94c49ab57e5ae68c947e93
+  metadata.gz: c0f3e2399d792c2fc2b92ba74c15a8cfa2dca4e67734553006c3c9730f3da84f53b46f0954d4f51fc40587f72c24b51f7c6e731451e703a6465a5b316938847d
+  data.tar.gz: a5ff36959b8f31f02e28c2be9b464e18eee2d35557599c9da6a13d53ad35f1b04552d6d910c37fc6708c2b7a17662091817a2c6ff09f2d1ddb2a7eecd12c2d1c
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2015 MacroData
+Copyright (c) 2015 BigObject
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -24,27 +24,46 @@ Configure BigObject URL and the table/column to be mapped in BigObject
 
 ```apache
 
-# send data to BigObject using avro by providing schema_file in each table
-<match bo.insert_avro.*>
-  type bigobject
-
-  log_level info
-
-  # specify the bigobject host/port to connect to
-  bigobject_hostname 192.168.59.103
-  bigobject_port 9091
-
-  remove_tag_prefix bo.insert_avro.
-  flush_interval 60s
-
-  <table>
-    pattern customer
-    schema_file /fluentd/input/avsc/Customer_binary.avsc
-  </table>
-</match>
-
-# send data to BigObject using Restful API. Tables need to be created in advance in BigObject.
-<match bo.insert_rest.*>
+# sample source to read a csv file
+<source>
+  type tail
+
+  # path - where you placed your input data
+  path ./input/Customer.csv
+
+  # pos_file - where you record the file position
+  pos_file ./log/customer.log.pos
+
+  # for the bigobject output plugin, use tag bigobject.${table_pattern}.${event}.${primary_key}
+  # ${primary_key} is not needed for insert
+  tag bigobject.cust.insert
+
+  # input file format
+  format csv
+
+  # keys - columns in the csv file
+  keys id,name,language,state,company,gender,age
+
+  # types - string/bool/integer/float/time/array
+  types age1:integer
+
+</source>
+
+# Send data to BigObject using the Restful API. Tables need to be created in advance in BigObject.
+# Depending on the event in the received tag, data is sent to BigObject for insert/update/delete.
+#
+# Tag for each event - bigobject.${table_pattern}.${event}.${primary_key}
+# ${table_pattern} : matched against the <pattern> in the <table> section of the bigobject output plugin
+# ${event}         : one of insert/update/delete
+# ${primary_key}   : the primary key for the table; optional for the insert event.
+#                    If the primary key is an integer type in BigObject, set bo_primary_key_is_int to true.
+#
+# Eg:
+#   tag bigobject.cust.insert    ==> INSERT INTO <table> VALUES ...
+#   tag bigobject.cust.delete.id ==> DELETE FROM <table> WHERE id=...
+#   tag bigobject.cust.update.id ==> UPDATE <table> SET ... WHERE id=...
+
+<match bigobject.**>
   type bigobject
 
   log_level info
@@ -53,14 +72,15 @@ Configure BigObject URL and the table/column to be mapped in BigObject
   bigobject_hostname 192.168.59.103
   bigobject_port 9090
 
-  remove_tag_prefix bo.insert_rest.
+  remove_tag_prefix bigobject.
   flush_interval 60s
 
   <table>
     table Customer
-    pattern customer
+    pattern cust
 
     #optional-
+    #bo_primary_key_is_int true    #defaults to false
    #column_mapping id,name,language,state,company,gender,age
    #bo_workspace
    #bo_opts
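
The tag scheme above is worth a quick illustration (ours, not part of the package): once `remove_tag_prefix bigobject.` strips the prefix, the plugin's DEFAULT_TAG_FORMAT (copied verbatim from the out_bigobject.rb diff below) decomposes what remains into table pattern, event, and optional primary key:

```ruby
# DEFAULT_TAG_FORMAT exactly as defined in 0.0.12 (see the plugin diff below).
DEFAULT_TAG_FORMAT = /^(?<table_name>[^\.]+)\.(?<event>[^\.]+)(\.(?<primary_key>[^\.]+))*/

%w[cust.insert cust.update.id cust.delete.id].each do |tag|
  m = tag.match(DEFAULT_TAG_FORMAT)
  puts "table=#{m['table_name']} event=#{m['event']} pkey=#{m['primary_key'].inspect}"
end
# table=cust event=insert pkey=nil
# table=cust event=update pkey="id"
# table=cust event=delete pkey="id"
```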
data/fluent-plugin-bigobject.gemspec CHANGED
@@ -1,12 +1,12 @@
 # -*- encoding: utf-8 -*-
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-bigobject"
-  gem.version = "0.0.10"
+  gem.version = "0.0.12"
   gem.authors = ["Andrea Sung"]
   gem.email = ["andrea@bigobject.io"]
-  gem.description = %q{Fluentd output plugin to insert BIGOBJECT }
-  gem.summary = %q{Fluentd output plugin to insert BIGOBJECT}
-  gem.homepage = "https://github.com/macrodatalab/fluent-plugin-bigobject"
+  gem.description = %q{Fluentd output plugin to insert/update/delete data in BIGOBJECT }
+  gem.summary = %q{Fluentd output plugin to insert/update/delete data in BIGOBJECT}
+  gem.homepage = "https://github.com/bigobject-inc/fluent-plugin-bigobject"
   gem.license = "MIT"
 
   gem.files = `git ls-files`.split($\)
@@ -17,6 +17,5 @@ Gem::Specification.new do |gem|
   gem.add_runtime_dependency "fluentd"
   gem.add_runtime_dependency "rest-client"
   gem.add_runtime_dependency "json"
-  gem.add_runtime_dependency "avro"
   gem.add_development_dependency "rake"
 end
data/lib/fluent/plugin/out_bigobject.rb CHANGED
@@ -8,7 +8,10 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   config_param :bigobject_hostname, :string
   config_param :bigobject_port, :integer
   config_param :remove_tag_prefix, :string, :default => nil
-  config_param :send_unknown_chunks, :string, :default=>true
+  config_param :tag_format, :string, :default => nil
+
+  # DEFAULT_TAG_FORMAT = /(?<table_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$/
+  DEFAULT_TAG_FORMAT = /^(?<table_name>[^\.]+)\.(?<event>[^\.]+)(\.(?<primary_key>[^\.]+))*/
 
   attr_accessor :tables
 
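The new `tag_format` parameter lets users override DEFAULT_TAG_FORMAT with their own regex. A sketch under one assumption (a hypothetical reversed "<event>.<table>[.<pkey>]" scheme, not something the package ships): whatever pattern is supplied must keep the named captures `table_name`, `event`, and `primary_key`, because `send` and `write` look the match groups up by those names.

```ruby
# Hypothetical custom tag_format -- illustration only.
# The named groups are the contract; the surrounding syntax is free.
custom = /^(?<event>[^\.]+)\.(?<table_name>[^\.]+)(\.(?<primary_key>[^\.]+))?$/

m = 'update.cust.id'.match(custom)
m['table_name']   # => "cust"
m['event']        # => "update"
m['primary_key']  # => "id"
```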
@@ -19,35 +22,26 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   class TableElement
     include Fluent::Configurable
 
-    config_param :table, :string, :default=>nil
+    config_param :table, :string
     config_param :column_mapping, :string, :default=>nil
     config_param :pattern, :string, :default=>nil
     config_param :bo_workspace, :string, :default=>nil
     config_param :bo_opts, :string, :default=>nil
-    config_param :schema_file, :string, :default => nil
+    config_param :bo_primary_key_is_int, :bool, :default=>false
 
     attr_reader :mpattern
 
-    def initialize(log, bo_hostname, bo_port)
+    def initialize(log, bo_hostname, bo_port, tag_format)
       super()
       @log = log
       @bo_hostname = bo_hostname
       @bo_port = bo_port
       @bo_url="http://#{@bo_hostname}:#{@bo_port}/cmd"
+      @tag_format = tag_format
     end
 
     def configure(conf)
       super
-      if (@table==nil)&&(@schema_file==nil)
-        raise "Table name and schema_file cannot be both nil. Please specify <schema_file> if using avro input or <table> is using restful api."
-      end
-      if (isBinary)
-        @avro_schema = Avro::Schema.parse(File.open(@schema_file, "rb").read)
-        @avro_writer = Avro::IO::DatumWriter.new(@avro_schema)
-      else
-        @avro_schema = nil
-        @avro_writer = nil
-      end
 
       @mpattern = Fluent::MatchPattern.create(pattern)
       @mapping = (@column_mapping==nil)? nil:parse_column_mapping(@column_mapping)
@@ -65,82 +59,71 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
       }
     end
 
-    def isBinary()
-      return !(@schema_file.to_s.empty?)
+    def getPkeyValue(value)
+      if (@bo_primary_key_is_int)
+        return value
+      else
+        return "\"#{value}\""
+      end
     end
-
+
     #Send Data to Bigobject using Restful API
-    def send_rest(chunk)
-      stmts = Array.new
-      i=0
+    def send(chunk)
+      insertStmts = Array.new
+      deleteStmts = Array.new
+
       columns = nil
       chunk.msgpack_each { |tag, time, data|
+        tag_parts = tag.match(@tag_format)
+        target_event = tag_parts['event']
+        id_key = tag_parts['primary_key']
+
         keys = Array.new
         values = Array.new
         data = @format_proc.call(data)
         data.keys.sort.each do |key|
-          keys << key
-          values << data[key].to_json
+          keys << key
+          values << data[key].to_json
         end
-        if columns.to_s.empty?
-          columns = "(#{keys.join(",")})"
+
+        if (target_event=='insert')
+          if columns.to_s.empty?
+            columns = "(#{keys.join(",")})"
+          end
+          insertStmts.push("(#{values.join(",")})")
+        elsif (target_event=='update')
+          pkey=""
+          updates = Array.new
+          keys.zip(values) { |key, value|
+            if (key==id_key)
+              pkey = getPkeyValue(value)
+            else
+              updates.push("#{key}=#{value}")
+            end
+          }
+          sendStmt = "UPDATE #{table} SET #{updates.join(",")} WHERE #{id_key}=#{pkey}"
+          sendBO(@bo_url, sendStmt)
+        elsif (target_event=='delete')
+          keys.zip(values) { |key, value|
+            if (key==id_key)
+              pkey = getPkeyValue(value)
+            end
+            deleteStmts.push("#{id_key}=#{pkey}")
+          }
         end
-        stmts.push("(#{values.join(",")})")
-        #stmts.push("(\"#{values.join("\",\"")}\")")
-        i+=1
       }
 
-      sendStmt = "INSERT INTO #{@table} #{columns} VALUES" + stmts.join(",")
-      resp = sendBO(@bo_url, sendStmt)
-      parsed = JSON.parse(resp)
-      err = parsed['Err']
-      if (err.to_s!='')
-        @log.error("[BigObject] #{err}")
-      end
-      @log.debug("bigobject insert #{i} rows")
-
-    end
-
-    #Send data to Bigobject using binary AVRO
-    def send_binary(chunk)
+      if insertStmts.length>0
+        sendStmt = "INSERT INTO #{@table} #{columns} VALUES " + insertStmts.join(",")
+        sendBO(@bo_url, sendStmt)
+        @log.debug("sending #{insertStmts.length} rows to bigobject for insert via Restful API")
+      end
 
-      buffer = StringIO.new()
-      dw = Avro::DataFile::Writer.new(buffer, @avro_writer, @avro_schema)
-      i=0
-      chunk.msgpack_each { |tag, time, data|
-        data = @format_proc.call(data)
-        dw<<data
-        i+=1
-      }
-      dw.flush
-
-      begin
-        socket = TCPSocket.open(@bo_hostname, @bo_port)
-        begin
-          #timeout=60
-          opt = [1, 60].pack('I!I!') # { int l_onoff; int l_linger; }
-          socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
-
-          opt = [60, 0].pack('L!L!') # struct timeval
-          socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
-          socket.write(buffer.string)
-        ensure
-          socket.close
-        end
-
-      rescue Exception => e
-        @log.error(e.message)
-        raise "Failed to send_binary: #{e.message}"
+      if deleteStmts.length>0
+        sendStmt = "DELETE FROM #{@table} WHERE " + deleteStmts.join(" or ")
+        sendBO(@bo_url, sendStmt)
+        @log.debug("sending #{deleteStmts.length} rows to bigobject for delete via Restful API")
       end
-      @log.debug("bigobject send #{i} rows")
-    end
-
-    def send(chunk)
-      if (isBinary)
-        send_binary(chunk)
-      else
-        send_rest(chunk)
-      end
     end
 
     def to_s
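
A worked example of the rewritten send (ours, with assumed sample records): for a chunk carrying three events for a `cust` table keyed by `id`, with `bo_primary_key_is_int` at its default of false, UPDATEs go out one statement per record as they are seen, INSERT rows are batched into a single statement after the loop, and DELETE conditions are OR-ed into one statement at the end:

```ruby
# Chunk contents (tag after remove_tag_prefix => record), assumed for illustration:
#   "cust.insert"    => {"id" => 1, "name" => "ann"}
#   "cust.update.id" => {"id" => 1, "name" => "amy"}
#   "cust.delete.id" => {"id" => 1}
#
# Statements sent to http://<host>:<port>/cmd:
#   UPDATE cust SET name="amy" WHERE id="1"        (immediately, per record)
#   INSERT INTO cust (id,name) VALUES (1,"ann")    (batched, after the loop)
#   DELETE FROM cust WHERE id="1"                  (conditions joined with "or")
#
# Values pass through #to_json, so strings arrive double-quoted, and the
# primary key is quoted too unless bo_primary_key_is_int is set to true.
```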
@@ -181,7 +164,12 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
         raise "Failed to sendBO: #{e.message}"
       end
 
-      return resp
+      parsed = JSON.parse(resp)
+      err = parsed['Err']
+      if (err.to_s!='')
+        @log.error("[BigObject] #{err}")
+      end
+
     end
 
   end #end class
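
sendBO now consumes the HTTP response itself instead of returning it: the body is JSON-parsed and a non-empty `Err` field is logged. A minimal sketch of that check, assuming a response body shaped like `{"Err": "..."}` (the only field the plugin reads; the error text here is invented):

```ruby
require 'json'

resp = '{"Err":"table cust does not exist"}'  # assumed/invented response body
err = JSON.parse(resp)['Err']
warn "[BigObject] #{err}" unless err.to_s.empty?
```

One side effect visible in the diff: with `return resp` gone, callers such as send no longer receive the raw response, which matches their new fire-and-forget use of sendBO.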
@@ -190,7 +178,6 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
     super
     require 'rest-client'
     require 'json'
-    require 'avro'
     log.info("bigobject initialize")
   end
 
@@ -201,13 +188,19 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
       @remove_tag_prefix = Regexp.new('^' + Regexp.escape(remove_tag_prefix))
     end
 
+    if @tag_format.nil? || @tag_format == DEFAULT_TAG_FORMAT
+      @tag_format = DEFAULT_TAG_FORMAT
+    else
+      @tag_format = Regexp.new(conf['tag_format'])
+    end
+
     @tables = []
     @default_table = nil
 
     conf.elements.select { |e|
       e.name == 'table'
     }.each { |e|
-      te = TableElement.new(log, @bigobject_hostname, @bigobject_port)
+      te = TableElement.new(log, @bigobject_hostname, @bigobject_port, @tag_format)
       te.configure(e)
       @tables << te
     }
@@ -234,8 +227,12 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   # 'chunk' is a buffer chunk that includes multiple formatted events.
   def write(chunk)
     unknownChunks = []
+    tag = chunk.key
+    tag_parts = tag.match(@tag_format)
+    target_table = tag_parts['table_name']
+
     @tables.each { |table|
-      if table.mpattern.match(chunk.key)
+      if table.mpattern.match(target_table)
         return table.send(chunk)
       end
     }
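
Routing sketch (ours; assumes a fluentd v0.12 environment where Fluent::MatchPattern is available, as the plugin itself relies on it): because emit below buffers under the prefix-stripped tag, chunk.key arrives here as e.g. "cust.insert", and only its `table_name` capture is matched against each `<table>` pattern, so a single `pattern cust` covers insert, update, and delete events:

```ruby
require 'fluent/match'  # fluentd v0.12-era API used by the plugin

DEFAULT_TAG_FORMAT = /^(?<table_name>[^\.]+)\.(?<event>[^\.]+)(\.(?<primary_key>[^\.]+))*/

target = 'cust.insert'.match(DEFAULT_TAG_FORMAT)['table_name']  # => "cust"
Fluent::MatchPattern.create('cust').match(target)               # => true
Fluent::MatchPattern.create('cust').match('orders')             # => false
```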
@@ -253,6 +250,7 @@ class Fluent::BigObjectOutput < Fluent::BufferedOutput
   end
 
   def emit(tag, es, chain)
-    super(tag, es, chain, format_tag(tag))
+    nt = format_tag(tag)
+    super(nt, es, chain, nt)
   end
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigobject
 version: !ruby/object:Gem::Version
-  version: 0.0.10
+  version: 0.0.12
 platform: ruby
 authors:
 - Andrea Sung
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-07 00:00:00.000000000 Z
+date: 2015-11-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -52,20 +52,6 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-- !ruby/object:Gem::Dependency
-  name: avro
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: '0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: '0'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -80,7 +66,7 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-description: 'Fluentd output plugin to insert BIGOBJECT '
+description: 'Fluentd output plugin to insert/update/delete data in BIGOBJECT '
 email:
 - andrea@bigobject.io
 executables: []
@@ -94,7 +80,7 @@ files:
 - Rakefile
 - fluent-plugin-bigobject.gemspec
 - lib/fluent/plugin/out_bigobject.rb
-homepage: https://github.com/macrodatalab/fluent-plugin-bigobject
+homepage: https://github.com/bigobject-inc/fluent-plugin-bigobject
 licenses:
 - MIT
 metadata: {}
@@ -117,5 +103,5 @@ rubyforge_project:
 rubygems_version: 2.2.2
 signing_key:
 specification_version: 4
-summary: Fluentd output plugin to insert BIGOBJECT
+summary: Fluentd output plugin to insert/update/delete data in BIGOBJECT
 test_files: []