fluent-plugin-bigquery 0.2.6 → 0.2.7

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: eca8d5626366799524377edcdd7ced540cc633ef
-  data.tar.gz: f39debef79fbb52c9c8c7068d4e68f8cc7bbc036
+  metadata.gz: e55e2b00afb7ffd8b32cc9b7fa0a305ab66af863
+  data.tar.gz: b93594427c232d70db4f7ceca1a28554732b5122
 SHA512:
-  metadata.gz: bc6e6af3faf9b1eb9a6dc0fe96a17b8534828fbbcc1133f3e6f9d7c956cca5a65c15319dd2c8df6e4f55a674b561edbb88087ca7c5e298c24ee50db3924e546b
-  data.tar.gz: 6eff9dcacc9e6b956cc2eb0b55b91f6db0b3418474f941bd83a18eb1f7cef33c68541b6a7d650c42f053edb7ae7464a460d737d09b71bb47e19591521758450f
+  metadata.gz: 5662de9f1d839a5ef38fd04df24bd4b90de8bd4a494020ecde58e20191df1317322b903eaef985f7cdaf68f24260ed86b802a6aca53dfb3e3825772547cafaf9
+  data.tar.gz: 03a4f811ccc74d36530a4d341218a31c9bc913f530321565696a6e70227c94067bc61a91c2ea97bd5ba5ebf2c617bc1da51e0c1b8c71e97037c444092f62c18c
data/README.md CHANGED
@@ -163,6 +163,26 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 Note that the timestamp of logs and the date in the table id do not always match,
 because there is a time lag between collection and transmission of logs.
 
+### Dynamic table creation
+
+When `auto_create_table` is set to `true`, the plugin tries to create the table via the BigQuery API when an insert fails with code=404 "Not Found: Table ...".
+The next retry of the insert is then expected to succeed.
+
+NOTE: the `auto_create_table` option cannot be used together with `fetch_schema`; create the table in advance if you use `fetch_schema`.
+
+```apache
+<match dummy>
+  type bigquery
+
+  ...
+
+  auto_create_table true
+  table accesslog_%Y_%m
+
+  ...
+</match>
+```
+
 ### Table schema
 
 There are three methods to describe the schema of the target table.
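For reference, a minimal sketch of the insert-error body that triggers the auto-creation path described in the README addition above; the project/dataset/table names are the placeholder values used in the test stub further down in this change, not anything specific to your deployment:

```ruby
# Sketch only: the parsed error body that makes the plugin call tables.insert
# when auto_create_table is true. Names are the placeholders from the test
# stub below ("yourproject_id", "yourdataset_id", "foo").
{
  'error' => {
    'code'    => 404,
    'message' => 'Not Found: Table yourproject_id:yourdataset_id.foo'
  }
}
```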
@@ -1,6 +1,6 @@
 module Fluent
   module BigQueryPlugin
-    VERSION = "0.2.6"
+    VERSION = "0.2.7"
   end
 end
@@ -40,12 +40,12 @@ module Fluent
     # Available methods are:
     # * private_key -- Use service account credential
     # * compute_engine -- Use access token available in instances of ComputeEngine
-    config_param :auth_method, :string, :default => 'private_key'
+    config_param :auth_method, :string, default: 'private_key'
 
     ### Service Account credential
-    config_param :email, :string, :default => nil
-    config_param :private_key_path, :string, :default => nil
-    config_param :private_key_passphrase, :string, :default => 'notasecret'
+    config_param :email, :string, default: nil
+    config_param :private_key_path, :string, default: nil
+    config_param :private_key_passphrase, :string, default: 'notasecret'
 
     # see as simple reference
     # https://github.com/abronte/BigQuery/blob/master/lib/bigquery.rb
@@ -58,32 +58,34 @@ module Fluent
 
     # table_id
     # In Table ID, enter a name for your new table. Naming rules are the same as for your dataset.
-    config_param :table, :string, :default => nil
-    config_param :tables, :string, :default => nil
-
-    config_param :schema_path, :string, :default => nil
-    config_param :fetch_schema, :bool, :default => false
-    config_param :field_string, :string, :default => nil
-    config_param :field_integer, :string, :default => nil
-    config_param :field_float, :string, :default => nil
-    config_param :field_boolean, :string, :default => nil
-    config_param :field_timestamp, :string, :default => nil
+    config_param :table, :string, default: nil
+    config_param :tables, :string, default: nil
+
+    config_param :auto_create_table, :bool, default: false
+
+    config_param :schema_path, :string, default: nil
+    config_param :fetch_schema, :bool, default: false
+    config_param :field_string, :string, default: nil
+    config_param :field_integer, :string, default: nil
+    config_param :field_float, :string, default: nil
+    config_param :field_boolean, :string, default: nil
+    config_param :field_timestamp, :string, default: nil
     ### TODO: record field stream inserts doesn't works well?
     ### At table creation, table type json + field type record -> field type validation fails
     ### At streaming inserts, schema cannot be specified
-    # config_param :field_record, :string, :defualt => nil
-    # config_param :optional_data_field, :string, :default => nil
+    # config_param :field_record, :string, defualt: nil
+    # config_param :optional_data_field, :string, default: nil
 
-    config_param :time_format, :string, :default => nil
-    config_param :localtime, :bool, :default => nil
-    config_param :utc, :bool, :default => nil
-    config_param :time_field, :string, :default => nil
+    config_param :time_format, :string, default: nil
+    config_param :localtime, :bool, default: nil
+    config_param :utc, :bool, default: nil
+    config_param :time_field, :string, default: nil
 
-    config_param :insert_id_field, :string, :default => nil
+    config_param :insert_id_field, :string, default: nil
 
-    config_param :method, :string, :default => 'insert' # or 'load' # TODO: not implemented now
+    config_param :method, :string, default: 'insert' # or 'load' # TODO: not implemented now
 
-    config_param :load_size_limit, :integer, :default => 1000**4 # < 1TB (1024^4) # TODO: not implemented now
+    config_param :load_size_limit, :integer, default: 1000**4 # < 1TB (1024^4) # TODO: not implemented now
     ### method: 'load'
     # https://developers.google.com/bigquery/loading-data-into-bigquery
     # Maximum File Sizes:
@@ -92,9 +94,9 @@ module Fluent
     # Without new-lines in strings: 1 TB
     # JSON 1 GB 1 TB
 
-    config_param :row_size_limit, :integer, :default => 100*1000 # < 100KB # configurable in google ?
-    # config_param :insert_size_limit, :integer, :default => 1000**2 # < 1MB
-    # config_param :rows_per_second_limit, :integer, :default => 1000 # spike limit
+    config_param :row_size_limit, :integer, default: 100*1000 # < 100KB # configurable in google ?
+    # config_param :insert_size_limit, :integer, default: 1000**2 # < 1MB
+    # config_param :rows_per_second_limit, :integer, default: 1000 # spike limit
     ### method: ''Streaming data inserts support
     # https://developers.google.com/bigquery/streaming-data-into-bigquery#usecases
     # Maximum row size: 100 KB
@@ -137,7 +139,7 @@ module Fluent
 
       case @auth_method
       when 'private_key'
-        if !@email || !@private_key_path
+        unless @email && @private_key_path
           raise Fluent::ConfigError, "'email' and 'private_key_path' must be specified if auth_method == 'private_key'"
         end
       when 'compute_engine'
@@ -146,7 +148,7 @@ module Fluent
         raise Fluent::ConfigError, "unrecognized 'auth_method': #{@auth_method}"
       end
 
-      if (!@table && !@tables) || (@table && @tables)
+      unless @table.nil? ^ @tables.nil?
         raise Fluent::ConfigError, "'table' or 'tables' must be specified, and both are invalid"
       end
 
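The rewritten guard reads: raise unless exactly one of `table`/`tables` is configured. A standalone sketch of the same XOR check, using made-up values:

```ruby
# Illustration only (values are made up): `nil?` XOR `nil?` is true exactly
# when one option is set and the other is not, which is the valid case.
table  = "accesslog_%Y_%m"
tables = nil

unless table.nil? ^ tables.nil?
  raise "'table' or 'tables' must be specified, and both are invalid"
end
# no error here; setting both (or neither) would raise
```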
@@ -156,53 +158,34 @@ module Fluent
       if @schema_path
         @fields.load_schema(JSON.parse(File.read(@schema_path)))
       end
-      if @field_string
-        @field_string.split(',').each do |fieldname|
-          @fields.register_field fieldname.strip, :string
-        end
-      end
-      if @field_integer
-        @field_integer.split(',').each do |fieldname|
-          @fields.register_field fieldname.strip, :integer
-        end
-      end
-      if @field_float
-        @field_float.split(',').each do |fieldname|
-          @fields.register_field fieldname.strip, :float
-        end
-      end
-      if @field_boolean
-        @field_boolean.split(',').each do |fieldname|
-          @fields.register_field fieldname.strip, :boolean
-        end
-      end
-      if @field_timestamp
-        @field_timestamp.split(',').each do |fieldname|
-          @fields.register_field fieldname.strip, :timestamp
-        end
-      end
 
-      if @localtime.nil?
-        if @utc
-          @localtime = false
+      types = %w(string integer float boolean timestamp)
+      types.each do |type|
+        raw_fields = instance_variable_get("@field_#{type}")
+        next unless raw_fields
+        raw_fields.split(',').each do |field|
+          @fields.register_field field.strip, type.to_sym
         end
       end
+
+      @localtime = false if @localtime.nil? && @utc
+
       @timef = TimeFormatter.new(@time_format, @localtime)
 
       if @time_field
         keys = @time_field.split('.')
         last_key = keys.pop
-        @add_time_field = lambda {|record, time|
+        @add_time_field = ->(record, time) {
           keys.inject(record) { |h, k| h[k] ||= {} }[last_key] = @timef.format(time)
           record
         }
       else
-        @add_time_field = lambda {|record, time| record }
+        @add_time_field = ->(record, time) { record }
       end
 
       if @insert_id_field
         insert_id_keys = @insert_id_field.split('.')
-        @get_insert_id = lambda {|record|
+        @get_insert_id = ->(record) {
           insert_id_keys.inject(record) {|h, k| h[k] }
         }
       else
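The refactor above replaces five near-identical `if @field_*` blocks with one loop over the type names. A self-contained sketch of what the loop does, using hypothetical field lists in place of the plugin's configuration:

```ruby
# Sketch with hypothetical configuration values: each comma-separated
# `field_<type>` option turns into one register_field call per name.
field_values = {
  "string"  => "vhost,path",    # assumed example values
  "integer" => "status,bytes",
}

%w(string integer float boolean timestamp).each do |type|
  raw_fields = field_values[type]
  next unless raw_fields
  raw_fields.split(',').each do |field|
    puts "register_field #{field.strip.inspect}, :#{type}"
  end
end
# => register_field "vhost", :string ... register_field "bytes", :integer
```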
@@ -223,17 +206,12 @@ module Fluent
       fetch_schema() if @fetch_schema
     end
 
-    def shutdown
-      super
-      # nothing to do
-    end
-
     def client
       return @cached_client if @cached_client && @cached_client_expiration > Time.now
 
       client = Google::APIClient.new(
-        :application_name => 'Fluentd BigQuery plugin',
-        :application_version => Fluent::BigQueryPlugin::VERSION
+        application_name: 'Fluentd BigQuery plugin',
+        application_version: Fluent::BigQueryPlugin::VERSION
       )
 
       case @auth_method
@@ -264,17 +242,20 @@ module Fluent
       current_time.strftime(table_id_format)
     end
 
-    def insert(table_id_format, rows)
-      table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
+    def create_table(table_id)
       res = client().execute(
-        :api_method => @bq.tabledata.insert_all,
+        :api_method => @bq.tables.insert,
         :parameters => {
           'projectId' => @project,
           'datasetId' => @dataset,
-          'tableId' => table_id,
         },
         :body_object => {
-          "rows" => rows
+          'tableReference' => {
+            'tableId' => table_id,
+          },
+          'schema' => {
+            'fields' => @fields.to_a,
+          },
         }
       )
       unless res.success?
@@ -289,8 +270,42 @@ module Fluent
           rescue => e
             log.warn "Parse error: google api error response body", :body => res.body
           end
+          if res_obj and res_obj['code'] == 409 and /Already Exists:/ =~ message
+            # ignore 'Already Exists' error
+            return
+          end
         end
-        log.error "tabledata.insertAll API", :project_id => @project, :dataset => @dataset, :table => table_id, :code => res.status, :message => message
+        log.error "tables.insert API", :project_id => @project, :dataset => @dataset, :table => table_id, :code => res.status, :message => message
+        raise "failed to create table in bigquery" # TODO: error class
+      end
+    end
+
+    def insert(table_id_format, rows)
+      table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
+      res = client().execute(
+        api_method: @bq.tabledata.insert_all,
+        parameters: {
+          'projectId' => @project,
+          'datasetId' => @dataset,
+          'tableId' => table_id,
+        },
+        body_object: {
+          "rows" => rows
+        }
+      )
+      unless res.success?
+        # api_error? -> client cache clear
+        @cached_client = nil
+
+        res_obj = extract_response_obj(res.body)
+        message = res_obj['error']['message'] || res.body
+        if res_obj
+          if @auto_create_table and res_obj and res_obj['error']['code'] == 404 and /Not Found: Table/ =~ message.to_s
+            # Table Not Found: Auto Create Table
+            create_table(table_id)
+          end
+        end
+        log.error "tabledata.insertAll API", project_id: @project, dataset: @dataset, table: table_id, code: res.status, message: message
         raise "failed to insert into bigquery" # TODO: error class
       end
     end
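Taken together, the failure path is: a 404 "Not Found: Table ..." on `tabledata.insertAll` triggers `create_table`, and `insert` still raises so Fluentd's buffer retries the chunk against the now-existing table (while a 409 "Already Exists" from `tables.insert` is ignored). A simplified, hypothetical sketch of the 404 decision, not the plugin's actual method:

```ruby
require 'json'

# Simplified sketch (hypothetical helper, not part of the plugin): decide
# whether a failed insert should trigger table auto-creation.
def should_auto_create?(auto_create_table, response_body)
  res_obj = JSON.parse(response_body) rescue nil
  return false unless auto_create_table && res_obj && res_obj['error']
  res_obj['error']['code'] == 404 && /Not Found: Table/ =~ res_obj['error']['message'].to_s ? true : false
end

body = JSON.generate('error' => { 'code' => 404, 'message' => 'Not Found: Table yourproject_id:yourdataset_id.foo' })
puts should_auto_create?(true, body)   # => true  (create the table, then raise so the chunk is retried)
puts should_auto_create?(false, body)  # => false (just raise)
```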
@@ -335,8 +350,8 @@ module Fluent
       table_id_format = @tablelist[0]
       table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
       res = client.execute(
-        :api_method => @bq.tables.get,
-        :parameters => {
+        api_method: @bq.tables.get,
+        parameters: {
           'projectId' => @project,
           'datasetId' => @dataset,
           'tableId' => table_id,
@@ -346,17 +361,8 @@ module Fluent
       unless res.success?
         # api_error? -> client cache clear
         @cached_client = nil
-
-        message = res.body
-        if res.body =~ /^\{/
-          begin
-            res_obj = JSON.parse(res.body)
-            message = res_obj['error']['message'] || res.body
-          rescue => e
-            log.warn "Parse error: google api error response body", :body => res.body
-          end
-        end
-        log.error "tables.get API", :project_id => @project, :dataset => @dataset, :table => table_id, :code => res.status, :message => message
+        message = extract_error_message(res.body)
+        log.error "tables.get API", project_id: @project, dataset: @dataset, table: table_id, code: res.status, message: message
         raise "failed to fetch schema from bigquery" # TODO: error class
       end
 
@@ -370,19 +376,33 @@ module Fluent
     # raise NotImplementedError, "OAuth needs browser authentication..."
     #
     # client = Google::APIClient.new(
-    #   :application_name => 'Example Ruby application',
-    #   :application_version => '1.0.0'
+    #   application_name: 'Example Ruby application',
+    #   application_version: '1.0.0'
     # )
     # bigquery = client.discovered_api('bigquery', 'v2')
     # flow = Google::APIClient::InstalledAppFlow.new(
-    #   :client_id => @client_id
-    #   :client_secret => @client_secret
-    #   :scope => ['https://www.googleapis.com/auth/bigquery']
+    #   client_id: @client_id
+    #   client_secret: @client_secret
+    #   scope: ['https://www.googleapis.com/auth/bigquery']
     # )
     # client.authorization = flow.authorize # browser authentication !
     # client
     # end
 
+    def extract_response_obj(response_body)
+      return nil unless response_body =~ /^\{/
+      JSON.parse(response_body)
+    rescue
+      log.warn "Parse error: google api error response body", body: response_body
+      return nil
+    end
+
+    def extract_error_message(response_body)
+      res_obj = extract_response_obj(response_body)
+      return response_body if res_obj.nil?
+      res_obj['error']['message'] || response_body
+    end
+
     class FieldSchema
       def initialize(name, mode = :nullable)
         unless [:nullable, :required, :repeated].include?(mode)
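The two new helpers centralize the error-body handling that `fetch_schema` previously inlined: JSON bodies yield `error.message`, anything else is passed through untouched. A standalone sketch of that behavior (logging omitted, written as free functions rather than plugin instance methods):

```ruby
require 'json'

# Standalone sketch of the helpers' behavior; log.warn is omitted.
def extract_response_obj(response_body)
  return nil unless response_body =~ /^\{/
  JSON.parse(response_body)
rescue
  nil
end

def extract_error_message(response_body)
  res_obj = extract_response_obj(response_body)
  return response_body if res_obj.nil?
  res_obj['error']['message'] || response_body
end

puts extract_error_message('{"error":{"code":404,"message":"Not Found: Table x"}}')
# => Not Found: Table x
puts extract_error_message('<html>Bad Gateway</html>')
# => <html>Bad Gateway</html>   (non-JSON bodies are returned as-is)
```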
@@ -419,6 +439,14 @@ module Fluent
       def format_one(value)
         raise NotImplementedError, "Must implement in a subclass"
       end
+
+      def to_h
+        {
+          'name' => name,
+          'type' => type.to_s.upcase,
+          'mode' => mode.to_s.upcase,
+        }
+      end
     end
 
     class StringFieldSchema < FieldSchema
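`FieldSchema#to_h` serializes each leaf field to the hash shape BigQuery's table schema expects; for a hypothetical nullable string field named `vhost` the result would be:

```ruby
# Expected shape (the field name 'vhost' is illustrative only):
{
  'name' => 'vhost',
  'type' => 'STRING',
  'mode' => 'NULLABLE',
}
```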
@@ -473,12 +501,12 @@ module Fluent
 
     class RecordSchema < FieldSchema
       FIELD_TYPES = {
-        :string => StringFieldSchema,
-        :integer => IntegerFieldSchema,
-        :float => FloatFieldSchema,
-        :boolean => BooleanFieldSchema,
-        :timestamp => TimestampFieldSchema,
-        :record => RecordSchema
+        string: StringFieldSchema,
+        integer: IntegerFieldSchema,
+        float: FloatFieldSchema,
+        boolean: BooleanFieldSchema,
+        timestamp: TimestampFieldSchema,
+        record: RecordSchema
       }.freeze
 
       def initialize(name, mode = :nullable)
@@ -494,6 +522,21 @@ module Fluent
         @fields[name]
       end
 
+      def to_a
+        @fields.map do |_, field_schema|
+          field_schema.to_h
+        end
+      end
+
+      def to_h
+        {
+          'name' => name,
+          'type' => type.to_s.upcase,
+          'mode' => mode.to_s.upcase,
+          'fields' => self.to_a,
+        }
+      end
+
       def load_schema(schema, allow_overwrite=true)
         schema.each do |field|
           raise ConfigError, 'field must have type' unless field.key?('type')
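`RecordSchema#to_a` is exactly what `create_table` sends as `'schema' => { 'fields' => @fields.to_a }`. For a hypothetical schema with a nested `remote` record it would produce roughly:

```ruby
# Rough shape of the fields array sent to tables.insert; field names are
# illustrative (borrowed from the test fixture), not prescribed.
[
  { 'name' => 'time',   'type' => 'TIMESTAMP', 'mode' => 'NULLABLE' },
  { 'name' => 'status', 'type' => 'INTEGER',   'mode' => 'NULLABLE' },
  { 'name' => 'remote', 'type' => 'RECORD',    'mode' => 'NULLABLE',
    'fields' => [
      { 'name' => 'host', 'type' => 'STRING', 'mode' => 'NULLABLE' },
      { 'name' => 'ip',   'type' => 'STRING', 'mode' => 'NULLABLE' },
    ] },
]
```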
@@ -717,6 +717,103 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert_equal 'foo_2014_08_11', table_id
   end
 
+  def test_auto_create_table_by_bigquery_api
+    now = Time.now
+    message = {
+      "json" => {
+        "time" => now.to_i,
+        "request" => {
+          "vhost" => "bar",
+          "path" => "/path/to/baz",
+          "method" => "GET",
+          "protocol" => "HTTP/1.0",
+          "agent" => "libwww",
+          "referer" => "http://referer.example",
+          "time" => (now - 1).to_f,
+          "bot_access" => true,
+          "loginsession" => false,
+        },
+        "remote" => {
+          "host" => "remote.example",
+          "ip" => "192.168.1.1",
+          "user" => "nagachika",
+        },
+        "response" => {
+          "status" => 200,
+          "bytes" => 72,
+        },
+      }
+    }
+
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      time_format %s
+      time_field time
+
+      auto_create_table true
+      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
+    CONFIG
+    mock_client(driver) do |expect|
+      expect.discovered_api("bigquery", "v2") {
+        mock! {
+          tables.mock!.insert { Object.new }
+          tabledata.mock!.insert_all { Object.new }
+        }
+      }
+      expect.execute(
+        :api_method => anything,
+        :parameters => {
+          'projectId' => 'yourproject_id',
+          'datasetId' => 'yourdataset_id',
+          'tableId' => 'foo'
+        },
+        :body_object => {
+          "rows" => [ message ]
+        }
+      ) {
+        s = stub!
+        s.success? { false }
+        s.body { JSON.generate({
+          'error' => { "code" => 404, "message" => "Not Found: Table yourproject_id:yourdataset_id.foo" }
+        }) }
+        s.status { 404 }
+        s
+      }
+      expect.execute(
+        :api_method => anything,
+        :parameters => {
+          'projectId' => 'yourproject_id',
+          'datasetId' => 'yourdataset_id',
+        },
+        :body_object => {
+          'tableReference' => {
+            'tableId' => 'foo',
+          },
+          'schema' => {
+            'fields' => JSON.parse(File.read(File.join(File.dirname(__FILE__), "testdata", "apache.schema")))
+          }
+        }
+      ) {
+        s = stub!
+        s.success? { true }
+        s
+      }
+    end
+    chunk = Fluent::MemoryBufferChunk.new("my.tag")
+    chunk << message.to_msgpack
+
+    driver.instance.start
+    assert_raise(RuntimeError) {
+      driver.instance.write(chunk)
+    }
+    driver.instance.shutdown
+  end
+
   private
 
   def sudo_schema_response
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.2.6
+  version: 0.2.7
 platform: ruby
 authors:
 - Naoya Ito
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-12-22 00:00:00.000000000 Z
+date: 2015-01-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake