fluent-plugin-bigquery 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: da7e99ce39ed37d44c0525d1b41587ed3792a76f
4
- data.tar.gz: ef402e3381ceddd53293028f2dfbe30ab584d877
3
+ metadata.gz: 55508761bdb03fba5e96f81a1eac46c278d9426e
4
+ data.tar.gz: f00e68b4f634a2ae2e269cfe4f643360cb3939e8
5
5
  SHA512:
6
- metadata.gz: 4dd01094f70694236195dbe5551ec676e2775978b33adbbe1223d820610fb2730a080cd3486f65de5e8190c738fc8659d6633385ea92200ec3c7676c5092d16f
7
- data.tar.gz: df4ab74ff0148c156246e590ac67a4f208951eb364145cca367b4b792f541082a48b9429ce1e2acde148af804bb2581da0be7849650533518e45ccc7e1e98890
6
+ metadata.gz: 4894b51150577736850ac81fbe2fc2d874b2f0e656b5d8456f3d43b836afdd7360764b1de370b83f3947823975b00ae1fa11397904b5226021eb9f1a306e2a0c
7
+ data.tar.gz: d941e48b1323347cdc835959d7c5f91e3009f35a2b64cd72211b8d24868ab9c402d738ab56176da4991f08cc7cafd59da8f39988f407f3fc9489156ebe2dade8
data/README.md CHANGED
@@ -14,7 +14,7 @@ OAuth flow for installed applications.
14
14
 
15
15
  ## Configuration
16
16
 
17
- ### Streming inserts
17
+ ### Streaming inserts
18
18
 
19
19
  Configure insert specifications with the target table schema and your credentials. This is the minimum configuration:
20
20
 
@@ -249,6 +249,22 @@ If you specify multiple tables in configuration file, plugin get all schema data
249
249
  NOTE: Since JSON does not define how to encode data of TIMESTAMP type,
250
250
  you are still advised to specify JSON types for TIMESTAMP fields, as the "time" field does in the example, if you use the second or third method.
251
251
 
252
+ ### Specifying insertId property
253
+
254
+ BigQuery uses the `insertId` property to detect duplicate insertion requests (see [data consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency) in the Google BigQuery documentation).
255
+ You can set the `insert_id_field` option to specify the field to use as the `insertId` property.
256
+
257
+ ```apache
258
+ <match dummy>
259
+ type bigquery
260
+
261
+ ...
262
+
263
+ insert_id_field uuid
264
+ field_string uuid
265
+ </match>
266
+ ```
267
+
252
268
  ## TODO
253
269
 
254
270
  * support Load API
@@ -1,6 +1,6 @@
1
1
  module Fluent
2
2
  module BigQueryPlugin
3
- VERSION = "0.2.4"
3
+ VERSION = "0.2.5"
4
4
  end
5
5
  end
6
6
 
@@ -79,6 +79,8 @@ module Fluent
79
79
  config_param :utc, :bool, :default => nil
80
80
  config_param :time_field, :string, :default => nil
81
81
 
82
+ config_param :insert_id_field, :string, :default => nil
83
+
82
84
  config_param :method, :string, :default => 'insert' # or 'load' # TODO: not implemented now
83
85
 
84
86
  config_param :load_size_limit, :integer, :default => 1000**4 # < 1TB (1024^4) # TODO: not implemented now
@@ -197,6 +199,15 @@ module Fluent
197
199
  else
198
200
  @add_time_field = lambda {|record, time| record }
199
201
  end
202
+
203
+ if @insert_id_field
204
+ insert_id_keys = @insert_id_field.split('.')
205
+ @get_insert_id = lambda {|record|
206
+ insert_id_keys.inject(record) {|h, k| h[k] }
207
+ }
208
+ else
209
+ @get_insert_id = nil
210
+ end
200
211
  end
201
212
 
202
213
  def start
@@ -294,7 +305,11 @@ module Fluent
294
305
  buf = ''
295
306
  es.each do |time, record|
296
307
  row = @fields.format(@add_time_field.call(record, time))
297
- buf << {"json" => row}.to_msgpack unless row.empty?
308
+ unless row.empty?
309
+ row = {"json" => row}
310
+ row['insertId'] = @get_insert_id.call(record) if @get_insert_id
311
+ buf << row.to_msgpack
312
+ end
298
313
  end
299
314
  buf
300
315
  end
@@ -376,9 +391,10 @@ module Fluent
376
391
  ### https://developers.google.com/bigquery/docs/tables
377
392
  # Each field has the following properties:
378
393
  #
379
- # name - Field names are any combination of uppercase and/or lowercase letters (A-Z, a-z),
380
- # digits (0-9) and underscores, but no spaces. The first character must be a letter.
381
- unless name =~ /^[A-Za-z][_A-Za-z0-9]*$/
394
+ # name - The name must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
395
+ # and must start with a letter or underscore. The maximum length is 128 characters.
396
+ # https://cloud.google.com/bigquery/docs/reference/v2/tables#schema.fields.name
397
+ unless name =~ /^[_A-Za-z][_A-Za-z0-9]{,127}$/
382
398
  raise Fluent::ConfigError, "invalid bigquery field name: '#{name}'"
383
399
  end
384
400
 
@@ -87,7 +87,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
87
87
  time_field time
88
88
 
89
89
  field_integer time , status , bytes
90
- field_string vhost ,path,method,protocol, agent,referer, remote.host ,remote.ip,remote.user
90
+ field_string _log_name, vhost, path, method, protocol, agent, referer, remote.host, remote.ip, remote.user
91
91
  field_float requesttime
92
92
  field_boolean bot_access , loginsession
93
93
  ])
@@ -97,6 +97,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
97
97
  assert fields['time']
98
98
  assert fields['status']
99
99
  assert fields['bytes']
100
+ assert fields['_log_name']
100
101
  assert fields['vhost']
101
102
  assert fields['protocol']
102
103
  assert fields['agent']
@@ -130,6 +131,12 @@ class BigQueryOutputTest < Test::Unit::TestCase
130
131
  assert_raises(Fluent::ConfigError) do
131
132
  create_driver(base + "field_string remote.host name\n")
132
133
  end
134
+ assert_raises(Fluent::ConfigError) do
135
+ create_driver(base + "field_string 1column\n")
136
+ end
137
+ assert_raises(Fluent::ConfigError) do
138
+ create_driver(base + "field_string #{'tenstrings' * 12 + '123456789'}\n")
139
+ end
133
140
  assert_raises(Fluent::ConfigError) do
134
141
  create_driver(base + "field_float request time\n")
135
142
  end
@@ -567,6 +574,80 @@ class BigQueryOutputTest < Test::Unit::TestCase
567
574
  assert_equal :repeated, fields["argv"].mode
568
575
  end
569
576
 
577
+ def test_format_with_insert_id
578
+ now = Time.now
579
+ input = [
580
+ now,
581
+ {
582
+ "uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
583
+ }
584
+ ]
585
+ expected = {
586
+ "insertId" => "9ABFF756-0267-4247-847F-0895B65F0938",
587
+ "json" => {
588
+ "uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
589
+ }
590
+ }
591
+
592
+ driver = create_driver(<<-CONFIG)
593
+ table foo
594
+ email foo@bar.example
595
+ private_key_path /path/to/key
596
+ project yourproject_id
597
+ dataset yourdataset_id
598
+
599
+ insert_id_field uuid
600
+ field_string uuid
601
+ CONFIG
602
+ mock_client(driver) do |expect|
603
+ expect.discovered_api("bigquery", "v2") { stub! }
604
+ end
605
+ driver.instance.start
606
+ buf = driver.instance.format_stream("my.tag", [input])
607
+ driver.instance.shutdown
608
+
609
+ assert_equal expected, MessagePack.unpack(buf)
610
+ end
611
+
612
+ def test_format_with_nested_insert_id
613
+ now = Time.now
614
+ input = [
615
+ now,
616
+ {
617
+ "data" => {
618
+ "uuid" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
619
+ },
620
+ }
621
+ ]
622
+ expected = {
623
+ "insertId" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
624
+ "json" => {
625
+ "data" => {
626
+ "uuid" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
627
+ }
628
+ }
629
+ }
630
+
631
+ driver = create_driver(<<-CONFIG)
632
+ table foo
633
+ email foo@bar.example
634
+ private_key_path /path/to/key
635
+ project yourproject_id
636
+ dataset yourdataset_id
637
+
638
+ insert_id_field data.uuid
639
+ field_string data.uuid
640
+ CONFIG
641
+ mock_client(driver) do |expect|
642
+ expect.discovered_api("bigquery", "v2") { stub! }
643
+ end
644
+ driver.instance.start
645
+ buf = driver.instance.format_stream("my.tag", [input])
646
+ driver.instance.shutdown
647
+
648
+ assert_equal expected, MessagePack.unpack(buf)
649
+ end
650
+
570
651
  def test_empty_value_in_required
571
652
  now = Time.now
572
653
  input = [
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naoya Ito
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-22 00:00:00.000000000 Z
11
+ date: 2014-11-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake