fluent-plugin-bigquery 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +17 -1
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery.rb +20 -4
- data/test/plugin/test_out_bigquery.rb +82 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 55508761bdb03fba5e96f81a1eac46c278d9426e
|
4
|
+
data.tar.gz: f00e68b4f634a2ae2e269cfe4f643360cb3939e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4894b51150577736850ac81fbe2fc2d874b2f0e656b5d8456f3d43b836afdd7360764b1de370b83f3947823975b00ae1fa11397904b5226021eb9f1a306e2a0c
|
7
|
+
data.tar.gz: d941e48b1323347cdc835959d7c5f91e3009f35a2b64cd72211b8d24868ab9c402d738ab56176da4991f08cc7cafd59da8f39988f407f3fc9489156ebe2dade8
|
data/README.md
CHANGED
@@ -14,7 +14,7 @@ OAuth flow for installed applications.
|
|
14
14
|
|
15
15
|
## Configuration
|
16
16
|
|
17
|
-
###
|
17
|
+
### Streaming inserts
|
18
18
|
|
19
19
|
Configure the insert specification with the target table schema and your credentials. This is the minimum configuration:
|
20
20
|
|
@@ -249,6 +249,22 @@ If you specify multiple tables in configuration file, plugin get all schema data
|
|
249
249
|
NOTE: Since JSON does not define how to encode data of TIMESTAMP type,
|
250
250
|
you are still advised to specify JSON types for TIMESTAMP fields, as the "time" field does in the example, if you use the second or third method.
|
251
251
|
|
252
|
+
### Specifying insertId property
|
253
|
+
|
254
|
+
BigQuery uses the `insertId` property to detect duplicate insertion requests (see [data consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency) in the Google BigQuery documentation).
|
255
|
+
You can set the `insert_id_field` option to specify the field to use as the `insertId` property.
|
256
|
+
|
257
|
+
```apache
|
258
|
+
<match dummy>
|
259
|
+
type bigquery
|
260
|
+
|
261
|
+
...
|
262
|
+
|
263
|
+
insert_id_field uuid
|
264
|
+
field_string uuid
|
265
|
+
</match>
|
266
|
+
```
|
267
|
+
|
252
268
|
## TODO
|
253
269
|
|
254
270
|
* support Load API
|
@@ -79,6 +79,8 @@ module Fluent
|
|
79
79
|
config_param :utc, :bool, :default => nil
|
80
80
|
config_param :time_field, :string, :default => nil
|
81
81
|
|
82
|
+
config_param :insert_id_field, :string, :default => nil
|
83
|
+
|
82
84
|
config_param :method, :string, :default => 'insert' # or 'load' # TODO: not implemented now
|
83
85
|
|
84
86
|
config_param :load_size_limit, :integer, :default => 1000**4 # < 1TB (1024^4) # TODO: not implemented now
|
@@ -197,6 +199,15 @@ module Fluent
|
|
197
199
|
else
|
198
200
|
@add_time_field = lambda {|record, time| record }
|
199
201
|
end
|
202
|
+
|
203
|
+
if @insert_id_field
|
204
|
+
insert_id_keys = @insert_id_field.split('.')
|
205
|
+
@get_insert_id = lambda {|record|
|
206
|
+
insert_id_keys.inject(record) {|h, k| h[k] }
|
207
|
+
}
|
208
|
+
else
|
209
|
+
@get_insert_id = nil
|
210
|
+
end
|
200
211
|
end
|
201
212
|
|
202
213
|
def start
|
@@ -294,7 +305,11 @@ module Fluent
|
|
294
305
|
buf = ''
|
295
306
|
es.each do |time, record|
|
296
307
|
row = @fields.format(@add_time_field.call(record, time))
|
297
|
-
|
308
|
+
unless row.empty?
|
309
|
+
row = {"json" => row}
|
310
|
+
row['insertId'] = @get_insert_id.call(record) if @get_insert_id
|
311
|
+
buf << row.to_msgpack
|
312
|
+
end
|
298
313
|
end
|
299
314
|
buf
|
300
315
|
end
|
@@ -376,9 +391,10 @@ module Fluent
|
|
376
391
|
### https://developers.google.com/bigquery/docs/tables
|
377
392
|
# Each field has the following properties:
|
378
393
|
#
|
379
|
-
# name -
|
380
|
-
#
|
381
|
-
|
394
|
+
# name - The name must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
|
395
|
+
# and must start with a letter or underscore. The maximum length is 128 characters.
|
396
|
+
# https://cloud.google.com/bigquery/docs/reference/v2/tables#schema.fields.name
|
397
|
+
unless name =~ /^[_A-Za-z][_A-Za-z0-9]{,127}$/
|
382
398
|
raise Fluent::ConfigError, "invalid bigquery field name: '#{name}'"
|
383
399
|
end
|
384
400
|
|
@@ -87,7 +87,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
87
87
|
time_field time
|
88
88
|
|
89
89
|
field_integer time , status , bytes
|
90
|
-
field_string vhost
|
90
|
+
field_string _log_name, vhost, path, method, protocol, agent, referer, remote.host, remote.ip, remote.user
|
91
91
|
field_float requesttime
|
92
92
|
field_boolean bot_access , loginsession
|
93
93
|
])
|
@@ -97,6 +97,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
97
97
|
assert fields['time']
|
98
98
|
assert fields['status']
|
99
99
|
assert fields['bytes']
|
100
|
+
assert fields['_log_name']
|
100
101
|
assert fields['vhost']
|
101
102
|
assert fields['protocol']
|
102
103
|
assert fields['agent']
|
@@ -130,6 +131,12 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
130
131
|
assert_raises(Fluent::ConfigError) do
|
131
132
|
create_driver(base + "field_string remote.host name\n")
|
132
133
|
end
|
134
|
+
assert_raises(Fluent::ConfigError) do
|
135
|
+
create_driver(base + "field_string 1column\n")
|
136
|
+
end
|
137
|
+
assert_raises(Fluent::ConfigError) do
|
138
|
+
create_driver(base + "field_string #{'tenstrings' * 12 + '123456789'}\n")
|
139
|
+
end
|
133
140
|
assert_raises(Fluent::ConfigError) do
|
134
141
|
create_driver(base + "field_float request time\n")
|
135
142
|
end
|
@@ -567,6 +574,80 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
567
574
|
assert_equal :repeated, fields["argv"].mode
|
568
575
|
end
|
569
576
|
|
577
|
+
def test_format_with_insert_id
|
578
|
+
now = Time.now
|
579
|
+
input = [
|
580
|
+
now,
|
581
|
+
{
|
582
|
+
"uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
|
583
|
+
}
|
584
|
+
]
|
585
|
+
expected = {
|
586
|
+
"insertId" => "9ABFF756-0267-4247-847F-0895B65F0938",
|
587
|
+
"json" => {
|
588
|
+
"uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
|
589
|
+
}
|
590
|
+
}
|
591
|
+
|
592
|
+
driver = create_driver(<<-CONFIG)
|
593
|
+
table foo
|
594
|
+
email foo@bar.example
|
595
|
+
private_key_path /path/to/key
|
596
|
+
project yourproject_id
|
597
|
+
dataset yourdataset_id
|
598
|
+
|
599
|
+
insert_id_field uuid
|
600
|
+
field_string uuid
|
601
|
+
CONFIG
|
602
|
+
mock_client(driver) do |expect|
|
603
|
+
expect.discovered_api("bigquery", "v2") { stub! }
|
604
|
+
end
|
605
|
+
driver.instance.start
|
606
|
+
buf = driver.instance.format_stream("my.tag", [input])
|
607
|
+
driver.instance.shutdown
|
608
|
+
|
609
|
+
assert_equal expected, MessagePack.unpack(buf)
|
610
|
+
end
|
611
|
+
|
612
|
+
def test_format_with_nested_insert_id
|
613
|
+
now = Time.now
|
614
|
+
input = [
|
615
|
+
now,
|
616
|
+
{
|
617
|
+
"data" => {
|
618
|
+
"uuid" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
|
619
|
+
},
|
620
|
+
}
|
621
|
+
]
|
622
|
+
expected = {
|
623
|
+
"insertId" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
|
624
|
+
"json" => {
|
625
|
+
"data" => {
|
626
|
+
"uuid" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
|
627
|
+
}
|
628
|
+
}
|
629
|
+
}
|
630
|
+
|
631
|
+
driver = create_driver(<<-CONFIG)
|
632
|
+
table foo
|
633
|
+
email foo@bar.example
|
634
|
+
private_key_path /path/to/key
|
635
|
+
project yourproject_id
|
636
|
+
dataset yourdataset_id
|
637
|
+
|
638
|
+
insert_id_field data.uuid
|
639
|
+
field_string data.uuid
|
640
|
+
CONFIG
|
641
|
+
mock_client(driver) do |expect|
|
642
|
+
expect.discovered_api("bigquery", "v2") { stub! }
|
643
|
+
end
|
644
|
+
driver.instance.start
|
645
|
+
buf = driver.instance.format_stream("my.tag", [input])
|
646
|
+
driver.instance.shutdown
|
647
|
+
|
648
|
+
assert_equal expected, MessagePack.unpack(buf)
|
649
|
+
end
|
650
|
+
|
570
651
|
def test_empty_value_in_required
|
571
652
|
now = Time.now
|
572
653
|
input = [
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naoya Ito
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10
|
11
|
+
date: 2014-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|