fluent-plugin-bigquery 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -1
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery.rb +20 -4
- data/test/plugin/test_out_bigquery.rb +82 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 55508761bdb03fba5e96f81a1eac46c278d9426e
|
4
|
+
data.tar.gz: f00e68b4f634a2ae2e269cfe4f643360cb3939e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4894b51150577736850ac81fbe2fc2d874b2f0e656b5d8456f3d43b836afdd7360764b1de370b83f3947823975b00ae1fa11397904b5226021eb9f1a306e2a0c
|
7
|
+
data.tar.gz: d941e48b1323347cdc835959d7c5f91e3009f35a2b64cd72211b8d24868ab9c402d738ab56176da4991f08cc7cafd59da8f39988f407f3fc9489156ebe2dade8
|
data/README.md
CHANGED
@@ -14,7 +14,7 @@ OAuth flow for installed applications.
|
|
14
14
|
|
15
15
|
## Configuration
|
16
16
|
|
17
|
-
###
|
17
|
+
### Streaming inserts
|
18
18
|
|
19
19
|
Configure insert specifications with target table schema, with your credentials. This is minimum configurations:
|
20
20
|
|
@@ -249,6 +249,22 @@ If you specify multiple tables in configuration file, plugin get all schema data
|
|
249
249
|
NOTE: Since JSON does not define how to encode data of TIMESTAMP type,
|
250
250
|
you are still recommended to specify JSON types for TIMESTAMP fields as "time" field does in the example, if you use second or third method.
|
251
251
|
|
252
|
+
### Specifying insertId property
|
253
|
+
|
254
|
+
BigQuery uses `insertId` property to detect duplicate insertion requests (see [data consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency) in Google BigQuery documents).
|
255
|
+
You can set `insert_id_field` option to specify the field to use as `insertId` property.
|
256
|
+
|
257
|
+
```apache
|
258
|
+
<match dummy>
|
259
|
+
type bigquery
|
260
|
+
|
261
|
+
...
|
262
|
+
|
263
|
+
insert_id_field uuid
|
264
|
+
field_string uuid
|
265
|
+
</match>
|
266
|
+
```
|
267
|
+
|
252
268
|
## TODO
|
253
269
|
|
254
270
|
* support Load API
|
@@ -79,6 +79,8 @@ module Fluent
|
|
79
79
|
config_param :utc, :bool, :default => nil
|
80
80
|
config_param :time_field, :string, :default => nil
|
81
81
|
|
82
|
+
config_param :insert_id_field, :string, :default => nil
|
83
|
+
|
82
84
|
config_param :method, :string, :default => 'insert' # or 'load' # TODO: not implemented now
|
83
85
|
|
84
86
|
config_param :load_size_limit, :integer, :default => 1000**4 # < 1TB (1024^4) # TODO: not implemented now
|
@@ -197,6 +199,15 @@ module Fluent
|
|
197
199
|
else
|
198
200
|
@add_time_field = lambda {|record, time| record }
|
199
201
|
end
|
202
|
+
|
203
|
+
if @insert_id_field
|
204
|
+
insert_id_keys = @insert_id_field.split('.')
|
205
|
+
@get_insert_id = lambda {|record|
|
206
|
+
insert_id_keys.inject(record) {|h, k| h[k] }
|
207
|
+
}
|
208
|
+
else
|
209
|
+
@get_insert_id = nil
|
210
|
+
end
|
200
211
|
end
|
201
212
|
|
202
213
|
def start
|
@@ -294,7 +305,11 @@ module Fluent
|
|
294
305
|
buf = ''
|
295
306
|
es.each do |time, record|
|
296
307
|
row = @fields.format(@add_time_field.call(record, time))
|
297
|
-
|
308
|
+
unless row.empty?
|
309
|
+
row = {"json" => row}
|
310
|
+
row['insertId'] = @get_insert_id.call(record) if @get_insert_id
|
311
|
+
buf << row.to_msgpack
|
312
|
+
end
|
298
313
|
end
|
299
314
|
buf
|
300
315
|
end
|
@@ -376,9 +391,10 @@ module Fluent
|
|
376
391
|
### https://developers.google.com/bigquery/docs/tables
|
377
392
|
# Each field has the following properties:
|
378
393
|
#
|
379
|
-
# name -
|
380
|
-
#
|
381
|
-
|
394
|
+
# name - The name must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
|
395
|
+
# and must start with a letter or underscore. The maximum length is 128 characters.
|
396
|
+
# https://cloud.google.com/bigquery/docs/reference/v2/tables#schema.fields.name
|
397
|
+
unless name =~ /^[_A-Za-z][_A-Za-z0-9]{,127}$/
|
382
398
|
raise Fluent::ConfigError, "invalid bigquery field name: '#{name}'"
|
383
399
|
end
|
384
400
|
|
@@ -87,7 +87,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
87
87
|
time_field time
|
88
88
|
|
89
89
|
field_integer time , status , bytes
|
90
|
-
field_string vhost
|
90
|
+
field_string _log_name, vhost, path, method, protocol, agent, referer, remote.host, remote.ip, remote.user
|
91
91
|
field_float requesttime
|
92
92
|
field_boolean bot_access , loginsession
|
93
93
|
])
|
@@ -97,6 +97,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
97
97
|
assert fields['time']
|
98
98
|
assert fields['status']
|
99
99
|
assert fields['bytes']
|
100
|
+
assert fields['_log_name']
|
100
101
|
assert fields['vhost']
|
101
102
|
assert fields['protocol']
|
102
103
|
assert fields['agent']
|
@@ -130,6 +131,12 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
130
131
|
assert_raises(Fluent::ConfigError) do
|
131
132
|
create_driver(base + "field_string remote.host name\n")
|
132
133
|
end
|
134
|
+
assert_raises(Fluent::ConfigError) do
|
135
|
+
create_driver(base + "field_string 1column\n")
|
136
|
+
end
|
137
|
+
assert_raises(Fluent::ConfigError) do
|
138
|
+
create_driver(base + "field_string #{'tenstrings' * 12 + '123456789'}\n")
|
139
|
+
end
|
133
140
|
assert_raises(Fluent::ConfigError) do
|
134
141
|
create_driver(base + "field_float request time\n")
|
135
142
|
end
|
@@ -567,6 +574,80 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
567
574
|
assert_equal :repeated, fields["argv"].mode
|
568
575
|
end
|
569
576
|
|
577
|
+
def test_format_with_insert_id
|
578
|
+
now = Time.now
|
579
|
+
input = [
|
580
|
+
now,
|
581
|
+
{
|
582
|
+
"uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
|
583
|
+
}
|
584
|
+
]
|
585
|
+
expected = {
|
586
|
+
"insertId" => "9ABFF756-0267-4247-847F-0895B65F0938",
|
587
|
+
"json" => {
|
588
|
+
"uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
|
589
|
+
}
|
590
|
+
}
|
591
|
+
|
592
|
+
driver = create_driver(<<-CONFIG)
|
593
|
+
table foo
|
594
|
+
email foo@bar.example
|
595
|
+
private_key_path /path/to/key
|
596
|
+
project yourproject_id
|
597
|
+
dataset yourdataset_id
|
598
|
+
|
599
|
+
insert_id_field uuid
|
600
|
+
field_string uuid
|
601
|
+
CONFIG
|
602
|
+
mock_client(driver) do |expect|
|
603
|
+
expect.discovered_api("bigquery", "v2") { stub! }
|
604
|
+
end
|
605
|
+
driver.instance.start
|
606
|
+
buf = driver.instance.format_stream("my.tag", [input])
|
607
|
+
driver.instance.shutdown
|
608
|
+
|
609
|
+
assert_equal expected, MessagePack.unpack(buf)
|
610
|
+
end
|
611
|
+
|
612
|
+
def test_format_with_nested_insert_id
|
613
|
+
now = Time.now
|
614
|
+
input = [
|
615
|
+
now,
|
616
|
+
{
|
617
|
+
"data" => {
|
618
|
+
"uuid" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
|
619
|
+
},
|
620
|
+
}
|
621
|
+
]
|
622
|
+
expected = {
|
623
|
+
"insertId" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
|
624
|
+
"json" => {
|
625
|
+
"data" => {
|
626
|
+
"uuid" => "809F6BA7-1C16-44CD-9816-4B20E2C7AA2A",
|
627
|
+
}
|
628
|
+
}
|
629
|
+
}
|
630
|
+
|
631
|
+
driver = create_driver(<<-CONFIG)
|
632
|
+
table foo
|
633
|
+
email foo@bar.example
|
634
|
+
private_key_path /path/to/key
|
635
|
+
project yourproject_id
|
636
|
+
dataset yourdataset_id
|
637
|
+
|
638
|
+
insert_id_field data.uuid
|
639
|
+
field_string data.uuid
|
640
|
+
CONFIG
|
641
|
+
mock_client(driver) do |expect|
|
642
|
+
expect.discovered_api("bigquery", "v2") { stub! }
|
643
|
+
end
|
644
|
+
driver.instance.start
|
645
|
+
buf = driver.instance.format_stream("my.tag", [input])
|
646
|
+
driver.instance.shutdown
|
647
|
+
|
648
|
+
assert_equal expected, MessagePack.unpack(buf)
|
649
|
+
end
|
650
|
+
|
570
651
|
def test_empty_value_in_required
|
571
652
|
now = Time.now
|
572
653
|
input = [
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naoya Ito
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10
|
11
|
+
date: 2014-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|