fluent-plugin-bigquery 2.2.0 → 2.3.0
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +31 -0
- data/.github/workflows/windows.yml +27 -0
- data/README.md +10 -1
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +11 -0
- data/lib/fluent/plugin/out_bigquery_base.rb +21 -3
- data/lib/fluent/plugin/out_bigquery_insert.rb +3 -0
- data/test/plugin/test_out_bigquery_base.rb +22 -27
- data/test/plugin/test_out_bigquery_insert.rb +15 -5
- data/test/plugin/test_out_bigquery_load.rb +4 -4
- metadata +5 -4
- data/.travis.yml +0 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4209a2b6eaaf0b6f8ba315b6f5de6690e28fb47890aeea777bdb31889e4785ab
+  data.tar.gz: b0983fb4fa16d72059b0e679ea4ee627d19e805779fa010888fa1723354896a5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a6fc6891eda12bbc1272af7af9c4e8d48e588bc7ef65153b3a7524e39468baebb8fdb925856d1850bbda12fed5d33865faa56542503f76fdf724a18937c7d56e
+  data.tar.gz: fff0599b6a838cb4ff233ba9585b558ff733eed8063c1cf36ee08aaacb9b3c2ca1bce4d13db2a51ecc72c398ba751a18b2856a6348f43738ee8ca366becdea61
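These SHA256/SHA512 digests cover the metadata.gz and data.tar.gz archives bundled inside the .gem file. A minimal verification sketch in Ruby; the fetch-and-unpack step named in the comment is an assumption, not part of this diff:

require "digest"

# Assumed setup: `gem fetch fluent-plugin-bigquery -v 2.3.0` followed by
# `tar -xf fluent-plugin-bigquery-2.3.0.gem`, which leaves metadata.gz and
# data.tar.gz in the current directory.
%w[metadata.gz data.tar.gz].each do |archive|
  # Compare against the new SHA256 values recorded above.
  puts "#{archive}: #{Digest::SHA256.file(archive).hexdigest}"
end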
data/.github/workflows/linux.yml
ADDED
@@ -0,0 +1,31 @@
+name: Testing on Ubuntu
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby:
+          - 2.6
+          - 2.7
+          - 3.0
+          - 3.1
+        os:
+          - ubuntu-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/.github/workflows/windows.yml
ADDED
@@ -0,0 +1,27 @@
+name: Testing on Windows
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby: [ '2.6', '2.7', '3.0', '3.1' ]
+        os:
+          - windows-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
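Together, these two workflows replace the project's Travis CI setup: the file summary above shows data/.travis.yml deleted (+0 -14), and the metadata diff at the bottom drops ".travis.yml" from the gem's file list in favor of the two workflow files.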
data/README.md
CHANGED
@@ -1,5 +1,13 @@
 # fluent-plugin-bigquery
 
+## Notice
+
+We will transfer fluent-plugin-bigquery repository to [fluent-plugins-nursery](https://github.com/fluent-plugins-nursery) organization.
+It does not change maintenance plan.
+The main purpose is that it solves mismatch between maintainers and current organization.
+
+---
+
 [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.
 
 - **Plugin type**: Output
@@ -52,7 +60,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | auto_create_table | bool | no | no | false | If true, creates table automatically |
 | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
 | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
-| schema_path | string | yes (either `fetch_schema`) |
+| schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema Definition file path. It is formatted by JSON. |
 | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
 | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
 | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
@@ -72,6 +80,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
 | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
 | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+| require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. |
 
 #### bigquery_load
 
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -35,6 +35,7 @@ module Fluent
         }
 
         definition.merge!(time_partitioning: time_partitioning) if time_partitioning
+        definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
         definition.merge!(clustering: clustering) if clustering
         client.insert_table(project, dataset, definition, {})
         log.debug "create table", project_id: project, dataset: dataset, table: table_id
@@ -318,6 +319,16 @@ module Fluent
         end
       end
 
+      def require_partition_filter
+        return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
+
+        if @options[:require_partition_filter]
+          @require_partition_filter = @options[:require_partition_filter]
+        else
+          @require_partition_filter
+        end
+      end
+
       def clustering
         return @clustering if instance_variable_defined?(:@clustering)
 
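Taken together, the two writer.rb hunks make create_table send require_partition_filter as a top-level field of the table definition, alongside time_partitioning and clustering. A hand-assembled sketch of the merged hash (field values are illustrative, not captured from the plugin); it lines up with the request body the updated insert tests assert on later in this diff:

# Illustrative sketch only; schema and partitioning values are assumptions.
schema_fields = [{ name: "time", type: "TIMESTAMP" }]

definition = {
  table_reference: { table_id: "foo" },
  schema: { fields: schema_fields },
  time_partitioning: { type: "DAY", field: "time", expiration_ms: 3_600_000 },
}
# The new merge! line adds the flag only when the option is set:
definition.merge!(require_partition_filter: true)
# client.insert_table(project, dataset, definition, {}) then carries the flag.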
data/lib/fluent/plugin/out_bigquery_base.rb
CHANGED
@@ -111,9 +111,6 @@ module Fluent
         if @schema
           @table_schema.load_schema(@schema)
         end
-        if @schema_path
-          @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
-        end
 
         formatter_config = conf.elements("format")[0]
         @formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
@@ -126,6 +123,7 @@ module Fluent
         @tables_mutex = Mutex.new
         @fetched_schemas = {}
         @last_fetch_schema_time = Hash.new(0)
+        @read_schemas = {}
       end
 
       def multi_workers_ready?
@@ -148,6 +146,7 @@ module Fluent
           time_partitioning_type: @time_partitioning_type,
           time_partitioning_field: @time_partitioning_field,
           time_partitioning_expiration: @time_partitioning_expiration,
+          require_partition_filter: @require_partition_filter,
           clustering_fields: @clustering_fields,
           timeout_sec: @request_timeout_sec,
           open_timeout_sec: @request_open_timeout_sec,
@@ -161,6 +160,8 @@ module Fluent
         schema =
           if @fetch_schema
             fetch_schema(meta)
+          elsif @schema_path
+            read_schema(meta)
           else
             @table_schema
           end
@@ -209,9 +210,26 @@ module Fluent
         extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
       end
 
+      def read_schema(metadata)
+        schema_path = read_schema_target_path(metadata)
+
+        unless @read_schemas[schema_path]
+          table_schema = Fluent::BigQuery::RecordSchema.new("record")
+          table_schema.load_schema(MultiJson.load(File.read(schema_path)))
+          @read_schemas[schema_path] = table_schema
+        end
+        @read_schemas[schema_path]
+      end
+
+      def read_schema_target_path(metadata)
+        extract_placeholders(@schema_path, metadata)
+      end
+
       def get_schema(project, dataset, metadata)
         if @fetch_schema
           @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
+        elsif @schema_path
+          @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
         else
           @table_schema
         end
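The net effect of these hunks: schema_path is no longer read once in configure. It is now resolved per chunk through extract_placeholders (so it may contain placeholders such as ${tag}), and each resolved path is parsed once and cached in @read_schemas. A condensed sketch of that cache, equivalent to the unless form above; RecordSchema, extract_placeholders, and MultiJson are the plugin's existing helpers:

# Condensed from read_schema/read_schema_target_path in the diff above.
def read_schema(metadata)
  schema_path = read_schema_target_path(metadata)  # e.g. "${tag}.schema" -> "foo.schema"
  @read_schemas[schema_path] ||= begin
    table_schema = Fluent::BigQuery::RecordSchema.new("record")
    table_schema.load_schema(MultiJson.load(File.read(schema_path)))
    table_schema
  end
end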
data/lib/fluent/plugin/out_bigquery_insert.rb
CHANGED
@@ -29,6 +29,9 @@ module Fluent
       # If insert_id_field is not specified, true means to allow duplicate rows
       config_param :allow_retry_insert_errors, :bool, default: false
 
+      ## RequirePartitionFilter
+      config_param :require_partition_filter, :bool, default: false
+
       ## Buffer
       config_section :buffer do
         config_set_default :@type, "memory"
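A minimal usage sketch for the new parameter, written in the same driver-config heredoc style as the tests below; the credentials, project, and dataset names are placeholders:

driver = create_driver(<<-CONFIG)
  table foo
  auth_method json_key
  json_key yourkey.json
  project yourproject_id
  dataset yourdataset_id
  auto_create_table true
  time_partitioning_type day
  time_partitioning_field time
  require_partition_filter true
  schema_path ${tag}.schema

  <buffer tag, time>
    timekey 1d
  </buffer>
CONFIG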
data/test/plugin/test_out_bigquery_base.rb
CHANGED
@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
-  def test_configure_auth_json_key_as_file_raise_permission_error
-    json_key_path = 'test/plugin/testdata/json_key.json'
-    json_key_path_dir = File.dirname(json_key_path)
-
-    begin
-      File.chmod(0000, json_key_path_dir)
-
-      driver = create_driver(%[
-        table foo
-        auth_method json_key
-        json_key #{json_key_path}
-        project yourproject_id
-        dataset yourdataset_id
-        schema [
-          {"name": "time", "type": "INTEGER"},
-          {"name": "status", "type": "INTEGER"},
-          {"name": "bytes", "type": "INTEGER"}
-        ]
-      ])
-      assert_raises(Errno::EACCES) do
-        driver.instance.writer.client
-      end
-    ensure
-      File.chmod(0755, json_key_path_dir)
-    end
-  end
-
   def test_configure_auth_json_key_as_string
     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
     json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
   end
 
   def test_configure_auth_application_default
+    omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
+
     driver = create_driver(%[
       table foo
       auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert_equal :string, table_schema["argv"].type
     assert_equal :repeated, table_schema["argv"].mode
   end
+
+  def test_resolve_schema_path_with_placeholder
+    now = Time.now.to_i
+    driver = create_driver(<<-CONFIG)
+      table ${tag}_%Y%m%d
+      auth_method json_key
+      json_key jsonkey.josn
+      project yourproject_id
+      dataset yourdataset_id
+      schema_path ${tag}.schema
+
+      <buffer tag, time>
+        timekey 1d
+      </buffer>
+    CONFIG
+
+    metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
+
+    assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
+  end
 end
data/test/plugin/test_out_bigquery_insert.rb
CHANGED
@@ -5,6 +5,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     Fluent::Test.setup
   end
 
+  SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
+
   CONFIG = %[
     table foo
     email foo@bar.example
@@ -260,7 +262,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
@@ -344,6 +346,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
       body = {
         rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
@@ -360,7 +364,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
           table_id: 'foo',
         },
         schema: {
-          fields:
+          fields: schema_fields,
         },
       }, {})
     end
@@ -416,8 +420,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       time_partitioning_type day
       time_partitioning_field time
       time_partitioning_expiration 1h
+
+      require_partition_filter true
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
       body = {
         rows: [message],
@@ -434,13 +442,14 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
           table_id: 'foo',
         },
         schema: {
-          fields:
+          fields: schema_fields,
         },
         time_partitioning: {
           type: 'DAY',
           field: 'time',
           expiration_ms: 3600000,
         },
+        require_partition_filter: true,
       }, {})
     end
 
@@ -495,7 +504,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       time_partitioning_type day
      time_partitioning_field time
       time_partitioning_expiration 1h
-      time_partitioning_require_partition_filter true
 
       clustering_fields [
         "time",
@@ -503,6 +511,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       ]
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
       body = {
         rows: [message],
@@ -519,7 +529,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
           table_id: 'foo',
         },
         schema: {
-          fields:
+          fields: schema_fields,
         },
         time_partitioning: {
           type: 'DAY',
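Note the configuration change these hunks exercise: the old time_partitioning_require_partition_filter line is dropped from the clustering test's config, while the partitioning test now sets the top-level require_partition_filter parameter added in out_bigquery_insert.rb above, which appears to supersede it.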
data/test/plugin/test_out_bigquery_load.rb
CHANGED
@@ -132,7 +132,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
@@ -158,7 +158,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
       stub!.job_reference.stub!.job_id { "dummy_job_id" }
     end
 
-    mock(writer.client).get_job('yourproject_id', 'dummy_job_id',
+    mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
       stub! do |s|
         s.id { 'dummy_job_id' }
         s.configuration.stub! do |_s|
@@ -215,7 +215,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
@@ -241,7 +241,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
       stub!.job_reference.stub!.job_id { "dummy_job_id" }
     end
 
-    mock(writer.client).get_job('yourproject_id', 'dummy_job_id',
+    mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
      stub! do |s|
         s.id { 'dummy_job_id' }
         s.configuration.stub! do |_s|
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 2.2.0
+  version: 2.3.0
 platform: ruby
 authors:
 - Naoya Ito
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2022-02-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -139,8 +139,9 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".github/ISSUE_TEMPLATE.md"
+- ".github/workflows/linux.yml"
+- ".github/workflows/windows.yml"
 - ".gitignore"
-- ".travis.yml"
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -183,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to store data on Google BigQuery