fluent-plugin-bigquery 2.2.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +30 -0
- data/.github/workflows/windows.yml +30 -0
- data/Gemfile +3 -0
- data/README.md +8 -4
- data/integration/README.md +14 -0
- data/integration/create_table.sh +4 -0
- data/integration/dummer_insert.rb +12 -0
- data/integration/dummer_load.rb +12 -0
- data/integration/fluent.conf +88 -0
- data/integration/schema.json +22 -0
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +17 -8
- data/lib/fluent/plugin/out_bigquery_base.rb +28 -5
- data/lib/fluent/plugin/out_bigquery_insert.rb +5 -0
- data/test/plugin/test_out_bigquery_base.rb +22 -27
- data/test/plugin/test_out_bigquery_insert.rb +60 -26
- data/test/plugin/test_out_bigquery_load.rb +9 -9
- metadata +14 -7
- data/.travis.yml +0 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc6de961c8c42fddf3d9e297e93db560d16cfc098161232c90ee64f0a5679fee
+  data.tar.gz: 5ec1fee690f77d0fa25d8e427c6ad354cdfdbfafe30a4aee4fea9a5e73db5eb3
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 20fc96d420611a0d12f7cb34656ae87872f24131c70039383a8f8b7d51048a7d4f277a80675f2bee834113fd13d2a9780b772b517f2140481f7fb86ce63f24e3
+  data.tar.gz: cecc8f8682761ddfb22d942b69103823cc728923f6d7043d967254ed02c754db4e792132769f7f3aa91986aa27895ac83bf16358be21e03d3c94e77c43975231
data/.github/workflows/linux.yml
ADDED
@@ -0,0 +1,30 @@
+name: Testing on Ubuntu
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby:
+          - 2.7
+          - 3.0
+          - 3.1
+        os:
+          - ubuntu-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/.github/workflows/windows.yml
ADDED
@@ -0,0 +1,30 @@
+name: Testing on Windows
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby:
+          - 2.7
+          - 3.0
+          - 3.1
+        os:
+          - windows-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -23,6 +23,7 @@ OAuth flow for installed applications.
 | v0.4.x | 0.12.x | 2.0 or later |
 | v1.x.x | 0.14.x or later | 2.2 or later |
 | v2.x.x | 0.14.x or later | 2.3 or later |
+| v3.x.x | 1.x or later | 2.7 or later |
 
 ## With docker image
 If you use official alpine based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
@@ -52,7 +53,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | auto_create_table | bool | no | no | false | If true, creates table automatically |
 | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
 | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
-| schema_path | string | yes (either `fetch_schema`) |
+| schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema Definition file path. It is formatted by JSON. |
 | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
 | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
 | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
@@ -72,6 +73,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
 | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
 | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+| require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. |
 
 #### bigquery_load
 
@@ -379,10 +381,10 @@ format to construct table ids.
 Table ids are formatted at runtime
 using the chunk key time.
 
-see.
+see. https://docs.fluentd.org/configuration/buffer-section
 
 For example, with the configuration below,
-data is inserted into tables `
+data is inserted into tables `accesslog_2014_08_02`, `accesslog_2014_08_03` and so on.
 
 ```apache
 <match dummy>
@@ -392,7 +394,7 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 
   project yourproject_id
   dataset yourdataset_id
-  table accesslog_%Y_%
+  table accesslog_%Y_%m_%d
 
   <buffer time>
     timekey 1d
@@ -401,6 +403,8 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 </match>
 ```
 
+**NOTE: In current fluentd (v1.15.x), The maximum unit supported by strftime formatting is the granularity of days**
+
 #### record attribute formatting
 The format can be suffixed with attribute name.
 
data/integration/README.md
ADDED
@@ -0,0 +1,14 @@
+# Requirements
+
+Set Environment Variable
+
+- GOOGLE_APPLICATION_CREDENTIALS (json key path)
+- PROJECT_NAME
+- DATASET_NAME
+- TABLE_NAME
+
+# How to use
+
+1. execute `create_table.sh`
+1. `bundle exec fluentd -c fluent.conf`
+1. `bundle exec dummer -c dummer_insert.rb` or `bundle exec dummer -c dummer_load.rb`
data/integration/dummer_insert.rb
ADDED
@@ -0,0 +1,12 @@
+require "time"
+
+configure "insert" do
+  host "localhost"
+  port 24224
+  rate 100
+  tag type: :string, any: %w(insert_data)
+  field :id, type: :integer, countup: true
+  field :string_field, type: :string, any: %w(str1 str2 str3 str4)
+  field :timestamp_field, type: :string, value: Time.now.iso8601
+  field :date, type: :string, value: Time.now.strftime("%Y-%m-%d")
+end
data/integration/dummer_load.rb
ADDED
@@ -0,0 +1,12 @@
+require "time"
+
+configure "load" do
+  host "localhost"
+  port 24224
+  rate 100
+  tag type: :string, any: %w(load_data)
+  field :id, type: :integer, countup: true
+  field :string_field, type: :string, any: %w(str1 str2 str3 str4)
+  field :timestamp_field, type: :string, value: Time.now.iso8601
+  field :date, type: :string, value: Time.now.strftime("%Y-%m-%d")
+end
data/integration/fluent.conf
ADDED
@@ -0,0 +1,88 @@
+<source>
+  @type forward
+  port 24224
+  bind 0.0.0.0
+</source>
+
+<match insert_data>
+  @id bigquery-insert-integration
+  @type bigquery_insert
+
+  allow_retry_insert_errors true
+
+  auth_method json_key
+  json_key "#{ENV["GOOGLE_APPLICATION_CREDENTIALS"]}"
+
+  <buffer>
+    @type file
+
+    chunk_limit_size 1m
+    chunk_limit_records 1500
+    total_limit_size 1g
+    path ./log/bigquery-insert-integration
+
+    flush_interval 30
+    flush_thread_count 4
+    flush_at_shutdown true
+
+    retry_max_times 14
+    retry_max_interval 30m
+  </buffer>
+
+  request_open_timeout_sec 2m
+
+  slow_flush_log_threshold 30.0
+
+  project "#{ENV["PROJECT_NAME"]}"
+  dataset "#{ENV["DATASET_NAME"]}"
+  table "#{ENV["TABLE_NAME"]}"
+  auto_create_table false
+  fetch_schema true
+  fetch_schema_table "#{ENV["TABLE_NAME"]}"
+
+  insert_id_field id
+
+  <secondary>
+    @type file
+    path ./log/bigquery-insert-integration.errors
+  </secondary>
+</match>
+
+<match load_data>
+  @id bigquery-load-integration
+  @type bigquery_load
+
+  auth_method json_key
+  json_key "#{ENV["GOOGLE_APPLICATION_CREDENTIALS"]}"
+
+  <buffer>
+    @type file
+
+    chunk_limit_size 1m
+    total_limit_size 1g
+    path ./log/bigquery-load-integration
+
+    flush_interval 120
+    flush_thread_count 4
+    flush_at_shutdown true
+
+    retry_max_times 14
+    retry_max_interval 30m
+  </buffer>
+
+  request_open_timeout_sec 2m
+
+  slow_flush_log_threshold 300.0
+
+  project "#{ENV["PROJECT_NAME"]}"
+  dataset "#{ENV["DATASET_NAME"]}"
+  table "#{ENV["TABLE_NAME"]}"
+  auto_create_table false
+  fetch_schema true
+  fetch_schema_table "#{ENV["TABLE_NAME"]}"
+
+  <secondary>
+    @type file
+    path ./log/bigquery-load-integration.errors
+  </secondary>
+</match>
data/integration/schema.json
ADDED
@@ -0,0 +1,22 @@
+[
+  {
+    "name": "id",
+    "type": "INTEGER",
+    "mode": "REQUIRED"
+  },
+  {
+    "name": "string_field",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "timestamp_field",
+    "type": "TIMESTAMP",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "date",
+    "type": "DATE",
+    "mode": "REQUIRED"
+  }
+]
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -1,7 +1,7 @@
 module Fluent
   module BigQuery
     class Writer
-      def initialize(log, auth_method, options
+      def initialize(log, auth_method, **options)
        @auth_method = auth_method
        @scope = "https://www.googleapis.com/auth/bigquery"
        @options = options
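This keyword-splat signature is the core of the Ruby 3 compatibility work in 3.0.0: Ruby 3 no longer converts a trailing Hash into keyword arguments implicitly, so the writer has to declare `**options` and callers have to splat explicitly. A minimal illustration of the difference, independent of the gem's code:

```ruby
# Illustration only (not from the gem): Ruby 3 separates positional and keyword
# arguments, which is why Writer#initialize now declares **options explicitly.
def old_style(log, auth_method, options = {})
  options # without keyword parameters, keywords arrive only as a plain Hash
end

def new_style(log, auth_method, **options)
  options # genuine keyword arguments
end

old_style(nil, :json_key, timeout_sec: 60)      # => {:timeout_sec=>60}
new_style(nil, :json_key, timeout_sec: 60)      # => {:timeout_sec=>60}
new_style(nil, :json_key, **{timeout_sec: 60})  # a Hash must be splatted with **
```

The same separation explains the `**{}` splats added to the google-api-client calls further down: a literal `{}` that Ruby 2 quietly treated as empty keywords would become an extra positional argument under Ruby 3, while `**{}` expands to no keywords at all.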
@@ -35,8 +35,9 @@ module Fluent
        }
 
        definition.merge!(time_partitioning: time_partitioning) if time_partitioning
+       definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
        definition.merge!(clustering: clustering) if clustering
-       client.insert_table(project, dataset, definition, {})
+       client.insert_table(project, dataset, definition, **{})
        log.debug "create table", project_id: project, dataset: dataset, table: table_id
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        message = e.message
@@ -82,7 +83,7 @@ module Fluent
        if @options[:auto_create_table]
          res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
        else
-         res = client.insert_all_table_data(project, dataset, table_id, body, {})
+         res = client.insert_all_table_data(project, dataset, table_id, body, **{})
        end
        log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
@@ -157,10 +158,8 @@ module Fluent
        res = client.insert_job(
          project,
          configuration,
-
-
-          content_type: "application/octet-stream",
-        }
+         upload_source: upload_source,
+         content_type: "application/octet-stream",
        )
        JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -318,6 +317,16 @@ module Fluent
        end
      end
 
+     def require_partition_filter
+       return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
+
+       if @options[:require_partition_filter]
+         @require_partition_filter = @options[:require_partition_filter]
+       else
+         @require_partition_filter
+       end
+     end
+
      def clustering
        return @clustering if instance_variable_defined?(:@clustering)
 
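For reference, when `require_partition_filter` is enabled the table definition assembled above and handed to `client.insert_table` ends up shaped roughly like the hash below; this mirrors the expectations added to test_out_bigquery_insert.rb later in this diff, with the schema fields abridged for illustration:

```ruby
# Abridged shape of the auto-created table definition when
# require_partition_filter is true (field list shortened for illustration).
definition = {
  table_reference: { table_id: "foo" },
  schema: { fields: [{ name: "time", type: "TIMESTAMP" }] },
  time_partitioning: { type: "DAY", field: "time", expiration_ms: 3_600_000 },
  require_partition_filter: true,
}
```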
@@ -332,7 +341,7 @@ module Fluent
 
      def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
        try_count ||= 1
-       res = client.insert_all_table_data(project, dataset, table_id, body, {})
+       res = client.insert_all_table_data(project, dataset, table_id, body, **{})
      rescue Google::Apis::ClientError => e
        if e.status_code == 404 && /Not Found: Table/i =~ e.message
          if try_count == 1
data/lib/fluent/plugin/out_bigquery_base.rb
CHANGED
@@ -111,9 +111,6 @@ module Fluent
        if @schema
          @table_schema.load_schema(@schema)
        end
-       if @schema_path
-         @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
-       end
 
        formatter_config = conf.elements("format")[0]
        @formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
@@ -126,6 +123,7 @@ module Fluent
        @tables_mutex = Mutex.new
        @fetched_schemas = {}
        @last_fetch_schema_time = Hash.new(0)
+       @read_schemas = {}
      end
 
      def multi_workers_ready?
@@ -133,7 +131,7 @@ module Fluent
      end
 
      def writer
-       @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method,
+       @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method,
          private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
          email: @email,
          json_key: @json_key,
@@ -148,19 +146,27 @@ module Fluent
          time_partitioning_type: @time_partitioning_type,
          time_partitioning_field: @time_partitioning_field,
          time_partitioning_expiration: @time_partitioning_expiration,
+         require_partition_filter: @require_partition_filter,
          clustering_fields: @clustering_fields,
          timeout_sec: @request_timeout_sec,
          open_timeout_sec: @request_open_timeout_sec,
-
+       )
      end
 
      def format(tag, time, record)
+       if record.nil?
+         log.warn("nil record detected. corrupted chunks? tag=#{tag}, time=#{time}")
+         return
+       end
+
        record = inject_values_to_record(tag, time, record)
 
        meta = metadata(tag, time, record)
        schema =
          if @fetch_schema
            fetch_schema(meta)
+         elsif @schema_path
+           read_schema(meta)
          else
            @table_schema
          end
@@ -209,9 +215,26 @@ module Fluent
        extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
      end
 
+     def read_schema(metadata)
+       schema_path = read_schema_target_path(metadata)
+
+       unless @read_schemas[schema_path]
+         table_schema = Fluent::BigQuery::RecordSchema.new("record")
+         table_schema.load_schema(MultiJson.load(File.read(schema_path)))
+         @read_schemas[schema_path] = table_schema
+       end
+       @read_schemas[schema_path]
+     end
+
+     def read_schema_target_path(metadata)
+       extract_placeholders(@schema_path, metadata)
+     end
+
      def get_schema(project, dataset, metadata)
        if @fetch_schema
          @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
+       elsif @schema_path
+         @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
        else
          @table_schema
        end
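Because `schema_path` now runs through `extract_placeholders`, a single `<match>` section can serve per-tag (or per-time) schema files, and each file is parsed only once per resolved path. A standalone sketch of that caching pattern, using the `json` stdlib instead of MultiJson and assuming an `access.schema` file exists in the working directory:

```ruby
require "json"

# Standalone sketch of the memoization read_schema performs: parse each
# resolved schema file once, then reuse the parsed schema for later chunks.
read_schemas = {}

read_schema = lambda do |schema_path|
  read_schemas[schema_path] ||= JSON.parse(File.read(schema_path))
end

first  = read_schema.call("access.schema")  # parses the file
second = read_schema.call("access.schema")  # served from the cache
puts first.equal?(second)                   # => true (same cached object)
```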
data/lib/fluent/plugin/out_bigquery_insert.rb
CHANGED
@@ -29,6 +29,9 @@ module Fluent
      # If insert_id_field is not specified, true means to allow duplicate rows
      config_param :allow_retry_insert_errors, :bool, default: false
 
+     ## RequirePartitionFilter
+     config_param :require_partition_filter, :bool, default: false
+
      ## Buffer
      config_section :buffer do
        config_set_default :@type, "memory"
@@ -93,6 +96,8 @@ module Fluent
        schema = get_schema(project, dataset, metadata)
 
        insert(project, dataset, table_id, rows, schema, template_suffix)
+     rescue MultiJson::ParseError => e
+       raise Fluent::UnrecoverableError.new(e)
      end
 
      def insert(project, dataset, table_id, rows, schema, template_suffix)
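Wrapping the parse failure in `Fluent::UnrecoverableError` keeps a corrupted buffer chunk from being retried forever; Fluentd backs such a chunk up (or hands it to a `<secondary>` output) instead of looping on it. A standalone sketch of the pattern, with the error class stubbed so it runs outside Fluentd:

```ruby
require "multi_json"

# Standalone sketch of the rescue-and-reraise pattern above; UnrecoverableError
# is stubbed here so the snippet runs without Fluentd installed.
class UnrecoverableError < StandardError; end

def parse_rows(lines)
  lines.map { |line| MultiJson.load(line) }
rescue MultiJson::ParseError => e
  raise UnrecoverableError, e.message # corrupted data: do not retry this chunk
end

p parse_rows(['{"a": 1}'])            # => [{"a"=>1}]
begin
  parse_rows(["not json"])
rescue UnrecoverableError => e
  puts "giving up on chunk: #{e.message}"
end
```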
data/test/plugin/test_out_bigquery_base.rb
CHANGED
@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
-  def test_configure_auth_json_key_as_file_raise_permission_error
-    json_key_path = 'test/plugin/testdata/json_key.json'
-    json_key_path_dir = File.dirname(json_key_path)
-
-    begin
-      File.chmod(0000, json_key_path_dir)
-
-      driver = create_driver(%[
-        table foo
-        auth_method json_key
-        json_key #{json_key_path}
-        project yourproject_id
-        dataset yourdataset_id
-        schema [
-          {"name": "time", "type": "INTEGER"},
-          {"name": "status", "type": "INTEGER"},
-          {"name": "bytes", "type": "INTEGER"}
-        ]
-      ])
-      assert_raises(Errno::EACCES) do
-        driver.instance.writer.client
-      end
-    ensure
-      File.chmod(0755, json_key_path_dir)
-    end
-  end
-
   def test_configure_auth_json_key_as_string
     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
     json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
   end
 
   def test_configure_auth_application_default
+    omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
+
     driver = create_driver(%[
       table foo
       auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert_equal :string, table_schema["argv"].type
     assert_equal :repeated, table_schema["argv"].mode
   end
+
+  def test_resolve_schema_path_with_placeholder
+    now = Time.now.to_i
+    driver = create_driver(<<-CONFIG)
+      table ${tag}_%Y%m%d
+      auth_method json_key
+      json_key jsonkey.josn
+      project yourproject_id
+      dataset yourdataset_id
+      schema_path ${tag}.schema
+
+      <buffer tag, time>
+        timekey 1d
+      </buffer>
+    CONFIG
+
+    metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
+
+    assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
+  end
 end
data/test/plugin/test_out_bigquery_insert.rb
CHANGED
@@ -5,6 +5,19 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     Fluent::Test.setup
   end
 
+  def is_ruby2?
+    RUBY_VERSION.to_i < 3
+  end
+
+  def build_args(args)
+    if is_ruby2?
+      args << {}
+    end
+    args
+  end
+
+  SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
+
   CONFIG = %[
     table foo
     email foo@bar.example
@@ -121,11 +134,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     driver = create_driver
 
     stub_writer do |writer|
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
-      }
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
        s = stub!
        s.insert_errors { nil }
        s
@@ -186,11 +200,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     entry = {a: "b"}
     stub_writer do |writer|
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
-      }
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
        ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
        raise ex
      end
@@ -245,11 +260,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     entry = {a: "b"}
     stub_writer do |writer|
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
-      }
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
        ex = Google::Apis::ServerError.new("error", status_code: 501)
        def ex.reason
          "invalid"
@@ -260,14 +276,14 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
     assert_raise Fluent::BigQuery::UnRetryableError do
       driver.instance.write(chunk)
     end
-    assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.
+    assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.2
     driver.instance_shutdown
   end
 
@@ -290,11 +306,15 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     CONFIG
 
     stub_writer do |writer|
-
-
-
-
-
+      args = ['yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
+        rows: [entry[0]],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }]
+      if RUBY_VERSION.to_i < 3
+        args << {}
+      end
+      mock(writer.client).insert_all_table_data(*args) { stub!.insert_errors { nil } }
     end
 
     driver.run do
@@ -344,25 +364,29 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
      body = {
        rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-
+      args = build_args(['yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
-        fields:
+         fields: schema_fields,
        },
-      }
+      }])
+      mock(writer.client).insert_table(*args)
     end
 
     assert_raise(RuntimeError) do
@@ -416,32 +440,39 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      time_partitioning_type day
      time_partitioning_field time
      time_partitioning_expiration 1h
+
+     require_partition_filter true
    CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
    stub_writer do |writer|
      body = {
        rows: [message],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-
+      args = build_args(['yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
-        fields:
+         fields: schema_fields,
        },
        time_partitioning: {
          type: 'DAY',
          field: 'time',
          expiration_ms: 3600000,
        },
-
+       require_partition_filter: true,
+      }])
+      mock(writer.client).insert_table(*args)
    end
 
    assert_raise(RuntimeError) do
@@ -495,7 +526,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      time_partitioning_type day
      time_partitioning_field time
      time_partitioning_expiration 1h
-     time_partitioning_require_partition_filter true
 
      clustering_fields [
        "time",
@@ -503,23 +533,26 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      ]
    CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
    stub_writer do |writer|
      body = {
        rows: [message],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-
+      args = build_args(['yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
-        fields:
+         fields: schema_fields,
        },
        time_partitioning: {
          type: 'DAY',
@@ -532,7 +565,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
            'vhost',
          ],
        },
-      }
+      }])
+      mock(writer.client).insert_table(*args)
    end
 
    assert_raise(RuntimeError) do
data/test/plugin/test_out_bigquery_load.rb
CHANGED
@@ -64,7 +64,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
            max_bad_records: 0,
          }
        }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
    end
@@ -117,7 +117,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          },
        },
        job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}} ,
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
    end
@@ -132,7 +132,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
@@ -154,11 +154,11 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
            max_bad_records: 0,
          }
        }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
 
-      mock(writer.client).get_job('yourproject_id', 'dummy_job_id',
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
        stub! do |s|
          s.id { 'dummy_job_id' }
          s.configuration.stub! do |_s|
@@ -215,7 +215,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
@@ -237,11 +237,11 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
            max_bad_records: 0,
          }
        }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
 
-      mock(writer.client).get_job('yourproject_id', 'dummy_job_id',
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
        stub! do |s|
          s.id { 'dummy_job_id' }
          s.configuration.stub! do |_s|
@@ -317,7 +317,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          },
        }
      }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
    end
metadata
CHANGED
@@ -1,15 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version:
+  version: 3.0.0
 platform: ruby
 authors:
 - Naoya Ito
 - joker1007
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2022-10-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -139,14 +139,21 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".github/ISSUE_TEMPLATE.md"
+- ".github/workflows/linux.yml"
+- ".github/workflows/windows.yml"
 - ".gitignore"
-- ".travis.yml"
 - Gemfile
 - LICENSE.txt
 - README.md
 - Rakefile
 - fluent-plugin-bigquery.gemspec
 - gemfiles/activesupport-4.gemfile
+- integration/README.md
+- integration/create_table.sh
+- integration/dummer_insert.rb
+- integration/dummer_load.rb
+- integration/fluent.conf
+- integration/schema.json
 - lib/fluent/plugin/bigquery/errors.rb
 - lib/fluent/plugin/bigquery/helper.rb
 - lib/fluent/plugin/bigquery/schema.rb
@@ -168,7 +175,7 @@ homepage: https://github.com/kaizenplatform/fluent-plugin-bigquery
 licenses:
 - Apache-2.0
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -183,8 +190,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.3.7
+signing_key:
 specification_version: 4
 summary: Fluentd plugin to store data on Google BigQuery
 test_files: