fluent-plugin-bigquery 2.2.0 → 3.0.0
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +30 -0
- data/.github/workflows/windows.yml +30 -0
- data/Gemfile +3 -0
- data/README.md +8 -4
- data/integration/README.md +14 -0
- data/integration/create_table.sh +4 -0
- data/integration/dummer_insert.rb +12 -0
- data/integration/dummer_load.rb +12 -0
- data/integration/fluent.conf +88 -0
- data/integration/schema.json +22 -0
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +17 -8
- data/lib/fluent/plugin/out_bigquery_base.rb +28 -5
- data/lib/fluent/plugin/out_bigquery_insert.rb +5 -0
- data/test/plugin/test_out_bigquery_base.rb +22 -27
- data/test/plugin/test_out_bigquery_insert.rb +60 -26
- data/test/plugin/test_out_bigquery_load.rb +9 -9
- metadata +14 -7
- data/.travis.yml +0 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc6de961c8c42fddf3d9e297e93db560d16cfc098161232c90ee64f0a5679fee
+  data.tar.gz: 5ec1fee690f77d0fa25d8e427c6ad354cdfdbfafe30a4aee4fea9a5e73db5eb3
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 20fc96d420611a0d12f7cb34656ae87872f24131c70039383a8f8b7d51048a7d4f277a80675f2bee834113fd13d2a9780b772b517f2140481f7fb86ce63f24e3
+  data.tar.gz: cecc8f8682761ddfb22d942b69103823cc728923f6d7043d967254ed02c754db4e792132769f7f3aa91986aa27895ac83bf16358be21e03d3c94e77c43975231
data/.github/workflows/linux.yml
ADDED
@@ -0,0 +1,30 @@
+name: Testing on Ubuntu
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby:
+          - 2.7
+          - 3.0
+          - 3.1
+        os:
+          - ubuntu-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/.github/workflows/windows.yml
ADDED
@@ -0,0 +1,30 @@
+name: Testing on Windows
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby:
+          - 2.7
+          - 3.0
+          - 3.1
+        os:
+          - windows-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
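A note on the two workflow files above: apart from the `name` and the `os` matrix entry (ubuntu-latest vs. windows-latest) they are identical, and the `run` block doubles as a local reproduction recipe; the same three commands (gem install bundler rake, bundle install, bundle exec rake test) run the suite outside CI.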
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -23,6 +23,7 @@ OAuth flow for installed applications.
 | v0.4.x | 0.12.x | 2.0 or later |
 | v1.x.x | 0.14.x or later | 2.2 or later |
 | v2.x.x | 0.14.x or later | 2.3 or later |
+| v3.x.x | 1.x or later | 2.7 or later |
 
 ## With docker image
 If you use official alpine based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
@@ -52,7 +53,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | auto_create_table | bool | no | no | false | If true, creates table automatically |
 | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
 | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
-| schema_path | string | yes (either `fetch_schema`) |
+| schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema Definition file path. It is formatted by JSON. |
 | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
 | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
 | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
@@ -72,6 +73,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
 | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
 | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+| require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. |
 
 #### bigquery_load
 
@@ -379,10 +381,10 @@ format to construct table ids.
 Table ids are formatted at runtime
 using the chunk key time.
 
-see.
+see. https://docs.fluentd.org/configuration/buffer-section
 
 For example, with the configuration below,
-data is inserted into tables `
+data is inserted into tables `accesslog_2014_08_02`, `accesslog_2014_08_03` and so on.
 
 ```apache
 <match dummy>
@@ -392,7 +394,7 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 
   project yourproject_id
   dataset yourdataset_id
-  table accesslog_%Y_%
+  table accesslog_%Y_%m_%d
 
   <buffer time>
     timekey 1d
@@ -401,6 +403,8 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 </match>
 ```
 
+**NOTE: In current fluentd (v1.15.x), The maximum unit supported by strftime formatting is the granularity of days**
+
 #### record attribute formatting
 The format can be suffixed with attribute name.
 
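A worked illustration of the strftime-based table ids described above (a sketch, not part of the diff; the pattern and dates are the ones from the README example):

    # The chunk's timekey is expanded with the table pattern, so with
    # `timekey 1d` each daily chunk lands in its own table.
    chunk_timekey = Time.utc(2014, 8, 2)
    puts chunk_timekey.strftime("accesslog_%Y_%m_%d") # => accesslog_2014_08_02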
data/integration/README.md
ADDED
@@ -0,0 +1,14 @@
+# Requirements
+
+Set Environment Variable
+
+- GOOGLE_APPLICATION_CREDENTIALS (json key path)
+- PROJECT_NAME
+- DATASET_NAME
+- TABLE_NAME
+
+# How to use
+
+1. execute `create_table.sh`
+1. `bundle exec fluentd -c fluent.conf`
+1. `bundle exec dummer -c dummer_insert.rb` or `bundle exec dummer -c dummer_load.rb`
data/integration/dummer_insert.rb
ADDED
@@ -0,0 +1,12 @@
+require "time"
+
+configure "insert" do
+  host "localhost"
+  port 24224
+  rate 100
+  tag type: :string, any: %w(insert_data)
+  field :id, type: :integer, countup: true
+  field :string_field, type: :string, any: %w(str1 str2 str3 str4)
+  field :timestamp_field, type: :string, value: Time.now.iso8601
+  field :date, type: :string, value: Time.now.strftime("%Y-%m-%d")
+end
data/integration/dummer_load.rb
ADDED
@@ -0,0 +1,12 @@
+require "time"
+
+configure "load" do
+  host "localhost"
+  port 24224
+  rate 100
+  tag type: :string, any: %w(load_data)
+  field :id, type: :integer, countup: true
+  field :string_field, type: :string, any: %w(str1 str2 str3 str4)
+  field :timestamp_field, type: :string, value: Time.now.iso8601
+  field :date, type: :string, value: Time.now.strftime("%Y-%m-%d")
+end
data/integration/fluent.conf
ADDED
@@ -0,0 +1,88 @@
+<source>
+  @type forward
+  port 24224
+  bind 0.0.0.0
+</source>
+
+<match insert_data>
+  @id bigquery-insert-integration
+  @type bigquery_insert
+
+  allow_retry_insert_errors true
+
+  auth_method json_key
+  json_key "#{ENV["GOOGLE_APPLICATION_CREDENTIALS"]}"
+
+  <buffer>
+    @type file
+
+    chunk_limit_size 1m
+    chunk_limit_records 1500
+    total_limit_size 1g
+    path ./log/bigquery-insert-integration
+
+    flush_interval 30
+    flush_thread_count 4
+    flush_at_shutdown true
+
+    retry_max_times 14
+    retry_max_interval 30m
+  </buffer>
+
+  request_open_timeout_sec 2m
+
+  slow_flush_log_threshold 30.0
+
+  project "#{ENV["PROJECT_NAME"]}"
+  dataset "#{ENV["DATASET_NAME"]}"
+  table "#{ENV["TABLE_NAME"]}"
+  auto_create_table false
+  fetch_schema true
+  fetch_schema_table "#{ENV["TABLE_NAME"]}"
+
+  insert_id_field id
+
+  <secondary>
+    @type file
+    path ./log/bigquery-insert-integration.errors
+  </secondary>
+</match>
+
+<match load_data>
+  @id bigquery-load-integration
+  @type bigquery_load
+
+  auth_method json_key
+  json_key "#{ENV["GOOGLE_APPLICATION_CREDENTIALS"]}"
+
+  <buffer>
+    @type file
+
+    chunk_limit_size 1m
+    total_limit_size 1g
+    path ./log/bigquery-load-integration
+
+    flush_interval 120
+    flush_thread_count 4
+    flush_at_shutdown true
+
+    retry_max_times 14
+    retry_max_interval 30m
+  </buffer>
+
+  request_open_timeout_sec 2m
+
+  slow_flush_log_threshold 300.0
+
+  project "#{ENV["PROJECT_NAME"]}"
+  dataset "#{ENV["DATASET_NAME"]}"
+  table "#{ENV["TABLE_NAME"]}"
+  auto_create_table false
+  fetch_schema true
+  fetch_schema_table "#{ENV["TABLE_NAME"]}"
+
+  <secondary>
+    @type file
+    path ./log/bigquery-load-integration.errors
+  </secondary>
+</match>
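A usage note on the configuration above (not part of the diff): Fluentd evaluates double-quoted config values as Ruby string interpolation, which is how `json_key`, `project`, `dataset`, and `table` are filled from the environment variables listed in integration/README.md. In plain Ruby:

    ENV["PROJECT_NAME"] = "yourproject_id"  # hypothetical value
    "#{ENV["PROJECT_NAME"]}"                # => "yourproject_id"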
data/integration/schema.json
ADDED
@@ -0,0 +1,22 @@
+[
+  {
+    "name": "id",
+    "type": "INTEGER",
+    "mode": "REQUIRED"
+  },
+  {
+    "name": "string_field",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "timestamp_field",
+    "type": "TIMESTAMP",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "date",
+    "type": "DATE",
+    "mode": "REQUIRED"
+  }
+]
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -1,7 +1,7 @@
 module Fluent
   module BigQuery
     class Writer
-      def initialize(log, auth_method, options
+      def initialize(log, auth_method, **options)
         @auth_method = auth_method
         @scope = "https://www.googleapis.com/auth/bigquery"
         @options = options
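The `**options` signature (and the `**{}` call sites below) track Ruby 3's keyword-argument separation, which is why v3.x requires Ruby 2.7 or later. A minimal sketch of the difference, with hypothetical method names:

    def old_style(log, auth_method, options = {}); options; end
    def new_style(log, auth_method, **options); options; end

    # Ruby 2 implicitly converted a trailing hash into `options`;
    # Ruby 3 only collects keywords through an explicit double splat.
    new_style(nil, :json_key, json_key: "key.json") # => {:json_key=>"key.json"}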
@@ -35,8 +35,9 @@ module Fluent
         }
 
         definition.merge!(time_partitioning: time_partitioning) if time_partitioning
+        definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
         definition.merge!(clustering: clustering) if clustering
-        client.insert_table(project, dataset, definition, {})
+        client.insert_table(project, dataset, definition, **{})
        log.debug "create table", project_id: project, dataset: dataset, table: table_id
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        message = e.message
@@ -82,7 +83,7 @@ module Fluent
         if @options[:auto_create_table]
           res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
         else
-          res = client.insert_all_table_data(project, dataset, table_id, body, {})
+          res = client.insert_all_table_data(project, dataset, table_id, body, **{})
         end
         log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
@@ -157,10 +158,8 @@ module Fluent
         res = client.insert_job(
           project,
           configuration,
-          {
-            upload_source: upload_source,
-            content_type: "application/octet-stream",
-          }
+          upload_source: upload_source,
+          content_type: "application/octet-stream",
         )
         JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -318,6 +317,16 @@ module Fluent
         end
       end
 
+      def require_partition_filter
+        return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
+
+        if @options[:require_partition_filter]
+          @require_partition_filter = @options[:require_partition_filter]
+        else
+          @require_partition_filter
+        end
+      end
+
       def clustering
         return @clustering if instance_variable_defined?(:@clustering)
 
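The `instance_variable_defined?` guard mirrors the existing `clustering` accessor just below it: because the memoized value may itself be `false` or `nil`, a plain `@x ||= ...` would re-evaluate on every call. The pattern in isolation (hypothetical names, a sketch only):

    def expensive_lookup; false; end  # may legitimately return false/nil

    def cached_flag
      return @cached_flag if instance_variable_defined?(:@cached_flag)
      @cached_flag = expensive_lookup
    end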
@@ -332,7 +341,7 @@ module Fluent
 
       def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
         try_count ||= 1
-        res = client.insert_all_table_data(project, dataset, table_id, body, {})
+        res = client.insert_all_table_data(project, dataset, table_id, body, **{})
       rescue Google::Apis::ClientError => e
         if e.status_code == 404 && /Not Found: Table/i =~ e.message
           if try_count == 1
data/lib/fluent/plugin/out_bigquery_base.rb
CHANGED
@@ -111,9 +111,6 @@ module Fluent
       if @schema
         @table_schema.load_schema(@schema)
       end
-      if @schema_path
-        @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
-      end
 
       formatter_config = conf.elements("format")[0]
       @formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
@@ -126,6 +123,7 @@ module Fluent
       @tables_mutex = Mutex.new
       @fetched_schemas = {}
       @last_fetch_schema_time = Hash.new(0)
+      @read_schemas = {}
     end
 
     def multi_workers_ready?
@@ -133,7 +131,7 @@ module Fluent
     end
 
     def writer
-      @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method,
+      @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method,
         private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
         email: @email,
         json_key: @json_key,
@@ -148,19 +146,27 @@ module Fluent
         time_partitioning_type: @time_partitioning_type,
         time_partitioning_field: @time_partitioning_field,
         time_partitioning_expiration: @time_partitioning_expiration,
+        require_partition_filter: @require_partition_filter,
         clustering_fields: @clustering_fields,
         timeout_sec: @request_timeout_sec,
         open_timeout_sec: @request_open_timeout_sec,
-
+      )
     end
 
     def format(tag, time, record)
+      if record.nil?
+        log.warn("nil record detected. corrupted chunks? tag=#{tag}, time=#{time}")
+        return
+      end
+
       record = inject_values_to_record(tag, time, record)
 
       meta = metadata(tag, time, record)
       schema =
         if @fetch_schema
           fetch_schema(meta)
+        elsif @schema_path
+          read_schema(meta)
         else
           @table_schema
         end
@@ -209,9 +215,26 @@ module Fluent
       extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
     end
 
+    def read_schema(metadata)
+      schema_path = read_schema_target_path(metadata)
+
+      unless @read_schemas[schema_path]
+        table_schema = Fluent::BigQuery::RecordSchema.new("record")
+        table_schema.load_schema(MultiJson.load(File.read(schema_path)))
+        @read_schemas[schema_path] = table_schema
+      end
+      @read_schemas[schema_path]
+    end
+
+    def read_schema_target_path(metadata)
+      extract_placeholders(@schema_path, metadata)
+    end
+
     def get_schema(project, dataset, metadata)
       if @fetch_schema
         @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
+      elsif @schema_path
+        @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
       else
         @table_schema
       end
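Since `read_schema` resolves `@schema_path` through `extract_placeholders`, a schema file can now be chosen per chunk, the same way table ids are. A sketch mirroring the new unit test in test_out_bigquery_base.rb further down (the `driver` is the test harness's plugin driver, configured with `schema_path ${tag}.schema` and `<buffer tag, time>`):

    metadata = Fluent::Plugin::Buffer::Metadata.new(Time.now.to_i, "foo", {})
    driver.instance.read_schema_target_path(metadata) # => "foo.schema"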
data/lib/fluent/plugin/out_bigquery_insert.rb
CHANGED
@@ -29,6 +29,9 @@ module Fluent
       # If insert_id_field is not specified, true means to allow duplicate rows
       config_param :allow_retry_insert_errors, :bool, default: false
 
+      ## RequirePartitionFilter
+      config_param :require_partition_filter, :bool, default: false
+
       ## Buffer
       config_section :buffer do
         config_set_default :@type, "memory"
@@ -93,6 +96,8 @@ module Fluent
         schema = get_schema(project, dataset, metadata)
 
         insert(project, dataset, table_id, rows, schema, template_suffix)
+      rescue MultiJson::ParseError => e
+        raise Fluent::UnrecoverableError.new(e)
       end
 
       def insert(project, dataset, table_id, rows, schema, template_suffix)
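A note on the rescue added above: rows are serialized into buffer chunks as JSON, so a chunk whose payload no longer parses is corrupt and can never succeed on retry. Raising `Fluent::UnrecoverableError` makes Fluentd stop retrying the chunk and route it to error handling (such as a `<secondary>` output) instead of looping forever.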
data/test/plugin/test_out_bigquery_base.rb
CHANGED
@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
-  def test_configure_auth_json_key_as_file_raise_permission_error
-    json_key_path = 'test/plugin/testdata/json_key.json'
-    json_key_path_dir = File.dirname(json_key_path)
-
-    begin
-      File.chmod(0000, json_key_path_dir)
-
-      driver = create_driver(%[
-        table foo
-        auth_method json_key
-        json_key #{json_key_path}
-        project yourproject_id
-        dataset yourdataset_id
-        schema [
-          {"name": "time", "type": "INTEGER"},
-          {"name": "status", "type": "INTEGER"},
-          {"name": "bytes", "type": "INTEGER"}
-        ]
-      ])
-      assert_raises(Errno::EACCES) do
-        driver.instance.writer.client
-      end
-    ensure
-      File.chmod(0755, json_key_path_dir)
-    end
-  end
-
   def test_configure_auth_json_key_as_string
     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
     json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
   end
 
   def test_configure_auth_application_default
+    omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
+
     driver = create_driver(%[
       table foo
       auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert_equal :string, table_schema["argv"].type
     assert_equal :repeated, table_schema["argv"].mode
   end
+
+  def test_resolve_schema_path_with_placeholder
+    now = Time.now.to_i
+    driver = create_driver(<<-CONFIG)
+      table ${tag}_%Y%m%d
+      auth_method json_key
+      json_key jsonkey.josn
+      project yourproject_id
+      dataset yourdataset_id
+      schema_path ${tag}.schema
+
+      <buffer tag, time>
+        timekey 1d
+      </buffer>
+    CONFIG
+
+    metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
+
+    assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
+  end
 end
data/test/plugin/test_out_bigquery_insert.rb
CHANGED
@@ -5,6 +5,19 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     Fluent::Test.setup
   end
 
+  def is_ruby2?
+    RUBY_VERSION.to_i < 3
+  end
+
+  def build_args(args)
+    if is_ruby2?
+      args << {}
+    end
+    args
+  end
+
+  SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
+
   CONFIG = %[
     table foo
     email foo@bar.example
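The helpers above absorb a Ruby 2/3 difference in how the mocked google-api-client calls are recorded: the expectation presumably sees the keyword options as one extra trailing positional hash on Ruby 2, and as true keywords (hence no extra argument) on Ruby 3. In effect (a sketch, values hypothetical):

    args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {rows: []}])
    # Ruby 2.x => ['yourproject_id', 'yourdataset_id', 'foo', {rows: []}, {}]
    # Ruby 3.x => ['yourproject_id', 'yourdataset_id', 'foo', {rows: []}]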
@@ -121,11 +134,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     driver = create_driver
 
     stub_writer do |writer|
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
         rows: [{json: hash_including(entry)}],
         skip_invalid_rows: false,
         ignore_unknown_values: false
-      }
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
         s = stub!
         s.insert_errors { nil }
         s
@@ -186,11 +200,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     entry = {a: "b"}
     stub_writer do |writer|
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
         rows: [{json: hash_including(entry)}],
         skip_invalid_rows: false,
         ignore_unknown_values: false
-      }
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
         ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
         raise ex
       end
@@ -245,11 +260,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     entry = {a: "b"}
     stub_writer do |writer|
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
         rows: [{json: hash_including(entry)}],
         skip_invalid_rows: false,
         ignore_unknown_values: false
-      }
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
         ex = Google::Apis::ServerError.new("error", status_code: 501)
         def ex.reason
           "invalid"
@@ -260,14 +276,14 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
     assert_raise Fluent::BigQuery::UnRetryableError do
       driver.instance.write(chunk)
     end
-    assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.
+    assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.2
     driver.instance_shutdown
   end
@@ -290,11 +306,15 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     CONFIG
 
     stub_writer do |writer|
-
-
-
-
-
+      args = ['yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
+        rows: [entry[0]],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }]
+      if RUBY_VERSION.to_i < 3
+        args << {}
+      end
+      mock(writer.client).insert_all_table_data(*args) { stub!.insert_errors { nil } }
     end
 
     driver.run do
@@ -344,25 +364,29 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
       body = {
         rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
         skip_invalid_rows: false,
         ignore_unknown_values: false,
       }
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
         raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
       end.at_least(1)
       mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-
+      args = build_args(['yourproject_id', 'yourdataset_id', {
         table_reference: {
           table_id: 'foo',
         },
         schema: {
-          fields:
+          fields: schema_fields,
         },
-      }
+      }])
+      mock(writer.client).insert_table(*args)
     end
 
     assert_raise(RuntimeError) do
@@ -416,32 +440,39 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       time_partitioning_type day
      time_partitioning_field time
      time_partitioning_expiration 1h
+
+      require_partition_filter true
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
       body = {
         rows: [message],
         skip_invalid_rows: false,
         ignore_unknown_values: false,
       }
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
         raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
       end.at_least(1)
       mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-
+      args = build_args(['yourproject_id', 'yourdataset_id', {
         table_reference: {
           table_id: 'foo',
         },
         schema: {
-          fields:
+          fields: schema_fields,
         },
         time_partitioning: {
           type: 'DAY',
           field: 'time',
           expiration_ms: 3600000,
         },
-
+        require_partition_filter: true,
+      }])
+      mock(writer.client).insert_table(*args)
     end
 
     assert_raise(RuntimeError) do
@@ -495,7 +526,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       time_partitioning_type day
       time_partitioning_field time
       time_partitioning_expiration 1h
-      time_partitioning_require_partition_filter true
 
       clustering_fields [
         "time",
@@ -503,23 +533,26 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       ]
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
       body = {
         rows: [message],
         skip_invalid_rows: false,
         ignore_unknown_values: false,
       }
-
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
         raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
       end.at_least(1)
       mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-
+      args = build_args(['yourproject_id', 'yourdataset_id', {
         table_reference: {
           table_id: 'foo',
         },
         schema: {
-          fields:
+          fields: schema_fields,
         },
         time_partitioning: {
           type: 'DAY',
@@ -532,7 +565,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
           'vhost',
         ],
       },
-      }
+      }])
+      mock(writer.client).insert_table(*args)
     end
 
     assert_raise(RuntimeError) do
data/test/plugin/test_out_bigquery_load.rb
CHANGED
@@ -64,7 +64,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             max_bad_records: 0,
           }
         }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
         stub!.job_reference.stub!.job_id { "dummy_job_id" }
       end
     end
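`duck_type` is an rr argument matcher that accepts any object responding to the listed methods; it stands in for the IO-like `upload_source` the plugin streams to BigQuery, which cannot be matched by value. A quick illustration with a Tempfile as a representative IO (hypothetical, not from the tests):

    require "tempfile"

    Tempfile.create("chunk") do |f|
      [:write, :sync, :rewind].all? { |m| f.respond_to?(m) } # => true
    end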
@@ -117,7 +117,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
           },
         },
         job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}} ,
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
         stub!.job_reference.stub!.job_id { "dummy_job_id" }
       end
     end
@@ -132,7 +132,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
@@ -154,11 +154,11 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             max_bad_records: 0,
           }
         }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
         stub!.job_reference.stub!.job_id { "dummy_job_id" }
       end
 
-      mock(writer.client).get_job('yourproject_id', 'dummy_job_id',
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
         stub! do |s|
           s.id { 'dummy_job_id' }
           s.configuration.stub! do |_s|
@@ -215,7 +215,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata =
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
       c.append([driver.instance.format(tag, time, record)])
     end
@@ -237,11 +237,11 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
             max_bad_records: 0,
           }
         }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
         stub!.job_reference.stub!.job_id { "dummy_job_id" }
       end
 
-      mock(writer.client).get_job('yourproject_id', 'dummy_job_id',
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
         stub! do |s|
           s.id { 'dummy_job_id' }
           s.configuration.stub! do |_s|
@@ -317,7 +317,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
           },
         }
       }
-      },
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
         stub!.job_reference.stub!.job_id { "dummy_job_id" }
       end
     end
metadata
CHANGED
@@ -1,15 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 2.2.0
+  version: 3.0.0
 platform: ruby
 authors:
 - Naoya Ito
 - joker1007
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2022-10-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -139,14 +139,21 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".github/ISSUE_TEMPLATE.md"
+- ".github/workflows/linux.yml"
+- ".github/workflows/windows.yml"
 - ".gitignore"
-- ".travis.yml"
 - Gemfile
 - LICENSE.txt
 - README.md
 - Rakefile
 - fluent-plugin-bigquery.gemspec
 - gemfiles/activesupport-4.gemfile
+- integration/README.md
+- integration/create_table.sh
+- integration/dummer_insert.rb
+- integration/dummer_load.rb
+- integration/fluent.conf
+- integration/schema.json
 - lib/fluent/plugin/bigquery/errors.rb
 - lib/fluent/plugin/bigquery/helper.rb
 - lib/fluent/plugin/bigquery/schema.rb
@@ -168,7 +175,7 @@ homepage: https://github.com/kaizenplatform/fluent-plugin-bigquery
 licenses:
 - Apache-2.0
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -183,8 +190,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.3.7
+signing_key:
 specification_version: 4
 summary: Fluentd plugin to store data on Google BigQuery
 test_files: