fluent-plugin-bigquery 1.2.0 → 2.0.0.beta
- checksums.yaml +4 -4
- data/.travis.yml +2 -9
- data/README.md +68 -65
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +45 -39
- data/lib/fluent/plugin/out_bigquery_base.rb +211 -0
- data/lib/fluent/plugin/out_bigquery_insert.rb +131 -0
- data/lib/fluent/plugin/out_bigquery_load.rb +220 -0
- data/test/helper.rb +3 -1
- data/test/plugin/test_out_bigquery_base.rb +579 -0
- data/test/plugin/test_out_bigquery_insert.rb +420 -0
- data/test/plugin/test_out_bigquery_load.rb +310 -0
- metadata +13 -7
- data/lib/fluent/plugin/out_bigquery.rb +0 -500
- data/test/plugin/test_out_bigquery.rb +0 -1276
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '07998acf05ddb3e647da13a4b5c734dc16f8cc77'
+  data.tar.gz: 1fce9fc906cbf72083a4f8132c0ac1d985a95d6d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 04cfd6d3080d9424e25bd75ae1a9600259fe94ed933adceab66c02eb11afdb49eeddc393c305f0927dd64f967d1e72835fde9566cd54b2e53805e85ffe7a1516
+  data.tar.gz: 8de74527cf12be2c6553e4a582cc25c47a1773cdc165800f212aae563f7ffa048679260515a51f55e244b641b968badeaa4349cf4369ad2363d22aff1c1cbe7d
data/.travis.yml
CHANGED
@@ -1,20 +1,13 @@
 language: ruby
 
 rvm:
-  - 2.1
   - 2.2
   - 2.3.3
+  - 2.4.3
+  - 2.5.0
 
 gemfile:
   - Gemfile
-  - gemfiles/activesupport-4.gemfile
-
-matrix:
-  exclude:
-    - rvm: 2.0
-      gemfile: Gemfile
-    - rvm: 2.1
-      gemfile: Gemfile
 
 before_install:
   - gem update bundler
data/README.md
CHANGED
@@ -1,13 +1,17 @@
 # fluent-plugin-bigquery
 
+**This README is for v2.0.0.beta. but it is not released yet. sorry.**
+
 [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.
 
-- **Plugin type**:
+- **Plugin type**: Output
 
 * insert data over streaming inserts
+  * plugin type is `bigquery_insert`
   * for continuous real-time insertions
   * https://developers.google.com/bigquery/streaming-data-into-bigquery#usecases
 * load data
+  * plugin type is `bigquery_load`
   * for data loading as batch jobs, for big amount of data
   * https://developers.google.com/bigquery/loading-data-into-bigquery
 
@@ -31,42 +35,47 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 
 ### Options
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-| auto_create_table | bool | no | no | false | If true, creates table automatically
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+#### common
+
+| name | type | required? | placeholder? | default | description |
+| :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+| auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+| email | string | yes (private_key) | no | nil | GCP Service Account Email |
+| private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
+| private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
+| json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
+| project | string | yes | yes | nil | |
+| dataset | string | yes | yes | nil | |
+| table | string | yes (either `tables`) | yes | nil | |
+| tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
+| auto_create_table | bool | no | no | false | If true, creates table automatically |
+| ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+| schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
+| schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+| fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
+| fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+| schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+| request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
+| request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+| time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
+| time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+
+#### bigquery_insert
+
+| name | type | required? | placeholder? | default | description |
+| :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+| template_suffix | string | no | yes | nil | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
+| skip_invalid_rows | bool | no | no | false | |
+| insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
+| add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
+| allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+
+#### bigquery_load
+
+| name | type | required? | placeholder? | default | description |
+| :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
+| source_format | enum | no | no | json | Specify source format `json` or `csv` or `avro`. If you change this parameter, you must change formatter plugin via `<format>` config section. |
+| max_bad_records | integer | no | no | 0 | If the number of bad records exceeds this value, an invalid error is returned in the job result. |
 
 ### Buffer section
 
@@ -77,9 +86,9 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | total_limit_size | integer | no | 1GB (insert) or 32GB (load) | |
 | chunk_records_limit | integer | no | 500 (insert) or nil (load) | |
 | flush_mode | enum | no | interval | default, lazy, interval, immediate |
-| flush_interval | float | no | 0
-| flush_thread_interval | float | no | 0.05 (insert) or
-| flush_thread_burst_interval | float | no | 0.05 (insert) or
+| flush_interval | float | no | 1.0 (insert) or 3600 (load) | |
+| flush_thread_interval | float | no | 0.05 (insert) or 5 (load) | |
+| flush_thread_burst_interval | float | no | 0.05 (insert) or 5 (load) | |
 
 And, other params (defined by base class) are available
 
@@ -142,9 +151,7 @@ Configure insert specifications with target table schema, with your credentials.
 
 ```apache
 <match dummy>
-  @type
-
-  method insert # default
+  @type bigquery_insert
 
   auth_method private_key # default
   email xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxx@developer.gserviceaccount.com
@@ -181,14 +188,12 @@ For high rate inserts over streaming inserts, you should specify flush intervals
 
 ```apache
 <match dummy>
-  @type
-
-  method insert # default
+  @type bigquery_insert
 
   <buffer>
     flush_interval 0.1 # flush as frequent as possible
 
-
+    total_limit_size 10g
 
     flush_thread_count 16
   </buffer>
@@ -256,16 +261,12 @@ section in the Google BigQuery document.
 ### Load
 ```apache
 <match bigquery>
-  @type
-
-  method load
+  @type bigquery_load
 
   <buffer>
-
-
-
-    flush_at_shutdown true
-    timekey_use_utc
+    path bigquery.*.buffer
+    flush_at_shutdown true
+    timekey_use_utc
   </buffer>
 
   auth_method json_key
@@ -302,7 +303,7 @@ download its JSON key and deploy the key with fluentd.
 
 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert
 
   auth_method json_key
   json_key /home/username/.keys/00000000000000000000000000000000-jsonkey.json
@@ -319,7 +320,7 @@ You need to only include `private_key` and `client_email` key from JSON key file
 
 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert
 
   auth_method json_key
   json_key {"private_key": "-----BEGIN PRIVATE KEY-----\n...", "client_email": "xxx@developer.gserviceaccount.com"}
@@ -340,7 +341,7 @@ Compute Engine instance, then you can configure fluentd like this.
 
 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert
 
   auth_method compute_engine
 
@@ -382,7 +383,7 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 
 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert
 
   ...
 
@@ -430,7 +431,7 @@ Use placeholder.
 
 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert
 
   ...
   table accesslog$%Y%m%d
@@ -453,7 +454,7 @@ NOTE: `auto_create_table` option cannot be used with `fetch_schema`. You should
 
 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert
 
   ...
 
@@ -477,7 +478,7 @@ you can also specify nested fields by prefixing their belonging record fields.
 
 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert
 
   ...
 
@@ -528,7 +529,7 @@ The second method is to specify a path to a BigQuery schema file instead of list
 
 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert
 
   ...
 
@@ -541,7 +542,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
 
 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert
 
   ...
 
@@ -559,10 +560,12 @@ you are still recommended to specify JSON types for TIMESTAMP fields as "time" f
 
 BigQuery uses `insertId` property to detect duplicate insertion requests (see [data consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency) in Google BigQuery documents).
 You can set `insert_id_field` option to specify the field to use as `insertId` property.
+`insert_id_field` can use fluentd record_accessor format like `$['key1'][0]['key2']`.
+(detail. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor)
 
 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert
 
   ...
 
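The record_accessor path shown above addresses nested fields: `$['key1'][0]['key2']` picks `record["key1"][0]["key2"]` as the `insertId`. A plain-Ruby sketch of what that path resolves to (the record and its values are made up for illustration; in the plugin the lookup is done by fluentd's record_accessor helper):

```ruby
# Hypothetical event record; the accessor path $['key1'][0]['key2'] points at
# the nested value that will be sent as insertId for deduplication.
record = { "key1" => [{ "key2" => "event-20180101-0001" }], "message" => "..." }

insert_id = record["key1"][0]["key2"]
# => "event-20180101-0001"
```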
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -7,22 +7,15 @@ module Fluent
         @options = options
         @log = log
         @num_errors_per_chunk = {}
-
-        @cached_client_expiration = Time.now + 1800
       end
 
       def client
-
-
-        client = Google::Apis::BigqueryV2::BigqueryService.new.tap do |cl|
+        @client ||= Google::Apis::BigqueryV2::BigqueryService.new.tap do |cl|
           cl.authorization = get_auth
           cl.client_options.open_timeout_sec = @options[:open_timeout_sec] if @options[:open_timeout_sec]
           cl.client_options.read_timeout_sec = @options[:timeout_sec] if @options[:timeout_sec]
           cl.client_options.send_timeout_sec = @options[:timeout_sec] if @options[:timeout_sec]
         end
-
-        @cached_client_expiration = Time.now + 1800
-        @client = client
       end
 
       def create_table(project, dataset, table_id, record_schema)
@@ -49,10 +42,7 @@ module Fluent
         end
         client.insert_table(project, dataset, definition, {})
         log.debug "create table", project_id: project, dataset: dataset, table: table_id
-        @client = nil
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        @client = nil
-
         message = e.message
         if e.status_code == 409 && /Already Exists:/ =~ message
           log.debug "already created table", project_id: project, dataset: dataset, table: table_id
@@ -81,7 +71,6 @@ module Fluent
 
         schema
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        @client = nil
         message = e.message
         log.error "tables.get API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message
         nil
@@ -111,8 +100,6 @@ module Fluent
           end
         end
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        @client = nil
-
         reason = e.respond_to?(:reason) ? e.reason : nil
         error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason }
         wrapped = Fluent::BigQuery::Error.wrap(e)
@@ -125,7 +112,17 @@ module Fluent
         raise wrapped
       end
 
-
+      JobReference = Struct.new(:chunk_id, :chunk_id_hex, :project_id, :dataset_id, :table_id, :job_id) do
+        def as_hash(*keys)
+          if keys.empty?
+            to_h
+          else
+            to_h.select { |k, _| keys.include?(k) }
+          end
+        end
+      end
+
+      def create_load_job(chunk_id, chunk_id_hex, project, dataset, table_id, upload_source, fields)
         configuration = {
           configuration: {
             load: {
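`JobReference` carries everything needed to look the load job up again later, and `as_hash` is a small convenience for logging a subset of its members. A quick illustration with made-up values (assuming the struct lives inside the `Writer` class as shown above):

```ruby
# Illustrative values only.
ref = JobReference.new("\x01\x02", "0102", "my-project", "my_dataset", "access_log", "fluentd_job_0123abcd")

ref.as_hash(:project_id, :dataset_id, :table_id)
# => {:project_id=>"my-project", :dataset_id=>"my_dataset", :table_id=>"access_log"}
ref.as_hash  # no arguments: the full hash of all six members
```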
@@ -145,7 +142,7 @@
           }
         }
 
-        job_id = create_job_id(
+        job_id = create_job_id(chunk_id_hex, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
         configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
         configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
 
@@ -167,11 +164,8 @@
             content_type: "application/octet-stream",
           }
         )
-
-        @num_errors_per_chunk.delete(chunk_id)
+        JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        @client = nil
-
         reason = e.respond_to?(:reason) ? e.reason : nil
         log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
 
@@ -187,44 +181,56 @@
         end
 
         if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
-
-          @num_errors_per_chunk.delete(chunk_id)
-          return
+          return JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, job_id)
         end
 
         raise Fluent::BigQuery::Error.wrap(e)
       end
 
-      def
-
-
+      def fetch_load_job(job_reference)
+        project = job_reference.project_id
+        job_id = job_reference.job_id
+
+        res = client.get_job(project, job_id)
+        log.debug "load job fetched", id: job_id, state: res.status.state, **job_reference.as_hash(:project_id, :dataset_id, :table_id)
 
-
-
-        sleep wait_interval
-        _response = client.get_job(project, _response.job_reference.job_id)
+        if res.status.state == "DONE"
+          res
         end
+      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+        e = Fluent::BigQuery::Error.wrap(e)
+        raise e unless e.retryable?
+      end
+
+      def commit_load_job(chunk_id_hex, response)
+        job_id = response.id
+        project = response.configuration.load.destination_table.project_id
+        dataset = response.configuration.load.destination_table.dataset_id
+        table_id = response.configuration.load.destination_table.table_id
 
-        errors =
+        errors = response.status.errors
         if errors
           errors.each do |e|
-            log.error "job.
+            log.error "job.load API (rows)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: e.message, reason: e.reason
           end
         end
 
-        error_result =
+        error_result = response.status.error_result
         if error_result
-          log.error "job.
+          log.error "job.load API (result)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: error_result.message, reason: error_result.reason
           if Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
-            @num_errors_per_chunk[
+            @num_errors_per_chunk[chunk_id_hex] = @num_errors_per_chunk[chunk_id_hex].to_i + 1
             raise Fluent::BigQuery::RetryableError.new("failed to load into bigquery, retry")
           else
-            @num_errors_per_chunk.delete(
+            @num_errors_per_chunk.delete(chunk_id_hex)
            raise Fluent::BigQuery::UnRetryableError.new("failed to load into bigquery, and cannot retry")
          end
        end
 
-
+        stats = response.statistics.load
+        duration = (response.statistics.end_time - response.statistics.creation_time) / 1000.0
+        log.debug "load job finished", id: job_id, state: response.status.state, input_file_bytes: stats.input_file_bytes, input_files: stats.input_files, output_bytes: stats.output_bytes, output_rows: stats.output_rows, bad_records: stats.bad_records, duration: duration.round(2), project_id: project, dataset: dataset, table: table_id
+        @num_errors_per_chunk.delete(chunk_id_hex)
       end
 
       private
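Taken together, these changes replace the old blocking wait with an asynchronous flow: `create_load_job` returns a `JobReference`, `fetch_load_job` returns the job response only once its state is `DONE` (and swallows retryable API errors), and `commit_load_job` turns the result into success, `RetryableError`, or `UnRetryableError`. A rough sketch of how a caller could drive this; the real polling loop lives in the new `out_bigquery_load.rb`, which is not shown in this diff, so the loop below is illustrative only:

```ruby
# Illustrative driver: `writer` is the Writer instance; the poll interval and
# loop structure are assumptions, not the plugin's actual scheduling.
job_ref = writer.create_load_job(chunk_id, chunk_id_hex, project, dataset, table_id, upload_source, fields)

response = nil
until response
  sleep 10                                   # arbitrary poll interval
  response = writer.fetch_load_job(job_ref)  # nil until the job state is "DONE"
end

writer.commit_load_job(job_ref.chunk_id_hex, response)
# raises Fluent::BigQuery::RetryableError or UnRetryableError if the job failed
```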
@@ -291,8 +297,8 @@
         table_id.gsub(/\$\d+$/, "")
       end
 
-      def create_job_id(
-        job_id_key = "#{
+      def create_job_id(chunk_id_hex, dataset, table, schema)
+        job_id_key = "#{chunk_id_hex}#{dataset}#{table}#{schema.to_s}#{@options[:max_bad_records]}#{@options[:ignore_unknown_values]}#{@num_errors_per_chunk[chunk_id_hex]}"
         @log.debug "job_id_key: #{job_id_key}"
         "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
       end