fluent-plugin-bigquery 1.2.0 → 2.0.0.beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -9
- data/README.md +68 -65
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/bigquery/writer.rb +45 -39
- data/lib/fluent/plugin/out_bigquery_base.rb +211 -0
- data/lib/fluent/plugin/out_bigquery_insert.rb +131 -0
- data/lib/fluent/plugin/out_bigquery_load.rb +220 -0
- data/test/helper.rb +3 -1
- data/test/plugin/test_out_bigquery_base.rb +579 -0
- data/test/plugin/test_out_bigquery_insert.rb +420 -0
- data/test/plugin/test_out_bigquery_load.rb +310 -0
- metadata +13 -7
- data/lib/fluent/plugin/out_bigquery.rb +0 -500
- data/test/plugin/test_out_bigquery.rb +0 -1276
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '07998acf05ddb3e647da13a4b5c734dc16f8cc77'
+  data.tar.gz: 1fce9fc906cbf72083a4f8132c0ac1d985a95d6d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 04cfd6d3080d9424e25bd75ae1a9600259fe94ed933adceab66c02eb11afdb49eeddc393c305f0927dd64f967d1e72835fde9566cd54b2e53805e85ffe7a1516
+  data.tar.gz: 8de74527cf12be2c6553e4a582cc25c47a1773cdc165800f212aae563f7ffa048679260515a51f55e244b641b968badeaa4349cf4369ad2363d22aff1c1cbe7d
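These are the standard RubyGems checksums of the packaged files. As a minimal sketch (not part of the gem; the file names assume a locally extracted .gem archive), the values can be recomputed with Ruby's Digest module:

```ruby
# Recompute the SHA1/SHA512 digests recorded in checksums.yaml.
# Assumes metadata.gz and data.tar.gz were extracted from the .gem locally.
require "digest"

%w[metadata.gz data.tar.gz].each do |name|
  bytes = File.binread(name)
  puts "#{name} SHA1:   #{Digest::SHA1.hexdigest(bytes)}"
  puts "#{name} SHA512: #{Digest::SHA512.hexdigest(bytes)}"
end
```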
data/.travis.yml
CHANGED
@@ -1,20 +1,13 @@
 language: ruby

 rvm:
-  - 2.1
   - 2.2
   - 2.3.3
+  - 2.4.3
+  - 2.5.0

 gemfile:
   - Gemfile
-  - gemfiles/activesupport-4.gemfile
-
-matrix:
-  exclude:
-    - rvm: 2.0
-      gemfile: Gemfile
-    - rvm: 2.1
-      gemfile: Gemfile

 before_install:
   - gem update bundler
data/README.md
CHANGED
@@ -1,13 +1,17 @@
 # fluent-plugin-bigquery

+**This README is for v2.0.0.beta. but it is not released yet. sorry.**
+
 [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.

-- **Plugin type**:
+- **Plugin type**: Output

 * insert data over streaming inserts
+  * plugin type is `bigquery_insert`
   * for continuous real-time insertions
   * https://developers.google.com/bigquery/streaming-data-into-bigquery#usecases
 * load data
+  * plugin type is `bigquery_load`
   * for data loading as batch jobs, for big amount of data
   * https://developers.google.com/bigquery/loading-data-into-bigquery

@@ -31,42 +35,47 @@ Because embbeded gem dependency sometimes restricts ruby environment.

 ### Options

-| auto_create_table | bool | no | no | false | If true, creates table automatically
+#### common
+
+| name                         | type          | required?                                    | placeholder? | default     | description |
+| :--------------------------- | :------------ | :------------------------------------------- | :----------- | :---------- | :----------------------- |
+| auth_method                  | enum          | yes                                          | no           | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+| email                        | string        | yes (private_key)                            | no           | nil         | GCP Service Account Email |
+| private_key_path             | string        | yes (private_key)                            | no           | nil         | GCP Private Key file path |
+| private_key_passphrase       | string        | yes (private_key)                            | no           | nil         | GCP Private Key Passphrase |
+| json_key                     | string        | yes (json_key)                               | no           | nil         | GCP JSON Key file path or JSON Key string |
+| project                      | string        | yes                                          | yes          | nil         | |
+| dataset                      | string        | yes                                          | yes          | nil         | |
+| table                        | string        | yes (either `tables`)                        | yes          | nil         | |
+| tables                       | array(string) | yes (either `table`)                         | yes          | nil         | can set multi table names splitted by `,` |
+| auto_create_table            | bool          | no                                           | no           | false       | If true, creates table automatically |
+| ignore_unknown_values        | bool          | no                                           | no           | false       | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+| schema                       | array         | yes (either `fetch_schema` or `schema_path`) | no           | nil         | Schema Definition. It is formatted by JSON. |
+| schema_path                  | string        | yes (either `fetch_schema`)                  | no           | nil         | Schema Definition file path. It is formatted by JSON. |
+| fetch_schema                 | bool          | yes (either `schema_path`)                   | no           | false       | If true, fetch table schema definition from Bigquery table automatically. |
+| fetch_schema_table           | string        | no                                           | yes          | nil         | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+| schema_cache_expire          | integer       | no                                           | no           | 600         | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+| request_timeout_sec          | integer       | no                                           | no           | nil         | Bigquery API response timeout |
+| request_open_timeout_sec     | integer       | no                                           | no           | 60          | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+| time_partitioning_type       | enum          | no (either day)                              | no           | nil         | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
+| time_partitioning_expiration | time          | no                                           | no           | nil         | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+
+#### bigquery_insert
+
+| name                      | type   | required? | placeholder? | default | description |
+| :------------------------ | :----- | :-------- | :----------- | :------ | :----------------------- |
+| template_suffix           | string | no        | yes          | nil     | can use `%{time_slice}` placeholder replaced by `time_slice_format` |
+| skip_invalid_rows         | bool   | no        | no           | false   | |
+| insert_id_field           | string | no        | no           | nil     | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
+| add_insert_timestamp      | string | no        | no           | nil     | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
+| allow_retry_insert_errors | bool   | no        | no           | false   | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+
+#### bigquery_load
+
+| name            | type    | required? | placeholder? | default | description |
+| :-------------- | :------ | :-------- | :----------- | :------ | :----------------------- |
+| source_format   | enum    | no        | no           | json    | Specify source format `json` or `csv` or `avro`. If you change this parameter, you must change formatter plugin via `<format>` config section. |
+| max_bad_records | integer | no        | no           | 0       | If the number of bad records exceeds this value, an invalid error is returned in the job result. |

 ### Buffer section

@@ -77,9 +86,9 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | total_limit_size            | integer | no | 1GB (insert) or 32GB (load) | |
 | chunk_records_limit         | integer | no | 500 (insert) or nil (load)  | |
 | flush_mode                  | enum    | no | interval                    | default, lazy, interval, immediate |
-| flush_interval              | float   | no | 0
-| flush_thread_interval       | float   | no | 0.05 (insert) or
-| flush_thread_burst_interval | float   | no | 0.05 (insert) or
+| flush_interval              | float   | no | 1.0 (insert) or 3600 (load) | |
+| flush_thread_interval       | float   | no | 0.05 (insert) or 5 (load)   | |
+| flush_thread_burst_interval | float   | no | 0.05 (insert) or 5 (load)   | |

 And, other params (defined by base class) are available

@@ -142,9 +151,7 @@ Configure insert specifications with target table schema, with your credentials.

 ```apache
 <match dummy>
-  @type
-
-  method insert # default
+  @type bigquery_insert

   auth_method private_key # default
   email xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxx@developer.gserviceaccount.com
@@ -181,14 +188,12 @@ For high rate inserts over streaming inserts, you should specify flush intervals

 ```apache
 <match dummy>
-  @type
-
-  method insert # default
+  @type bigquery_insert

   <buffer>
     flush_interval 0.1 # flush as frequent as possible
-
+    total_limit_size 10g

     flush_thread_count 16
   </buffer>
@@ -256,16 +261,12 @@ section in the Google BigQuery document.
 ### Load
 ```apache
 <match bigquery>
-  @type
-
-  method load
+  @type bigquery_load

   <buffer>
-
-
-
-    flush_at_shutdown true
-    timekey_use_utc
+    path bigquery.*.buffer
+    flush_at_shutdown true
+    timekey_use_utc
   </buffer>

   auth_method json_key
@@ -302,7 +303,7 @@ download its JSON key and deploy the key with fluentd.

 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert

   auth_method json_key
   json_key /home/username/.keys/00000000000000000000000000000000-jsonkey.json
@@ -319,7 +320,7 @@ You need to only include `private_key` and `client_email` key from JSON key file

 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert

   auth_method json_key
   json_key {"private_key": "-----BEGIN PRIVATE KEY-----\n...", "client_email": "xxx@developer.gserviceaccount.com"}
@@ -340,7 +341,7 @@ Compute Engine instance, then you can configure fluentd like this.

 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert

   auth_method compute_engine

@@ -382,7 +383,7 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.

 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert

   ...

@@ -430,7 +431,7 @@ Use placeholder.

 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert

   ...
   table accesslog$%Y%m%d
@@ -453,7 +454,7 @@ NOTE: `auto_create_table` option cannot be used with `fetch_schema`. You should

 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert

   ...

@@ -477,7 +478,7 @@ you can also specify nested fields by prefixing their belonging record fields.

 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert

   ...

@@ -528,7 +529,7 @@ The second method is to specify a path to a BigQuery schema file instead of list

 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert

   ...

@@ -541,7 +542,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi

 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert

   ...

@@ -559,10 +560,12 @@ you are still recommended to specify JSON types for TIMESTAMP fields as "time" f

 BigQuery uses `insertId` property to detect duplicate insertion requests (see [data consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency) in Google BigQuery documents).
 You can set `insert_id_field` option to specify the field to use as `insertId` property.
+`insert_id_field` can use fluentd record_accessor format like `$['key1'][0]['key2']`.
+(detail. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor)

 ```apache
 <match dummy>
-  @type
+  @type bigquery_insert

   ...

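The `$['key1'][0]['key2']` form mentioned in the last hunk is fluentd's record_accessor path syntax. A plain-Ruby illustration (the record below is invented) of what such a path resolves to:

```ruby
# Hypothetical event record; shows what `$['key1'][0]['key2']` points at.
record = { "key1" => [{ "key2" => "id-0001" }], "other" => "value" }

insert_id = record["key1"][0]["key2"]
# => "id-0001" -- the value the plugin would send as the insertId property
```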
data/lib/fluent/plugin/bigquery/writer.rb
CHANGED
@@ -7,22 +7,15 @@ module Fluent
         @options = options
         @log = log
         @num_errors_per_chunk = {}
-
-        @cached_client_expiration = Time.now + 1800
       end

       def client
-
-
-        client = Google::Apis::BigqueryV2::BigqueryService.new.tap do |cl|
+        @client ||= Google::Apis::BigqueryV2::BigqueryService.new.tap do |cl|
           cl.authorization = get_auth
           cl.client_options.open_timeout_sec = @options[:open_timeout_sec] if @options[:open_timeout_sec]
           cl.client_options.read_timeout_sec = @options[:timeout_sec] if @options[:timeout_sec]
           cl.client_options.send_timeout_sec = @options[:timeout_sec] if @options[:timeout_sec]
         end
-
-        @cached_client_expiration = Time.now + 1800
-        @client = client
       end

       def create_table(project, dataset, table_id, record_schema)
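The hunk above drops the time-based client cache (`@cached_client_expiration`) in favor of plain `||=` memoization. A standalone sketch of the pattern, not the plugin's actual class:

```ruby
# Memoization: the service object is built on first use and then reused,
# instead of being rebuilt after a timed expiry or reset on errors.
class ClientCache
  def client
    @client ||= build_client # built once, returned on every later call
  end

  private

  def build_client
    Object.new # stands in for Google::Apis::BigqueryV2::BigqueryService.new
  end
end

cache = ClientCache.new
cache.client.equal?(cache.client) # => true, the same instance is reused
```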
@@ -49,10 +42,7 @@
         end
         client.insert_table(project, dataset, definition, {})
         log.debug "create table", project_id: project, dataset: dataset, table: table_id
-        @client = nil
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        @client = nil
-
         message = e.message
         if e.status_code == 409 && /Already Exists:/ =~ message
           log.debug "already created table", project_id: project, dataset: dataset, table: table_id
@@ -81,7 +71,6 @@

         schema
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        @client = nil
         message = e.message
         log.error "tables.get API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message
         nil
@@ -111,8 +100,6 @@
           end
         end
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        @client = nil
-
         reason = e.respond_to?(:reason) ? e.reason : nil
         error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason }
         wrapped = Fluent::BigQuery::Error.wrap(e)
@@ -125,7 +112,17 @@
         raise wrapped
       end

-
+      JobReference = Struct.new(:chunk_id, :chunk_id_hex, :project_id, :dataset_id, :table_id, :job_id) do
+        def as_hash(*keys)
+          if keys.empty?
+            to_h
+          else
+            to_h.select { |k, _| keys.include?(k) }
+          end
+        end
+      end
+
+      def create_load_job(chunk_id, chunk_id_hex, project, dataset, table_id, upload_source, fields)
         configuration = {
           configuration: {
             load: {
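`JobReference` bundles everything needed to poll a load job later, and `as_hash` returns either the full hash or a filtered subset. A small usage illustration with made-up values:

```ruby
# Same struct as in the hunk above; the values here are purely illustrative.
JobReference = Struct.new(:chunk_id, :chunk_id_hex, :project_id, :dataset_id, :table_id, :job_id) do
  def as_hash(*keys)
    keys.empty? ? to_h : to_h.select { |k, _| keys.include?(k) }
  end
end

ref = JobReference.new("\x01\x02", "0102", "my-project", "my_dataset", "my_table", "fluentd_job_abc123")
ref.as_hash(:project_id, :dataset_id, :table_id)
# => {:project_id=>"my-project", :dataset_id=>"my_dataset", :table_id=>"my_table"}
ref.as_hash
# => hash with all six fields
```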
@@ -145,7 +142,7 @@
           }
         }

-        job_id = create_job_id(
+        job_id = create_job_id(chunk_id_hex, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
         configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
         configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id

@@ -167,11 +164,8 @@
             content_type: "application/octet-stream",
           }
         )
-
-        @num_errors_per_chunk.delete(chunk_id)
+        JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        @client = nil
-
         reason = e.respond_to?(:reason) ? e.reason : nil
         log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
@@ -187,44 +181,56 @@
         end

         if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
-
-          @num_errors_per_chunk.delete(chunk_id)
-          return
+          return JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, job_id)
         end

         raise Fluent::BigQuery::Error.wrap(e)
       end

-      def
-
-
+      def fetch_load_job(job_reference)
+        project = job_reference.project_id
+        job_id = job_reference.job_id
+
+        res = client.get_job(project, job_id)
+        log.debug "load job fetched", id: job_id, state: res.status.state, **job_reference.as_hash(:project_id, :dataset_id, :table_id)

-
-
-        sleep wait_interval
-        _response = client.get_job(project, _response.job_reference.job_id)
+        if res.status.state == "DONE"
+          res
         end
+      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+        e = Fluent::BigQuery::Error.wrap(e)
+        raise e unless e.retryable?
+      end
+
+      def commit_load_job(chunk_id_hex, response)
+        job_id = response.id
+        project = response.configuration.load.destination_table.project_id
+        dataset = response.configuration.load.destination_table.dataset_id
+        table_id = response.configuration.load.destination_table.table_id

-        errors =
+        errors = response.status.errors
         if errors
           errors.each do |e|
-            log.error "job.
+            log.error "job.load API (rows)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: e.message, reason: e.reason
           end
         end

-        error_result =
+        error_result = response.status.error_result
         if error_result
-          log.error "job.
+          log.error "job.load API (result)", job_id: job_id, project_id: project, dataset: dataset, table: table_id, message: error_result.message, reason: error_result.reason
           if Fluent::BigQuery::Error.retryable_error_reason?(error_result.reason)
-            @num_errors_per_chunk[
+            @num_errors_per_chunk[chunk_id_hex] = @num_errors_per_chunk[chunk_id_hex].to_i + 1
             raise Fluent::BigQuery::RetryableError.new("failed to load into bigquery, retry")
           else
-            @num_errors_per_chunk.delete(
+            @num_errors_per_chunk.delete(chunk_id_hex)
             raise Fluent::BigQuery::UnRetryableError.new("failed to load into bigquery, and cannot retry")
           end
         end

-
+        stats = response.statistics.load
+        duration = (response.statistics.end_time - response.statistics.creation_time) / 1000.0
+        log.debug "load job finished", id: job_id, state: response.status.state, input_file_bytes: stats.input_file_bytes, input_files: stats.input_files, output_bytes: stats.output_bytes, output_rows: stats.output_rows, bad_records: stats.bad_records, duration: duration.round(2), project_id: project, dataset: dataset, table: table_id
+        @num_errors_per_chunk.delete(chunk_id_hex)
       end

       private
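Taken together, these methods replace the old blocking wait with a poll-then-commit flow: `create_load_job` returns a `JobReference`, `fetch_load_job` returns the job response only once its state is `DONE` (nil otherwise), and `commit_load_job` raises a retryable or unretryable error depending on the result. A rough caller-side sketch; the `wait_and_commit` helper and its polling interval are not part of the gem, whose actual driver lives in out_bigquery_load.rb:

```ruby
# Hypothetical driver showing how the three writer methods compose.
def wait_and_commit(writer, job_reference, interval: 10)
  response = nil
  loop do
    response = writer.fetch_load_job(job_reference) # nil until state == "DONE"
    break if response
    sleep interval
  end
  # Raises Fluent::BigQuery::RetryableError / UnRetryableError on failed jobs,
  # otherwise logs job statistics and clears the per-chunk error counter.
  writer.commit_load_job(job_reference.chunk_id_hex, response)
end
```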
@@ -291,8 +297,8 @@
         table_id.gsub(/\$\d+$/, "")
       end

-      def create_job_id(
-        job_id_key = "#{
+      def create_job_id(chunk_id_hex, dataset, table, schema)
+        job_id_key = "#{chunk_id_hex}#{dataset}#{table}#{schema.to_s}#{@options[:max_bad_records]}#{@options[:ignore_unknown_values]}#{@num_errors_per_chunk[chunk_id_hex]}"
         @log.debug "job_id_key: #{job_id_key}"
         "fluentd_job_" + Digest::SHA1.hexdigest(job_id_key)
       end
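Because the job id is a SHA1 digest of the chunk id, destination, schema, relevant options and the per-chunk error count, resubmitting an unchanged chunk produces the same id, which is what lets the 409 duplicate-job branch in `create_load_job` treat the resubmission as the already-running job. A simplified sketch that omits the option and error-count components of the real key:

```ruby
# Deterministic job ids: identical inputs always hash to the same id.
require "digest"

def create_job_id(chunk_id_hex, dataset, table, schema)
  "fluentd_job_" + Digest::SHA1.hexdigest("#{chunk_id_hex}#{dataset}#{table}#{schema}")
end

create_job_id("0102abcd", "my_dataset", "my_table", "[...]") ==
  create_job_id("0102abcd", "my_dataset", "my_table", "[...]")
# => true
```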