fluent-plugin-bigquery 2.0.0.beta → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: '07998acf05ddb3e647da13a4b5c734dc16f8cc77'
4
- data.tar.gz: 1fce9fc906cbf72083a4f8132c0ac1d985a95d6d
2
+ SHA256:
3
+ metadata.gz: 4209a2b6eaaf0b6f8ba315b6f5de6690e28fb47890aeea777bdb31889e4785ab
4
+ data.tar.gz: b0983fb4fa16d72059b0e679ea4ee627d19e805779fa010888fa1723354896a5
5
5
  SHA512:
6
- metadata.gz: 04cfd6d3080d9424e25bd75ae1a9600259fe94ed933adceab66c02eb11afdb49eeddc393c305f0927dd64f967d1e72835fde9566cd54b2e53805e85ffe7a1516
7
- data.tar.gz: 8de74527cf12be2c6553e4a582cc25c47a1773cdc165800f212aae563f7ffa048679260515a51f55e244b641b968badeaa4349cf4369ad2363d22aff1c1cbe7d
6
+ metadata.gz: a6fc6891eda12bbc1272af7af9c4e8d48e588bc7ef65153b3a7524e39468baebb8fdb925856d1850bbda12fed5d33865faa56542503f76fdf724a18937c7d56e
7
+ data.tar.gz: fff0599b6a838cb4ff233ba9585b558ff733eed8063c1cf36ee08aaacb9b3c2ca1bce4d13db2a51ecc72c398ba751a18b2856a6348f43738ee8ca366becdea61
data/.github/workflows/linux.yml ADDED
@@ -0,0 +1,31 @@
1
+ name: Testing on Ubuntu
2
+ on:
3
+ - push
4
+ - pull_request
5
+ jobs:
6
+ build:
7
+ runs-on: ${{ matrix.os }}
8
+ strategy:
9
+ fail-fast: false
10
+ matrix:
11
+ ruby:
12
+ - 2.6
13
+ - 2.7
14
+ - 3.0
15
+ - 3.1
16
+ os:
17
+ - ubuntu-latest
18
+ name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
19
+ steps:
20
+ - uses: actions/checkout@v2
21
+ - uses: ruby/setup-ruby@v1
22
+ with:
23
+ ruby-version: ${{ matrix.ruby }}
24
+ - name: unit testing
25
+ env:
26
+ CI: true
27
+ run: |
28
+ ruby -v
29
+ gem install bundler rake
30
+ bundle install --jobs 4 --retry 3
31
+ bundle exec rake test
data/.github/workflows/windows.yml ADDED
@@ -0,0 +1,27 @@
1
+ name: Testing on Windows
2
+ on:
3
+ - push
4
+ - pull_request
5
+ jobs:
6
+ build:
7
+ runs-on: ${{ matrix.os }}
8
+ strategy:
9
+ fail-fast: false
10
+ matrix:
11
+ ruby: [ '2.6', '2.7', '3.0', '3.1' ]
12
+ os:
13
+ - windows-latest
14
+ name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
15
+ steps:
16
+ - uses: actions/checkout@v2
17
+ - uses: ruby/setup-ruby@v1
18
+ with:
19
+ ruby-version: ${{ matrix.ruby }}
20
+ - name: unit testing
21
+ env:
22
+ CI: true
23
+ run: |
24
+ ruby -v
25
+ gem install bundler rake
26
+ bundle install --jobs 4 --retry 3
27
+ bundle exec rake test
data/README.md CHANGED
@@ -1,6 +1,12 @@
1
1
  # fluent-plugin-bigquery
2
2
 
3
- **This README is for v2.0.0.beta. but it is not released yet. sorry.**
3
+ ## Notice
4
+
5
+ We will transfer the fluent-plugin-bigquery repository to the [fluent-plugins-nursery](https://github.com/fluent-plugins-nursery) organization.
6
+ This does not change the maintenance plan.
7
+ The main purpose is to resolve the mismatch between the maintainers and the current organization.
8
+
9
+ ---
4
10
 
5
11
  [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.
6
12
 
@@ -18,11 +24,13 @@
18
24
  Current version of this plugin supports Google API with Service Account Authentication, but does not support
19
25
  OAuth flow for installed applications.
20
26
 
21
- ## Version Information
22
- v1.0.0 or later supports fluentd-0.14.0 or later.
23
- If you use fluentd-0.12.x, please use v0.4.x.
27
+ ## Supported Versions
24
28
 
25
- I recommend to update fluentd version to v0.14.x or later.
29
+ | plugin version | fluentd version | ruby version |
30
+ | :----------- | :----------- | :----------- |
31
+ | v0.4.x | 0.12.x | 2.0 or later |
32
+ | v1.x.x | 0.14.x or later | 2.2 or later |
33
+ | v2.x.x | 0.14.x or later | 2.3 or later |
26
34
 
27
35
  ## With docker image
28
36
  If you use official alpine based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
@@ -37,28 +45,31 @@ Because embbeded gem dependency sometimes restricts ruby environment.
37
45
 
38
46
  #### common
39
47
 
40
- | name | type | required? | placeholder? | default | description |
41
- | :------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
42
- | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
43
- | email | string | yes (private_key) | no | nil | GCP Service Account Email |
44
- | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
45
- | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
46
- | json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
47
- | project | string | yes | yes | nil | |
48
- | dataset | string | yes | yes | nil | |
49
- | table | string | yes (either `tables`) | yes | nil | |
50
- | tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
51
- | auto_create_table | bool | no | no | false | If true, creates table automatically |
52
- | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
53
- | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
54
- | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
55
- | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
56
- | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
57
- | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
58
- | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
59
- | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
60
- | time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
61
- | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
48
+ | name | type | required? | placeholder? | default | description |
49
+ | :-------------------------------------------- | :------------ | :----------- | :---------- | :------------------------- | :----------------------- |
50
+ | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
51
+ | email | string | yes (private_key) | no | nil | GCP Service Account Email |
52
+ | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
53
+ | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
54
+ | json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
55
+ | location | string | no | no | nil | BigQuery Data Location. The geographic location of the job. Required except for US and EU. |
56
+ | project | string | yes | yes | nil | |
57
+ | dataset | string | yes | yes | nil | |
58
+ | table | string | yes (either `tables`) | yes | nil | |
59
+ | tables | array(string) | yes (either `table`) | yes | nil | can set multiple table names separated by `,` |
60
+ | auto_create_table | bool | no | no | false | If true, creates table automatically |
61
+ | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
62
+ | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema definition, formatted as JSON. |
63
+ | schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema definition file path, formatted as JSON. |
64
+ | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch the table schema definition from the BigQuery table automatically. |
65
+ | fetch_schema_table | string | no | yes | nil | If set, fetch the table schema definition from this table. If `fetch_schema` is false, this parameter is ignored. |
66
+ | schema_cache_expire | integer | no | no | 600 | Value is in seconds. If the current time is past the expiration interval, the table schema definition is re-fetched. |
67
+ | request_timeout_sec | integer | no | no | nil | BigQuery API response timeout |
68
+ | request_open_timeout_sec | integer | no | no | 60 | BigQuery API connection and request timeout. If you send large data to BigQuery, set a larger value. |
69
+ | time_partitioning_type | enum | no (either day) | no | nil | Type of the BigQuery time partitioning feature. |
70
+ | time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition. |
71
+ | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for BigQuery time partitioning. |
72
+ | clustering_fields | array(string) | no | no | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |
62
73
 
63
74
  #### bigquery_insert
64
75
 
@@ -69,6 +80,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
69
80
  | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
70
81
  | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
71
82
  | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
83
+ | require_partition_filter | bool | no | no | false | If true, queries over this table must specify a partition filter that can be used for partition elimination. |
72
84
 
73
85
  #### bigquery_load
74
86
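
The rows above document the options added in this release: `location`, `time_partitioning_field`, and `clustering_fields` (common), plus `require_partition_filter` (`bigquery_insert`). Below is a minimal sketch of how they might be combined; the project, dataset, table, key path, and region are placeholder values, not taken from this diff.

```apache
<match dummy>
  @type bigquery_insert

  auth_method json_key
  json_key /path/to/key.json        # placeholder credential path

  project yourproject_id
  dataset yourdataset_id
  table   access_log
  location asia-northeast1          # only required outside US and EU

  auto_create_table true
  schema_path /path/to/access_log.schema

  # partition on the `time` column and require queries to filter by partition
  time_partitioning_type day
  time_partitioning_field time
  time_partitioning_expiration 1h
  require_partition_filter true

  # cluster rows within each partition
  clustering_fields ["vhost", "path"]
</match>
```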
 
@@ -431,7 +443,7 @@ Use placeholder.
431
443
 
432
444
  ```apache
433
445
  <match dummy>
434
- @type bigquery_insert
446
+ @type bigquery_load
435
447
 
436
448
  ...
437
449
  table accesslog$%Y%m%d
@@ -444,6 +456,8 @@ Use placeholder.
444
456
  ```
445
457
 
446
458
  But, Dynamic table creating doesn't support date partitioned table yet.
459
+ Also, streaming inserts cannot write to a table name with the `$%Y%m%d` suffix.
460
+ If you use a date-partitioned table with streaming inserts, please omit the `$%Y%m%d` suffix from `table`.
447
461
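
To make the distinction concrete, here is a sketch with placeholder names: `bigquery_load` can keep the `$%Y%m%d` decorator, while `bigquery_insert` should target the bare table name (optionally using column-based partitioning via `time_partitioning_field`).

```apache
# load jobs: the partition decorator is allowed
<match dummy.load>
  @type bigquery_load
  ...
  table accesslog$%Y%m%d
</match>

# streaming inserts: omit the decorator
<match dummy.insert>
  @type bigquery_insert
  ...
  table accesslog
  time_partitioning_type day
  time_partitioning_field time
</match>
```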
 
448
462
  ### Dynamic table creating
449
463
 
@@ -465,6 +479,8 @@ NOTE: `auto_create_table` option cannot be used with `fetch_schema`. You should
465
479
  </match>
466
480
  ```
467
481
 
482
+ Also, you can create a clustered table by using `clustering_fields`.
483
+
468
484
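
A sketch of an auto-created clustered table, adapted from the test configuration added in this release (names and paths are placeholders); here clustering is combined with time partitioning, as in the tests.

```apache
<match dummy>
  @type bigquery_insert
  ...
  auto_create_table true
  schema_path /path/to/apache.schema

  time_partitioning_type day
  time_partitioning_field time

  clustering_fields [
    "time",
    "vhost"
  ]
</match>
```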
  ### Table schema
469
485
 
470
486
  There are three methods to describe the schema of the target table.
@@ -7,10 +7,9 @@ module Fluent
7
7
  RETRYABLE_STATUS_CODE = [500, 502, 503, 504]
8
8
 
9
9
  class << self
10
- def wrap(google_api_error, message = nil, force_unretryable: false)
11
- e = google_api_error
12
- return UnRetryableError.new(message, e) if force_unretryable
13
-
10
+ # @param e [Google::Apis::Error]
11
+ # @param message [String]
12
+ def wrap(e, message = nil)
14
13
  if retryable_error?(e)
15
14
  RetryableError.new(message, e)
16
15
  else
@@ -18,12 +17,9 @@ module Fluent
18
17
  end
19
18
  end
20
19
 
21
- def retryable_error?(google_api_error)
22
- e = google_api_error
23
- reason = e.respond_to?(:reason) ? e.reason : nil
24
-
25
- retryable_error_reason?(reason) ||
26
- (e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code))
20
+ # @param e [Google::Apis::Error]
21
+ def retryable_error?(e)
22
+ e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code)
27
23
  end
28
24
 
29
25
  def retryable_error_reason?(reason)
@@ -86,6 +86,16 @@ module Fluent
86
86
  end
87
87
  end
88
88
 
89
+ class NumericFieldSchema < FieldSchema
90
+ def type
91
+ :numeric
92
+ end
93
+
94
+ def format_one(value)
95
+ value.to_s
96
+ end
97
+ end
98
+
89
99
  class BooleanFieldSchema < FieldSchema
90
100
  def type
91
101
  :boolean
@@ -169,6 +179,7 @@ module Fluent
169
179
  string: StringFieldSchema,
170
180
  integer: IntegerFieldSchema,
171
181
  float: FloatFieldSchema,
182
+ numeric: NumericFieldSchema,
172
183
  boolean: BooleanFieldSchema,
173
184
  timestamp: TimestampFieldSchema,
174
185
  date: DateFieldSchema,
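
The new `NumericFieldSchema` above registers a `numeric` field type and, per its `format_one`, serializes values with `to_s`, so NUMERIC values are sent to BigQuery as strings. A hypothetical schema entry using it might look like this (column names are illustrative; `utilisation` mirrors the test schema added in this release):

```apache
<match dummy>
  @type bigquery_insert
  ...
  schema [
    {"name": "time", "type": "TIMESTAMP"},
    {"name": "utilisation", "type": "NUMERIC"}
  ]
</match>
```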
@@ -1,5 +1,5 @@
1
1
  module Fluent
2
2
  module BigQueryPlugin
3
- VERSION = "2.0.0.beta".freeze
3
+ VERSION = "2.3.0".freeze
4
4
  end
5
5
  end
@@ -34,12 +34,9 @@ module Fluent
34
34
  }
35
35
  }
36
36
 
37
- if @options[:time_partitioning_type]
38
- definition[:time_partitioning] = {
39
- type: @options[:time_partitioning_type].to_s.upcase,
40
- expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
41
- }.select { |_, value| !value.nil? }
42
- end
37
+ definition.merge!(time_partitioning: time_partitioning) if time_partitioning
38
+ definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
39
+ definition.merge!(clustering: clustering) if clustering
43
40
  client.insert_table(project, dataset, definition, {})
44
41
  log.debug "create table", project_id: project, dataset: dataset, table: table_id
45
42
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -50,10 +47,9 @@ module Fluent
50
47
  return
51
48
  end
52
49
 
53
- reason = e.respond_to?(:reason) ? e.reason : nil
54
- log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message, reason: reason
50
+ log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message
55
51
 
56
- if Fluent::BigQuery::Error.retryable_error_reason?(reason) && create_table_retry_count < create_table_retry_limit
52
+ if create_table_retry_count < create_table_retry_limit
57
53
  sleep create_table_retry_wait
58
54
  create_table_retry_wait *= 2
59
55
  create_table_retry_count += 1
@@ -76,14 +72,19 @@ module Fluent
76
72
  nil
77
73
  end
78
74
 
79
- def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
75
+ def insert_rows(project, dataset, table_id, rows, schema, template_suffix: nil)
80
76
  body = {
81
77
  rows: rows,
82
78
  skip_invalid_rows: @options[:skip_invalid_rows],
83
79
  ignore_unknown_values: @options[:ignore_unknown_values],
84
80
  }
85
81
  body.merge!(template_suffix: template_suffix) if template_suffix
86
- res = client.insert_all_table_data(project, dataset, table_id, body, {})
82
+
83
+ if @options[:auto_create_table]
84
+ res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
85
+ else
86
+ res = client.insert_all_table_data(project, dataset, table_id, body, {})
87
+ end
87
88
  log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
88
89
 
89
90
  if res.insert_errors && !res.insert_errors.empty?
@@ -100,8 +101,7 @@ module Fluent
100
101
  end
101
102
  end
102
103
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
103
- reason = e.respond_to?(:reason) ? e.reason : nil
104
- error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason }
104
+ error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message }
105
105
  wrapped = Fluent::BigQuery::Error.wrap(e)
106
106
  if wrapped.retryable?
107
107
  log.warn "tabledata.insertAll API", error_data
@@ -131,9 +131,6 @@ module Fluent
131
131
  dataset_id: dataset,
132
132
  table_id: table_id,
133
133
  },
134
- schema: {
135
- fields: fields.to_a,
136
- },
137
134
  write_disposition: "WRITE_APPEND",
138
135
  source_format: source_format,
139
136
  ignore_unknown_values: @options[:ignore_unknown_values],
@@ -143,17 +140,19 @@ module Fluent
143
140
  }
144
141
 
145
142
  job_id = create_job_id(chunk_id_hex, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
146
- configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
147
143
  configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id
148
144
 
149
- # If target table is already exist, omit schema configuration.
150
- # Because schema changing is easier.
151
145
  begin
152
- if client.get_table(project, dataset, table_id)
153
- configuration[:configuration][:load].delete(:schema)
146
+ # Check table existance
147
+ client.get_table(project, dataset, table_id)
148
+ rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
149
+ if e.status_code == 404 && /Not Found: Table/i =~ e.message
150
+ raise Fluent::BigQuery::UnRetryableError.new("Table is not found") unless @options[:auto_create_table]
151
+ raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
152
+ configuration[:configuration][:load].merge!(schema: {fields: fields.to_a})
153
+ configuration[:configuration][:load].merge!(time_partitioning: time_partitioning) if time_partitioning
154
+ configuration[:configuration][:load].merge!(clustering: clustering) if clustering
154
155
  end
155
- rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError
156
- raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
157
156
  end
158
157
 
159
158
  res = client.insert_job(
@@ -166,19 +165,7 @@ module Fluent
166
165
  )
167
166
  JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
168
167
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
169
- reason = e.respond_to?(:reason) ? e.reason : nil
170
- log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
171
-
172
- if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
173
- # Table Not Found: Auto Create Table
174
- create_table(
175
- project,
176
- dataset,
177
- table_id,
178
- fields,
179
- )
180
- raise "table created. send rows next time."
181
- end
168
+ log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message
182
169
 
183
170
  if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
184
171
  return JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, job_id)
@@ -190,8 +177,9 @@ module Fluent
190
177
  def fetch_load_job(job_reference)
191
178
  project = job_reference.project_id
192
179
  job_id = job_reference.job_id
180
+ location = @options[:location]
193
181
 
194
- res = client.get_job(project, job_id)
182
+ res = client.get_job(project, job_id, location: location)
195
183
  log.debug "load job fetched", id: job_id, state: res.status.state, **job_reference.as_hash(:project_id, :dataset_id, :table_id)
196
184
 
197
185
  if res.status.state == "DONE"
@@ -227,9 +215,10 @@ module Fluent
227
215
  end
228
216
  end
229
217
 
218
+ # `stats` can be nil if we receive a warning like "Warning: Load job succeeded with data imported, however statistics may be lost due to internal error."
230
219
  stats = response.statistics.load
231
220
  duration = (response.statistics.end_time - response.statistics.creation_time) / 1000.0
232
- log.debug "load job finished", id: job_id, state: response.status.state, input_file_bytes: stats.input_file_bytes, input_files: stats.input_files, output_bytes: stats.output_bytes, output_rows: stats.output_rows, bad_records: stats.bad_records, duration: duration.round(2), project_id: project, dataset: dataset, table: table_id
221
+ log.debug "load job finished", id: job_id, state: response.status.state, input_file_bytes: stats&.input_file_bytes, input_files: stats&.input_files, output_bytes: stats&.output_bytes, output_rows: stats&.output_rows, bad_records: stats&.bad_records, duration: duration.round(2), project_id: project, dataset: dataset, table: table_id
233
222
  @num_errors_per_chunk.delete(chunk_id_hex)
234
223
  end
235
224
 
@@ -315,6 +304,64 @@ module Fluent
315
304
  "NEWLINE_DELIMITED_JSON"
316
305
  end
317
306
  end
307
+
308
+ def time_partitioning
309
+ return @time_partitioning if instance_variable_defined?(:@time_partitioning)
310
+
311
+ if @options[:time_partitioning_type]
312
+ @time_partitioning = {
313
+ type: @options[:time_partitioning_type].to_s.upcase,
314
+ field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
315
+ expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil,
316
+ }.reject { |_, v| v.nil? }
317
+ else
318
+ @time_partitioning
319
+ end
320
+ end
321
+
322
+ def require_partition_filter
323
+ return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
324
+
325
+ if @options[:require_partition_filter]
326
+ @require_partition_filter = @options[:require_partition_filter]
327
+ else
328
+ @require_partition_filter
329
+ end
330
+ end
331
+
332
+ def clustering
333
+ return @clustering if instance_variable_defined?(:@clustering)
334
+
335
+ if @options[:clustering_fields]
336
+ @clustering = {
337
+ fields: @options[:clustering_fields]
338
+ }
339
+ else
340
+ @clustering
341
+ end
342
+ end
343
+
344
+ def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
345
+ try_count ||= 1
346
+ res = client.insert_all_table_data(project, dataset, table_id, body, {})
347
+ rescue Google::Apis::ClientError => e
348
+ if e.status_code == 404 && /Not Found: Table/i =~ e.message
349
+ if try_count == 1
350
+ # Table Not Found: Auto Create Table
351
+ create_table(project, dataset, table_id, schema)
352
+ elsif try_count > 10
353
+ raise "A new table was created but it is not found."
354
+ end
355
+
356
+ # Retry the insert several times because the created table is not visible to streaming inserts for a little while
357
+ # cf. https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
358
+ try_count += 1
359
+ sleep 5
360
+ log.debug "Retry to insert rows", project_id: project, dataset: dataset, table: table_id
361
+ retry
362
+ end
363
+ raise
364
+ end
318
365
  end
319
366
  end
320
367
  end
@@ -29,6 +29,9 @@ module Fluent
29
29
  config_param :private_key_path, :string, default: nil
30
30
  config_param :private_key_passphrase, :string, default: 'notasecret', secret: true
31
31
  config_param :json_key, default: nil, secret: true
32
+ # The geographic location of the job. Required except for US and EU.
33
+ # https://github.com/googleapis/google-api-ruby-client/blob/master/generated/google/apis/bigquery_v2/service.rb#L350
34
+ config_param :location, :string, default: nil
32
35
 
33
36
  # see as simple reference
34
37
  # https://github.com/abronte/BigQuery/blob/master/lib/bigquery.rb
@@ -67,8 +70,12 @@ module Fluent
67
70
 
68
71
  ## Partitioning
69
72
  config_param :time_partitioning_type, :enum, list: [:day], default: nil
73
+ config_param :time_partitioning_field, :string, default: nil
70
74
  config_param :time_partitioning_expiration, :time, default: nil
71
75
 
76
+ ## Clustering
77
+ config_param :clustering_fields, :array, default: nil
78
+
72
79
  ## Formatter
73
80
  config_section :format do
74
81
  config_set_default :@type, 'json'
@@ -104,9 +111,6 @@ module Fluent
104
111
  if @schema
105
112
  @table_schema.load_schema(@schema)
106
113
  end
107
- if @schema_path
108
- @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
109
- end
110
114
 
111
115
  formatter_config = conf.elements("format")[0]
112
116
  @formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
@@ -119,6 +123,7 @@ module Fluent
119
123
  @tables_mutex = Mutex.new
120
124
  @fetched_schemas = {}
121
125
  @last_fetch_schema_time = Hash.new(0)
126
+ @read_schemas = {}
122
127
  end
123
128
 
124
129
  def multi_workers_ready?
@@ -130,6 +135,7 @@ module Fluent
130
135
  private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
131
136
  email: @email,
132
137
  json_key: @json_key,
138
+ location: @location,
133
139
  source_format: @source_format,
134
140
  skip_invalid_rows: @skip_invalid_rows,
135
141
  ignore_unknown_values: @ignore_unknown_values,
@@ -138,7 +144,10 @@ module Fluent
138
144
  prevent_duplicate_load: @prevent_duplicate_load,
139
145
  auto_create_table: @auto_create_table,
140
146
  time_partitioning_type: @time_partitioning_type,
147
+ time_partitioning_field: @time_partitioning_field,
141
148
  time_partitioning_expiration: @time_partitioning_expiration,
149
+ require_partition_filter: @require_partition_filter,
150
+ clustering_fields: @clustering_fields,
142
151
  timeout_sec: @request_timeout_sec,
143
152
  open_timeout_sec: @request_open_timeout_sec,
144
153
  })
@@ -151,6 +160,8 @@ module Fluent
151
160
  schema =
152
161
  if @fetch_schema
153
162
  fetch_schema(meta)
163
+ elsif @schema_path
164
+ read_schema(meta)
154
165
  else
155
166
  @table_schema
156
167
  end
@@ -182,7 +193,7 @@ module Fluent
182
193
  table_schema.load_schema(schema)
183
194
  @fetched_schemas["#{project}.#{dataset}.#{table_id}"] = table_schema
184
195
  else
185
- if @fetched_schemas["#{project}.#{dataset}.#{table_id}"].empty?
196
+ if @fetched_schemas["#{project}.#{dataset}.#{table_id}"].nil?
186
197
  raise "failed to fetch schema from bigquery"
187
198
  else
188
199
  log.warn "#{table_id} uses previous schema"
@@ -199,9 +210,26 @@ module Fluent
199
210
  extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
200
211
  end
201
212
 
213
+ def read_schema(metadata)
214
+ schema_path = read_schema_target_path(metadata)
215
+
216
+ unless @read_schemas[schema_path]
217
+ table_schema = Fluent::BigQuery::RecordSchema.new("record")
218
+ table_schema.load_schema(MultiJson.load(File.read(schema_path)))
219
+ @read_schemas[schema_path] = table_schema
220
+ end
221
+ @read_schemas[schema_path]
222
+ end
223
+
224
+ def read_schema_target_path(metadata)
225
+ extract_placeholders(@schema_path, metadata)
226
+ end
227
+
202
228
  def get_schema(project, dataset, metadata)
203
229
  if @fetch_schema
204
230
  @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
231
+ elsif @schema_path
232
+ @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
205
233
  else
206
234
  @table_schema
207
235
  end
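
With the `read_schema` helper above, `schema_path` now goes through `extract_placeholders`, so it can vary per chunk the same way `table` does. A sketch based on the new test case (tag and paths are placeholders):

```apache
<match dummy>
  @type bigquery_insert
  ...
  table ${tag}_%Y%m%d
  schema_path /path/to/schemas/${tag}.schema

  <buffer tag, time>
    timekey 1d
  </buffer>
</match>
```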
@@ -29,6 +29,9 @@ module Fluent
29
29
  # If insert_id_field is not specified, true means to allow duplicate rows
30
30
  config_param :allow_retry_insert_errors, :bool, default: false
31
31
 
32
+ ## RequirePartitionFilter
33
+ config_param :require_partition_filter, :bool, default: false
34
+
32
35
  ## Buffer
33
36
  config_section :buffer do
34
37
  config_set_default :@type, "memory"
@@ -96,14 +99,8 @@ module Fluent
96
99
  end
97
100
 
98
101
  def insert(project, dataset, table_id, rows, schema, template_suffix)
99
- writer.insert_rows(project, dataset, table_id, rows, template_suffix: template_suffix)
102
+ writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
100
103
  rescue Fluent::BigQuery::Error => e
101
- if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
102
- # Table Not Found: Auto Create Table
103
- writer.create_table(project, dataset, table_id, schema)
104
- raise "table created. send rows next time."
105
- end
106
-
107
104
  raise if e.retryable?
108
105
 
109
106
  if @secondary
@@ -196,6 +196,7 @@ module Fluent
196
196
  rescue => e
197
197
  log.error("unexpected error while polling", error: e)
198
198
  log.error_backtrace
199
+ rollback_write(job_reference.chunk_id)
199
200
  end
200
201
  end
201
202
 
@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
147
147
  assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
148
148
  end
149
149
 
150
- def test_configure_auth_json_key_as_file_raise_permission_error
151
- json_key_path = 'test/plugin/testdata/json_key.json'
152
- json_key_path_dir = File.dirname(json_key_path)
153
-
154
- begin
155
- File.chmod(0000, json_key_path_dir)
156
-
157
- driver = create_driver(%[
158
- table foo
159
- auth_method json_key
160
- json_key #{json_key_path}
161
- project yourproject_id
162
- dataset yourdataset_id
163
- schema [
164
- {"name": "time", "type": "INTEGER"},
165
- {"name": "status", "type": "INTEGER"},
166
- {"name": "bytes", "type": "INTEGER"}
167
- ]
168
- ])
169
- assert_raises(Errno::EACCES) do
170
- driver.instance.writer.client
171
- end
172
- ensure
173
- File.chmod(0755, json_key_path_dir)
174
- end
175
- end
176
-
177
150
  def test_configure_auth_json_key_as_string
178
151
  json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
179
152
  json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
199
172
  end
200
173
 
201
174
  def test_configure_auth_application_default
175
+ omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
176
+
202
177
  driver = create_driver(%[
203
178
  table foo
204
179
  auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
576
551
  assert_equal :string, table_schema["argv"].type
577
552
  assert_equal :repeated, table_schema["argv"].mode
578
553
  end
554
+
555
+ def test_resolve_schema_path_with_placeholder
556
+ now = Time.now.to_i
557
+ driver = create_driver(<<-CONFIG)
558
+ table ${tag}_%Y%m%d
559
+ auth_method json_key
560
+ json_key jsonkey.josn
561
+ project yourproject_id
562
+ dataset yourdataset_id
563
+ schema_path ${tag}.schema
564
+
565
+ <buffer tag, time>
566
+ timekey 1d
567
+ </buffer>
568
+ CONFIG
569
+
570
+ metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
571
+
572
+ assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
573
+ end
579
574
  end
@@ -5,6 +5,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
5
5
  Fluent::Test.setup
6
6
  end
7
7
 
8
+ SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
9
+
8
10
  CONFIG = %[
9
11
  table foo
10
12
  email foo@bar.example
@@ -121,7 +123,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
121
123
  driver = create_driver
122
124
 
123
125
  stub_writer do |writer|
124
- mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
125
126
  mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
126
127
  rows: [{json: hash_including(entry)}],
127
128
  skip_invalid_rows: false,
@@ -261,7 +262,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
261
262
 
262
263
  driver.instance_start
263
264
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
264
- metadata = driver.instance.metadata_for_test(tag, time, record)
265
+ metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
265
266
  chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
266
267
  c.append([driver.instance.format(tag, time, record)])
267
268
  end
@@ -345,11 +346,27 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
345
346
  schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
346
347
  CONFIG
347
348
 
349
+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
350
+
348
351
  stub_writer do |writer|
349
- mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}], template_suffix: nil) do
350
- raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
351
- end
352
- mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
352
+ body = {
353
+ rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
354
+ skip_invalid_rows: false,
355
+ ignore_unknown_values: false,
356
+ }
357
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
358
+ raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
359
+ end.at_least(1)
360
+ mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
361
+
362
+ mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
363
+ table_reference: {
364
+ table_id: 'foo',
365
+ },
366
+ schema: {
367
+ fields: schema_fields,
368
+ },
369
+ }, {})
353
370
  end
354
371
 
355
372
  assert_raise(RuntimeError) do
@@ -401,14 +418,131 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
401
418
  schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
402
419
 
403
420
  time_partitioning_type day
421
+ time_partitioning_field time
404
422
  time_partitioning_expiration 1h
423
+
424
+ require_partition_filter true
405
425
  CONFIG
406
426
 
427
+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
428
+
407
429
  stub_writer do |writer|
408
- mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) do
409
- raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
430
+ body = {
431
+ rows: [message],
432
+ skip_invalid_rows: false,
433
+ ignore_unknown_values: false,
434
+ }
435
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
436
+ raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
437
+ end.at_least(1)
438
+ mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
439
+
440
+ mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
441
+ table_reference: {
442
+ table_id: 'foo',
443
+ },
444
+ schema: {
445
+ fields: schema_fields,
446
+ },
447
+ time_partitioning: {
448
+ type: 'DAY',
449
+ field: 'time',
450
+ expiration_ms: 3600000,
451
+ },
452
+ require_partition_filter: true,
453
+ }, {})
454
+ end
455
+
456
+ assert_raise(RuntimeError) do
457
+ driver.run do
458
+ driver.feed("tag", Fluent::EventTime.now, message[:json])
410
459
  end
411
- mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
460
+ end
461
+ end
462
+
463
+ def test_auto_create_clustered_table_by_bigquery_api
464
+ now = Time.now
465
+ message = {
466
+ json: {
467
+ time: now.to_i,
468
+ request: {
469
+ vhost: "bar",
470
+ path: "/path/to/baz",
471
+ method: "GET",
472
+ protocol: "HTTP/1.0",
473
+ agent: "libwww",
474
+ referer: "http://referer.example",
475
+ time: (now - 1).to_f,
476
+ bot_access: true,
477
+ loginsession: false,
478
+ },
479
+ remote: {
480
+ host: "remote.example",
481
+ ip: "192.168.1.1",
482
+ user: "nagachika",
483
+ },
484
+ response: {
485
+ status: 200,
486
+ bytes: 72,
487
+ },
488
+ }
489
+ }
490
+
491
+ driver = create_driver(<<-CONFIG)
492
+ table foo
493
+ email foo@bar.example
494
+ private_key_path /path/to/key
495
+ project yourproject_id
496
+ dataset yourdataset_id
497
+
498
+ time_format %s
499
+ time_field time
500
+
501
+ auto_create_table true
502
+ schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
503
+
504
+ time_partitioning_type day
505
+ time_partitioning_field time
506
+ time_partitioning_expiration 1h
507
+
508
+ clustering_fields [
509
+ "time",
510
+ "vhost"
511
+ ]
512
+ CONFIG
513
+
514
+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
515
+
516
+ stub_writer do |writer|
517
+ body = {
518
+ rows: [message],
519
+ skip_invalid_rows: false,
520
+ ignore_unknown_values: false,
521
+ }
522
+ mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
523
+ raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
524
+ end.at_least(1)
525
+ mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
526
+
527
+ mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
528
+ table_reference: {
529
+ table_id: 'foo',
530
+ },
531
+ schema: {
532
+ fields: schema_fields,
533
+ },
534
+ time_partitioning: {
535
+ type: 'DAY',
536
+ field: 'time',
537
+ expiration_ms: 3600000,
538
+ },
539
+ clustering: {
540
+ fields: [
541
+ 'time',
542
+ 'vhost',
543
+ ],
544
+ },
545
+ }, {})
412
546
  end
413
547
 
414
548
  assert_raise(RuntimeError) do
@@ -39,10 +39,8 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
39
39
  writer
40
40
  end
41
41
  end
42
-
43
- def test_write
44
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
45
42
 
43
+ def test_write
46
44
  response_stub = stub!
47
45
 
48
46
  driver = create_driver
@@ -60,9 +58,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
60
58
  dataset_id: 'yourdataset_id',
61
59
  table_id: 'foo',
62
60
  },
63
- schema: {
64
- fields: schema_fields,
65
- },
66
61
  write_disposition: "WRITE_APPEND",
67
62
  source_format: "NEWLINE_DELIMITED_JSON",
68
63
  ignore_unknown_values: false,
@@ -99,7 +94,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
99
94
  schema_path #{SCHEMA_PATH}
100
95
  prevent_duplicate_load true
101
96
  CONFIG
102
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
103
97
 
104
98
  response_stub = stub!
105
99
  stub_writer do |writer|
@@ -116,9 +110,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
116
110
  dataset_id: 'yourdataset_id',
117
111
  table_id: 'foo',
118
112
  },
119
- schema: {
120
- fields: schema_fields,
121
- },
122
113
  write_disposition: "WRITE_APPEND",
123
114
  source_format: "NEWLINE_DELIMITED_JSON",
124
115
  ignore_unknown_values: false,
@@ -138,11 +129,10 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
138
129
 
139
130
  def test_write_with_retryable_error
140
131
  driver = create_driver
141
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
142
132
 
143
133
  driver.instance_start
144
134
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
145
- metadata = driver.instance.metadata_for_test(tag, time, record)
135
+ metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
146
136
  chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
147
137
  c.append([driver.instance.format(tag, time, record)])
148
138
  end
@@ -158,9 +148,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
158
148
  dataset_id: 'yourdataset_id',
159
149
  table_id: 'foo',
160
150
  },
161
- schema: {
162
- fields: schema_fields,
163
- },
164
151
  write_disposition: "WRITE_APPEND",
165
152
  source_format: "NEWLINE_DELIMITED_JSON",
166
153
  ignore_unknown_values: false,
@@ -171,7 +158,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
171
158
  stub!.job_reference.stub!.job_id { "dummy_job_id" }
172
159
  end
173
160
 
174
- mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
161
+ mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
175
162
  stub! do |s|
176
163
  s.id { 'dummy_job_id' }
177
164
  s.configuration.stub! do |_s|
@@ -225,11 +212,10 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
225
212
  utc
226
213
  </secondary>
227
214
  CONFIG
228
- schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
229
215
 
230
216
  driver.instance_start
231
217
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
232
- metadata = driver.instance.metadata_for_test(tag, time, record)
218
+ metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
233
219
  chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
234
220
  c.append([driver.instance.format(tag, time, record)])
235
221
  end
@@ -245,9 +231,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
245
231
  dataset_id: 'yourdataset_id',
246
232
  table_id: 'foo',
247
233
  },
248
- schema: {
249
- fields: schema_fields,
250
- },
251
234
  write_disposition: "WRITE_APPEND",
252
235
  source_format: "NEWLINE_DELIMITED_JSON",
253
236
  ignore_unknown_values: false,
@@ -258,7 +241,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
258
241
  stub!.job_reference.stub!.job_id { "dummy_job_id" }
259
242
  end
260
243
 
261
- mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
244
+ mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
262
245
  stub! do |s|
263
246
  s.id { 'dummy_job_id' }
264
247
  s.configuration.stub! do |_s|
@@ -289,6 +272,61 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
289
272
  driver.instance_shutdown
290
273
  end
291
274
 
275
+ def test_write_with_auto_create_table
276
+ driver = create_driver(<<-CONFIG)
277
+ table foo
278
+ email foo@bar.example
279
+ private_key_path /path/to/key
280
+ project yourproject_id
281
+ dataset yourdataset_id
282
+
283
+ <buffer>
284
+ @type memory
285
+ </buffer>
286
+
287
+ <inject>
288
+ time_format %s
289
+ time_key time
290
+ </inject>
291
+
292
+ auto_create_table true
293
+ schema_path #{SCHEMA_PATH}
294
+ CONFIG
295
+
296
+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
297
+
298
+ stub_writer do |writer|
299
+ mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') do
300
+ raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
301
+ end
302
+
303
+ mock(writer.client).insert_job('yourproject_id', {
304
+ configuration: {
305
+ load: {
306
+ destination_table: {
307
+ project_id: 'yourproject_id',
308
+ dataset_id: 'yourdataset_id',
309
+ table_id: 'foo',
310
+ },
311
+ write_disposition: "WRITE_APPEND",
312
+ source_format: "NEWLINE_DELIMITED_JSON",
313
+ ignore_unknown_values: false,
314
+ max_bad_records: 0,
315
+ schema: {
316
+ fields: schema_fields,
317
+ },
318
+ }
319
+ }
320
+ }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
321
+ stub!.job_reference.stub!.job_id { "dummy_job_id" }
322
+ end
323
+ end
324
+
325
+ driver.run do
326
+ driver.feed("tag", Time.now.to_i, {"a" => "b"})
327
+ end
328
+ end
329
+
292
330
  private
293
331
 
294
332
  def create_response_stub(response)
@@ -27,6 +27,11 @@ class RecordSchemaTest < Test::Unit::TestCase
27
27
  "name" => "argv",
28
28
  "type" => "STRING",
29
29
  "mode" => "REPEATED"
30
+ },
31
+ {
32
+ "name" => "utilisation",
33
+ "type" => "NUMERIC",
34
+ "mode" => "NULLABLE"
30
35
  }
31
36
  ]
32
37
  end
@@ -58,6 +63,11 @@ class RecordSchemaTest < Test::Unit::TestCase
58
63
  "type" => "STRING",
59
64
  "mode" => "REPEATED"
60
65
  },
66
+ {
67
+ "name" => "utilisation",
68
+ "type" => "NUMERIC",
69
+ "mode" => "NULLABLE"
70
+ },
61
71
  {
62
72
  "name" => "new_column",
63
73
  "type" => "STRING",
@@ -93,6 +103,11 @@ class RecordSchemaTest < Test::Unit::TestCase
93
103
  "type" => "STRING",
94
104
  "mode" => "REPEATED"
95
105
  },
106
+ {
107
+ "name" => "utilisation",
108
+ "type" => "NUMERIC",
109
+ "mode" => "NULLABLE"
110
+ }
96
111
  ]
97
112
  end
98
113
 
@@ -142,12 +157,12 @@ class RecordSchemaTest < Test::Unit::TestCase
142
157
  time = Time.local(2016, 2, 7, 19, 0, 0).utc
143
158
 
144
159
  formatted = fields.format_one({
145
- "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42]
160
+ "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilisation" => "0.837"
146
161
  })
147
162
  assert_equal(
148
163
  formatted,
149
164
  {
150
- "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"]
165
+ "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilisation" => "0.837"
151
166
  }
152
167
  )
153
168
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0.beta
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-03-29 00:00:00.000000000 Z
12
+ date: 2022-02-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -139,8 +139,9 @@ extensions: []
139
139
  extra_rdoc_files: []
140
140
  files:
141
141
  - ".github/ISSUE_TEMPLATE.md"
142
+ - ".github/workflows/linux.yml"
143
+ - ".github/workflows/windows.yml"
142
144
  - ".gitignore"
143
- - ".travis.yml"
144
145
  - Gemfile
145
146
  - LICENSE.txt
146
147
  - README.md
@@ -179,12 +180,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
179
180
  version: '0'
180
181
  required_rubygems_version: !ruby/object:Gem::Requirement
181
182
  requirements:
182
- - - ">"
183
+ - - ">="
183
184
  - !ruby/object:Gem::Version
184
- version: 1.3.1
185
+ version: '0'
185
186
  requirements: []
186
- rubyforge_project:
187
- rubygems_version: 2.6.12
187
+ rubygems_version: 3.1.4
188
188
  signing_key:
189
189
  specification_version: 4
190
190
  summary: Fluentd plugin to store data on Google BigQuery
data/.travis.yml DELETED
@@ -1,15 +0,0 @@
1
- language: ruby
2
-
3
- rvm:
4
- - 2.2
5
- - 2.3.3
6
- - 2.4.3
7
- - 2.5.0
8
-
9
- gemfile:
10
- - Gemfile
11
-
12
- before_install:
13
- - gem update bundler
14
-
15
- script: bundle exec rake test