fluent-plugin-bigquery 2.0.0.beta → 2.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
- SHA1:
-   metadata.gz: '07998acf05ddb3e647da13a4b5c734dc16f8cc77'
-   data.tar.gz: 1fce9fc906cbf72083a4f8132c0ac1d985a95d6d
+ SHA256:
+   metadata.gz: 4209a2b6eaaf0b6f8ba315b6f5de6690e28fb47890aeea777bdb31889e4785ab
+   data.tar.gz: b0983fb4fa16d72059b0e679ea4ee627d19e805779fa010888fa1723354896a5
  SHA512:
-   metadata.gz: 04cfd6d3080d9424e25bd75ae1a9600259fe94ed933adceab66c02eb11afdb49eeddc393c305f0927dd64f967d1e72835fde9566cd54b2e53805e85ffe7a1516
-   data.tar.gz: 8de74527cf12be2c6553e4a582cc25c47a1773cdc165800f212aae563f7ffa048679260515a51f55e244b641b968badeaa4349cf4369ad2363d22aff1c1cbe7d
+   metadata.gz: a6fc6891eda12bbc1272af7af9c4e8d48e588bc7ef65153b3a7524e39468baebb8fdb925856d1850bbda12fed5d33865faa56542503f76fdf724a18937c7d56e
+   data.tar.gz: fff0599b6a838cb4ff233ba9585b558ff733eed8063c1cf36ee08aaacb9b3c2ca1bce4d13db2a51ecc72c398ba751a18b2856a6348f43738ee8ca366becdea61
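The checksum block switches from SHA1 to SHA256 digests for the packaged gem files (the SHA512 entries change only in value). As a hedged sketch — the file names are assumed to be the two artifacts inside the `.gem` archive — the values can be reproduced with Ruby's standard `digest` library:

```ruby
require "digest"

# Recompute the published checksums next to the extracted gem artifacts
# (metadata.gz and data.tar.gz; paths are assumptions for illustration).
%w[metadata.gz data.tar.gz].each do |name|
  puts "SHA256 #{name}: #{Digest::SHA256.file(name).hexdigest}"
  puts "SHA512 #{name}: #{Digest::SHA512.file(name).hexdigest}"
end
```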
data/.github/workflows/linux.yml ADDED
@@ -0,0 +1,31 @@
+ name: Testing on Ubuntu
+ on:
+   - push
+   - pull_request
+ jobs:
+   build:
+     runs-on: ${{ matrix.os }}
+     strategy:
+       fail-fast: false
+       matrix:
+         ruby:
+           - 2.6
+           - 2.7
+           - 3.0
+           - 3.1
+         os:
+           - ubuntu-latest
+     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+     steps:
+       - uses: actions/checkout@v2
+       - uses: ruby/setup-ruby@v1
+         with:
+           ruby-version: ${{ matrix.ruby }}
+       - name: unit testing
+         env:
+           CI: true
+         run: |
+           ruby -v
+           gem install bundler rake
+           bundle install --jobs 4 --retry 3
+           bundle exec rake test
data/.github/workflows/windows.yml ADDED
@@ -0,0 +1,27 @@
+ name: Testing on Windows
+ on:
+   - push
+   - pull_request
+ jobs:
+   build:
+     runs-on: ${{ matrix.os }}
+     strategy:
+       fail-fast: false
+       matrix:
+         ruby: [ '2.6', '2.7', '3.0', '3.1' ]
+         os:
+           - windows-latest
+     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+     steps:
+       - uses: actions/checkout@v2
+       - uses: ruby/setup-ruby@v1
+         with:
+           ruby-version: ${{ matrix.ruby }}
+       - name: unit testing
+         env:
+           CI: true
+         run: |
+           ruby -v
+           gem install bundler rake
+           bundle install --jobs 4 --retry 3
+           bundle exec rake test
data/README.md CHANGED
@@ -1,6 +1,12 @@
  # fluent-plugin-bigquery

- **This README is for v2.0.0.beta. but it is not released yet. sorry.**
+ ## Notice
+
+ We will transfer the fluent-plugin-bigquery repository to the [fluent-plugins-nursery](https://github.com/fluent-plugins-nursery) organization.
+ This does not change the maintenance plan.
+ The main purpose is to resolve the mismatch between the maintainers and the current organization.
+
+ ---

  [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.

@@ -18,11 +24,13 @@
  Current version of this plugin supports Google API with Service Account Authentication, but does not support
  OAuth flow for installed applications.

- ## Version Information
- v1.0.0 or later supports fluentd-0.14.0 or later.
- If you use fluentd-0.12.x, please use v0.4.x.
+ ## Support Version

- I recommend to update fluentd version to v0.14.x or later.
+ | plugin version | fluentd version | ruby version |
+ | :--- | :--- | :--- |
+ | v0.4.x | 0.12.x | 2.0 or later |
+ | v1.x.x | 0.14.x or later | 2.2 or later |
+ | v2.x.x | 0.14.x or later | 2.3 or later |

  ## With docker image
  If you use official alpine based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
@@ -37,28 +45,31 @@ Because embedded gem dependency sometimes restricts ruby environment.

  #### common

- | name | type | required? | placeholder? | default | description |
- | :--- | :--- | :--- | :--- | :--- | :--- |
- | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
- | email | string | yes (private_key) | no | nil | GCP Service Account Email |
- | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
- | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
- | json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
- | project | string | yes | yes | nil | |
- | dataset | string | yes | yes | nil | |
- | table | string | yes (either `tables`) | yes | nil | |
- | tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
- | auto_create_table | bool | no | no | false | If true, creates table automatically |
- | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
- | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
- | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
- | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
- | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
- | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
- | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
- | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
- | time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature(experimental feature on BigQuery). |
- | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. (experimental feature on BigQuery) |
+ | name | type | required? | placeholder? | default | description |
+ | :--- | :--- | :--- | :--- | :--- | :--- |
+ | auth_method | enum | yes | no | private_key | `private_key` or `json_key` or `compute_engine` or `application_default` |
+ | email | string | yes (private_key) | no | nil | GCP Service Account Email |
+ | private_key_path | string | yes (private_key) | no | nil | GCP Private Key file path |
+ | private_key_passphrase | string | yes (private_key) | no | nil | GCP Private Key Passphrase |
+ | json_key | string | yes (json_key) | no | nil | GCP JSON Key file path or JSON Key string |
+ | location | string | no | no | nil | BigQuery Data Location. The geographic location of the job. Required except for US and EU. |
+ | project | string | yes | yes | nil | |
+ | dataset | string | yes | yes | nil | |
+ | table | string | yes (either `tables`) | yes | nil | |
+ | tables | array(string) | yes (either `table`) | yes | nil | can set multi table names splitted by `,` |
+ | auto_create_table | bool | no | no | false | If true, creates table automatically |
+ | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
+ | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
+ | schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema Definition file path. It is formatted by JSON. |
+ | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
+ | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
+ | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
+ | request_timeout_sec | integer | no | no | nil | Bigquery API response timeout |
+ | request_open_timeout_sec | integer | no | no | 60 | Bigquery API connection, and request timeout. If you send big data to Bigquery, set large value. |
+ | time_partitioning_type | enum | no (either day) | no | nil | Type of bigquery time partitioning feature. |
+ | time_partitioning_field | string | no | no | nil | Field used to determine how to create a time-based partition. |
+ | time_partitioning_expiration | time | no | no | nil | Expiration milliseconds for bigquery time partitioning. |
+ | clustering_fields | array(string) | no | no | nil | One or more fields on which data should be clustered. The order of the specified columns determines the sort order of the data. |

  #### bigquery_insert

@@ -69,6 +80,7 @@ Because embedded gem dependency sometimes restricts ruby environment.
  | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
  | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
  | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+ | require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. |

  #### bigquery_load

@@ -431,7 +443,7 @@ Use placeholder.

  ```apache
  <match dummy>
-   @type bigquery_insert
+   @type bigquery_load

    ...
    table accesslog$%Y%m%d
@@ -444,6 +456,8 @@ Use placeholder.
  ```

  But, dynamic table creating doesn't support date partitioned tables yet.
+ Also, streaming insert is not allowed to insert with the `$%Y%m%d` suffix.
+ If you use a date partitioned table with streaming insert, please omit the `$%Y%m%d` suffix from `table`.

  ### Dynamic table creating

@@ -465,6 +479,8 @@ NOTE: `auto_create_table` option cannot be used with `fetch_schema`. You should
  </match>
  ```

+ Also, you can create a clustered table by using `clustering_fields`.
+
  ### Table schema

  There are three methods to describe the schema of the target table.
@@ -7,10 +7,9 @@ module Fluent
    RETRYABLE_STATUS_CODE = [500, 502, 503, 504]

    class << self
-     def wrap(google_api_error, message = nil, force_unretryable: false)
-       e = google_api_error
-       return UnRetryableError.new(message, e) if force_unretryable
-
+     # @param e [Google::Apis::Error]
+     # @param message [String]
+     def wrap(e, message = nil)
        if retryable_error?(e)
          RetryableError.new(message, e)
        else
@@ -18,12 +17,9 @@ module Fluent
        end
      end

-     def retryable_error?(google_api_error)
-       e = google_api_error
-       reason = e.respond_to?(:reason) ? e.reason : nil
-
-       retryable_error_reason?(reason) ||
-         (e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code))
+     # @param e [Google::Apis::Error]
+     def retryable_error?(e)
+       e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code)
      end

      def retryable_error_reason?(reason)
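The practical effect of the simplified wrapping above: only a `Google::Apis::ServerError` whose status code is in `RETRYABLE_STATUS_CODE` is treated as retryable; the `force_unretryable` flag and the reason-based branch are gone. A minimal sketch of how callers use it (the error objects are constructed here purely for illustration; `wrap`, `retryable?`, and the error classes are the plugin's own):

```ruby
# A transient 503 from the BigQuery backend is wrapped as retryable.
server_error = Google::Apis::ServerError.new("backendError", status_code: 503)
Fluent::BigQuery::Error.wrap(server_error, "tabledata.insertAll failed").retryable? # => true

# A 404 "table not found" is a client error and therefore unretryable here;
# table auto-creation is handled separately by the writer (see below).
client_error = Google::Apis::ClientError.new("notFound: Not found: Table ...", status_code: 404)
Fluent::BigQuery::Error.wrap(client_error).retryable? # => false
```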
@@ -86,6 +86,16 @@ module Fluent
      end
    end

+   class NumericFieldSchema < FieldSchema
+     def type
+       :numeric
+     end
+
+     def format_one(value)
+       value.to_s
+     end
+   end
+
    class BooleanFieldSchema < FieldSchema
      def type
        :boolean
@@ -169,6 +179,7 @@ module Fluent
        string: StringFieldSchema,
        integer: IntegerFieldSchema,
        float: FloatFieldSchema,
+       numeric: NumericFieldSchema,
        boolean: BooleanFieldSchema,
        timestamp: TimestampFieldSchema,
        date: DateFieldSchema,
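With the new `NumericFieldSchema`, a `NUMERIC` column can now appear in the schema (see the `utilisation` field added to the tests later in this diff). Values are serialized with `to_s`, so BigQuery receives the decimal as a string and parses it exactly. A small sketch, assuming `FieldSchema` subclasses take the field name as their first argument:

```ruby
field = Fluent::BigQuery::NumericFieldSchema.new("utilisation")
field.type               # => :numeric
field.format_one(0.837)  # => "0.837"
field.format_one(42)     # => "42"
```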
@@ -1,5 +1,5 @@
  module Fluent
    module BigQueryPlugin
-     VERSION = "2.0.0.beta".freeze
+     VERSION = "2.3.0".freeze
    end
  end
@@ -34,12 +34,9 @@ module Fluent
        }
      }

-     if @options[:time_partitioning_type]
-       definition[:time_partitioning] = {
-         type: @options[:time_partitioning_type].to_s.upcase,
-         expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil
-       }.select { |_, value| !value.nil? }
-     end
+     definition.merge!(time_partitioning: time_partitioning) if time_partitioning
+     definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
+     definition.merge!(clustering: clustering) if clustering
      client.insert_table(project, dataset, definition, {})
      log.debug "create table", project_id: project, dataset: dataset, table: table_id
    rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -50,10 +47,9 @@ module Fluent
        return
      end

-     reason = e.respond_to?(:reason) ? e.reason : nil
-     log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message, reason: reason
+     log.error "tables.insert API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: message

-     if Fluent::BigQuery::Error.retryable_error_reason?(reason) && create_table_retry_count < create_table_retry_limit
+     if create_table_retry_count < create_table_retry_limit
        sleep create_table_retry_wait
        create_table_retry_wait *= 2
        create_table_retry_count += 1
@@ -76,14 +72,19 @@ module Fluent
      nil
    end

-   def insert_rows(project, dataset, table_id, rows, template_suffix: nil)
+   def insert_rows(project, dataset, table_id, rows, schema, template_suffix: nil)
      body = {
        rows: rows,
        skip_invalid_rows: @options[:skip_invalid_rows],
        ignore_unknown_values: @options[:ignore_unknown_values],
      }
      body.merge!(template_suffix: template_suffix) if template_suffix
-     res = client.insert_all_table_data(project, dataset, table_id, body, {})
+
+     if @options[:auto_create_table]
+       res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+     else
+       res = client.insert_all_table_data(project, dataset, table_id, body, {})
+     end
      log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size

      if res.insert_errors && !res.insert_errors.empty?
@@ -100,8 +101,7 @@ module Fluent
        end
      end
    rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-     reason = e.respond_to?(:reason) ? e.reason : nil
-     error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason }
+     error_data = { project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message }
      wrapped = Fluent::BigQuery::Error.wrap(e)
      if wrapped.retryable?
        log.warn "tabledata.insertAll API", error_data
@@ -131,9 +131,6 @@ module Fluent
            dataset_id: dataset,
            table_id: table_id,
          },
-         schema: {
-           fields: fields.to_a,
-         },
          write_disposition: "WRITE_APPEND",
          source_format: source_format,
          ignore_unknown_values: @options[:ignore_unknown_values],
@@ -143,17 +140,19 @@ module Fluent
      }

      job_id = create_job_id(chunk_id_hex, dataset, table_id, fields.to_a) if @options[:prevent_duplicate_load]
-     configuration[:configuration][:load].merge!(create_disposition: "CREATE_NEVER") if @options[:time_partitioning_type]
      configuration.merge!({job_reference: {project_id: project, job_id: job_id}}) if job_id

-     # If target table is already exist, omit schema configuration.
-     # Because schema changing is easier.
      begin
-       if client.get_table(project, dataset, table_id)
-         configuration[:configuration][:load].delete(:schema)
+       # Check table existance
+       client.get_table(project, dataset, table_id)
+     rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+       if e.status_code == 404 && /Not Found: Table/i =~ e.message
+         raise Fluent::BigQuery::UnRetryableError.new("Table is not found") unless @options[:auto_create_table]
+         raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
+         configuration[:configuration][:load].merge!(schema: {fields: fields.to_a})
+         configuration[:configuration][:load].merge!(time_partitioning: time_partitioning) if time_partitioning
+         configuration[:configuration][:load].merge!(clustering: clustering) if clustering
        end
-     rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError
-       raise Fluent::BigQuery::UnRetryableError.new("Schema is empty") if fields.empty?
      end

      res = client.insert_job(
@@ -166,19 +165,7 @@ module Fluent
      )
      JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
    rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-     reason = e.respond_to?(:reason) ? e.reason : nil
-     log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message, reason: reason
-
-     if @options[:auto_create_table] && e.status_code == 404 && /Not Found: Table/i =~ e.message
-       # Table Not Found: Auto Create Table
-       create_table(
-         project,
-         dataset,
-         table_id,
-         fields,
-       )
-       raise "table created. send rows next time."
-     end
+     log.error "job.load API", project_id: project, dataset: dataset, table: table_id, code: e.status_code, message: e.message

      if job_id && e.status_code == 409 && e.message =~ /Job/ # duplicate load job
        return JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, job_id)
@@ -190,8 +177,9 @@ module Fluent
    def fetch_load_job(job_reference)
      project = job_reference.project_id
      job_id = job_reference.job_id
+     location = @options[:location]

-     res = client.get_job(project, job_id)
+     res = client.get_job(project, job_id, location: location)
      log.debug "load job fetched", id: job_id, state: res.status.state, **job_reference.as_hash(:project_id, :dataset_id, :table_id)

      if res.status.state == "DONE"
@@ -227,9 +215,10 @@ module Fluent
        end
      end

+     # `stats` can be nil if we receive a warning like "Warning: Load job succeeded with data imported, however statistics may be lost due to internal error."
      stats = response.statistics.load
      duration = (response.statistics.end_time - response.statistics.creation_time) / 1000.0
-     log.debug "load job finished", id: job_id, state: response.status.state, input_file_bytes: stats.input_file_bytes, input_files: stats.input_files, output_bytes: stats.output_bytes, output_rows: stats.output_rows, bad_records: stats.bad_records, duration: duration.round(2), project_id: project, dataset: dataset, table: table_id
+     log.debug "load job finished", id: job_id, state: response.status.state, input_file_bytes: stats&.input_file_bytes, input_files: stats&.input_files, output_bytes: stats&.output_bytes, output_rows: stats&.output_rows, bad_records: stats&.bad_records, duration: duration.round(2), project_id: project, dataset: dataset, table: table_id
      @num_errors_per_chunk.delete(chunk_id_hex)
    end

@@ -315,6 +304,64 @@ module Fluent
          "NEWLINE_DELIMITED_JSON"
        end
      end
+
+     def time_partitioning
+       return @time_partitioning if instance_variable_defined?(:@time_partitioning)
+
+       if @options[:time_partitioning_type]
+         @time_partitioning = {
+           type: @options[:time_partitioning_type].to_s.upcase,
+           field: @options[:time_partitioning_field] ? @options[:time_partitioning_field].to_s : nil,
+           expiration_ms: @options[:time_partitioning_expiration] ? @options[:time_partitioning_expiration] * 1000 : nil,
+         }.reject { |_, v| v.nil? }
+       else
+         @time_partitioning
+       end
+     end
+
+     def require_partition_filter
+       return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
+
+       if @options[:require_partition_filter]
+         @require_partition_filter = @options[:require_partition_filter]
+       else
+         @require_partition_filter
+       end
+     end
+
+     def clustering
+       return @clustering if instance_variable_defined?(:@clustering)
+
+       if @options[:clustering_fields]
+         @clustering = {
+           fields: @options[:clustering_fields]
+         }
+       else
+         @clustering
+       end
+     end
+
+     def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
+       try_count ||= 1
+       res = client.insert_all_table_data(project, dataset, table_id, body, {})
+     rescue Google::Apis::ClientError => e
+       if e.status_code == 404 && /Not Found: Table/i =~ e.message
+         if try_count == 1
+           # Table Not Found: Auto Create Table
+           create_table(project, dataset, table_id, schema)
+         elsif try_count > 10
+           raise "A new table was created but it is not found."
+         end
+
+         # Retry to insert several times because the created table is not visible from Streaming insert for a little while
+         # cf. https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
+         try_count += 1
+         sleep 5
+         log.debug "Retry to insert rows", project_id: project, dataset: dataset, table: table_id
+         retry
+       end
+       raise
+     end
    end
  end
end
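For orientation, here is a sketch (option values invented, mirroring the README examples) of what the three memoized helpers above return and where the results end up. Note the unit conversion: `time_partitioning_expiration` is configured in seconds of fluentd time (for example `1h` = 3600) and is multiplied by 1000 into `expiration_ms`:

```ruby
options = {
  time_partitioning_type: :day,
  time_partitioning_field: "time",
  time_partitioning_expiration: 3600,   # 1h in seconds
  require_partition_filter: true,
  clustering_fields: %w[time vhost],
}

time_partitioning = {
  type: options[:time_partitioning_type].to_s.upcase,           # "DAY"
  field: options[:time_partitioning_field],                     # "time"
  expiration_ms: options[:time_partitioning_expiration] * 1000, # 3_600_000
}
clustering = { fields: options[:clustering_fields] }

# create_table merges these into the tables.insert definition, and the load
# path merges the same hashes into the jobs.insert load configuration:
definition = {
  table_reference: { table_id: "access_log" },    # table name is a made-up example
  schema: { fields: [] },                         # filled from schema/schema_path/fetch_schema
  time_partitioning: time_partitioning,
  require_partition_filter: options[:require_partition_filter],
  clustering: clustering,
}
```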
@@ -29,6 +29,9 @@ module Fluent
    config_param :private_key_path, :string, default: nil
    config_param :private_key_passphrase, :string, default: 'notasecret', secret: true
    config_param :json_key, default: nil, secret: true
+   # The geographic location of the job. Required except for US and EU.
+   # https://github.com/googleapis/google-api-ruby-client/blob/master/generated/google/apis/bigquery_v2/service.rb#L350
+   config_param :location, :string, default: nil

    # see as simple reference
    # https://github.com/abronte/BigQuery/blob/master/lib/bigquery.rb
@@ -67,8 +70,12 @@ module Fluent

    ## Partitioning
    config_param :time_partitioning_type, :enum, list: [:day], default: nil
+   config_param :time_partitioning_field, :string, default: nil
    config_param :time_partitioning_expiration, :time, default: nil

+   ## Clustering
+   config_param :clustering_fields, :array, default: nil
+
    ## Formatter
    config_section :format do
      config_set_default :@type, 'json'
@@ -104,9 +111,6 @@ module Fluent
      if @schema
        @table_schema.load_schema(@schema)
      end
-     if @schema_path
-       @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
-     end

      formatter_config = conf.elements("format")[0]
      @formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
@@ -119,6 +123,7 @@ module Fluent
      @tables_mutex = Mutex.new
      @fetched_schemas = {}
      @last_fetch_schema_time = Hash.new(0)
+     @read_schemas = {}
    end

    def multi_workers_ready?
@@ -130,6 +135,7 @@ module Fluent
        private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
        email: @email,
        json_key: @json_key,
+       location: @location,
        source_format: @source_format,
        skip_invalid_rows: @skip_invalid_rows,
        ignore_unknown_values: @ignore_unknown_values,
@@ -138,7 +144,10 @@ module Fluent
        prevent_duplicate_load: @prevent_duplicate_load,
        auto_create_table: @auto_create_table,
        time_partitioning_type: @time_partitioning_type,
+       time_partitioning_field: @time_partitioning_field,
        time_partitioning_expiration: @time_partitioning_expiration,
+       require_partition_filter: @require_partition_filter,
+       clustering_fields: @clustering_fields,
        timeout_sec: @request_timeout_sec,
        open_timeout_sec: @request_open_timeout_sec,
      })
@@ -151,6 +160,8 @@ module Fluent
      schema =
        if @fetch_schema
          fetch_schema(meta)
+       elsif @schema_path
+         read_schema(meta)
        else
          @table_schema
        end
@@ -182,7 +193,7 @@ module Fluent
        table_schema.load_schema(schema)
        @fetched_schemas["#{project}.#{dataset}.#{table_id}"] = table_schema
      else
-       if @fetched_schemas["#{project}.#{dataset}.#{table_id}"].empty?
+       if @fetched_schemas["#{project}.#{dataset}.#{table_id}"].nil?
          raise "failed to fetch schema from bigquery"
        else
          log.warn "#{table_id} uses previous schema"
@@ -199,9 +210,26 @@ module Fluent
      extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
    end

+   def read_schema(metadata)
+     schema_path = read_schema_target_path(metadata)
+
+     unless @read_schemas[schema_path]
+       table_schema = Fluent::BigQuery::RecordSchema.new("record")
+       table_schema.load_schema(MultiJson.load(File.read(schema_path)))
+       @read_schemas[schema_path] = table_schema
+     end
+     @read_schemas[schema_path]
+   end
+
+   def read_schema_target_path(metadata)
+     extract_placeholders(@schema_path, metadata)
+   end
+
    def get_schema(project, dataset, metadata)
      if @fetch_schema
        @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
+     elsif @schema_path
+       @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
      else
        @table_schema
      end
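Since `schema_path` is now resolved per chunk (placeholders allowed) and read lazily, it may help to see the file format it expects. A minimal, hypothetical schema file — the same JSON layout as the `schema` option and the `testdata/apache.schema` fixture — parsed the same way `read_schema` does:

```ruby
require "multi_json"

# Hypothetical contents of a schema file referenced by schema_path.
schema_json = <<~JSON
  [
    {"name": "time",        "type": "TIMESTAMP", "mode": "REQUIRED"},
    {"name": "status",      "type": "INTEGER"},
    {"name": "utilisation", "type": "NUMERIC",   "mode": "NULLABLE"}
  ]
JSON

fields = MultiJson.load(schema_json)
# Inside the plugin, the resolved path (e.g. "foo.schema" for a chunk tagged "foo"
# with `schema_path ${tag}.schema`) is read once and cached in @read_schemas:
#   table_schema = Fluent::BigQuery::RecordSchema.new("record")
#   table_schema.load_schema(MultiJson.load(File.read(resolved_path)))
fields.map { |f| f["name"] } # => ["time", "status", "utilisation"]
```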
@@ -29,6 +29,9 @@ module Fluent
    # If insert_id_field is not specified, true means to allow duplicate rows
    config_param :allow_retry_insert_errors, :bool, default: false

+   ## RequirePartitionFilter
+   config_param :require_partition_filter, :bool, default: false
+
    ## Buffer
    config_section :buffer do
      config_set_default :@type, "memory"
@@ -96,14 +99,8 @@ module Fluent
    end

    def insert(project, dataset, table_id, rows, schema, template_suffix)
-     writer.insert_rows(project, dataset, table_id, rows, template_suffix: template_suffix)
+     writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
    rescue Fluent::BigQuery::Error => e
-     if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
-       # Table Not Found: Auto Create Table
-       writer.create_table(project, dataset, table_id, schema)
-       raise "table created. send rows next time."
-     end
-
      raise if e.retryable?

      if @secondary
@@ -196,6 +196,7 @@ module Fluent
    rescue => e
      log.error("unexpected error while polling", error: e)
      log.error_backtrace
+     rollback_write(job_reference.chunk_id)
    end
  end

@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

- def test_configure_auth_json_key_as_file_raise_permission_error
-   json_key_path = 'test/plugin/testdata/json_key.json'
-   json_key_path_dir = File.dirname(json_key_path)
-
-   begin
-     File.chmod(0000, json_key_path_dir)
-
-     driver = create_driver(%[
-       table foo
-       auth_method json_key
-       json_key #{json_key_path}
-       project yourproject_id
-       dataset yourdataset_id
-       schema [
-         {"name": "time", "type": "INTEGER"},
-         {"name": "status", "type": "INTEGER"},
-         {"name": "bytes", "type": "INTEGER"}
-       ]
-     ])
-     assert_raises(Errno::EACCES) do
-       driver.instance.writer.client
-     end
-   ensure
-     File.chmod(0755, json_key_path_dir)
-   end
- end
-
  def test_configure_auth_json_key_as_string
    json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
    json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
  end

  def test_configure_auth_application_default
+   omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
+
    driver = create_driver(%[
      table foo
      auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
    assert_equal :string, table_schema["argv"].type
    assert_equal :repeated, table_schema["argv"].mode
  end
+
+ def test_resolve_schema_path_with_placeholder
+   now = Time.now.to_i
+   driver = create_driver(<<-CONFIG)
+     table ${tag}_%Y%m%d
+     auth_method json_key
+     json_key jsonkey.josn
+     project yourproject_id
+     dataset yourdataset_id
+     schema_path ${tag}.schema
+
+     <buffer tag, time>
+       timekey 1d
+     </buffer>
+   CONFIG
+
+   metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
+
+   assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
+ end
end
@@ -5,6 +5,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
    Fluent::Test.setup
  end

+ SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
+
  CONFIG = %[
    table foo
    email foo@bar.example
@@ -121,7 +123,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
    driver = create_driver

    stub_writer do |writer|
-     mock.proxy(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: hash_including(entry)}], template_suffix: nil)
      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
@@ -261,7 +262,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase

    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-   metadata = driver.instance.metadata_for_test(tag, time, record)
+   metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
@@ -345,11 +346,27 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
    CONFIG

+   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
    stub_writer do |writer|
-     mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}], template_suffix: nil) do
-       raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
-     end
-     mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
+     body = {
+       rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
+       skip_invalid_rows: false,
+       ignore_unknown_values: false,
+     }
+     mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+       raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+     end.at_least(1)
+     mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+     mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+       table_reference: {
+         table_id: 'foo',
+       },
+       schema: {
+         fields: schema_fields,
+       },
+     }, {})
    end

    assert_raise(RuntimeError) do
@@ -401,14 +418,131 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}

      time_partitioning_type day
+     time_partitioning_field time
      time_partitioning_expiration 1h
+
+     require_partition_filter true
    CONFIG

+   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
    stub_writer do |writer|
-     mock(writer).insert_rows('yourproject_id', 'yourdataset_id', 'foo', [message], template_suffix: nil) do
-       raise Fluent::BigQuery::RetryableError.new(nil, Google::Apis::ServerError.new("Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404, body: "Not found: Table yourproject_id:yourdataset_id.foo"))
+     body = {
+       rows: [message],
+       skip_invalid_rows: false,
+       ignore_unknown_values: false,
+     }
+     mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+       raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+     end.at_least(1)
+     mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+     mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+       table_reference: {
+         table_id: 'foo',
+       },
+       schema: {
+         fields: schema_fields,
+       },
+       time_partitioning: {
+         type: 'DAY',
+         field: 'time',
+         expiration_ms: 3600000,
+       },
+       require_partition_filter: true,
+     }, {})
+   end
+
+   assert_raise(RuntimeError) do
+     driver.run do
+       driver.feed("tag", Fluent::EventTime.now, message[:json])
      end
-     mock(writer).create_table('yourproject_id', 'yourdataset_id', 'foo', driver.instance.instance_variable_get(:@table_schema))
+   end
+ end
+
+ def test_auto_create_clustered_table_by_bigquery_api
+   now = Time.now
+   message = {
+     json: {
+       time: now.to_i,
+       request: {
+         vhost: "bar",
+         path: "/path/to/baz",
+         method: "GET",
+         protocol: "HTTP/1.0",
+         agent: "libwww",
+         referer: "http://referer.example",
+         time: (now - 1).to_f,
+         bot_access: true,
+         loginsession: false,
+       },
+       remote: {
+         host: "remote.example",
+         ip: "192.168.1.1",
+         user: "nagachika",
+       },
+       response: {
+         status: 200,
+         bytes: 72,
+       },
+     }
+   }
+
+   driver = create_driver(<<-CONFIG)
+     table foo
+     email foo@bar.example
+     private_key_path /path/to/key
+     project yourproject_id
+     dataset yourdataset_id
+
+     time_format %s
+     time_field time
+
+     auto_create_table true
+     schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
+
+     time_partitioning_type day
+     time_partitioning_field time
+     time_partitioning_expiration 1h
+
+     clustering_fields [
+       "time",
+       "vhost"
+     ]
+   CONFIG
+
+   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+   stub_writer do |writer|
+     body = {
+       rows: [message],
+       skip_invalid_rows: false,
+       ignore_unknown_values: false,
+     }
+     mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+       raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+     end.at_least(1)
+     mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
+
+     mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+       table_reference: {
+         table_id: 'foo',
+       },
+       schema: {
+         fields: schema_fields,
+       },
+       time_partitioning: {
+         type: 'DAY',
+         field: 'time',
+         expiration_ms: 3600000,
+       },
+       clustering: {
+         fields: [
+           'time',
+           'vhost',
+         ],
+       },
+     }, {})
    end

    assert_raise(RuntimeError) do
@@ -39,10 +39,8 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
      writer
    end
  end
-
- def test_write
-   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))

+ def test_write
    response_stub = stub!

    driver = create_driver
@@ -60,9 +58,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          dataset_id: 'yourdataset_id',
          table_id: 'foo',
        },
-       schema: {
-         fields: schema_fields,
-       },
        write_disposition: "WRITE_APPEND",
        source_format: "NEWLINE_DELIMITED_JSON",
        ignore_unknown_values: false,
@@ -99,7 +94,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
      schema_path #{SCHEMA_PATH}
      prevent_duplicate_load true
    CONFIG
-   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))

    response_stub = stub!
    stub_writer do |writer|
@@ -116,9 +110,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          dataset_id: 'yourdataset_id',
          table_id: 'foo',
        },
-       schema: {
-         fields: schema_fields,
-       },
        write_disposition: "WRITE_APPEND",
        source_format: "NEWLINE_DELIMITED_JSON",
        ignore_unknown_values: false,
@@ -138,11 +129,10 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase

  def test_write_with_retryable_error
    driver = create_driver
-   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))

    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-   metadata = driver.instance.metadata_for_test(tag, time, record)
+   metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
@@ -158,9 +148,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          dataset_id: 'yourdataset_id',
          table_id: 'foo',
        },
-       schema: {
-         fields: schema_fields,
-       },
        write_disposition: "WRITE_APPEND",
        source_format: "NEWLINE_DELIMITED_JSON",
        ignore_unknown_values: false,
@@ -171,7 +158,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
      stub!.job_reference.stub!.job_id { "dummy_job_id" }
    end

-   mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
+   mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
      stub! do |s|
        s.id { 'dummy_job_id' }
        s.configuration.stub! do |_s|
@@ -225,11 +212,10 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
        utc
      </secondary>
    CONFIG
-   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))

    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-   metadata = driver.instance.metadata_for_test(tag, time, record)
+   metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
@@ -245,9 +231,6 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          dataset_id: 'yourdataset_id',
          table_id: 'foo',
        },
-       schema: {
-         fields: schema_fields,
-       },
        write_disposition: "WRITE_APPEND",
        source_format: "NEWLINE_DELIMITED_JSON",
        ignore_unknown_values: false,
@@ -258,7 +241,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
      stub!.job_reference.stub!.job_id { "dummy_job_id" }
    end

-   mock(writer.client).get_job('yourproject_id', 'dummy_job_id') do
+   mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
      stub! do |s|
        s.id { 'dummy_job_id' }
        s.configuration.stub! do |_s|
@@ -289,6 +272,61 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
    driver.instance_shutdown
  end

+ def test_write_with_auto_create_table
+   driver = create_driver(<<-CONFIG)
+     table foo
+     email foo@bar.example
+     private_key_path /path/to/key
+     project yourproject_id
+     dataset yourdataset_id
+
+     <buffer>
+       @type memory
+     </buffer>
+
+     <inject>
+       time_format %s
+       time_key time
+     </inject>
+
+     auto_create_table true
+     schema_path #{SCHEMA_PATH}
+   CONFIG
+
+   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
+   stub_writer do |writer|
+     mock(writer.client).get_table('yourproject_id', 'yourdataset_id', 'foo') do
+       raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
+     end
+
+     mock(writer.client).insert_job('yourproject_id', {
+       configuration: {
+         load: {
+           destination_table: {
+             project_id: 'yourproject_id',
+             dataset_id: 'yourdataset_id',
+             table_id: 'foo',
+           },
+           write_disposition: "WRITE_APPEND",
+           source_format: "NEWLINE_DELIMITED_JSON",
+           ignore_unknown_values: false,
+           max_bad_records: 0,
+           schema: {
+             fields: schema_fields,
+           },
+         }
+       }
+     }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+       stub!.job_reference.stub!.job_id { "dummy_job_id" }
+     end
+   end
+
+   driver.run do
+     driver.feed("tag", Time.now.to_i, {"a" => "b"})
+   end
+ end
+
  private

  def create_response_stub(response)
@@ -27,6 +27,11 @@ class RecordSchemaTest < Test::Unit::TestCase
        "name" => "argv",
        "type" => "STRING",
        "mode" => "REPEATED"
+     },
+     {
+       "name" => "utilisation",
+       "type" => "NUMERIC",
+       "mode" => "NULLABLE"
      }
    ]
  end
@@ -58,6 +63,11 @@ class RecordSchemaTest < Test::Unit::TestCase
        "type" => "STRING",
        "mode" => "REPEATED"
      },
+     {
+       "name" => "utilisation",
+       "type" => "NUMERIC",
+       "mode" => "NULLABLE"
+     },
      {
        "name" => "new_column",
        "type" => "STRING",
@@ -93,6 +103,11 @@ class RecordSchemaTest < Test::Unit::TestCase
        "type" => "STRING",
        "mode" => "REPEATED"
      },
+     {
+       "name" => "utilisation",
+       "type" => "NUMERIC",
+       "mode" => "NULLABLE"
+     }
    ]
  end

@@ -142,12 +157,12 @@ class RecordSchemaTest < Test::Unit::TestCase
    time = Time.local(2016, 2, 7, 19, 0, 0).utc

    formatted = fields.format_one({
-     "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42]
+     "time" => time, "tty" => ["tty1", "tty2", "tty3"], "pwd" => "/home", "user" => {name: "joker1007", uid: 10000}, "argv" => ["foo", 42], "utilisation" => "0.837"
    })
    assert_equal(
      formatted,
      {
-       "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"]
+       "time" => time.strftime("%Y-%m-%d %H:%M:%S.%6L %:z"), "tty" => MultiJson.dump(["tty1", "tty2", "tty3"]), "pwd" => "/home", "user" => MultiJson.dump({name: "joker1007", uid: 10000}), "argv" => ["foo", "42"], "utilisation" => "0.837"
      }
    )
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-bigquery
  version: !ruby/object:Gem::Version
-   version: 2.0.0.beta
+   version: 2.3.0
  platform: ruby
  authors:
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-03-29 00:00:00.000000000 Z
+ date: 2022-02-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: rake
@@ -139,8 +139,9 @@ extensions: []
  extra_rdoc_files: []
  files:
  - ".github/ISSUE_TEMPLATE.md"
+ - ".github/workflows/linux.yml"
+ - ".github/workflows/windows.yml"
  - ".gitignore"
- - ".travis.yml"
  - Gemfile
  - LICENSE.txt
  - README.md
@@ -179,12 +180,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
      version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ">"
+   - - ">="
      - !ruby/object:Gem::Version
-       version: 1.3.1
+       version: '0'
  requirements: []
- rubyforge_project:
- rubygems_version: 2.6.12
+ rubygems_version: 3.1.4
  signing_key:
  specification_version: 4
  summary: Fluentd plugin to store data on Google BigQuery
data/.travis.yml DELETED
@@ -1,15 +0,0 @@
1
- language: ruby
2
-
3
- rvm:
4
- - 2.2
5
- - 2.3.3
6
- - 2.4.3
7
- - 2.5.0
8
-
9
- gemfile:
10
- - Gemfile
11
-
12
- before_install:
13
- - gem update bundler
14
-
15
- script: bundle exec rake test