fluent-plugin-bigquery 2.2.0 → 3.0.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 36b950bf0783d3ce350d7c7514f5b7946b10fe4b867aec015c9331656e86eb48
- data.tar.gz: b4b8e92f41008043b09822b20698a7e29ca8daf9ba69c2a5c38c696553e86d71
+ metadata.gz: bc6de961c8c42fddf3d9e297e93db560d16cfc098161232c90ee64f0a5679fee
+ data.tar.gz: 5ec1fee690f77d0fa25d8e427c6ad354cdfdbfafe30a4aee4fea9a5e73db5eb3
  SHA512:
- metadata.gz: 01d3d39d9247134ca9059b990d0d6a52f308b27711d8cd989de30dfeb4e91a1673f1047d4e9269d24447169d9ec4bbac1d0d9b9f7d93b08b7be5d6c170593f1f
- data.tar.gz: f226de7925fb048ba5533bf9b7c626f43e4b63eeb92c119d700737d1ae44611fb6fe6294e1ed5f989456de2ee3e1f98334c2d4cd1d89c49b52ef945a3674c8ce
+ metadata.gz: 20fc96d420611a0d12f7cb34656ae87872f24131c70039383a8f8b7d51048a7d4f277a80675f2bee834113fd13d2a9780b772b517f2140481f7fb86ce63f24e3
+ data.tar.gz: cecc8f8682761ddfb22d942b69103823cc728923f6d7043d967254ed02c754db4e792132769f7f3aa91986aa27895ac83bf16358be21e03d3c94e77c43975231
data/.github/workflows/linux.yml ADDED
@@ -0,0 +1,30 @@
+ name: Testing on Ubuntu
+ on:
+ - push
+ - pull_request
+ jobs:
+ build:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ ruby:
+ - 2.7
+ - 3.0
+ - 3.1
+ os:
+ - ubuntu-latest
+ name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+ steps:
+ - uses: actions/checkout@v2
+ - uses: ruby/setup-ruby@v1
+ with:
+ ruby-version: ${{ matrix.ruby }}
+ - name: unit testing
+ env:
+ CI: true
+ run: |
+ ruby -v
+ gem install bundler rake
+ bundle install --jobs 4 --retry 3
+ bundle exec rake test
data/.github/workflows/windows.yml ADDED
@@ -0,0 +1,30 @@
+ name: Testing on Windows
+ on:
+ - push
+ - pull_request
+ jobs:
+ build:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ ruby:
+ - 2.7
+ - 3.0
+ - 3.1
+ os:
+ - windows-latest
+ name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+ steps:
+ - uses: actions/checkout@v2
+ - uses: ruby/setup-ruby@v1
+ with:
+ ruby-version: ${{ matrix.ruby }}
+ - name: unit testing
+ env:
+ CI: true
+ run: |
+ ruby -v
+ gem install bundler rake
+ bundle install --jobs 4 --retry 3
+ bundle exec rake test
data/Gemfile CHANGED
@@ -2,3 +2,6 @@ source 'https://rubygems.org'

  # Specify your gem's dependencies in fluent-plugin-bigquery.gemspec
  gemspec
+
+ gem "oj"
+ gem "dummer"
data/README.md CHANGED
@@ -23,6 +23,7 @@ OAuth flow for installed applications.
  | v0.4.x | 0.12.x | 2.0 or later |
  | v1.x.x | 0.14.x or later | 2.2 or later |
  | v2.x.x | 0.14.x or later | 2.3 or later |
+ | v3.x.x | 1.x or later | 2.7 or later |

  ## With docker image
  If you use official alpine based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
@@ -52,7 +53,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
  | auto_create_table | bool | no | no | false | If true, creates table automatically |
  | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
  | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
- | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+ | schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema Definition file path. It is formatted by JSON. |
  | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
  | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
  | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
@@ -72,6 +73,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
  | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
  | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
  | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+ | require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. |

  #### bigquery_load

@@ -379,10 +381,10 @@ format to construct table ids.
  Table ids are formatted at runtime
  using the chunk key time.

- see. http://docs.fluentd.org/v0.14/articles/output-plugin-overview
+ see. https://docs.fluentd.org/configuration/buffer-section

  For example, with the configuration below,
- data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
+ data is inserted into tables `accesslog_2014_08_02`, `accesslog_2014_08_03` and so on.

  ```apache
  <match dummy>
@@ -392,7 +394,7 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.

  project yourproject_id
  dataset yourdataset_id
- table accesslog_%Y_%m
+ table accesslog_%Y_%m_%d

  <buffer time>
  timekey 1d
@@ -401,6 +403,8 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
  </match>
  ```

+ **NOTE: In current fluentd (v1.15.x), The maximum unit supported by strftime formatting is the granularity of days**
+
  #### record attribute formatting
  The format can be suffixed with attribute name.

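Two of the README changes above correspond to behavioral changes in v3.0.0: `require_partition_filter` is a new `bigquery_insert` option, and `schema_path` now accepts placeholders because it is resolved per chunk rather than read once at startup. A minimal sketch combining both; the key path, project/dataset IDs and the `${tag}.schema` layout are illustrative, not taken from this diff:

```apache
<match dummy>
  @type bigquery_insert

  auth_method json_key
  json_key /path/to/keyfile.json

  project yourproject_id
  dataset yourdataset_id
  table accesslog_%Y_%m_%d

  # resolved per chunk, so the ${tag} placeholder can select a schema file per tag
  schema_path /path/to/schemas/${tag}.schema

  # only consulted when the plugin creates the table itself
  auto_create_table true
  time_partitioning_type day
  time_partitioning_field time
  require_partition_filter true

  <buffer tag, time>
    timekey 1d
  </buffer>
</match>
```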
data/integration/README.md ADDED
@@ -0,0 +1,14 @@
+ # Requirements
+
+ Set Environment Variable
+
+ - GOOGLE_APPLICATION_CREDENTIALS (json key path)
+ - PROJECT_NAME
+ - DATASET_NAME
+ - TABLE_NAME
+
+ # How to use
+
+ 1. execute `create_table.sh`
+ 1. `bundle exec fluentd -c fluent.conf`
+ 1. `bundle exec dummer -c dummer_insert.rb` or `bundle exec dummer -c dummer_load.rb`
data/integration/create_table.sh ADDED
@@ -0,0 +1,4 @@
+ #!/bin/sh
+
+ set -eux
+ bq mk -t --project_id=${PROJECT_NAME} --schema=$(dirname $0)/schema.json ${DATASET_NAME}.${TABLE_NAME}
data/integration/dummer_insert.rb ADDED
@@ -0,0 +1,12 @@
+ require "time"
+
+ configure "insert" do
+ host "localhost"
+ port 24224
+ rate 100
+ tag type: :string, any: %w(insert_data)
+ field :id, type: :integer, countup: true
+ field :string_field, type: :string, any: %w(str1 str2 str3 str4)
+ field :timestamp_field, type: :string, value: Time.now.iso8601
+ field :date, type: :string, value: Time.now.strftime("%Y-%m-%d")
+ end
data/integration/dummer_load.rb ADDED
@@ -0,0 +1,12 @@
+ require "time"
+
+ configure "load" do
+ host "localhost"
+ port 24224
+ rate 100
+ tag type: :string, any: %w(load_data)
+ field :id, type: :integer, countup: true
+ field :string_field, type: :string, any: %w(str1 str2 str3 str4)
+ field :timestamp_field, type: :string, value: Time.now.iso8601
+ field :date, type: :string, value: Time.now.strftime("%Y-%m-%d")
+ end
data/integration/fluent.conf ADDED
@@ -0,0 +1,88 @@
+ <source>
+ @type forward
+ port 24224
+ bind 0.0.0.0
+ </source>
+
+ <match insert_data>
+ @id bigquery-insert-integration
+ @type bigquery_insert
+
+ allow_retry_insert_errors true
+
+ auth_method json_key
+ json_key "#{ENV["GOOGLE_APPLICATION_CREDENTIALS"]}"
+
+ <buffer>
+ @type file
+
+ chunk_limit_size 1m
+ chunk_limit_records 1500
+ total_limit_size 1g
+ path ./log/bigquery-insert-integration
+
+ flush_interval 30
+ flush_thread_count 4
+ flush_at_shutdown true
+
+ retry_max_times 14
+ retry_max_interval 30m
+ </buffer>
+
+ request_open_timeout_sec 2m
+
+ slow_flush_log_threshold 30.0
+
+ project "#{ENV["PROJECT_NAME"]}"
+ dataset "#{ENV["DATASET_NAME"]}"
+ table "#{ENV["TABLE_NAME"]}"
+ auto_create_table false
+ fetch_schema true
+ fetch_schema_table "#{ENV["TABLE_NAME"]}"
+
+ insert_id_field id
+
+ <secondary>
+ @type file
+ path ./log/bigquery-insert-integration.errors
+ </secondary>
+ </match>
+
+ <match load_data>
+ @id bigquery-load-integration
+ @type bigquery_load
+
+ auth_method json_key
+ json_key "#{ENV["GOOGLE_APPLICATION_CREDENTIALS"]}"
+
+ <buffer>
+ @type file
+
+ chunk_limit_size 1m
+ total_limit_size 1g
+ path ./log/bigquery-load-integration
+
+ flush_interval 120
+ flush_thread_count 4
+ flush_at_shutdown true
+
+ retry_max_times 14
+ retry_max_interval 30m
+ </buffer>
+
+ request_open_timeout_sec 2m
+
+ slow_flush_log_threshold 300.0
+
+ project "#{ENV["PROJECT_NAME"]}"
+ dataset "#{ENV["DATASET_NAME"]}"
+ table "#{ENV["TABLE_NAME"]}"
+ auto_create_table false
+ fetch_schema true
+ fetch_schema_table "#{ENV["TABLE_NAME"]}"
+
+ <secondary>
+ @type file
+ path ./log/bigquery-load-integration.errors
+ </secondary>
+ </match>
data/integration/schema.json ADDED
@@ -0,0 +1,22 @@
+ [
+ {
+ "name": "id",
+ "type": "INTEGER",
+ "mode": "REQUIRED"
+ },
+ {
+ "name": "string_field",
+ "type": "STRING",
+ "mode": "NULLABLE"
+ },
+ {
+ "name": "timestamp_field",
+ "type": "TIMESTAMP",
+ "mode": "NULLABLE"
+ },
+ {
+ "name": "date",
+ "type": "DATE",
+ "mode": "REQUIRED"
+ }
+ ]
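The integration files above fit together as a small end-to-end check: create_table.sh creates the target table from schema.json, fluent.conf listens on a forward input and routes the insert_data/load_data tags to the two outputs, and the dummer scripts generate traffic. A rough local run might look like this (the values and the backgrounding of fluentd are illustrative; integration/README.md is the authoritative sequence):

```sh
# Placeholder values only; adjust to your own project before running.
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account.json
export PROJECT_NAME=yourproject_id
export DATASET_NAME=yourdataset_id
export TABLE_NAME=integration_test

cd integration
sh ./create_table.sh                    # bq mk using schema.json
bundle exec fluentd -c fluent.conf &    # forward input feeding bigquery_insert / bigquery_load
bundle exec dummer -c dummer_insert.rb  # or dummer_load.rb to exercise the load path
```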
data/lib/fluent/plugin/bigquery/version.rb CHANGED
@@ -1,5 +1,5 @@
  module Fluent
  module BigQueryPlugin
- VERSION = "2.2.0".freeze
+ VERSION = "3.0.0".freeze
  end
  end
data/lib/fluent/plugin/bigquery/writer.rb CHANGED
@@ -1,7 +1,7 @@
  module Fluent
  module BigQuery
  class Writer
- def initialize(log, auth_method, options = {})
+ def initialize(log, auth_method, **options)
  @auth_method = auth_method
  @scope = "https://www.googleapis.com/auth/bigquery"
  @options = options
@@ -35,8 +35,9 @@ module Fluent
  }

  definition.merge!(time_partitioning: time_partitioning) if time_partitioning
+ definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
  definition.merge!(clustering: clustering) if clustering
- client.insert_table(project, dataset, definition, {})
+ client.insert_table(project, dataset, definition, **{})
  log.debug "create table", project_id: project, dataset: dataset, table: table_id
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
  message = e.message
@@ -82,7 +83,7 @@ module Fluent
  if @options[:auto_create_table]
  res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
  else
- res = client.insert_all_table_data(project, dataset, table_id, body, {})
+ res = client.insert_all_table_data(project, dataset, table_id, body, **{})
  end
  log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size

@@ -157,10 +158,8 @@ module Fluent
  res = client.insert_job(
  project,
  configuration,
- {
- upload_source: upload_source,
- content_type: "application/octet-stream",
- }
+ upload_source: upload_source,
+ content_type: "application/octet-stream",
  )
  JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -318,6 +317,16 @@ module Fluent
  end
  end

+ def require_partition_filter
+ return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
+
+ if @options[:require_partition_filter]
+ @require_partition_filter = @options[:require_partition_filter]
+ else
+ @require_partition_filter
+ end
+ end
+
  def clustering
  return @clustering if instance_variable_defined?(:@clustering)

@@ -332,7 +341,7 @@ module Fluent

  def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
  try_count ||= 1
- res = client.insert_all_table_data(project, dataset, table_id, body, {})
+ res = client.insert_all_table_data(project, dataset, table_id, body, **{})
  rescue Google::Apis::ClientError => e
  if e.status_code == 404 && /Not Found: Table/i =~ e.message
  if try_count == 1
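The `options = {}` → `**options` and `{}` → `**{}` changes above track Ruby 3's separation of positional and keyword arguments: google-api-client's service methods take their options as keywords, and Ruby 3 no longer promotes a trailing positional hash into keyword arguments. A standalone sketch of the failure mode (not code from this gem):

```ruby
# Ruby 3 separates positional and keyword arguments.
def insert(project, dataset, options: {}, fields: nil)
  { project: project, dataset: dataset, options: options, fields: fields }
end

opts = { fields: ["id"] }

# Ruby 2.7 converted the trailing hash to keywords (with a deprecation warning);
# Ruby 3 treats it as a third positional argument and raises
# ArgumentError: wrong number of arguments (given 3, expected 2).
# insert("proj", "ds", opts)

# The explicit double splat works on both Ruby 2 and Ruby 3:
p insert("proj", "ds", **opts)
# => {:project=>"proj", :dataset=>"ds", :options=>{}, :fields=>["id"]}
```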
data/lib/fluent/plugin/out_bigquery_base.rb CHANGED
@@ -111,9 +111,6 @@ module Fluent
  if @schema
  @table_schema.load_schema(@schema)
  end
- if @schema_path
- @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
- end

  formatter_config = conf.elements("format")[0]
  @formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
@@ -126,6 +123,7 @@ module Fluent
  @tables_mutex = Mutex.new
  @fetched_schemas = {}
  @last_fetch_schema_time = Hash.new(0)
+ @read_schemas = {}
  end

  def multi_workers_ready?
@@ -133,7 +131,7 @@ module Fluent
  end

  def writer
- @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method, {
+ @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method,
  private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
  email: @email,
  json_key: @json_key,
@@ -148,19 +146,27 @@ module Fluent
  time_partitioning_type: @time_partitioning_type,
  time_partitioning_field: @time_partitioning_field,
  time_partitioning_expiration: @time_partitioning_expiration,
+ require_partition_filter: @require_partition_filter,
  clustering_fields: @clustering_fields,
  timeout_sec: @request_timeout_sec,
  open_timeout_sec: @request_open_timeout_sec,
- })
+ )
  end

  def format(tag, time, record)
+ if record.nil?
+ log.warn("nil record detected. corrupted chunks? tag=#{tag}, time=#{time}")
+ return
+ end
+
  record = inject_values_to_record(tag, time, record)

  meta = metadata(tag, time, record)
  schema =
  if @fetch_schema
  fetch_schema(meta)
+ elsif @schema_path
+ read_schema(meta)
  else
  @table_schema
  end
@@ -209,9 +215,26 @@ module Fluent
  extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
  end

+ def read_schema(metadata)
+ schema_path = read_schema_target_path(metadata)
+
+ unless @read_schemas[schema_path]
+ table_schema = Fluent::BigQuery::RecordSchema.new("record")
+ table_schema.load_schema(MultiJson.load(File.read(schema_path)))
+ @read_schemas[schema_path] = table_schema
+ end
+ @read_schemas[schema_path]
+ end
+
+ def read_schema_target_path(metadata)
+ extract_placeholders(@schema_path, metadata)
+ end
+
  def get_schema(project, dataset, metadata)
  if @fetch_schema
  @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
+ elsif @schema_path
+ @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
  else
  @table_schema
  end
data/lib/fluent/plugin/out_bigquery_insert.rb CHANGED
@@ -29,6 +29,9 @@ module Fluent
  # If insert_id_field is not specified, true means to allow duplicate rows
  config_param :allow_retry_insert_errors, :bool, default: false

+ ## RequirePartitionFilter
+ config_param :require_partition_filter, :bool, default: false
+
  ## Buffer
  config_section :buffer do
  config_set_default :@type, "memory"
@@ -93,6 +96,8 @@ module Fluent
  schema = get_schema(project, dataset, metadata)

  insert(project, dataset, table_id, rows, schema, template_suffix)
+ rescue MultiJson::ParseError => e
+ raise Fluent::UnrecoverableError.new(e)
  end

  def insert(project, dataset, table_id, rows, schema, template_suffix)
data/test/plugin/test_out_bigquery_base.rb CHANGED
@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
  assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

- def test_configure_auth_json_key_as_file_raise_permission_error
- json_key_path = 'test/plugin/testdata/json_key.json'
- json_key_path_dir = File.dirname(json_key_path)
-
- begin
- File.chmod(0000, json_key_path_dir)
-
- driver = create_driver(%[
- table foo
- auth_method json_key
- json_key #{json_key_path}
- project yourproject_id
- dataset yourdataset_id
- schema [
- {"name": "time", "type": "INTEGER"},
- {"name": "status", "type": "INTEGER"},
- {"name": "bytes", "type": "INTEGER"}
- ]
- ])
- assert_raises(Errno::EACCES) do
- driver.instance.writer.client
- end
- ensure
- File.chmod(0755, json_key_path_dir)
- end
- end
-
  def test_configure_auth_json_key_as_string
  json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
  json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
  end

  def test_configure_auth_application_default
+ omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
+
  driver = create_driver(%[
  table foo
  auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
  assert_equal :string, table_schema["argv"].type
  assert_equal :repeated, table_schema["argv"].mode
  end
+
+ def test_resolve_schema_path_with_placeholder
+ now = Time.now.to_i
+ driver = create_driver(<<-CONFIG)
+ table ${tag}_%Y%m%d
+ auth_method json_key
+ json_key jsonkey.josn
+ project yourproject_id
+ dataset yourdataset_id
+ schema_path ${tag}.schema
+
+ <buffer tag, time>
+ timekey 1d
+ </buffer>
+ CONFIG
+
+ metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
+
+ assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
+ end
  end
data/test/plugin/test_out_bigquery_insert.rb CHANGED
@@ -5,6 +5,19 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  Fluent::Test.setup
  end

+ def is_ruby2?
+ RUBY_VERSION.to_i < 3
+ end
+
+ def build_args(args)
+ if is_ruby2?
+ args << {}
+ end
+ args
+ end
+
+ SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
+
  CONFIG = %[
  table foo
  email foo@bar.example
@@ -121,11 +134,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  driver = create_driver

  stub_writer do |writer|
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+ args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
  rows: [{json: hash_including(entry)}],
  skip_invalid_rows: false,
  ignore_unknown_values: false
- }, {}) do
+ }])
+ mock(writer.client).insert_all_table_data(*args) do
  s = stub!
  s.insert_errors { nil }
  s
@@ -186,11 +200,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase

  entry = {a: "b"}
  stub_writer do |writer|
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+ args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
  rows: [{json: hash_including(entry)}],
  skip_invalid_rows: false,
  ignore_unknown_values: false
- }, {}) do
+ }])
+ mock(writer.client).insert_all_table_data(*args) do
  ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
  raise ex
  end
@@ -245,11 +260,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase

  entry = {a: "b"}
  stub_writer do |writer|
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+ args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
  rows: [{json: hash_including(entry)}],
  skip_invalid_rows: false,
  ignore_unknown_values: false
- }, {}) do
+ }])
+ mock(writer.client).insert_all_table_data(*args) do
  ex = Google::Apis::ServerError.new("error", status_code: 501)
  def ex.reason
  "invalid"
@@ -260,14 +276,14 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase

  driver.instance_start
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
- metadata = driver.instance.metadata_for_test(tag, time, record)
+ metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
  chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
  c.append([driver.instance.format(tag, time, record)])
  end
  assert_raise Fluent::BigQuery::UnRetryableError do
  driver.instance.write(chunk)
  end
- assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.1
+ assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.2
  driver.instance_shutdown
  end

@@ -290,11 +306,15 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  CONFIG

  stub_writer do |writer|
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
- rows: [entry[0]],
- skip_invalid_rows: false,
- ignore_unknown_values: false
- }, {}) { stub!.insert_errors { nil } }
+ args = ['yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
+ rows: [entry[0]],
+ skip_invalid_rows: false,
+ ignore_unknown_values: false
+ }]
+ if RUBY_VERSION.to_i < 3
+ args << {}
+ end
+ mock(writer.client).insert_all_table_data(*args) { stub!.insert_errors { nil } }
  end

  driver.run do
@@ -344,25 +364,29 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
  CONFIG

+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
  stub_writer do |writer|
  body = {
  rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
  skip_invalid_rows: false,
  ignore_unknown_values: false,
  }
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+ args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+ mock(writer.client).insert_all_table_data(*args) do
  raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
  end.at_least(1)
  mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)

- mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+ args = build_args(['yourproject_id', 'yourdataset_id', {
  table_reference: {
  table_id: 'foo',
  },
  schema: {
- fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+ fields: schema_fields,
  },
- }, {})
+ }])
+ mock(writer.client).insert_table(*args)
  end

  assert_raise(RuntimeError) do
@@ -416,32 +440,39 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  time_partitioning_type day
  time_partitioning_field time
  time_partitioning_expiration 1h
+
+ require_partition_filter true
  CONFIG

+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
  stub_writer do |writer|
  body = {
  rows: [message],
  skip_invalid_rows: false,
  ignore_unknown_values: false,
  }
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+ args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+ mock(writer.client).insert_all_table_data(*args) do
  raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
  end.at_least(1)
  mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)

- mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+ args = build_args(['yourproject_id', 'yourdataset_id', {
  table_reference: {
  table_id: 'foo',
  },
  schema: {
- fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+ fields: schema_fields,
  },
  time_partitioning: {
  type: 'DAY',
  field: 'time',
  expiration_ms: 3600000,
  },
- }, {})
+ require_partition_filter: true,
+ }])
+ mock(writer.client).insert_table(*args)
  end

  assert_raise(RuntimeError) do
@@ -495,7 +526,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  time_partitioning_type day
  time_partitioning_field time
  time_partitioning_expiration 1h
- time_partitioning_require_partition_filter true

  clustering_fields [
  "time",
@@ -503,23 +533,26 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  ]
  CONFIG

+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
  stub_writer do |writer|
  body = {
  rows: [message],
  skip_invalid_rows: false,
  ignore_unknown_values: false,
  }
- mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+ args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+ mock(writer.client).insert_all_table_data(*args) do
  raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
  end.at_least(1)
  mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)

- mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+ args = build_args(['yourproject_id', 'yourdataset_id', {
  table_reference: {
  table_id: 'foo',
  },
  schema: {
- fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+ fields: schema_fields,
  },
  time_partitioning: {
  type: 'DAY',
@@ -532,7 +565,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  'vhost',
  ],
  },
- }, {})
+ }])
+ mock(writer.client).insert_table(*args)
  end

  assert_raise(RuntimeError) do
data/test/plugin/test_out_bigquery_load.rb CHANGED
@@ -64,7 +64,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  max_bad_records: 0,
  }
  }
- }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+ }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
  stub!.job_reference.stub!.job_id { "dummy_job_id" }
  end
  end
@@ -117,7 +117,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  },
  },
  job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}} ,
- }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+ }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
  stub!.job_reference.stub!.job_id { "dummy_job_id" }
  end
  end
@@ -132,7 +132,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase

  driver.instance_start
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
- metadata = driver.instance.metadata_for_test(tag, time, record)
+ metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
  chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
  c.append([driver.instance.format(tag, time, record)])
  end
@@ -154,11 +154,11 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  max_bad_records: 0,
  }
  }
- }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+ }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
  stub!.job_reference.stub!.job_id { "dummy_job_id" }
  end

- mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+ mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
  stub! do |s|
  s.id { 'dummy_job_id' }
  s.configuration.stub! do |_s|
@@ -215,7 +215,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase

  driver.instance_start
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
- metadata = driver.instance.metadata_for_test(tag, time, record)
+ metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
  chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
  c.append([driver.instance.format(tag, time, record)])
  end
@@ -237,11 +237,11 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  max_bad_records: 0,
  }
  }
- }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+ }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
  stub!.job_reference.stub!.job_id { "dummy_job_id" }
  end

- mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+ mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
  stub! do |s|
  s.id { 'dummy_job_id' }
  s.configuration.stub! do |_s|
@@ -317,7 +317,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
  },
  }
  }
- }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+ }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
  stub!.job_reference.stub!.job_id { "dummy_job_id" }
  end
  end
metadata CHANGED
@@ -1,15 +1,15 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-bigquery
  version: !ruby/object:Gem::Version
- version: 2.2.0
+ version: 3.0.0
  platform: ruby
  authors:
  - Naoya Ito
  - joker1007
- autorequire:
+ autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-08-20 00:00:00.000000000 Z
+ date: 2022-10-05 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake
@@ -139,14 +139,21 @@ extensions: []
  extra_rdoc_files: []
  files:
  - ".github/ISSUE_TEMPLATE.md"
+ - ".github/workflows/linux.yml"
+ - ".github/workflows/windows.yml"
  - ".gitignore"
- - ".travis.yml"
  - Gemfile
  - LICENSE.txt
  - README.md
  - Rakefile
  - fluent-plugin-bigquery.gemspec
  - gemfiles/activesupport-4.gemfile
+ - integration/README.md
+ - integration/create_table.sh
+ - integration/dummer_insert.rb
+ - integration/dummer_load.rb
+ - integration/fluent.conf
+ - integration/schema.json
  - lib/fluent/plugin/bigquery/errors.rb
  - lib/fluent/plugin/bigquery/helper.rb
  - lib/fluent/plugin/bigquery/schema.rb
@@ -168,7 +175,7 @@ homepage: https://github.com/kaizenplatform/fluent-plugin-bigquery
  licenses:
  - Apache-2.0
  metadata: {}
- post_install_message:
+ post_install_message:
  rdoc_options: []
  require_paths:
  - lib
@@ -183,8 +190,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.0.3
- signing_key:
+ rubygems_version: 3.3.7
+ signing_key:
  specification_version: 4
  summary: Fluentd plugin to store data on Google BigQuery
  test_files:
data/.travis.yml DELETED
@@ -1,14 +0,0 @@
- language: ruby
-
- rvm:
- - 2.3.7
- - 2.4.4
- - 2.5.1
-
- gemfile:
- - Gemfile
-
- before_install:
- - gem update bundler
-
- script: bundle exec rake test