fluent-plugin-bigquery 2.2.0 → 3.0.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 36b950bf0783d3ce350d7c7514f5b7946b10fe4b867aec015c9331656e86eb48
-  data.tar.gz: b4b8e92f41008043b09822b20698a7e29ca8daf9ba69c2a5c38c696553e86d71
+  metadata.gz: bc6de961c8c42fddf3d9e297e93db560d16cfc098161232c90ee64f0a5679fee
+  data.tar.gz: 5ec1fee690f77d0fa25d8e427c6ad354cdfdbfafe30a4aee4fea9a5e73db5eb3
 SHA512:
-  metadata.gz: 01d3d39d9247134ca9059b990d0d6a52f308b27711d8cd989de30dfeb4e91a1673f1047d4e9269d24447169d9ec4bbac1d0d9b9f7d93b08b7be5d6c170593f1f
-  data.tar.gz: f226de7925fb048ba5533bf9b7c626f43e4b63eeb92c119d700737d1ae44611fb6fe6294e1ed5f989456de2ee3e1f98334c2d4cd1d89c49b52ef945a3674c8ce
+  metadata.gz: 20fc96d420611a0d12f7cb34656ae87872f24131c70039383a8f8b7d51048a7d4f277a80675f2bee834113fd13d2a9780b772b517f2140481f7fb86ce63f24e3
+  data.tar.gz: cecc8f8682761ddfb22d942b69103823cc728923f6d7043d967254ed02c754db4e792132769f7f3aa91986aa27895ac83bf16358be21e03d3c94e77c43975231
data/.github/workflows/linux.yml ADDED
@@ -0,0 +1,30 @@
+name: Testing on Ubuntu
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby:
+          - 2.7
+          - 3.0
+          - 3.1
+        os:
+          - ubuntu-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/.github/workflows/windows.yml ADDED
@@ -0,0 +1,30 @@
+name: Testing on Windows
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby:
+          - 2.7
+          - 3.0
+          - 3.1
+        os:
+          - windows-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          ruby -v
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/Gemfile CHANGED
@@ -2,3 +2,6 @@ source 'https://rubygems.org'
 
 # Specify your gem's dependencies in fluent-plugin-bigquery.gemspec
 gemspec
+
+gem "oj"
+gem "dummer"
data/README.md CHANGED
@@ -23,6 +23,7 @@ OAuth flow for installed applications.
 | v0.4.x | 0.12.x | 2.0 or later |
 | v1.x.x | 0.14.x or later | 2.2 or later |
 | v2.x.x | 0.14.x or later | 2.3 or later |
+| v3.x.x | 1.x or later | 2.7 or later |
 
 ## With docker image
 If you use official alpine based fluentd docker image (https://github.com/fluent/fluentd-docker-image),
@@ -52,7 +53,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | auto_create_table | bool | no | no | false | If true, creates table automatically |
 | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
 | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
-| schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+| schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema Definition file path. It is formatted by JSON. |
 | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
 | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
 | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
@@ -72,6 +73,7 @@ Because embbeded gem dependency sometimes restricts ruby environment.
 | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
 | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
 | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+| require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. |
 
 #### bigquery_load
 
@@ -379,10 +381,10 @@ format to construct table ids.
 Table ids are formatted at runtime
 using the chunk key time.
 
-see. http://docs.fluentd.org/v0.14/articles/output-plugin-overview
+see. https://docs.fluentd.org/configuration/buffer-section
 
 For example, with the configuration below,
-data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
+data is inserted into tables `accesslog_2014_08_02`, `accesslog_2014_08_03` and so on.
 
 ```apache
 <match dummy>
@@ -392,7 +394,7 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 
   project yourproject_id
   dataset yourdataset_id
-  table accesslog_%Y_%m
+  table accesslog_%Y_%m_%d
 
   <buffer time>
     timekey 1d
@@ -401,6 +403,8 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 </match>
 ```
 
+**NOTE: In current fluentd (v1.15.x), The maximum unit supported by strftime formatting is the granularity of days**
+
 #### record attribute formatting
 The format can be suffixed with attribute name.
 
data/integration/README.md ADDED
@@ -0,0 +1,14 @@
+# Requirements
+
+Set Environment Variable
+
+- GOOGLE_APPLICATION_CREDENTIALS (json key path)
+- PROJECT_NAME
+- DATASET_NAME
+- TABLE_NAME
+
+# How to use
+
+1. execute `create_table.sh`
+1. `bundle exec fluentd -c fluent.conf`
+1. `bundle exec dummer -c dummer_insert.rb` or `bundle exec dummer -c dummer_load.rb`
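A hedged shell sketch of the setup these integration steps assume, run from the `integration/` directory (every value below is a placeholder, not something defined by the gem):

```sh
# Placeholder values -- substitute your own service-account key and BigQuery names.
export GOOGLE_APPLICATION_CREDENTIALS=./service-account.json
export PROJECT_NAME=yourproject_id
export DATASET_NAME=yourdataset_id
export TABLE_NAME=integration_test

./create_table.sh                        # bq mk using schema.json (see below)
bundle exec fluentd -c fluent.conf       # start fluentd with the integration config
bundle exec dummer -c dummer_insert.rb   # or: bundle exec dummer -c dummer_load.rb
```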
data/integration/create_table.sh ADDED
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+set -eux
+bq mk -t --project_id=${PROJECT_NAME} --schema=$(dirname $0)/schema.json ${DATASET_NAME}.${TABLE_NAME}
data/integration/dummer_insert.rb ADDED
@@ -0,0 +1,12 @@
+require "time"
+
+configure "insert" do
+  host "localhost"
+  port 24224
+  rate 100
+  tag type: :string, any: %w(insert_data)
+  field :id, type: :integer, countup: true
+  field :string_field, type: :string, any: %w(str1 str2 str3 str4)
+  field :timestamp_field, type: :string, value: Time.now.iso8601
+  field :date, type: :string, value: Time.now.strftime("%Y-%m-%d")
+end
data/integration/dummer_load.rb ADDED
@@ -0,0 +1,12 @@
+require "time"
+
+configure "load" do
+  host "localhost"
+  port 24224
+  rate 100
+  tag type: :string, any: %w(load_data)
+  field :id, type: :integer, countup: true
+  field :string_field, type: :string, any: %w(str1 str2 str3 str4)
+  field :timestamp_field, type: :string, value: Time.now.iso8601
+  field :date, type: :string, value: Time.now.strftime("%Y-%m-%d")
+end
data/integration/fluent.conf ADDED
@@ -0,0 +1,88 @@
+<source>
+  @type forward
+  port 24224
+  bind 0.0.0.0
+</source>
+
+<match insert_data>
+  @id bigquery-insert-integration
+  @type bigquery_insert
+
+  allow_retry_insert_errors true
+
+  auth_method json_key
+  json_key "#{ENV["GOOGLE_APPLICATION_CREDENTIALS"]}"
+
+  <buffer>
+    @type file
+
+    chunk_limit_size 1m
+    chunk_limit_records 1500
+    total_limit_size 1g
+    path ./log/bigquery-insert-integration
+
+    flush_interval 30
+    flush_thread_count 4
+    flush_at_shutdown true
+
+    retry_max_times 14
+    retry_max_interval 30m
+  </buffer>
+
+  request_open_timeout_sec 2m
+
+  slow_flush_log_threshold 30.0
+
+  project "#{ENV["PROJECT_NAME"]}"
+  dataset "#{ENV["DATASET_NAME"]}"
+  table "#{ENV["TABLE_NAME"]}"
+  auto_create_table false
+  fetch_schema true
+  fetch_schema_table "#{ENV["TABLE_NAME"]}"
+
+  insert_id_field id
+
+  <secondary>
+    @type file
+    path ./log/bigquery-insert-integration.errors
+  </secondary>
+</match>
+
+<match load_data>
+  @id bigquery-load-integration
+  @type bigquery_load
+
+  auth_method json_key
+  json_key "#{ENV["GOOGLE_APPLICATION_CREDENTIALS"]}"
+
+  <buffer>
+    @type file
+
+    chunk_limit_size 1m
+    total_limit_size 1g
+    path ./log/bigquery-load-integration
+
+    flush_interval 120
+    flush_thread_count 4
+    flush_at_shutdown true
+
+    retry_max_times 14
+    retry_max_interval 30m
+  </buffer>
+
+  request_open_timeout_sec 2m
+
+  slow_flush_log_threshold 300.0
+
+  project "#{ENV["PROJECT_NAME"]}"
+  dataset "#{ENV["DATASET_NAME"]}"
+  table "#{ENV["TABLE_NAME"]}"
+  auto_create_table false
+  fetch_schema true
+  fetch_schema_table "#{ENV["TABLE_NAME"]}"
+
+  <secondary>
+    @type file
+    path ./log/bigquery-load-integration.errors
+  </secondary>
+</match>
data/integration/schema.json ADDED
@@ -0,0 +1,22 @@
+[
+  {
+    "name": "id",
+    "type": "INTEGER",
+    "mode": "REQUIRED"
+  },
+  {
+    "name": "string_field",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "timestamp_field",
+    "type": "TIMESTAMP",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "date",
+    "type": "DATE",
+    "mode": "REQUIRED"
+  }
+]
data/lib/fluent/plugin/bigquery/version.rb CHANGED
@@ -1,5 +1,5 @@
 module Fluent
   module BigQueryPlugin
-    VERSION = "2.2.0".freeze
+    VERSION = "3.0.0".freeze
   end
 end
data/lib/fluent/plugin/bigquery/writer.rb CHANGED
@@ -1,7 +1,7 @@
 module Fluent
   module BigQuery
     class Writer
-      def initialize(log, auth_method, options = {})
+      def initialize(log, auth_method, **options)
         @auth_method = auth_method
         @scope = "https://www.googleapis.com/auth/bigquery"
         @options = options
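The move to `**options` here, like the `**{}` and bare keyword calls in the hunks below, tracks Ruby 3's keyword-argument separation (consistent with the new Ruby 2.7/3.0/3.1 CI matrix). A standalone sketch of the behavior difference, not taken from the gem:

```ruby
# Ruby 3 no longer converts a trailing positional hash into keyword arguments.
def call_api(project, dataset, **options)
  [project, dataset, options]
end

call_api("p1", "d1", **{ timeout_sec: 60 })  # fine on Ruby 2.7 and 3.x
call_api("p1", "d1", **{})                   # fine: splatting an empty hash passes no keywords
# call_api("p1", "d1", {})                   # Ruby 2.7: deprecation warning, hash used as keywords
                                             # Ruby 3.x: ArgumentError (given 3, expected 2)
```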
@@ -35,8 +35,9 @@ module Fluent
        }
 
        definition.merge!(time_partitioning: time_partitioning) if time_partitioning
+       definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
        definition.merge!(clustering: clustering) if clustering
-       client.insert_table(project, dataset, definition, {})
+       client.insert_table(project, dataset, definition, **{})
        log.debug "create table", project_id: project, dataset: dataset, table: table_id
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        message = e.message
@@ -82,7 +83,7 @@ module Fluent
        if @options[:auto_create_table]
          res = insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
        else
-         res = client.insert_all_table_data(project, dataset, table_id, body, {})
+         res = client.insert_all_table_data(project, dataset, table_id, body, **{})
        end
        log.debug "insert rows", project_id: project, dataset: dataset, table: table_id, count: rows.size
 
@@ -157,10 +158,8 @@ module Fluent
        res = client.insert_job(
          project,
          configuration,
-         {
-           upload_source: upload_source,
-           content_type: "application/octet-stream",
-         }
+         upload_source: upload_source,
+         content_type: "application/octet-stream",
        )
        JobReference.new(chunk_id, chunk_id_hex, project, dataset, table_id, res.job_reference.job_id)
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
@@ -318,6 +317,16 @@ module Fluent
        end
      end
 
+     def require_partition_filter
+       return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
+
+       if @options[:require_partition_filter]
+         @require_partition_filter = @options[:require_partition_filter]
+       else
+         @require_partition_filter
+       end
+     end
+
      def clustering
        return @clustering if instance_variable_defined?(:@clustering)
 
@@ -332,7 +341,7 @@
 
      def insert_all_table_data_with_create_table(project, dataset, table_id, body, schema)
        try_count ||= 1
-       res = client.insert_all_table_data(project, dataset, table_id, body, {})
+       res = client.insert_all_table_data(project, dataset, table_id, body, **{})
      rescue Google::Apis::ClientError => e
        if e.status_code == 404 && /Not Found: Table/i =~ e.message
          if try_count == 1
data/lib/fluent/plugin/out_bigquery_base.rb CHANGED
@@ -111,9 +111,6 @@
        if @schema
          @table_schema.load_schema(@schema)
        end
-       if @schema_path
-         @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
-       end
 
        formatter_config = conf.elements("format")[0]
        @formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
@@ -126,6 +123,7 @@ module Fluent
        @tables_mutex = Mutex.new
        @fetched_schemas = {}
        @last_fetch_schema_time = Hash.new(0)
+       @read_schemas = {}
      end
 
      def multi_workers_ready?
@@ -133,7 +131,7 @@ module Fluent
      end
 
      def writer
-       @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method, {
+       @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method,
          private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
          email: @email,
          json_key: @json_key,
@@ -148,19 +146,27 @@ module Fluent
          time_partitioning_type: @time_partitioning_type,
          time_partitioning_field: @time_partitioning_field,
          time_partitioning_expiration: @time_partitioning_expiration,
+         require_partition_filter: @require_partition_filter,
          clustering_fields: @clustering_fields,
          timeout_sec: @request_timeout_sec,
          open_timeout_sec: @request_open_timeout_sec,
-       })
+       )
      end
 
      def format(tag, time, record)
+       if record.nil?
+         log.warn("nil record detected. corrupted chunks? tag=#{tag}, time=#{time}")
+         return
+       end
+
        record = inject_values_to_record(tag, time, record)
 
        meta = metadata(tag, time, record)
        schema =
          if @fetch_schema
            fetch_schema(meta)
+         elsif @schema_path
+           read_schema(meta)
          else
            @table_schema
          end
@@ -209,9 +215,26 @@ module Fluent
        extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
      end
 
+     def read_schema(metadata)
+       schema_path = read_schema_target_path(metadata)
+
+       unless @read_schemas[schema_path]
+         table_schema = Fluent::BigQuery::RecordSchema.new("record")
+         table_schema.load_schema(MultiJson.load(File.read(schema_path)))
+         @read_schemas[schema_path] = table_schema
+       end
+       @read_schemas[schema_path]
+     end
+
+     def read_schema_target_path(metadata)
+       extract_placeholders(@schema_path, metadata)
+     end
+
      def get_schema(project, dataset, metadata)
        if @fetch_schema
          @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
+       elsif @schema_path
+         @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
        else
          @table_schema
        end
data/lib/fluent/plugin/out_bigquery_insert.rb CHANGED
@@ -29,6 +29,9 @@ module Fluent
      # If insert_id_field is not specified, true means to allow duplicate rows
      config_param :allow_retry_insert_errors, :bool, default: false
 
+     ## RequirePartitionFilter
+     config_param :require_partition_filter, :bool, default: false
+
      ## Buffer
      config_section :buffer do
        config_set_default :@type, "memory"
@@ -93,6 +96,8 @@ module Fluent
        schema = get_schema(project, dataset, metadata)
 
        insert(project, dataset, table_id, rows, schema, template_suffix)
+     rescue MultiJson::ParseError => e
+       raise Fluent::UnrecoverableError.new(e)
      end
 
      def insert(project, dataset, table_id, rows, schema, template_suffix)
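For context on the `require_partition_filter` parameter registered above: it is passed through to the writer and applied when the plugin creates the table. A minimal, hypothetical `bigquery_insert` configuration sketch (key path, project, dataset, table, and schema path are placeholders):

```apache
<match dummy>
  @type bigquery_insert

  auth_method json_key
  json_key /path/to/your-key.json

  project yourproject_id
  dataset yourdataset_id
  table   access_log

  auto_create_table true
  require_partition_filter true
  time_partitioning_type day
  time_partitioning_field time

  schema_path /path/to/access_log.schema
</match>
```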
data/test/plugin/test_out_bigquery_base.rb CHANGED
@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
   end
 
-  def test_configure_auth_json_key_as_file_raise_permission_error
-    json_key_path = 'test/plugin/testdata/json_key.json'
-    json_key_path_dir = File.dirname(json_key_path)
-
-    begin
-      File.chmod(0000, json_key_path_dir)
-
-      driver = create_driver(%[
-        table foo
-        auth_method json_key
-        json_key #{json_key_path}
-        project yourproject_id
-        dataset yourdataset_id
-        schema [
-          {"name": "time", "type": "INTEGER"},
-          {"name": "status", "type": "INTEGER"},
-          {"name": "bytes", "type": "INTEGER"}
-        ]
-      ])
-      assert_raises(Errno::EACCES) do
-        driver.instance.writer.client
-      end
-    ensure
-      File.chmod(0755, json_key_path_dir)
-    end
-  end
-
   def test_configure_auth_json_key_as_string
     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
     json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
   end
 
   def test_configure_auth_application_default
+    omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
+
     driver = create_driver(%[
       table foo
       auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
     assert_equal :string, table_schema["argv"].type
     assert_equal :repeated, table_schema["argv"].mode
   end
+
+  def test_resolve_schema_path_with_placeholder
+    now = Time.now.to_i
+    driver = create_driver(<<-CONFIG)
+      table ${tag}_%Y%m%d
+      auth_method json_key
+      json_key jsonkey.josn
+      project yourproject_id
+      dataset yourdataset_id
+      schema_path ${tag}.schema
+
+      <buffer tag, time>
+        timekey 1d
+      </buffer>
+    CONFIG
+
+    metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
+
+    assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
+  end
 end
data/test/plugin/test_out_bigquery_insert.rb CHANGED
@@ -5,6 +5,19 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     Fluent::Test.setup
   end
 
+  def is_ruby2?
+    RUBY_VERSION.to_i < 3
+  end
+
+  def build_args(args)
+    if is_ruby2?
+      args << {}
+    end
+    args
+  end
+
+  SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
+
   CONFIG = %[
     table foo
     email foo@bar.example
@@ -121,11 +134,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     driver = create_driver
 
     stub_writer do |writer|
-      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
-      }, {}) do
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
        s = stub!
        s.insert_errors { nil }
        s
@@ -186,11 +200,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     entry = {a: "b"}
     stub_writer do |writer|
-      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
-      }, {}) do
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
        ex = Google::Apis::ServerError.new("error", status_code: d["status_code"])
        raise ex
      end
@@ -245,11 +260,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     entry = {a: "b"}
     stub_writer do |writer|
-      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', {
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', {
        rows: [{json: hash_including(entry)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false
-      }, {}) do
+      }])
+      mock(writer.client).insert_all_table_data(*args) do
        ex = Google::Apis::ServerError.new("error", status_code: 501)
        def ex.reason
          "invalid"
@@ -260,14 +276,14 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata = driver.instance.metadata_for_test(tag, time, record)
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
     end
     assert_raise Fluent::BigQuery::UnRetryableError do
      driver.instance.write(chunk)
     end
-    assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.1
+    assert_in_delta driver.instance.retry.secondary_transition_at , Time.now, 0.2
     driver.instance_shutdown
   end
 
@@ -290,11 +306,15 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
     CONFIG
 
     stub_writer do |writer|
-      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
-        rows: [entry[0]],
-        skip_invalid_rows: false,
-        ignore_unknown_values: false
-      }, {}) { stub!.insert_errors { nil } }
+      args = ['yourproject_id', 'yourdataset_id', 'foo_2014_08_20', {
+        rows: [entry[0]],
+        skip_invalid_rows: false,
+        ignore_unknown_values: false
+      }]
+      if RUBY_VERSION.to_i < 3
+        args << {}
+      end
+      mock(writer.client).insert_all_table_data(*args) { stub!.insert_errors { nil } }
     end
 
     driver.run do
@@ -344,25 +364,29 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
      body = {
        rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
-      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+      args = build_args(['yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
-         fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+         fields: schema_fields,
        },
-      }, {})
+      }])
+      mock(writer.client).insert_table(*args)
     end
 
     assert_raise(RuntimeError) do
@@ -416,32 +440,39 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       time_partitioning_type day
       time_partitioning_field time
       time_partitioning_expiration 1h
+
+      require_partition_filter true
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
      body = {
        rows: [message],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
-      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+      args = build_args(['yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
-         fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+         fields: schema_fields,
        },
        time_partitioning: {
          type: 'DAY',
          field: 'time',
          expiration_ms: 3600000,
        },
-      }, {})
+       require_partition_filter: true,
+      }])
+      mock(writer.client).insert_table(*args)
     end
 
     assert_raise(RuntimeError) do
@@ -495,7 +526,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       time_partitioning_type day
       time_partitioning_field time
       time_partitioning_expiration 1h
-      time_partitioning_require_partition_filter true
 
       clustering_fields [
         "time",
@@ -503,23 +533,26 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
       ]
     CONFIG
 
+    schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
     stub_writer do |writer|
      body = {
        rows: [message],
        skip_invalid_rows: false,
        ignore_unknown_values: false,
      }
-      mock(writer.client).insert_all_table_data('yourproject_id', 'yourdataset_id', 'foo', body, {}) do
+      args = build_args(['yourproject_id', 'yourdataset_id', 'foo', body])
+      mock(writer.client).insert_all_table_data(*args) do
        raise Google::Apis::ClientError.new("notFound: Not found: Table yourproject_id:yourdataset_id.foo", status_code: 404)
      end.at_least(1)
      mock(writer).sleep(instance_of(Numeric)) { nil }.at_least(1)
 
-      mock(writer.client).insert_table('yourproject_id', 'yourdataset_id', {
+      args = build_args(['yourproject_id', 'yourdataset_id', {
        table_reference: {
          table_id: 'foo',
        },
        schema: {
-         fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+         fields: schema_fields,
        },
        time_partitioning: {
          type: 'DAY',
@@ -532,7 +565,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
          'vhost',
        ],
       },
-      }, {})
+      }])
+      mock(writer.client).insert_table(*args)
     end
 
     assert_raise(RuntimeError) do
data/test/plugin/test_out_bigquery_load.rb CHANGED
@@ -64,7 +64,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          max_bad_records: 0,
        }
       }
-      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
     end
@@ -117,7 +117,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
         },
        },
        job_reference: {project_id: 'yourproject_id', job_id: satisfy { |x| x =~ /fluentd_job_.*/}} ,
-      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
     end
@@ -132,7 +132,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata = driver.instance.metadata_for_test(tag, time, record)
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
     end
@@ -154,11 +154,11 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          max_bad_records: 0,
        }
       }
-      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
 
-      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
        stub! do |s|
          s.id { 'dummy_job_id' }
          s.configuration.stub! do |_s|
@@ -215,7 +215,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
 
     driver.instance_start
     tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-    metadata = driver.instance.metadata_for_test(tag, time, record)
+    metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
     chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
     end
@@ -237,11 +237,11 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
          max_bad_records: 0,
        }
       }
-      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
 
-      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+      mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
        stub! do |s|
          s.id { 'dummy_job_id' }
          s.configuration.stub! do |_s|
@@ -317,7 +317,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
         },
        }
       }
-      }, {upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream"}) do
+      }, upload_source: duck_type(:write, :sync, :rewind), content_type: "application/octet-stream") do
        stub!.job_reference.stub!.job_id { "dummy_job_id" }
      end
     end
metadata CHANGED
@@ -1,15 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 2.2.0
+  version: 3.0.0
 platform: ruby
 authors:
 - Naoya Ito
 - joker1007
-autorequire: 
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-08-20 00:00:00.000000000 Z
+date: 2022-10-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -139,14 +139,21 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".github/ISSUE_TEMPLATE.md"
+- ".github/workflows/linux.yml"
+- ".github/workflows/windows.yml"
 - ".gitignore"
-- ".travis.yml"
 - Gemfile
 - LICENSE.txt
 - README.md
 - Rakefile
 - fluent-plugin-bigquery.gemspec
 - gemfiles/activesupport-4.gemfile
+- integration/README.md
+- integration/create_table.sh
+- integration/dummer_insert.rb
+- integration/dummer_load.rb
+- integration/fluent.conf
+- integration/schema.json
 - lib/fluent/plugin/bigquery/errors.rb
 - lib/fluent/plugin/bigquery/helper.rb
 - lib/fluent/plugin/bigquery/schema.rb
@@ -168,7 +175,7 @@ homepage: https://github.com/kaizenplatform/fluent-plugin-bigquery
 licenses:
 - Apache-2.0
 metadata: {}
-post_install_message: 
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -183,8 +190,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.0.3
-signing_key: 
+rubygems_version: 3.3.7
+signing_key:
 specification_version: 4
 summary: Fluentd plugin to store data on Google BigQuery
 test_files:
data/.travis.yml DELETED
@@ -1,14 +0,0 @@
1
- language: ruby
2
-
3
- rvm:
4
- - 2.3.7
5
- - 2.4.4
6
- - 2.5.1
7
-
8
- gemfile:
9
- - Gemfile
10
-
11
- before_install:
12
- - gem update bundler
13
-
14
- script: bundle exec rake test