fluent-plugin-bigquery 2.2.0 → 2.3.0

This diff shows the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 36b950bf0783d3ce350d7c7514f5b7946b10fe4b867aec015c9331656e86eb48
-   data.tar.gz: b4b8e92f41008043b09822b20698a7e29ca8daf9ba69c2a5c38c696553e86d71
+   metadata.gz: 4209a2b6eaaf0b6f8ba315b6f5de6690e28fb47890aeea777bdb31889e4785ab
+   data.tar.gz: b0983fb4fa16d72059b0e679ea4ee627d19e805779fa010888fa1723354896a5
  SHA512:
-   metadata.gz: 01d3d39d9247134ca9059b990d0d6a52f308b27711d8cd989de30dfeb4e91a1673f1047d4e9269d24447169d9ec4bbac1d0d9b9f7d93b08b7be5d6c170593f1f
-   data.tar.gz: f226de7925fb048ba5533bf9b7c626f43e4b63eeb92c119d700737d1ae44611fb6fe6294e1ed5f989456de2ee3e1f98334c2d4cd1d89c49b52ef945a3674c8ce
+   metadata.gz: a6fc6891eda12bbc1272af7af9c4e8d48e588bc7ef65153b3a7524e39468baebb8fdb925856d1850bbda12fed5d33865faa56542503f76fdf724a18937c7d56e
+   data.tar.gz: fff0599b6a838cb4ff233ba9585b558ff733eed8063c1cf36ee08aaacb9b3c2ca1bce4d13db2a51ecc72c398ba751a18b2856a6348f43738ee8ca366becdea61
data/.github/workflows/linux.yml ADDED
@@ -0,0 +1,31 @@
+ name: Testing on Ubuntu
+ on:
+   - push
+   - pull_request
+ jobs:
+   build:
+     runs-on: ${{ matrix.os }}
+     strategy:
+       fail-fast: false
+       matrix:
+         ruby:
+           - 2.6
+           - 2.7
+           - 3.0
+           - 3.1
+         os:
+           - ubuntu-latest
+     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+     steps:
+       - uses: actions/checkout@v2
+       - uses: ruby/setup-ruby@v1
+         with:
+           ruby-version: ${{ matrix.ruby }}
+       - name: unit testing
+         env:
+           CI: true
+         run: |
+           ruby -v
+           gem install bundler rake
+           bundle install --jobs 4 --retry 3
+           bundle exec rake test
data/.github/workflows/windows.yml ADDED
@@ -0,0 +1,27 @@
+ name: Testing on Windows
+ on:
+   - push
+   - pull_request
+ jobs:
+   build:
+     runs-on: ${{ matrix.os }}
+     strategy:
+       fail-fast: false
+       matrix:
+         ruby: [ '2.6', '2.7', '3.0', '3.1' ]
+         os:
+           - windows-latest
+     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+     steps:
+       - uses: actions/checkout@v2
+       - uses: ruby/setup-ruby@v1
+         with:
+           ruby-version: ${{ matrix.ruby }}
+       - name: unit testing
+         env:
+           CI: true
+         run: |
+           ruby -v
+           gem install bundler rake
+           bundle install --jobs 4 --retry 3
+           bundle exec rake test
data/README.md CHANGED
@@ -1,5 +1,13 @@
  # fluent-plugin-bigquery

+ ## Notice
+
+ We will transfer the fluent-plugin-bigquery repository to the [fluent-plugins-nursery](https://github.com/fluent-plugins-nursery) organization.
+ This does not change the maintenance plan.
+ The main purpose is to resolve the mismatch between the maintainers and the current organization.
+
+ ---
+
  [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.

  - **Plugin type**: Output
@@ -52,7 +60,7 @@ Because embedded gem dependency sometimes restricts ruby environment.
  | auto_create_table | bool | no | no | false | If true, creates table automatically |
  | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
  | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
- | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+ | schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema Definition file path. It is formatted by JSON. |
  | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
  | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table. If fetch_schema is false, this param is ignored. |
  | schema_cache_expire | integer | no | no | 600 | Value is in seconds. If current time is after expiration interval, re-fetch table schema definition. |
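With this release the Placeholder column for `schema_path` flips from "no" to "yes": the path is now resolved per buffer chunk through `extract_placeholders` (see the new `read_schema`/`read_schema_target_path` methods further down). A minimal config sketch of the new behavior, assuming a hypothetical `accesslog` tag and per-tag schema files:

  <match accesslog>
    @type bigquery_insert
    auth_method json_key
    json_key /path/to/key.json
    project yourproject_id
    dataset yourdataset_id
    table ${tag}_%Y%m%d
    # ${tag} below is resolved per chunk, e.g. to "accesslog.schema"
    schema_path /path/to/${tag}.schema

    <buffer tag, time>
      timekey 1d
    </buffer>
  </match>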
@@ -72,6 +80,7 @@ Because embedded gem dependency sometimes restricts ruby environment.
  | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. See https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
  | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
  | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+ | require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter (usable for partition elimination) to be specified. |

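The new `require_partition_filter` option takes effect when the plugin creates the table itself: as the writer.rb hunk below shows, it is merged into the table definition alongside `time_partitioning` and `clustering`. A hedged sketch combining it with the existing auto-creation and partitioning options (credentials, project, and table names are placeholders):

  <match dummy>
    @type bigquery_insert
    auth_method json_key
    json_key /path/to/key.json
    project yourproject_id
    dataset yourdataset_id
    table foo
    schema_path /path/to/apache.schema

    auto_create_table true
    time_partitioning_type day
    time_partitioning_field time
    time_partitioning_expiration 1h
    require_partition_filter true
  </match>

Note that the test changes below also drop the older `time_partitioning_require_partition_filter` spelling in favor of this flat option name.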
  #### bigquery_load
 
data/lib/fluent/plugin/bigquery/version.rb CHANGED
@@ -1,5 +1,5 @@
  module Fluent
    module BigQueryPlugin
-     VERSION = "2.2.0".freeze
+     VERSION = "2.3.0".freeze
    end
  end
data/lib/fluent/plugin/bigquery/writer.rb CHANGED
@@ -35,6 +35,7 @@ module Fluent
  }

  definition.merge!(time_partitioning: time_partitioning) if time_partitioning
+ definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
  definition.merge!(clustering: clustering) if clustering
  client.insert_table(project, dataset, definition, {})
  log.debug "create table", project_id: project, dataset: dataset, table: table_id
@@ -318,6 +319,16 @@ module Fluent
      end
    end

+   def require_partition_filter
+     return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
+
+     if @options[:require_partition_filter]
+       @require_partition_filter = @options[:require_partition_filter]
+     else
+       @require_partition_filter
+     end
+   end
+
    def clustering
      return @clustering if instance_variable_defined?(:@clustering)

data/lib/fluent/plugin/out_bigquery_base.rb CHANGED
@@ -111,9 +111,6 @@ module Fluent
    if @schema
      @table_schema.load_schema(@schema)
    end
-   if @schema_path
-     @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
-   end

    formatter_config = conf.elements("format")[0]
    @formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
@@ -126,6 +123,7 @@ module Fluent
    @tables_mutex = Mutex.new
    @fetched_schemas = {}
    @last_fetch_schema_time = Hash.new(0)
+   @read_schemas = {}
  end

  def multi_workers_ready?
@@ -148,6 +146,7 @@ module Fluent
    time_partitioning_type: @time_partitioning_type,
    time_partitioning_field: @time_partitioning_field,
    time_partitioning_expiration: @time_partitioning_expiration,
+   require_partition_filter: @require_partition_filter,
    clustering_fields: @clustering_fields,
    timeout_sec: @request_timeout_sec,
    open_timeout_sec: @request_open_timeout_sec,
@@ -161,6 +160,8 @@ module Fluent
    schema =
      if @fetch_schema
        fetch_schema(meta)
+     elsif @schema_path
+       read_schema(meta)
      else
        @table_schema
      end
@@ -209,9 +210,26 @@ module Fluent
      extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
    end

+   def read_schema(metadata)
+     schema_path = read_schema_target_path(metadata)
+
+     unless @read_schemas[schema_path]
+       table_schema = Fluent::BigQuery::RecordSchema.new("record")
+       table_schema.load_schema(MultiJson.load(File.read(schema_path)))
+       @read_schemas[schema_path] = table_schema
+     end
+     @read_schemas[schema_path]
+   end
+
+   def read_schema_target_path(metadata)
+     extract_placeholders(@schema_path, metadata)
+   end
+
    def get_schema(project, dataset, metadata)
      if @fetch_schema
        @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
+     elsif @schema_path
+       @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
      else
        @table_schema
      end
data/lib/fluent/plugin/out_bigquery_insert.rb CHANGED
@@ -29,6 +29,9 @@ module Fluent
    # If insert_id_field is not specified, true means to allow duplicate rows
    config_param :allow_retry_insert_errors, :bool, default: false

+   ## RequirePartitionFilter
+   config_param :require_partition_filter, :bool, default: false
+
    ## Buffer
    config_section :buffer do
      config_set_default :@type, "memory"
data/test/plugin/test_out_bigquery_base.rb CHANGED
@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

- def test_configure_auth_json_key_as_file_raise_permission_error
-   json_key_path = 'test/plugin/testdata/json_key.json'
-   json_key_path_dir = File.dirname(json_key_path)
-
-   begin
-     File.chmod(0000, json_key_path_dir)
-
-     driver = create_driver(%[
-       table foo
-       auth_method json_key
-       json_key #{json_key_path}
-       project yourproject_id
-       dataset yourdataset_id
-       schema [
-         {"name": "time", "type": "INTEGER"},
-         {"name": "status", "type": "INTEGER"},
-         {"name": "bytes", "type": "INTEGER"}
-       ]
-     ])
-     assert_raises(Errno::EACCES) do
-       driver.instance.writer.client
-     end
-   ensure
-     File.chmod(0755, json_key_path_dir)
-   end
- end
-
  def test_configure_auth_json_key_as_string
    json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
    json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
  end

  def test_configure_auth_application_default
+   omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
+
    driver = create_driver(%[
      table foo
      auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
    assert_equal :string, table_schema["argv"].type
    assert_equal :repeated, table_schema["argv"].mode
  end
+
+ def test_resolve_schema_path_with_placeholder
+   now = Time.now.to_i
+   driver = create_driver(<<-CONFIG)
+     table ${tag}_%Y%m%d
+     auth_method json_key
+     json_key jsonkey.josn
+     project yourproject_id
+     dataset yourdataset_id
+     schema_path ${tag}.schema
+
+     <buffer tag, time>
+       timekey 1d
+     </buffer>
+   CONFIG
+
+   metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
+
+   assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
+ end
end
data/test/plugin/test_out_bigquery_insert.rb CHANGED
@@ -5,6 +5,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
    Fluent::Test.setup
  end

+ SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
+
  CONFIG = %[
    table foo
    email foo@bar.example
@@ -260,7 +262,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase

    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-   metadata = driver.instance.metadata_for_test(tag, time, record)
+   metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
@@ -344,6 +346,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
    CONFIG

+   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
    stub_writer do |writer|
      body = {
        rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
@@ -360,7 +364,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
          table_id: 'foo',
        },
        schema: {
-         fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+         fields: schema_fields,
        },
      }, {})
    end
@@ -416,8 +420,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      time_partitioning_type day
      time_partitioning_field time
      time_partitioning_expiration 1h
+
+     require_partition_filter true
    CONFIG

+   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
    stub_writer do |writer|
      body = {
        rows: [message],
@@ -434,13 +442,14 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
          table_id: 'foo',
        },
        schema: {
-         fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+         fields: schema_fields,
        },
        time_partitioning: {
          type: 'DAY',
          field: 'time',
          expiration_ms: 3600000,
        },
+       require_partition_filter: true,
      }, {})
    end

@@ -495,7 +504,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      time_partitioning_type day
      time_partitioning_field time
      time_partitioning_expiration 1h
-     time_partitioning_require_partition_filter true

      clustering_fields [
        "time",
@@ -503,6 +511,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      ]
    CONFIG

+   schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
    stub_writer do |writer|
      body = {
        rows: [message],
@@ -519,7 +529,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
          table_id: 'foo',
        },
        schema: {
-         fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+         fields: schema_fields,
        },
        time_partitioning: {
          type: 'DAY',
data/test/plugin/test_out_bigquery_load.rb CHANGED
@@ -132,7 +132,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase

    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-   metadata = driver.instance.metadata_for_test(tag, time, record)
+   metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
@@ -158,7 +158,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
      stub!.job_reference.stub!.job_id { "dummy_job_id" }
    end

-   mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+   mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
      stub! do |s|
        s.id { 'dummy_job_id' }
        s.configuration.stub! do |_s|
@@ -215,7 +215,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase

    driver.instance_start
    tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
-   metadata = driver.instance.metadata_for_test(tag, time, record)
+   metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
    chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
      c.append([driver.instance.format(tag, time, record)])
    end
@@ -241,7 +241,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
      stub!.job_reference.stub!.job_id { "dummy_job_id" }
    end

-   mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+   mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
      stub! do |s|
        s.id { 'dummy_job_id' }
        s.configuration.stub! do |_s|
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-bigquery
  version: !ruby/object:Gem::Version
-   version: 2.2.0
+   version: 2.3.0
  platform: ruby
  authors:
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-08-20 00:00:00.000000000 Z
+ date: 2022-02-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: rake
@@ -139,8 +139,9 @@ extensions: []
  extra_rdoc_files: []
  files:
  - ".github/ISSUE_TEMPLATE.md"
+ - ".github/workflows/linux.yml"
+ - ".github/workflows/windows.yml"
  - ".gitignore"
- - ".travis.yml"
  - Gemfile
  - LICENSE.txt
  - README.md
@@ -183,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
    version: '0'
  requirements: []
- rubygems_version: 3.0.3
+ rubygems_version: 3.1.4
  signing_key:
  specification_version: 4
  summary: Fluentd plugin to store data on Google BigQuery
data/.travis.yml DELETED
@@ -1,14 +0,0 @@
- language: ruby
-
- rvm:
-   - 2.3.7
-   - 2.4.4
-   - 2.5.1
-
- gemfile:
-   - Gemfile
-
- before_install:
-   - gem update bundler
-
- script: bundle exec rake test