fluent-plugin-bigquery 2.2.0 → 2.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 36b950bf0783d3ce350d7c7514f5b7946b10fe4b867aec015c9331656e86eb48
-   data.tar.gz: b4b8e92f41008043b09822b20698a7e29ca8daf9ba69c2a5c38c696553e86d71
+   metadata.gz: 4209a2b6eaaf0b6f8ba315b6f5de6690e28fb47890aeea777bdb31889e4785ab
+   data.tar.gz: b0983fb4fa16d72059b0e679ea4ee627d19e805779fa010888fa1723354896a5
  SHA512:
-   metadata.gz: 01d3d39d9247134ca9059b990d0d6a52f308b27711d8cd989de30dfeb4e91a1673f1047d4e9269d24447169d9ec4bbac1d0d9b9f7d93b08b7be5d6c170593f1f
-   data.tar.gz: f226de7925fb048ba5533bf9b7c626f43e4b63eeb92c119d700737d1ae44611fb6fe6294e1ed5f989456de2ee3e1f98334c2d4cd1d89c49b52ef945a3674c8ce
+   metadata.gz: a6fc6891eda12bbc1272af7af9c4e8d48e588bc7ef65153b3a7524e39468baebb8fdb925856d1850bbda12fed5d33865faa56542503f76fdf724a18937c7d56e
+   data.tar.gz: fff0599b6a838cb4ff233ba9585b558ff733eed8063c1cf36ee08aaacb9b3c2ca1bce4d13db2a51ecc72c398ba751a18b2856a6348f43738ee8ca366becdea61
data/.github/workflows/linux.yml ADDED
@@ -0,0 +1,31 @@
+ name: Testing on Ubuntu
+ on:
+   - push
+   - pull_request
+ jobs:
+   build:
+     runs-on: ${{ matrix.os }}
+     strategy:
+       fail-fast: false
+       matrix:
+         ruby:
+           - 2.6
+           - 2.7
+           - 3.0
+           - 3.1
+         os:
+           - ubuntu-latest
+     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+     steps:
+       - uses: actions/checkout@v2
+       - uses: ruby/setup-ruby@v1
+         with:
+           ruby-version: ${{ matrix.ruby }}
+       - name: unit testing
+         env:
+           CI: true
+         run: |
+           ruby -v
+           gem install bundler rake
+           bundle install --jobs 4 --retry 3
+           bundle exec rake test
data/.github/workflows/windows.yml ADDED
@@ -0,0 +1,27 @@
+ name: Testing on Windows
+ on:
+   - push
+   - pull_request
+ jobs:
+   build:
+     runs-on: ${{ matrix.os }}
+     strategy:
+       fail-fast: false
+       matrix:
+         ruby: [ '2.6', '2.7', '3.0', '3.1' ]
+         os:
+           - windows-latest
+     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+     steps:
+       - uses: actions/checkout@v2
+       - uses: ruby/setup-ruby@v1
+         with:
+           ruby-version: ${{ matrix.ruby }}
+       - name: unit testing
+         env:
+           CI: true
+         run: |
+           ruby -v
+           gem install bundler rake
+           bundle install --jobs 4 --retry 3
+           bundle exec rake test
data/README.md CHANGED
@@ -1,5 +1,13 @@
  # fluent-plugin-bigquery

+ ## Notice
+
+ We will transfer the fluent-plugin-bigquery repository to the [fluent-plugins-nursery](https://github.com/fluent-plugins-nursery) organization.
+ This does not change the maintenance plan.
+ The main purpose is to resolve the mismatch between the maintainers and the current organization.
+
+ ---
+
  [Fluentd](http://fluentd.org) output plugin to load/insert data into Google BigQuery.

  - **Plugin type**: Output
@@ -52,7 +60,7 @@ Because embedded gem dependency sometimes restricts ruby environment.
  | auto_create_table | bool | no | no | false | If true, creates table automatically |
  | ignore_unknown_values | bool | no | no | false | Accept rows that contain values that do not match the schema. The unknown values are ignored. |
  | schema | array | yes (either `fetch_schema` or `schema_path`) | no | nil | Schema Definition. It is formatted by JSON. |
- | schema_path | string | yes (either `fetch_schema`) | no | nil | Schema Definition file path. It is formatted by JSON. |
+ | schema_path | string | yes (either `fetch_schema`) | yes | nil | Schema Definition file path. It is formatted by JSON. |
  | fetch_schema | bool | yes (either `schema_path`) | no | false | If true, fetch table schema definition from Bigquery table automatically. |
  | fetch_schema_table | string | no | yes | nil | If set, fetch table schema definition from this table, If fetch_schema is false, this param is ignored |
  | schema_cache_expire | integer | no | no | 600 | Value is second. If current time is after expiration interval, re-fetch table schema definition. |
@@ -72,6 +80,7 @@ Because embedded gem dependency sometimes restricts ruby environment.
  | insert_id_field | string | no | no | nil | Use key as `insert_id` of Streaming Insert API parameter. see. https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor |
  | add_insert_timestamp | string | no | no | nil | Adds a timestamp column just before sending the rows to BigQuery, so that buffering time is not taken into account. Gives a field in BigQuery which represents the insert time of the row. |
  | allow_retry_insert_errors | bool | no | no | false | Retry to insert rows when an insertErrors occurs. There is a possibility that rows are inserted in duplicate. |
+ | require_partition_filter | bool | no | no | false | If true, queries over this table require a partition filter that can be used for partition elimination to be specified. |

  #### bigquery_load
@@ -1,5 +1,5 @@
  module Fluent
    module BigQueryPlugin
-     VERSION = "2.2.0".freeze
+     VERSION = "2.3.0".freeze
    end
  end
@@ -35,6 +35,7 @@ module Fluent
  }

  definition.merge!(time_partitioning: time_partitioning) if time_partitioning
+ definition.merge!(require_partition_filter: require_partition_filter) if require_partition_filter
  definition.merge!(clustering: clustering) if clustering
  client.insert_table(project, dataset, definition, {})
  log.debug "create table", project_id: project, dataset: dataset, table: table_id
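
Editor's note: as a Ruby sketch of the effect, with the option enabled the definition hash handed to insert_table carries the flag at the top level, alongside time_partitioning. The field values below are illustrative, mirroring the test expectations later in this diff:

    definition = {
      table_reference: { table_id: "foo" },
      schema: { fields: [...] },                # fields come from the configured schema source
      time_partitioning: { type: "DAY", field: "time", expiration_ms: 3600000 },
      require_partition_filter: true,
    }
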
@@ -318,6 +319,16 @@ module Fluent
  end
  end

+ def require_partition_filter
+   return @require_partition_filter if instance_variable_defined?(:@require_partition_filter)
+
+   if @options[:require_partition_filter]
+     @require_partition_filter = @options[:require_partition_filter]
+   else
+     @require_partition_filter
+   end
+ end
+
  def clustering
    return @clustering if instance_variable_defined?(:@clustering)

@@ -111,9 +111,6 @@ module Fluent
  if @schema
    @table_schema.load_schema(@schema)
  end
- if @schema_path
-   @table_schema.load_schema(MultiJson.load(File.read(@schema_path)))
- end

  formatter_config = conf.elements("format")[0]
  @formatter = formatter_create(usage: 'out_bigquery_for_insert', default_type: 'json', conf: formatter_config)
@@ -126,6 +123,7 @@ module Fluent
  @tables_mutex = Mutex.new
  @fetched_schemas = {}
  @last_fetch_schema_time = Hash.new(0)
+ @read_schemas = {}
  end

  def multi_workers_ready?
@@ -148,6 +146,7 @@ module Fluent
  time_partitioning_type: @time_partitioning_type,
  time_partitioning_field: @time_partitioning_field,
  time_partitioning_expiration: @time_partitioning_expiration,
+ require_partition_filter: @require_partition_filter,
  clustering_fields: @clustering_fields,
  timeout_sec: @request_timeout_sec,
  open_timeout_sec: @request_open_timeout_sec,
@@ -161,6 +160,8 @@ module Fluent
  schema =
    if @fetch_schema
      fetch_schema(meta)
+   elsif @schema_path
+     read_schema(meta)
    else
      @table_schema
    end
@@ -209,9 +210,26 @@ module Fluent
    extract_placeholders(@fetch_schema_table || @tablelist[0], metadata)
  end

+ def read_schema(metadata)
+   schema_path = read_schema_target_path(metadata)
+
+   unless @read_schemas[schema_path]
+     table_schema = Fluent::BigQuery::RecordSchema.new("record")
+     table_schema.load_schema(MultiJson.load(File.read(schema_path)))
+     @read_schemas[schema_path] = table_schema
+   end
+   @read_schemas[schema_path]
+ end
+
+ def read_schema_target_path(metadata)
+   extract_placeholders(@schema_path, metadata)
+ end
+
  def get_schema(project, dataset, metadata)
    if @fetch_schema
      @fetched_schemas["#{project}.#{dataset}.#{fetch_schema_target_table(metadata)}"] || fetch_schema(metadata)
+   elsif @schema_path
+     @read_schemas[read_schema_target_path(metadata)] || read_schema(metadata)
    else
      @table_schema
    end
  end
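
Editor's note: because read_schema_target_path runs @schema_path through extract_placeholders, the schema file can now vary per chunk. A minimal configuration sketch, assuming per-tag schema files exist on disk (paths and buffer settings are illustrative):

    table ${tag}_%Y%m%d
    schema_path /path/to/${tag}.schema

    <buffer tag, time>
      timekey 1d
    </buffer>
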
@@ -29,6 +29,9 @@ module Fluent
  # If insert_id_field is not specified, true means to allow duplicate rows
  config_param :allow_retry_insert_errors, :bool, default: false

+ ## RequirePartitionFilter
+ config_param :require_partition_filter, :bool, default: false
+
  ## Buffer
  config_section :buffer do
    config_set_default :@type, "memory"
@@ -147,33 +147,6 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
  assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

- def test_configure_auth_json_key_as_file_raise_permission_error
-   json_key_path = 'test/plugin/testdata/json_key.json'
-   json_key_path_dir = File.dirname(json_key_path)
-
-   begin
-     File.chmod(0000, json_key_path_dir)
-
-     driver = create_driver(%[
-       table foo
-       auth_method json_key
-       json_key #{json_key_path}
-       project yourproject_id
-       dataset yourdataset_id
-       schema [
-         {"name": "time", "type": "INTEGER"},
-         {"name": "status", "type": "INTEGER"},
-         {"name": "bytes", "type": "INTEGER"}
-       ]
-     ])
-     assert_raises(Errno::EACCES) do
-       driver.instance.writer.client
-     end
-   ensure
-     File.chmod(0755, json_key_path_dir)
-   end
- end
-
  def test_configure_auth_json_key_as_string
    json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
    json_key_io = StringIO.new(json_key)
@@ -199,6 +172,8 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
  end

  def test_configure_auth_application_default
+   omit "This testcase depends on some environment variables." if ENV["CI"] == "true"
+
    driver = create_driver(%[
      table foo
      auth_method application_default
@@ -576,4 +551,24 @@ class BigQueryBaseOutputTest < Test::Unit::TestCase
    assert_equal :string, table_schema["argv"].type
    assert_equal :repeated, table_schema["argv"].mode
  end
+
+ def test_resolve_schema_path_with_placeholder
+   now = Time.now.to_i
+   driver = create_driver(<<-CONFIG)
+     table ${tag}_%Y%m%d
+     auth_method json_key
+     json_key jsonkey.josn
+     project yourproject_id
+     dataset yourdataset_id
+     schema_path ${tag}.schema
+
+     <buffer tag, time>
+       timekey 1d
+     </buffer>
+   CONFIG
+
+   metadata = Fluent::Plugin::Buffer::Metadata.new(now, "foo", {})
+
+   assert_equal "foo.schema", driver.instance.read_schema_target_path(metadata)
+ end
  end
@@ -5,6 +5,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
  Fluent::Test.setup
  end

+ SCHEMA_PATH = File.join(File.dirname(__FILE__), "testdata", "apache.schema")
+
  CONFIG = %[
    table foo
    email foo@bar.example
@@ -260,7 +262,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase

  driver.instance_start
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
- metadata = driver.instance.metadata_for_test(tag, time, record)
+ metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
  chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
    c.append([driver.instance.format(tag, time, record)])
  end
@@ -344,6 +346,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
    schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
  CONFIG

+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
  stub_writer do |writer|
    body = {
      rows: [{json: Fluent::BigQuery::Helper.deep_symbolize_keys(message)}],
@@ -360,7 +364,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      table_id: 'foo',
    },
    schema: {
-     fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+     fields: schema_fields,
    },
  }, {})
  end
@@ -416,8 +420,12 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
    time_partitioning_type day
    time_partitioning_field time
    time_partitioning_expiration 1h
+
+   require_partition_filter true
  CONFIG

+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
  stub_writer do |writer|
    body = {
      rows: [message],
@@ -434,13 +442,14 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      table_id: 'foo',
    },
    schema: {
-     fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+     fields: schema_fields,
    },
    time_partitioning: {
      type: 'DAY',
      field: 'time',
      expiration_ms: 3600000,
    },
+   require_partition_filter: true,
  }, {})
  end

@@ -495,7 +504,6 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
    time_partitioning_type day
    time_partitioning_field time
    time_partitioning_expiration 1h
-   time_partitioning_require_partition_filter true

    clustering_fields [
      "time",
@@ -503,6 +511,8 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
    ]
  CONFIG

+ schema_fields = Fluent::BigQuery::Helper.deep_symbolize_keys(MultiJson.load(File.read(SCHEMA_PATH)))
+
  stub_writer do |writer|
    body = {
      rows: [message],
@@ -519,7 +529,7 @@ class BigQueryInsertOutputTest < Test::Unit::TestCase
      table_id: 'foo',
    },
    schema: {
-     fields: driver.instance.instance_variable_get(:@table_schema).to_a,
+     fields: schema_fields,
    },
    time_partitioning: {
      type: 'DAY',
@@ -132,7 +132,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase

  driver.instance_start
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
- metadata = driver.instance.metadata_for_test(tag, time, record)
+ metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
  chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
    c.append([driver.instance.format(tag, time, record)])
  end
@@ -158,7 +158,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
    stub!.job_reference.stub!.job_id { "dummy_job_id" }
  end

- mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+ mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
    stub! do |s|
      s.id { 'dummy_job_id' }
      s.configuration.stub! do |_s|
@@ -215,7 +215,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase

  driver.instance_start
  tag, time, record = "tag", Time.now.to_i, {"a" => "b"}
- metadata = driver.instance.metadata_for_test(tag, time, record)
+ metadata = Fluent::Plugin::Buffer::Metadata.new(tag, time, record)
  chunk = driver.instance.buffer.generate_chunk(metadata).tap do |c|
    c.append([driver.instance.format(tag, time, record)])
  end
@@ -241,7 +241,7 @@ class BigQueryLoadOutputTest < Test::Unit::TestCase
    stub!.job_reference.stub!.job_id { "dummy_job_id" }
  end

- mock(writer.client).get_job('yourproject_id', 'dummy_job_id', {:location=>nil}) do
+ mock(writer.client).get_job('yourproject_id', 'dummy_job_id', :location=>nil) do
    stub! do |s|
      s.id { 'dummy_job_id' }
      s.configuration.stub! do |_s|
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-bigquery
  version: !ruby/object:Gem::Version
-   version: 2.2.0
+   version: 2.3.0
  platform: ruby
  authors:
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-08-20 00:00:00.000000000 Z
+ date: 2022-02-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: rake
@@ -139,8 +139,9 @@ extensions: []
  extra_rdoc_files: []
  files:
  - ".github/ISSUE_TEMPLATE.md"
+ - ".github/workflows/linux.yml"
+ - ".github/workflows/windows.yml"
  - ".gitignore"
- - ".travis.yml"
  - Gemfile
  - LICENSE.txt
  - README.md
@@ -183,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
    version: '0'
  requirements: []
- rubygems_version: 3.0.3
+ rubygems_version: 3.1.4
  signing_key:
  specification_version: 4
  summary: Fluentd plugin to store data on Google BigQuery
data/.travis.yml DELETED
@@ -1,14 +0,0 @@
- language: ruby
-
- rvm:
-   - 2.3.7
-   - 2.4.4
-   - 2.5.1
-
- gemfile:
-   - Gemfile
-
- before_install:
-   - gem update bundler
-
- script: bundle exec rake test