fluent-plugin-bigquery 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bc6de961c8c42fddf3d9e297e93db560d16cfc098161232c90ee64f0a5679fee
4
- data.tar.gz: 5ec1fee690f77d0fa25d8e427c6ad354cdfdbfafe30a4aee4fea9a5e73db5eb3
3
+ metadata.gz: 52e15b9cc1e5fba553895298e0e1a4510b2c3be0e333a8c9853ef8fb9a30e721
4
+ data.tar.gz: 9be1a5a48e75f63bd83c103111664690a7e8fa583dfda548a7da2dfd3437960f
5
5
  SHA512:
6
- metadata.gz: 20fc96d420611a0d12f7cb34656ae87872f24131c70039383a8f8b7d51048a7d4f277a80675f2bee834113fd13d2a9780b772b517f2140481f7fb86ce63f24e3
7
- data.tar.gz: cecc8f8682761ddfb22d942b69103823cc728923f6d7043d967254ed02c754db4e792132769f7f3aa91986aa27895ac83bf16358be21e03d3c94e77c43975231
6
+ metadata.gz: 8fd48a77fa9cf4b04706c4c3d041aa36ccc5011024fd6b37287c7ac661d0137458940e832410ae14a2385d77a0370908a22a6e856cbc9de4194da5a0866691ff
7
+ data.tar.gz: aff96e78358ced9a0a213739e8968bc4caa65afa1915ba4bc1a4660161978418ced12dbdec539ef960967f628c8893fd821db28ffe4e4401fe22010e200934ee
@@ -4,6 +4,14 @@
4
4
  bind 0.0.0.0
5
5
  </source>
6
6
 
7
+ <source>
8
+ @type dummy
9
+ dummy {"json_field": {"foo": "val1", "bar": "val2", "hoge": 1}, "geography_field": {"type": "LineString", "coordinates": [[-118.4085, 33.9416], [-73.7781, 40.6413]]}, "timestamp_field": "2022-12-15T22:40:21+09:00", "date": "2022-12-15", "record_field": {"inner_field": "hoge", "inner_json": {"key1": "val1", "key2": "val2"}}, "repeated_string_field": ["a", "b", "c"]}
10
+ auto_increment_key id
11
+
12
+ tag insert_data
13
+ </source>
14
+
7
15
  <match insert_data>
8
16
  @id bigquery-insert-integration
9
17
  @type bigquery_insert
@@ -21,7 +29,7 @@
21
29
  total_limit_size 1g
22
30
  path ./log/bigquery-insert-integration
23
31
 
24
- flush_interval 30
32
+ flush_interval 15
25
33
  flush_thread_count 4
26
34
  flush_at_shutdown true
27
35
 
@@ -37,6 +45,7 @@
37
45
  dataset "#{ENV["DATASET_NAME"]}"
38
46
  table "#{ENV["TABLE_NAME"]}"
39
47
  auto_create_table false
48
+ # schema_path integration/schema.json
40
49
  fetch_schema true
41
50
  fetch_schema_table "#{ENV["TABLE_NAME"]}"
42
51
 
@@ -78,6 +87,7 @@
78
87
  dataset "#{ENV["DATASET_NAME"]}"
79
88
  table "#{ENV["TABLE_NAME"]}"
80
89
  auto_create_table false
90
+ # schema_path integration/schema.json
81
91
  fetch_schema true
82
92
  fetch_schema_table "#{ENV["TABLE_NAME"]}"
83
93
 
@@ -9,6 +9,16 @@
9
9
  "type": "STRING",
10
10
  "mode": "NULLABLE"
11
11
  },
12
+ {
13
+ "name": "json_field",
14
+ "type": "JSON",
15
+ "mode": "NULLABLE"
16
+ },
17
+ {
18
+ "name": "geography_field",
19
+ "type": "GEOGRAPHY",
20
+ "mode": "NULLABLE"
21
+ },
12
22
  {
13
23
  "name": "timestamp_field",
14
24
  "type": "TIMESTAMP",
@@ -18,5 +28,27 @@
18
28
  "name": "date",
19
29
  "type": "DATE",
20
30
  "mode": "REQUIRED"
31
+ },
32
+ {
33
+ "name": "record_field",
34
+ "type": "RECORD",
35
+ "mode": "NULLABLE",
36
+ "fields": [
37
+ {
38
+ "name": "inner_field",
39
+ "type": "STRING",
40
+ "mode": "REQUIRED"
41
+ },
42
+ {
43
+ "name": "inner_json",
44
+ "type": "JSON",
45
+ "mode": "REQUIRED"
46
+ }
47
+ ]
48
+ },
49
+ {
50
+ "name": "repeated_string_field",
51
+ "type": "STRING",
52
+ "mode": "REPEATED"
21
53
  }
22
54
  ]
@@ -23,23 +23,23 @@ module Fluent
23
23
 
24
24
  attr_reader :name, :mode
25
25
 
26
- def format(value)
26
+ def format(value, is_load: false)
27
27
  case @mode
28
28
  when :nullable
29
- format_one(value) unless value.nil?
29
+ format_one(value, is_load: is_load) unless value.nil?
30
30
  when :required
31
31
  if value.nil?
32
32
  log.warn "Required field #{name} cannot be null"
33
33
  nil
34
34
  else
35
- format_one(value)
35
+ format_one(value, is_load: is_load)
36
36
  end
37
37
  when :repeated
38
- value.nil? ? [] : value.each_with_object([]) { |v, arr| arr << format_one(v) if v }
38
+ value.nil? ? [] : value.each_with_object([]) { |v, arr| arr << format_one(v, is_load: true) if v }
39
39
  end
40
40
  end
41
41
 
42
- def format_one(value)
42
+ def format_one(value, is_load: false)
43
43
  raise NotImplementedError, "Must implement in a subclass"
44
44
  end
45
45
 
@@ -57,7 +57,7 @@ module Fluent
57
57
  :string
58
58
  end
59
59
 
60
- def format_one(value)
60
+ def format_one(value, is_load: false)
61
61
  if value.is_a?(Hash) || value.is_a?(Array)
62
62
  MultiJson.dump(value)
63
63
  else
@@ -66,12 +66,32 @@ module Fluent
66
66
  end
67
67
  end
68
68
 
69
+ class JsonFieldSchema < FieldSchema
70
+ def type
71
+ :json
72
+ end
73
+
74
+ def format_one(value, is_load: false)
75
+ if is_load
76
+ value
77
+ else
78
+ MultiJson.dump(value)
79
+ end
80
+ end
81
+ end
82
+
83
+ class GeographyFieldSchema < StringFieldSchema
84
+ def type
85
+ :geography
86
+ end
87
+ end
88
+
69
89
  class IntegerFieldSchema < FieldSchema
70
90
  def type
71
91
  :integer
72
92
  end
73
93
 
74
- def format_one(value)
94
+ def format_one(value, is_load: false)
75
95
  value.to_i
76
96
  end
77
97
  end
@@ -81,7 +101,7 @@ module Fluent
81
101
  :float
82
102
  end
83
103
 
84
- def format_one(value)
104
+ def format_one(value, is_load: false)
85
105
  value.to_f
86
106
  end
87
107
  end
@@ -91,7 +111,7 @@ module Fluent
91
111
  :numeric
92
112
  end
93
113
 
94
- def format_one(value)
114
+ def format_one(value, is_load: false)
95
115
  value.to_s
96
116
  end
97
117
  end
@@ -101,7 +121,7 @@ module Fluent
101
121
  :boolean
102
122
  end
103
123
 
104
- def format_one(value)
124
+ def format_one(value, is_load: false)
105
125
  !!value
106
126
  end
107
127
  end
@@ -114,7 +134,7 @@ module Fluent
114
134
  :timestamp
115
135
  end
116
136
 
117
- def format_one(value)
137
+ def format_one(value, is_load: false)
118
138
  case value
119
139
  when Time
120
140
  value.strftime("%Y-%m-%d %H:%M:%S.%6L %:z")
@@ -137,7 +157,7 @@ module Fluent
137
157
  :date
138
158
  end
139
159
 
140
- def format_one(value)
160
+ def format_one(value, is_load: false)
141
161
  if value.respond_to?(:strftime)
142
162
  value.strftime("%Y-%m-%d")
143
163
  else
@@ -151,7 +171,7 @@ module Fluent
151
171
  :datetime
152
172
  end
153
173
 
154
- def format_one(value)
174
+ def format_one(value, is_load: false)
155
175
  if value.respond_to?(:strftime)
156
176
  value.strftime("%Y-%m-%dT%H:%M:%S.%6L")
157
177
  else
@@ -165,7 +185,7 @@ module Fluent
165
185
  :time
166
186
  end
167
187
 
168
- def format_one(value)
188
+ def format_one(value, is_load: false)
169
189
  if value.respond_to?(:strftime)
170
190
  value.strftime("%H:%M:%S.%6L")
171
191
  else
@@ -185,6 +205,8 @@ module Fluent
185
205
  date: DateFieldSchema,
186
206
  datetime: DateTimeFieldSchema,
187
207
  time: TimeFieldSchema,
208
+ json: JsonFieldSchema,
209
+ geography: GeographyFieldSchema,
188
210
  record: RecordSchema
189
211
  }.freeze
190
212
 
@@ -256,12 +278,12 @@ module Fluent
256
278
  end
257
279
  end
258
280
 
259
- def format_one(record)
281
+ def format_one(record, is_load: false)
260
282
  out = {}
261
283
  record.each do |key, value|
262
284
  next if value.nil?
263
285
  schema = @fields[key]
264
- out[key] = schema ? schema.format(value) : value
286
+ out[key] = schema ? schema.format(value, is_load: is_load) : value
265
287
  end
266
288
  out
267
289
  end
@@ -1,5 +1,5 @@
1
1
  module Fluent
2
2
  module BigQueryPlugin
3
- VERSION = "3.0.0".freeze
3
+ VERSION = "3.1.0".freeze
4
4
  end
5
5
  end
@@ -347,7 +347,7 @@ module Fluent
347
347
  if try_count == 1
348
348
  # Table Not Found: Auto Create Table
349
349
  create_table(project, dataset, table_id, schema)
350
- elsif try_count > 10
350
+ elsif try_count > 60 # timeout in about 300 seconds
351
351
  raise "A new table was created but it is not found."
352
352
  end
353
353
 
@@ -172,7 +172,7 @@ module Fluent
172
172
  end
173
173
 
174
174
  begin
175
- row = schema.format(record)
175
+ row = schema.format(record, is_load: !!@is_load)
176
176
  return if row.empty?
177
177
  @formatter.format(tag, time, row)
178
178
  rescue
@@ -46,6 +46,7 @@ module Fluent
46
46
 
47
47
  def configure(conf)
48
48
  super
49
+ @is_load = false
49
50
 
50
51
  if @insert_id_field
51
52
  if @insert_id_field !~ /^\$[\[\.]/ && @insert_id_field =~ /\./
@@ -36,6 +36,7 @@ module Fluent
36
36
 
37
37
  def configure(conf)
38
38
  super
39
+ @is_load = true
39
40
 
40
41
  placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}"
41
42
  placeholder_validate!(:bigquery_load, placeholder_params)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naoya Ito
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-10-05 00:00:00.000000000 Z
12
+ date: 2022-12-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake