fluent-plugin-bigquery-test 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +16 -0
- data/.gitignore +21 -0
- data/.travis.yml +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +13 -0
- data/README.md +602 -0
- data/Rakefile +12 -0
- data/fluent-plugin-bigquery.gemspec +29 -0
- data/gemfiles/activesupport-4.gemfile +6 -0
- data/lib/fluent/plugin/bigquery/errors.rb +84 -0
- data/lib/fluent/plugin/bigquery/helper.rb +33 -0
- data/lib/fluent/plugin/bigquery/schema.rb +281 -0
- data/lib/fluent/plugin/bigquery/version.rb +5 -0
- data/lib/fluent/plugin/bigquery/writer.rb +356 -0
- data/lib/fluent/plugin/out_bigquery_base.rb +221 -0
- data/lib/fluent/plugin/out_bigquery_insert.rb +125 -0
- data/lib/fluent/plugin/out_bigquery_load.rb +221 -0
- data/test/helper.rb +20 -0
- data/test/plugin/test_out_bigquery_base.rb +579 -0
- data/test/plugin/test_out_bigquery_insert.rb +544 -0
- data/test/plugin/test_out_bigquery_load.rb +348 -0
- data/test/plugin/test_record_schema.rb +186 -0
- data/test/plugin/testdata/apache.schema +98 -0
- data/test/plugin/testdata/json_key.json +7 -0
- data/test/plugin/testdata/sudo.schema +27 -0
- data/test/run_test.rb +9 -0
- metadata +197 -0
data/Rakefile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
|
4
|
+
require 'rake/testtask'
|
5
|
+
Rake::TestTask.new(:test) do |test|
|
6
|
+
test.libs << 'lib' << 'test'
|
7
|
+
test.pattern = 'test/**/test_*.rb'
|
8
|
+
test.verbose = true
|
9
|
+
test.warning = false
|
10
|
+
end
|
11
|
+
|
12
|
+
task :default => :test
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
# Put this gem's lib/ directory on the load path so the version constant
# can be required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'fluent/plugin/bigquery/version'

Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name          = "fluent-plugin-bigquery-test"
  gem.version       = Fluent::BigQueryPlugin::VERSION
  gem.authors       = ["dhayakawa"]
  gem.summary       = %q{Fluentd plugin to store data on Google BigQuery}
  gem.description   = %q{Fluentd plugin to store data on Google BigQuery, by load, or by stream inserts}
  gem.homepage      = "https://github.com/kaizenplatform/fluent-plugin-bigquery"
  gem.license       = "Apache-2.0"

  # Packaged files are whatever git tracks; executables and test files
  # are derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies (no version constraints).
  ["rake", "rr", "test-unit", "test-unit-rr"].each do |dev_dependency|
    gem.add_development_dependency dev_dependency
  end

  # Runtime dependencies.
  gem.add_runtime_dependency "google-api-client", ">= 0.49.0"
  gem.add_runtime_dependency "googleauth", ">= 0.5.0"
  gem.add_runtime_dependency "multi_json"
  gem.add_runtime_dependency "fluentd", ">= 0.14.0", "< 2"
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Fluent
  module BigQuery
    # Base class for the plugin's errors. It optionally wraps an
    # originating exception (+origin+) and delegates unknown methods to it.
    #
    # Instances cannot be created from this class directly (+new+ is
    # private here); subclasses get a public +new+ via the +inherited+ hook.
    #
    # @abstract
    class Error < StandardError
      RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
      RETRYABLE_INSERT_ERRORS_REASON = %w(timeout backendError internalError rateLimitExceeded).freeze
      RETRYABLE_STATUS_CODE = [500, 502, 503, 504].freeze

      class << self
        # Wraps +e+ in a RetryableError or an UnRetryableError depending on
        # {retryable_error?}.
        #
        # @param e [Google::Apis::Error]
        # @param message [String, nil] falls back to the message of +e+ when nil
        # @return [RetryableError, UnRetryableError]
        def wrap(e, message = nil)
          if retryable_error?(e)
            RetryableError.new(message, e)
          else
            UnRetryableError.new(message, e)
          end
        end

        # @param e [Google::Apis::Error]
        # @return [Boolean] true when +e+ is a Google::Apis::ServerError whose
        #   status code is listed in RETRYABLE_STATUS_CODE
        def retryable_error?(e)
          e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code)
        end

        # @param reason [String]
        # @return [Boolean] true when +reason+ is listed in RETRYABLE_ERROR_REASON
        def retryable_error_reason?(reason)
          RETRYABLE_ERROR_REASON.include?(reason)
        end

        # @param reason [String]
        # @return [Boolean] true when +reason+ is listed in RETRYABLE_INSERT_ERRORS_REASON
        def retryable_insert_errors_reason?(reason)
          RETRYABLE_INSERT_ERRORS_REASON.include?(reason)
        end

        # Guard for instantiation
        private :new

        # Re-publishes +new+ on every subclass so only the abstract base
        # class itself is non-instantiable.
        def inherited(subclass)
          super
          subclass.class_eval do
            class << self
              public :new
            end
          end
        end
      end

      attr_reader :origin

      # @param message [String, nil] error message; when nil the message of
      #   +origin+ is used, and when both are nil Ruby's default message
      #   (the class name) applies
      # @param origin [Exception, nil] the wrapped error, if any
      def initialize(message, origin = nil)
        @origin = origin
        # Guard the fallback so a nil origin no longer raises NoMethodError.
        super(message || (origin && origin.message))
      end

      # Forwards unknown methods (including a block, if given) to the
      # wrapped error when there is one.
      def method_missing(name, *args, &block)
        if @origin
          @origin.send(name, *args, &block)
        else
          super
        end
      end

      # Keeps +respond_to?+ consistent with the delegation done by
      # +method_missing+.
      def respond_to_missing?(name, include_private = false)
        (!@origin.nil? && @origin.respond_to?(name, include_private)) || super
      end

      # @return [Object, nil] the origin's +reason+, or nil when unavailable
      def reason
        @origin && @origin.respond_to?(:reason) ? @origin.reason : nil
      end

      # @return [Object, nil] the origin's +status_code+, or nil when unavailable
      def status_code
        @origin && @origin.respond_to?(:status_code) ? @origin.status_code : nil
      end

      # @return [Object, nil] the origin's +body+, or nil when unavailable
      def body
        @origin && @origin.respond_to?(:body) ? @origin.body : nil
      end

      # @return [Boolean] false by default; RetryableError overrides this
      def retryable?
        false
      end
    end

    # Error that reports itself as not retryable.
    class UnRetryableError < Error; end

    # Error that reports itself as retryable.
    class RetryableError < Error
      def retryable?
        true
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Fluent
  module BigQuery
    # Utility functions for recursively converting the keys of nested
    # Hash/Array structures.
    module Helper
      class << self
        # Returns a copy of +object+ in which every Hash key, at any depth
        # of nested Hashes and Arrays, is converted with +to_sym+. Keys that
        # do not respond to +to_sym+ (e.g. Integers) are kept as they are
        # instead of raising NoMethodError.
        #
        # @param object [Hash, Array, Object]
        # @return [Hash, Array, Object] non-Hash/Array values are returned unchanged
        def deep_symbolize_keys(object)
          deep_transform_keys(object) do |key|
            key.respond_to?(:to_sym) ? key.to_sym : key
          end
        end

        # Returns a copy of +object+ in which every Hash key, at any depth
        # of nested Hashes and Arrays, is converted with +to_s+.
        #
        # @param object [Hash, Array, Object]
        # @return [Hash, Array, Object] non-Hash/Array values are returned unchanged
        def deep_stringify_keys(object)
          deep_transform_keys(object) { |key| key.to_s }
        end

        private

        # Walks Hashes and Arrays recursively, rebuilding each Hash with the
        # keys mapped through +converter+; every other value is passed through.
        def deep_transform_keys(object, &converter)
          case object
          when Hash
            object.each_with_object({}) do |(key, value), result|
              result[converter.call(key)] = deep_transform_keys(value, &converter)
            end
          when Array
            object.map { |element| deep_transform_keys(element, &converter) }
          else
            object
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,281 @@
|
|
1
|
+
require 'multi_json'
|
2
|
+
|
3
|
+
module Fluent
  module BigQuery
    # Base class describing one BigQuery table field (name + mode) and how
    # a record value for that field is formatted. Subclasses supply +type+
    # and +format_one+.
    class FieldSchema
      # Accepted field modes.
      MODES = [:nullable, :required, :repeated].freeze

      ### https://developers.google.com/bigquery/docs/tables
      # Each field has the following properties:
      #
      # name - The name must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
      #        and must start with a letter or underscore. The maximum length is 128 characters.
      #        https://cloud.google.com/bigquery/docs/reference/v2/tables#schema.fields.name
      #
      # Anchored with \A/\z (not ^/$) so the whole name must match, not just
      # one line of it.
      FIELD_NAME_REGEXP = /\A[_A-Za-z][_A-Za-z0-9]{,127}\z/.freeze

      # @param name [String] field name
      # @param mode [Symbol] one of :nullable, :required, :repeated
      # @raise [ConfigError] when the mode is unknown or the name is invalid
      def initialize(name, mode = :nullable)
        unless MODES.include?(mode)
          raise ConfigError, "Unrecognized mode for #{name}: #{mode}"
        end
        unless name =~ FIELD_NAME_REGEXP
          raise ConfigError, "invalid bigquery field name: '#{name}'"
        end

        @name = name
        @mode = mode
      end

      attr_reader :name, :mode

      # Formats +value+ according to the field's mode.
      #
      # * :nullable - nil stays nil, anything else goes through +format_one+
      # * :required - like :nullable, but a nil value is logged as a warning
      # * :repeated - nil becomes [], otherwise each non-nil element goes
      #   through +format_one+ (nil elements are dropped)
      #
      # @param value [Object, nil]
      # @return [Object, nil]
      def format(value)
        case @mode
        when :nullable
          format_one(value) unless value.nil?
        when :required
          if value.nil?
            warn_required_field_is_null
            nil
          else
            format_one(value)
          end
        when :repeated
          # Only nil elements are skipped; `false` is a legitimate element
          # (e.g. in a repeated boolean field) and must be kept.
          value.nil? ? [] : value.each_with_object([]) { |v, arr| arr << format_one(v) unless v.nil? }
        end
      end

      # Formats a single non-nil value. Must be overridden.
      def format_one(value)
        raise NotImplementedError, "Must implement in a subclass"
      end

      # @return [Hash] field definition with :name, :type and :mode
      #   (type and mode upper-cased)
      def to_h
        {
          :name => name,
          :type => type.to_s.upcase,
          :mode => mode.to_s.upcase,
        }
      end

      private

      # Emits the "required field is null" warning. This class has no logger
      # of its own, so the global $log is used when it is set, and
      # Kernel#warn otherwise.
      # NOTE(review): $log is presumed to be the host process's global
      # logger - confirm against the runtime this is loaded into.
      def warn_required_field_is_null
        message = "Required field #{name} cannot be null"
        if $log
          $log.warn message
        else
          warn message
        end
      end
    end

    # STRING field: Hashes and Arrays are serialized with MultiJson, every
    # other value with +to_s+.
    class StringFieldSchema < FieldSchema
      def type
        :string
      end

      def format_one(value)
        if value.is_a?(Hash) || value.is_a?(Array)
          MultiJson.dump(value)
        else
          value.to_s
        end
      end
    end

    # INTEGER field: values are converted with +to_i+.
    class IntegerFieldSchema < FieldSchema
      def type
        :integer
      end

      def format_one(value)
        value.to_i
      end
    end

    # FLOAT field: values are converted with +to_f+.
    class FloatFieldSchema < FieldSchema
      def type
        :float
      end

      def format_one(value)
        value.to_f
      end
    end

    # NUMERIC field: values are converted with +to_s+.
    class NumericFieldSchema < FieldSchema
      def type
        :numeric
      end

      def format_one(value)
        value.to_s
      end
    end

    # BOOLEAN field: values are coerced by Ruby truthiness.
    class BooleanFieldSchema < FieldSchema
      def type
        :boolean
      end

      def format_one(value)
        !!value
      end
    end

    # TIMESTAMP field: Time objects are rendered as a string, numeric-looking
    # strings are converted to Integer/Float, everything else passes through.
    class TimestampFieldSchema < FieldSchema
      INTEGER_REGEXP = /\A-?[[:digit:]]+\z/.freeze
      FLOAT_REGEXP = /\A-?[[:digit:]]+(\.[[:digit:]]+)\z/.freeze

      def type
        :timestamp
      end

      def format_one(value)
        case value
        when Time
          value.strftime("%Y-%m-%d %H:%M:%S.%6L %:z")
        when String
          if value =~ INTEGER_REGEXP
            value.to_i
          elsif value =~ FLOAT_REGEXP
            value.to_f
          else
            value
          end
        else
          value
        end
      end
    end

    # DATE field: values responding to +strftime+ are rendered as
    # "%Y-%m-%d"; everything else passes through.
    class DateFieldSchema < FieldSchema
      def type
        :date
      end

      def format_one(value)
        if value.respond_to?(:strftime)
          value.strftime("%Y-%m-%d")
        else
          value
        end
      end
    end

    # DATETIME field: values responding to +strftime+ are rendered as
    # "%Y-%m-%dT%H:%M:%S.%6L"; everything else passes through.
    class DateTimeFieldSchema < FieldSchema
      def type
        :datetime
      end

      def format_one(value)
        if value.respond_to?(:strftime)
          value.strftime("%Y-%m-%dT%H:%M:%S.%6L")
        else
          value
        end
      end
    end

    # TIME field: values responding to +strftime+ are rendered as
    # "%H:%M:%S.%6L"; everything else passes through.
    class TimeFieldSchema < FieldSchema
      def type
        :time
      end

      def format_one(value)
        if value.respond_to?(:strftime)
          value.strftime("%H:%M:%S.%6L")
        else
          value
        end
      end
    end

    # RECORD field: a named collection of child field schemas. Also used as
    # the container for a table's top-level fields.
    class RecordSchema < FieldSchema
      # Maps a lower-cased type symbol to the schema class implementing it.
      FIELD_TYPES = {
        string: StringFieldSchema,
        integer: IntegerFieldSchema,
        float: FloatFieldSchema,
        numeric: NumericFieldSchema,
        boolean: BooleanFieldSchema,
        timestamp: TimestampFieldSchema,
        date: DateFieldSchema,
        datetime: DateTimeFieldSchema,
        time: TimeFieldSchema,
        record: RecordSchema
      }.freeze

      def initialize(name, mode = :nullable)
        super(name, mode)
        @fields = {}
      end

      def type
        :record
      end

      # @param name [String] child field name
      # @return [FieldSchema, nil] the child schema registered under +name+
      def [](name)
        @fields[name]
      end

      # @return [Boolean] true when no child field is registered
      def empty?
        @fields.empty?
      end

      # @return [Array<Hash>] the +to_h+ form of every child field
      def to_a
        @fields.map do |_, field_schema|
          field_schema.to_h
        end
      end

      # @return [Hash] like FieldSchema#to_h plus the child fields under :fields
      def to_h
        {
          :name => name,
          :type => type.to_s.upcase,
          :mode => mode.to_s.upcase,
          :fields => self.to_a,
        }
      end

      # Registers child fields from a list of field definitions, recursing
      # into the 'fields' of record-typed entries.
      #
      # @param schema [Array<Hash>] entries with string keys 'name', 'type',
      #   optional 'mode' (defaults to nullable) and, for records, 'fields'
      # @raise [ConfigError] on a missing or unknown type, or a record
      #   without 'fields'
      def load_schema(schema)
        schema.each do |field|
          raise ConfigError, 'field must have type' unless field.key?('type')

          name = field['name']
          mode = (field['mode'] || 'nullable').downcase.to_sym

          type = field['type'].downcase.to_sym
          field_schema_class = FIELD_TYPES[type]
          raise ConfigError, "Invalid field type: #{field['type']}" unless field_schema_class

          field_schema = field_schema_class.new(name, mode)
          @fields[name] = field_schema
          if type == :record
            raise ConfigError, "record field must have fields" unless field.key?('fields')
            field_schema.load_schema(field['fields'])
          end
        end
      end

      # Registers a nullable field of the given type. A dotted name
      # ("record.field") is split at its first dot: the left part is
      # registered (or reused) as a record and the remainder is registered
      # inside it.
      #
      # @param name [String] field name, optionally dotted
      # @param type [Symbol] a key of FIELD_TYPES
      # @raise [ConfigError] when the field already exists (unless the
      #   existing one is a timestamp) or the type is unknown
      def register_field(name, type)
        if @fields.key?(name) && @fields[name].type != :timestamp
          raise ConfigError, "field #{name} is registered twice"
        end

        # Explicit MatchData instead of the $` / $' special globals.
        dot = /\./.match(name)
        if dot
          recordname = dot.pre_match
          fieldname = dot.post_match
          register_record_field(recordname)
          @fields[recordname].register_field(fieldname, type)
        else
          schema = FIELD_TYPES[type]
          raise ConfigError, "[Bug] Invalid field type #{type}" unless schema
          @fields[name] = schema.new(name)
        end
      end

      # Formats a record: nil values are omitted, values with a registered
      # child schema are formatted by it, and all others pass through.
      #
      # @param record [Hash]
      # @return [Hash]
      def format_one(record)
        out = {}
        record.each do |key, value|
          next if value.nil?
          schema = @fields[key]
          out[key] = schema ? schema.format(value) : value
        end
        out
      end

      private

      # Ensures +name+ is registered as a record, creating it when absent.
      #
      # @raise [ConfigError] when +name+ is already registered as a
      #   non-record field
      def register_record_field(name)
        if !@fields.key?(name)
          @fields[name] = RecordSchema.new(name)
        else
          unless @fields[name].kind_of?(RecordSchema)
            raise ConfigError, "field #{name} is required to be a record but already registered as #{@fields[name].type}"
          end
        end
      end
    end
  end
end
|