fluent-plugin-bigquery-test 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rake/testtask'
5
+ Rake::TestTask.new(:test) do |test|
6
+ test.libs << 'lib' << 'test'
7
+ test.pattern = 'test/**/test_*.rb'
8
+ test.verbose = true
9
+ test.warning = false
10
+ end
11
+
12
+ task :default => :test
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification for fluent-plugin-bigquery-test.

# Put this gem's lib directory on the load path so the version file can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'fluent/plugin/bigquery/version'

Gem::Specification.new do |s|
  s.name          = "fluent-plugin-bigquery-test"
  s.version       = Fluent::BigQueryPlugin::VERSION
  s.authors       = ["dhayakawa"]
  s.description   = "Fluentd plugin to store data on Google BigQuery, by load, or by stream inserts"
  s.summary       = "Fluentd plugin to store data on Google BigQuery"
  s.homepage      = "https://github.com/kaizenplatform/fluent-plugin-bigquery"
  s.license       = "Apache-2.0"

  # Package every file tracked by git; executables and test files are derived from that list.
  s.files         = `git ls-files`.split($/)
  s.executables   = s.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ["lib"]

  # Development-only dependencies (no version constraints).
  %w[rake rr test-unit test-unit-rr].each do |dev_gem|
    s.add_development_dependency dev_gem
  end

  # Runtime dependencies with their version requirements (empty list = any version).
  {
    "google-api-client" => [">= 0.49.0"],
    "googleauth"        => [">= 0.5.0"],
    "multi_json"        => [],
    "fluentd"           => [">= 0.14.0", "< 2"],
  }.each do |runtime_gem, requirements|
    s.add_runtime_dependency runtime_gem, *requirements
  end
end
@@ -0,0 +1,6 @@
1
source 'https://rubygems.org'

# Keep ActiveSupport below 5 for this bundle.
gem 'activesupport', '< 5'

# All other dependencies are declared in the gemspec one directory up
# (fluent-plugin-bigquery.gemspec).
gemspec path: '../'
@@ -0,0 +1,84 @@
1
module Fluent
  module BigQuery
    # Base class for errors raised by the BigQuery plugin.
    #
    # An instance optionally wraps the exception that caused it (+origin+) and
    # forwards any method it does not define itself to that origin.
    # +Error.new+ is private; only subclasses can be instantiated.
    #
    # @abstract
    class Error < StandardError
      RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
      RETRYABLE_INSERT_ERRORS_REASON = %w(timeout backendError internalError rateLimitExceeded).freeze
      RETRYABLE_STATUS_CODE = [500, 502, 503, 504].freeze

      class << self
        # Wraps +e+ in a RetryableError or an UnRetryableError, depending on
        # {retryable_error?}.
        #
        # @param e [Google::Apis::Error]
        # @param message [String, nil] overrides the origin's message when given
        # @return [RetryableError, UnRetryableError]
        def wrap(e, message = nil)
          if retryable_error?(e)
            RetryableError.new(message, e)
          else
            UnRetryableError.new(message, e)
          end
        end

        # True when +e+ is a Google::Apis::ServerError whose status code is
        # listed in RETRYABLE_STATUS_CODE.
        #
        # @param e [Google::Apis::Error]
        def retryable_error?(e)
          e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code)
        end

        # @param reason [String]
        # @return [Boolean] whether +reason+ is listed in RETRYABLE_ERROR_REASON
        def retryable_error_reason?(reason)
          RETRYABLE_ERROR_REASON.include?(reason)
        end

        # @param reason [String]
        # @return [Boolean] whether +reason+ is listed in RETRYABLE_INSERT_ERRORS_REASON
        def retryable_insert_errors_reason?(reason)
          RETRYABLE_INSERT_ERRORS_REASON.include?(reason)
        end

        # Guard for instantiation: the abstract base cannot be built directly,
        # but every subclass gets a public +new+ back.
        private :new

        def inherited(subclass)
          subclass.class_eval do
            class << self
              public :new
            end
          end
        end
      end

      # @return [Exception, nil] the wrapped exception, if any
      attr_reader :origin

      # @param message [String, nil] error message; falls back to the origin's
      #   message, and to Ruby's default (the class name) when both are nil
      # @param origin [Exception, nil] the exception being wrapped
      def initialize(message, origin = nil)
        @origin = origin
        # Guard against a nil origin so a missing message does not raise
        # NoMethodError while the error itself is being constructed.
        super(message || (origin && origin.message))
      end

      # Forwards unknown calls (including any block) to the wrapped origin.
      def method_missing(name, *args, &block)
        if @origin
          @origin.send(name, *args, &block)
        else
          super
        end
      end

      # Keeps +respond_to?+ consistent with the delegation in method_missing.
      def respond_to_missing?(name, include_private = false)
        (@origin && @origin.respond_to?(name, include_private)) || super
      end

      # @return [Object, nil] the origin's +reason+, or nil when unavailable
      def reason
        @origin && @origin.respond_to?(:reason) ? @origin.reason : nil
      end

      # @return [Object, nil] the origin's +status_code+, or nil when unavailable
      def status_code
        @origin && @origin.respond_to?(:status_code) ? @origin.status_code : nil
      end

      # @return [Object, nil] the origin's +body+, or nil when unavailable
      def body
        @origin && @origin.respond_to?(:body) ? @origin.body : nil
      end

      # @return [Boolean] false for the base class; overridden by RetryableError
      def retryable?
        false
      end
    end

    # Error that should not be retried (+retryable?+ stays false).
    class UnRetryableError < Error; end

    # Error that may be retried.
    class RetryableError < Error
      def retryable?
        true
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module Fluent
  module BigQuery
    # Key-conversion helpers for nested Hash/Array structures.
    module Helper
      # Returns a copy of +object+ in which every Hash key, at any nesting
      # depth inside Hashes and Arrays, has been converted with +to_sym+.
      # Anything that is neither a Hash nor an Array is returned as is.
      def self.deep_symbolize_keys(object)
        deep_convert_keys(object, :to_sym)
      end

      # Same traversal as deep_symbolize_keys, converting keys with +to_s+.
      def self.deep_stringify_keys(object)
        deep_convert_keys(object, :to_s)
      end

      # Walks +object+ recursively, rebuilding Hashes with each key replaced
      # by the result of calling +converter+ on it, and mapping over Arrays.
      def self.deep_convert_keys(object, converter)
        if object.is_a?(Hash)
          converted = {}
          object.each_pair do |key, value|
            converted[key.public_send(converter)] = deep_convert_keys(value, converter)
          end
          converted
        elsif object.is_a?(Array)
          object.map { |element| deep_convert_keys(element, converter) }
        else
          object
        end
      end
      private_class_method :deep_convert_keys
    end
  end
end
@@ -0,0 +1,281 @@
1
+ require 'multi_json'
2
+
3
module Fluent
  module BigQuery
    # Base class for one BigQuery field definition (name + mode).
    # Subclasses supply +type+ and +format_one+.
    class FieldSchema
      # @param name [String] field name
      # @param mode [Symbol] one of :nullable, :required, :repeated
      # @raise [ConfigError] when the mode or the name is not acceptable
      def initialize(name, mode = :nullable)
        unless [:nullable, :required, :repeated].include?(mode)
          raise ConfigError, "Unrecognized mode for #{name}: #{mode}"
        end
        ### https://developers.google.com/bigquery/docs/tables
        # Each field has the following properties:
        #
        # name - The name must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
        #        and must start with a letter or underscore. The maximum length is 128 characters.
        #        https://cloud.google.com/bigquery/docs/reference/v2/tables#schema.fields.name
        #
        # \A and \z anchor the whole string; ^ and $ would accept any name
        # that merely contains one valid line (e.g. "ok\nanything").
        unless name =~ /\A[_A-Za-z][_A-Za-z0-9]{,127}\z/
          raise ConfigError, "invalid bigquery field name: '#{name}'"
        end

        @name = name
        @mode = mode
      end

      attr_reader :name, :mode

      # Formats +value+ according to the field's mode.
      #
      # * :nullable - nil stays nil, anything else goes through +format_one+
      # * :required - nil is reported with a warning and returned as nil
      # * :repeated - nil becomes [], otherwise every non-nil element is
      #   formatted with +format_one+ (nil elements are dropped)
      def format(value)
        case @mode
        when :nullable
          format_one(value) unless value.nil?
        when :required
          if value.nil?
            log_warning "Required field #{name} cannot be null"
            nil
          else
            format_one(value)
          end
        when :repeated
          return [] if value.nil?

          # Only nil elements are skipped; a literal +false+ is a real value
          # (e.g. in a repeated boolean field) and must be kept.
          value.each_with_object([]) do |element, formatted|
            formatted << format_one(element) unless element.nil?
          end
        end
      end

      # Formats a single non-nil value. Implemented by subclasses.
      def format_one(value)
        raise NotImplementedError, "Must implement in a subclass"
      end

      # @return [Hash] the field as {name, type, mode} with upper-cased type and mode
      def to_h
        {
          :name => name,
          :type => type.to_s.upcase,
          :mode => mode.to_s.upcase,
        }
      end

      private

      # Emits a warning. This class has no logger of its own, so the message
      # goes to the global $log when it responds to +warn+, and to
      # Kernel.warn (stderr) otherwise.
      # NOTE(review): assumes $log is Fluentd's process-wide logger - confirm.
      def log_warning(message)
        if $log.respond_to?(:warn)
          $log.warn(message)
        else
          Kernel.warn(message)
        end
      end
    end

    # STRING field: Hashes and Arrays are dumped as JSON, everything else uses +to_s+.
    class StringFieldSchema < FieldSchema
      def type
        :string
      end

      def format_one(value)
        if value.is_a?(Hash) || value.is_a?(Array)
          MultiJson.dump(value)
        else
          value.to_s
        end
      end
    end

    # INTEGER field: values are converted with +to_i+.
    class IntegerFieldSchema < FieldSchema
      def type
        :integer
      end

      def format_one(value)
        value.to_i
      end
    end

    # FLOAT field: values are converted with +to_f+.
    class FloatFieldSchema < FieldSchema
      def type
        :float
      end

      def format_one(value)
        value.to_f
      end
    end

    # NUMERIC field: values are sent as their +to_s+ representation.
    class NumericFieldSchema < FieldSchema
      def type
        :numeric
      end

      def format_one(value)
        value.to_s
      end
    end

    # BOOLEAN field: values are coerced by truthiness.
    class BooleanFieldSchema < FieldSchema
      def type
        :boolean
      end

      def format_one(value)
        !!value
      end
    end

    # TIMESTAMP field: Time objects are rendered as text; purely numeric
    # strings become Integer/Float; anything else passes through unchanged.
    class TimestampFieldSchema < FieldSchema
      INTEGER_REGEXP = /\A-?[[:digit:]]+\z/.freeze
      FLOAT_REGEXP = /\A-?[[:digit:]]+(\.[[:digit:]]+)\z/.freeze

      def type
        :timestamp
      end

      def format_one(value)
        case value
        when Time
          value.strftime("%Y-%m-%d %H:%M:%S.%6L %:z")
        when String
          if value =~ INTEGER_REGEXP
            value.to_i
          elsif value =~ FLOAT_REGEXP
            value.to_f
          else
            value
          end
        else
          value
        end
      end
    end

    # DATE field: objects responding to +strftime+ are rendered as YYYY-MM-DD.
    class DateFieldSchema < FieldSchema
      def type
        :date
      end

      def format_one(value)
        if value.respond_to?(:strftime)
          value.strftime("%Y-%m-%d")
        else
          value
        end
      end
    end

    # DATETIME field: objects responding to +strftime+ are rendered as
    # YYYY-MM-DDTHH:MM:SS.ffffff.
    class DateTimeFieldSchema < FieldSchema
      def type
        :datetime
      end

      def format_one(value)
        if value.respond_to?(:strftime)
          value.strftime("%Y-%m-%dT%H:%M:%S.%6L")
        else
          value
        end
      end
    end

    # TIME field: objects responding to +strftime+ are rendered as HH:MM:SS.ffffff.
    class TimeFieldSchema < FieldSchema
      def type
        :time
      end

      def format_one(value)
        if value.respond_to?(:strftime)
          value.strftime("%H:%M:%S.%6L")
        else
          value
        end
      end
    end

    # RECORD field: a named collection of child field schemas.
    class RecordSchema < FieldSchema
      # Maps a lower-cased type symbol to the schema class that handles it.
      FIELD_TYPES = {
        string: StringFieldSchema,
        integer: IntegerFieldSchema,
        float: FloatFieldSchema,
        numeric: NumericFieldSchema,
        boolean: BooleanFieldSchema,
        timestamp: TimestampFieldSchema,
        date: DateFieldSchema,
        datetime: DateTimeFieldSchema,
        time: TimeFieldSchema,
        record: RecordSchema
      }.freeze

      def initialize(name, mode = :nullable)
        super(name, mode)
        @fields = {}
      end

      def type
        :record
      end

      # @return [FieldSchema, nil] the child schema registered under +name+
      def [](name)
        @fields[name]
      end

      def empty?
        @fields.empty?
      end

      # @return [Array<Hash>] +to_h+ of every child field, in registration order
      def to_a
        @fields.values.map(&:to_h)
      end

      # @return [Hash] {name, type, mode} plus :fields holding the children
      def to_h
        super.merge(:fields => to_a)
      end

      # Registers child fields from an array of hashes with string keys
      # 'name', 'type', optional 'mode' (defaults to nullable) and, for
      # records, 'fields'.
      #
      # @raise [ConfigError] on a missing/unknown type or a record without fields
      def load_schema(schema)
        schema.each do |field|
          raise ConfigError, 'field must have type' unless field.key?('type')

          name = field['name']
          mode = (field['mode'] || 'nullable').downcase.to_sym

          type = field['type'].downcase.to_sym
          field_schema_class = FIELD_TYPES[type]
          raise ConfigError, "Invalid field type: #{field['type']}" unless field_schema_class

          field_schema = field_schema_class.new(name, mode)
          @fields[name] = field_schema
          if type == :record
            raise ConfigError, "record field must have fields" unless field.key?('fields')
            field_schema.load_schema(field['fields'])
          end
        end
      end

      # Registers a field of +type+ under +name+. A dotted name ("a.b")
      # registers "b" inside the record "a", creating that record if needed.
      #
      # @raise [ConfigError] when the name is already registered with a type
      #   other than :timestamp, or the type is unknown
      def register_field(name, type)
        if @fields.key?(name) && @fields[name].type != :timestamp
          raise ConfigError, "field #{name} is registered twice"
        end

        # Split at the first dot: the text before it names the record, the
        # text after it is the (possibly still dotted) child field name.
        dot = /\./.match(name)
        if dot
          recordname = dot.pre_match
          fieldname = dot.post_match
          register_record_field(recordname)
          @fields[recordname].register_field(fieldname, type)
        else
          schema = FIELD_TYPES[type]
          raise ConfigError, "[Bug] Invalid field type #{type}" unless schema
          @fields[name] = schema.new(name)
        end
      end

      # Formats a record hash: nil values are dropped, keys with a registered
      # schema are formatted by it, all other values are copied unchanged.
      def format_one(record)
        out = {}
        record.each do |key, value|
          next if value.nil?
          schema = @fields[key]
          out[key] = schema ? schema.format(value) : value
        end
        out
      end

      private

      # Ensures +name+ is registered as a record, creating an empty one when
      # the name is still unused.
      #
      # @raise [ConfigError] when +name+ is already registered as a non-record
      def register_record_field(name)
        if !@fields.key?(name)
          @fields[name] = RecordSchema.new(name)
        else
          unless @fields[name].kind_of?(RecordSchema)
            raise ConfigError, "field #{name} is required to be a record but already registered as #{@fields[name].type}"
          end
        end
      end
    end
  end
end