fluent-plugin-bigquery-test 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rake/testtask'
5
+ Rake::TestTask.new(:test) do |test|
6
+ test.libs << 'lib' << 'test'
7
+ test.pattern = 'test/**/test_*.rb'
8
+ test.verbose = true
9
+ test.warning = false
10
+ end
11
+
12
+ task :default => :test
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification for fluent-plugin-bigquery-test.
# lib/ is put on the load path first so the version constant can be required.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'fluent/plugin/bigquery/version'

Gem::Specification.new do |spec|
  spec.name          = "fluent-plugin-bigquery-test"
  spec.version       = Fluent::BigQueryPlugin::VERSION
  spec.authors       = ["dhayakawa"]
  spec.description   = %q{Fluentd plugin to store data on Google BigQuery, by load, or by stream inserts}
  spec.summary       = %q{Fluentd plugin to store data on Google BigQuery}
  spec.homepage      = "https://github.com/kaizenplatform/fluent-plugin-bigquery"
  spec.license       = "Apache-2.0"

  # Ask git for a NUL-delimited listing and split on NUL. This does not depend
  # on the mutable `$/` global and stays correct for paths containing newlines.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "rake"
  spec.add_development_dependency "rr"
  spec.add_development_dependency "test-unit"
  spec.add_development_dependency "test-unit-rr"

  spec.add_runtime_dependency "google-api-client", ">= 0.49.0"
  spec.add_runtime_dependency "googleauth", ">= 0.5.0"
  spec.add_runtime_dependency "multi_json"
  spec.add_runtime_dependency "fluentd", ">= 0.14.0", "< 2"
end
@@ -0,0 +1,6 @@
1
source 'https://rubygems.org'

# Pin ActiveSupport to a release below 5 for this bundle.
gem 'activesupport', '< 5'

# All other dependencies come from the gemspec located one directory up.
gemspec path: '../'
@@ -0,0 +1,84 @@
1
module Fluent
  module BigQuery
    # Base class for errors raised by the BigQuery plugin. An instance may wrap
    # the exception it originated from (+origin+) and forwards messages it does
    # not understand to that exception.
    #
    # @abstract `Error.new` is private; instantiate {RetryableError} or
    #   {UnRetryableError}, or use {Error.wrap}.
    class Error < StandardError
      RETRYABLE_ERROR_REASON = %w(backendError internalError rateLimitExceeded tableUnavailable).freeze
      RETRYABLE_INSERT_ERRORS_REASON = %w(timeout backendError internalError rateLimitExceeded).freeze
      RETRYABLE_STATUS_CODE = [500, 502, 503, 504].freeze

      class << self
        # Wraps +e+ in a {RetryableError} or {UnRetryableError} depending on
        # {retryable_error?}.
        #
        # @param e [Google::Apis::Error]
        # @param message [String, nil] overrides the message of +e+ when given
        # @return [RetryableError, UnRetryableError]
        def wrap(e, message = nil)
          error_class = retryable_error?(e) ? RetryableError : UnRetryableError
          error_class.new(message, e)
        end

        # @param e [Google::Apis::Error]
        # @return [Boolean] true when +e+ is a server error whose status code is
        #   listed in RETRYABLE_STATUS_CODE
        def retryable_error?(e)
          e.is_a?(Google::Apis::ServerError) && RETRYABLE_STATUS_CODE.include?(e.status_code)
        end

        # @param reason [String]
        # @return [Boolean] true when +reason+ is listed in RETRYABLE_ERROR_REASON
        def retryable_error_reason?(reason)
          RETRYABLE_ERROR_REASON.include?(reason)
        end

        # @param reason [String]
        # @return [Boolean] true when +reason+ is listed in
        #   RETRYABLE_INSERT_ERRORS_REASON
        def retryable_insert_errors_reason?(reason)
          RETRYABLE_INSERT_ERRORS_REASON.include?(reason)
        end

        # Guard for instantiation: the base class cannot be created directly.
        private :new

        # Re-opens `new` on every subclass so that concrete errors can be built.
        def inherited(subclass)
          super
          subclass.public_class_method(:new)
        end
      end

      attr_reader :origin

      # @param message [String, nil] falls back to the message of +origin+
      # @param origin [Exception, nil] the wrapped exception, if any
      def initialize(message, origin = nil)
        @origin = origin
        # With neither a message nor an origin, pass nil so the exception uses
        # its default message instead of failing on nil.message.
        super(message || (origin && origin.message))
      end

      # Forwards unknown messages (including any block) to the wrapped
      # exception.
      # NOTE(review): `send` also reaches private methods of the origin; kept
      # for backward compatibility.
      def method_missing(name, *args, &block)
        if @origin
          @origin.send(name, *args, &block)
        else
          super
        end
      end

      # Keeps `respond_to?` consistent with the delegation in method_missing.
      def respond_to_missing?(name, include_private = false)
        (!@origin.nil? && @origin.respond_to?(name, include_private)) || super
      end

      # @return [Object, nil] reason reported by the origin, when it has one
      def reason
        @origin && @origin.respond_to?(:reason) ? @origin.reason : nil
      end

      # @return [Object, nil] status code reported by the origin, when it has one
      def status_code
        @origin && @origin.respond_to?(:status_code) ? @origin.status_code : nil
      end

      # @return [Object, nil] body reported by the origin, when it has one
      def body
        @origin && @origin.respond_to?(:body) ? @origin.body : nil
      end

      # @return [Boolean] false unless overridden by a subclass
      def retryable?
        false
      end
    end

    # Error that must not be retried.
    class UnRetryableError < Error; end

    # Error for which a retry is worthwhile.
    class RetryableError < Error
      def retryable?
        true
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module Fluent
  module BigQuery
    # Key-conversion helpers for nested Hash/Array structures.
    module Helper
      class << self
        # Returns a copy of +object+ in which every Hash key, at any nesting
        # depth inside Hashes and Arrays, is converted with `to_sym`. Keys that
        # do not respond to `to_sym` (e.g. Integers) are kept unchanged rather
        # than raising. Non-Hash, non-Array values are returned as-is.
        #
        # @param object [Hash, Array, Object]
        # @return [Hash, Array, Object]
        def deep_symbolize_keys(object)
          deep_transform_keys(object) { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
        end

        # Returns a copy of +object+ in which every Hash key, at any nesting
        # depth inside Hashes and Arrays, is converted with `to_s`.
        # Non-Hash, non-Array values are returned as-is.
        #
        # @param object [Hash, Array, Object]
        # @return [Hash, Array, Object]
        def deep_stringify_keys(object)
          deep_transform_keys(object) { |key| key.to_s }
        end

        private

        # Rebuilds Hashes and Arrays recursively, passing each Hash key through
        # +converter+.
        def deep_transform_keys(object, &converter)
          case object
          when Hash
            object.each_with_object({}) do |(key, value), result|
              result[converter.call(key)] = deep_transform_keys(value, &converter)
            end
          when Array
            object.map { |element| deep_transform_keys(element, &converter) }
          else
            object
          end
        end
      end
    end
  end
end
@@ -0,0 +1,281 @@
1
+ require 'multi_json'
2
+
3
module Fluent
  module BigQuery
    # Base class describing one BigQuery column: its name, its mode, and how a
    # record value is converted before being sent. Subclasses provide `type`
    # and `format_one`.
    class FieldSchema
      MODES = [:nullable, :required, :repeated].freeze

      ### https://developers.google.com/bigquery/docs/tables
      # Each field has the following properties:
      #
      # name - The name must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
      #        and must start with a letter or underscore. The maximum length is 128 characters.
      #        https://cloud.google.com/bigquery/docs/reference/v2/tables#schema.fields.name
      #
      # Anchored with \A and \z so the whole string is checked; ^ and $ would
      # accept a name whose first line alone is valid.
      FIELD_NAME_REGEXP = /\A[_A-Za-z][_A-Za-z0-9]{,127}\z/.freeze

      # @param name [String] column name
      # @param mode [Symbol] one of :nullable, :required, :repeated
      # @raise [ConfigError] when the mode is unknown or the name is invalid
      def initialize(name, mode = :nullable)
        unless MODES.include?(mode)
          raise ConfigError, "Unrecognized mode for #{name}: #{mode}"
        end
        unless name =~ FIELD_NAME_REGEXP
          raise ConfigError, "invalid bigquery field name: '#{name}'"
        end

        @name = name
        @mode = mode
      end

      attr_reader :name, :mode

      # Converts +value+ according to the field mode.
      #
      # * :nullable - nil stays nil, anything else goes through `format_one`
      # * :required - nil is logged as a warning and returned as nil
      # * :repeated - nil becomes [], otherwise every non-nil element is
      #   formatted (false is kept, so repeated booleans do not lose values)
      def format(value)
        case @mode
        when :nullable
          format_one(value) unless value.nil?
        when :required
          if value.nil?
            log_warning("Required field #{name} cannot be null")
            nil
          else
            format_one(value)
          end
        when :repeated
          return [] if value.nil?
          value.each_with_object([]) { |v, arr| arr << format_one(v) unless v.nil? }
        end
      end

      # Converts a single non-nil value; implemented by subclasses.
      def format_one(value)
        raise NotImplementedError, "Must implement in a subclass"
      end

      # @return [Hash] schema entry with upper-cased type and mode
      def to_h
        {
          :name => name,
          :type => type.to_s.upcase,
          :mode => mode.to_s.upcase,
        }
      end

      private

      # Emits a warning without requiring a logger on this class.
      # NOTE(review): assumes Fluentd exposes its logger as the global `$log` —
      # confirm. When no such logger is set, the message goes to Kernel#warn.
      def log_warning(message)
        logger = $log
        if logger.respond_to?(:warn)
          logger.warn(message)
        else
          Kernel.warn(message)
        end
      end
    end

    # STRING column: Hashes and Arrays are dumped as JSON, everything else
    # is converted with `to_s`.
    class StringFieldSchema < FieldSchema
      def type
        :string
      end

      def format_one(value)
        if value.is_a?(Hash) || value.is_a?(Array)
          MultiJson.dump(value)
        else
          value.to_s
        end
      end
    end

    # INTEGER column: values are converted with `to_i`.
    class IntegerFieldSchema < FieldSchema
      def type
        :integer
      end

      def format_one(value)
        value.to_i
      end
    end

    # FLOAT column: values are converted with `to_f`.
    class FloatFieldSchema < FieldSchema
      def type
        :float
      end

      def format_one(value)
        value.to_f
      end
    end

    # NUMERIC column: values are sent as their string representation.
    class NumericFieldSchema < FieldSchema
      def type
        :numeric
      end

      def format_one(value)
        value.to_s
      end
    end

    # BOOLEAN column: values are coerced by Ruby truthiness.
    class BooleanFieldSchema < FieldSchema
      def type
        :boolean
      end

      def format_one(value)
        !!value
      end
    end

    # TIMESTAMP column: Time objects are rendered with microseconds and UTC
    # offset; numeric-looking strings become Integer/Float; anything else is
    # passed through unchanged.
    class TimestampFieldSchema < FieldSchema
      INTEGER_REGEXP = /\A-?[[:digit:]]+\z/.freeze
      FLOAT_REGEXP = /\A-?[[:digit:]]+(\.[[:digit:]]+)\z/.freeze

      def type
        :timestamp
      end

      def format_one(value)
        case value
        when Time
          value.strftime("%Y-%m-%d %H:%M:%S.%6L %:z")
        when String
          if value =~ INTEGER_REGEXP
            value.to_i
          elsif value =~ FLOAT_REGEXP
            value.to_f
          else
            value
          end
        else
          value
        end
      end
    end

    # DATE column: objects responding to `strftime` are rendered as
    # YYYY-MM-DD, other values are passed through.
    class DateFieldSchema < FieldSchema
      def type
        :date
      end

      def format_one(value)
        if value.respond_to?(:strftime)
          value.strftime("%Y-%m-%d")
        else
          value
        end
      end
    end

    # DATETIME column: objects responding to `strftime` are rendered with
    # microseconds and no offset, other values are passed through.
    class DateTimeFieldSchema < FieldSchema
      def type
        :datetime
      end

      def format_one(value)
        if value.respond_to?(:strftime)
          value.strftime("%Y-%m-%dT%H:%M:%S.%6L")
        else
          value
        end
      end
    end

    # TIME column: objects responding to `strftime` are rendered as a time of
    # day with microseconds, other values are passed through.
    class TimeFieldSchema < FieldSchema
      def type
        :time
      end

      def format_one(value)
        if value.respond_to?(:strftime)
          value.strftime("%H:%M:%S.%6L")
        else
          value
        end
      end
    end

    # RECORD column: a named collection of child field schemas. Also used as
    # the root container of a table schema.
    class RecordSchema < FieldSchema
      FIELD_TYPES = {
        string: StringFieldSchema,
        integer: IntegerFieldSchema,
        float: FloatFieldSchema,
        numeric: NumericFieldSchema,
        boolean: BooleanFieldSchema,
        timestamp: TimestampFieldSchema,
        date: DateFieldSchema,
        datetime: DateTimeFieldSchema,
        time: TimeFieldSchema,
        record: RecordSchema
      }.freeze

      def initialize(name, mode = :nullable)
        super(name, mode)
        @fields = {}
      end

      def type
        :record
      end

      # @return [FieldSchema, nil] the child schema registered under +name+
      def [](name)
        @fields[name]
      end

      def empty?
        @fields.empty?
      end

      # @return [Array<Hash>] the `to_h` form of every child field
      def to_a
        @fields.map do |_, field_schema|
          field_schema.to_h
        end
      end

      def to_h
        {
          :name => name,
          :type => type.to_s.upcase,
          :mode => mode.to_s.upcase,
          :fields => self.to_a,
        }
      end

      # Registers child fields from an array of hashes with string keys
      # 'name', 'type', optional 'mode', and 'fields' for nested records.
      #
      # @raise [ConfigError] on a missing/unknown type or a record without fields
      def load_schema(schema)
        schema.each do |field|
          raise ConfigError, 'field must have type' unless field.key?('type')

          name = field['name']
          mode = (field['mode'] || 'nullable').downcase.to_sym

          type = field['type'].downcase.to_sym
          field_schema_class = FIELD_TYPES[type]
          raise ConfigError, "Invalid field type: #{field['type']}" unless field_schema_class

          field_schema = field_schema_class.new(name, mode)
          @fields[name] = field_schema
          if type == :record
            raise ConfigError, "record field must have fields" unless field.key?('fields')
            field_schema.load_schema(field['fields'])
          end
        end
      end

      # Registers a single nullable field. A dotted name ("rec.child") is
      # registered inside the record named by the part before the first dot,
      # which is created on demand.
      #
      # @raise [ConfigError] when the name is already taken by a non-timestamp
      #   field, the type is unknown, or the parent is not a record
      def register_field(name, type)
        if @fields.key?(name) && @fields[name].type != :timestamp
          raise ConfigError, "field #{name} is registered twice"
        end

        dot = name.match(/\./)
        if dot
          recordname = dot.pre_match
          fieldname = dot.post_match
          register_record_field(recordname)
          @fields[recordname].register_field(fieldname, type)
        else
          schema = FIELD_TYPES[type]
          raise ConfigError, "[Bug] Invalid field type #{type}" unless schema
          @fields[name] = schema.new(name)
        end
      end

      # Formats a record hash: nil values are dropped, values with a registered
      # schema are formatted by it, all others are copied unchanged.
      def format_one(record)
        out = {}
        record.each do |key, value|
          next if value.nil?
          schema = @fields[key]
          out[key] = schema ? schema.format(value) : value
        end
        out
      end

      private

      # Ensures +name+ is registered as a record, creating it when absent.
      def register_record_field(name)
        if !@fields.key?(name)
          @fields[name] = RecordSchema.new(name)
        else
          unless @fields[name].kind_of?(RecordSchema)
            raise ConfigError, "field #{name} is required to be a record but already registered as #{@fields[name].type}"
          end
        end
      end
    end
  end
end