fluent-plugin-bigquery-test 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lib/fluent/plugin/out_bigquery_insert.rb
@@ -0,0 +1,125 @@
+ require 'fluent/plugin/out_bigquery_base'
+
+ module Fluent
+   module Plugin
+     class BigQueryInsertOutput < BigQueryBaseOutput
+       Fluent::Plugin.register_output('bigquery_insert', self)
+
+       helpers :record_accessor
+
+       # template_suffix (only insert)
+       # https://cloud.google.com/bigquery/streaming-data-into-bigquery#template_table_details
+       config_param :template_suffix, :string, default: nil
+
+       # skip_invalid_rows (only insert)
+       # Insert all valid rows of a request, even if invalid rows exist.
+       # The default value is false, which causes the entire request to fail if any invalid rows exist.
+       config_param :skip_invalid_rows, :bool, default: false
+
+       # insert_id_field (only insert)
+       config_param :insert_id_field, :string, default: nil
+
+       # add_insert_timestamp (only insert)
+       # adds a timestamp just before sending the rows to bigquery, so that
+       # buffering time is not taken into account. Gives a field in bigquery
+       # which represents the insert time of the row.
+       config_param :add_insert_timestamp, :string, default: nil
+
+       # allow_retry_insert_errors (only insert)
+       # If insert_id_field is not specified, true means to allow duplicate rows
+       config_param :allow_retry_insert_errors, :bool, default: false
+
+       ## Buffer
+       config_section :buffer do
+         config_set_default :@type, "memory"
+         config_set_default :flush_mode, :interval
+         config_set_default :flush_interval, 1
+         config_set_default :flush_thread_interval, 0.05
+         config_set_default :flush_thread_burst_interval, 0.05
+         config_set_default :chunk_limit_size, 1 * 1024 ** 2 # 1MB
+         config_set_default :total_limit_size, 1 * 1024 ** 3 # 1GB
+         config_set_default :chunk_limit_records, 500
+       end
+
+       def configure(conf)
+         super
+
+         if @insert_id_field
+           if @insert_id_field !~ /^\$[\[\.]/ && @insert_id_field =~ /\./
+             warn "[BREAKING CHANGE] insert_id_field format is changed. Use fluentd record_accessor helper. (https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor)"
+           end
+           @get_insert_id = record_accessor_create(@insert_id_field)
+         end
+
+         formatter_config = conf.elements("format")[0]
+         if formatter_config && formatter_config['@type'] != "json"
+           raise ConfigError, "`bigquery_insert` supports only json formatter."
+         end
+         @formatter = formatter_create(usage: 'out_bigquery_for_insert', type: 'json', conf: formatter_config)
+
+         placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}/template_suffix=#{@template_suffix}"
+         placeholder_validate!(:bigquery_insert, placeholder_params)
+       end
+
+       # for Fluent::Plugin::Output#implement? method
+       def format(tag, time, record)
+         super
+       end
+
+       def write(chunk)
+         table_format = @tables_mutex.synchronize do
+           t = @tables_queue.shift
+           @tables_queue.push t
+           t
+         end
+
+         now = Time.now.utc.strftime("%Y-%m-%d %H:%M:%S.%6N") if @add_insert_timestamp
+
+         rows = chunk.open do |io|
+           io.map do |line|
+             record = MultiJson.load(line)
+             record[@add_insert_timestamp] = now if @add_insert_timestamp
+             row = {"json" => record}
+             row["insert_id"] = @get_insert_id.call(record) if @get_insert_id
+             Fluent::BigQuery::Helper.deep_symbolize_keys(row)
+           end
+         end
+
+         metadata = chunk.metadata
+         project = extract_placeholders(@project, metadata)
+         dataset = extract_placeholders(@dataset, metadata)
+         table_id = extract_placeholders(table_format, metadata)
+         template_suffix = @template_suffix ? extract_placeholders(@template_suffix, metadata) : nil
+         schema = get_schema(project, dataset, metadata)
+
+         insert(project, dataset, table_id, rows, schema, template_suffix)
+       end
+
+       def insert(project, dataset, table_id, rows, schema, template_suffix)
+         writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
+       rescue Fluent::BigQuery::Error => e
+         raise if e.retryable?
+
+         if @secondary
+           # TODO: find better way
+           @retry = retry_state_create(
+             :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
+             forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
+             max_interval: @buffer_config.retry_max_interval,
+             secondary: true, secondary_threshold: Float::EPSILON,
+             randomize: @buffer_config.retry_randomize
+           )
+         else
+           @retry = retry_state_create(
+             :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
+             forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
+             max_interval: @buffer_config.retry_max_interval,
+             randomize: @buffer_config.retry_randomize
+           )
+         end
+
+         raise
+       end
+     end
+   end
+ end
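
For orientation, the plugin above is driven entirely by Fluentd configuration. A minimal `<match>` block for `bigquery_insert` might look like the following sketch (not part of this diff; the project, dataset, table, key path, and `uuid` field are placeholders, and `insert_id_field` uses the record_accessor form that `configure` warns about):

    <match dummy>
      @type bigquery_insert
      auth_method json_key
      json_key /path/to/key.json
      project yourproject_id
      dataset yourdataset_id
      table foo
      insert_id_field $.uuid
      skip_invalid_rows true
      schema [{"name": "uuid", "type": "STRING"}]
    </match>

With the buffer defaults above (memory buffer, one-second flush interval, 500 records per chunk), `write` turns each buffered JSON line into a `{"json" => record, "insert_id" => ...}` row and hands the batch to `writer.insert_rows`.
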
lib/fluent/plugin/out_bigquery_load.rb
@@ -0,0 +1,221 @@
+ require 'fluent/plugin/out_bigquery_base'
+
+ module Fluent
+   module Plugin
+     class BigQueryLoadOutput < BigQueryBaseOutput
+       Fluent::Plugin.register_output('bigquery_load', self)
+
+       helpers :timer
+
+       config_param :source_format, :enum, list: [:json, :avro, :csv], default: :json
+
+       # max_bad_records (only load)
+       # The maximum number of bad records that BigQuery can ignore when running the job.
+       # If the number of bad records exceeds this value, an invalid error is returned in the job result.
+       # The default value is 0, which requires that all records are valid.
+       config_param :max_bad_records, :integer, default: 0
+
+       # prevent_duplicate_load (only load)
+       config_param :prevent_duplicate_load, :bool, default: false
+
+       config_param :use_delayed_commit, :bool, default: true
+       config_param :wait_job_interval, :time, default: 3
+
+       ## Buffer
+       config_section :buffer do
+         config_set_default :@type, "file"
+         config_set_default :flush_mode, :interval
+         config_set_default :flush_interval, 3600 # 1h
+         config_set_default :flush_thread_interval, 5
+         config_set_default :flush_thread_burst_interval, 5
+         config_set_default :chunk_limit_size, 1 * 1024 ** 3 # 1GB
+         config_set_default :total_limit_size, 32 * 1024 ** 3 # 32GB
+
+         config_set_default :delayed_commit_timeout, 1800 # 30m
+       end
+
+       def configure(conf)
+         super
+
+         placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}"
+         placeholder_validate!(:bigquery_load, placeholder_params)
+       end
+
+       def start
+         super
+
+         if prefer_delayed_commit
+           @polling_targets = []
+           @polling_mutex = Mutex.new
+           log.debug("start load job polling")
+           timer_execute(:polling_bigquery_load_job, @wait_job_interval, &method(:poll))
+         end
+       end
+
+       def prefer_delayed_commit
+         @use_delayed_commit
+       end
+
+       # for Fluent::Plugin::Output#implement? method
+       def format(tag, time, record)
+         super
+       end
+
+       def write(chunk)
+         job_reference = do_write(chunk)
+
+         until response = writer.fetch_load_job(job_reference)
+           sleep @wait_job_interval
+         end
+
+         writer.commit_load_job(job_reference.chunk_id_hex, response)
+       rescue Fluent::BigQuery::Error => e
+         raise if e.retryable?
+
+         @retry_mutex.synchronize do
+           if @secondary
+             # TODO: find better way
+             @retry = retry_state_create(
+               :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
+               forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
+               max_interval: @buffer_config.retry_max_interval,
+               secondary: true, secondary_threshold: Float::EPSILON,
+               randomize: @buffer_config.retry_randomize
+             )
+           else
+             @retry = retry_state_create(
+               :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
+               forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
+               max_interval: @buffer_config.retry_max_interval,
+               randomize: @buffer_config.retry_randomize
+             )
+           end
+         end
+
+         raise
+       end
+
+       def try_write(chunk)
+         job_reference = do_write(chunk)
+         @polling_mutex.synchronize do
+           @polling_targets << job_reference
+         end
+       rescue Fluent::BigQuery::Error => e
+         raise if e.retryable?
+
+         @retry_mutex.synchronize do
+           if @secondary
+             # TODO: find better way
+             @retry = retry_state_create(
+               :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
+               forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
+               max_interval: @buffer_config.retry_max_interval,
+               secondary: true, secondary_threshold: Float::EPSILON,
+               randomize: @buffer_config.retry_randomize
+             )
+           else
+             @retry = retry_state_create(
+               :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
+               forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
+               max_interval: @buffer_config.retry_max_interval,
+               randomize: @buffer_config.retry_randomize
+             )
+           end
+         end
+
+         raise
+       end
+
+       private
+
+       def do_write(chunk)
+         table_format = @tables_mutex.synchronize do
+           t = @tables_queue.shift
+           @tables_queue.push t
+           t
+         end
+
+         metadata = chunk.metadata
+         project = extract_placeholders(@project, metadata)
+         dataset = extract_placeholders(@dataset, metadata)
+         table_id = extract_placeholders(table_format, metadata)
+         schema = get_schema(project, dataset, metadata)
+
+         create_upload_source(chunk) do |upload_source|
+           writer.create_load_job(chunk.unique_id, dump_unique_id_hex(chunk.unique_id), project, dataset, table_id, upload_source, schema)
+         end
+       end
+
+       def poll
+         job_reference = @polling_mutex.synchronize do
+           @polling_targets.shift
+         end
+         return unless job_reference
+
+         begin
+           response = writer.fetch_load_job(job_reference)
+           if response
+             writer.commit_load_job(job_reference.chunk_id_hex, response)
+             commit_write(job_reference.chunk_id)
+             log.debug("commit chunk", chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
+           else
+             @polling_mutex.synchronize do
+               @polling_targets << job_reference
+             end
+           end
+         rescue Fluent::BigQuery::Error => e
+           # RetryableError comes from only `commit_load_job`
+           # if error is retryable, takeback chunk and do next `try_flush`
+           # if error is not retryable, create custom retry_state and takeback chunk do next `try_flush`
+           if e.retryable?
+             log.warn("failed to poll load job", error: e, chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
+           else
+             log.error("failed to poll load job", error: e, chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
+             @retry_mutex.synchronize do
+               if @secondary
+                 # TODO: find better way
+                 @retry = retry_state_create(
+                   :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
+                   forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
+                   max_interval: @buffer_config.retry_max_interval,
+                   secondary: true, secondary_threshold: Float::EPSILON,
+                   randomize: @buffer_config.retry_randomize
+                 )
+               else
+                 @retry = retry_state_create(
+                   :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
+                   forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
+                   max_interval: @buffer_config.retry_max_interval,
+                   randomize: @buffer_config.retry_randomize
+                 )
+               end
+             end
+           end
+
+           rollback_write(job_reference.chunk_id)
+         rescue => e
+           log.error("unexpected error while polling", error: e)
+           log.error_backtrace
+           rollback_write(job_reference.chunk_id)
+         end
+       end
+
+       def create_upload_source(chunk)
+         chunk_is_file = @buffer_config["@type"] == 'file'
+         if chunk_is_file
+           File.open(chunk.path) do |file|
+             yield file
+           end
+         else
+           Tempfile.open("chunk-tmp") do |file|
+             file.binmode
+             chunk.write_to(file)
+             file.sync
+             file.rewind
+             yield file
+           end
+         end
+       end
+     end
+   end
+ end
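
The load plugin trades latency for throughput, so an illustrative configuration (again a sketch, not from this diff; all names and paths are placeholders) pairs it with a file buffer and a long flush interval, matching the defaults above:

    <match dummy>
      @type bigquery_load
      auth_method json_key
      json_key /path/to/key.json
      project yourproject_id
      dataset yourdataset_id
      table foo
      source_format json
      <buffer>
        @type file
        path /var/log/fluentd/bigquery_load_buffer
        flush_interval 3600
      </buffer>
    </match>

Because `use_delayed_commit` defaults to true, `try_write` only submits the load job; the `poll` timer later calls `commit_write` or `rollback_write`, so a chunk stays on disk until BigQuery confirms (or permanently fails) the job, bounded by the 30-minute `delayed_commit_timeout` default.
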
test/helper.rb
@@ -0,0 +1,20 @@
+ require 'bundler/setup'
+ require 'test/unit'
+
+ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
+ $LOAD_PATH.unshift(__dir__)
+ require 'fluent/test'
+
+ require 'fluent/plugin/buffer'
+ require 'fluent/plugin/buf_memory'
+ require 'fluent/plugin/buf_file'
+ require 'fluent/test/driver/output'
+
+ require 'fluent/plugin/out_bigquery_base'
+ require 'fluent/plugin/out_bigquery_insert'
+ require 'fluent/plugin/out_bigquery_load'
+ require 'google/apis/bigquery_v2'
+ require 'google/api_client/auth/key_utils'
+ require 'googleauth'
+
+ require 'test/unit/rr'
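
This helper puts `lib/` on the load path and requires both output plugins, so requiring it is enough to have the outputs registered. A hypothetical smoke test built on it (not part of this diff; it assumes fluentd's `Fluent::Plugin.new_output` registry lookup) would be:

    require 'helper'

    class RegistrationSmokeTest < Test::Unit::TestCase
      def test_outputs_are_registered
        # helper.rb already loaded the plugin files, whose class bodies call
        # Fluent::Plugin.register_output; new_output looks them up by type.
        assert Fluent::Plugin.new_output('bigquery_insert')
        assert Fluent::Plugin.new_output('bigquery_load')
      end
    end
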
test/plugin/test_out_bigquery_base.rb
@@ -0,0 +1,579 @@
+ require 'helper'
+
+ class BigQueryBaseOutputTest < Test::Unit::TestCase
+   def setup
+     Fluent::Test.setup
+   end
+
+   CONFIG = %[
+     table foo
+     email foo@bar.example
+     private_key_path /path/to/key
+     project yourproject_id
+     dataset yourdataset_id
+
+     <inject>
+     time_format %s
+     time_key time
+     </inject>
+
+     schema [
+       {"name": "time", "type": "INTEGER"},
+       {"name": "status", "type": "INTEGER"},
+       {"name": "bytes", "type": "INTEGER"},
+       {"name": "vhost", "type": "STRING"},
+       {"name": "path", "type": "STRING"},
+       {"name": "method", "type": "STRING"},
+       {"name": "protocol", "type": "STRING"},
+       {"name": "agent", "type": "STRING"},
+       {"name": "referer", "type": "STRING"},
+       {"name": "remote", "type": "RECORD", "fields": [
+         {"name": "host", "type": "STRING"},
+         {"name": "ip", "type": "STRING"},
+         {"name": "user", "type": "STRING"}
+       ]},
+       {"name": "requesttime", "type": "FLOAT"},
+       {"name": "bot_access", "type": "BOOLEAN"},
+       {"name": "loginsession", "type": "BOOLEAN"}
+     ]
+   ]
+
+   API_SCOPE = "https://www.googleapis.com/auth/bigquery"
+
+   def create_driver(conf = CONFIG)
+     Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryBaseOutput).configure(conf)
+   end
+
+   def stub_writer(stub_auth: true)
+     stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
+       stub(writer).get_auth { nil } if stub_auth
+       yield writer
+       writer
+     end
+   end
+
+   private def sudo_schema_response
+     {
+       "schema" => {
+         "fields" => [
+           {
+             "name" => "time",
+             "type" => "TIMESTAMP",
+             "mode" => "REQUIRED"
+           },
+           {
+             "name" => "tty",
+             "type" => "STRING",
+             "mode" => "NULLABLE"
+           },
+           {
+             "name" => "pwd",
+             "type" => "STRING",
+             "mode" => "REQUIRED"
+           },
+           {
+             "name" => "user",
+             "type" => "STRING",
+             "mode" => "REQUIRED"
+           },
+           {
+             "name" => "argv",
+             "type" => "STRING",
+             "mode" => "REPEATED"
+           }
+         ]
+       }
+     }
+   end
+
+   def test_configure_table
+     driver = create_driver
+     assert_equal driver.instance.table, 'foo'
+     assert_nil driver.instance.tables
+
+     driver = create_driver(CONFIG.sub(/\btable\s+.*$/, 'tables foo,bar'))
+     assert_nil driver.instance.table
+     assert_equal driver.instance.tables, ['foo', 'bar']
+
+     assert_raise(Fluent::ConfigError, "'table' or 'tables' must be specified, and both are invalid") {
+       create_driver(CONFIG + "tables foo,bar")
+     }
+   end
+
+   def test_configure_auth_private_key
+     driver = create_driver
+     stub_writer(stub_auth: false) do |writer|
+       mock(writer).get_auth_from_private_key { stub! }
+     end
+     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
+   end
+
+   def test_configure_auth_compute_engine
+     driver = create_driver(%[
+       table foo
+       auth_method compute_engine
+       project yourproject_id
+       dataset yourdataset_id
+       schema [
+         {"name": "time", "type": "INTEGER"},
+         {"name": "status", "type": "INTEGER"},
+         {"name": "bytes", "type": "INTEGER"}
+       ]
+     ])
+
+     stub_writer(stub_auth: false) do |writer|
+       mock(writer).get_auth_from_compute_engine { stub! }
+     end
+     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
+   end
+
+   def test_configure_auth_json_key_as_file
+     driver = create_driver(%[
+       table foo
+       auth_method json_key
+       json_key jsonkey.josn
+       project yourproject_id
+       dataset yourdataset_id
+       schema [
+         {"name": "time", "type": "INTEGER"},
+         {"name": "status", "type": "INTEGER"},
+         {"name": "bytes", "type": "INTEGER"}
+       ]
+     ])
+
+     stub_writer(stub_auth: false) do |writer|
+       mock(writer).get_auth_from_json_key { stub! }
+     end
+     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
+   end
+
+   def test_configure_auth_json_key_as_file_raise_permission_error
+     json_key_path = 'test/plugin/testdata/json_key.json'
+     json_key_path_dir = File.dirname(json_key_path)
+
+     begin
+       File.chmod(0000, json_key_path_dir)
+
+       driver = create_driver(%[
+         table foo
+         auth_method json_key
+         json_key #{json_key_path}
+         project yourproject_id
+         dataset yourdataset_id
+         schema [
+           {"name": "time", "type": "INTEGER"},
+           {"name": "status", "type": "INTEGER"},
+           {"name": "bytes", "type": "INTEGER"}
+         ]
+       ])
+       assert_raises(Errno::EACCES) do
+         driver.instance.writer.client
+       end
+     ensure
+       File.chmod(0755, json_key_path_dir)
+     end
+   end
+
+   def test_configure_auth_json_key_as_string
+     json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
+     json_key_io = StringIO.new(json_key)
+     authorization = Object.new
+     stub(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }
+
+     driver = create_driver(%[
+       table foo
+       auth_method json_key
+       json_key #{json_key}
+       project yourproject_id
+       dataset yourdataset_id
+       schema [
+         {"name": "time", "type": "INTEGER"},
+         {"name": "status", "type": "INTEGER"},
+         {"name": "bytes", "type": "INTEGER"}
+       ]
+     ])
+     stub_writer(stub_auth: false) do |writer|
+       mock.proxy(writer).get_auth_from_json_key { stub! }
+     end
+     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
+   end
+
+   def test_configure_auth_application_default
+     driver = create_driver(%[
+       table foo
+       auth_method application_default
+       project yourproject_id
+       dataset yourdataset_id
+       schema [
+         {"name": "time", "type": "INTEGER"},
+         {"name": "status", "type": "INTEGER"},
+         {"name": "bytes", "type": "INTEGER"}
+       ]
+     ])
+
+     stub_writer(stub_auth: false) do |writer|
+       mock.proxy(writer).get_auth_from_application_default { stub! }
+     end
+     assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
+   end
+
+   def test_format
+     now = Fluent::EventTime.new(Time.now.to_i)
+     input = {
+       "status" => "1",
+       "bytes" => 3.0,
+       "vhost" => :bar,
+       "path" => "/path/to/baz",
+       "method" => "GET",
+       "protocol" => "HTTP/0.9",
+       "agent" => "libwww",
+       "referer" => "http://referer.example",
+       "requesttime" => (now - 1).to_f.to_s,
+       "bot_access" => true,
+       "loginsession" => false,
+       "something-else" => "would be ignored",
+       "yet-another" => {
+         "foo" => "bar",
+         "baz" => 1,
+       },
+       "remote" => {
+         "host" => "remote.example",
+         "ip" => "192.0.2.1",
+         "port" => 12345,
+         "user" => "tagomoris",
+       }
+     }
+     expected = {
+       "time" => now.to_i,
+       "status" => 1,
+       "bytes" => 3,
+       "vhost" => "bar",
+       "path" => "/path/to/baz",
+       "method" => "GET",
+       "protocol" => "HTTP/0.9",
+       "agent" => "libwww",
+       "referer" => "http://referer.example",
+       "requesttime" => (now - 1).to_f.to_s.to_f,
+       "bot_access" => true,
+       "loginsession" => false,
+       "something-else" => "would be ignored",
+       "yet-another" => {
+         "foo" => "bar",
+         "baz" => 1,
+       },
+       "remote" => {
+         "host" => "remote.example",
+         "ip" => "192.0.2.1",
+         "port" => 12345,
+         "user" => "tagomoris",
+       }
+     }
+
+     driver = create_driver(CONFIG)
+     buf = nil
+     driver.run { buf = driver.instance.format("my.tag", now, input) }
+
+     assert_equal expected, MultiJson.load(buf)
+   end
+
+   [
+     # <time_format>, <time field type>, <time expectation generator>, <assertion>
+     [
+       "%s.%6N",
+       lambda{|t| t.strftime("%s.%6N").to_f },
+       lambda{|recv, expected, actual|
+         recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
+       }
+     ],
+     [
+       "%Y-%m-%dT%H:%M:%S%:z",
+       lambda{|t| t.iso8601 },
+       :assert_equal.to_proc
+     ],
+   ].each do |format, expect_time, assert|
+     define_method("test_time_formats_#{format}") do
+       now = Fluent::Engine.now
+       input = {}
+       expected = { "time" => expect_time[Time.at(now.to_r)] }
+
+       driver = create_driver(<<-CONFIG)
+         table foo
+         email foo@bar.example
+         private_key_path /path/to/key
+         project yourproject_id
+         dataset yourdataset_id
+
+         <inject>
+         time_format #{format}
+         time_type string
+         time_key time
+         </inject>
+
+         schema [
+           {"name": "metadata", "type": "RECORD", "fields": [
+             {"name": "time", "type": "INTEGER"},
+             {"name": "node", "type": "STRING"}
+           ]},
+           {"name": "log", "type": "STRING"}
+         ]
+       CONFIG
+
+       buf = nil
+       driver.run { buf = driver.instance.format("my.tag", now, input) }
+
+       assert[self, expected["time"], MultiJson.load(buf)["time"]]
+     end
+   end
+
+   def test_format_with_schema
+     now = Fluent::EventTime.new(Time.now.to_i)
+     input = {
+       "request" => {
+         "vhost" => :bar,
+         "path" => "/path/to/baz",
+         "method" => "GET",
+         "protocol" => "HTTP/0.9",
+         "agent" => "libwww",
+         "referer" => "http://referer.example",
+         "time" => (now - 1).to_f,
+         "bot_access" => true,
+         "loginsession" => false,
+       },
+       "response" => {
+         "status" => "1",
+         "bytes" => 3.0,
+       },
+       "remote" => {
+         "host" => "remote.example",
+         "ip" => "192.0.2.1",
+         "port" => 12345,
+         "user" => "tagomoris",
+       },
+       "something-else" => "would be ignored",
+       "yet-another" => {
+         "foo" => "bar",
+         "baz" => 1,
+       },
+     }
+     expected = {
+       "time" => now.to_f,
+       "request" => {
+         "vhost" => "bar",
+         "path" => "/path/to/baz",
+         "method" => "GET",
+         "protocol" => "HTTP/0.9",
+         "agent" => "libwww",
+         "referer" => "http://referer.example",
+         "time" => (now - 1).to_f,
+         "bot_access" => true,
+         "loginsession" => false,
+       },
+       "remote" => {
+         "host" => "remote.example",
+         "ip" => "192.0.2.1",
+         "port" => 12345,
+         "user" => "tagomoris",
+       },
+       "response" => {
+         "status" => 1,
+         "bytes" => 3,
+       },
+       "something-else" => "would be ignored",
+       "yet-another" => {
+         "foo" => "bar",
+         "baz" => 1,
+       },
+     }
+
+     driver = create_driver(<<-CONFIG)
+       table foo
+       email foo@bar.example
+       private_key_path /path/to/key
+       project yourproject_id
+       dataset yourdataset_id
+
+       <inject>
+       time_format %s
+       time_key time
+       </inject>
+
+       schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
+       schema [{"name": "time", "type": "INTEGER"}]
+     CONFIG
+
+     buf = nil
+     driver.run { buf = driver.instance.format("my.tag", now, input) }
+
+     assert_equal expected, MultiJson.load(buf)
+   end
+
+   def test_format_repeated_field_with_schema
+     now = Fluent::EventTime.new(Time.now.to_i)
+     input = {
+       "tty" => nil,
+       "pwd" => "/home/yugui",
+       "user" => "fluentd",
+       "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+     }
+     expected = {
+       "time" => now.to_f,
+       "pwd" => "/home/yugui",
+       "user" => "fluentd",
+       "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+     }
+
+     driver = create_driver(<<-CONFIG)
+       table foo
+       email foo@bar.example
+       private_key_path /path/to/key
+       project yourproject_id
+       dataset yourdataset_id
+
+       <inject>
+       time_format %s
+       time_key time
+       </inject>
+
+       schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
+       schema [{"name": "time", "type": "INTEGER"}]
+     CONFIG
+
+     buf = nil
+     driver.run { buf = driver.instance.format("my.tag", now, input) }
+
+     assert_equal expected, MultiJson.load(buf)
+   end
+
+   def test_format_fetch_from_bigquery_api
+     now = Fluent::EventTime.new(Time.now.to_i)
+     input = {
+       "tty" => nil,
+       "pwd" => "/home/yugui",
+       "user" => "fluentd",
+       "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+     }
+     expected = {
+       "time" => now.to_i,
+       "pwd" => "/home/yugui",
+       "user" => "fluentd",
+       "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+     }
+
+     driver = create_driver(<<-CONFIG)
+       table foo
+       email foo@bar.example
+       private_key_path /path/to/key
+       project yourproject_id
+       dataset yourdataset_id
+
+       <inject>
+       time_format %s
+       time_key time
+       </inject>
+
+       fetch_schema true
+       schema [{"name": "time", "type": "INTEGER"}]
+     CONFIG
+
+     stub_writer do |writer|
+       mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
+         sudo_schema_response["schema"]["fields"]
+       end
+     end
+
+     buf = nil
+     driver.run { buf = driver.instance.format("my.tag", now, input) }
+
+     assert_equal expected, MultiJson.load(buf)
+
+     table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
+     assert table_schema["time"]
+     assert_equal :timestamp, table_schema["time"].type
+     assert_equal :required, table_schema["time"].mode
+
+     assert table_schema["tty"]
+     assert_equal :string, table_schema["tty"].type
+     assert_equal :nullable, table_schema["tty"].mode
+
+     assert table_schema["pwd"]
+     assert_equal :string, table_schema["pwd"].type
+     assert_equal :required, table_schema["pwd"].mode
+
+     assert table_schema["user"]
+     assert_equal :string, table_schema["user"].type
+     assert_equal :required, table_schema["user"].mode
+
+     assert table_schema["argv"]
+     assert_equal :string, table_schema["argv"].type
+     assert_equal :repeated, table_schema["argv"].mode
+   end
+
+   def test_format_fetch_from_bigquery_api_with_fetch_schema_table
+     now = Fluent::EventTime.new(Time.now.to_i)
+     input = {
+       "tty" => nil,
+       "pwd" => "/home/yugui",
+       "user" => "fluentd",
+       "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+     }
+     expected = {
+       "time" => now.to_i,
+       "pwd" => "/home/yugui",
+       "user" => "fluentd",
+       "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+     }
+
+     driver = create_driver(<<-CONFIG)
+       table foo_%Y_%m_%d
+       email foo@bar.example
+       private_key_path /path/to/key
+       project yourproject_id
+       dataset yourdataset_id
+
+       <inject>
+       time_format %s
+       time_key time
+       </inject>
+
+       fetch_schema true
+       fetch_schema_table foo
+       schema [{"name": "time", "type": "INTEGER"}]
+
+       <buffer time>
+         timekey 1d
+       </buffer>
+     CONFIG
+
+     stub_writer do |writer|
+       mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
+         sudo_schema_response["schema"]["fields"]
+       end
+     end
+
+     buf = nil
+     driver.run { buf = driver.instance.format("my.tag", now, input) }
+
+     assert_equal expected, MultiJson.load(buf)
+
+     table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
+     assert table_schema["time"]
+     assert_equal :timestamp, table_schema["time"].type
+     assert_equal :required, table_schema["time"].mode
+
+     assert table_schema["tty"]
+     assert_equal :string, table_schema["tty"].type
+     assert_equal :nullable, table_schema["tty"].mode
+
+     assert table_schema["pwd"]
+     assert_equal :string, table_schema["pwd"].type
+     assert_equal :required, table_schema["pwd"].mode
+
+     assert table_schema["user"]
+     assert_equal :string, table_schema["user"].type
+     assert_equal :required, table_schema["user"].mode
+
+     assert table_schema["argv"]
+     assert_equal :string, table_schema["argv"].type
+     assert_equal :repeated, table_schema["argv"].mode
+   end
+ end
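
All of the tests above exercise `format`, which emits one JSON line per event into the chunk; `BigQueryInsertOutput#write` then reverses that. A standalone sketch of the round trip (the sample record and the `$.uuid` accessor are invented; `deep_symbolize_keys` is the gem helper called in `write`):

    require 'multi_json'

    # what format wrote into the buffer chunk
    line = MultiJson.dump({"uuid" => "abc-123", "status" => 200})

    # what write does with each line of the chunk
    record = MultiJson.load(line)
    row = {"json" => record}
    row["insert_id"] = record["uuid"] # record_accessor_create("$.uuid") would fetch this
    # Fluent::BigQuery::Helper.deep_symbolize_keys(row)
    #   => {json: {uuid: "abc-123", status: 200}, insert_id: "abc-123"}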