fluent-plugin-bigquery-test 2.2.0
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +16 -0
- data/.gitignore +21 -0
- data/.travis.yml +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +13 -0
- data/README.md +602 -0
- data/Rakefile +12 -0
- data/fluent-plugin-bigquery.gemspec +29 -0
- data/gemfiles/activesupport-4.gemfile +6 -0
- data/lib/fluent/plugin/bigquery/errors.rb +84 -0
- data/lib/fluent/plugin/bigquery/helper.rb +33 -0
- data/lib/fluent/plugin/bigquery/schema.rb +281 -0
- data/lib/fluent/plugin/bigquery/version.rb +5 -0
- data/lib/fluent/plugin/bigquery/writer.rb +356 -0
- data/lib/fluent/plugin/out_bigquery_base.rb +221 -0
- data/lib/fluent/plugin/out_bigquery_insert.rb +125 -0
- data/lib/fluent/plugin/out_bigquery_load.rb +221 -0
- data/test/helper.rb +20 -0
- data/test/plugin/test_out_bigquery_base.rb +579 -0
- data/test/plugin/test_out_bigquery_insert.rb +544 -0
- data/test/plugin/test_out_bigquery_load.rb +348 -0
- data/test/plugin/test_record_schema.rb +186 -0
- data/test/plugin/testdata/apache.schema +98 -0
- data/test/plugin/testdata/json_key.json +7 -0
- data/test/plugin/testdata/sudo.schema +27 -0
- data/test/run_test.rb +9 -0
- metadata +197 -0
data/lib/fluent/plugin/out_bigquery_insert.rb
ADDED
@@ -0,0 +1,125 @@

require 'fluent/plugin/out_bigquery_base'

module Fluent
  module Plugin
    class BigQueryInsertOutput < BigQueryBaseOutput
      Fluent::Plugin.register_output('bigquery_insert', self)

      helpers :record_accessor

      # template_suffix (only insert)
      # https://cloud.google.com/bigquery/streaming-data-into-bigquery#template_table_details
      config_param :template_suffix, :string, default: nil

      # skip_invalid_rows (only insert)
      # Insert all valid rows of a request, even if invalid rows exist.
      # The default value is false, which causes the entire request to fail if any invalid rows exist.
      config_param :skip_invalid_rows, :bool, default: false

      # insert_id_field (only insert)
      config_param :insert_id_field, :string, default: nil

      # add_insert_timestamp (only insert)
      # adds a timestamp just before sending the rows to bigquery, so that
      # buffering time is not taken into account. Gives a field in bigquery
      # which represents the insert time of the row.
      config_param :add_insert_timestamp, :string, default: nil

      # allow_retry_insert_errors (only insert)
      # If insert_id_field is not specified, true means to allow duplicate rows
      config_param :allow_retry_insert_errors, :bool, default: false

      ## Buffer
      config_section :buffer do
        config_set_default :@type, "memory"
        config_set_default :flush_mode, :interval
        config_set_default :flush_interval, 1
        config_set_default :flush_thread_interval, 0.05
        config_set_default :flush_thread_burst_interval, 0.05
        config_set_default :chunk_limit_size, 1 * 1024 ** 2 # 1MB
        config_set_default :total_limit_size, 1 * 1024 ** 3 # 1GB
        config_set_default :chunk_limit_records, 500
      end

      def configure(conf)
        super

        if @insert_id_field
          if @insert_id_field !~ /^\$[\[\.]/ && @insert_id_field =~ /\./
            warn "[BREAKING CHANGE] insert_id_field format is changed. Use fluentd record_accessor helper. (https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor)"
          end
          @get_insert_id = record_accessor_create(@insert_id_field)
        end

        formatter_config = conf.elements("format")[0]
        if formatter_config && formatter_config['@type'] != "json"
          raise ConfigError, "`bigquery_insert` supports only json formatter."
        end
        @formatter = formatter_create(usage: 'out_bigquery_for_insert', type: 'json', conf: formatter_config)

        placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}/template_suffix=#{@template_suffix}"
        placeholder_validate!(:bigquery_insert, placeholder_params)
      end

      # for Fluent::Plugin::Output#implement? method
      def format(tag, time, record)
        super
      end

      def write(chunk)
        table_format = @tables_mutex.synchronize do
          t = @tables_queue.shift
          @tables_queue.push t
          t
        end

        now = Time.now.utc.strftime("%Y-%m-%d %H:%M:%S.%6N") if @add_insert_timestamp

        rows = chunk.open do |io|
          io.map do |line|
            record = MultiJson.load(line)
            record[@add_insert_timestamp] = now if @add_insert_timestamp
            row = {"json" => record}
            row["insert_id"] = @get_insert_id.call(record) if @get_insert_id
            Fluent::BigQuery::Helper.deep_symbolize_keys(row)
          end
        end

        metadata = chunk.metadata
        project = extract_placeholders(@project, metadata)
        dataset = extract_placeholders(@dataset, metadata)
        table_id = extract_placeholders(table_format, metadata)
        template_suffix = @template_suffix ? extract_placeholders(@template_suffix, metadata) : nil
        schema = get_schema(project, dataset, metadata)

        insert(project, dataset, table_id, rows, schema, template_suffix)
      end

      def insert(project, dataset, table_id, rows, schema, template_suffix)
        writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
      rescue Fluent::BigQuery::Error => e
        raise if e.retryable?

        if @secondary
          # TODO: find better way
          @retry = retry_state_create(
            :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
            forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
            max_interval: @buffer_config.retry_max_interval,
            secondary: true, secondary_threshold: Float::EPSILON,
            randomize: @buffer_config.retry_randomize
          )
        else
          @retry = retry_state_create(
            :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
            forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
            max_interval: @buffer_config.retry_max_interval,
            randomize: @buffer_config.retry_randomize
          )
        end

        raise
      end
    end
  end
end
data/lib/fluent/plugin/out_bigquery_load.rb
ADDED
@@ -0,0 +1,221 @@

require 'fluent/plugin/out_bigquery_base'

module Fluent
  module Plugin
    class BigQueryLoadOutput < BigQueryBaseOutput
      Fluent::Plugin.register_output('bigquery_load', self)

      helpers :timer

      config_param :source_format, :enum, list: [:json, :avro, :csv], default: :json

      # max_bad_records (only load)
      # The maximum number of bad records that BigQuery can ignore when running the job.
      # If the number of bad records exceeds this value, an invalid error is returned in the job result.
      # The default value is 0, which requires that all records are valid.
      config_param :max_bad_records, :integer, default: 0

      # prevent_duplicate_load (only load)
      config_param :prevent_duplicate_load, :bool, default: false

      config_param :use_delayed_commit, :bool, default: true
      config_param :wait_job_interval, :time, default: 3

      ## Buffer
      config_section :buffer do
        config_set_default :@type, "file"
        config_set_default :flush_mode, :interval
        config_set_default :flush_interval, 3600 # 1h
        config_set_default :flush_thread_interval, 5
        config_set_default :flush_thread_burst_interval, 5
        config_set_default :chunk_limit_size, 1 * 1024 ** 3 # 1GB
        config_set_default :total_limit_size, 32 * 1024 ** 3 # 32GB

        config_set_default :delayed_commit_timeout, 1800 # 30m
      end

      def configure(conf)
        super

        placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}"
        placeholder_validate!(:bigquery_load, placeholder_params)
      end

      def start
        super

        if prefer_delayed_commit
          @polling_targets = []
          @polling_mutex = Mutex.new
          log.debug("start load job polling")
          timer_execute(:polling_bigquery_load_job, @wait_job_interval, &method(:poll))
        end
      end

      def prefer_delayed_commit
        @use_delayed_commit
      end

      # for Fluent::Plugin::Output#implement? method
      def format(tag, time, record)
        super
      end

      def write(chunk)
        job_reference = do_write(chunk)

        until response = writer.fetch_load_job(job_reference)
          sleep @wait_job_interval
        end

        writer.commit_load_job(job_reference.chunk_id_hex, response)
      rescue Fluent::BigQuery::Error => e
        raise if e.retryable?

        @retry_mutex.synchronize do
          if @secondary
            # TODO: find better way
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              secondary: true, secondary_threshold: Float::EPSILON,
              randomize: @buffer_config.retry_randomize
            )
          else
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              randomize: @buffer_config.retry_randomize
            )
          end
        end

        raise
      end

      def try_write(chunk)
        job_reference = do_write(chunk)
        @polling_mutex.synchronize do
          @polling_targets << job_reference
        end
      rescue Fluent::BigQuery::Error => e
        raise if e.retryable?

        @retry_mutex.synchronize do
          if @secondary
            # TODO: find better way
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              secondary: true, secondary_threshold: Float::EPSILON,
              randomize: @buffer_config.retry_randomize
            )
          else
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              randomize: @buffer_config.retry_randomize
            )
          end
        end

        raise
      end

      private

      def do_write(chunk)
        table_format = @tables_mutex.synchronize do
          t = @tables_queue.shift
          @tables_queue.push t
          t
        end

        metadata = chunk.metadata
        project = extract_placeholders(@project, metadata)
        dataset = extract_placeholders(@dataset, metadata)
        table_id = extract_placeholders(table_format, metadata)
        schema = get_schema(project, dataset, metadata)

        create_upload_source(chunk) do |upload_source|
          writer.create_load_job(chunk.unique_id, dump_unique_id_hex(chunk.unique_id), project, dataset, table_id, upload_source, schema)
        end
      end

      def poll
        job_reference = @polling_mutex.synchronize do
          @polling_targets.shift
        end
        return unless job_reference

        begin
          response = writer.fetch_load_job(job_reference)
          if response
            writer.commit_load_job(job_reference.chunk_id_hex, response)
            commit_write(job_reference.chunk_id)
            log.debug("commit chunk", chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
          else
            @polling_mutex.synchronize do
              @polling_targets << job_reference
            end
          end
        rescue Fluent::BigQuery::Error => e
          # RetryableError comes from only `commit_load_job`
          # if error is retryable, takeback chunk and do next `try_flush`
          # if error is not retryable, create custom retry_state and takeback chunk do next `try_flush`
          if e.retryable?
            log.warn("failed to poll load job", error: e, chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
          else
            log.error("failed to poll load job", error: e, chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
            @retry_mutex.synchronize do
              if @secondary
                # TODO: find better way
                @retry = retry_state_create(
                  :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
                  forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
                  max_interval: @buffer_config.retry_max_interval,
                  secondary: true, secondary_threshold: Float::EPSILON,
                  randomize: @buffer_config.retry_randomize
                )
              else
                @retry = retry_state_create(
                  :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
                  forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
                  max_interval: @buffer_config.retry_max_interval,
                  randomize: @buffer_config.retry_randomize
                )
              end
            end
          end

          rollback_write(job_reference.chunk_id)
        rescue => e
          log.error("unexpected error while polling", error: e)
          log.error_backtrace
          rollback_write(job_reference.chunk_id)
        end
      end

      def create_upload_source(chunk)
        chunk_is_file = @buffer_config["@type"] == 'file'
        if chunk_is_file
          File.open(chunk.path) do |file|
            yield file
          end
        else
          Tempfile.open("chunk-tmp") do |file|
            file.binmode
            chunk.write_to(file)
            file.sync
            file.rewind
            yield file
          end
        end
      end
    end
  end
end
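
bigquery_load defaults to a file buffer, hourly flushes, and delayed commit: try_write registers the load job and the timer-driven poll commits or rolls back the chunk once the job finishes. A matching stanza might look like the following sketch; the project, dataset, table, key path, and buffer path are placeholders:

<match dummy>
  @type bigquery_load

  auth_method json_key
  json_key /path/to/service_account.json   # placeholder path

  project yourproject_id
  dataset yourdataset_id
  table   tablename

  fetch_schema true

  <buffer>
    @type file
    path /var/log/fluentd/bigquery_load.*.buffer   # placeholder path
    flush_interval 3600
  </buffer>
</match>

Keeping the default file buffer matters for throughput here: create_upload_source uploads the chunk file directly when the buffer is file-backed, and otherwise has to copy the chunk into a Tempfile first.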
data/test/helper.rb
ADDED
@@ -0,0 +1,20 @@

require 'bundler/setup'
require 'test/unit'

$LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
$LOAD_PATH.unshift(__dir__)
require 'fluent/test'

require 'fluent/plugin/buffer'
require 'fluent/plugin/buf_memory'
require 'fluent/plugin/buf_file'
require 'fluent/test/driver/output'

require 'fluent/plugin/out_bigquery_base'
require 'fluent/plugin/out_bigquery_insert'
require 'fluent/plugin/out_bigquery_load'
require 'google/apis/bigquery_v2'
require 'google/api_client/auth/key_utils'
require 'googleauth'

require 'test/unit/rr'
data/test/plugin/test_out_bigquery_base.rb
ADDED
@@ -0,0 +1,579 @@

require 'helper'

class BigQueryBaseOutputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  CONFIG = %[
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <inject>
    time_format %s
    time_key time
    </inject>

    schema [
      {"name": "time", "type": "INTEGER"},
      {"name": "status", "type": "INTEGER"},
      {"name": "bytes", "type": "INTEGER"},
      {"name": "vhost", "type": "STRING"},
      {"name": "path", "type": "STRING"},
      {"name": "method", "type": "STRING"},
      {"name": "protocol", "type": "STRING"},
      {"name": "agent", "type": "STRING"},
      {"name": "referer", "type": "STRING"},
      {"name": "remote", "type": "RECORD", "fields": [
        {"name": "host", "type": "STRING"},
        {"name": "ip", "type": "STRING"},
        {"name": "user", "type": "STRING"}
      ]},
      {"name": "requesttime", "type": "FLOAT"},
      {"name": "bot_access", "type": "BOOLEAN"},
      {"name": "loginsession", "type": "BOOLEAN"}
    ]
  ]

  API_SCOPE = "https://www.googleapis.com/auth/bigquery"

  def create_driver(conf = CONFIG)
    Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryBaseOutput).configure(conf)
  end

  def stub_writer(stub_auth: true)
    stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
      stub(writer).get_auth { nil } if stub_auth
      yield writer
      writer
    end
  end

  private def sudo_schema_response
    {
      "schema" => {
        "fields" => [
          {
            "name" => "time",
            "type" => "TIMESTAMP",
            "mode" => "REQUIRED"
          },
          {
            "name" => "tty",
            "type" => "STRING",
            "mode" => "NULLABLE"
          },
          {
            "name" => "pwd",
            "type" => "STRING",
            "mode" => "REQUIRED"
          },
          {
            "name" => "user",
            "type" => "STRING",
            "mode" => "REQUIRED"
          },
          {
            "name" => "argv",
            "type" => "STRING",
            "mode" => "REPEATED"
          }
        ]
      }
    }
  end

  def test_configure_table
    driver = create_driver
    assert_equal driver.instance.table, 'foo'
    assert_nil driver.instance.tables

    driver = create_driver(CONFIG.sub(/\btable\s+.*$/, 'tables foo,bar'))
    assert_nil driver.instance.table
    assert_equal driver.instance.tables, ['foo', 'bar']

    assert_raise(Fluent::ConfigError, "'table' or 'tables' must be specified, and both are invalid") {
      create_driver(CONFIG + "tables foo,bar")
    }
  end

  def test_configure_auth_private_key
    driver = create_driver
    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_private_key { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_compute_engine
    driver = create_driver(%[
      table foo
      auth_method compute_engine
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_compute_engine { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_json_key_as_file
    driver = create_driver(%[
      table foo
      auth_method json_key
      json_key jsonkey.josn
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_json_key { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_json_key_as_file_raise_permission_error
    json_key_path = 'test/plugin/testdata/json_key.json'
    json_key_path_dir = File.dirname(json_key_path)

    begin
      File.chmod(0000, json_key_path_dir)

      driver = create_driver(%[
        table foo
        auth_method json_key
        json_key #{json_key_path}
        project yourproject_id
        dataset yourdataset_id
        schema [
          {"name": "time", "type": "INTEGER"},
          {"name": "status", "type": "INTEGER"},
          {"name": "bytes", "type": "INTEGER"}
        ]
      ])
      assert_raises(Errno::EACCES) do
        driver.instance.writer.client
      end
    ensure
      File.chmod(0755, json_key_path_dir)
    end
  end

  def test_configure_auth_json_key_as_string
    json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
    json_key_io = StringIO.new(json_key)
    authorization = Object.new
    stub(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }

    driver = create_driver(%[
      table foo
      auth_method json_key
      json_key #{json_key}
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])
    stub_writer(stub_auth: false) do |writer|
      mock.proxy(writer).get_auth_from_json_key { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_application_default
    driver = create_driver(%[
      table foo
      auth_method application_default
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock.proxy(writer).get_auth_from_application_default { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_format
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "status" => "1",
      "bytes" => 3.0,
      "vhost" => :bar,
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/0.9",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "requesttime" => (now - 1).to_f.to_s,
      "bot_access" => true,
      "loginsession" => false,
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      }
    }
    expected = {
      "time" => now.to_i,
      "status" => 1,
      "bytes" => 3,
      "vhost" => "bar",
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/0.9",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "requesttime" => (now - 1).to_f.to_s.to_f,
      "bot_access" => true,
      "loginsession" => false,
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      }
    }

    driver = create_driver(CONFIG)
    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  [
    # <time_format>, <time field type>, <time expectation generator>, <assertion>
    [
      "%s.%6N",
      lambda{|t| t.strftime("%s.%6N").to_f },
      lambda{|recv, expected, actual|
        recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
      }
    ],
    [
      "%Y-%m-%dT%H:%M:%S%:z",
      lambda{|t| t.iso8601 },
      :assert_equal.to_proc
    ],
  ].each do |format, expect_time, assert|
    define_method("test_time_formats_#{format}") do
      now = Fluent::Engine.now
      input = {}
      expected = { "time" => expect_time[Time.at(now.to_r)] }

      driver = create_driver(<<-CONFIG)
        table foo
        email foo@bar.example
        private_key_path /path/to/key
        project yourproject_id
        dataset yourdataset_id

        <inject>
        time_format #{format}
        time_type string
        time_key time
        </inject>

        schema [
          {"name": "metadata", "type": "RECORD", "fields": [
            {"name": "time", "type": "INTEGER"},
            {"name": "node", "type": "STRING"}
          ]},
          {"name": "log", "type": "STRING"}
        ]
      CONFIG

      buf = nil
      driver.run { buf = driver.instance.format("my.tag", now, input) }

      assert[self, expected["time"], MultiJson.load(buf)["time"]]
    end
  end

  def test_format_with_schema
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "request" => {
        "vhost" => :bar,
        "path" => "/path/to/baz",
        "method" => "GET",
        "protocol" => "HTTP/0.9",
        "agent" => "libwww",
        "referer" => "http://referer.example",
        "time" => (now - 1).to_f,
        "bot_access" => true,
        "loginsession" => false,
      },
      "response" => {
        "status" => "1",
        "bytes" => 3.0,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      },
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
    }
    expected = {
      "time" => now.to_f,
      "request" => {
        "vhost" => "bar",
        "path" => "/path/to/baz",
        "method" => "GET",
        "protocol" => "HTTP/0.9",
        "agent" => "libwww",
        "referer" => "http://referer.example",
        "time" => (now - 1).to_f,
        "bot_access" => true,
        "loginsession" => false,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      },
      "response" => {
        "status" => 1,
        "bytes" => 3,
      },
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  def test_format_repeated_field_with_schema
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_f,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  def test_format_fetch_from_bigquery_api
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_i,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      fetch_schema true
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    stub_writer do |writer|
      mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
        sudo_schema_response["schema"]["fields"]
      end
    end

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)

    table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
    assert table_schema["time"]
    assert_equal :timestamp, table_schema["time"].type
    assert_equal :required, table_schema["time"].mode

    assert table_schema["tty"]
    assert_equal :string, table_schema["tty"].type
    assert_equal :nullable, table_schema["tty"].mode

    assert table_schema["pwd"]
    assert_equal :string, table_schema["pwd"].type
    assert_equal :required, table_schema["pwd"].mode

    assert table_schema["user"]
    assert_equal :string, table_schema["user"].type
    assert_equal :required, table_schema["user"].mode

    assert table_schema["argv"]
    assert_equal :string, table_schema["argv"].type
    assert_equal :repeated, table_schema["argv"].mode
  end

  def test_format_fetch_from_bigquery_api_with_fetch_schema_table
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_i,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo_%Y_%m_%d
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      fetch_schema true
      fetch_schema_table foo
      schema [{"name": "time", "type": "INTEGER"}]

      <buffer time>
      timekey 1d
      </buffer>
    CONFIG

    stub_writer do |writer|
      mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
        sudo_schema_response["schema"]["fields"]
      end
    end

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)

    table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
    assert table_schema["time"]
    assert_equal :timestamp, table_schema["time"].type
    assert_equal :required, table_schema["time"].mode

    assert table_schema["tty"]
    assert_equal :string, table_schema["tty"].type
    assert_equal :nullable, table_schema["tty"].mode

    assert table_schema["pwd"]
    assert_equal :string, table_schema["pwd"].type
    assert_equal :required, table_schema["pwd"].mode

    assert table_schema["user"]
    assert_equal :string, table_schema["user"].type
    assert_equal :required, table_schema["user"].mode

    assert table_schema["argv"]
    assert_equal :string, table_schema["argv"].type
    assert_equal :repeated, table_schema["argv"].mode
  end
end