fluent-plugin-bigquery-test 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +16 -0
- data/.gitignore +21 -0
- data/.travis.yml +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +13 -0
- data/README.md +602 -0
- data/Rakefile +12 -0
- data/fluent-plugin-bigquery.gemspec +29 -0
- data/gemfiles/activesupport-4.gemfile +6 -0
- data/lib/fluent/plugin/bigquery/errors.rb +84 -0
- data/lib/fluent/plugin/bigquery/helper.rb +33 -0
- data/lib/fluent/plugin/bigquery/schema.rb +281 -0
- data/lib/fluent/plugin/bigquery/version.rb +5 -0
- data/lib/fluent/plugin/bigquery/writer.rb +356 -0
- data/lib/fluent/plugin/out_bigquery_base.rb +221 -0
- data/lib/fluent/plugin/out_bigquery_insert.rb +125 -0
- data/lib/fluent/plugin/out_bigquery_load.rb +221 -0
- data/test/helper.rb +20 -0
- data/test/plugin/test_out_bigquery_base.rb +579 -0
- data/test/plugin/test_out_bigquery_insert.rb +544 -0
- data/test/plugin/test_out_bigquery_load.rb +348 -0
- data/test/plugin/test_record_schema.rb +186 -0
- data/test/plugin/testdata/apache.schema +98 -0
- data/test/plugin/testdata/json_key.json +7 -0
- data/test/plugin/testdata/sudo.schema +27 -0
- data/test/run_test.rb +9 -0
- metadata +197 -0
data/lib/fluent/plugin/out_bigquery_insert.rb
ADDED
@@ -0,0 +1,125 @@
require 'fluent/plugin/out_bigquery_base'

module Fluent
  module Plugin
    class BigQueryInsertOutput < BigQueryBaseOutput
      Fluent::Plugin.register_output('bigquery_insert', self)

      helpers :record_accessor

      # template_suffix (only insert)
      # https://cloud.google.com/bigquery/streaming-data-into-bigquery#template_table_details
      config_param :template_suffix, :string, default: nil

      # skip_invalid_rows (only insert)
      # Insert all valid rows of a request, even if invalid rows exist.
      # The default value is false, which causes the entire request to fail if any invalid rows exist.
      config_param :skip_invalid_rows, :bool, default: false

      # insert_id_field (only insert)
      config_param :insert_id_field, :string, default: nil

      # add_insert_timestamp (only insert)
      # adds a timestamp just before sending the rows to bigquery, so that
      # buffering time is not taken into account. Gives a field in bigquery
      # which represents the insert time of the row.
      config_param :add_insert_timestamp, :string, default: nil

      # allow_retry_insert_errors (only insert)
      # If insert_id_field is not specified, true means to allow duplicate rows
      config_param :allow_retry_insert_errors, :bool, default: false

      ## Buffer
      config_section :buffer do
        config_set_default :@type, "memory"
        config_set_default :flush_mode, :interval
        config_set_default :flush_interval, 1
        config_set_default :flush_thread_interval, 0.05
        config_set_default :flush_thread_burst_interval, 0.05
        config_set_default :chunk_limit_size, 1 * 1024 ** 2 # 1MB
        config_set_default :total_limit_size, 1 * 1024 ** 3 # 1GB
        config_set_default :chunk_limit_records, 500
      end

      def configure(conf)
        super

        if @insert_id_field
          if @insert_id_field !~ /^\$[\[\.]/ && @insert_id_field =~ /\./
            warn "[BREAKING CHANGE] insert_id_field format is changed. Use fluentd record_accessor helper. (https://docs.fluentd.org/v1.0/articles/api-plugin-helper-record_accessor)"
          end
          @get_insert_id = record_accessor_create(@insert_id_field)
        end

        formatter_config = conf.elements("format")[0]
        if formatter_config && formatter_config['@type'] != "json"
          raise ConfigError, "`bigquery_insert` supports only json formatter."
        end
        @formatter = formatter_create(usage: 'out_bigquery_for_insert', type: 'json', conf: formatter_config)

        placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}/template_suffix=#{@template_suffix}"
        placeholder_validate!(:bigquery_insert, placeholder_params)
      end

      # for Fluent::Plugin::Output#implement? method
      def format(tag, time, record)
        super
      end

      def write(chunk)
        table_format = @tables_mutex.synchronize do
          t = @tables_queue.shift
          @tables_queue.push t
          t
        end

        now = Time.now.utc.strftime("%Y-%m-%d %H:%M:%S.%6N") if @add_insert_timestamp

        rows = chunk.open do |io|
          io.map do |line|
            record = MultiJson.load(line)
            record[@add_insert_timestamp] = now if @add_insert_timestamp
            row = {"json" => record}
            row["insert_id"] = @get_insert_id.call(record) if @get_insert_id
            Fluent::BigQuery::Helper.deep_symbolize_keys(row)
          end
        end

        metadata = chunk.metadata
        project = extract_placeholders(@project, metadata)
        dataset = extract_placeholders(@dataset, metadata)
        table_id = extract_placeholders(table_format, metadata)
        template_suffix = @template_suffix ? extract_placeholders(@template_suffix, metadata) : nil
        schema = get_schema(project, dataset, metadata)

        insert(project, dataset, table_id, rows, schema, template_suffix)
      end

      def insert(project, dataset, table_id, rows, schema, template_suffix)
        writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: template_suffix)
      rescue Fluent::BigQuery::Error => e
        raise if e.retryable?

        if @secondary
          # TODO: find better way
          @retry = retry_state_create(
            :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
            forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
            max_interval: @buffer_config.retry_max_interval,
            secondary: true, secondary_threshold: Float::EPSILON,
            randomize: @buffer_config.retry_randomize
          )
        else
          @retry = retry_state_create(
            :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
            forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
            max_interval: @buffer_config.retry_max_interval,
            randomize: @buffer_config.retry_randomize
          )
        end

        raise
      end
    end
  end
end
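For orientation, a minimal <match> block for this streaming-insert output might look like the sketch below. It is an illustration only: the project, dataset, table, and json_key path are placeholders, and it combines the insert-specific parameters defined above with common parameters (auth_method, json_key, schema) inherited from out_bigquery_base.rb and visible in the test configurations later in this diff.

<match access.log>
  @type bigquery_insert

  auth_method json_key
  json_key /path/to/service_account.json   # placeholder path

  project yourproject_id
  dataset yourdataset_id
  table   access_log

  # insert-specific parameters defined in this file (all optional)
  insert_id_field request_id
  skip_invalid_rows true
  add_insert_timestamp insert_time

  schema [
    {"name": "time", "type": "INTEGER"},
    {"name": "status", "type": "INTEGER"},
    {"name": "bytes", "type": "INTEGER"}
  ]

  <buffer>
    flush_interval 1
    chunk_limit_records 500
  </buffer>
</match>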
data/lib/fluent/plugin/out_bigquery_load.rb
ADDED
@@ -0,0 +1,221 @@
require 'fluent/plugin/out_bigquery_base'

module Fluent
  module Plugin
    class BigQueryLoadOutput < BigQueryBaseOutput
      Fluent::Plugin.register_output('bigquery_load', self)

      helpers :timer

      config_param :source_format, :enum, list: [:json, :avro, :csv], default: :json

      # max_bad_records (only load)
      # The maximum number of bad records that BigQuery can ignore when running the job.
      # If the number of bad records exceeds this value, an invalid error is returned in the job result.
      # The default value is 0, which requires that all records are valid.
      config_param :max_bad_records, :integer, default: 0

      # prevent_duplicate_load (only load)
      config_param :prevent_duplicate_load, :bool, default: false

      config_param :use_delayed_commit, :bool, default: true
      config_param :wait_job_interval, :time, default: 3

      ## Buffer
      config_section :buffer do
        config_set_default :@type, "file"
        config_set_default :flush_mode, :interval
        config_set_default :flush_interval, 3600 # 1h
        config_set_default :flush_thread_interval, 5
        config_set_default :flush_thread_burst_interval, 5
        config_set_default :chunk_limit_size, 1 * 1024 ** 3 # 1GB
        config_set_default :total_limit_size, 32 * 1024 ** 3 # 32GB

        config_set_default :delayed_commit_timeout, 1800 # 30m
      end

      def configure(conf)
        super

        placeholder_params = "project=#{@project}/dataset=#{@dataset}/table=#{@tablelist.join(",")}/fetch_schema_table=#{@fetch_schema_table}"
        placeholder_validate!(:bigquery_load, placeholder_params)
      end

      def start
        super

        if prefer_delayed_commit
          @polling_targets = []
          @polling_mutex = Mutex.new
          log.debug("start load job polling")
          timer_execute(:polling_bigquery_load_job, @wait_job_interval, &method(:poll))
        end
      end

      def prefer_delayed_commit
        @use_delayed_commit
      end

      # for Fluent::Plugin::Output#implement? method
      def format(tag, time, record)
        super
      end

      def write(chunk)
        job_reference = do_write(chunk)

        until response = writer.fetch_load_job(job_reference)
          sleep @wait_job_interval
        end

        writer.commit_load_job(job_reference.chunk_id_hex, response)
      rescue Fluent::BigQuery::Error => e
        raise if e.retryable?

        @retry_mutex.synchronize do
          if @secondary
            # TODO: find better way
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              secondary: true, secondary_threshold: Float::EPSILON,
              randomize: @buffer_config.retry_randomize
            )
          else
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              randomize: @buffer_config.retry_randomize
            )
          end
        end

        raise
      end

      def try_write(chunk)
        job_reference = do_write(chunk)
        @polling_mutex.synchronize do
          @polling_targets << job_reference
        end
      rescue Fluent::BigQuery::Error => e
        raise if e.retryable?

        @retry_mutex.synchronize do
          if @secondary
            # TODO: find better way
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              secondary: true, secondary_threshold: Float::EPSILON,
              randomize: @buffer_config.retry_randomize
            )
          else
            @retry = retry_state_create(
              :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
              forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
              max_interval: @buffer_config.retry_max_interval,
              randomize: @buffer_config.retry_randomize
            )
          end
        end

        raise
      end

      private

      def do_write(chunk)
        table_format = @tables_mutex.synchronize do
          t = @tables_queue.shift
          @tables_queue.push t
          t
        end

        metadata = chunk.metadata
        project = extract_placeholders(@project, metadata)
        dataset = extract_placeholders(@dataset, metadata)
        table_id = extract_placeholders(table_format, metadata)
        schema = get_schema(project, dataset, metadata)

        create_upload_source(chunk) do |upload_source|
          writer.create_load_job(chunk.unique_id, dump_unique_id_hex(chunk.unique_id), project, dataset, table_id, upload_source, schema)
        end
      end

      def poll
        job_reference = @polling_mutex.synchronize do
          @polling_targets.shift
        end
        return unless job_reference

        begin
          response = writer.fetch_load_job(job_reference)
          if response
            writer.commit_load_job(job_reference.chunk_id_hex, response)
            commit_write(job_reference.chunk_id)
            log.debug("commit chunk", chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
          else
            @polling_mutex.synchronize do
              @polling_targets << job_reference
            end
          end
        rescue Fluent::BigQuery::Error => e
          # RetryableError comes from only `commit_load_job`
          # if error is retryable, takeback chunk and do next `try_flush`
          # if error is not retryable, create custom retry_state and takeback chunk do next `try_flush`
          if e.retryable?
            log.warn("failed to poll load job", error: e, chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
          else
            log.error("failed to poll load job", error: e, chunk: job_reference.chunk_id_hex, **job_reference.as_hash(:job_id, :project_id, :dataset_id, :table_id))
            @retry_mutex.synchronize do
              if @secondary
                # TODO: find better way
                @retry = retry_state_create(
                  :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
                  forever: false, max_steps: @buffer_config.retry_max_times, backoff_base: @buffer_config.retry_exponential_backoff_base,
                  max_interval: @buffer_config.retry_max_interval,
                  secondary: true, secondary_threshold: Float::EPSILON,
                  randomize: @buffer_config.retry_randomize
                )
              else
                @retry = retry_state_create(
                  :output_retries, @buffer_config.retry_type, @buffer_config.retry_wait, @buffer_config.retry_timeout,
                  forever: false, max_steps: 0, backoff_base: @buffer_config.retry_exponential_backoff_base,
                  max_interval: @buffer_config.retry_max_interval,
                  randomize: @buffer_config.retry_randomize
                )
              end
            end
          end

          rollback_write(job_reference.chunk_id)
        rescue => e
          log.error("unexpected error while polling", error: e)
          log.error_backtrace
          rollback_write(job_reference.chunk_id)
        end
      end

      def create_upload_source(chunk)
        chunk_is_file = @buffer_config["@type"] == 'file'
        if chunk_is_file
          File.open(chunk.path) do |file|
            yield file
          end
        else
          Tempfile.open("chunk-tmp") do |file|
            file.binmode
            chunk.write_to(file)
            file.sync
            file.rewind
            yield file
          end
        end
      end
    end
  end
end
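Likewise, a hedged sketch of a <match> block for the load-job output. Values are placeholders, only parameters defined in this file or in the shared base output are used, and the file buffer path is an assumed example.

<match access.log>
  @type bigquery_load

  auth_method json_key
  json_key /path/to/service_account.json   # placeholder path

  project yourproject_id
  dataset yourdataset_id
  table   access_log

  source_format json
  max_bad_records 0
  use_delayed_commit true
  wait_job_interval 30

  fetch_schema true

  <buffer>
    @type file
    path /var/log/fluentd/bigquery_load.*.buffer   # placeholder buffer path
    flush_interval 3600
  </buffer>
</match>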
data/test/helper.rb
ADDED
@@ -0,0 +1,20 @@
require 'bundler/setup'
require 'test/unit'

$LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
$LOAD_PATH.unshift(__dir__)
require 'fluent/test'

require 'fluent/plugin/buffer'
require 'fluent/plugin/buf_memory'
require 'fluent/plugin/buf_file'
require 'fluent/test/driver/output'

require 'fluent/plugin/out_bigquery_base'
require 'fluent/plugin/out_bigquery_insert'
require 'fluent/plugin/out_bigquery_load'
require 'google/apis/bigquery_v2'
require 'google/api_client/auth/key_utils'
require 'googleauth'

require 'test/unit/rr'
data/test/plugin/test_out_bigquery_base.rb
ADDED
@@ -0,0 +1,579 @@
require 'helper'

class BigQueryBaseOutputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  CONFIG = %[
    table foo
    email foo@bar.example
    private_key_path /path/to/key
    project yourproject_id
    dataset yourdataset_id

    <inject>
    time_format %s
    time_key  time
    </inject>

    schema [
      {"name": "time", "type": "INTEGER"},
      {"name": "status", "type": "INTEGER"},
      {"name": "bytes", "type": "INTEGER"},
      {"name": "vhost", "type": "STRING"},
      {"name": "path", "type": "STRING"},
      {"name": "method", "type": "STRING"},
      {"name": "protocol", "type": "STRING"},
      {"name": "agent", "type": "STRING"},
      {"name": "referer", "type": "STRING"},
      {"name": "remote", "type": "RECORD", "fields": [
        {"name": "host", "type": "STRING"},
        {"name": "ip", "type": "STRING"},
        {"name": "user", "type": "STRING"}
      ]},
      {"name": "requesttime", "type": "FLOAT"},
      {"name": "bot_access", "type": "BOOLEAN"},
      {"name": "loginsession", "type": "BOOLEAN"}
    ]
  ]

  API_SCOPE = "https://www.googleapis.com/auth/bigquery"

  def create_driver(conf = CONFIG)
    Fluent::Test::Driver::Output.new(Fluent::Plugin::BigQueryBaseOutput).configure(conf)
  end

  def stub_writer(stub_auth: true)
    stub.proxy(Fluent::BigQuery::Writer).new.with_any_args do |writer|
      stub(writer).get_auth { nil } if stub_auth
      yield writer
      writer
    end
  end

  private def sudo_schema_response
    {
      "schema" => {
        "fields" => [
          {
            "name" => "time",
            "type" => "TIMESTAMP",
            "mode" => "REQUIRED"
          },
          {
            "name" => "tty",
            "type" => "STRING",
            "mode" => "NULLABLE"
          },
          {
            "name" => "pwd",
            "type" => "STRING",
            "mode" => "REQUIRED"
          },
          {
            "name" => "user",
            "type" => "STRING",
            "mode" => "REQUIRED"
          },
          {
            "name" => "argv",
            "type" => "STRING",
            "mode" => "REPEATED"
          }
        ]
      }
    }
  end

  def test_configure_table
    driver = create_driver
    assert_equal driver.instance.table, 'foo'
    assert_nil driver.instance.tables

    driver = create_driver(CONFIG.sub(/\btable\s+.*$/, 'tables foo,bar'))
    assert_nil driver.instance.table
    assert_equal driver.instance.tables, ['foo' ,'bar']

    assert_raise(Fluent::ConfigError, "'table' or 'tables' must be specified, and both are invalid") {
      create_driver(CONFIG + "tables foo,bar")
    }
  end

  def test_configure_auth_private_key
    driver = create_driver
    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_private_key { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_compute_engine
    driver = create_driver(%[
      table foo
      auth_method compute_engine
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_compute_engine { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_json_key_as_file
    driver = create_driver(%[
      table foo
      auth_method json_key
      json_key jsonkey.josn
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock(writer).get_auth_from_json_key { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_json_key_as_file_raise_permission_error
    json_key_path = 'test/plugin/testdata/json_key.json'
    json_key_path_dir = File.dirname(json_key_path)

    begin
      File.chmod(0000, json_key_path_dir)

      driver = create_driver(%[
        table foo
        auth_method json_key
        json_key #{json_key_path}
        project yourproject_id
        dataset yourdataset_id
        schema [
          {"name": "time", "type": "INTEGER"},
          {"name": "status", "type": "INTEGER"},
          {"name": "bytes", "type": "INTEGER"}
        ]
      ])
      assert_raises(Errno::EACCES) do
        driver.instance.writer.client
      end
    ensure
      File.chmod(0755, json_key_path_dir)
    end
  end

  def test_configure_auth_json_key_as_string
    json_key = '{"private_key": "X", "client_email": "' + 'x' * 255 + '@developer.gserviceaccount.com"}'
    json_key_io = StringIO.new(json_key)
    authorization = Object.new
    stub(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: satisfy {|arg| JSON.parse(arg.read) == JSON.parse(json_key_io.read) }, scope: API_SCOPE) { authorization }

    driver = create_driver(%[
      table foo
      auth_method json_key
      json_key #{json_key}
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])
    stub_writer(stub_auth: false) do |writer|
      mock.proxy(writer).get_auth_from_json_key { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_configure_auth_application_default
    driver = create_driver(%[
      table foo
      auth_method application_default
      project yourproject_id
      dataset yourdataset_id
      schema [
        {"name": "time", "type": "INTEGER"},
        {"name": "status", "type": "INTEGER"},
        {"name": "bytes", "type": "INTEGER"}
      ]
    ])

    stub_writer(stub_auth: false) do |writer|
      mock.proxy(writer).get_auth_from_application_default { stub! }
    end
    assert driver.instance.writer.client.is_a?(Google::Apis::BigqueryV2::BigqueryService)
  end

  def test_format
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "status" => "1",
      "bytes" => 3.0,
      "vhost" => :bar,
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/0.9",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "requesttime" => (now - 1).to_f.to_s,
      "bot_access" => true,
      "loginsession" => false,
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      }
    }
    expected = {
      "time" => now.to_i,
      "status" => 1,
      "bytes" => 3,
      "vhost" => "bar",
      "path" => "/path/to/baz",
      "method" => "GET",
      "protocol" => "HTTP/0.9",
      "agent" => "libwww",
      "referer" => "http://referer.example",
      "requesttime" => (now - 1).to_f.to_s.to_f,
      "bot_access" => true,
      "loginsession" => false,
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      }
    }

    driver = create_driver(CONFIG)
    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  [
    # <time_format>, <time field type>, <time expectation generator>, <assertion>
    [
      "%s.%6N",
      lambda{|t| t.strftime("%s.%6N").to_f },
      lambda{|recv, expected, actual|
        recv.assert_in_delta(expected, actual, Float::EPSILON / 10**3)
      }
    ],
    [
      "%Y-%m-%dT%H:%M:%S%:z",
      lambda{|t| t.iso8601 },
      :assert_equal.to_proc
    ],
  ].each do |format, expect_time, assert|
    define_method("test_time_formats_#{format}") do
      now = Fluent::Engine.now
      input = {}
      expected = { "time" => expect_time[Time.at(now.to_r)] }

      driver = create_driver(<<-CONFIG)
        table foo
        email foo@bar.example
        private_key_path /path/to/key
        project yourproject_id
        dataset yourdataset_id

        <inject>
        time_format #{format}
        time_type string
        time_key time
        </inject>

        schema [
          {"name": "metadata", "type": "RECORD", "fields": [
            {"name": "time", "type": "INTEGER"},
            {"name": "node", "type": "STRING"}
          ]},
          {"name": "log", "type": "STRING"}
        ]
      CONFIG

      buf = nil
      driver.run { buf = driver.instance.format("my.tag", now, input) }

      assert[self, expected["time"], MultiJson.load(buf)["time"]]
    end
  end

  def test_format_with_schema
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "request" => {
        "vhost" => :bar,
        "path" => "/path/to/baz",
        "method" => "GET",
        "protocol" => "HTTP/0.9",
        "agent" => "libwww",
        "referer" => "http://referer.example",
        "time" => (now - 1).to_f,
        "bot_access" => true,
        "loginsession" => false,
      },
      "response" => {
        "status" => "1",
        "bytes" => 3.0,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      },
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
    }
    expected = {
      "time" => now.to_f,
      "request" => {
        "vhost" => "bar",
        "path" => "/path/to/baz",
        "method" => "GET",
        "protocol" => "HTTP/0.9",
        "agent" => "libwww",
        "referer" => "http://referer.example",
        "time" => (now - 1).to_f,
        "bot_access" => true,
        "loginsession" => false,
      },
      "remote" => {
        "host" => "remote.example",
        "ip" => "192.0.2.1",
        "port" => 12345,
        "user" => "tagomoris",
      },
      "response" => {
        "status" => 1,
        "bytes" => 3,
      },
      "something-else" => "would be ignored",
      "yet-another" => {
        "foo" => "bar",
        "baz" => 1,
      },
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      schema_path #{File.join(File.dirname(__FILE__), "testdata", "apache.schema")}
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  def test_format_repeated_field_with_schema
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_f,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      schema_path #{File.join(File.dirname(__FILE__), "testdata", "sudo.schema")}
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)
  end

  def test_format_fetch_from_bigquery_api
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_i,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      fetch_schema true
      schema [{"name": "time", "type": "INTEGER"}]
    CONFIG

    stub_writer do |writer|
      mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
        sudo_schema_response["schema"]["fields"]
      end
    end

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)

    table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
    assert table_schema["time"]
    assert_equal :timestamp, table_schema["time"].type
    assert_equal :required, table_schema["time"].mode

    assert table_schema["tty"]
    assert_equal :string, table_schema["tty"].type
    assert_equal :nullable, table_schema["tty"].mode

    assert table_schema["pwd"]
    assert_equal :string, table_schema["pwd"].type
    assert_equal :required, table_schema["pwd"].mode

    assert table_schema["user"]
    assert_equal :string, table_schema["user"].type
    assert_equal :required, table_schema["user"].mode

    assert table_schema["argv"]
    assert_equal :string, table_schema["argv"].type
    assert_equal :repeated, table_schema["argv"].mode
  end

  def test_format_fetch_from_bigquery_api_with_fetch_schema_table
    now = Fluent::EventTime.new(Time.now.to_i)
    input = {
      "tty" => nil,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }
    expected = {
      "time" => now.to_i,
      "pwd" => "/home/yugui",
      "user" => "fluentd",
      "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
    }

    driver = create_driver(<<-CONFIG)
      table foo_%Y_%m_%d
      email foo@bar.example
      private_key_path /path/to/key
      project yourproject_id
      dataset yourdataset_id

      <inject>
      time_format %s
      time_key time
      </inject>

      fetch_schema true
      fetch_schema_table foo
      schema [{"name": "time", "type": "INTEGER"}]

      <buffer time>
      timekey 1d
      </buffer>
    CONFIG

    stub_writer do |writer|
      mock(writer).fetch_schema('yourproject_id', 'yourdataset_id', 'foo') do
        sudo_schema_response["schema"]["fields"]
      end
    end

    buf = nil
    driver.run { buf = driver.instance.format("my.tag", now, input) }

    assert_equal expected, MultiJson.load(buf)

    table_schema = driver.instance.instance_eval{ @fetched_schemas['yourproject_id.yourdataset_id.foo'] }
    assert table_schema["time"]
    assert_equal :timestamp, table_schema["time"].type
    assert_equal :required, table_schema["time"].mode

    assert table_schema["tty"]
    assert_equal :string, table_schema["tty"].type
    assert_equal :nullable, table_schema["tty"].mode

    assert table_schema["pwd"]
    assert_equal :string, table_schema["pwd"].type
    assert_equal :required, table_schema["pwd"].mode

    assert table_schema["user"]
    assert_equal :string, table_schema["user"].type
    assert_equal :required, table_schema["user"].mode

    assert table_schema["argv"]
    assert_equal :string, table_schema["argv"].type
    assert_equal :repeated, table_schema["argv"].mode
  end
end
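To make the streaming write path of out_bigquery_insert.rb above concrete, here is a small Ruby sketch (not part of the gem) of the row object built from one buffered JSON line before it is handed to Fluent::BigQuery::Writer#insert_rows. The sample record and the uuid insert-id field are invented for illustration.

require 'multi_json'  # runtime dependency of the plugin

# one line of a buffered chunk, as written by the json formatter
line = MultiJson.dump({"uuid" => "abc-123", "status" => 200, "path" => "/"})

record = MultiJson.load(line)
row = {"json" => record}
row["insert_id"] = record["uuid"]  # what @get_insert_id.call(record) would return for `insert_id_field uuid`

# The plugin then calls Fluent::BigQuery::Helper.deep_symbolize_keys(row) and passes
# the resulting rows to writer.insert_rows(project, dataset, table_id, rows, schema, template_suffix: ...)
p row  # => {"json"=>{"uuid"=>"abc-123", "status"=>200, "path"=>"/"}, "insert_id"=>"abc-123"}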