fluent-plugin-bigquery-custom 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +19 -0
- data/fluent-plugin-bigquery-custom.gemspec +2 -1
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery.rb +48 -13
- data/test/plugin/test_out_bigquery.rb +54 -1
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 29b676254a53a8b69c023230819fdded3f53c8e2
|
4
|
+
data.tar.gz: 1b26aa639d74be661e5866c1c78a8fe8ec40e232
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f02605f56709c8beb093458050bfbacab13a286b7b3b5e720fbb1768aa20014a5410d29eb0dd98641eac9d17b245b0ce6e91811cf2419a4f9650e00381697ba
|
7
|
+
data.tar.gz: 7f45d681ade43daf9b5782eff2c87521af72606592f4f456cdce89148cc30ec55adf31ef998460a609a990124ccf4e2fe8666ff5a52892313c4eeef6fbd1f730
|
data/README.md
CHANGED
@@ -155,11 +155,30 @@ section in the Google BigQuery document.
|
|
155
155
|
auto_create_table true
|
156
156
|
table yourtable%{time_slice}
|
157
157
|
schema_path bq_schema.json
|
158
|
+
|
159
|
+
request_open_timeout_sec 5m
|
158
160
|
</match>
|
159
161
|
```
|
160
162
|
|
161
163
|
I recommend using a file buffer and a long flush interval.
|
162
164
|
|
165
|
+
Differences from the insert method
|
166
|
+
|
167
|
+
* `buffer_type`
|
168
|
+
* default is `file` (the default of TimeSlicedOutput)
|
169
|
+
* `buffer_chunk_limit`
|
170
|
+
* default 1GB
|
171
|
+
* the maximum size is limited to 4GB (compressed) or 5TB (uncompressed) by BigQuery
|
172
|
+
* `buffer_chunk_records_limit`
|
173
|
+
* it is available only when buffer_type is `lightening`
|
174
|
+
* `buffer_queue_limit`
|
175
|
+
* default 64
|
176
|
+
* Maximum storage used is `buffer_chunk_limit (default 1GB)` x `buffer_queue_limit (default 64)` = 64GB
|
177
|
+
* `flush_interval`
|
178
|
+
* default is `nil` (the default of TimeSlicedOutput)
|
179
|
+
* `request_open_timeout_sec`
|
180
|
+
* If you send large chunks to BigQuery, set `request_open_timeout_sec` to a long duration. Otherwise, a timeout error may occur.
|
181
|
+
|
163
182
|
### Authentication
|
164
183
|
|
165
184
|
There are two methods supported to fetch access token for the service account.
|
@@ -23,7 +23,8 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_development_dependency "test-unit", "~> 3.0.2"
|
24
24
|
spec.add_development_dependency "test-unit-rr", "~> 1.0.3"
|
25
25
|
|
26
|
-
spec.add_runtime_dependency "google-api-client", "~> 0.9.
|
26
|
+
spec.add_runtime_dependency "google-api-client", "~> 0.9.3"
|
27
|
+
spec.add_runtime_dependency "activesupport", ">= 3.2"
|
27
28
|
spec.add_runtime_dependency "googleauth"
|
28
29
|
spec.add_runtime_dependency "fluentd"
|
29
30
|
spec.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
|
@@ -19,19 +19,26 @@ module Fluent
|
|
19
19
|
# https://developers.google.com/bigquery/browser-tool-quickstart
|
20
20
|
# https://developers.google.com/bigquery/bigquery-api-quickstart
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
22
|
+
### default for insert
|
23
|
+
def configure_for_insert(conf)
|
24
|
+
raise ConfigError unless conf["method"] != "load"
|
25
|
+
|
26
|
+
conf["buffer_type"] = "lightening" unless conf["buffer_type"]
|
27
|
+
conf["flush_interval"] = 0.25 unless conf["flush_interval"]
|
28
|
+
conf["try_flush_interval"] = 0.05 unless conf["try_flush_interval"]
|
29
|
+
conf["buffer_chunk_limit"] = 1 * 1024 ** 2 unless conf["buffer_chunk_limit"] # 1MB
|
30
|
+
conf["buffer_queue_limit"] = 1024 unless conf["buffer_queue_limit"]
|
31
|
+
conf["buffer_chunk_records_limit"] = 500 unless conf["buffer_chunk_records_limit"]
|
32
|
+
end
|
26
33
|
|
27
|
-
|
28
|
-
|
29
|
-
|
34
|
+
### default for loads
|
35
|
+
def configure_for_load(conf)
|
36
|
+
raise ConfigError unless conf["method"] == "load"
|
30
37
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
38
|
+
# buffer_type, flush_interval, try_flush_interval is TimeSlicedOutput default
|
39
|
+
conf["buffer_chunk_limit"] = 1 * 1024 ** 3 unless conf["buffer_chunk_limit"] # 1GB
|
40
|
+
conf["buffer_queue_limit"] = 64 unless conf["buffer_queue_limit"]
|
41
|
+
end
|
35
42
|
|
36
43
|
### OAuth credential
|
37
44
|
# config_param :client_id, :string
|
@@ -131,6 +138,14 @@ module Fluent
|
|
131
138
|
# If you exceed 100 rows per second for an extended period of time, throttling might occur.
|
132
139
|
### Toooooooooooooo short/small per inserts and row!
|
133
140
|
|
141
|
+
## Timeout
|
142
|
+
# request_timeout_sec
|
143
|
+
# Bigquery API response timeout
|
144
|
+
# request_open_timeout_sec
|
145
|
+
# Bigquery API connection, and request timeout
|
146
|
+
config_param :request_timeout_sec, :time, default: nil
|
147
|
+
config_param :request_open_timeout_sec, :time, default: 60
|
148
|
+
|
134
149
|
### Table types
|
135
150
|
# https://developers.google.com/bigquery/docs/tables
|
136
151
|
#
|
@@ -165,6 +180,11 @@ module Fluent
|
|
165
180
|
end
|
166
181
|
|
167
182
|
def configure(conf)
|
183
|
+
if conf["method"] == "load"
|
184
|
+
configure_for_load(conf)
|
185
|
+
else
|
186
|
+
configure_for_insert(conf)
|
187
|
+
end
|
168
188
|
super
|
169
189
|
|
170
190
|
if @method == "insert"
|
@@ -428,7 +448,11 @@ module Fluent
|
|
428
448
|
ignore_unknown_values: @ignore_unknown_values,
|
429
449
|
}
|
430
450
|
body.merge!(template_suffix: template_suffix) if template_suffix
|
431
|
-
res = client.insert_all_table_data(
|
451
|
+
res = client.insert_all_table_data(
|
452
|
+
@project, @dataset, table_id, body, {
|
453
|
+
options: {timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec}
|
454
|
+
}
|
455
|
+
)
|
432
456
|
|
433
457
|
if res.insert_errors
|
434
458
|
reasons = []
|
@@ -490,7 +514,18 @@ module Fluent
|
|
490
514
|
job_id = create_job_id(upload_source.path, @dataset, @table, @fields.to_a, @max_bad_records, @ignore_unknown_values)
|
491
515
|
end
|
492
516
|
configuration = load_configuration(table_id, template_suffix, upload_source)
|
493
|
-
res = client.insert_job(
|
517
|
+
res = client.insert_job(
|
518
|
+
@project,
|
519
|
+
configuration,
|
520
|
+
{
|
521
|
+
upload_source: upload_source,
|
522
|
+
content_type: "application/octet-stream",
|
523
|
+
options: {
|
524
|
+
timeout_sec: @request_timeout_sec,
|
525
|
+
open_timeout_sec: @request_open_timeout_sec,
|
526
|
+
}
|
527
|
+
}
|
528
|
+
)
|
494
529
|
end
|
495
530
|
|
496
531
|
wait_load(res.job_reference.job_id)
|
@@ -31,7 +31,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
31
31
|
API_SCOPE = "https://www.googleapis.com/auth/bigquery"
|
32
32
|
|
33
33
|
def create_driver(conf = CONFIG)
|
34
|
-
Fluent::Test::
|
34
|
+
Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
|
35
35
|
end
|
36
36
|
|
37
37
|
def stub_client(driver)
|
@@ -64,6 +64,57 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
64
64
|
}
|
65
65
|
end
|
66
66
|
|
67
|
+
def test_configure_default
|
68
|
+
driver = create_driver(<<-CONFIG)
|
69
|
+
table foo
|
70
|
+
email foo@bar.example
|
71
|
+
private_key_path /path/to/key
|
72
|
+
project yourproject_id
|
73
|
+
dataset yourdataset_id
|
74
|
+
CONFIG
|
75
|
+
|
76
|
+
assert { driver.instance.instance_variable_get("@buffer_type") == "lightening" }
|
77
|
+
assert { driver.instance.instance_variable_get("@flush_interval") == 0.25 }
|
78
|
+
assert { driver.instance.instance_variable_get("@try_flush_interval") == 0.05 }
|
79
|
+
assert { driver.instance.instance_variable_get("@buffer").class == Fluent::LighteningBuffer }
|
80
|
+
assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_queue_limit") == 1024 }
|
81
|
+
assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 1 * 1024 ** 2 }
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_configure_for_load
|
85
|
+
driver = create_driver(<<-CONFIG)
|
86
|
+
method load
|
87
|
+
buffer_path bigquery.*.buffer
|
88
|
+
table foo
|
89
|
+
email foo@bar.example
|
90
|
+
private_key_path /path/to/key
|
91
|
+
project yourproject_id
|
92
|
+
dataset yourdataset_id
|
93
|
+
CONFIG
|
94
|
+
|
95
|
+
assert { driver.instance.instance_variable_get("@buffer_type") == "file" }
|
96
|
+
assert { driver.instance.instance_variable_get("@try_flush_interval") == 1 }
|
97
|
+
assert { driver.instance.instance_variable_get("@buffer").class == Fluent::FileBuffer }
|
98
|
+
assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 1 * 1024 ** 3 }
|
99
|
+
assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_queue_limit") == 64 }
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_configure_for_load_with_parameter
|
103
|
+
driver = create_driver(<<-CONFIG)
|
104
|
+
method load
|
105
|
+
buffer_type memory
|
106
|
+
buffer_chunk_limit 100000
|
107
|
+
table foo
|
108
|
+
email foo@bar.example
|
109
|
+
private_key_path /path/to/key
|
110
|
+
project yourproject_id
|
111
|
+
dataset yourdataset_id
|
112
|
+
CONFIG
|
113
|
+
|
114
|
+
assert { driver.instance.instance_variable_get("@buffer_type") == "memory" }
|
115
|
+
assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 100000 }
|
116
|
+
end
|
117
|
+
|
67
118
|
def test_configure_auth_private_key
|
68
119
|
key = stub!
|
69
120
|
mock(Google::APIClient::KeyUtils).load_from_pkcs12('/path/to/key', 'notasecret') { key }
|
@@ -736,6 +787,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
736
787
|
|
737
788
|
driver = create_driver(<<-CONFIG)
|
738
789
|
method load
|
790
|
+
buffer_path bigquery.*.buffer
|
739
791
|
table foo
|
740
792
|
email foo@bar.example
|
741
793
|
private_key_path /path/to/key
|
@@ -824,6 +876,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
824
876
|
entry = {a: "b"}, {b: "c"}
|
825
877
|
driver = create_driver(<<-CONFIG)
|
826
878
|
method load
|
879
|
+
buffer_path bigquery.*.buffer
|
827
880
|
table foo
|
828
881
|
email foo@bar.example
|
829
882
|
private_key_path /path/to/key
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-bigquery-custom
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomohiro Hashidate
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -72,14 +72,28 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0.9.
|
75
|
+
version: 0.9.3
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0.9.
|
82
|
+
version: 0.9.3
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '3.2'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '3.2'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: googleauth
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|