fluent-plugin-bigquery-custom 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +19 -0
- data/fluent-plugin-bigquery-custom.gemspec +2 -1
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery.rb +48 -13
- data/test/plugin/test_out_bigquery.rb +54 -1
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 29b676254a53a8b69c023230819fdded3f53c8e2
|
4
|
+
data.tar.gz: 1b26aa639d74be661e5866c1c78a8fe8ec40e232
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f02605f56709c8beb093458050bfbacab13a286b7b3b5e720fbb1768aa20014a5410d29eb0dd98641eac9d17b245b0ce6e91811cf2419a4f9650e00381697ba
|
7
|
+
data.tar.gz: 7f45d681ade43daf9b5782eff2c87521af72606592f4f456cdce89148cc30ec55adf31ef998460a609a990124ccf4e2fe8666ff5a52892313c4eeef6fbd1f730
|
data/README.md
CHANGED
@@ -155,11 +155,30 @@ section in the Google BigQuery document.
|
|
155
155
|
auto_create_table true
|
156
156
|
table yourtable%{time_slice}
|
157
157
|
schema_path bq_schema.json
|
158
|
+
|
159
|
+
request_open_timeout_sec 5m
|
158
160
|
</match>
|
159
161
|
```
|
160
162
|
|
161
163
|
I recommend using a file buffer and a long flush interval.
|
162
164
|
|
165
|
+
Differences from the insert method
|
166
|
+
|
167
|
+
* `buffer_type`
|
168
|
+
* default is `file` (the TimeSlicedOutput default)
|
169
|
+
* `buffer_chunk_limit`
|
170
|
+
* default 1GB
|
171
|
+
* the maximum size is limited by BigQuery to 4GB (compressed) or 5TB (uncompressed)
|
172
|
+
* `buffer_chunk_records_limit`
|
173
|
+
* available only when `buffer_type` is `lightening`
|
174
|
+
* `buffer_queue_limit`
|
175
|
+
* default 64
|
176
|
+
* Maximum storage used is `buffer_chunk_limit` (default 1GB) x `buffer_queue_limit` (default 64) = 64GB
|
177
|
+
* `flush_interval`
|
178
|
+
* default is `nil` (the TimeSlicedOutput default)
|
179
|
+
* `request_open_timeout_sec`
|
180
|
+
* If you send large chunks to BigQuery, set `request_open_timeout_sec` to a long duration; otherwise a timeout error may occur.
|
181
|
+
|
163
182
|
### Authentication
|
164
183
|
|
165
184
|
There are two methods supported to fetch access token for the service account.
|
@@ -23,7 +23,8 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_development_dependency "test-unit", "~> 3.0.2"
|
24
24
|
spec.add_development_dependency "test-unit-rr", "~> 1.0.3"
|
25
25
|
|
26
|
-
spec.add_runtime_dependency "google-api-client", "~> 0.9.
|
26
|
+
spec.add_runtime_dependency "google-api-client", "~> 0.9.3"
|
27
|
+
spec.add_runtime_dependency "activesupport", ">= 3.2"
|
27
28
|
spec.add_runtime_dependency "googleauth"
|
28
29
|
spec.add_runtime_dependency "fluentd"
|
29
30
|
spec.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
|
@@ -19,19 +19,26 @@ module Fluent
|
|
19
19
|
# https://developers.google.com/bigquery/browser-tool-quickstart
|
20
20
|
# https://developers.google.com/bigquery/bigquery-api-quickstart
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
22
|
+
### default for insert
|
23
|
+
def configure_for_insert(conf)
|
24
|
+
raise ConfigError unless conf["method"] != "load"
|
25
|
+
|
26
|
+
conf["buffer_type"] = "lightening" unless conf["buffer_type"]
|
27
|
+
conf["flush_interval"] = 0.25 unless conf["flush_interval"]
|
28
|
+
conf["try_flush_interval"] = 0.05 unless conf["try_flush_interval"]
|
29
|
+
conf["buffer_chunk_limit"] = 1 * 1024 ** 2 unless conf["buffer_chunk_limit"] # 1MB
|
30
|
+
conf["buffer_queue_limit"] = 1024 unless conf["buffer_queue_limit"]
|
31
|
+
conf["buffer_chunk_records_limit"] = 500 unless conf["buffer_chunk_records_limit"]
|
32
|
+
end
|
26
33
|
|
27
|
-
|
28
|
-
|
29
|
-
|
34
|
+
### default for loads
|
35
|
+
def configure_for_load(conf)
|
36
|
+
raise ConfigError unless conf["method"] == "load"
|
30
37
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
38
|
+
# buffer_type, flush_interval, try_flush_interval is TimeSlicedOutput default
|
39
|
+
conf["buffer_chunk_limit"] = 1 * 1024 ** 3 unless conf["buffer_chunk_limit"] # 1GB
|
40
|
+
conf["buffer_queue_limit"] = 64 unless conf["buffer_queue_limit"]
|
41
|
+
end
|
35
42
|
|
36
43
|
### OAuth credential
|
37
44
|
# config_param :client_id, :string
|
@@ -131,6 +138,14 @@ module Fluent
|
|
131
138
|
# If you exceed 100 rows per second for an extended period of time, throttling might occur.
|
132
139
|
### Toooooooooooooo short/small per inserts and row!
|
133
140
|
|
141
|
+
## Timeout
|
142
|
+
# request_timeout_sec
|
143
|
+
# Bigquery API response timeout
|
144
|
+
# request_open_timeout_sec
|
145
|
+
# Bigquery API connection, and request timeout
|
146
|
+
config_param :request_timeout_sec, :time, default: nil
|
147
|
+
config_param :request_open_timeout_sec, :time, default: 60
|
148
|
+
|
134
149
|
### Table types
|
135
150
|
# https://developers.google.com/bigquery/docs/tables
|
136
151
|
#
|
@@ -165,6 +180,11 @@ module Fluent
|
|
165
180
|
end
|
166
181
|
|
167
182
|
def configure(conf)
|
183
|
+
if conf["method"] == "load"
|
184
|
+
configure_for_load(conf)
|
185
|
+
else
|
186
|
+
configure_for_insert(conf)
|
187
|
+
end
|
168
188
|
super
|
169
189
|
|
170
190
|
if @method == "insert"
|
@@ -428,7 +448,11 @@ module Fluent
|
|
428
448
|
ignore_unknown_values: @ignore_unknown_values,
|
429
449
|
}
|
430
450
|
body.merge!(template_suffix: template_suffix) if template_suffix
|
431
|
-
res = client.insert_all_table_data(
|
451
|
+
res = client.insert_all_table_data(
|
452
|
+
@project, @dataset, table_id, body, {
|
453
|
+
options: {timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec}
|
454
|
+
}
|
455
|
+
)
|
432
456
|
|
433
457
|
if res.insert_errors
|
434
458
|
reasons = []
|
@@ -490,7 +514,18 @@ module Fluent
|
|
490
514
|
job_id = create_job_id(upload_source.path, @dataset, @table, @fields.to_a, @max_bad_records, @ignore_unknown_values)
|
491
515
|
end
|
492
516
|
configuration = load_configuration(table_id, template_suffix, upload_source)
|
493
|
-
res = client.insert_job(
|
517
|
+
res = client.insert_job(
|
518
|
+
@project,
|
519
|
+
configuration,
|
520
|
+
{
|
521
|
+
upload_source: upload_source,
|
522
|
+
content_type: "application/octet-stream",
|
523
|
+
options: {
|
524
|
+
timeout_sec: @request_timeout_sec,
|
525
|
+
open_timeout_sec: @request_open_timeout_sec,
|
526
|
+
}
|
527
|
+
}
|
528
|
+
)
|
494
529
|
end
|
495
530
|
|
496
531
|
wait_load(res.job_reference.job_id)
|
@@ -31,7 +31,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
31
31
|
API_SCOPE = "https://www.googleapis.com/auth/bigquery"
|
32
32
|
|
33
33
|
def create_driver(conf = CONFIG)
|
34
|
-
Fluent::Test::
|
34
|
+
Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
|
35
35
|
end
|
36
36
|
|
37
37
|
def stub_client(driver)
|
@@ -64,6 +64,57 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
64
64
|
}
|
65
65
|
end
|
66
66
|
|
67
|
+
def test_configure_default
|
68
|
+
driver = create_driver(<<-CONFIG)
|
69
|
+
table foo
|
70
|
+
email foo@bar.example
|
71
|
+
private_key_path /path/to/key
|
72
|
+
project yourproject_id
|
73
|
+
dataset yourdataset_id
|
74
|
+
CONFIG
|
75
|
+
|
76
|
+
assert { driver.instance.instance_variable_get("@buffer_type") == "lightening" }
|
77
|
+
assert { driver.instance.instance_variable_get("@flush_interval") == 0.25 }
|
78
|
+
assert { driver.instance.instance_variable_get("@try_flush_interval") == 0.05 }
|
79
|
+
assert { driver.instance.instance_variable_get("@buffer").class == Fluent::LighteningBuffer }
|
80
|
+
assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_queue_limit") == 1024 }
|
81
|
+
assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 1 * 1024 ** 2 }
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_configure_for_load
|
85
|
+
driver = create_driver(<<-CONFIG)
|
86
|
+
method load
|
87
|
+
buffer_path bigquery.*.buffer
|
88
|
+
table foo
|
89
|
+
email foo@bar.example
|
90
|
+
private_key_path /path/to/key
|
91
|
+
project yourproject_id
|
92
|
+
dataset yourdataset_id
|
93
|
+
CONFIG
|
94
|
+
|
95
|
+
assert { driver.instance.instance_variable_get("@buffer_type") == "file" }
|
96
|
+
assert { driver.instance.instance_variable_get("@try_flush_interval") == 1 }
|
97
|
+
assert { driver.instance.instance_variable_get("@buffer").class == Fluent::FileBuffer }
|
98
|
+
assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 1 * 1024 ** 3 }
|
99
|
+
assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_queue_limit") == 64 }
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_configure_for_load_with_parameter
|
103
|
+
driver = create_driver(<<-CONFIG)
|
104
|
+
method load
|
105
|
+
buffer_type memory
|
106
|
+
buffer_chunk_limit 100000
|
107
|
+
table foo
|
108
|
+
email foo@bar.example
|
109
|
+
private_key_path /path/to/key
|
110
|
+
project yourproject_id
|
111
|
+
dataset yourdataset_id
|
112
|
+
CONFIG
|
113
|
+
|
114
|
+
assert { driver.instance.instance_variable_get("@buffer_type") == "memory" }
|
115
|
+
assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 100000 }
|
116
|
+
end
|
117
|
+
|
67
118
|
def test_configure_auth_private_key
|
68
119
|
key = stub!
|
69
120
|
mock(Google::APIClient::KeyUtils).load_from_pkcs12('/path/to/key', 'notasecret') { key }
|
@@ -736,6 +787,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
736
787
|
|
737
788
|
driver = create_driver(<<-CONFIG)
|
738
789
|
method load
|
790
|
+
buffer_path bigquery.*.buffer
|
739
791
|
table foo
|
740
792
|
email foo@bar.example
|
741
793
|
private_key_path /path/to/key
|
@@ -824,6 +876,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
|
|
824
876
|
entry = {a: "b"}, {b: "c"}
|
825
877
|
driver = create_driver(<<-CONFIG)
|
826
878
|
method load
|
879
|
+
buffer_path bigquery.*.buffer
|
827
880
|
table foo
|
828
881
|
email foo@bar.example
|
829
882
|
private_key_path /path/to/key
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-bigquery-custom
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomohiro Hashidate
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -72,14 +72,28 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0.9.
|
75
|
+
version: 0.9.3
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0.9.
|
82
|
+
version: 0.9.3
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '3.2'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '3.2'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: googleauth
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|