fluent-plugin-bigquery-custom 0.3.6 → 0.3.7

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 4af83e7241135e5fa4386ddd2342bc990448fd58
-  data.tar.gz: 94d7f710bcf578befd9fc1a0ba4cf35eaf79e72b
+  metadata.gz: 29b676254a53a8b69c023230819fdded3f53c8e2
+  data.tar.gz: 1b26aa639d74be661e5866c1c78a8fe8ec40e232
 SHA512:
-  metadata.gz: aad322ae03a689b9bd9459f7eae50f8990801e95bd0f8816a9ce80948b6f962da2724ef35a8728d71d36c266b8a878507953e865f80928503b2ff925c4109f9d
-  data.tar.gz: 407e7510c1739175cfc40dc476df07bc01526221cc9a77712e9db0d21545ce175143d91e3d21dfe609790e0fa47598f7d9c303f818df9ac0a03837b6c0b6027e
+  metadata.gz: 7f02605f56709c8beb093458050bfbacab13a286b7b3b5e720fbb1768aa20014a5410d29eb0dd98641eac9d17b245b0ce6e91811cf2419a4f9650e00381697ba
+  data.tar.gz: 7f45d681ade43daf9b5782eff2c87521af72606592f4f456cdce89148cc30ec55adf31ef998460a609a990124ccf4e2fe8666ff5a52892313c4eeef6fbd1f730
data/README.md CHANGED
@@ -155,11 +155,30 @@ section in the Google BigQuery document.
   auto_create_table true
   table yourtable%{time_slice}
   schema_path bq_schema.json
+
+  request_open_timeout_sec 5m
 </match>
 ```
 
 I recommend using a file buffer and a long flush interval.
 
+Differences from the insert method:
+
+* `buffer_type`
+  * default is `file` (the TimeSlicedOutput default)
+* `buffer_chunk_limit`
+  * default is 1GB
+  * BigQuery limits each load to 4GB (compressed) or 5TB (uncompressed)
+* `buffer_chunk_records_limit`
+  * available only when `buffer_type` is `lightening`
+* `buffer_queue_limit`
+  * default is 64
+  * maximum storage used is `buffer_chunk_limit` (default 1GB) x `buffer_queue_limit` (default 64) = 64GB
+* `flush_interval`
+  * default is `nil` (the TimeSlicedOutput default)
+* `request_open_timeout_sec`
+  * when sending large chunks to BigQuery, set `request_open_timeout_sec` to a long duration; otherwise timeout errors may occur (see the sketch after this hunk)
+
 ### Authentication
 
 There are two methods supported to fetch access token for the service account.
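Pulling the load-path settings above together, here is a hedged configuration sketch; the tag, key path, buffer path, and timeout values are illustrative assumptions, not recommendations from this release:

```
<match dummy>
  type bigquery
  method load

  auth_method private_key
  email foo@bar.example
  private_key_path /path/to/key

  project yourproject_id
  dataset yourdataset_id
  table yourtable%{time_slice}
  schema_path bq_schema.json

  # load defaults shown explicitly; omit them to accept the defaults
  buffer_type file
  buffer_path /var/log/fluent/bigquery.*.buffer
  buffer_chunk_limit 1g
  buffer_queue_limit 64

  # give large chunks time to upload
  request_open_timeout_sec 5m
</match>
```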
fluent-plugin-bigquery-custom.gemspec CHANGED
@@ -23,7 +23,8 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "test-unit", "~> 3.0.2"
   spec.add_development_dependency "test-unit-rr", "~> 1.0.3"
 
-  spec.add_runtime_dependency "google-api-client", "~> 0.9.pre5"
+  spec.add_runtime_dependency "google-api-client", "~> 0.9.3"
+  spec.add_runtime_dependency "activesupport", ">= 3.2"
   spec.add_runtime_dependency "googleauth"
   spec.add_runtime_dependency "fluentd"
   spec.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
lib/fluent/plugin/bigquery/version.rb CHANGED
@@ -1,6 +1,6 @@
 module Fluent
   module BigQueryPlugin
-    VERSION = "0.3.6"
+    VERSION = "0.3.7"
   end
 end
lib/fluent/plugin/out_bigquery.rb CHANGED
@@ -19,19 +19,26 @@ module Fluent
     # https://developers.google.com/bigquery/browser-tool-quickstart
     # https://developers.google.com/bigquery/bigquery-api-quickstart
 
-    config_set_default :buffer_type, 'lightening'
-
-    config_set_default :flush_interval, 0.25
-    config_set_default :try_flush_interval, 0.05
+    ### defaults for insert
+    def configure_for_insert(conf)
+      raise ConfigError unless conf["method"] != "load"
+
+      conf["buffer_type"] = "lightening" unless conf["buffer_type"]
+      conf["flush_interval"] = 0.25 unless conf["flush_interval"]
+      conf["try_flush_interval"] = 0.05 unless conf["try_flush_interval"]
+      conf["buffer_chunk_limit"] = 1 * 1024 ** 2 unless conf["buffer_chunk_limit"] # 1MB
+      conf["buffer_queue_limit"] = 1024 unless conf["buffer_queue_limit"]
+      conf["buffer_chunk_records_limit"] = 500 unless conf["buffer_chunk_records_limit"]
+    end
 
-    config_set_default :buffer_chunk_records_limit, 500
-    config_set_default :buffer_chunk_limit, 1000000
-    config_set_default :buffer_queue_limit, 1024
+    ### defaults for load
+    def configure_for_load(conf)
+      raise ConfigError unless conf["method"] == "load"
 
-    ### for loads
-    ### TODO: different default values for buffering between 'load' and insert
-    # config_set_default :flush_interval, 1800 # 30min => 48 imports/day
-    # config_set_default :buffer_chunk_limit, 1000**4 # 1.0*10^12 < 1TB (1024^4)
+      # buffer_type, flush_interval, and try_flush_interval keep the TimeSlicedOutput defaults
+      conf["buffer_chunk_limit"] = 1 * 1024 ** 3 unless conf["buffer_chunk_limit"] # 1GB
+      conf["buffer_queue_limit"] = 64 unless conf["buffer_queue_limit"]
+    end
 
     ### OAuth credential
     # config_param :client_id, :string
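The `unless conf[key]` guards above make these soft defaults: a value the user sets in the configuration is left untouched, which is also what the new tests below assert. A minimal standalone Ruby sketch of the merging behavior (the hash literal stands in for Fluentd's config element and is purely illustrative):

```ruby
# Soft-default merging as used by configure_for_load: keys already present
# in conf win; missing keys receive the plugin's defaults.
conf = { "buffer_type" => "memory" } # pretend the user set buffer_type

conf["buffer_type"] = "file" unless conf["buffer_type"]                       # kept: "memory"
conf["buffer_chunk_limit"] = 1 * 1024 ** 3 unless conf["buffer_chunk_limit"]  # applied: 1GB
conf["buffer_queue_limit"] = 64 unless conf["buffer_queue_limit"]             # applied: 64

p conf
# => {"buffer_type"=>"memory", "buffer_chunk_limit"=>1073741824, "buffer_queue_limit"=>64}
```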
@@ -131,6 +138,14 @@ module Fluent
     # If you exceed 100 rows per second for an extended period of time, throttling might occur.
     ### Toooooooooooooo short/small per inserts and row!
 
+    ## Timeout
+    # request_timeout_sec
+    #   BigQuery API response timeout
+    # request_open_timeout_sec
+    #   BigQuery API connection open and request timeout
+    config_param :request_timeout_sec, :time, default: nil
+    config_param :request_open_timeout_sec, :time, default: 60
+
     ### Table types
     # https://developers.google.com/bigquery/docs/tables
     #
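Both new parameters are declared with type `:time`, so the usual Fluentd duration suffixes apply. A hedged sketch of setting them (the tag and values are arbitrary examples, not recommendations):

```
<match dummy>
  type bigquery
  method load
  # plain seconds work:
  request_timeout_sec 300
  # as do suffixed durations (ten minutes):
  request_open_timeout_sec 10m
</match>
```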
@@ -165,6 +180,11 @@ module Fluent
     end
 
     def configure(conf)
+      if conf["method"] == "load"
+        configure_for_load(conf)
+      else
+        configure_for_insert(conf)
+      end
       super
 
       if @method == "insert"
@@ -428,7 +448,11 @@ module Fluent
         ignore_unknown_values: @ignore_unknown_values,
       }
       body.merge!(template_suffix: template_suffix) if template_suffix
-      res = client.insert_all_table_data(@project, @dataset, table_id, body, {})
+      res = client.insert_all_table_data(
+        @project, @dataset, table_id, body, {
+          options: {timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec}
+        }
+      )
 
       if res.insert_errors
         reasons = []
@@ -490,7 +514,18 @@ module Fluent
         job_id = create_job_id(upload_source.path, @dataset, @table, @fields.to_a, @max_bad_records, @ignore_unknown_values)
       end
       configuration = load_configuration(table_id, template_suffix, upload_source)
-      res = client.insert_job(@project, configuration, {upload_source: upload_source, content_type: "application/octet-stream"})
+      res = client.insert_job(
+        @project,
+        configuration,
+        {
+          upload_source: upload_source,
+          content_type: "application/octet-stream",
+          options: {
+            timeout_sec: @request_timeout_sec,
+            open_timeout_sec: @request_open_timeout_sec,
+          }
+        }
+      )
     end
 
     wait_load(res.job_reference.job_id)
test/plugin/test_out_bigquery.rb CHANGED
@@ -31,7 +31,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
   API_SCOPE = "https://www.googleapis.com/auth/bigquery"
 
   def create_driver(conf = CONFIG)
-    Fluent::Test::OutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
+    Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
   end
 
   def stub_client(driver)
@@ -64,6 +64,57 @@ class BigQueryOutputTest < Test::Unit::TestCase
     }
   end
 
+  def test_configure_default
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+    CONFIG
+
+    assert { driver.instance.instance_variable_get("@buffer_type") == "lightening" }
+    assert { driver.instance.instance_variable_get("@flush_interval") == 0.25 }
+    assert { driver.instance.instance_variable_get("@try_flush_interval") == 0.05 }
+    assert { driver.instance.instance_variable_get("@buffer").class == Fluent::LighteningBuffer }
+    assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_queue_limit") == 1024 }
+    assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 1 * 1024 ** 2 }
+  end
+
+  def test_configure_for_load
+    driver = create_driver(<<-CONFIG)
+      method load
+      buffer_path bigquery.*.buffer
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+    CONFIG
+
+    assert { driver.instance.instance_variable_get("@buffer_type") == "file" }
+    assert { driver.instance.instance_variable_get("@try_flush_interval") == 1 }
+    assert { driver.instance.instance_variable_get("@buffer").class == Fluent::FileBuffer }
+    assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 1 * 1024 ** 3 }
+    assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_queue_limit") == 64 }
+  end
+
+  def test_configure_for_load_with_parameter
+    driver = create_driver(<<-CONFIG)
+      method load
+      buffer_type memory
+      buffer_chunk_limit 100000
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+    CONFIG
+
+    assert { driver.instance.instance_variable_get("@buffer_type") == "memory" }
+    assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 100000 }
+  end
+
   def test_configure_auth_private_key
     key = stub!
     mock(Google::APIClient::KeyUtils).load_from_pkcs12('/path/to/key', 'notasecret') { key }
@@ -736,6 +787,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
 
     driver = create_driver(<<-CONFIG)
       method load
+      buffer_path bigquery.*.buffer
       table foo
       email foo@bar.example
      private_key_path /path/to/key
@@ -824,6 +876,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
     entry = {a: "b"}, {b: "c"}
     driver = create_driver(<<-CONFIG)
       method load
+      buffer_path bigquery.*.buffer
       table foo
       email foo@bar.example
       private_key_path /path/to/key
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery-custom
 version: !ruby/object:Gem::Version
-  version: 0.3.6
+  version: 0.3.7
 platform: ruby
 authors:
 - Tomohiro Hashidate
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-02-09 00:00:00.000000000 Z
+date: 2016-02-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -72,14 +72,28 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.9.pre5
+        version: 0.9.3
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.9.pre5
+        version: 0.9.3
+- !ruby/object:Gem::Dependency
+  name: activesupport
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '3.2'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '3.2'
 - !ruby/object:Gem::Dependency
   name: googleauth
   requirement: !ruby/object:Gem::Requirement