fluent-plugin-bigquery-custom 0.3.6 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 4af83e7241135e5fa4386ddd2342bc990448fd58
- data.tar.gz: 94d7f710bcf578befd9fc1a0ba4cf35eaf79e72b
+ metadata.gz: 29b676254a53a8b69c023230819fdded3f53c8e2
+ data.tar.gz: 1b26aa639d74be661e5866c1c78a8fe8ec40e232
  SHA512:
- metadata.gz: aad322ae03a689b9bd9459f7eae50f8990801e95bd0f8816a9ce80948b6f962da2724ef35a8728d71d36c266b8a878507953e865f80928503b2ff925c4109f9d
- data.tar.gz: 407e7510c1739175cfc40dc476df07bc01526221cc9a77712e9db0d21545ce175143d91e3d21dfe609790e0fa47598f7d9c303f818df9ac0a03837b6c0b6027e
+ metadata.gz: 7f02605f56709c8beb093458050bfbacab13a286b7b3b5e720fbb1768aa20014a5410d29eb0dd98641eac9d17b245b0ce6e91811cf2419a4f9650e00381697ba
+ data.tar.gz: 7f45d681ade43daf9b5782eff2c87521af72606592f4f456cdce89148cc30ec55adf31ef998460a609a990124ccf4e2fe8666ff5a52892313c4eeef6fbd1f730
data/README.md CHANGED
@@ -155,11 +155,30 @@ section in the Google BigQuery document.
  auto_create_table true
  table yourtable%{time_slice}
  schema_path bq_schema.json
+
+ request_open_timeout_sec 5m
  </match>
  ```
 
  I recommend using a file buffer and a long flush interval.
 
+ Differences from the insert method:
+
+ * `buffer_type`
+   * default is file (the TimeSlicedOutput default)
+ * `buffer_chunk_limit`
+   * default is 1GB
+   * BigQuery limits the maximum size to 4GB (compressed) or 5TB (uncompressed)
+ * `buffer_chunk_records_limit`
+   * available only when `buffer_type` is `lightening`
+ * `buffer_queue_limit`
+   * default is 64
+   * maximum buffer storage used is `buffer_chunk_limit (default 1GB)` x `buffer_queue_limit (default 64)` = 64GB
+ * `flush_interval`
+   * default is `nil` (the TimeSlicedOutput default)
+ * `request_open_timeout_sec`
+   * If you send large chunks to BigQuery, set a long `request_open_timeout_sec`; otherwise timeout errors may occur.
+
  ### Authentication
 
  There are two methods supported to fetch access token for the service account.
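As a reference for the README changes above, a minimal `<match>` block using the load method might look like the sketch below. The tag pattern, credential paths, project/dataset/table names, and the `buffer_path` value are placeholders, and the explicit buffer settings simply restate the defaults the plugin now applies when `method load` is configured.

```
<match bq.*>
  type bigquery
  method load

  # file buffer is the TimeSlicedOutput default; buffer_path is required for it
  buffer_type file
  buffer_path /var/log/td-agent/buffer/bigquery

  # restated plugin defaults for the load method
  buffer_chunk_limit 1g
  buffer_queue_limit 64

  # allow large chunks to finish uploading before the connection times out
  request_open_timeout_sec 5m

  auth_method private_key
  email foo@bar.example
  private_key_path /path/to/key

  project yourproject_id
  dataset yourdataset_id
  table yourtable%{time_slice}
  schema_path bq_schema.json
</match>
```

With these defaults, up to 64 chunks of 1GB each (64GB) can accumulate on disk before the buffer queue is full.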
data/fluent-plugin-bigquery-custom.gemspec CHANGED
@@ -23,7 +23,8 @@ Gem::Specification.new do |spec|
  spec.add_development_dependency "test-unit", "~> 3.0.2"
  spec.add_development_dependency "test-unit-rr", "~> 1.0.3"
 
- spec.add_runtime_dependency "google-api-client", "~> 0.9.pre5"
+ spec.add_runtime_dependency "google-api-client", "~> 0.9.3"
+ spec.add_runtime_dependency "activesupport", ">= 3.2"
  spec.add_runtime_dependency "googleauth"
  spec.add_runtime_dependency "fluentd"
  spec.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
data/lib/fluent/plugin/bigquery/version.rb CHANGED
@@ -1,6 +1,6 @@
  module Fluent
  module BigQueryPlugin
- VERSION = "0.3.6"
+ VERSION = "0.3.7"
  end
  end
 
data/lib/fluent/plugin/out_bigquery.rb CHANGED
@@ -19,19 +19,26 @@ module Fluent
  # https://developers.google.com/bigquery/browser-tool-quickstart
  # https://developers.google.com/bigquery/bigquery-api-quickstart
 
- config_set_default :buffer_type, 'lightening'
-
- config_set_default :flush_interval, 0.25
- config_set_default :try_flush_interval, 0.05
+ ### default for insert
+ def configure_for_insert(conf)
+ raise ConfigError unless conf["method"] != "load"
+
+ conf["buffer_type"] = "lightening" unless conf["buffer_type"]
+ conf["flush_interval"] = 0.25 unless conf["flush_interval"]
+ conf["try_flush_interval"] = 0.05 unless conf["try_flush_interval"]
+ conf["buffer_chunk_limit"] = 1 * 1024 ** 2 unless conf["buffer_chunk_limit"] # 1MB
+ conf["buffer_queue_limit"] = 1024 unless conf["buffer_queue_limit"]
+ conf["buffer_chunk_records_limit"] = 500 unless conf["buffer_chunk_records_limit"]
+ end
 
- config_set_default :buffer_chunk_records_limit, 500
- config_set_default :buffer_chunk_limit, 1000000
- config_set_default :buffer_queue_limit, 1024
+ ### default for loads
+ def configure_for_load(conf)
+ raise ConfigError unless conf["method"] == "load"
 
- ### for loads
- ### TODO: different default values for buffering between 'load' and insert
- # config_set_default :flush_interval, 1800 # 30min => 48 imports/day
- # config_set_default :buffer_chunk_limit, 1000**4 # 1.0*10^12 < 1TB (1024^4)
+ # buffer_type, flush_interval, try_flush_interval is TimeSlicedOutput default
+ conf["buffer_chunk_limit"] = 1 * 1024 ** 3 unless conf["buffer_chunk_limit"] # 1GB
+ conf["buffer_queue_limit"] = 64 unless conf["buffer_queue_limit"]
+ end
 
  ### OAuth credential
  # config_param :client_id, :string
@@ -131,6 +138,14 @@ module Fluent
  # If you exceed 100 rows per second for an extended period of time, throttling might occur.
  ### Toooooooooooooo short/small per inserts and row!
 
+ ## Timeout
+ # request_timeout_sec
+ # Bigquery API response timeout
+ # request_open_timeout_sec
+ # Bigquery API connection, and request timeout
+ config_param :request_timeout_sec, :time, default: nil
+ config_param :request_open_timeout_sec, :time, default: 60
+
  ### Table types
  # https://developers.google.com/bigquery/docs/tables
  #
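The hunk above adds two user-facing timeout parameters: `request_timeout_sec` (BigQuery API response timeout, unset by default) and `request_open_timeout_sec` (connection/request open timeout, 60 seconds by default). As an illustrative sketch with arbitrary values, they would appear in the output configuration like this:

```
# wait up to 2 minutes for the BigQuery API response (default: unset)
request_timeout_sec 120
# allow 5 minutes to open the connection and send the request (default: 60s)
request_open_timeout_sec 5m
```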
@@ -165,6 +180,11 @@ module Fluent
  end
 
  def configure(conf)
+ if conf["method"] == "load"
+ configure_for_load(conf)
+ else
+ configure_for_insert(conf)
+ end
  super
 
  if @method == "insert"
@@ -428,7 +448,11 @@ module Fluent
  ignore_unknown_values: @ignore_unknown_values,
  }
  body.merge!(template_suffix: template_suffix) if template_suffix
- res = client.insert_all_table_data(@project, @dataset, table_id, body, {})
+ res = client.insert_all_table_data(
+ @project, @dataset, table_id, body, {
+ options: {timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec}
+ }
+ )
 
  if res.insert_errors
  reasons = []
@@ -490,7 +514,18 @@ module Fluent
  job_id = create_job_id(upload_source.path, @dataset, @table, @fields.to_a, @max_bad_records, @ignore_unknown_values)
  end
  configuration = load_configuration(table_id, template_suffix, upload_source)
- res = client.insert_job(@project, configuration, {upload_source: upload_source, content_type: "application/octet-stream"})
+ res = client.insert_job(
+ @project,
+ configuration,
+ {
+ upload_source: upload_source,
+ content_type: "application/octet-stream",
+ options: {
+ timeout_sec: @request_timeout_sec,
+ open_timeout_sec: @request_open_timeout_sec,
+ }
+ }
+ )
  end
 
  wait_load(res.job_reference.job_id)
data/test/plugin/test_out_bigquery.rb CHANGED
@@ -31,7 +31,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
  API_SCOPE = "https://www.googleapis.com/auth/bigquery"
 
  def create_driver(conf = CONFIG)
- Fluent::Test::OutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
+ Fluent::Test::TimeSlicedOutputTestDriver.new(Fluent::BigQueryOutput).configure(conf)
  end
 
  def stub_client(driver)
@@ -64,6 +64,57 @@ class BigQueryOutputTest < Test::Unit::TestCase
  }
  end
 
+ def test_configure_default
+ driver = create_driver(<<-CONFIG)
+ table foo
+ email foo@bar.example
+ private_key_path /path/to/key
+ project yourproject_id
+ dataset yourdataset_id
+ CONFIG
+
+ assert { driver.instance.instance_variable_get("@buffer_type") == "lightening" }
+ assert { driver.instance.instance_variable_get("@flush_interval") == 0.25 }
+ assert { driver.instance.instance_variable_get("@try_flush_interval") == 0.05 }
+ assert { driver.instance.instance_variable_get("@buffer").class == Fluent::LighteningBuffer }
+ assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_queue_limit") == 1024 }
+ assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 1 * 1024 ** 2 }
+ end
+
+ def test_configure_for_load
+ driver = create_driver(<<-CONFIG)
+ method load
+ buffer_path bigquery.*.buffer
+ table foo
+ email foo@bar.example
+ private_key_path /path/to/key
+ project yourproject_id
+ dataset yourdataset_id
+ CONFIG
+
+ assert { driver.instance.instance_variable_get("@buffer_type") == "file" }
+ assert { driver.instance.instance_variable_get("@try_flush_interval") == 1 }
+ assert { driver.instance.instance_variable_get("@buffer").class == Fluent::FileBuffer }
+ assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 1 * 1024 ** 3 }
+ assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_queue_limit") == 64 }
+ end
+
+ def test_configure_for_load_with_parameter
+ driver = create_driver(<<-CONFIG)
+ method load
+ buffer_type memory
+ buffer_chunk_limit 100000
+ table foo
+ email foo@bar.example
+ private_key_path /path/to/key
+ project yourproject_id
+ dataset yourdataset_id
+ CONFIG
+
+ assert { driver.instance.instance_variable_get("@buffer_type") == "memory" }
+ assert { driver.instance.instance_variable_get("@buffer").instance_variable_get("@buffer_chunk_limit") == 100000 }
+ end
+
  def test_configure_auth_private_key
  key = stub!
  mock(Google::APIClient::KeyUtils).load_from_pkcs12('/path/to/key', 'notasecret') { key }
@@ -736,6 +787,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
 
  driver = create_driver(<<-CONFIG)
  method load
+ buffer_path bigquery.*.buffer
  table foo
  email foo@bar.example
  private_key_path /path/to/key
@@ -824,6 +876,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
  entry = {a: "b"}, {b: "c"}
  driver = create_driver(<<-CONFIG)
  method load
+ buffer_path bigquery.*.buffer
  table foo
  email foo@bar.example
  private_key_path /path/to/key
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-bigquery-custom
  version: !ruby/object:Gem::Version
- version: 0.3.6
+ version: 0.3.7
  platform: ruby
  authors:
  - Tomohiro Hashidate
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2016-02-09 00:00:00.000000000 Z
+ date: 2016-02-28 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake
@@ -72,14 +72,28 @@ dependencies:
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 0.9.pre5
+ version: 0.9.3
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 0.9.pre5
+ version: 0.9.3
+ - !ruby/object:Gem::Dependency
+ name: activesupport
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '3.2'
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '3.2'
  - !ruby/object:Gem::Dependency
  name: googleauth
  requirement: !ruby/object:Gem::Requirement