logstash-output-google_bigquery 4.0.1-java → 4.1.4-java

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: c388bf700fa17c45b9dcf608fba0515280154734c8eac3b9c92eb6d3d8d31446
- data.tar.gz: ab5fccb22f53cabd993d333b8c49bbdf3e2dccf5c6e6b27f00fea4a247b339e3
+ metadata.gz: 86df4c7f7f48b149718bc39563788f8d26347ad8cf9e71e9e7557dab57241295
+ data.tar.gz: 8cd124442f1e4716188205d2bfb8008dcab6a3777c15d6f48cc36ee1da2f784f
  SHA512:
- metadata.gz: 8223e186bd7ebc7300317bf1fafb24351057a90c0edead491025763f63f14dd3b2218185bb1ff70c8656dbbfad3085ee499812f275d2f14e05cd8760f9a48915
- data.tar.gz: 635eddd9885c24236600072cd648ba86e9ee69a2366a625b8068fffe142e301be1056f05b74bf9799ccc86da34f190cfac093e773068bc4a1d56ce6aab5da2c1
+ metadata.gz: d22203b529cdc2defeba3bf5ede58673620fd4f560080c27849e1e1ca16720acbdbf6c00d83c866a7f31f175d91933259dddfe77264f1f28b21729c4965b12a7
+ data.tar.gz: 72043ee381c4844af2cdd8c0d744a95c37c1b42512555aa09eac42755f62139c0a88e894abbe51ee912cbea5fea408146e6872aa6f09c49f0f93538e30187869
data/CHANGELOG.md CHANGED
@@ -1,3 +1,20 @@
+ ## 4.1.4
+ - Changed concurrency to `:shared` and moved publishing outside of synchronized code [#60](https://github.com/logstash-plugins/logstash-output-google_bigquery/pull/60)
+
+ ## 4.1.3
+ - Fixed documentation issue where malformed asciidoc caused text to be lost [#53](https://github.com/logstash-plugins/logstash-output-google_bigquery/pull/53)
+
+ ## 4.1.2
+ - Fixed issue where Logstash shutdown could cause data loss due to not flushing buffers on close [#52](https://github.com/logstash-plugins/logstash-output-google_bigquery/pull/52)
+
+ ## 4.1.1
+ - Fixed inaccuracies in documentation [#46](https://github.com/logstash-plugins/logstash-output-google_bigquery/pull/46)
+
+ ## 4.1.0
+ - Added `skip_invalid_rows` configuration which will insert all valid rows of a BigQuery insert
+   and skip any invalid ones.
+ - Fixes [#5](https://github.com/logstash-plugins/logstash-output-google_bigquery/issues/5)
+
  ## 4.0.1
  - Documentation cleanup
 
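The headline change in this range is `skip_invalid_rows`, added in 4.1.0. A minimal sketch of enabling it in an output block (the project, dataset, and key-file values are placeholders, not values from this release):

```ruby
output {
  google_bigquery {
    project_id        => "my-project-id"      # placeholder
    dataset           => "logs"               # placeholder
    json_key_file     => "/path/to/key.json"  # placeholder
    # New in 4.1.0: insert the valid rows of a batch and divert only the
    # invalid ones to error_directory, instead of failing the whole request.
    skip_invalid_rows => true
  }
}
```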
data/LICENSE CHANGED
@@ -1,13 +1,202 @@
- Copyright (c) 2012-2018 Elasticsearch <http://www.elastic.co>
 
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
 
- http://www.apache.org/licenses/LICENSE-2.0
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2020 Elastic and contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
data/README.md CHANGED
@@ -1,6 +1,6 @@
  # Logstash Plugin
 
- [![Travis Build Status](https://travis-ci.org/logstash-plugins/logstash-output-google_bigquery.svg)](https://travis-ci.org/logstash-plugins/logstash-output-google_bigquery)
+ [![Travis Build Status](https://travis-ci.com/logstash-plugins/logstash-output-google_bigquery.svg)](https://travis-ci.com/logstash-plugins/logstash-output-google_bigquery)
 
  This is a plugin for [Logstash](https://github.com/elastic/logstash).
 
data/docs/index.asciidoc CHANGED
@@ -23,25 +23,24 @@ include::{include_path}/plugin_header.asciidoc[]
 
  ===== Summary
 
- This plugin uploads events to Google BigQuery using the streaming API
- so data can become available nearly immediately.
+ This Logstash plugin uploads events to Google BigQuery using the streaming API
+ so data can become available to query nearly immediately.
 
  You can configure it to flush periodically, after N events or after
  a certain amount of data is ingested.
 
  ===== Environment Configuration
 
- You must enable BigQuery on your Google Cloud Storage (GCS) account and create a dataset to
+ You must enable BigQuery on your Google Cloud account and create a dataset to
  hold the tables this plugin generates.
 
- You must also grant the service account this plugin uses access to
- the dataset.
+ You must also grant the service account this plugin uses access to the dataset.
 
  You can use https://www.elastic.co/guide/en/logstash/current/event-dependent-configuration.html[Logstash conditionals]
  and multiple configuration blocks to upload events with different structures.
 
  ===== Usage
- This is an example of logstash config:
+ This is an example of Logstash config:
 
  [source,ruby]
  --------------------------
@@ -65,15 +64,18 @@ https://cloud.google.com/docs/authentication/production[Application Default Cred
 
  ===== Considerations
 
- * There is a small fee to insert data into BigQuery using the streaming API
+ * There is a small fee to insert data into BigQuery using the streaming API.
  * This plugin buffers events in-memory, so make sure the flush configurations are appropriate
  for your use-case and consider using
- https://www.elastic.co/guide/en/logstash/current/persistent-queues.html[Logstash Persistent Queues]
+ https://www.elastic.co/guide/en/logstash/current/persistent-queues.html[Logstash Persistent Queues].
+ * Events will be flushed when <<plugins-{type}s-{plugin}-batch_size>>, <<plugins-{type}s-{plugin}-batch_size_bytes>>, or <<plugins-{type}s-{plugin}-flush_interval_secs>> is met, whichever comes first.
+ If you notice a delay in your processing or low throughput, try adjusting those settings.
 
  ===== Additional Resources
 
  * https://cloud.google.com/docs/authentication/production[Application Default Credentials (ADC) Overview]
  * https://cloud.google.com/bigquery/[BigQuery Introduction]
+ * https://cloud.google.com/bigquery/quota[BigQuery Quotas and Limits]
  * https://cloud.google.com/bigquery/docs/schemas[BigQuery Schema Formats and Types]
 
  [id="plugins-{type}s-{plugin}-options"]
@@ -99,6 +101,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
  | <<plugins-{type}s-{plugin}-key_path>> |<<string,string>>|*Obsolete*
  | <<plugins-{type}s-{plugin}-project_id>> |<<string,string>>|Yes
  | <<plugins-{type}s-{plugin}-service_account>> |<<string,string>>|__Deprecated__
+ | <<plugins-{type}s-{plugin}-skip_invalid_rows>> |<<boolean,boolean>>|No
  | <<plugins-{type}s-{plugin}-table_prefix>> |<<string,string>>|No
  | <<plugins-{type}s-{plugin}-table_separator>> |<<string,string>>|No
  | <<plugins-{type}s-{plugin}-temp_directory>> |<<string,string>>|__Deprecated__
@@ -119,7 +122,12 @@ added[4.0.0]
  * Value type is <<number,number>>
  * Default value is `128`
 
- The number of messages to upload at a single time. (< 1000, default: 128)
+ The maximum number of messages to upload at a single time.
+ This number must be < 10,000.
+ Batching can increase performance and throughput to a point, but at the cost of per-request latency.
+ Too few rows per request and the overhead of each request can make ingestion inefficient.
+ Too many rows per request and the throughput may drop.
+ BigQuery recommends using about 500 rows per request, but experimentation with representative data (schema and data sizes) will help you determine the ideal batch size.
 
  [id="plugins-{type}s-{plugin}-batch_size_bytes"]
  ===== `batch_size_bytes`
@@ -129,10 +137,11 @@ added[4.0.0]
  * Value type is <<number,number>>
  * Default value is `1_000_000`
 
- An approximate number of bytes to upload as part of a batch. Default: 1MB
+ An approximate number of bytes to upload as part of a batch.
+ This number should be < 10MB or inserts may fail.
 
  [id="plugins-{type}s-{plugin}-csv_schema"]
- ===== `csv_schema`
+ ===== `csv_schema`
 
  * Value type is <<string,string>>
  * Default value is `nil`
@@ -141,7 +150,7 @@ Schema for log data. It must follow the format `name1:type1(,name2:type2)*`.
  For example, `path:STRING,status:INTEGER,score:FLOAT`.
 
  [id="plugins-{type}s-{plugin}-dataset"]
- ===== `dataset`
+ ===== `dataset`
 
  * This is a required setting.
  * Value type is <<string,string>>
@@ -150,7 +159,7 @@ For example, `path:STRING,status:INTEGER,score:FLOAT`.
  The BigQuery dataset the tables for the events will be added to.
 
  [id="plugins-{type}s-{plugin}-date_pattern"]
- ===== `date_pattern`
+ ===== `date_pattern`
 
  * Value type is <<string,string>>
  * Default value is `"%Y-%m-%dT%H:00"`
@@ -174,6 +183,9 @@ added[4.0.0]
  * Default value is `"/tmp/bigquery"`.
 
  The location to store events that could not be uploaded due to errors.
+ By default, if _any_ message in an insert is invalid, the entire insert will fail.
+ You can use <<plugins-{type}s-{plugin}-skip_invalid_rows>> to allow partial inserts.
+
  Consider using an additional Logstash input to pipe the contents of
  these to an alert platform so you can manually fix the events.
 
@@ -183,15 +195,16 @@ transparently upload to a GCS bucket.
  File names follow the pattern `[table name]-[UNIX timestamp].log`
 
  [id="plugins-{type}s-{plugin}-flush_interval_secs"]
- ===== `flush_interval_secs`
+ ===== `flush_interval_secs`
 
  * Value type is <<number,number>>
  * Default value is `5`
 
- Uploads all data this often even if other upload criteria aren't met. Default: 5s
+ Uploads all data this often even if other upload criteria aren't met.
+
 
  [id="plugins-{type}s-{plugin}-ignore_unknown_values"]
- ===== `ignore_unknown_values`
+ ===== `ignore_unknown_values`
 
  * Value type is <<boolean,boolean>>
  * Default value is `false`
@@ -213,17 +226,17 @@ mutate {
  [id="plugins-{type}s-{plugin}-json_key_file"]
  ===== `json_key_file`
 
- added[4.0.0, Replaces <<plugins-{type}s-{plugin}-key_password>>, <<plugins-{type}s-{plugin}-key_path>> and <<plugins-{type}s-{plugin}-service_account>>]
+ added[4.0.0, "Replaces <<plugins-{type}s-{plugin}-key_password>>, <<plugins-{type}s-{plugin}-key_path>> and <<plugins-{type}s-{plugin}-service_account>>."]
 
  * Value type is <<string,string>>
  * Default value is `nil`
 
- If logstash is running within Google Compute Engine, the plugin can use
+ If Logstash is running within Google Compute Engine, the plugin can use
  GCE's Application Default Credentials. Outside of GCE, you will need to
  specify a Service Account JSON key file.
 
  [id="plugins-{type}s-{plugin}-json_schema"]
- ===== `json_schema`
+ ===== `json_schema`
 
  * Value type is <<hash,hash>>
  * Default value is `nil`
@@ -283,7 +296,7 @@ Please use one of the following mechanisms:
  `gcloud iam service-accounts keys create key.json --iam-account my-sa-123@my-project-123.iam.gserviceaccount.com`
 
  [id="plugins-{type}s-{plugin}-project_id"]
- ===== `project_id`
+ ===== `project_id`
 
  * This is a required setting.
  * Value type is <<string,string>>
@@ -298,8 +311,19 @@ deprecated[4.0.0, Replaced by `json_key_file` or by using ADC. See <<plugins-{ty
 
  * Value type is <<string,string>>
 
+ [id="plugins-{type}s-{plugin}-skip_invalid_rows"]
+ ===== `skip_invalid_rows`
+
+ added[4.1.0]
+
+ * Value type is <<boolean,boolean>>
+ * Default value is `false`
+
+ Insert all valid rows of a request, even if invalid rows exist.
+ The default value is false, which causes the entire request to fail if any invalid rows exist.
+
 
  [id="plugins-{type}s-{plugin}-table_prefix"]
- ===== `table_prefix`
+ ===== `table_prefix`
 
  * Value type is <<string,string>>
  * Default value is `"logstash"`
@@ -308,7 +332,7 @@ BigQuery table ID prefix to be used when creating new tables for log data.
  Table name will be `<table_prefix><table_separator><date>`
 
  [id="plugins-{type}s-{plugin}-table_separator"]
- ===== `table_separator`
+ ===== `table_separator`
 
  * Value type is <<string,string>>
  * Default value is `"_"`
@@ -346,4 +370,4 @@ around one hour).
  [id="plugins-{type}s-{plugin}-common-options"]
  include::{include_path}/{type}.asciidoc[]
 
- :default_codec!:
+ :default_codec!:
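Taken together, the flush-related settings above mean a batch is sent as soon as `batch_size`, `batch_size_bytes`, or `flush_interval_secs` is hit, whichever comes first. A sketch of a tuned output block consistent with that guidance (all values illustrative):

```ruby
output {
  google_bigquery {
    project_id          => "my-project-id"  # placeholder
    dataset             => "logs"           # placeholder
    batch_size          => 500        # ~500 rows/request, per BigQuery's recommendation
    batch_size_bytes    => 5000000    # stay well under the ~10MB request ceiling
    flush_interval_secs => 5          # upper bound on how long events wait in the buffer
  }
}
```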
data/lib/logstash/outputs/bigquery/batcher.rb CHANGED
@@ -35,6 +35,7 @@ module LogStash
  def enqueue(message)
  @lock.write_lock.lock
 
+ orig = nil
  begin
  is_flush_request = message.nil?
 
@@ -49,15 +50,14 @@ module LogStash
  if is_flush_request || length_met || size_met
  orig = @batch
  clear
-
- yield(orig) if block_given?
- return orig
  end
 
- nil
  ensure
  @lock.write_lock.unlock
  end
+
+ yield(orig) if block_given? && !orig.nil?
+ return orig
  end
 
  # removes all elements from the batch
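This hunk is the heart of PR #60: the batch swap still happens under the write lock, but the slow publish callback now runs after the lock is released, so other pipeline workers never block on BigQuery I/O. A self-contained sketch of the pattern, not the plugin's literal code (a plain `Mutex` stands in for its read-write lock, and the size check is simplified):

```ruby
# Sketch only: mutate shared state under the lock, do I/O outside it.
class Batcher
  def initialize(max_rows)
    @mutex    = Mutex.new
    @max_rows = max_rows
    @batch    = []
  end

  # A nil message means "flush whatever is buffered now".
  def enqueue(message)
    ready = nil
    @mutex.synchronize do
      @batch << message unless message.nil?
      if message.nil? || @batch.length >= @max_rows
        ready  = @batch   # hand the full batch to the caller...
        @batch = []       # ...and start a fresh one
      end
    end
    # The network call happens here, with the lock already released.
    yield(ready) if block_given? && ready && !ready.empty?
    ready
  end
end
```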
data/lib/logstash/outputs/bigquery/streamclient.rb CHANGED
@@ -36,18 +36,20 @@ module LogStash
  @bigquery.create table_info
  end
 
- def append(dataset, table, rows, ignore_unknown)
+ def append(dataset, table, rows, ignore_unknown, skip_invalid)
  api_debug("Appending #{rows.length} rows", dataset, table)
 
- request = build_append_request dataset, table, rows, ignore_unknown
+ request = build_append_request(dataset, table, rows, ignore_unknown, skip_invalid)
 
  response = @bigquery.insertAll request
- return true unless response.hasErrors
+ return [] unless response.hasErrors
 
+ failed_rows = []
  response.getInsertErrors().entrySet().each{ |entry|
  key = entry.getKey
- errors = entry.getValue
+ failed_rows << rows[key]
 
+ errors = entry.getValue
  errors.each{|bqError|
  @logger.warn('Error while inserting',
  key: key,
@@ -57,12 +59,13 @@ module LogStash
  }
  }
 
- false
+ failed_rows
  end
 
- def build_append_request(dataset, table, rows, ignore_unknown)
+ def build_append_request(dataset, table, rows, ignore_unknown, skip_invalid)
  request = com.google.cloud.bigquery.InsertAllRequest.newBuilder dataset, table
  request.setIgnoreUnknownValues ignore_unknown
+ request.setSkipInvalidRows(skip_invalid)
 
  rows.each { |serialized_row|
  # deserialize rows into Java maps
@@ -75,7 +78,7 @@ module LogStash
 
  # raises an exception if the key file is invalid
  def get_key_file_error(json_key_file)
- return nil if json_key_file.nil? || json_key_file == ''
+ return nil if nil_or_empty?(json_key_file)
 
  abs = ::File.absolute_path json_key_file
  unless abs == json_key_file
@@ -94,16 +97,10 @@ module LogStash
  err = get_key_file_error json_key_file
  raise err unless err.nil?
 
- if json_key_file.nil? || json_key_file.empty?
- return com.google.cloud.bigquery.BigQueryOptions.getDefaultInstance().getService()
- end
-
- # TODO: set User-Agent
-
- key_file = java.io.FileInputStream.new json_key_file
- credentials = com.google.auth.oauth2.ServiceAccountCredentials.fromStream key_file
- return com.google.cloud.bigquery.BigQueryOptions.newBuilder()
- .setCredentials(credentials)
+ com.google.cloud.bigquery.BigQueryOptions.newBuilder()
+ .setCredentials(credentials(json_key_file))
+ .setHeaderProvider(http_headers)
+ .setRetrySettings(retry_settings)
  .setProjectId(project_id)
  .build()
  .getService()
@@ -111,9 +108,45 @@ module LogStash
 
  private
 
+ java_import 'com.google.auth.oauth2.GoogleCredentials'
+ def credentials(json_key_path)
+ return GoogleCredentials.getApplicationDefault() if nil_or_empty?(json_key_path)
+
+ key_file = java.io.FileInputStream.new(json_key_path)
+ GoogleCredentials.fromStream(key_file)
+ end
+
+ java_import 'com.google.api.gax.rpc.FixedHeaderProvider'
+ def http_headers
+ gem_name = 'logstash-output-google_bigquery'
+ gem_version = '4.1.0'
+ user_agent = "Elastic/#{gem_name} version/#{gem_version}"
+
+ FixedHeaderProvider.create({ 'User-Agent' => user_agent })
+ end
+
+ java_import 'com.google.api.gax.retrying.RetrySettings'
+ java_import 'org.threeten.bp.Duration'
+ def retry_settings
+ # backoff values taken from com.google.api.client.util.ExponentialBackOff
+ RetrySettings.newBuilder()
+ .setInitialRetryDelay(Duration.ofMillis(500))
+ .setRetryDelayMultiplier(1.5)
+ .setMaxRetryDelay(Duration.ofSeconds(60))
+ .setInitialRpcTimeout(Duration.ofSeconds(20))
+ .setRpcTimeoutMultiplier(1.5)
+ .setMaxRpcTimeout(Duration.ofSeconds(20))
+ .setTotalTimeout(Duration.ofMinutes(15))
+ .build()
+ end
+
  def api_debug(message, dataset, table)
  @logger.debug(message, dataset: dataset, table: table)
  end
+
+ def nil_or_empty?(param)
+ param.nil? || param.empty?
+ end
  end
  end
  end
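Note the contract change in `append`: it now returns the rows that failed rather than a boolean, with an empty array meaning every row was accepted. A hypothetical caller-side sketch (the `client` and `write_to_errors_file` names mirror the plugin, but this is not its literal code):

```ruby
# Sketch of consuming the new append return value.
def upload(client, dataset, table, rows, ignore_unknown, skip_invalid)
  failed_rows = client.append(dataset, table, rows, ignore_unknown, skip_invalid)
  return if failed_rows.empty? # [] replaces the old `true`: full success

  # Only the rejected rows are persisted for repair; under 4.0.x the whole
  # batch was written out because append only reported a bare true/false.
  write_to_errors_file(failed_rows, table)
end
```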
data/lib/logstash/outputs/google_bigquery.rb CHANGED
@@ -7,6 +7,7 @@ require 'logstash/outputs/bigquery/schema'
 
  require 'time'
  require 'fileutils'
+ require 'concurrent'
 
  #
  # === Summary
@@ -67,7 +68,7 @@ require 'fileutils'
  class LogStash::Outputs::GoogleBigQuery < LogStash::Outputs::Base
  config_name 'google_bigquery'
 
- concurrency :single
+ concurrency :shared
 
  # Google Cloud Project ID (number, not Project Name!).
  config :project_id, validate: :string, required: true
@@ -160,6 +161,10 @@ class LogStash::Outputs::GoogleBigQuery < LogStash::Outputs::Base
  # File names follow the pattern `[table name]-[UNIX timestamp].log`
  config :error_directory, validate: :string, required: true, default: '/tmp/bigquery_errors'
 
+ # Insert all valid rows of a request, even if invalid rows exist. The default value is false,
+ # which causes the entire request to fail if any invalid rows exist.
+ config :skip_invalid_rows, validate: :boolean, default: false
+
  # The following configuration options still exist to alert users that are using them
  config :uploader_interval_secs, validate: :number, deprecated: 'No longer used.'
  config :deleter_interval_secs, validate: :number, deprecated: 'No longer used.'
@@ -177,6 +182,7 @@ class LogStash::Outputs::GoogleBigQuery < LogStash::Outputs::Base
  @schema = LogStash::Outputs::BigQuery::Schema.parse_csv_or_json @csv_schema, @json_schema
  @bq_client = LogStash::Outputs::BigQuery::StreamingClient.new @json_key_file, @project_id, @logger
  @batcher = LogStash::Outputs::BigQuery::Batcher.new @batch_size, @batch_size_bytes
+ @stopping = Concurrent::AtomicBoolean.new(false)
 
  init_batcher_flush_thread
  end
@@ -232,8 +238,8 @@ class LogStash::Outputs::GoogleBigQuery < LogStash::Outputs::Base
 
  create_table_if_not_exists table
 
- successful = @bq_client.append @dataset, table, messages, @ignore_unknown_values
- write_to_errors_file(messages, table) unless successful
+ failed_rows = @bq_client.append(@dataset, table, messages, @ignore_unknown_values, @skip_invalid_rows)
+ write_to_errors_file(failed_rows, table) unless failed_rows.empty?
  rescue StandardError => e
  @logger.error 'Error uploading data.', :exception => e
 
@@ -270,11 +276,24 @@ class LogStash::Outputs::GoogleBigQuery < LogStash::Outputs::Base
 
  def init_batcher_flush_thread
  @flush_thread = Thread.new do
- loop do
- sleep @flush_interval_secs
+ until stopping?
+ Stud.stoppable_sleep(@flush_interval_secs) { stopping? }
 
  @batcher.enqueue(nil) { |batch| publish(batch) }
  end
  end
  end
+
+ def stopping?
+ @stopping.value
+ end
+
+ def close
+ @stopping.make_true
+ @flush_thread.wakeup
+ @flush_thread.join
+ # Final flush to publish any events enqueued after the flush thread began
+ # its last pass, e.g. when a pipeline receives a shutdown signal mid-flush.
+ @batcher.enqueue(nil) { |batch| publish(batch) }
+ end
  end
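The shutdown fix from #52 reduces to a stoppable flush loop plus one final drain on close. A condensed, self-contained sketch of that pattern, assuming the `concurrent-ruby` and `stud` gems that Logstash already ships with:

```ruby
require 'concurrent'
require 'stud/interval'

# Distilled flush-loop pattern: flush every interval, wake early on stop,
# and drain once more after the loop exits so a shutdown signal cannot
# strand events that were enqueued mid-flush.
class FlushLoop
  def initialize(interval, &flush)
    @stopping = Concurrent::AtomicBoolean.new(false)
    @flush    = flush
    @thread   = Thread.new do
      until @stopping.value
        # Sleeps up to the interval, waking early once the stop flag flips.
        Stud.stoppable_sleep(interval) { @stopping.value }
        @flush.call
      end
    end
  end

  def close
    @stopping.make_true
    @thread.wakeup
    @thread.join
    @flush.call # final drain
  end
end

# e.g. FlushLoop.new(5) { batcher.enqueue(nil) { |batch| publish(batch) } }
```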
data/logstash-output-google_bigquery.gemspec CHANGED
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = 'logstash-output-google_bigquery'
- s.version = '4.0.1'
+ s.version = '4.1.4'
  s.licenses = ['Apache License (2.0)']
  s.summary = "Writes events to Google BigQuery"
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
data/spec/fixtures/credentials.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "//": "Dummy account from https://github.com/GoogleCloudPlatform/google-cloud-java/google-cloud-clients/google-cloud-core/src/test/java/com/google/cloud/ServiceOptionsTest.java",
+ "private_key_id": "somekeyid",
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC+K2hSuFpAdrJI\nnCgcDz2M7t7bjdlsadsasad+fvRSW6TjNQZ3p5LLQY1kSZRqBqylRkzteMOyHgaR\n0Pmxh3ILCND5men43j3h4eDbrhQBuxfEMalkG92sL+PNQSETY2tnvXryOvmBRwa/\nQP/9dJfIkIDJ9Fw9N4Bhhhp6mCcRpdQjV38H7JsyJ7lih/oNjECgYAt\nknddadwkwewcVxHFhcZJO+XWf6ofLUXpRwiTZakGMn8EE1uVa2LgczOjwWHGi99MFjxSer5m9\n1tCa3/KEGKiS/YL71JvjwX3mb+cewlkcmweBKZHM2JPTk0ZednFSpVZMtycjkbLa\ndYOS8V85AgMBewECggEBAKksaldajfDZDV6nGqbFjMiizAKJolr/M3OQw16K6o3/\n0S31xIe3sSlgW0+UbYlF4U8KifhManD1apVSC3csafaspP4RZUHFhtBywLO9pR5c\nr6S5aLp+gPWFyIp1pfXbWGvc5VY/v9x7ya1VEa6rXvLsKupSeWAW4tMj3eo/64ge\nsdaceaLYw52KeBYiT6+vpsnYrEkAHO1fF/LavbLLOFJmFTMxmsNaG0tuiJHgjshB\n82DpMCbXG9YcCgI/DbzuIjsdj2JC1cascSP//3PmefWysucBQe7Jryb6NQtASmnv\nCdDw/0jmZTEjpe4S1lxfHplAhHFtdgYTvyYtaLZiVVkCgYEA8eVpof2rceecw/I6\n5ng1q3Hl2usdWV/4mZMvR0fOemacLLfocX6IYxT1zA1FFJlbXSRsJMf/Qq39mOR2\nSpW+hr4jCoHeRVYLgsbggtrevGmILAlNoqCMpGZ6vDmJpq6ECV9olliDvpPgWOP+\nmYPDreFBGxWvQrADNbRt2dmGsrsCgYEAyUHqB2wvJHFqdmeBsaacewzV8x9WgmeX\ngUIi9REwXlGDW0Mz50dxpxcKCAYn65+7TCnY5O/jmL0VRxU1J2mSWyWTo1C+17L0\n3fUqjxL1pkefwecxwecvC+gFFYdJ4CQ/MHHXU81Lwl1iWdFCd2UoGddYaOF+KNeM\nHC7cmqra+JsCgYEAlUNywzq8nUg7282E+uICfCB0LfwejuymR93CtsFgb7cRd6ak\nECR8FGfCpH8ruWJINllbQfcHVCX47ndLZwqv3oVFKh6pAS/vVI4dpOepP8++7y1u\ncoOvtreXCX6XqfrWDtKIvv0vjlHBhhhp6mCcRpdQjV38H7JsyJ7lih/oNjECgYAt\nkndj5uNl5SiuVxHFhcZJO+XWf6ofLUregtevZakGMn8EE1uVa2AY7eafmoU/nZPT\n00YB0TBATdCbn/nBSuKDESkhSg9s2GEKQZG5hBmL5uCMfo09z3SfxZIhJdlerreP\nJ7gSidI12N+EZxYd4xIJh/HFDgp7RRO87f+WJkofMQKBgGTnClK1VMaCRbJZPriw\nEfeFCoOX75MxKwXs6xgrw4W//AYGGUjDt83lD6AZP6tws7gJ2IwY/qP7+lyhjEqN\nHtfPZRGFkGZsdaksdlaksd323423d+15/UvrlRSFPNj1tWQmNKkXyRDW4IG1Oa2p\nrALStNBx5Y9t0/LQnFI4w3aG\n-----END PRIVATE KEY-----\n",
+ "client_email": "someclientid@developer.gserviceaccount.com",
+ "client_id": "someclientid.apps.googleusercontent.com",
+ "type": "service_account"
+ }
data/spec/outputs/bigquery/streamclient_spec.rb ADDED
@@ -0,0 +1,18 @@
+ # encoding: utf-8
+
+ require 'logstash/outputs/bigquery/streamclient'
+
+ describe LogStash::Outputs::BigQuery::StreamingClient do
+
+ # This test is mostly to make sure the Java types, signatures and classes
+ # haven't changed, given that JRuby is very relaxed.
+ describe '#initialize' do
+ let(:logger) { spy('logger') }
+
+ it 'does not throw an error when initializing' do
+ key_file = ::File.join('spec', 'fixtures', 'credentials.json')
+ key_file = ::File.absolute_path(key_file)
+ LogStash::Outputs::BigQuery::StreamingClient.new(key_file, 'my-project', logger)
+ end
+ end
+ end
data/spec/outputs/google_bigquery_spec.rb CHANGED
@@ -74,7 +74,7 @@ describe LogStash::Outputs::GoogleBigQuery do
 
  it 'creates a table if it does not exist' do
  allow(subject).to receive(:create_table_if_not_exists).and_return(nil)
- allow(bq_client).to receive(:append).and_return(true)
+ allow(bq_client).to receive(:append).and_return([])
  allow(subject).to receive(:write_to_errors_file).and_return(nil)
  expect(subject).to receive(:create_table_if_not_exists)
 
@@ -83,7 +83,7 @@
  it 'writes rows to a file on failed insert' do
  allow(subject).to receive(:create_table_if_not_exists).and_return(nil)
- allow(bq_client).to receive(:append).and_return(false)
+ allow(bq_client).to receive(:append).and_return([0])
  allow(subject).to receive(:write_to_errors_file).and_return(nil)
  expect(subject).to receive(:write_to_errors_file)
 
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-output-google_bigquery
  version: !ruby/object:Gem::Version
- version: 4.0.1
+ version: 4.1.4
  platform: java
  authors:
  - Elastic
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-05-03 00:00:00.000000000 Z
+ date: 2021-01-14 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
@@ -17,8 +17,8 @@ dependencies:
  - !ruby/object:Gem::Version
  version: '0'
  name: logstash-codec-plain
- prerelease: false
  type: :runtime
+ prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
@@ -31,8 +31,8 @@ dependencies:
  - !ruby/object:Gem::Version
  version: '2'
  name: mime-types
- prerelease: false
  type: :runtime
+ prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
@@ -48,8 +48,8 @@ dependencies:
  - !ruby/object:Gem::Version
  version: '2.99'
  name: logstash-core-plugin-api
- prerelease: false
  type: :runtime
+ prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
@@ -65,8 +65,8 @@ dependencies:
  - !ruby/object:Gem::Version
  version: '0'
  name: logstash-devutils
- prerelease: false
  type: :development
+ prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
@@ -79,8 +79,8 @@ dependencies:
  - !ruby/object:Gem::Version
  version: 0.3.4
  name: jar-dependencies
- prerelease: false
  type: :development
+ prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
@@ -107,8 +107,10 @@ files:
  - lib/logstash/outputs/bigquery/streamclient.rb
  - lib/logstash/outputs/google_bigquery.rb
  - logstash-output-google_bigquery.gemspec
+ - spec/fixtures/credentials.json
  - spec/outputs/bigquery/batcher_spec.rb
  - spec/outputs/bigquery/schema_spec.rb
+ - spec/outputs/bigquery/streamclient_spec.rb
  - spec/outputs/google_bigquery_spec.rb
  - vendor/jar-dependencies/com/fasterxml/jackson/core/jackson-core/2.1.3/jackson-core-2.1.3.jar
  - vendor/jar-dependencies/com/google/api-client/google-api-client/1.23.0/google-api-client-1.23.0.jar
@@ -167,12 +169,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubyforge_project:
- rubygems_version: 2.6.13
+ rubygems_version: 3.0.6
  signing_key:
  specification_version: 4
  summary: Writes events to Google BigQuery
  test_files:
+ - spec/fixtures/credentials.json
  - spec/outputs/bigquery/batcher_spec.rb
  - spec/outputs/bigquery/schema_spec.rb
+ - spec/outputs/bigquery/streamclient_spec.rb
  - spec/outputs/google_bigquery_spec.rb