logstash-input-s3 3.4.1 → 3.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 71fa5a89a8d744412ab8ac15f60f4d7e59ff2800de976466450de5142962285e
4
- data.tar.gz: 795c0caf4c280c90d4193aacacd8b09d64f33637859b3eda35349366862be9ba
3
+ metadata.gz: a6d9ab89a4d7925dbaaa02b021b1bbe803426a5c2e5285c1239d72950563fc27
4
+ data.tar.gz: 40aafdb8002e940fcc08f72d119299567dda77210dedcaf436df9a273858ecf1
5
5
  SHA512:
6
- metadata.gz: 27be2ecb1234ba44fb4004b0a972d9cb643e9429df468d1777f189f3f207ce849d95b5655077343960edf2c4817254d4eb5ff9fb73c87afb99e0ce35c64e0f38
7
- data.tar.gz: 243140b50837ed67fe8e30997f560e41cc66b7f9da3fd4c6668bb6345cedd911ed9d18c5deb07c594bd24a8caa6c4efc11a3c500e66656e782343b05405d40c6
6
+ metadata.gz: 12730fa07325e2549ac32c8b7a629464c3a1d789992c5e07bd3f1bf43ad11b353886882678d00c27a14a7e0e675eaaa4002187f5141d078308de8e3e480d67d3
7
+ data.tar.gz: 24c61bb4d995ef2615cf9ea769ad36d88965eb758e8a9d205394a887ca6f1cac47211e76c42c81e8499f1ab818e072ee50e52431de045e1018402af947f61df3
@@ -1,3 +1,8 @@
1
+ ## 3.5.0
2
+ - Added support for including objects restored from Glacier or Glacier Deep Archive [#199](https://github.com/logstash-plugins/logstash-input-s3/issues/199)
3
+ - Added `gzip_pattern` option, enabling more flexible determination of whether a file is gzipped [#165](https://github.com/logstash-plugins/logstash-input-s3/issues/165)
4
+ - Refactor: log the exception class and unify logging messages a bit [#201](https://github.com/logstash-plugins/logstash-input-s3/pull/201)
5
+
1
6
  ## 3.4.1
2
7
  - Fixed link formatting for input type (documentation)
3
8
 
data/LICENSE CHANGED
@@ -1,13 +1,202 @@
1
- Copyright (c) 2012-2018 Elasticsearch <http://www.elastic.co>
2
1
 
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
6
5
 
7
- http://www.apache.org/licenses/LICENSE-2.0
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
7
 
9
- Unless required by applicable law or agreed to in writing, software
10
- distributed under the License is distributed on an "AS IS" BASIS,
11
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- See the License for the specific language governing permissions and
13
- limitations under the License.
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright 2020 Elastic and contributors
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
data/README.md CHANGED
@@ -38,7 +38,7 @@ Need help? Try #logstash on freenode IRC or the https://discuss.elastic.co/c/log
38
38
 
39
39
  ## Developing
40
40
 
41
- ### 1. Plugin Developement and Testing
41
+ ### 1. Plugin Development and Testing
42
42
 
43
43
  #### Code
44
44
  - To get started, you'll need JRuby with the Bundler gem installed.
@@ -46,6 +46,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
46
46
  | <<plugins-{type}s-{plugin}-delete>> |<<boolean,boolean>>|No
47
47
  | <<plugins-{type}s-{plugin}-endpoint>> |<<string,string>>|No
48
48
  | <<plugins-{type}s-{plugin}-exclude_pattern>> |<<string,string>>|No
49
+ | <<plugins-{type}s-{plugin}-gzip_pattern>> |<<string,string>>|No
49
50
  | <<plugins-{type}s-{plugin}-include_object_properties>> |<<boolean,boolean>>|No
50
51
  | <<plugins-{type}s-{plugin}-interval>> |<<number,number>>|No
51
52
  | <<plugins-{type}s-{plugin}-prefix>> |<<string,string>>|No
@@ -158,6 +159,14 @@ guaranteed to work correctly with the AWS SDK.
158
159
 
159
160
  Ruby style regexp of keys to exclude from the bucket
160
161
 
162
+ [id="plugins-{type}s-{plugin}-gzip_pattern"]
163
+ ===== `gzip_pattern`
164
+
165
+ * Value type is <<string,string>>
166
+ * Default value is `"\.gz(ip)?$"`
167
+
168
+ Regular expression used to determine whether an input file is in gzip format.
169
+
161
170
  [id="plugins-{type}s-{plugin}-additional_settings"]
162
171
  ===== `additional_settings`
163
172
 
@@ -187,7 +196,7 @@ the connection to s3. See full list in https://docs.aws.amazon.com/sdkforruby/ap
187
196
  * Default value is `false`
188
197
 
189
198
  Whether or not to include the S3 object's properties (last_modified, content_type, metadata) into each Event at
190
- `[@metadata][s3]`. Regardless of this setting, `[@metdata][s3][key]` will always be present.
199
+ `[@metadata][s3]`. Regardless of this setting, `[@metadata][s3][key]` will always be present.
191
200
 
192
201
  [id="plugins-{type}s-{plugin}-interval"]
193
202
  ===== `interval`
@@ -3,6 +3,7 @@ require "logstash/inputs/base"
3
3
  require "logstash/namespace"
4
4
  require "logstash/plugin_mixins/aws_config"
5
5
  require "time"
6
+ require "date"
6
7
  require "tmpdir"
7
8
  require "stud/interval"
8
9
  require "stud/temporary"
@@ -10,12 +11,6 @@ require "aws-sdk"
10
11
  require "logstash/inputs/s3/patch"
11
12
 
12
13
  require 'java'
13
- java_import java.io.InputStream
14
- java_import java.io.InputStreamReader
15
- java_import java.io.FileInputStream
16
- java_import java.io.BufferedReader
17
- java_import java.util.zip.GZIPInputStream
18
- java_import java.util.zip.ZipException
19
14
 
20
15
  Aws.eager_autoload!
21
16
  # Stream events from files from a S3 bucket.
@@ -23,6 +18,14 @@ Aws.eager_autoload!
23
18
  # Each line from each file generates an event.
24
19
  # Files ending in `.gz` are handled as gzip'ed files.
25
20
  class LogStash::Inputs::S3 < LogStash::Inputs::Base
21
+
22
+ java_import java.io.InputStream
23
+ java_import java.io.InputStreamReader
24
+ java_import java.io.FileInputStream
25
+ java_import java.io.BufferedReader
26
+ java_import java.util.zip.GZIPInputStream
27
+ java_import java.util.zip.ZipException
28
+
26
29
  include LogStash::PluginMixins::AwsConfig::V2
27
30
 
28
31
  config_name "s3"
@@ -63,7 +66,7 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
63
66
  # Value is in seconds.
64
67
  config :interval, :validate => :number, :default => 60
65
68
 
66
- # Whether to watch for new files with the interval.
69
+ # Whether to watch for new files with the interval.
67
70
  # If false, overrides any interval and only lists the s3 bucket once.
68
71
  config :watch_for_new_files, :validate => :boolean, :default => true
69
72
 
@@ -79,13 +82,16 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
79
82
  # be present.
80
83
  config :include_object_properties, :validate => :boolean, :default => false
81
84
 
82
- public
85
+ # Regular expression used to determine whether an input file is in gzip format.
86
+ # default to an expression that matches *.gz and *.gzip file extensions
87
+ config :gzip_pattern, :validate => :string, :default => "\.gz(ip)?$"
88
+
83
89
  def register
84
90
  require "fileutils"
85
91
  require "digest/md5"
86
92
  require "aws-sdk-resources"
87
93
 
88
- @logger.info("Registering s3 input", :bucket => @bucket, :region => @region)
94
+ @logger.info("Registering", :bucket => @bucket, :region => @region)
89
95
 
90
96
  s3 = get_s3object
91
97
 
@@ -111,7 +117,6 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
111
117
  end
112
118
  end
113
119
 
114
- public
115
120
  def run(queue)
116
121
  @current_thread = Thread.current
117
122
  Stud.interval(@interval) do
@@ -120,36 +125,33 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
120
125
  end
121
126
  end # def run
122
127
 
123
- public
124
128
  def list_new_files
125
129
  objects = {}
126
130
  found = false
127
131
  begin
128
132
  @s3bucket.objects(:prefix => @prefix).each do |log|
129
133
  found = true
130
- @logger.debug("S3 input: Found key", :key => log.key)
134
+ @logger.debug('Found key', :key => log.key)
131
135
  if ignore_filename?(log.key)
132
- @logger.debug('S3 input: Ignoring', :key => log.key)
136
+ @logger.debug('Ignoring', :key => log.key)
133
137
  elsif log.content_length <= 0
134
- @logger.debug('S3 Input: Object Zero Length', :key => log.key)
138
+ @logger.debug('Object Zero Length', :key => log.key)
135
139
  elsif !sincedb.newer?(log.last_modified)
136
- @logger.debug('S3 Input: Object Not Modified', :key => log.key)
137
- elsif log.storage_class.start_with?('GLACIER')
138
- @logger.debug('S3 Input: Object Archived to Glacier', :key => log.key)
140
+ @logger.debug('Object Not Modified', :key => log.key)
141
+ elsif (log.storage_class == 'GLACIER' || log.storage_class == 'DEEP_ARCHIVE') && !file_restored?(log.object)
142
+ @logger.debug('Object Archived to Glacier', :key => log.key)
139
143
  else
140
144
  objects[log.key] = log.last_modified
141
- @logger.debug("S3 input: Adding to objects[]", :key => log.key)
142
- @logger.debug("objects[] length is: ", :length => objects.length)
145
+ @logger.debug("Added to objects[]", :key => log.key, :length => objects.length)
143
146
  end
144
147
  end
145
- @logger.info('S3 input: No files found in bucket', :prefix => prefix) unless found
148
+ @logger.info('No files found in bucket', :prefix => prefix) unless found
146
149
  rescue Aws::Errors::ServiceError => e
147
- @logger.error("S3 input: Unable to list objects in bucket", :prefix => prefix, :message => e.message)
150
+ @logger.error("Unable to list objects in bucket", :exception => e.class, :message => e.message, :backtrace => e.backtrace, :prefix => prefix)
148
151
  end
149
152
  objects.keys.sort {|a,b| objects[a] <=> objects[b]}
150
153
  end # def fetch_new_files
151
154
 
152
- public
153
155
  def backup_to_bucket(object)
154
156
  unless @backup_to_bucket.nil?
155
157
  backup_key = "#{@backup_add_prefix}#{object.key}"
@@ -160,14 +162,12 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
160
162
  end
161
163
  end
162
164
 
163
- public
164
165
  def backup_to_dir(filename)
165
166
  unless @backup_to_dir.nil?
166
167
  FileUtils.cp(filename, @backup_to_dir)
167
168
  end
168
169
  end
169
170
 
170
- public
171
171
  def process_files(queue)
172
172
  objects = list_new_files
173
173
 
@@ -175,13 +175,11 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
175
175
  if stop?
176
176
  break
177
177
  else
178
- @logger.debug("S3 input processing", :bucket => @bucket, :key => key)
179
178
  process_log(queue, key)
180
179
  end
181
180
  end
182
181
  end # def process_files
183
182
 
184
- public
185
183
  def stop
186
184
  # @current_thread is initialized in the `#run` method,
187
185
  # this variable is needed because `#stop` is called in another thread
@@ -248,24 +246,20 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
248
246
  return true
249
247
  end # def process_local_log
250
248
 
251
- private
252
249
  def event_is_metadata?(event)
253
250
  return false unless event.get("message").class == String
254
251
  line = event.get("message")
255
252
  version_metadata?(line) || fields_metadata?(line)
256
253
  end
257
254
 
258
- private
259
255
  def version_metadata?(line)
260
256
  line.start_with?('#Version: ')
261
257
  end
262
258
 
263
- private
264
259
  def fields_metadata?(line)
265
260
  line.start_with?('#Fields: ')
266
261
  end
267
262
 
268
- private
269
263
  def update_metadata(metadata, event)
270
264
  line = event.get('message').strip
271
265
 
@@ -278,7 +272,6 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
278
272
  end
279
273
  end
280
274
 
281
- private
282
275
  def read_file(filename, &block)
283
276
  if gzip?(filename)
284
277
  read_gzip_file(filename, block)
@@ -287,7 +280,7 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
287
280
  end
288
281
  rescue => e
289
282
  # skip any broken file
290
- @logger.error("Failed to read the file. Skip processing.", :filename => filename, :exception => e.message)
283
+ @logger.error("Failed to read file, processing skipped", :exception => e.class, :message => e.message, :filename => filename)
291
284
  end
292
285
 
293
286
  def read_plain_file(filename, block)
@@ -296,7 +289,6 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
296
289
  end
297
290
  end
298
291
 
299
- private
300
292
  def read_gzip_file(filename, block)
301
293
  file_stream = FileInputStream.new(filename)
302
294
  gzip_stream = GZIPInputStream.new(file_stream)
@@ -313,24 +305,20 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
313
305
  file_stream.close unless file_stream.nil?
314
306
  end
315
307
 
316
- private
317
308
  def gzip?(filename)
318
- filename.end_with?('.gz','.gzip')
309
+ Regexp.new(@gzip_pattern).match(filename)
319
310
  end
320
-
321
- private
322
- def sincedb
311
+
312
+ def sincedb
323
313
  @sincedb ||= if @sincedb_path.nil?
324
314
  @logger.info("Using default generated file for the sincedb", :filename => sincedb_file)
325
315
  SinceDB::File.new(sincedb_file)
326
316
  else
327
- @logger.info("Using the provided sincedb_path",
328
- :sincedb_path => @sincedb_path)
317
+ @logger.info("Using the provided sincedb_path", :sincedb_path => @sincedb_path)
329
318
  SinceDB::File.new(@sincedb_path)
330
319
  end
331
320
  end
332
321
 
333
- private
334
322
  def sincedb_file
335
323
  digest = Digest::MD5.hexdigest("#{@bucket}+#{@prefix}")
336
324
  dir = File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "inputs", "s3")
@@ -363,11 +351,6 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
363
351
  symbolized
364
352
  end
365
353
 
366
- private
367
- def old_sincedb_file
368
- end
369
-
370
- private
371
354
  def ignore_filename?(filename)
372
355
  if @prefix == filename
373
356
  return true
@@ -384,8 +367,8 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
384
367
  end
385
368
  end
386
369
 
387
- private
388
370
  def process_log(queue, key)
371
+ @logger.debug("Processing", :bucket => @bucket, :key => key)
389
372
  object = @s3bucket.object(key)
390
373
 
391
374
  filename = File.join(temporary_directory, File.basename(key))
@@ -403,7 +386,6 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
403
386
  end
404
387
  end
405
388
 
406
- private
407
389
  # Stream the remote file to the local disk
408
390
  #
409
391
  # @param [S3Object] Reference to the remote S3 object to download
@@ -411,33 +393,48 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
411
393
  # @return [Boolean] True if the file was completely downloaded
412
394
  def download_remote_file(remote_object, local_filename)
413
395
  completed = false
414
- @logger.debug("S3 input: Download remote file", :remote_key => remote_object.key, :local_filename => local_filename)
396
+ @logger.debug("Downloading remote file", :remote_key => remote_object.key, :local_filename => local_filename)
415
397
  File.open(local_filename, 'wb') do |s3file|
416
398
  return completed if stop?
417
399
  begin
418
400
  remote_object.get(:response_target => s3file)
419
401
  completed = true
420
402
  rescue Aws::Errors::ServiceError => e
421
- @logger.warn("S3 input: Unable to download remote file", :remote_key => remote_object.key, :message => e.message)
403
+ @logger.warn("Unable to download remote file", :exception => e.class, :message => e.message, :remote_key => remote_object.key)
422
404
  end
423
405
  end
424
406
  completed
425
407
  end
426
408
 
427
- private
428
409
  def delete_file_from_bucket(object)
429
410
  if @delete and @backup_to_bucket.nil?
430
411
  object.delete()
431
412
  end
432
413
  end
433
414
 
434
- private
435
415
  def get_s3object
436
416
  options = symbolized_settings.merge(aws_options_hash || {})
437
417
  s3 = Aws::S3::Resource.new(options)
438
418
  end
439
419
 
440
- private
420
+ def file_restored?(object)
421
+ begin
422
+ restore = object.data.restore
423
+ if restore && restore.match(/ongoing-request\s?=\s?["']false["']/)
424
+ if restore = restore.match(/expiry-date\s?=\s?["'](.*?)["']/)
425
+ expiry_date = DateTime.parse(restore[1])
426
+ return true if DateTime.now < expiry_date # restored
427
+ else
428
+ @logger.debug("No expiry-date header for restore request: #{object.data.restore}")
429
+ return nil # no expiry-date found for ongoing request
430
+ end
431
+ end
432
+ rescue => e
433
+ @logger.debug("Could not determine Glacier restore status", :exception => e.class, :message => e.message)
434
+ end
435
+ return false
436
+ end
437
+
441
438
  module SinceDB
442
439
  class File
443
440
  def initialize(file)
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-input-s3'
4
- s.version = '3.4.1'
4
+ s.version = '3.5.0'
5
5
  s.licenses = ['Apache-2.0']
6
6
  s.summary = "Streams events from files in a S3 bucket"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -1,5 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require "logstash/devutils/rspec/spec_helper"
3
+ require "logstash/devutils/rspec/shared_examples"
3
4
  require "logstash/inputs/s3"
4
5
  require "logstash/codecs/multiline"
5
6
  require "logstash/errors"
@@ -114,13 +115,18 @@ describe LogStash::Inputs::S3 do
114
115
  describe "#list_new_files" do
115
116
  before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
116
117
 
117
- let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now, :content_length => 10, :storage_class => 'STANDARD') }
118
- let!(:archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now, :content_length => 10, :storage_class => 'GLACIER') }
118
+ let!(:present_object) {double(:key => 'this-should-be-present', :last_modified => Time.now, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
119
+ let!(:archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil)) ) }
120
+ let!(:deep_archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil)) ) }
121
+ let!(:restored_object) {double(:key => 'this-should-be-restored-from-archive', :last_modified => Time.now, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"')) ) }
122
+ let!(:deep_restored_object) {double(:key => 'this-should-be-restored-from-deep-archive', :last_modified => Time.now, :content_length => 10, :storage_class => 'DEEP_ARCHIVE', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"')) ) }
119
123
  let(:objects_list) {
120
124
  [
121
125
  double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
122
126
  double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
123
127
  archived_object,
128
+ restored_object,
129
+ deep_restored_object,
124
130
  present_object
125
131
  ]
126
132
  }
@@ -131,10 +137,13 @@ describe LogStash::Inputs::S3 do
131
137
 
132
138
  files = plugin.list_new_files
133
139
  expect(files).to include(present_object.key)
140
+ expect(files).to include(restored_object.key)
141
+ expect(files).to include(deep_restored_object.key)
134
142
  expect(files).to_not include('exclude-this-file-1') # matches exclude pattern
135
143
  expect(files).to_not include('exclude/logstash') # matches exclude pattern
136
144
  expect(files).to_not include(archived_object.key) # archived
137
- expect(files.size).to eq(1)
145
+ expect(files).to_not include(deep_archived_object.key) # archived
146
+ expect(files.size).to eq(3)
138
147
  end
139
148
 
140
149
  it 'should support not providing a exclude pattern' do
@@ -143,10 +152,13 @@ describe LogStash::Inputs::S3 do
143
152
 
144
153
  files = plugin.list_new_files
145
154
  expect(files).to include(present_object.key)
155
+ expect(files).to include(restored_object.key)
156
+ expect(files).to include(deep_restored_object.key)
146
157
  expect(files).to include('exclude-this-file-1') # no exclude pattern given
147
158
  expect(files).to include('exclude/logstash') # no exclude pattern given
148
159
  expect(files).to_not include(archived_object.key) # archived
149
- expect(files.size).to eq(3)
160
+ expect(files).to_not include(deep_archived_object.key) # archived
161
+ expect(files.size).to eq(5)
150
162
  end
151
163
 
152
164
  context 'when all files are excluded from a bucket' do
@@ -208,10 +220,13 @@ describe LogStash::Inputs::S3 do
208
220
 
209
221
  files = plugin.list_new_files
210
222
  expect(files).to include(present_object.key)
223
+ expect(files).to include(restored_object.key)
224
+ expect(files).to include(deep_restored_object.key)
211
225
  expect(files).to_not include('exclude-this-file-1') # too old
212
226
  expect(files).to_not include('exclude/logstash') # too old
213
227
  expect(files).to_not include(archived_object.key) # archived
214
- expect(files.size).to eq(1)
228
+ expect(files).to_not include(deep_archived_object.key) # archived
229
+ expect(files.size).to eq(3)
215
230
  end
216
231
 
217
232
  it 'should ignore file if the file match the prefix' do
@@ -301,7 +316,7 @@ describe LogStash::Inputs::S3 do
301
316
  it 'should process events' do
302
317
  events = fetch_events(config)
303
318
  expect(events.size).to eq(events_to_process)
304
- insist { events[0].get("[@metadata][s3][key]") } == log.key
319
+ expect(events[0].get("[@metadata][s3][key]")).to eql log.key
305
320
  end
306
321
 
307
322
  it "deletes the temporary file" do
@@ -420,7 +435,7 @@ describe LogStash::Inputs::S3 do
420
435
  let(:events_to_process) { 16 }
421
436
  end
422
437
  end
423
-
438
+
424
439
  context 'compressed' do
425
440
  let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
426
441
  let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') }
@@ -428,13 +443,20 @@ describe LogStash::Inputs::S3 do
428
443
  include_examples "generated events"
429
444
  end
430
445
 
431
- context 'compressed with gzip extension' do
446
+ context 'compressed with gzip extension and using default gzip_pattern option' do
432
447
  let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
433
448
  let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }
434
449
 
435
450
  include_examples "generated events"
436
451
  end
437
452
 
453
+ context 'compressed with gzip extension and using custom gzip_pattern option' do
454
+ let(:config) { super.merge({ "gzip_pattern" => "gee.zip$" }) }
455
+ let(:log) { double(:key => 'log.gee.zip', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
456
+ let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gee.zip') }
457
+ include_examples "generated events"
458
+ end
459
+
438
460
  context 'plain text' do
439
461
  let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
440
462
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.4.1
4
+ version: 3.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-14 00:00:00.000000000 Z
11
+ date: 2020-03-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -119,6 +119,7 @@ files:
119
119
  - lib/logstash/inputs/s3/patch.rb
120
120
  - logstash-input-s3.gemspec
121
121
  - spec/fixtures/cloudfront.log
122
+ - spec/fixtures/compressed.log.gee.zip
122
123
  - spec/fixtures/compressed.log.gz
123
124
  - spec/fixtures/compressed.log.gzip
124
125
  - spec/fixtures/invalid_utf8.gbk.log
@@ -159,6 +160,7 @@ specification_version: 4
159
160
  summary: Streams events from files in a S3 bucket
160
161
  test_files:
161
162
  - spec/fixtures/cloudfront.log
163
+ - spec/fixtures/compressed.log.gee.zip
162
164
  - spec/fixtures/compressed.log.gz
163
165
  - spec/fixtures/compressed.log.gzip
164
166
  - spec/fixtures/invalid_utf8.gbk.log