logstash-input-s3 3.3.7 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b6b2e69a2cc95f3fc7bdcc0a5b828e08a3795d88f5468795fafc4a518b7e4128
4
- data.tar.gz: 5a1d0103482e624fe8131eed9c28e636c71bb38b70980e463e1548bcb7efaff1
3
+ metadata.gz: e291d3321f85c506ace8f9e2d4d7e72bd7455ed4d7caf3319e021f5b9295ea7b
4
+ data.tar.gz: 2907311101de6a87b984864deecb9ac33b490f4949375f3f3d3482939b2a7ae6
5
5
  SHA512:
6
- metadata.gz: 7cb50c30b2acdd5f8da2602346b0fc9b3ef82f4dd5d1649e7cff8e7d746c28ef0e997cb8db8c1821639e3e8b8001ef19e2c8a0557d5b8a6f37ea9b5a27e451f8
7
- data.tar.gz: 41be7059efac9cd05b2379a5035cc11104bd7c9716939ac9a2c63aa94ee7bbf79832c7219492ce5576987d36bae038c530af64c11e021c89dee47aa53ea4bf51
6
+ metadata.gz: 48bf1a17935b31018118eb00c84bae6e977eb4aca39f57cce09c769b054f4486874b5762a90cc778c235fcda7b731625fcbcf70b571d7534ac0c8550a78c9ffc
7
+ data.tar.gz: 4841a05b01e106b1ff5f3012ae443c091d6cea3a6036b9990729f4aaa95e05272477a6ddf7213ca246803146b34b83f5ff45dedd5382bff7b3f42a6ca59355df
@@ -1,3 +1,7 @@
1
+ ## 3.4.0
2
+ - Skips objects that are archived to AWS Glacier with a helpful log message (previously they would log as matched, but then fail to load events) [#160](https://github.com/logstash-plugins/logstash-input-s3/pull/160)
3
+ - Added `watch_for_new_files` option, enabling single-batch imports [#159](https://github.com/logstash-plugins/logstash-input-s3/pull/159)
4
+
1
5
  ## 3.3.7
2
6
  - Added ability to optionally include S3 object properties inside @metadata [#155](https://github.com/logstash-plugins/logstash-input-s3/pull/155)
3
7
 
@@ -26,6 +26,8 @@ Stream events from files from a S3 bucket.
26
26
  Each line from each file generates an event.
27
27
  Files ending in `.gz` are handled as gzip'ed files.
28
28
 
29
+ Files that are archived to AWS Glacier will be skipped.
30
+
29
31
  [id="plugins-{type}s-{plugin}-options"]
30
32
  ==== S3 Input Configuration Options
31
33
 
@@ -55,6 +57,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
55
57
  | <<plugins-{type}s-{plugin}-session_token>> |<<string,string>>|No
56
58
  | <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
57
59
  | <<plugins-{type}s-{plugin}-temporary_directory>> |<<string,string>>|No
60
+ | <<plugins-{type}s-{plugin}-watch_for_new_files>> |<<boolean,boolean>>|No
58
61
  |=======================================================================
59
62
 
60
63
  Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -273,7 +276,14 @@ If specified, this setting must be a filename path and not just a directory.
273
276
 
274
277
  Set the directory where logstash will store the tmp files before processing them.
275
278
 
279
+ [id="plugins-{type}s-{plugin}-watch_for_new_files"]
280
+ ===== `watch_for_new_files`
281
+
282
+ * Value type is <<boolean,boolean>>
283
+ * Default value is `true`
276
284
 
285
+ Whether or not to watch for new files.
286
+ Disabling this option causes the input to close itself after processing the files from a single listing.
277
287
 
278
288
  [id="plugins-{type}s-{plugin}-common-options"]
279
289
  include::{include_path}/{type}.asciidoc[]
@@ -63,6 +63,10 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
63
63
  # Value is in seconds.
64
64
  config :interval, :validate => :number, :default => 60
65
65
 
66
+ # Whether to watch for new files with the interval.
67
+ # If false, overrides any interval and only lists the s3 bucket once.
68
+ config :watch_for_new_files, :validate => :boolean, :default => true
69
+
66
70
  # Ruby style regexp of keys to exclude from the bucket
67
71
  config :exclude_pattern, :validate => :string, :default => nil
68
72
 
@@ -101,6 +105,10 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
101
105
  end
102
106
 
103
107
  FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
108
+
109
+ if !@watch_for_new_files && original_params.include?('interval')
110
+ logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.")
111
+ end
104
112
  end
105
113
 
106
114
  public
@@ -108,6 +116,7 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
108
116
  @current_thread = Thread.current
109
117
  Stud.interval(@interval) do
110
118
  process_files(queue)
119
+ stop unless @watch_for_new_files
111
120
  end
112
121
  end # def run
113
122
 
@@ -119,14 +128,18 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
119
128
  @s3bucket.objects(:prefix => @prefix).each do |log|
120
129
  found = true
121
130
  @logger.debug("S3 input: Found key", :key => log.key)
122
- if !ignore_filename?(log.key)
123
- if sincedb.newer?(log.last_modified) && log.content_length > 0
124
- objects[log.key] = log.last_modified
125
- @logger.debug("S3 input: Adding to objects[]", :key => log.key)
126
- @logger.debug("objects[] length is: ", :length => objects.length)
127
- end
128
- else
131
+ if ignore_filename?(log.key)
129
132
  @logger.debug('S3 input: Ignoring', :key => log.key)
133
+ elsif log.content_length <= 0
134
+ @logger.debug('S3 Input: Object Zero Length', :key => log.key)
135
+ elsif !sincedb.newer?(log.last_modified)
136
+ @logger.debug('S3 Input: Object Not Modified', :key => log.key)
137
+ elsif log.storage_class.start_with?('GLACIER')
138
+ @logger.debug('S3 Input: Object Archived to Glacier', :key => log.key)
139
+ else
140
+ objects[log.key] = log.last_modified
141
+ @logger.debug("S3 input: Adding to objects[]", :key => log.key)
142
+ @logger.debug("objects[] length is: ", :length => objects.length)
130
143
  end
131
144
  end
132
145
  @logger.info('S3 input: No files found in bucket', :prefix => prefix) unless found
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-input-s3'
4
- s.version = '3.3.7'
4
+ s.version = '3.4.0'
5
5
  s.licenses = ['Apache-2.0']
6
6
  s.summary = "Streams events from files in a S3 bucket"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -114,11 +114,13 @@ describe LogStash::Inputs::S3 do
114
114
  describe "#list_new_files" do
115
115
  before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
116
116
 
117
- let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now, :content_length => 10) }
117
+ let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now, :content_length => 10, :storage_class => 'STANDARD') }
118
+ let!(:archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now, :content_length => 10, :storage_class => 'GLACIER') }
118
119
  let(:objects_list) {
119
120
  [
120
- double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100),
121
- double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50),
121
+ double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
122
+ double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
123
+ archived_object,
122
124
  present_object
123
125
  ]
124
126
  }
@@ -126,20 +128,32 @@ describe LogStash::Inputs::S3 do
126
128
  it 'should allow user to exclude files from the s3 bucket' do
127
129
  plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
128
130
  plugin.register
129
- expect(plugin.list_new_files).to eq([present_object.key])
131
+
132
+ files = plugin.list_new_files
133
+ expect(files).to include(present_object.key)
134
+ expect(files).to_not include('exclude-this-file-1') # matches exclude pattern
135
+ expect(files).to_not include('exclude/logstash') # matches exclude pattern
136
+ expect(files).to_not include(archived_object.key) # archived
137
+ expect(files.size).to eq(1)
130
138
  end
131
139
 
132
140
  it 'should support not providing a exclude pattern' do
133
141
  plugin = LogStash::Inputs::S3.new(config)
134
142
  plugin.register
135
- expect(plugin.list_new_files).to eq(objects_list.map(&:key))
143
+
144
+ files = plugin.list_new_files
145
+ expect(files).to include(present_object.key)
146
+ expect(files).to include('exclude-this-file-1') # no exclude pattern given
147
+ expect(files).to include('exclude/logstash') # no exclude pattern given
148
+ expect(files).to_not include(archived_object.key) # archived
149
+ expect(files.size).to eq(3)
136
150
  end
137
151
 
138
152
  context 'when all files are excluded from a bucket' do
139
153
  let(:objects_list) {
140
154
  [
141
- double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100),
142
- double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50),
155
+ double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
156
+ double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
143
157
  ]
144
158
  }
145
159
 
@@ -168,7 +182,7 @@ describe LogStash::Inputs::S3 do
168
182
  context "If the bucket is the same as the backup bucket" do
169
183
  it 'should ignore files from the bucket if they match the backup prefix' do
170
184
  objects_list = [
171
- double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5),
185
+ double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
172
186
  present_object
173
187
  ]
174
188
 
@@ -177,24 +191,34 @@ describe LogStash::Inputs::S3 do
177
191
  plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'mybackup',
178
192
  'backup_to_bucket' => config['bucket']}))
179
193
  plugin.register
180
- expect(plugin.list_new_files).to eq([present_object.key])
194
+
195
+ files = plugin.list_new_files
196
+ expect(files).to include(present_object.key)
197
+ expect(files).to_not include('mybackup-log-1') # matches backup prefix
198
+ expect(files.size).to eq(1)
181
199
  end
182
200
  end
183
201
 
184
202
  it 'should ignore files older than X' do
185
203
  plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'exclude-this-file'}))
186
204
 
187
- expect_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).exactly(objects_list.size) { Time.now - day }
205
+
206
+ allow_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).and_return(Time.now - day)
188
207
  plugin.register
189
208
 
190
- expect(plugin.list_new_files).to eq([present_object.key])
209
+ files = plugin.list_new_files
210
+ expect(files).to include(present_object.key)
211
+ expect(files).to_not include('exclude-this-file-1') # too old
212
+ expect(files).to_not include('exclude/logstash') # too old
213
+ expect(files).to_not include(archived_object.key) # archived
214
+ expect(files.size).to eq(1)
191
215
  end
192
216
 
193
217
  it 'should ignore file if the file match the prefix' do
194
218
  prefix = 'mysource/'
195
219
 
196
220
  objects_list = [
197
- double(:key => prefix, :last_modified => Time.now, :content_length => 5),
221
+ double(:key => prefix, :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
198
222
  present_object
199
223
  ]
200
224
 
@@ -207,9 +231,9 @@ describe LogStash::Inputs::S3 do
207
231
 
208
232
  it 'should sort return object sorted by last_modification date with older first' do
209
233
  objects = [
210
- double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5),
211
- double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5),
212
- double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5)
234
+ double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5, :storage_class => 'STANDARD'),
235
+ double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
236
+ double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD')
213
237
  ]
214
238
 
215
239
  allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
@@ -315,7 +339,7 @@ describe LogStash::Inputs::S3 do
315
339
  %w(AccessDenied NoSuchKey).each do |error|
316
340
  context "when retrieving an object, #{error} is returned" do
317
341
  let(:objects) { [log] }
318
- let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5) }
342
+ let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
319
343
 
320
344
  let(:config) {
321
345
  {
@@ -344,7 +368,7 @@ describe LogStash::Inputs::S3 do
344
368
 
345
369
  context 'when working with logs' do
346
370
  let(:objects) { [log] }
347
- let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :data => { "etag" => 'c2c966251da0bc3229d12c2642ba50a4' }) }
371
+ let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :data => { "etag" => 'c2c966251da0bc3229d12c2642ba50a4' }, :storage_class => 'STANDARD') }
348
372
  let(:data) { File.read(log_file) }
349
373
 
350
374
  before do
@@ -389,7 +413,7 @@ describe LogStash::Inputs::S3 do
389
413
  end
390
414
 
391
415
  context "multiple compressed streams" do
392
- let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
416
+ let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
393
417
  let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiple_compressed_streams.gz') }
394
418
 
395
419
  include_examples "generated events" do
@@ -398,14 +422,14 @@ describe LogStash::Inputs::S3 do
398
422
  end
399
423
 
400
424
  context 'compressed' do
401
- let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
425
+ let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
402
426
  let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') }
403
427
 
404
428
  include_examples "generated events"
405
429
  end
406
430
 
407
431
  context 'compressed with gzip extension' do
408
- let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
432
+ let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
409
433
  let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }
410
434
 
411
435
  include_examples "generated events"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.7
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-20 00:00:00.000000000 Z
11
+ date: 2018-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement