logstash-input-s3 3.3.7 → 3.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/docs/index.asciidoc +10 -0
- data/lib/logstash/inputs/s3.rb +20 -7
- data/logstash-input-s3.gemspec +1 -1
- data/spec/inputs/s3_spec.rb +44 -20
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e291d3321f85c506ace8f9e2d4d7e72bd7455ed4d7caf3319e021f5b9295ea7b
|
4
|
+
data.tar.gz: 2907311101de6a87b984864deecb9ac33b490f4949375f3f3d3482939b2a7ae6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 48bf1a17935b31018118eb00c84bae6e977eb4aca39f57cce09c769b054f4486874b5762a90cc778c235fcda7b731625fcbcf70b571d7534ac0c8550a78c9ffc
|
7
|
+
data.tar.gz: 4841a05b01e106b1ff5f3012ae443c091d6cea3a6036b9990729f4aaa95e05272477a6ddf7213ca246803146b34b83f5ff45dedd5382bff7b3f42a6ca59355df
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 3.4.0
|
2
|
+
- Skips objects that are archived to AWS Glacier with a helpful log message (previously they would log as matched, but then fail to load events) [#160](https://github.com/logstash-plugins/logstash-input-s3/pull/160)
|
3
|
+
- Added `watch_for_new_files` option, enabling single-batch imports [#159](https://github.com/logstash-plugins/logstash-input-s3/pull/159)
|
4
|
+
|
1
5
|
## 3.3.7
|
2
6
|
- Added ability to optionally include S3 object properties inside @metadata [#155](https://github.com/logstash-plugins/logstash-input-s3/pull/155)
|
3
7
|
|
data/docs/index.asciidoc
CHANGED
@@ -26,6 +26,8 @@ Stream events from files from a S3 bucket.
|
|
26
26
|
Each line from each file generates an event.
|
27
27
|
Files ending in `.gz` are handled as gzip'ed files.
|
28
28
|
|
29
|
+
Files that are archived to AWS Glacier will be skipped.
|
30
|
+
|
29
31
|
[id="plugins-{type}s-{plugin}-options"]
|
30
32
|
==== S3 Input Configuration Options
|
31
33
|
|
@@ -55,6 +57,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
55
57
|
| <<plugins-{type}s-{plugin}-session_token>> |<<string,string>>|No
|
56
58
|
| <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
|
57
59
|
| <<plugins-{type}s-{plugin}-temporary_directory>> |<<string,string>>|No
|
60
|
+
| <<plugins-{type}s-{plugin}-watch_for_new_files>> |<<boolean,boolean>>|No
|
58
61
|
|=======================================================================
|
59
62
|
|
60
63
|
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
@@ -273,7 +276,14 @@ If specified, this setting must be a filename path and not just a directory.
|
|
273
276
|
|
274
277
|
Set the directory where logstash will store the tmp files before processing them.
|
275
278
|
|
279
|
+
[id="plugins-{type}s-{plugin}-watch_for_new_files"]
|
280
|
+
===== `watch_for_new_files`
|
281
|
+
|
282
|
+
* Value type is <<boolean,boolean>>
|
283
|
+
* Default value is `true`
|
276
284
|
|
285
|
+
Whether or not to watch for new files.
|
286
|
+
Disabling this option causes the input to close itself after processing the files from a single listing.
|
277
287
|
|
278
288
|
[id="plugins-{type}s-{plugin}-common-options"]
|
279
289
|
include::{include_path}/{type}.asciidoc[]
|
data/lib/logstash/inputs/s3.rb
CHANGED
@@ -63,6 +63,10 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
|
|
63
63
|
# Value is in seconds.
|
64
64
|
config :interval, :validate => :number, :default => 60
|
65
65
|
|
66
|
+
# Whether to watch for new files with the interval.
|
67
|
+
# If false, overrides any interval and only lists the s3 bucket once.
|
68
|
+
config :watch_for_new_files, :validate => :boolean, :default => true
|
69
|
+
|
66
70
|
# Ruby style regexp of keys to exclude from the bucket
|
67
71
|
config :exclude_pattern, :validate => :string, :default => nil
|
68
72
|
|
@@ -101,6 +105,10 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
|
|
101
105
|
end
|
102
106
|
|
103
107
|
FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
|
108
|
+
|
109
|
+
if !@watch_for_new_files && original_params.include?('interval')
|
110
|
+
logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.")
|
111
|
+
end
|
104
112
|
end
|
105
113
|
|
106
114
|
public
|
@@ -108,6 +116,7 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
|
|
108
116
|
@current_thread = Thread.current
|
109
117
|
Stud.interval(@interval) do
|
110
118
|
process_files(queue)
|
119
|
+
stop unless @watch_for_new_files
|
111
120
|
end
|
112
121
|
end # def run
|
113
122
|
|
@@ -119,14 +128,18 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
|
|
119
128
|
@s3bucket.objects(:prefix => @prefix).each do |log|
|
120
129
|
found = true
|
121
130
|
@logger.debug("S3 input: Found key", :key => log.key)
|
122
|
-
if
|
123
|
-
if sincedb.newer?(log.last_modified) && log.content_length > 0
|
124
|
-
objects[log.key] = log.last_modified
|
125
|
-
@logger.debug("S3 input: Adding to objects[]", :key => log.key)
|
126
|
-
@logger.debug("objects[] length is: ", :length => objects.length)
|
127
|
-
end
|
128
|
-
else
|
131
|
+
if ignore_filename?(log.key)
|
129
132
|
@logger.debug('S3 input: Ignoring', :key => log.key)
|
133
|
+
elsif log.content_length <= 0
|
134
|
+
@logger.debug('S3 Input: Object Zero Length', :key => log.key)
|
135
|
+
elsif !sincedb.newer?(log.last_modified)
|
136
|
+
@logger.debug('S3 Input: Object Not Modified', :key => log.key)
|
137
|
+
elsif log.storage_class.start_with?('GLACIER')
|
138
|
+
@logger.debug('S3 Input: Object Archived to Glacier', :key => log.key)
|
139
|
+
else
|
140
|
+
objects[log.key] = log.last_modified
|
141
|
+
@logger.debug("S3 input: Adding to objects[]", :key => log.key)
|
142
|
+
@logger.debug("objects[] length is: ", :length => objects.length)
|
130
143
|
end
|
131
144
|
end
|
132
145
|
@logger.info('S3 input: No files found in bucket', :prefix => prefix) unless found
|
data/logstash-input-s3.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-input-s3'
|
4
|
-
s.version = '3.3.7'
|
4
|
+
s.version = '3.4.0'
|
5
5
|
s.licenses = ['Apache-2.0']
|
6
6
|
s.summary = "Streams events from files in a S3 bucket"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
data/spec/inputs/s3_spec.rb
CHANGED
@@ -114,11 +114,13 @@ describe LogStash::Inputs::S3 do
|
|
114
114
|
describe "#list_new_files" do
|
115
115
|
before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
|
116
116
|
|
117
|
-
let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now, :content_length => 10) }
|
117
|
+
let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now, :content_length => 10, :storage_class => 'STANDARD') }
|
118
|
+
let!(:archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now, :content_length => 10, :storage_class => 'GLACIER') }
|
118
119
|
let(:objects_list) {
|
119
120
|
[
|
120
|
-
double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100),
|
121
|
-
double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50),
|
121
|
+
double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
|
122
|
+
double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
|
123
|
+
archived_object,
|
122
124
|
present_object
|
123
125
|
]
|
124
126
|
}
|
@@ -126,20 +128,32 @@ describe LogStash::Inputs::S3 do
|
|
126
128
|
it 'should allow user to exclude files from the s3 bucket' do
|
127
129
|
plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
|
128
130
|
plugin.register
|
129
|
-
|
131
|
+
|
132
|
+
files = plugin.list_new_files
|
133
|
+
expect(files).to include(present_object.key)
|
134
|
+
expect(files).to_not include('exclude-this-file-1') # matches exclude pattern
|
135
|
+
expect(files).to_not include('exclude/logstash') # matches exclude pattern
|
136
|
+
expect(files).to_not include(archived_object.key) # archived
|
137
|
+
expect(files.size).to eq(1)
|
130
138
|
end
|
131
139
|
|
132
140
|
it 'should support not providing a exclude pattern' do
|
133
141
|
plugin = LogStash::Inputs::S3.new(config)
|
134
142
|
plugin.register
|
135
|
-
|
143
|
+
|
144
|
+
files = plugin.list_new_files
|
145
|
+
expect(files).to include(present_object.key)
|
146
|
+
expect(files).to include('exclude-this-file-1') # no exclude pattern given
|
147
|
+
expect(files).to include('exclude/logstash') # no exclude pattern given
|
148
|
+
expect(files).to_not include(archived_object.key) # archived
|
149
|
+
expect(files.size).to eq(3)
|
136
150
|
end
|
137
151
|
|
138
152
|
context 'when all files are excluded from a bucket' do
|
139
153
|
let(:objects_list) {
|
140
154
|
[
|
141
|
-
double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100),
|
142
|
-
double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50),
|
155
|
+
double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
|
156
|
+
double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
|
143
157
|
]
|
144
158
|
}
|
145
159
|
|
@@ -168,7 +182,7 @@ describe LogStash::Inputs::S3 do
|
|
168
182
|
context "If the bucket is the same as the backup bucket" do
|
169
183
|
it 'should ignore files from the bucket if they match the backup prefix' do
|
170
184
|
objects_list = [
|
171
|
-
double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5),
|
185
|
+
double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
|
172
186
|
present_object
|
173
187
|
]
|
174
188
|
|
@@ -177,24 +191,34 @@ describe LogStash::Inputs::S3 do
|
|
177
191
|
plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'mybackup',
|
178
192
|
'backup_to_bucket' => config['bucket']}))
|
179
193
|
plugin.register
|
180
|
-
|
194
|
+
|
195
|
+
files = plugin.list_new_files
|
196
|
+
expect(files).to include(present_object.key)
|
197
|
+
expect(files).to_not include('mybackup-log-1') # matches backup prefix
|
198
|
+
expect(files.size).to eq(1)
|
181
199
|
end
|
182
200
|
end
|
183
201
|
|
184
202
|
it 'should ignore files older than X' do
|
185
203
|
plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'exclude-this-file'}))
|
186
204
|
|
187
|
-
|
205
|
+
|
206
|
+
allow_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).and_return(Time.now - day)
|
188
207
|
plugin.register
|
189
208
|
|
190
|
-
|
209
|
+
files = plugin.list_new_files
|
210
|
+
expect(files).to include(present_object.key)
|
211
|
+
expect(files).to_not include('exclude-this-file-1') # too old
|
212
|
+
expect(files).to_not include('exclude/logstash') # too old
|
213
|
+
expect(files).to_not include(archived_object.key) # archived
|
214
|
+
expect(files.size).to eq(1)
|
191
215
|
end
|
192
216
|
|
193
217
|
it 'should ignore file if the file match the prefix' do
|
194
218
|
prefix = 'mysource/'
|
195
219
|
|
196
220
|
objects_list = [
|
197
|
-
double(:key => prefix, :last_modified => Time.now, :content_length => 5),
|
221
|
+
double(:key => prefix, :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
|
198
222
|
present_object
|
199
223
|
]
|
200
224
|
|
@@ -207,9 +231,9 @@ describe LogStash::Inputs::S3 do
|
|
207
231
|
|
208
232
|
it 'should sort return object sorted by last_modification date with older first' do
|
209
233
|
objects = [
|
210
|
-
double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5),
|
211
|
-
double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5),
|
212
|
-
double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5)
|
234
|
+
double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5, :storage_class => 'STANDARD'),
|
235
|
+
double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
|
236
|
+
double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD')
|
213
237
|
]
|
214
238
|
|
215
239
|
allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
|
@@ -315,7 +339,7 @@ describe LogStash::Inputs::S3 do
|
|
315
339
|
%w(AccessDenied NoSuchKey).each do |error|
|
316
340
|
context "when retrieving an object, #{error} is returned" do
|
317
341
|
let(:objects) { [log] }
|
318
|
-
let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5) }
|
342
|
+
let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
|
319
343
|
|
320
344
|
let(:config) {
|
321
345
|
{
|
@@ -344,7 +368,7 @@ describe LogStash::Inputs::S3 do
|
|
344
368
|
|
345
369
|
context 'when working with logs' do
|
346
370
|
let(:objects) { [log] }
|
347
|
-
let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :data => { "etag" => 'c2c966251da0bc3229d12c2642ba50a4' }) }
|
371
|
+
let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :data => { "etag" => 'c2c966251da0bc3229d12c2642ba50a4' }, :storage_class => 'STANDARD') }
|
348
372
|
let(:data) { File.read(log_file) }
|
349
373
|
|
350
374
|
before do
|
@@ -389,7 +413,7 @@ describe LogStash::Inputs::S3 do
|
|
389
413
|
end
|
390
414
|
|
391
415
|
context "multiple compressed streams" do
|
392
|
-
let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
|
416
|
+
let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
|
393
417
|
let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiple_compressed_streams.gz') }
|
394
418
|
|
395
419
|
include_examples "generated events" do
|
@@ -398,14 +422,14 @@ describe LogStash::Inputs::S3 do
|
|
398
422
|
end
|
399
423
|
|
400
424
|
context 'compressed' do
|
401
|
-
let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
|
425
|
+
let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
|
402
426
|
let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') }
|
403
427
|
|
404
428
|
include_examples "generated events"
|
405
429
|
end
|
406
430
|
|
407
431
|
context 'compressed with gzip extension' do
|
408
|
-
let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
|
432
|
+
let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
|
409
433
|
let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }
|
410
434
|
|
411
435
|
include_examples "generated events"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.7
|
4
|
+
version: 3.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|