logstash-input-s3 3.3.7 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/docs/index.asciidoc +10 -0
- data/lib/logstash/inputs/s3.rb +20 -7
- data/logstash-input-s3.gemspec +1 -1
- data/spec/inputs/s3_spec.rb +44 -20
- metadata +2 -2
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e291d3321f85c506ace8f9e2d4d7e72bd7455ed4d7caf3319e021f5b9295ea7b
+  data.tar.gz: 2907311101de6a87b984864deecb9ac33b490f4949375f3f3d3482939b2a7ae6
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 48bf1a17935b31018118eb00c84bae6e977eb4aca39f57cce09c769b054f4486874b5762a90cc778c235fcda7b731625fcbcf70b571d7534ac0c8550a78c9ffc
+  data.tar.gz: 4841a05b01e106b1ff5f3012ae443c091d6cea3a6036b9990729f4aaa95e05272477a6ddf7213ca246803146b34b83f5ff45dedd5382bff7b3f42a6ca59355df
data/CHANGELOG.md
CHANGED

@@ -1,3 +1,7 @@
+## 3.4.0
+- Skips objects that are archived to AWS Glacier with a helpful log message (previously they would log as matched, but then fail to load events) [#160](https://github.com/logstash-plugins/logstash-input-s3/pull/160)
+- Added `watch_for_new_files` option, enabling single-batch imports [#159](https://github.com/logstash-plugins/logstash-input-s3/pull/159)
+
 ## 3.3.7
 - Added ability to optionally include S3 object properties inside @metadata [#155](https://github.com/logstash-plugins/logstash-input-s3/pull/155)
 
data/docs/index.asciidoc
CHANGED

@@ -26,6 +26,8 @@ Stream events from files from a S3 bucket.
 Each line from each file generates an event.
 Files ending in `.gz` are handled as gzip'ed files.
 
+Files that are archived to AWS Glacier will be skipped.
+
 [id="plugins-{type}s-{plugin}-options"]
 ==== S3 Input Configuration Options
 
@@ -55,6 +57,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
 | <<plugins-{type}s-{plugin}-session_token>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-temporary_directory>> |<<string,string>>|No
+| <<plugins-{type}s-{plugin}-watch_for_new_files>> |<<boolean,boolean>>|No
 |=======================================================================
 
 Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -273,7 +276,14 @@ If specified, this setting must be a filename path and not just a directory.
 
 Set the directory where logstash will store the tmp files before processing them.
 
+[id="plugins-{type}s-{plugin}-watch_for_new_files"]
+===== `watch_for_new_files`
+
+* Value type is <<boolean,boolean>>
+* Default value is `true`
 
+Whether or not to watch for new files.
+Disabling this option causes the input to close itself after processing the files from a single listing.
 
 [id="plugins-{type}s-{plugin}-common-options"]
 include::{include_path}/{type}.asciidoc[]
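The Glacier skip documented above happens per object summary at listing time. A minimal aws-sdk-s3 sketch of the same storage-class test follows; the bucket name, region, and prefix are hypothetical, and this is illustrative rather than plugin code:

    require "aws-sdk-s3"

    # Hypothetical bucket, region, and prefix, for illustration only.
    s3 = Aws::S3::Resource.new(:region => "us-east-1")

    # Listing a bucket yields object summaries that expose #storage_class;
    # the plugin now skips keys whose storage class starts with "GLACIER",
    # since fetching an unrestored Glacier object fails.
    s3.bucket("my-bucket").objects(:prefix => "logs/").each do |summary|
      if summary.storage_class.start_with?("GLACIER")
        puts "skipping archived object: #{summary.key}"
      else
        puts "would process: #{summary.key}"
      end
    end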
data/lib/logstash/inputs/s3.rb
CHANGED

@@ -63,6 +63,10 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
   # Value is in seconds.
   config :interval, :validate => :number, :default => 60
 
+  # Whether to watch for new files with the interval.
+  # If false, overrides any interval and only lists the s3 bucket once.
+  config :watch_for_new_files, :validate => :boolean, :default => true
+
   # Ruby style regexp of keys to exclude from the bucket
   config :exclude_pattern, :validate => :string, :default => nil
 
@@ -101,6 +105,10 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
     end
 
     FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
+
+    if !@watch_for_new_files && original_params.include?('interval')
+      logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.")
+    end
   end
 
   public
@@ -108,6 +116,7 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
     @current_thread = Thread.current
     Stud.interval(@interval) do
       process_files(queue)
+      stop unless @watch_for_new_files
     end
   end # def run
 
@@ -119,14 +128,18 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
     @s3bucket.objects(:prefix => @prefix).each do |log|
       found = true
       @logger.debug("S3 input: Found key", :key => log.key)
-      if !ignore_filename?(log.key)
-        if sincedb.newer?(log.last_modified) && log.content_length > 0
-          objects[log.key] = log.last_modified
-          @logger.debug("S3 input: Adding to objects[]", :key => log.key)
-          @logger.debug("objects[] length is: ", :length => objects.length)
-        end
-      else
+      if ignore_filename?(log.key)
         @logger.debug('S3 input: Ignoring', :key => log.key)
+      elsif log.content_length <= 0
+        @logger.debug('S3 Input: Object Zero Length', :key => log.key)
+      elsif !sincedb.newer?(log.last_modified)
+        @logger.debug('S3 Input: Object Not Modified', :key => log.key)
+      elsif log.storage_class.start_with?('GLACIER')
+        @logger.debug('S3 Input: Object Archived to Glacier', :key => log.key)
+      else
+        objects[log.key] = log.last_modified
+        @logger.debug("S3 input: Adding to objects[]", :key => log.key)
+        @logger.debug("objects[] length is: ", :length => objects.length)
       end
     end
     @logger.info('S3 input: No files found in bucket', :prefix => prefix) unless found
data/logstash-input-s3.gemspec
CHANGED

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name          = 'logstash-input-s3'
-  s.version       = '3.3.7'
+  s.version       = '3.4.0'
   s.licenses      = ['Apache-2.0']
   s.summary       = "Streams events from files in a S3 bucket"
   s.description   = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
data/spec/inputs/s3_spec.rb
CHANGED

@@ -114,11 +114,13 @@ describe LogStash::Inputs::S3 do
   describe "#list_new_files" do
     before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
 
-    let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now, :content_length => 10) }
+    let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now, :content_length => 10, :storage_class => 'STANDARD') }
+    let!(:archived_object) { double(:key => 'this-should-be-archived', :last_modified => Time.now, :content_length => 10, :storage_class => 'GLACIER') }
     let(:objects_list) {
       [
-        double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100),
-        double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50),
+        double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
+        double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
+        archived_object,
         present_object
       ]
     }
@@ -126,20 +128,32 @@ describe LogStash::Inputs::S3 do
     it 'should allow user to exclude files from the s3 bucket' do
       plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
       plugin.register
-
+
+      files = plugin.list_new_files
+      expect(files).to include(present_object.key)
+      expect(files).to_not include('exclude-this-file-1') # matches exclude pattern
+      expect(files).to_not include('exclude/logstash') # matches exclude pattern
+      expect(files).to_not include(archived_object.key) # archived
+      expect(files.size).to eq(1)
     end
 
     it 'should support not providing a exclude pattern' do
       plugin = LogStash::Inputs::S3.new(config)
       plugin.register
-
+
+      files = plugin.list_new_files
+      expect(files).to include(present_object.key)
+      expect(files).to include('exclude-this-file-1') # no exclude pattern given
+      expect(files).to include('exclude/logstash') # no exclude pattern given
+      expect(files).to_not include(archived_object.key) # archived
+      expect(files.size).to eq(3)
     end
 
     context 'when all files are excluded from a bucket' do
       let(:objects_list) {
         [
-          double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100),
-          double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50),
+          double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
+          double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
         ]
       }
 
@@ -168,7 +182,7 @@ describe LogStash::Inputs::S3 do
     context "If the bucket is the same as the backup bucket" do
       it 'should ignore files from the bucket if they match the backup prefix' do
         objects_list = [
-          double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5),
+          double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
           present_object
         ]
 
@@ -177,24 +191,34 @@ describe LogStash::Inputs::S3 do
         plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'mybackup',
                                                          'backup_to_bucket' => config['bucket']}))
         plugin.register
-
+
+        files = plugin.list_new_files
+        expect(files).to include(present_object.key)
+        expect(files).to_not include('mybackup-log-1') # matches backup prefix
+        expect(files.size).to eq(1)
       end
     end
 
     it 'should ignore files older than X' do
      plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'exclude-this-file'}))
 
-
+
+      allow_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).and_return(Time.now - day)
       plugin.register
 
-
+      files = plugin.list_new_files
+      expect(files).to include(present_object.key)
+      expect(files).to_not include('exclude-this-file-1') # too old
+      expect(files).to_not include('exclude/logstash') # too old
+      expect(files).to_not include(archived_object.key) # archived
+      expect(files.size).to eq(1)
     end
 
     it 'should ignore file if the file match the prefix' do
       prefix = 'mysource/'
 
       objects_list = [
-        double(:key => prefix, :last_modified => Time.now, :content_length => 5),
+        double(:key => prefix, :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
         present_object
       ]
 
@@ -207,9 +231,9 @@ describe LogStash::Inputs::S3 do
 
     it 'should sort return object sorted by last_modification date with older first' do
       objects = [
-        double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5),
-        double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5),
-        double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5)
+        double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5, :storage_class => 'STANDARD'),
+        double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+        double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD')
       ]
 
       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
@@ -315,7 +339,7 @@ describe LogStash::Inputs::S3 do
   %w(AccessDenied NoSuchKey).each do |error|
     context "when retrieving an object, #{error} is returned" do
       let(:objects) { [log] }
-      let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5) }
+      let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
 
       let(:config) {
         {
@@ -344,7 +368,7 @@ describe LogStash::Inputs::S3 do
 
   context 'when working with logs' do
     let(:objects) { [log] }
-    let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :data => { "etag" => 'c2c966251da0bc3229d12c2642ba50a4' }) }
+    let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :data => { "etag" => 'c2c966251da0bc3229d12c2642ba50a4' }, :storage_class => 'STANDARD') }
     let(:data) { File.read(log_file) }
 
     before do
@@ -389,7 +413,7 @@ describe LogStash::Inputs::S3 do
     end
 
     context "multiple compressed streams" do
-      let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
+      let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiple_compressed_streams.gz') }
 
       include_examples "generated events" do
@@ -398,14 +422,14 @@ describe LogStash::Inputs::S3 do
     end
 
     context 'compressed' do
-      let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
+      let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') }
 
       include_examples "generated events"
     end
 
     context 'compressed with gzip extension' do
-      let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
+      let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }
 
       include_examples "generated events"
metadata
CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-s3
 version: !ruby/object:Gem::Version
-  version: 3.3.7
+  version: 3.4.0
 platform: ruby
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-
+date: 2018-08-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement