logstash-input-s3 3.5.0 → 3.6.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -1
- data/docs/index.asciidoc +40 -23
- data/lib/logstash/inputs/s3.rb +22 -15
- data/logstash-input-s3.gemspec +1 -1
- data/spec/inputs/s3_spec.rb +86 -17
- data/spec/integration/s3_spec.rb +9 -3
- data/spec/support/helpers.rb +10 -3
- metadata +2 -2
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a4e1cc2ba334eb9e35fc68cc6a773e13b9b7de5bc6c3b4ee40cf98903d140323
+  data.tar.gz: a43fe645c1016095639e092fa4d3e8e8b77384dd17190a712a0eaa5163e68854
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e10a6e21aa62270ce5f78c269fb3abf0a28b0755f78b0cb7adcc76183be460472327bdabbc94fab51c35774ae930d25846084ad3b9b0e95bd9e021c995c08bbd
+  data.tar.gz: 23b6882049688b1cd57ccba4b8aa247103bf5b9f91011f1aa426e81280c3b4ca701bf88fd96e278968c7e38fd58a01980a830b4ce2bb9ad8e034cfe07113de79
data/CHANGELOG.md CHANGED

@@ -1,3 +1,12 @@
+## 3.6.0
+  - Fixed unprocessed file with the same `last_modified` in ingestion. [#220](https://github.com/logstash-plugins/logstash-input-s3/pull/220)
+
+## 3.5.2
+  - [DOC]Added note that only AWS S3 is supported. No other S3 compatible storage solutions are supported. [#208](https://github.com/logstash-plugins/logstash-input-s3/issues/208)
+
+## 3.5.1
+  - [DOC]Added example for `exclude_pattern` and reordered option descriptions [#204](https://github.com/logstash-plugins/logstash-input-s3/issues/204)
+
 ## 3.5.0
   - Added support for including objects restored from Glacier or Glacier Deep [#199](https://github.com/logstash-plugins/logstash-input-s3/issues/199)
   - Added `gzip_pattern` option, enabling more flexible determination of whether a file is gzipped [#165](https://github.com/logstash-plugins/logstash-input-s3/issues/165)
data/README.md CHANGED

@@ -1,6 +1,6 @@
 # Logstash Plugin
 
-[![Travis Build Status](https://travis-ci.
+[![Travis Build Status](https://travis-ci.com/logstash-plugins/logstash-input-s3.svg)](https://travis-ci.com/logstash-plugins/logstash-input-s3)
 
 This is a plugin for [Logstash](https://github.com/elastic/logstash).
 
data/docs/index.asciidoc CHANGED

@@ -23,6 +23,9 @@ include::{include_path}/plugin_header.asciidoc[]
 
 Stream events from files from a S3 bucket.
 
+IMPORTANT: The S3 input plugin only supports AWS S3.
+Other S3 compatible storage solutions are not supported.
+
 Each line from each file generates an event.
 Files ending in `.gz` are handled as gzip'ed files.
 
@@ -80,6 +83,29 @@ This plugin uses the AWS SDK and supports several ways to get credentials, which
 4. Environment variables `AMAZON_ACCESS_KEY_ID` and `AMAZON_SECRET_ACCESS_KEY`
 5. IAM Instance Profile (available when running inside EC2)
 
+
+[id="plugins-{type}s-{plugin}-additional_settings"]
+===== `additional_settings`
+
+* Value type is <<hash,hash>>
+* Default value is `{}`
+
+Key-value pairs of settings and corresponding values used to parametrize
+the connection to s3. See full list in https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html[the AWS SDK documentation]. Example:
+
+[source,ruby]
+    input {
+      s3 {
+        "access_key_id" => "1234"
+        "secret_access_key" => "secret"
+        "bucket" => "logstash-test"
+        "additional_settings" => {
+          "force_path_style" => true
+          "follow_redirects" => false
+        }
+      }
+    }
+
 [id="plugins-{type}s-{plugin}-aws_credentials_file"]
 ===== `aws_credentials_file`
 
@@ -157,7 +183,20 @@ guaranteed to work correctly with the AWS SDK.
 * Value type is <<string,string>>
 * Default value is `nil`
 
-Ruby style regexp of keys to exclude from the bucket
+Ruby style regexp of keys to exclude from the bucket.
+
+Note that files matching the pattern are skipped _after_ they have been listed.
+Consider using <<plugins-{type}s-{plugin}-prefix>> instead where possible.
+
+Example:
+
+[source,ruby]
+-----
+"exclude_pattern" => "\/2020\/04\/"
+-----
+
+This pattern excludes all logs containing "/2020/04/" in the path.
+
 
 [id="plugins-{type}s-{plugin}-gzip_pattern"]
 ===== `gzip_pattern`
@@ -167,28 +206,6 @@ Ruby style regexp of keys to exclude from the bucket
 
 Regular expression used to determine whether an input file is in gzip format.
 
-[id="plugins-{type}s-{plugin}-additional_settings"]
-===== `additional_settings`
-
-* Value type is <<hash,hash>>
-* Default value is `{}`
-
-Key-value pairs of settings and corresponding values used to parametrize
-the connection to s3. See full list in https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html[the AWS SDK documentation]. Example:
-
-[source,ruby]
-    input {
-      s3 {
-        "access_key_id" => "1234"
-        "secret_access_key" => "secret"
-        "bucket" => "logstash-test"
-        "additional_settings" => {
-          "force_path_style" => true
-          "follow_redirects" => false
-        }
-      }
-    }
-
 [id="plugins-{type}s-{plugin}-include_object_properties"]
 ===== `include_object_properties`
 
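The `exclude_pattern` guidance added above pairs naturally with `prefix`: exclusion is applied client-side after the listing, while `prefix` narrows the S3 listing request itself. A minimal sketch combining the two (the bucket name and key layout are hypothetical):

```ruby
input {
  s3 {
    "bucket" => "my-app-logs"             # hypothetical bucket
    "prefix" => "app/"                    # narrows the listing server-side
    "exclude_pattern" => "\/2020\/04\/"   # listed keys matching this are skipped client-side
  }
}
```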
data/lib/logstash/inputs/s3.rb CHANGED

@@ -86,6 +86,8 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
   # default to an expression that matches *.gz and *.gzip file extensions
   config :gzip_pattern, :validate => :string, :default => "\.gz(ip)?$"
 
+  CUTOFF_SECOND = 3
+
   def register
     require "fileutils"
     require "digest/md5"
@@ -126,7 +128,7 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
   end # def run
 
   def list_new_files
-    objects =
+    objects = []
     found = false
     begin
       @s3bucket.objects(:prefix => @prefix).each do |log|
@@ -138,10 +140,12 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
           @logger.debug('Object Zero Length', :key => log.key)
         elsif !sincedb.newer?(log.last_modified)
           @logger.debug('Object Not Modified', :key => log.key)
+        elsif log.last_modified > (Time.now - CUTOFF_SECOND).utc # a file modified within the cutoff window will be processed in the next cycle
+          @logger.debug('Object Modified After Cutoff Time', :key => log.key)
         elsif (log.storage_class == 'GLACIER' || log.storage_class == 'DEEP_ARCHIVE') && !file_restored?(log.object)
           @logger.debug('Object Archived to Glacier', :key => log.key)
         else
-          objects
+          objects << log
           @logger.debug("Added to objects[]", :key => log.key, :length => objects.length)
         end
       end
@@ -149,7 +153,7 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
     rescue Aws::Errors::ServiceError => e
       @logger.error("Unable to list objects in bucket", :exception => e.class, :message => e.message, :backtrace => e.backtrace, :prefix => prefix)
     end
-    objects.
+    objects.sort_by { |log| log.last_modified }
   end # def fetch_new_files
 
   def backup_to_bucket(object)
@@ -171,11 +175,11 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
   def process_files(queue)
     objects = list_new_files
 
-    objects.each do |
+    objects.each do |log|
       if stop?
         break
       else
-        process_log(queue,
+        process_log(queue, log)
       end
     end
   end # def process_files
@@ -367,19 +371,22 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base
     end
   end
 
-  def process_log(queue,
-    @logger.debug("Processing", :bucket => @bucket, :key => key)
-    object = @s3bucket.object(key)
+  def process_log(queue, log)
+    @logger.debug("Processing", :bucket => @bucket, :key => log.key)
+    object = @s3bucket.object(log.key)
 
-    filename = File.join(temporary_directory, File.basename(key))
+    filename = File.join(temporary_directory, File.basename(log.key))
     if download_remote_file(object, filename)
       if process_local_log(queue, filename, object)
-
-
-
-
-
-
+        if object.last_modified == log.last_modified
+          backup_to_bucket(object)
+          backup_to_dir(filename)
+          delete_file_from_bucket(object)
+          FileUtils.remove_entry_secure(filename, true)
+          sincedb.write(log.last_modified)
+        else
+          @logger.info("#{log.key} is updated at #{object.last_modified} and will process in the next cycle")
+        end
       end
     else
       FileUtils.remove_entry_secure(filename, true)
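Taken together, the changes above add two safeguards around `last_modified`: the `CUTOFF_SECOND` guard in `list_new_files` defers objects modified within the last few seconds to the next polling cycle, and the equality check in `process_log` refuses to advance the sincedb when an object changed between listing and download. A standalone sketch of the same two checks, in plain Ruby with a hypothetical `Summary` struct standing in for the SDK's object summaries:

```ruby
CUTOFF_SECOND = 3 # mirrors the constant added in s3.rb
Summary = Struct.new(:key, :last_modified)

# Listing guard: objects modified inside the cutoff window are
# skipped for now and picked up on the next cycle.
def past_cutoff?(summary, now = Time.now.utc)
  summary.last_modified <= now - CUTOFF_SECOND
end

# Commit guard: only advance the sincedb when the downloaded object
# still carries the last_modified we saw at listing time.
def safe_to_commit?(summary, downloaded_last_modified)
  downloaded_last_modified == summary.last_modified
end

fresh = Summary.new('app/today.log', Time.now.utc - 1)
old   = Summary.new('app/yesterday.log', Time.now.utc - 86_400)

puts past_cutoff?(fresh)                     # => false, deferred to next cycle
puts past_cutoff?(old)                       # => true, processed now
puts safe_to_commit?(old, old.last_modified) # => true, sincedb may advance
```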
data/logstash-input-s3.gemspec CHANGED

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-input-s3'
-  s.version = '3.5.0'
+  s.version = '3.6.0'
   s.licenses = ['Apache-2.0']
   s.summary = "Streams events from files in a S3 bucket"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
data/spec/inputs/s3_spec.rb CHANGED

@@ -24,6 +24,7 @@ describe LogStash::Inputs::S3 do
       "sincedb_path" => File.join(sincedb_path, ".sincedb")
     }
   }
+  let(:cutoff) { LogStash::Inputs::S3::CUTOFF_SECOND }
 
 
   before do
@@ -33,10 +34,11 @@ describe LogStash::Inputs::S3 do
   end
 
   context "when interrupting the plugin" do
-    let(:config) { super.merge({ "interval" => 5 }) }
+    let(:config) { super().merge({ "interval" => 5 }) }
+    let(:s3_obj) { double(:key => "awesome-key", :last_modified => Time.now.round, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
 
     before do
-      expect_any_instance_of(LogStash::Inputs::S3).to receive(:list_new_files).and_return(TestInfiniteS3Object.new)
+      expect_any_instance_of(LogStash::Inputs::S3).to receive(:list_new_files).and_return(TestInfiniteS3Object.new(s3_obj))
     end
 
     it_behaves_like "an interruptible input plugin"
@@ -115,11 +117,12 @@ describe LogStash::Inputs::S3 do
   describe "#list_new_files" do
     before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
 
-    let!(:
-    let!(:
-    let!(:
-    let!(:
-    let!(:
+    let!(:present_object_after_cutoff) {double(:key => 'this-should-not-be-present', :last_modified => Time.now, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+    let!(:present_object) {double(:key => 'this-should-be-present', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+    let!(:archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil)) ) }
+    let!(:deep_archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil)) ) }
+    let!(:restored_object) {double(:key => 'this-should-be-restored-from-archive', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"')) ) }
+    let!(:deep_restored_object) {double(:key => 'this-should-be-restored-from-deep-archive', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'DEEP_ARCHIVE', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"')) ) }
     let(:objects_list) {
       [
         double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
@@ -127,7 +130,8 @@ describe LogStash::Inputs::S3 do
         archived_object,
         restored_object,
         deep_restored_object,
-        present_object
+        present_object,
+        present_object_after_cutoff
       ]
     }
 
@@ -135,7 +139,7 @@ describe LogStash::Inputs::S3 do
       plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
       plugin.register
 
-      files = plugin.list_new_files
+      files = plugin.list_new_files.map { |item| item.key }
       expect(files).to include(present_object.key)
       expect(files).to include(restored_object.key)
       expect(files).to include(deep_restored_object.key)
@@ -143,6 +147,7 @@ describe LogStash::Inputs::S3 do
       expect(files).to_not include('exclude/logstash') # matches exclude pattern
      expect(files).to_not include(archived_object.key) # archived
       expect(files).to_not include(deep_archived_object.key) # archived
+      expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
       expect(files.size).to eq(3)
     end
 
@@ -150,7 +155,7 @@ describe LogStash::Inputs::S3 do
       plugin = LogStash::Inputs::S3.new(config)
       plugin.register
 
-      files = plugin.list_new_files
+      files = plugin.list_new_files.map { |item| item.key }
       expect(files).to include(present_object.key)
       expect(files).to include(restored_object.key)
       expect(files).to include(deep_restored_object.key)
@@ -158,6 +163,7 @@ describe LogStash::Inputs::S3 do
       expect(files).to include('exclude/logstash') # no exclude pattern given
       expect(files).to_not include(archived_object.key) # archived
       expect(files).to_not include(deep_archived_object.key) # archived
+      expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
       expect(files.size).to eq(5)
     end
 
@@ -204,7 +210,7 @@ describe LogStash::Inputs::S3 do
                                                        'backup_to_bucket' => config['bucket']}))
       plugin.register
 
-      files = plugin.list_new_files
+      files = plugin.list_new_files.map { |item| item.key }
       expect(files).to include(present_object.key)
       expect(files).to_not include('mybackup-log-1') # matches backup prefix
       expect(files.size).to eq(1)
@@ -218,7 +224,7 @@ describe LogStash::Inputs::S3 do
       allow_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).and_return(Time.now - day)
       plugin.register
 
-      files = plugin.list_new_files
+      files = plugin.list_new_files.map { |item| item.key }
       expect(files).to include(present_object.key)
       expect(files).to include(restored_object.key)
       expect(files).to include(deep_restored_object.key)
@@ -226,6 +232,7 @@ describe LogStash::Inputs::S3 do
       expect(files).to_not include('exclude/logstash') # too old
       expect(files).to_not include(archived_object.key) # archived
       expect(files).to_not include(deep_archived_object.key) # archived
+      expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
       expect(files.size).to eq(3)
     end
 
@@ -241,13 +248,14 @@ describe LogStash::Inputs::S3 do
 
       plugin = LogStash::Inputs::S3.new(config.merge({ 'prefix' => prefix }))
       plugin.register
-      expect(plugin.list_new_files).to eq([present_object.key])
+      expect(plugin.list_new_files.map { |item| item.key }).to eq([present_object.key])
     end
 
     it 'should sort return object sorted by last_modification date with older first' do
       objects = [
         double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5, :storage_class => 'STANDARD'),
         double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+        double(:key => 'TODAY_BEFORE_CUTOFF', :last_modified => Time.now - cutoff, :content_length => 5, :storage_class => 'STANDARD'),
         double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD')
       ]
 
@@ -256,7 +264,7 @@ describe LogStash::Inputs::S3 do
 
       plugin = LogStash::Inputs::S3.new(config)
       plugin.register
-      expect(plugin.list_new_files).to eq(['TWO_DAYS_AGO', 'YESTERDAY', '
+      expect(plugin.list_new_files.map { |item| item.key }).to eq(['TWO_DAYS_AGO', 'YESTERDAY', 'TODAY_BEFORE_CUTOFF'])
     end
 
     describe "when doing backup on the s3" do
@@ -451,7 +459,7 @@ describe LogStash::Inputs::S3 do
     end
 
     context 'compressed with gzip extension and using custom gzip_pattern option' do
-      let(:config) { super.merge({ "gzip_pattern" => "gee.zip$" }) }
+      let(:config) { super().merge({ "gzip_pattern" => "gee.zip$" }) }
       let(:log) { double(:key => 'log.gee.zip', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gee.zip') }
       include_examples "generated events"
@@ -499,7 +507,7 @@ describe LogStash::Inputs::S3 do
     end
 
     context 'when include_object_properties is set to true' do
-      let(:config) { super.merge({ "include_object_properties" => true }) }
+      let(:config) { super().merge({ "include_object_properties" => true }) }
       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
 
       it 'should extract object properties onto [@metadata][s3]' do
@@ -513,7 +521,7 @@ describe LogStash::Inputs::S3 do
     end
 
     context 'when include_object_properties is set to false' do
-      let(:config) { super.merge({ "include_object_properties" => false }) }
+      let(:config) { super().merge({ "include_object_properties" => false }) }
       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
 
       it 'should NOT extract object properties onto [@metadata][s3]' do
@@ -525,6 +533,67 @@ describe LogStash::Inputs::S3 do
 
     include_examples "generated events"
   end
+  end
+
+  describe "data loss" do
+    let(:s3_plugin) { LogStash::Inputs::S3.new(config) }
+    let(:queue) { [] }
+
+    before do
+      s3_plugin.register
+    end
+
+    context 'events come after cutoff time' do
+      it 'should be processed in next cycle' do
+        s3_objects = [
+          double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now.round - 2 * day, :content_length => 5, :storage_class => 'STANDARD'),
+          double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+          double(:key => 'TODAY_BEFORE_CUTOFF', :last_modified => Time.now.round - cutoff, :content_length => 5, :storage_class => 'STANDARD'),
+          double(:key => 'TODAY', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD'),
+          double(:key => 'TODAY', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD')
+        ]
+        size = s3_objects.length
+
+        allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { s3_objects }
+        allow_any_instance_of(Aws::S3::Bucket).to receive(:object).and_return(*s3_objects)
+        expect(s3_plugin).to receive(:process_log).at_least(size).and_call_original
+        expect(s3_plugin).to receive(:stop?).and_return(false).at_least(size)
+        expect(s3_plugin).to receive(:download_remote_file).and_return(true).at_least(size)
+        expect(s3_plugin).to receive(:process_local_log).and_return(true).at_least(size)
+
+        # first iteration
+        s3_plugin.process_files(queue)
+
+        # second iteration
+        sleep(cutoff + 1)
+        s3_plugin.process_files(queue)
+      end
+    end
+
+    context 's3 object updated after getting summary' do
+      it 'should not update sincedb' do
+        s3_summary = [
+          double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+          double(:key => 'TODAY', :last_modified => Time.now.round - (cutoff * 10), :content_length => 5, :storage_class => 'STANDARD')
+        ]
+
+        s3_objects = [
+          double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+          double(:key => 'TODAY_UPDATED', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD')
+        ]
+
+        size = s3_objects.length
+
+        allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { s3_summary }
+        allow_any_instance_of(Aws::S3::Bucket).to receive(:object).and_return(*s3_objects)
+        expect(s3_plugin).to receive(:process_log).at_least(size).and_call_original
+        expect(s3_plugin).to receive(:stop?).and_return(false).at_least(size)
+        expect(s3_plugin).to receive(:download_remote_file).and_return(true).at_least(size)
+        expect(s3_plugin).to receive(:process_local_log).and_return(true).at_least(size)
 
+        s3_plugin.process_files(queue)
+        expect(s3_plugin.send(:sincedb).read).to eq(s3_summary[0].last_modified)
+      end
+    end
   end
 end
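A recurring mechanical change in this spec file is `super` → `super()` inside `let` blocks. RSpec's `let` defines its helper with `define_method`, and Ruby raises a `RuntimeError` when a method defined that way calls zero-argument `super` (implicit argument passing is not supported there), so the explicit parentheses are required. A minimal sketch of the underlying Ruby rule:

```ruby
class Base
  def config
    { "bucket" => "logstash-test" }
  end
end

class Child < Base
  # define_method is how RSpec's `let` builds helper methods.
  # A bare `super` here raises RuntimeError at call time;
  # the explicit `super()` form works as expected.
  define_method(:config) { super().merge({ "interval" => 5 }) }
end

p Child.new.config # => {"bucket"=>"logstash-test", "interval"=>5}
```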
data/spec/integration/s3_spec.rb CHANGED

@@ -10,6 +10,7 @@ describe LogStash::Inputs::S3, :integration => true, :s3 => true do
 
     upload_file('../fixtures/uncompressed.log' , "#{prefix}uncompressed_1.log")
     upload_file('../fixtures/compressed.log.gz', "#{prefix}compressed_1.log.gz")
+    sleep(LogStash::Inputs::S3::CUTOFF_SECOND + 1)
   end
 
   after do
@@ -28,6 +29,7 @@ describe LogStash::Inputs::S3, :integration => true, :s3 => true do
                              "prefix" => prefix,
                              "temporary_directory" => temporary_directory } }
   let(:backup_prefix) { "backup/" }
+  let(:backup_bucket) { "logstash-s3-input-backup" }
 
   it "support prefix to scope the remote files" do
     events = fetch_events(minimal_settings)
@@ -49,13 +51,17 @@ describe LogStash::Inputs::S3, :integration => true, :s3 => true do
   end
 
   context "remote backup" do
+    before do
+      create_bucket(backup_bucket)
+    end
+
     it "another bucket" do
-      fetch_events(minimal_settings.merge({ "backup_to_bucket" =>
-      expect(list_remote_files("",
+      fetch_events(minimal_settings.merge({ "backup_to_bucket" => backup_bucket}))
+      expect(list_remote_files("", backup_bucket).size).to eq(2)
     end
 
     after do
-      delete_bucket(
+      delete_bucket(backup_bucket)
     end
   end
 end
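The `sleep(LogStash::Inputs::S3::CUTOFF_SECOND + 1)` added to the setup is what keeps these integration tests deterministic: freshly uploaded fixtures would otherwise fall inside the cutoff window and be deferred to the next cycle. The same wait applies to any spec that uploads and immediately fetches; a hedged sketch using this suite's own helpers (`upload_file`, `fetch_events`, `minimal_settings`):

```ruby
before do
  upload_file('../fixtures/uncompressed.log', "#{prefix}uncompressed_1.log")
  # Wait out the ingestion cutoff so the object is old enough to be
  # picked up in the first fetch cycle instead of being deferred.
  sleep(LogStash::Inputs::S3::CUTOFF_SECOND + 1)
end

it "sees the freshly uploaded object" do
  events = fetch_events(minimal_settings)
  expect(events.size).to be > 0
end
```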
data/spec/support/helpers.rb CHANGED

@@ -23,6 +23,10 @@ def list_remote_files(prefix, target_bucket = ENV['AWS_LOGSTASH_TEST_BUCKET'])
   bucket.objects(:prefix => prefix).collect(&:key)
 end
 
+def create_bucket(name)
+  s3object.bucket(name).create
+end
+
 def delete_bucket(name)
   s3object.bucket(name).objects.map(&:delete)
   s3object.bucket(name).delete
@@ -33,13 +37,16 @@ def s3object
 end
 
 class TestInfiniteS3Object
+  def initialize(s3_obj)
+    @s3_obj = s3_obj
+  end
+
   def each
     counter = 1
 
     loop do
-      yield
+      yield @s3_obj
       counter +=1
     end
   end
-end
-
+end
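With the constructor added above, `TestInfiniteS3Object` yields whatever object the spec hands it instead of an implicit one. A small usage sketch, with a plain `Struct` standing in for the RSpec double:

```ruby
FakeS3Object = Struct.new(:key, :last_modified)

stub = FakeS3Object.new('awesome-key', Time.now)
infinite = TestInfiniteS3Object.new(stub)

# The enumeration never terminates on its own, so wrap it in an
# Enumerator and take only the first three yielded objects.
infinite.to_enum(:each).first(3).each { |obj| puts obj.key }
# prints "awesome-key" three times
```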
metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-s3
 version: !ruby/object:Gem::Version
-  version: 3.5.0
+  version: 3.6.0
 platform: ruby
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2021-03-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement