logstash-input-s3-cloudian 1.0.0
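
The diff below adds the plugin's RSpec suite. As a quick orientation, here is a minimal sketch of how the input is driven, using only option names that appear in the specs (bucket, access_key_id, secret_access_key, temporary_directory, sincedb_path); the values and the standalone driver lines are illustrative assumptions, not part of the gem.

# Minimal sketch (assumed values): build and register the input the same way
# the specs below do, then ask it for the objects it has not processed yet.
require "logstash/inputs/s3"

config = {
  "bucket"              => "logstash-test",            # bucket to poll
  "access_key_id"       => "1234",                     # stub credentials, as in the specs
  "secret_access_key"   => "secret",
  "temporary_directory" => "/tmp/logstash-s3",         # downloaded objects land here
  "sincedb_path"        => "/tmp/logstash-s3/.sincedb" # tracks the last processed modification time
}

plugin = LogStash::Inputs::S3.new(config)
plugin.register                           # creates temporary_directory if missing (see the #register specs)
keys = plugin.list_new_files.map(&:key)   # unprocessed, non-archived objects, oldest first (see #list_new_files)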

@@ -0,0 +1,612 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/devutils/rspec/shared_examples"
+ require "logstash/inputs/s3"
+ require "logstash/codecs/multiline"
+ require "logstash/errors"
+ require "aws-sdk-resources"
+ require_relative "../support/helpers"
+ require "stud/temporary"
+ require "aws-sdk"
+ require "fileutils"
+ require 'logstash/plugin_mixins/ecs_compatibility_support/spec_helper'
+
+ describe LogStash::Inputs::S3 do
+   let(:temporary_directory) { Stud::Temporary.pathname }
+   let(:sincedb_path) { Stud::Temporary.pathname }
+   let(:day) { 3600 * 24 }
+   let(:creds) { Aws::Credentials.new('1234', 'secret') }
+   let(:config) {
+     {
+       "access_key_id" => "1234",
+       "secret_access_key" => "secret",
+       "bucket" => "logstash-test",
+       "temporary_directory" => temporary_directory,
+       "sincedb_path" => File.join(sincedb_path, ".sincedb")
+     }
+   }
+   let(:cutoff) { LogStash::Inputs::S3::CUTOFF_SECOND }
+
+
+   before do
+     FileUtils.mkdir_p(sincedb_path)
+     Aws.config[:stub_responses] = true
+     Thread.abort_on_exception = true
+   end
+
+   context "when interrupting the plugin" do
+     let(:config) { super().merge({ "interval" => 5 }) }
+     let(:s3_obj) { double(:key => "awesome-key", :last_modified => Time.now.round, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+
+     before do
+       expect_any_instance_of(LogStash::Inputs::S3).to receive(:list_new_files).and_return(TestInfiniteS3Object.new(s3_obj))
+     end
+
+     it_behaves_like "an interruptible input plugin" do
+       let(:allowed_lag) { 16 } if LOGSTASH_VERSION.split('.').first.to_i <= 6
+     end
+   end
+
+   describe "#register" do
+     subject { LogStash::Inputs::S3.new(config) }
+
+     context "with temporary directory" do
+       let(:temporary_directory) { Stud::Temporary.pathname }
+
+       it "creates the directory when it doesn't exist" do
+         expect { subject.register }.to change { Dir.exist?(temporary_directory) }.from(false).to(true)
+       end
+     end
+   end
+
+   describe '#get_s3object' do
+     subject { LogStash::Inputs::S3.new(settings) }
+
+     context 'with modern access key options' do
+       let(:settings) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "proxy_uri" => "http://example.com",
+           "bucket" => "logstash-test",
+         }
+       }
+
+       it 'should instantiate AWS::S3 clients with a proxy set' do
+         expect(Aws::S3::Resource).to receive(:new).with({
+           :credentials => kind_of(Aws::Credentials),
+           :http_proxy => 'http://example.com',
+           :region => subject.region
+         })
+
+         subject.send(:get_s3object)
+       end
+     end
+
+     describe "additional_settings" do
+       context "supported settings" do
+         let(:settings) {
+           {
+             "additional_settings" => { "force_path_style" => 'true', "ssl_verify_peer" => 'false', "profile" => 'logstash' },
+             "bucket" => "logstash-test",
+           }
+         }
+
+         it 'should instantiate AWS::S3 clients with force_path_style set' do
+           expect(Aws::S3::Resource).to receive(:new).with({
+             :region => subject.region,
+             :force_path_style => true, :ssl_verify_peer => false, :profile => 'logstash'
+           }).and_call_original
+
+           subject.send(:get_s3object)
+         end
+       end
+
+       context 'when an unknown setting is given' do
+         let(:settings) {
+           {
+             "additional_settings" => { "this_setting_doesnt_exist" => true },
+             "bucket" => "logstash-test",
+           }
+         }
+
+         it 'should raise an error' do
+           expect { subject.send(:get_s3object) }.to raise_error(ArgumentError)
+         end
+       end
+     end
+   end
+
+   describe "#list_new_files" do
+     before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
+
+     let!(:present_object_after_cutoff) {double(:key => 'this-should-not-be-present', :last_modified => Time.now, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+     let!(:present_object) {double(:key => 'this-should-be-present', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+     let!(:archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil)) ) }
+     let!(:deep_archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil)) ) }
+     let!(:restored_object) {double(:key => 'this-should-be-restored-from-archive', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"')) ) }
+     let!(:deep_restored_object) {double(:key => 'this-should-be-restored-from-deep-archive', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'DEEP_ARCHIVE', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"')) ) }
+     let(:objects_list) {
+       [
+         double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
+         double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
+         archived_object,
+         restored_object,
+         deep_restored_object,
+         present_object,
+         present_object_after_cutoff
+       ]
+     }
+
+     it 'should allow user to exclude files from the s3 bucket' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to_not include('exclude-this-file-1') # matches exclude pattern
+       expect(files).to_not include('exclude/logstash') # matches exclude pattern
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(3)
+     end
+
+     it 'should support not providing an exclude pattern' do
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to include('exclude-this-file-1') # no exclude pattern given
+       expect(files).to include('exclude/logstash') # no exclude pattern given
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(5)
+     end
+
+     context 'when all files are excluded from a bucket' do
+       let(:objects_list) {
+         [
+           double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
+           double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
+         ]
+       }
+
+       it 'should not log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
+         plugin.register
+         allow(plugin.logger).to receive(:debug).with(anything, anything)
+
+         expect(plugin.logger).not_to receive(:info).with(/No files found/, anything)
+         expect(plugin.logger).to receive(:debug).with(/Ignoring/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context 'with an empty bucket' do
+       let(:objects_list) { [] }
+
+       it 'should log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config)
+         plugin.register
+         allow(plugin.logger).to receive(:info).with(/Using the provided sincedb_path/, anything)
+         expect(plugin.logger).to receive(:info).with(/No files found/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context "If the bucket is the same as the backup bucket" do
+       it 'should ignore files from the bucket if they match the backup prefix' do
+         objects_list = [
+           double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+           present_object
+         ]
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list }
+
+         plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'mybackup',
+                                                          'backup_to_bucket' => config['bucket']}))
+         plugin.register
+
+         files = plugin.list_new_files.map { |item| item.key }
+         expect(files).to include(present_object.key)
+         expect(files).to_not include('mybackup-log-1') # matches backup prefix
+         expect(files.size).to eq(1)
+       end
+     end
+
+     it 'should ignore files older than X' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'exclude-this-file'}))
+
+
+       allow_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).and_return(Time.now - day)
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to_not include('exclude-this-file-1') # too old
+       expect(files).to_not include('exclude/logstash') # too old
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(3)
+     end
+
+     it 'should ignore the file if it matches the prefix' do
+       prefix = 'mysource/'
+
+       objects_list = [
+         double(:key => prefix, :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+         present_object
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects).with(:prefix => prefix) { objects_list }
+
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'prefix' => prefix }))
+       plugin.register
+       expect(plugin.list_new_files.map { |item| item.key }).to eq([present_object.key])
+     end
+
+     it 'should return objects sorted by last_modified date, oldest first' do
+       objects = [
+         double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TODAY_BEFORE_CUTOFF', :last_modified => Time.now - cutoff, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD')
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+
+
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+       expect(plugin.list_new_files.map { |item| item.key }).to eq(['TWO_DAYS_AGO', 'YESTERDAY', 'TODAY_BEFORE_CUTOFF'])
+     end
+
+     describe "when doing backup on the s3" do
+       it 'should copy to another s3 bucket when keeping the original file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup"}))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to_not receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+
+       it 'should copy to another s3 bucket when deleting the original file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup", "delete" => true }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+
+       it 'should add the specified prefix to the backup file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup",
+                                                          "backup_add_prefix" => 'backup-' }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to_not receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+     end
+
+     it 'should support doing local backup of files' do
+       Stud::Temporary.directory do |backup_dir|
+         Stud::Temporary.file do |source_file|
+           backup_file = File.join(backup_dir.to_s, Pathname.new(source_file.path).basename.to_s)
+
+           plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_dir" => backup_dir }))
+
+           plugin.backup_to_dir(source_file)
+
+           expect(File.exist?(backup_file)).to eq(true)
+         end
+       end
+     end
+   end
+
+   shared_examples "generated events" do
+     let(:events_to_process) { 2 }
+
+     it 'should process events' do
+       events = fetch_events(config)
+       expect(events.size).to eq(events_to_process)
+       expect(events[0].get("[@metadata][s3][key]")).to eql log.key
+       expect(events[1].get("[@metadata][s3][key]")).to eql log.key
+     end
+
+     it "deletes the temporary file" do
+       events = fetch_events(config)
+       expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
+     end
+   end
+
+   context 'while communicating with s3' do
+     let(:config) {
+       {
+         "access_key_id" => "1234",
+         "secret_access_key" => "secret",
+         "bucket" => "logstash-test",
+         "codec" => "json",
+       }
+     }
+     %w(AccessDenied NotFound).each do |error|
+       context "while listing bucket contents, #{error} is returned" do
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               list_objects: error
+             }
+           }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+
+     %w(AccessDenied NoSuchKey).each do |error|
+       context "when retrieving an object, #{error} is returned" do
+         let(:objects) { [log] }
+         let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+
+         let(:config) {
+           {
+             "access_key_id" => "1234",
+             "secret_access_key" => "secret",
+             "bucket" => "logstash-test",
+             "codec" => "json",
+           }
+         }
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               get_object: error
+             }
+           }
+           allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+   end
+
+   context 'when working with logs' do
+     let(:objects) { [log] }
+     let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :data => { "etag" => 'c2c966251da0bc3229d12c2642ba50a4' }, :storage_class => 'STANDARD') }
+     let(:data) { File.read(log_file) }
+
+     before do
+       Aws.config[:s3] = {
+         stub_responses: {
+           get_object: { body: data }
+         }
+       }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:object).with(log.key) { log }
+       expect(log).to receive(:get).with(instance_of(Hash)) do |arg|
+         File.open(arg[:response_target], 'wb') { |s3file| s3file.write(data) }
+       end
+     end
+
+     context "when event doesn't have a `message` field" do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => "json",
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context "when event does have a `message` field" do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json_with_message.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => "json",
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context "multiple compressed streams" do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiple_compressed_streams.gz') }
+
+       include_examples "generated events" do
+         let(:events_to_process) { 16 }
+       end
+     end
+
+     context 'compressed' do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') }
+
+       include_examples "generated events"
+     end
+
+     context 'compressed with gzip extension and using default gzip_pattern option' do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }
+
+       include_examples "generated events"
+     end
+
+     context 'compressed with gzip extension and using custom gzip_pattern option' do
+       let(:config) { super().merge({ "gzip_pattern" => "gee.zip$" }) }
+       let(:log) { double(:key => 'log.gee.zip', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gee.zip') }
+       include_examples "generated events"
+     end
+
+     context 'plain text' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       include_examples "generated events"
+     end
+
+     context 'multi-line' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiline.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => LogStash::Codecs::Multiline.new( {"pattern" => "__SEPARATOR__", "negate" => "true", "what" => "previous"})
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context 'encoded' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'invalid_utf8.gbk.log') }
+
+       include_examples "generated events"
+     end
+
+     context 'cloudfront' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'cloudfront.log') }
+
+       describe "metadata", :ecs_compatibility_support, :aggregate_failures do
+         ecs_compatibility_matrix(:disabled, :v1) do |ecs_select|
+           before(:each) do
+             allow_any_instance_of(described_class).to receive(:ecs_compatibility).and_return(ecs_compatibility)
+           end
+
+           it 'should extract metadata from cloudfront log' do
+             events = fetch_events(config)
+
+             events.each do |event|
+               expect(event.get ecs_select[disabled: "cloudfront_fields", v1: "[@metadata][s3][cloudfront][fields]"] ).to eq('date time x-edge-location c-ip x-event sc-bytes x-cf-status x-cf-client-id cs-uri-stem cs-uri-query c-referrer x-page-url​ c-user-agent x-sname x-sname-query x-file-ext x-sid')
+               expect(event.get ecs_select[disabled: "cloudfront_version", v1: "[@metadata][s3][cloudfront][version]"] ).to eq('1.0')
+             end
+           end
+         end
+       end
+
+       include_examples "generated events"
+     end
+
+     context 'when include_object_properties is set to true' do
+       let(:config) { super().merge({ "include_object_properties" => true }) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       it 'should extract object properties onto [@metadata][s3]' do
+         events = fetch_events(config)
+         events.each do |event|
+           expect(event.get('[@metadata][s3]')).to include(log.data)
+         end
+       end
+
+       include_examples "generated events"
+     end
+
+     context 'when include_object_properties is set to false' do
+       let(:config) { super().merge({ "include_object_properties" => false }) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       it 'should NOT extract object properties onto [@metadata][s3]' do
+         events = fetch_events(config)
+         events.each do |event|
+           expect(event.get('[@metadata][s3]')).to_not include(log.data)
+         end
+       end
+
+       include_examples "generated events"
+     end
+   end
+
+   describe "data loss" do
+     let(:s3_plugin) { LogStash::Inputs::S3.new(config) }
+     let(:queue) { [] }
+
+     before do
+       s3_plugin.register
+     end
+
+     context 'events come after cutoff time' do
+       it 'should be processed in next cycle' do
+         s3_objects = [
+           double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now.round - 2 * day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY_BEFORE_CUTOFF', :last_modified => Time.now.round - cutoff, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD')
+         ]
+         size = s3_objects.length
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { s3_objects }
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:object).and_return(*s3_objects)
+         expect(s3_plugin).to receive(:process_log).at_least(size).and_call_original
+         expect(s3_plugin).to receive(:stop?).and_return(false).at_least(size)
+         expect(s3_plugin).to receive(:download_remote_file).and_return(true).at_least(size)
+         expect(s3_plugin).to receive(:process_local_log).and_return(true).at_least(size)
+
+         # first iteration
+         s3_plugin.process_files(queue)
+
+         # second iteration
+         sleep(cutoff + 1)
+         s3_plugin.process_files(queue)
+       end
+     end
+
+     context 's3 object updated after getting summary' do
+       it 'should not update sincedb' do
+         s3_summary = [
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round - (cutoff * 10), :content_length => 5, :storage_class => 'STANDARD')
+         ]
+
+         s3_objects = [
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY_UPDATED', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD')
+         ]
+
+         size = s3_objects.length
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { s3_summary }
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:object).and_return(*s3_objects)
+         expect(s3_plugin).to receive(:process_log).at_least(size).and_call_original
+         expect(s3_plugin).to receive(:stop?).and_return(false).at_least(size)
+         expect(s3_plugin).to receive(:download_remote_file).and_return(true).at_least(size)
+         expect(s3_plugin).to receive(:process_local_log).and_return(true).at_least(size)
+
+         s3_plugin.process_files(queue)
+         expect(s3_plugin.send(:sincedb).read).to eq(s3_summary[0].last_modified)
+       end
+     end
+   end
+ end
@@ -0,0 +1,17 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/s3"
+ require "stud/temporary"
+ require "fileutils"
+
+ describe LogStash::Inputs::S3::SinceDB::File do
+   let(:file) { Stud::Temporary.file.path }
+   subject { LogStash::Inputs::S3::SinceDB::File.new(file) }
+   before do
+     FileUtils.touch(file)
+   end
+
+   it "doesn't raise an exception if the file is empty" do
+     expect { subject.read }.not_to raise_error
+   end
+ end