logstash-input-s3-cloudian 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,612 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/devutils/rspec/shared_examples"
+ require "logstash/inputs/s3"
+ require "logstash/codecs/multiline"
+ require "logstash/errors"
+ require "aws-sdk-resources"
+ require_relative "../support/helpers"
+ require "stud/temporary"
+ require "aws-sdk"
+ require "fileutils"
+ require 'logstash/plugin_mixins/ecs_compatibility_support/spec_helper'
+
+ describe LogStash::Inputs::S3 do
+   let(:temporary_directory) { Stud::Temporary.pathname }
+   let(:sincedb_path) { Stud::Temporary.pathname }
+   let(:day) { 3600 * 24 }
+   let(:creds) { Aws::Credentials.new('1234', 'secret') }
+   let(:config) {
+     {
+       "access_key_id" => "1234",
+       "secret_access_key" => "secret",
+       "bucket" => "logstash-test",
+       "temporary_directory" => temporary_directory,
+       "sincedb_path" => File.join(sincedb_path, ".sincedb")
+     }
+   }
+   let(:cutoff) { LogStash::Inputs::S3::CUTOFF_SECOND }
+
+
+   before do
+     FileUtils.mkdir_p(sincedb_path)
+     Aws.config[:stub_responses] = true
+     Thread.abort_on_exception = true
+   end
+
+   context "when interrupting the plugin" do
+     let(:config) { super().merge({ "interval" => 5 }) }
+     let(:s3_obj) { double(:key => "awesome-key", :last_modified => Time.now.round, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+
+     before do
+       expect_any_instance_of(LogStash::Inputs::S3).to receive(:list_new_files).and_return(TestInfiniteS3Object.new(s3_obj))
+     end
+
+     it_behaves_like "an interruptible input plugin" do
+       let(:allowed_lag) { 16 } if LOGSTASH_VERSION.split('.').first.to_i <= 6
+     end
+   end
+
+   describe "#register" do
+     subject { LogStash::Inputs::S3.new(config) }
+
+     context "with temporary directory" do
+       let(:temporary_directory) { Stud::Temporary.pathname }
+
+       it "creates the directory when it doesn't exist" do
+         expect { subject.register }.to change { Dir.exist?(temporary_directory) }.from(false).to(true)
+       end
+     end
+   end
+
+   describe '#get_s3object' do
+     subject { LogStash::Inputs::S3.new(settings) }
+
+     context 'with modern access key options' do
+       let(:settings) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "proxy_uri" => "http://example.com",
+           "bucket" => "logstash-test",
+         }
+       }
+
+       it 'should instantiate AWS::S3 clients with a proxy set' do
+         expect(Aws::S3::Resource).to receive(:new).with({
+           :credentials => kind_of(Aws::Credentials),
+           :http_proxy => 'http://example.com',
+           :region => subject.region
+         })
+
+         subject.send(:get_s3object)
+       end
+     end
+
+     describe "additional_settings" do
+       context "supported settings" do
+         let(:settings) {
+           {
+             "additional_settings" => { "force_path_style" => 'true', "ssl_verify_peer" => 'false', "profile" => 'logstash' },
+             "bucket" => "logstash-test",
+           }
+         }
+
+         it 'should instantiate AWS::S3 clients with force_path_style set' do
+           expect(Aws::S3::Resource).to receive(:new).with({
+             :region => subject.region,
+             :force_path_style => true, :ssl_verify_peer => false, :profile => 'logstash'
+           }).and_call_original
+
+           subject.send(:get_s3object)
+         end
+       end
+
+       context 'when an unknown setting is given' do
+         let(:settings) {
+           {
+             "additional_settings" => { "this_setting_doesnt_exist" => true },
+             "bucket" => "logstash-test",
+           }
+         }
+
+         it 'should raise an error' do
+           expect { subject.send(:get_s3object) }.to raise_error(ArgumentError)
+         end
+       end
+     end
+   end
+
+   describe "#list_new_files" do
+     before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
+
+     let!(:present_object_after_cutoff) {double(:key => 'this-should-not-be-present', :last_modified => Time.now, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+     let!(:present_object) {double(:key => 'this-should-be-present', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+     let!(:archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil)) ) }
+     let!(:deep_archived_object) {double(:key => 'this-should-be-archived', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil)) ) }
+     let!(:restored_object) {double(:key => 'this-should-be-restored-from-archive', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"')) ) }
+     let!(:deep_restored_object) {double(:key => 'this-should-be-restored-from-deep-archive', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'DEEP_ARCHIVE', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"')) ) }
+     let(:objects_list) {
+       [
+         double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
+         double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
+         archived_object,
+         restored_object,
+         deep_restored_object,
+         present_object,
+         present_object_after_cutoff
+       ]
+     }
+
+     it 'should allow the user to exclude files from the s3 bucket' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to_not include('exclude-this-file-1') # matches exclude pattern
+       expect(files).to_not include('exclude/logstash') # matches exclude pattern
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(3)
+     end
+
+     it 'should support not providing an exclude pattern' do
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to include('exclude-this-file-1') # no exclude pattern given
+       expect(files).to include('exclude/logstash') # no exclude pattern given
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(5)
+     end
+
+     context 'when all files are excluded from a bucket' do
+       let(:objects_list) {
+         [
+           double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
+           double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
+         ]
+       }
+
+       it 'should not log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
+         plugin.register
+         allow(plugin.logger).to receive(:debug).with(anything, anything)
+
+         expect(plugin.logger).not_to receive(:info).with(/No files found/, anything)
+         expect(plugin.logger).to receive(:debug).with(/Ignoring/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context 'with an empty bucket' do
+       let(:objects_list) { [] }
+
+       it 'should log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config)
+         plugin.register
+         allow(plugin.logger).to receive(:info).with(/Using the provided sincedb_path/, anything)
+         expect(plugin.logger).to receive(:info).with(/No files found/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context "If the bucket is the same as the backup bucket" do
+       it 'should ignore files from the bucket if they match the backup prefix' do
+         objects_list = [
+           double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+           present_object
+         ]
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list }
+
+         plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'mybackup',
+                                                          'backup_to_bucket' => config['bucket']}))
+         plugin.register
+
+         files = plugin.list_new_files.map { |item| item.key }
+         expect(files).to include(present_object.key)
+         expect(files).to_not include('mybackup-log-1') # matches backup prefix
+         expect(files.size).to eq(1)
+       end
+     end
+
+     it 'should ignore files older than X' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'exclude-this-file'}))
+
+
+       allow_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).and_return(Time.now - day)
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to_not include('exclude-this-file-1') # too old
+       expect(files).to_not include('exclude/logstash') # too old
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(3)
+     end
+
+     it 'should ignore the file if it matches the prefix' do
+       prefix = 'mysource/'
+
+       objects_list = [
+         double(:key => prefix, :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+         present_object
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects).with(:prefix => prefix) { objects_list }
+
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'prefix' => prefix }))
+       plugin.register
+       expect(plugin.list_new_files.map { |item| item.key }).to eq([present_object.key])
+     end
+
+     it 'should return objects sorted by last_modified date, oldest first' do
+       objects = [
+         double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TODAY_BEFORE_CUTOFF', :last_modified => Time.now - cutoff, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD')
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+
+
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+       expect(plugin.list_new_files.map { |item| item.key }).to eq(['TWO_DAYS_AGO', 'YESTERDAY', 'TODAY_BEFORE_CUTOFF'])
+     end
+
+     describe "when doing backup on the s3" do
+       it 'should copy to another s3 bucket when keeping the original file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup"}))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to_not receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+
+       it 'should copy to another s3 bucket when deleting the original file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup", "delete" => true }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+
+       it 'should add the specified prefix to the backup file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup",
+                                                          "backup_add_prefix" => 'backup-' }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to_not receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+     end
+
+     it 'should support doing local backup of files' do
+       Stud::Temporary.directory do |backup_dir|
+         Stud::Temporary.file do |source_file|
+           backup_file = File.join(backup_dir.to_s, Pathname.new(source_file.path).basename.to_s)
+
+           plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_dir" => backup_dir }))
+
+           plugin.backup_to_dir(source_file)
+
+           expect(File.exist?(backup_file)).to eq(true)
+         end
+       end
+     end
+   end
+
+   shared_examples "generated events" do
+     let(:events_to_process) { 2 }
+
+     it 'should process events' do
+       events = fetch_events(config)
+       expect(events.size).to eq(events_to_process)
+       expect(events[0].get("[@metadata][s3][key]")).to eql log.key
+       expect(events[1].get("[@metadata][s3][key]")).to eql log.key
+     end
+
+     it "deletes the temporary file" do
+       events = fetch_events(config)
+       expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
+     end
+   end
+
+   context 'while communicating with s3' do
+     let(:config) {
+       {
+         "access_key_id" => "1234",
+         "secret_access_key" => "secret",
+         "bucket" => "logstash-test",
+         "codec" => "json",
+       }
+     }
+     %w(AccessDenied NotFound).each do |error|
+       context "while listing bucket contents, #{error} is returned" do
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               list_objects: error
+             }
+           }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+
+     %w(AccessDenied NoSuchKey).each do |error|
+       context "when retrieving an object, #{error} is returned" do
+         let(:objects) { [log] }
+         let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+
+         let(:config) {
+           {
+             "access_key_id" => "1234",
+             "secret_access_key" => "secret",
+             "bucket" => "logstash-test",
+             "codec" => "json",
+           }
+         }
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               get_object: error
+             }
+           }
+           allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+   end
+
+   context 'when working with logs' do
+     let(:objects) { [log] }
+     let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :data => { "etag" => 'c2c966251da0bc3229d12c2642ba50a4' }, :storage_class => 'STANDARD') }
+     let(:data) { File.read(log_file) }
+
+     before do
+       Aws.config[:s3] = {
+         stub_responses: {
+           get_object: { body: data }
+         }
+       }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:object).with(log.key) { log }
+       expect(log).to receive(:get).with(instance_of(Hash)) do |arg|
+         File.open(arg[:response_target], 'wb') { |s3file| s3file.write(data) }
+       end
+     end
+
+     context "when event doesn't have a `message` field" do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => "json",
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context "when event does have a `message` field" do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json_with_message.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => "json",
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context "multiple compressed streams" do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiple_compressed_streams.gz') }
+
+       include_examples "generated events" do
+         let(:events_to_process) { 16 }
+       end
+     end
+
+     context 'compressed' do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') }
+
+       include_examples "generated events"
+     end
+
+     context 'compressed with gzip extension and using default gzip_pattern option' do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }
+
+       include_examples "generated events"
+     end
+
+     context 'compressed with gzip extension and using custom gzip_pattern option' do
+       let(:config) { super().merge({ "gzip_pattern" => "gee.zip$" }) }
+       let(:log) { double(:key => 'log.gee.zip', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gee.zip') }
+       include_examples "generated events"
+     end
+
+     context 'plain text' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       include_examples "generated events"
+     end
+
+     context 'multi-line' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiline.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => LogStash::Codecs::Multiline.new( {"pattern" => "__SEPARATOR__", "negate" => "true", "what" => "previous"})
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context 'encoded' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'invalid_utf8.gbk.log') }
+
+       include_examples "generated events"
+     end
+
+     context 'cloudfront' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'cloudfront.log') }
+
+       describe "metadata", :ecs_compatibility_support, :aggregate_failures do
+         ecs_compatibility_matrix(:disabled, :v1) do |ecs_select|
+           before(:each) do
+             allow_any_instance_of(described_class).to receive(:ecs_compatibility).and_return(ecs_compatibility)
+           end
+
+           it 'should extract metadata from cloudfront log' do
+             events = fetch_events(config)
+
+             events.each do |event|
+               expect(event.get ecs_select[disabled: "cloudfront_fields", v1: "[@metadata][s3][cloudfront][fields]"] ).to eq('date time x-edge-location c-ip x-event sc-bytes x-cf-status x-cf-client-id cs-uri-stem cs-uri-query c-referrer x-page-url​ c-user-agent x-sname x-sname-query x-file-ext x-sid')
+               expect(event.get ecs_select[disabled: "cloudfront_version", v1: "[@metadata][s3][cloudfront][version]"] ).to eq('1.0')
+             end
+           end
+         end
+       end
+
+       include_examples "generated events"
+     end
+
+     context 'when include_object_properties is set to true' do
+       let(:config) { super().merge({ "include_object_properties" => true }) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       it 'should extract object properties onto [@metadata][s3]' do
+         events = fetch_events(config)
+         events.each do |event|
+           expect(event.get('[@metadata][s3]')).to include(log.data)
+         end
+       end
+
+       include_examples "generated events"
+     end
+
+     context 'when include_object_properties is set to false' do
+       let(:config) { super().merge({ "include_object_properties" => false }) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       it 'should NOT extract object properties onto [@metadata][s3]' do
+         events = fetch_events(config)
+         events.each do |event|
+           expect(event.get('[@metadata][s3]')).to_not include(log.data)
+         end
+       end
+
+       include_examples "generated events"
+     end
+   end
+
+   describe "data loss" do
+     let(:s3_plugin) { LogStash::Inputs::S3.new(config) }
+     let(:queue) { [] }
+
+     before do
+       s3_plugin.register
+     end
+
+     context 'events come after cutoff time' do
+       it 'should be processed in next cycle' do
+         s3_objects = [
+           double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now.round - 2 * day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY_BEFORE_CUTOFF', :last_modified => Time.now.round - cutoff, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD')
+         ]
+         size = s3_objects.length
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { s3_objects }
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:object).and_return(*s3_objects)
+         expect(s3_plugin).to receive(:process_log).at_least(size).and_call_original
+         expect(s3_plugin).to receive(:stop?).and_return(false).at_least(size)
+         expect(s3_plugin).to receive(:download_remote_file).and_return(true).at_least(size)
+         expect(s3_plugin).to receive(:process_local_log).and_return(true).at_least(size)
+
+         # first iteration
+         s3_plugin.process_files(queue)
+
+         # second iteration
+         sleep(cutoff + 1)
+         s3_plugin.process_files(queue)
+       end
+     end
+
+     context 's3 object updated after getting summary' do
+       it 'should not update sincedb' do
+         s3_summary = [
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round - (cutoff * 10), :content_length => 5, :storage_class => 'STANDARD')
+         ]
+
+         s3_objects = [
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY_UPDATED', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD')
+         ]
+
+         size = s3_objects.length
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { s3_summary }
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:object).and_return(*s3_objects)
+         expect(s3_plugin).to receive(:process_log).at_least(size).and_call_original
+         expect(s3_plugin).to receive(:stop?).and_return(false).at_least(size)
+         expect(s3_plugin).to receive(:download_remote_file).and_return(true).at_least(size)
+         expect(s3_plugin).to receive(:process_local_log).and_return(true).at_least(size)
+
+         s3_plugin.process_files(queue)
+         expect(s3_plugin.send(:sincedb).read).to eq(s3_summary[0].last_modified)
+       end
+     end
+   end
+ end
@@ -0,0 +1,17 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/s3"
+ require "stud/temporary"
+ require "fileutils"
+
+ describe LogStash::Inputs::S3::SinceDB::File do
+   let(:file) { Stud::Temporary.file.path }
+   subject { LogStash::Inputs::S3::SinceDB::File.new(file) }
+   before do
+     FileUtils.touch(file)
+   end
+
+   it "doesn't raise an exception if the file is empty" do
+     expect { subject.read }.not_to raise_error
+   end
+ end