logstash-input-s3-local 3.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,455 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/s3"
+ require "logstash/codecs/multiline"
+ require "logstash/errors"
+ require "aws-sdk-resources"
+ require_relative "../support/helpers"
+ require "stud/temporary"
+ require "aws-sdk"
+ require "fileutils"
+
+ describe LogStash::Inputs::S3 do
+   let(:temporary_directory) { Stud::Temporary.pathname }
+   let(:sincedb_path) { Stud::Temporary.pathname }
+   let(:day) { 3600 * 24 }
+   let(:creds) { Aws::Credentials.new('1234', 'secret') }
+   let(:config) {
+     {
+       "access_key_id" => "1234",
+       "secret_access_key" => "secret",
+       "bucket" => "logstash-test",
+       "temporary_directory" => temporary_directory,
+       "sincedb_path" => File.join(sincedb_path, ".sincedb")
+     }
+   }
+
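+   # Shared setup: create the sincedb directory and stub all AWS SDK responses
+   # so no spec in this file talks to the network.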
+   before do
+     FileUtils.mkdir_p(sincedb_path)
+     Aws.config[:stub_responses] = true
+     Thread.abort_on_exception = true
+   end
+
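+   # `list_new_files` is stubbed with TestInfiniteS3Object (see
+   # spec/support/helpers.rb), which yields keys forever; the run can therefore
+   # only end when the plugin is asked to stop.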
+ context "when interrupting the plugin" do
35
+ let(:config) { super.merge({ "interval" => 5 }) }
36
+
37
+ before do
38
+ expect_any_instance_of(LogStash::Inputs::S3).to receive(:list_new_files).and_return(TestInfiniteS3Object.new)
39
+ end
40
+
41
+ it_behaves_like "an interruptible input plugin"
42
+ end
43
+
44
+ describe "#register" do
45
+ subject { LogStash::Inputs::S3.new(config) }
46
+
47
+ context "with temporary directory" do
48
+ let(:temporary_directory) { Stud::Temporary.pathname }
49
+
50
+ it "creates the direct when it doesn't exist" do
51
+ expect { subject.register }.to change { Dir.exist?(temporary_directory) }.from(false).to(true)
52
+ end
53
+ end
54
+ end
55
+
56
+   describe '#get_s3object' do
+     subject { LogStash::Inputs::S3.new(settings) }
+
+     context 'with modern access key options' do
+       let(:settings) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "proxy_uri" => "http://example.com",
+           "bucket" => "logstash-test",
+         }
+       }
+
+       it 'should instantiate AWS::S3 clients with a proxy set' do
+         expect(Aws::S3::Resource).to receive(:new).with({
+           :credentials => kind_of(Aws::Credentials),
+           :http_proxy => 'http://example.com',
+           :region => subject.region
+         })
+
+         subject.send(:get_s3object)
+       end
+     end
+
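+     # Entries in `additional_settings` are passed through to
+     # Aws::S3::Resource.new, so a key the SDK does not recognize surfaces
+     # as an ArgumentError.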
+ describe "additional_settings" do
81
+ context 'when force_path_style is set' do
82
+ let(:settings) {
83
+ {
84
+ "additional_settings" => { "force_path_style" => true },
85
+ "bucket" => "logstash-test",
86
+ }
87
+ }
88
+
89
+ it 'should instantiate AWS::S3 clients with force_path_style set' do
90
+ expect(Aws::S3::Resource).to receive(:new).with({
91
+ :region => subject.region,
92
+ :force_path_style => true
93
+ }).and_call_original
94
+
95
+ subject.send(:get_s3object)
96
+ end
97
+ end
98
+
99
+ context 'when an unknown setting is given' do
100
+ let(:settings) {
101
+ {
102
+ "additional_settings" => { "this_setting_doesnt_exist" => true },
103
+ "bucket" => "logstash-test",
104
+ }
105
+ }
106
+
107
+ it 'should raise an error' do
108
+ expect { subject.send(:get_s3object) }.to raise_error(ArgumentError)
109
+ end
110
+ end
111
+ end
112
+ end
113
+
114
+ describe "#list_new_files" do
115
+ before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
116
+
117
+ let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now, :content_length => 10) }
118
+ let(:objects_list) {
119
+ [
120
+ double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100),
121
+ double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50),
122
+ present_object
123
+ ]
124
+ }
125
+
126
+ it 'should allow user to exclude files from the s3 bucket' do
127
+ plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
128
+ plugin.register
129
+ expect(plugin.list_new_files).to eq([present_object.key])
130
+ end
131
+
132
+ it 'should support not providing a exclude pattern' do
133
+ plugin = LogStash::Inputs::S3.new(config)
134
+ plugin.register
135
+ expect(plugin.list_new_files).to eq(objects_list.map(&:key))
136
+ end
137
+
138
+ context 'when all files are excluded from a bucket' do
139
+ let(:objects_list) {
140
+ [
141
+ double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100),
142
+ double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50),
143
+ ]
144
+ }
145
+
146
+ it 'should not log that no files were found in the bucket' do
147
+ plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
148
+ plugin.register
149
+ allow(plugin.logger).to receive(:debug).with(anything, anything)
150
+
151
+ expect(plugin.logger).not_to receive(:info).with(/No files found/, anything)
152
+ expect(plugin.logger).to receive(:debug).with(/Ignoring/, anything)
153
+ expect(plugin.list_new_files).to be_empty
154
+ end
155
+ end
156
+
157
+     context 'with an empty bucket' do
+       let(:objects_list) { [] }
+
+       it 'should log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config)
+         plugin.register
+         expect(plugin.logger).to receive(:info).with(/No files found/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context "when the bucket is the same as the backup bucket" do
+       it 'should ignore files from the bucket if they match the backup prefix' do
+         objects_list = [
+           double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5),
+           present_object
+         ]
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list }
+
+         plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'mybackup',
+                                                          'backup_to_bucket' => config['bucket']}))
+         plugin.register
+         expect(plugin.list_new_files).to eq([present_object.key])
+       end
+     end
+
+     it 'should ignore files older than X' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'exclude-this-file'}))
+
+       expect_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).exactly(objects_list.size) { Time.now - day }
+       plugin.register
+
+       expect(plugin.list_new_files).to eq([present_object.key])
+     end
+
+     it 'should ignore the file if it matches the prefix' do
+       prefix = 'mysource/'
+
+       objects_list = [
+         double(:key => prefix, :last_modified => Time.now, :content_length => 5),
+         present_object
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects).with(:prefix => prefix) { objects_list }
+
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'prefix' => prefix }))
+       plugin.register
+       expect(plugin.list_new_files).to eq([present_object.key])
+     end
+
+     it 'should return objects sorted by last_modified date, oldest first' do
+       objects = [
+         double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5),
+         double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5),
+         double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5)
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+       expect(plugin.list_new_files).to eq(['TWO_DAYS_AGO', 'YESTERDAY', 'TODAY'])
+     end
+
+ describe "when doing backup on the s3" do
224
+ it 'should copy to another s3 bucket when keeping the original file' do
225
+ plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup"}))
226
+ plugin.register
227
+
228
+ s3object = Aws::S3::Object.new('mybucket', 'testkey')
229
+ expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
230
+ expect(s3object).to_not receive(:delete)
231
+
232
+ plugin.backup_to_bucket(s3object)
233
+ end
234
+
235
+ it 'should copy to another s3 bucket when deleting the original file' do
236
+ plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup", "delete" => true }))
237
+ plugin.register
238
+
239
+ s3object = Aws::S3::Object.new('mybucket', 'testkey')
240
+ expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
241
+ expect(s3object).to receive(:delete)
242
+
243
+ plugin.backup_to_bucket(s3object)
244
+ end
245
+
246
+ it 'should add the specified prefix to the backup file' do
247
+ plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup",
248
+ "backup_add_prefix" => 'backup-' }))
249
+ plugin.register
250
+
251
+ s3object = Aws::S3::Object.new('mybucket', 'testkey')
252
+ expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
253
+ expect(s3object).to_not receive(:delete)
254
+
255
+ plugin.backup_to_bucket(s3object)
256
+ end
257
+ end
258
+
259
+ it 'should support doing local backup of files' do
260
+ Stud::Temporary.directory do |backup_dir|
261
+ Stud::Temporary.file do |source_file|
262
+ backup_file = File.join(backup_dir.to_s, Pathname.new(source_file.path).basename.to_s)
263
+
264
+ plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_dir" => backup_dir }))
265
+
266
+ plugin.backup_to_dir(source_file)
267
+
268
+ expect(File.exists?(backup_file)).to eq(true)
269
+ end
270
+ end
271
+ end
272
+ end
273
+
274
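+   # Shared body for the codec/fixture contexts below; each context supplies
+   # `log`, `log_file`, and (optionally) `events_to_process`.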
+ shared_examples "generated events" do
275
+ let(:events_to_process) { 2 }
276
+
277
+ it 'should process events' do
278
+ events = fetch_events(config)
279
+ expect(events.size).to eq(events_to_process)
280
+ insist { events[0].get("[@metadata][s3]") } == {"key" => log.key }
281
+ end
282
+
283
+ it "deletes the temporary file" do
284
+ events = fetch_events(config)
285
+ expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
286
+ end
287
+ end
288
+
289
+   context 'while communicating with s3' do
+     let(:config) {
+       {
+         "access_key_id" => "1234",
+         "secret_access_key" => "secret",
+         "bucket" => "logstash-test",
+         "codec" => "json",
+       }
+     }
+
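+     # Stubbing an operation with an error name makes the stubbed client raise
+     # that error whenever the operation is invoked.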
+     %w(AccessDenied NotFound).each do |error|
+       context "while listing bucket contents, #{error} is returned" do
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               list_objects: error
+             }
+           }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+
+     %w(AccessDenied NoSuchKey).each do |error|
+       context "when retrieving an object, #{error} is returned" do
+         let(:objects) { [log] }
+         let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5) }
+
+         let(:config) {
+           {
+             "access_key_id" => "1234",
+             "secret_access_key" => "secret",
+             "bucket" => "logstash-test",
+             "codec" => "json",
+           }
+         }
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               get_object: error
+             }
+           }
+           allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+   end
+
+   context 'when working with logs' do
+     let(:objects) { [log] }
+     let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5) }
+     let(:data) { File.read(log_file) }
+
+     before do
+       Aws.config[:s3] = {
+         stub_responses: {
+           get_object: { body: data }
+         }
+       }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:object).with(log.key) { log }
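+       # Emulate the download: Object#get(:response_target => path) is expected
+       # to write the fixture bytes to the given local path.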
+       expect(log).to receive(:get).with(instance_of(Hash)) do |arg|
+         File.open(arg[:response_target], 'wb') { |s3file| s3file.write(data) }
+       end
+     end
+
+ context "when event doesn't have a `message` field" do
364
+ let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json.log') }
365
+ let(:config) {
366
+ {
367
+ "access_key_id" => "1234",
368
+ "secret_access_key" => "secret",
369
+ "bucket" => "logstash-test",
370
+ "codec" => "json",
371
+ }
372
+ }
373
+
374
+ include_examples "generated events"
375
+ end
376
+
377
+ context "when event does have a `message` field" do
378
+ let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json_with_message.log') }
379
+ let(:config) {
380
+ {
381
+ "access_key_id" => "1234",
382
+ "secret_access_key" => "secret",
383
+ "bucket" => "logstash-test",
384
+ "codec" => "json",
385
+ }
386
+ }
387
+
388
+ include_examples "generated events"
389
+ end
390
+
391
+ context "multiple compressed streams" do
392
+ let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
393
+ let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiple_compressed_streams.gz') }
394
+
395
+ include_examples "generated events" do
396
+ let(:events_to_process) { 16 }
397
+ end
398
+ end
399
+
400
+ context 'compressed' do
401
+ let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
402
+ let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') }
403
+
404
+ include_examples "generated events"
405
+ end
406
+
407
+ context 'compressed with gzip extension' do
408
+ let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
409
+ let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }
410
+
411
+ include_examples "generated events"
412
+ end
413
+
414
+ context 'plain text' do
415
+ let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
416
+
417
+ include_examples "generated events"
418
+ end
419
+
420
+ context 'multi-line' do
421
+ let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiline.log') }
422
+ let(:config) {
423
+ {
424
+ "access_key_id" => "1234",
425
+ "secret_access_key" => "secret",
426
+ "bucket" => "logstash-test",
427
+ "codec" => LogStash::Codecs::Multiline.new( {"pattern" => "__SEPARATOR__", "negate" => "true", "what" => "previous"})
428
+ }
429
+ }
430
+
431
+ include_examples "generated events"
432
+ end
433
+
434
+ context 'encoded' do
435
+ let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'invalid_utf8.gbk.log') }
436
+
437
+ include_examples "generated events"
438
+ end
439
+
440
+ context 'cloudfront' do
441
+ let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'cloudfront.log') }
442
+
443
+ it 'should extract metadata from cloudfront log' do
444
+ events = fetch_events(config)
445
+
446
+ events.each do |event|
447
+ expect(event.get('cloudfront_fields')).to eq('date time x-edge-location c-ip x-event sc-bytes x-cf-status x-cf-client-id cs-uri-stem cs-uri-query c-referrer x-page-url​ c-user-agent x-sname x-sname-query x-file-ext x-sid')
448
+ expect(event.get('cloudfront_version')).to eq('1.0')
449
+ end
450
+ end
451
+
452
+ include_examples "generated events"
453
+ end
454
+ end
455
+ end
@@ -0,0 +1,17 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/s3"
+ require "stud/temporary"
+ require "fileutils"
+
+ describe LogStash::Inputs::S3::SinceDB::File do
+   let(:file) { Stud::Temporary.file.path }
+   subject { LogStash::Inputs::S3::SinceDB::File.new(file) }
+   before do
+     FileUtils.touch(file)
+   end
+
+   it "doesn't raise an exception if the file is empty" do
+     expect { subject.read }.not_to raise_error
+   end
+ end
@@ -0,0 +1,61 @@
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/s3"
+ require "aws-sdk"
+ require "fileutils"
+ require_relative "../support/helpers"
+
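+ # These integration specs run against a real bucket and expect
+ # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_LOGSTASH_TEST_BUCKET
+ # (plus optionally AWS_REGION) in the environment.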
+ describe LogStash::Inputs::S3, :integration => true, :s3 => true do
+   before do
+     Thread.abort_on_exception = true
+
+     upload_file('../fixtures/uncompressed.log', "#{prefix}uncompressed_1.log")
+     upload_file('../fixtures/compressed.log.gz', "#{prefix}compressed_1.log.gz")
+   end
+
+   after do
+     delete_remote_files(prefix)
+     FileUtils.rm_rf(temporary_directory)
+     delete_remote_files(backup_prefix)
+   end
+
+   let(:temporary_directory) { Stud::Temporary.directory }
+   let(:prefix) { 'logstash-s3-input-prefix/' }
+
+   let(:minimal_settings) { { "access_key_id" => ENV['AWS_ACCESS_KEY_ID'],
+                              "secret_access_key" => ENV['AWS_SECRET_ACCESS_KEY'],
+                              "bucket" => ENV['AWS_LOGSTASH_TEST_BUCKET'],
+                              "region" => ENV["AWS_REGION"] || "us-east-1",
+                              "prefix" => prefix,
+                              "temporary_directory" => temporary_directory } }
+   let(:backup_prefix) { "backup/" }
+
+   it "supports a prefix to scope the remote files" do
+     events = fetch_events(minimal_settings)
+     expect(events.size).to eq(4)
+   end
+
+   it "adds a prefix to the backed-up files" do
+     fetch_events(minimal_settings.merge({ "backup_to_bucket" => ENV["AWS_LOGSTASH_TEST_BUCKET"],
+                                           "backup_add_prefix" => backup_prefix }))
+     expect(list_remote_files(backup_prefix).size).to eq(2)
+   end
+
+   it "allows you to back up to a local directory" do
+     Stud::Temporary.directory do |backup_dir|
+       fetch_events(minimal_settings.merge({ "backup_to_dir" => backup_dir }))
+       expect(Dir.glob(File.join(backup_dir, "*")).size).to eq(2)
+     end
+   end
+
+   context "remote backup" do
+     it "backs up to another bucket" do
+       fetch_events(minimal_settings.merge({ "backup_to_bucket" => "logstash-s3-input-backup"}))
+       expect(list_remote_files("", "logstash-s3-input-backup").size).to eq(2)
+     end
+
+     after do
+       delete_bucket("logstash-s3-input-backup")
+     end
+   end
+ end
@@ -0,0 +1,45 @@
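+ # Drives one full pass of the plugin: register, process every matching remote
+ # file, and return the events that were pushed onto the queue.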
+ def fetch_events(settings)
+   queue = []
+   s3 = LogStash::Inputs::S3.new(settings)
+   s3.register
+   s3.process_files(queue)
+   queue
+ end
+
+ def upload_file(local_file, remote_name)
+   bucket = s3object.bucket(ENV['AWS_LOGSTASH_TEST_BUCKET'])
+   file = File.expand_path(File.join(File.dirname(__FILE__), local_file))
+   bucket.object(remote_name).upload_file(file)
+ end
+
+ def delete_remote_files(prefix)
+   bucket = s3object.bucket(ENV['AWS_LOGSTASH_TEST_BUCKET'])
+   bucket.objects(:prefix => prefix).each { |object| object.delete }
+ end
+
+ def list_remote_files(prefix, target_bucket = ENV['AWS_LOGSTASH_TEST_BUCKET'])
+   bucket = s3object.bucket(target_bucket)
+   bucket.objects(:prefix => prefix).collect(&:key)
+ end
+
+ def delete_bucket(name)
+   s3object.bucket(name).objects.map(&:delete)
+   s3object.bucket(name).delete
+ end
+
+ def s3object
+   Aws::S3::Resource.new
+ end
+
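+ # Endless fake object listing used by the interruption spec: `each` yields
+ # generated keys forever, so iteration only stops when the plugin is stopped.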
+ class TestInfiniteS3Object
+   def each
+     counter = 1
+
+     loop do
+       yield "awesome-#{counter}"
+       counter += 1
+     end
+   end
+ end
+