logstash-input-s3-local 3.3.5

@@ -0,0 +1,455 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/s3"
+ require "logstash/codecs/multiline"
+ require "logstash/errors"
+ require "aws-sdk-resources"
+ require_relative "../support/helpers"
+ require "stud/temporary"
+ require "aws-sdk"
+ require "fileutils"
+
+ describe LogStash::Inputs::S3 do
+   let(:temporary_directory) { Stud::Temporary.pathname }
+   let(:sincedb_path) { Stud::Temporary.pathname }
+   let(:day) { 3600 * 24 }
+   let(:creds) { Aws::Credentials.new('1234', 'secret') }
+   let(:config) {
+     {
+       "access_key_id" => "1234",
+       "secret_access_key" => "secret",
+       "bucket" => "logstash-test",
+       "temporary_directory" => temporary_directory,
+       "sincedb_path" => File.join(sincedb_path, ".sincedb")
+     }
+   }
+
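+   # Stub every AWS SDK response so these specs never talk to a real S3 endpoint.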
+   before do
+     FileUtils.mkdir_p(sincedb_path)
+     Aws.config[:stub_responses] = true
+     Thread.abort_on_exception = true
+   end
+
+   context "when interrupting the plugin" do
+     let(:config) { super().merge({ "interval" => 5 }) }
+
+     before do
+       expect_any_instance_of(LogStash::Inputs::S3).to receive(:list_new_files).and_return(TestInfiniteS3Object.new)
+     end
+
+     it_behaves_like "an interruptible input plugin"
+   end
+
+   describe "#register" do
+     subject { LogStash::Inputs::S3.new(config) }
+
+     context "with temporary directory" do
+       let(:temporary_directory) { Stud::Temporary.pathname }
+
+       it "creates the directory when it doesn't exist" do
+         expect { subject.register }.to change { Dir.exist?(temporary_directory) }.from(false).to(true)
+       end
+     end
+   end
+
+   describe '#get_s3object' do
+     subject { LogStash::Inputs::S3.new(settings) }
+
+     context 'with modern access key options' do
+       let(:settings) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "proxy_uri" => "http://example.com",
+           "bucket" => "logstash-test",
+         }
+       }
+
+       it 'should instantiate AWS::S3 clients with a proxy set' do
+         expect(Aws::S3::Resource).to receive(:new).with({
+           :credentials => kind_of(Aws::Credentials),
+           :http_proxy => 'http://example.com',
+           :region => subject.region
+         })
+
+         subject.send(:get_s3object)
+       end
+     end
+
+     describe "additional_settings" do
+       context 'when force_path_style is set' do
+         let(:settings) {
+           {
+             "additional_settings" => { "force_path_style" => true },
+             "bucket" => "logstash-test",
+           }
+         }
+
+         it 'should instantiate AWS::S3 clients with force_path_style set' do
+           expect(Aws::S3::Resource).to receive(:new).with({
+             :region => subject.region,
+             :force_path_style => true
+           }).and_call_original
+
+           subject.send(:get_s3object)
+         end
+       end
+
+       context 'when an unknown setting is given' do
+         let(:settings) {
+           {
+             "additional_settings" => { "this_setting_doesnt_exist" => true },
+             "bucket" => "logstash-test",
+           }
+         }
+
+         it 'should raise an error' do
+           expect { subject.send(:get_s3object) }.to raise_error(ArgumentError)
+         end
+       end
+     end
+   end
+
+   describe "#list_new_files" do
+     before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
+
+     let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now, :content_length => 10) }
+     let(:objects_list) {
+       [
+         double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100),
+         double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50),
+         present_object
+       ]
+     }
+
+     it 'should allow user to exclude files from the s3 bucket' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
+       plugin.register
+       expect(plugin.list_new_files).to eq([present_object.key])
+     end
+
+     it 'should support not providing an exclude pattern' do
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+       expect(plugin.list_new_files).to eq(objects_list.map(&:key))
+     end
+
+     context 'when all files are excluded from a bucket' do
+       let(:objects_list) {
+         [
+           double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100),
+           double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50),
+         ]
+       }
+
+       it 'should not log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
+         plugin.register
+         allow(plugin.logger).to receive(:debug).with(anything, anything)
+
+         expect(plugin.logger).not_to receive(:info).with(/No files found/, anything)
+         expect(plugin.logger).to receive(:debug).with(/Ignoring/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context 'with an empty bucket' do
+       let(:objects_list) { [] }
+
+       it 'should log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config)
+         plugin.register
+         expect(plugin.logger).to receive(:info).with(/No files found/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context "If the bucket is the same as the backup bucket" do
+       it 'should ignore files from the bucket if they match the backup prefix' do
+         objects_list = [
+           double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5),
+           present_object
+         ]
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list }
+
+         plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'mybackup',
+                                                          'backup_to_bucket' => config['bucket'] }))
+         plugin.register
+         expect(plugin.list_new_files).to eq([present_object.key])
+       end
+     end
+
+     it 'should ignore files older than X' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'exclude-this-file' }))
+
+       expect_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).exactly(objects_list.size) { Time.now - day }
+       plugin.register
+
+       expect(plugin.list_new_files).to eq([present_object.key])
+     end
+
+     it 'should ignore a file if it matches the prefix' do
+       prefix = 'mysource/'
+
+       objects_list = [
+         double(:key => prefix, :last_modified => Time.now, :content_length => 5),
+         present_object
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects).with(:prefix => prefix) { objects_list }
+
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'prefix' => prefix }))
+       plugin.register
+       expect(plugin.list_new_files).to eq([present_object.key])
+     end
+
+     it 'should return objects sorted by last_modified date, oldest first' do
+       objects = [
+         double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5),
+         double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5),
+         double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5)
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+       expect(plugin.list_new_files).to eq(['TWO_DAYS_AGO', 'YESTERDAY', 'TODAY'])
+     end
+
+     describe "when backing up to s3" do
+       it 'should copy to another s3 bucket when keeping the original file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup" }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to_not receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+
+       it 'should copy to another s3 bucket when deleting the original file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup", "delete" => true }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+
+       it 'should add the specified prefix to the backup file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup",
+                                                          "backup_add_prefix" => 'backup-' }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to_not receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+     end
+
+     it 'should support doing local backup of files' do
+       Stud::Temporary.directory do |backup_dir|
+         Stud::Temporary.file do |source_file|
+           backup_file = File.join(backup_dir.to_s, Pathname.new(source_file.path).basename.to_s)
+
+           plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_dir" => backup_dir }))
+
+           plugin.backup_to_dir(source_file)
+
+           expect(File.exist?(backup_file)).to eq(true)
+         end
+       end
+     end
+   end
+
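+   # Shared examples: each log-fixture context below supplies a `log_file` and
+   # expects `events_to_process` events (2 by default), with the downloaded
+   # temporary file cleaned up afterwards.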
+   shared_examples "generated events" do
+     let(:events_to_process) { 2 }
+
+     it 'should process events' do
+       events = fetch_events(config)
+       expect(events.size).to eq(events_to_process)
+       insist { events[0].get("[@metadata][s3]") } == { "key" => log.key }
+     end
+
+     it "deletes the temporary file" do
+       events = fetch_events(config)
+       expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
+     end
+   end
+
+   context 'while communicating with s3' do
+     let(:config) {
+       {
+         "access_key_id" => "1234",
+         "secret_access_key" => "secret",
+         "bucket" => "logstash-test",
+         "codec" => "json",
+       }
+     }
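+
+     # Telling the SDK stubs to return an error name makes the stubbed client
+     # raise that error class; the plugin is expected to log it and emit no events.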
+     %w(AccessDenied NotFound).each do |error|
+       context "while listing bucket contents, #{error} is returned" do
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               list_objects: error
+             }
+           }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+
+     %w(AccessDenied NoSuchKey).each do |error|
+       context "when retrieving an object, #{error} is returned" do
+         let(:objects) { [log] }
+         let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5) }
+
+         let(:config) {
+           {
+             "access_key_id" => "1234",
+             "secret_access_key" => "secret",
+             "bucket" => "logstash-test",
+             "codec" => "json",
+           }
+         }
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               get_object: error
+             }
+           }
+           allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+   end
+
+   context 'when working with logs' do
+     let(:objects) { [log] }
+     let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5) }
+     let(:data) { File.read(log_file) }
+
+     before do
+       Aws.config[:s3] = {
+         stub_responses: {
+           get_object: { body: data }
+         }
+       }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:object).with(log.key) { log }
+       expect(log).to receive(:get).with(instance_of(Hash)) do |arg|
+         File.open(arg[:response_target], 'wb') { |s3file| s3file.write(data) }
+       end
+     end
+
+     context "when event doesn't have a `message` field" do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => "json",
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context "when event does have a `message` field" do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json_with_message.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => "json",
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context "multiple compressed streams" do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiple_compressed_streams.gz') }
+
+       include_examples "generated events" do
+         let(:events_to_process) { 16 }
+       end
+     end
+
+     context 'compressed' do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') }
+
+       include_examples "generated events"
+     end
+
+     context 'compressed with gzip extension' do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }
+
+       include_examples "generated events"
+     end
+
+     context 'plain text' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       include_examples "generated events"
+     end
+
+     context 'multi-line' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiline.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => LogStash::Codecs::Multiline.new({ "pattern" => "__SEPARATOR__", "negate" => "true", "what" => "previous" })
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context 'encoded' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'invalid_utf8.gbk.log') }
+
+       include_examples "generated events"
+     end
+
+     context 'cloudfront' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'cloudfront.log') }
+
+       it 'should extract metadata from cloudfront log' do
+         events = fetch_events(config)
+
+         events.each do |event|
+           expect(event.get('cloudfront_fields')).to eq('date time x-edge-location c-ip x-event sc-bytes x-cf-status x-cf-client-id cs-uri-stem cs-uri-query c-referrer x-page-url c-user-agent x-sname x-sname-query x-file-ext x-sid')
+           expect(event.get('cloudfront_version')).to eq('1.0')
+         end
+       end
+
+       include_examples "generated events"
+     end
+   end
+ end
@@ -0,0 +1,17 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/s3"
+ require "stud/temporary"
+ require "fileutils"
+
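+ # SinceDB persists the timestamp of the last processed object so a restart
+ # only picks up files modified after the previous run.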
+ describe LogStash::Inputs::S3::SinceDB::File do
+   let(:file) { Stud::Temporary.file.path }
+   subject { LogStash::Inputs::S3::SinceDB::File.new(file) }
+   before do
+     FileUtils.touch(file)
+   end
+
+   it "doesn't raise an exception if the file is empty" do
+     expect { subject.read }.not_to raise_error
+   end
+ end
@@ -0,0 +1,61 @@
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/s3"
+ require "aws-sdk"
+ require "fileutils"
+ require_relative "../support/helpers"
+
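+ # These specs run against a real bucket: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
+ # and AWS_LOGSTASH_TEST_BUCKET must be set (AWS_REGION defaults to us-east-1).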
+ describe LogStash::Inputs::S3, :integration => true, :s3 => true do
+   before do
+     Thread.abort_on_exception = true
+
+     upload_file('../fixtures/uncompressed.log', "#{prefix}uncompressed_1.log")
+     upload_file('../fixtures/compressed.log.gz', "#{prefix}compressed_1.log.gz")
+   end
+
+   after do
+     delete_remote_files(prefix)
+     FileUtils.rm_rf(temporary_directory)
+     delete_remote_files(backup_prefix)
+   end
+
+   let(:temporary_directory) { Stud::Temporary.directory }
+   let(:prefix) { 'logstash-s3-input-prefix/' }
+
+   let(:minimal_settings) { { "access_key_id" => ENV['AWS_ACCESS_KEY_ID'],
+                              "secret_access_key" => ENV['AWS_SECRET_ACCESS_KEY'],
+                              "bucket" => ENV['AWS_LOGSTASH_TEST_BUCKET'],
+                              "region" => ENV["AWS_REGION"] || "us-east-1",
+                              "prefix" => prefix,
+                              "temporary_directory" => temporary_directory } }
+   let(:backup_prefix) { "backup/" }
+
+   it "supports a prefix to scope the remote files" do
+     events = fetch_events(minimal_settings)
+     expect(events.size).to eq(4)
+   end
+
+   it "adds a prefix to the backed-up files" do
+     fetch_events(minimal_settings.merge({ "backup_to_bucket" => ENV["AWS_LOGSTASH_TEST_BUCKET"],
+                                           "backup_add_prefix" => backup_prefix }))
+     expect(list_remote_files(backup_prefix).size).to eq(2)
+   end
+
+   it "allows you to back up to a local directory" do
+     Stud::Temporary.directory do |backup_dir|
+       fetch_events(minimal_settings.merge({ "backup_to_dir" => backup_dir }))
+       expect(Dir.glob(File.join(backup_dir, "*")).size).to eq(2)
+     end
+   end
+
+   context "remote backup" do
+     it "backs up to another bucket" do
+       fetch_events(minimal_settings.merge({ "backup_to_bucket" => "logstash-s3-input-backup" }))
+       expect(list_remote_files("", "logstash-s3-input-backup").size).to eq(2)
+     end
+
+     after do
+       delete_bucket("logstash-s3-input-backup")
+     end
+   end
+ end
@@ -0,0 +1,45 @@
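+ # Drive the plugin synchronously: register it, let it process every matching
+ # object once, and return the events it pushed onto the queue, e.g.
+ # fetch_events(minimal_settings) in the integration specs.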
+ def fetch_events(settings)
+   queue = []
+   s3 = LogStash::Inputs::S3.new(settings)
+   s3.register
+   s3.process_files(queue)
+   queue
+ end
+
+ # Upload a fixture from the local fixtures directory to the integration test bucket.
+ def upload_file(local_file, remote_name)
+   bucket = s3object.bucket(ENV['AWS_LOGSTASH_TEST_BUCKET'])
+   file = File.expand_path(File.join(File.dirname(__FILE__), local_file))
+   bucket.object(remote_name).upload_file(file)
+ end
+
+ def delete_remote_files(prefix)
+   bucket = s3object.bucket(ENV['AWS_LOGSTASH_TEST_BUCKET'])
+   bucket.objects(:prefix => prefix).each { |object| object.delete }
+ end
+
+ def list_remote_files(prefix, target_bucket = ENV['AWS_LOGSTASH_TEST_BUCKET'])
+   bucket = s3object.bucket(target_bucket)
+   bucket.objects(:prefix => prefix).collect(&:key)
+ end
+
+ def delete_bucket(name)
+   s3object.bucket(name).objects.map(&:delete)
+   s3object.bucket(name).delete
+ end
+
+ def s3object
+   Aws::S3::Resource.new
+ end
+
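+ # Endless fake bucket listing backing the interruption spec: iteration never
+ # terminates, so the plugin must be stoppable while mid-listing.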
+ class TestInfiniteS3Object
+   def each
+     counter = 1
+
+     loop do
+       yield "awesome-#{counter}"
+       counter += 1
+     end
+   end
+ end
+