logstash-integration-aws 0.1.0.pre

Files changed (89)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +15 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/docs/codec-cloudfront.asciidoc +53 -0
  10. data/docs/codec-cloudtrail.asciidoc +45 -0
  11. data/docs/index.asciidoc +38 -0
  12. data/docs/input-cloudwatch.asciidoc +320 -0
  13. data/docs/input-s3.asciidoc +346 -0
  14. data/docs/input-sqs.asciidoc +287 -0
  15. data/docs/output-cloudwatch.asciidoc +321 -0
  16. data/docs/output-s3.asciidoc +442 -0
  17. data/docs/output-sns.asciidoc +166 -0
  18. data/docs/output-sqs.asciidoc +242 -0
  19. data/lib/logstash/codecs/cloudfront.rb +84 -0
  20. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  21. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  22. data/lib/logstash/inputs/s3.rb +466 -0
  23. data/lib/logstash/inputs/sqs.rb +196 -0
  24. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  25. data/lib/logstash/outputs/s3/file_repository.rb +121 -0
  26. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  27. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  28. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  29. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  30. data/lib/logstash/outputs/s3/temporary_file_factory.rb +129 -0
  31. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  32. data/lib/logstash/outputs/s3/uploader.rb +74 -0
  33. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  34. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  35. data/lib/logstash/outputs/s3.rb +405 -0
  36. data/lib/logstash/outputs/sns.rb +133 -0
  37. data/lib/logstash/outputs/sqs.rb +167 -0
  38. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  39. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  40. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  41. data/logstash-integration-aws.gemspec +52 -0
  42. data/spec/codecs/cloudfront_spec.rb +92 -0
  43. data/spec/codecs/cloudtrail_spec.rb +56 -0
  44. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  45. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  46. data/spec/fixtures/cloudfront.log +4 -0
  47. data/spec/fixtures/compressed.log.gee.zip +0 -0
  48. data/spec/fixtures/compressed.log.gz +0 -0
  49. data/spec/fixtures/compressed.log.gzip +0 -0
  50. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  51. data/spec/fixtures/json.log +2 -0
  52. data/spec/fixtures/json_with_message.log +2 -0
  53. data/spec/fixtures/multiline.log +6 -0
  54. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  55. data/spec/fixtures/uncompressed.log +2 -0
  56. data/spec/inputs/cloudwatch_spec.rb +85 -0
  57. data/spec/inputs/s3_spec.rb +610 -0
  58. data/spec/inputs/sincedb_spec.rb +17 -0
  59. data/spec/inputs/sqs_spec.rb +324 -0
  60. data/spec/integration/cloudwatch_spec.rb +25 -0
  61. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  62. data/spec/integration/gzip_file_spec.rb +62 -0
  63. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  64. data/spec/integration/outputs/sqs_spec.rb +98 -0
  65. data/spec/integration/restore_from_crash_spec.rb +67 -0
  66. data/spec/integration/s3_spec.rb +66 -0
  67. data/spec/integration/size_rotation_spec.rb +59 -0
  68. data/spec/integration/sqs_spec.rb +110 -0
  69. data/spec/integration/stress_test_spec.rb +60 -0
  70. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  71. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  72. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  73. data/spec/outputs/cloudwatch_spec.rb +38 -0
  74. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  75. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  76. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  77. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  78. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  79. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  80. data/spec/outputs/s3/uploader_spec.rb +69 -0
  81. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  82. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  83. data/spec/outputs/s3_spec.rb +232 -0
  84. data/spec/outputs/sns_spec.rb +160 -0
  85. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  86. data/spec/spec_helper.rb +8 -0
  87. data/spec/support/helpers.rb +119 -0
  88. data/spec/unit/outputs/sqs_spec.rb +247 -0
  89. metadata +467 -0
data/spec/inputs/s3_spec.rb
@@ -0,0 +1,610 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/devutils/rspec/shared_examples"
+ require "logstash/inputs/s3"
+ require "logstash/codecs/multiline"
+ require "logstash/errors"
+ require "stud/temporary"
+ require_relative "../support/helpers"
+ require "fileutils"
+ require 'logstash/plugin_mixins/ecs_compatibility_support/spec_helper'
+
+ describe LogStash::Inputs::S3 do
+   let(:temporary_directory) { Stud::Temporary.pathname }
+   let(:sincedb_path) { Stud::Temporary.pathname }
+   let(:day) { 3600 * 24 }
+   let(:creds) { Aws::Credentials.new('1234', 'secret') }
+   let(:config) {
+     {
+       "access_key_id" => "1234",
+       "secret_access_key" => "secret",
+       "bucket" => "logstash-test",
+       "temporary_directory" => temporary_directory,
+       "sincedb_path" => File.join(sincedb_path, ".sincedb")
+     }
+   }
+   let(:cutoff) { LogStash::Inputs::S3::CUTOFF_SECOND }
+
+   before do
+     FileUtils.mkdir_p(sincedb_path)
+     Aws.config[:stub_responses] = true
+     Thread.abort_on_exception = true
+   end
+
+   context "when interrupting the plugin" do
+     let(:config) { super().merge({ "interval" => 5 }) }
+     let(:s3_obj) { double(:key => "awesome-key", :last_modified => Time.now.round, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil))) }
+
+     before do
+       expect_any_instance_of(LogStash::Inputs::S3).to receive(:list_new_files).and_return(TestInfiniteS3Object.new(s3_obj))
+     end
+
+     it_behaves_like "an interruptible input plugin" do
+       let(:allowed_lag) { 16 } if LOGSTASH_VERSION.split('.').first.to_i <= 6
+     end
+   end
+
+   describe "#register" do
+     subject { LogStash::Inputs::S3.new(config) }
+
+     context "with temporary directory" do
+       let(:temporary_directory) { Stud::Temporary.pathname }
+
+       it "creates the directory when it doesn't exist" do
+         expect { subject.register }.to change { Dir.exist?(temporary_directory) }.from(false).to(true)
+       end
+     end
+   end
+
+   describe '#get_s3object' do
+     subject { LogStash::Inputs::S3.new(settings) }
+
+     context 'with modern access key options' do
+       let(:settings) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "proxy_uri" => "http://example.com",
+           "bucket" => "logstash-test",
+         }
+       }
+
+       it 'should instantiate AWS::S3 clients with a proxy set' do
+         expect(Aws::S3::Resource).to receive(:new).with({
+           :credentials => kind_of(Aws::Credentials),
+           :http_proxy => 'http://example.com',
+           :region => subject.region
+         })
+
+         subject.send(:get_s3object)
+       end
+     end
+
+     describe "additional_settings" do
+       context "supported settings" do
+         let(:settings) {
+           {
+             "additional_settings" => { "force_path_style" => 'true', "ssl_verify_peer" => 'false', "profile" => 'logstash' },
+             "bucket" => "logstash-test",
+           }
+         }
+
+         it 'should instantiate AWS::S3 clients with force_path_style set' do
+           expect(Aws::S3::Resource).to receive(:new).with({
+             :region => subject.region,
+             :force_path_style => true, :ssl_verify_peer => false, :profile => 'logstash'
+           }).and_call_original
+
+           subject.send(:get_s3object)
+         end
+       end
+
+       context 'when an unknown setting is given' do
+         let(:settings) {
+           {
+             "additional_settings" => { "this_setting_doesnt_exist" => true },
+             "bucket" => "logstash-test",
+           }
+         }
+
+         it 'should raise an error' do
+           expect { subject.send(:get_s3object) }.to raise_error(ArgumentError)
+         end
+       end
+     end
+   end
+
+   describe "#list_new_files" do
+     before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
+
+     let!(:present_object_after_cutoff) { double(:key => 'this-should-not-be-present', :last_modified => Time.now, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil))) }
+     let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil))) }
+     let!(:archived_object) { double(:key => 'this-should-be-archived', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil))) }
+     let!(:deep_archived_object) { double(:key => 'this-should-be-archived', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil))) }
+     let!(:restored_object) { double(:key => 'this-should-be-restored-from-archive', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"'))) }
+     let!(:deep_restored_object) { double(:key => 'this-should-be-restored-from-deep-archive', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'DEEP_ARCHIVE', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"'))) }
+     let(:objects_list) {
+       [
+         double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
+         double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
+         archived_object,
+         restored_object,
+         deep_restored_object,
+         present_object,
+         present_object_after_cutoff
+       ]
+     }
+
+     it 'should allow user to exclude files from the s3 bucket' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to_not include('exclude-this-file-1') # matches exclude pattern
+       expect(files).to_not include('exclude/logstash') # matches exclude pattern
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(3)
+     end
+
+     it 'should support not providing an exclude pattern' do
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to include('exclude-this-file-1') # no exclude pattern given
+       expect(files).to include('exclude/logstash') # no exclude pattern given
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(5)
+     end
+
+     context 'when all files are excluded from a bucket' do
+       let(:objects_list) {
+         [
+           double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
+           double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
+         ]
+       }
+
+       it 'should not log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
+         plugin.register
+         allow(plugin.logger).to receive(:debug).with(anything, anything)
+
+         expect(plugin.logger).not_to receive(:info).with(/No files found/, anything)
+         expect(plugin.logger).to receive(:debug).with(/Ignoring/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context 'with an empty bucket' do
+       let(:objects_list) { [] }
+
+       it 'should log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config)
+         plugin.register
+         allow(plugin.logger).to receive(:info).with(/Using the provided sincedb_path/, anything)
+         expect(plugin.logger).to receive(:info).with(/No files found/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context "If the bucket is the same as the backup bucket" do
+       it 'should ignore files from the bucket if they match the backup prefix' do
+         objects_list = [
+           double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+           present_object
+         ]
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list }
+
+         plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'mybackup',
+                                                          'backup_to_bucket' => config['bucket'] }))
+         plugin.register
+
+         files = plugin.list_new_files.map { |item| item.key }
+         expect(files).to include(present_object.key)
+         expect(files).to_not include('mybackup-log-1') # matches backup prefix
+         expect(files.size).to eq(1)
+       end
+     end
+
+     it 'should ignore files older than X' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'exclude-this-file' }))
+
+       allow_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).and_return(Time.now - day)
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to_not include('exclude-this-file-1') # too old
+       expect(files).to_not include('exclude/logstash') # too old
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(3)
+     end
+
+     it 'should ignore the file if it matches the prefix' do
+       prefix = 'mysource/'
+
+       objects_list = [
+         double(:key => prefix, :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+         present_object
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects).with(:prefix => prefix) { objects_list }
+
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'prefix' => prefix }))
+       plugin.register
+       expect(plugin.list_new_files.map { |item| item.key }).to eq([present_object.key])
+     end
+
+     it 'should return objects sorted by last_modified date, oldest first' do
+       objects = [
+         double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TODAY_BEFORE_CUTOFF', :last_modified => Time.now - cutoff, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD')
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+       expect(plugin.list_new_files.map { |item| item.key }).to eq(['TWO_DAYS_AGO', 'YESTERDAY', 'TODAY_BEFORE_CUTOFF'])
+     end
+
+     describe "when doing backup on the s3" do
+       it 'should copy to another s3 bucket when keeping the original file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup" }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to_not receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+
+       it 'should copy to another s3 bucket when deleting the original file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup", "delete" => true }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+
+       it 'should add the specified prefix to the backup file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup",
+                                                          "backup_add_prefix" => 'backup-' }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to_not receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+     end
+
+     it 'should support doing local backup of files' do
+       Stud::Temporary.directory do |backup_dir|
+         Stud::Temporary.file do |source_file|
+           backup_file = File.join(backup_dir.to_s, Pathname.new(source_file.path).basename.to_s)
+
+           plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_dir" => backup_dir }))
+
+           plugin.backup_to_dir(source_file)
+
+           expect(File.exist?(backup_file)).to eq(true)
+         end
+       end
+     end
+   end
+
+   shared_examples "generated events" do
+     let(:events_to_process) { 2 }
+
+     it 'should process events' do
+       events = fetch_events(config)
+       expect(events.size).to eq(events_to_process)
+       expect(events[0].get("[@metadata][s3][key]")).to eql log.key
+       expect(events[1].get("[@metadata][s3][key]")).to eql log.key
+     end
+
+     it "deletes the temporary file" do
+       events = fetch_events(config)
+       expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
+     end
+   end
+
+   context 'while communicating with s3' do
+     let(:config) {
+       {
+         "access_key_id" => "1234",
+         "secret_access_key" => "secret",
+         "bucket" => "logstash-test",
+         "codec" => "json",
+       }
+     }
+
+     %w(AccessDenied NotFound).each do |error|
+       context "while listing bucket contents, #{error} is returned" do
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               list_objects: error
+             }
+           }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+
+     %w(AccessDenied NoSuchKey).each do |error|
+       context "when retrieving an object, #{error} is returned" do
+         let(:objects) { [log] }
+         let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+         let(:config) {
+           {
+             "access_key_id" => "1234",
+             "secret_access_key" => "secret",
+             "bucket" => "logstash-test",
+             "codec" => "json",
+           }
+         }
+
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               get_object: error
+             }
+           }
+           allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+   end
+
+   context 'when working with logs' do
+     let(:objects) { [log] }
+     let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :data => { "etag" => 'c2c966251da0bc3229d12c2642ba50a4' }, :storage_class => 'STANDARD') }
+     let(:data) { File.read(log_file) }
+
+     before do
+       Aws.config[:s3] = {
+         stub_responses: {
+           get_object: { body: data }
+         }
+       }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:object).with(log.key) { log }
+       expect(log).to receive(:get).with(instance_of(Hash)) do |arg|
+         File.open(arg[:response_target], 'wb') { |s3file| s3file.write(data) }
+       end
+     end
+
+     context "when event doesn't have a `message` field" do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => "json",
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context "when event does have a `message` field" do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json_with_message.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => "json",
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context "multiple compressed streams" do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiple_compressed_streams.gz') }
+
+       include_examples "generated events" do
+         let(:events_to_process) { 16 }
+       end
+     end
+
+     context 'compressed' do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') }
+
+       include_examples "generated events"
+     end
+
+     context 'compressed with gzip extension and using default gzip_pattern option' do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }
+
+       include_examples "generated events"
+     end
+
+     context 'compressed with gzip extension and using custom gzip_pattern option' do
+       let(:config) { super().merge({ "gzip_pattern" => "gee.zip$" }) }
+       let(:log) { double(:key => 'log.gee.zip', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gee.zip') }
+
+       include_examples "generated events"
+     end
+
+     context 'plain text' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       include_examples "generated events"
+     end
+
+     context 'multi-line' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiline.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => LogStash::Codecs::Multiline.new({ "pattern" => "__SEPARATOR__", "negate" => "true", "what" => "previous" })
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context 'encoded' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'invalid_utf8.gbk.log') }
+
+       include_examples "generated events"
+     end
+
+     context 'cloudfront' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'cloudfront.log') }
+
+       describe "metadata", :ecs_compatibility_support, :aggregate_failures do
+         ecs_compatibility_matrix(:disabled, :v1) do |ecs_select|
+           before(:each) do
+             allow_any_instance_of(described_class).to receive(:ecs_compatibility).and_return(ecs_compatibility)
+           end
+
+           it 'should extract metadata from cloudfront log' do
+             events = fetch_events(config)
+
+             events.each do |event|
+               expect(event.get(ecs_select[disabled: "cloudfront_fields", v1: "[@metadata][s3][cloudfront][fields]"])).to eq('date time x-edge-location c-ip x-event sc-bytes x-cf-status x-cf-client-id cs-uri-stem cs-uri-query c-referrer x-page-url c-user-agent x-sname x-sname-query x-file-ext x-sid')
+               expect(event.get(ecs_select[disabled: "cloudfront_version", v1: "[@metadata][s3][cloudfront][version]"])).to eq('1.0')
+             end
+           end
+         end
+       end
+
+       include_examples "generated events"
+     end
+
+     context 'when include_object_properties is set to true' do
+       let(:config) { super().merge({ "include_object_properties" => true }) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       it 'should extract object properties onto [@metadata][s3]' do
+         events = fetch_events(config)
+         events.each do |event|
+           expect(event.get('[@metadata][s3]')).to include(log.data)
+         end
+       end
+
+       include_examples "generated events"
+     end
+
+     context 'when include_object_properties is set to false' do
+       let(:config) { super().merge({ "include_object_properties" => false }) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       it 'should NOT extract object properties onto [@metadata][s3]' do
+         events = fetch_events(config)
+         events.each do |event|
+           expect(event.get('[@metadata][s3]')).to_not include(log.data)
+         end
+       end
+
+       include_examples "generated events"
+     end
+   end
+
+   describe "data loss" do
+     let(:s3_plugin) { LogStash::Inputs::S3.new(config) }
+     let(:queue) { [] }
+
+     before do
+       s3_plugin.register
+     end
+
+     context 'events come after cutoff time' do
+       it 'should be processed in the next cycle' do
+         s3_objects = [
+           double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now.round - 2 * day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY_BEFORE_CUTOFF', :last_modified => Time.now.round - cutoff, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD')
+         ]
+         size = s3_objects.length
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { s3_objects }
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:object).and_return(*s3_objects)
+         expect(s3_plugin).to receive(:process_log).at_least(size).and_call_original
+         expect(s3_plugin).to receive(:stop?).and_return(false).at_least(size)
+         expect(s3_plugin).to receive(:download_remote_file).and_return(true).at_least(size)
+         expect(s3_plugin).to receive(:process_local_log).and_return(true).at_least(size)
+
+         # first iteration
+         s3_plugin.process_files(queue)
+
+         # second iteration
+         sleep(cutoff + 1)
+         s3_plugin.process_files(queue)
+       end
+     end
+
+     context 's3 object updated after getting summary' do
+       it 'should not update sincedb' do
+         s3_summary = [
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round - (cutoff * 10), :content_length => 5, :storage_class => 'STANDARD')
+         ]
+
+         s3_objects = [
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY_UPDATED', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD')
+         ]
+
+         size = s3_objects.length
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { s3_summary }
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:object).and_return(*s3_objects)
+         expect(s3_plugin).to receive(:process_log).at_least(size).and_call_original
+         expect(s3_plugin).to receive(:stop?).and_return(false).at_least(size)
+         expect(s3_plugin).to receive(:download_remote_file).and_return(true).at_least(size)
+         expect(s3_plugin).to receive(:process_local_log).and_return(true).at_least(size)
+
+         s3_plugin.process_files(queue)
+         expect(s3_plugin.send(:sincedb).read).to eq(s3_summary[0].last_modified)
+       end
+     end
+   end
+ end
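
Note: the examples above call fetch_events from data/spec/support/helpers.rb (item 87 in the file list), whose body is not shown in this diff. A minimal sketch of what such a helper has to do — build the plugin from a settings hash, register it, run one polling cycle against the stubbed bucket, and return the collected events — might look like the following; this is a hypothetical approximation, and the shipped helper may differ in detail.

  # Hypothetical approximation of fetch_events (spec/support/helpers.rb).
  def fetch_events(settings)
    queue = []                            # stands in for the Logstash pipeline queue
    plugin = LogStash::Inputs::S3.new(settings)
    plugin.register                       # validates config, creates temporary_directory
    plugin.process_files(queue)           # one listing/download cycle against the stubbed bucket
    queue                                 # the events the specs assert against
  end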
data/spec/inputs/sincedb_spec.rb
@@ -0,0 +1,17 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/s3"
+ require "stud/temporary"
+ require "fileutils"
+
+ describe LogStash::Inputs::S3::SinceDB::File do
+   let(:file) { Stud::Temporary.file.path }
+   subject { LogStash::Inputs::S3::SinceDB::File.new(file) }
+   before do
+     FileUtils.touch(file)
+   end
+
+   it "doesn't raise an exception if the file is empty" do
+     expect { subject.read }.not_to raise_error
+   end
+ end
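
Note: both spec files hinge on LogStash::Inputs::S3::CUTOFF_SECOND. The "data loss" examples assume the listing deliberately skips very fresh objects, so an object modified while a cycle is running is deferred rather than recorded in the sincedb prematurely. The following is an illustrative Ruby sketch of the selection rule those examples assert, not the plugin's actual implementation; the constant's value here is made up.

  # Illustrative only; the plugin applies this rule inside list_new_files.
  CUTOFF_SECOND = 3 # hypothetical value for the sketch

  def processable?(object, sincedb_time, now = Time.now)
    # Skip objects the sincedb says we have already handled...
    return false if object.last_modified <= sincedb_time
    # ...and defer anything modified inside the cutoff window: it is picked
    # up on the next cycle, once its timestamp is safely in the past.
    object.last_modified <= now - CUTOFF_SECOND
  end

This is why 'TODAY_BEFORE_CUTOFF' (modified exactly cutoff seconds ago) is listed while 'TODAY' is not, and why the second cycle after sleep(cutoff + 1) picks up the deferred objects.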