logstash-integration-aws 0.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +15 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/docs/codec-cloudfront.asciidoc +53 -0
  10. data/docs/codec-cloudtrail.asciidoc +45 -0
  11. data/docs/index.asciidoc +38 -0
  12. data/docs/input-cloudwatch.asciidoc +320 -0
  13. data/docs/input-s3.asciidoc +346 -0
  14. data/docs/input-sqs.asciidoc +287 -0
  15. data/docs/output-cloudwatch.asciidoc +321 -0
  16. data/docs/output-s3.asciidoc +442 -0
  17. data/docs/output-sns.asciidoc +166 -0
  18. data/docs/output-sqs.asciidoc +242 -0
  19. data/lib/logstash/codecs/cloudfront.rb +84 -0
  20. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  21. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  22. data/lib/logstash/inputs/s3.rb +466 -0
  23. data/lib/logstash/inputs/sqs.rb +196 -0
  24. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  25. data/lib/logstash/outputs/s3/file_repository.rb +121 -0
  26. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  27. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  28. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  29. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  30. data/lib/logstash/outputs/s3/temporary_file_factory.rb +129 -0
  31. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  32. data/lib/logstash/outputs/s3/uploader.rb +74 -0
  33. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  34. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  35. data/lib/logstash/outputs/s3.rb +405 -0
  36. data/lib/logstash/outputs/sns.rb +133 -0
  37. data/lib/logstash/outputs/sqs.rb +167 -0
  38. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  39. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  40. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  41. data/logstash-integration-aws.gemspec +52 -0
  42. data/spec/codecs/cloudfront_spec.rb +92 -0
  43. data/spec/codecs/cloudtrail_spec.rb +56 -0
  44. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  45. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  46. data/spec/fixtures/cloudfront.log +4 -0
  47. data/spec/fixtures/compressed.log.gee.zip +0 -0
  48. data/spec/fixtures/compressed.log.gz +0 -0
  49. data/spec/fixtures/compressed.log.gzip +0 -0
  50. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  51. data/spec/fixtures/json.log +2 -0
  52. data/spec/fixtures/json_with_message.log +2 -0
  53. data/spec/fixtures/multiline.log +6 -0
  54. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  55. data/spec/fixtures/uncompressed.log +2 -0
  56. data/spec/inputs/cloudwatch_spec.rb +85 -0
  57. data/spec/inputs/s3_spec.rb +610 -0
  58. data/spec/inputs/sincedb_spec.rb +17 -0
  59. data/spec/inputs/sqs_spec.rb +324 -0
  60. data/spec/integration/cloudwatch_spec.rb +25 -0
  61. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  62. data/spec/integration/gzip_file_spec.rb +62 -0
  63. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  64. data/spec/integration/outputs/sqs_spec.rb +98 -0
  65. data/spec/integration/restore_from_crash_spec.rb +67 -0
  66. data/spec/integration/s3_spec.rb +66 -0
  67. data/spec/integration/size_rotation_spec.rb +59 -0
  68. data/spec/integration/sqs_spec.rb +110 -0
  69. data/spec/integration/stress_test_spec.rb +60 -0
  70. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  71. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  72. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  73. data/spec/outputs/cloudwatch_spec.rb +38 -0
  74. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  75. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  76. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  77. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  78. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  79. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  80. data/spec/outputs/s3/uploader_spec.rb +69 -0
  81. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  82. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  83. data/spec/outputs/s3_spec.rb +232 -0
  84. data/spec/outputs/sns_spec.rb +160 -0
  85. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  86. data/spec/spec_helper.rb +8 -0
  87. data/spec/support/helpers.rb +119 -0
  88. data/spec/unit/outputs/sqs_spec.rb +247 -0
  89. metadata +467 -0
@@ -0,0 +1,610 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/devutils/rspec/shared_examples"
+ require "logstash/inputs/s3"
+ require "logstash/codecs/multiline"
+ require "logstash/errors"
+ require "stud/temporary"
+ require_relative "../support/helpers"
+ require "fileutils"
+ require 'logstash/plugin_mixins/ecs_compatibility_support/spec_helper'
+
+ describe LogStash::Inputs::S3 do
+   let(:temporary_directory) { Stud::Temporary.pathname }
+   let(:sincedb_path) { Stud::Temporary.pathname }
+   let(:day) { 3600 * 24 }
+   let(:creds) { Aws::Credentials.new('1234', 'secret') }
+   let(:config) {
+     {
+       "access_key_id" => "1234",
+       "secret_access_key" => "secret",
+       "bucket" => "logstash-test",
+       "temporary_directory" => temporary_directory,
+       "sincedb_path" => File.join(sincedb_path, ".sincedb")
+     }
+   }
+   let(:cutoff) { LogStash::Inputs::S3::CUTOFF_SECOND }
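+
+   # For reference, the settings above correspond to a pipeline definition
+   # roughly like the following sketch (the credentials are stub values and
+   # the paths are illustrative):
+   #
+   #   input {
+   #     s3 {
+   #       access_key_id       => "1234"
+   #       secret_access_key   => "secret"
+   #       bucket              => "logstash-test"
+   #       temporary_directory => "/path/to/tmp"
+   #       sincedb_path        => "/path/to/.sincedb"
+   #     }
+   #   }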
27
+
28
+
29
+ before do
30
+ FileUtils.mkdir_p(sincedb_path)
31
+ Aws.config[:stub_responses] = true
32
+ Thread.abort_on_exception = true
33
+ end
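+   # Aws.config[:stub_responses] = true (set above) puts every AWS SDK v2
+   # client built by these examples into stub mode: API calls return canned
+   # responses instead of hitting the network, so no real credentials or
+   # buckets are required.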
+
+   context "when interrupting the plugin" do
+     let(:config) { super().merge({ "interval" => 5 }) }
+     let(:s3_obj) { double(:key => "awesome-key", :last_modified => Time.now.round, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+
+     before do
+       expect_any_instance_of(LogStash::Inputs::S3).to receive(:list_new_files).and_return(TestInfiniteS3Object.new(s3_obj))
+     end
+
+     it_behaves_like "an interruptible input plugin" do
+       let(:allowed_lag) { 16 } if LOGSTASH_VERSION.split('.').first.to_i <= 6
+     end
+   end
+
+   describe "#register" do
+     subject { LogStash::Inputs::S3.new(config) }
+
+     context "with temporary directory" do
+       let(:temporary_directory) { Stud::Temporary.pathname }
+
+       it "creates the directory when it doesn't exist" do
+         expect { subject.register }.to change { Dir.exist?(temporary_directory) }.from(false).to(true)
+       end
+     end
+   end
+
+   describe '#get_s3object' do
+     subject { LogStash::Inputs::S3.new(settings) }
+
+     context 'with modern access key options' do
+       let(:settings) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "proxy_uri" => "http://example.com",
+           "bucket" => "logstash-test",
+         }
+       }
+
+       it 'should instantiate AWS::S3 clients with a proxy set' do
+         expect(Aws::S3::Resource).to receive(:new).with({
+           :credentials => kind_of(Aws::Credentials),
+           :http_proxy => 'http://example.com',
+           :region => subject.region
+         })
+
+         subject.send(:get_s3object)
+       end
+     end
+
+     describe "additional_settings" do
+       context "supported settings" do
+         let(:settings) {
+           {
+             "additional_settings" => { "force_path_style" => 'true', "ssl_verify_peer" => 'false', "profile" => 'logstash' },
+             "bucket" => "logstash-test",
+           }
+         }
+
+         it 'should instantiate AWS::S3 clients with force_path_style set' do
+           expect(Aws::S3::Resource).to receive(:new).with({
+             :region => subject.region,
+             :force_path_style => true, :ssl_verify_peer => false, :profile => 'logstash'
+           }).and_call_original
+
+           subject.send(:get_s3object)
+         end
+       end
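+       # The example above also documents a coercion detail: additional_settings
+       # values come out of the pipeline config as strings ('true' / 'false')
+       # and are symbolized and coerced to booleans before being passed to
+       # Aws::S3::Resource.new.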
+
+       context 'when an unknown setting is given' do
+         let(:settings) {
+           {
+             "additional_settings" => { "this_setting_doesnt_exist" => true },
+             "bucket" => "logstash-test",
+           }
+         }
+
+         it 'should raise an error' do
+           expect { subject.send(:get_s3object) }.to raise_error(ArgumentError)
+         end
+       end
+     end
+   end
+
+   describe "#list_new_files" do
+     before { allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list } }
+
+     let!(:present_object_after_cutoff) { double(:key => 'this-should-not-be-present', :last_modified => Time.now, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+     let!(:present_object) { double(:key => 'this-should-be-present', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'STANDARD', :object => double(:data => double(:restore => nil)) ) }
+     let!(:archived_object) { double(:key => 'this-should-be-archived', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => nil)) ) }
+     let!(:deep_archived_object) { double(:key => 'this-should-be-deep-archived', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'DEEP_ARCHIVE', :object => double(:data => double(:restore => nil)) ) }
+     let!(:restored_object) { double(:key => 'this-should-be-restored-from-archive', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'GLACIER', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"')) ) }
+     let!(:deep_restored_object) { double(:key => 'this-should-be-restored-from-deep-archive', :last_modified => Time.now - cutoff, :content_length => 10, :storage_class => 'DEEP_ARCHIVE', :object => double(:data => double(:restore => 'ongoing-request="false", expiry-date="Thu, 01 Jan 2099 00:00:00 GMT"')) ) }
+     let(:objects_list) {
+       [
+         double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
+         double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
+         archived_object,
+         restored_object,
+         deep_restored_object,
+         present_object,
+         present_object_after_cutoff
+       ]
+     }
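+     # These fixtures encode the listing rules under test: objects modified
+     # within CUTOFF_SECOND of "now" are deferred to the next cycle, and
+     # GLACIER / DEEP_ARCHIVE objects are skipped unless their restore status
+     # indicates a completed restore.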
+
+     it 'should allow user to exclude files from the s3 bucket' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to_not include('exclude-this-file-1') # matches exclude pattern
+       expect(files).to_not include('exclude/logstash') # matches exclude pattern
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(3)
+     end
+
+     it 'should support not providing an exclude pattern' do
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to include('exclude-this-file-1') # no exclude pattern given
+       expect(files).to include('exclude/logstash') # no exclude pattern given
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(5)
+     end
+
+     context 'when all files are excluded from a bucket' do
+       let(:objects_list) {
+         [
+           double(:key => 'exclude-this-file-1', :last_modified => Time.now - 2 * day, :content_length => 100, :storage_class => 'STANDARD'),
+           double(:key => 'exclude/logstash', :last_modified => Time.now - 2 * day, :content_length => 50, :storage_class => 'STANDARD'),
+         ]
+       }
+
+       it 'should not log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "exclude_pattern" => "^exclude" }))
+         plugin.register
+         allow(plugin.logger).to receive(:debug).with(anything, anything)
+
+         expect(plugin.logger).not_to receive(:info).with(/No files found/, anything)
+         expect(plugin.logger).to receive(:debug).with(/Ignoring/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context 'with an empty bucket' do
+       let(:objects_list) { [] }
+
+       it 'should log that no files were found in the bucket' do
+         plugin = LogStash::Inputs::S3.new(config)
+         plugin.register
+         allow(plugin.logger).to receive(:info).with(/Using the provided sincedb_path/, anything)
+         expect(plugin.logger).to receive(:info).with(/No files found/, anything)
+         expect(plugin.list_new_files).to be_empty
+       end
+     end
+
+     context "if the bucket is the same as the backup bucket" do
+       it 'should ignore files from the bucket if they match the backup prefix' do
+         objects_list = [
+           double(:key => 'mybackup-log-1', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+           present_object
+         ]
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects_list }
+
+         plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'mybackup',
+                                                          'backup_to_bucket' => config['bucket'] }))
+         plugin.register
+
+         files = plugin.list_new_files.map { |item| item.key }
+         expect(files).to include(present_object.key)
+         expect(files).to_not include('mybackup-log-1') # matches backup prefix
+         expect(files.size).to eq(1)
+       end
+     end
+
+     it 'should ignore files older than X' do
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'backup_add_prefix' => 'exclude-this-file' }))
+
+       allow_any_instance_of(LogStash::Inputs::S3::SinceDB::File).to receive(:read).and_return(Time.now - day)
+       plugin.register
+
+       files = plugin.list_new_files.map { |item| item.key }
+       expect(files).to include(present_object.key)
+       expect(files).to include(restored_object.key)
+       expect(files).to include(deep_restored_object.key)
+       expect(files).to_not include('exclude-this-file-1') # too old
+       expect(files).to_not include('exclude/logstash') # too old
+       expect(files).to_not include(archived_object.key) # archived
+       expect(files).to_not include(deep_archived_object.key) # archived
+       expect(files).to_not include(present_object_after_cutoff.key) # after cutoff
+       expect(files.size).to eq(3)
+     end
+
+     it 'should ignore a file if it matches the prefix' do
+       prefix = 'mysource/'
+
+       objects_list = [
+         double(:key => prefix, :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+         present_object
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects).with(:prefix => prefix) { objects_list }
+
+       plugin = LogStash::Inputs::S3.new(config.merge({ 'prefix' => prefix }))
+       plugin.register
+       expect(plugin.list_new_files.map { |item| item.key }).to eq([present_object.key])
+     end
+
+     it 'should return objects sorted by last_modified date, oldest first' do
+       objects = [
+         double(:key => 'YESTERDAY', :last_modified => Time.now - day, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TODAY', :last_modified => Time.now, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TODAY_BEFORE_CUTOFF', :last_modified => Time.now - cutoff, :content_length => 5, :storage_class => 'STANDARD'),
+         double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD')
+       ]
+
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+
+       plugin = LogStash::Inputs::S3.new(config)
+       plugin.register
+       expect(plugin.list_new_files.map { |item| item.key }).to eq(['TWO_DAYS_AGO', 'YESTERDAY', 'TODAY_BEFORE_CUTOFF'])
+     end
+
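+     # backup_to_bucket drives Aws::S3::Object#copy_from with a
+     # "source-bucket/key" copy source and deletes the original only when the
+     # delete option is set, as the next three examples show.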
+     describe "when backing up to S3" do
+       it 'should copy to another s3 bucket when keeping the original file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup" }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to_not receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+
+       it 'should copy to another s3 bucket when deleting the original file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup", "delete" => true }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+
+       it 'should add the specified prefix to the backup file' do
+         plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_bucket" => "mybackup",
+                                                          "backup_add_prefix" => 'backup-' }))
+         plugin.register
+
+         s3object = Aws::S3::Object.new('mybucket', 'testkey')
+         expect_any_instance_of(Aws::S3::Object).to receive(:copy_from).with(:copy_source => "mybucket/testkey")
+         expect(s3object).to_not receive(:delete)
+
+         plugin.backup_to_bucket(s3object)
+       end
+     end
+
+     it 'should support doing local backup of files' do
+       Stud::Temporary.directory do |backup_dir|
+         Stud::Temporary.file do |source_file|
+           backup_file = File.join(backup_dir.to_s, Pathname.new(source_file.path).basename.to_s)
+
+           plugin = LogStash::Inputs::S3.new(config.merge({ "backup_to_dir" => backup_dir }))
+
+           plugin.backup_to_dir(source_file)
+
+           expect(File.exist?(backup_file)).to eq(true)
+         end
+       end
+     end
+   end
+
+   shared_examples "generated events" do
+     let(:events_to_process) { 2 }
+
+     it 'should process events' do
+       events = fetch_events(config)
+       expect(events.size).to eq(events_to_process)
+       expect(events[0].get("[@metadata][s3][key]")).to eql log.key
+       expect(events[1].get("[@metadata][s3][key]")).to eql log.key
+     end
+
+     it "deletes the temporary file" do
+       events = fetch_events(config)
+       expect(Dir.glob(File.join(temporary_directory, "*")).size).to eq(0)
+     end
+   end
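+   # fetch_events is defined in spec/support/helpers.rb; roughly speaking, it
+   # builds the plugin from the given config, runs it against the stubbed S3
+   # API, and returns the events the plugin pushed onto its queue.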
+
+   context 'while communicating with s3' do
+     let(:config) {
+       {
+         "access_key_id" => "1234",
+         "secret_access_key" => "secret",
+         "bucket" => "logstash-test",
+         "codec" => "json",
+       }
+     }
+
+     %w(AccessDenied NotFound).each do |error|
+       context "while listing bucket contents, #{error} is returned" do
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               list_objects: error
+             }
+           }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+
+     %w(AccessDenied NoSuchKey).each do |error|
+       context "when retrieving an object, #{error} is returned" do
+         let(:objects) { [log] }
+         let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+
+         let(:config) {
+           {
+             "access_key_id" => "1234",
+             "secret_access_key" => "secret",
+             "bucket" => "logstash-test",
+             "codec" => "json",
+           }
+         }
+         before do
+           Aws.config[:s3] = {
+             stub_responses: {
+               get_object: error
+             }
+           }
+           allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+         end
+
+         it 'should not crash the plugin' do
+           events = fetch_events(config)
+           expect(events.size).to eq(0)
+         end
+       end
+     end
+   end
+
+   context 'when working with logs' do
+     let(:objects) { [log] }
+     let(:log) { double(:key => 'uncompressed.log', :last_modified => Time.now - 2 * day, :content_length => 5, :data => { "etag" => 'c2c966251da0bc3229d12c2642ba50a4' }, :storage_class => 'STANDARD') }
+     let(:data) { File.read(log_file) }
+
+     before do
+       Aws.config[:s3] = {
+         stub_responses: {
+           get_object: { body: data }
+         }
+       }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { objects }
+       allow_any_instance_of(Aws::S3::Bucket).to receive(:object).with(log.key) { log }
+       expect(log).to receive(:get).with(instance_of(Hash)) do |arg|
+         File.open(arg[:response_target], 'wb') { |s3file| s3file.write(data) }
+       end
+     end
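+     # The stubbed Object#get above mimics a download: the plugin passes a
+     # :response_target path in the options hash, and the stub writes the
+     # fixture bytes to that path, just as the real SDK would.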
+
+     context "when event doesn't have a `message` field" do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => "json",
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context "when event does have a `message` field" do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'json_with_message.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => "json",
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context "multiple compressed streams" do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiple_compressed_streams.gz') }
+
+       include_examples "generated events" do
+         let(:events_to_process) { 16 }
+       end
+     end
+
+     context 'compressed' do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') }
+
+       include_examples "generated events"
+     end
+
+     context 'compressed with gzip extension and using default gzip_pattern option' do
+       let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }
+
+       include_examples "generated events"
+     end
+
+     context 'compressed with gzip extension and using custom gzip_pattern option' do
+       let(:config) { super().merge({ "gzip_pattern" => "gee.zip$" }) }
+       let(:log) { double(:key => 'log.gee.zip', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gee.zip') }
+
+       include_examples "generated events"
+     end
+
+     context 'plain text' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       include_examples "generated events"
+     end
+
+     context 'multi-line' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'multiline.log') }
+       let(:config) {
+         {
+           "access_key_id" => "1234",
+           "secret_access_key" => "secret",
+           "bucket" => "logstash-test",
+           "codec" => LogStash::Codecs::Multiline.new({ "pattern" => "__SEPARATOR__", "negate" => "true", "what" => "previous" })
+         }
+       }
+
+       include_examples "generated events"
+     end
+
+     context 'encoded' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'invalid_utf8.gbk.log') }
+
+       include_examples "generated events"
+     end
+
+     context 'cloudfront' do
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'cloudfront.log') }
+
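+       # ecs_compatibility_matrix comes from the ecs_compatibility_support
+       # spec helper required at the top of this file; it runs the enclosed
+       # examples once per ECS mode (:disabled and :v1 here), with ecs_select
+       # resolving to the field name for the active mode.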
+       describe "metadata", :ecs_compatibility_support, :aggregate_failures do
+         ecs_compatibility_matrix(:disabled, :v1) do |ecs_select|
+           before(:each) do
+             allow_any_instance_of(described_class).to receive(:ecs_compatibility).and_return(ecs_compatibility)
+           end
+
+           it 'should extract metadata from cloudfront log' do
+             events = fetch_events(config)
+
+             events.each do |event|
+               expect(event.get ecs_select[disabled: "cloudfront_fields", v1: "[@metadata][s3][cloudfront][fields]"] ).to eq('date time x-edge-location c-ip x-event sc-bytes x-cf-status x-cf-client-id cs-uri-stem cs-uri-query c-referrer x-page-url​ c-user-agent x-sname x-sname-query x-file-ext x-sid')
+               expect(event.get ecs_select[disabled: "cloudfront_version", v1: "[@metadata][s3][cloudfront][version]"] ).to eq('1.0')
+             end
+           end
+         end
+       end
+
+       include_examples "generated events"
+     end
+
+     context 'when include_object_properties is set to true' do
+       let(:config) { super().merge({ "include_object_properties" => true }) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       it 'should extract object properties onto [@metadata][s3]' do
+         events = fetch_events(config)
+         events.each do |event|
+           expect(event.get('[@metadata][s3]')).to include(log.data)
+         end
+       end
+
+       include_examples "generated events"
+     end
+
+     context 'when include_object_properties is set to false' do
+       let(:config) { super().merge({ "include_object_properties" => false }) }
+       let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'uncompressed.log') }
+
+       it 'should NOT extract object properties onto [@metadata][s3]' do
+         events = fetch_events(config)
+         events.each do |event|
+           expect(event.get('[@metadata][s3]')).to_not include(log.data)
+         end
+       end
+
+       include_examples "generated events"
+     end
+   end
+
+   describe "data loss" do
+     let(:s3_plugin) { LogStash::Inputs::S3.new(config) }
+     let(:queue) { [] }
+
+     before do
+       s3_plugin.register
+     end
+
+     context 'events come after cutoff time' do
+       it 'should be processed in the next cycle' do
+         s3_objects = [
+           double(:key => 'TWO_DAYS_AGO', :last_modified => Time.now.round - 2 * day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY_BEFORE_CUTOFF', :last_modified => Time.now.round - cutoff, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD')
+         ]
+         size = s3_objects.length
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { s3_objects }
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:object).and_return(*s3_objects)
+         expect(s3_plugin).to receive(:process_log).at_least(size).and_call_original
+         expect(s3_plugin).to receive(:stop?).and_return(false).at_least(size)
+         expect(s3_plugin).to receive(:download_remote_file).and_return(true).at_least(size)
+         expect(s3_plugin).to receive(:process_local_log).and_return(true).at_least(size)
+
+         # first iteration: objects newer than the cutoff stay behind
+         s3_plugin.process_files(queue)
+
+         # second iteration: once the cutoff has elapsed they are picked up
+         sleep(cutoff + 1)
+         s3_plugin.process_files(queue)
+       end
+     end
+
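+     # If an object changes between the bucket listing and the download, the
+     # sincedb must keep the older listed timestamp so the changed object is
+     # picked up again on a later run instead of being silently skipped.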
+     context 's3 object updated after getting summary' do
+       it 'should not update sincedb' do
+         s3_summary = [
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY', :last_modified => Time.now.round - (cutoff * 10), :content_length => 5, :storage_class => 'STANDARD')
+         ]
+
+         s3_objects = [
+           double(:key => 'YESTERDAY', :last_modified => Time.now.round - day, :content_length => 5, :storage_class => 'STANDARD'),
+           double(:key => 'TODAY_UPDATED', :last_modified => Time.now.round, :content_length => 5, :storage_class => 'STANDARD')
+         ]
+
+         size = s3_objects.length
+
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:objects) { s3_summary }
+         allow_any_instance_of(Aws::S3::Bucket).to receive(:object).and_return(*s3_objects)
+         expect(s3_plugin).to receive(:process_log).at_least(size).and_call_original
+         expect(s3_plugin).to receive(:stop?).and_return(false).at_least(size)
+         expect(s3_plugin).to receive(:download_remote_file).and_return(true).at_least(size)
+         expect(s3_plugin).to receive(:process_local_log).and_return(true).at_least(size)
+
+         s3_plugin.process_files(queue)
+         expect(s3_plugin.send(:sincedb).read).to eq(s3_summary[0].last_modified)
+       end
+     end
+   end
+ end
@@ -0,0 +1,17 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/s3"
+ require "stud/temporary"
+ require "fileutils"
+
+ describe LogStash::Inputs::S3::SinceDB::File do
+   let(:file) { Stud::Temporary.file.path }
+   subject { LogStash::Inputs::S3::SinceDB::File.new(file) }
+   before do
+     FileUtils.touch(file)
+   end
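+   # SinceDB::File tracks the last_modified timestamp of the newest processed
+   # object; an empty file is the fresh-install case, so read must fall back
+   # to a default time rather than raising.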
+
+   it "doesn't raise an exception if the file is empty" do
+     expect { subject.read }.not_to raise_error
+   end
+ end