logstash-input-s3-sns-sqs 1.6.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,119 @@
1
+ # LogProcessor:
2
+ # reads and decodes locally available file with log lines
3
+ # and creates LogStash events from these
4
+ require 'logstash/inputs/mime/magic_gzip_validator'
5
+ require 'pathname'
6
+
7
# LogProcessor is a mixin that reads a locally available log file (plain or
# gzip-compressed), decodes it line by line with a codec and pushes the
# resulting events onto a queue.
#
# The module calls several things it does not define itself, so the including
# class has to provide them: @codec_factory, @type_by_folder, @logger, stop?,
# decorate(event) and get_object_folder(folder).
module LogProcessor

  # Including the module also extends the including class with its methods.
  def self.included(base)
    base.extend(self)
  end

  # Reads record[:local_file], decodes every line and appends the decorated
  # events to logstash_event_queue.
  #
  # @param record [Hash] reads :local_file, :folder, :key and :bucket
  # @param logstash_event_queue [#<<] receiver of the decoded events
  # @return [true] when the file has been read to the end
  # Throws :skip_delete when stop? becomes true while reading, or when
  # read_file could not finish the file.
  def process(record, logstash_event_queue)
    file = record[:local_file]
    codec = @codec_factory.get_codec(record)
    folder = record[:folder]
    type = @type_by_folder[folder]
    # cloudfront "#Version:" / "#Fields:" header values seen so far in this file
    metadata = {}

    read_file(file) do |line|
      if stop?
        @logger.warn("Abort reading in the middle of the file, we will read it again when logstash is started")
        throw :skip_delete
      end
      # replace bytes that cannot be represented in UTF-8 instead of raising
      line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370")
      codec.decode(line) do |event|
        decorate_event(event, metadata, type, record[:key], record[:bucket], folder)
        logstash_event_queue << event
      end
    end

    # ensure any stateful codecs (such as multi-line) are flushed to the queue
    codec.flush do |event|
      decorate_event(event, metadata, type, record[:key], record[:bucket], folder)
      @logger.debug("Flushing an incomplete event", :event => event.to_s)
      logstash_event_queue << event
    end

    # signal completion
    true
  end

  private

  # Header events ("#Version: " / "#Fields: ") only update `metadata`; every
  # other event gets its type, the collected cloudfront values and the S3
  # origin fields under [@metadata][s3].
  def decorate_event(event, metadata, type, key, bucket, folder)
    if event_is_metadata?(event)
      @logger.debug('Updating the current cloudfront metadata', :event => event)
      update_metadata(metadata, event)
    else
      # type by folder - set before "decorate()" enforces default
      event.set('type', type) if type && !event.include?('type')
      decorate(event)

      event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
      event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?

      event.set("[@metadata][s3][object_key]", key)
      event.set("[@metadata][s3][bucket_name]", bucket)
      event.set("[@metadata][s3][full_folder]", folder)
      event.set("[@metadata][s3][object_folder]", get_object_folder(folder))
    end
  end

  # Decides whether `filename` is gzip-compressed: by extension first, then by
  # asking MagicGzipValidator about the file content.
  #
  # The file handle used for the content check is closed before returning.
  # Any error during detection is logged and reported as "not gzip".
  #
  # @param filename [String] path of the local file
  # @return [Boolean]
  def gzip?(filename)
    return true if filename.end_with?('.gz', '.gzip')
    # block form closes the handle even when the validator raises
    File.open(filename, 'rb') { |io| MagicGzipValidator.new(io).valid? }
  rescue StandardError => e
    @logger.warn("Problem while gzip detection", :error => e)
    false
  end

  # Yields every line of `filename`, transparently gunzipping when gzip?
  # says so. Uses Java stream classes (FileInputStream, GZIPInputStream,
  # InputStreamReader, BufferedReader) that must already be in scope.
  #
  # Throws :skip_delete from the ensure clause whenever the file was not read
  # to the end (including after a logged ZipException).
  def read_file(filename)
    completed = false
    zipped = gzip?(filename)
    file_stream = FileInputStream.new(filename)
    if zipped
      gzip_stream = GZIPInputStream.new(file_stream)
      decoder = InputStreamReader.new(gzip_stream, 'UTF-8')
    else
      decoder = InputStreamReader.new(file_stream, 'UTF-8')
    end
    buffered = BufferedReader.new(decoder)

    while (line = buffered.readLine())
      yield(line)
    end
    completed = true
  rescue ZipException => e
    @logger.error("Gzip codec: We cannot uncompress the gzip file", :filename => filename, :error => e)
  ensure
    buffered.close unless buffered.nil?
    decoder.close unless decoder.nil?
    gzip_stream.close unless gzip_stream.nil?
    file_stream.close unless file_stream.nil?
    throw :skip_delete unless completed
  end

  # True when the event's "message" is a String starting with one of the
  # cloudfront header prefixes.
  def event_is_metadata?(event)
    line = event.get("message")
    return false unless line.instance_of?(String)
    version_metadata?(line) || fields_metadata?(line)
  end

  def version_metadata?(line)
    line.start_with?('#Version: ')
  end

  def fields_metadata?(line)
    line.start_with?('#Fields: ')
  end

  # Stores the value following "#Version: " / "#Fields: " in `metadata` under
  # :cloudfront_version / :cloudfront_fields.
  def update_metadata(metadata, event)
    line = event.get('message').strip

    if version_metadata?(line)
      metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
    end

    if fields_metadata?(line)
      metadata[:cloudfront_fields] = line.split(/#Fields: (.+)/).last
    end
  end
end
@@ -0,0 +1,183 @@
1
+ # SqsPoller:
2
+ # polling loop fetches messages from source queue and invokes
3
+ # the provided code block on them
4
+ require 'json'
5
+ require 'cgi'
6
+
7
# SqsPoller wraps an Aws::SQS::QueuePoller: it receives messages from one SQS
# queue, extracts the S3 "object created" records they contain and yields one
# small hash per record to the caller's block.
class SqsPoller

  # queue poller options we want to set explicitly
  DEFAULT_OPTIONS = {
    # we query one message at a time, so we can ensure correct error
    # handling if we can't download a single file correctly
    # (we will throw :skip_delete if download size isn't correct to allow
    # for processing the event again later, so make sure to set a reasonable
    # "DefaultVisibilityTimeout" for your queue so that there's enough time
    # to process the log files!)
    max_number_of_messages: 1,
    visibility_timeout: 600,
    # long polling; by default we use the queue's setting.
    # A good value is 10 seconds to balance between a quick logstash
    # shutdown and fewer api calls.
    wait_time_seconds: nil,
    skip_delete: false,
  }

  # only needed in "run_with_backoff":
  BACKOFF_SLEEP_TIME = 1
  BACKOFF_FACTOR = 2
  MAX_TIME_BEFORE_GIVING_UP = 60
  # only needed in "preprocess":
  EVENT_SOURCE = 'aws:s3'
  EVENT_TYPE = 'ObjectCreated'

  # Plugin-level option keys that are consumed here and must not be handed
  # on to the queue poller.
  PLUGIN_ONLY_OPTIONS = [:sqs_explicit_delete, :from_sns, :queue_owner_aws_account_id, :sqs_skip_delete]

  # initialization and setup happens once, outside the threads.
  #
  # @param logger [#debug, #info, #warn, #error]
  # @param stop_semaphore [#value] truthy value means "stop polling"
  # @param sqs_queue [String] name of the SQS queue
  # @param options [Hash] poller options merged over DEFAULT_OPTIONS; also
  #   reads :from_sns, :sqs_skip_delete and :queue_owner_aws_account_id
  # @param aws_options_hash [Hash] passed to Aws::SQS::Client.new
  # @raise [LogStash::ConfigurationError] when SQS reports a service error
  def initialize(logger, stop_semaphore, sqs_queue, options = {}, aws_options_hash)
    @logger = logger
    @stopped = stop_semaphore
    @queue = sqs_queue
    @from_sns = options[:from_sns]
    @options = DEFAULT_OPTIONS.merge(options.reject { |k| PLUGIN_ONLY_OPTIONS.include? k })
    @options[:skip_delete] = options[:sqs_skip_delete]
    begin
      @logger.info("Registering SQS input", :queue => @queue)
      sqs_client = Aws::SQS::Client.new(aws_options_hash)
      queue_url = sqs_client.get_queue_url({
        queue_name: @queue,
        # taken from the caller's options: the key is deliberately stripped
        # from @options above, so reading it from there would always be nil
        queue_owner_aws_account_id: options[:queue_owner_aws_account_id]
      }).queue_url
      @poller = Aws::SQS::QueuePoller.new(queue_url,
        :client => sqs_client
      )
    rescue Aws::SQS::Errors::ServiceError => e
      @logger.error("Cannot establish connection to Amazon SQS", :error => e)
      raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
    end
  end

  # This is called by every worker thread. Polls the queue and yields one
  # record hash (:bucket, :key, :size, :folder) per S3 object-created record.
  #
  # While a message is being processed a helper thread waits for 90% of the
  # visibility timeout and then sets the message's visibility timeout to twice
  # the configured value. If the block (or preprocessing) raises, the error is
  # logged and :skip_delete is thrown for that message.
  def run # not (&block) - the block is reached through yield below
    # per-thread timer to extend visibility if necessary
    extender = nil
    message_backoff = (@options[:visibility_timeout] * 90).to_f / 100.0
    new_visibility = 2 * @options[:visibility_timeout]

    # "shutdown handler":
    @poller.before_request do |_|
      if stop?
        # kill visibility extender thread if active
        extender.kill if extender
        extender = nil
        @logger.warn('issuing :stop_polling on "stop?" signal', :queue => @queue)
        # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
        throw :stop_polling
      end
    end

    run_with_backoff do
      @poller.poll(@options) do |message|
        @logger.debug("Inside Poller: polled message", :message => message)
        # auto-double the timeout if processing takes too long:
        extender = Thread.new do
          sleep message_backoff
          @logger.info("Extending visibility for message", :message => message)
          @poller.change_message_visibility_timeout(message, new_visibility)
        end
        failed = false
        begin
          preprocess(message) do |record|
            @logger.debug("we got a record", :record => record)
            yield(record)
          end
        rescue Exception => e # NOTE(review): deliberately broad in the original; narrowing needs confirmation
          @logger.warn("Error in poller loop", :error => e)
          @logger.warn("Backtrace:\n\t#{e.backtrace.join("\n\t")}")
          failed = true
        end
        # at this time the extender has either fired or is obsolete
        extender.kill
        extender = nil
        throw :skip_delete if failed
      end
    end
  end

  private

  def stop?
    @stopped.value
  end

  # Parses the message body (unwrapping the SNS envelope's 'Message' when
  # @from_sns is set) and yields a hash for every record that is an S3
  # object-created event. Other records, including records without an
  # 'eventName', are skipped without raising.
  #
  # @return [nil] when the payload has no 'Records'
  def preprocess(message)
    @logger.debug("Inside Preprocess: Start", :message => message)
    payload = JSON.parse(message.body)
    payload = JSON.parse(payload['Message']) if @from_sns
    @logger.debug("Payload in Preprocess: ", :payload => payload)
    return nil unless payload['Records']
    payload['Records'].each do |record|
      @logger.debug("We found a record", :record => record)
      # in case there are any events with Records that aren't s3 object-created events and can't therefore be
      # processed by this plugin, we will skip them and remove them from queue
      # (to_s keeps a missing eventName from raising NoMethodError on nil)
      next unless record['eventSource'] == EVENT_SOURCE && record['eventName'].to_s.start_with?(EVENT_TYPE)

      @logger.debug("record is valid")
      bucket = CGI.unescape(record['s3']['bucket']['name'])
      key = CGI.unescape(record['s3']['object']['key'])
      size = record['s3']['object']['size']
      yield({
        bucket: bucket,
        key: key,
        size: size,
        folder: get_type_folder(key)
      })
      # codec selection by folder, download and size checks are handled by the
      # s3 and processor parts, not here
    end
  end

  # Runs the given block and retries it with an exponential backoff whenever
  # it raises Aws::SQS::Errors::ServiceError. The delay starts at sleep_time,
  # is multiplied by BACKOFF_FACTOR after each failure and starts over at
  # sleep_time once it has exceeded max_time.
  #
  # @param max_time [Integer] delay above which the backoff starts over
  # @param sleep_time [Integer] the initial amount of time to sleep before retrying
  # @yield the code to execute (and re-execute on service errors)
  def run_with_backoff(max_time = MAX_TIME_BEFORE_GIVING_UP, sleep_time = BACKOFF_SLEEP_TIME)
    next_sleep = sleep_time
    begin
      yield
      next_sleep = sleep_time
    rescue Aws::SQS::Errors::ServiceError => e
      @logger.warn("Aws::SQS::Errors::ServiceError ... retrying SQS request with exponential backoff", :queue => @queue, :sleep_time => next_sleep, :error => e)
      sleep(next_sleep)
      # grow from the previous delay, not from the constant initial one
      next_sleep = next_sleep > max_time ? sleep_time : next_sleep * BACKOFF_FACTOR
      retry
    end
  end

  # Directory part of an object key; '' for keys without a directory.
  def get_type_folder(key)
    folder = ::File.dirname(key)
    return '' if folder == '.'
    folder
  end

end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-s3-sns-sqs'
3
- s.version = '1.6.1'
4
- s.licenses = ['Apache License (2.0)']
3
+ s.version = '2.0.0'
4
+ s.licenses = ['Apache-2.0']
5
5
  s.summary = "Get logs from AWS s3 buckets as issued by an object-created event via sns -> sqs."
6
6
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
7
7
  s.authors = ["Christian Herweg"]
@@ -21,9 +21,8 @@ Gem::Specification.new do |s|
21
21
  # Gem dependencies
22
22
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 2.1.12", "<= 2.99"
23
23
 
24
- s.add_runtime_dependency 'logstash-codec-json'
25
- s.add_runtime_dependency "logstash-mixin-aws"
26
- s.add_development_dependency 'logstash-devutils'
27
-
24
+ s.add_runtime_dependency 'logstash-codec-json', '~> 3.0'
25
+ s.add_runtime_dependency 'logstash-mixin-aws', '~> 4.3'
26
+ s.add_development_dependency 'logstash-codec-json_stream', '~> 1.0'
27
+ s.add_development_dependency 'logstash-devutils', '~> 1.3'
28
28
  end
29
-
@@ -1,9 +1,60 @@
1
1
  # encoding: utf-8
2
2
  require "logstash/devutils/rspec/spec_helper"
3
- require "logstash/inputs/s3sqs"
3
+ require "logstash/plugin"
4
+ require "logstash/inputs/s3snssqs"
5
+ require "fileutils"
6
+ require "logstash/errors"
7
+ require "logstash/event"
8
+ require "logstash/json"
9
+ require "logstash/codecs/base"
10
+ require "logstash/codecs/json_stream"
11
+ require 'rspec'
12
+ require 'rspec/expectations'
4
13
 
5
- describe LogStash::Inputs::S3SQS do
6
14
 
7
- true.should be_true
8
15
 
9
- end
16
# Spec for the S3SNSSQS input: feeds a fixture log file through
# process_local_log and checks how many events end up in the queue.
describe LogStash::Inputs::S3SNSSQS do
  class LogStash::Inputs::S3SNSSQS
    public :process_local_log # use method without error logging for better visibility of errors
  end
  let(:codec_options) { {} }

  let(:input) { LogStash::Inputs::S3SNSSQS.new(config) }
  subject { input }

  context "default parser choice" do
    it "should return true" do
      expect(true).to be true
    end
  end

  # fixture and placeholder arguments handed to process_local_log
  let(:compressed_log_file) { File.join(File.dirname(__FILE__), '..', '..', 'fixtures', 'log-stream.real-formatted') }
  let(:key) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:folder) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:instance_codec) { "json" }
  let(:queue) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:bucket) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:message) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:size) { "123344" }
  let(:temporary_directory) { Stud::Temporary.pathname }
  let(:config) { {"queue" => queue, "codec" => "json", "temporary_directory" => temporary_directory } }
  context 'compressed_log_file' do

    subject do
      LogStash::Inputs::S3SNSSQS.new(config)
    end
    # NOTE(review): this shadows the outer `queue`, so config["queue"] is an
    # Array in this context as well as being the event sink - confirm intended
    let(:queue) { [] }
    before do
      @codec = LogStash::Codecs::JSONStream.new
      @codec.charset = "UTF-8"
      expect( subject.process_local_log(compressed_log_file, key, folder, @codec.clone, queue, bucket, message, size) ).to be true
      $stderr.puts "method #{queue.to_s}"
    end

    it '.process_local_log => process compressed log file and verified logstash event queue with the correct number of events' do
      expect( queue.size ).to eq(38)
      expect( queue.clear).to be_empty
    end

  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-s3-sns-sqs
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.1
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christian Herweg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-01-24 00:00:00.000000000 Z
11
+ date: 2019-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -33,45 +33,59 @@ dependencies:
33
33
  - !ruby/object:Gem::Dependency
34
34
  requirement: !ruby/object:Gem::Requirement
35
35
  requirements:
36
- - - ">="
36
+ - - "~>"
37
37
  - !ruby/object:Gem::Version
38
- version: '0'
38
+ version: '3.0'
39
39
  name: logstash-codec-json
40
40
  prerelease: false
41
41
  type: :runtime
42
42
  version_requirements: !ruby/object:Gem::Requirement
43
43
  requirements:
44
- - - ">="
44
+ - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '0'
46
+ version: '3.0'
47
47
  - !ruby/object:Gem::Dependency
48
48
  requirement: !ruby/object:Gem::Requirement
49
49
  requirements:
50
- - - ">="
50
+ - - "~>"
51
51
  - !ruby/object:Gem::Version
52
- version: '0'
52
+ version: '4.3'
53
53
  name: logstash-mixin-aws
54
54
  prerelease: false
55
55
  type: :runtime
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  requirements:
58
- - - ">="
58
+ - - "~>"
59
59
  - !ruby/object:Gem::Version
60
- version: '0'
60
+ version: '4.3'
61
61
  - !ruby/object:Gem::Dependency
62
62
  requirement: !ruby/object:Gem::Requirement
63
63
  requirements:
64
- - - ">="
64
+ - - "~>"
65
65
  - !ruby/object:Gem::Version
66
- version: '0'
66
+ version: '1.0'
67
+ name: logstash-codec-json_stream
68
+ prerelease: false
69
+ type: :development
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.0'
75
+ - !ruby/object:Gem::Dependency
76
+ requirement: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - "~>"
79
+ - !ruby/object:Gem::Version
80
+ version: '1.3'
67
81
  name: logstash-devutils
68
82
  prerelease: false
69
83
  type: :development
70
84
  version_requirements: !ruby/object:Gem::Requirement
71
85
  requirements:
72
- - - ">="
86
+ - - "~>"
73
87
  - !ruby/object:Gem::Version
74
- version: '0'
88
+ version: '1.3'
75
89
  description: This gem is a logstash plugin required to be installed on top of the
76
90
  Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
77
91
  a stand-alone program
@@ -86,15 +100,19 @@ files:
86
100
  - LICENSE
87
101
  - NOTICE.TXT
88
102
  - README.md
89
- - lib/logstash/inputs/mime/MagicgzipValidator.rb
103
+ - lib/logstash/inputs/codec_factory.rb
104
+ - lib/logstash/inputs/mime/magic_gzip_validator.rb
105
+ - lib/logstash/inputs/s3/client_factory.rb
106
+ - lib/logstash/inputs/s3/downloader.rb
90
107
  - lib/logstash/inputs/s3snssqs.rb
108
+ - lib/logstash/inputs/s3snssqs/log_processor.rb
91
109
  - lib/logstash/inputs/s3sqs/patch.rb
110
+ - lib/logstash/inputs/sqs/poller.rb
92
111
  - logstash-input-s3-sns-sqs.gemspec
93
112
  - spec/inputs/s3sqs_spec.rb
94
- - spec/spec_helper.rb
95
113
  homepage: https://github.com/cherweg/logstash-input-s3-sns-sqs
96
114
  licenses:
97
- - Apache License (2.0)
115
+ - Apache-2.0
98
116
  metadata:
99
117
  logstash_plugin: 'true'
100
118
  logstash_group: input
@@ -121,4 +139,3 @@ summary: Get logs from AWS s3 buckets as issued by an object-created event via s
121
139
  -> sqs.
122
140
  test_files:
123
141
  - spec/inputs/s3sqs_spec.rb
124
- - spec/spec_helper.rb