logstash-input-s3-sns-sqs 1.6.1 → 2.0.0

lib/logstash/inputs/s3snssqs/log_processor.rb ADDED
@@ -0,0 +1,119 @@
+ # LogProcessor:
+ # reads and decodes a locally available file with log lines
+ # and creates Logstash events from them
+ require 'logstash/inputs/mime/magic_gzip_validator'
+ require 'pathname'
+
+ module LogProcessor
+
+   def self.included(base)
+     base.extend(self)
+   end
+
+   def process(record, logstash_event_queue)
+     file = record[:local_file]
+     codec = @codec_factory.get_codec(record)
+     folder = record[:folder]
+     type = @type_by_folder[folder] #if @type_by_folder.key?(folder)
+     metadata = {}
+     read_file(file) do |line|
+       if stop?
+         @logger.warn("Aborting read mid-file; the file will be read again when Logstash is restarted")
+         throw :skip_delete
+       end
+       line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370")
+       codec.decode(line) do |event|
+         decorate_event(event, metadata, type, record[:key], record[:bucket], folder)
+         logstash_event_queue << event
+       end
+     end
+     # ensure that any stateful codecs (such as multiline) are flushed to the queue
+     codec.flush do |event|
+       decorate_event(event, metadata, type, record[:key], record[:bucket], folder)
+       @logger.debug("Flushing an incomplete event", :event => event.to_s)
+       logstash_event_queue << event
+     end
+     # signal completion:
+     return true
+   end
+
+   private
+
+   def decorate_event(event, metadata, type, key, bucket, folder)
+     if event_is_metadata?(event)
+       @logger.debug('Updating the current cloudfront metadata', :event => event)
+       update_metadata(metadata, event)
+     else
+       # set the type by folder before "decorate()" enforces the default
+       event.set('type', type) if type && !event.include?('type')
+       decorate(event)
+
+       event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
+       event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
+
+       event.set("[@metadata][s3][object_key]", key)
+       event.set("[@metadata][s3][bucket_name]", bucket)
+       event.set("[@metadata][s3][full_folder]", folder)
+       event.set("[@metadata][s3][object_folder]", get_object_folder(folder))
+     end
+   end
+
+   def gzip?(filename)
+     return true if filename.end_with?('.gz', '.gzip')
+     MagicGzipValidator.new(File.new(filename, 'rb')).valid?
+   rescue Exception => e
+     @logger.warn("Problem during gzip detection", :error => e)
+   end
+
+   def read_file(filename)
+     completed = false
+     zipped = gzip?(filename)
+     file_stream = FileInputStream.new(filename)
+     if zipped
+       gzip_stream = GZIPInputStream.new(file_stream)
+       decoder = InputStreamReader.new(gzip_stream, 'UTF-8')
+     else
+       decoder = InputStreamReader.new(file_stream, 'UTF-8')
+     end
+     buffered = BufferedReader.new(decoder)
+
+     while (line = buffered.readLine())
+       yield(line)
+     end
+     completed = true
+   rescue ZipException => e
+     @logger.error("Gzip codec: cannot uncompress the file", :filename => filename, :error => e)
+   ensure
+     buffered.close unless buffered.nil?
+     decoder.close unless decoder.nil?
+     gzip_stream.close unless gzip_stream.nil?
+     file_stream.close unless file_stream.nil?
+     throw :skip_delete unless completed
+   end
+
+   def event_is_metadata?(event)
+     return false unless event.get("message").class == String
+     line = event.get("message")
+     version_metadata?(line) || fields_metadata?(line)
+   end
+
+   def version_metadata?(line)
+     line.start_with?('#Version: ')
+   end
+
+   def fields_metadata?(line)
+     line.start_with?('#Fields: ')
+   end
+
+   def update_metadata(metadata, event)
+     line = event.get('message').strip
+
+     if version_metadata?(line)
+       metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
+     end
+
+     if fields_metadata?(line)
+       metadata[:cloudfront_fields] = line.split(/#Fields: (.+)/).last
+     end
+   end
+ end
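
For orientation, here is a minimal sketch (not part of the diff) of how a host class could wire up this mixin. The collaborator names are assumptions read off the calls above: @codec_factory must respond to get_codec(record), while stop?, decorate and get_object_folder are expected to come from the including plugin class (presumably LogStash::Inputs::S3SNSSQS).

    # hypothetical host class, for illustration only
    class ProcessorHost
      include LogProcessor

      def initialize(logger, codec_factory)
        @logger = logger
        @codec_factory = codec_factory  # must respond to get_codec(record)
        @type_by_folder = {}            # maps S3 folders to event types
      end

      def stop?
        false  # the real plugin consults its shutdown semaphore here
      end

      def decorate(event); end                    # normally provided by LogStash::Inputs::Base
      def get_object_folder(folder); folder; end  # simplified stand-in
    end

    # "record" is built by the SQS poller; :local_file points at the downloaded S3 object:
    # ProcessorHost.new(logger, codec_factory).process(record, logstash_event_queue)
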
lib/logstash/inputs/sqs/poller.rb ADDED
@@ -0,0 +1,183 @@
+ # SqsPoller:
+ # the polling loop fetches messages from the source queue and invokes
+ # the provided code block on them
+ require 'json'
+ require 'cgi'
+
+ class SqsPoller
+
+   # queue poller options we want to set explicitly
+   DEFAULT_OPTIONS = {
+     # we query one message at a time, so that we can ensure correct error
+     # handling if we can't download a single file correctly
+     # (we will throw :skip_delete if the download size isn't correct, to allow
+     # for processing the event again later, so make sure to set a reasonable
+     # "DefaultVisibilityTimeout" for your queue so that there's enough time
+     # to process the log files!)
+     max_number_of_messages: 1,
+     visibility_timeout: 600,
+     # long polling; by default we use the queue's setting.
+     # A good value is 10 seconds, to balance a quick logstash
+     # shutdown against fewer api calls.
+     wait_time_seconds: nil,
+     skip_delete: false,
+   }
+
+   # only needed in "run_with_backoff":
+   BACKOFF_SLEEP_TIME = 1
+   BACKOFF_FACTOR = 2
+   MAX_TIME_BEFORE_GIVING_UP = 60
+   # only needed in "preprocess":
+   EVENT_SOURCE = 'aws:s3'
+   EVENT_TYPE = 'ObjectCreated'
+
+   # initialization and setup happen once, outside the threads:
+   #
+   def initialize(logger, stop_semaphore, sqs_queue, options = {}, aws_options_hash)
+     @logger = logger
+     @stopped = stop_semaphore
+     @queue = sqs_queue
+     # @stopped = false # FIXME: needed per thread?
+     @from_sns = options[:from_sns]
+     @options = DEFAULT_OPTIONS.merge(options.reject { |k| [:sqs_explicit_delete, :from_sns, :queue_owner_aws_account_id, :sqs_skip_delete].include? k })
+     @options[:skip_delete] = options[:sqs_skip_delete]
+     begin
+       @logger.info("Registering SQS input", :queue => @queue)
+       sqs_client = Aws::SQS::Client.new(aws_options_hash)
+       queue_url = sqs_client.get_queue_url({
+         queue_name: @queue,
+         queue_owner_aws_account_id: @options[:queue_owner_aws_account_id]
+       }).queue_url # queue_url is an accessor on the response struct (was [:queue_url])
+       @poller = Aws::SQS::QueuePoller.new(queue_url,
+         :client => sqs_client
+       )
+     rescue Aws::SQS::Errors::ServiceError => e
+       @logger.error("Cannot establish connection to Amazon SQS", :error => e)
+       raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
+     end
+   end
+
+   #
+   # this is called by every worker thread:
+   #
+   def run() # takes an implicit block (we yield below) rather than an explicit &block
+     # per-thread timer to extend visibility if necessary
+     extender = nil
+     message_backoff = (@options[:visibility_timeout] * 90).to_f / 100.0
+     new_visibility = 2 * @options[:visibility_timeout]
+
+     # "shutdown handler":
+     @poller.before_request do |_|
+       if stop?
+         # kill the visibility extender thread if it is active
+         extender.kill if extender
+         extender = nil
+         @logger.warn('issuing :stop_polling on "stop?" signal', :queue => @queue)
+         # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
+         throw :stop_polling
+       end
+     end
+
+     run_with_backoff do
+       @poller.poll(@options) do |message|
+         @logger.debug("Inside Poller: polled message", :message => message)
+         # auto-double the timeout if processing takes too long:
+         extender = Thread.new do
+           sleep message_backoff
+           @logger.info("Extending visibility for message", :message => message)
+           @poller.change_message_visibility_timeout(message, new_visibility)
+         end
+         failed = false
+         begin
+           preprocess(message) do |record|
+             @logger.debug("we got a record", :record => record)
+             yield(record) # records yielded by preprocess are never nil
+           end
+         rescue Exception => e
+           @logger.warn("Error in poller loop", :error => e)
+           @logger.warn("Backtrace:\n\t#{e.backtrace.join("\n\t")}")
+           failed = true
+         end
+         # by this time the extender has either fired or is obsolete
+         extender.kill
+         extender = nil
+         throw :skip_delete if failed
+       end
+     end
+   end
+
+   private
+
+   def stop?
+     @stopped.value
+   end
+
+   def preprocess(message)
+     @logger.debug("Inside Preprocess: Start", :message => message)
+     payload = JSON.parse(message.body)
+     payload = JSON.parse(payload['Message']) if @from_sns
+     @logger.debug("Payload in Preprocess: ", :payload => payload)
+     return nil unless payload['Records']
+     payload['Records'].each do |record|
+       @logger.debug("We found a record", :record => record)
+       # skip any records that aren't s3 object-created events (they can't be
+       # processed by this plugin) and remove them from the queue
+       if record['eventSource'] == EVENT_SOURCE && record['eventName'].start_with?(EVENT_TYPE)
+         @logger.debug("record is valid")
+         bucket = CGI.unescape(record['s3']['bucket']['name'])
+         key = CGI.unescape(record['s3']['object']['key'])
+         size = record['s3']['object']['size']
+         yield({
+           bucket: bucket,
+           key: key,
+           size: size,
+           folder: get_type_folder(key)
+         })
+
+         # -v- this stuff goes into s3 and processor handling: -v-
+
+         # type_folder = get_object_folder(key)
+         # Set input codec by :set_codec_by_folder
+         # instance_codec = set_codec(type_folder) unless set_codec_by_folder["#{type_folder}"].nil?
+         # try download and :skip_delete if it fails
+         #if record['s3']['object']['size'] < 10000000 then
+         #  process_log(bucket, key, type_folder, instance_codec, queue, message, size)
+         #else
+         #  @logger.info("Your file is too big")
+         #end
+       end
+     end
+   end
+
+   # Runs an AWS request inside a Ruby block with exponential backoff in case
+   # we experience a ServiceError.
+   # @param [Integer] max_time maximum amount of time to sleep before giving up.
+   # @param [Integer] sleep_time the initial amount of time to sleep before retrying.
+   # Instead of requiring an explicit
+   # @param [Block] block parameter
+   # and then doing a "block.call",
+   # we simply yield to the passed block.
+   def run_with_backoff(max_time = MAX_TIME_BEFORE_GIVING_UP, sleep_time = BACKOFF_SLEEP_TIME)
+     next_sleep = sleep_time
+     begin
+       yield
+       next_sleep = sleep_time
+     rescue Aws::SQS::Errors::ServiceError => e
+       @logger.warn("Aws::SQS::Errors::ServiceError ... retrying SQS request with exponential backoff", :queue => @queue, :sleep_time => next_sleep, :error => e)
+       sleep(next_sleep)
+       next_sleep = next_sleep > max_time ? sleep_time : next_sleep * BACKOFF_FACTOR
+       retry
+     end
+   end
+
+   def get_type_folder(key)
+     # TEST THIS!
+     # if match = /.*\/?(?<type_folder>)\/[^\/]*/.match(key)
+     #   return match['type_folder']
+     # end
+     folder = ::File.dirname(key)
+     return '' if folder == '.'
+     return folder
+   end
+
+ end
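
A minimal usage sketch (not part of the diff), assuming the stop semaphore is a Concurrent::AtomicBoolean (anything with a "value" method works) and that the aws_options_hash is what logstash-mixin-aws would hand over:

    require 'concurrent'
    require 'logger'

    logger  = Logger.new($stderr)
    stopped = Concurrent::AtomicBoolean.new(false)

    poller = SqsPoller.new(logger, stopped, "my-log-queue",
                           { from_sns: true, sqs_skip_delete: false, visibility_timeout: 600 },
                           { region: "eu-central-1" })

    # one polling loop per worker thread; each yielded record describes one S3 object:
    poller.run do |record|
      logger.info("new object s3://#{record[:bucket]}/#{record[:key]} (#{record[:size]} bytes)")
    end
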
logstash-input-s3-sns-sqs.gemspec CHANGED
@@ -1,7 +1,7 @@
  Gem::Specification.new do |s|
    s.name = 'logstash-input-s3-sns-sqs'
-   s.version = '1.6.1'
-   s.licenses = ['Apache License (2.0)']
+   s.version = '2.0.0'
+   s.licenses = ['Apache-2.0']
    s.summary = "Get logs from AWS s3 buckets as issued by an object-created event via sns -> sqs."
    s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
    s.authors = ["Christian Herweg"]
@@ -21,9 +21,8 @@ Gem::Specification.new do |s|
    # Gem dependencies
    s.add_runtime_dependency "logstash-core-plugin-api", ">= 2.1.12", "<= 2.99"

-   s.add_runtime_dependency 'logstash-codec-json'
-   s.add_runtime_dependency "logstash-mixin-aws"
-   s.add_development_dependency 'logstash-devutils'
-
+   s.add_runtime_dependency 'logstash-codec-json', '~> 3.0'
+   s.add_runtime_dependency 'logstash-mixin-aws', '~> 4.3'
+   s.add_development_dependency 'logstash-codec-json_stream', '~> 1.0'
+   s.add_development_dependency 'logstash-devutils', '~> 1.3'
  end
-
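
If you pin plugins through Logstash's own Gemfile rather than the plugin installer, the equivalent entry would look like this (version per this release; the path is the standard Logstash Gemfile):

    # in $LS_HOME/Gemfile:
    gem "logstash-input-s3-sns-sqs", "2.0.0"
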
spec/inputs/s3sqs_spec.rb CHANGED
@@ -1,9 +1,60 @@
  # encoding: utf-8
  require "logstash/devutils/rspec/spec_helper"
- require "logstash/inputs/s3sqs"
+ require "logstash/plugin"
+ require "logstash/inputs/s3snssqs"
+ require "fileutils"
+ require "logstash/errors"
+ require "logstash/event"
+ require "logstash/json"
+ require "logstash/codecs/base"
+ require "logstash/codecs/json_stream"
+ require 'rspec'
+ require 'rspec/expectations'

- describe LogStash::Inputs::S3SQS do

-   true.should be_true

- end
+ describe LogStash::Inputs::S3SNSSQS do
+   class LogStash::Inputs::S3SNSSQS
+     public :process_local_log # call the method directly (bypassing the plugin's error logging) so spec failures surface the real error
+   end
+   let(:codec_options) { {} }
+
+   let(:input) { LogStash::Inputs::S3SNSSQS.new(config) }
+   subject { input }
+
+   context "default parser choice" do
+     it "should return true" do
+       expect(true).to be true
+     end
+   end
+
+   let(:compressed_log_file) { File.join(File.dirname(__FILE__), '..', '..', 'fixtures', 'log-stream.real-formatted') }
+   let(:key) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
+   let(:folder) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
+   let(:instance_codec) { "json" }
+   let(:queue) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
+   let(:bucket) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
+   let(:message) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
+   let(:size) { "123344" }
+   let(:temporary_directory) { Stud::Temporary.pathname }
+   let(:config) { { "queue" => queue, "codec" => "json", "temporary_directory" => temporary_directory } }
+   context 'compressed_log_file' do
+
+     subject do
+       LogStash::Inputs::S3SNSSQS.new(config)
+     end
+     let(:queue) { [] }
+     before do
+       @codec = LogStash::Codecs::JSONStream.new
+       @codec.charset = "UTF-8"
+       expect( subject.process_local_log(compressed_log_file, key, folder, @codec.clone, queue, bucket, message, size) ).to be true
+       $stderr.puts "method #{queue.to_s}"
+     end
+
+     it '.process_local_log => processes the compressed log file and fills the logstash event queue with the correct number of events' do
+       expect( queue.size ).to eq(38)
+       expect( queue.clear ).to be_empty
+     end
+
+   end
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-input-s3-sns-sqs
  version: !ruby/object:Gem::Version
-   version: 1.6.1
+   version: 2.0.0
  platform: ruby
  authors:
  - Christian Herweg
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-01-24 00:00:00.000000000 Z
+ date: 2019-06-24 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    requirement: !ruby/object:Gem::Requirement
@@ -33,45 +33,59 @@ dependencies:
  - !ruby/object:Gem::Dependency
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ">="
+     - - "~>"
        - !ruby/object:Gem::Version
-         version: '0'
+         version: '3.0'
    name: logstash-codec-json
    prerelease: false
    type: :runtime
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ">="
+     - - "~>"
        - !ruby/object:Gem::Version
-         version: '0'
+         version: '3.0'
  - !ruby/object:Gem::Dependency
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ">="
+     - - "~>"
        - !ruby/object:Gem::Version
-         version: '0'
+         version: '4.3'
    name: logstash-mixin-aws
    prerelease: false
    type: :runtime
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ">="
+     - - "~>"
        - !ruby/object:Gem::Version
-         version: '0'
+         version: '4.3'
  - !ruby/object:Gem::Dependency
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ">="
+     - - "~>"
        - !ruby/object:Gem::Version
-         version: '0'
+         version: '1.0'
+   name: logstash-codec-json_stream
+   prerelease: false
+   type: :development
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.0'
+ - !ruby/object:Gem::Dependency
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
    name: logstash-devutils
    prerelease: false
    type: :development
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ">="
+     - - "~>"
        - !ruby/object:Gem::Version
-         version: '0'
+         version: '1.3'
  description: This gem is a logstash plugin required to be installed on top of the
    Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
    a stand-alone program
@@ -86,15 +100,19 @@ files:
  - LICENSE
  - NOTICE.TXT
  - README.md
- - lib/logstash/inputs/mime/MagicgzipValidator.rb
+ - lib/logstash/inputs/codec_factory.rb
+ - lib/logstash/inputs/mime/magic_gzip_validator.rb
+ - lib/logstash/inputs/s3/client_factory.rb
+ - lib/logstash/inputs/s3/downloader.rb
  - lib/logstash/inputs/s3snssqs.rb
+ - lib/logstash/inputs/s3snssqs/log_processor.rb
  - lib/logstash/inputs/s3sqs/patch.rb
+ - lib/logstash/inputs/sqs/poller.rb
  - logstash-input-s3-sns-sqs.gemspec
  - spec/inputs/s3sqs_spec.rb
- - spec/spec_helper.rb
  homepage: https://github.com/cherweg/logstash-input-s3-sns-sqs
  licenses:
- - Apache License (2.0)
+ - Apache-2.0
  metadata:
    logstash_plugin: 'true'
    logstash_group: input
@@ -121,4 +139,3 @@ summary: Get logs from AWS s3 buckets as issued by an object-created event via s
    -> sqs.
  test_files:
  - spec/inputs/s3sqs_spec.rb
- - spec/spec_helper.rb