logstash-input-s3-sns-sqs 1.4.8 → 1.4.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
class MimeMagic
  # MimeMagic version string.
  # Frozen so the shared constant cannot be mutated in place by callers.
  # @api public
  VERSION = '0.3.2'.freeze
end
@@ -0,0 +1,139 @@
1
+ require 'logstash/inputs/mime/mimemagic/tables'
2
+ require 'logstash/inputs/mime/mimemagic/version'
3
+
4
+ require 'stringio'
5
+
6
+ # Mime type detection
7
class MimeMagic
  attr_reader :type, :mediatype, :subtype

  # Mime type by type string.
  #
  # @param type [String] full mime type (e.g. 'text/html'); it is split on the
  #   first '/' into mediatype and subtype
  def initialize(type)
    @type = type
    @mediatype, @subtype = type.split('/', 2)
  end

  # Add custom mime type. Arguments:
  # * <i>type</i>: Mime type
  # * <i>options</i>: Options hash
  #
  # Option keys:
  # * <i>:extensions</i>: String list or single string of file extensions
  # * <i>:parents</i>: String list or single string of parent mime types
  # * <i>:magic</i>: Mime magic specification
  # * <i>:comment</i>: Comment string
  def self.add(type, options)
    extensions = [options[:extensions]].flatten.compact
    TYPES[type] = [extensions,
                   [options[:parents]].flatten.compact,
                   options[:comment]]
    extensions.each { |ext| EXTENSIONS[ext] = type }
    # Custom magic goes to the front so it is tried before the existing rules.
    MAGIC.unshift [type, options[:magic]] if options[:magic]
  end

  # Removes a mime type from the dictionary. You might want to do this if
  # you're seeing impossible conflicts (for instance, application/x-gmc-link).
  # * <i>type</i>: The mime type to remove. All associated extensions and magic are removed too.
  def self.remove(type)
    EXTENSIONS.delete_if { |_ext, t| t == type }
    MAGIC.delete_if { |t, _m| t == type }
    TYPES.delete(type)
  end

  # Returns true if type is a text format
  def text?; mediatype == 'text' || child_of?('text/plain'); end

  # Mediatype shortcuts
  def image?; mediatype == 'image'; end
  def audio?; mediatype == 'audio'; end
  def video?; mediatype == 'video'; end

  # Returns true if type is child of parent type
  def child_of?(parent)
    MimeMagic.child?(type, parent)
  end

  # Get string list of file extensions (empty list for an unknown type)
  def extensions
    TYPES.key?(type) ? TYPES[type][0] : []
  end

  # Get mime comment (empty string for an unknown type or a missing comment)
  def comment
    (TYPES.key?(type) ? TYPES[type][2] : nil).to_s
  end

  # Lookup mime type by file extension.
  # The extension is downcased and may be given with or without a leading dot.
  # Returns a MimeMagic instance, or nil when the extension is unknown.
  def self.by_extension(ext)
    ext = ext.to_s.downcase
    mime = ext[0..0] == '.' ? EXTENSIONS[ext[1..-1]] : EXTENSIONS[ext]
    mime && new(mime)
  end

  # Lookup mime type by filename
  def self.by_path(path)
    by_extension(File.extname(path))
  end

  # Lookup mime type by magic content analysis.
  # This is a slow operation.
  # Accepts an object responding to #read, or anything else (converted with #to_s).
  # Returns the first matching MimeMagic instance, or nil.
  def self.by_magic(io)
    mime = magic_match(io, :find)
    mime && new(mime[0])
  end

  # Lookup all mime types by magic content analysis.
  # This is a slower operation.
  def self.all_by_magic(io)
    magic_match(io, :select).map { |mime| new(mime[0]) }
  end

  # Return type as string
  def to_s
    type
  end

  # Allow comparison with string
  def eql?(other)
    type == other.to_s
  end

  # Hash of the type string, consistent with #eql?
  def hash
    type.hash
  end

  alias == eql?

  # Returns true if child equals parent or if any of the parents registered
  # for child in TYPES is (recursively) a child of parent.
  def self.child?(child, parent)
    child == parent || TYPES.key?(child) && TYPES[child][1].any? { |p| child?(p, parent) }
  end

  # Runs the MAGIC rules against io using the given Enumerable method
  # (:find or :select) and returns that method's result.
  # Objects that do not respond to #read are wrapped in a StringIO.
  def self.magic_match(io, method)
    return magic_match(StringIO.new(io.to_s), method) unless io.respond_to?(:read)

    io.binmode if io.respond_to?(:binmode)
    io.set_encoding(Encoding::BINARY) if io.respond_to?(:set_encoding)
    # Every rule offset is relative to the start of the content. Without this
    # rewind an IO handed over at a non-zero position would have its first rule
    # evaluated at the wrong offset (later rules already start from 0 because
    # magic_match_io rewinds after each attempt).
    io.rewind
    # Fresh mutable binary buffer; does not rely on string literals being mutable.
    buffer = String.new.force_encoding(Encoding::BINARY)

    MAGIC.send(method) { |_type, matches| magic_match_io(io, matches, buffer) }
  end

  # Returns true if any entry of matches applies to io.
  # Each entry is [offset, value, children]:
  # * Integer offset: the value.bytesize bytes following offset must equal value.
  # * Range offset: value must occur inside the window that starts at
  #   offset.begin and is (offset.end - offset.begin + value.bytesize) bytes long.
  # When children are present, at least one of them must match as well.
  # The io is rewound after every attempt.
  def self.magic_match_io(io, matches, buffer)
    matches.any? do |offset, value, children|
      match =
        if Range === offset
          io.read(offset.begin, buffer)
          x = io.read(offset.end - offset.begin + value.bytesize, buffer)
          x && x.include?(value)
        else
          io.read(offset, buffer)
          io.read(value.bytesize, buffer) == value
        end
      io.rewind
      match && (!children || magic_match_io(io, children, buffer))
    end
  end

  private_class_method :magic_match, :magic_match_io
end
139
+
@@ -8,6 +8,7 @@ require "logstash/errors"
8
8
  require 'logstash/inputs/s3sqs/patch'
9
9
  require "aws-sdk"
10
10
  require 'cgi'
11
+ require 'logstash/inputs/mime/mimemagic'
11
12
 
12
13
  require 'java'
13
14
  java_import java.io.InputStream
@@ -237,7 +238,7 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
237
238
  begin
238
239
  remote_object.get(:response_target => s3file)
239
240
  rescue Aws::S3::Errors::AccessDenied => e
240
- @logger.debug("Unable to download file. We´ll requeue the message", :file => remote_object.inspect)
241
+ @logger.error("Unable to download file. We´ll requeue the message", :file => remote_object.inspect)
241
242
  throw :skip_delete
242
243
  end
243
244
  end
@@ -264,10 +265,10 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
264
265
  @logger.warn("Logstash S3 input, stop reading in the middle of the file, we will read it again when logstash is started")
265
266
  return false
266
267
  end
267
- #@logger.info("read line #{i}", :line => line)
268
268
  #line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370")
269
+ #@logger.debug("read line", :line => line)
269
270
  instance_codec.decode(line) do |event|
270
- #@logger.info("decorate event")
271
+ @logger.debug("decorate event")
271
272
  # We are making an assumption concerning cloudfront
272
273
  # log format, the user will use the plain or the line codec
273
274
  # and the message key will represent the actual line content.
@@ -293,6 +294,7 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
293
294
 
294
295
  private
295
296
  def local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
297
+ @logger.debug('decorating event', :event => event.to_s)
296
298
  if event_is_metadata?(event)
297
299
  @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
298
300
  update_metadata(metadata, event)
@@ -306,6 +308,7 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
306
308
  event.set("[@metadata][s3]", { "object_key" => key })
307
309
  event.set("[@metadata][s3]", { "bucket_name" => bucket })
308
310
  event.set("[@metadata][s3]", { "object_folder" => folder})
311
+ #@logger.debug('queuing event', :event => event.to_s)
309
312
  queue << event
310
313
  end
311
314
  end
@@ -319,6 +322,7 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
319
322
  return ""
320
323
  end
321
324
  end
325
+
322
326
  private
323
327
  def read_file(filename, &block)
324
328
  if gzip?(filename)
@@ -355,10 +359,12 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
355
359
 
356
360
  private
357
361
  def gzip?(filename)
358
- filename.end_with?('.gz','.gzip')
362
+ return true if filename.end_with?('.gz','.gzip')
363
+ return true if MimeMagic.by_magic(File.open(filename)).to_s == 'application/gzip'
364
+ rescue Exception => e
365
+ @logger.debug("Problem while gzip detection", :error => e)
359
366
  end
360
367
 
361
-
362
368
  private
363
369
  def delete_file_from_bucket(object)
364
370
  if @delete_on_success
@@ -370,7 +376,7 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
370
376
  private
371
377
  def get_s3client
372
378
  if s3_access_key_id and s3_secret_access_key
373
- @logger.debug("Using S3 Credentials from config", :ID => aws_options_hash.merge(:access_key_id => s3_access_key_id) )
379
+ @logger.debug("Using S3 Credentials from config", :ID => aws_options_hash.merge(:access_key_id => s3_access_key_id, :secret_access_key => s3_secret_access_key) )
374
380
  @s3_client = Aws::S3::Client.new(aws_options_hash.merge(:access_key_id => s3_access_key_id, :secret_access_key => s3_secret_access_key))
375
381
  elsif @s3_role_arn
376
382
  @s3_client = Aws::S3::Client.new(aws_options_hash.merge!({ :credentials => s3_assume_role }))
@@ -499,4 +505,4 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
499
505
  result
500
506
  end
501
507
  end
502
- end # class
508
+ end # class
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-s3-sns-sqs'
3
- s.version = '1.4.8'
3
+ s.version = '1.4.9'
4
4
  s.licenses = ['Apache License (2.0)']
5
5
  s.summary = "Get logs from AWS s3 buckets as issued by an object-created event via sns -> sqs."
6
6
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -23,7 +23,6 @@ Gem::Specification.new do |s|
23
23
 
24
24
  s.add_runtime_dependency 'logstash-codec-json'
25
25
  s.add_runtime_dependency "logstash-mixin-aws"
26
-
27
26
  s.add_development_dependency 'logstash-devutils'
28
27
 
29
28
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-s3-sns-sqs
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.8
4
+ version: 1.4.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christian Herweg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-28 00:00:00.000000000 Z
11
+ date: 2018-03-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -84,6 +84,10 @@ files:
84
84
  - LICENSE
85
85
  - NOTICE.TXT
86
86
  - README.md
87
+ - lib/logstash/inputs/mime/mimemagic.rb
88
+ - lib/logstash/inputs/mime/mimemagic/overlay.rb
89
+ - lib/logstash/inputs/mime/mimemagic/tables.rb
90
+ - lib/logstash/inputs/mime/mimemagic/version.rb
87
91
  - lib/logstash/inputs/s3snssqs.rb
88
92
  - lib/logstash/inputs/s3sqs/patch.rb
89
93
  - logstash-input-s3-sns-sqs.gemspec