logstash-input-imap 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ N2YwMWY0NWI1MDA1NDcxZWIxMTc4NGEzOWRjZmQyODc3MWI5MzA3Mg==
5
+ data.tar.gz: !binary |-
6
+ OTcxZDc3ZTBmODUzZDc0YWU1YmQ3ZWRmNzdhMzhjMzMwNDkyODJjNg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZDUyODZlNDEyMDczYzBlNzhiODBhNWQyYWQ4MTdmODJiM2Q1ZjcxODgwYWFh
10
+ OTI1YmI2YzIyNTkzYTk5ZDY5NTNlMDc0MjUxNGZmYWY4MjUxNTYwY2ZiODhm
11
+ ZjY2OGJmODZjMDljYmE0ODQ5ODc2MGZkMjA2ZTQ1ZTYzM2NlNDc=
12
+ data.tar.gz: !binary |-
13
+ MjQ2MmUxYjQ1MzgxY2FjN2Q2MjEyYmIwNWRhZDQ2YWM2OTQxOWQ2OTg0Njg4
14
+ MTYxZTQ1ZWYwZWM2MmViZWQ5YjQ1MmI5ZGIxODAzZmI3ODg3MDJkYTMzZjE4
15
+ M2YzM2FkN2M3ZTg5ZGUxMjQ2NDM4ZmQ4ZGFjNTZjZGQ1ZTE4MjM=
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Development-time gems for this plugin's rake tasks.
# Use the TLS endpoint so gems are not fetched over plain HTTP.
source 'https://rubygems.org'

gem 'rake'
gem 'gem_publisher'        # required by the publish_gem rake task
gem 'archive-tar-minitar'  # required by the untar helper of the vendor rake task
@@ -0,0 +1,6 @@
1
# Vendor artifacts iterated by the "vendor" rake task; each entry is read
# there through its 'url', 'sha1' and optional 'files' keys. Empty here.
@files = []

# A bare `rake` shells out to list the available tasks.
task(:default) { system("rake -T") }
6
+
@@ -0,0 +1,157 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/base"
3
+ require "logstash/namespace"
4
+ require "logstash/timestamp"
5
+ require "stud/interval"
6
+ require "socket" # for Socket.gethostname
7
+
8
# Read mail from IMAP servers.
#
# On every `check_interval` tick this input logs in, searches INBOX for
# messages matching "NOT SEEN", turns each fetched message into an event and
# then flags the processed messages as seen (or as deleted when `delete` is
# true).
class LogStash::Inputs::IMAP < LogStash::Inputs::Base
  config_name "imap"
  milestone 1

  default :codec, "plain"

  config :host, :validate => :string, :required => true
  # When unset, `register` falls back to 993 (secure) or 143 (not secure).
  config :port, :validate => :number

  config :user, :validate => :string, :required => true
  config :password, :validate => :password, :required => true
  config :secure, :validate => :boolean, :default => true
  config :verify_cert, :validate => :boolean, :default => true

  # Number of message ids handed to a single IMAP fetch/store call.
  config :fetch_count, :validate => :number, :default => 50
  # Downcase header names before using them as event field names.
  config :lowercase_headers, :validate => :boolean, :default => true
  # Interval handed to Stud.interval between two mailbox checks.
  config :check_interval, :validate => :number, :default => 300
  # Flag processed messages as deleted instead of merely seen.
  config :delete, :validate => :boolean, :default => false

  # For multipart messages, use the first part that has this
  # content-type as the event message.
  config :content_type, :validate => :string, :default => "text/plain"

  public
  # Loads the mail libraries, resolves the default port and precompiles the
  # content-type matcher used by parse_mail.
  def register
    require "net/imap" # in stdlib
    require "mail" # gem 'mail'

    if @secure && !@verify_cert
      @logger.warn("Running IMAP without verifying the certificate may grant attackers unauthorized access to your mailbox or data")
    end

    @port ||= @secure ? 993 : 143

    @content_type_re = Regexp.new("^" + @content_type)
  end # def register

  # Opens a connection to the configured server and logs in.
  #
  # Returns the logged-in Net::IMAP instance.
  def connect
    sslopt = @secure
    # Certificate verification can only be switched off on a TLS connection.
    sslopt = { :verify_mode => OpenSSL::SSL::VERIFY_NONE } if @secure && !@verify_cert

    imap = Net::IMAP.new(@host, :port => @port, :ssl => sslopt)
    imap.login(@user, @password.value)
    imap
  end

  # Checks the mailbox every `check_interval`, pushing events onto `queue`.
  def run(queue)
    Stud.interval(@check_interval) do
      check_mail(queue)
    end
  end

  # Performs one mailbox pass: fetch unseen messages in slices of
  # `fetch_count`, push the parsed result of each onto `queue`, then flag the
  # slice as :Deleted or :Seen.
  def check_mail(queue)
    # TODO(sissel): handle exceptions happening during runtime:
    # EOFError, OpenSSL::SSL::SSLError
    imap = connect
    begin
      imap.select("INBOX")
      ids = imap.search("NOT SEEN")

      ids.each_slice(@fetch_count) do |id_set|
        # Net::IMAP#fetch can hand back nil when the server returns no FETCH
        # data for the requested ids, so fall back to an empty list.
        items = imap.fetch(id_set, "RFC822") || []
        items.each do |item|
          next unless item.attr.has_key?("RFC822")
          mail = Mail.read_from_string(item.attr["RFC822"])
          queue << parse_mail(mail)
        end

        imap.store(id_set, '+FLAGS', @delete ? :Deleted : :Seen)
      end

      imap.close
    ensure
      # Always release the connection, even when a fetch or parse step raised.
      imap.disconnect
    end
  end # def check_mail

  # Converts a Mail message into an event via the configured codec.
  #
  # The event text is the message body, or for multipart mail the first part
  # whose content type matches `content_type` (falling back to the first
  # part). Every header except Date becomes an event field; a header that
  # occurs several times is collected into an array.
  #
  # Returns whatever @codec.decode returns; the block's last value is the
  # decorated event.
  def parse_mail(mail)
    # TODO(sissel): What should a multipart message look like as an event?
    # For now, just take the plain-text part and set it as the message.
    if mail.parts.count == 0
      # No multipart message, just use the body as the event text
      message = mail.body.decoded
    else
      # Multipart message; use the first matching part we find.
      # to_s guards against a part that carries no content type at all.
      part = mail.parts.find { |p| p.content_type.to_s.match @content_type_re } || mail.parts.first
      message = part.decoded
    end

    @codec.decode(message) do |event|
      # Use the 'Date' header as the timestamp. A message without one leaves
      # the timestamp the event already has untouched instead of raising.
      event.timestamp = LogStash::Timestamp.new(mail.date.to_time) if mail.date

      mail.header_fields.each do |header|
        # 'header.name' can sometimes be a Mail::Multibyte::Chars, get it in
        # String form
        raw_name = header.name.to_s

        # The Date header was already consumed above. Compare ignoring case
        # so the skip also works once names have been lowercased.
        next if raw_name.casecmp("date") == 0

        name = @lowercase_headers ? raw_name.downcase : raw_name

        # Call .decoded on the header in case it's in encoded-word form.
        # Details at:
        #   https://github.com/mikel/mail/blob/master/README.md#encodings
        #   http://tools.ietf.org/html/rfc2047#section-2
        value = transcode_to_utf8(header.decoded)

        case event[name]
        # promote string to array if a header appears multiple times
        # (like 'received')
        when String; event[name] = [event[name], value]
        when Array; event[name] << value
        when nil; event[name] = value
        end
      end # mail.header_fields.each

      decorate(event)
      event
    end
  end # def parse_mail

  public
  # Marks the plugin as finished. This input never reads from standard
  # input, so it must not close $stdin on shutdown.
  def teardown
    finished
  end # def teardown

  private

  # transcode_to_utf8 is meant for headers transcoding.
  # the mail gem will set the correct encoding on header strings decoding
  # and we want to transcode it to utf8
  def transcode_to_utf8(s)
    s.encode(Encoding::UTF_8, :invalid => :replace, :undef => :replace)
  end
end # class LogStash::Inputs::IMAP
@@ -0,0 +1,30 @@
1
# Gem specification for the logstash IMAP input plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-input-imap'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "Read mail from IMAP servers"
  s.description     = "Read mail from IMAP servers. Periodically scans INBOX and moves any read messages to the trash."
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths   = ["lib"]

  # Files
  # `git ls-files` prints one path per line. Split on newlines explicitly:
  # splitting on $\ (nil unless someone sets it) falls back to whitespace
  # splitting and would break any path that contains a space.
  s.files = `git ls-files`.split("\n") + ::Dir.glob('vendor/*')

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "input" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

  s.add_runtime_dependency 'logstash-codec-plain'
  s.add_runtime_dependency 'mail'
  s.add_runtime_dependency 'stud'

end
30
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
# Looks for the first *.gemspec one directory above this rake file and hands
# it to GemPublisher; a message is printed only when publish_if_updated
# returns a value.
desc "Publish gem to RubyGems.org"
task :publish_gem do |_task|
  gemspec_pattern = File.expand_path('../*.gemspec', File.dirname(__FILE__))
  gemspec_path = Dir.glob(gemspec_pattern).first
  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
# Builds a path below the "vendor" directory from the given path segments.
def vendor(*segments)
  File.join("vendor", *segments)
end
8
+
9
# Directory task for "vendor/" (declared with "vendor" as its prerequisite);
# its action creates the directory named after the task.
directory "vendor/" => ["vendor"] do |dir_task, _args|
  mkdir dir_task.name
end
12
+
13
# Downloads `url` to `output` and verifies the payload.
#
# url    - address handed to download
# sha1   - expected hex SHA1 of the downloaded data
# output - destination path
#
# Raises RuntimeError (via fail) when the digest reported by download differs
# from `sha1`.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  actual_sha1 = download(url, output)
  return if actual_sha1 == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
end # def fetch
22
+
23
# Makes sure the file behind `url` exists under vendor/ with the expected
# SHA1, downloading it when it is missing or its digest differs.
#
# The work is wrapped in a rake task (depending on "vendor/") that is invoked
# immediately.
#
# Returns the local path "vendor/<basename of the url path>".
def file_fetch(url, sha1)
  basename = File.basename(URI(url).path)
  output = "vendor/#{basename}"

  refresh = task output => ["vendor/"] do
    begin
      # A stale copy is replaced; a matching copy is left alone.
      fetch(url, sha1, output) unless file_sha1(output) == sha1
    rescue Errno::ENOENT
      # Nothing on disk yet (file_sha1 could not open it): download it.
      fetch(url, sha1, output)
    end
  end
  refresh.invoke

  output
end
39
+
40
# Returns the hex SHA1 digest of the file at `path`.
#
# Digest::SHA1.file streams the file in binary mode, so the result does not
# depend on platform newline translation (the hand-rolled loop it replaces
# opened the file in text mode).
#
# Raises Errno::ENOENT when the file does not exist; file_fetch relies on
# that to detect a missing download.
def file_sha1(path)
  Digest::SHA1.file(path).hexdigest
end
54
+
55
# Streams `url` into `output` and returns the hex SHA1 of the received bytes.
#
# The body is written to "<output>.tmp" first and renamed into place only
# after the transfer finished; the temporary file is removed on any failure.
#
# url    - http or https address (TLS is used when the scheme is "https")
# output - destination path
#
# Raises RuntimeError (via fail) when the response status is neither 200 nor
# 301, and re-raises SocketError after printing a message.
def download(url, output)
  # Assigned first so the ensure clause can always evaluate it.
  tmp = "#{output}.tmp"
  uri = URI(url)
  digest = Digest::SHA1.new
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string, which uri.path would drop.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String such as "200"; reject anything unexpected.
      fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
      # A missing Content-Length header yields 0.0, which disables progress output.
      size = response["content-length"].to_i.to_f
      count = 0
      # Binary mode: the payload must hit the disk byte-for-byte.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r      \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  File.unlink(tmp) if File.exist?(tmp)
end # def download
88
+
89
# Extracts entries from the gzip-compressed tarball at `tarball`, then
# deletes the tarball.
#
# The block is called with every tar entry and returns the destination path
# for it, or nil to skip the entry. Directories are created as needed. A
# regular file is skipped when a file of the same size and permission bits
# already exists at the destination.
def untar(tarball, &block)
  require "archive/tar/minitar"
  require "zlib"

  # The block form guarantees the tarball handle is released even when
  # extraction raises.
  File.open(tarball, "rb") do |raw|
    tgz = Zlib::GzipReader.new(raw)
    tar = Archive::Tar::Minitar::Input.open(tgz)
    begin
      tar.each do |entry|
        path = block.call(entry)
        next if path.nil?

        parent = File.dirname(path)
        mkdir_p parent unless File.directory?(parent)

        if entry.directory?
          mkdir path unless File.directory?(path)
          next
        end

        # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
        # expose headers in the entry.
        entry_mode = entry.instance_eval { @mode } & 0777
        # File.exist? rather than File.exists?, which Ruby 3.2 removed.
        if File.exist?(path)
          stat = File.stat(path)
          entry_size = entry.instance_eval { @size }
          # Same size and same permission bits: skip writing.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end

        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        # Binary mode so archive contents are written byte-for-byte.
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          until entry.eof?
            fd.write(entry.read(16384))
          end
        end
        File.chmod(entry_mode, path)
      end
    ensure
      tar.close
    end
  end

  File.unlink(tarball) if File.file?(tarball)
end # def untar
133
+
134
# Decompresses the gzip file `file` next to itself and removes the original.
#
# The output path is `file` with its trailing ".gz" removed; only the suffix
# is stripped, so a ".gz" elsewhere in the path is left alone. When
# decompression fails, the partial output is deleted and the error re-raised.
#
# Raises ArgumentError when `file` does not end in ".gz", because the output
# path would then be the input file itself.
def ungz(file)
  require "zlib"

  outpath = file.sub(/\.gz\z/, '')
  fail ArgumentError, "#{file} does not end in .gz" if outpath == file

  begin
    # Block forms close both handles before the source is unlinked.
    Zlib::GzipReader.open(file) do |gz|
      File.open(outpath, "wb") do |out|
        IO.copy_stream(gz, out)
      end
    end
    File.unlink(file)
  rescue
    File.unlink(outpath) if File.file?(outpath)
    raise
  end
end
149
+
150
# Fetches every entry of @files into vendor/ and unpacks it:
#   *.tar.gz - extracted with untar, each entry flattened into vendor/
#   *.gz     - decompressed in place with ungz
# Each entry is a hash read through its 'url', 'sha1' and optional 'files'
# keys.
desc "Process any vendor files required for this plugin"
task "vendor" do |task, args|

  @files.each do |file|
    # Named `archive` so the local does not shadow the download helper.
    archive = file_fetch(file['url'], file['sha1'])
    # Dots escaped and the match anchored: only a real suffix counts.
    if archive =~ /\.tar\.gz\z/
      prefix = archive.gsub('.tar.gz', '').gsub('vendor/', '')
      untar(archive) do |entry|
        wanted = file['files']
        # With a 'files' list, extract only the listed entries.
        next if wanted && !wanted.include?(entry.full_name.gsub(prefix, ''))
        # Without a list the original left the target name nil and crashed in
        # File.join; presumably every entry should be extracted - TODO confirm.
        File.join('vendor', entry.full_name.split("/").last)
      end
    elsif archive =~ /\.gz\z/
      ungz(archive)
    end
  end

end
@@ -0,0 +1,93 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+ require "logstash/inputs/imap"
5
+ require "mail"
6
+
7
# Exercises parse_mail against a two-part message (plain-text body plus an
# attached HTML file) without talking to an IMAP server.
describe LogStash::Inputs::IMAP do
  user = "logstash"
  password = "secret"
  msg_time = Time.new
  msg_text = "foo\nbar\nbaz"
  msg_html = "<p>a paragraph</p>\n\n"

  # Returns a fresh settings hash per call so examples never share one.
  settings_for = lambda do |extra = {}|
    {"type" => "imap", "host" => "localhost",
     "user" => "#{user}", "password" => "#{password}"}.merge(extra)
  end

  # Registers a new input with `settings` and runs parse_mail on `mail`.
  parse_with = lambda do |settings, mail|
    input = LogStash::Inputs::IMAP.new settings
    input.register
    input.parse_mail(mail)
  end

  subject do
    Mail.new do
      from     "me@example.com"
      to       "you@example.com"
      subject  "logstash imap input test"
      date     msg_time
      body     msg_text
      add_file :filename => "some.html", :content => msg_html
    end
  end

  context "with both text and html parts" do
    context "when no content-type selected" do
      it "should select text/plain part" do
        event = parse_with.call(settings_for.call, subject)
        insist { event["message"] } == msg_text
      end
    end

    context "when text/html content-type selected" do
      it "should select text/html part" do
        event = parse_with.call(settings_for.call("content_type" => "text/html"), subject)
        insist { event["message"] } == msg_html
      end
    end
  end

  context "when subject is in RFC 2047 encoded-word format" do
    it "should be decoded" do
      subject.subject = "=?iso-8859-1?Q?foo_:_bar?="

      event = parse_with.call(settings_for.call, subject)
      insist { event["subject"] } == "foo : bar"
    end
  end

  context "with multiple values for same header" do
    it "should add 2 values as array in event" do
      subject.received = "test1"
      subject.received = "test2"

      event = parse_with.call(settings_for.call, subject)
      insist { event["received"] } == ["test1", "test2"]
    end

    it "should add more than 2 values as array in event" do
      subject.received = "test1"
      subject.received = "test2"
      subject.received = "test3"

      event = parse_with.call(settings_for.call, subject)
      insist { event["received"] } == ["test1", "test2", "test3"]
    end
  end
end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-input-imap
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: logstash-codec-plain
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: mail
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ - !ruby/object:Gem::Dependency
62
+ name: stud
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ! '>='
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ! '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ description: Read mail from IMAP servers. Periodically scans INBOX and moves any read
76
+ messages to the trash.
77
+ email: richard.pijnenburg@elasticsearch.com
78
+ executables: []
79
+ extensions: []
80
+ extra_rdoc_files: []
81
+ files:
82
+ - .gitignore
83
+ - Gemfile
84
+ - Rakefile
85
+ - lib/logstash/inputs/imap.rb
86
+ - logstash-input-imap.gemspec
87
+ - rakelib/publish.rake
88
+ - rakelib/vendor.rake
89
+ - spec/inputs/imap_spec.rb
90
+ homepage: http://logstash.net/
91
+ licenses:
92
+ - Apache License (2.0)
93
+ metadata:
94
+ logstash_plugin: 'true'
95
+ group: input
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ! '>='
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubyforge_project:
112
+ rubygems_version: 2.4.1
113
+ signing_key:
114
+ specification_version: 4
115
+ summary: Read mail from IMAP servers
116
+ test_files:
117
+ - spec/inputs/imap_spec.rb