logstash-output-azure 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class LogstashAzureBlobOutput
+       class PathValidator
+         INVALID_CHARACTERS = "\^`><"
+
+         def self.valid?(name)
+           name.match(matches_re).nil?
+         end
+
+         def self.matches_re
+           /[#{Regexp.escape(INVALID_CHARACTERS)}]/
+         end
+       end
+     end
+   end
+ end
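A name is considered valid when it contains none of the characters in INVALID_CHARACTERS. A quick sketch of the behavior, using the class defined above:

    validator = LogStash::Outputs::LogstashAzureBlobOutput::PathValidator
    validator.valid?("logs/app.txt")  # => true
    validator.valid?("logs>app.txt")  # => false, '>' is one of the rejected characters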
@@ -0,0 +1,24 @@
+ # encoding: utf-8
+ require "logstash/outputs/blob/size_rotation_policy"
+ require "logstash/outputs/blob/time_rotation_policy"
+
+ module LogStash
+   module Outputs
+     class LogstashAzureBlobOutput
+       class SizeAndTimeRotationPolicy
+         def initialize(file_size, time_file)
+           @size_strategy = SizeRotationPolicy.new(file_size)
+           @time_strategy = TimeRotationPolicy.new(time_file)
+         end
+
+         def rotate?(file)
+           @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
+         end
+
+         def needs_periodic?
+           true
+         end
+       end
+     end
+   end
+ end
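The composite policy rotates as soon as either limit is hit. A minimal sketch of the semantics, using a Struct as a stand-in for a TemporaryFile (size in bytes, time_file in minutes):

    StubFile = Struct.new(:size, :ctime)

    policy = LogStash::Outputs::LogstashAzureBlobOutput::SizeAndTimeRotationPolicy.new(1024, 15)
    policy.rotate?(StubFile.new(2048, Time.now))       # => true, size limit reached
    policy.rotate?(StubFile.new(10, Time.now - 3600))  # => true, older than 15 minutes
    policy.rotate?(StubFile.new(10, Time.now))         # => false, neither limit reached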
@@ -0,0 +1,26 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class LogstashAzureBlobOutput
+       class SizeRotationPolicy
+         attr_reader :size_file
+
+         def initialize(size_file)
+           if size_file <= 0
+             raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+           end
+
+           @size_file = size_file
+         end
+
+         def rotate?(file)
+           file.size >= size_file
+         end
+
+         def needs_periodic?
+           false
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,71 @@
+ # encoding: utf-8
+ require "thread"
+ require "forwardable"
+ require "fileutils"
+
+ module LogStash
+   module Outputs
+     class LogstashAzureBlobOutput
+       # Wrap the actual file descriptor into a utility class.
+       # It makes it more OOP and easier to reason about the paths.
+       class TemporaryFile
+         extend Forwardable
+
+         def_delegators :@fd, :path, :write, :close, :fsync
+
+         attr_reader :fd
+
+         def initialize(key, fd, temp_path)
+           @fd = fd
+           @key = key
+           @temp_path = temp_path
+           @created_at = Time.now
+         end
+
+         def ctime
+           @created_at
+         end
+
+         def temp_path
+           @temp_path
+         end
+
+         def size
+           # Use the fd size to get an accurate result,
+           # so we don't have to deal with fsync;
+           # if the file is closed we fall back to File::size.
+           begin
+             @fd.size
+           rescue IOError
+             ::File.size(path)
+           end
+         end
+
+         def key
+           @key.gsub(/^\//, "")
+         end
+
+         # Each temporary file is created inside a directory named with a UUID.
+         # Instead of deleting the file directly and risking deleting other files,
+         # we delete the root of the UUID; using a UUID also removes the risk of
+         # deleting unwanted files, it acts as a sandbox.
+         def delete!
+           @fd.close rescue nil # force close, ignoring errors if already closed
+           FileUtils.rm_r(@temp_path, :secure => true)
+         end
+
+         def empty?
+           size == 0
+         end
+
+         def self.create_from_existing_file(file_path, temporary_folder)
+           key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
+
+           TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
+                             ::File.open(file_path, "r"),
+                             ::File.join(temporary_folder, key_parts.slice(0, 1)))
+         end
+       end
+     end
+   end
+ end
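A minimal sketch of the TemporaryFile lifecycle described in the comments above (paths are illustrative):

    require "fileutils"
    require "securerandom"
    require "tmpdir"

    uuid_dir = File.join(Dir.mktmpdir, SecureRandom.uuid)
    FileUtils.mkdir_p(uuid_dir)
    fd = File.open(File.join(uuid_dir, "ls.s3.example.part0.txt"), "a")

    file = LogStash::Outputs::LogstashAzureBlobOutput::TemporaryFile.new(
      "prefix/ls.s3.example.part0.txt", fd, uuid_dir)
    file.write("hello\n")
    file.size    # => 6 once the buffer is flushed to disk
    file.delete! # removes the whole UUID directory, not just the file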
@@ -0,0 +1,129 @@
+ # encoding: utf-8
+ require "socket"
+ require "securerandom"
+ require "fileutils"
+ require "zlib"
+ require "forwardable"
+
+ module LogStash
+   module Outputs
+     class LogstashAzureBlobOutput
+       # Since the file can contain dynamic parts, we have to handle a more local structure to
+       # allow a nice recovery from a crash.
+       #
+       # The local structure will look like this.
+       #
+       # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%m.tag_es_fb.part1.txt.gz
+       #
+       # Since the UUID should be fairly unique, I can destroy the whole path when an upload is complete.
+       # I do not have to check whether the other directories have files in them before destroying them.
+       class TemporaryFileFactory
+         FILE_MODE = "a"
+         GZIP_ENCODING = "gzip"
+         GZIP_EXTENSION = "txt.gz"
+         TXT_EXTENSION = "txt"
+         STRFTIME = "%Y-%m-%dT%H.%M"
+
+         attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
+
+         def initialize(prefix, tags, encoding, temporary_directory)
+           @counter = 0
+           @prefix = prefix
+
+           @tags = tags
+           @encoding = encoding
+           @temporary_directory = temporary_directory
+           @lock = Mutex.new
+
+           rotate!
+         end
+
+         def rotate!
+           @lock.synchronize {
+             @current = new_file
+             increment_counter
+             @current
+           }
+         end
+
+         private
+         def extension
+           gzip? ? GZIP_EXTENSION : TXT_EXTENSION
+         end
+
+         def gzip?
+           encoding == GZIP_ENCODING
+         end
+
+         def increment_counter
+           @counter += 1
+         end
+
+         def current_time
+           Time.now.strftime(STRFTIME)
+         end
+
+         def generate_name
+           filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"
+
+           if tags.size > 0
+             "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
+           else
+             "#{filename}.part#{counter}.#{extension}"
+           end
+         end
+
+         def new_file
+           uuid = SecureRandom.uuid
+           name = generate_name
+           path = ::File.join(temporary_directory, uuid)
+           key = ::File.join(prefix, name)
+
+           FileUtils.mkdir_p(::File.join(path, prefix))
+
+           io = if gzip?
+                  # We have to use this wrapper because we cannot access the size of the
+                  # file directly on the gzip writer.
+                  IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
+                else
+                  ::File.open(::File.join(path, key), FILE_MODE)
+                end
+
+           TemporaryFile.new(key, io, path)
+         end
+
+         class IOWrappedGzip
+           extend Forwardable
+
+           def_delegators :@gzip_writer, :write, :close
+           attr_reader :file_io, :gzip_writer
+
+           def initialize(file_io)
+             @file_io = file_io
+             @gzip_writer = Zlib::GzipWriter.open(file_io)
+           end
+
+           def path
+             @gzip_writer.to_io.path
+           end
+
+           def size
+             # to get the current file size
+             if @gzip_writer.pos == 0
+               # Ensure a zero file size is returned when nothing has
+               # yet been written to the gzip file.
+               0
+             else
+               @gzip_writer.flush
+               @gzip_writer.to_io.size
+             end
+           end
+
+           def fsync
+             @gzip_writer.to_io.fsync
+           end
+         end
+       end
+     end
+   end
+ end
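A minimal sketch of what the factory produces on disk (the tmpdir, UUID, and timestamp are illustrative):

    factory = LogStash::Outputs::LogstashAzureBlobOutput::TemporaryFileFactory.new(
      "logs", ["es", "fb"], "gzip", Dir.mktmpdir)
    factory.current.path
    # => .../<uuid>/logs/ls.s3.<uuid>.2017-08-01T12.00.tag_es.fb.part0.txt.gz
    factory.rotate!  # hands out a fresh file and bumps the part counter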
@@ -0,0 +1,26 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class LogstashAzureBlobOutput
+       class TimeRotationPolicy
+         attr_reader :time_file
+
+         def initialize(time_file)
+           if time_file <= 0
+             raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
+           end
+
+           @time_file = time_file * 60
+         end
+
+         def rotate?(file)
+           file.size > 0 && (Time.now - file.ctime) >= time_file
+         end
+
+         def needs_periodic?
+           true
+         end
+       end
+     end
+   end
+ end
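time_file is given in minutes and converted to seconds; empty files are never rotated. A minimal sketch, reusing the StubFile Struct from the rotation sketch above:

    policy = LogStash::Outputs::LogstashAzureBlobOutput::TimeRotationPolicy.new(15)
    policy.time_file                                  # => 900 seconds
    policy.rotate?(StubFile.new(10, Time.now - 901))  # => true, non-empty and old enough
    policy.rotate?(StubFile.new(0, Time.now - 901))   # => false, empty files are skipped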
@@ -0,0 +1,69 @@
+ # encoding: utf-8
+ require "logstash/util"
+ require "azure"
+ require "concurrent"
+
+ module LogStash
+   module Outputs
+     class LogstashAzureBlobOutput
+       class Uploader
+         TIME_BEFORE_RETRYING_SECONDS = 1
+         DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+           :min_threads => 1,
+           :max_threads => 8,
+           :max_queue => 1,
+           :fallback_policy => :caller_runs
+         })
+
+
+         attr_reader :storage_account_name, :upload_options, :logger
+
+         def initialize(blob_account, logger, threadpool = DEFAULT_THREADPOOL)
+           @blob_account = blob_account
+           @workers_pool = threadpool
+           @logger = logger
+         end
+
+         def upload_async(file, options = {})
+           @workers_pool.post do
+             LogStash::Util.set_thread_name("LogstashAzureBlobOutput output uploader, file: #{file.path}")
+             upload(file, options)
+           end
+         end
+
+         def upload(file, options = {})
+           upload_options = options.fetch(:upload_options, {})
+
+           begin
+             Azure.config.storage_account_name = ENV['AZURE_STORAGE_ACCOUNT']
+             Azure.config.storage_access_key = ENV['AZURE_STORAGE_ACCESS_KEY']
+             azure_blob_service = Azure::Blob::BlobService.new
+             containers = azure_blob_service.list_containers
+             content = ::File.binread(file.path)
+             blob = azure_blob_service.create_block_blob(containers[0].name, file.ctime.iso8601, content)
+           rescue => e
+             # When we get here it usually means the client already retried by itself (default is 3).
+             # When the retry limit is reached or another error happens, we wait and retry.
+             #
+             # The thread might be stuck here, but I think that's better than losing anything;
+             # it's either a transient error or something really bad happened.
+             logger.error("Uploading failed, retrying", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+             sleep(TIME_BEFORE_RETRYING_SECONDS)
+             retry
+           end
+
+           options[:on_complete].call(file) unless options[:on_complete].nil?
+           blob
+         rescue => e
+           logger.error("An error occurred in the `on_complete` uploader", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+           raise e # reraise it since we don't deal with it now
+         end
+
+         def stop
+           @workers_pool.shutdown
+           @workers_pool.wait_for_termination(nil) # block until it's done
+         end
+       end
+     end
+   end
+ end
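A minimal sketch of driving the Uploader. Note that upload reads the credentials from the environment, so the blob_account argument is effectively unused here; the on_complete hook is where the caller would clean up the temporary file:

    require "logger"

    uploader = LogStash::Outputs::LogstashAzureBlobOutput::Uploader.new(nil, Logger.new(STDOUT))
    uploader.upload_async(file, :on_complete => ->(f) { f.delete! })
    uploader.stop  # drain the thread pool before shutting down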
@@ -0,0 +1,19 @@
+ # encoding: utf-8
+ require "fileutils"
+
+ module LogStash
+   module Outputs
+     class LogstashAzureBlobOutput
+       class WritableDirectoryValidator
+         def self.valid?(path)
+           begin
+             FileUtils.mkdir_p(path) unless Dir.exist?(path)
+             ::File.writable?(path)
+           rescue
+             false
+           end
+         end
+       end
+     end
+   end
+ end
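A quick check of the validator (a minimal sketch):

    require "tmpdir"

    LogStash::Outputs::LogstashAzureBlobOutput::WritableDirectoryValidator.valid?(Dir.tmpdir)
    # => true when the directory exists (or can be created) and is writable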
@@ -0,0 +1,25 @@
+ Gem::Specification.new do |s|
+   s.name = 'logstash-output-azure'
+   s.version = '0.1.0'
+   s.licenses = ['Apache-2.0']
+   s.summary = 'Plugin for logstash to send output to Microsoft Azure Blob'
+   #s.description = 'TODO: Write a longer description or delete this line.'
+   #s.homepage = 'TODO: Put your plugin''s website or public repo URL here.'
+   s.authors = ['Tuffk']
+   s.email = 'tuffkmulhall@gmail.com'
+   s.require_paths = ['lib']
+
+   # Files
+   s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }
+
+   # Gem dependencies
+   s.add_runtime_dependency "logstash-core-plugin-api", "~> 2.0"
+   s.add_runtime_dependency "logstash-codec-plain"
+   s.add_runtime_dependency "azure", "~> 0.7"
+   s.add_development_dependency "logstash-devutils"
+ end
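For context, a pipeline configuration for this output might look like the sketch below. The option names are inferred from the spec that follows, not from official documentation, so treat them as assumptions:

    output {
      azure {
        storage_account_name => "${AZURE_STORAGE_ACCOUNT}"
        storage_access_key   => "${AZURE_STORAGE_ACCESS_KEY}"
        size_file            => 5242880
        time_file            => 15
        rotation_strategy    => "size_and_time"
        temporary_directory  => "/tmp/logstash"
      }
    }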
@@ -0,0 +1,42 @@
+ # encoding: utf-8
+
+ require 'logstash/devutils/rspec/spec_helper'
+ require 'logstash/outputs/Logstash_Azure_Blob_Output'
+ require 'logstash/codecs/plain'
+ require 'logstash/event'
+ require 'tmpdir'
+ require 'pry'
+
+ describe LogStash::Outputs::LogstashAzureBlobOutput do
+   let(:config_options) do
+     {
+       storage_account_name: ENV['AZURE_STORAGE_ACCOUNT'],
+       storage_access_key: ENV['AZURE_STORAGE_ACCESS_KEY'],
+       size_file: 5242880,
+       time_file: 15,
+       restore: true,
+       temporary_directory: File.join(Dir.tmpdir, 'logstash'),
+       prefix: '',
+       upload_queue_size: 2 * (Concurrent.processor_count * 0.25).ceil,
+       upload_workers_count: (Concurrent.processor_count * 0.5).ceil,
+       rotation_strategy: 'size_and_time',
+       tags: [],
+       encoding: "none"
+     }
+   end
+   let(:sample_event) { LogStash::Event.new(source: 'alguna', tags: %w[tag1 tag2], fields: { field1: 1, field2: true }) }
+
+   let(:output) { described_class.new() }
+
+   before do
+     output.register
+   end
+
+   describe 'receive message' do
+     subject { output.receive(sample_event) }
+     xit 'should return the blob sent to Azure' do
+       md5 = Digest::MD5.base64digest(sample_event.to_json)
+       expect(subject.properties[:content_md5]).to eq(md5)
+     end
+   end
+ end