logstash-output-azure 0.1.0
- checksums.yaml +7 -0
- data/CHANGELOG.md +2 -0
- data/CONTRIBUTORS +10 -0
- data/DEVELOPER.md +2 -0
- data/Gemfile +3 -0
- data/LICENSE +11 -0
- data/README.md +87 -0
- data/lib/logstash/outputs/Logstash_Azure_Blob_Output.rb +244 -0
- data/lib/logstash/outputs/blob/file_repository.rb +120 -0
- data/lib/logstash/outputs/blob/path_validator.rb +18 -0
- data/lib/logstash/outputs/blob/size_and_time_rotation_policy.rb +24 -0
- data/lib/logstash/outputs/blob/size_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/blob/temporary_file.rb +71 -0
- data/lib/logstash/outputs/blob/temporary_file_factory.rb +129 -0
- data/lib/logstash/outputs/blob/time_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/blob/uploader.rb +67 -0
- data/lib/logstash/outputs/blob/writable_directory_validator.rb +17 -0
- data/logstash-output-Logstash_Azure_Blob_Output.gemspec +25 -0
- data/spec/outputs/Logstash_Azure_Blob_Output_spec.rb +42 -0
- data/spec/outputs/blob/file_repository_spec.rb +143 -0
- data/spec/outputs/blob/size_and_time_rotation_policy_spec.rb +77 -0
- data/spec/outputs/blob/size_rotation_policy_spec.rb +41 -0
- data/spec/outputs/blob/temporary_file_factory_spec.rb +89 -0
- data/spec/outputs/blob/temporary_file_spec.rb +47 -0
- data/spec/outputs/blob/time_rotation_policy_spec.rb +60 -0
- data/spec/outputs/blob/uploader_spec.rb +65 -0
- data/spec/outputs/blob/writable_directory_validator_spec.rb +40 -0
- data/spec/spec_helper.rb +6 -0
- data/spec/supports/helpers.rb +30 -0
- metadata +140 -0
data/lib/logstash/outputs/blob/path_validator.rb
@@ -0,0 +1,18 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class LogstashAzureBlobOutput
+      class PathValidator
+        INVALID_CHARACTERS = "\^`><"
+
+        def self.valid?(name)
+          name.match(matches_re).nil?
+        end
+
+        def self.matches_re
+          /[#{Regexp.escape(INVALID_CHARACTERS)}]/
+        end
+      end
+    end
+  end
+end
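
A quick illustration of the validator above (a hedged sketch assuming the plugin files are on the load path): any name containing one of the characters ^ ` > < is rejected.

  require "logstash/outputs/blob/path_validator"

  validator = LogStash::Outputs::LogstashAzureBlobOutput::PathValidator
  validator.valid?("logs/app.txt")  # => true
  validator.valid?("logs>app.txt")  # => false, '>' is one of the INVALID_CHARACTERS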
data/lib/logstash/outputs/blob/size_and_time_rotation_policy.rb
@@ -0,0 +1,24 @@
+# encoding: utf-8
+require "logstash/outputs/blob/size_rotation_policy"
+require "logstash/outputs/blob/time_rotation_policy"
+
+module LogStash
+  module Outputs
+    class LogstashAzureBlobOutput
+      class SizeAndTimeRotationPolicy
+        def initialize(file_size, time_file)
+          @size_strategy = SizeRotationPolicy.new(file_size)
+          @time_strategy = TimeRotationPolicy.new(time_file)
+        end
+
+        def rotate?(file)
+          @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
+        end
+
+        def needs_periodic?
+          true
+        end
+      end
+    end
+  end
+end
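
The combined policy simply delegates to both single policies, so a file rotates as soon as either threshold is hit. A minimal sketch (FakeFile is a hypothetical stand-in for TemporaryFile, which only needs to respond to `size` and `ctime`):

  FakeFile = Struct.new(:size, :ctime)

  policy = LogStash::Outputs::LogstashAzureBlobOutput::SizeAndTimeRotationPolicy.new(1024, 15)
  policy.rotate?(FakeFile.new(2048, Time.now))  # => true, the 1024-byte size threshold is exceeded
  policy.rotate?(FakeFile.new(10, Time.now))    # => false, neither size nor the 15-minute window is reached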
data/lib/logstash/outputs/blob/size_rotation_policy.rb
@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class LogstashAzureBlobOutput
+      class SizeRotationPolicy
+        attr_reader :size_file
+
+        def initialize(size_file)
+          if size_file <= 0
+            raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+          end
+
+          @size_file = size_file
+        end
+
+        def rotate?(file)
+          file.size >= size_file
+        end
+
+        def needs_periodic?
+          false
+        end
+      end
+    end
+  end
+end
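
Note the guard clause: a non-positive `size_file` is rejected at construction time, so misconfiguration fails fast. For example:

  LogStash::Outputs::LogstashAzureBlobOutput::SizeRotationPolicy.new(0)
  # => raises LogStash::ConfigurationError, "`size_file` needs to be greater than 0"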
data/lib/logstash/outputs/blob/temporary_file.rb
@@ -0,0 +1,71 @@
+# encoding: utf-8
+require "thread"
+require "forwardable"
+require "fileutils"
+
+module LogStash
+  module Outputs
+    class LogstashAzureBlobOutput
+      # Wrap the actual file descriptor into a utility class.
+      # It makes things more OOP and easier to reason about the paths.
+      class TemporaryFile
+        extend Forwardable
+
+        def_delegators :@fd, :path, :write, :close, :fsync
+
+        attr_reader :fd
+
+        def initialize(key, fd, temp_path)
+          @fd = fd
+          @key = key
+          @temp_path = temp_path
+          @created_at = Time.now
+        end
+
+        def ctime
+          @created_at
+        end
+
+        def temp_path
+          @temp_path
+        end
+
+        def size
+          # Use the fd size to get an accurate result,
+          # so we don't have to deal with fsync;
+          # if the file is closed we use File::size instead.
+          begin
+            @fd.size
+          rescue IOError
+            ::File.size(path)
+          end
+        end
+
+        def key
+          @key.gsub(/^\//, "")
+        end
+
+        # Each temporary file is made inside a directory named with a UUID.
+        # Instead of deleting the file directly, and risking deleting other files,
+        # we delete the root of the UUID; using a UUID also removes the risk of
+        # deleting unwanted files, so it acts as a sandbox.
+        def delete!
+          @fd.close rescue IOError # force close anyway
+          FileUtils.rm_r(@temp_path, :secure => true)
+        end
+
+        def empty?
+          size == 0
+        end
+
+        def self.create_from_existing_file(file_path, temporary_folder)
+          key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
+
+          TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
+                            ::File.open(file_path, "r"),
+                            ::File.join(temporary_folder, key_parts.slice(0, 1)))
+        end
+      end
+    end
+  end
+end
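
To make the UUID sandbox concrete, here is a sketch of how `create_from_existing_file` rebuilds a TemporaryFile from a leftover path after a crash (the paths and UUID are illustrative only):

  # Given temporary_folder = "/tmp/logstash" and a recovered file at
  # /tmp/logstash/0e5a4c8e-7e3a-4a9b-9c7d-2f1b3a4d5e6f/myprefix/ls.s3.example.part0.txt
  file = LogStash::Outputs::LogstashAzureBlobOutput::TemporaryFile.create_from_existing_file(
    "/tmp/logstash/0e5a4c8e-7e3a-4a9b-9c7d-2f1b3a4d5e6f/myprefix/ls.s3.example.part0.txt",
    "/tmp/logstash"
  )
  file.key        # => "myprefix/ls.s3.example.part0.txt" (the UUID segment is dropped)
  file.temp_path  # => "/tmp/logstash/0e5a4c8e-7e3a-4a9b-9c7d-2f1b3a4d5e6f", the root that delete! removes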
data/lib/logstash/outputs/blob/temporary_file_factory.rb
@@ -0,0 +1,129 @@
+# encoding: utf-8
+require "socket"
+require "securerandom"
+require "fileutils"
+require "zlib"
+require "forwardable"
+
+module LogStash
+  module Outputs
+    class LogstashAzureBlobOutput
+      # Since the file name can contain dynamic parts, we have to handle a more local structure to
+      # allow a nice recovery from a crash.
+      #
+      # The local structure will look like this:
+      #
+      # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%m.tag_es_fb.part1.txt.gz
+      #
+      # Since the UUID should be fairly unique, I can destroy the whole path when an upload is complete.
+      # I do not have to mess around checking whether the other directories have files in them before destroying them.
+      class TemporaryFileFactory
+        FILE_MODE = "a"
+        GZIP_ENCODING = "gzip"
+        GZIP_EXTENSION = "txt.gz"
+        TXT_EXTENSION = "txt"
+        STRFTIME = "%Y-%m-%dT%H.%M"
+
+        attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
+
+        def initialize(prefix, tags, encoding, temporary_directory)
+          @counter = 0
+          @prefix = prefix
+
+          @tags = tags
+          @encoding = encoding
+          @temporary_directory = temporary_directory
+          @lock = Mutex.new
+
+          rotate!
+        end
+
+        def rotate!
+          @lock.synchronize {
+            @current = new_file
+            increment_counter
+            @current
+          }
+        end
+
+        private
+        def extension
+          gzip? ? GZIP_EXTENSION : TXT_EXTENSION
+        end
+
+        def gzip?
+          encoding == GZIP_ENCODING
+        end
+
+        def increment_counter
+          @counter += 1
+        end
+
+        def current_time
+          Time.now.strftime(STRFTIME)
+        end
+
+        def generate_name
+          filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"
+
+          if tags.size > 0
+            "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
+          else
+            "#{filename}.part#{counter}.#{extension}"
+          end
+        end
+
+        def new_file
+          uuid = SecureRandom.uuid
+          name = generate_name
+          path = ::File.join(temporary_directory, uuid)
+          key = ::File.join(prefix, name)
+
+          FileUtils.mkdir_p(::File.join(path, prefix))
+
+          io = if gzip?
+            # We have to use this wrapper because we cannot access the size of the
+            # file directly on the gzip writer.
+            IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
+          else
+            ::File.open(::File.join(path, key), FILE_MODE)
+          end
+
+          TemporaryFile.new(key, io, path)
+        end
+
+        class IOWrappedGzip
+          extend Forwardable
+
+          def_delegators :@gzip_writer, :write, :close
+          attr_reader :file_io, :gzip_writer
+
+          def initialize(file_io)
+            @file_io = file_io
+            @gzip_writer = Zlib::GzipWriter.open(file_io)
+          end
+
+          def path
+            @gzip_writer.to_io.path
+          end
+
+          def size
+            # to get the current file size
+            if @gzip_writer.pos == 0
+              # Ensure a zero file size is returned when nothing has
+              # yet been written to the gzip file.
+              0
+            else
+              @gzip_writer.flush
+              @gzip_writer.to_io.size
+            end
+          end
+
+          def fsync
+            @gzip_writer.to_io.fsync
+          end
+        end
+      end
+    end
+  end
+end
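
A short sketch of the factory in action (assuming the plugin's TemporaryFile class is loaded; the exact UUID and timestamp vary):

  factory = LogStash::Outputs::LogstashAzureBlobOutput::TemporaryFileFactory.new(
    "myprefix", ["es", "fb"], "none", "/tmp/logstash"
  )
  factory.current.path
  # e.g. "/tmp/logstash/<uuid>/myprefix/ls.s3.<uuid>.2017-09-01T12.00.tag_es.fb.part0.txt"
  factory.rotate!  # swaps in a fresh file under a new UUID and bumps the part counter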
data/lib/logstash/outputs/blob/time_rotation_policy.rb
@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class LogstashAzureBlobOutput
+      class TimeRotationPolicy
+        attr_reader :time_file
+
+        def initialize(time_file)
+          if time_file <= 0
+            raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
+          end
+
+          @time_file = time_file * 60
+        end
+
+        def rotate?(file)
+          file.size > 0 && (Time.now - file.ctime) >= time_file
+        end
+
+        def needs_periodic?
+          true
+        end
+      end
+    end
+  end
+end
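
Because the constructor multiplies by 60, `time_file` is expressed in minutes while the comparison in `rotate?` runs in seconds. A sketch, reusing the hypothetical FakeFile struct from earlier:

  policy = LogStash::Outputs::LogstashAzureBlobOutput::TimeRotationPolicy.new(15)
  policy.rotate?(FakeFile.new(10, Time.now - 901))  # => true, non-empty and older than 15 minutes
  policy.rotate?(FakeFile.new(0, Time.now - 901))   # => false, empty files are never rotated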
data/lib/logstash/outputs/blob/uploader.rb
@@ -0,0 +1,67 @@
+# encoding: utf-8
+require "logstash/util"
+require "azure"
+
+module LogStash
+  module Outputs
+    class LogstashAzureBlobOutput
+      class Uploader
+        TIME_BEFORE_RETRYING_SECONDS = 1
+        DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+          :min_threads => 1,
+          :max_threads => 8,
+          :max_queue => 1,
+          :fallback_policy => :caller_runs
+        })
+
+
+        attr_reader :storage_account_name, :upload_options, :logger
+
+        def initialize(blob_account, logger, threadpool = DEFAULT_THREADPOOL)
+          @blob_account = blob_account
+          @workers_pool = threadpool
+          @logger = logger
+        end
+
+        def upload_async(file, options = {})
+          @workers_pool.post do
+            LogStash::Util.set_thread_name("LogstashAzureBlobOutput output uploader, file: #{file.path}")
+            upload(file, options)
+          end
+        end
+
+        def upload(file, options = {})
+          upload_options = options.fetch(:upload_options, {})
+
+          begin
+            Azure.config.storage_account_name = ENV['AZURE_STORAGE_ACCOUNT']
+            Azure.config.storage_access_key = ENV['AZURE_STORAGE_ACCESS_KEY']
+            azure_blob_service = Azure::Blob::BlobService.new
+            containers = azure_blob_service.list_containers
+            content = Object::File.open(file.path, "rb").read
+            blob = azure_blob_service.create_block_blob(containers[0].name, "#{file.ctime.iso8601}", content)
+          rescue => e
+            # When we get here it usually means that LogstashAzureBlobOutput tried to retry by itself (default is 3 attempts).
+            # When the retry limit is reached or another error happens, we wait and retry.
+            #
+            # The thread might be stuck here, but I think that is better than losing anything:
+            # it is either a transient error or something really bad happened.
+            logger.error("Uploading failed, retrying", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+            retry
+          end
+
+          options[:on_complete].call(file) unless options[:on_complete].nil?
+          blob
+        rescue => e
+          logger.error("An error occurred in the `on_complete` uploader", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+          raise e # reraise it since we don't deal with it now
+        end
+
+        def stop
+          @workers_pool.shutdown
+          @workers_pool.wait_for_termination(nil) # block until it's done
+        end
+      end
+    end
+  end
+end
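
A hedged sketch of driving the uploader directly; note that `upload` reads the credentials from the environment rather than from the `blob_account` argument, and the `on_complete` hook is where the caller cleans up the temporary file (blob_account, logger, and temporary_file are assumed to be in scope):

  uploader = LogStash::Outputs::LogstashAzureBlobOutput::Uploader.new(blob_account, logger)
  uploader.upload_async(temporary_file, :on_complete => ->(file) { file.delete! })
  # ...
  uploader.stop  # drains the worker pool and blocks until in-flight uploads finish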
data/lib/logstash/outputs/blob/writable_directory_validator.rb
@@ -0,0 +1,17 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class LogstashAzureBlobOutput
+      class WritableDirectoryValidator
+        def self.valid?(path)
+          begin
+            FileUtils.mkdir_p(path) unless Dir.exist?(path)
+            ::File.writable?(path)
+          rescue
+            false
+          end
+        end
+      end
+    end
+  end
+end
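
The check is create-then-probe: a directory that does not exist yet but can be created still counts as valid, and any exception is swallowed into a `false`. For example:

  require "tmpdir"

  LogStash::Outputs::LogstashAzureBlobOutput::WritableDirectoryValidator.valid?(
    File.join(Dir.tmpdir, "logstash")
  )
  # => true on a writable temp dir; false if mkdir_p or the writability probe fails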
data/logstash-output-Logstash_Azure_Blob_Output.gemspec
@@ -0,0 +1,25 @@
+Gem::Specification.new do |s|
+  s.name = 'logstash-output-azure'
+  s.version = '0.1.0'
+  s.licenses = ['Apache-2.0']
+  s.summary = 'Plugin for logstash to send output to Microsoft Azure Blob'
+  #s.description = 'TODO: Write a longer description or delete this line.'
+  #s.homepage = 'TODO: Put your plugin''s website or public repo URL here.'
+  s.authors = ['Tuffk']
+  s.email = 'tuffkmulhall@gmail.com'
+  s.require_paths = ['lib']
+
+  # Files
+  s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
+  # Tests
+  s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+  # Special flag to let us know this is actually a logstash plugin
+  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }
+
+  # Gem dependencies
+  s.add_runtime_dependency "logstash-core-plugin-api", "~> 2.0"
+  s.add_runtime_dependency "logstash-codec-plain"
+  s.add_runtime_dependency "azure", "~> 0.7"
+  s.add_development_dependency "logstash-devutils"
+end
data/spec/outputs/Logstash_Azure_Blob_Output_spec.rb
@@ -0,0 +1,42 @@
+# encoding: utf-8
+
+require 'logstash/devutils/rspec/spec_helper'
+require 'logstash/outputs/Logstash_Azure_Blob_Output'
+require 'logstash/codecs/plain'
+require 'logstash/event'
+require 'tmpdir'
+require 'pry'
+
+describe LogStash::Outputs::LogstashAzureBlobOutput do
+  let(:config_options) do
+    {
+      storage_account_name: ENV['AZURE_STORAGE_ACCOUNT'],
+      storage_access_key: ENV['AZURE_STORAGE_ACCESS_KEY'],
+      size_file: 5242880,
+      time_file: 15,
+      restore: true,
+      temporary_directory: File.join(Dir.tmpdir, 'logstash'),
+      prefix: '',
+      upload_queue_size: 2 * (Concurrent.processor_count * 0.25).ceil,
+      upload_workers_count: (Concurrent.processor_count * 0.5).ceil,
+      rotation_strategy: 'size_and_time',
+      tags: [],
+      encoding: "none"
+    }
+  end
+  let(:sample_event) { LogStash::Event.new(source: 'alguna', tags: %w[tag1 tag2], fields: { field1: 1, field2: true }) }
+
+  let(:output) { described_class.new() }
+
+  before do
+    output.register
+  end
+
+  describe 'receive message' do
+    subject { output.receive(sample_event) }
+    xit 'should return the blob sent to Azure' do
+      md5 = Digest::MD5.base64digest(sample_event.to_json)
+      expect(subject.properties[:content_md5]).to eq(md5)
+    end
+  end
+end