logstash-output-azure 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +2 -0
- data/CONTRIBUTORS +10 -0
- data/DEVELOPER.md +2 -0
- data/Gemfile +3 -0
- data/LICENSE +11 -0
- data/README.md +87 -0
- data/lib/logstash/outputs/Logstash_Azure_Blob_Output.rb +244 -0
- data/lib/logstash/outputs/blob/file_repository.rb +120 -0
- data/lib/logstash/outputs/blob/path_validator.rb +18 -0
- data/lib/logstash/outputs/blob/size_and_time_rotation_policy.rb +24 -0
- data/lib/logstash/outputs/blob/size_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/blob/temporary_file.rb +71 -0
- data/lib/logstash/outputs/blob/temporary_file_factory.rb +129 -0
- data/lib/logstash/outputs/blob/time_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/blob/uploader.rb +67 -0
- data/lib/logstash/outputs/blob/writable_directory_validator.rb +17 -0
- data/logstash-output-Logstash_Azure_Blob_Output.gemspec +25 -0
- data/spec/outputs/Logstash_Azure_Blob_Output_spec.rb +42 -0
- data/spec/outputs/blob/file_repository_spec.rb +143 -0
- data/spec/outputs/blob/size_and_time_rotation_policy_spec.rb +77 -0
- data/spec/outputs/blob/size_rotation_policy_spec.rb +41 -0
- data/spec/outputs/blob/temporary_file_factory_spec.rb +89 -0
- data/spec/outputs/blob/temporary_file_spec.rb +47 -0
- data/spec/outputs/blob/time_rotation_policy_spec.rb +60 -0
- data/spec/outputs/blob/uploader_spec.rb +65 -0
- data/spec/outputs/blob/writable_directory_validator_spec.rb +40 -0
- data/spec/spec_helper.rb +6 -0
- data/spec/supports/helpers.rb +30 -0
- metadata +140 -0
data/lib/logstash/outputs/blob/path_validator.rb
@@ -0,0 +1,18 @@

```ruby
# encoding: utf-8
module LogStash
  module Outputs
    class LogstashAzureBlobOutput
      class PathValidator
        INVALID_CHARACTERS = "\^`><"

        def self.valid?(name)
          name.match(matches_re).nil?
        end

        def self.matches_re
          /[#{Regexp.escape(INVALID_CHARACTERS)}]/
        end
      end
    end
  end
end
```
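`PathValidator` rejects any name containing a character from `INVALID_CHARACTERS`. A quick sketch of its behavior (hypothetical inputs, assuming the class above is loaded):

```ruby
validator = LogStash::Outputs::LogstashAzureBlobOutput::PathValidator

validator.valid?("logs/2017/app.txt") # => true  (no forbidden characters)
validator.valid?("logs>app.txt")      # => false (contains ">")
```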
data/lib/logstash/outputs/blob/size_and_time_rotation_policy.rb
@@ -0,0 +1,24 @@

```ruby
# encoding: utf-8
require "logstash/outputs/blob/size_rotation_policy"
require "logstash/outputs/blob/time_rotation_policy"

module LogStash
  module Outputs
    class LogstashAzureBlobOutput
      class SizeAndTimeRotationPolicy
        def initialize(file_size, time_file)
          @size_strategy = SizeRotationPolicy.new(file_size)
          @time_strategy = TimeRotationPolicy.new(time_file)
        end

        def rotate?(file)
          @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
        end

        def needs_periodic?
          true
        end
      end
    end
  end
end
```
data/lib/logstash/outputs/blob/size_rotation_policy.rb
@@ -0,0 +1,26 @@

```ruby
# encoding: utf-8
module LogStash
  module Outputs
    class LogstashAzureBlobOutput
      class SizeRotationPolicy
        attr_reader :size_file

        def initialize(size_file)
          if size_file <= 0
            raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
          end

          @size_file = size_file
        end

        def rotate?(file)
          file.size >= size_file
        end

        def needs_periodic?
          false
        end
      end
    end
  end
end
```
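The two strategies compose through `rotate?`: either trigger forces a rotation. A minimal sketch (hypothetical thresholds; `FakeFile` is a stand-in for `TemporaryFile`, and `TimeRotationPolicy` appears later in this diff):

```ruby
FakeFile = Struct.new(:size, :ctime)

# Rotate at 1 KiB or after 1 minute, whichever comes first.
policy = LogStash::Outputs::LogstashAzureBlobOutput::SizeAndTimeRotationPolicy.new(1024, 1)

policy.rotate?(FakeFile.new(2048, Time.now))     # => true  (size threshold hit)
policy.rotate?(FakeFile.new(10, Time.now - 120)) # => true  (non-empty and older than 1 minute)
policy.rotate?(FakeFile.new(10, Time.now))       # => false (small and fresh)
```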
data/lib/logstash/outputs/blob/temporary_file.rb
@@ -0,0 +1,71 @@

```ruby
# encoding: utf-8
require "thread"
require "forwardable"
require "fileutils"

module LogStash
  module Outputs
    class LogstashAzureBlobOutput
      # Wraps the actual file descriptor in a utility class.
      # This makes the code more object oriented and the paths easier to reason about.
      class TemporaryFile
        extend Forwardable

        def_delegators :@fd, :path, :write, :close, :fsync

        attr_reader :fd

        def initialize(key, fd, temp_path)
          @fd = fd
          @key = key
          @temp_path = temp_path
          @created_at = Time.now
        end

        def ctime
          @created_at
        end

        def temp_path
          @temp_path
        end

        def size
          # Use the fd size to get an accurate result,
          # so we don't have to deal with fsync;
          # if the file is closed we fall back to File.size.
          begin
            @fd.size
          rescue IOError
            ::File.size(path)
          end
        end

        def key
          @key.gsub(/^\//, "")
        end

        # Each temporary file is created inside a directory named with a UUID.
        # Instead of deleting the file directly (and risking deleting other files),
        # we delete the UUID root; the UUID acts as a sandbox and removes the risk
        # of deleting unwanted files.
        def delete!
          @fd.close rescue IOError # force close anyway
          FileUtils.rm_r(@temp_path, :secure => true)
        end

        def empty?
          size == 0
        end

        def self.create_from_existing_file(file_path, temporary_folder)
          key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)

          TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
                            ::File.open(file_path, "r"),
                            ::File.join(temporary_folder, key_parts.slice(0, 1)))
        end
      end
    end
  end
end
```
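A sketch of the crash-recovery helper (hypothetical paths; `example-uuid` stands in for a real UUID, and the class above is assumed loaded):

```ruby
require "tmpdir"
require "fileutils"
require "pathname"

Dir.mktmpdir do |temporary_folder|
  # Simulate a file left over from a previous run.
  uuid_dir = File.join(temporary_folder, "example-uuid")
  FileUtils.mkdir_p(File.join(uuid_dir, "logs"))
  leftover = File.join(uuid_dir, "logs", "ls.s3.example.part0.txt")
  File.write(leftover, "hello")

  klass = LogStash::Outputs::LogstashAzureBlobOutput::TemporaryFile
  file = klass.create_from_existing_file(leftover, temporary_folder)

  file.key     # => "logs/ls.s3.example.part0.txt" (the UUID segment is stripped)
  file.size    # => 5
  file.delete! # removes the whole "example-uuid" sandbox directory
end
```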
data/lib/logstash/outputs/blob/temporary_file_factory.rb
@@ -0,0 +1,129 @@

```ruby
# encoding: utf-8
require "socket"
require "securerandom"
require "fileutils"
require "zlib"
require "forwardable"

module LogStash
  module Outputs
    class LogstashAzureBlobOutput
      # Since the file can contain dynamic parts, we have to maintain a local
      # structure that allows a clean recovery after a crash.
      #
      # The local structure will look like this:
      #
      # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%m.tag_es_fb.part1.txt.gz
      #
      # Since the UUID should be fairly unique, we can destroy the whole path when
      # an upload completes, without having to check whether other directories
      # still contain files before destroying them.
      class TemporaryFileFactory
        FILE_MODE = "a"
        GZIP_ENCODING = "gzip"
        GZIP_EXTENSION = "txt.gz"
        TXT_EXTENSION = "txt"
        STRFTIME = "%Y-%m-%dT%H.%M"

        attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current

        def initialize(prefix, tags, encoding, temporary_directory)
          @counter = 0
          @prefix = prefix

          @tags = tags
          @encoding = encoding
          @temporary_directory = temporary_directory
          @lock = Mutex.new

          rotate!
        end

        def rotate!
          @lock.synchronize {
            @current = new_file
            increment_counter
            @current
          }
        end

        private
        def extension
          gzip? ? GZIP_EXTENSION : TXT_EXTENSION
        end

        def gzip?
          encoding == GZIP_ENCODING
        end

        def increment_counter
          @counter += 1
        end

        def current_time
          Time.now.strftime(STRFTIME)
        end

        def generate_name
          filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"

          if tags.size > 0
            "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
          else
            "#{filename}.part#{counter}.#{extension}"
          end
        end

        def new_file
          uuid = SecureRandom.uuid
          name = generate_name
          path = ::File.join(temporary_directory, uuid)
          key = ::File.join(prefix, name)

          FileUtils.mkdir_p(::File.join(path, prefix))

          io = if gzip?
                 # We have to use this wrapper because we cannot access the size of the
                 # file directly on the gzip writer.
                 IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
               else
                 ::File.open(::File.join(path, key), FILE_MODE)
               end

          TemporaryFile.new(key, io, path)
        end

        class IOWrappedGzip
          extend Forwardable

          def_delegators :@gzip_writer, :write, :close
          attr_reader :file_io, :gzip_writer

          def initialize(file_io)
            @file_io = file_io
            @gzip_writer = Zlib::GzipWriter.open(file_io)
          end

          def path
            @gzip_writer.to_io.path
          end

          def size
            # to get the current file size
            if @gzip_writer.pos == 0
              # Ensure a zero file size is returned when nothing has
              # yet been written to the gzip file.
              0
            else
              @gzip_writer.flush
              @gzip_writer.to_io.size
            end
          end

          def fsync
            @gzip_writer.to_io.fsync
          end
        end
      end
    end
  end
end
```
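A sketch of what the factory yields (hypothetical arguments; actual names vary by UUID and timestamp, and the classes above are assumed loaded):

```ruby
require "tmpdir"

factory = LogStash::Outputs::LogstashAzureBlobOutput::TemporaryFileFactory
            .new("logs", %w[es fb], "none", Dir.mktmpdir)

file = factory.current
file.key # => e.g. "logs/ls.s3.<uuid>.2017-08-14T16.35.tag_es.fb.part0.txt"

# rotate! opens a fresh file under a new UUID sandbox and bumps the .partN counter.
factory.rotate!
factory.current.key # => e.g. "logs/ls.s3.<uuid>.2017-08-14T16.35.tag_es.fb.part1.txt"
```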
data/lib/logstash/outputs/blob/time_rotation_policy.rb
@@ -0,0 +1,26 @@

```ruby
# encoding: utf-8
module LogStash
  module Outputs
    class LogstashAzureBlobOutput
      class TimeRotationPolicy
        attr_reader :time_file

        def initialize(time_file)
          if time_file <= 0
            raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
          end

          @time_file = time_file * 60
        end

        def rotate?(file)
          file.size > 0 && (Time.now - file.ctime) >= time_file
        end

        def needs_periodic?
          true
        end
      end
    end
  end
end
```
data/lib/logstash/outputs/blob/uploader.rb
@@ -0,0 +1,67 @@

```ruby
# encoding: utf-8
require "logstash/util"
require "azure"

module LogStash
  module Outputs
    class LogstashAzureBlobOutput
      class Uploader
        TIME_BEFORE_RETRYING_SECONDS = 1
        DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
          :min_threads => 1,
          :max_threads => 8,
          :max_queue => 1,
          :fallback_policy => :caller_runs
        })

        attr_reader :storage_account_name, :upload_options, :logger

        def initialize(blob_account, logger, threadpool = DEFAULT_THREADPOOL)
          @blob_account = blob_account
          @workers_pool = threadpool
          @logger = logger
        end

        def upload_async(file, options = {})
          @workers_pool.post do
            LogStash::Util.set_thread_name("LogstashAzureBlobOutput output uploader, file: #{file.path}")
            upload(file, options)
          end
        end

        def upload(file, options = {})
          upload_options = options.fetch(:upload_options, {})

          begin
            Azure.config.storage_account_name = ENV['AZURE_STORAGE_ACCOUNT']
            Azure.config.storage_access_key = ENV['AZURE_STORAGE_ACCESS_KEY']
            azure_blob_service = Azure::Blob::BlobService.new
            containers = azure_blob_service.list_containers
            content = Object::File.open(file.path, "rb").read
            blob = azure_blob_service.create_block_blob(containers[0].name, "#{file.ctime.iso8601}", content)
          rescue => e
            # When we get here it usually means that LogstashAzureBlobOutput already
            # retried on its own (the default is 3 attempts). When the retry limit is
            # reached, or another error happens, we wait and retry.
            #
            # The thread might get stuck here, but that is better than losing anything:
            # it is either a transient error or something really bad happened.
            logger.error("Uploading failed, retrying", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
            sleep TIME_BEFORE_RETRYING_SECONDS
            retry
          end

          options[:on_complete].call(file) unless options[:on_complete].nil?
          blob
        rescue => e
          logger.error("An error occurred in the `on_complete` uploader", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
          raise e # reraise it since we don't deal with it now
        end

        def stop
          @workers_pool.shutdown
          @workers_pool.wait_for_termination(nil) # block until it's done
        end
      end
    end
  end
end
```

(Note: the retry loop originally never slept despite defining `TIME_BEFORE_RETRYING_SECONDS` and commenting "we will wait and retry"; the `sleep` above makes the code match its stated intent.)
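A minimal sketch of driving the uploader (assumptions: `AZURE_STORAGE_ACCOUNT` and `AZURE_STORAGE_ACCESS_KEY` are exported, `temp_file` is a `TemporaryFile` from the factory above, and `plugin_logger` is a hypothetical Logstash-style logger responding to `#error(message, data)`):

```ruby
uploader = LogStash::Outputs::LogstashAzureBlobOutput::Uploader.new(nil, plugin_logger)

# Upload on the worker pool and delete the local UUID sandbox once the blob is stored.
uploader.upload_async(temp_file, :on_complete => ->(file) { file.delete! })

uploader.stop # drain the thread pool before the plugin shuts down
```

Note that `upload` reads credentials from the environment rather than from the `blob_account` passed to the constructor, and writes to the account's first container using the file's `ctime` as the blob name.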
data/lib/logstash/outputs/blob/writable_directory_validator.rb
@@ -0,0 +1,17 @@

```ruby
# encoding: utf-8
module LogStash
  module Outputs
    class LogstashAzureBlobOutput
      class WritableDirectoryValidator
        def self.valid?(path)
          begin
            FileUtils.mkdir_p(path) unless Dir.exist?(path)
            ::File.writable?(path)
          rescue
            false
          end
        end
      end
    end
  end
end
```
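A quick sketch (hypothetical paths; note that `valid?` creates the directory as a side effect):

```ruby
require "tmpdir"
require "fileutils"

validator = LogStash::Outputs::LogstashAzureBlobOutput::WritableDirectoryValidator

validator.valid?(File.join(Dir.tmpdir, "logstash")) # => true (creates the directory if missing)
validator.valid?("/no-permission-here")             # => false for an unprivileged user
```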
data/logstash-output-Logstash_Azure_Blob_Output.gemspec
@@ -0,0 +1,25 @@

```ruby
Gem::Specification.new do |s|
  s.name = 'logstash-output-azure'
  s.version = '0.1.0'
  s.licenses = ['Apache-2.0']
  s.summary = 'Plugin for logstash to send output to Microsoft Azure Blob'
  #s.description = 'TODO: Write a longer description or delete this line.'
  #s.homepage = 'TODO: Put your plugin''s website or public repo URL here.'
  s.authors = ['Tuffk']
  s.email = 'tuffkmulhall@gmail.com'
  s.require_paths = ['lib']

  # Files
  s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }

  # Gem dependencies
  s.add_runtime_dependency "logstash-core-plugin-api", "~> 2.0"
  s.add_runtime_dependency "logstash-codec-plain"
  s.add_runtime_dependency "azure", "~> 0.7"
  s.add_development_dependency "logstash-devutils"
end
```
data/spec/outputs/Logstash_Azure_Blob_Output_spec.rb
@@ -0,0 +1,42 @@

```ruby
# encoding: utf-8

require 'logstash/devutils/rspec/spec_helper'
require 'logstash/outputs/Logstash_Azure_Blob_Output'
require 'logstash/codecs/plain'
require 'logstash/event'
require 'tmpdir'
require 'pry'

describe LogStash::Outputs::LogstashAzureBlobOutput do
  let(:config_options) do
    {
      storage_account_name: ENV['AZURE_STORAGE_ACCOUNT'],
      storage_access_key: ENV['AZURE_STORAGE_ACCESS_KEY'],
      size_file: 5242880,
      time_file: 15,
      restore: true,
      temporary_directory: File.join(Dir.tmpdir, 'logstash'),
      prefix: '',
      upload_queue_size: 2 * (Concurrent.processor_count * 0.25).ceil,
      upload_workers_count: (Concurrent.processor_count * 0.5).ceil,
      rotation_strategy: 'size_and_time',
      tags: [],
      encoding: "none"
    }
  end
  let(:sample_event) { LogStash::Event.new(source: 'alguna', tags: %w[tag1 tag2], fields: { field1: 1, field2: true }) }

  let(:output) { described_class.new() }

  before do
    output.register
  end

  describe 'receive message' do
    subject { output.receive(sample_event) }
    xit 'should return the blob sent to Azure' do
      md5 = Digest::MD5.base64digest(sample_event.to_json)
      expect(subject.properties[:content_md5]).to eq(md5)
    end
  end
end
```