logstash-output-adls 1.1.3
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/LICENSE +13 -0
- data/NOTICE.TXT +4 -0
- data/README.md +119 -0
- data/lib/com/fasterxml/jackson/core/jackson-core/2.7.4/jackson-core-2.7.4.jar +0 -0
- data/lib/com/microsoft/azure/azure-data-lake-store-sdk/2.1.1/azure-data-lake-store-sdk-2.1.1.jar +0 -0
- data/lib/log4j/log4j/1.2.17/log4j-1.2.17.jar +0 -0
- data/lib/logstash-output-adls_jars.rb +18 -0
- data/lib/logstash/outputs/adls.rb +239 -0
- data/lib/org/slf4j/slf4j-api/1.7.21/slf4j-api-1.7.21.jar +0 -0
- data/lib/org/slf4j/slf4j-log4j12/1.7.21/slf4j-log4j12-1.7.21.jar +0 -0
- data/logstash-output-adls.gemspec +37 -0
- data/spec/integration/adls_spec.rb +79 -0
- data/spec/outputs/adls_spec.rb +59 -0
- data/vendor/jar-dependencies/runtime-jars/azure-data-lake-store-sdk-2.1.1.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/jackson-core-2.7.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/log4j-1.2.17.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/slf4j-api-1.7.21.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/slf4j-log4j12-1.7.21.jar +0 -0
- metadata +158 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: 2e2df6f39f6459b1e43ffdb634da8179e0f1437a
  data.tar.gz: 0c02a2b820c60277efba33a3bf7379d46fca5c87
SHA512:
  metadata.gz: ba9598c0b5a37907bc817a986724a11e93d0316763d3c62804c1048b01929459773b8adbd6dede821811604c2c684822232b98de8b26bf9c0a545848b25a529e
  data.tar.gz: 79bb890474237a38d94c6c0957d14b56cb9f7ba751c99b20a4f8020bae7262d789e3f550f7c6dc3f990d552f6fe72a2bf5c6022608d7aba9f5d83dc85847c2f3
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
Copyright (c) 2017 NOS Inovacao <http://www.nos.pt>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
data/NOTICE.TXT
ADDED
data/README.md
ADDED
@@ -0,0 +1,119 @@
# Azure Data Lake Store Output Logstash Plugin

This is an Azure Data Lake Store output plugin for [Logstash](https://github.com/elastic/logstash).

This plugin uses the official [Microsoft Data Lake Store Java SDK](https://github.com/Azure/azure-data-lake-store-java) with its custom [AzureDataLakeFilesystem (ADL)](https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-overview#what-is-azure-data-lake-store-file-system-adl) protocol, which Microsoft claims is more efficient than WebHDFS.

It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want.

## Installing

TBD

### Manual installation on an already deployed Logstash:

You can install the plugin on an already deployed Logstash and avoid messing with the Logstash Gemfile.
Please note that in this mode you can't alter the plugin source (logstash-output-adls/lib/logstash/outputs/adls.rb) without rebuilding the gem, which makes sense for a production deploy.

Note that you'll need a gem file already built, as described in the Build section below.

- Copy logstash-output-adls-x.x.x.gem to your remote Logstash.
- Install the package on your remote Logstash:

```sh
bin/logstash-plugin install /your/local/logstash-output-adls-x.x.x.gem
```

## Configuration

### Configuration example:

```
input {
  ...
}
filter {
  ...
}
output {
  adls {
    adls_fqdn => "XXXXXXXXXXX.azuredatalakestore.net"                                  # (required)
    adls_token_endpoint => "https://login.microsoftonline.com/XXXXXXXXXX/oauth2/token" # (required)
    adls_client_id => "00000000-0000-0000-0000-000000000000"                           # (required)
    adls_client_key => "XXXXXXXXXXXXXXXXXXXXXX"                                        # (required)
    path => "/logstash/%{+YYYY}/%{+MM}/%{+dd}/logstash-%{+HH}-%{[@metadata][cid]}.log" # (required)
    line_separator => "\n"                    # (optional, default: "\n")
    created_files_permission => 755           # (optional, default: 755)
    adls_token_expire_security_margin => 300  # (optional, default: 300)
    single_file_per_thread => true            # (optional, default: true)
    retry_interval => 0.5                     # (optional, default: 1)
    max_retry_interval => 10                  # (optional, default: 10)
    retry_times => 3                          # (optional, default: 3)
    exit_if_retries_exceeded => false         # (optional, default: false)
    codec => "json"                           # (optional, default: the default codec defined by Logstash)
  }
}
```

### Configuration fields:

| Setting | Required | Default | Description |
| --- | --- | --- | --- |
| `adls_fqdn` | yes | | Azure DLS FQDN |
| `adls_token_endpoint` | yes | | Azure OAuth endpoint |
| `adls_client_id` | yes | | Azure DLS ClientID |
| `adls_client_key` | yes | | Azure DLS ClientKey |
| `path` | yes | | The path to the file to write to. Event fields can be used here, as well as date fields in the Joda time format, e.g.: `/logstash/%{+YYYY-MM-dd}/logstash-%{+HH}-%{[@metadata][cid]}.log` |
| `line_separator` | no | `\n` | Line separator for events written |
| `created_files_permission` | no | 755 | File permission for files created |
| `adls_token_expire_security_margin` | no | 300 | The security margin (in seconds) that should be subtracted from the token's expiry value to calculate when the token should be renewed (i.e. if the OAuth token expires in 1 hour, it will be renewed in "1 hour - adls_token_expire_security_margin") |
| `single_file_per_thread` | no | true | Avoid appending to the same file from multiple threads. This solves some problems with multiple Logstash output threads and locked file leases in ADLS. If this option is set to true, `%{[@metadata][cid]}` needs to be used in the `path` setting. `%{[@metadata][cid]}` (cid -> ConcurrentID) is generated from a random value computed when the Logstash instance starts plus a per-thread id. This setting is used to deal with ADLS 0x83090a16 errors (see Configuration notes) |
| `retry_interval` | no | 1 | How long (in seconds) we should wait between retries in case of an error. This value is a coefficient and not an absolute value: the wait time is `retry_interval * tries_counter`. So, if `retry_interval` is 1, the wait time will be 1 on the first retry, 2 on the second, and so on (see the sketch below the table) |
| `max_retry_interval` | no | 10 | Max retry interval. The actual wait time (in seconds) will be `min(retry_interval * tries_counter, max_retry_interval)` |
| `retry_times` | no | 3 | How many times we should retry. If `retry_times` is exceeded, an error will be logged and the event will be discarded (set to -1 for unlimited retries) |
| `exit_if_retries_exceeded` | no | false | If enabled, Logstash will exit if retries are exceeded, to avoid losing events |
| `codec` | no | Logstash default codec | The codec that will be used to serialize the event (e.g. CSV, JSON, LINE). If you do not define one, Logstash will use its default. Please refer to the Logstash documentation |

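To make the backoff arithmetic concrete, here is a minimal Ruby sketch of the wait-time formula described in the table above (illustrative only, not part of the plugin; the variable names are just for the demo):

```ruby
# wait time = min(retry_interval * tries_counter, max_retry_interval)
retry_interval     = 1   # coefficient (seconds), as in the table above
max_retry_interval = 10  # cap (seconds)

(1..12).each do |tries_counter|
  wait = [retry_interval * tries_counter, max_retry_interval].min
  puts "retry ##{tries_counter}: wait #{wait}s"
end
# Waits grow linearly (1s, 2s, 3s, ...) until they hit the 10s cap.
```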
### Concurrency and batching:
This plugin relies only on Logstash's concurrency/batching facilities and can be configured through Logstash's own "pipeline.workers" and "pipeline.batch.size" settings. Also, the concurrency mode of this plugin is set to [shared](https://www.elastic.co/guide/en/logstash/current/_how_to_write_a_logstash_output_plugin.html#_see_what_your_plugin_looks_like_4) to maximize concurrency.

### Configuration notes:

- If **single_file_per_thread** is enabled (and it is by default) and you're using more than one worker thread, you'll need to add **%{[@metadata][cid]}** to your file path. This concatenates a ConcurrentID value to your path to avoid remote concurrency problems in ADLS, which apparently locks a file for writing while it is being appended to. (See the sketch after this list.)
- If you still see errors in your log like "**APPEND failed with error 0x83090a16 (Internal server error.)**", you should lower your concurrency and/or batching settings to avoid this kind of error. We try to mitigate the problem with a backoff-and-retry strategy (the **retry_interval** and **retry_times** settings), but as far as we know there's nothing this plugin can do to avoid it entirely; it's an ADLS problem. Perhaps ADLS should queue write requests internally instead of applying them directly to the filesystem, to avoid file write locks.
- However, unless you have very high concurrency and/or a large batch size, these errors shouldn't be a problem.

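For reference, here is a minimal sketch of how the plugin derives the ConcurrentID (cid), mirroring the logic in lib/logstash/outputs/adls.rb (illustrative only; the local variable names are ours):

```ruby
# Per instance: a random value, so that different Logstash instances on
# different machines don't generate the same cid.
random_value_per_instance = rand(10..10000)

# Per event: the instance value concatenated with the worker thread's id,
# giving each output thread its own file when the path contains the cid.
cid = "#{random_value_per_instance}#{Thread.current.object_id}"

# The plugin stores this under [@metadata][cid]; %{[@metadata][cid]} in the
# path setting expands to it:
# event.set("[@metadata][cid]", cid)
```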
## Build & Development

### Build
- To get started, you'll need JRuby with the Bundler and Rake gems installed.

- Install dependencies:
```sh
bundle install
```

- Install Java dependencies from Maven:
```sh
rake install_jars
```

- Build your plugin gem:

```sh
gem build logstash-output-adls.gemspec
```

### Run in a local Logstash for development purposes

- Edit the Logstash `Gemfile` and add the local plugin path, for example:
```sh
gem "logstash-output-adls", :path => "/your/local/logstash-output-adls"
```

- Install the plugin:
```sh
bin/logstash-plugin install --no-verify
```
data/lib/com/fasterxml/jackson/core/jackson-core/2.7.4/jackson-core-2.7.4.jar
ADDED
Binary file
data/lib/com/microsoft/azure/azure-data-lake-store-sdk/2.1.1/azure-data-lake-store-sdk-2.1.1.jar
ADDED
Binary file
data/lib/log4j/log4j/1.2.17/log4j-1.2.17.jar
ADDED
Binary file
data/lib/logstash-output-adls_jars.rb
ADDED
@@ -0,0 +1,18 @@
# this is a generated file, to avoid over-writing it just delete this comment
begin
  require 'jar_dependencies'
rescue LoadError
  require 'com/fasterxml/jackson/core/jackson-core/2.7.4/jackson-core-2.7.4.jar'
  require 'org/slf4j/slf4j-api/1.7.21/slf4j-api-1.7.21.jar'
  require 'org/slf4j/slf4j-log4j12/1.7.21/slf4j-log4j12-1.7.21.jar'
  require 'log4j/log4j/1.2.17/log4j-1.2.17.jar'
  require 'com/microsoft/azure/azure-data-lake-store-sdk/2.1.1/azure-data-lake-store-sdk-2.1.1.jar'
end

if defined? Jars
  require_jar( 'com.fasterxml.jackson.core', 'jackson-core', '2.7.4' )
  require_jar( 'org.slf4j', 'slf4j-api', '1.7.21' )
  require_jar( 'org.slf4j', 'slf4j-log4j12', '1.7.21' )
  require_jar( 'log4j', 'log4j', '1.2.17' )
  require_jar( 'com.microsoft.azure', 'azure-data-lake-store-sdk', '2.1.1' )
end
data/lib/logstash/outputs/adls.rb
ADDED
@@ -0,0 +1,239 @@
# encoding: utf-8
require 'date'
require "logstash/namespace"
require 'logstash/outputs/base'
require 'java'
require 'logstash-output-adls_jars.rb'

# ==== Usage
# This is an example of Logstash config:
#
# [source,ruby]
# ----------------------------------
# input {
#   ...
# }
# filter {
#   ...
# }
# output {
#   adls {
#     adls_fqdn => "XXXXXXXXXXX.azuredatalakestore.net"                                  # (required)
#     adls_token_endpoint => "https://login.microsoftonline.com/XXXXXXXXXX/oauth2/token" # (required)
#     adls_client_id => "00000000-0000-0000-0000-000000000000"                           # (required)
#     adls_client_key => "XXXXXXXXXXXXXXXXXXXXXX"                                        # (required)
#     path => "/logstash/%{+YYYY}/%{+MM}/%{+dd}/logstash-%{+HH}-%{[@metadata][cid]}.log" # (required)
#     line_separator => "\n"                    # (optional, default: "\n")
#     created_files_permission => 755           # (optional, default: 755)
#     adls_token_expire_security_margin => 300  # (optional, default: 300)
#     single_file_per_thread => true            # (optional, default: true)
#     retry_interval => 0.5                     # (optional, default: 1)
#     max_retry_interval => 10                  # (optional, default: 10)
#     retry_times => 3                          # (optional, default: 3)
#     exit_if_retries_exceeded => false         # (optional, default: false)
#     codec => "json"                           # (optional, default: the default codec defined by Logstash)
#   }
# }
# ----------------------------------
class LogStash::Outputs::ADLS < LogStash::Outputs::Base

  config_name "adls"

  concurrency :shared

  # The Azure DLS FQDN
  config :adls_fqdn, :validate => :string, :required => true

  # The Azure OAuth endpoint
  config :adls_token_endpoint, :validate => :string, :required => true

  # The Azure DLS ClientID
  config :adls_client_id, :validate => :string, :required => true

  # The Azure DLS ClientKey
  config :adls_client_key, :validate => :string, :required => true

  # The path to the file to write to. Event fields can be used here,
  # as well as date fields in the joda time format, e.g.:
  # `/user/logstash/dt=%{+YYYY-MM-dd}/%{@source_host}-%{+HH}.log`
  config :path, :validate => :string, :required => true

  # Line separator for events written.
  config :line_separator, :validate => :string, :default => "\n"

  # File permission for files created.
  config :created_files_permission, :validate => :number, :default => 755

  # The security margin that should be subtracted from the token's expiry value.
  config :adls_token_expire_security_margin, :validate => :number, :default => 300

  # Avoid appending to the same file from multiple threads.
  # This solves some problems with multiple Logstash output threads and locked file leases in ADLS.
  # If this option is set to true, %{[@metadata][cid]} needs to be used in the path config setting.
  config :single_file_per_thread, :validate => :boolean, :default => true

  # How long (in seconds) we should wait between retries in case of an error. This value is a coefficient
  # and not an absolute value. The wait time is "retry_interval*tries_counter". So, if retry_interval is 1,
  # the wait time will be 1 on the first retry, 2 on the second, and so on.
  config :retry_interval, :validate => :number, :default => 1

  # Max retry interval. The actual wait time (in seconds) will be min(retry_interval*tries_counter, max_retry_interval).
  config :max_retry_interval, :validate => :number, :default => 10

  # How many times we should retry. If retry_times is exceeded, an error will be logged and the event will be discarded. (Set to -1 for unlimited retries.)
  config :retry_times, :validate => :number, :default => 3

  # If enabled, Logstash will exit if retries are exceeded, to avoid losing events.
  config :exit_if_retries_exceeded, :validate => :boolean, :default => false

  attr_accessor :client
  attr_accessor :azureOauthTokenRefreshDate
  attr_accessor :timerTaskClass
  attr_accessor :timer

  public

  def register()

    begin
      @client = prepare_client(@adls_fqdn, @adls_client_id, @adls_token_endpoint, @adls_client_key)
    rescue => e
      logger.error("Cannot login to ADLS. Aborting... Exception: #{e.message}; Trace: #{e.backtrace.join("\n\t")}")
      exit 1
    end

    # Make sure @path contains the %{[@metadata][cid]} format value.
    if @single_file_per_thread and !@path.include? "%{[@metadata][cid]}"
      @logger.error("Please set the %{[@metadata][cid]} format value in @path to avoid file locks in ADLS.")
      raise LogStash::ConfigurationError
    end

    @codec.on_event do |event, encoded_event|
      encoded_event
    end

    @timerTaskClass = Class.new java.util.TimerTask do
      def setContext(parent)
        @parent = parent
      end
      def run
        begin
          @parent.client = @parent.prepare_client(@parent.adls_fqdn, @parent.adls_client_id, @parent.adls_token_endpoint, @parent.adls_client_key)
        rescue => e
          sleepTime = [@parent.retry_interval, @parent.max_retry_interval].min
          @parent.logger.error("ADLS OAuth token refresh failed! Retrying in #{sleepTime.to_s} seconds... Exception: #{e.message}; Trace: #{e.backtrace.join("\n\t")}")
          sleep(sleepTime)
        end
        timerTask = @parent.timerTaskClass.new
        timerTask.setContext(@parent)
        @parent.timer.schedule(timerTask, @parent.azureOauthTokenRefreshDate) # Rearm timer
      end
    end

    timerTask = @timerTaskClass.new
    timerTask.setContext(self)

    @timer = java.util.Timer.new
    @timer.schedule(timerTask, @azureOauthTokenRefreshDate)

    @randomValuePerInstance = rand(10..10000) # To make sure different instances on different machines don't generate the same threadId.
  end

  def close
    @logger.info("Logstash ADLS output plugin is shutting down...")
  end

  def prepare_client(accountFQDN, clientId, authTokenEndpoint, clientKey)
    azureToken = com.microsoft.azure.datalake.store.oauth2.AzureADAuthenticator.getTokenUsingClientCreds(authTokenEndpoint, clientId, clientKey)

    calendar = java.util.Calendar.getInstance()
    calendar.setTime(azureToken.expiry)
    calendar.set(java.util.Calendar::SECOND, (calendar.get(java.util.Calendar::SECOND) - @adls_token_expire_security_margin))
    @azureOauthTokenRefreshDate = calendar.getTime()

    @logger.info("Got ADLS OAuth token with expiry date #{azureToken.expiry.to_s}. Token will be refreshed at #{@azureOauthTokenRefreshDate.to_s}.")

    client = com.microsoft.azure.datalake.store.ADLStoreClient.createClient(accountFQDN, azureToken)
    options = com.microsoft.azure.datalake.store.ADLStoreOptions.new()
    options.setUserAgentSuffix("Logstash-ADLS-Output-Plugin")
    client.setOptions(options)
    client.checkExists("testFile") # Test the client to make sure it works. The return value is irrelevant.
    client
  end

  def multi_receive(events)
    return if not events

    timeElapsed = Time.now

    output_files = Hash.new { |hash, key| hash[key] = "" }
    events.collect do |event|

      if @single_file_per_thread
        event.set("[@metadata][cid]", "#{@randomValuePerInstance.to_s}#{Thread.current.object_id.to_s}")
      end

      path = event.sprintf(@path)
      event_as_string = @codec.encode(event)
      event_as_string += @line_separator unless event_as_string.end_with? @line_separator
      output_files[path] << event_as_string
    end

    output_files.each do |path, output|
      # Retry retry_times times. This can solve problems like leases being held by another process.
      write_tries = 0
      begin
        write_data(path, output)
      rescue Exception => e
        if (write_tries < @retry_times) or (@retry_times == -1)
          sleepTime = [@retry_interval * write_tries, @max_retry_interval].min
          @logger.warn("ADLS write caused an exception: #{e.message}. Maybe you should increase retry_interval or reduce the number of workers. Attempt: #{write_tries.to_s}. Retrying in #{sleepTime.to_s} seconds...")
          sleep(sleepTime)
          write_tries += 1
          retry
        else
          if e.instance_of? com.microsoft.azure.datalake.store.ADLException
            @logger.error("Max write retries reached. Events discarded! ADLS_RemoteMessage: #{e.remoteExceptionMessage}; Exception: #{e.message}; ADLS_Path: #{path}; StackTrace: #{e.backtrace.join("\n\t")}")
          else
            @logger.error("Max write retries reached. Events discarded! Exception: #{e.message}; StackTrace: #{e.backtrace.join("\n\t")}")
          end
          if @exit_if_retries_exceeded
            exit 1
          end
        end
      end
    end
    @logger.debug("#{events.length.to_s} events written to ADLS in #{Time.now - timeElapsed} seconds.")

  end

  def write_data(path, data)
    begin
      #@logger.info("Trying to write at #{path}")
      adlsClient = @client

      # Try to append to an already existing file, which will work most of the time.
      stream = adlsClient.getAppendStream(path)
      outStream = java.io.PrintStream.new(stream)
      outStream.print(data)
      outStream.close()
      stream.close()

    # File does not exist, so create it.
    rescue com.microsoft.azure.datalake.store.ADLException => e
      if e.httpResponseCode == 404
        createStream = adlsClient.createFile(path, com.microsoft.azure.datalake.store.IfExists::OVERWRITE, @created_files_permission.to_s, true)
        outStream = java.io.PrintStream.new(createStream)
        outStream.print(data)
        outStream.close()
        createStream.close()
        @logger.debug("File #{path} created.")
      else
        raise e
      end
    end
    #@logger.info("Data written to ADLS: #{data}")

  end

end
data/lib/org/slf4j/slf4j-api/1.7.21/slf4j-api-1.7.21.jar
ADDED
Binary file
data/lib/org/slf4j/slf4j-log4j12/1.7.21/slf4j-log4j12-1.7.21.jar
ADDED
Binary file
data/logstash-output-adls.gemspec
ADDED
@@ -0,0 +1,37 @@
# encoding: utf-8
Gem::Specification.new do |s|

  s.name = 'logstash-output-adls'
  s.version = '1.1.3'
  s.licenses = ['Apache-2.0']
  s.summary = "Plugin to write events to Azure DataLakeStore"
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
  s.authors = ["NOS Inovacao"]
  s.email = 'nosi.metadata@nos.pt'
  s.homepage = "http://www.nos.pt"
  s.require_paths = ["lib"]

  # Files
  s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md', 'Gemfile','LICENSE','NOTICE.TXT']

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }

  s.requirements << "jar 'com.microsoft.azure:azure-data-lake-store-sdk', '2.1.1'"
  s.requirements << "jar 'org.slf4j:slf4j-log4j12', '1.7.21'"

  s.add_development_dependency 'jar-dependencies', '~> 0.3.2'

  # Gem dependencies
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
  s.add_development_dependency 'logstash-devutils'

  s.add_development_dependency 'logstash-codec-line'
  s.add_development_dependency 'logstash-codec-json'
  s.add_development_dependency 'logstash-codec-plain'

end
data/spec/integration/adls_spec.rb
ADDED
@@ -0,0 +1,79 @@
# encoding: utf-8
require 'logstash/devutils/rspec/spec_helper'
require 'logstash/outputs/adls'
require 'json'
require 'java'

describe LogStash::Outputs::ADLS, :integration => true do

  let(:adls_fqdn) { 'XXXXXXXXXXX.azuredatalakestore.net' }
  let(:adls_token_endpoint) { 'https://login.microsoftonline.com/XXXXXXXXXX/oauth2/token' }
  let(:adls_client_id) { '00000000-0000-0000-0000-000000000000' }
  let(:adls_client_key) { 'XXXXXXXXXXXXXXXXXXXXXX' }
  let(:path) { '/test.log' }

  let(:config) { { 'adls_fqdn' => adls_fqdn, 'adls_token_endpoint' => adls_token_endpoint, 'adls_client_id' => adls_client_id, 'adls_client_key' => adls_client_key, 'path' => path, "single_file_per_thread" => false } }

  subject(:plugin) { LogStash::Plugin.lookup("output", "adls").new(config) }

  let(:event) { LogStash::Event.new('message' => 'Hello world!', 'source' => 'out of the blue',
                                    'type' => 'generator', 'host' => 'localhost' ) }

  describe "register and close" do

    it 'should register with default values' do
      expect { subject.register }.to_not raise_error
    end

  end

  describe '#write' do

    let(:AdlsClient) { nil }

    after(:each) do
      subject.close
      # delete file
    end

    describe "writing plain files" do

      before(:each) do
        subject.register
        AdlsClient = subject.client
        subject.multi_receive([event])
      end

      it 'should use the correct filename pattern' do
        expect(AdlsClient.checkExists(path)).to eq(true)
      end

      context "using the line codec without format" do

        it 'should match the event data' do
          stream = AdlsClient.getReadStream(path)
          s = java.util.Scanner.new(stream).useDelimiter("\\A")
          result = s.hasNext() ? s.next() : ""
          expect(result).to eq(event.to_s)
        end

      end

      #context "using the json codec" do

        #let(:config) { { 'host' => host, 'user' => user, 'flush_size' => 10, 'path' => test_file, 'compression' => 'none', 'codec' => 'json' } }

        #it 'should match the event data' do
          #expect(webhdfs_client.read(hdfs_file_name).strip()).to eq(event.to_json)
        #end

      #end

    end

  end
end
data/spec/outputs/adls_spec.rb
ADDED
@@ -0,0 +1,59 @@
# encoding: utf-8
require 'logstash/devutils/rspec/spec_helper'
require 'logstash/outputs/adls'

describe 'outputs/adls' do

  let(:adls_fqdn) { 'XXXXXXXXXXX.azuredatalakestore.net' }
  let(:adls_token_endpoint) { 'https://login.microsoftonline.com/XXXXXXXXXX/oauth2/token' }
  let(:adls_client_id) { '00000000-0000-0000-0000-000000000000' }
  let(:adls_client_key) { 'XXXXXXXXXXXXXXXXXXXXXX' }
  let(:path) { '/test.log' }

  let(:config) { { 'adls_fqdn' => adls_fqdn, 'adls_token_endpoint' => adls_token_endpoint, 'adls_client_id' => adls_client_id, 'adls_client_key' => adls_client_key, 'path' => path } }

  subject(:plugin) { LogStash::Plugin.lookup("output", "adls").new(config) }

  describe '#initializing' do

    it 'should fail to register without %{[@metadata][cid]} in the path' do
      plugin = LogStash::Plugin.lookup("output", "adls")
      expect { plugin.new }.to raise_error(LogStash::ConfigurationError)
    end

    context "default values" do

      it 'should have default line_separator' do
        expect(subject.line_separator).to eq("\n")
      end

      it 'should have default created_files_permission' do
        expect(subject.created_files_permission).to eq(755)
      end

      it 'should have default adls_token_expire_security_margin' do
        expect(subject.adls_token_expire_security_margin).to eq(300)
      end

      it 'should have default single_file_per_thread' do
        expect(subject.single_file_per_thread).to eq(true)
      end

      it 'should have default retry_interval' do
        expect(subject.retry_interval).to eq(1)
      end

      it 'should have default max_retry_interval' do
        expect(subject.max_retry_interval).to eq(10)
      end

      it 'should have default retry_times' do
        expect(subject.retry_times).to eq(3)
      end

      it 'should have default exit_if_retries_exceeded' do
        expect(subject.exit_if_retries_exceeded).to eq(false)
      end

    end
  end
end
data/vendor/jar-dependencies/runtime-jars/azure-data-lake-store-sdk-2.1.1.jar
ADDED
Binary file
data/vendor/jar-dependencies/runtime-jars/jackson-core-2.7.4.jar
ADDED
Binary file
data/vendor/jar-dependencies/runtime-jars/log4j-1.2.17.jar
ADDED
Binary file
data/vendor/jar-dependencies/runtime-jars/slf4j-api-1.7.21.jar
ADDED
Binary file
data/vendor/jar-dependencies/runtime-jars/slf4j-log4j12-1.7.21.jar
ADDED
Binary file
metadata
ADDED
@@ -0,0 +1,158 @@
--- !ruby/object:Gem::Specification
name: logstash-output-adls
version: !ruby/object:Gem::Version
  version: 1.1.3
platform: ruby
authors:
- NOS Inovacao
autorequire:
bindir: bin
cert_chain: []
date: 2017-03-06 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: 0.3.2
  name: jar-dependencies
  prerelease: false
  type: :development
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: 0.3.2
- !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '1.60'
    - - "<="
      - !ruby/object:Gem::Version
        version: '2.99'
  name: logstash-core-plugin-api
  prerelease: false
  type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '1.60'
    - - "<="
      - !ruby/object:Gem::Version
        version: '2.99'
- !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  name: logstash-devutils
  prerelease: false
  type: :development
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  name: logstash-codec-line
  prerelease: false
  type: :development
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  name: logstash-codec-json
  prerelease: false
  type: :development
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  name: logstash-codec-plain
  prerelease: false
  type: :development
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
description: This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program
email: nosi.metadata@nos.pt
executables: []
extensions: []
extra_rdoc_files: []
files:
- Gemfile
- LICENSE
- NOTICE.TXT
- README.md
- lib/com/fasterxml/jackson/core/jackson-core/2.7.4/jackson-core-2.7.4.jar
- lib/com/microsoft/azure/azure-data-lake-store-sdk/2.1.1/azure-data-lake-store-sdk-2.1.1.jar
- lib/log4j/log4j/1.2.17/log4j-1.2.17.jar
- lib/logstash-output-adls_jars.rb
- lib/logstash/outputs/adls.rb
- lib/org/slf4j/slf4j-api/1.7.21/slf4j-api-1.7.21.jar
- lib/org/slf4j/slf4j-log4j12/1.7.21/slf4j-log4j12-1.7.21.jar
- logstash-output-adls.gemspec
- spec/integration/adls_spec.rb
- spec/outputs/adls_spec.rb
- vendor/jar-dependencies/runtime-jars/azure-data-lake-store-sdk-2.1.1.jar
- vendor/jar-dependencies/runtime-jars/jackson-core-2.7.4.jar
- vendor/jar-dependencies/runtime-jars/log4j-1.2.17.jar
- vendor/jar-dependencies/runtime-jars/slf4j-api-1.7.21.jar
- vendor/jar-dependencies/runtime-jars/slf4j-log4j12-1.7.21.jar
homepage: http://www.nos.pt
licenses:
- Apache-2.0
metadata:
  logstash_plugin: 'true'
  logstash_group: output
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements:
- jar 'com.microsoft.azure:azure-data-lake-store-sdk', '2.1.1'
- jar 'org.slf4j:slf4j-log4j12', '1.7.21'
rubyforge_project:
rubygems_version: 2.6.8
signing_key:
specification_version: 4
summary: Plugin to write events to Azure DataLakeStore
test_files:
- spec/integration/adls_spec.rb
- spec/outputs/adls_spec.rb