logstash-output-clickhouse 0.1.1 → 0.1.4
- checksums.yaml +4 -4
- data/README.md +5 -2
- data/lib/logstash/outputs/clickhouse.rb +62 -97
- data/logstash-output-clickhouse.gemspec +14 -14
- metadata +9 -9
- data/lib/logstash/util/shortname_resolver.rb +0 -40
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 20cb59bb456f397ff9408209d676deb3c46214abbd874a5499ab5f7387bb80b9
+  data.tar.gz: 471cd0087a01acbcd6ed8efaa93e09f89102a2b2785bcfa7bb117e7ed8dd2b0d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3319460b03965943afecff0c3b910ebfb57611566f02b255b7fb3afd6b2a7f6781b89990cf667286972ed27bb503b90833a300f20450a0ad3e637c2144b3f37e
+  data.tar.gz: 609792b63cc1aa8d073f272e9539458738914c4ed1a795b22e23d6a86724b6e9c7e0277b533d0e02fa03d40168799197a614d3208e129b8ba534a8bb3ea80aee
data/README.md
CHANGED
@@ -1,6 +1,5 @@
-# I switched to vector -> https://github.com/timberio/vector.
-
 # Logstash Plugin
+[![Gem Version](https://badge.fury.io/rb/logstash-output-clickhouse.svg)](https://badge.fury.io/rb/logstash-output-clickhouse)
 
 This plugin is a modified version of the Lucidworks logstash json_batch. That plugin is available [here](https://github.com/lucidworks/logstash-output-json_batch).
 
@@ -19,6 +18,9 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
             "to1" => "from1"
             "to2" => [ "from2", "(.)(.)", '\1\2' ]
         }
+        extra_params => {
+            "date_time_input_format" => "best_effort"
+        }
     }
 }
 
@@ -28,6 +30,7 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
 * `automatic_retries` (default: 1) - number of connect retry attempts to each host in `http_hosts`
 * `request_tolerance` (default: 5) - number of http request send retry attempts if response status code is not 200
 * `backoff_time` (default: 3) - time to wait in seconds for next retry attempt of connect or request
+* `extra_params` (default: {}) - extra parameters to be passed to the clickhouse http client
 
 Default batch size is 50, with a wait of at most 5 seconds per send. These can be tweaked with the parameters `flush_size` and `idle_flush_time` respectively.
 
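For context on what these options translate to on the wire, the following is a minimal Ruby sketch of the equivalent HTTP request the plugin issues per batch. The host, table, and row values are hypothetical, and the plugin itself uses the logstash-mixin-http_client rather than Net::HTTP; this only illustrates the shape of the request.

```ruby
require "net/http"
require "uri"
require "json"

# The INSERT statement plus any extra_params become URL query parameters.
params = {
  "query" => "INSERT INTO my_table FORMAT JSONEachRow",
  "date_time_input_format" => "best_effort", # example extra_params entry
}
uri = URI("http://localhost:8123/?#{URI.encode_www_form(params)}")

# JSONEachRow: one JSON object per line, just like the batched documents.
body = [
  { "to1" => "value1", "to2" => "ab" },
  { "to1" => "value2", "to2" => "cd" },
].map { |row| JSON.dump(row) }.join("\n") + "\n"

response = Net::HTTP.post(uri, body, "Content-Type" => "application/json")
puts response.code # "200" on success
```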
data/lib/logstash/outputs/clickhouse.rb
CHANGED
@@ -2,13 +2,11 @@
 require "logstash/outputs/base"
 require "logstash/namespace"
 require "logstash/json"
-require "logstash/util/shortname_resolver"
 require "uri"
 require "stud/buffer"
 require "logstash/plugin_mixins/http_client"
 require "securerandom"
 
-
 class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
   include LogStash::PluginMixins::HttpClient
   include Stud::Buffer
@@ -20,7 +18,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
   config :http_hosts, :validate => :array, :required => true
 
   config :table, :validate => :string, :required => true
-
+
   # Custom headers to use
   # format is `headers => ["X-My-Header", "%{host}"]`
   config :headers, :validate => :hash
@@ -38,7 +36,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
   config :save_file, :validate => :string, :default => "failed.json"
 
   config :request_tolerance, :validate => :number, :default => 5
-
+
   config :backoff_time, :validate => :number, :default => 3
 
   config :automatic_retries, :validate => :number, :default => 3
@@ -47,19 +45,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
 
   config :host_resolve_ttl_sec, :validate => :number, :default => 120
 
+  config :extra_params, :validate => :hash, :default => {}
+
   def print_plugin_info()
-    @@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-clickhouse/ }
+    @@plugins = Gem::Specification.find_all { |spec| spec.name =~ /logstash-output-clickhouse/ }
     @plugin_name = @@plugins[0].name
     @plugin_version = @@plugins[0].version
     @logger.info("Running #{@plugin_name} version #{@plugin_version}")
 
     @logger.info("Initialized clickhouse with settings",
-
-
-
-
-
-
+      :flush_size => @flush_size,
+      :idle_flush_time => @idle_flush_time,
+      :request_tokens => @pool_max,
+      :http_hosts => @http_hosts,
+      :http_query => @http_query,
+      :headers => request_headers)
   end
 
   def register
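The new `extra_params` hash is folded into the INSERT query string inside `register` (next hunk). A standalone sketch of that construction, with a hypothetical table name:

```ruby
require "uri"

table = "my_table" # hypothetical
extra_params = { "date_time_input_format" => "best_effort" }

# Mirrors the register code: base query merged with extra_params, then form-encoded.
params = { "query" => "INSERT INTO #{table} FORMAT JSONEachRow" }.merge(extra_params)
http_query = "?#{URI.encode_www_form(params)}"
puts http_query
# => ?query=INSERT+INTO+my_table+FORMAT+JSONEachRow&date_time_input_format=best_effort
```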
@@ -71,91 +71,57 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
     # When this queue is empty no new requests may be sent,
     # tokens must be added back by the client on success
     @request_tokens = SizedQueue.new(@pool_max)
-    @pool_max.times {|t| @request_tokens << true }
+    @pool_max.times { |t| @request_tokens << true }
     @requests = Array.new
-    @http_query = "/?query=INSERT%20INTO%20#{table}%20FORMAT%20JSONEachRow"
 
-
-
-        ShortNameResolver.new(ttl: @host_resolve_ttl_sec, logger: @logger))
+    params = { "query" => "INSERT INTO #{table} FORMAT JSONEachRow" }.merge(@extra_params)
+    @http_query = "?#{URI.encode_www_form(params)}"
 
     buffer_initialize(
       :max_items => @flush_size,
       :max_interval => @idle_flush_time,
-      :logger => @logger
+      :logger => @logger,
     )
 
     print_plugin_info()
   end # def register
 
-  private
-
-  def parse_http_hosts(hosts, resolver)
-    ip_re = /^[\d]+\.[\d]+\.[\d]+\.[\d]+$/
-
-    lambda {
-      hosts.flat_map { |h|
-        scheme = URI(h).scheme
-        host = URI(h).host
-        port = URI(h).port
-        path = URI(h).path
-
-        if ip_re !~ host
-          resolver.get_addresses(host).map { |ip|
-            "#{scheme}://#{ip}:#{port}#{path}"
-          }
-        else
-          [h]
-        end
-      }
-    }
-  end
-
-  private
-
-  def get_host_addresses()
-    begin
-      @hostnames_pool.call
-    rescue Exception => ex
-      @logger.error('Error while resolving host', :error => ex.to_s)
-    end
-  end
-
   # This module currently does not support parallel requests as that would circumvent the batching
   def receive(event)
     buffer_receive(event)
   end
 
-  def mutate(
+  def mutate(src)
     return src if @mutations.empty?
     res = {}
     @mutations.each_pair do |dstkey, source|
       case source
-
-
-
-
-
-
-
-
-
-
-
+      when String
+        scrkey = source
+        next unless src.key?(scrkey)
+
+        res[dstkey] = src[scrkey]
+      when Array
+        scrkey = source[0]
+        next unless src.key?(scrkey)
+        pattern = source[1]
+        replace = source[2]
+        res[dstkey] = src[scrkey].sub(Regexp.new(pattern), replace)
       end
     end
     res
   end
 
   public
-
+
+  def flush(events, close = false)
     documents = "" #this is the string of hashes that we push to Fusion as documents
 
     events.each do |event|
-
+      documents << LogStash::Json.dump(mutate(event.to_hash())) << "\n"
     end
 
-    hosts =
+    hosts = @http_hosts.clone
 
     make_request(documents, hosts, @http_query, 1, 1, hosts.sample)
   end
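To make the rewritten `mutate` easier to follow, here is a self-contained sketch of the same String/Array handling outside the plugin class. Field names follow the README example; the swap pattern `'\2\1'` is purely illustrative.

```ruby
# Standalone version of the mutate logic above.
def mutate(src, mutations)
  return src if mutations.empty?
  res = {}
  mutations.each_pair do |dstkey, source|
    case source
    when String # plain rename/copy: "to1" => "from1"
      next unless src.key?(source)
      res[dstkey] = src[source]
    when Array  # regex rewrite: "to2" => [ "from2", pattern, replacement ]
      srckey, pattern, replace = source
      next unless src.key?(srckey)
      res[dstkey] = src[srckey].sub(Regexp.new(pattern), replace)
    end
  end
  res
end

mutations = { "to1" => "from1", "to2" => ["from2", "(.)(.)", '\2\1'] }
p mutate({ "from1" => "a", "from2" => "xy" }, mutations)
# => {"to1"=>"a", "to2"=>"yx"}
```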
@@ -165,10 +131,10 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
   def save_to_disk(documents)
     begin
       file = File.open("#{save_dir}/#{table}_#{save_file}", "a")
-      file.write(documents)
+      file.write(documents)
     rescue IOError => e
       log_failure("An error occurred while saving file to disk: #{e}",
-
+        :file_name => file_name)
     ensure
       file.close unless file.nil?
     end
@@ -177,21 +143,20 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
   def delay_attempt(attempt_number, delay)
     # sleep delay grows roughly as k*x*ln(x) where k is the initial delay set in @backoff_time param
     attempt = [attempt_number, 1].max
-    timeout = lambda { |x| [delay*x*Math.log(x), 1].max }
+    timeout = lambda { |x| [delay * x * Math.log(x), 1].max }
     # using rand() to pick final sleep delay to reduce the risk of getting in sync with other clients writing to the DB
-    sleep_time = rand(timeout.call(attempt)..timeout.call(attempt+1))
+    sleep_time = rand(timeout.call(attempt)..timeout.call(attempt + 1))
     sleep sleep_time
   end
 
   private
 
   def make_request(documents, hosts, query, con_count = 1, req_count = 1, host = "", uuid = SecureRandom.hex)
-
     if host == ""
       host = hosts.pop
     end
 
-    url = host+query
+    url = host + query
 
     # Block waiting for a token
     #@logger.info("Requesting token ", :tokens => request_tokens.length())
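For a feel of the retry pacing, the k·x·ln(x) windows with the default `backoff_time` of 3 look roughly like this (a standalone sketch of the same lambda, not plugin code):

```ruby
delay = 3 # default backoff_time
timeout = lambda { |x| [delay * x * Math.log(x), 1].max }

(1..4).each do |attempt|
  low, high = timeout.call(attempt), timeout.call(attempt + 1)
  # The plugin sleeps rand(low..high); print the window instead.
  printf("attempt %d: sleep between %.1f and %.1f seconds\n", attempt, low, high)
end
# attempt 1: sleep between 1.0 and 4.2 seconds
# attempt 2: sleep between 4.2 and 9.9 seconds
# attempt 3: sleep between 9.9 and 16.6 seconds
# attempt 4: sleep between 16.6 and 24.1 seconds
```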
@@ -210,25 +175,26 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
       @request_tokens << token
 
       if response.code == 200
-        @logger.debug("Successfully submitted",
-
-
-
+        @logger.debug("Successfully submitted",
+          :size => documents.length,
+          :response_code => response.code,
+          :uuid => uuid)
       else
         if req_count >= @request_tolerance
           log_failure(
-
-
-
-
-
+            "Encountered non-200 HTTP code #{response.code}",
+            :response_code => response.code,
+            :url => url,
+            :size => documents.length,
+            :uuid => uuid,
+          )
           if @save_on_failure
             save_to_disk(documents)
           end
         else
           @logger.info("Retrying request", :url => url, :message => response.message, :response => response.body, :uuid => uuid)
           delay_attempt(req_count, @backoff_time)
-          make_request(documents, hosts, query, con_count, req_count+1, host, uuid)
+          make_request(documents, hosts, query, con_count, req_count + 1, host, uuid)
         end
       end
     end
@@ -238,21 +204,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
       @request_tokens << token
 
       if hosts.length == 0
-
-
-
-
-
-
-
-
-
-
-
-
-
+        log_failure("Could not access URL",
+          :url => url,
+          :method => @http_method,
+          :headers => headers,
+          :message => exception.message,
+          :class => exception.class.name,
+          :backtrace => exception.backtrace,
+          :size => documents.length,
+          :uuid => uuid)
+        if @save_on_failure
+          save_to_disk(documents)
+        end
+        return
       end
-
+
       if con_count >= @automatic_retries
         host = ""
         con_count = 0
@@ -260,7 +226,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
 
       @logger.info("Retrying connection", :url => url, :uuid => uuid)
       delay_attempt(con_count, @backoff_time)
-      make_request(documents, hosts, query, con_count+1, req_count, host, uuid)
+      make_request(documents, hosts, query, con_count + 1, req_count, host, uuid)
     end
 
     client.execute!
@@ -276,5 +242,4 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
     headers["Content-Type"] ||= "application/json"
     headers
   end
-
 end
data/logstash-output-clickhouse.gemspec
CHANGED
@@ -1,16 +1,16 @@
 Gem::Specification.new do |s|
-  s.name
-  s.version
-  s.licenses
-  s.summary
-  s.description
-  s.authors
-  s.email
-  s.homepage
+  s.name          = "logstash-output-clickhouse"
+  s.version       = "0.1.4"
+  s.licenses      = ["Apache-2.0"]
+  s.summary       = "This output lets you `POST` messages as JSON in a batched fashion to ClickHouse HTTP endpoint"
+  s.description   = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
+  s.authors       = ["kmajk", "maltoze"]
+  s.email         = "atonezzz@gmail.com"
+  s.homepage      = "https://github.com/maltoze/logstash-output-clickhouse"
   s.require_paths = ["lib"]
 
   # Files
-  s.files = Dir[
+  s.files = Dir["lib/**/*", "spec/**/*", "*.gemspec", "*.md", "Gemfile", "LICENSE"]
 
   # Tests
   s.test_files = s.files.grep(%r{^(test|spec|features)/})
@@ -20,10 +20,10 @@ Gem::Specification.new do |s|
 
   # Gem dependencies
   s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
-  s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "
-  s.add_runtime_dependency
+  s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "< 8.0.0"
+  s.add_runtime_dependency "mini_cache", ">= 1.0.0", "< 2.0.0"
 
-  s.add_development_dependency
-  s.add_development_dependency
-  s.add_development_dependency
+  s.add_development_dependency "logstash-devutils"
+  s.add_development_dependency "sinatra"
+  s.add_development_dependency "webrick"
 end
metadata
CHANGED
@@ -1,14 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: logstash-output-clickhouse
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.4
 platform: ruby
 authors:
 - kmajk
+- maltoze
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2023-04-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: logstash-core-plugin-api
@@ -37,9 +38,9 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 6.0.0
-    - - "
+    - - "<"
       - !ruby/object:Gem::Version
-        version:
+        version: 8.0.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -47,9 +48,9 @@ dependencies:
     - - ">="
      - !ruby/object:Gem::Version
         version: 6.0.0
-    - - "
+    - - "<"
      - !ruby/object:Gem::Version
-        version:
+        version: 8.0.0
 - !ruby/object:Gem::Dependency
   name: mini_cache
   requirement: !ruby/object:Gem::Requirement
@@ -115,7 +116,7 @@ dependencies:
 description: This gem is a logstash plugin required to be installed on top of the
   Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
   a stand-alone program
-email:
+email: atonezzz@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
@@ -124,9 +125,8 @@ files:
 - LICENSE
 - README.md
 - lib/logstash/outputs/clickhouse.rb
-- lib/logstash/util/shortname_resolver.rb
 - logstash-output-clickhouse.gemspec
-homepage:
+homepage: https://github.com/maltoze/logstash-output-clickhouse
 licenses:
 - Apache-2.0
 metadata:
data/lib/logstash/util/shortname_resolver.rb
DELETED
@@ -1,40 +0,0 @@
-require 'resolv'
-require 'mini_cache'
-
-class ShortNameResolver
-  def initialize(ttl:, logger:)
-    @ttl = ttl
-    @store = MiniCache::Store.new
-    @logger = logger
-  end
-
-  private
-  def resolve_cached(shortname)
-    @store.get_or_set(shortname) do
-      addresses = resolve(shortname)
-      raise "Bad shortname '#{shortname}'" if addresses.empty?
-      MiniCache::Data.new(addresses, expires_in: @ttl)
-    end
-  end
-
-  private
-  def resolve(shortname)
-    addresses = Resolv::DNS.open do |dns|
-      dns.getaddresses(shortname).map { |r| r.to_s }
-    end
-
-    @logger.info("Resolved shortname '#{shortname}' to addresses #{addresses}")
-
-    return addresses
-  end
-
-  public
-  def get_address(shortname)
-    return resolve_cached(shortname).sample
-  end
-
-  public
-  def get_addresses(shortname)
-    return resolve_cached(shortname)
-  end
-end