logstash-output-clickhouse 0.1.1 → 0.1.4
This diff shows the content changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +5 -2
- data/lib/logstash/outputs/clickhouse.rb +62 -97
- data/logstash-output-clickhouse.gemspec +14 -14
- metadata +9 -9
- data/lib/logstash/util/shortname_resolver.rb +0 -40
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 20cb59bb456f397ff9408209d676deb3c46214abbd874a5499ab5f7387bb80b9
+  data.tar.gz: 471cd0087a01acbcd6ed8efaa93e09f89102a2b2785bcfa7bb117e7ed8dd2b0d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3319460b03965943afecff0c3b910ebfb57611566f02b255b7fb3afd6b2a7f6781b89990cf667286972ed27bb503b90833a300f20450a0ad3e637c2144b3f37e
+  data.tar.gz: 609792b63cc1aa8d073f272e9539458738914c4ed1a795b22e23d6a86724b6e9c7e0277b533d0e02fa03d40168799197a614d3208e129b8ba534a8bb3ea80aee
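If you want to spot-check a downloaded copy of the gem against the digests published above, one way is to hash the `data.tar.gz` member of the `.gem` archive. A minimal Ruby sketch, assuming the gem has been downloaded locally (the file name is an assumption, not part of this diff):

```ruby
# Hypothetical spot-check: a .gem file is a tar archive whose data.tar.gz member
# should hash to the SHA-256 value published in checksums.yaml above.
require "digest"
require "rubygems/package"

gem_path = "logstash-output-clickhouse-0.1.4.gem" # assumed local file name
expected = "471cd0087a01acbcd6ed8efaa93e09f89102a2b2785bcfa7bb117e7ed8dd2b0d"

actual = nil
File.open(gem_path, "rb") do |io|
  reader = Gem::Package::TarReader.new(io)
  reader.each do |entry|
    actual = Digest::SHA256.hexdigest(entry.read) if entry.full_name == "data.tar.gz"
  end
  reader.close
end

puts(actual == expected ? "data.tar.gz digest matches" : "digest mismatch: #{actual}")
```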
data/README.md
CHANGED
@@ -1,6 +1,5 @@
-# I switched to vector -> https://github.com/timberio/vector.
-
 # Logstash Plugin
+[](https://badge.fury.io/rb/logstash-output-clickhouse)
 
 This plugin is a modified version of the Lucidworks logstash json_batch. That plugin is available [here](https://github.com/lucidworks/logstash-output-json_batch).
 
@@ -19,6 +18,9 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
       "to1" => "from1"
       "to2" => [ "from2", "(.)(.)", '\1\2' ]
     }
+    extra_params => {
+      "date_time_input_format" => "best_effort"
+    }
   }
 }
 
@@ -28,6 +30,7 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
 * `automatic_retries` (default: 1) - number of connect retry attempts to each host in `http_hosts`
 * `request_tolerance` (default: 5) - number of http request send retry attempts if response status code is not 200
 * `backoff_time` (default: 3) - time to wait in seconds for next retry attempt of connect or request
+* `extra_params` (default: {}) - extra parameters to be passed to the clickhouse http client
 
 Default batch size is 50, with a wait of at most 5 seconds per send. These can be tweaked with the parameters `flush_size` and `idle_flush_time` respectively.
 
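The `mutations` mapping shown in the config example above either copies a field under a new name (string source) or rewrites it through a regular expression (array source of `[source_field, pattern, replacement]`), and only the mapped fields end up in the inserted row. A minimal standalone Ruby sketch of that behaviour, mirroring the plugin's `mutate` method further down in this diff; the event hash is made up for illustration:

```ruby
# Standalone illustration of the mutations mapping; this is not the plugin code itself.
mutations = {
  "to1" => "from1",                      # plain copy under a new key
  "to2" => ["from2", "(.)(.)", '\1\2'],  # regexp rewrite of the source value
}

event = { "from1" => "a", "from2" => "xy", "dropped" => "z" } # made-up event

row = {}
mutations.each_pair do |dstkey, source|
  case source
  when String
    next unless event.key?(source)
    row[dstkey] = event[source]
  when Array
    srckey, pattern, replace = source
    next unless event.key?(srckey)
    row[dstkey] = event[srckey].sub(Regexp.new(pattern), replace)
  end
end

p row  # => {"to1"=>"a", "to2"=>"xy"}; unmapped fields are not forwarded
```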
data/lib/logstash/outputs/clickhouse.rb
CHANGED
@@ -2,13 +2,11 @@
 require "logstash/outputs/base"
 require "logstash/namespace"
 require "logstash/json"
-require "logstash/util/shortname_resolver"
 require "uri"
 require "stud/buffer"
 require "logstash/plugin_mixins/http_client"
 require "securerandom"
 
-
 class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
   include LogStash::PluginMixins::HttpClient
   include Stud::Buffer
@@ -20,7 +18,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
   config :http_hosts, :validate => :array, :required => true
 
   config :table, :validate => :string, :required => true
-
+
   # Custom headers to use
   # format is `headers => ["X-My-Header", "%{host}"]`
   config :headers, :validate => :hash
@@ -38,7 +36,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
   config :save_file, :validate => :string, :default => "failed.json"
 
   config :request_tolerance, :validate => :number, :default => 5
-
+
   config :backoff_time, :validate => :number, :default => 3
 
   config :automatic_retries, :validate => :number, :default => 3
@@ -47,19 +45,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
 
   config :host_resolve_ttl_sec, :validate => :number, :default => 120
 
+  config :extra_params, :validate => :hash, :default => {}
+
   def print_plugin_info()
-    @@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-clickhouse/ }
+    @@plugins = Gem::Specification.find_all { |spec| spec.name =~ /logstash-output-clickhouse/ }
     @plugin_name = @@plugins[0].name
     @plugin_version = @@plugins[0].version
     @logger.info("Running #{@plugin_name} version #{@plugin_version}")
 
     @logger.info("Initialized clickhouse with settings",
-
-
-
-
-
-
+      :flush_size => @flush_size,
+      :idle_flush_time => @idle_flush_time,
+      :request_tokens => @pool_max,
+      :http_hosts => @http_hosts,
+      :http_query => @http_query,
+      :headers => request_headers)
   end
 
   def register
@@ -71,91 +71,57 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
     # When this queue is empty no new requests may be sent,
     # tokens must be added back by the client on success
     @request_tokens = SizedQueue.new(@pool_max)
-    @pool_max.times {|t| @request_tokens << true }
+    @pool_max.times { |t| @request_tokens << true }
     @requests = Array.new
-    @http_query = "/?query=INSERT%20INTO%20#{table}%20FORMAT%20JSONEachRow"
 
-
-
-        ShortNameResolver.new(ttl: @host_resolve_ttl_sec, logger: @logger))
+    params = { "query" => "INSERT INTO #{table} FORMAT JSONEachRow" }.merge(@extra_params)
+    @http_query = "?#{URI.encode_www_form(params)}"
 
     buffer_initialize(
       :max_items => @flush_size,
       :max_interval => @idle_flush_time,
-      :logger => @logger
+      :logger => @logger,
     )
 
     print_plugin_info()
   end # def register
 
-  private
-
-  def parse_http_hosts(hosts, resolver)
-    ip_re = /^[\d]+\.[\d]+\.[\d]+\.[\d]+$/
-
-    lambda {
-      hosts.flat_map { |h|
-        scheme = URI(h).scheme
-        host = URI(h).host
-        port = URI(h).port
-        path = URI(h).path
-
-        if ip_re !~ host
-          resolver.get_addresses(host).map { |ip|
-            "#{scheme}://#{ip}:#{port}#{path}"
-          }
-        else
-          [h]
-        end
-      }
-    }
-  end
-
-  private
-
-  def get_host_addresses()
-    begin
-      @hostnames_pool.call
-    rescue Exception => ex
-      @logger.error('Error while resolving host', :error => ex.to_s)
-    end
-  end
-
   # This module currently does not support parallel requests as that would circumvent the batching
   def receive(event)
     buffer_receive(event)
   end
 
-  def mutate(
+  def mutate(src)
     return src if @mutations.empty?
     res = {}
     @mutations.each_pair do |dstkey, source|
       case source
-
-
-
-
-
-
-
-
-
-
-
+      when String
+        scrkey = source
+        next unless src.key?(scrkey)
+
+        res[dstkey] = src[scrkey]
+      when Array
+        scrkey = source[0]
+        next unless src.key?(scrkey)
+        pattern = source[1]
+        replace = source[2]
+        res[dstkey] = src[scrkey].sub(Regexp.new(pattern), replace)
       end
     end
     res
   end
 
   public
-
+
+  def flush(events, close = false)
     documents = "" #this is the string of hashes that we push to Fusion as documents
 
     events.each do |event|
-
+      documents << LogStash::Json.dump(mutate(event.to_hash())) << "\n"
    end
 
-    hosts =
+    hosts = @http_hosts.clone
 
     make_request(documents, hosts, @http_query, 1, 1, hosts.sample)
   end
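The rewritten `register` above now builds the query string with `URI.encode_www_form`, so anything placed in `extra_params` is appended to the insert URL as additional ClickHouse settings alongside the `query` parameter. A quick illustration of the resulting string; the table name `logs` is only an example:

```ruby
require "uri"

# What the new @http_query construction yields for an example table and the
# extra_params value shown in the README; "logs" is an illustrative table name.
extra_params = { "date_time_input_format" => "best_effort" }
params = { "query" => "INSERT INTO logs FORMAT JSONEachRow" }.merge(extra_params)

http_query = "?#{URI.encode_www_form(params)}"
puts http_query
# => ?query=INSERT+INTO+logs+FORMAT+JSONEachRow&date_time_input_format=best_effort
```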
@@ -165,10 +131,10 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
   def save_to_disk(documents)
     begin
       file = File.open("#{save_dir}/#{table}_#{save_file}", "a")
-      file.write(documents)
+      file.write(documents)
     rescue IOError => e
       log_failure("An error occurred while saving file to disk: #{e}",
-
+                  :file_name => file_name)
     ensure
       file.close unless file.nil?
     end
@@ -177,21 +143,20 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
   def delay_attempt(attempt_number, delay)
     # sleep delay grows roughly as k*x*ln(x) where k is the initial delay set in @backoff_time param
     attempt = [attempt_number, 1].max
-    timeout = lambda { |x| [delay*x*Math.log(x), 1].max }
+    timeout = lambda { |x| [delay * x * Math.log(x), 1].max }
     # using rand() to pick final sleep delay to reduce the risk of getting in sync with other clients writing to the DB
-    sleep_time = rand(timeout.call(attempt)..timeout.call(attempt+1))
+    sleep_time = rand(timeout.call(attempt)..timeout.call(attempt + 1))
     sleep sleep_time
   end
 
   private
 
   def make_request(documents, hosts, query, con_count = 1, req_count = 1, host = "", uuid = SecureRandom.hex)
-
     if host == ""
       host = hosts.pop
     end
 
-    url = host+query
+    url = host + query
 
     # Block waiting for a token
     #@logger.info("Requesting token ", :tokens => request_tokens.length())
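With the default `backoff_time` of 3, the window from which `delay_attempt` draws each sleep grows roughly as 3·x·ln(x). A small sketch that prints the first few windows:

```ruby
# Reproduces the delay window used by delay_attempt for the default backoff_time of 3.
delay   = 3
timeout = lambda { |x| [delay * x * Math.log(x), 1].max }

(1..4).each do |attempt|
  low  = timeout.call(attempt)
  high = timeout.call(attempt + 1)
  puts format("attempt %d: sleep drawn from %.2f..%.2f seconds", attempt, low, high)
end
# attempt 1: sleep drawn from 1.00..4.16 seconds
# attempt 2: sleep drawn from 4.16..9.89 seconds
# attempt 3: sleep drawn from 9.89..16.64 seconds
# attempt 4: sleep drawn from 16.64..24.14 seconds
```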
@@ -210,25 +175,26 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
       @request_tokens << token
 
       if response.code == 200
-        @logger.debug("Successfully submitted",
-
-
-
+        @logger.debug("Successfully submitted",
+          :size => documents.length,
+          :response_code => response.code,
+          :uuid => uuid)
       else
         if req_count >= @request_tolerance
           log_failure(
-
-
-
-
-
+            "Encountered non-200 HTTP code #{response.code}",
+            :response_code => response.code,
+            :url => url,
+            :size => documents.length,
+            :uuid => uuid,
+          )
           if @save_on_failure
             save_to_disk(documents)
           end
         else
           @logger.info("Retrying request", :url => url, :message => response.message, :response => response.body, :uuid => uuid)
           delay_attempt(req_count, @backoff_time)
-          make_request(documents, hosts, query, con_count, req_count+1, host, uuid)
+          make_request(documents, hosts, query, con_count, req_count + 1, host, uuid)
         end
       end
     end
@@ -238,21 +204,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
       @request_tokens << token
 
       if hosts.length == 0
-
-
-
-
-
-
-
-
-
-
-
-
-
+        log_failure("Could not access URL",
+          :url => url,
+          :method => @http_method,
+          :headers => headers,
+          :message => exception.message,
+          :class => exception.class.name,
+          :backtrace => exception.backtrace,
+          :size => documents.length,
+          :uuid => uuid)
+        if @save_on_failure
+          save_to_disk(documents)
+        end
+        return
       end
-
+
       if con_count >= @automatic_retries
         host = ""
         con_count = 0
@@ -260,7 +226,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
 
       @logger.info("Retrying connection", :url => url, :uuid => uuid)
       delay_attempt(con_count, @backoff_time)
-      make_request(documents, hosts, query, con_count+1, req_count, host, uuid)
+      make_request(documents, hosts, query, con_count + 1, req_count, host, uuid)
     end
 
     client.execute!
@@ -276,5 +242,4 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
     headers["Content-Type"] ||= "application/json"
     headers
   end
-
 end
data/logstash-output-clickhouse.gemspec
CHANGED
@@ -1,16 +1,16 @@
 Gem::Specification.new do |s|
-  s.name
-  s.version
-  s.licenses
-  s.summary
-  s.description
-  s.authors
-  s.email
-  s.homepage
+  s.name = "logstash-output-clickhouse"
+  s.version = "0.1.4"
+  s.licenses = ["Apache-2.0"]
+  s.summary = "This output lets you `POST` messages as JSON in a batched fashion to ClickHouse HTTP endpoint"
+  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
+  s.authors = ["kmajk", "maltoze"]
+  s.email = "atonezzz@gmail.com"
+  s.homepage = "https://github.com/maltoze/logstash-output-clickhouse"
   s.require_paths = ["lib"]
 
   # Files
-  s.files = Dir[
+  s.files = Dir["lib/**/*", "spec/**/*", "*.gemspec", "*.md", "Gemfile", "LICENSE"]
 
   # Tests
   s.test_files = s.files.grep(%r{^(test|spec|features)/})
@@ -20,10 +20,10 @@ Gem::Specification.new do |s|
 
   # Gem dependencies
   s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
-  s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "
-  s.add_runtime_dependency
+  s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "< 8.0.0"
+  s.add_runtime_dependency "mini_cache", ">= 1.0.0", "< 2.0.0"
 
-  s.add_development_dependency
-  s.add_development_dependency
-  s.add_development_dependency
+  s.add_development_dependency "logstash-devutils"
+  s.add_development_dependency "sinatra"
+  s.add_development_dependency "webrick"
 end
metadata
CHANGED
@@ -1,14 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: logstash-output-clickhouse
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.4
 platform: ruby
 authors:
 - kmajk
+- maltoze
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2023-04-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: logstash-core-plugin-api
@@ -37,9 +38,9 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 6.0.0
-    - - "
+    - - "<"
       - !ruby/object:Gem::Version
-        version:
+        version: 8.0.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -47,9 +48,9 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 6.0.0
-    - - "
+    - - "<"
      - !ruby/object:Gem::Version
-        version:
+        version: 8.0.0
 - !ruby/object:Gem::Dependency
   name: mini_cache
   requirement: !ruby/object:Gem::Requirement
@@ -115,7 +116,7 @@ dependencies:
 description: This gem is a logstash plugin required to be installed on top of the
   Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
   a stand-alone program
-email:
+email: atonezzz@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
@@ -124,9 +125,8 @@ files:
 - LICENSE
 - README.md
 - lib/logstash/outputs/clickhouse.rb
-- lib/logstash/util/shortname_resolver.rb
 - logstash-output-clickhouse.gemspec
-homepage:
+homepage: https://github.com/maltoze/logstash-output-clickhouse
 licenses:
 - Apache-2.0
 metadata:
data/lib/logstash/util/shortname_resolver.rb
DELETED
@@ -1,40 +0,0 @@
-require 'resolv'
-require 'mini_cache'
-
-class ShortNameResolver
-  def initialize(ttl:, logger:)
-    @ttl = ttl
-    @store = MiniCache::Store.new
-    @logger = logger
-  end
-
-  private
-  def resolve_cached(shortname)
-    @store.get_or_set(shortname) do
-      addresses = resolve(shortname)
-      raise "Bad shortname '#{shortname}'" if addresses.empty?
-      MiniCache::Data.new(addresses, expires_in: @ttl)
-    end
-  end
-
-  private
-  def resolve(shortname)
-    addresses = Resolv::DNS.open do |dns|
-      dns.getaddresses(shortname).map { |r| r.to_s }
-    end
-
-    @logger.info("Resolved shortname '#{shortname}' to addresses #{addresses}")
-
-    return addresses
-  end
-
-  public
-  def get_address(shortname)
-    return resolve_cached(shortname).sample
-  end
-
-  public
-  def get_addresses(shortname)
-    return resolve_cached(shortname)
-  end
-end