logstash-output-clickhouse 0.1.1 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/lib/logstash/outputs/clickhouse.rb +64 -61
- data/logstash-output-clickhouse.gemspec +14 -14
- metadata +9 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 05f2b2d96799587dc5678cd80a6ef5d0c62a103797a298dfbe86acb1dbe09b89
|
4
|
+
data.tar.gz: ffeb98ddb2e27497ff54e9304180a2a44e88d093ce33755f842409a2101d4153
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c6d244d35441f46a0b49b79d1f278004e634334349d3949ec8d49e96fb02e31c942d13f8be72e7ddcd9996687acf81f1378b3587cbab6c6a9a9f93eb6634a6a
|
7
|
+
data.tar.gz: 530b73e05f582bf9b9af8a3be95aa1139fd0b21c3a75b411635453fa9c5e4bc538f8fb8477ab2388fad2560570c92a3623d614c97e69f103850ef6e54d45a23a
|
data/README.md
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# I switched to vector -> https://github.com/timberio/vector.
|
2
|
-
|
3
1
|
# Logstash Plugin
|
4
2
|
|
5
3
|
This plugin is a modified version of the Lucidworks logstash json_batch. That plugin is available [here](https://github.com/lucidworks/logstash-output-json_batch).
|
@@ -19,6 +17,9 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
|
|
19
17
|
"to1" => "from1"
|
20
18
|
"to2" => [ "from2", "(.)(.)", '\1\2' ]
|
21
19
|
}
|
20
|
+
extra_params => {
|
21
|
+
"date_time_input_format" => "best_effort"
|
22
|
+
}
|
22
23
|
}
|
23
24
|
}
|
24
25
|
|
@@ -28,6 +29,7 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
|
|
28
29
|
* `automatic_retries` (default: 1) - number of connect retry attempts to each host in `http_hosts`
|
29
30
|
* `request_tolerance` (default: 5) - number of http request send retry attempts if response status code is not 200
|
30
31
|
* `backoff_time` (default: 3) - time to wait in seconds for next retry attempt of connect or request
|
32
|
+
* `extra_params` (default: {}) - extra parameters to be passed to the clickhouse http client
|
31
33
|
|
32
34
|
Default batch size is 50, with a wait of at most 5 seconds per send. These can be tweaked with the parameters `flush_size` and `idle_flush_time` respectively.
|
33
35
|
|
@@ -8,7 +8,6 @@ require "stud/buffer"
|
|
8
8
|
require "logstash/plugin_mixins/http_client"
|
9
9
|
require "securerandom"
|
10
10
|
|
11
|
-
|
12
11
|
class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
13
12
|
include LogStash::PluginMixins::HttpClient
|
14
13
|
include Stud::Buffer
|
@@ -20,7 +19,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
20
19
|
config :http_hosts, :validate => :array, :required => true
|
21
20
|
|
22
21
|
config :table, :validate => :string, :required => true
|
23
|
-
|
22
|
+
|
24
23
|
# Custom headers to use
|
25
24
|
# format is `headers => ["X-My-Header", "%{host}"]`
|
26
25
|
config :headers, :validate => :hash
|
@@ -38,7 +37,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
38
37
|
config :save_file, :validate => :string, :default => "failed.json"
|
39
38
|
|
40
39
|
config :request_tolerance, :validate => :number, :default => 5
|
41
|
-
|
40
|
+
|
42
41
|
config :backoff_time, :validate => :number, :default => 3
|
43
42
|
|
44
43
|
config :automatic_retries, :validate => :number, :default => 3
|
@@ -47,19 +46,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
47
46
|
|
48
47
|
config :host_resolve_ttl_sec, :validate => :number, :default => 120
|
49
48
|
|
49
|
+
config :extra_params, :validate => :hash, :default => {}
|
50
|
+
|
50
51
|
def print_plugin_info()
|
51
|
-
@@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-clickhouse/ }
|
52
|
+
@@plugins = Gem::Specification.find_all { |spec| spec.name =~ /logstash-output-clickhouse/ }
|
52
53
|
@plugin_name = @@plugins[0].name
|
53
54
|
@plugin_version = @@plugins[0].version
|
54
55
|
@logger.info("Running #{@plugin_name} version #{@plugin_version}")
|
55
56
|
|
56
57
|
@logger.info("Initialized clickhouse with settings",
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
:flush_size => @flush_size,
|
59
|
+
:idle_flush_time => @idle_flush_time,
|
60
|
+
:request_tokens => @pool_max,
|
61
|
+
:http_hosts => @http_hosts,
|
62
|
+
:http_query => @http_query,
|
63
|
+
:headers => request_headers)
|
63
64
|
end
|
64
65
|
|
65
66
|
def register
|
@@ -71,18 +72,20 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
71
72
|
# When this queue is empty no new requests may be sent,
|
72
73
|
# tokens must be added back by the client on success
|
73
74
|
@request_tokens = SizedQueue.new(@pool_max)
|
74
|
-
@pool_max.times {|t| @request_tokens << true }
|
75
|
+
@pool_max.times { |t| @request_tokens << true }
|
75
76
|
@requests = Array.new
|
76
|
-
|
77
|
+
|
78
|
+
params = { "query" => "INSERT INTO #{table} FORMAT JSONEachRow" }.merge(@extra_params)
|
79
|
+
@http_query = "?#{URI.encode_www_form(params)}"
|
77
80
|
|
78
81
|
@hostnames_pool =
|
79
82
|
parse_http_hosts(http_hosts,
|
80
|
-
|
83
|
+
ShortNameResolver.new(ttl: @host_resolve_ttl_sec, logger: @logger))
|
81
84
|
|
82
85
|
buffer_initialize(
|
83
86
|
:max_items => @flush_size,
|
84
87
|
:max_interval => @idle_flush_time,
|
85
|
-
:logger => @logger
|
88
|
+
:logger => @logger,
|
86
89
|
)
|
87
90
|
|
88
91
|
print_plugin_info()
|
@@ -117,7 +120,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
117
120
|
begin
|
118
121
|
@hostnames_pool.call
|
119
122
|
rescue Exception => ex
|
120
|
-
@logger.error(
|
123
|
+
@logger.error("Error while resolving host", :error => ex.to_s)
|
121
124
|
end
|
122
125
|
end
|
123
126
|
|
@@ -126,33 +129,34 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
126
129
|
buffer_receive(event)
|
127
130
|
end
|
128
131
|
|
129
|
-
def mutate(
|
132
|
+
def mutate(src)
|
130
133
|
return src if @mutations.empty?
|
131
134
|
res = {}
|
132
135
|
@mutations.each_pair do |dstkey, source|
|
133
136
|
case source
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
137
|
+
when String
|
138
|
+
scrkey = source
|
139
|
+
next unless src.key?(scrkey)
|
140
|
+
|
141
|
+
res[dstkey] = src[scrkey]
|
142
|
+
when Array
|
143
|
+
scrkey = source[0]
|
144
|
+
next unless src.key?(scrkey)
|
145
|
+
pattern = source[1]
|
146
|
+
replace = source[2]
|
147
|
+
res[dstkey] = src[scrkey].sub(Regexp.new(pattern), replace)
|
145
148
|
end
|
146
149
|
end
|
147
150
|
res
|
148
151
|
end
|
149
152
|
|
150
153
|
public
|
151
|
-
|
154
|
+
|
155
|
+
def flush(events, close = false)
|
152
156
|
documents = "" #this is the string of hashes that we push to Fusion as documents
|
153
157
|
|
154
158
|
events.each do |event|
|
155
|
-
|
159
|
+
documents << LogStash::Json.dump(mutate(event.to_hash())) << "\n"
|
156
160
|
end
|
157
161
|
|
158
162
|
hosts = get_host_addresses()
|
@@ -165,10 +169,10 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
165
169
|
def save_to_disk(documents)
|
166
170
|
begin
|
167
171
|
file = File.open("#{save_dir}/#{table}_#{save_file}", "a")
|
168
|
-
file.write(documents)
|
172
|
+
file.write(documents)
|
169
173
|
rescue IOError => e
|
170
174
|
log_failure("An error occurred while saving file to disk: #{e}",
|
171
|
-
|
175
|
+
:file_name => file_name)
|
172
176
|
ensure
|
173
177
|
file.close unless file.nil?
|
174
178
|
end
|
@@ -177,21 +181,20 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
177
181
|
def delay_attempt(attempt_number, delay)
|
178
182
|
# sleep delay grows roughly as k*x*ln(x) where k is the initial delay set in @backoff_time param
|
179
183
|
attempt = [attempt_number, 1].max
|
180
|
-
timeout = lambda { |x| [delay*x*Math.log(x), 1].max }
|
184
|
+
timeout = lambda { |x| [delay * x * Math.log(x), 1].max }
|
181
185
|
# using rand() to pick final sleep delay to reduce the risk of getting in sync with other clients writing to the DB
|
182
|
-
sleep_time = rand(timeout.call(attempt)..timeout.call(attempt+1))
|
186
|
+
sleep_time = rand(timeout.call(attempt)..timeout.call(attempt + 1))
|
183
187
|
sleep sleep_time
|
184
188
|
end
|
185
189
|
|
186
190
|
private
|
187
191
|
|
188
192
|
def make_request(documents, hosts, query, con_count = 1, req_count = 1, host = "", uuid = SecureRandom.hex)
|
189
|
-
|
190
193
|
if host == ""
|
191
194
|
host = hosts.pop
|
192
195
|
end
|
193
196
|
|
194
|
-
url = host+query
|
197
|
+
url = host + query
|
195
198
|
|
196
199
|
# Block waiting for a token
|
197
200
|
#@logger.info("Requesting token ", :tokens => request_tokens.length())
|
@@ -210,25 +213,26 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
210
213
|
@request_tokens << token
|
211
214
|
|
212
215
|
if response.code == 200
|
213
|
-
@logger.debug("Successfully submitted",
|
214
|
-
|
215
|
-
|
216
|
-
|
216
|
+
@logger.debug("Successfully submitted",
|
217
|
+
:size => documents.length,
|
218
|
+
:response_code => response.code,
|
219
|
+
:uuid => uuid)
|
217
220
|
else
|
218
221
|
if req_count >= @request_tolerance
|
219
222
|
log_failure(
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
223
|
+
"Encountered non-200 HTTP code #{response.code}",
|
224
|
+
:response_code => response.code,
|
225
|
+
:url => url,
|
226
|
+
:size => documents.length,
|
227
|
+
:uuid => uuid,
|
228
|
+
)
|
225
229
|
if @save_on_failure
|
226
230
|
save_to_disk(documents)
|
227
231
|
end
|
228
232
|
else
|
229
233
|
@logger.info("Retrying request", :url => url, :message => response.message, :response => response.body, :uuid => uuid)
|
230
234
|
delay_attempt(req_count, @backoff_time)
|
231
|
-
make_request(documents, hosts, query, con_count, req_count+1, host, uuid)
|
235
|
+
make_request(documents, hosts, query, con_count, req_count + 1, host, uuid)
|
232
236
|
end
|
233
237
|
end
|
234
238
|
end
|
@@ -238,21 +242,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
238
242
|
@request_tokens << token
|
239
243
|
|
240
244
|
if hosts.length == 0
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
245
|
+
log_failure("Could not access URL",
|
246
|
+
:url => url,
|
247
|
+
:method => @http_method,
|
248
|
+
:headers => headers,
|
249
|
+
:message => exception.message,
|
250
|
+
:class => exception.class.name,
|
251
|
+
:backtrace => exception.backtrace,
|
252
|
+
:size => documents.length,
|
253
|
+
:uuid => uuid)
|
254
|
+
if @save_on_failure
|
255
|
+
save_to_disk(documents)
|
256
|
+
end
|
257
|
+
return
|
254
258
|
end
|
255
|
-
|
259
|
+
|
256
260
|
if con_count >= @automatic_retries
|
257
261
|
host = ""
|
258
262
|
con_count = 0
|
@@ -260,7 +264,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
260
264
|
|
261
265
|
@logger.info("Retrying connection", :url => url, :uuid => uuid)
|
262
266
|
delay_attempt(con_count, @backoff_time)
|
263
|
-
make_request(documents, hosts, query, con_count+1, req_count, host, uuid)
|
267
|
+
make_request(documents, hosts, query, con_count + 1, req_count, host, uuid)
|
264
268
|
end
|
265
269
|
|
266
270
|
client.execute!
|
@@ -276,5 +280,4 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
276
280
|
headers["Content-Type"] ||= "application/json"
|
277
281
|
headers
|
278
282
|
end
|
279
|
-
|
280
283
|
end
|
@@ -1,16 +1,16 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
|
-
s.name
|
3
|
-
s.version
|
4
|
-
s.licenses
|
5
|
-
s.summary
|
6
|
-
s.description
|
7
|
-
s.authors
|
8
|
-
s.email
|
9
|
-
s.homepage
|
2
|
+
s.name = "logstash-output-clickhouse"
|
3
|
+
s.version = "0.1.3"
|
4
|
+
s.licenses = ["Apache-2.0"]
|
5
|
+
s.summary = "This output lets you `POST` messages as JSON in a batched fashion to ClickHouse HTTP endpoint"
|
6
|
+
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
|
7
|
+
s.authors = ["kmajk", "maltoze"]
|
8
|
+
s.email = "atonezzz@gmail.com"
|
9
|
+
s.homepage = "https://github.com/maltoze/logstash-output-clickhouse"
|
10
10
|
s.require_paths = ["lib"]
|
11
11
|
|
12
12
|
# Files
|
13
|
-
s.files = Dir[
|
13
|
+
s.files = Dir["lib/**/*", "spec/**/*", "*.gemspec", "*.md", "Gemfile", "LICENSE"]
|
14
14
|
|
15
15
|
# Tests
|
16
16
|
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
@@ -20,10 +20,10 @@ Gem::Specification.new do |s|
|
|
20
20
|
|
21
21
|
# Gem dependencies
|
22
22
|
s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
|
23
|
-
s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "
|
24
|
-
s.add_runtime_dependency
|
23
|
+
s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "< 8.0.0"
|
24
|
+
s.add_runtime_dependency "mini_cache", ">= 1.0.0", "< 2.0.0"
|
25
25
|
|
26
|
-
s.add_development_dependency
|
27
|
-
s.add_development_dependency
|
28
|
-
s.add_development_dependency
|
26
|
+
s.add_development_dependency "logstash-devutils"
|
27
|
+
s.add_development_dependency "sinatra"
|
28
|
+
s.add_development_dependency "webrick"
|
29
29
|
end
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-output-clickhouse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kmajk
|
8
|
+
- maltoze
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2022-07-15 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: logstash-core-plugin-api
|
@@ -37,9 +38,9 @@ dependencies:
|
|
37
38
|
- - ">="
|
38
39
|
- !ruby/object:Gem::Version
|
39
40
|
version: 6.0.0
|
40
|
-
- - "
|
41
|
+
- - "<"
|
41
42
|
- !ruby/object:Gem::Version
|
42
|
-
version:
|
43
|
+
version: 8.0.0
|
43
44
|
type: :runtime
|
44
45
|
prerelease: false
|
45
46
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -47,9 +48,9 @@ dependencies:
|
|
47
48
|
- - ">="
|
48
49
|
- !ruby/object:Gem::Version
|
49
50
|
version: 6.0.0
|
50
|
-
- - "
|
51
|
+
- - "<"
|
51
52
|
- !ruby/object:Gem::Version
|
52
|
-
version:
|
53
|
+
version: 8.0.0
|
53
54
|
- !ruby/object:Gem::Dependency
|
54
55
|
name: mini_cache
|
55
56
|
requirement: !ruby/object:Gem::Requirement
|
@@ -115,7 +116,7 @@ dependencies:
|
|
115
116
|
description: This gem is a logstash plugin required to be installed on top of the
|
116
117
|
Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
|
117
118
|
a stand-alone program
|
118
|
-
email:
|
119
|
+
email: atonezzz@gmail.com
|
119
120
|
executables: []
|
120
121
|
extensions: []
|
121
122
|
extra_rdoc_files: []
|
@@ -126,7 +127,7 @@ files:
|
|
126
127
|
- lib/logstash/outputs/clickhouse.rb
|
127
128
|
- lib/logstash/util/shortname_resolver.rb
|
128
129
|
- logstash-output-clickhouse.gemspec
|
129
|
-
homepage:
|
130
|
+
homepage: https://github.com/maltoze/logstash-output-clickhouse
|
130
131
|
licenses:
|
131
132
|
- Apache-2.0
|
132
133
|
metadata:
|