logstash-output-clickhouse 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/lib/logstash/outputs/clickhouse.rb +64 -61
- data/logstash-output-clickhouse.gemspec +14 -14
- metadata +9 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 05f2b2d96799587dc5678cd80a6ef5d0c62a103797a298dfbe86acb1dbe09b89
|
4
|
+
data.tar.gz: ffeb98ddb2e27497ff54e9304180a2a44e88d093ce33755f842409a2101d4153
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c6d244d35441f46a0b49b79d1f278004e634334349d3949ec8d49e96fb02e31c942d13f8be72e7ddcd9996687acf81f1378b3587cbab6c6a9a9f93eb6634a6a
|
7
|
+
data.tar.gz: 530b73e05f582bf9b9af8a3be95aa1139fd0b21c3a75b411635453fa9c5e4bc538f8fb8477ab2388fad2560570c92a3623d614c97e69f103850ef6e54d45a23a
|
data/README.md
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# I switched to vector -> https://github.com/timberio/vector.
|
2
|
-
|
3
1
|
# Logstash Plugin
|
4
2
|
|
5
3
|
This plugin is a modified version of the Lucidworks logstash json_batch. That plugin is available [here](https://github.com/lucidworks/logstash-output-json_batch).
|
@@ -19,6 +17,9 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
|
|
19
17
|
"to1" => "from1"
|
20
18
|
"to2" => [ "from2", "(.)(.)", '\1\2' ]
|
21
19
|
}
|
20
|
+
extra_params => {
|
21
|
+
"date_time_input_format" => "best_effort"
|
22
|
+
}
|
22
23
|
}
|
23
24
|
}
|
24
25
|
|
@@ -28,6 +29,7 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
|
|
28
29
|
* `automatic_retries` (default: 3) - number of connection retry attempts made to each host in `http_hosts`
|
29
30
|
* `request_tolerance` (default: 5) - number of attempts to send an HTTP request when the response status code is not 200
|
30
31
|
* `backoff_time` (default: 3) - initial delay, in seconds, before the next connection or request retry attempt; the delay grows with each attempt
|
32
|
+
* `extra_params` (default: {}) - extra parameters added to the query string of the ClickHouse HTTP insert request (for example, `date_time_input_format`)
|
31
33
|
|
32
34
|
Default batch size is 50, with a wait of at most 5 seconds per send. These can be tweaked with the parameters `flush_size` and `idle_flush_time` respectively.
|
33
35
|
|
@@ -8,7 +8,6 @@ require "stud/buffer"
|
|
8
8
|
require "logstash/plugin_mixins/http_client"
|
9
9
|
require "securerandom"
|
10
10
|
|
11
|
-
|
12
11
|
class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
13
12
|
include LogStash::PluginMixins::HttpClient
|
14
13
|
include Stud::Buffer
|
@@ -20,7 +19,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
20
19
|
config :http_hosts, :validate => :array, :required => true
|
21
20
|
|
22
21
|
config :table, :validate => :string, :required => true
|
23
|
-
|
22
|
+
|
24
23
|
# Custom headers to use
|
25
24
|
# format is `headers => ["X-My-Header", "%{host}"]`
|
26
25
|
config :headers, :validate => :hash
|
@@ -38,7 +37,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
38
37
|
config :save_file, :validate => :string, :default => "failed.json"
|
39
38
|
|
40
39
|
config :request_tolerance, :validate => :number, :default => 5
|
41
|
-
|
40
|
+
|
42
41
|
config :backoff_time, :validate => :number, :default => 3
|
43
42
|
|
44
43
|
config :automatic_retries, :validate => :number, :default => 3
|
@@ -47,19 +46,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
47
46
|
|
48
47
|
config :host_resolve_ttl_sec, :validate => :number, :default => 120
|
49
48
|
|
49
|
+
config :extra_params, :validate => :hash, :default => {}
|
50
|
+
|
50
51
|
def print_plugin_info()
|
51
|
-
@@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-clickhouse/ }
|
52
|
+
@@plugins = Gem::Specification.find_all { |spec| spec.name =~ /logstash-output-clickhouse/ }
|
52
53
|
@plugin_name = @@plugins[0].name
|
53
54
|
@plugin_version = @@plugins[0].version
|
54
55
|
@logger.info("Running #{@plugin_name} version #{@plugin_version}")
|
55
56
|
|
56
57
|
@logger.info("Initialized clickhouse with settings",
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
:flush_size => @flush_size,
|
59
|
+
:idle_flush_time => @idle_flush_time,
|
60
|
+
:request_tokens => @pool_max,
|
61
|
+
:http_hosts => @http_hosts,
|
62
|
+
:http_query => @http_query,
|
63
|
+
:headers => request_headers)
|
63
64
|
end
|
64
65
|
|
65
66
|
def register
|
@@ -71,18 +72,20 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
71
72
|
# When this queue is empty no new requests may be sent,
|
72
73
|
# tokens must be added back by the client on success
|
73
74
|
@request_tokens = SizedQueue.new(@pool_max)
|
74
|
-
@pool_max.times {|t| @request_tokens << true }
|
75
|
+
@pool_max.times { |t| @request_tokens << true }
|
75
76
|
@requests = Array.new
|
76
|
-
|
77
|
+
|
78
|
+
params = { "query" => "INSERT INTO #{table} FORMAT JSONEachRow" }.merge(@extra_params)
|
79
|
+
@http_query = "?#{URI.encode_www_form(params)}"
|
77
80
|
|
78
81
|
@hostnames_pool =
|
79
82
|
parse_http_hosts(http_hosts,
|
80
|
-
|
83
|
+
ShortNameResolver.new(ttl: @host_resolve_ttl_sec, logger: @logger))
|
81
84
|
|
82
85
|
buffer_initialize(
|
83
86
|
:max_items => @flush_size,
|
84
87
|
:max_interval => @idle_flush_time,
|
85
|
-
:logger => @logger
|
88
|
+
:logger => @logger,
|
86
89
|
)
|
87
90
|
|
88
91
|
print_plugin_info()
|
@@ -117,7 +120,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
117
120
|
begin
|
118
121
|
@hostnames_pool.call
|
119
122
|
rescue Exception => ex
|
120
|
-
@logger.error(
|
123
|
+
@logger.error("Error while resolving host", :error => ex.to_s)
|
121
124
|
end
|
122
125
|
end
|
123
126
|
|
@@ -126,33 +129,34 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
126
129
|
buffer_receive(event)
|
127
130
|
end
|
128
131
|
|
129
|
-
def mutate(
|
132
|
+
def mutate(src)
|
130
133
|
return src if @mutations.empty?
|
131
134
|
res = {}
|
132
135
|
@mutations.each_pair do |dstkey, source|
|
133
136
|
case source
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
137
|
+
when String
|
138
|
+
scrkey = source
|
139
|
+
next unless src.key?(scrkey)
|
140
|
+
|
141
|
+
res[dstkey] = src[scrkey]
|
142
|
+
when Array
|
143
|
+
scrkey = source[0]
|
144
|
+
next unless src.key?(scrkey)
|
145
|
+
pattern = source[1]
|
146
|
+
replace = source[2]
|
147
|
+
res[dstkey] = src[scrkey].sub(Regexp.new(pattern), replace)
|
145
148
|
end
|
146
149
|
end
|
147
150
|
res
|
148
151
|
end
|
149
152
|
|
150
153
|
public
|
151
|
-
|
154
|
+
|
155
|
+
def flush(events, close = false)
|
152
156
|
documents = "" #this is the string of hashes that we push to Fusion as documents
|
153
157
|
|
154
158
|
events.each do |event|
|
155
|
-
|
159
|
+
documents << LogStash::Json.dump(mutate(event.to_hash())) << "\n"
|
156
160
|
end
|
157
161
|
|
158
162
|
hosts = get_host_addresses()
|
@@ -165,10 +169,10 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
165
169
|
def save_to_disk(documents)
|
166
170
|
begin
|
167
171
|
file = File.open("#{save_dir}/#{table}_#{save_file}", "a")
|
168
|
-
file.write(documents)
|
172
|
+
file.write(documents)
|
169
173
|
rescue IOError => e
|
170
174
|
log_failure("An error occurred while saving file to disk: #{e}",
|
171
|
-
|
175
|
+
:file_name => file_name)
|
172
176
|
ensure
|
173
177
|
file.close unless file.nil?
|
174
178
|
end
|
@@ -177,21 +181,20 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
177
181
|
def delay_attempt(attempt_number, delay)
|
178
182
|
# sleep delay grows roughly as k*x*ln(x) where k is the initial delay set in @backoff_time param
|
179
183
|
attempt = [attempt_number, 1].max
|
180
|
-
timeout = lambda { |x| [delay*x*Math.log(x), 1].max }
|
184
|
+
timeout = lambda { |x| [delay * x * Math.log(x), 1].max }
|
181
185
|
# using rand() to pick final sleep delay to reduce the risk of getting in sync with other clients writing to the DB
|
182
|
-
sleep_time = rand(timeout.call(attempt)..timeout.call(attempt+1))
|
186
|
+
sleep_time = rand(timeout.call(attempt)..timeout.call(attempt + 1))
|
183
187
|
sleep sleep_time
|
184
188
|
end
|
185
189
|
|
186
190
|
private
|
187
191
|
|
188
192
|
def make_request(documents, hosts, query, con_count = 1, req_count = 1, host = "", uuid = SecureRandom.hex)
|
189
|
-
|
190
193
|
if host == ""
|
191
194
|
host = hosts.pop
|
192
195
|
end
|
193
196
|
|
194
|
-
url = host+query
|
197
|
+
url = host + query
|
195
198
|
|
196
199
|
# Block waiting for a token
|
197
200
|
#@logger.info("Requesting token ", :tokens => request_tokens.length())
|
@@ -210,25 +213,26 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
210
213
|
@request_tokens << token
|
211
214
|
|
212
215
|
if response.code == 200
|
213
|
-
@logger.debug("Successfully submitted",
|
214
|
-
|
215
|
-
|
216
|
-
|
216
|
+
@logger.debug("Successfully submitted",
|
217
|
+
:size => documents.length,
|
218
|
+
:response_code => response.code,
|
219
|
+
:uuid => uuid)
|
217
220
|
else
|
218
221
|
if req_count >= @request_tolerance
|
219
222
|
log_failure(
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
223
|
+
"Encountered non-200 HTTP code #{response.code}",
|
224
|
+
:response_code => response.code,
|
225
|
+
:url => url,
|
226
|
+
:size => documents.length,
|
227
|
+
:uuid => uuid,
|
228
|
+
)
|
225
229
|
if @save_on_failure
|
226
230
|
save_to_disk(documents)
|
227
231
|
end
|
228
232
|
else
|
229
233
|
@logger.info("Retrying request", :url => url, :message => response.message, :response => response.body, :uuid => uuid)
|
230
234
|
delay_attempt(req_count, @backoff_time)
|
231
|
-
make_request(documents, hosts, query, con_count, req_count+1, host, uuid)
|
235
|
+
make_request(documents, hosts, query, con_count, req_count + 1, host, uuid)
|
232
236
|
end
|
233
237
|
end
|
234
238
|
end
|
@@ -238,21 +242,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
238
242
|
@request_tokens << token
|
239
243
|
|
240
244
|
if hosts.length == 0
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
245
|
+
log_failure("Could not access URL",
|
246
|
+
:url => url,
|
247
|
+
:method => @http_method,
|
248
|
+
:headers => headers,
|
249
|
+
:message => exception.message,
|
250
|
+
:class => exception.class.name,
|
251
|
+
:backtrace => exception.backtrace,
|
252
|
+
:size => documents.length,
|
253
|
+
:uuid => uuid)
|
254
|
+
if @save_on_failure
|
255
|
+
save_to_disk(documents)
|
256
|
+
end
|
257
|
+
return
|
254
258
|
end
|
255
|
-
|
259
|
+
|
256
260
|
if con_count >= @automatic_retries
|
257
261
|
host = ""
|
258
262
|
con_count = 0
|
@@ -260,7 +264,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
260
264
|
|
261
265
|
@logger.info("Retrying connection", :url => url, :uuid => uuid)
|
262
266
|
delay_attempt(con_count, @backoff_time)
|
263
|
-
make_request(documents, hosts, query, con_count+1, req_count, host, uuid)
|
267
|
+
make_request(documents, hosts, query, con_count + 1, req_count, host, uuid)
|
264
268
|
end
|
265
269
|
|
266
270
|
client.execute!
|
@@ -276,5 +280,4 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
|
|
276
280
|
headers["Content-Type"] ||= "application/json"
|
277
281
|
headers
|
278
282
|
end
|
279
|
-
|
280
283
|
end
|
@@ -1,16 +1,16 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
|
-
s.name
|
3
|
-
s.version
|
4
|
-
s.licenses
|
5
|
-
s.summary
|
6
|
-
s.description
|
7
|
-
s.authors
|
8
|
-
s.email
|
9
|
-
s.homepage
|
2
|
+
s.name = "logstash-output-clickhouse"
|
3
|
+
s.version = "0.1.3"
|
4
|
+
s.licenses = ["Apache-2.0"]
|
5
|
+
s.summary = "This output lets you `POST` messages as JSON in a batched fashion to ClickHouse HTTP endpoint"
|
6
|
+
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
|
7
|
+
s.authors = ["kmajk", "maltoze"]
|
8
|
+
s.email = "atonezzz@gmail.com"
|
9
|
+
s.homepage = "https://github.com/maltoze/logstash-output-clickhouse"
|
10
10
|
s.require_paths = ["lib"]
|
11
11
|
|
12
12
|
# Files
|
13
|
-
s.files = Dir[
|
13
|
+
s.files = Dir["lib/**/*", "spec/**/*", "*.gemspec", "*.md", "Gemfile", "LICENSE"]
|
14
14
|
|
15
15
|
# Tests
|
16
16
|
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
@@ -20,10 +20,10 @@ Gem::Specification.new do |s|
|
|
20
20
|
|
21
21
|
# Gem dependencies
|
22
22
|
s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
|
23
|
-
s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "
|
24
|
-
s.add_runtime_dependency
|
23
|
+
s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "< 8.0.0"
|
24
|
+
s.add_runtime_dependency "mini_cache", ">= 1.0.0", "< 2.0.0"
|
25
25
|
|
26
|
-
s.add_development_dependency
|
27
|
-
s.add_development_dependency
|
28
|
-
s.add_development_dependency
|
26
|
+
s.add_development_dependency "logstash-devutils"
|
27
|
+
s.add_development_dependency "sinatra"
|
28
|
+
s.add_development_dependency "webrick"
|
29
29
|
end
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-output-clickhouse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kmajk
|
8
|
+
- maltoze
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2022-07-15 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: logstash-core-plugin-api
|
@@ -37,9 +38,9 @@ dependencies:
|
|
37
38
|
- - ">="
|
38
39
|
- !ruby/object:Gem::Version
|
39
40
|
version: 6.0.0
|
40
|
-
- - "
|
41
|
+
- - "<"
|
41
42
|
- !ruby/object:Gem::Version
|
42
|
-
version:
|
43
|
+
version: 8.0.0
|
43
44
|
type: :runtime
|
44
45
|
prerelease: false
|
45
46
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -47,9 +48,9 @@ dependencies:
|
|
47
48
|
- - ">="
|
48
49
|
- !ruby/object:Gem::Version
|
49
50
|
version: 6.0.0
|
50
|
-
- - "
|
51
|
+
- - "<"
|
51
52
|
- !ruby/object:Gem::Version
|
52
|
-
version:
|
53
|
+
version: 8.0.0
|
53
54
|
- !ruby/object:Gem::Dependency
|
54
55
|
name: mini_cache
|
55
56
|
requirement: !ruby/object:Gem::Requirement
|
@@ -115,7 +116,7 @@ dependencies:
|
|
115
116
|
description: This gem is a logstash plugin required to be installed on top of the
|
116
117
|
Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
|
117
118
|
a stand-alone program
|
118
|
-
email:
|
119
|
+
email: atonezzz@gmail.com
|
119
120
|
executables: []
|
120
121
|
extensions: []
|
121
122
|
extra_rdoc_files: []
|
@@ -126,7 +127,7 @@ files:
|
|
126
127
|
- lib/logstash/outputs/clickhouse.rb
|
127
128
|
- lib/logstash/util/shortname_resolver.rb
|
128
129
|
- logstash-output-clickhouse.gemspec
|
129
|
-
homepage:
|
130
|
+
homepage: https://github.com/maltoze/logstash-output-clickhouse
|
130
131
|
licenses:
|
131
132
|
- Apache-2.0
|
132
133
|
metadata:
|