logstash-output-clickhouse 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1fbf922d44fe511cc743b7fd7c86ad3ea3de2ed8ed8afc3bbe549eb7dace1b03
4
+ data.tar.gz: 6ead7854b8ef425526d9617a3e178c9ff7298eb57fac8f6a84b04c75e9873f08
5
+ SHA512:
6
+ metadata.gz: c5f5f85f5dc3e2f188cac06a78289e269d6b47a114fec1b2defd37fa343ce3af6cf1ace1a85d8fd5e323dd61866ad944ea2b5d45d6cea1f23f1c797997f24007
7
+ data.tar.gz: 88e06fa018fc126c056cfc4a8c8c6237d5921d10449aa1878e506746f77cddc8b3db88dc73f34f5a631c7fd273ca95274a4fc44d348603514d123307f9150d22
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright 2017 GetResponse.com
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # I switched to vector -> https://github.com/timberio/vector.
2
+
3
+ # Logstash Plugin
4
+
5
+ This plugin is a modified version of the Lucidworks logstash json_batch. That plugin is available [here](https://github.com/lucidworks/logstash-output-json_batch).
6
+
7
+ It has been modified to support the ClickHouse JSON format (`JSONEachRow`), and it also adds fault tolerance.
8
+
9
+ # Usage
10
+
11
+ Please note that the plugin is named `clickhouse` when used in a pipeline configuration, and that it only supports JSON in its current form. If further output formats are added in the future, this might change back to json_batch.
12
+
13
+ output {
14
+ clickhouse {
15
+ headers => ["Authorization", "Basic YWRtaW46cGFzc3dvcmQxMjM="]
16
+ http_hosts => ["http://your.clickhouse1/", "http://your.clickhouse2/", "http://your.clickhouse3/"]
17
+ table => "table_name"
18
+ mutations => {
19
+ "to1" => "from1"
20
+ "to2" => [ "from2", "(.)(.)", '\1\2' ]
21
+ }
22
+ }
23
+ }
24
+
25
+ ## Other custom options
26
+ * `save_on_failure` (default: true) - enable / disable request body save on failure
27
+ * `save_dir` (default: /tmp) - directory where failed request body will be saved
28
+ * `automatic_retries` (default: 3) - number of connect retry attempts to each host in `http_hosts`
29
+ * `request_tolerance` (default: 5) - number of http request send retry attempts if response status code is not 200
30
+ * `backoff_time` (default: 3) - time to wait in seconds for next retry attempt of connect or request
31
+
32
+ Default batch size is 50, with a wait of at most 5 seconds per send. These can be tweaked with the parameters `flush_size` and `idle_flush_time` respectively.
33
+
34
+ # Installation
35
+
36
+ The easiest way to use this plugin is by installing it through rubygems like any other logstash plugin. To get the latest version installed, you should run the following command: `bin/logstash-plugin install logstash-output-clickhouse`
37
+
38
+ # Building the gem and installing a local version
39
+
40
+ To build the gem yourself, use `gem build logstash-output-clickhouse.gemspec` in the root of this repository. Alternatively, you can download a built version of the gem from the `dist` branch of this repository.
41
+
42
+ To install, run the following command, assuming the gem is in the local directory: `$LOGSTASH_HOME/bin/logstash-plugin install logstash-output-clickhouse-X.Y.Z.gem`
43
+
@@ -0,0 +1,280 @@
1
+ # encoding: utf-8
2
+ require "logstash/outputs/base"
3
+ require "logstash/namespace"
4
+ require "logstash/json"
5
+ require "logstash/util/shortname_resolver"
6
+ require "uri"
7
+ require "stud/buffer"
8
+ require "logstash/plugin_mixins/http_client"
9
+ require "securerandom"
10
+
11
+
12
+ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
13
+ include LogStash::PluginMixins::HttpClient
14
+ include Stud::Buffer
15
+
16
+ concurrency :single
17
+
18
+ config_name "clickhouse"
19
+
20
+ config :http_hosts, :validate => :array, :required => true
21
+
22
+ config :table, :validate => :string, :required => true
23
+
24
+ # Custom headers to use
25
+ # format is `headers => ["X-My-Header", "%{host}"]`
26
+ config :headers, :validate => :hash
27
+
28
+ config :flush_size, :validate => :number, :default => 50
29
+
30
+ config :idle_flush_time, :validate => :number, :default => 5
31
+
32
+ config :pool_max, :validate => :number, :default => 50
33
+
34
+ config :save_on_failure, :validate => :boolean, :default => true
35
+
36
+ config :save_dir, :validate => :string, :default => "/tmp"
37
+
38
+ config :save_file, :validate => :string, :default => "failed.json"
39
+
40
+ config :request_tolerance, :validate => :number, :default => 5
41
+
42
+ config :backoff_time, :validate => :number, :default => 3
43
+
44
+ config :automatic_retries, :validate => :number, :default => 3
45
+
46
+ config :mutations, :validate => :hash, :default => {}
47
+
48
+ config :host_resolve_ttl_sec, :validate => :number, :default => 120
49
+
50
# Logs the running plugin name/version and the effective output settings.
#
# The version is looked up among the installed gem specifications. When no
# matching specification is found (for example when the plugin is loaded
# straight from a source checkout), a placeholder name/version is logged
# instead of failing on a nil specification.
#
# Side effects: sets @plugin_name and @plugin_version, writes two info lines
# to @logger.
def print_plugin_info()
  # A local replaces the former class variable; nothing else in this method
  # needs the list of matching specifications.
  spec = Gem::Specification.find_all { |candidate| candidate.name =~ /logstash-output-clickhouse/ }.first

  @plugin_name = spec ? spec.name : "logstash-output-clickhouse"
  @plugin_version = spec ? spec.version : "unknown"
  @logger.info("Running #{@plugin_name} version #{@plugin_version}")

  @logger.info("Initialized clickhouse with settings",
    :flush_size => @flush_size,
    :idle_flush_time => @idle_flush_time,
    :request_tokens => @pool_max,
    :http_hosts => @http_hosts,
    :http_query => @http_query,
    :headers => request_headers)
end
64
+
65
# Prepares the output for use: builds the request-token pool, the INSERT
# query string, the lazily evaluated host pool and the event buffer, then
# logs the plugin settings.
def register
  # Back-pressure: the queue starts with @pool_max tokens. A sender takes a
  # token before issuing a request and puts it back when the request is done,
  # so an empty queue blocks further requests.
  @request_tokens = SizedQueue.new(@pool_max)
  @pool_max.times { @request_tokens.push(true) }
  @requests = []
  @http_query = "/?query=INSERT%20INTO%20#{table}%20FORMAT%20JSONEachRow"

  resolver = ShortNameResolver.new(ttl: @host_resolve_ttl_sec, logger: @logger)
  @hostnames_pool = parse_http_hosts(http_hosts, resolver)

  buffer_options = {
    :max_items => @flush_size,
    :max_interval => @idle_flush_time,
    :logger => @logger
  }
  buffer_initialize(buffer_options)

  print_plugin_info
end # def register
90
+
91
+ private
92
+
93
# Builds a lambda that expands the configured host URLs into concrete
# endpoint URLs every time it is called.
#
# hosts    - Array of URL strings (for example "http://clickhouse:8123/").
# resolver - object responding to get_addresses(hostname) and returning an
#            Array of address strings.
#
# Returns a lambda. Calling it returns a flat Array of URL strings:
# * a URL whose host is already an IPv4 literal or a bracketed IPv6 literal
#   is passed through unchanged (no lookup);
# * any other URL is replaced by one URL per resolved address, rebuilt from
#   the scheme, port and path of the original (other URL parts are not
#   carried over).
#
# Resolution happens inside the lambda, so the resolver is consulted on each
# call rather than once at construction time.
def parse_http_hosts(hosts, resolver)
  ipv4_re = /\A\d+\.\d+\.\d+\.\d+\z/

  lambda {
    hosts.flat_map { |h|
      uri = URI(h)
      host = uri.host

      # Address literals need no lookup. URI#host keeps the brackets of an
      # IPv6 literal, which is what the start_with? check relies on.
      next [h] if host && (ipv4_re =~ host || host.start_with?("["))

      resolver.get_addresses(host).map { |ip|
        # An IPv6 address must be bracketed inside a URL, otherwise its
        # colons are ambiguous with the port separator.
        address = ip.include?(":") ? "[#{ip}]" : ip
        "#{uri.scheme}://#{address}:#{uri.port}#{uri.path}"
      }
    }
  }
end
113
+
114
+ private
115
+
116
# Evaluates the host pool lambda stored in @hostnames_pool.
#
# Returns whatever the lambda returns (an Array of endpoint URL strings), or
# an empty Array when evaluating it raises, so callers always receive an
# Array. The error is logged.
#
# Only StandardError is rescued so that interrupts and exit requests are not
# swallowed.
def get_host_addresses()
  @hostnames_pool.call
rescue StandardError => ex
  @logger.error('Error while resolving host', :error => ex.to_s)
  []
end
123
+
124
+ # This module currently does not support parallel requests as that would circumvent the batching
125
# Accepts a single event and hands it to buffer_receive; nothing is sent from
# here directly.
def receive(event)
  buffer_receive event
end
128
+
129
# Builds the document to send from an event hash according to @mutations.
#
# src - Hash of event fields.
#
# @mutations maps a destination key to either
# * a String: the source key whose value is copied as-is, or
# * an Array [source_key, pattern, replacement]: the source value with the
#   first match of pattern replaced by replacement (String#sub semantics,
#   so back-references such as '\1' work).
#
# Returns src itself when no mutations are configured; otherwise a new Hash
# containing only the destination keys whose source key exists in src.
#
# For the Array form: a nil value, or an entry without a pattern, is copied
# unchanged; any other non-String value is converted with to_s before the
# substitution; a missing replacement removes the match. Previously each of
# these cases raised and failed the whole batch.
def mutate( src )
  return src if @mutations.empty?

  @mutations.each_with_object({}) do |(dstkey, source), res|
    case source
    when String
      res[dstkey] = src[source] if src.key?(source)
    when Array
      srckey, pattern, replace = source
      next unless src.key?(srckey)

      value = src[srckey]
      if value.nil? || pattern.nil?
        res[dstkey] = value
      else
        # Patterns come from static configuration; compile each one once
        # instead of once per event.
        regexp = (@mutation_regexps ||= {})[pattern] ||= Regexp.new(pattern)
        res[dstkey] = value.to_s.sub(regexp, replace.to_s)
      end
    end
  end
end
149
+
150
+ public
151
# Serialises a batch of events and sends it in a single request.
#
# events - Array of events; each must respond to to_hash.
# close  - unused here; kept for the flush(events, close) signature.
#
# Each event is mutated (see mutate) and dumped as one JSON document per
# line. A host is picked at random from the currently available addresses
# for the first attempt; the full list is handed to make_request for
# fail-over.
#
# Raises RuntimeError when no host address is available. The batch is not
# sent in that case; the error is raised explicitly instead of failing later
# on a nil host.
def flush(events, close=false)
  documents = events.map { |event| "#{LogStash::Json.dump(mutate(event.to_hash))}\n" }.join

  hosts = get_host_addresses()
  if hosts.nil? || hosts.empty?
    raise "No ClickHouse host address available, cannot send #{events.length} event(s)"
  end

  make_request(documents, hosts, @http_query, 1, 1, hosts.sample)
end
162
+
163
+ private
164
+
165
# Appends a failed request body to "<save_dir>/<table>_<save_file>".
#
# documents - String request body to persist.
#
# Returns the number of bytes written. If the file cannot be opened or
# written, the error is reported through log_failure (with the target file
# name) and not raised.
#
# File.open failures such as a missing directory or insufficient permissions
# are SystemCallError (Errno::*), not IOError, so both are rescued. The block
# form of File.open closes the file on every path.
def save_to_disk(documents)
  file_name = "#{save_dir}/#{table}_#{save_file}"
  File.open(file_name, "a") { |file| file.write(documents) }
rescue IOError, SystemCallError => e
  log_failure("An error occurred while saving file to disk: #{e}",
    :file_name => file_name)
end
176
+
177
# Sleeps for a randomised back-off period before the next retry.
#
# attempt_number - retry counter; values below 1 are treated as 1.
# delay          - base delay factor k (callers pass @backoff_time).
#
# The bound for attempt x is max(k * x * ln(x), 1). The actual sleep is drawn
# at random between the bound for this attempt and the bound for the next
# one, which reduces the chance of several writers retrying in lock-step.
def delay_attempt(attempt_number, delay)
  current = [attempt_number, 1].max
  bound_for = ->(x) { [delay*x*Math.log(x), 1].max }

  shortest = bound_for.call(current)
  longest = bound_for.call(current+1)

  sleep(rand(shortest..longest))
end
185
+
186
+ private
187
+
188
# Sends one batch to a ClickHouse endpoint and retries on failure.
#
# documents - String request body.
# hosts     - Array of fail-over endpoint URLs. It is mutated: a host is
#             popped from the end whenever a new host is needed.
# query     - String appended to the host URL to form the request URL.
# con_count - connection attempts already made against the current host.
# req_count - request attempts already made; compared with
#             @request_tolerance when the response code is not 200.
# host      - endpoint URL to use; "" means "take the next one from hosts".
# uuid      - identifier attached to the log entries of this batch.
#
# A token is taken from @request_tokens before the request is created
# (blocking while none is available) and is returned in both callbacks.
#
# Response handling:
# * code 200: logged at debug level.
# * other code: retried against the same host after a back-off, up to
#   @request_tolerance attempts; then the failure is logged and the body is
#   saved to disk when @save_on_failure is set.
# * connection failure: retried after a back-off; after @automatic_retries
#   attempts the next host is taken from hosts. When hosts is empty the
#   failure is logged and the body is saved to disk when @save_on_failure
#   is set.
#
# Raises: any StandardError raised while creating the request is logged and
# re-raised after the token has been returned. Previously the token was lost
# and the method failed on the missing request object.
def make_request(documents, hosts, query, con_count = 1, req_count = 1, host = "", uuid = SecureRandom.hex)
  host = hosts.pop if host == ""

  url = host+query

  # Block waiting for a token
  token = @request_tokens.pop
  @logger.debug("Got token", :tokens => @request_tokens.length)

  # Create an async request
  begin
    request = client.send(:post, url, :body => documents, :headers => request_headers, :async => true)
  rescue StandardError => e
    # Without a request object no callback will ever hand the token back.
    @request_tokens << token
    @logger.warn("An error occurred while indexing: #{e.message}")
    raise
  end

  request.on_success do |response|
    # Make sure we return the token to the pool
    @request_tokens << token

    if response.code == 200
      @logger.debug("Successfully submitted",
        :size => documents.length,
        :response_code => response.code,
        :uuid => uuid)
    elsif req_count >= @request_tolerance
      log_failure(
        "Encountered non-200 HTTP code #{response.code}",
        :response_code => response.code,
        :url => url,
        :size => documents.length,
        :uuid => uuid)
      save_to_disk(documents) if @save_on_failure
    else
      @logger.info("Retrying request", :url => url, :message => response.message, :response => response.body, :uuid => uuid)
      delay_attempt(req_count, @backoff_time)
      make_request(documents, hosts, query, con_count, req_count+1, host, uuid)
    end
  end

  request.on_failure do |exception|
    # Make sure we return the token to the pool
    @request_tokens << token

    # if/else instead of an early `return`: a `return` inside this block
    # would try to return from make_request itself.
    if hosts.length == 0
      log_failure("Could not access URL",
        :url => url,
        :method => :post, # requests are always created with :post above
        :headers => headers,
        :message => exception.message,
        :class => exception.class.name,
        :backtrace => exception.backtrace,
        :size => documents.length,
        :uuid => uuid)
      save_to_disk(documents) if @save_on_failure
    else
      if con_count >= @automatic_retries
        # Give up on this host; "" makes the next call pop a new one.
        host = ""
        con_count = 0
      end

      @logger.info("Retrying connection", :url => url, :uuid => uuid)
      delay_attempt(con_count, @backoff_time)
      make_request(documents, hosts, query, con_count+1, req_count, host, uuid)
    end
  end

  client.execute!
end
268
+
269
+ # This is split into a separate method mostly to help testing
270
# Reports a failure through @logger at error level, prefixing the message
# with "[HTTP Output Failure]". opts is passed to the logger unchanged.
def log_failure(message, opts)
  prefixed = "[HTTP Output Failure] #{message}"
  @logger.error(prefixed, opts)
end
273
+
274
# Returns the headers to send with every request: the configured @headers
# hash (or a new empty hash when none is configured) with "Content-Type"
# set to "application/json" unless it already has a value.
#
# Note that a configured @headers hash is updated in place.
def request_headers()
  result = @headers ? @headers : {}
  result["Content-Type"] = "application/json" unless result["Content-Type"]
  result
end
279
+
280
+ end
@@ -0,0 +1,40 @@
1
+ require 'resolv'
2
+ require 'mini_cache'
3
+
4
# Resolves host names to address lists through DNS and caches each result in
# a MiniCache store with the configured TTL.
class ShortNameResolver
  # ttl    - expiry passed to MiniCache::Data as expires_in for each entry.
  # logger - object responding to info(message).
  def initialize(ttl:, logger:)
    @ttl = ttl
    @store = MiniCache::Store.new
    @logger = logger
  end

  # Returns one address, picked at random, for the given name.
  def get_address(shortname)
    resolve_cached(shortname).sample
  end

  # Returns all addresses for the given name.
  def get_addresses(shortname)
    resolve_cached(shortname)
  end

  private

  # Returns the stored addresses for shortname, resolving and storing them
  # when the store has no entry. Raises RuntimeError when the lookup yields
  # no address at all.
  def resolve_cached(shortname)
    @store.get_or_set(shortname) do
      found = resolve(shortname)
      raise "Bad shortname '#{shortname}'" if found.empty?
      MiniCache::Data.new(found, expires_in: @ttl)
    end
  end

  # Performs the DNS lookup and returns the addresses as strings; the result
  # is logged at info level.
  def resolve(shortname)
    found = Resolv::DNS.open do |dns|
      dns.getaddresses(shortname).map(&:to_s)
    end

    @logger.info("Resolved shortname '#{shortname}' to addresses #{found}")

    found
  end
end
@@ -0,0 +1,29 @@
1
# Gem specification for the ClickHouse output plugin.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'logstash-output-clickhouse'
  spec.version     = '0.1.1'
  spec.licenses    = ['Apache-2.0']
  spec.summary     = "This output lets you `POST` messages as JSON in a batched fashion to ClickHouse HTTP endpoint"
  spec.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
  spec.authors     = ["kmajk"]
  spec.email       = 'k.majk@getresponse.com'
  spec.homepage    = "http://getresponse.com"

  # Packaged files; test files are the packaged files under test/, spec/ or features/
  spec.require_paths = ["lib"]
  spec.files         = Dir['lib/**/*','spec/**/*','*.gemspec','*.md','Gemfile','LICENSE' ]
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})

  # Marks the gem as a Logstash plugin of the "output" group
  spec.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }

  # Runtime dependencies
  spec.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
  spec.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "<= 7.0.0"
  spec.add_runtime_dependency 'mini_cache', ">= 1.0.0", "< 2.0.0"

  # Development-only dependencies
  spec.add_development_dependency 'logstash-devutils'
  spec.add_development_dependency 'sinatra'
  spec.add_development_dependency 'webrick'
end
metadata ADDED
@@ -0,0 +1,156 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-output-clickhouse
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - kmajk
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-08-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash-core-plugin-api
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1.60'
20
+ - - "<="
21
+ - !ruby/object:Gem::Version
22
+ version: '2.99'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '1.60'
30
+ - - "<="
31
+ - !ruby/object:Gem::Version
32
+ version: '2.99'
33
+ - !ruby/object:Gem::Dependency
34
+ name: logstash-mixin-http_client
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: 6.0.0
40
+ - - "<="
41
+ - !ruby/object:Gem::Version
42
+ version: 7.0.0
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 6.0.0
50
+ - - "<="
51
+ - !ruby/object:Gem::Version
52
+ version: 7.0.0
53
+ - !ruby/object:Gem::Dependency
54
+ name: mini_cache
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 1.0.0
60
+ - - "<"
61
+ - !ruby/object:Gem::Version
62
+ version: 2.0.0
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: 1.0.0
70
+ - - "<"
71
+ - !ruby/object:Gem::Version
72
+ version: 2.0.0
73
+ - !ruby/object:Gem::Dependency
74
+ name: logstash-devutils
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ type: :development
81
+ prerelease: false
82
+ version_requirements: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ - !ruby/object:Gem::Dependency
88
+ name: sinatra
89
+ requirement: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ type: :development
95
+ prerelease: false
96
+ version_requirements: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ - !ruby/object:Gem::Dependency
102
+ name: webrick
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ type: :development
109
+ prerelease: false
110
+ version_requirements: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ description: This gem is a logstash plugin required to be installed on top of the
116
+ Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
117
+ a stand-alone program
118
+ email: k.majk@getresponse.com
119
+ executables: []
120
+ extensions: []
121
+ extra_rdoc_files: []
122
+ files:
123
+ - Gemfile
124
+ - LICENSE
125
+ - README.md
126
+ - lib/logstash/outputs/clickhouse.rb
127
+ - lib/logstash/util/shortname_resolver.rb
128
+ - logstash-output-clickhouse.gemspec
129
+ homepage: http://getresponse.com
130
+ licenses:
131
+ - Apache-2.0
132
+ metadata:
133
+ logstash_plugin: 'true'
134
+ logstash_group: output
135
+ post_install_message:
136
+ rdoc_options: []
137
+ require_paths:
138
+ - lib
139
+ required_ruby_version: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ required_rubygems_version: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: '0'
149
+ requirements: []
150
+ rubyforge_project:
151
+ rubygems_version: 2.7.6
152
+ signing_key:
153
+ specification_version: 4
154
+ summary: This output lets you `POST` messages as JSON in a batched fashion to ClickHouse
155
+ HTTP endpoint
156
+ test_files: []