logstash-output-clickhouse 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1fbf922d44fe511cc743b7fd7c86ad3ea3de2ed8ed8afc3bbe549eb7dace1b03
4
+ data.tar.gz: 6ead7854b8ef425526d9617a3e178c9ff7298eb57fac8f6a84b04c75e9873f08
5
+ SHA512:
6
+ metadata.gz: c5f5f85f5dc3e2f188cac06a78289e269d6b47a114fec1b2defd37fa343ce3af6cf1ace1a85d8fd5e323dd61866ad944ea2b5d45d6cea1f23f1c797997f24007
7
+ data.tar.gz: 88e06fa018fc126c056cfc4a8c8c6237d5921d10449aa1878e506746f77cddc8b3db88dc73f34f5a631c7fd273ca95274a4fc44d348603514d123307f9150d22
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright 2017 GetResponse.com
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # I switched to vector -> https://github.com/timberio/vector.
2
+
3
+ # Logstash Plugin
4
+
5
+ This plugin is a modified version of the Lucidworks logstash json_batch. That plugin is available [here](https://github.com/lucidworks/logstash-output-json_batch).
6
+
7
+ It has been modified to support ClickHouse JSON Format, but also supports fault tolerance.
8
+
9
+ # Usage
10
+
11
+ Please note that the name of the plugin when used is `clickhouse`. It only supports JSON in its current form. If further output formats are added in the future, this might change back to json_batch.
12
+
13
+ output {
14
+ clickhouse {
15
+ headers => ["Authorization", "Basic YWRtaW46cGFzc3dvcmQxMjM="]
16
+ http_hosts => ["http://your.clickhouse1/", "http://your.clickhouse2/", "http://your.clickhouse3/"]
17
+ table => "table_name"
18
+ mutations => {
19
+ "to1" => "from1"
20
+ "to2" => [ "from2", "(.)(.)", '\1\2' ]
21
+ }
22
+ }
23
+ }
24
+
25
+ ## Other custom options
26
+ * `save_on_failure` (default: true) - enable / disable request body save on failure
27
+ * `save_dir` (default: /tmp) - directory where failed request body will be saved
28
+ * `automatic_retries` (default: 3) - number of connect retry attempts to each host in `http_hosts`
29
+ * `request_tolerance` (default: 5) - number of http request send retry attempts if response status code is not 200
30
+ * `backoff_time` (default: 3) - time to wait in seconds for next retry attempt of connect or request
31
+
32
+ Default batch size is 50, with a wait of at most 5 seconds per send. These can be tweaked with the parameters `flush_size` and `idle_flush_time` respectively.
33
+
34
+ # Installation
35
+
36
+ The easiest way to use this plugin is by installing it through rubygems like any other logstash plugin. To get the latest version installed, you should run the following command: `bin/logstash-plugin install logstash-output-clickhouse`
37
+
38
+ # Building the gem and installing a local version
39
+
40
+ To build the gem yourself, use `gem build logstash-output-clickhouse.gemspec` in the root of this repository. Alternatively, you can download a built version of the gem from the `dist` branch of this repository.
41
+
42
+ To install, run the following command, assuming the gem is in the local directory: `$LOGSTASH_HOME/bin/logstash-plugin install logstash-output-clickhouse-X.Y.Z.gem`
43
+
@@ -0,0 +1,280 @@
1
+ # encoding: utf-8
2
+ require "logstash/outputs/base"
3
+ require "logstash/namespace"
4
+ require "logstash/json"
5
+ require "logstash/util/shortname_resolver"
6
+ require "uri"
7
+ require "stud/buffer"
8
+ require "logstash/plugin_mixins/http_client"
9
+ require "securerandom"
10
+
11
+
12
+ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
13
+ include LogStash::PluginMixins::HttpClient
14
+ include Stud::Buffer
15
+
16
+ concurrency :single
17
+
18
+ config_name "clickhouse"
19
+
20
+ config :http_hosts, :validate => :array, :required => true
21
+
22
+ config :table, :validate => :string, :required => true
23
+
24
+ # Custom headers to use
25
+ # format is `headers => ["X-My-Header", "%{host}"]`
26
+ config :headers, :validate => :hash
27
+
28
+ config :flush_size, :validate => :number, :default => 50
29
+
30
+ config :idle_flush_time, :validate => :number, :default => 5
31
+
32
+ config :pool_max, :validate => :number, :default => 50
33
+
34
+ config :save_on_failure, :validate => :boolean, :default => true
35
+
36
+ config :save_dir, :validate => :string, :default => "/tmp"
37
+
38
+ config :save_file, :validate => :string, :default => "failed.json"
39
+
40
+ config :request_tolerance, :validate => :number, :default => 5
41
+
42
+ config :backoff_time, :validate => :number, :default => 3
43
+
44
+ config :automatic_retries, :validate => :number, :default => 3
45
+
46
+ config :mutations, :validate => :hash, :default => {}
47
+
48
+ config :host_resolve_ttl_sec, :validate => :number, :default => 120
49
+
50
# Logs which version of this plugin gem is installed, followed by the
# effective settings of this output.
#
# Reads @flush_size, @idle_flush_time, @pool_max, @http_hosts and @http_query
# and calls request_headers for the header set. Sets @plugin_name and
# @plugin_version when the gem specification can be found.
def print_plugin_info()
  # A local variable is enough here; the lookup result is not shared state.
  plugin = Gem::Specification.find { |spec| spec.name =~ /logstash-output-clickhouse/ }

  if plugin
    @plugin_name = plugin.name
    @plugin_version = plugin.version
    @logger.info("Running #{@plugin_name} version #{@plugin_version}")
  else
    # No matching gem specification is installed; report that instead of
    # failing on a nil specification.
    @logger.info("Running logstash-output-clickhouse (version unknown)")
  end

  @logger.info("Initialized clickhouse with settings",
    :flush_size => @flush_size,
    :idle_flush_time => @idle_flush_time,
    :request_tokens => @pool_max,
    :http_hosts => @http_hosts,
    :http_query => @http_query,
    :headers => request_headers)
end
64
+
65
# Plugin setup: builds the request-token pool, the insert query string, the
# host resolver lambda and the event buffer, then logs the configuration.
def register
  # Deprecated option, handling currently disabled. TODO: remove the option
  #@ssl_certificate_validation = @verify_ssl if @verify_ssl

  # The token queue provides backpressure: a token is taken before each
  # request and has to be put back once the request has finished. With no
  # tokens left, further requests block.
  @request_tokens = SizedQueue.new(@pool_max)
  @pool_max.times { @request_tokens << true }
  @requests = []
  @http_query = "/?query=INSERT%20INTO%20#{table}%20FORMAT%20JSONEachRow"

  # Stored as a lambda (see parse_http_hosts); it is called on each flush.
  @hostnames_pool = parse_http_hosts(
    http_hosts,
    ShortNameResolver.new(ttl: @host_resolve_ttl_sec, logger: @logger)
  )

  buffer_options = {
    :max_items => @flush_size,
    :max_interval => @idle_flush_time,
    :logger => @logger
  }
  buffer_initialize(buffer_options)

  print_plugin_info()
end # def register
90
+
91
+ private
92
+
93
# Builds a lambda that expands the configured host URLs into concrete
# endpoint URLs each time it is called.
#
# hosts    - Array of URL strings.
# resolver - object responding to get_addresses(host) and returning a list
#            of IP address strings.
#
# Returns a lambda. Calling it returns a flat Array of URL strings: a URL
# whose host already is a dotted-quad IPv4 address is passed through
# unchanged, any other URL is replaced by one "scheme://ip:port/path" entry
# per address the resolver returns. Nothing is resolved until the lambda is
# called.
def parse_http_hosts(hosts, resolver)
  # \A and \z anchor the whole string rather than a single line.
  ip_re = /\A\d+\.\d+\.\d+\.\d+\z/

  lambda {
    hosts.flat_map { |h|
      # Parse once and reuse the components.
      uri = URI(h)

      if ip_re =~ uri.host
        [h]
      else
        resolver.get_addresses(uri.host).map { |ip|
          "#{uri.scheme}://#{ip}:#{uri.port}#{uri.path}"
        }
      end
    }
  }
end
113
+
114
+ private
115
+
116
# Calls the @hostnames_pool lambda to obtain the current list of endpoint
# URLs.
#
# Returns the Array produced by the lambda. If the lambda raises a
# StandardError the error is logged and an empty Array is returned, so the
# result is always a list. Non-StandardError exceptions (signals, exit) are
# no longer swallowed.
def get_host_addresses()
  @hostnames_pool.call
rescue StandardError => ex
  @logger.error('Error while resolving host', :error => ex.to_s)
  []
end
123
+
124
+ # This module currently does not support parallel requests as that would circumvent the batching
125
# Entry point for a single event: the event is handed to buffer_receive so
# that it is sent as part of a batch rather than on its own.
def receive(event)
  buffer_receive(event)
end
128
+
129
# Builds the document to send from an event hash according to @mutations.
#
# src - Hash of event fields.
#
# Returns src itself when @mutations is empty. Otherwise returns a new Hash
# holding only the configured destination keys:
#   "dst" => "srckey"                      copies src["srckey"]
#   "dst" => ["srckey", pattern, replace]  copies src["srckey"] after
#                                          replacing the first match of
#                                          pattern with replace
# Destination keys whose source key is absent from src are left out.
def mutate( src )
  return src if @mutations.empty?

  @mutations.each_with_object({}) do |(dstkey, source), res|
    case source
    when String
      res[dstkey] = src[source] if src.key?(source)
    when Array
      srckey, pattern, replace = source
      next unless src.key?(srckey)

      value = src[srckey]
      # Only strings respond to sub: nil is passed through untouched and any
      # other value is substituted on its string form, so one numeric field
      # cannot make the whole batch fail.
      res[dstkey] = value.nil? ? nil : value.to_s.sub(Regexp.new(pattern), replace)
    end
  end
end
149
+
150
+ public
151
# Sends one batch of buffered events.
#
# events - list of events; each must respond to to_hash.
# close  - accepted for interface compatibility; not used here.
#
# Every event is passed through mutate, serialised with LogStash::Json.dump
# and written as one line of the request body. The body is then handed to
# make_request together with the current host list and one randomly chosen
# host from it.
#
# Raises RuntimeError when no host addresses are available, instead of
# failing later with a NoMethodError on a missing host.
# NOTE(review): presumably the buffer that calls flush retries the batch
# when flush raises - confirm against Stud::Buffer.
def flush(events, close=false)
  documents = events.map { |event|
    "#{LogStash::Json.dump( mutate( event.to_hash() ) )}\n"
  }.join

  hosts = get_host_addresses()

  unless hosts.is_a?(Array) && !hosts.empty?
    raise "No ClickHouse host addresses available, cannot send batch"
  end

  make_request(documents, hosts, @http_query, 1, 1, hosts.sample)
end
162
+
163
+ private
164
+
165
# Appends a failed request body to "<save_dir>/<table>_<save_file>".
#
# documents - String body to append.
#
# I/O and system-call errors (missing directory, permissions, ...) are
# reported through log_failure together with the target file name; they are
# not raised to the caller.
def save_to_disk(documents)
  file_name = "#{save_dir}/#{table}_#{save_file}"
  # The block form closes the file even when the write fails.
  File.open(file_name, "a") { |file| file.write(documents) }
rescue IOError, SystemCallError => e
  log_failure("An error occurred while saving file to disk: #{e}",
    :file_name => file_name)
end
176
+
177
# Sleeps for a randomised backoff before the next retry.
#
# attempt_number - retry counter; values below 1 are treated as 1.
# delay          - base delay (the callers pass @backoff_time).
#
# For attempt x the bound is max(delay * x * ln(x), 1). The sleep time is
# drawn at random between the bound for this attempt and the bound for the
# next one, which lowers the chance of several writers retrying in lockstep.
def delay_attempt(attempt_number, delay)
  step = attempt_number < 1 ? 1 : attempt_number
  lower, upper = [step, step + 1].map { |x| [delay*x*Math.log(x), 1].max }
  sleep rand(lower..upper)
end
185
+
186
+ private
187
+
188
# Posts one batch to a ClickHouse host and retries on failure.
#
# documents - String request body.
# hosts     - Array of candidate host URLs; entries are removed with pop
#             when the method moves on to another host.
# query     - String appended to the host URL.
# con_count - connection attempt counter for the current host.
# req_count - counter of requests answered with a non-200 status.
# host      - host URL to use; "" means "take the next one from hosts".
# uuid      - identifier attached to the log entries of this batch.
#
# A token is taken from @request_tokens before the request is created and is
# put back on every path: in both callbacks and when the request cannot be
# created at all. Non-200 responses are retried on the same host until
# req_count reaches @request_tolerance. Connection failures are retried
# until con_count reaches @automatic_retries, after which the next host is
# taken. When retries are exhausted (or no host is left) the failure is
# logged and, if @save_on_failure is set, the body is passed to save_to_disk.
def make_request(documents, hosts, query, con_count = 1, req_count = 1, host = "", uuid = SecureRandom.hex)

  host = hosts.pop if host == ""

  url = host+query

  # Block waiting for a token
  token = @request_tokens.pop
  @logger.debug("Got token", :tokens => @request_tokens.length)

  # Create an async request
  begin
    request = client.send(:post, url, :body => documents, :headers => request_headers, :async => true)
  rescue Exception => e
    # No request object means no callback will ever return the token, so it
    # has to be returned here or the pool shrinks with every such error.
    @request_tokens << token
    # Only ordinary errors are handled; anything else is passed on.
    raise unless e.is_a?(StandardError)

    @logger.warn("An error occurred while indexing: #{e.message}")
    save_to_disk(documents) if @save_on_failure
    return
  end

  request.on_success do |response|
    # Make sure we return the token to the pool
    @request_tokens << token

    if response.code == 200
      @logger.debug("Successfully submitted",
        :size => documents.length,
        :response_code => response.code,
        :uuid => uuid)
    elsif req_count >= @request_tolerance
      log_failure(
        "Encountered non-200 HTTP code #{response.code}",
        :response_code => response.code,
        :url => url,
        :size => documents.length,
        :uuid => uuid)
      save_to_disk(documents) if @save_on_failure
    else
      @logger.info("Retrying request", :url => url, :message => response.message, :response => response.body, :uuid => uuid)
      delay_attempt(req_count, @backoff_time)
      make_request(documents, hosts, query, con_count, req_count+1, host, uuid)
    end
  end

  request.on_failure do |exception|
    # Make sure we return the token to the pool
    @request_tokens << token

    if hosts.length == 0
      log_failure("Could not access URL",
        :url => url,
        # Requests are always created with :post above.
        :method => :post,
        :headers => headers,
        :message => exception.message,
        :class => exception.class.name,
        :backtrace => exception.backtrace,
        :size => documents.length,
        :uuid => uuid)
      save_to_disk(documents) if @save_on_failure
      # `next` leaves only this block. A `return` here would try to return
      # from make_request and raises LocalJumpError if the callback runs
      # outside that method's frame.
      next
    end

    if con_count >= @automatic_retries
      # Retries for this host are used up: switch to the next host.
      host = ""
      con_count = 0
    end

    @logger.info("Retrying connection", :url => url, :uuid => uuid)
    delay_attempt(con_count, @backoff_time)
    make_request(documents, hosts, query, con_count+1, req_count, host, uuid)
  end

  client.execute!
end
268
+
269
+ # This is split into a separate method mostly to help testing
270
# Reports a failed request at error level. The message is prefixed with
# "[HTTP Output Failure] " and opts is forwarded to the logger unchanged.
def log_failure(message, opts)
  prefixed = "[HTTP Output Failure] #{message}"
  @logger.error(prefixed, opts)
end
273
+
274
# Returns the header hash to send with a request.
#
# Uses @headers when it is set, otherwise a new empty hash, and fills in
# "Content-Type" => "application/json" only when no Content-Type is present.
# The hash is updated in place, so a configured @headers keeps the added
# entry.
def request_headers()
  result = @headers ? @headers : {}
  result["Content-Type"] = "application/json" unless result["Content-Type"]
  result
end
279
+
280
+ end
@@ -0,0 +1,40 @@
1
+ require 'resolv'
2
+ require 'mini_cache'
3
+
4
# Resolves host names to address lists through DNS and keeps the results in
# a MiniCache store.
class ShortNameResolver
  # ttl    - value passed as expires_in for every cached entry.
  # logger - object responding to info; receives one line per DNS lookup.
  def initialize(ttl:, logger:)
    @ttl = ttl
    @store = MiniCache::Store.new
    @logger = logger
  end

  # Returns one randomly chosen address for shortname.
  def get_address(shortname)
    resolve_cached(shortname).sample
  end

  # Returns the full address list for shortname.
  def get_addresses(shortname)
    resolve_cached(shortname)
  end

  private

  # Looks shortname up in the store; the block runs a DNS lookup and supplies
  # the entry to cache. Raises RuntimeError when the lookup yields no
  # addresses.
  def resolve_cached(shortname)
    @store.get_or_set(shortname) do
      found = resolve(shortname)
      raise "Bad shortname '#{shortname}'" if found.empty?
      MiniCache::Data.new(found, expires_in: @ttl)
    end
  end

  # Queries DNS for shortname and returns the addresses as strings.
  def resolve(shortname)
    addresses = Resolv::DNS.open { |dns| dns.getaddresses(shortname).map(&:to_s) }

    @logger.info("Resolved shortname '#{shortname}' to addresses #{addresses}")

    addresses
  end
end
@@ -0,0 +1,29 @@
1
# Gem specification for the ClickHouse output plugin.
Gem::Specification.new do |spec|
  # Identity
  spec.name = 'logstash-output-clickhouse'
  spec.version = '0.1.1'
  spec.licenses = ['Apache-2.0']
  spec.summary = "This output lets you `POST` messages as JSON in a batched fashion to ClickHouse HTTP endpoint"
  spec.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
  spec.authors = ["kmajk"]
  spec.email = 'k.majk@getresponse.com'
  spec.homepage = "http://getresponse.com"
  spec.require_paths = ["lib"]

  # Packaged files; the test files are the packaged files under test/, spec/ or features/
  spec.files = Dir['lib/**/*','spec/**/*','*.gemspec','*.md','Gemfile','LICENSE' ]
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})

  # Metadata that marks this gem as a logstash output plugin
  spec.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }

  # Runtime dependencies
  spec.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
  spec.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "<= 7.0.0"
  spec.add_runtime_dependency 'mini_cache', ">= 1.0.0", "< 2.0.0"

  # Development dependencies
  spec.add_development_dependency 'logstash-devutils'
  spec.add_development_dependency 'sinatra'
  spec.add_development_dependency 'webrick'
end
metadata ADDED
@@ -0,0 +1,156 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-output-clickhouse
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - kmajk
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-08-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash-core-plugin-api
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1.60'
20
+ - - "<="
21
+ - !ruby/object:Gem::Version
22
+ version: '2.99'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '1.60'
30
+ - - "<="
31
+ - !ruby/object:Gem::Version
32
+ version: '2.99'
33
+ - !ruby/object:Gem::Dependency
34
+ name: logstash-mixin-http_client
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: 6.0.0
40
+ - - "<="
41
+ - !ruby/object:Gem::Version
42
+ version: 7.0.0
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 6.0.0
50
+ - - "<="
51
+ - !ruby/object:Gem::Version
52
+ version: 7.0.0
53
+ - !ruby/object:Gem::Dependency
54
+ name: mini_cache
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 1.0.0
60
+ - - "<"
61
+ - !ruby/object:Gem::Version
62
+ version: 2.0.0
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: 1.0.0
70
+ - - "<"
71
+ - !ruby/object:Gem::Version
72
+ version: 2.0.0
73
+ - !ruby/object:Gem::Dependency
74
+ name: logstash-devutils
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ type: :development
81
+ prerelease: false
82
+ version_requirements: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ - !ruby/object:Gem::Dependency
88
+ name: sinatra
89
+ requirement: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ type: :development
95
+ prerelease: false
96
+ version_requirements: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ - !ruby/object:Gem::Dependency
102
+ name: webrick
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ type: :development
109
+ prerelease: false
110
+ version_requirements: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ description: This gem is a logstash plugin required to be installed on top of the
116
+ Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
117
+ a stand-alone program
118
+ email: k.majk@getresponse.com
119
+ executables: []
120
+ extensions: []
121
+ extra_rdoc_files: []
122
+ files:
123
+ - Gemfile
124
+ - LICENSE
125
+ - README.md
126
+ - lib/logstash/outputs/clickhouse.rb
127
+ - lib/logstash/util/shortname_resolver.rb
128
+ - logstash-output-clickhouse.gemspec
129
+ homepage: http://getresponse.com
130
+ licenses:
131
+ - Apache-2.0
132
+ metadata:
133
+ logstash_plugin: 'true'
134
+ logstash_group: output
135
+ post_install_message:
136
+ rdoc_options: []
137
+ require_paths:
138
+ - lib
139
+ required_ruby_version: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ required_rubygems_version: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: '0'
149
+ requirements: []
150
+ rubyforge_project:
151
+ rubygems_version: 2.7.6
152
+ signing_key:
153
+ specification_version: 4
154
+ summary: This output lets you `POST` messages as JSON in a batched fashion to ClickHouse
155
+ HTTP endpoint
156
+ test_files: []