logstash-output-clickhouse 0.1.1 → 0.1.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 1fbf922d44fe511cc743b7fd7c86ad3ea3de2ed8ed8afc3bbe549eb7dace1b03
- data.tar.gz: 6ead7854b8ef425526d9617a3e178c9ff7298eb57fac8f6a84b04c75e9873f08
+ metadata.gz: 20cb59bb456f397ff9408209d676deb3c46214abbd874a5499ab5f7387bb80b9
+ data.tar.gz: 471cd0087a01acbcd6ed8efaa93e09f89102a2b2785bcfa7bb117e7ed8dd2b0d
  SHA512:
- metadata.gz: c5f5f85f5dc3e2f188cac06a78289e269d6b47a114fec1b2defd37fa343ce3af6cf1ace1a85d8fd5e323dd61866ad944ea2b5d45d6cea1f23f1c797997f24007
- data.tar.gz: 88e06fa018fc126c056cfc4a8c8c6237d5921d10449aa1878e506746f77cddc8b3db88dc73f34f5a631c7fd273ca95274a4fc44d348603514d123307f9150d22
+ metadata.gz: 3319460b03965943afecff0c3b910ebfb57611566f02b255b7fb3afd6b2a7f6781b89990cf667286972ed27bb503b90833a300f20450a0ad3e637c2144b3f37e
+ data.tar.gz: 609792b63cc1aa8d073f272e9539458738914c4ed1a795b22e23d6a86724b6e9c7e0277b533d0e02fa03d40168799197a614d3208e129b8ba534a8bb3ea80aee
data/README.md CHANGED
@@ -1,6 +1,5 @@
- # I switched to vector -> https://github.com/timberio/vector.
-
  # Logstash Plugin
+ [![Gem Version](https://badge.fury.io/rb/logstash-output-clickhouse.svg)](https://badge.fury.io/rb/logstash-output-clickhouse)
 
  This plugin is a modified version of the Lucidworks logstash json_batch. That plugin is available [here](https://github.com/lucidworks/logstash-output-json_batch).
 
@@ -19,6 +18,9 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
  "to1" => "from1"
  "to2" => [ "from2", "(.)(.)", '\1\2' ]
  }
+ extra_params => {
+ "date_time_input_format" => "best_effort"
+ }
  }
  }
 
@@ -28,6 +30,7 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
  * `automatic_retries` (default: 1) - number of connect retry attempts to each host in `http_hosts`
  * `request_tolerance` (default: 5) - number of http request send retry attempts if response status code is not 200
  * `backoff_time` (default: 3) - time to wait in seconds for next retry attempt of connect or request
+ * `extra_params` (default: {}) - extra parameters to be passed to the clickhouse http client
 
  Default batch size is 50, with a wait of at most 5 seconds per send. These can be tweaked with the parameters `flush_size` and `idle_flush_time` respectively.
 
data/lib/logstash/outputs/clickhouse.rb CHANGED
@@ -2,13 +2,11 @@
  require "logstash/outputs/base"
  require "logstash/namespace"
  require "logstash/json"
- require "logstash/util/shortname_resolver"
  require "uri"
  require "stud/buffer"
  require "logstash/plugin_mixins/http_client"
  require "securerandom"
 
-
  class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
  include LogStash::PluginMixins::HttpClient
  include Stud::Buffer
@@ -20,7 +18,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
  config :http_hosts, :validate => :array, :required => true
 
  config :table, :validate => :string, :required => true
-
+
  # Custom headers to use
  # format is `headers => ["X-My-Header", "%{host}"]`
  config :headers, :validate => :hash
@@ -38,7 +36,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
  config :save_file, :validate => :string, :default => "failed.json"
 
  config :request_tolerance, :validate => :number, :default => 5
-
+
  config :backoff_time, :validate => :number, :default => 3
 
  config :automatic_retries, :validate => :number, :default => 3
@@ -47,19 +45,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
 
  config :host_resolve_ttl_sec, :validate => :number, :default => 120
 
+ config :extra_params, :validate => :hash, :default => {}
+
  def print_plugin_info()
- @@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-clickhouse/ }
+ @@plugins = Gem::Specification.find_all { |spec| spec.name =~ /logstash-output-clickhouse/ }
  @plugin_name = @@plugins[0].name
  @plugin_version = @@plugins[0].version
  @logger.info("Running #{@plugin_name} version #{@plugin_version}")
 
  @logger.info("Initialized clickhouse with settings",
- :flush_size => @flush_size,
- :idle_flush_time => @idle_flush_time,
- :request_tokens => @pool_max,
- :http_hosts => @http_hosts,
- :http_query => @http_query,
- :headers => request_headers)
+ :flush_size => @flush_size,
+ :idle_flush_time => @idle_flush_time,
+ :request_tokens => @pool_max,
+ :http_hosts => @http_hosts,
+ :http_query => @http_query,
+ :headers => request_headers)
  end
 
  def register
@@ -71,91 +71,57 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
  # When this queue is empty no new requests may be sent,
  # tokens must be added back by the client on success
  @request_tokens = SizedQueue.new(@pool_max)
- @pool_max.times {|t| @request_tokens << true }
+ @pool_max.times { |t| @request_tokens << true }
  @requests = Array.new
- @http_query = "/?query=INSERT%20INTO%20#{table}%20FORMAT%20JSONEachRow"
 
- @hostnames_pool =
- parse_http_hosts(http_hosts,
- ShortNameResolver.new(ttl: @host_resolve_ttl_sec, logger: @logger))
+ params = { "query" => "INSERT INTO #{table} FORMAT JSONEachRow" }.merge(@extra_params)
+ @http_query = "?#{URI.encode_www_form(params)}"
 
  buffer_initialize(
  :max_items => @flush_size,
  :max_interval => @idle_flush_time,
- :logger => @logger
+ :logger => @logger,
  )
 
  print_plugin_info()
  end # def register
 
- private
-
- def parse_http_hosts(hosts, resolver)
- ip_re = /^[\d]+\.[\d]+\.[\d]+\.[\d]+$/
-
- lambda {
- hosts.flat_map { |h|
- scheme = URI(h).scheme
- host = URI(h).host
- port = URI(h).port
- path = URI(h).path
-
- if ip_re !~ host
- resolver.get_addresses(host).map { |ip|
- "#{scheme}://#{ip}:#{port}#{path}"
- }
- else
- [h]
- end
- }
- }
- end
-
- private
-
- def get_host_addresses()
- begin
- @hostnames_pool.call
- rescue Exception => ex
- @logger.error('Error while resolving host', :error => ex.to_s)
- end
- end
-
  # This module currently does not support parallel requests as that would circumvent the batching
  def receive(event)
  buffer_receive(event)
  end
 
- def mutate( src )
+ def mutate(src)
  return src if @mutations.empty?
  res = {}
  @mutations.each_pair do |dstkey, source|
  case source
- when String then
- scrkey = source
- next unless src.key?(scrkey)
-
- res[dstkey] = src[scrkey]
- when Array then
- scrkey = source[0]
- next unless src.key?(scrkey)
- pattern = source[1]
- replace = source[2]
- res[dstkey] = src[scrkey].sub( Regexp.new(pattern), replace )
+ when String
+ scrkey = source
+ next unless src.key?(scrkey)
+
+ res[dstkey] = src[scrkey]
+ when Array
+ scrkey = source[0]
+ next unless src.key?(scrkey)
+ pattern = source[1]
+ replace = source[2]
+ res[dstkey] = src[scrkey].sub(Regexp.new(pattern), replace)
  end
  end
  res
  end
 
  public
- def flush(events, close=false)
+
+ def flush(events, close = false)
  documents = "" #this is the string of hashes that we push to Fusion as documents
 
  events.each do |event|
- documents << LogStash::Json.dump( mutate( event.to_hash() ) ) << "\n"
+ documents << LogStash::Json.dump(mutate(event.to_hash())) << "\n"
  end
 
- hosts = get_host_addresses()
+ hosts = @http_hosts.clone
 
  make_request(documents, hosts, @http_query, 1, 1, hosts.sample)
  end
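The core change in the hunk above is the new query-string construction in `register`: `URI.encode_www_form` encodes the `INSERT` statement together with any `extra_params` entries, replacing both the hand-built `%20`-encoded query and the DNS short-name resolution. A minimal sketch, with a placeholder table name and a sample extra parameter:

```ruby
require "uri"

# Hypothetical values standing in for `table` and `extra_params`.
params = { "query" => "INSERT INTO logs FORMAT JSONEachRow",
           "date_time_input_format" => "best_effort" }
http_query = "?#{URI.encode_www_form(params)}"
# => "?query=INSERT+INTO+logs+FORMAT+JSONEachRow&date_time_input_format=best_effort"
```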
@@ -165,10 +131,10 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
  def save_to_disk(documents)
  begin
  file = File.open("#{save_dir}/#{table}_#{save_file}", "a")
- file.write(documents)
+ file.write(documents)
  rescue IOError => e
  log_failure("An error occurred while saving file to disk: #{e}",
- :file_name => file_name)
+ :file_name => file_name)
  ensure
  file.close unless file.nil?
  end
@@ -177,21 +143,20 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
  def delay_attempt(attempt_number, delay)
  # sleep delay grows roughly as k*x*ln(x) where k is the initial delay set in @backoff_time param
  attempt = [attempt_number, 1].max
- timeout = lambda { |x| [delay*x*Math.log(x), 1].max }
+ timeout = lambda { |x| [delay * x * Math.log(x), 1].max }
  # using rand() to pick final sleep delay to reduce the risk of getting in sync with other clients writing to the DB
- sleep_time = rand(timeout.call(attempt)..timeout.call(attempt+1))
+ sleep_time = rand(timeout.call(attempt)..timeout.call(attempt + 1))
  sleep sleep_time
  end
 
  private
 
  def make_request(documents, hosts, query, con_count = 1, req_count = 1, host = "", uuid = SecureRandom.hex)
-
  if host == ""
  host = hosts.pop
  end
 
- url = host+query
+ url = host + query
 
  # Block waiting for a token
  #@logger.info("Requesting token ", :tokens => request_tokens.length())
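As a worked example of the backoff comment above (`k*x*ln(x)` with `k = @backoff_time`), the sketch below prints the sleep ranges for the first few attempts with the default `backoff_time` of 3; the actual delay is a random value inside each range.

```ruby
# Reproduces the timeout lambda from delay_attempt with delay = 3 (default backoff_time).
timeout = lambda { |x| [3 * x * Math.log(x), 1].max }
(1..3).each do |attempt|
  low, high = timeout.call(attempt), timeout.call(attempt + 1)
  printf("attempt %d: sleep between %.2f and %.2f seconds\n", attempt, low, high)
end
# attempt 1: sleep between 1.00 and 4.16 seconds
# attempt 2: sleep between 4.16 and 9.89 seconds
# attempt 3: sleep between 9.89 and 16.64 seconds
```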
@@ -210,25 +175,26 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
  @request_tokens << token
 
  if response.code == 200
- @logger.debug("Successfully submitted",
- :size => documents.length,
- :response_code => response.code,
- :uuid => uuid)
+ @logger.debug("Successfully submitted",
+ :size => documents.length,
+ :response_code => response.code,
+ :uuid => uuid)
  else
  if req_count >= @request_tolerance
  log_failure(
- "Encountered non-200 HTTP code #{response.code}",
- :response_code => response.code,
- :url => url,
- :size => documents.length,
- :uuid => uuid)
+ "Encountered non-200 HTTP code #{response.code}",
+ :response_code => response.code,
+ :url => url,
+ :size => documents.length,
+ :uuid => uuid,
+ )
  if @save_on_failure
  save_to_disk(documents)
  end
  else
  @logger.info("Retrying request", :url => url, :message => response.message, :response => response.body, :uuid => uuid)
  delay_attempt(req_count, @backoff_time)
- make_request(documents, hosts, query, con_count, req_count+1, host, uuid)
+ make_request(documents, hosts, query, con_count, req_count + 1, host, uuid)
  end
  end
  end
@@ -238,21 +204,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
  @request_tokens << token
 
  if hosts.length == 0
- log_failure("Could not access URL",
- :url => url,
- :method => @http_method,
- :headers => headers,
- :message => exception.message,
- :class => exception.class.name,
- :backtrace => exception.backtrace,
- :size => documents.length,
- :uuid => uuid)
- if @save_on_failure
- save_to_disk(documents)
- end
- return
+ log_failure("Could not access URL",
+ :url => url,
+ :method => @http_method,
+ :headers => headers,
+ :message => exception.message,
+ :class => exception.class.name,
+ :backtrace => exception.backtrace,
+ :size => documents.length,
+ :uuid => uuid)
+ if @save_on_failure
+ save_to_disk(documents)
+ end
+ return
  end
-
+
  if con_count >= @automatic_retries
  host = ""
  con_count = 0
@@ -260,7 +226,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
 
  @logger.info("Retrying connection", :url => url, :uuid => uuid)
  delay_attempt(con_count, @backoff_time)
- make_request(documents, hosts, query, con_count+1, req_count, host, uuid)
+ make_request(documents, hosts, query, con_count + 1, req_count, host, uuid)
  end
 
  client.execute!
@@ -276,5 +242,4 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
  headers["Content-Type"] ||= "application/json"
  headers
  end
-
  end
data/logstash-output-clickhouse.gemspec CHANGED
@@ -1,16 +1,16 @@
  Gem::Specification.new do |s|
- s.name = 'logstash-output-clickhouse'
- s.version = '0.1.1'
- s.licenses = ['Apache-2.0']
- s.summary = "This output lets you `POST` messages as JSON in a batched fashion to ClickHouse HTTP endpoint"
- s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
- s.authors = ["kmajk"]
- s.email = 'k.majk@getresponse.com'
- s.homepage = "http://getresponse.com"
+ s.name = "logstash-output-clickhouse"
+ s.version = "0.1.4"
+ s.licenses = ["Apache-2.0"]
+ s.summary = "This output lets you `POST` messages as JSON in a batched fashion to ClickHouse HTTP endpoint"
+ s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
+ s.authors = ["kmajk", "maltoze"]
+ s.email = "atonezzz@gmail.com"
+ s.homepage = "https://github.com/maltoze/logstash-output-clickhouse"
  s.require_paths = ["lib"]
 
  # Files
- s.files = Dir['lib/**/*','spec/**/*','*.gemspec','*.md','Gemfile','LICENSE' ]
+ s.files = Dir["lib/**/*", "spec/**/*", "*.gemspec", "*.md", "Gemfile", "LICENSE"]
 
  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
@@ -20,10 +20,10 @@ Gem::Specification.new do |s|
 
  # Gem dependencies
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
- s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "<= 7.0.0"
- s.add_runtime_dependency 'mini_cache', ">= 1.0.0", "< 2.0.0"
+ s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "< 8.0.0"
+ s.add_runtime_dependency "mini_cache", ">= 1.0.0", "< 2.0.0"
 
- s.add_development_dependency 'logstash-devutils'
- s.add_development_dependency 'sinatra'
- s.add_development_dependency 'webrick'
+ s.add_development_dependency "logstash-devutils"
+ s.add_development_dependency "sinatra"
+ s.add_development_dependency "webrick"
  end
metadata CHANGED
@@ -1,14 +1,15 @@
  --- !ruby/object:Gem::Specification
  name: logstash-output-clickhouse
  version: !ruby/object:Gem::Version
- version: 0.1.1
+ version: 0.1.4
  platform: ruby
  authors:
  - kmajk
+ - maltoze
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-08-04 00:00:00.000000000 Z
+ date: 2023-04-27 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: logstash-core-plugin-api
@@ -37,9 +38,9 @@ dependencies:
  - - ">="
  - !ruby/object:Gem::Version
  version: 6.0.0
- - - "<="
+ - - "<"
  - !ruby/object:Gem::Version
- version: 7.0.0
+ version: 8.0.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
@@ -47,9 +48,9 @@ dependencies:
  - - ">="
  - !ruby/object:Gem::Version
  version: 6.0.0
- - - "<="
+ - - "<"
  - !ruby/object:Gem::Version
- version: 7.0.0
+ version: 8.0.0
  - !ruby/object:Gem::Dependency
  name: mini_cache
  requirement: !ruby/object:Gem::Requirement
@@ -115,7 +116,7 @@ dependencies:
  description: This gem is a logstash plugin required to be installed on top of the
  Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
  a stand-alone program
- email: k.majk@getresponse.com
+ email: atonezzz@gmail.com
  executables: []
  extensions: []
  extra_rdoc_files: []
@@ -124,9 +125,8 @@ files:
  - LICENSE
  - README.md
  - lib/logstash/outputs/clickhouse.rb
- - lib/logstash/util/shortname_resolver.rb
  - logstash-output-clickhouse.gemspec
- homepage: http://getresponse.com
+ homepage: https://github.com/maltoze/logstash-output-clickhouse
  licenses:
  - Apache-2.0
  metadata:
data/lib/logstash/util/shortname_resolver.rb DELETED
@@ -1,40 +0,0 @@
- require 'resolv'
- require 'mini_cache'
-
- class ShortNameResolver
- def initialize(ttl:, logger:)
- @ttl = ttl
- @store = MiniCache::Store.new
- @logger = logger
- end
-
- private
- def resolve_cached(shortname)
- @store.get_or_set(shortname) do
- addresses = resolve(shortname)
- raise "Bad shortname '#{shortname}'" if addresses.empty?
- MiniCache::Data.new(addresses, expires_in: @ttl)
- end
- end
-
- private
- def resolve(shortname)
- addresses = Resolv::DNS.open do |dns|
- dns.getaddresses(shortname).map { |r| r.to_s }
- end
-
- @logger.info("Resolved shortname '#{shortname}' to addresses #{addresses}")
-
- return addresses
- end
-
- public
- def get_address(shortname)
- return resolve_cached(shortname).sample
- end
-
- public
- def get_addresses(shortname)
- return resolve_cached(shortname)
- end
- end