logstash-output-clickhouse 0.1.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1fbf922d44fe511cc743b7fd7c86ad3ea3de2ed8ed8afc3bbe549eb7dace1b03
4
- data.tar.gz: 6ead7854b8ef425526d9617a3e178c9ff7298eb57fac8f6a84b04c75e9873f08
3
+ metadata.gz: 20cb59bb456f397ff9408209d676deb3c46214abbd874a5499ab5f7387bb80b9
4
+ data.tar.gz: 471cd0087a01acbcd6ed8efaa93e09f89102a2b2785bcfa7bb117e7ed8dd2b0d
5
5
  SHA512:
6
- metadata.gz: c5f5f85f5dc3e2f188cac06a78289e269d6b47a114fec1b2defd37fa343ce3af6cf1ace1a85d8fd5e323dd61866ad944ea2b5d45d6cea1f23f1c797997f24007
7
- data.tar.gz: 88e06fa018fc126c056cfc4a8c8c6237d5921d10449aa1878e506746f77cddc8b3db88dc73f34f5a631c7fd273ca95274a4fc44d348603514d123307f9150d22
6
+ metadata.gz: 3319460b03965943afecff0c3b910ebfb57611566f02b255b7fb3afd6b2a7f6781b89990cf667286972ed27bb503b90833a300f20450a0ad3e637c2144b3f37e
7
+ data.tar.gz: 609792b63cc1aa8d073f272e9539458738914c4ed1a795b22e23d6a86724b6e9c7e0277b533d0e02fa03d40168799197a614d3208e129b8ba534a8bb3ea80aee
data/README.md CHANGED
@@ -1,6 +1,5 @@
1
- # I switched to vector -> https://github.com/timberio/vector.
2
-
3
1
  # Logstash Plugin
2
+ [![Gem Version](https://badge.fury.io/rb/logstash-output-clickhouse.svg)](https://badge.fury.io/rb/logstash-output-clickhouse)
4
3
 
5
4
  This plugin is a modified version of the Lucidworks logstash json_batch. That plugin is available [here](https://github.com/lucidworks/logstash-output-json_batch).
6
5
 
@@ -19,6 +18,9 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
19
18
  "to1" => "from1"
20
19
  "to2" => [ "from2", "(.)(.)", '\1\2' ]
21
20
  }
21
+ extra_params => {
22
+ "date_time_input_format" => "best_effort"
23
+ }
22
24
  }
23
25
  }
24
26
 
@@ -28,6 +30,7 @@ Please note that the name of the plugin when used is `clickhouse`, it only suppo
28
30
  * `automatic_retries` (default: 3) - number of connect retry attempts to each host in `http_hosts`
29
31
  * `request_tolerance` (default: 5) - number of http request send retry attempts if response status code is not 200
30
32
  * `backoff_time` (default: 3) - time to wait in seconds for next retry attempt of connect or request
33
+ * `extra_params` (default: {}) - extra URL query parameters sent to ClickHouse alongside the `INSERT` query on every request, e.g. `date_time_input_format`
31
34
 
32
35
  Default batch size is 50, with a wait of at most 5 seconds per send. These can be tweaked with the parameters `flush_size` and `idle_flush_time` respectively.
33
36
 
@@ -2,13 +2,11 @@
2
2
  require "logstash/outputs/base"
3
3
  require "logstash/namespace"
4
4
  require "logstash/json"
5
- require "logstash/util/shortname_resolver"
6
5
  require "uri"
7
6
  require "stud/buffer"
8
7
  require "logstash/plugin_mixins/http_client"
9
8
  require "securerandom"
10
9
 
11
-
12
10
  class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
13
11
  include LogStash::PluginMixins::HttpClient
14
12
  include Stud::Buffer
@@ -20,7 +18,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
20
18
  config :http_hosts, :validate => :array, :required => true
21
19
 
22
20
  config :table, :validate => :string, :required => true
23
-
21
+
24
22
  # Custom headers to use
25
23
  # format is `headers => ["X-My-Header", "%{host}"]`
26
24
  config :headers, :validate => :hash
@@ -38,7 +36,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
38
36
  config :save_file, :validate => :string, :default => "failed.json"
39
37
 
40
38
  config :request_tolerance, :validate => :number, :default => 5
41
-
39
+
42
40
  config :backoff_time, :validate => :number, :default => 3
43
41
 
44
42
  config :automatic_retries, :validate => :number, :default => 3
@@ -47,19 +45,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
47
45
 
48
46
  config :host_resolve_ttl_sec, :validate => :number, :default => 120
49
47
 
48
+ config :extra_params, :validate => :hash, :default => {}
49
+
50
50
  def print_plugin_info()
51
- @@plugins = Gem::Specification.find_all{|spec| spec.name =~ /logstash-output-clickhouse/ }
51
+ @@plugins = Gem::Specification.find_all { |spec| spec.name =~ /logstash-output-clickhouse/ }
52
52
  @plugin_name = @@plugins[0].name
53
53
  @plugin_version = @@plugins[0].version
54
54
  @logger.info("Running #{@plugin_name} version #{@plugin_version}")
55
55
 
56
56
  @logger.info("Initialized clickhouse with settings",
57
- :flush_size => @flush_size,
58
- :idle_flush_time => @idle_flush_time,
59
- :request_tokens => @pool_max,
60
- :http_hosts => @http_hosts,
61
- :http_query => @http_query,
62
- :headers => request_headers)
57
+ :flush_size => @flush_size,
58
+ :idle_flush_time => @idle_flush_time,
59
+ :request_tokens => @pool_max,
60
+ :http_hosts => @http_hosts,
61
+ :http_query => @http_query,
62
+ :headers => request_headers)
63
63
  end
64
64
 
65
65
  def register
@@ -71,91 +71,57 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
71
71
  # When this queue is empty no new requests may be sent,
72
72
  # tokens must be added back by the client on success
73
73
  @request_tokens = SizedQueue.new(@pool_max)
74
- @pool_max.times {|t| @request_tokens << true }
74
+ @pool_max.times { |t| @request_tokens << true }
75
75
  @requests = Array.new
76
- @http_query = "/?query=INSERT%20INTO%20#{table}%20FORMAT%20JSONEachRow"
77
76
 
78
- @hostnames_pool =
79
- parse_http_hosts(http_hosts,
80
- ShortNameResolver.new(ttl: @host_resolve_ttl_sec, logger: @logger))
77
+ params = { "query" => "INSERT INTO #{table} FORMAT JSONEachRow" }.merge(@extra_params)
78
+ @http_query = "?#{URI.encode_www_form(params)}"
81
79
 
82
80
  buffer_initialize(
83
81
  :max_items => @flush_size,
84
82
  :max_interval => @idle_flush_time,
85
- :logger => @logger
83
+ :logger => @logger,
86
84
  )
87
85
 
88
86
  print_plugin_info()
89
87
  end # def register
90
88
 
91
- private
92
-
93
- def parse_http_hosts(hosts, resolver)
94
- ip_re = /^[\d]+\.[\d]+\.[\d]+\.[\d]+$/
95
-
96
- lambda {
97
- hosts.flat_map { |h|
98
- scheme = URI(h).scheme
99
- host = URI(h).host
100
- port = URI(h).port
101
- path = URI(h).path
102
-
103
- if ip_re !~ host
104
- resolver.get_addresses(host).map { |ip|
105
- "#{scheme}://#{ip}:#{port}#{path}"
106
- }
107
- else
108
- [h]
109
- end
110
- }
111
- }
112
- end
113
-
114
- private
115
-
116
- def get_host_addresses()
117
- begin
118
- @hostnames_pool.call
119
- rescue Exception => ex
120
- @logger.error('Error while resolving host', :error => ex.to_s)
121
- end
122
- end
123
-
124
89
  # This module currently does not support parallel requests as that would circumvent the batching
125
90
  def receive(event)
126
91
  buffer_receive(event)
127
92
  end
128
93
 
129
- def mutate( src )
94
+ def mutate(src)
130
95
  return src if @mutations.empty?
131
96
  res = {}
132
97
  @mutations.each_pair do |dstkey, source|
133
98
  case source
134
- when String then
135
- scrkey = source
136
- next unless src.key?(scrkey)
137
-
138
- res[dstkey] = src[scrkey]
139
- when Array then
140
- scrkey = source[0]
141
- next unless src.key?(scrkey)
142
- pattern = source[1]
143
- replace = source[2]
144
- res[dstkey] = src[scrkey].sub( Regexp.new(pattern), replace )
99
+ when String
100
+ scrkey = source
101
+ next unless src.key?(scrkey)
102
+
103
+ res[dstkey] = src[scrkey]
104
+ when Array
105
+ scrkey = source[0]
106
+ next unless src.key?(scrkey)
107
+ pattern = source[1]
108
+ replace = source[2]
109
+ res[dstkey] = src[scrkey].sub(Regexp.new(pattern), replace)
145
110
  end
146
111
  end
147
112
  res
148
113
  end
149
114
 
150
115
  public
151
- def flush(events, close=false)
116
+
117
+ def flush(events, close = false)
152
118
  documents = "" #this is the string of hashes that we push to Fusion as documents
153
119
 
154
120
  events.each do |event|
155
- documents << LogStash::Json.dump( mutate( event.to_hash() ) ) << "\n"
121
+ documents << LogStash::Json.dump(mutate(event.to_hash())) << "\n"
156
122
  end
157
123
 
158
- hosts = get_host_addresses()
124
+ hosts = @http_hosts.clone
159
125
 
160
126
  make_request(documents, hosts, @http_query, 1, 1, hosts.sample)
161
127
  end
@@ -165,10 +131,10 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
165
131
  def save_to_disk(documents)
166
132
  begin
167
133
  file = File.open("#{save_dir}/#{table}_#{save_file}", "a")
168
- file.write(documents)
134
+ file.write(documents)
169
135
  rescue IOError => e
170
136
  log_failure("An error occurred while saving file to disk: #{e}",
171
- :file_name => file_name)
137
+ :file_name => file_name)
172
138
  ensure
173
139
  file.close unless file.nil?
174
140
  end
@@ -177,21 +143,20 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
177
143
  def delay_attempt(attempt_number, delay)
178
144
  # sleep delay grows roughly as k*x*ln(x) where k is the initial delay set in @backoff_time param
179
145
  attempt = [attempt_number, 1].max
180
- timeout = lambda { |x| [delay*x*Math.log(x), 1].max }
146
+ timeout = lambda { |x| [delay * x * Math.log(x), 1].max }
181
147
  # using rand() to pick final sleep delay to reduce the risk of getting in sync with other clients writing to the DB
182
- sleep_time = rand(timeout.call(attempt)..timeout.call(attempt+1))
148
+ sleep_time = rand(timeout.call(attempt)..timeout.call(attempt + 1))
183
149
  sleep sleep_time
184
150
  end
185
151
 
186
152
  private
187
153
 
188
154
  def make_request(documents, hosts, query, con_count = 1, req_count = 1, host = "", uuid = SecureRandom.hex)
189
-
190
155
  if host == ""
191
156
  host = hosts.pop
192
157
  end
193
158
 
194
- url = host+query
159
+ url = host + query
195
160
 
196
161
  # Block waiting for a token
197
162
  #@logger.info("Requesting token ", :tokens => request_tokens.length())
@@ -210,25 +175,26 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
210
175
  @request_tokens << token
211
176
 
212
177
  if response.code == 200
213
- @logger.debug("Successfully submitted",
214
- :size => documents.length,
215
- :response_code => response.code,
216
- :uuid => uuid)
178
+ @logger.debug("Successfully submitted",
179
+ :size => documents.length,
180
+ :response_code => response.code,
181
+ :uuid => uuid)
217
182
  else
218
183
  if req_count >= @request_tolerance
219
184
  log_failure(
220
- "Encountered non-200 HTTP code #{response.code}",
221
- :response_code => response.code,
222
- :url => url,
223
- :size => documents.length,
224
- :uuid => uuid)
185
+ "Encountered non-200 HTTP code #{response.code}",
186
+ :response_code => response.code,
187
+ :url => url,
188
+ :size => documents.length,
189
+ :uuid => uuid,
190
+ )
225
191
  if @save_on_failure
226
192
  save_to_disk(documents)
227
193
  end
228
194
  else
229
195
  @logger.info("Retrying request", :url => url, :message => response.message, :response => response.body, :uuid => uuid)
230
196
  delay_attempt(req_count, @backoff_time)
231
- make_request(documents, hosts, query, con_count, req_count+1, host, uuid)
197
+ make_request(documents, hosts, query, con_count, req_count + 1, host, uuid)
232
198
  end
233
199
  end
234
200
  end
@@ -238,21 +204,21 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
238
204
  @request_tokens << token
239
205
 
240
206
  if hosts.length == 0
241
- log_failure("Could not access URL",
242
- :url => url,
243
- :method => @http_method,
244
- :headers => headers,
245
- :message => exception.message,
246
- :class => exception.class.name,
247
- :backtrace => exception.backtrace,
248
- :size => documents.length,
249
- :uuid => uuid)
250
- if @save_on_failure
251
- save_to_disk(documents)
252
- end
253
- return
207
+ log_failure("Could not access URL",
208
+ :url => url,
209
+ :method => @http_method,
210
+ :headers => headers,
211
+ :message => exception.message,
212
+ :class => exception.class.name,
213
+ :backtrace => exception.backtrace,
214
+ :size => documents.length,
215
+ :uuid => uuid)
216
+ if @save_on_failure
217
+ save_to_disk(documents)
218
+ end
219
+ return
254
220
  end
255
-
221
+
256
222
  if con_count >= @automatic_retries
257
223
  host = ""
258
224
  con_count = 0
@@ -260,7 +226,7 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
260
226
 
261
227
  @logger.info("Retrying connection", :url => url, :uuid => uuid)
262
228
  delay_attempt(con_count, @backoff_time)
263
- make_request(documents, hosts, query, con_count+1, req_count, host, uuid)
229
+ make_request(documents, hosts, query, con_count + 1, req_count, host, uuid)
264
230
  end
265
231
 
266
232
  client.execute!
@@ -276,5 +242,4 @@ class LogStash::Outputs::ClickHouse < LogStash::Outputs::Base
276
242
  headers["Content-Type"] ||= "application/json"
277
243
  headers
278
244
  end
279
-
280
245
  end
@@ -1,16 +1,16 @@
1
1
  Gem::Specification.new do |s|
2
- s.name = 'logstash-output-clickhouse'
3
- s.version = '0.1.1'
4
- s.licenses = ['Apache-2.0']
5
- s.summary = "This output lets you `POST` messages as JSON in a batched fashion to ClickHouse HTTP endpoint"
6
- s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
7
- s.authors = ["kmajk"]
8
- s.email = 'k.majk@getresponse.com'
9
- s.homepage = "http://getresponse.com"
2
+ s.name = "logstash-output-clickhouse"
3
+ s.version = "0.1.4"
4
+ s.licenses = ["Apache-2.0"]
5
+ s.summary = "This output lets you `POST` messages as JSON in a batched fashion to ClickHouse HTTP endpoint"
6
+ s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
7
+ s.authors = ["kmajk", "maltoze"]
8
+ s.email = "atonezzz@gmail.com"
9
+ s.homepage = "https://github.com/maltoze/logstash-output-clickhouse"
10
10
  s.require_paths = ["lib"]
11
11
 
12
12
  # Files
13
- s.files = Dir['lib/**/*','spec/**/*','*.gemspec','*.md','Gemfile','LICENSE' ]
13
+ s.files = Dir["lib/**/*", "spec/**/*", "*.gemspec", "*.md", "Gemfile", "LICENSE"]
14
14
 
15
15
  # Tests
16
16
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
@@ -20,10 +20,10 @@ Gem::Specification.new do |s|
20
20
 
21
21
  # Gem dependencies
22
22
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
23
- s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "<= 7.0.0"
24
- s.add_runtime_dependency 'mini_cache', ">= 1.0.0", "< 2.0.0"
23
+ s.add_runtime_dependency "logstash-mixin-http_client", ">= 6.0.0", "< 8.0.0"
24
+ s.add_runtime_dependency "mini_cache", ">= 1.0.0", "< 2.0.0"
25
25
 
26
- s.add_development_dependency 'logstash-devutils'
27
- s.add_development_dependency 'sinatra'
28
- s.add_development_dependency 'webrick'
26
+ s.add_development_dependency "logstash-devutils"
27
+ s.add_development_dependency "sinatra"
28
+ s.add_development_dependency "webrick"
29
29
  end
metadata CHANGED
@@ -1,14 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-output-clickhouse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - kmajk
8
+ - maltoze
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2021-08-04 00:00:00.000000000 Z
12
+ date: 2023-04-27 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: logstash-core-plugin-api
@@ -37,9 +38,9 @@ dependencies:
37
38
  - - ">="
38
39
  - !ruby/object:Gem::Version
39
40
  version: 6.0.0
40
- - - "<="
41
+ - - "<"
41
42
  - !ruby/object:Gem::Version
42
- version: 7.0.0
43
+ version: 8.0.0
43
44
  type: :runtime
44
45
  prerelease: false
45
46
  version_requirements: !ruby/object:Gem::Requirement
@@ -47,9 +48,9 @@ dependencies:
47
48
  - - ">="
48
49
  - !ruby/object:Gem::Version
49
50
  version: 6.0.0
50
- - - "<="
51
+ - - "<"
51
52
  - !ruby/object:Gem::Version
52
- version: 7.0.0
53
+ version: 8.0.0
53
54
  - !ruby/object:Gem::Dependency
54
55
  name: mini_cache
55
56
  requirement: !ruby/object:Gem::Requirement
@@ -115,7 +116,7 @@ dependencies:
115
116
  description: This gem is a logstash plugin required to be installed on top of the
116
117
  Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
117
118
  a stand-alone program
118
- email: k.majk@getresponse.com
119
+ email: atonezzz@gmail.com
119
120
  executables: []
120
121
  extensions: []
121
122
  extra_rdoc_files: []
@@ -124,9 +125,8 @@ files:
124
125
  - LICENSE
125
126
  - README.md
126
127
  - lib/logstash/outputs/clickhouse.rb
127
- - lib/logstash/util/shortname_resolver.rb
128
128
  - logstash-output-clickhouse.gemspec
129
- homepage: http://getresponse.com
129
+ homepage: https://github.com/maltoze/logstash-output-clickhouse
130
130
  licenses:
131
131
  - Apache-2.0
132
132
  metadata:
@@ -1,40 +0,0 @@
1
- require 'resolv'
2
- require 'mini_cache'
3
-
4
- class ShortNameResolver
5
- def initialize(ttl:, logger:)
6
- @ttl = ttl
7
- @store = MiniCache::Store.new
8
- @logger = logger
9
- end
10
-
11
- private
12
- def resolve_cached(shortname)
13
- @store.get_or_set(shortname) do
14
- addresses = resolve(shortname)
15
- raise "Bad shortname '#{shortname}'" if addresses.empty?
16
- MiniCache::Data.new(addresses, expires_in: @ttl)
17
- end
18
- end
19
-
20
- private
21
- def resolve(shortname)
22
- addresses = Resolv::DNS.open do |dns|
23
- dns.getaddresses(shortname).map { |r| r.to_s }
24
- end
25
-
26
- @logger.info("Resolved shortname '#{shortname}' to addresses #{addresses}")
27
-
28
- return addresses
29
- end
30
-
31
- public
32
- def get_address(shortname)
33
- return resolve_cached(shortname).sample
34
- end
35
-
36
- public
37
- def get_addresses(shortname)
38
- return resolve_cached(shortname)
39
- end
40
- end