onlylogs 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/onlylogs/http_logger.rb +144 -15
- data/lib/onlylogs/spool.rb +105 -0
- data/lib/onlylogs/version.rb +1 -1
- data/lib/onlylogs.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f27d7205791730383562092985355eb068e2568ff617ef7d050966347ebc54df
|
|
4
|
+
data.tar.gz: 73b44d11e108d5a030134eeba81fe2f40b49442ef7ceba388d16c3348828d7da
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7e9f5954ab6144be0f8886503b6a1bb6ece1926fa01ea3275e2bf6523b68ac9334025c41d5bf6ef5aca79ced1cab5d33d548c3cd1a79ac248b0f8dcb7048ed33
|
|
7
|
+
data.tar.gz: 986ba53d82a32a5c96ec65a029c543bb0b6123a8e94eb83654bfa6ffbae93327fc8bf295604b0f14faf81042fa358faaa112bac69d54c98b6c254fa258aa193f
|
data/lib/onlylogs/http_logger.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require "net/http"
|
|
4
4
|
require "uri"
|
|
5
|
+
require_relative "spool"
|
|
5
6
|
|
|
6
7
|
# This logger sends messages to onlylogs.io (or any Vector-compatible sink) directly via HTTP.
|
|
7
8
|
# Unlike SocketLogger, it does not require a sidecar process or Puma plugin,
|
|
@@ -13,6 +14,10 @@ require "uri"
|
|
|
13
14
|
# * cooldown: once the drain is known to be failing we stop attempting
|
|
14
15
|
# requests for a cooldown period instead of blocking on every send for the full
|
|
15
16
|
# read timeout (a down host accepts the TCP/TLS connection but never answers).
|
|
17
|
+
#
|
|
18
|
+
# By default an on-disk Spool buffers any batch we could not deliver and replays it once the
|
|
19
|
+
# drain recovers, so a transient outage or a restart does not lose logs. It is on by default
|
|
20
|
+
# (set ONLYLOGS_SPOOL_DIR empty to disable) and bounded by bytes; see Onlylogs::Spool.
|
|
16
21
|
module Onlylogs
|
|
17
22
|
class HttpLogger < Onlylogs::Logger
|
|
18
23
|
DEFAULT_BATCH_SIZE = 100
|
|
@@ -23,6 +28,10 @@ module Onlylogs
|
|
|
23
28
|
DEFAULT_OPEN_TIMEOUT = 0.5
|
|
24
29
|
DEFAULT_READ_TIMEOUT = 0.5
|
|
25
30
|
|
|
31
|
+
# How long Net::HTTP may keep an idle connection around for reuse. Comfortably longer than
|
|
32
|
+
# the default flush interval so normal traffic reuses one connection across many batches.
|
|
33
|
+
DEFAULT_KEEP_ALIVE_TIMEOUT = 30
|
|
34
|
+
|
|
26
35
|
# Open the circuit after this many consecutive failed sends
|
|
27
36
|
CIRCUIT_FAILURE_THRESHOLD = 3
|
|
28
37
|
# ...and keep it open for this long once it is open.
|
|
@@ -36,32 +45,42 @@ module Onlylogs
|
|
|
36
45
|
max_queue_size: ENV.fetch("ONLYLOGS_MAX_QUEUE_SIZE", DEFAULT_MAX_QUEUE_SIZE).to_i,
|
|
37
46
|
open_timeout: ENV.fetch("ONLYLOGS_OPEN_TIMEOUT", DEFAULT_OPEN_TIMEOUT).to_f,
|
|
38
47
|
read_timeout: ENV.fetch("ONLYLOGS_READ_TIMEOUT", DEFAULT_READ_TIMEOUT).to_f,
|
|
39
|
-
circuit_cooldown: ENV.fetch("ONLYLOGS_CIRCUIT_COOLDOWN", CIRCUIT_COOLDOWN).to_f
|
|
48
|
+
circuit_cooldown: ENV.fetch("ONLYLOGS_CIRCUIT_COOLDOWN", CIRCUIT_COOLDOWN).to_f,
|
|
49
|
+
keep_alive_timeout: ENV.fetch("ONLYLOGS_KEEP_ALIVE_TIMEOUT", DEFAULT_KEEP_ALIVE_TIMEOUT).to_f,
|
|
50
|
+
spool_dir: ENV.fetch("ONLYLOGS_SPOOL_DIR", default_spool_dir),
|
|
51
|
+
spool_max_bytes: ENV.fetch("ONLYLOGS_SPOOL_MAX_BYTES", Spool::DEFAULT_MAX_BYTES).to_i
|
|
40
52
|
)
|
|
41
53
|
super(local_fallback)
|
|
42
54
|
@drain_url = drain_url
|
|
55
|
+
@uri = URI.parse(drain_url) if drain_url
|
|
43
56
|
@batch_size = batch_size
|
|
44
57
|
@flush_interval = flush_interval
|
|
45
58
|
@max_queue_size = max_queue_size
|
|
46
59
|
@open_timeout = open_timeout
|
|
47
60
|
@read_timeout = read_timeout
|
|
48
61
|
@circuit_cooldown = circuit_cooldown
|
|
62
|
+
@keep_alive_timeout = keep_alive_timeout
|
|
49
63
|
@queue = Queue.new
|
|
50
64
|
@mutex = Mutex.new
|
|
65
|
+
@http_mutex = Mutex.new
|
|
66
|
+
@http = nil
|
|
67
|
+
@spool = nil
|
|
51
68
|
|
|
52
69
|
@consecutive_failures = 0
|
|
53
70
|
@circuit_open_until = nil
|
|
54
71
|
@dropped = 0
|
|
55
72
|
|
|
56
73
|
if @drain_url
|
|
74
|
+
@spool = build_spool(spool_dir, spool_max_bytes)
|
|
57
75
|
start_sender
|
|
58
76
|
else
|
|
59
|
-
$stderr.puts "Onlylogs::HttpLogger
|
|
77
|
+
$stderr.puts "Onlylogs::HttpLogger: ONLYLOGS_DRAIN_URL is not set; logging locally only." # rubocop:disable Style/StderrPuts
|
|
60
78
|
end
|
|
61
79
|
end
|
|
62
80
|
|
|
63
81
|
def add(severity, message = nil, progname = nil, &block)
|
|
64
|
-
|
|
82
|
+
# No drain configured: behave as a plain local logger instead of dropping everything.
|
|
83
|
+
return super unless @drain_url
|
|
65
84
|
|
|
66
85
|
if message.nil?
|
|
67
86
|
if block_given?
|
|
@@ -81,6 +100,7 @@ module Onlylogs
|
|
|
81
100
|
flush
|
|
82
101
|
@running = false
|
|
83
102
|
@sender_thread&.join(2)
|
|
103
|
+
close_connection
|
|
84
104
|
end
|
|
85
105
|
|
|
86
106
|
def flush
|
|
@@ -106,6 +126,9 @@ module Onlylogs
|
|
|
106
126
|
@running = true
|
|
107
127
|
|
|
108
128
|
@sender_thread = Thread.new do
|
|
129
|
+
# Replay anything left in the spool by a previous run or a crashed/redeployed sibling.
|
|
130
|
+
drain_spool
|
|
131
|
+
|
|
109
132
|
batch = []
|
|
110
133
|
last_flush = Time.now
|
|
111
134
|
|
|
@@ -142,27 +165,133 @@ module Onlylogs
|
|
|
142
165
|
|
|
143
166
|
def send_batch(lines)
|
|
144
167
|
return if lines.empty?
|
|
145
|
-
# Drain is known to be down: skip the request entirely so we don't block for the
|
|
146
|
-
# full read timeout on every batch. The lines are dropped (best-effort logging).
|
|
147
|
-
return if circuit_open?
|
|
148
168
|
|
|
149
|
-
|
|
150
|
-
http = Net::HTTP.new(uri.host, uri.port)
|
|
151
|
-
http.use_ssl = (uri.scheme == "https")
|
|
152
|
-
http.read_timeout = @read_timeout
|
|
153
|
-
http.open_timeout = @open_timeout
|
|
169
|
+
body = lines.join("\n")
|
|
154
170
|
|
|
155
|
-
request
|
|
156
|
-
|
|
157
|
-
|
|
171
|
+
# Drain is known to be down: skip the request entirely so we don't block for the full read
|
|
172
|
+
# timeout on every batch. Buffer the batch so the cooldown does not cost us data (without a
|
|
173
|
+
# spool configured, spool_write is a no-op and the batch is dropped — best-effort logging).
|
|
174
|
+
if circuit_open?
|
|
175
|
+
spool_write(body)
|
|
176
|
+
return
|
|
177
|
+
end
|
|
158
178
|
|
|
159
|
-
|
|
179
|
+
deliver(body)
|
|
160
180
|
record_success
|
|
181
|
+
# The drain just answered: replay anything we had buffered while it was unavailable.
|
|
182
|
+
drain_spool
|
|
161
183
|
rescue => e
|
|
162
184
|
record_failure
|
|
185
|
+
spool_write(body)
|
|
163
186
|
Kernel.warn "Onlylogs::HttpLogger error: #{e.class}: #{e.message}"
|
|
164
187
|
end
|
|
165
188
|
|
|
189
|
+
def spool_write(body)
|
|
190
|
+
@spool&.write(body)
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
# Replay buffered batches now that the drain is responding. Oldest first; stop at the first
|
|
194
|
+
# failure (record it and leave the rest on disk) so a drain that just went down again does not
|
|
195
|
+
# burn the whole backlog into the void.
|
|
196
|
+
def drain_spool
|
|
197
|
+
return unless @spool
|
|
198
|
+
|
|
199
|
+
@spool.replay do |body|
|
|
200
|
+
deliver(body)
|
|
201
|
+
record_success
|
|
202
|
+
true
|
|
203
|
+
rescue => e
|
|
204
|
+
record_failure
|
|
205
|
+
Kernel.warn "Onlylogs::HttpLogger replay error: #{e.class}: #{e.message}"
|
|
206
|
+
false
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
def build_spool(dir, max_bytes)
|
|
211
|
+
return if dir.nil? || dir.to_s.strip.empty?
|
|
212
|
+
|
|
213
|
+
Spool.new(dir: dir, max_bytes: max_bytes)
|
|
214
|
+
rescue => e
|
|
215
|
+
Kernel.warn "Onlylogs::HttpLogger: spool disabled (#{e.class}: #{e.message})"
|
|
216
|
+
nil
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
# The spool is on by default. It lives under the app's tmp dir, which survives a drain outage
|
|
220
|
+
# while the app keeps running; point ONLYLOGS_SPOOL_DIR at a persistent volume to also survive
|
|
221
|
+
# redeploys, or set it empty to disable.
|
|
222
|
+
def default_spool_dir
|
|
223
|
+
base = if defined?(Rails) && Rails.respond_to?(:root) && Rails.root
|
|
224
|
+
Rails.root.to_s
|
|
225
|
+
else
|
|
226
|
+
::Dir.pwd
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
::File.join(base, "tmp", "onlylogs", "spool")
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
# POST the body over a persistent (kept-alive) connection.
|
|
233
|
+
def deliver(body)
|
|
234
|
+
@http_mutex.synchronize do
|
|
235
|
+
attempts = 0
|
|
236
|
+
response = begin
|
|
237
|
+
attempts += 1
|
|
238
|
+
reused = !@http.nil?
|
|
239
|
+
connection.request(build_request(body))
|
|
240
|
+
rescue
|
|
241
|
+
close_connection
|
|
242
|
+
retry if reused && attempts < 2
|
|
243
|
+
raise
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
# Checked outside the rescue on purpose: a non-2xx is an application-level error on a
|
|
247
|
+
# healthy connection, so it must NOT trigger the reconnect-retry above (that would hammer
|
|
248
|
+
# an erroring drain on a perfectly good socket). Raising here records a failure instead.
|
|
249
|
+
ensure_success!(response)
|
|
250
|
+
end
|
|
251
|
+
end
|
|
252
|
+
|
|
253
|
+
# Net::HTTP does not raise on 4xx/5xx; it returns the response. Treat any non-2xx as a
|
|
254
|
+
# failed delivery so send_batch records it and the circuit can open. Without this a drain
|
|
255
|
+
# that is up but answering 500/413 would look like success and we'd silently drop every batch.
|
|
256
|
+
def ensure_success!(response)
|
|
257
|
+
return if response.is_a?(Net::HTTPSuccess)
|
|
258
|
+
|
|
259
|
+
raise "drain responded #{response.code} #{response.message}"
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
def build_request(body)
|
|
263
|
+
# request_uri (not path): it defaults to "/" when the drain URL has no path — Net::HTTP::Post.new("")
|
|
264
|
+
# raises "HTTP request path is empty" — and it carries any query string (e.g. ?token=...) along.
|
|
265
|
+
request = Net::HTTP::Post.new(@uri.request_uri)
|
|
266
|
+
request.body = body
|
|
267
|
+
request.content_type = "text/plain"
|
|
268
|
+
request
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
# Lazily opens and memoizes the connection. Only assigns @http once #start succeeds, so a
|
|
272
|
+
# failed connect leaves @http nil and the next send starts clean. Caller holds @http_mutex.
|
|
273
|
+
def connection
|
|
274
|
+
return @http if @http
|
|
275
|
+
|
|
276
|
+
http = Net::HTTP.new(@uri.host, @uri.port)
|
|
277
|
+
http.use_ssl = (@uri.scheme == "https")
|
|
278
|
+
http.read_timeout = @read_timeout
|
|
279
|
+
http.open_timeout = @open_timeout
|
|
280
|
+
http.keep_alive_timeout = @keep_alive_timeout
|
|
281
|
+
http.start
|
|
282
|
+
@http = http
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
# Caller holds @http_mutex, or no other thread can touch @http (shutdown after the sender
|
|
286
|
+
# thread has joined).
|
|
287
|
+
def close_connection
|
|
288
|
+
@http&.finish
|
|
289
|
+
rescue IOError
|
|
290
|
+
# already closed
|
|
291
|
+
ensure
|
|
292
|
+
@http = nil
|
|
293
|
+
end
|
|
294
|
+
|
|
166
295
|
def circuit_open?
|
|
167
296
|
@mutex.synchronize { !@circuit_open_until.nil? && Time.now < @circuit_open_until }
|
|
168
297
|
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "securerandom"
|
|
5
|
+
|
|
6
|
+
module Onlylogs
|
|
7
|
+
# A bounded, on-disk overflow buffer for log batches that could not be delivered.
|
|
8
|
+
#
|
|
9
|
+
# HttpLogger keeps the happy path in memory: only when a send fails or the circuit is open does
|
|
10
|
+
# a batch get written here, to be replayed once the drain recovers (and on the next boot).
|
|
11
|
+
# This turns transient-failure / restart data loss into at-least-once delivery: a batch that was in
|
|
12
|
+
# fact received but whose response was lost will be replayed and show up as a duplicate
|
|
13
|
+
# downstream. Duplicates are an accepted trade for not losing data.
|
|
14
|
+
class Spool
|
|
15
|
+
DEFAULT_MAX_BYTES = 128 * 1024 * 1024 # 128 MB
|
|
16
|
+
|
|
17
|
+
def initialize(dir:, max_bytes: DEFAULT_MAX_BYTES)
|
|
18
|
+
@dir = dir
|
|
19
|
+
@max_bytes = max_bytes
|
|
20
|
+
# Unique per instance so two runs (even with a reused pid) never collide on a filename.
|
|
21
|
+
@token = SecureRandom.hex(4)
|
|
22
|
+
@seq = 0
|
|
23
|
+
@mutex = Mutex.new
|
|
24
|
+
::FileUtils.mkdir_p(@dir)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Persist a batch body. Rolls the oldest batches off first if the byte cap would be exceeded.
|
|
28
|
+
def write(body)
|
|
29
|
+
return if body.nil? || body.empty?
|
|
30
|
+
|
|
31
|
+
@mutex.synchronize do
|
|
32
|
+
evict(body.bytesize)
|
|
33
|
+
seq = (@seq += 1)
|
|
34
|
+
final = ::File.join(@dir, "#{@token}-#{format("%09d", seq)}.batch")
|
|
35
|
+
tmp = "#{final}.tmp"
|
|
36
|
+
# Write to a temp name then rename: rename is atomic, so replay never reads a
|
|
37
|
+
# half-written file (it only globs *.batch).
|
|
38
|
+
::File.binwrite(tmp, body)
|
|
39
|
+
::File.rename(tmp, final)
|
|
40
|
+
end
|
|
41
|
+
rescue => e
|
|
42
|
+
Kernel.warn "Onlylogs::Spool write error: #{e.class}: #{e.message}"
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Replay pending batches oldest-first. Yields each body; if the block returns truthy the file
|
|
46
|
+
# is deleted (delivered), otherwise replay stops and the remaining files are kept for later.
|
|
47
|
+
def replay
|
|
48
|
+
pending_files.each do |path|
|
|
49
|
+
body = read(path)
|
|
50
|
+
next if body.nil? # already claimed/deleted by another process
|
|
51
|
+
|
|
52
|
+
break unless yield(body)
|
|
53
|
+
|
|
54
|
+
delete(path)
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def empty?
|
|
59
|
+
pending_files.empty?
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
# Oldest-first. mtime is the primary key; the zero-padded sequence in the filename breaks
|
|
65
|
+
# ties (and preserves per-process write order when mtimes collide at coarse FS resolution).
|
|
66
|
+
def pending_files
|
|
67
|
+
::Dir.glob(::File.join(@dir, "*.batch")).sort_by { |path| [mtime(path), path] }
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def mtime(path)
|
|
71
|
+
::File.mtime(path)
|
|
72
|
+
rescue Errno::ENOENT
|
|
73
|
+
Time.at(0)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def read(path)
|
|
77
|
+
::File.binread(path)
|
|
78
|
+
rescue Errno::ENOENT
|
|
79
|
+
nil
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def delete(path)
|
|
83
|
+
::File.delete(path)
|
|
84
|
+
rescue Errno::ENOENT
|
|
85
|
+
nil
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Delete oldest batches until `incoming` more bytes fit under the cap.
|
|
89
|
+
def evict(incoming)
|
|
90
|
+
files = pending_files
|
|
91
|
+
total = files.sum { |path| size(path) }
|
|
92
|
+
|
|
93
|
+
while total + incoming > @max_bytes && (oldest = files.shift)
|
|
94
|
+
total -= size(oldest)
|
|
95
|
+
delete(oldest)
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def size(path)
|
|
100
|
+
::File.size(path)
|
|
101
|
+
rescue Errno::ENOENT
|
|
102
|
+
0
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
data/lib/onlylogs/version.rb
CHANGED
data/lib/onlylogs.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: onlylogs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.2
|
|
4
|
+
version: 0.5.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Alessandro Rodi
|
|
@@ -82,6 +82,7 @@ files:
|
|
|
82
82
|
- lib/onlylogs/http_logger.rb
|
|
83
83
|
- lib/onlylogs/logger.rb
|
|
84
84
|
- lib/onlylogs/socket_logger.rb
|
|
85
|
+
- lib/onlylogs/spool.rb
|
|
85
86
|
- lib/onlylogs/version.rb
|
|
86
87
|
- lib/puma/plugin/onlylogs_sidecar.rb
|
|
87
88
|
- lib/tasks/onlylogs_tasks.rake
|