onlylogs 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7340d11d402202a753fa7a6426c6e7c3338af2b53779951aced223420e250e8c
4
- data.tar.gz: 6be0ff24e762d27d8929a8cd177b53c0140a181bd5717c803858fcc72436074f
3
+ metadata.gz: f27d7205791730383562092985355eb068e2568ff617ef7d050966347ebc54df
4
+ data.tar.gz: 73b44d11e108d5a030134eeba81fe2f40b49442ef7ceba388d16c3348828d7da
5
5
  SHA512:
6
- metadata.gz: 90a6de7fd76c95f4958955f2542a31caaeaafa970ff09b99452a25abb14cb0e801728139d0551056c4d371bb0d53e39a6b80b000e22eea17527a26dc68e2d054
7
- data.tar.gz: 57e3b9e01bcc10fabeef0a3c11cdccf75f01f6b05843922e678c407db7239cce7d0095408df63017459ec929fa8670f60dae38dff614726f5e47c67ce6d65a43
6
+ metadata.gz: 7e9f5954ab6144be0f8886503b6a1bb6ece1926fa01ea3275e2bf6523b68ac9334025c41d5bf6ef5aca79ced1cab5d33d548c3cd1a79ac248b0f8dcb7048ed33
7
+ data.tar.gz: 986ba53d82a32a5c96ec65a029c543bb0b6123a8e94eb83654bfa6ffbae93327fc8bf295604b0f14faf81042fa358faaa112bac69d54c98b6c254fa258aa193f
@@ -2,38 +2,85 @@
2
2
 
3
3
  require "net/http"
4
4
  require "uri"
5
+ require_relative "spool"
5
6
 
6
7
  # This logger sends messages to onlylogs.io (or any Vector-compatible sink) directly via HTTP.
7
8
  # Unlike SocketLogger, it does not require a sidecar process or Puma plugin,
8
9
  # so it works from any process: Puma, GoodJob, Sidekiq, rake tasks, migrations, etc.
9
10
 
11
+ # When the drain is unreachable or unresponsive, we do two things to protect the app:
12
+ # * an upper bound to the in-memory queue: log lines can never accumulate without limit and
13
+ # exhaust memory
14
+ # * cooldown: once the drain is known to be failing we stop attempting
15
+ # requests for a cooldown period instead of blocking on every send for the full
16
+ # read timeout (a down host accepts the TCP/TLS connection but never answers).
17
+ #
18
+ # By default an on-disk Spool buffers any batch we could not deliver and replays it once the
19
+ # drain recovers, so a transient outage or a restart does not lose logs. It is on by default
20
+ # (set ONLYLOGS_SPOOL_DIR empty to disable) and bounded by bytes; see Onlylogs::Spool.
10
21
  module Onlylogs
11
22
  class HttpLogger < Onlylogs::Logger
12
23
  DEFAULT_BATCH_SIZE = 100
13
24
  DEFAULT_FLUSH_INTERVAL = 0.5
25
+ DEFAULT_MAX_QUEUE_SIZE = 10_000
26
+
27
+ # Keep timeouts short: a single slow/dead drain must never stall the app for long.
28
+ DEFAULT_OPEN_TIMEOUT = 0.5
29
+ DEFAULT_READ_TIMEOUT = 0.5
30
+
31
+ # How long Net::HTTP may keep an idle connection around for reuse. Comfortably longer than
32
+ # the default flush interval so normal traffic reuses one connection across many batches.
33
+ DEFAULT_KEEP_ALIVE_TIMEOUT = 30
34
+
35
+ # Open the circuit after this many consecutive failed sends
36
+ CIRCUIT_FAILURE_THRESHOLD = 3
37
+ # ...and keep it open for this long once it is open.
38
+ CIRCUIT_COOLDOWN = 30
14
39
 
15
40
  def initialize(
16
41
  local_fallback: $stdout,
17
42
  drain_url: ENV["ONLYLOGS_DRAIN_URL"],
18
43
  batch_size: ENV.fetch("ONLYLOGS_BATCH_SIZE", DEFAULT_BATCH_SIZE).to_i,
19
- flush_interval: ENV.fetch("ONLYLOGS_FLUSH_INTERVAL", DEFAULT_FLUSH_INTERVAL).to_f
44
+ flush_interval: ENV.fetch("ONLYLOGS_FLUSH_INTERVAL", DEFAULT_FLUSH_INTERVAL).to_f,
45
+ max_queue_size: ENV.fetch("ONLYLOGS_MAX_QUEUE_SIZE", DEFAULT_MAX_QUEUE_SIZE).to_i,
46
+ open_timeout: ENV.fetch("ONLYLOGS_OPEN_TIMEOUT", DEFAULT_OPEN_TIMEOUT).to_f,
47
+ read_timeout: ENV.fetch("ONLYLOGS_READ_TIMEOUT", DEFAULT_READ_TIMEOUT).to_f,
48
+ circuit_cooldown: ENV.fetch("ONLYLOGS_CIRCUIT_COOLDOWN", CIRCUIT_COOLDOWN).to_f,
49
+ keep_alive_timeout: ENV.fetch("ONLYLOGS_KEEP_ALIVE_TIMEOUT", DEFAULT_KEEP_ALIVE_TIMEOUT).to_f,
50
+ spool_dir: ENV.fetch("ONLYLOGS_SPOOL_DIR", default_spool_dir),
51
+ spool_max_bytes: ENV.fetch("ONLYLOGS_SPOOL_MAX_BYTES", Spool::DEFAULT_MAX_BYTES).to_i
20
52
  )
21
53
  super(local_fallback)
22
54
  @drain_url = drain_url
55
+ @uri = URI.parse(drain_url) if drain_url
23
56
  @batch_size = batch_size
24
57
  @flush_interval = flush_interval
58
+ @max_queue_size = max_queue_size
59
+ @open_timeout = open_timeout
60
+ @read_timeout = read_timeout
61
+ @circuit_cooldown = circuit_cooldown
62
+ @keep_alive_timeout = keep_alive_timeout
25
63
  @queue = Queue.new
26
64
  @mutex = Mutex.new
65
+ @http_mutex = Mutex.new
66
+ @http = nil
67
+ @spool = nil
68
+
69
+ @consecutive_failures = 0
70
+ @circuit_open_until = nil
71
+ @dropped = 0
27
72
 
28
73
  if @drain_url
74
+ @spool = build_spool(spool_dir, spool_max_bytes)
29
75
  start_sender
30
76
  else
31
- $stderr.puts "Onlylogs::HttpLogger error: ONLYLOGS_DRAIN_URL is not set; logger is disabled." # rubocop:disable Style/StderrPuts
77
+ $stderr.puts "Onlylogs::HttpLogger: ONLYLOGS_DRAIN_URL is not set; logging locally only." # rubocop:disable Style/StderrPuts
32
78
  end
33
79
  end
34
80
 
35
81
  def add(severity, message = nil, progname = nil, &block)
36
- return true unless @drain_url
82
+ # No drain configured: behave as a plain local logger instead of dropping everything.
83
+ return super unless @drain_url
37
84
 
38
85
  if message.nil?
39
86
  if block_given?
@@ -45,7 +92,7 @@ module Onlylogs
45
92
  end
46
93
 
47
94
  formatted = format_message(format_severity(severity), Time.now, progname, message.to_s)
48
- @queue << formatted.chomp if formatted && @drain_url
95
+ enqueue(formatted.chomp) if formatted
49
96
  super
50
97
  end
51
98
 
@@ -53,6 +100,7 @@ module Onlylogs
53
100
  flush
54
101
  @running = false
55
102
  @sender_thread&.join(2)
103
+ close_connection
56
104
  end
57
105
 
58
106
  def flush
@@ -62,10 +110,25 @@ module Onlylogs
62
110
 
63
111
  private
64
112
 
113
+ # Push a line onto the queue unless it is full. Dropping is intentional: blocking the
114
+ # caller (a request thread) or growing without bound (OOM) are both worse than losing
115
+ # logs while the drain is unavailable.
116
# Appends +line+ to the in-memory queue while it holds fewer than @max_queue_size entries.
# Once the queue is at capacity the line is discarded and counted in @dropped (under @mutex).
# Returns the queue when the line was accepted, nil when it was dropped.
def enqueue(line)
  return @queue << line if @queue.size < @max_queue_size

  @mutex.synchronize { @dropped += 1 }
  nil
end
124
+
65
125
  def start_sender
66
126
  @running = true
67
127
 
68
128
  @sender_thread = Thread.new do
129
+ # Replay anything left in the spool by a previous run or a crashed/redeployed sibling.
130
+ drain_spool
131
+
69
132
  batch = []
70
133
  last_flush = Time.now
71
134
 
@@ -103,19 +166,166 @@ module Onlylogs
103
166
  def send_batch(lines)
104
167
  return if lines.empty?
105
168
 
106
- uri = URI.parse(@drain_url)
107
- http = Net::HTTP.new(uri.host, uri.port)
108
- http.use_ssl = (uri.scheme == "https")
109
- http.read_timeout = 5
110
- http.open_timeout = 2
169
+ body = lines.join("\n")
111
170
 
112
- request = Net::HTTP::Post.new(uri.path)
113
- request.body = lines.join("\n")
114
- request.content_type = "text/plain"
171
+ # Drain is known to be down: skip the request entirely so we don't block for the full read
172
+ # timeout on every batch. Buffer the batch so the cooldown does not cost us data (without a
173
+ # spool configured, spool_write is a no-op and the batch is dropped — best-effort logging).
174
+ if circuit_open?
175
+ spool_write(body)
176
+ return
177
+ end
115
178
 
116
- http.start { |h| h.request(request) }
179
+ deliver(body)
180
+ record_success
181
+ # The drain just answered: replay anything we had buffered while it was unavailable.
182
+ drain_spool
117
183
  rescue => e
118
- warn "Onlylogs::HttpLogger error: #{e.class}: #{e.message}"
184
+ record_failure
185
+ spool_write(body)
186
+ Kernel.warn "Onlylogs::HttpLogger error: #{e.class}: #{e.message}"
187
+ end
188
+
189
# Hands +body+ to the spool when one is configured; does nothing (returns nil) otherwise.
def spool_write(body)
  return unless @spool

  @spool.write(body)
end
192
+
193
+ # Replay buffered batches now that the drain is responding. Oldest first; stop at the first
194
+ # failure (record it and leave the rest on disk) so a drain that just went down again does not
195
+ # burn the whole backlog into the void.
196
# Replays spooled batches through the drain. Does nothing when no spool is configured.
# The spool receives the outcome of each batch (true/false) from replay_spooled_batch.
def drain_spool
  return unless @spool

  @spool.replay { |payload| replay_spooled_batch(payload) }
end

# Delivers a single spooled body. Returns true after a successful delivery; on any
# StandardError records the failure, warns and returns false.
def replay_spooled_batch(payload)
  deliver(payload)
  record_success
  true
rescue => error
  record_failure
  Kernel.warn "Onlylogs::HttpLogger replay error: #{error.class}: #{error.message}"
  false
end
209
+
210
# Creates the Spool for +dir+, or returns nil when +dir+ is nil or blank.
# Any StandardError raised while building it is reported with a warning and yields nil,
# so a broken spool directory disables spooling instead of breaking the logger.
def build_spool(dir, max_bytes)
  blank = dir.nil? || dir.to_s.strip.empty?
  blank ? nil : Spool.new(dir: dir, max_bytes: max_bytes)
rescue => error
  Kernel.warn "Onlylogs::HttpLogger: spool disabled (#{error.class}: #{error.message})"
  nil
end
218
+
219
+ # The spool is on by default. It lives under the app's tmp dir, which survives a drain outage
220
+ # while the app keeps running; point ONLYLOGS_SPOOL_DIR at a persistent volume to also survive
221
+ # redeploys, or set it empty to disable.
222
# Default spool location: <base>/tmp/onlylogs/spool, where <base> is Rails.root when Rails
# is loaded and exposes a non-nil root, and the current working directory otherwise.
def default_spool_dir
  rails_root = (Rails.root if defined?(Rails) && Rails.respond_to?(:root))
  base = rails_root ? rails_root.to_s : ::Dir.pwd

  ::File.join(base, "tmp", "onlylogs", "spool")
end
231
+
232
+ # POST the body over a persistent (kept-alive) connection.
233
# POSTs +body+ through the memoized connection while holding @http_mutex.
#
# A request that fails on a connection that already existed is retried once on a fresh
# connection; a failure on a freshly opened connection is re-raised immediately. The
# connection is closed after every failed attempt. The response is then validated by
# ensure_success!, outside the retry handling, so a non-2xx answer is never retried here.
def deliver(body)
  @http_mutex.synchronize do
    response = nil

    2.times do |attempt|
      reused = !@http.nil?

      begin
        response = connection.request(build_request(body))
        break
      rescue
        close_connection
        # Only the first attempt on a pre-existing connection earns a second try.
        raise unless reused && attempt.zero?
      end
    end

    ensure_success!(response)
  end
end
252
+
253
+ # Net::HTTP does not raise on 4xx/5xx; it returns the response. Treat any non-2xx as a
254
+ # failed delivery so send_batch records it and the circuit can open. Without this a drain
255
+ # that is up but answering 500/413 would look like success and we'd silently drop every batch.
256
# Raises a RuntimeError naming the status code and message unless +response+ is a
# Net::HTTPSuccess. Returns nil for a successful response.
def ensure_success!(response)
  raise "drain responded #{response.code} #{response.message}" unless response.is_a?(Net::HTTPSuccess)
end
261
+
262
# Builds the text/plain POST for +body+. The target is @uri.request_uri, which is "/" for a
# drain URL without a path and includes the query string when there is one.
def build_request(body)
  Net::HTTP::Post.new(@uri.request_uri).tap do |post|
    post.body = body
    post.content_type = "text/plain"
  end
end
270
+
271
+ # Lazily opens and memoizes the connection. Only assigns @http once #start succeeds, so a
272
+ # failed connect leaves @http nil and the next send starts clean. Caller holds @http_mutex.
273
# Returns the memoized connection, opening one on first use. @http is assigned only after
# #start has returned, so a failed connect leaves it nil. Callers hold @http_mutex.
def connection
  @http ||= Net::HTTP.new(@uri.host, @uri.port).tap do |session|
    session.use_ssl = (@uri.scheme == "https")
    session.read_timeout = @read_timeout
    session.open_timeout = @open_timeout
    session.keep_alive_timeout = @keep_alive_timeout
    session.start
  end
end
284
+
285
+ # Caller holds @http_mutex, or no other thread can touch @http (shutdown after the sender
286
+ # thread has joined).
287
# Finishes the current connection, if any, and always leaves @http nil. An IOError from
# #finish (the session was not open) is ignored. Callers hold @http_mutex, or run once no
# other thread can touch @http.
def close_connection
  stale, @http = @http, nil
  stale.finish if stale
rescue IOError
  nil
end
294
+
295
# True while a cooldown deadline is set and has not passed yet. Reads state under @mutex.
def circuit_open?
  @mutex.synchronize do
    deadline = @circuit_open_until
    deadline.nil? ? false : Time.now < deadline
  end
end
298
+
299
# Resets the circuit breaker under @mutex: zero consecutive failures, no cooldown deadline.
def record_success
  @mutex.synchronize do
    @consecutive_failures, @circuit_open_until = 0, nil
    nil
  end
end
305
+
306
# Counts one failed send. Once the consecutive-failure count reaches
# CIRCUIT_FAILURE_THRESHOLD the circuit is (re)opened for @circuit_cooldown seconds, the
# dropped-line counter is reset, and a warning reports the pause and any dropped lines.
def record_failure
  # The block yields the number of dropped lines when the circuit was opened, nil otherwise.
  dropped = @mutex.synchronize do
    @consecutive_failures += 1

    if @consecutive_failures >= CIRCUIT_FAILURE_THRESHOLD
      @circuit_open_until = Time.now + @circuit_cooldown
      lost, @dropped = @dropped, 0
      lost
    end
  end

  # The warning is emitted only after @mutex has been released.
  return if dropped.nil?

  suffix = dropped.positive? ? " (#{dropped} log lines dropped)" : ""
  Kernel.warn "Onlylogs::HttpLogger: drain unavailable, pausing for #{@circuit_cooldown}s#{suffix}"
end
120
330
  end
121
331
  end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+ require "securerandom"
5
+
6
module Onlylogs
  # A bounded, on-disk overflow buffer for log batches that could not be delivered.
  #
  # Each batch is stored as one "<token>-<sequence>.batch" file inside the spool directory.
  # #write adds a batch (evicting the oldest ones to stay under the byte cap) and #replay
  # yields pending batches oldest-first, deleting each one the caller reports as delivered.
  # A batch whose delivery succeeded but was reported as failed will be replayed again, so
  # consumers must tolerate duplicates.
  class Spool
    DEFAULT_MAX_BYTES = 128 * 1024 * 1024 # 128 MB

    # @param dir [String] directory holding the batch files; created if missing
    # @param max_bytes [Integer] cap on the total size of pending batch files
    def initialize(dir:, max_bytes: DEFAULT_MAX_BYTES)
      @dir = dir
      @max_bytes = max_bytes
      # Random per-instance prefix so separate instances do not produce the same filename.
      @token = SecureRandom.hex(4)
      @seq = 0
      @mutex = Mutex.new
      ::FileUtils.mkdir_p(@dir)
    end

    # Persists one batch body. nil or empty bodies are ignored.
    #
    # A body larger than the whole cap is dropped with a warning: evicting for it would
    # delete every pending batch and the new file would still exceed the cap.
    # Errors while writing are reported with a warning and never raised to the caller.
    #
    # @param body [String, nil] the batch payload
    def write(body)
      return if body.nil? || body.empty?

      if body.bytesize > @max_bytes
        Kernel.warn "Onlylogs::Spool: dropping #{body.bytesize}-byte batch larger than the #{@max_bytes}-byte cap"
        return
      end

      @mutex.synchronize do
        evict(body.bytesize)
        seq = (@seq += 1)
        final = ::File.join(@dir, format("%s-%09d.batch", @token, seq))
        tmp = "#{final}.tmp"

        begin
          # Write under a temporary name, then rename: replay only globs *.batch, so it
          # never picks up a partially written file.
          ::File.binwrite(tmp, body)
          ::File.rename(tmp, final)
        rescue
          # Do not leave the temporary file behind: eviction only looks at *.batch files,
          # so a leftover .tmp would occupy disk space that is never reclaimed.
          ::FileUtils.rm_f(tmp)
          raise
        end
      end
    rescue => e
      Kernel.warn "Onlylogs::Spool write error: #{e.class}: #{e.message}"
    end

    # Yields each pending batch body, oldest first. When the block returns a truthy value
    # the file is deleted; otherwise replay stops and the remaining files are kept.
    # Files that disappeared between listing and reading are skipped.
    def replay
      pending_files.each do |path|
        body = read(path)
        next if body.nil?

        break unless yield(body)

        delete(path)
      end
    end

    # @return [Boolean] true when no batch file is pending
    def empty?
      pending_files.empty?
    end

    private

    # Pending batch files sorted by modification time, with the path as tie-breaker (the
    # zero-padded sequence number keeps one instance's files in write order).
    def pending_files
      ::Dir.glob(::File.join(@dir, "*.batch")).sort_by { |path| [mtime(path), path] }
    end

    # Modification time of +path+, or the epoch when the file no longer exists.
    def mtime(path)
      ::File.mtime(path)
    rescue Errno::ENOENT
      Time.at(0)
    end

    # Binary contents of +path+, or nil when the file no longer exists.
    def read(path)
      ::File.binread(path)
    rescue Errno::ENOENT
      nil
    end

    # Deletes +path+; a file that is already gone is not an error.
    def delete(path)
      ::File.delete(path)
    rescue Errno::ENOENT
      nil
    end

    # Deletes the oldest batches until +incoming+ more bytes fit under the cap.
    def evict(incoming)
      files = pending_files
      total = files.sum { |path| size(path) }

      while total + incoming > @max_bytes && (oldest = files.shift)
        total -= size(oldest)
        delete(oldest)
      end
    end

    # Size of +path+ in bytes, or 0 when the file no longer exists.
    def size(path)
      ::File.size(path)
    rescue Errno::ENOENT
      0
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Onlylogs
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.3"
3
3
  end
data/lib/onlylogs.rb CHANGED
@@ -3,6 +3,7 @@ require "onlylogs/configuration"
3
3
  require "onlylogs/engine"
4
4
  require "onlylogs/formatter"
5
5
  require "onlylogs/logger"
6
+ require "onlylogs/spool"
6
7
  require "onlylogs/socket_logger"
7
8
  require "onlylogs/http_logger"
8
9
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onlylogs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alessandro Rodi
@@ -82,6 +82,7 @@ files:
82
82
  - lib/onlylogs/http_logger.rb
83
83
  - lib/onlylogs/logger.rb
84
84
  - lib/onlylogs/socket_logger.rb
85
+ - lib/onlylogs/spool.rb
85
86
  - lib/onlylogs/version.rb
86
87
  - lib/puma/plugin/onlylogs_sidecar.rb
87
88
  - lib/tasks/onlylogs_tasks.rake