onlylogs 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7340d11d402202a753fa7a6426c6e7c3338af2b53779951aced223420e250e8c
4
- data.tar.gz: 6be0ff24e762d27d8929a8cd177b53c0140a181bd5717c803858fcc72436074f
3
+ metadata.gz: 43e963f2acccbac4061747abd2606eb298b7ea138404f6e726e03c7a5ffacb4e
4
+ data.tar.gz: c9ee6fb1e854dffddb634be9153d285c8db88836b8a2f7046b63f0a8f0734cfa
5
5
  SHA512:
6
- metadata.gz: 90a6de7fd76c95f4958955f2542a31caaeaafa970ff09b99452a25abb14cb0e801728139d0551056c4d371bb0d53e39a6b80b000e22eea17527a26dc68e2d054
7
- data.tar.gz: 57e3b9e01bcc10fabeef0a3c11cdccf75f01f6b05843922e678c407db7239cce7d0095408df63017459ec929fa8670f60dae38dff614726f5e47c67ce6d65a43
6
+ metadata.gz: 0cf479d9e32a82adb41090a4e4525c4f237e75f7ab52485ac29ce8db5cfaf77fada2434dc675078afabf660ee4d3bf1470f916c13976a3cc11ca5823eb041bbe
7
+ data.tar.gz: 955aa49574feaa833e135a853145858961514ebe4c4a093a3758410b1ad91c5aaa6218f09c4944b29360c0419f9bddd738d4c25b48e99e70b882e95b56b2be60
@@ -7,24 +7,52 @@ require "uri"
7
7
  # Unlike SocketLogger, it does not require a sidecar process or Puma plugin,
8
8
  # so it works from any process: Puma, GoodJob, Sidekiq, rake tasks, migrations, etc.
9
9
 
10
+ # When the drain is unreachable or unresponsive, we do two things to protect the app:
11
+ # * enforce an upper bound on the in-memory queue, so log lines can never accumulate without limit and
12
+ # exhaust memory
13
+ # * apply a cooldown: once the drain is known to be failing, we stop attempting
14
+ # requests for a cooldown period instead of blocking on every send for the full
15
+ # read timeout (a down host accepts the TCP/TLS connection but never answers).
10
16
  module Onlylogs
11
17
  class HttpLogger < Onlylogs::Logger
12
18
  DEFAULT_BATCH_SIZE = 100
13
19
  DEFAULT_FLUSH_INTERVAL = 0.5
20
+ DEFAULT_MAX_QUEUE_SIZE = 10_000
21
+
22
+ # Keep timeouts short: a single slow/dead drain must never stall the app for long.
23
+ DEFAULT_OPEN_TIMEOUT = 0.5
24
+ DEFAULT_READ_TIMEOUT = 0.5
25
+
26
+ # Open the circuit after this many consecutive failed sends...
27
+ CIRCUIT_FAILURE_THRESHOLD = 3
28
+ # ...and keep it open for this long once it is open.
29
+ CIRCUIT_COOLDOWN = 30
14
30
 
15
31
  def initialize(
16
32
  local_fallback: $stdout,
17
33
  drain_url: ENV["ONLYLOGS_DRAIN_URL"],
18
34
  batch_size: ENV.fetch("ONLYLOGS_BATCH_SIZE", DEFAULT_BATCH_SIZE).to_i,
19
- flush_interval: ENV.fetch("ONLYLOGS_FLUSH_INTERVAL", DEFAULT_FLUSH_INTERVAL).to_f
35
+ flush_interval: ENV.fetch("ONLYLOGS_FLUSH_INTERVAL", DEFAULT_FLUSH_INTERVAL).to_f,
36
+ max_queue_size: ENV.fetch("ONLYLOGS_MAX_QUEUE_SIZE", DEFAULT_MAX_QUEUE_SIZE).to_i,
37
+ open_timeout: ENV.fetch("ONLYLOGS_OPEN_TIMEOUT", DEFAULT_OPEN_TIMEOUT).to_f,
38
+ read_timeout: ENV.fetch("ONLYLOGS_READ_TIMEOUT", DEFAULT_READ_TIMEOUT).to_f,
39
+ circuit_cooldown: ENV.fetch("ONLYLOGS_CIRCUIT_COOLDOWN", CIRCUIT_COOLDOWN).to_f
20
40
  )
21
41
  super(local_fallback)
22
42
  @drain_url = drain_url
23
43
  @batch_size = batch_size
24
44
  @flush_interval = flush_interval
45
+ @max_queue_size = max_queue_size
46
+ @open_timeout = open_timeout
47
+ @read_timeout = read_timeout
48
+ @circuit_cooldown = circuit_cooldown
25
49
  @queue = Queue.new
26
50
  @mutex = Mutex.new
27
51
 
52
+ @consecutive_failures = 0
53
+ @circuit_open_until = nil
54
+ @dropped = 0
55
+
28
56
  if @drain_url
29
57
  start_sender
30
58
  else
@@ -45,7 +73,7 @@ module Onlylogs
45
73
  end
46
74
 
47
75
  formatted = format_message(format_severity(severity), Time.now, progname, message.to_s)
48
- @queue << formatted.chomp if formatted && @drain_url
76
+ enqueue(formatted.chomp) if formatted
49
77
  super
50
78
  end
51
79
 
@@ -62,6 +90,18 @@ module Onlylogs
62
90
 
63
91
  private
64
92
 
93
+ # Push a line onto the queue unless it is full. Dropping is intentional: blocking the
94
+ # caller (a request thread) or growing without bound (OOM) are both worse than losing
95
+ # logs while the drain is unavailable.
96
+ def enqueue(line)
97
+ if @queue.size >= @max_queue_size
98
+ @mutex.synchronize { @dropped += 1 }
99
+ return
100
+ end
101
+
102
+ @queue << line
103
+ end
104
+
65
105
  def start_sender
66
106
  @running = true
67
107
 
@@ -102,20 +142,61 @@ module Onlylogs
102
142
 
103
143
  def send_batch(lines)
104
144
  return if lines.empty?
145
+ # Drain is known to be down: skip the request entirely so we don't block for the
146
+ # full read timeout on every batch. The lines are dropped (best-effort logging).
147
+ return if circuit_open?
105
148
 
106
149
  uri = URI.parse(@drain_url)
107
150
  http = Net::HTTP.new(uri.host, uri.port)
108
151
  http.use_ssl = (uri.scheme == "https")
109
- http.read_timeout = 5
110
- http.open_timeout = 2
152
+ http.read_timeout = @read_timeout
153
+ http.open_timeout = @open_timeout
111
154
 
112
155
  request = Net::HTTP::Post.new(uri.path)
113
156
  request.body = lines.join("\n")
114
157
  request.content_type = "text/plain"
115
158
 
116
159
  http.start { |h| h.request(request) }
160
+ record_success
117
161
  rescue => e
118
- warn "Onlylogs::HttpLogger error: #{e.class}: #{e.message}"
162
+ record_failure
163
+ Kernel.warn "Onlylogs::HttpLogger error: #{e.class}: #{e.message}"
164
+ end
165
+
166
+ def circuit_open?
167
+ @mutex.synchronize { !@circuit_open_until.nil? && Time.now < @circuit_open_until }
168
+ end
169
+
170
+ def record_success
171
+ @mutex.synchronize do
172
+ @consecutive_failures = 0
173
+ @circuit_open_until = nil
174
+ end
175
+ end
176
+
177
+ def record_failure
178
+ opened = false
179
+ dropped = 0
180
+
181
+ @mutex.synchronize do
182
+ @consecutive_failures += 1
183
+ next if @consecutive_failures < CIRCUIT_FAILURE_THRESHOLD
184
+
185
+ # (Re)open the circuit. record_failure only runs on a real send attempt — send_batch
186
+ # short-circuits while the circuit is open — so reaching here always means the drain
187
+ # is still down and we should pause again (this is how recovery retries every cooldown).
188
+ @circuit_open_until = Time.now + @circuit_cooldown
189
+ opened = true
190
+ dropped = @dropped
191
+ @dropped = 0
192
+ end
193
+
194
+ # Warn outside the mutex:
195
+ # doing it inside the lock would re-enter @mutex through add -> enqueue and raise a recursive-lock error.
196
+ return unless opened
197
+
198
+ suffix = dropped.positive? ? " (#{dropped} log lines dropped)" : ""
199
+ Kernel.warn "Onlylogs::HttpLogger: drain unavailable, pausing for #{@circuit_cooldown}s#{suffix}"
119
200
  end
120
201
  end
121
202
  end
@@ -1,3 +1,3 @@
1
1
  module Onlylogs
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onlylogs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alessandro Rodi