onlylogs 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/onlylogs/http_logger.rb +86 -5
- data/lib/onlylogs/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 43e963f2acccbac4061747abd2606eb298b7ea138404f6e726e03c7a5ffacb4e
|
|
4
|
+
data.tar.gz: c9ee6fb1e854dffddb634be9153d285c8db88836b8a2f7046b63f0a8f0734cfa
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0cf479d9e32a82adb41090a4e4525c4f237e75f7ab52485ac29ce8db5cfaf77fada2434dc675078afabf660ee4d3bf1470f916c13976a3cc11ca5823eb041bbe
|
|
7
|
+
data.tar.gz: 955aa49574feaa833e135a853145858961514ebe4c4a093a3758410b1ad91c5aaa6218f09c4944b29360c0419f9bddd738d4c25b48e99e70b882e95b56b2be60
|
data/lib/onlylogs/http_logger.rb
CHANGED
|
@@ -7,24 +7,52 @@ require "uri"
|
|
|
7
7
|
# Unlike SocketLogger, it does not require a sidecar process or Puma plugin,
|
|
8
8
|
# so it works from any process: Puma, GoodJob, Sidekiq, rake tasks, migrations, etc.
|
|
9
9
|
|
|
10
|
+
# When the drain is unreachable or unresponsive, we do two things to protect the app:
|
|
11
|
+
# * an upper bound to the in-memory queue: log lines can never accumulate without limit and
|
|
12
|
+
# exhaust memory
|
|
13
|
+
# * cooldown: once the drain is known to be failing we stop attempting
|
|
14
|
+
# requests for a cooldown period instead of blocking on every send for the full
|
|
15
|
+
# read timeout (a down host accepts the TCP/TLS connection but never answers).
|
|
10
16
|
module Onlylogs
|
|
11
17
|
class HttpLogger < Onlylogs::Logger
|
|
12
18
|
DEFAULT_BATCH_SIZE = 100
|
|
13
19
|
DEFAULT_FLUSH_INTERVAL = 0.5
|
|
20
|
+
DEFAULT_MAX_QUEUE_SIZE = 10_000
|
|
21
|
+
|
|
22
|
+
# Keep timeouts short: a single slow/dead drain must never stall the app for long.
|
|
23
|
+
DEFAULT_OPEN_TIMEOUT = 0.5
|
|
24
|
+
DEFAULT_READ_TIMEOUT = 0.5
|
|
25
|
+
|
|
26
|
+
# Open the circuit after this many consecutive failed sends
|
|
27
|
+
CIRCUIT_FAILURE_THRESHOLD = 3
|
|
28
|
+
# ...and keep it open for this long once it is open.
|
|
29
|
+
CIRCUIT_COOLDOWN = 30
|
|
14
30
|
|
|
15
31
|
def initialize(
|
|
16
32
|
local_fallback: $stdout,
|
|
17
33
|
drain_url: ENV["ONLYLOGS_DRAIN_URL"],
|
|
18
34
|
batch_size: ENV.fetch("ONLYLOGS_BATCH_SIZE", DEFAULT_BATCH_SIZE).to_i,
|
|
19
|
-
flush_interval: ENV.fetch("ONLYLOGS_FLUSH_INTERVAL", DEFAULT_FLUSH_INTERVAL).to_f
|
|
35
|
+
flush_interval: ENV.fetch("ONLYLOGS_FLUSH_INTERVAL", DEFAULT_FLUSH_INTERVAL).to_f,
|
|
36
|
+
max_queue_size: ENV.fetch("ONLYLOGS_MAX_QUEUE_SIZE", DEFAULT_MAX_QUEUE_SIZE).to_i,
|
|
37
|
+
open_timeout: ENV.fetch("ONLYLOGS_OPEN_TIMEOUT", DEFAULT_OPEN_TIMEOUT).to_f,
|
|
38
|
+
read_timeout: ENV.fetch("ONLYLOGS_READ_TIMEOUT", DEFAULT_READ_TIMEOUT).to_f,
|
|
39
|
+
circuit_cooldown: ENV.fetch("ONLYLOGS_CIRCUIT_COOLDOWN", CIRCUIT_COOLDOWN).to_f
|
|
20
40
|
)
|
|
21
41
|
super(local_fallback)
|
|
22
42
|
@drain_url = drain_url
|
|
23
43
|
@batch_size = batch_size
|
|
24
44
|
@flush_interval = flush_interval
|
|
45
|
+
@max_queue_size = max_queue_size
|
|
46
|
+
@open_timeout = open_timeout
|
|
47
|
+
@read_timeout = read_timeout
|
|
48
|
+
@circuit_cooldown = circuit_cooldown
|
|
25
49
|
@queue = Queue.new
|
|
26
50
|
@mutex = Mutex.new
|
|
27
51
|
|
|
52
|
+
@consecutive_failures = 0
|
|
53
|
+
@circuit_open_until = nil
|
|
54
|
+
@dropped = 0
|
|
55
|
+
|
|
28
56
|
if @drain_url
|
|
29
57
|
start_sender
|
|
30
58
|
else
|
|
@@ -45,7 +73,7 @@ module Onlylogs
|
|
|
45
73
|
end
|
|
46
74
|
|
|
47
75
|
formatted = format_message(format_severity(severity), Time.now, progname, message.to_s)
|
|
48
|
-
|
|
76
|
+
enqueue(formatted.chomp) if formatted
|
|
49
77
|
super
|
|
50
78
|
end
|
|
51
79
|
|
|
@@ -62,6 +90,18 @@ module Onlylogs
|
|
|
62
90
|
|
|
63
91
|
private
|
|
64
92
|
|
|
93
|
+
# Push a line onto the queue unless it is full. Dropping is intentional: blocking the
|
|
94
|
+
# caller (a request thread) or growing without bound (OOM) are both worse than losing
|
|
95
|
+
# logs while the drain is unavailable.
|
|
96
|
+
def enqueue(line)
|
|
97
|
+
if @queue.size >= @max_queue_size
|
|
98
|
+
@mutex.synchronize { @dropped += 1 }
|
|
99
|
+
return
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
@queue << line
|
|
103
|
+
end
|
|
104
|
+
|
|
65
105
|
def start_sender
|
|
66
106
|
@running = true
|
|
67
107
|
|
|
@@ -102,20 +142,61 @@ module Onlylogs
|
|
|
102
142
|
|
|
103
143
|
def send_batch(lines)
|
|
104
144
|
return if lines.empty?
|
|
145
|
+
# Drain is known to be down: skip the request entirely so we don't block for the
|
|
146
|
+
# full read timeout on every batch. The lines are dropped (best-effort logging).
|
|
147
|
+
return if circuit_open?
|
|
105
148
|
|
|
106
149
|
uri = URI.parse(@drain_url)
|
|
107
150
|
http = Net::HTTP.new(uri.host, uri.port)
|
|
108
151
|
http.use_ssl = (uri.scheme == "https")
|
|
109
|
-
http.read_timeout =
|
|
110
|
-
http.open_timeout =
|
|
152
|
+
http.read_timeout = @read_timeout
|
|
153
|
+
http.open_timeout = @open_timeout
|
|
111
154
|
|
|
112
155
|
request = Net::HTTP::Post.new(uri.path)
|
|
113
156
|
request.body = lines.join("\n")
|
|
114
157
|
request.content_type = "text/plain"
|
|
115
158
|
|
|
116
159
|
http.start { |h| h.request(request) }
|
|
160
|
+
record_success
|
|
117
161
|
rescue => e
|
|
118
|
-
|
|
162
|
+
record_failure
|
|
163
|
+
Kernel.warn "Onlylogs::HttpLogger error: #{e.class}: #{e.message}"
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
def circuit_open?
|
|
167
|
+
@mutex.synchronize { !@circuit_open_until.nil? && Time.now < @circuit_open_until }
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
def record_success
|
|
171
|
+
@mutex.synchronize do
|
|
172
|
+
@consecutive_failures = 0
|
|
173
|
+
@circuit_open_until = nil
|
|
174
|
+
end
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
def record_failure
|
|
178
|
+
opened = false
|
|
179
|
+
dropped = 0
|
|
180
|
+
|
|
181
|
+
@mutex.synchronize do
|
|
182
|
+
@consecutive_failures += 1
|
|
183
|
+
next if @consecutive_failures < CIRCUIT_FAILURE_THRESHOLD
|
|
184
|
+
|
|
185
|
+
# (Re)open the circuit. record_failure only runs on a real send attempt — send_batch
|
|
186
|
+
# short-circuits while the circuit is open — so reaching here always means the drain
|
|
187
|
+
# is still down and we should pause again (this is how recovery retries every cooldown).
|
|
188
|
+
@circuit_open_until = Time.now + @circuit_cooldown
|
|
189
|
+
opened = true
|
|
190
|
+
dropped = @dropped
|
|
191
|
+
@dropped = 0
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Warn outside the mutex:
|
|
195
|
+
# doing it inside the lock would re-enter @mutex through add -> enqueue and raise a recursive-lock error.
|
|
196
|
+
return unless opened
|
|
197
|
+
|
|
198
|
+
suffix = dropped.positive? ? " (#{dropped} log lines dropped)" : ""
|
|
199
|
+
Kernel.warn "Onlylogs::HttpLogger: drain unavailable, pausing for #{@circuit_cooldown}s#{suffix}"
|
|
119
200
|
end
|
|
120
201
|
end
|
|
121
202
|
end
|
data/lib/onlylogs/version.rb
CHANGED