logtide 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +23 -0
- data/LICENSE +21 -0
- data/README.md +168 -0
- data/lib/logtide/breadcrumb.rb +65 -0
- data/lib/logtide/circuit_breaker.rb +93 -0
- data/lib/logtide/client.rb +255 -0
- data/lib/logtide/configuration.rb +94 -0
- data/lib/logtide/dsn.rb +56 -0
- data/lib/logtide/error.rb +10 -0
- data/lib/logtide/event.rb +122 -0
- data/lib/logtide/hub.rb +101 -0
- data/lib/logtide/logger_bridge.rb +52 -0
- data/lib/logtide/metrics.rb +25 -0
- data/lib/logtide/rack/middleware.rb +111 -0
- data/lib/logtide/rails/railtie.rb +23 -0
- data/lib/logtide/retry_policy.rb +32 -0
- data/lib/logtide/scope.rb +94 -0
- data/lib/logtide/structured_exception.rb +94 -0
- data/lib/logtide/tracing/span.rb +89 -0
- data/lib/logtide/tracing.rb +61 -0
- data/lib/logtide/transport/batcher.rb +209 -0
- data/lib/logtide/transport/buffer.rb +37 -0
- data/lib/logtide/transport/http.rb +94 -0
- data/lib/logtide/transport/otlp.rb +78 -0
- data/lib/logtide/version.rb +8 -0
- data/lib/logtide.rb +152 -0
- metadata +73 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "breadcrumb"
|
|
4
|
+
|
|
5
|
+
module Logtide
|
|
6
|
+
# Per-request/per-task context merged into every entry captured while active
|
|
7
|
+
# (spec 004 section 4). Cloned per request so concurrent work never shares
|
|
8
|
+
# mutable state.
|
|
9
|
+
class Scope
  attr_reader :tags, :user, :session_id, :extra, :trace_id, :span_id, :breadcrumbs, :span

  # Builds an empty scope. max_breadcrumbs is handed to BreadcrumbBuffer.new
  # unchanged.
  def initialize(max_breadcrumbs: 100)
    @tags = {}
    @user = nil
    @session_id = nil
    @extra = {}
    @trace_id = nil
    @span_id = nil
    @span = nil
    @breadcrumbs = BreadcrumbBuffer.new(max_breadcrumbs)
    @event_processors = []
  end

  # Every mutator below returns the scope itself so calls can be chained.

  # Stores the given span object on the scope.
  def set_span(span)
    tap { @span = span }
  end

  # Stores a tag; both key and value are converted with to_s.
  def set_tag(key, value)
    tap { @tags[key.to_s] = value.to_s }
  end

  # Deletes the tag stored under key.to_s, if any.
  def remove_tag(key)
    tap { @tags.delete(key.to_s) }
  end

  # Replaces the user with a copy whose keys are strings; nil clears it.
  def set_user(user)
    @user = (stringify(user) unless user.nil?)
    self
  end

  # Stores the session id as given.
  def set_session_id(session_id)
    tap { @session_id = session_id }
  end

  # Stores an extra value under key.to_s; the value is kept as given.
  def set_extra(key, value)
    tap { @extra[key.to_s] = value }
  end

  # Sets the trace id and span id together.
  def set_trace_context(trace_id, span_id)
    @trace_id, @span_id = trace_id, span_id
    self
  end

  # Appends a breadcrumb to the breadcrumb buffer.
  def add_breadcrumb(breadcrumb)
    tap { @breadcrumbs.add(breadcrumb) }
  end

  # Empties the breadcrumb buffer.
  def clear_breadcrumbs
    tap { @breadcrumbs.clear }
  end

  # Registers an event processor after the ones already present.
  def add_event_processor(processor)
    tap { @event_processors << processor }
  end

  # Returns a copy of the processor list, so callers cannot mutate the
  # scope's own array through it.
  def event_processors
    @event_processors.dup
  end

  # Called by dup/clone: gives the copy its own tags, extra, user,
  # breadcrumbs and processor list so changes to one scope do not leak into
  # the other. The remaining fields are copied by reference via super.
  def initialize_copy(other)
    super
    @tags, @extra = other.tags.dup, other.extra.dup
    @user = other.user&.dup
    @breadcrumbs = other.breadcrumbs.dup
    # The reader already returns a fresh array.
    @event_processors = other.event_processors
  end

  private

  # Returns a new Hash with every key converted via to_s; values untouched.
  def stringify(hash)
    result = {}
    hash.each { |(name, value)| result[name.to_s] = value }
    result
  end
end
|
|
94
|
+
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rbconfig"
|
|
4
|
+
|
|
5
|
+
module Logtide
|
|
6
|
+
# Serialises a Ruby exception into the StructuredException wire shape
|
|
7
|
+
# (spec 003 section 4): type, message, language, stacktrace (outermost first),
|
|
8
|
+
# the cause chain (capped at 10, cycle-safe) and an optional raw trace.
|
|
9
|
+
module StructuredException
  LANGUAGE = "ruby"
  MAX_CAUSE_DEPTH = 10
  MAX_FRAMES = 100

  module_function

  # Turns an exception (and its cause chain) into a Hash with string keys.
  # With include_stacktrace: false the "stacktrace" and "raw" keys are left
  # out at every level of the chain.
  def serialize(exception, include_stacktrace: true)
    visited = {}.compare_by_identity
    build(exception, 0, visited, include_stacktrace)
  end

  # Recursive worker behind serialize. `seen` is an identity-keyed Hash of
  # exceptions already emitted; `depth` counts how many causes were followed.
  # Key order of the result: type, message, language, stacktrace, raw, cause.
  def build(exception, depth, seen, include_stacktrace)
    seen[exception] = true

    payload = {}
    payload["type"] = type_of(exception)
    payload["message"] = message_of(exception)
    payload["language"] = LANGUAGE

    if include_stacktrace
      frames = stacktrace(exception)
      payload["stacktrace"] = frames if frames.any?
      text = raw_text(exception)
      payload["raw"] = text if text
    end

    parent = safe_cause(exception)
    if follow_cause?(parent, depth, seen)
      payload["cause"] = build(parent, depth + 1, seen, include_stacktrace)
    end

    payload
  end

  # Class name of the exception, or "Exception" when the class has no name
  # (anonymous classes).
  def type_of(exception)
    class_name = exception.class.name
    return "Exception" if class_name.nil? || class_name.empty?

    class_name
  end

  # The exception message as a String; falls back to the type when empty.
  def message_of(exception)
    text = exception.message.to_s
    return type_of(exception) if text.empty?

    text
  end

  # Structured frames, outermost call first. Returns [] when the exception
  # carries no backtrace locations.
  def stacktrace(exception)
    locations = exception.backtrace_locations
    return [] unless locations

    # backtrace_locations is innermost-first, so the outermost MAX_FRAMES
    # frames are the tail of the array; walk it backwards to emit them
    # outermost-first.
    locations.last(MAX_FRAMES).reverse_each.map { |location| frame(location) }
  end

  # One backtrace location as a wire frame.
  def frame(location)
    file = location.absolute_path || location.path
    {
      "file" => file,
      "function" => location.label,
      "line" => location.lineno,
      "metadata" => { "in_app" => in_app?(file) }
    }
  end

  # False for a missing path, a path containing "/gems/", or a path under
  # Ruby's own library directory; true otherwise.
  def in_app?(path)
    return false unless path
    return false if path.include?("/gems/")

    !path.start_with?(RbConfig::CONFIG["rubylibdir"].to_s)
  end

  # "Type: message" followed by the backtrace lines, newline-joined.
  # Returns nil when the exception has no backtrace.
  def raw_text(exception)
    lines = exception.backtrace
    return nil unless lines

    header = "#{type_of(exception)}: #{message_of(exception)}"
    [header, *lines].join("\n")
  end

  # Whether to descend into `cause`: it must exist, the depth cap must not
  # be reached, and it must not have been emitted already (cycle guard).
  def follow_cause?(cause, depth, seen)
    return cause unless cause
    return false unless depth < MAX_CAUSE_DEPTH

    !seen.key?(cause)
  end

  # The exception's cause when it is an Exception, otherwise nil. Errors
  # raised while reading the cause are swallowed and treated as "no cause".
  def safe_cause(exception)
    candidate = exception.cause
    candidate.is_a?(Exception) ? candidate : nil
  rescue StandardError
    nil
  end
end
|
|
94
|
+
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Logtide
|
|
4
|
+
module Tracing
|
|
5
|
+
# A span in the SDK tracing API, exported as OTLP/JSON (spec 005 section 3).
|
|
6
|
+
class Span
  KINDS = { internal: 1, server: 2, client: 3, producer: 4, consumer: 5 }.freeze
  STATUS = { unset: 0, ok: 1, error: 2 }.freeze

  attr_reader :name, :trace_id, :span_id, :parent_span_id, :kind

  # start_time is a Unix timestamp in nanoseconds; when omitted the current
  # wall-clock time is used. reporter, if given, is called with the span
  # once, from #finish, and only for sampled spans.
  def initialize(name:, trace_id:, span_id:, parent_span_id: nil,
                 kind: :internal, sampled: true, start_time: nil, reporter: nil)
    @name = name
    @trace_id = trace_id
    @span_id = span_id
    @parent_span_id = parent_span_id
    @kind = kind
    @sampled = sampled
    @start_time = start_time || now_nano
    @reporter = reporter
    @attributes = {}
    @events = []
    @status = :unset
    @end_time = nil
    @finished = false
  end

  # The sampling decision passed to the constructor.
  def sampled?
    @sampled
  end

  # Same value as #sampled?.
  def recording?
    @sampled
  end

  # Stores an attribute under key.to_s. Returns self for chaining.
  def set_attribute(key, value)
    @attributes.store(key.to_s, value)
    self
  end

  # Records a named event stamped with the current time. Returns self.
  def add_event(name)
    @events.push({ name: name, time: now_nano })
    self
  end

  # Ends the span with the given status. Only the first call has any
  # effect; later calls return self without touching the span.
  def finish(status = :ok)
    unless @finished
      @finished = true
      @end_time = now_nano
      @status = status
      # Unsampled spans are never handed to the reporter.
      @reporter.call(self) if @sampled && @reporter
    end
    self
  end

  # The span as an OTLP/JSON-shaped Hash. Timestamps are emitted as decimal
  # strings; an unfinished span reports its start time as its end time.
  # Unknown kinds map to 1 and unknown statuses to 0.
  def to_otlp
    finished_at = @end_time || @start_time
    attributes = @attributes.map { |pair| otlp_attribute(*pair) }
    events = @events.map do |event|
      { "name" => event[:name], "timeUnixNano" => event[:time].to_s }
    end

    payload = {
      "traceId" => @trace_id,
      "spanId" => @span_id,
      "name" => @name,
      "kind" => KINDS.fetch(@kind, 1),
      "startTimeUnixNano" => @start_time.to_s,
      "endTimeUnixNano" => finished_at.to_s,
      "attributes" => attributes,
      "events" => events,
      "status" => { "code" => STATUS.fetch(@status, 0) }
    }
    # parentSpanId is only present for child spans and goes last.
    @parent_span_id ? payload.merge("parentSpanId" => @parent_span_id) : payload
  end

  private

  # One attribute as an OTLP key/value pair.
  def otlp_attribute(key, value)
    { "key" => key, "value" => otlp_value(value) }
  end

  # Wraps a Ruby value in the matching OTLP typed-value Hash; anything that
  # is not an Integer, Float or boolean is sent as its to_s string.
  def otlp_value(value)
    return { "intValue" => value } if value.is_a?(Integer)
    return { "doubleValue" => value } if value.is_a?(Float)
    return { "boolValue" => value } if [true, false].include?(value)

    { "stringValue" => value.to_s }
  end

  # Wall-clock time in integer nanoseconds since the Unix epoch.
  def now_nano
    Process.clock_gettime(Process::CLOCK_REALTIME, :nanosecond)
  end
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "securerandom"
|
|
4
|
+
|
|
5
|
+
module Logtide
|
|
6
|
+
# W3C trace context: identifier generation and traceparent parsing/formatting
|
|
7
|
+
# (spec 005 sections 1-2).
|
|
8
|
+
module Tracing
  module_function

  # Returns a random trace id: 32 lowercase hex characters, never all zeros
  # (the all-zero id is invalid in W3C trace context, so it is re-drawn).
  def generate_trace_id
    loop do
      id = SecureRandom.hex(16)
      return id unless id.match?(/\A0+\z/)
    end
  end

  # Returns a random span id: 16 lowercase hex characters, never all zeros.
  def generate_span_id
    loop do
      id = SecureRandom.hex(8)
      return id unless id.match?(/\A0+\z/)
    end
  end

  # Parsing and building of the `traceparent` header (00-trace-span-flags).
  module Propagation
    Parsed = Struct.new(:trace_id, :parent_span_id, :sampled, keyword_init: true)

    TRACE_ID = /\A[a-f0-9]{32}\z/
    SPAN_ID = /\A[a-f0-9]{16}\z/
    FLAGS = /\A[a-f0-9]{2}\z/

    module_function

    # Parses a version-00 traceparent header.
    #
    # Returns a Parsed struct (trace_id, parent_span_id, sampled) or nil
    # when the header is missing, is not a String, or is malformed. A header
    # is malformed unless it consists of exactly four dash-separated fields:
    # version "00", a 32-hex trace id, a 16-hex span id (neither all zeros)
    # and two hex flag digits. Only lowercase hex is accepted.
    def parse_traceparent(header)
      return nil unless header.is_a?(String)

      # A negative limit keeps trailing empty fields, so "…-01-" yields five
      # parts and is rejected instead of being silently accepted.
      parts = header.split("-", -1)
      return nil unless parts.size == 4

      version, trace_id, parent_span_id, flags = parts
      return nil unless version == "00"
      return nil unless valid_id?(trace_id, TRACE_ID)
      return nil unless valid_id?(parent_span_id, SPAN_ID)
      return nil unless flags.match?(FLAGS)

      Parsed.new(
        trace_id: trace_id,
        parent_span_id: parent_span_id,
        # Bit 0 of the flags byte is the "sampled" flag.
        sampled: Integer(flags, 16).allbits?(0x01)
      )
    end

    # Builds a version-00 traceparent header. The ids are interpolated as
    # given; no validation is performed here.
    def format_traceparent(trace_id:, span_id:, sampled: true)
      "00-#{trace_id}-#{span_id}-#{sampled ? "01" : "00"}"
    end

    # True when value matches pattern and is not made up solely of zeros.
    def valid_id?(value, pattern)
      return false unless value&.match?(pattern)

      !value.match?(/\A0+\z/)
    end
  end
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "buffer"
|
|
4
|
+
|
|
5
|
+
module Logtide
|
|
6
|
+
module Transport
|
|
7
|
+
# Owns the buffer and the background dispatcher (spec 002 sections 5-8).
|
|
8
|
+
# Capture appends to a bounded buffer and returns immediately; a worker
|
|
9
|
+
# thread flushes on batch size or interval, applying the retry policy and
|
|
10
|
+
# circuit breaker. flush/close are best-effort and never raise.
|
|
11
|
+
class Batcher
  # Collaborators, as used by this class:
  #   sender          - deliver(batch) returns a response exposing status and
  #                     retry_after, or raises NetworkError.
  #   metrics         - increment(name) / increment(name, count).
  #   circuit_breaker - allow?, record_success, record_failure.
  #   retry_policy    - max_retries, retryable_status?(status),
  #                     delay_for(attempt, retry_after:).
  #   sleeper         - callable used to wait between retries.
  #   logger          - optional callable receiving one message String.
  #
  # flush_interval and flush_timeout are in seconds. The worker thread is
  # started as the last step of construction.
  def initialize(sender:, metrics:, circuit_breaker:, retry_policy:,
                 batch_size: 100, max_buffer_size: 10_000,
                 flush_interval: 5, flush_timeout: 10,
                 sleeper: ->(seconds) { sleep(seconds) }, logger: nil)
    @sender = sender
    @metrics = metrics
    @circuit_breaker = circuit_breaker
    @retry_policy = retry_policy
    @batch_size = batch_size
    @flush_interval = flush_interval
    @flush_timeout = flush_timeout
    @sleeper = sleeper
    @logger = logger

    @buffer = Buffer.new(max_size: max_buffer_size)
    @mutex = Mutex.new
    @work = ConditionVariable.new
    @flushed = ConditionVariable.new
    @stop = false
    @closed = false
    @delivering = false
    @auth_warned = false
    @worker = Thread.new { run }
  end

  # Buffers one entry for background delivery. Entries offered after close
  # are ignored; entries rejected by a full buffer are counted as
  # :logs_dropped. Wakes the worker once a full batch is waiting.
  def enqueue(entry)
    return if stopped?

    if @buffer.push(entry)
      signal_worker if @buffer.size >= @batch_size
    else
      @metrics.increment(:logs_dropped)
      log("buffer full, dropping entry")
    end
  end

  # Wakes the worker and waits up to `timeout` seconds for the buffer to be
  # empty with no delivery in flight. Returns true if that state was reached,
  # false if the deadline passed first.
  def flush(timeout = @flush_timeout)
    deadline = monotonic + timeout
    @mutex.synchronize do
      @work.signal
      until drained?
        remaining = deadline - monotonic
        break if remaining <= 0

        @flushed.wait(@mutex, remaining)
      end
      drained?
    end
  end

  # Flushes, asks the worker to stop, and waits up to `timeout` seconds for
  # it to exit. Subsequent calls are no-ops.
  def close(timeout = @flush_timeout)
    return if @closed

    flush(timeout)
    @mutex.synchronize do
      @stop = true
      @work.broadcast
    end
    @worker.join(timeout)
    @closed = true
  end

  private

  def stopped?
    @mutex.synchronize { @stop }
  end

  def signal_worker
    @mutex.synchronize { @work.signal }
  end

  # Must hold the mutex.
  def drained?
    @buffer.empty? && !@delivering
  end

  # Worker thread body: take a batch, deliver it, repeat until stopped.
  def run
    loop do
      batch = wait_for_batch
      break if batch.nil?

      begin
        deliver(batch)
      rescue StandardError => e
        # Anything other than NetworkError escaping the sender (or a failing
        # metrics/breaker/logger hook) used to kill this thread with
        # @delivering stuck at true, which made every later flush time out
        # and left the buffer to fill up. Drop the batch and keep running.
        on_unexpected_error(batch, e)
      ensure
        # Always clear the in-flight marker and wake flush waiters.
        finish_delivery
      end
    end
  end

  # Blocks until there is something to send and returns up to batch_size
  # entries, or returns nil once stop was requested and the buffer is empty.
  def wait_for_batch
    @mutex.synchronize do
      loop do
        unless @buffer.empty?
          @delivering = true
          return @buffer.drain(@batch_size)
        end

        # Nothing buffered: let flush waiters know before sleeping/exiting.
        @flushed.broadcast
        return nil if @stop

        @work.wait(@mutex, @flush_interval)
      end
    end
  end

  def finish_delivery
    @mutex.synchronize do
      @delivering = false
      @flushed.broadcast
    end
  end

  # Sends one batch, retrying retryable failures (nil response from a
  # network error, or a retryable status) up to retry_policy.max_retries
  # times. The circuit breaker is consulted once, before the first attempt.
  def deliver(batch)
    return drop_open_circuit(batch) unless @circuit_breaker.allow?

    attempt = 0
    loop do
      response = attempt_delivery(batch)
      return on_success(batch) if response && success?(response.status)
      return handle_oversized(batch) if response&.status == 413
      return on_non_retryable(batch, response.status) if non_retryable?(response)
      return on_exhausted(batch, attempt) unless attempt < @retry_policy.max_retries

      @metrics.increment(:retries)
      @sleeper.call(@retry_policy.delay_for(attempt, retry_after: response&.retry_after))
      attempt += 1
    end
  end

  # One delivery attempt; a NetworkError is logged and reported as nil.
  def attempt_delivery(batch)
    @sender.deliver(batch)
  rescue NetworkError => e
    log("network error: #{e.message}")
    nil
  end

  def on_success(batch)
    @metrics.increment(:logs_sent, batch.size)
    @circuit_breaker.record_success
  end

  def on_non_retryable(batch, status)
    warn_auth_once(status)
    @metrics.increment(:errors)
    log("non-retryable status #{status}, dropping #{batch.size} entries")
  end

  def on_exhausted(batch, attempt)
    @metrics.increment(:errors)
    @circuit_breaker.record_failure
    log("delivery failed after #{attempt + 1} attempts, dropping #{batch.size} entries")
  end

  def drop_open_circuit(batch)
    @metrics.increment(:logs_dropped, batch.size)
    @metrics.increment(:circuit_breaker_trips)
    log("circuit open, dropping #{batch.size} entries")
  end

  # Last-resort handler for errors that are not part of the delivery
  # protocol. The batch is dropped and counted as an error; the circuit
  # breaker is left alone because the cause may be local (e.g. encoding).
  def on_unexpected_error(batch, error)
    @metrics.increment(:errors)
    log("unexpected error (#{error.class}: #{error.message}), dropping #{batch.size} entries")
  rescue StandardError
    # The hooks themselves are failing; staying alive matters more than
    # reporting it.
    nil
  end

  # On 413 the server rejected an oversized batch: split in half and retry
  # each half once (spec 002 section 10).
  def handle_oversized(batch)
    return @metrics.increment(:errors) if batch.size <= 1

    batch.each_slice((batch.size / 2.0).ceil) do |half|
      response = attempt_delivery(half)
      if response && success?(response.status)
        @metrics.increment(:logs_sent, half.size)
      else
        @metrics.increment(:errors)
      end
    end
  end

  # Logs an authentication hint the first time a 401/403 is seen.
  def warn_auth_once(status)
    return unless [401, 403].include?(status)
    return if @auth_warned

    @auth_warned = true
    log("authentication failed (#{status}); check the api key")
  end

  def success?(status)
    status.between?(200, 299)
  end

  def non_retryable?(response)
    response && !@retry_policy.retryable_status?(response.status)
  end

  def log(message)
    @logger&.call("[logtide] #{message}")
  end

  def monotonic
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Logtide
|
|
4
|
+
module Transport
|
|
5
|
+
# A bounded FIFO buffer (spec 002 section 5). When full it rejects the new
|
|
6
|
+
# entry (drop-newest); the caller records the drop. Thread-safe.
|
|
7
|
+
class Buffer
  # max_size is the largest number of entries held at once.
  def initialize(max_size:)
    @max_size = max_size
    @entries = []
    @mutex = Mutex.new
  end

  # Appends the entry and returns true, or returns false (leaving the
  # buffer untouched) when max_size entries are already stored.
  def push(entry)
    locked do
      accepted = @entries.size < @max_size
      @entries << entry if accepted
      accepted
    end
  end

  # Removes and returns up to `limit` entries, oldest first.
  def drain(limit)
    locked { @entries.shift(limit) }
  end

  # Number of entries currently stored.
  def size
    locked { @entries.size }
  end

  # True when nothing is stored.
  def empty?
    locked { @entries.empty? }
  end

  private

  # Runs the block while holding the buffer's mutex.
  def locked(&block)
    @mutex.synchronize(&block)
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
require "openssl"
require "uri"
require "json"
require "zlib"
require "stringio"
require "time"
require "timeout"
require_relative "../error"
require_relative "../version"
|
|
12
|
+
|
|
13
|
+
module Logtide
|
|
14
|
+
module Transport
|
|
15
|
+
# Raised when a batch cannot be delivered due to a network/transport problem.
|
|
16
|
+
# These failures are retryable (spec 002 section 6).
|
|
17
|
+
# HTTP#deliver raises this, carrying the original error's message, for every
# exception class listed in HTTP::NETWORK_ERRORS.
class NetworkError < Logtide::Error; end
|
|
18
|
+
|
|
19
|
+
# The outcome of a single delivery attempt.
|
|
20
|
+
# status is the integer HTTP status code; retry_after is the Retry-After
# delay in seconds, or nil when the header is absent or unparseable.
Response = Struct.new(:status, :retry_after, keyword_init: true)
|
|
21
|
+
|
|
22
|
+
# Stateless HTTP sender: one POST per batch to the ingest endpoint
|
|
23
|
+
# (spec 002 sections 1-4). Buffering, batching and retries live in the
|
|
24
|
+
# Batcher; this class just speaks HTTP and never raises for HTTP status codes.
|
|
25
|
+
class HTTP
  # Request bodies larger than this many bytes are gzip-compressed.
  GZIP_THRESHOLD = 1024 * 1024
  USER_AGENT = "logtide-ruby/#{Logtide::VERSION}".freeze

  # Low-level failures that #deliver reports as NetworkError. Besides
  # socket, timeout and I/O errors this covers TLS failures, aborted
  # connections and unparseable responses, so that those are wrapped too
  # instead of escaping as unrelated exception classes.
  NETWORK_ERRORS = [
    Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::ECONNABORTED, Errno::EHOSTUNREACH,
    Errno::ENETUNREACH, Errno::ETIMEDOUT, Errno::EPIPE,
    SocketError, Timeout::Error, EOFError, IOError,
    OpenSSL::SSL::SSLError, Net::HTTPBadResponse
  ].freeze

  # url is the full ingest endpoint; https URLs enable TLS. timeout (seconds)
  # is applied to connecting, reading and, where supported, writing.
  def initialize(url:, api_key:, timeout: 10)
    @uri = URI.parse(url)
    @api_key = api_key
    @timeout = timeout
  end

  # Delivers the batch and returns a Response, or raises NetworkError.
  #
  # The batch is sent as the JSON document {"logs": [...]}. Any HTTP status
  # is returned in the Response rather than raised.
  def deliver(logs)
    body = JSON.generate("logs" => logs)
    response = client.request(build_request(body))
    Response.new(status: response.code.to_i, retry_after: parse_retry_after(response["retry-after"]))
  rescue *NETWORK_ERRORS => e
    raise NetworkError, e.message
  end

  private

  # A freshly configured Net::HTTP object; a new one is built per delivery.
  def client
    http = Net::HTTP.new(@uri.host, @uri.port)
    http.use_ssl = @uri.scheme == "https"
    http.open_timeout = @timeout
    http.read_timeout = @timeout
    http.write_timeout = @timeout if http.respond_to?(:write_timeout=)
    http
  end

  # Builds the POST request, compressing the body when it exceeds
  # GZIP_THRESHOLD.
  def build_request(body)
    request = Net::HTTP::Post.new(@uri.request_uri)
    request["X-API-Key"] = @api_key
    request["Content-Type"] = "application/json"
    request["User-Agent"] = USER_AGENT
    if body.bytesize > GZIP_THRESHOLD
      request["Content-Encoding"] = "gzip"
      request.body = gzip(body)
    else
      request.body = body
    end
    request
  end

  # Returns data gzip-compressed as a String.
  def gzip(data)
    io = StringIO.new
    writer = Zlib::GzipWriter.new(io)
    writer.write(data)
    writer.close
    io.string
  end

  # Converts a Retry-After header value to whole seconds. Accepts either a
  # non-negative integer or an HTTP date (dates in the past yield 0).
  # Returns nil when the header is missing, empty or unparseable.
  def parse_retry_after(value)
    return nil if value.nil? || value.empty?
    return value.to_i if value.match?(/\A\d+\z/)

    seconds = (Time.httpdate(value) - Time.now).ceil
    seconds.positive? ? seconds : 0
  rescue ArgumentError
    nil
  end
end
|
|
93
|
+
end
|
|
94
|
+
end
|