allstak 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
1
  require "json"
2
+ require_relative "../sampling"
3
+ require_relative "../sanitizer"
2
4
 
3
5
  module AllStak
4
6
  module Modules
@@ -7,13 +9,20 @@ module AllStak
7
9
  PATH = "/ingest/v1/errors".freeze
8
10
  MAX_BREADCRUMBS = 50
9
11
 
10
- def initialize(transport, config, logger)
12
+ # Thread-local key for the per-thread breadcrumb ring buffer. Each
13
+ # request/job runs on its own thread, so a per-thread buffer keeps one
14
+ # thread's breadcrumb trail from bleeding into another concurrent
15
+ # request's captured exception.
16
+ BREADCRUMB_TLS_KEY = :allstak_breadcrumbs
17
+
18
+ def initialize(transport, config, logger, session_id_provider: nil)
11
19
  @transport = transport
12
20
  @config = config
13
21
  @logger = logger
14
22
  @current_user = nil
15
- @breadcrumbs = []
16
- @breadcrumb_mutex = Mutex.new
23
+ # Optional callable returning the active release-health session id, so
24
+ # the backend's error consumer can mark the session errored/crashed.
25
+ @session_id_provider = session_id_provider
17
26
  end
18
27
 
19
28
  def set_user(id: nil, email: nil, ip: nil)
@@ -24,28 +33,34 @@ module AllStak
24
33
  @current_user = nil
25
34
  end
26
35
 
27
- def add_breadcrumb(type:, message:, level: "info", data: nil)
28
- @breadcrumb_mutex.synchronize do
29
- @breadcrumbs.shift if @breadcrumbs.length >= MAX_BREADCRUMBS
30
- @breadcrumbs << {
31
- timestamp: Time.now.utc.iso8601(6),
32
- type: type,
33
- message: message,
34
- level: level,
35
- data: data
36
- }.compact
37
- end
36
+ # Append a breadcrumb to the current thread's ring buffer.
37
+ #
38
+ # `auto: true` marks the crumb as produced by an auto-instrumentation
39
+ # layer (Rack/Net::HTTP/ActiveRecord/log bridge); those are suppressed
40
+ # when `config.enable_auto_breadcrumbs` is false. Manual breadcrumbs
41
+ # (`auto: false`, the default) are always recorded so existing callers
42
+ # (e.g. the Sidekiq middleware) keep working unchanged. Fail-open: a
43
+ # malformed crumb never raises into the host.
44
+ def add_breadcrumb(type:, message:, level: "info", data: nil, auto: false)
45
+ return if auto && !auto_breadcrumbs_enabled?
46
+ buffer = thread_breadcrumbs
47
+ buffer.shift if buffer.length >= MAX_BREADCRUMBS
48
+ buffer << {
49
+ timestamp: Time.now.utc.iso8601(6),
50
+ type: type,
51
+ message: message,
52
+ level: level,
53
+ data: data
54
+ }.compact
55
+ nil
56
+ rescue StandardError
57
+ nil
38
58
  end
39
59
 
40
60
  def capture_exception(exc, level: "error", user: nil, request_context: nil, trace_id: nil, metadata: nil)
41
61
  return nil if @transport.disabled?
42
62
  begin
43
- crumbs = @breadcrumb_mutex.synchronize do
44
- next nil if @breadcrumbs.empty?
45
- out = @breadcrumbs.dup
46
- @breadcrumbs.clear
47
- out
48
- end
63
+ crumbs = drain_breadcrumbs
49
64
 
50
65
  payload = {
51
66
  exceptionClass: exc.class.name,
@@ -54,21 +69,44 @@ module AllStak
54
69
  level: level,
55
70
  environment: @config.environment,
56
71
  release: @config.release,
72
+ # Phase 3 — v2 ingest contract: top-level identity + frames.
73
+ sdkName: @config.sdk_name,
74
+ sdkVersion: @config.sdk_version,
75
+ platform: @config.platform,
76
+ dist: @config.dist,
77
+ frames: extract_structured_frames(exc),
57
78
  traceId: trace_id,
79
+ # Release-health: top-level session id so the backend error
80
+ # consumer can mark the session errored/crashed server-side.
81
+ sessionId: current_session_id,
58
82
  user: (user || @current_user)&.to_h,
59
83
  requestContext: request_context&.to_h,
60
- metadata: metadata,
84
+ metadata: @config.release_tags.merge(metadata || {}),
61
85
  breadcrumbs: crumbs
62
86
  }.compact
63
87
  payload.delete(:user) if payload[:user]&.empty?
64
88
  payload.delete(:requestContext) if payload[:requestContext]&.empty?
65
89
 
90
+ # Sampling first, then pre-hook scrub, before_send, and final
91
+ # transport scrub. Hooks never see raw secrets and cannot reintroduce
92
+ # values that escape the wire-path sanitizer.
93
+ return nil unless Sampling.sampled?(@config.sample_rate)
94
+ payload = apply_before_send(payload)
95
+ return nil if payload.nil?
96
+
66
97
  status, body = @transport.post(PATH, payload)
67
98
  return nil unless status == 202
68
99
  parsed = JSON.parse(body) rescue nil
69
100
  parsed&.dig("data", "id")
70
101
  rescue Transport::AllStakAuthError
71
102
  nil
103
+ rescue Transport::AllStakTransportError => e
104
+ # Retries exhausted / network outage: persist the (scrubbed) error for
105
+ # replay on the next init instead of dropping. `payload` is in scope
106
+ # only after it is built; guard so a pre-build failure still no-ops.
107
+ @transport.persist_failed(PATH, payload) if defined?(payload) && payload
108
+ @logger.debug("[AllStak] capture_exception transport error (spooled): #{e.message}")
109
+ nil
72
110
  rescue => e
73
111
  @logger.debug("[AllStak] capture_exception swallowed: #{e.class}: #{e.message}")
74
112
  nil
@@ -86,14 +124,28 @@ module AllStak
86
124
  environment: @config.environment,
87
125
  release: @config.release,
88
126
  traceId: trace_id,
127
+ sessionId: current_session_id,
89
128
  user: (user || @current_user)&.to_h,
90
129
  requestContext: request_context&.to_h,
91
- metadata: metadata
130
+ metadata: @config.release_tags.merge(metadata || {})
92
131
  }.compact
93
132
  payload.delete(:user) if payload[:user]&.empty?
94
133
  payload.delete(:requestContext) if payload[:requestContext]&.empty?
134
+
135
+ # Sampling first, then pre-hook scrub, before_send, and final
136
+ # transport scrub.
137
+ return nil unless Sampling.sampled?(@config.sample_rate)
138
+ payload = apply_before_send(payload)
139
+ return nil if payload.nil?
140
+
95
141
  status, _ = @transport.post(PATH, payload)
96
142
  status == 202 ? exception_class : nil
143
+ rescue Transport::AllStakAuthError
144
+ nil
145
+ rescue Transport::AllStakTransportError => e
146
+ @transport.persist_failed(PATH, payload) if defined?(payload) && payload
147
+ @logger.debug("[AllStak] capture_error transport error (spooled): #{e.message}")
148
+ nil
97
149
  rescue => e
98
150
  @logger.debug("[AllStak] capture_error swallowed: #{e.class}: #{e.message}")
99
151
  nil
@@ -102,10 +154,100 @@ module AllStak
102
154
 
103
155
  private
104
156
 
157
+ # The current thread's breadcrumb ring buffer (lazily created).
158
+ def thread_breadcrumbs
159
+ Thread.current[BREADCRUMB_TLS_KEY] ||= []
160
+ end
161
+
162
+ # Snapshot + clear the current thread's breadcrumbs for attachment to an
163
+ # outgoing event. Returns nil when empty so the payload omits the key.
164
+ def drain_breadcrumbs
165
+ buffer = Thread.current[BREADCRUMB_TLS_KEY]
166
+ return nil if buffer.nil? || buffer.empty?
167
+ out = buffer.dup
168
+ buffer.clear
169
+ out
170
+ end
171
+
172
+ # Whether auto-emitted breadcrumbs are enabled. Defaults to true if the
173
+ # (possibly older/stubbed) config doesn't expose the flag.
174
+ def auto_breadcrumbs_enabled?
175
+ return true unless @config.respond_to?(:enable_auto_breadcrumbs)
176
+ @config.enable_auto_breadcrumbs != false
177
+ end
178
+
179
+ # Resolve the active release-health session id via the injected provider.
180
+ # Fail-open: any error yields nil so capture is never blocked.
181
+ def current_session_id
182
+ return nil unless @session_id_provider.respond_to?(:call)
183
+ sid = @session_id_provider.call
184
+ sid.to_s.empty? ? nil : sid
185
+ rescue StandardError
186
+ nil
187
+ end
188
+
189
# Run the user-supplied before_send hook over a sanitized copy of the event.
#
# @param payload [Hash] the event that is about to be handed to the transport
# @return [Hash, nil] the event to send, or nil when the hook dropped it
#
# Contract with the hook (`@config.before_send`, any object responding to
# `call`):
#   * it receives the output of `sanitize_before_send`, never the raw payload;
#   * returning a Hash sends that Hash;
#   * returning nil or false drops the event;
#   * returning anything else (typically a hook that mutated the event in
#     place and forgot to return it) is treated as a buggy hook: a warning is
#     logged and the sanitized event is sent, so a non-Hash value is never
#     handed to the transport;
#   * raising is fail-open: a warning is logged and the sanitized event is sent.
def apply_before_send(payload)
  hook = @config.before_send
  sanitized = sanitize_before_send(payload)
  return sanitized unless hook.respond_to?(:call)

  result =
    begin
      hook.call(sanitized)
    rescue => e
      @logger.warn("[AllStak] before_send raised; sending sanitized event: #{e.class}: #{e.message}")
      return sanitized
    end

  # nil is the documented "drop" signal; false is accepted as the same intent
  # so it is never forwarded to the transport as a payload.
  return nil if result.nil? || result.equal?(false)
  return result if result.is_a?(Hash)

  @logger.warn("[AllStak] before_send returned #{result.class}, expected Hash or nil; sending sanitized event")
  sanitized
end
205
+
206
+ def sanitize_before_send(payload)
207
+ AllStak::Sanitizer.scrub(payload, **sanitizer_options)
208
+ rescue => e
209
+ @logger.warn("[AllStak] pre-before_send sanitizer failed; sending redacted event: #{e.class}: #{e.message}")
210
+ redacted_payload(payload)
211
+ end
212
+
213
+ def sanitizer_options
214
+ {
215
+ extra_denylist: @config.respond_to?(:extra_denylist) ? @config.extra_denylist : nil,
216
+ send_default_pii: @config.respond_to?(:send_default_pii?) ? @config.send_default_pii? : false
217
+ }
218
+ end
219
+
220
# Last-resort event used when the sanitizer itself raised (see the rescue in
# `sanitize_before_send`). Nothing in the payload has been scrubbed at this
# point, so every free-form field is removed or replaced rather than trusted:
#   * :message is replaced with the sanitizer's REDACTED marker;
#   * :metadata is replaced with a `{ "redacted" => true }` marker;
#   * :breadcrumbs, :user and :requestContext are dropped entirely, since
#     they carry caller-supplied data that was never scrubbed.
# Remaining keys are passed through unchanged.
#
# @param payload [Hash] the unsanitized event; it is not mutated
# @return [Hash] a shallow copy with the free-form fields redacted
def redacted_payload(payload)
  out = payload.dup
  out[:message] = AllStak::Sanitizer::REDACTED
  out[:metadata] = { "redacted" => true }
  %i[breadcrumbs user requestContext].each { |key| out.delete(key) }
  out
end
227
+
105
228
  def extract_frames(exc)
106
229
  return [] unless exc.backtrace.is_a?(Array)
107
230
  exc.backtrace.first(50)
108
231
  end
232
+
233
+ # Phase 3 — v2 structured frames. Ruby's backtrace is "<file>:<line>:in `<fn>'"
234
+ # — split it into the wire shape so the dashboard can render real
235
+ # source paths. Falls back to nil when no backtrace is present.
236
+ def extract_structured_frames(exc)
237
+ return nil unless exc.backtrace.is_a?(Array)
238
+ out = []
239
+ exc.backtrace.first(50).each do |line|
240
+ if line =~ /^(.*):(\d+):in [`'](.+?)'/
241
+ out << {
242
+ filename: $1, absPath: $1,
243
+ function: $3, lineno: Integer($2),
244
+ inApp: !$1.include?('/gems/') && !$1.start_with?('<internal:'),
245
+ platform: 'ruby'
246
+ }
247
+ end
248
+ end
249
+ out.empty? ? nil : out
250
+ end
109
251
  end
110
252
  end
111
253
  end
@@ -23,7 +23,8 @@ module AllStak
23
23
 
24
24
  def record(direction:, method:, host:, path:, status_code:, duration_ms:,
25
25
  request_size: 0, response_size: 0, trace_id: nil, user_id: nil,
26
- error_fingerprint: nil, span_id: nil, parent_span_id: nil)
26
+ error_fingerprint: nil, span_id: nil, parent_span_id: nil,
27
+ request_id: nil)
27
28
  return if @transport.disabled?
28
29
  item = {
29
30
  direction: direction,
@@ -36,6 +37,7 @@ module AllStak
36
37
  responseSize: response_size.to_i,
37
38
  timestamp: Time.now.utc.iso8601(3),
38
39
  traceId: trace_id || SecureRandom.hex(16),
40
+ requestId: request_id,
39
41
  userId: user_id,
40
42
  errorFingerprint: error_fingerprint,
41
43
  spanId: span_id,
@@ -68,7 +70,10 @@ module AllStak
68
70
  rescue Transport::AllStakAuthError
69
71
  return
70
72
  rescue Transport::AllStakTransportError => e
71
- @logger.debug("[AllStak] http batch transport error: #{e.message}")
73
+ # Retries exhausted / outage: persist the (scrubbed) batch for
74
+ # replay on the next init instead of dropping.
75
+ @transport.persist_failed(PATH, { requests: chunk })
76
+ @logger.debug("[AllStak] http batch transport error (spooled): #{e.message}")
72
77
  rescue => e
73
78
  @logger.debug("[AllStak] http batch unexpected error: #{e.message}")
74
79
  end
@@ -5,10 +5,14 @@ module AllStak
5
5
  PATH = "/ingest/v1/logs".freeze
6
6
  VALID_LEVELS = %w[debug info warn error fatal].freeze
7
7
 
8
- def initialize(transport, config, logger)
8
+ def initialize(transport, config, logger, breadcrumb_sink: nil)
9
9
  @transport = transport
10
10
  @config = config
11
11
  @logger = logger
12
+ # Optional callable invoked for each accepted log so AllStak.log.*
13
+ # entries also surface as breadcrumbs on the next captured exception.
14
+ # Injected by the client; nil keeps Logs standalone (and recursion-free).
15
+ @breadcrumb_sink = breadcrumb_sink
12
16
  @buffer = Transport::FlushBuffer.new(
13
17
  name: "logs",
14
18
  max_size: config.buffer_size,
@@ -34,9 +38,11 @@ module AllStak
34
38
  requestId: request_id,
35
39
  userId: user_id,
36
40
  errorId: error_id,
37
- metadata: metadata
41
+ metadata: @config.release_tags.merge(metadata || {})
38
42
  }.compact
39
43
  @buffer.push(payload)
44
+ emit_breadcrumb(level, message, trace_id: trace_id, span_id: span_id)
45
+ nil
40
46
  end
41
47
 
42
48
  def debug(msg, **kw); log("debug", msg, **kw); end
@@ -55,6 +61,22 @@ module AllStak
55
61
 
56
62
  private
57
63
 
64
+ # Bridge an accepted log into a breadcrumb via the injected sink. The
65
+ # sink is the errors module's add_breadcrumb (auto-gated), so this is a
66
+ # no-op when breadcrumbs are disabled or no sink was wired. Fail-open.
67
+ def emit_breadcrumb(level, message, trace_id: nil, span_id: nil)
68
+ sink = @breadcrumb_sink
69
+ return unless sink.respond_to?(:call)
70
+ sink.call(
71
+ type: "log",
72
+ message: message.to_s,
73
+ level: level,
74
+ data: { "traceId" => trace_id, "spanId" => span_id }.reject { |_, v| v.nil? }
75
+ )
76
+ rescue StandardError
77
+ nil
78
+ end
79
+
58
80
  def normalize_level(level)
59
81
  lv = level.to_s.downcase
60
82
  lv = "warn" if lv == "warning"
@@ -68,7 +90,10 @@ module AllStak
68
90
  rescue Transport::AllStakAuthError
69
91
  return
70
92
  rescue Transport::AllStakTransportError => e
71
- @logger.debug("[AllStak] log transport error (discarding): #{e.message}")
93
+ # Retries exhausted / network outage: persist (scrubbed) for replay
94
+ # on the next init instead of dropping. Fail-open inside transport.
95
+ @transport.persist_failed(PATH, item)
96
+ @logger.debug("[AllStak] log transport error (spooled): #{e.message}")
72
97
  rescue => e
73
98
  @logger.debug("[AllStak] unexpected log error: #{e.message}")
74
99
  end
@@ -1,4 +1,5 @@
1
1
  require "securerandom"
2
+ require_relative "../sampling"
2
3
 
3
4
  module AllStak
4
5
  module Modules
@@ -36,10 +37,26 @@ module AllStak
36
37
  def reset_trace
37
38
  Thread.current[:allstak_trace_id] = nil
38
39
  Thread.current[:allstak_span_stack] = nil
40
+ Thread.current[:allstak_trace_sampled] = nil
41
+ end
42
+
43
+ # Sampling decision for the CURRENT trace. Decided once (at the first
44
+ # span of the trace) and cached thread-locally so every span and the
45
+ # propagated traceparent flag agree. When `traces_sample_rate` is nil
46
+ # (the default), tracing is unsampled-mode-off: everything is kept and
47
+ # the traceparent sampled flag stays "01" (historical behavior).
48
+ def current_trace_sampled?
49
+ decided = Thread.current[:allstak_trace_sampled]
50
+ return decided unless decided.nil?
51
+ rate = @config.traces_sample_rate
52
+ decided = rate.nil? ? true : Sampling.sampled?(rate)
53
+ Thread.current[:allstak_trace_sampled] = decided
54
+ decided
39
55
  end
40
56
 
41
57
  def start_span(operation, description: "", tags: nil)
42
58
  trace_id = current_trace_id
59
+ sampled = current_trace_sampled?
43
60
  span_id = SecureRandom.hex(8)
44
61
  parent = current_span_id || ""
45
62
  Thread.current[:allstak_span_stack] ||= []
@@ -56,6 +73,7 @@ module AllStak
56
73
  release: (@config.respond_to?(:release) ? @config.release : nil) || "",
57
74
  tags: tags || {},
58
75
  start_time_millis: (Time.now.to_f * 1000).to_i,
76
+ sampled: sampled,
59
77
  on_finish: method(:on_span_finish)
60
78
  )
61
79
  end
@@ -88,6 +106,10 @@ module AllStak
88
106
  def on_span_finish(span)
89
107
  stack = Thread.current[:allstak_span_stack]
90
108
  stack&.delete(span.span_id)
109
+ # Drop unsampled spans: they were never meant to be sent. The span
110
+ # still ran (timing/finish semantics intact) so block-form `in_span`
111
+ # control flow is unaffected.
112
+ return unless span.sampled?
91
113
  @buffer.push(span.to_h)
92
114
  end
93
115
 
@@ -97,7 +119,10 @@ module AllStak
97
119
  rescue Transport::AllStakAuthError
98
120
  return
99
121
  rescue Transport::AllStakTransportError => e
100
- @logger.debug("[AllStak] span transport error: #{e.message}")
122
+ # Retries exhausted / outage: persist the (scrubbed) spans for replay
123
+ # on the next init instead of dropping.
124
+ @transport.persist_failed(PATH, { spans: items })
125
+ @logger.debug("[AllStak] span transport error (spooled): #{e.message}")
101
126
  rescue => e
102
127
  @logger.debug("[AllStak] span unexpected error: #{e.message}")
103
128
  end
@@ -108,7 +133,8 @@ module AllStak
108
133
  attr_reader :trace_id, :span_id
109
134
 
110
135
  def initialize(trace_id:, span_id:, parent_span_id:, operation:, description:,
111
- service:, environment:, tags:, start_time_millis:, on_finish:, release: "")
136
+ service:, environment:, tags:, start_time_millis:, on_finish:,
137
+ release: "", sampled: true)
112
138
  @trace_id = trace_id
113
139
  @span_id = span_id
114
140
  @parent_span_id = parent_span_id
@@ -122,9 +148,14 @@ module AllStak
122
148
  @end_time_millis = nil
123
149
  @status = "ok"
124
150
  @finished = false
151
+ @sampled = sampled
125
152
  @on_finish = on_finish
126
153
  end
127
154
 
155
+ def sampled?
156
+ @sampled
157
+ end
158
+
128
159
  def set_tag(key, value)
129
160
  @tags[key.to_s] = value.to_s
130
161
  self
@@ -0,0 +1,48 @@
1
module AllStak
  # Builds and applies the correlation headers AllStak attaches to requests:
  # the `X-AllStak-*` ids, a W3C `traceparent`, the shared `baggage` header
  # and an AllStak-only `AllStak-Baggage` copy.
  module Propagation
    module_function

    # Matches every byte that may NOT appear verbatim in a baggage value: all
    # bytes outside the W3C `baggage-octet` set (so space, `"`, `,`, `;`, `\`,
    # control and non-ASCII bytes), plus `%` itself so that encoding stays
    # reversible. Plain hex ids and UUIDs contain none of them and pass
    # through unchanged.
    BAGGAGE_UNSAFE = /[^\x21\x23\x24\x26-\x2B\x2D-\x3A\x3C-\x5B\x5D-\x7E]/.freeze

    # Percent-encode a single baggage value byte-wise. Without this, an id
    # containing "," or ";" would split into extra baggage members or
    # properties once the header is parsed.
    def encode_baggage_value(value)
      value.to_s.b
           .gsub(BAGGAGE_UNSAFE) { |byte| format("%%%02X", byte.ord) }
           .force_encoding(Encoding::UTF_8)
    end

    # AllStak's own baggage members as one comma-joined string.
    # `allstak-trace_id` is always emitted; the request and span members are
    # emitted only when the id is present and non-empty.
    #
    # @return [String]
    def baggage(trace_id:, request_id: nil, span_id: nil)
      members = ["allstak-trace_id=#{encode_baggage_value(trace_id)}"]
      if request_id && !request_id.to_s.empty?
        members << "allstak-request_id=#{encode_baggage_value(request_id)}"
      end
      members << "allstak-span_id=#{encode_baggage_value(span_id)}" if span_id && !span_id.to_s.empty?
      members.join(",")
    end

    # Merge AllStak's members into an existing baggage header value. Members
    # from other vendors are preserved in order; any previous `allstak-*`
    # members (matched case-insensitively) are replaced by fresh ones.
    #
    # @param existing [String, nil] the current header value, if any
    # @return [String]
    def merge_baggage(existing, trace_id:, request_id: nil, span_id: nil)
      foreign = existing.to_s.split(",").map(&:strip).reject do |member|
        member.empty? || member.downcase.start_with?("allstak-")
      end
      foreign << baggage(trace_id: trace_id, request_id: request_id, span_id: span_id)
      foreign.join(",")
    end

    # W3C traceparent trace-flags: "01" = sampled, "00" = not sampled.
    # Only an explicit `false` yields "00", which preserves the historical
    # always-sampled flag for callers that pass no sampling decision.
    def trace_flags(sampled)
      sampled == false ? "00" : "01"
    end

    # Write the propagation headers into a Hash-like `headers`, overwriting
    # any existing AllStak values. `traceparent` and the span header are only
    # written when a span id is present; the span id is truncated to 16
    # characters for `traceparent`.
    def apply_headers(headers, trace_id:, request_id: nil, span_id: nil, sampled: true)
      headers["X-AllStak-Trace-Id"] = trace_id
      headers["X-AllStak-Request-Id"] = request_id if request_id && !request_id.to_s.empty?
      if span_id && !span_id.to_s.empty?
        headers["X-AllStak-Span-Id"] = span_id
        # to_s so a non-String id is truncated as text, not indexed as bits.
        headers["traceparent"] = "00-#{trace_id}-#{span_id.to_s[0, 16]}-#{trace_flags(sampled)}"
      end
      headers["baggage"] = merge_baggage(headers["baggage"], trace_id: trace_id, request_id: request_id, span_id: span_id)
      headers["AllStak-Baggage"] = baggage(trace_id: trace_id, request_id: request_id, span_id: span_id)
    end

    # Same as {apply_headers} but for a request object indexed with `[]`/`[]=`.
    # Ids and `traceparent` already set on the request are left alone (`||=`);
    # the two baggage headers are always rewritten.
    def apply_request_headers(req, trace_id:, request_id: nil, span_id: nil, sampled: true)
      req["X-AllStak-Trace-Id"] ||= trace_id
      req["X-AllStak-Request-Id"] ||= request_id if request_id && !request_id.to_s.empty?
      if span_id && !span_id.to_s.empty?
        req["X-AllStak-Span-Id"] ||= span_id
        req["traceparent"] ||= "00-#{trace_id}-#{span_id.to_s[0, 16]}-#{trace_flags(sampled)}"
      end
      req["baggage"] = merge_baggage(req["baggage"], trace_id: trace_id, request_id: request_id, span_id: span_id)
      req["AllStak-Baggage"] = baggage(trace_id: trace_id, request_id: request_id, span_id: span_id)
    end
  end
end
@@ -0,0 +1,38 @@
1
module AllStak
  # Deterministic head-sampling helper.
  #
  # Sampling is "deterministic" in the sense that a rate of 1.0 always keeps
  # an event and a rate of 0.0 always drops it — no RNG is consulted at the
  # boundaries. For intermediate rates a single random draw in [0.0, 1.0) is
  # compared against the rate: kept when `draw < rate`.
  #
  # The RNG is a seam: tests inject a deterministic value via {rng=} so the
  # keep/drop decision is fully controllable without monkeypatching Kernel.
  module Sampling
    module_function

    # Override the random source used by {sampled?}. Pass a callable returning
    # a Float in [0.0, 1.0); it is invoked with no arguments. Pass nil to
    # restore the default (Kernel.rand).
    def rng=(callable)
      @rng = callable
    end

    # The active random source: the injected callable, or a lambda wrapping
    # Kernel.rand when none was injected.
    def rng
      @rng || ->(*) { Kernel.rand }
    end

    # Returns true when an event should be KEPT under the given rate.
    #
    # * nil rate is treated as "no sampling configured" → keep.
    # * A rate that cannot be converted with `to_f` (e.g. `true`) is a
    #   misconfiguration; it is treated like nil (keep) instead of raising
    #   NoMethodError into the caller.
    # * Rates are clamped to [0.0, 1.0]. 1.0 always keeps; 0.0 always drops.
    #
    # @param rate [Numeric, String, nil]
    # @return [Boolean]
    def sampled?(rate)
      return true if rate.nil? || !rate.respond_to?(:to_f)

      r = rate.to_f
      r = 0.0 if r < 0.0
      r = 1.0 if r > 1.0
      return true if r >= 1.0
      return false if r <= 0.0

      rng.call < r
    end
  end
end
+ end