bitfab 0.16.1 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bitfab/client.rb +54 -17
- data/lib/bitfab/db_snapshot.rb +29 -0
- data/lib/bitfab/http_client.rb +16 -3
- data/lib/bitfab/replay.rb +70 -22
- data/lib/bitfab/replay_environment.rb +110 -0
- data/lib/bitfab/span_context.rb +14 -6
- data/lib/bitfab/version.rb +1 -1
- data/lib/bitfab.rb +2 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9dd280a2cd6afdf47b2bbbf62508dd4918dccf42c83ed276845f592497168601
|
|
4
|
+
data.tar.gz: 224fc7cfe39e847f3bfd56e9876151ea356f7a8ad7bff66b529f9704ab360141
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4cba57762a9ae927ba34ac70fdd1d750d118a9f8f3cc47402f30d8a09c336337f27cef601f8b36e63140e01acaa410d1a58450f160cb1c3d7e7c2c5b1a198cb3
|
|
7
|
+
data.tar.gz: eaa0a0b2eed0c0f7c0e24a73e2128540c53a504e9a3dd4d6858355e8076aa2fa8a4913ea170b278ac1579c5e407d4333759e1bae345dbe575b9fa91b666275a1
|
data/lib/bitfab/client.rb
CHANGED
|
@@ -26,7 +26,7 @@ module Bitfab
|
|
|
26
26
|
@service_url = service_url || DEFAULT_SERVICE_URL
|
|
27
27
|
@enabled = enabled
|
|
28
28
|
if @enabled && (@api_key.nil? || @api_key.to_s.strip.empty?)
|
|
29
|
-
warn "Bitfab: api_key is empty
|
|
29
|
+
warn "Bitfab: api_key is empty: tracing is disabled. Provide a valid API key to enable tracing."
|
|
30
30
|
@enabled = false
|
|
31
31
|
end
|
|
32
32
|
@http_client = HttpClient.new(api_key:, service_url: @service_url)
|
|
@@ -60,11 +60,11 @@ module Bitfab
|
|
|
60
60
|
# @return [Hash] with :items, :test_run_id, :test_run_url
|
|
61
61
|
def replay(receiver, method_name, trace_function_key:, limit: nil, trace_ids: nil, max_concurrency: 10,
|
|
62
62
|
code_change_description: nil, code_change_files: nil, experiment_group_id: nil, mock: "none",
|
|
63
|
-
adapt_inputs: nil)
|
|
63
|
+
adapt_inputs: nil, environment: nil)
|
|
64
64
|
Replay.run(
|
|
65
65
|
self, receiver, method_name,
|
|
66
66
|
trace_function_key:, limit:, trace_ids:, max_concurrency:,
|
|
67
|
-
code_change_description:, code_change_files:, experiment_group_id:, mock:, adapt_inputs:
|
|
67
|
+
code_change_description:, code_change_files:, experiment_group_id:, mock:, adapt_inputs:, environment:
|
|
68
68
|
)
|
|
69
69
|
end
|
|
70
70
|
|
|
@@ -87,7 +87,7 @@ module Bitfab
|
|
|
87
87
|
end
|
|
88
88
|
|
|
89
89
|
# Execute a block inside a span context, sending trace data on completion.
|
|
90
|
-
# Called by Traceable
|
|
90
|
+
# Called by Traceable, not intended for direct use.
|
|
91
91
|
def execute_span(trace_function_key:, span_name:, span_type:, function_name:, args:, kwargs:,
|
|
92
92
|
mock_on_replay: false)
|
|
93
93
|
return yield unless @enabled
|
|
@@ -121,7 +121,7 @@ module Bitfab
|
|
|
121
121
|
# Unmarked spans must consume an index so subsequent marked siblings
|
|
122
122
|
# line up with `build_mock_tree`'s sequential numbering for the same
|
|
123
123
|
# (key, name) pair. Different (key, name) pairs have independent
|
|
124
|
-
# counters
|
|
124
|
+
# counters: they cannot shift each other.
|
|
125
125
|
call_index = advance_mock_counter(replay_ctx, trace_function_key, span_name, is_root_span:)
|
|
126
126
|
if call_index
|
|
127
127
|
mocked_output = check_mock_replay(
|
|
@@ -154,7 +154,7 @@ module Bitfab
|
|
|
154
154
|
finalized = false
|
|
155
155
|
|
|
156
156
|
finalize = lambda do |final_result, final_error|
|
|
157
|
-
# Never crash the host app due to span building/sending. Idempotent
|
|
157
|
+
# Never crash the host app due to span building/sending. Idempotent:
|
|
158
158
|
# only the first call sends the span. Subsequent calls (e.g. from the
|
|
159
159
|
# enumerator wrapper after iteration completes) are no-ops.
|
|
160
160
|
next if finalized
|
|
@@ -188,11 +188,27 @@ module Bitfab
|
|
|
188
188
|
pending << span_thread if span_thread
|
|
189
189
|
pending.each { |t| t.join(5) }
|
|
190
190
|
|
|
191
|
+
# Built AFTER the wrapped method finished (finalize runs at root
|
|
192
|
+
# span end), so :accessed reflects whether customer code obtained
|
|
193
|
+
# the branch URL during this item. nil (key omitted) when no
|
|
194
|
+
# lease was attached, so the server can distinguish "no branch"
|
|
195
|
+
# from "branch ignored".
|
|
196
|
+
lease = replay_ctx&.dig(:db_branch_lease)
|
|
197
|
+
db_snapshot_usage = if lease
|
|
198
|
+
{
|
|
199
|
+
neon_branch_id: lease["neonBranchId"],
|
|
200
|
+
snapshot_timestamp: lease["snapshotTimestamp"],
|
|
201
|
+
source_trace_id: replay_ctx[:source_bitfab_trace_id],
|
|
202
|
+
accessed: replay_ctx[:db_snapshot_accessed] == true
|
|
203
|
+
}
|
|
204
|
+
end
|
|
205
|
+
|
|
191
206
|
completion_thread = send_trace_completion(
|
|
192
207
|
trace_function_key:,
|
|
193
208
|
trace_id:,
|
|
194
209
|
started_at:,
|
|
195
|
-
ended_at
|
|
210
|
+
ended_at:,
|
|
211
|
+
db_snapshot_usage:
|
|
196
212
|
)
|
|
197
213
|
|
|
198
214
|
# In replay, persistence is correctness: the replay runner joins
|
|
@@ -212,7 +228,7 @@ module Bitfab
|
|
|
212
228
|
end
|
|
213
229
|
end
|
|
214
230
|
rescue Exception # rubocop:disable Lint/RescueException
|
|
215
|
-
# Silently ignore
|
|
231
|
+
# Silently ignore: user's result/exception takes priority
|
|
216
232
|
# Catches Exception (not just StandardError) to handle SystemStackError
|
|
217
233
|
# from deeply nested serialization
|
|
218
234
|
end
|
|
@@ -234,7 +250,7 @@ module Bitfab
|
|
|
234
250
|
|
|
235
251
|
# If the wrapped block returned an Enumerator (lazy iteration via
|
|
236
252
|
# `enum_for`, `to_enum`, `Enumerator.new`, `[...].lazy.map(...)`, etc.),
|
|
237
|
-
# the work hasn't actually run yet
|
|
253
|
+
# the work hasn't actually run yet: the values are produced as the
|
|
238
254
|
# caller iterates. Without special handling we'd close the span here
|
|
239
255
|
# with `result == <the Enumerator object>`, and any nested `bitfab_span`
|
|
240
256
|
# calls inside the enumerator body would see an empty span stack and
|
|
@@ -264,7 +280,7 @@ module Bitfab
|
|
|
264
280
|
# Build an Enumerator that drives `source`, restoring `[trace_id, span_id]`
|
|
265
281
|
# on the iterating fiber so nested `bitfab_span` calls inside lazy / `each`
|
|
266
282
|
# callbacks nest under the parent span. Yielded values are collected as the
|
|
267
|
-
# span output. The span is sent exactly once
|
|
283
|
+
# span output. The span is sent exactly once: when iteration finishes,
|
|
268
284
|
# raises, or the wrapper is `.close`d.
|
|
269
285
|
def wrap_enumerator(source, trace_id:, span_id:, finalize:)
|
|
270
286
|
span_entry = {trace_id:, span_id:}
|
|
@@ -296,7 +312,14 @@ module Bitfab
|
|
|
296
312
|
raise ArgumentError, "Invalid span type '#{type}'. Must be one of: #{SPAN_TYPES.join(", ")}"
|
|
297
313
|
end
|
|
298
314
|
|
|
299
|
-
|
|
315
|
+
# db_snapshot_usage: replay DB branch usage record, present only when a
|
|
316
|
+
# lease was attached to the replay item. Serialized as `db_snapshot_usage`
|
|
317
|
+
# on the raw trace so the server can stamp the trace's metadata at ingest:
|
|
318
|
+
# { neon_branch_id:, snapshot_timestamp: (optional), source_trace_id:
|
|
319
|
+
# (optional), accessed: } with :accessed true if customer code obtained
|
|
320
|
+
# the branch URL and false if it ignored it. nil outside replay or when no
|
|
321
|
+
# lease was attached, in which case the key is omitted entirely.
|
|
322
|
+
def send_trace_completion(trace_function_key:, trace_id:, started_at:, ended_at:, db_snapshot_usage: nil)
|
|
300
323
|
trace_state = TraceState.get(trace_id)
|
|
301
324
|
trace_started_at = trace_state&.dig(:started_at) || started_at
|
|
302
325
|
|
|
@@ -315,6 +338,20 @@ module Bitfab
|
|
|
315
338
|
if trace_state&.dig(:input_source_trace_id)
|
|
316
339
|
raw_trace["input_source_trace_id"] = trace_state[:input_source_trace_id]
|
|
317
340
|
end
|
|
341
|
+
if trace_state&.dig(:db_snapshot_ref)
|
|
342
|
+
raw_trace["db_snapshot_ref"] = trace_state[:db_snapshot_ref]
|
|
343
|
+
end
|
|
344
|
+
if db_snapshot_usage
|
|
345
|
+
usage = {"neon_branch_id" => db_snapshot_usage[:neon_branch_id]}
|
|
346
|
+
if db_snapshot_usage[:snapshot_timestamp]
|
|
347
|
+
usage["snapshot_timestamp"] = db_snapshot_usage[:snapshot_timestamp]
|
|
348
|
+
end
|
|
349
|
+
if db_snapshot_usage[:source_trace_id]
|
|
350
|
+
usage["source_trace_id"] = db_snapshot_usage[:source_trace_id]
|
|
351
|
+
end
|
|
352
|
+
usage["accessed"] = db_snapshot_usage[:accessed]
|
|
353
|
+
raw_trace["db_snapshot_usage"] = usage
|
|
354
|
+
end
|
|
318
355
|
|
|
319
356
|
payload = {
|
|
320
357
|
"type" => "sdk-function",
|
|
@@ -336,7 +373,7 @@ module Bitfab
|
|
|
336
373
|
# Clean up trace state
|
|
337
374
|
TraceState.delete(trace_id)
|
|
338
375
|
|
|
339
|
-
# Returned so the replay path can join it
|
|
376
|
+
# Returned so the replay path can join it: trace completions must be
|
|
340
377
|
# persisted before complete_replay builds the trace-ID mapping.
|
|
341
378
|
completion_thread
|
|
342
379
|
end
|
|
@@ -395,8 +432,8 @@ module Bitfab
|
|
|
395
432
|
# non-root span under an active mock tree. Returns the call index this
|
|
396
433
|
# invocation owns, or nil when there's nothing to advance (root span, or
|
|
397
434
|
# no replay mock context). The counter MUST advance for every child span
|
|
398
|
-
# sharing the same (key, name) pair
|
|
399
|
-
# mocked
|
|
435
|
+
# sharing the same (key, name) pair (including spans that won't be
|
|
436
|
+
# mocked) so unmarked spans don't silently shift subsequent marked
|
|
400
437
|
# spans' indices. Different (key, name) pairs have independent counters.
|
|
401
438
|
def advance_mock_counter(replay_ctx, trace_function_key, span_name, is_root_span:)
|
|
402
439
|
return nil if is_root_span
|
|
@@ -430,7 +467,7 @@ module Bitfab
|
|
|
430
467
|
output_meta = mock_entry[:output_meta]
|
|
431
468
|
|
|
432
469
|
# Type-preserving deserialization when the server included Ruby-side
|
|
433
|
-
# Marshal+Base64 metadata. Falls back to the JSON output silently
|
|
470
|
+
# Marshal+Base64 metadata. Falls back to the JSON output silently: the
|
|
434
471
|
# spanTree endpoint currently returns superjson/jsonpickle-shaped meta,
|
|
435
472
|
# which Ruby cannot reconstruct.
|
|
436
473
|
if output_meta.is_a?(String) && !output_meta.empty?
|
|
@@ -481,13 +518,13 @@ module Bitfab
|
|
|
481
518
|
@pending_span_threads[trace_id] << span_thread if span_thread && @pending_span_threads.key?(trace_id)
|
|
482
519
|
end
|
|
483
520
|
rescue Exception # rubocop:disable Lint/RescueException
|
|
484
|
-
# Never crash the host app
|
|
521
|
+
# Never crash the host app: mocked span recording is best-effort
|
|
485
522
|
end
|
|
486
523
|
end
|
|
487
524
|
|
|
488
525
|
# Fluent wrapper bound to a single trace_function_key. Mirrors
|
|
489
526
|
# `BitfabFunction` in the Python SDK and `BitfabFunction` in the TypeScript
|
|
490
|
-
# SDK
|
|
527
|
+
# SDK: lets callers wrap multiple methods without repeating the key.
|
|
491
528
|
class BitfabFunction
|
|
492
529
|
attr_reader :trace_function_key
|
|
493
530
|
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bitfab
|
|
4
|
+
# Per-trace database snapshot ref capture.
|
|
5
|
+
#
|
|
6
|
+
# Every root trace carries a snapshot ref that pins the DB state at trace
|
|
7
|
+
# open by wall-clock timestamp. Capturing the timestamp is free (no IO) and
|
|
8
|
+
# harmless, so it happens on every trace regardless of configuration: that
|
|
9
|
+
# lets any trace be replayed against a historical branch later. The provider
|
|
10
|
+
# is resolved at replay time. Mirrors the TypeScript and Python SDKs'
|
|
11
|
+
# +DbSnapshotRef+ wire shape (camelCase key, since the server speaks the same
|
|
12
|
+
# protocol for every SDK).
|
|
13
|
+
module DbSnapshot
|
|
14
|
+
module_function
|
|
15
|
+
|
|
16
|
+
# Build a snapshot ref for one trace. Synchronous, no IO.
|
|
17
|
+
#
|
|
18
|
+
# Stores only the wall clock the SDK observed immediately before invoking
|
|
19
|
+
# the wrapped function; the server-side resolver uses that as the snapshot
|
|
20
|
+
# timestamp. No provider is captured (it is resolved at replay time).
|
|
21
|
+
#
|
|
22
|
+
# @param sdk_wall_clock_before_fn [String] ISO wall-clock timestamp the SDK
|
|
23
|
+
# observed immediately before invoking the wrapped function.
|
|
24
|
+
# @return [Hash] snapshot ref with a single camelCase +sdkWallClockBeforeFn+ key.
|
|
25
|
+
def build_snapshot_ref(sdk_wall_clock_before_fn)
|
|
26
|
+
{"sdkWallClockBeforeFn" => sdk_wall_clock_before_fn}
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
data/lib/bitfab/http_client.rb
CHANGED
|
@@ -106,7 +106,7 @@ module Bitfab
|
|
|
106
106
|
# @param experiment_group_id [String, nil] optional UUID grouping multiple
|
|
107
107
|
# replay runs into a single experiment batch
|
|
108
108
|
def start_replay(trace_function_key, limit, trace_ids: nil, code_change_description: nil,
|
|
109
|
-
code_change_files: nil, experiment_group_id: nil)
|
|
109
|
+
code_change_files: nil, experiment_group_id: nil, include_db_branch_lease: false)
|
|
110
110
|
payload = {
|
|
111
111
|
"traceFunctionKey" => trace_function_key
|
|
112
112
|
}
|
|
@@ -117,8 +117,14 @@ module Bitfab
|
|
|
117
117
|
payload["codeChangeDescription"] = code_change_description unless code_change_description.nil?
|
|
118
118
|
payload["codeChangeFiles"] = normalize_code_change_files(code_change_files) unless code_change_files.nil?
|
|
119
119
|
payload["experimentGroupId"] = experiment_group_id unless experiment_group_id.nil?
|
|
120
|
-
|
|
121
|
-
|
|
120
|
+
payload["includeDbBranchLease"] = true if include_db_branch_lease
|
|
121
|
+
|
|
122
|
+
# When DB branching is on, the server resolves a Neon preview branch per
|
|
123
|
+
# item (snapshot + restore + poll), which can run several seconds each.
|
|
124
|
+
# Use a generous timeout so the SDK doesn't give up before a healthy
|
|
125
|
+
# server finishes.
|
|
126
|
+
timeout = include_db_branch_lease ? 180 : 30
|
|
127
|
+
request("/api/sdk/replay/start", payload, timeout:)
|
|
122
128
|
end
|
|
123
129
|
|
|
124
130
|
# Fetch an external span by ID. Blocking GET request.
|
|
@@ -142,6 +148,13 @@ module Bitfab
|
|
|
142
148
|
request("/api/sdk/replay/complete", {"testRunId" => test_run_id}, timeout: 30)
|
|
143
149
|
end
|
|
144
150
|
|
|
151
|
+
# Release a previously-resolved DB branch by deleting its Neon branch.
|
|
152
|
+
# Blocking call. Idempotent server-side (a missing branch is treated as
|
|
153
|
+
# already released).
|
|
154
|
+
def release_db_branch_lease(neon_branch_id)
|
|
155
|
+
request("/api/sdk/replay/releaseDbBranchLease", {"neonBranchId" => neon_branch_id}, timeout: 30)
|
|
156
|
+
end
|
|
157
|
+
|
|
145
158
|
# Send an external trace (fire-and-forget in background thread).
|
|
146
159
|
def send_external_trace(payload)
|
|
147
160
|
merged = payload.merge("sdkVersion" => VERSION)
|
data/lib/bitfab/replay.rb
CHANGED
|
@@ -6,9 +6,9 @@ require_relative "serialize"
|
|
|
6
6
|
module Bitfab
|
|
7
7
|
# Replay mock strategies. Mirrors the Python and TypeScript SDKs.
|
|
8
8
|
#
|
|
9
|
-
# - "none"
|
|
10
|
-
# - "all"
|
|
11
|
-
# - "marked"
|
|
9
|
+
# - "none" : every child span runs real code (default)
|
|
10
|
+
# - "all" : every child span returns its historical output
|
|
11
|
+
# - "marked" : only spans declared with mock_on_replay: true return historical
|
|
12
12
|
# output; everything else runs real code
|
|
13
13
|
MOCK_STRATEGIES = %w[none all marked].freeze
|
|
14
14
|
|
|
@@ -27,7 +27,8 @@ module Bitfab
|
|
|
27
27
|
# threads (span uploads + trace completion) so the replay runner can join
|
|
28
28
|
# them before complete_replay builds the trace-ID mapping.
|
|
29
29
|
def with_context(test_run_id:, input_source_span_id: nil, input_source_trace_id: nil, trace_id: nil,
|
|
30
|
-
mock_tree: nil, mock_strategy: nil, pending_persistence: nil
|
|
30
|
+
mock_tree: nil, mock_strategy: nil, pending_persistence: nil, db_branch_lease: nil,
|
|
31
|
+
source_bitfab_trace_id: nil)
|
|
31
32
|
previous = Thread.current[REPLAY_CONTEXT_KEY]
|
|
32
33
|
ctx = {
|
|
33
34
|
test_run_id:,
|
|
@@ -41,6 +42,21 @@ module Bitfab
|
|
|
41
42
|
ctx[:mock_strategy] = mock_strategy || "none"
|
|
42
43
|
ctx[:call_counters] = {}
|
|
43
44
|
end
|
|
45
|
+
# The per-trace DB branch (resolved server-side) and the Bitfab trace ID
|
|
46
|
+
# it belongs to ride on the context so ReplayEnvironment can read them
|
|
47
|
+
# inside the replayed method.
|
|
48
|
+
#
|
|
49
|
+
# ReplayEnvironment also sets ctx[:db_snapshot_accessed] = true the
|
|
50
|
+
# first time customer code actually obtains the branch URL for this
|
|
51
|
+
# item (via +database_url+ or +snapshot+). Reported on the trace
|
|
52
|
+
# completion inside the +db_snapshot_usage+ record (its +accessed+
|
|
53
|
+
# field) so the server can distinguish "branch was provisioned and
|
|
54
|
+
# exposed" from "branch URL was actually consumed". Any future
|
|
55
|
+
# consumption path that hands the URL to customer code by other means
|
|
56
|
+
# (e.g. a process-isolated runner writing an env overlay) must also
|
|
57
|
+
# set this.
|
|
58
|
+
ctx[:db_branch_lease] = db_branch_lease if db_branch_lease
|
|
59
|
+
ctx[:source_bitfab_trace_id] = source_bitfab_trace_id if source_bitfab_trace_id
|
|
44
60
|
Thread.current[REPLAY_CONTEXT_KEY] = ctx
|
|
45
61
|
yield
|
|
46
62
|
ensure
|
|
@@ -84,7 +100,7 @@ module Bitfab
|
|
|
84
100
|
# @return [Hash] with :items, :test_run_id, :test_run_url
|
|
85
101
|
def run(client, receiver, method_name, trace_function_key:, limit: nil, trace_ids: nil, max_concurrency: 10,
|
|
86
102
|
code_change_description: nil, code_change_files: nil, experiment_group_id: nil, mock: "none",
|
|
87
|
-
adapt_inputs: nil)
|
|
103
|
+
adapt_inputs: nil, environment: nil)
|
|
88
104
|
unless MOCK_STRATEGIES.include?(mock.to_s)
|
|
89
105
|
raise ArgumentError, "Invalid mock strategy '#{mock}'. Must be one of: #{MOCK_STRATEGIES.join(", ")}"
|
|
90
106
|
end
|
|
@@ -105,13 +121,16 @@ module Bitfab
|
|
|
105
121
|
# the count), so it's omitted from the request entirely.
|
|
106
122
|
effective_limit = trace_ids ? nil : (limit || 5)
|
|
107
123
|
|
|
124
|
+
include_db_branch_lease = !environment.nil?
|
|
125
|
+
|
|
108
126
|
replay_data = http_client.start_replay(
|
|
109
127
|
trace_function_key,
|
|
110
128
|
effective_limit,
|
|
111
129
|
trace_ids:,
|
|
112
130
|
code_change_description:,
|
|
113
131
|
code_change_files:,
|
|
114
|
-
experiment_group_id
|
|
132
|
+
experiment_group_id:,
|
|
133
|
+
include_db_branch_lease:
|
|
115
134
|
)
|
|
116
135
|
test_run_id = replay_data["testRunId"]
|
|
117
136
|
test_run_url = replay_data["testRunUrl"]
|
|
@@ -119,14 +138,14 @@ module Bitfab
|
|
|
119
138
|
|
|
120
139
|
result_items = if server_items.any?
|
|
121
140
|
process_items(http_client, server_items, receiver, method_name, test_run_id, max_concurrency, mock.to_s,
|
|
122
|
-
adapt_inputs)
|
|
141
|
+
adapt_inputs, include_db_branch_lease)
|
|
123
142
|
else
|
|
124
143
|
[]
|
|
125
144
|
end
|
|
126
145
|
|
|
127
146
|
# Every item joined its own trace-persistence threads (span uploads +
|
|
128
147
|
# completion) in execute_item, so all replay traces are on the server
|
|
129
|
-
# by now
|
|
148
|
+
# by now: no flush needed, and complete_replay's trace-ID mapping is
|
|
130
149
|
# deterministic. complete_replay failures propagate: a missing mapping
|
|
131
150
|
# means verdicts can't be persisted, which callers must hear about
|
|
132
151
|
# loudly.
|
|
@@ -142,7 +161,7 @@ module Bitfab
|
|
|
142
161
|
else
|
|
143
162
|
# Map each item's locally-generated trace ID to the server's trace
|
|
144
163
|
# row ID. A completed item with no mapping means its trace was sent
|
|
145
|
-
# but the server has no record
|
|
164
|
+
# but the server has no record: a nil trace_id blocks verdict
|
|
146
165
|
# persistence and the Studio experiments view downstream, so this
|
|
147
166
|
# must never be silent.
|
|
148
167
|
#
|
|
@@ -192,12 +211,13 @@ module Bitfab
|
|
|
192
211
|
|
|
193
212
|
# Process all replay items, optionally in parallel using threads.
|
|
194
213
|
def process_items(http_client, server_items, receiver, method_name, test_run_id, max_concurrency, mock_strategy,
|
|
195
|
-
adapt_inputs = nil)
|
|
214
|
+
adapt_inputs = nil, include_db_branch_lease = false)
|
|
196
215
|
concurrency = max_concurrency || server_items.length
|
|
197
216
|
|
|
198
217
|
if concurrency <= 1
|
|
199
218
|
server_items.map do |item|
|
|
200
|
-
process_single_item(http_client, item, receiver, method_name, test_run_id, mock_strategy, adapt_inputs
|
|
219
|
+
process_single_item(http_client, item, receiver, method_name, test_run_id, mock_strategy, adapt_inputs,
|
|
220
|
+
include_db_branch_lease)
|
|
201
221
|
end
|
|
202
222
|
else
|
|
203
223
|
results_mutex = Mutex.new
|
|
@@ -212,7 +232,7 @@ module Bitfab
|
|
|
212
232
|
break unless item
|
|
213
233
|
|
|
214
234
|
result = process_single_item(http_client, item, receiver, method_name, test_run_id, mock_strategy,
|
|
215
|
-
adapt_inputs)
|
|
235
|
+
adapt_inputs, include_db_branch_lease)
|
|
216
236
|
results_mutex.synchronize { results[idx] = result }
|
|
217
237
|
end
|
|
218
238
|
end
|
|
@@ -230,8 +250,15 @@ module Bitfab
|
|
|
230
250
|
# than propagated, so one bad trace never aborts the whole replay run
|
|
231
251
|
# (mirrors the TypeScript and Python SDKs' per-item rescue).
|
|
232
252
|
def process_single_item(http_client, server_item, receiver, method_name, test_run_id, mock_strategy,
|
|
233
|
-
adapt_inputs = nil)
|
|
253
|
+
adapt_inputs = nil, include_db_branch_lease = false)
|
|
234
254
|
metrics = extract_server_item_metrics(server_item)
|
|
255
|
+
# The server resolves a Neon preview branch per item during /replay/start
|
|
256
|
+
# (only when include_db_branch_lease was sent). Release it in the +ensure+
|
|
257
|
+
# below so any raise (span fetch, mock-tree build, or the replayed
|
|
258
|
+
# method) frees the Neon resource. Items whose source trace had no
|
|
259
|
+
# snapshot ref, or whose resolve failed server-side, arrive without a
|
|
260
|
+
# lease (env.active? is false for those).
|
|
261
|
+
lease = include_db_branch_lease ? server_item["dbBranchLease"] : nil
|
|
235
262
|
|
|
236
263
|
span = http_client.get_external_span(server_item["externalSpanId"])
|
|
237
264
|
item_data = extract_span_data(span)
|
|
@@ -255,7 +282,10 @@ module Bitfab
|
|
|
255
282
|
mock_strategy:,
|
|
256
283
|
mock_tree:,
|
|
257
284
|
adapt_inputs:,
|
|
258
|
-
adapt_ctx
|
|
285
|
+
adapt_ctx:,
|
|
286
|
+
db_branch_lease: lease,
|
|
287
|
+
source_bitfab_trace_id: server_item["traceId"],
|
|
288
|
+
db_snapshot_ref: server_item["dbSnapshotRef"]
|
|
259
289
|
)
|
|
260
290
|
rescue => e
|
|
261
291
|
warn "Bitfab: replay item for span #{server_item["externalSpanId"]} failed before execution: #{e.message}"
|
|
@@ -267,14 +297,28 @@ module Bitfab
|
|
|
267
297
|
duration_ms: metrics&.dig(:duration_ms),
|
|
268
298
|
tokens: metrics&.dig(:tokens),
|
|
269
299
|
model: metrics&.dig(:model),
|
|
270
|
-
trace_id: nil
|
|
300
|
+
trace_id: nil,
|
|
301
|
+
db_snapshot_ref: server_item["dbSnapshotRef"]
|
|
271
302
|
}
|
|
303
|
+
ensure
|
|
304
|
+
release_db_branch_lease(http_client, lease) if lease
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
# Delete the per-item Neon preview branch. Best-effort: a failure is warned
|
|
308
|
+
# but never raised: the server-side TTL janitor reaps orphans.
|
|
309
|
+
def release_db_branch_lease(http_client, lease)
|
|
310
|
+
neon_branch_id = lease["neonBranchId"]
|
|
311
|
+
return unless neon_branch_id
|
|
312
|
+
|
|
313
|
+
http_client.release_db_branch_lease(neon_branch_id)
|
|
314
|
+
rescue => e
|
|
315
|
+
warn "Bitfab: failed to release DB branch #{neon_branch_id} (TTL janitor will catch it): #{e.message}"
|
|
272
316
|
end
|
|
273
317
|
|
|
274
318
|
# Walk the children of a root span tree node depth-first and build a
|
|
275
319
|
# lookup keyed by "#{trace_function_key}:#{span_name}:#{call_index}".
|
|
276
320
|
#
|
|
277
|
-
# The root node itself is excluded
|
|
321
|
+
# The root node itself is excluded: at replay time the runtime root span
|
|
278
322
|
# never queries the mock tree.
|
|
279
323
|
#
|
|
280
324
|
# The compound (key, name) match disambiguates same-key spans that come
|
|
@@ -282,7 +326,7 @@ module Bitfab
|
|
|
282
326
|
# wrapped method shares trace_function_key but differs in span_name. The
|
|
283
327
|
# counter is per-(key, name) pair so repeated same-name calls (including
|
|
284
328
|
# recursion) still order by occurrence. Mirrors the Python and TypeScript
|
|
285
|
-
# SDKs after HVT-2078
|
|
329
|
+
# SDKs after HVT-2078: keying by trace_function_key alone caused the
|
|
286
330
|
# wrong historical output for fluent-API span sets.
|
|
287
331
|
def build_mock_tree(root)
|
|
288
332
|
spans = {}
|
|
@@ -346,7 +390,8 @@ module Bitfab
|
|
|
346
390
|
|
|
347
391
|
# Execute a single replay item: deserialize inputs, call method with replay context.
|
|
348
392
|
def execute_item(item, receiver, method_name, test_run_id, input_source_span_id = nil, metrics = {},
|
|
349
|
-
input_source_trace_id: nil, mock_strategy: "none", mock_tree: nil, adapt_inputs: nil, adapt_ctx: nil
|
|
393
|
+
input_source_trace_id: nil, mock_strategy: "none", mock_tree: nil, adapt_inputs: nil, adapt_ctx: nil,
|
|
394
|
+
db_branch_lease: nil, source_bitfab_trace_id: nil, db_snapshot_ref: nil)
|
|
350
395
|
args, kwargs = Serialize.deserialize_inputs(item)
|
|
351
396
|
|
|
352
397
|
fn_result = nil
|
|
@@ -354,7 +399,7 @@ module Bitfab
|
|
|
354
399
|
sdk_trace_id = SecureRandom.uuid
|
|
355
400
|
# Collects the root span's persistence threads (span uploads + trace
|
|
356
401
|
# completion). Joined below so this item's trace is on the server
|
|
357
|
-
# before run() calls complete_replay
|
|
402
|
+
# before run() calls complete_replay: otherwise the server's trace-ID
|
|
358
403
|
# mapping races the uploads and the item's trace_id comes back nil.
|
|
359
404
|
pending_persistence = []
|
|
360
405
|
|
|
@@ -365,7 +410,9 @@ module Bitfab
|
|
|
365
410
|
trace_id: sdk_trace_id,
|
|
366
411
|
mock_tree:,
|
|
367
412
|
mock_strategy:,
|
|
368
|
-
pending_persistence
|
|
413
|
+
pending_persistence:,
|
|
414
|
+
db_branch_lease:,
|
|
415
|
+
source_bitfab_trace_id:
|
|
369
416
|
) do
|
|
370
417
|
# Reshape recorded inputs onto the current signature when an adapter is
|
|
371
418
|
# supplied. Inside the rescue so a raising adapter surfaces on this
|
|
@@ -384,7 +431,7 @@ module Bitfab
|
|
|
384
431
|
end
|
|
385
432
|
|
|
386
433
|
# Wait for this item's trace (spans + completion) to be fully persisted
|
|
387
|
-
# before the item resolves. Runs on the error path too
|
|
434
|
+
# before the item resolves. Runs on the error path too: a raising
|
|
388
435
|
# method still emits a root span whose trace must land before
|
|
389
436
|
# complete_replay. Joins are bounded by the HTTP layer's own timeouts.
|
|
390
437
|
pending_persistence.each(&:join)
|
|
@@ -397,7 +444,8 @@ module Bitfab
|
|
|
397
444
|
duration_ms: metrics[:duration_ms],
|
|
398
445
|
tokens: metrics[:tokens],
|
|
399
446
|
model: metrics[:model],
|
|
400
|
-
trace_id: sdk_trace_id
|
|
447
|
+
trace_id: sdk_trace_id,
|
|
448
|
+
db_snapshot_ref:
|
|
401
449
|
}
|
|
402
450
|
end
|
|
403
451
|
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bitfab
|
|
4
|
+
# Per-trace environment exposed to customer code during replay.
|
|
5
|
+
#
|
|
6
|
+
# The customer instantiates one +ReplayEnvironment+ and passes it to
|
|
7
|
+
# +client.replay(environment: ...)+. Inside the replayed method they read
|
|
8
|
+
# +env.database_url+ (and friends) to pick up the per-trace branch URL the
|
|
9
|
+
# Bitfab service resolved from the source trace's snapshot reference.
|
|
10
|
+
#
|
|
11
|
+
# Outside replay, reading +env.database_url+ raises. Customer code uses the
|
|
12
|
+
# env only on the replay path; live request code keeps reading
|
|
13
|
+
# +ENV["DATABASE_URL"]+ the normal way.
|
|
14
|
+
#
|
|
15
|
+
# Concurrency-safe: the readers resolve through the thread-local replay
|
|
16
|
+
# context, so each in-flight replay item sees its own per-trace values even
|
|
17
|
+
# when the SDK runs items across worker threads.
|
|
18
|
+
#
|
|
19
|
+
# Internally the resolved per-item state is a DB branch lease (the SDK <->
|
|
20
|
+
# server protocol term). We expose its useful fields directly here so
|
|
21
|
+
# customer code never sees the word.
|
|
22
|
+
class ReplayEnvironment
|
|
23
|
+
# The per-trace branch URL for the item currently being replayed.
|
|
24
|
+
# Raises if read outside a replay item.
|
|
25
|
+
def database_url
|
|
26
|
+
snap = require_snapshot
|
|
27
|
+
mark_accessed
|
|
28
|
+
snap.fetch(:database_url)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# When the per-trace branch URL stops being valid. ISO-8601.
|
|
32
|
+
def expires_at
|
|
33
|
+
require_snapshot.fetch(:expires_at)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Deep link to the branch in the provider console, if available.
|
|
37
|
+
def provider_console_url
|
|
38
|
+
require_snapshot[:provider_console_url]
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# True if the branch is read-only. Customer code can use this to skip write
|
|
42
|
+
# operations during replay when the provider returned a read-only lease.
|
|
43
|
+
def read_only
|
|
44
|
+
require_snapshot[:read_only]
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# The historical trace ID that produced the input for this replay item.
|
|
48
|
+
def trace_id
|
|
49
|
+
require_snapshot.fetch(:trace_id)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# True when read inside a replay item that has a resolved branch.
|
|
53
|
+
def active?
|
|
54
|
+
!read_snapshot.nil?
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Non-raising variant for callers that handle the inactive case. Returns a
|
|
58
|
+
# symbol-keyed hash or nil.
|
|
59
|
+
def snapshot
|
|
60
|
+
snap = read_snapshot
|
|
61
|
+
mark_accessed if snap
|
|
62
|
+
snap
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
private
|
|
66
|
+
|
|
67
|
+
# Record on the replay context that customer code obtained the branch
|
|
68
|
+
# URL. Only +database_url+ and +snapshot+ count: +active?+, +read_only+
|
|
69
|
+
# and friends inspect the lease without exposing the connection string,
|
|
70
|
+
# so they don't prove the replayed code could have connected to the
|
|
71
|
+
# branch.
|
|
72
|
+
def mark_accessed
|
|
73
|
+
ctx = ReplayContext.current
|
|
74
|
+
return unless ctx && ctx[:db_branch_lease]
|
|
75
|
+
|
|
76
|
+
ctx[:db_snapshot_accessed] = true
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def read_snapshot
|
|
80
|
+
ctx = ReplayContext.current
|
|
81
|
+
return nil unless ctx
|
|
82
|
+
|
|
83
|
+
lease = ctx[:db_branch_lease]
|
|
84
|
+
return nil unless lease
|
|
85
|
+
|
|
86
|
+
# Surface the Bitfab trace ID (what the customer sees in the dashboard),
|
|
87
|
+
# falling back to the external trace ID only if the Bitfab ID is somehow
|
|
88
|
+
# absent: keeps replays from external sources working until the
|
|
89
|
+
# source-system path is fully wired.
|
|
90
|
+
trace_id = ctx[:source_bitfab_trace_id] || ctx[:input_source_trace_id]
|
|
91
|
+
return nil unless trace_id
|
|
92
|
+
|
|
93
|
+
{
|
|
94
|
+
database_url: lease["databaseUrl"],
|
|
95
|
+
expires_at: lease["expiresAt"],
|
|
96
|
+
provider_console_url: lease["providerConsoleUrl"],
|
|
97
|
+
read_only: lease["readOnly"],
|
|
98
|
+
trace_id:
|
|
99
|
+
}
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def require_snapshot
|
|
103
|
+
snap = read_snapshot
|
|
104
|
+
return snap if snap
|
|
105
|
+
|
|
106
|
+
raise "ReplayEnvironment accessed outside of a replay item. Pass it to " \
|
|
107
|
+
"client.replay(environment: ...) and only read it inside the replayed method."
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
data/lib/bitfab/span_context.rb
CHANGED
|
@@ -133,12 +133,20 @@ module Bitfab
|
|
|
133
133
|
|
|
134
134
|
def create(trace_id, test_run_id: nil, input_source_trace_id: nil)
|
|
135
135
|
@states_mutex.synchronize do
|
|
136
|
-
@states[trace_id] ||=
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
136
|
+
@states[trace_id] ||= begin
|
|
137
|
+
started_at = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.%3NZ")
|
|
138
|
+
{
|
|
139
|
+
trace_id:,
|
|
140
|
+
started_at:,
|
|
141
|
+
test_run_id:,
|
|
142
|
+
input_source_trace_id:,
|
|
143
|
+
# Capture the wall clock now, before the wrapped function runs.
|
|
144
|
+
# Stored on every trace (no IO, harmless) so any trace can later be
|
|
145
|
+
# replayed against a historical branch; the provider is resolved at
|
|
146
|
+
# replay time.
|
|
147
|
+
db_snapshot_ref: DbSnapshot.build_snapshot_ref(started_at)
|
|
148
|
+
}.compact
|
|
149
|
+
end
|
|
142
150
|
end
|
|
143
151
|
end
|
|
144
152
|
|
data/lib/bitfab/version.rb
CHANGED
data/lib/bitfab.rb
CHANGED
|
@@ -3,9 +3,11 @@
|
|
|
3
3
|
require_relative "bitfab/version"
|
|
4
4
|
require_relative "bitfab/constants"
|
|
5
5
|
require_relative "bitfab/serialize"
|
|
6
|
+
require_relative "bitfab/db_snapshot"
|
|
6
7
|
require_relative "bitfab/span_context"
|
|
7
8
|
require_relative "bitfab/http_client"
|
|
8
9
|
require_relative "bitfab/replay"
|
|
10
|
+
require_relative "bitfab/replay_environment"
|
|
9
11
|
require_relative "bitfab/client"
|
|
10
12
|
require_relative "bitfab/traceable"
|
|
11
13
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bitfab
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.16.1
|
|
4
|
+
version: 0.17.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Harvest Team
|
|
@@ -119,8 +119,10 @@ files:
|
|
|
119
119
|
- lib/bitfab.rb
|
|
120
120
|
- lib/bitfab/client.rb
|
|
121
121
|
- lib/bitfab/constants.rb
|
|
122
|
+
- lib/bitfab/db_snapshot.rb
|
|
122
123
|
- lib/bitfab/http_client.rb
|
|
123
124
|
- lib/bitfab/replay.rb
|
|
125
|
+
- lib/bitfab/replay_environment.rb
|
|
124
126
|
- lib/bitfab/serialize.rb
|
|
125
127
|
- lib/bitfab/span_context.rb
|
|
126
128
|
- lib/bitfab/traceable.rb
|