bitfab 0.12.5 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a66f64bbe867d0adeafbebc9984413802c8ecf8ae47007078b8a3e9d336e5bd0
4
- data.tar.gz: 726943a84542d9bc5ead1cc54b0c66d726edaf57ed03703588f63fd41fb67ecd
3
+ metadata.gz: b0e39c364993d34e99e1d7e3c3a878e0fc6fbfe685fcb0335128bfbf7252161b
4
+ data.tar.gz: 54bcd62faffceac5c0f67f26f5dedbed0057061019d24d1d789f9988fbf0a44a
5
5
  SHA512:
6
- metadata.gz: fb26e2388ac5585d8a74aeb8de4f2b8af12df1d6abf4c882560a96107de46afd6fa9a8c1fd99482ae5af6c49234f058f473338c00b91502494e0c078c65a052f
7
- data.tar.gz: 9eedf94be719b11b2456ed0002eaeb01afc0b1ef42099850dcc4507096072efb593975a5fa92cef8a99fa224cb1873d0f6c41bfa9d343782f70989b542303da4
6
+ metadata.gz: 04cef22ee4135b8c43e1e35f5ead74f5650d9f49e16b818de05980d01a054b145bf4adbad1c9fc3ec01b0e17130e108df666f75bc8bc9bf4666eb23eb9ba7acc
7
+ data.tar.gz: bb4724aff3d7fc6a9f04e1a923fd5aaabb2e808ef2310ef2c1d51e6a246c296653c1ea85509a92e1a1b927c131074e0907c07623ade5affaa6bab8fd20b2464c
data/lib/bitfab/client.rb CHANGED
@@ -39,8 +39,10 @@ module Bitfab
39
39
  # @param receiver [Object, Class] an instance for instance methods, or a Class for class methods
40
40
  # @param method_name [Symbol] the method to replay
41
41
  # @param trace_function_key [String] the trace function key for this method
42
- # @param limit [Integer] maximum number of traces to replay (default: 5)
43
- # @param trace_ids [Array<String>, nil] optional list of trace IDs to filter
42
+ # @param limit [Integer, nil] maximum number of traces to replay (default: 5).
43
+ # Mutually exclusive with trace_ids: an explicit ID list already
44
+ # determines how many traces replay, so passing both raises.
45
+ # @param trace_ids [Array<String>, nil] optional list of trace IDs to replay (max 100)
44
46
  # @param max_concurrency [Integer, nil] max threads for parallel replay (default: 10)
45
47
  # @param code_change_description [String, nil] optional rationale for the
46
48
  # code change being tested in this replay (stored on the experiment)
@@ -52,7 +54,7 @@ module Bitfab
52
54
  # "all", or "marked". "all" mocks every child span; "marked" only mocks
53
55
  # spans declared with mock_on_replay: true.
54
56
  # @return [Hash] with :items, :test_run_id, :test_run_url
55
- def replay(receiver, method_name, trace_function_key:, limit: 5, trace_ids: nil, max_concurrency: 10,
57
+ def replay(receiver, method_name, trace_function_key:, limit: nil, trace_ids: nil, max_concurrency: 10,
56
58
  code_change_description: nil, code_change_files: nil, experiment_group_id: nil, mock: "none")
57
59
  Replay.run(
58
60
  self, receiver, method_name,
@@ -181,12 +183,24 @@ module Bitfab
181
183
  pending << span_thread if span_thread
182
184
  pending.each { |t| t.join(5) }
183
185
 
184
- send_trace_completion(
186
+ completion_thread = send_trace_completion(
185
187
  trace_function_key:,
186
188
  trace_id:,
187
189
  started_at:,
188
190
  ended_at:
189
191
  )
192
+
193
+ # In replay, persistence is correctness: the replay runner joins
194
+ # these threads before calling complete_replay, or the server's
195
+ # trace-ID mapping races the uploads and every item's trace_id
196
+ # comes back nil. The 5s join above is best-effort only; this
197
+ # hands the full set (span uploads + trace completion) to the
198
+ # runner. No-op outside replay, where sends stay fire-and-forget.
199
+ persistence = ReplayContext.current&.dig(:pending_persistence)
200
+ if persistence
201
+ persistence.concat(pending)
202
+ persistence << completion_thread if completion_thread
203
+ end
190
204
  else
191
205
  @pending_span_mutex.synchronize do
192
206
  @pending_span_threads[trace_id] << span_thread if span_thread && @pending_span_threads.key?(trace_id)
@@ -312,10 +326,14 @@ module Bitfab
312
326
  payload["testRunId"] = trace_state[:test_run_id]
313
327
  end
314
328
 
315
- @http_client.send_external_trace(payload)
329
+ completion_thread = @http_client.send_external_trace(payload)
316
330
 
317
331
  # Clean up trace state
318
332
  TraceState.delete(trace_id)
333
+
334
+ # Returned so the replay path can join it — trace completions must be
335
+ # persisted before complete_replay builds the trace-ID mapping.
336
+ completion_thread
319
337
  end
320
338
 
321
339
  def send_span(trace_function_key:, trace_id:, span_id:, parent_span_id:,
@@ -108,9 +108,11 @@ module Bitfab
108
108
  def start_replay(trace_function_key, limit, trace_ids: nil, code_change_description: nil,
109
109
  code_change_files: nil, experiment_group_id: nil)
110
110
  payload = {
111
- "traceFunctionKey" => trace_function_key,
112
- "limit" => limit
111
+ "traceFunctionKey" => trace_function_key
113
112
  }
113
+ # limit is only meaningful without trace_ids (an explicit ID list
114
+ # already determines the count), so it's omitted when nil.
115
+ payload["limit"] = limit unless limit.nil?
114
116
  payload["traceIds"] = trace_ids if trace_ids
115
117
  payload["codeChangeDescription"] = code_change_description unless code_change_description.nil?
116
118
  payload["codeChangeFiles"] = normalize_code_change_files(code_change_files) unless code_change_files.nil?
data/lib/bitfab/replay.rb CHANGED
@@ -22,8 +22,12 @@ module Bitfab
22
22
 
23
23
  # Execute a block with replay context set on the current thread.
24
24
  # The context is automatically cleared when the block completes.
25
+ #
26
+ # pending_persistence, when given, collects the root span's persistence
27
+ # threads (span uploads + trace completion) so the replay runner can join
28
+ # them before complete_replay builds the trace-ID mapping.
25
29
  def with_context(test_run_id:, input_source_span_id: nil, input_source_trace_id: nil, trace_id: nil,
26
- mock_tree: nil, mock_strategy: nil)
30
+ mock_tree: nil, mock_strategy: nil, pending_persistence: nil)
27
31
  previous = Thread.current[REPLAY_CONTEXT_KEY]
28
32
  ctx = {
29
33
  test_run_id:,
@@ -31,6 +35,7 @@ module Bitfab
31
35
  input_source_trace_id:,
32
36
  trace_id:
33
37
  }
38
+ ctx[:pending_persistence] = pending_persistence if pending_persistence
34
39
  if mock_tree
35
40
  ctx[:mock_tree] = mock_tree
36
41
  ctx[:mock_strategy] = mock_strategy || "none"
@@ -56,8 +61,10 @@ module Bitfab
56
61
  # @param receiver [Object, Class] an instance for instance methods, or a Class for class methods
57
62
  # @param method_name [Symbol] the method to replay
58
63
  # @param trace_function_key [String] the trace function key for this method
59
- # @param limit [Integer] maximum number of traces to replay (default: 5)
60
- # @param trace_ids [Array<String>, nil] optional list of trace IDs to filter
64
+ # @param limit [Integer, nil] maximum number of traces to replay (default: 5).
65
+ # Mutually exclusive with trace_ids: an explicit ID list already
66
+ # determines how many traces replay, so passing both raises.
67
+ # @param trace_ids [Array<String>, nil] optional list of trace IDs to replay (max 100)
61
68
  # @param max_concurrency [Integer, nil] max threads for parallel replay (default: 10)
62
69
  # @param code_change_description [String, nil] optional rationale for the
63
70
  # code change being tested in this replay (stored on the experiment)
@@ -69,17 +76,31 @@ module Bitfab
69
76
  # "all", or "marked". "all" mocks every child span; "marked" only mocks
70
77
  # spans declared with mock_on_replay: true.
71
78
  # @return [Hash] with :items, :test_run_id, :test_run_url
72
- def run(client, receiver, method_name, trace_function_key:, limit: 5, trace_ids: nil, max_concurrency: 10,
79
+ def run(client, receiver, method_name, trace_function_key:, limit: nil, trace_ids: nil, max_concurrency: 10,
73
80
  code_change_description: nil, code_change_files: nil, experiment_group_id: nil, mock: "none")
74
81
  unless MOCK_STRATEGIES.include?(mock.to_s)
75
82
  raise ArgumentError, "Invalid mock strategy '#{mock}'. Must be one of: #{MOCK_STRATEGIES.join(", ")}"
76
83
  end
84
+ if trace_ids
85
+ raise ArgumentError, "trace_ids must contain at least one trace ID." if trace_ids.empty?
86
+ if trace_ids.length > 100
87
+ raise ArgumentError, "trace_ids supports at most 100 trace IDs per replay (got #{trace_ids.length})."
88
+ end
89
+ end
90
+ if limit && trace_ids
91
+ raise ArgumentError,
92
+ "Pass either limit or trace_ids, not both: an explicit trace ID list already determines how many traces replay."
93
+ end
77
94
 
78
95
  http_client = client.instance_variable_get(:@http_client)
79
96
 
97
+ # limit is meaningless with explicit trace_ids (the ID list determines
98
+ # the count), so it's omitted from the request entirely.
99
+ effective_limit = trace_ids ? nil : (limit || 5)
100
+
80
101
  replay_data = http_client.start_replay(
81
102
  trace_function_key,
82
- limit,
103
+ effective_limit,
83
104
  trace_ids:,
84
105
  code_change_description:,
85
106
  code_change_files:,
@@ -95,17 +116,63 @@ module Bitfab
95
116
  []
96
117
  end
97
118
 
98
- Bitfab.flush_traces
99
-
100
- begin
101
- complete_response = http_client.complete_replay(test_run_id)
102
- trace_id_map = complete_response&.dig("traceIds") || {}
119
+ # Every item joined its own trace-persistence threads (span uploads +
120
+ # completion) in execute_item, so all replay traces are on the server
121
+ # by now — no flush needed, and complete_replay's trace-ID mapping is
122
+ # deterministic. complete_replay failures propagate: a missing mapping
123
+ # means verdicts can't be persisted, which callers must hear about
124
+ # loudly.
125
+ complete_response = http_client.complete_replay(test_run_id)
126
+ trace_id_map = complete_response&.dig("traceIds")
127
+
128
+ if trace_id_map.nil?
129
+ # Older servers don't return the mapping. Preserve the legacy
130
+ # nil-trace_id behavior but say why.
131
+ warn "Bitfab: server did not return replay trace IDs; item trace_id " \
132
+ "will be nil (server upgrade required for verdict persistence)"
133
+ result_items.each { |item| item[:trace_id] = nil }
134
+ else
135
+ # Map each item's locally-generated trace ID to the server's trace
136
+ # row ID. A completed item with no mapping means its trace was sent
137
+ # but the server has no record — a nil trace_id blocks verdict
138
+ # persistence and the Studio experiments view downstream, so this
139
+ # must never be silent.
140
+ #
141
+ # Severity splits on scope:
142
+ # - ALL completed items missing: systemic (the replayed method is
143
+ # not traced, or uploads are wholesale broken). Raise; the run's
144
+ # results are unusable for persistence.
145
+ # - SOME completed items missing: per-item upload failure (transient
146
+ # network blip, one oversized payload). Nil those items and warn
147
+ # loudly, but return the run so callers can persist verdicts for
148
+ # the items that landed.
149
+ missing = []
150
+ completed_count = 0
103
151
  result_items.each do |item|
104
- item[:trace_id] = trace_id_map[item[:trace_id]]
152
+ next unless item[:trace_id]
153
+
154
+ mapped = trace_id_map[item[:trace_id]]
155
+ if item[:error].nil?
156
+ completed_count += 1
157
+ missing << item[:trace_id] if mapped.nil?
158
+ end
159
+ item[:trace_id] = mapped
160
+ end
161
+ if missing.any?
162
+ trace_count = complete_response["traceCount"]
163
+ server_count = trace_count.nil? ? "" : " The server persisted #{trace_count} trace(s) for this run."
164
+ if missing.length == completed_count
165
+ raise "Replay completed but the server has no persisted trace for " \
166
+ "any of the #{completed_count} completed item(s) " \
167
+ "(test_run_id #{test_run_id}).#{server_count} Trace uploads were " \
168
+ "joined, so either the uploads failed or the replayed method is " \
169
+ "not traced (no root span was emitted)."
170
+ end
171
+ warn "Bitfab: server has no persisted trace for #{missing.length} of " \
172
+ "#{completed_count} completed replay item(s) " \
173
+ "(test_run_id #{test_run_id}).#{server_count} Their trace_id is nil " \
174
+ "and verdicts cannot be persisted for them. Missing: #{missing.join(", ")}"
105
175
  end
106
- rescue => e
107
- warn "Bitfab: Failed to complete replay: #{e.message}"
108
- result_items.each { |item| item[:trace_id] = nil }
109
176
  end
110
177
 
111
178
  {
@@ -270,6 +337,11 @@ module Bitfab
270
337
  fn_result = nil
271
338
  fn_error = nil
272
339
  sdk_trace_id = SecureRandom.uuid
340
+ # Collects the root span's persistence threads (span uploads + trace
341
+ # completion). Joined below so this item's trace is on the server
342
+ # before run() calls complete_replay — otherwise the server's trace-ID
343
+ # mapping races the uploads and the item's trace_id comes back nil.
344
+ pending_persistence = []
273
345
 
274
346
  ReplayContext.with_context(
275
347
  test_run_id:,
@@ -277,7 +349,8 @@ module Bitfab
277
349
  input_source_trace_id:,
278
350
  trace_id: sdk_trace_id,
279
351
  mock_tree:,
280
- mock_strategy:
352
+ mock_strategy:,
353
+ pending_persistence:
281
354
  ) do
282
355
  fn_result = if kwargs.empty?
283
356
  receiver.send(method_name, *args)
@@ -288,6 +361,12 @@ module Bitfab
288
361
  fn_error = e.message
289
362
  end
290
363
 
364
+ # Wait for this item's trace (spans + completion) to be fully persisted
365
+ # before the item resolves. Runs on the error path too — a raising
366
+ # method still emits a root span whose trace must land before
367
+ # complete_replay. Joins are bounded by the HTTP layer's own timeouts.
368
+ pending_persistence.each(&:join)
369
+
291
370
  {
292
371
  input: args,
293
372
  result: fn_result,
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bitfab
4
- VERSION = "0.12.5"
4
+ VERSION = "0.15.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bitfab
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.5
4
+ version: 0.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Harvest Team