bitfab 0.10.5 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +56 -0
- data/lib/bitfab/client.rb +203 -4
- data/lib/bitfab/http_client.rb +11 -0
- data/lib/bitfab/replay.rb +106 -12
- data/lib/bitfab/span_context.rb +3 -2
- data/lib/bitfab/traceable.rb +26 -8
- data/lib/bitfab/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0e4ca1b81e502de48fe20835b4d072b05736ac5f4ff1a444d683627818648675
|
|
4
|
+
data.tar.gz: d7120ea5b06e8da151cd9ca26c9549b017458cc2d3b1fa9078ffa512c59d8606
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b2cbfda78dfd7d726174ace8910f507ea3982d68bb19c402fcaa50b84a20eeddbaf7373932fbec81c93116d8056b41eef876f132aa27913cade2937506299e37
|
|
7
|
+
data.tar.gz: 41fa08a3eb1a36f21af3358570012625564473eeafd847ab0073c33e012fb341c2b42463a1247e73c929679a1075220c096a4e35a2ac5c7e81151268f3cbbd3c
|
data/README.md
CHANGED
|
@@ -276,6 +276,62 @@ client = ExternalHttpClient.new
|
|
|
276
276
|
client.get("https://api.example.com")
|
|
277
277
|
```
|
|
278
278
|
|
|
279
|
+
### Fluent API: `client.get_function`
|
|
280
|
+
|
|
281
|
+
Bind a `trace_function_key` once and wrap multiple methods or classes against it. Mirrors `client.get_function` in the Python SDK and `client.getFunction` in TypeScript.
|
|
282
|
+
|
|
283
|
+
```ruby
|
|
284
|
+
fn = Bitfab.client.get_function("openai")
|
|
285
|
+
|
|
286
|
+
fn.wrap(OpenAI::Client, :chat, name: "Chat", type: "llm")
|
|
287
|
+
fn.wrap(OpenAI::Client, :embeddings, name: "Embed", type: "llm")
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
`#wrap` accepts the same options as `Bitfab::Traceable.wrap` (`name`, `type`, `mock_on_replay`), but the `trace_function_key` is fixed to the one bound on the `BitfabFunction`.
|
|
291
|
+
|
|
292
|
+
### Replay with Mock Strategies
|
|
293
|
+
|
|
294
|
+
Replay reruns historical traces through your code so you can compare outputs after an iteration. By default every child span runs real code — fine for offline traces, but expensive when children make paid LLM/API calls. Three strategies control whether child spans return their historical output instead of executing:
|
|
295
|
+
|
|
296
|
+
```ruby
|
|
297
|
+
# "none" (default): everything runs real code
|
|
298
|
+
client.replay(pipeline, :process, trace_function_key: "my-fn", mock: "none")
|
|
299
|
+
|
|
300
|
+
# "all": every child span returns its historical output
|
|
301
|
+
client.replay(pipeline, :process, trace_function_key: "my-fn", mock: "all")
|
|
302
|
+
|
|
303
|
+
# "marked": only spans tagged with `mock_on_replay: true` return historical output
|
|
304
|
+
client.replay(pipeline, :process, trace_function_key: "my-fn", mock: "marked")
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
Tag the spans you want mocked at definition time:
|
|
308
|
+
|
|
309
|
+
```ruby
|
|
310
|
+
class Pipeline
|
|
311
|
+
include Bitfab::Traceable
|
|
312
|
+
bitfab_function "my-fn"
|
|
313
|
+
|
|
314
|
+
# mock_on_replay: true → returns historical output under mock: "marked"
|
|
315
|
+
bitfab_span :call_llm, type: "llm", mock_on_replay: true
|
|
316
|
+
def call_llm(prompt)
|
|
317
|
+
# paid OpenAI call — skip during replay
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
bitfab_span :transform, type: "function"
|
|
321
|
+
def transform(text)
|
|
322
|
+
# cheap, deterministic — keep running real
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
bitfab_span :process, type: "agent"
|
|
326
|
+
def process(text)
|
|
327
|
+
call_llm(text)
|
|
328
|
+
transform(text)
|
|
329
|
+
end
|
|
330
|
+
end
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
Use `mock: "marked"` when you want to iterate on `process`'s logic without paying for the LLM call each run. Use `mock: "all"` for the cheapest possible replay (every child span returns its recorded output).
|
|
334
|
+
|
|
279
335
|
### Error Handling
|
|
280
336
|
|
|
281
337
|
Errors are automatically captured and re-raised:
|
data/lib/bitfab/client.rb
CHANGED
|
@@ -13,6 +13,12 @@ module Bitfab
|
|
|
13
13
|
class Client
|
|
14
14
|
SPAN_TYPES = %w[llm agent function guardrail handoff custom].freeze
|
|
15
15
|
|
|
16
|
+
# Sentinel returned by check_mock_replay when this span should run real
|
|
17
|
+
# code (no mock active, wrong strategy, or no matching historical entry).
|
|
18
|
+
# Using a sentinel rather than nil/false avoids confusing legitimate mocked
|
|
19
|
+
# outputs (which may themselves be nil or false).
|
|
20
|
+
MOCK_REPLAY_MISS = Object.new.freeze
|
|
21
|
+
|
|
16
22
|
attr_reader :api_key, :service_url, :enabled
|
|
17
23
|
|
|
18
24
|
def initialize(api_key:, service_url: nil, enabled: true)
|
|
@@ -40,19 +46,41 @@ module Bitfab
|
|
|
40
46
|
# code change being tested in this replay (stored on the experiment)
|
|
41
47
|
# @param code_change_files [Array<Hash>, nil] optional list of edited files,
|
|
42
48
|
# each as { path:, before:, after: } (use "" for new/deleted files)
|
|
49
|
+
# @param mock [String] mock strategy for child spans: "none" (default),
|
|
50
|
+
# "all", or "marked". "all" mocks every child span; "marked" only mocks
|
|
51
|
+
# spans declared with mock_on_replay: true.
|
|
43
52
|
# @return [Hash] with :items, :test_run_id, :test_run_url
|
|
44
53
|
def replay(receiver, method_name, trace_function_key:, limit: 5, trace_ids: nil, max_concurrency: 10,
|
|
45
|
-
code_change_description: nil, code_change_files: nil)
|
|
54
|
+
code_change_description: nil, code_change_files: nil, mock: "none")
|
|
46
55
|
Replay.run(
|
|
47
56
|
self, receiver, method_name,
|
|
48
57
|
trace_function_key:, limit:, trace_ids:, max_concurrency:,
|
|
49
|
-
code_change_description:, code_change_files:
|
|
58
|
+
code_change_description:, code_change_files:, mock:
|
|
50
59
|
)
|
|
51
60
|
end
|
|
52
61
|
|
|
62
|
+
# Get a function wrapper bound to a specific trace function key.
|
|
63
|
+
#
|
|
64
|
+
# This provides a fluent API for binding a trace_function_key once and
|
|
65
|
+
# then wrapping multiple methods or classes with that key. Mirrors
|
|
66
|
+
# `client.get_function(key)` in the Python SDK and `client.getFunction(key)`
|
|
67
|
+
# in the TypeScript SDK.
|
|
68
|
+
#
|
|
69
|
+
# @example
|
|
70
|
+
# fn = Bitfab.client.get_function("order-processing")
|
|
71
|
+
# fn.wrap(OrderService, :process_order, type: "function")
|
|
72
|
+
# fn.wrap(OrderService, :validate_order, type: "guardrail")
|
|
73
|
+
#
|
|
74
|
+
# @param trace_function_key [String]
|
|
75
|
+
# @return [BitfabFunction]
|
|
76
|
+
def get_function(trace_function_key)
|
|
77
|
+
BitfabFunction.new(self, trace_function_key)
|
|
78
|
+
end
|
|
79
|
+
|
|
53
80
|
# Execute a block inside a span context, sending trace data on completion.
|
|
54
81
|
# Called by Traceable — not intended for direct use.
|
|
55
|
-
def execute_span(trace_function_key:, span_name:, span_type:, function_name:, args:, kwargs
|
|
82
|
+
def execute_span(trace_function_key:, span_name:, span_type:, function_name:, args:, kwargs:,
|
|
83
|
+
mock_on_replay: false)
|
|
56
84
|
return yield unless @enabled
|
|
57
85
|
|
|
58
86
|
parent = SpanContext.current
|
|
@@ -65,16 +93,52 @@ module Bitfab
|
|
|
65
93
|
replay_ctx = ReplayContext.current
|
|
66
94
|
resolved_test_run_id = replay_ctx&.dig(:test_run_id)
|
|
67
95
|
resolved_input_source_span_id = replay_ctx&.dig(:input_source_span_id)
|
|
96
|
+
resolved_input_source_trace_id = replay_ctx&.dig(:input_source_trace_id)
|
|
68
97
|
|
|
69
98
|
# Register trace state for root spans
|
|
70
99
|
if is_root_span && !TraceState.get(trace_id)
|
|
71
|
-
TraceState.create(
|
|
100
|
+
TraceState.create(
|
|
101
|
+
trace_id,
|
|
102
|
+
test_run_id: resolved_test_run_id,
|
|
103
|
+
input_source_trace_id: resolved_input_source_trace_id
|
|
104
|
+
)
|
|
72
105
|
end
|
|
73
106
|
|
|
74
107
|
if is_root_span
|
|
75
108
|
@pending_span_mutex.synchronize { @pending_span_threads[trace_id] = [] }
|
|
76
109
|
end
|
|
77
110
|
|
|
111
|
+
# Advance the per-(key, name) call counter for any non-root span under
|
|
112
|
+
# an active mock tree, even when this span won't itself be mocked.
|
|
113
|
+
# Unmarked spans must consume an index so subsequent marked siblings
|
|
114
|
+
# line up with `build_mock_tree`'s sequential numbering for the same
|
|
115
|
+
# (key, name) pair. Different (key, name) pairs have independent
|
|
116
|
+
# counters — they cannot shift each other.
|
|
117
|
+
call_index = advance_mock_counter(replay_ctx, trace_function_key, span_name, is_root_span:)
|
|
118
|
+
if call_index
|
|
119
|
+
mocked_output = check_mock_replay(
|
|
120
|
+
replay_ctx, trace_function_key, span_name, call_index, mock_on_replay:
|
|
121
|
+
)
|
|
122
|
+
if mocked_output != MOCK_REPLAY_MISS
|
|
123
|
+
send_mocked_span(
|
|
124
|
+
trace_function_key:,
|
|
125
|
+
trace_id:,
|
|
126
|
+
span_id:,
|
|
127
|
+
parent_span_id:,
|
|
128
|
+
span_name:,
|
|
129
|
+
span_type:,
|
|
130
|
+
function_name:,
|
|
131
|
+
args:,
|
|
132
|
+
kwargs:,
|
|
133
|
+
mocked_output:,
|
|
134
|
+
started_at:,
|
|
135
|
+
test_run_id: resolved_test_run_id,
|
|
136
|
+
input_source_span_id: resolved_input_source_span_id
|
|
137
|
+
)
|
|
138
|
+
return mocked_output
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
|
|
78
142
|
result = nil
|
|
79
143
|
error = nil
|
|
80
144
|
span_contexts = nil
|
|
@@ -228,6 +292,9 @@ module Bitfab
|
|
|
228
292
|
if trace_state&.dig(:contexts)
|
|
229
293
|
raw_trace["contexts"] = trace_state[:contexts]
|
|
230
294
|
end
|
|
295
|
+
if trace_state&.dig(:input_source_trace_id)
|
|
296
|
+
raw_trace["input_source_trace_id"] = trace_state[:input_source_trace_id]
|
|
297
|
+
end
|
|
231
298
|
|
|
232
299
|
payload = {
|
|
233
300
|
"type" => "sdk-function",
|
|
@@ -296,5 +363,137 @@ module Bitfab
|
|
|
296
363
|
|
|
297
364
|
@http_client.send_external_span(payload) # Returns the background thread
|
|
298
365
|
end
|
|
366
|
+
|
|
367
|
+
# Advance the per-(key, name) call counter when this invocation is a
|
|
368
|
+
# non-root span under an active mock tree. Returns the call index this
|
|
369
|
+
# invocation owns, or nil when there's nothing to advance (root span, or
|
|
370
|
+
# no replay mock context). The counter MUST advance for every child span
|
|
371
|
+
# sharing the same (key, name) pair — including spans that won't be
|
|
372
|
+
# mocked — so unmarked spans don't silently shift subsequent marked
|
|
373
|
+
# spans' indices. Different (key, name) pairs have independent counters.
|
|
374
|
+
def advance_mock_counter(replay_ctx, trace_function_key, span_name, is_root_span:)
|
|
375
|
+
return nil if is_root_span
|
|
376
|
+
return nil unless replay_ctx&.dig(:mock_tree)
|
|
377
|
+
|
|
378
|
+
counters = replay_ctx[:call_counters]
|
|
379
|
+
counter_key = "#{trace_function_key}:#{span_name}"
|
|
380
|
+
call_index = counters[counter_key] || 0
|
|
381
|
+
counters[counter_key] = call_index + 1
|
|
382
|
+
call_index
|
|
383
|
+
end
|
|
384
|
+
|
|
385
|
+
# Decide whether this child span should be short-circuited to its recorded
|
|
386
|
+
# output. Returns MOCK_REPLAY_MISS when the span should run real code,
|
|
387
|
+
# otherwise returns the deserialized historical output.
|
|
388
|
+
def check_mock_replay(replay_ctx, trace_function_key, span_name, call_index, mock_on_replay:)
|
|
389
|
+
strategy = replay_ctx[:mock_strategy]
|
|
390
|
+
case strategy
|
|
391
|
+
when "marked"
|
|
392
|
+
return MOCK_REPLAY_MISS unless mock_on_replay
|
|
393
|
+
when "all"
|
|
394
|
+
# All non-root spans are eligible
|
|
395
|
+
else
|
|
396
|
+
return MOCK_REPLAY_MISS
|
|
397
|
+
end
|
|
398
|
+
|
|
399
|
+
mock_entry = replay_ctx[:mock_tree]["#{trace_function_key}:#{span_name}:#{call_index}"]
|
|
400
|
+
return MOCK_REPLAY_MISS unless mock_entry
|
|
401
|
+
|
|
402
|
+
output = mock_entry[:output]
|
|
403
|
+
output_meta = mock_entry[:output_meta]
|
|
404
|
+
|
|
405
|
+
# Type-preserving deserialization when the server included Ruby-side
|
|
406
|
+
# Marshal+Base64 metadata. Falls back to the JSON output silently — the
|
|
407
|
+
# spanTree endpoint currently returns superjson/jsonpickle-shaped meta,
|
|
408
|
+
# which Ruby cannot reconstruct.
|
|
409
|
+
if output_meta.is_a?(String) && !output_meta.empty?
|
|
410
|
+
begin
|
|
411
|
+
output = Serialize.unmarshal_value(output_meta)
|
|
412
|
+
rescue
|
|
413
|
+
# Fall through to the JSON output
|
|
414
|
+
end
|
|
415
|
+
end
|
|
416
|
+
|
|
417
|
+
output
|
|
418
|
+
end
|
|
419
|
+
|
|
420
|
+
# Record a span entry for a mocked invocation so the test run reflects the
|
|
421
|
+
# mocked execution. Mirrors send_span's payload shape but with the mocked
|
|
422
|
+
# output as the result and no error. The returned background thread is
|
|
423
|
+
# registered with @pending_span_threads so the root span's finalize joins
|
|
424
|
+
# it before sending trace completion; without this the trace completion
|
|
425
|
+
# can race ahead of the mocked span's HTTP send and the trace lands
|
|
426
|
+
# temporarily incomplete on the server.
|
|
427
|
+
def send_mocked_span(trace_function_key:, trace_id:, span_id:, parent_span_id:,
|
|
428
|
+
span_name:, span_type:, function_name:, args:, kwargs:, mocked_output:,
|
|
429
|
+
started_at:, test_run_id:, input_source_span_id:)
|
|
430
|
+
ended_at = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.%3NZ")
|
|
431
|
+
span_thread = send_span(
|
|
432
|
+
trace_function_key:,
|
|
433
|
+
trace_id:,
|
|
434
|
+
span_id:,
|
|
435
|
+
parent_span_id:,
|
|
436
|
+
span_name:,
|
|
437
|
+
span_type:,
|
|
438
|
+
function_name:,
|
|
439
|
+
contexts: nil,
|
|
440
|
+
prompt: nil,
|
|
441
|
+
args:,
|
|
442
|
+
kwargs:,
|
|
443
|
+
result: mocked_output,
|
|
444
|
+
error: nil,
|
|
445
|
+
started_at:,
|
|
446
|
+
ended_at:,
|
|
447
|
+
test_run_id:,
|
|
448
|
+
input_source_span_id:
|
|
449
|
+
)
|
|
450
|
+
# Mocked spans are always non-root (advance_mock_counter returns nil for
|
|
451
|
+
# root spans, so check_mock_replay never short-circuits them), so the
|
|
452
|
+
# thread always belongs in the parent's pending list, never standalone.
|
|
453
|
+
@pending_span_mutex.synchronize do
|
|
454
|
+
@pending_span_threads[trace_id] << span_thread if span_thread && @pending_span_threads.key?(trace_id)
|
|
455
|
+
end
|
|
456
|
+
rescue Exception # rubocop:disable Lint/RescueException
|
|
457
|
+
# Never crash the host app — mocked span recording is best-effort
|
|
458
|
+
end
|
|
459
|
+
end
|
|
460
|
+
|
|
461
|
+
# Fluent wrapper bound to a single trace_function_key. Mirrors
|
|
462
|
+
# `BitfabFunction` in the Python SDK and `BitfabFunction` in the TypeScript
|
|
463
|
+
# SDK — lets callers wrap multiple methods without repeating the key.
|
|
464
|
+
class BitfabFunction
|
|
465
|
+
attr_reader :trace_function_key
|
|
466
|
+
|
|
467
|
+
def initialize(client, trace_function_key)
|
|
468
|
+
@client = client
|
|
469
|
+
@trace_function_key = trace_function_key
|
|
470
|
+
end
|
|
471
|
+
|
|
472
|
+
# Wrap an existing method on a class with span tracing, binding this
|
|
473
|
+
# function's trace_function_key.
|
|
474
|
+
#
|
|
475
|
+
# Routes spans through the client this function was created from (matches
|
|
476
|
+
# Python's `BitfabFunction.span()` using `self._client.span(...)` and
|
|
477
|
+
# TypeScript's `BitfabFunction.withSpan()` using `this.client.withSpan(...)`),
|
|
478
|
+
# so non-global `Bitfab::Client` instances don't silently fall back to
|
|
479
|
+
# `Bitfab.client`.
|
|
480
|
+
#
|
|
481
|
+
# @example
|
|
482
|
+
# fn = Bitfab.client.get_function("openai")
|
|
483
|
+
# fn.wrap(OpenAI::Client, :chat, name: "Chat", type: "llm")
|
|
484
|
+
#
|
|
485
|
+
# @param klass [Class, Module] the class to wrap
|
|
486
|
+
# @param method_name [Symbol] the method to wrap
|
|
487
|
+
# @param name [String, nil] explicit span name (defaults to method name)
|
|
488
|
+
# @param type [String] span type
|
|
489
|
+
# @param mock_on_replay [Boolean] mark this span for the "marked" mock strategy
|
|
490
|
+
def wrap(klass, method_name, name: nil, type: "custom", mock_on_replay: false)
|
|
491
|
+
Bitfab::Traceable.wrap(
|
|
492
|
+
klass, method_name,
|
|
493
|
+
trace_function_key: @trace_function_key,
|
|
494
|
+
name:, type:, mock_on_replay:,
|
|
495
|
+
client: @client
|
|
496
|
+
)
|
|
497
|
+
end
|
|
299
498
|
end
|
|
300
499
|
end
|
data/lib/bitfab/http_client.rb
CHANGED
|
@@ -120,6 +120,17 @@ module Bitfab
|
|
|
120
120
|
get("/api/sdk/externalSpans/#{span_id}", timeout: 30)
|
|
121
121
|
end
|
|
122
122
|
|
|
123
|
+
# Fetch the span tree rooted at an external span. Blocking GET request.
|
|
124
|
+
# Used by replay when a mock strategy is active so child spans can be
|
|
125
|
+
# matched against their historical outputs.
|
|
126
|
+
#
|
|
127
|
+
# Returns a hash shaped { "root" => SpanTreeNode } where each node has
|
|
128
|
+
# sourceSpanId, traceFunctionKey, spanName, type, output, optional
|
|
129
|
+
# outputMeta, and children.
|
|
130
|
+
def get_span_tree(external_span_id)
|
|
131
|
+
get("/api/sdk/replay/spanTree/#{external_span_id}", timeout: 30)
|
|
132
|
+
end
|
|
133
|
+
|
|
123
134
|
# Mark a replay test run as completed. Blocking call.
|
|
124
135
|
def complete_replay(test_run_id)
|
|
125
136
|
request("/api/sdk/replay/complete", {"testRunId" => test_run_id}, timeout: 30)
|
data/lib/bitfab/replay.rb
CHANGED
|
@@ -4,6 +4,14 @@ require_relative "constants"
|
|
|
4
4
|
require_relative "serialize"
|
|
5
5
|
|
|
6
6
|
module Bitfab
|
|
7
|
+
# Replay mock strategies. Mirrors the Python and TypeScript SDKs.
|
|
8
|
+
#
|
|
9
|
+
# - "none" — every child span runs real code (default)
|
|
10
|
+
# - "all" — every child span returns its historical output
|
|
11
|
+
# - "marked" — only spans declared with mock_on_replay: true return historical
|
|
12
|
+
# output; everything else runs real code
|
|
13
|
+
MOCK_STRATEGIES = %w[none all marked].freeze
|
|
14
|
+
|
|
7
15
|
# Thread-local replay context management.
|
|
8
16
|
module ReplayContext
|
|
9
17
|
module_function
|
|
@@ -14,12 +22,26 @@ module Bitfab
|
|
|
14
22
|
|
|
15
23
|
# Execute a block with replay context set on the current thread.
|
|
16
24
|
# The context is automatically cleared when the block completes.
|
|
17
|
-
|
|
25
|
+
#
|
|
26
|
+
# @param test_run_id [String]
|
|
27
|
+
# @param input_source_span_id [String, nil]
|
|
28
|
+
# @param input_source_trace_id [String, nil]
|
|
29
|
+
# @param mock_tree [Hash{String => Hash}, nil] keyed by "#{trace_function_key}:#{span_name}:#{call_index}"
|
|
30
|
+
# @param mock_strategy [String, nil] one of MOCK_STRATEGIES
|
|
31
|
+
def with_context(test_run_id:, input_source_span_id: nil, input_source_trace_id: nil,
|
|
32
|
+
mock_tree: nil, mock_strategy: nil)
|
|
18
33
|
previous = Thread.current[REPLAY_CONTEXT_KEY]
|
|
19
|
-
|
|
34
|
+
ctx = {
|
|
20
35
|
test_run_id:,
|
|
21
|
-
input_source_span_id
|
|
36
|
+
input_source_span_id:,
|
|
37
|
+
input_source_trace_id:
|
|
22
38
|
}
|
|
39
|
+
if mock_tree
|
|
40
|
+
ctx[:mock_tree] = mock_tree
|
|
41
|
+
ctx[:mock_strategy] = mock_strategy || "none"
|
|
42
|
+
ctx[:call_counters] = {}
|
|
43
|
+
end
|
|
44
|
+
Thread.current[REPLAY_CONTEXT_KEY] = ctx
|
|
23
45
|
yield
|
|
24
46
|
ensure
|
|
25
47
|
Thread.current[REPLAY_CONTEXT_KEY] = previous
|
|
@@ -46,9 +68,16 @@ module Bitfab
|
|
|
46
68
|
# code change being tested in this replay (stored on the experiment)
|
|
47
69
|
# @param code_change_files [Array<Hash>, nil] optional list of edited files,
|
|
48
70
|
# each as { path:, before:, after: } (empty string for new/deleted files)
|
|
71
|
+
# @param mock [String] mock strategy for child spans: "none" (default),
|
|
72
|
+
# "all", or "marked". "all" mocks every child span; "marked" only mocks
|
|
73
|
+
# spans declared with mock_on_replay: true.
|
|
49
74
|
# @return [Hash] with :items, :test_run_id, :test_run_url
|
|
50
75
|
def run(client, receiver, method_name, trace_function_key:, limit: 5, trace_ids: nil, max_concurrency: 10,
|
|
51
|
-
code_change_description: nil, code_change_files: nil)
|
|
76
|
+
code_change_description: nil, code_change_files: nil, mock: "none")
|
|
77
|
+
unless MOCK_STRATEGIES.include?(mock.to_s)
|
|
78
|
+
raise ArgumentError, "Invalid mock strategy '#{mock}'. Must be one of: #{MOCK_STRATEGIES.join(", ")}"
|
|
79
|
+
end
|
|
80
|
+
|
|
52
81
|
http_client = client.instance_variable_get(:@http_client)
|
|
53
82
|
|
|
54
83
|
replay_data = http_client.start_replay(
|
|
@@ -63,7 +92,7 @@ module Bitfab
|
|
|
63
92
|
server_items = replay_data["items"] || []
|
|
64
93
|
|
|
65
94
|
result_items = if server_items.any?
|
|
66
|
-
process_items(http_client, server_items, receiver, method_name, test_run_id, max_concurrency)
|
|
95
|
+
process_items(http_client, server_items, receiver, method_name, test_run_id, max_concurrency, mock.to_s)
|
|
67
96
|
else
|
|
68
97
|
[]
|
|
69
98
|
end
|
|
@@ -84,11 +113,13 @@ module Bitfab
|
|
|
84
113
|
end
|
|
85
114
|
|
|
86
115
|
# Process all replay items, optionally in parallel using threads.
|
|
87
|
-
def process_items(http_client, server_items, receiver, method_name, test_run_id, max_concurrency)
|
|
116
|
+
def process_items(http_client, server_items, receiver, method_name, test_run_id, max_concurrency, mock_strategy)
|
|
88
117
|
concurrency = max_concurrency || server_items.length
|
|
89
118
|
|
|
90
119
|
if concurrency <= 1
|
|
91
|
-
server_items.map
|
|
120
|
+
server_items.map do |item|
|
|
121
|
+
process_single_item(http_client, item, receiver, method_name, test_run_id, mock_strategy)
|
|
122
|
+
end
|
|
92
123
|
else
|
|
93
124
|
results_mutex = Mutex.new
|
|
94
125
|
results = []
|
|
@@ -101,7 +132,7 @@ module Bitfab
|
|
|
101
132
|
item, idx = work_mutex.synchronize { work_queue.shift }
|
|
102
133
|
break unless item
|
|
103
134
|
|
|
104
|
-
result = process_single_item(http_client, item, receiver, method_name, test_run_id)
|
|
135
|
+
result = process_single_item(http_client, item, receiver, method_name, test_run_id, mock_strategy)
|
|
105
136
|
results_mutex.synchronize { results[idx] = result }
|
|
106
137
|
end
|
|
107
138
|
end
|
|
@@ -113,11 +144,67 @@ module Bitfab
|
|
|
113
144
|
end
|
|
114
145
|
|
|
115
146
|
# Fetch span data and execute a single replay item.
|
|
116
|
-
def process_single_item(http_client, server_item, receiver, method_name, test_run_id)
|
|
147
|
+
def process_single_item(http_client, server_item, receiver, method_name, test_run_id, mock_strategy)
|
|
117
148
|
span = http_client.get_external_span(server_item["externalSpanId"])
|
|
118
149
|
item_data = extract_span_data(span)
|
|
119
150
|
metrics = extract_server_item_metrics(server_item)
|
|
120
|
-
|
|
151
|
+
|
|
152
|
+
mock_tree = nil
|
|
153
|
+
if mock_strategy == "all" || mock_strategy == "marked"
|
|
154
|
+
tree = http_client.get_span_tree(server_item["externalSpanId"])
|
|
155
|
+
mock_tree = build_mock_tree(tree["root"] || {})
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
execute_item(
|
|
159
|
+
item_data,
|
|
160
|
+
receiver,
|
|
161
|
+
method_name,
|
|
162
|
+
test_run_id,
|
|
163
|
+
span["id"],
|
|
164
|
+
metrics,
|
|
165
|
+
input_source_trace_id: span["externalTraceId"],
|
|
166
|
+
mock_strategy:,
|
|
167
|
+
mock_tree:
|
|
168
|
+
)
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# Walk the children of a root span tree node depth-first and build a
|
|
172
|
+
# lookup keyed by "#{trace_function_key}:#{span_name}:#{call_index}".
|
|
173
|
+
#
|
|
174
|
+
# The root node itself is excluded — at replay time the runtime root span
|
|
175
|
+
# never queries the mock tree.
|
|
176
|
+
#
|
|
177
|
+
# The compound (key, name) match disambiguates same-key spans that come
|
|
178
|
+
# from the fluent `client.get_function(key).wrap(...)` pattern: every
|
|
179
|
+
# wrapped method shares trace_function_key but differs in span_name. The
|
|
180
|
+
# counter is per-(key, name) pair so repeated same-name calls (including
|
|
181
|
+
# recursion) still order by occurrence. Mirrors the Python and TypeScript
|
|
182
|
+
# SDKs after HVT-2078 — keying by trace_function_key alone caused the
|
|
183
|
+
# wrong historical output for fluent-API span sets.
|
|
184
|
+
def build_mock_tree(root)
|
|
185
|
+
spans = {}
|
|
186
|
+
counters = {}
|
|
187
|
+
|
|
188
|
+
walk = lambda do |node|
|
|
189
|
+
key = node["traceFunctionKey"]
|
|
190
|
+
if key && !key.empty?
|
|
191
|
+
name = node["spanName"]
|
|
192
|
+
name = key if name.nil? || name.empty?
|
|
193
|
+
counter_key = "#{key}:#{name}"
|
|
194
|
+
index = counters[counter_key] || 0
|
|
195
|
+
counters[counter_key] = index + 1
|
|
196
|
+
spans["#{counter_key}:#{index}"] = {
|
|
197
|
+
source_span_id: node["sourceSpanId"],
|
|
198
|
+
output: node["output"],
|
|
199
|
+
output_meta: node["outputMeta"]
|
|
200
|
+
}
|
|
201
|
+
end
|
|
202
|
+
(node["children"] || []).each { |child| walk.call(child) }
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
(root["children"] || []).each { |child| walk.call(child) }
|
|
206
|
+
|
|
207
|
+
spans
|
|
121
208
|
end
|
|
122
209
|
|
|
123
210
|
# Extract input/output data from an external span's rawData.
|
|
@@ -155,13 +242,20 @@ module Bitfab
|
|
|
155
242
|
end
|
|
156
243
|
|
|
157
244
|
# Execute a single replay item: deserialize inputs, call method with replay context.
|
|
158
|
-
def execute_item(item, receiver, method_name, test_run_id, input_source_span_id = nil, metrics = {}
|
|
245
|
+
def execute_item(item, receiver, method_name, test_run_id, input_source_span_id = nil, metrics = {},
|
|
246
|
+
input_source_trace_id: nil, mock_strategy: "none", mock_tree: nil)
|
|
159
247
|
args, kwargs = Serialize.deserialize_inputs(item)
|
|
160
248
|
|
|
161
249
|
fn_result = nil
|
|
162
250
|
fn_error = nil
|
|
163
251
|
|
|
164
|
-
ReplayContext.with_context(
|
|
252
|
+
ReplayContext.with_context(
|
|
253
|
+
test_run_id:,
|
|
254
|
+
input_source_span_id:,
|
|
255
|
+
input_source_trace_id:,
|
|
256
|
+
mock_tree:,
|
|
257
|
+
mock_strategy:
|
|
258
|
+
) do
|
|
165
259
|
fn_result = if kwargs.empty?
|
|
166
260
|
receiver.send(method_name, *args)
|
|
167
261
|
else
|
data/lib/bitfab/span_context.rb
CHANGED
|
@@ -131,12 +131,13 @@ module Bitfab
|
|
|
131
131
|
@states_mutex.synchronize { @states[trace_id] }
|
|
132
132
|
end
|
|
133
133
|
|
|
134
|
-
def create(trace_id, test_run_id: nil)
|
|
134
|
+
def create(trace_id, test_run_id: nil, input_source_trace_id: nil)
|
|
135
135
|
@states_mutex.synchronize do
|
|
136
136
|
@states[trace_id] ||= {
|
|
137
137
|
trace_id:,
|
|
138
138
|
started_at: Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.%3NZ"),
|
|
139
|
-
test_run_id
|
|
139
|
+
test_run_id:,
|
|
140
|
+
input_source_trace_id:
|
|
140
141
|
}.compact
|
|
141
142
|
end
|
|
142
143
|
end
|
data/lib/bitfab/traceable.rb
CHANGED
|
@@ -38,19 +38,32 @@ module Bitfab
|
|
|
38
38
|
# @param trace_function_key [String] the trace function key
|
|
39
39
|
# @param name [String, nil] explicit span name (defaults to method name)
|
|
40
40
|
# @param type [String] span type: llm, agent, function, guardrail, handoff, custom
|
|
41
|
-
|
|
41
|
+
# @param mock_on_replay [Boolean] mark this span for the "marked" mock strategy.
|
|
42
|
+
# When true, `client.replay(... mock: "marked")` returns this span's
|
|
43
|
+
# historical output instead of executing the wrapped method.
|
|
44
|
+
# @param client [Bitfab::Client, nil] route spans through this specific client
|
|
45
|
+
# instead of the global `Bitfab.client`. When nil (default), the wrapper
|
|
46
|
+
# resolves `Bitfab.client` at each call (so `Bitfab.configure` / `reset!`
|
|
47
|
+
# between calls keeps working). Used by `Bitfab::Client#get_function` to
|
|
48
|
+
# preserve the bound client through the fluent wrapper, matching Python's
|
|
49
|
+
# `BitfabFunction.span()` and TypeScript's `BitfabFunction.withSpan()`.
|
|
50
|
+
def self.wrap(klass, method_name, trace_function_key:, name: nil, type: "custom",
|
|
51
|
+
mock_on_replay: false, client: nil)
|
|
42
52
|
span_name = name || method_name.to_s
|
|
43
53
|
method_name_str = method_name.to_s
|
|
54
|
+
bound_client = client
|
|
44
55
|
|
|
45
56
|
wrapper = Module.new do
|
|
46
57
|
define_method(method_name) do |*args, **kwargs, &block|
|
|
47
|
-
Bitfab.client
|
|
58
|
+
target_client = bound_client || Bitfab.client
|
|
59
|
+
target_client.send(:execute_span,
|
|
48
60
|
trace_function_key:,
|
|
49
61
|
span_name:,
|
|
50
62
|
span_type: type,
|
|
51
63
|
function_name: method_name_str,
|
|
52
64
|
args:,
|
|
53
|
-
kwargs
|
|
65
|
+
kwargs:,
|
|
66
|
+
mock_on_replay:) do
|
|
54
67
|
super(*args, **kwargs, &block)
|
|
55
68
|
end
|
|
56
69
|
end
|
|
@@ -85,7 +98,10 @@ module Bitfab
|
|
|
85
98
|
# @param trace_function_key [String, nil] trace function key (overrides class-level bitfab_function)
|
|
86
99
|
# @param name [String, nil] explicit span name (defaults to method name)
|
|
87
100
|
# @param type [String] span type: llm, agent, function, guardrail, handoff, custom
|
|
88
|
-
|
|
101
|
+
# @param mock_on_replay [Boolean] mark this span for the "marked" mock strategy.
|
|
102
|
+
# When true, `client.replay(... mock: "marked")` returns this span's
|
|
103
|
+
# historical output instead of executing the wrapped method.
|
|
104
|
+
def bitfab_span(method_name, trace_function_key: nil, name: nil, type: "custom", mock_on_replay: false)
|
|
89
105
|
trace_function_key ||= @bitfab_function_key
|
|
90
106
|
unless trace_function_key
|
|
91
107
|
raise "No trace function key provided. Pass `trace_function_key:` to `bitfab_span` " \
|
|
@@ -94,14 +110,15 @@ module Bitfab
|
|
|
94
110
|
|
|
95
111
|
# If the method already exists (inline or after-method style), wrap it immediately
|
|
96
112
|
if method_defined?(method_name) || private_method_defined?(method_name)
|
|
97
|
-
_bitfab_wrap_method(method_name, trace_function_key:, name:, type:)
|
|
113
|
+
_bitfab_wrap_method(method_name, trace_function_key:, name:, type:, mock_on_replay:)
|
|
98
114
|
else
|
|
99
115
|
# Method doesn't exist yet (before-method style) — register for method_added hook
|
|
100
116
|
@_bitfab_pending_spans ||= {}
|
|
101
117
|
@_bitfab_pending_spans[method_name] = {
|
|
102
118
|
trace_function_key:,
|
|
103
119
|
name:,
|
|
104
|
-
type
|
|
120
|
+
type:,
|
|
121
|
+
mock_on_replay:
|
|
105
122
|
}
|
|
106
123
|
end
|
|
107
124
|
end
|
|
@@ -116,7 +133,7 @@ module Bitfab
|
|
|
116
133
|
_bitfab_wrap_method(method_name, **config)
|
|
117
134
|
end
|
|
118
135
|
|
|
119
|
-
def _bitfab_wrap_method(method_name, trace_function_key:, name: nil, type: "custom")
|
|
136
|
+
def _bitfab_wrap_method(method_name, trace_function_key:, name: nil, type: "custom", mock_on_replay: false)
|
|
120
137
|
span_name = name || method_name.to_s
|
|
121
138
|
method_name_str = method_name.to_s
|
|
122
139
|
|
|
@@ -128,7 +145,8 @@ module Bitfab
|
|
|
128
145
|
span_type: type,
|
|
129
146
|
function_name: method_name_str,
|
|
130
147
|
args:,
|
|
131
|
-
kwargs
|
|
148
|
+
kwargs:,
|
|
149
|
+
mock_on_replay:) do
|
|
132
150
|
super(*args, **kwargs, &block)
|
|
133
151
|
end
|
|
134
152
|
end
|
data/lib/bitfab/version.rb
CHANGED