braintrust 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,337 @@
1
+ """
2
+ SpanCache provides a disk-based cache for span data, allowing
3
+ scorers to read spans without making server round-trips when possible.
4
+
5
+ Spans are stored on disk to minimize memory usage during evaluations.
6
+ The cache file is automatically cleaned up when dispose() is called.
7
+ """
8
+
9
+ import atexit
10
+ import json
11
+ import os
12
+ import tempfile
13
+ import uuid
14
+ from typing import Any, Optional
15
+
16
+ from braintrust.util import merge_dicts
17
+
18
+ # Global registry of active span caches for process exit cleanup
19
+ _active_caches: set["SpanCache"] = set()
20
+ _exit_handlers_registered = False
21
+
22
+
23
class CachedSpan:
    """Serializable container for the fields of one cached span."""

    def __init__(
        self,
        span_id: str,
        input: Optional[Any] = None,
        output: Optional[Any] = None,
        metadata: Optional[dict[str, Any]] = None,
        span_parents: Optional[list[str]] = None,
        span_attributes: Optional[dict[str, Any]] = None,
    ):
        self.span_id = span_id
        self.input = input
        self.output = output
        self.metadata = metadata
        self.span_parents = span_parents
        self.span_attributes = span_attributes

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict, omitting every optional field that is None."""
        optional_fields = (
            ("input", self.input),
            ("output", self.output),
            ("metadata", self.metadata),
            ("span_parents", self.span_parents),
            ("span_attributes", self.span_attributes),
        )
        serialized: dict[str, Any] = {"span_id": self.span_id}
        serialized.update((key, value) for key, value in optional_fields if value is not None)
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "CachedSpan":
        """Build a CachedSpan from a dict produced by to_dict()."""
        return cls(
            span_id=data["span_id"],
            input=data.get("input"),
            output=data.get("output"),
            metadata=data.get("metadata"),
            span_parents=data.get("span_parents"),
            span_attributes=data.get("span_attributes"),
        )
68
+
69
+
70
class DiskSpanRecord:
    """One on-disk JSONL record: a span's data keyed by its rootSpanId."""

    def __init__(self, root_span_id: str, span_id: str, data: CachedSpan):
        self.root_span_id = root_span_id
        self.span_id = span_id
        self.data = data

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-compatible dict (camelCase keys match the file format)."""
        return {
            "rootSpanId": self.root_span_id,
            "spanId": self.span_id,
            "data": self.data.to_dict(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "DiskSpanRecord":
        """Build a DiskSpanRecord from a dict produced by to_dict()."""
        span_data = CachedSpan.from_dict(data["data"])
        return cls(root_span_id=data["rootSpanId"], span_id=data["spanId"], data=span_data)
94
+
95
+
96
class SpanCache:
    """
    Disk-based cache for span data, keyed by rootSpanId.

    This cache writes spans to a temporary file to minimize memory usage.
    It uses append-only writes and reads the full file when querying.

    Lifecycle:
      - start()/stop() reference-count active evals; caching is live only
        while at least one eval has called start().
      - disable() turns the cache off permanently (e.g. once OTEL is
        registered, since OTEL spans never pass through this cache).
      - dispose() removes the backing temp file once no evals remain.
    """

    def __init__(self, disabled: bool = False):
        # Path of the JSONL temp file backing the cache; created lazily.
        self._cache_file_path: Optional[str] = None
        self._initialized = False
        # Tracks whether the cache was permanently disabled (constructor arg,
        # disable(), or a filesystem failure during initialization).
        self._explicitly_disabled = disabled
        # Tracks whether the cache has been enabled (for evals only)
        self._enabled = False
        # Reference count of active evals using this cache
        self._active_eval_count = 0
        # Small in-memory index tracking which rootSpanIds have data, so
        # reads can skip the disk entirely for unknown ids.
        self._root_span_index: set[str] = set()
        # Buffer for writes not yet flushed to disk
        self._write_buffer: list[DiskSpanRecord] = []

    def disable(self) -> None:
        """
        Disable the cache at runtime. This is called automatically when
        OTEL is registered, since OTEL spans won't be in the cache.
        """
        self._explicitly_disabled = True

    def start(self) -> None:
        """
        Start caching spans for use during evaluations.
        This only starts caching if the cache wasn't permanently disabled.
        Called by Eval() to turn on caching for the duration of the eval.
        Uses reference counting to support parallel evals.
        """
        if not self._explicitly_disabled:
            self._enabled = True
            self._active_eval_count += 1

    def stop(self) -> None:
        """
        Stop caching spans and return to the default disabled state.
        Unlike disable(), this allows start() to work again for future evals.
        Called after an eval completes to return to the default state.
        Uses reference counting - only disables when all evals are complete.
        """
        self._active_eval_count -= 1
        if self._active_eval_count <= 0:
            # Clamp at zero so unbalanced stop() calls cannot go negative.
            self._active_eval_count = 0
            self._enabled = False

    @property
    def disabled(self) -> bool:
        """True if the cache is off, either permanently or simply not started."""
        return self._explicitly_disabled or not self._enabled

    def _ensure_initialized(self) -> None:
        """Create the backing temp file on first use; disable the cache on failure."""
        if self.disabled or self._initialized:
            return

        try:
            # Combine a process-clock timestamp with a random suffix for a
            # collision-resistant temp file name.
            unique_id = f"{int(os.times().elapsed * 1000000)}-{uuid.uuid4().hex[:8]}"
            self._cache_file_path = os.path.join(tempfile.gettempdir(), f"braintrust-span-cache-{unique_id}.jsonl")

            # Create (truncate) the file so later appends/reads can assume it exists.
            with open(self._cache_file_path, "w"):
                pass

            self._initialized = True
            self._register_exit_handler()
        except Exception:
            # Silently fail if filesystem is unavailable - cache is best-effort
            # This can happen if temp directory is not writable or disk is full
            self._explicitly_disabled = True
            return

    def _register_exit_handler(self) -> None:
        """Register a handler to clean up the temp file on process exit."""
        global _exit_handlers_registered
        _active_caches.add(self)

        # Only one atexit hook is ever installed; it sweeps every live cache.
        if not _exit_handlers_registered:
            _exit_handlers_registered = True

            def cleanup_all_caches():
                """Clean up all active caches."""
                for cache in _active_caches:
                    if cache._cache_file_path and os.path.exists(cache._cache_file_path):
                        try:
                            os.unlink(cache._cache_file_path)
                        except Exception:
                            # Ignore cleanup errors - file might not exist or already deleted
                            pass

            atexit.register(cleanup_all_caches)

    def queue_write(self, root_span_id: str, span_id: str, data: CachedSpan) -> None:
        """
        Write a span to the cache.
        In Python, we write synchronously (no async queue like in TS).
        """
        if self.disabled:
            return

        self._ensure_initialized()
        # Initialization can fail (unwritable temp dir, full disk) and disable
        # the cache; re-check so we don't buffer/index a record that could
        # never be flushed or read back.
        if self.disabled:
            return

        record = DiskSpanRecord(root_span_id, span_id, data)
        self._write_buffer.append(record)
        self._root_span_index.add(root_span_id)

        # Write to disk immediately (simplified compared to TS async version)
        self._flush_write_buffer()

    def _flush_write_buffer(self) -> None:
        """Flush the write buffer to disk as append-only JSONL."""
        if not self._write_buffer or not self._cache_file_path:
            return

        try:
            with open(self._cache_file_path, "a") as f:
                for record in self._write_buffer:
                    f.write(json.dumps(record.to_dict()) + "\n")
            self._write_buffer.clear()
        except Exception:
            # Silently fail if write fails - cache is best-effort
            # This can happen if disk is full or file permissions changed
            pass

    @staticmethod
    def _merge_record(span_map: dict[str, dict[str, Any]], record: "DiskSpanRecord") -> None:
        """Fold one record into span_map, merging repeated span_ids (later wins)."""
        if record.span_id in span_map:
            merge_dicts(span_map[record.span_id], record.data.to_dict())
        else:
            span_map[record.span_id] = record.data.to_dict()

    def get_by_root_span_id(self, root_span_id: str) -> Optional[list[CachedSpan]]:
        """
        Get all cached spans for a given rootSpanId.

        This reads the file and merges all records for the given rootSpanId.

        Args:
            root_span_id: The root span ID to look up

        Returns:
            List of cached spans, or None if not in cache
        """
        if self.disabled:
            return None

        # Quick check using in-memory index
        if root_span_id not in self._root_span_index:
            return None

        # Accumulate spans by spanId, merging updates
        span_map: dict[str, dict[str, Any]] = {}

        # Read from disk if initialized
        if self._initialized and self._cache_file_path and os.path.exists(self._cache_file_path):
            try:
                with open(self._cache_file_path, "r") as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            record = DiskSpanRecord.from_dict(json.loads(line))
                            if record.root_span_id == root_span_id:
                                self._merge_record(span_map, record)
                        except Exception:
                            # Skip malformed lines - may occur if file was corrupted or truncated
                            pass
            except Exception:
                # Continue to check buffer even if disk read fails
                # This can happen if file was deleted or permissions changed
                pass

        # Also check the in-memory write buffer for unflushed data
        for record in self._write_buffer:
            if record.root_span_id == root_span_id:
                self._merge_record(span_map, record)

        if not span_map:
            return None

        return [CachedSpan.from_dict(data) for data in span_map.values()]

    def has(self, root_span_id: str) -> bool:
        """Check if a rootSpanId has cached data."""
        if self.disabled:
            return False
        return root_span_id in self._root_span_index

    def clear(self, root_span_id: str) -> None:
        """
        Clear all cached spans for a given rootSpanId.
        Note: This only removes from the index. The data remains in the file
        but will be ignored on reads.
        """
        self._root_span_index.discard(root_span_id)

    def clear_all(self) -> None:
        """Clear all cached data and remove the cache file."""
        self._root_span_index.clear()
        self.dispose()

    @property
    def size(self) -> int:
        """Get the number of root spans currently tracked."""
        return len(self._root_span_index)

    def dispose(self) -> None:
        """
        Clean up the cache file. Call this when the eval is complete.
        Only performs cleanup when all active evals have completed (refcount = 0).
        """
        # Only dispose if no active evals are using this cache
        if self._active_eval_count > 0:
            return

        # Remove from global registry
        _active_caches.discard(self)

        # Clear pending writes
        self._write_buffer.clear()

        if self._cache_file_path and os.path.exists(self._cache_file_path):
            try:
                os.unlink(self._cache_file_path)
            except Exception:
                # Ignore cleanup errors - file might not exist or already deleted
                pass
            self._cache_file_path = None

        self._initialized = False
        self._root_span_index.clear()
@@ -38,6 +38,27 @@ class SpanObjectTypeV3(Enum):
38
38
  }[self]
39
39
 
40
40
 
41
def span_object_type_v3_to_typed_string(
    object_type: SpanObjectTypeV3,
) -> str:
    """Convert SpanObjectTypeV3 enum to typed string literal.

    Args:
        object_type: The SpanObjectTypeV3 enum value

    Returns:
        One of "experiment", "project_logs", or "playground_logs"
    """
    # Table-driven dispatch instead of an if/elif chain.
    typed_strings = {
        SpanObjectTypeV3.EXPERIMENT: "experiment",
        SpanObjectTypeV3.PROJECT_LOGS: "project_logs",
        SpanObjectTypeV3.PLAYGROUND_LOGS: "playground_logs",
    }
    try:
        return typed_strings[object_type]
    except KeyError:
        # Preserve the original contract: unknown values raise ValueError.
        raise ValueError(f"Unknown SpanObjectTypeV3: {object_type}") from None
60
+
61
+
41
62
  class InternalSpanComponentUUIDFields(Enum):
42
63
  OBJECT_ID = 1
43
64
  ROW_ID = 2
braintrust/test_logger.py CHANGED
@@ -59,6 +59,33 @@ class TestInit(TestCase):
59
59
 
60
60
  assert str(cm.exception) == "Cannot open an experiment without specifying its name"
61
61
 
62
+ def test_init_with_dataset_id_only(self):
63
+ """Test that init accepts dataset={'id': '...'} parameter"""
64
+ # Test the logic that extracts dataset_id from the dict
65
+ from braintrust.logger import Dataset
66
+
67
+ # Test 1: dict with only id
68
+ dataset_dict = {"id": "dataset-id-123"}
69
+ assert isinstance(dataset_dict, dict)
70
+ assert not isinstance(dataset_dict, Dataset)
71
+ assert dataset_dict["id"] == "dataset-id-123"
72
+
73
+ # Test 2: full Dataset object has different behavior
74
+ # (We can't easily instantiate a Dataset here, but we can verify
75
+ # that the isinstance check distinguishes them)
76
+
77
+ def test_init_with_dataset_id_and_version(self):
78
+ """Test that init accepts dataset={'id': '...', 'version': '...'} parameter"""
79
+ # Test the logic that extracts both dataset_id and dataset_version from the dict
80
+ from braintrust.logger import Dataset
81
+
82
+ # Test: dict with id and version
83
+ dataset_dict = {"id": "dataset-id-123", "version": "v2"}
84
+ assert isinstance(dataset_dict, dict)
85
+ assert not isinstance(dataset_dict, Dataset)
86
+ assert dataset_dict["id"] == "dataset-id-123"
87
+ assert dataset_dict["version"] == "v2"
88
+
62
89
 
63
90
  class TestLogger(TestCase):
64
91
  def test_extract_attachments_no_op(self):
@@ -2434,6 +2461,95 @@ def test_logger_export_respects_otel_compat_enabled():
2434
2461
  assert version == 4, f"Expected V4 encoding (version=4), got version={version}"
2435
2462
 
2436
2463
 
2464
def test_register_otel_flush_callback():
    """Test that register_otel_flush registers a callback correctly."""
    import asyncio

    from braintrust import register_otel_flush
    from braintrust.logger import _internal_get_global_state
    from braintrust.test_helpers import init_test_logger

    init_test_logger(__name__)
    state = _internal_get_global_state()

    # Record every invocation of the callback
    calls = []

    async def mock_flush():
        calls.append(True)

    register_otel_flush(mock_flush)

    # flush_otel should drive the callback we just registered
    asyncio.run(state.flush_otel())

    assert calls == [True]
2489
+
2490
+
2491
def test_register_otel_flush_disables_span_cache():
    """Test that register_otel_flush disables the span cache."""
    from braintrust import register_otel_flush
    from braintrust.logger import _internal_get_global_state
    from braintrust.test_helpers import init_test_logger

    init_test_logger(__name__)
    state = _internal_get_global_state()

    # Simulate an eval turning the cache on
    state.span_cache.start()
    assert state.span_cache.disabled is False

    async def mock_flush():
        pass

    register_otel_flush(mock_flush)

    # Registering OTEL must turn the cache off
    assert state.span_cache.disabled is True
2512
+
2513
+
2514
def test_flush_otel_noop_when_no_callback():
    """Test that flush_otel is a no-op when no callback is registered."""
    import asyncio

    from braintrust.logger import _internal_get_global_state
    from braintrust.test_helpers import init_test_logger

    init_test_logger(__name__)
    state = _internal_get_global_state()

    # With no registered callback, flushing must simply succeed
    asyncio.run(state.flush_otel())
2526
+
2527
+
2528
def test_register_otel_flush_permanently_disables_cache():
    """Test that register_otel_flush permanently disables the cache."""
    from braintrust import register_otel_flush
    from braintrust.logger import _internal_get_global_state
    from braintrust.test_helpers import init_test_logger

    init_test_logger(__name__)
    state = _internal_get_global_state()

    # Turn the cache on, as an eval would
    state.span_cache.start()
    assert state.span_cache.disabled is False

    async def mock_flush():
        pass

    register_otel_flush(mock_flush)
    assert state.span_cache.disabled is True

    # A later start() must not revive an explicitly disabled cache
    state.span_cache.start()
    assert state.span_cache.disabled is True
2551
+
2552
+
2437
2553
  class TestJSONAttachment(TestCase):
2438
2554
  def test_create_attachment_from_json_data(self):
2439
2555
  """Test creating an attachment from JSON data."""