deepeval 3.6.3__py3-none-any.whl → 3.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,19 @@
  import json
+ import logging
  import os
  from typing import Literal, Optional, List

+ from deepeval.config.settings import get_settings
+ from deepeval.confident.api import get_confident_api_key
+ from deepeval.prompt import Prompt
+ from deepeval.tracing.context import current_trace_context
+ from deepeval.tracing.types import Trace
+ from deepeval.tracing.otel.utils import to_hex_string
+
+
+ logger = logging.getLogger(__name__)
+
+
  try:
      from pydantic_ai.models.instrumented import InstrumentationSettings
      from opentelemetry.sdk.trace import SpanProcessor, TracerProvider
@@ -11,7 +23,20 @@ try:
      )

      dependency_installed = True
- except:
+ except ImportError as e:
+     if get_settings().DEEPEVAL_VERBOSE_MODE:
+         if isinstance(e, ModuleNotFoundError):
+             logger.warning(
+                 "Optional tracing dependency not installed: %s",
+                 e.name,
+                 stacklevel=2,
+             )
+         else:
+             logger.warning(
+                 "Optional tracing import failed: %s",
+                 e,
+                 stacklevel=2,
+             )
      dependency_installed = False


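Note on the guard above: the bare `except:` is now a typed `except ImportError`, and a warning is emitted only when DeepEval's verbose mode is on, so missing optional extras stay silent by default. A minimal standalone sketch of the same pattern, with a hypothetical module name and an environment-variable stand-in for the settings flag:

    import logging
    import os

    logger = logging.getLogger(__name__)

    try:
        import some_optional_extra  # hypothetical optional dependency
        dependency_installed = True
    except ImportError as e:
        # ModuleNotFoundError carries the missing module's name (e.name);
        # other ImportErrors (e.g. a broken partial install) only carry a message.
        if os.getenv("DEEPEVAL_VERBOSE_MODE"):  # stand-in for get_settings().DEEPEVAL_VERBOSE_MODE
            if isinstance(e, ModuleNotFoundError):
                logger.warning("Optional dependency not installed: %s", e.name)
            else:
                logger.warning("Optional import failed: %s", e)
        dependency_installed = False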
@@ -25,6 +50,10 @@ def is_dependency_installed():

  from deepeval.confident.api import get_confident_api_key
  from deepeval.prompt import Prompt
+ from deepeval.tracing.otel.test_exporter import test_exporter
+ from deepeval.tracing.context import current_trace_context
+ from deepeval.tracing.types import Trace
+ from deepeval.tracing.otel.utils import to_hex_string

  # OTLP_ENDPOINT = "http://127.0.0.1:4318/v1/traces"
  OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
@@ -37,6 +66,12 @@ class SpanInterceptor(SpanProcessor):

      def on_start(self, span, parent_context):

+         # set trace uuid
+         _current_trace_context = current_trace_context.get()
+         if _current_trace_context and isinstance(_current_trace_context, Trace):
+             _otel_trace_id = span.get_span_context().trace_id
+             _current_trace_context.uuid = to_hex_string(_otel_trace_id, 32)
+
          # set trace attributes
          if self.settings.thread_id:
              span.set_attribute(
@@ -148,8 +183,9 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
          confident_prompt: Optional[Prompt] = None,
          llm_metric_collection: Optional[str] = None,
          agent_metric_collection: Optional[str] = None,
-         tool_metric_collection_map: dict = {},
+         tool_metric_collection_map: Optional[dict] = None,
          trace_metric_collection: Optional[str] = None,
+         is_test_mode: Optional[bool] = False,
      ):
          is_dependency_installed()

@@ -162,7 +198,7 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
          ]:
              self.environment = _environment

-         self.tool_metric_collection_map = tool_metric_collection_map
+         self.tool_metric_collection_map = tool_metric_collection_map or {}
          self.name = name
          self.thread_id = thread_id
          self.user_id = user_id
@@ -185,12 +221,15 @@ class ConfidentInstrumentationSettings(InstrumentationSettings):
          span_interceptor = SpanInterceptor(self)
          trace_provider.add_span_processor(span_interceptor)

-         trace_provider.add_span_processor(
-             BatchSpanProcessor(
-                 OTLPSpanExporter(
-                     endpoint=OTLP_ENDPOINT,
-                     headers={"x-confident-api-key": api_key},
+         if is_test_mode:
+             trace_provider.add_span_processor(BatchSpanProcessor(test_exporter))
+         else:
+             trace_provider.add_span_processor(
+                 BatchSpanProcessor(
+                     OTLPSpanExporter(
+                         endpoint=OTLP_ENDPOINT,
+                         headers={"x-confident-api-key": api_key},
+                     )
                  )
              )
-         )
          super().__init__(tracer_provider=trace_provider)
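Taken together, the hunks above add an `is_test_mode` switch that routes spans to the bundled `test_exporter` instead of the Confident OTLP endpoint, and fix the mutable-default bug in `tool_metric_collection_map: dict = {}` with the usual `None`-plus-`or {}` idiom. A hedged usage sketch (the import path is assumed, not shown in this diff):

    from deepeval.integrations.pydantic_ai import ConfidentInstrumentationSettings  # path assumed

    settings = ConfidentInstrumentationSettings(
        is_test_mode=True,  # spans go to the in-process test exporter, not OTLP
    )
    # pass `settings` wherever pydantic-ai accepts an InstrumentationSettings instance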
@@ -41,6 +41,7 @@ class FaithfulnessMetric(BaseMetric):
          strict_mode: bool = False,
          verbose_mode: bool = False,
          truths_extraction_limit: Optional[int] = None,
+         penalize_ambiguous_claims: bool = False,
          evaluation_template: Type[FaithfulnessTemplate] = FaithfulnessTemplate,
      ):
          self.threshold = 1 if strict_mode else threshold
@@ -51,6 +52,7 @@
          self.strict_mode = strict_mode
          self.verbose_mode = verbose_mode
          self.evaluation_template = evaluation_template
+         self.penalize_ambiguous_claims = penalize_ambiguous_claims

          self.truths_extraction_limit = truths_extraction_limit
          if self.truths_extraction_limit is not None:
@@ -329,6 +331,12 @@
              if verdict.verdict.strip().lower() != "no":
                  faithfulness_count += 1

+             if (
+                 self.penalize_ambiguous_claims
+                 and verdict.verdict.strip().lower() == "idk"
+             ):
+                 faithfulness_count -= 1
+
          score = faithfulness_count / number_of_verdicts
          return 0 if self.strict_mode and score < self.threshold else score

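The effect of `penalize_ambiguous_claims` is easiest to see with numbers: given 4 verdicts of which 3 are not "no" and 1 of those 3 is "idk", the default score is 3/4 = 0.75, while the new flag subtracts the ambiguous verdict again, giving (3 - 1)/4 = 0.5. A hedged usage sketch (test case contents are illustrative):

    from deepeval.metrics import FaithfulnessMetric
    from deepeval.test_case import LLMTestCase

    metric = FaithfulnessMetric(
        threshold=0.7,
        penalize_ambiguous_claims=True,  # "idk" verdicts now lower the score
    )
    test_case = LLMTestCase(
        input="Where is the Eiffel Tower?",
        actual_output="It is in Paris, and it may be the city's tallest structure.",
        retrieval_context=["The Eiffel Tower is located in Paris."],
    )
    metric.measure(test_case)
    print(metric.score)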
deepeval/prompt/prompt.py CHANGED
@@ -8,6 +8,7 @@ import os
  from pydantic import BaseModel
  import asyncio
  import portalocker
+ import threading

  from deepeval.prompt.api import (
      PromptHttpResponse,
@@ -20,15 +21,39 @@ from deepeval.prompt.api import (
  from deepeval.prompt.utils import interpolate_text
  from deepeval.confident.api import Api, Endpoints, HttpMethods
  from deepeval.constants import HIDDEN_DIR
- from deepeval.utils import (
-     get_or_create_event_loop,
-     get_or_create_general_event_loop,
- )

  CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"
  VERSION_CACHE_KEY = "version"
  LABEL_CACHE_KEY = "label"

+ # Global background event loop for polling
+ _polling_loop: Optional[asyncio.AbstractEventLoop] = None
+ _polling_thread: Optional[threading.Thread] = None
+ _polling_loop_lock = threading.Lock()
+
+
+ def _get_or_create_polling_loop() -> asyncio.AbstractEventLoop:
+     """Get or create a background event loop for polling that runs in a daemon thread."""
+     global _polling_loop, _polling_thread
+
+     with _polling_loop_lock:
+         if _polling_loop is None or not _polling_loop.is_running():
+
+             def run_loop():
+                 global _polling_loop
+                 _polling_loop = asyncio.new_event_loop()
+                 asyncio.set_event_loop(_polling_loop)
+                 _polling_loop.run_forever()
+
+             _polling_thread = threading.Thread(target=run_loop, daemon=True)
+             _polling_thread.start()
+
+             # Wait for loop to be ready
+             while _polling_loop is None:
+                 time.sleep(0.01)
+
+     return _polling_loop
+

  class CustomEncoder(json.JSONEncoder):
      def default(self, obj):
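The `_get_or_create_polling_loop` helper above replaces the removed `get_or_create_general_event_loop` import: polling coroutines now run on one daemon-thread event loop and are submitted across threads with `asyncio.run_coroutine_threadsafe` (see the pull hunks further down). A minimal standalone sketch of the pattern, all names hypothetical:

    import asyncio
    import threading
    import time

    _loop = None

    def _run_loop():
        global _loop
        _loop = asyncio.new_event_loop()
        asyncio.set_event_loop(_loop)
        _loop.run_forever()

    threading.Thread(target=_run_loop, daemon=True).start()
    while _loop is None:  # busy-wait until the loop object exists
        time.sleep(0.01)

    async def tick():
        await asyncio.sleep(0.1)
        return "polled"

    # schedule from a sync thread; returns a concurrent.futures.Future
    future = asyncio.run_coroutine_threadsafe(tick(), _loop)
    print(future.result(timeout=1))  # "polled"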
@@ -80,11 +105,22 @@ class Prompt:
          self._version = None
          self._polling_tasks: Dict[str, Dict[str, asyncio.Task]] = {}
          self._refresh_map: Dict[str, Dict[str, int]] = {}
+         self._lock = (
+             threading.Lock()
+         )  # Protect instance attributes from race conditions
          if template:
              self._type = PromptType.TEXT
          elif messages_template:
              self._type = PromptType.LIST

+     def __del__(self):
+         """Cleanup polling tasks when instance is destroyed"""
+         try:
+             self._stop_polling()
+         except Exception:
+             # Suppress exceptions during cleanup to avoid issues in interpreter shutdown
+             pass
+
      @property
      def version(self):
          if self._version is not None and self._version != "latest":
@@ -100,33 +136,37 @@
          self._version = value

      def interpolate(self, **kwargs):
-         if self._type == PromptType.TEXT:
-             if self._text_template is None:
+         with self._lock:
+             prompt_type = self._type
+             text_template = self._text_template
+             messages_template = self._messages_template
+             interpolation_type = self._interpolation_type
+
+         if prompt_type == PromptType.TEXT:
+             if text_template is None:
                  raise TypeError(
                      "Unable to interpolate empty prompt template. Please pull a prompt from Confident AI or set template manually to continue."
                  )

-             return interpolate_text(
-                 self._interpolation_type, self._text_template, **kwargs
-             )
+             return interpolate_text(interpolation_type, text_template, **kwargs)

-         elif self._type == PromptType.LIST:
-             if self._messages_template is None:
+         elif prompt_type == PromptType.LIST:
+             if messages_template is None:
                  raise TypeError(
                      "Unable to interpolate empty prompt template messages. Please pull a prompt from Confident AI or set template manually to continue."
                  )

              interpolated_messages = []
-             for message in self._messages_template:
+             for message in messages_template:
                  interpolated_content = interpolate_text(
-                     self._interpolation_type, message.content, **kwargs
+                     interpolation_type, message.content, **kwargs
                  )
                  interpolated_messages.append(
                      {"role": message.role, "content": interpolated_content}
                  )
              return interpolated_messages
          else:
-             raise ValueError(f"Unsupported prompt type: {self._type}")
+             raise ValueError(f"Unsupported prompt type: {prompt_type}")

      def _get_versions(self) -> List:
          if self.alias is None:
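`interpolate` now takes one locked snapshot of the template state before formatting, so a concurrent background refresh can no longer pair a new interpolation type with an old template mid-render. Caller code is unchanged; a hedged sketch (alias, refresh value, and variables are illustrative, and the `pull` signature is inferred from the hunks below):

    from deepeval.prompt import Prompt

    prompt = Prompt(alias="my-prompt")          # hypothetical alias
    prompt.pull(version="latest", refresh=60)   # background poll keeps it fresh
    text = prompt.interpolate(user_name="Ada")  # renders one consistent snapshot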
@@ -272,15 +312,16 @@
              if not cached_prompt:
                  raise ValueError("Unable to fetch prompt and load from cache")

-             self.version = cached_prompt.version
-             self.label = cached_prompt.label
-             self._text_template = cached_prompt.template
-             self._messages_template = cached_prompt.messages_template
-             self._prompt_version_id = cached_prompt.prompt_version_id
-             self._type = PromptType(cached_prompt.type)
-             self._interpolation_type = PromptInterpolationType(
-                 cached_prompt.interpolation_type
-             )
+             with self._lock:
+                 self.version = cached_prompt.version
+                 self.label = cached_prompt.label
+                 self._text_template = cached_prompt.template
+                 self._messages_template = cached_prompt.messages_template
+                 self._prompt_version_id = cached_prompt.prompt_version_id
+                 self._type = PromptType(cached_prompt.type)
+                 self._interpolation_type = PromptInterpolationType(
+                     cached_prompt.interpolation_type
+                 )

              end_time = time.perf_counter()
              time_taken = format(end_time - start_time, ".2f")
@@ -300,7 +341,6 @@
      ):
          should_write_on_first_fetch = False
          if refresh:
-             default_to_cache = True
              # Check if we need to bootstrap the cache
              cached_prompt = self._read_from_cache(
                  self.alias, version=version, label=label
@@ -316,12 +356,10 @@
              )

          # Manage background prompt polling
-         loop = get_or_create_general_event_loop()
-         if loop.is_running():
-             loop.create_task(self.create_polling_task(version, label, refresh))
-         else:
-             loop.run_until_complete(
-                 self.create_polling_task(version, label, refresh)
+         if refresh:
+             loop = _get_or_create_polling_loop()
+             asyncio.run_coroutine_threadsafe(
+                 self.create_polling_task(version, label, refresh), loop
              )

          if default_to_cache:
@@ -330,15 +368,20 @@
                      self.alias, version=version, label=label
                  )
                  if cached_prompt:
-                     self.version = cached_prompt.version
-                     self.label = cached_prompt.label
-                     self._text_template = cached_prompt.template
-                     self._messages_template = cached_prompt.messages_template
-                     self._prompt_version_id = cached_prompt.prompt_version_id
-                     self._type = PromptType(cached_prompt.type)
-                     self._interpolation_type = PromptInterpolationType(
-                         cached_prompt.interpolation_type
-                     )
+                     with self._lock:
+                         self.version = cached_prompt.version
+                         self.label = cached_prompt.label
+                         self._text_template = cached_prompt.template
+                         self._messages_template = (
+                             cached_prompt.messages_template
+                         )
+                         self._prompt_version_id = (
+                             cached_prompt.prompt_version_id
+                         )
+                         self._type = PromptType(cached_prompt.type)
+                         self._interpolation_type = PromptInterpolationType(
+                             cached_prompt.interpolation_type
+                         )
                      return
              except:
                  pass
@@ -402,13 +445,14 @@
                  return
              raise

-         self.version = response.version
-         self.label = response.label
-         self._text_template = response.text
-         self._messages_template = response.messages
-         self._prompt_version_id = response.id
-         self._type = response.type
-         self._interpolation_type = response.interpolation_type
+         with self._lock:
+             self.version = response.version
+             self.label = response.label
+             self._text_template = response.text
+             self._messages_template = response.messages
+             self._prompt_version_id = response.id
+             self._type = response.type
+             self._interpolation_type = response.interpolation_type

          end_time = time.perf_counter()
          time_taken = format(end_time - start_time, ".2f")
@@ -483,11 +527,7 @@
          version: Optional[str],
          label: Optional[str],
          refresh: Optional[int] = 60,
-         default_to_cache: bool = True,
      ):
-         if version is None and label is None:
-             return
-
          # If polling task doesn't exist, start it
          CACHE_KEY = LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
          cache_value = label if label else version
@@ -506,9 +546,7 @@
          self._refresh_map[CACHE_KEY][cache_value] = refresh
          if not polling_task:
              self._polling_tasks[CACHE_KEY][cache_value] = (
-                 asyncio.create_task(
-                     self.poll(version, label, default_to_cache)
-                 )
+                 asyncio.create_task(self.poll(version, label))
              )

          # If invalid `refresh`, stop the task
@@ -524,24 +562,12 @@
          self,
          version: Optional[str] = None,
          label: Optional[str] = None,
-         default_to_cache: bool = True,
      ):
+         CACHE_KEY = LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
+         cache_value = label if label else version
+
          while True:
-             if default_to_cache:
-                 cached_prompt = self._read_from_cache(
-                     self.alias, version=version, label=label
-                 )
-                 if cached_prompt:
-                     self.version = cached_prompt.version
-                     self.label = cached_prompt.label
-                     self._text_template = cached_prompt.template
-                     self._messages_template = cached_prompt.messages_template
-                     self._prompt_version_id = cached_prompt.prompt_version_id
-                     self._type = PromptType(cached_prompt.type)
-                     self._interpolation_type = PromptInterpolationType(
-                         cached_prompt.interpolation_type
-                     )
-                     return
+             await asyncio.sleep(self._refresh_map[CACHE_KEY][cache_value])

              api = Api()
              try:
@@ -573,22 +599,43 @@
                          type=data["type"],
                          interpolation_type=data["interpolationType"],
                      )
-                     if default_to_cache:
-                         self._write_to_cache(
-                             cache_key=(
-                                 LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
-                             ),
-                             version=response.version,
-                             label=response.label,
-                             text_template=response.text,
-                             messages_template=response.messages,
-                             prompt_version_id=response.id,
-                             type=response.type,
-                             interpolation_type=response.interpolation_type,
-                         )
-             except Exception as e:
+
+                     # Update the cache with fresh data from server
+                     self._write_to_cache(
+                         cache_key=CACHE_KEY,
+                         version=response.version,
+                         label=response.label,
+                         text_template=response.text,
+                         messages_template=response.messages,
+                         prompt_version_id=response.id,
+                         type=response.type,
+                         interpolation_type=response.interpolation_type,
+                     )
+
+                     # Update in-memory properties with fresh data (thread-safe)
+                     with self._lock:
+                         self.version = response.version
+                         self.label = response.label
+                         self._text_template = response.text
+                         self._messages_template = response.messages
+                         self._prompt_version_id = response.id
+                         self._type = response.type
+                         self._interpolation_type = response.interpolation_type
+
+             except Exception:
                  pass

-             CACHE_KEY = LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
-             cache_value = label if label else version
-             await asyncio.sleep(self._refresh_map[CACHE_KEY][cache_value])
+     def _stop_polling(self):
+         loop = _polling_loop
+         if not loop or not loop.is_running():
+             return
+
+         # Stop all polling tasks
+         for ck in list(self._polling_tasks.keys()):
+             for cv in list(self._polling_tasks[ck].keys()):
+                 task = self._polling_tasks[ck][cv]
+                 if task and not task.done():
+                     loop.call_soon_threadsafe(task.cancel)
+             self._polling_tasks[ck].clear()
+             self._refresh_map[ck].clear()
+         return
@@ -11,7 +11,7 @@ from .test_run import (
  )

  from .hooks import on_test_run_end, invoke_test_run_end_hook
- from .api import MetricData
+ from .api import MetricData, TurnApi
  from .hyperparameters import log_hyperparameters


@@ -28,5 +28,6 @@ __all__ = [
      "on_test_run_end",
      "invoke_test_run_end_hook",
      "MetricData",
+     "TurnApi",
      "log_hyperparameters",
  ]
deepeval/test_run/api.py CHANGED
@@ -99,6 +99,7 @@ class TurnApi(BaseModel):
      role: str
      content: str
      order: int
+     user_id: Optional[str] = Field(None, alias="userId")
      retrieval_context: Optional[list] = Field(None, alias="retrievalContext")
      tools_called: Optional[List[ToolCall]] = Field(None, alias="toolsCalled")
      additional_metadata: Optional[Dict] = Field(
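`TurnApi` gains an optional per-turn `user_id`, serialized under the camelCase alias `userId`. For readers less familiar with pydantic aliases, a small self-contained sketch of the same field behavior (model name and values are illustrative):

    from typing import Optional
    from pydantic import BaseModel, Field

    class TurnExample(BaseModel):  # stand-in for TurnApi
        role: str
        content: str
        order: int
        user_id: Optional[str] = Field(None, alias="userId")

    turn = TurnExample(role="user", content="hi", order=0, userId="u-123")
    print(turn.user_id)                    # u-123
    print(turn.model_dump(by_alias=True))  # {..., 'userId': 'u-123'}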
@@ -2,9 +2,8 @@ from enum import Enum
  import os
  import json
  from pydantic import BaseModel, Field
- from typing import Any, Optional, List, Dict, Union
+ from typing import Any, Optional, List, Dict, Union, Tuple
  import shutil
- import webbrowser
  import sys
  import datetime
  import portalocker
@@ -27,6 +26,9 @@ from deepeval.utils import (
      delete_file_if_exists,
      get_is_running_deepeval,
      open_browser,
+     shorten,
+     format_turn,
+     len_short,
  )
  from deepeval.test_run.cache import global_test_run_cache_manager
  from deepeval.constants import CONFIDENT_TEST_CASE_BATCH_SIZE, HIDDEN_DIR
@@ -546,7 +548,7 @@ class TestRunManager:

              if (
                  display == TestRunResultDisplay.PASSING
-                 and test_case.success == False
+                 and test_case.success is False
              ):
                  continue
              elif display == TestRunResultDisplay.FAILING and test_case.success:
@@ -618,7 +620,7 @@
          ):
              if (
                  display == TestRunResultDisplay.PASSING
-                 and conversational_test_case.success == False
+                 and conversational_test_case.success is False
              ):
                  continue
              elif (
@@ -631,6 +633,65 @@
              fail_count = 0
              conversational_test_case_name = conversational_test_case.name

+             if conversational_test_case.turns:
+                 turns_table = Table(
+                     title=f"Conversation - {conversational_test_case_name}",
+                     show_header=True,
+                     header_style="bold",
+                 )
+                 turns_table.add_column("#", justify="right", width=3)
+                 turns_table.add_column("Role", justify="left", width=10)
+
+                 # subtract fixed widths + borders and padding.
+                 # ~20 as a safe buffer
+                 details_max_width = max(
+                     48, min(120, console.width - 3 - 10 - 20)
+                 )
+                 turns_table.add_column(
+                     "Details",
+                     justify="left",
+                     overflow="fold",
+                     max_width=details_max_width,
+                 )
+
+                 # truncate when too long
+                 tools_max_width = min(60, max(24, console.width // 3))
+                 turns_table.add_column(
+                     "Tools",
+                     justify="left",
+                     no_wrap=True,
+                     overflow="ellipsis",
+                     max_width=tools_max_width,
+                 )
+
+                 sorted_turns = sorted(
+                     conversational_test_case.turns, key=lambda t: t.order
+                 )
+
+                 for t in sorted_turns:
+                     tools = t.tools_called or []
+                     tool_names = ", ".join(tc.name for tc in tools)
+
+                     # omit order, role and tools since we show them in separate columns.
+                     details = format_turn(
+                         t,
+                         include_tools_in_header=False,
+                         include_order_role_in_header=False,
+                     )
+
+                     turns_table.add_row(
+                         str(t.order),
+                         t.role,
+                         details,
+                         shorten(tool_names, len_short()),
+                     )
+
+                 console.print(turns_table)
+             else:
+                 console.print(
+                     f"[dim]No turns recorded for {conversational_test_case_name}.[/dim]"
+                 )
+

              if conversational_test_case.metrics_data is not None:
                  for metric_data in conversational_test_case.metrics_data:
@@ -698,7 +759,7 @@ class TestRunManager:
          )
          print(table)

-     def post_test_run(self, test_run: TestRun) -> Optional[str]:
+     def post_test_run(self, test_run: TestRun) -> Optional[Tuple[str, str]]:
          if (
              len(test_run.test_cases) == 0
              and len(test_run.conversational_test_cases) == 0
@@ -752,6 +813,21 @@
              body=body,
          )

+         if not isinstance(data, dict) or "id" not in data:
+             # try to show helpful details
+             detail = None
+             if isinstance(data, dict):
+                 detail = (
+                     data.get("detail")
+                     or data.get("message")
+                     or data.get("error")
+                 )
+             # fall back to repr for visibility
+             raise RuntimeError(
+                 f"Confident API response missing 'id'. "
+                 f"detail={detail!r} raw={type(data).__name__}:{repr(data)[:500]}"
+             )
+
          res = TestRunHttpResponse(
              id=data["id"],
          )
@@ -814,7 +890,7 @@
              )
              self.save_final_test_run_link(link)
              open_browser(link)
-             return link
+             return link, res.id

      def save_test_run_locally(self):
          local_folder = os.getenv("DEEPEVAL_RESULTS_FOLDER")
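Because `post_test_run` now returns a `(link, id)` tuple rather than the bare link (and `wrap_up_test_run` below inherits the same return type), callers that treated the result as a string need to unpack it. A hedged caller-side sketch (how the manager instance and test run are obtained is not shown in this diff):

    result = test_run_manager.post_test_run(test_run)  # manager and test_run assumed in scope
    if result is not None:
        link, test_run_id = result  # previously `result` was the link string itself
        print(f"Results: {link} (run id: {test_run_id})")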
@@ -841,7 +917,7 @@
          runDuration: float,
          display_table: bool = True,
          display: Optional[TestRunResultDisplay] = TestRunResultDisplay.ALL,
-     ) -> Optional[str]:
+     ) -> Optional[Tuple[str, str]]:
          test_run = self.get_test_run()
          if test_run is None:
              print("Test Run is empty, please try again.")
@@ -868,8 +944,8 @@
          test_run.sort_test_cases()

          if global_test_run_cache_manager.disable_write_cache is None:
-             global_test_run_cache_manager.disable_write_cache = (
-                 get_is_running_deepeval() == False
+             global_test_run_cache_manager.disable_write_cache = not bool(
+                 get_is_running_deepeval()
              )

          global_test_run_cache_manager.wrap_up_cached_test_run()
@@ -4,6 +4,7 @@ from .context import (
      update_retriever_span,
      update_llm_span,
  )
+ from .trace_context import trace
  from .types import BaseSpan, Trace
  from .tracing import observe, trace_manager
  from .offline_evals import evaluate_thread, evaluate_trace, evaluate_span
@@ -16,6 +17,7 @@ __all__ = [
      "BaseSpan",
      "Trace",
      "observe",
+     "trace",
      "trace_manager",
      "evaluate_thread",
      "evaluate_trace",