judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/data/result.py CHANGED
@@ -1,10 +1,9 @@
1
1
  from typing import List, Union
2
2
  from judgeval.data import ScorerData, Example
3
- from judgeval.data.trace import TraceSpan
4
- from judgeval.data.judgment_types import ScoringResultJudgmentType
3
+ from judgeval.data.judgment_types import ScoringResult as JudgmentScoringResult
5
4
 
6
5
 
7
- class ScoringResult(ScoringResultJudgmentType):
6
+ class ScoringResult(JudgmentScoringResult):
8
7
  """
9
8
  A ScoringResult contains the output of one or more scorers applied to a single example.
10
9
  Ie: One input, one actual_output, one expected_output, etc..., and 1+ scorer (Faithfulness, Hallucination, Summarization, etc...)
@@ -17,15 +16,14 @@ class ScoringResult(ScoringResultJudgmentType):
17
16
 
18
17
  """
19
18
 
20
- def to_dict(self) -> dict:
21
- """Convert the ScoringResult instance to a dictionary, properly serializing scorer_data."""
22
- return {
23
- "success": self.success,
24
- "scorers_data": [scorer_data.to_dict() for scorer_data in self.scorers_data]
25
- if self.scorers_data
26
- else None,
27
- "data_object": self.data_object.to_dict() if self.data_object else None,
28
- }
19
+ # Need to override this so that it uses this repo's Example class
20
+ data_object: Example
21
+ scorers_data: List[ScorerData]
22
+
23
+ def model_dump(self, **kwargs):
24
+ data = super().model_dump(**kwargs)
25
+ data["data_object"] = self.data_object.model_dump()
26
+ return data
29
27
 
30
28
  def __str__(self) -> str:
31
29
  return f"ScoringResult(\
@@ -36,7 +34,7 @@ class ScoringResult(ScoringResultJudgmentType):
36
34
 
37
35
 
38
36
  def generate_scoring_result(
39
- data_object: Union[Example, TraceSpan],
37
+ data_object: Union[Example],
40
38
  scorers_data: List[ScorerData],
41
39
  run_duration: float,
42
40
  success: bool,
@@ -47,12 +45,7 @@ def generate_scoring_result(
47
45
  When an LLMTestCase is executed, it turns into an LLMApiTestCase and the progress of the evaluation run is tracked.
48
46
  At the end of the evaluation run, we create a TestResult object out of the completed LLMApiTestCase.
49
47
  """
50
- if hasattr(data_object, "name") and data_object.name is not None:
51
- name = data_object.name
52
- else:
53
- name = "Test Case Placeholder"
54
48
  scoring_result = ScoringResult(
55
- name=name,
56
49
  data_object=data_object,
57
50
  success=success,
58
51
  scorers_data=scorers_data,
judgeval/data/scorer_data.py CHANGED
@@ -4,36 +4,13 @@ Implementation of the ScorerData class.
4
4
  ScorerData holds the information related to a single, completed Scorer evaluation run.
5
5
  """
6
6
 
7
- from judgeval.data.judgment_types import ScorerDataJudgmentType
7
+ from __future__ import annotations
8
+
9
+ from judgeval.data.judgment_types import ScorerData
8
10
  from judgeval.scorers import BaseScorer
9
11
  from typing import List
10
12
 
11
13
 
12
- class ScorerData(ScorerDataJudgmentType):
13
- """
14
- ScorerData holds the information related to a single, completed Scorer evaluation run.
15
-
16
- For example, if running the Judgment Faithfulness scorer on an example, the ScorerData
17
- object will contain whether the example passed its threshold expectation, as well as more detailed
18
- information surrounding the evaluation run such as the claims and verdicts generated by the
19
- judge model(s).
20
- """
21
-
22
- def to_dict(self) -> dict:
23
- """Convert the ScorerData instance to a JSON-serializable dictionary."""
24
- return {
25
- "name": self.name,
26
- "threshold": self.threshold,
27
- "success": self.success,
28
- "score": self.score,
29
- "reason": self.reason,
30
- "strict_mode": self.strict_mode,
31
- "evaluation_model": self.evaluation_model,
32
- "error": self.error,
33
- "additional_metadata": self.additional_metadata,
34
- }
35
-
36
-
37
14
  def create_scorer_data(scorer: BaseScorer) -> List[ScorerData]:
38
15
  """
39
16
  After a `scorer` is run, it contains information about the example that was evaluated
judgeval/data/scripts/openapi_transform.py CHANGED
@@ -1,7 +1,7 @@
1
- import json
1
+ import orjson
2
2
  import sys
3
3
  from typing import Any, Dict, Generator, List
4
- from judgeval.utils.requests import requests
4
+ import requests
5
5
 
6
6
  spec_file = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8000/openapi.json"
7
7
 
@@ -10,8 +10,8 @@ if spec_file.startswith("http"):
10
10
  r.raise_for_status()
11
11
  SPEC = r.json()
12
12
  else:
13
- with open(spec_file, "r") as f:
14
- SPEC = json.load(f)
13
+ with open(spec_file, "rb") as f:
14
+ SPEC = orjson.loads(f.read())
15
15
 
16
16
  JUDGEVAL_PATHS: List[str] = [
17
17
  "/log_eval_results/",
@@ -120,4 +120,4 @@ spec = {
120
120
  },
121
121
  }
122
122
 
123
- print(json.dumps(spec, indent=4))
123
+ print(orjson.dumps(spec, option=orjson.OPT_INDENT_2).decode("utf-8"))
judgeval/data/trace.py CHANGED
@@ -1,200 +1,121 @@
1
- from typing import Any
2
- import json
3
- import sys
4
- import threading
5
- from datetime import datetime, timezone
6
- from judgeval.data.judgment_types import (
7
- TraceUsageJudgmentType,
8
- TraceSpanJudgmentType,
9
- TraceJudgmentType,
10
- )
11
- from judgeval.constants import SPAN_LIFECYCLE_END_UPDATE_ID
1
+ from typing import Optional, List, Dict, Any
12
2
  from pydantic import BaseModel
3
+ from .judgment_types import (
4
+ OtelSpanDetailScores,
5
+ OtelSpanDetail,
6
+ OtelTraceListItem,
7
+ )
13
8
 
14
9
 
15
- class TraceUsage(TraceUsageJudgmentType):
16
- pass
10
+ class TraceUsage(BaseModel):
11
+ prompt_tokens: Optional[int] = None
12
+ completion_tokens: Optional[int] = None
13
+ cache_creation_input_tokens: Optional[int] = None
14
+ cache_read_input_tokens: Optional[int] = None
15
+ total_tokens: Optional[int] = None
16
+ prompt_tokens_cost_usd: Optional[float] = None
17
+ completion_tokens_cost_usd: Optional[float] = None
18
+ total_cost_usd: Optional[float] = None
19
+ model_name: Optional[str] = None
17
20
 
18
21
 
19
- class TraceSpan(TraceSpanJudgmentType):
20
- def model_dump(self, **kwargs):
21
- return {
22
- "span_id": self.span_id,
23
- "trace_id": self.trace_id,
24
- "depth": self.depth,
25
- "created_at": datetime.fromtimestamp(
26
- self.created_at, tz=timezone.utc
27
- ).isoformat(),
28
- "inputs": self._serialize_value(self.inputs),
29
- "output": self._serialize_value(self.output),
30
- "error": self._serialize_value(self.error),
31
- "parent_span_id": self.parent_span_id,
32
- "function": self.function,
33
- "duration": self.duration,
34
- "span_type": self.span_type,
35
- "usage": self.usage.model_dump() if self.usage else None,
36
- "has_evaluation": self.has_evaluation,
37
- "agent_name": self.agent_name,
38
- "state_before": self.state_before,
39
- "state_after": self.state_after,
40
- "additional_metadata": self._serialize_value(self.additional_metadata),
41
- "update_id": self.update_id,
42
- }
43
-
44
- def __init__(self, **data):
45
- super().__init__(**data)
46
- # Initialize thread lock for thread-safe update_id increment
47
- self._update_id_lock = threading.Lock()
48
-
49
- def increment_update_id(self) -> int:
50
- """
51
- Thread-safe method to increment the update_id counter.
52
- Returns:
53
- int: The new update_id value after incrementing
54
- """
55
- with self._update_id_lock:
56
- self.update_id += 1
57
- return self.update_id
58
-
59
- def set_update_id_to_ending_number(
60
- self, ending_number: int = SPAN_LIFECYCLE_END_UPDATE_ID
61
- ) -> int:
62
- """
63
- Thread-safe method to set the update_id to a predetermined ending number.
64
-
65
- Args:
66
- ending_number (int): The number to set update_id to. Defaults to SPAN_LIFECYCLE_END_UPDATE_ID.
67
-
68
- Returns:
69
- int: The new update_id value after setting
70
- """
71
- with self._update_id_lock:
72
- self.update_id = ending_number
73
- return self.update_id
74
-
75
- def print_span(self):
76
- """Print the span with proper formatting and parent relationship information."""
77
- indent = " " * self.depth
78
- parent_info = (
79
- f" (parent_id: {self.parent_span_id})" if self.parent_span_id else ""
80
- )
81
- print(f"{indent}→ {self.function} (id: {self.span_id}){parent_info}")
82
-
83
- def _is_json_serializable(self, obj: Any) -> bool:
84
- """Helper method to check if an object is JSON serializable."""
85
- try:
86
- json.dumps(obj)
87
- return True
88
- except (TypeError, OverflowError, ValueError):
89
- return False
90
-
91
- def safe_stringify(self, output, function_name):
92
- """
93
- Safely converts an object to a JSON-serializable structure, handling common object types intelligently.
94
- """
95
- # Handle Pydantic models
96
- if hasattr(output, "model_dump"):
97
- try:
98
- return output.model_dump()
99
- except Exception:
100
- pass
101
-
102
- # Handle LangChain messages and similar objects with content/type
103
- if hasattr(output, "content") and hasattr(output, "type"):
104
- try:
105
- result = {"type": output.type, "content": output.content}
106
- # Add additional fields if they exist
107
- if hasattr(output, "additional_kwargs"):
108
- result["additional_kwargs"] = output.additional_kwargs
109
- if hasattr(output, "response_metadata"):
110
- result["response_metadata"] = output.response_metadata
111
- if hasattr(output, "name"):
112
- result["name"] = output.name
113
- return result
114
- except Exception:
115
- pass
116
-
117
- if hasattr(output, "dict"):
118
- try:
119
- return output.dict()
120
- except Exception:
121
- pass
122
-
123
- if hasattr(output, "to_dict"):
124
- try:
125
- return output.to_dict()
126
- except Exception:
127
- pass
128
-
129
- if hasattr(output, "__dataclass_fields__"):
130
- try:
131
- import dataclasses
132
-
133
- return dataclasses.asdict(output)
134
- except Exception:
135
- pass
136
-
137
- if hasattr(output, "__dict__"):
138
- try:
139
- return output.__dict__
140
- except Exception:
141
- pass
142
-
143
- try:
144
- return str(output)
145
- except (TypeError, OverflowError, ValueError):
146
- pass
147
-
148
- try:
149
- return repr(output)
150
- except (TypeError, OverflowError, ValueError):
151
- pass
152
-
153
- return None
154
-
155
- def _serialize_value(self, value: Any) -> Any:
156
- """Helper method to deep serialize a value safely supporting Pydantic Models / regular PyObjects."""
157
- if value is None:
158
- return None
159
-
160
- recursion_limit = sys.getrecursionlimit()
161
- recursion_limit = int(recursion_limit * 0.75)
162
-
163
- def serialize_value(value, current_depth=0):
164
- try:
165
- if current_depth > recursion_limit:
166
- return {"error": "max_depth_reached: " + type(value).__name__}
167
-
168
- if isinstance(value, BaseModel):
169
- return value.model_dump()
170
- elif isinstance(value, dict):
171
- # Recursively serialize dictionary values
172
- return {
173
- k: serialize_value(v, current_depth + 1)
174
- for k, v in value.items()
175
- }
176
- elif isinstance(value, (list, tuple)):
177
- # Recursively serialize list/tuple items
178
- return [serialize_value(item, current_depth + 1) for item in value]
179
- else:
180
- # Try direct JSON serialization first
181
- try:
182
- json.dumps(value)
183
- return value
184
- except (TypeError, OverflowError, ValueError):
185
- # Fallback to safe stringification
186
- return self.safe_stringify(value, self.function)
187
- except Exception:
188
- return {"error": "Unable to serialize"}
189
- except Exception:
190
- return {"error": "Unable to serialize"}
191
-
192
- # Start serialization with the top-level value
193
- try:
194
- return serialize_value(value, current_depth=0)
195
- except Exception:
196
- return {"error": "Unable to serialize"}
197
-
198
-
199
- class Trace(TraceJudgmentType):
22
+ class TraceScore(OtelSpanDetailScores):
23
+ """Score information for a trace or span."""
24
+
200
25
  pass
26
+
27
+
28
+ class TraceRule(BaseModel):
29
+ """Rule that was triggered for a trace."""
30
+
31
+ rule_id: str
32
+ rule_name: str
33
+
34
+
35
+ class TraceSpan(OtelSpanDetail):
36
+ """Individual span within a trace with complete telemetry data."""
37
+
38
+ @classmethod
39
+ def from_otel_span_detail(cls, span_detail: OtelSpanDetail) -> "TraceSpan":
40
+ """Create TraceSpan from OtelSpanDetail, converting scores to TraceScore."""
41
+ data = span_detail.model_dump()
42
+
43
+ if "scores" in data and data["scores"]:
44
+ data["scores"] = [TraceScore(**score) for score in data["scores"]]
45
+
46
+ return cls(**data)
47
+
48
+ def to_dict(self) -> Dict[str, Any]:
49
+ """Convert TraceSpan to dictionary."""
50
+ return self.model_dump(exclude_none=True)
51
+
52
+
53
+ class Trace(OtelTraceListItem):
54
+ """Complete trace with metadata and all associated spans."""
55
+
56
+ spans: List[TraceSpan] = []
57
+ rules: Optional[List[TraceRule]] = []
58
+
59
+ @classmethod
60
+ def from_dataset_trace_with_spans(cls, dataset_trace: Any) -> "Trace":
61
+ """Create Trace from DatasetTraceWithSpans (handles both API and judgment types)."""
62
+
63
+ if hasattr(dataset_trace, "trace_detail"):
64
+ trace_detail = dataset_trace.trace_detail
65
+ spans_data = dataset_trace.spans
66
+ else:
67
+ trace_detail = dataset_trace.get("trace_detail", {})
68
+ spans_data = dataset_trace.get("spans", [])
69
+
70
+ if hasattr(trace_detail, "model_dump"):
71
+ trace_data = trace_detail.model_dump()
72
+ elif isinstance(trace_detail, dict):
73
+ trace_data = trace_detail.copy()
74
+ else:
75
+ trace_data = dict(trace_detail)
76
+
77
+ spans = []
78
+ for span in spans_data:
79
+ if hasattr(span, "model_dump"):
80
+ spans.append(TraceSpan.from_otel_span_detail(span))
81
+ else:
82
+ # Handle dict spans
83
+ span_data = dict(span) if not isinstance(span, dict) else span.copy()
84
+ if "scores" in span_data and span_data["scores"]:
85
+ span_data["scores"] = [
86
+ TraceScore(**score)
87
+ if isinstance(score, dict)
88
+ else TraceScore(**score.model_dump())
89
+ for score in span_data["scores"]
90
+ ]
91
+ spans.append(TraceSpan(**span_data))
92
+
93
+ rules = []
94
+ if "rule_id" in trace_data and trace_data["rule_id"]:
95
+ rules = [
96
+ TraceRule(
97
+ rule_id=trace_data["rule_id"],
98
+ rule_name=f"Rule {trace_data['rule_id']}",
99
+ )
100
+ ]
101
+
102
+ trace_data.pop("scores", [])
103
+ trace_data.pop("rule_id", None)
104
+ trace = cls(**trace_data)
105
+
106
+ trace.spans = spans
107
+ trace.rules = rules
108
+
109
+ return trace
110
+
111
+ def to_dict(self) -> Dict[str, Any]:
112
+ """Convert Trace to dictionary."""
113
+ return self.model_dump(exclude_none=True)
114
+
115
+ def __len__(self) -> int:
116
+ """Return the number of spans in the trace."""
117
+ return len(self.spans)
118
+
119
+ def __iter__(self):
120
+ """Iterate over spans in the trace."""
121
+ return iter(self.spans)