judgeval 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/api/api.py +38 -7
- judgeval/common/api/constants.py +9 -1
- judgeval/common/storage/s3_storage.py +2 -3
- judgeval/common/tracer/core.py +66 -30
- judgeval/common/tracer/otel_span_processor.py +4 -50
- judgeval/common/tracer/span_transformer.py +16 -10
- judgeval/common/utils.py +46 -38
- judgeval/data/example.py +9 -37
- judgeval/data/judgment_types.py +23 -44
- judgeval/data/result.py +8 -14
- judgeval/data/scripts/openapi_transform.py +5 -5
- judgeval/data/trace.py +3 -4
- judgeval/dataset.py +192 -0
- judgeval/evaluation_run.py +1 -0
- judgeval/judges/litellm_judge.py +2 -2
- judgeval/judges/mixture_of_judges.py +6 -6
- judgeval/judges/together_judge.py +4 -2
- judgeval/judgment_client.py +9 -71
- judgeval/run_evaluation.py +40 -8
- judgeval/scorers/score.py +11 -7
- judgeval/scorers/utils.py +3 -3
- judgeval/utils/file_utils.py +40 -25
- {judgeval-0.2.0.dist-info → judgeval-0.3.0.dist-info}/METADATA +10 -6
- {judgeval-0.2.0.dist-info → judgeval-0.3.0.dist-info}/RECORD +26 -28
- judgeval/data/datasets/__init__.py +0 -4
- judgeval/data/datasets/dataset.py +0 -341
- judgeval/data/datasets/eval_dataset_client.py +0 -214
- {judgeval-0.2.0.dist-info → judgeval-0.3.0.dist-info}/WHEEL +0 -0
- {judgeval-0.2.0.dist-info → judgeval-0.3.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/api/api.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Literal, List, Dict, Any
+from typing import Literal, List, Dict, Any, Union
 from requests import exceptions
 from judgeval.common.api.constants import (
     JUDGMENT_TRACES_FETCH_API_URL,
@@ -25,6 +25,8 @@ from judgeval.common.api.constants import (
     JUDGMENT_SCORER_SAVE_API_URL,
     JUDGMENT_SCORER_FETCH_API_URL,
     JUDGMENT_SCORER_EXISTS_API_URL,
+    JUDGMENT_DATASETS_APPEND_TRACES_API_URL,
+    JUDGMENT_CHECK_EXAMPLE_KEYS_API_URL,
 )
 from judgeval.common.api.constants import (
     TraceFetchPayload,
@@ -48,9 +50,12 @@ from judgeval.common.api.constants import (
     ScorerSavePayload,
     ScorerFetchPayload,
     ScorerExistsPayload,
+    CheckExampleKeysPayload,
 )
 from judgeval.utils.requests import requests

+import orjson
+

 class JudgmentAPIException(exceptions.HTTPError):
     """
@@ -65,7 +70,7 @@ class JudgmentAPIException(exceptions.HTTPError):
         self.request = request

     @property
-    def status_code(self) -> int
+    def status_code(self) -> Union[int, None]:
         """Get the HTTP status code from the response."""
         return self.response.status_code if self.response else None

@@ -114,8 +119,15 @@ class JudgmentApiClient:
         try:
             r.raise_for_status()
         except exceptions.HTTPError as e:
+            try:
+                detail = r.json().get("detail", "")
+            except Exception:
+                detail = r.text
+
             raise JudgmentAPIException(
-                f"HTTP {r.status_code}: {r.reason}
+                f"HTTP {r.status_code}: {r.reason}, {detail}",
+                response=r,
+                request=e.request,
             )

         return r.json()
@@ -218,6 +230,14 @@ class JudgmentApiClient:
         }
         return self._do_request("POST", JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL, payload)

+    def check_example_keys(self, keys: List[str], eval_name: str, project_name: str):
+        payload: CheckExampleKeysPayload = {
+            "keys": keys,
+            "eval_name": eval_name,
+            "project_name": project_name,
+        }
+        return self._do_request("POST", JUDGMENT_CHECK_EXAMPLE_KEYS_API_URL, payload)
+
     def save_scorer(self, name: str, prompt: str, options: dict):
         payload: ScorerSavePayload = {
             "name": name,
@@ -279,7 +299,7 @@ class JudgmentApiClient:
         project_name: str,
         examples: List[Dict[str, Any]],
         traces: List[Dict[str, Any]],
-        overwrite: bool,
+        overwrite: bool = False,
     ):
         payload: DatasetPushPayload = {
             "dataset_alias": dataset_alias,
@@ -302,6 +322,18 @@ class JudgmentApiClient:
             "POST", JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL, payload
         )

+    def append_traces(
+        self, dataset_alias: str, project_name: str, traces: List[Dict[str, Any]]
+    ):
+        payload: DatasetAppendPayload = {
+            "dataset_alias": dataset_alias,
+            "project_name": project_name,
+            "traces": traces,
+        }
+        return self._do_request(
+            "POST", JUDGMENT_DATASETS_APPEND_TRACES_API_URL, payload
+        )
+
     def pull_dataset(self, dataset_alias: str, project_name: str):
         payload: DatasetPullPayload = {
             "dataset_alias": dataset_alias,
@@ -347,6 +379,5 @@ class JudgmentApiClient:
             except Exception as e:
                 return f"<Unserializable object of type {type(obj).__name__}: {e}>"

-
-
-        return json.dumps(data, default=fallback_encoder)
+        # orjson returns bytes, so we need to decode to str
+        return orjson.dumps(data, default=fallback_encoder).decode("utf-8")
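The new check_example_keys and append_traces methods map directly onto the endpoints added in constants.py below. A rough usage sketch (not part of the diff; the constructor arguments and the literal values are assumed for illustration):

from judgeval.common.api.api import JudgmentApiClient

# Constructor signature is assumed; credentials normally come from the Judgment platform.
client = JudgmentApiClient(api_key="...", organization_id="...")

# New in 0.3.0: verify that the given example keys exist for an eval run.
client.check_example_keys(
    keys=["input", "expected_output"],  # hypothetical example keys
    eval_name="demo-eval",
    project_name="demo-project",
)

# New in 0.3.0: append already-serialized traces to an existing dataset.
client.append_traces(
    dataset_alias="demo-dataset",
    project_name="demo-project",
    traces=[{"trace_id": "..."}],  # trace payload shape is assumed
)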
judgeval/common/api/constants.py
CHANGED
@@ -51,6 +51,7 @@ JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
 JUDGMENT_GET_EVAL_STATUS_API_URL = f"{ROOT_API}/get_evaluation_status/"
 JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL = f"{ROOT_API}/check_experiment_type/"
 JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL = f"{ROOT_API}/eval-run-name-exists/"
+JUDGMENT_CHECK_EXAMPLE_KEYS_API_URL = f"{ROOT_API}/check_example_keys/"


 # Evaluation API Payloads
@@ -90,9 +91,16 @@ class EvalRunNameExistsPayload(TypedDict):
     judgment_api_key: str


+class CheckExampleKeysPayload(TypedDict):
+    keys: List[str]
+    eval_name: str
+    project_name: str
+
+
 # Datasets API
 JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
 JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL = f"{ROOT_API}/datasets/insert_examples/"
+JUDGMENT_DATASETS_APPEND_TRACES_API_URL = f"{ROOT_API}/traces/add_to_dataset/"
 JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull_for_judgeval/"
 JUDGMENT_DATASETS_DELETE_API_URL = f"{ROOT_API}/datasets/delete/"
 JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
@@ -134,7 +142,7 @@ class DatasetStatsPayload(TypedDict):


 # Projects API
-JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/
+JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete_from_judgeval"
 JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"

judgeval/common/storage/s3_storage.py
CHANGED
@@ -1,6 +1,6 @@
 import os
-import json
 import boto3
+import orjson
 from typing import Optional
 from datetime import datetime, UTC
 from botocore.exceptions import ClientError
@@ -85,8 +85,7 @@ class S3Storage:
         timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
         s3_key = f"traces/{project_name}/{trace_id}_{timestamp}.json"

-
-        trace_json = json.dumps(trace_data)
+        trace_json = orjson.dumps(trace_data).decode("utf-8")

         self.s3_client.put_object(
             Bucket=self.bucket_name,
judgeval/common/tracer/core.py
CHANGED
@@ -32,6 +32,7 @@ from typing import (
 )
 import types

+
 from judgeval.common.tracer.constants import _TRACE_FILEPATH_BLOCKLIST

 from judgeval.common.tracer.otel_span_processor import JudgmentSpanProcessor
@@ -45,6 +46,7 @@ from openai.types.chat import ParsedChatCompletion
 from together import Together, AsyncTogether
 from anthropic import Anthropic, AsyncAnthropic
 from google import genai
+from groq import Groq, AsyncGroq

 from judgeval.data import Example, Trace, TraceSpan, TraceUsage
 from judgeval.scorers import APIScorerConfig, BaseScorer
@@ -67,6 +69,8 @@ ApiClient: TypeAlias = Union[
     AsyncTogether,
     genai.Client,
     genai.client.AsyncClient,
+    Groq,
+    AsyncGroq,
 ]
 SpanType: TypeAlias = str

@@ -79,7 +83,7 @@ class TraceClient:
         tracer: Tracer,
         trace_id: Optional[str] = None,
         name: str = "default",
-        project_name: str
+        project_name: Union[str, None] = None,
         enable_monitoring: bool = True,
         enable_evaluations: bool = True,
         parent_trace_id: Optional[str] = None,
@@ -850,9 +854,9 @@ class Tracer:

     def __init__(
         self,
-        api_key: str
-        organization_id: str
-        project_name: str
+        api_key: Union[str, None] = os.getenv("JUDGMENT_API_KEY"),
+        organization_id: Union[str, None] = os.getenv("JUDGMENT_ORG_ID"),
+        project_name: Union[str, None] = None,
         deep_tracing: bool = False,  # Deep tracing is disabled by default
         enable_monitoring: bool = os.getenv("JUDGMENT_MONITORING", "true").lower()
         == "true",
@@ -905,8 +909,8 @@ class Tracer:
         self.class_identifiers: Dict[
             str, str
         ] = {}  # Dictionary to store class identifiers
-        self.span_id_to_previous_span_id: Dict[str, str
-        self.trace_id_to_previous_trace: Dict[str, TraceClient
+        self.span_id_to_previous_span_id: Dict[str, Union[str, None]] = {}
+        self.trace_id_to_previous_trace: Dict[str, Union[TraceClient, None]] = {}
        self.current_span_id: Optional[str] = None
        self.current_trace: Optional[TraceClient] = None
        self.trace_across_async_contexts: bool = trace_across_async_contexts
@@ -958,7 +962,9 @@ class Tracer:
             self.enable_monitoring = False
             self.enable_evaluations = False

-    def set_current_span(
+    def set_current_span(
+        self, span_id: str
+    ) -> Optional[contextvars.Token[Union[str, None]]]:
         self.span_id_to_previous_span_id[span_id] = self.current_span_id
         self.current_span_id = span_id
         Tracer.current_span_id = span_id
@@ -981,7 +987,7 @@ class Tracer:

     def reset_current_span(
         self,
-        token: Optional[contextvars.Token[str
+        token: Optional[contextvars.Token[Union[str, None]]] = None,
         span_id: Optional[str] = None,
     ):
         try:
@@ -997,7 +1003,7 @@ class Tracer:

     def set_current_trace(
         self, trace: TraceClient
-    ) -> Optional[contextvars.Token[TraceClient
+    ) -> Optional[contextvars.Token[Union[TraceClient, None]]]:
         """
         Set the current trace context in contextvars
         """
@@ -1030,7 +1036,7 @@ class Tracer:

     def reset_current_trace(
         self,
-        token: Optional[contextvars.Token[TraceClient
+        token: Optional[contextvars.Token[Union[TraceClient, None]]] = None,
         trace_id: Optional[str] = None,
     ):
         try:
@@ -1046,7 +1052,7 @@ class Tracer:

     @contextmanager
     def trace(
-        self, name: str, project_name: str
+        self, name: str, project_name: Union[str, None] = None
     ) -> Generator[TraceClient, None, None]:
         """Start a new trace context using a context manager"""
         trace_id = str(uuid.uuid4())
@@ -1692,25 +1698,31 @@ def wrap(
         return wrapper

     if isinstance(client, (OpenAI)):
-        client.chat.completions
-        client.responses
-        client.beta.chat.completions
+        setattr(client.chat.completions, "create", wrapped(original_create))
+        setattr(client.responses, "create", wrapped(original_responses_create))
+        setattr(client.beta.chat.completions, "parse", wrapped(original_beta_parse))
     elif isinstance(client, (AsyncOpenAI)):
-        client.chat.completions
-        client.responses
-
+        setattr(client.chat.completions, "create", wrapped_async(original_create))
+        setattr(client.responses, "create", wrapped_async(original_responses_create))
+        setattr(
+            client.beta.chat.completions, "parse", wrapped_async(original_beta_parse)
+        )
     elif isinstance(client, (Together)):
-        client.chat.completions
+        setattr(client.chat.completions, "create", wrapped(original_create))
     elif isinstance(client, (AsyncTogether)):
-        client.chat.completions
+        setattr(client.chat.completions, "create", wrapped_async(original_create))
     elif isinstance(client, (Anthropic)):
-        client.messages
+        setattr(client.messages, "create", wrapped(original_create))
     elif isinstance(client, (AsyncAnthropic)):
-        client.messages
+        setattr(client.messages, "create", wrapped_async(original_create))
     elif isinstance(client, (genai.Client)):
-        client.models
+        setattr(client.models, "generate_content", wrapped(original_create))
     elif isinstance(client, (genai.client.AsyncClient)):
-        client.models
+        setattr(client.models, "generate_content", wrapped_async(original_create))
+    elif isinstance(client, (Groq)):
+        setattr(client.chat.completions, "create", wrapped(original_create))
+    elif isinstance(client, (AsyncGroq)):
+        setattr(client.chat.completions, "create", wrapped_async(original_create))

     return client

@@ -1745,6 +1757,8 @@ def _get_client_config(
             None,
             client.beta.chat.completions.parse,
         )
+    elif isinstance(client, (Groq, AsyncGroq)):
+        return "GROQ_API_CALL", client.chat.completions.create, None, None, None
     elif isinstance(client, (Together, AsyncTogether)):
         return "TOGETHER_API_CALL", client.chat.completions.create, None, None, None
     elif isinstance(client, (Anthropic, AsyncAnthropic)):
@@ -1783,9 +1797,17 @@ def _format_output_data(
     if isinstance(client, (OpenAI, AsyncOpenAI)):
         if isinstance(response, ChatCompletion):
             model_name = response.model
-            prompt_tokens = response.usage.prompt_tokens
-            completion_tokens =
-
+            prompt_tokens = response.usage.prompt_tokens if response.usage else 0
+            completion_tokens = (
+                response.usage.completion_tokens if response.usage else 0
+            )
+            cache_read_input_tokens = (
+                response.usage.prompt_tokens_details.cached_tokens
+                if response.usage
+                and response.usage.prompt_tokens_details
+                and response.usage.prompt_tokens_details.cached_tokens
+                else 0
+            )

             if isinstance(response, ParsedChatCompletion):
                 message_content = response.choices[0].message.parsed
@@ -1793,10 +1815,19 @@ def _format_output_data(
                 message_content = response.choices[0].message.content
         elif isinstance(response, Response):
             model_name = response.model
-            prompt_tokens = response.usage.input_tokens
-            completion_tokens = response.usage.output_tokens
-            cache_read_input_tokens =
-
+            prompt_tokens = response.usage.input_tokens if response.usage else 0
+            completion_tokens = response.usage.output_tokens if response.usage else 0
+            cache_read_input_tokens = (
+                response.usage.input_tokens_details.cached_tokens
+                if response.usage and response.usage.input_tokens_details
+                else 0
+            )
+            if hasattr(response.output[0], "content"):
+                message_content = "".join(
+                    seg.text
+                    for seg in response.output[0].content
+                    if hasattr(seg, "text")
+                )

     # Note: LiteLLM seems to use cache_read_input_tokens to calculate the cost for OpenAI
     elif isinstance(client, (Together, AsyncTogether)):
@@ -1821,6 +1852,11 @@ def _format_output_data(
         cache_read_input_tokens = response.usage.cache_read_input_tokens
         cache_creation_input_tokens = response.usage.cache_creation_input_tokens
         message_content = response.content[0].text
+    elif isinstance(client, (Groq, AsyncGroq)):
+        model_name = "groq/" + response.model
+        prompt_tokens = response.usage.prompt_tokens
+        completion_tokens = response.usage.completion_tokens
+        message_content = response.choices[0].message.content
     else:
         judgeval_logger.warning(f"Unsupported client type: {type(client)}")
         return None, None
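With Groq and AsyncGroq added to the ApiClient union, wrap(), _get_client_config(), and _format_output_data(), a wrapped Groq client is traced like the other providers. A minimal sketch under stated assumptions: the import path for Tracer and wrap is taken from this module (judgeval.common.tracer.core), the model id is hypothetical, and credentials are expected in JUDGMENT_API_KEY / JUDGMENT_ORG_ID per the new defaults above.

from groq import Groq
from judgeval.common.tracer.core import Tracer, wrap  # assumed import path

tracer = Tracer(project_name="demo-project")  # api_key / organization_id default to env vars
client = wrap(Groq())  # patches chat.completions.create via setattr, as in the diff

with tracer.trace("groq-demo"):
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",  # hypothetical model id
        messages=[{"role": "user", "content": "Hello"}],
    )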
judgeval/common/tracer/otel_span_processor.py
CHANGED
@@ -11,11 +11,10 @@ import threading
 from typing import Any, Dict, Optional

 from opentelemetry.context import Context
-from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace import ReadableSpan, Span
 from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanProcessor
-from opentelemetry.trace import
+from opentelemetry.trace import Status, StatusCode, SpanContext, TraceFlags
 from opentelemetry.trace.span import TraceState, INVALID_SPAN_CONTEXT
-from opentelemetry.util.types import Attributes

 from judgeval.common.logger import judgeval_logger
 from judgeval.common.tracer.otel_exporter import JudgmentAPISpanExporter
@@ -51,8 +50,8 @@ class SimpleReadableSpan(ReadableSpan):
             Status(StatusCode.ERROR) if trace_span.error else Status(StatusCode.OK)
         )

-        self._attributes =
-            trace_span, span_state
+        self._attributes: Dict[str, Any] = (
+            SpanTransformer.trace_span_to_otel_attributes(trace_span, span_state)
         )

         try:
@@ -81,53 +80,8 @@ class SimpleReadableSpan(ReadableSpan):
         self._parent: Optional[SpanContext] = None
         self._events: list[Any] = []
         self._links: list[Any] = []
-        self._resource: Optional[Any] = None
         self._instrumentation_info: Optional[Any] = None

-    @property
-    def name(self) -> str:
-        return self._name
-
-    @property
-    def context(self) -> SpanContext:
-        return self._context
-
-    @property
-    def parent(self) -> Optional[SpanContext]:
-        return self._parent
-
-    @property
-    def start_time(self) -> Optional[int]:
-        return self._start_time
-
-    @property
-    def end_time(self) -> Optional[int]:
-        return self._end_time
-
-    @property
-    def status(self) -> Status:
-        return self._status
-
-    @property
-    def attributes(self) -> Optional[Attributes]:
-        return self._attributes
-
-    @property
-    def events(self):
-        return self._events
-
-    @property
-    def links(self):
-        return self._links
-
-    @property
-    def resource(self) -> Optional[Any]:
-        return self._resource
-
-    @property
-    def instrumentation_info(self) -> Optional[Any]:
-        return self._instrumentation_info
-

 class JudgmentSpanProcessor(SpanProcessor, SpanProcessorBase):
     """
judgeval/common/tracer/span_transformer.py
CHANGED
@@ -1,10 +1,10 @@
 from __future__ import annotations

-import json
 import time
 import uuid
+import orjson
 from datetime import datetime, timezone
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Mapping, Optional, Union

 from opentelemetry.sdk.trace import ReadableSpan
 from pydantic import BaseModel
@@ -16,11 +16,15 @@ from judgeval.evaluation_run import EvaluationRun
 class SpanTransformer:
     @staticmethod
     def _needs_json_serialization(value: Any) -> bool:
+        """
+        Check if the value needs JSON serialization.
+        Returns True if the value is complex and needs serialization.
+        """
         if value is None:
             return False

-
-        if isinstance(value,
+        # Basic JSON-serializable types don't need serialization
+        if isinstance(value, (str, int, float, bool)):
             return False

         complex_types = (dict, list, tuple, set, BaseModel)
@@ -28,7 +32,7 @@ class SpanTransformer:
             return True

         try:
-
+            orjson.dumps(value)
             return False
         except (TypeError, ValueError):
             return True
@@ -39,15 +43,15 @@
             if obj is None:
                 return None
             try:
-                return
+                return orjson.dumps(obj, default=str).decode("utf-8")
             except Exception:
-                return
+                return orjson.dumps(str(obj)).decode("utf-8")
         else:
             if not isinstance(obj, str):
                 return obj
             try:
-                return
-            except (
+                return orjson.loads(obj)
+            except (orjson.JSONDecodeError, TypeError, ValueError):
                 return obj

     @staticmethod
@@ -99,7 +103,9 @@
         return attributes

     @staticmethod
-    def otel_attributes_to_judgment_data(
+    def otel_attributes_to_judgment_data(
+        attributes: Mapping[str, Any],
+    ) -> Dict[str, Any]:
         judgment_data: Dict[str, Any] = {}

         for key, value in attributes.items():