judgeval 0.14.0__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff shows the contents of two publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
judgeval/api/__init__.py CHANGED
@@ -111,16 +111,6 @@ class JudgmentSyncClient:
             payload,
         )
 
-    def get_evaluation_status(self, experiment_run_id: str, project_name: str) -> Any:
-        query_params = {}
-        query_params["experiment_run_id"] = experiment_run_id
-        query_params["project_name"] = project_name
-        return self._request(
-            "GET",
-            url_for("/get_evaluation_status/"),
-            query_params,
-        )
-
     def datasets_insert_examples_for_judgeval(
         self, payload: DatasetInsertExamples
     ) -> Any:
@@ -318,18 +308,6 @@ class JudgmentAsyncClient:
             payload,
         )
 
-    async def get_evaluation_status(
-        self, experiment_run_id: str, project_name: str
-    ) -> Any:
-        query_params = {}
-        query_params["experiment_run_id"] = experiment_run_id
-        query_params["project_name"] = project_name
-        return await self._request(
-            "GET",
-            url_for("/get_evaluation_status/"),
-            query_params,
-        )
-
     async def datasets_insert_examples_for_judgeval(
         self, payload: DatasetInsertExamples
     ) -> Any:
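
Migration note: with get_evaluation_status removed from both clients, callers poll fetch_experiment_run directly, which is exactly what judgeval/evaluation/__init__.py now does (see below). A minimal sketch, assuming a configured JudgmentSyncClient named client; the helper name wait_for_results is illustrative, not part of the package:

import time

def wait_for_results(client, experiment_run_id: str, project_name: str, expected: int):
    # Poll the experiment run until one result entry per example is available.
    while True:
        response = client.fetch_experiment_run(
            {"experiment_run_id": experiment_run_id, "project_name": project_name}
        )
        results = response.get("results", [])
        if len(results) == expected:
            return results
        time.sleep(5)
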
judgeval/api/api_types.py CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-09-24T18:25:18+00:00
+# timestamp: 2025-09-30T18:06:51+00:00
 
 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -24,6 +24,15 @@ class DatasetsFetch(TypedDict):
     project_name: str
 
 
+class DatasetsTableRow(TypedDict):
+    dataset_id: str
+    name: str
+    created_at: str
+    kind: Literal["trace", "example"]
+    entries: int
+    creator: str
+
+
 class ProjectAdd(TypedDict):
     project_name: str
 
@@ -54,6 +63,8 @@ class SavePromptScorerRequest(TypedDict):
     threshold: float
     model: NotRequired[str]
     is_trace: NotRequired[bool]
+    options: NotRequired[Optional[Dict[str, float]]]
+    description: NotRequired[Optional[str]]
 
 
 class SavePromptScorerResponse(TypedDict):
@@ -143,6 +154,8 @@ class PromptScorer(TypedDict):
     prompt: str
     threshold: float
     model: NotRequired[str]
+    options: NotRequired[Optional[Dict[str, float]]]
+    description: NotRequired[Optional[str]]
     created_at: NotRequired[Optional[str]]
     updated_at: NotRequired[Optional[str]]
     is_trace: NotRequired[Optional[bool]]
@@ -176,18 +189,10 @@ class OtelTraceSpan(TypedDict):
     resource_attributes: NotRequired[Optional[Dict[str, Any]]]
     span_attributes: NotRequired[Optional[Dict[str, Any]]]
     duration: NotRequired[Optional[int]]
-    status_code: NotRequired[Optional[str]]
+    status_code: NotRequired[Optional[int]]
     status_message: NotRequired[Optional[str]]
     events: NotRequired[Optional[List[Dict[str, Any]]]]
     links: NotRequired[Optional[List[Dict[str, Any]]]]
-    legacy_span_id: NotRequired[Optional[str]]
-    inputs: NotRequired[Optional[Dict[str, Any]]]
-    output: Any
-    error: NotRequired[Optional[Dict[str, Any]]]
-    agent_id: NotRequired[Optional[str]]
-    cumulative_llm_cost: NotRequired[Optional[float]]
-    state_after: NotRequired[Optional[Dict[str, Any]]]
-    state_before: NotRequired[Optional[Dict[str, Any]]]
 
 
 class OtelSpanListItemScores(TypedDict):
@@ -202,7 +207,7 @@ class OtelSpanDetailScores(TypedDict):
     score: float
     reason: NotRequired[Optional[str]]
     name: str
-    data: NotRequired[Optional[Dict[str, Any]]]
+    example_id: NotRequired[Optional[str]]
 
 
 class ExampleEvaluationRun(TypedDict):
@@ -240,15 +245,6 @@ class DatasetInsertExamples(TypedDict):
     project_name: str
 
 
-class DatasetInfo(TypedDict):
-    dataset_id: str
-    name: str
-    created_at: str
-    dataset_kind: DatasetKind
-    entries: int
-    creator: str
-
-
 class DatasetCreate(TypedDict):
     name: str
     dataset_kind: DatasetKind
@@ -275,14 +271,14 @@ class OtelTraceListItem(TypedDict):
     organization_id: str
     project_id: str
     trace_id: str
-    timestamp: str
+    created_at: str
     duration: NotRequired[Optional[int]]
     has_notification: NotRequired[Optional[bool]]
     tags: NotRequired[Optional[List[str]]]
     experiment_run_id: NotRequired[Optional[str]]
     span_name: NotRequired[Optional[str]]
-    cumulative_llm_cost: NotRequired[Optional[float]]
-    error: NotRequired[Optional[Dict[str, Any]]]
+    llm_cost: NotRequired[Optional[float]]
+    error: NotRequired[str]
     scores: NotRequired[List[OtelSpanListItemScores]]
     customer_id: NotRequired[Optional[str]]
     input_preview: NotRequired[Optional[str]]
@@ -306,9 +302,9 @@ class OtelSpanDetail(TypedDict):
     resource_attributes: NotRequired[Optional[Dict[str, Any]]]
     span_attributes: NotRequired[Optional[Dict[str, Any]]]
     duration: NotRequired[Optional[int]]
-    status_code: NotRequired[Optional[str]]
+    status_code: NotRequired[Optional[int]]
     status_message: NotRequired[Optional[str]]
-    events: NotRequired[Optional[Union[List[Dict[str, Any]], Dict[str, Any]]]]
+    events: NotRequired[Optional[List[Dict[str, Any]]]]
    links: NotRequired[Optional[Union[List[Dict[str, Any]], Dict[str, Any]]]]
     llm_cost: NotRequired[Optional[float]]
     prompt_tokens: NotRequired[Optional[int]]
@@ -331,5 +327,5 @@ class DatasetReturn(TypedDict):
     name: str
     project_name: str
     dataset_kind: DatasetKind
-    examples: NotRequired[Optional[List[Example]]]
+    examples: NotRequired[List[Example]]
     traces: NotRequired[Optional[List[DatasetTraceWithSpans]]]
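
Since these are TypedDicts, the new DatasetsTableRow is a plain dict at runtime. An illustrative value (all field contents below are made up):

from judgeval.api.api_types import DatasetsTableRow

row: DatasetsTableRow = {
    "dataset_id": "ds_123",
    "name": "qa-regression",
    "created_at": "2025-09-30T18:06:51+00:00",
    "kind": "example",  # must be "trace" or "example" per the Literal type
    "entries": 42,
    "creator": "alice@example.com",
}
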
judgeval/data/judgment_types.py CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-09-24T18:25:17+00:00
+# timestamp: 2025-09-30T18:06:50+00:00
 
 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -26,6 +26,20 @@ class DatasetsFetch(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]
 
 
+class Kind(Enum):
+    trace = "trace"
+    example = "example"
+
+
+class DatasetsTableRow(BaseModel):
+    dataset_id: Annotated[str, Field(title="Dataset Id")]
+    name: Annotated[str, Field(title="Name")]
+    created_at: Annotated[str, Field(title="Created At")]
+    kind: Annotated[Kind, Field(title="Kind")]
+    entries: Annotated[int, Field(title="Entries")]
+    creator: Annotated[str, Field(title="Creator")]
+
+
 class ProjectAdd(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]
 
@@ -56,6 +70,8 @@ class SavePromptScorerRequest(BaseModel):
     threshold: Annotated[float, Field(title="Threshold")]
     model: Annotated[Optional[str], Field(title="Model")] = "gpt-5"
     is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
+    options: Annotated[Optional[Dict[str, float]], Field(title="Options")] = None
+    description: Annotated[Optional[str], Field(title="Description")] = None
 
 
 class SavePromptScorerResponse(BaseModel):
@@ -156,6 +172,8 @@ class PromptScorer(BaseModel):
     prompt: Annotated[str, Field(title="Prompt")]
     threshold: Annotated[float, Field(title="Threshold")]
     model: Annotated[Optional[str], Field(title="Model")] = "gpt-5"
+    options: Annotated[Optional[Dict[str, float]], Field(title="Options")] = None
+    description: Annotated[Optional[str], Field(title="Description")] = None
     created_at: Annotated[Optional[AwareDatetime], Field(title="Created At")] = None
     updated_at: Annotated[Optional[AwareDatetime], Field(title="Updated At")] = None
     is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
@@ -195,22 +213,10 @@ class OtelTraceSpan(BaseModel):
         Optional[Dict[str, Any]], Field(title="Span Attributes")
     ] = None
     duration: Annotated[Optional[int], Field(title="Duration")] = None
-    status_code: Annotated[Optional[str], Field(title="Status Code")] = None
+    status_code: Annotated[Optional[int], Field(title="Status Code")] = None
     status_message: Annotated[Optional[str], Field(title="Status Message")] = None
     events: Annotated[Optional[List[Dict[str, Any]]], Field(title="Events")] = None
     links: Annotated[Optional[List[Dict[str, Any]]], Field(title="Links")] = None
-    legacy_span_id: Annotated[Optional[str], Field(title="Legacy Span Id")] = None
-    inputs: Annotated[Optional[Dict[str, Any]], Field(title="Inputs")] = None
-    output: Annotated[Any, Field(title="Output")]
-    error: Annotated[Optional[Dict[str, Any]], Field(title="Error")] = None
-    agent_id: Annotated[Optional[str], Field(title="Agent Id")] = None
-    cumulative_llm_cost: Annotated[
-        Optional[float], Field(title="Cumulative Llm Cost")
-    ] = None
-    state_after: Annotated[Optional[Dict[str, Any]], Field(title="State After")] = None
-    state_before: Annotated[Optional[Dict[str, Any]], Field(title="State Before")] = (
-        None
-    )
 
 
 class OtelSpanListItemScores(BaseModel):
@@ -225,7 +231,7 @@ class OtelSpanDetailScores(BaseModel):
     score: Annotated[float, Field(title="Score")]
     reason: Annotated[Optional[str], Field(title="Reason")] = None
     name: Annotated[str, Field(title="Name")]
-    data: Annotated[Optional[Dict[str, Any]], Field(title="Data")] = None
+    example_id: Annotated[Optional[str], Field(title="Example Id")] = None
 
 
 class ExampleEvaluationRun(BaseModel):
@@ -273,15 +279,6 @@ class DatasetInsertExamples(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]
 
 
-class DatasetInfo(BaseModel):
-    dataset_id: Annotated[str, Field(title="Dataset Id")]
-    name: Annotated[str, Field(title="Name")]
-    created_at: Annotated[str, Field(title="Created At")]
-    dataset_kind: DatasetKind
-    entries: Annotated[int, Field(title="Entries")]
-    creator: Annotated[str, Field(title="Creator")]
-
-
 class DatasetCreate(BaseModel):
     name: Annotated[str, Field(title="Name")]
     dataset_kind: DatasetKind
@@ -310,16 +307,14 @@ class OtelTraceListItem(BaseModel):
     organization_id: Annotated[str, Field(title="Organization Id")]
     project_id: Annotated[str, Field(title="Project Id")]
     trace_id: Annotated[str, Field(title="Trace Id")]
-    timestamp: Annotated[str, Field(title="Timestamp")]
+    created_at: Annotated[AwareDatetime, Field(title="Created At")]
     duration: Annotated[Optional[int], Field(title="Duration")] = None
     has_notification: Annotated[Optional[bool], Field(title="Has Notification")] = None
     tags: Annotated[Optional[List[str]], Field(title="Tags")] = None
     experiment_run_id: Annotated[Optional[str], Field(title="Experiment Run Id")] = None
     span_name: Annotated[Optional[str], Field(title="Span Name")] = None
-    cumulative_llm_cost: Annotated[
-        Optional[float], Field(title="Cumulative Llm Cost")
-    ] = None
-    error: Annotated[Optional[Dict[str, Any]], Field(title="Error")] = None
+    llm_cost: Annotated[Optional[float], Field(title="Llm Cost")] = None
+    error: Annotated[Optional[str], Field(title="Error")] = ""
     scores: Annotated[
         Optional[List[OtelSpanListItemScores]], Field(title="Scores")
     ] = []
@@ -334,7 +329,7 @@ class OtelTraceListItem(BaseModel):
 class OtelSpanDetail(BaseModel):
     organization_id: Annotated[str, Field(title="Organization Id")]
     project_id: Annotated[str, Field(title="Project Id")]
-    timestamp: Annotated[str, Field(title="Timestamp")]
+    timestamp: Annotated[AwareDatetime, Field(title="Timestamp")]
     trace_id: Annotated[str, Field(title="Trace Id")]
     span_id: Annotated[str, Field(title="Span Id")]
     parent_span_id: Annotated[Optional[str], Field(title="Parent Span Id")] = None
@@ -349,11 +344,9 @@ class OtelSpanDetail(BaseModel):
         Optional[Dict[str, Any]], Field(title="Span Attributes")
     ] = None
     duration: Annotated[Optional[int], Field(title="Duration")] = None
-    status_code: Annotated[Optional[str], Field(title="Status Code")] = None
+    status_code: Annotated[Optional[int], Field(title="Status Code")] = None
     status_message: Annotated[Optional[str], Field(title="Status Message")] = None
-    events: Annotated[
-        Optional[Union[List[Dict[str, Any]], Dict[str, Any]]], Field(title="Events")
-    ] = None
+    events: Annotated[Optional[List[Dict[str, Any]]], Field(title="Events")] = None
     links: Annotated[
         Optional[Union[List[Dict[str, Any]], Dict[str, Any]]], Field(title="Links")
     ] = None
judgeval/dataset/__init__.py CHANGED
@@ -20,7 +20,7 @@ class DatasetInfo:
     dataset_id: str
     name: str
     created_at: str
-    dataset_kind: DatasetKind
+    kind: DatasetKind
     entries: int
     creator: str
 
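
This renames a public attribute, so code reading DatasetInfo.dataset_kind breaks on upgrade. A sketch of the change, assuming DatasetInfo is a dataclass (its field-only body suggests so) and using made-up values:

from judgeval.dataset import DatasetInfo

info = DatasetInfo(
    dataset_id="ds_123",
    name="qa-regression",
    created_at="2025-09-30",
    kind="example",  # DatasetKind value; a plain string is used here for illustration
    entries=42,
    creator="alice@example.com",
)
print(info.kind)  # was info.dataset_kind in 0.14.0
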
judgeval/evaluation/__init__.py CHANGED
@@ -84,7 +84,7 @@ def log_evaluation_results(
 
 def _poll_evaluation_until_complete(
     evaluation_run: ExampleEvaluationRun,
-    expected_scorer_data_count: int,
+    expected_examples_count: int,
     poll_interval_seconds: float = 5,
     max_failures: int = 5,
     max_poll_count: int = 60,  # This should be equivalent to 5 minutes
@@ -117,29 +117,22 @@ def _poll_evaluation_until_complete(
         poll_count += 1
         try:
             # Check status
-            status_response = api_client.get_evaluation_status(
-                experiment_run_id, project_name
-            )
-
-            if status_response.get("status") != "completed":
-                time.sleep(poll_interval_seconds)
-                continue
-
-            example_scorer_pairings = status_response.get("results", [])
-            if len(example_scorer_pairings) != expected_scorer_data_count:
-                time.sleep(poll_interval_seconds)
-                continue
-
             results_response = api_client.fetch_experiment_run(
                 {
                     "experiment_run_id": experiment_run_id,
                     "project_name": project_name,
                 }
             )
+
+            example_scorer_pairings = results_response.get("results", [])
+            if len(example_scorer_pairings) != expected_examples_count:
+                time.sleep(poll_interval_seconds)
+                continue
+
             url = results_response.get("ui_results_url")
 
             scoring_result_list = []
-            for res in results_response.get("results", []):
+            for res in example_scorer_pairings:
                 example = res.get("data", {}).copy()
                 example["example_id"] = res.get("example_id")
                 scoring_result = ScoringResult(
@@ -241,14 +234,9 @@ def run_eval(
             )
             raise JudgmentRuntimeError(error_message)
 
-        num_scorers = (
-            len(evaluation_run.judgment_scorers)
-            if evaluation_run.judgment_scorers
-            else sum(1 for cs in evaluation_run.custom_scorers if cs.server_hosted)
-        )
         results, url = _poll_evaluation_until_complete(
             evaluation_run=evaluation_run,
-            expected_scorer_data_count=(num_scorers * len(evaluation_run.examples)),
+            expected_examples_count=len(evaluation_run.examples),
         )
     finally:
         stop_event.set()
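
Net effect: completion is now judged by one result per example instead of one per (scorer, example) pairing, so runs with multiple scorers no longer wait for the larger product count. The two thresholds, as simple arithmetic:

examples = ["e1", "e2", "e3"]
scorers = ["faithfulness", "answer_relevancy"]

old_expected = len(scorers) * len(examples)  # 0.14.0: 6 scorer-example pairings
new_expected = len(examples)                 # 0.15.0: 3, one result per example
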
judgeval/integrations/openlit/__init__.py ADDED
@@ -0,0 +1,50 @@
+from abc import ABC
+from judgeval.tracer import Tracer
+from judgeval.logger import judgeval_logger
+from judgeval.utils.url import url_for
+
+
+try:
+    import openlit  # type: ignore
+except ImportError:
+    raise ImportError(
+        "Openlit is not installed and required for the openlit integration. Please install it with `pip install openlit`."
+    )
+
+
+class Openlit(ABC):
+    @staticmethod
+    def initialize(
+        **kwargs,
+    ):
+        tracer = Tracer.get_instance()
+        if not tracer or not tracer._initialized:
+            raise ValueError(
+                "Openlit must be initialized after the tracer has been initialized. Please create the Tracer instance first before initializing Openlit."
+            )
+
+        api_key = tracer.api_key
+        organization_id = tracer.organization_id
+        project_name = tracer.project_name
+
+        project_id = Tracer._resolve_project_id(project_name, api_key, organization_id)
+        if not project_id:
+            judgeval_logger.warning(
+                f"Project {project_name} not found. Please create it first at https://app.judgmentlabs.ai/org/{organization_id}/projects."
+            )
+            return
+
+        openlit.init(
+            service_name=project_name,
+            otlp_endpoint=url_for("/otel"),
+            otlp_headers={
+                "Authorization": f"Bearer {api_key}",
+                "X-Organization-Id": organization_id,
+                "X-Project-Id": project_id,
+            },
+            tracer=tracer.get_tracer(),
+            **kwargs,
+        )
+
+
+__all__ = ["Openlit"]
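
A minimal usage sketch for the new integration. The Tracer constructor arguments are not shown in this diff and are assumed here; extra keyword arguments to initialize() are forwarded to openlit.init():

from judgeval.tracer import Tracer
from judgeval.integrations.openlit import Openlit

tracer = Tracer(project_name="my-project")  # constructor signature assumed
Openlit.initialize()  # must run after the Tracer exists, per the guard above
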
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py CHANGED
@@ -4,20 +4,23 @@ from judgeval.scorers.api_scorer import (
     TraceAPIScorerConfig,
 )
 from judgeval.constants import APIScorerType
-from typing import Dict, Any
+from typing import Dict, Any, Optional
 from judgeval.api import JudgmentSyncClient
 from judgeval.exceptions import JudgmentAPIError
 import os
 from judgeval.logger import judgeval_logger
 from abc import ABC
 from judgeval.env import JUDGMENT_DEFAULT_GPT_MODEL
+from copy import copy
 
 
 def push_prompt_scorer(
     name: str,
     prompt: str,
     threshold: float,
+    options: Optional[Dict[str, float]] = None,
     model: str = JUDGMENT_DEFAULT_GPT_MODEL,
+    description: Optional[str] = None,
     judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
     organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
     is_trace: bool = False,
@@ -29,7 +32,9 @@ def push_prompt_scorer(
             "name": name,
             "prompt": prompt,
             "threshold": threshold,
+            "options": options,
             "model": model,
+            "description": description,
             "is_trace": is_trace,
         }
     )
@@ -98,6 +103,8 @@ def scorer_exists(
 class BasePromptScorer(ABC, APIScorerConfig):
     score_type: APIScorerType
     prompt: str
+    options: Optional[Dict[str, float]] = None
+    description: Optional[str] = None
     judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or ""
     organization_id: str = os.getenv("JUDGMENT_ORG_ID") or ""
 
@@ -124,7 +131,9 @@ class BasePromptScorer(ABC, APIScorerConfig):
             name=name,
             prompt=scorer_config["prompt"],
             threshold=scorer_config["threshold"],
+            options=scorer_config.get("options"),
             model=scorer_config.get("model"),
+            description=scorer_config.get("description"),
             judgment_api_key=judgment_api_key,
             organization_id=organization_id,
         )
@@ -135,7 +144,9 @@ class BasePromptScorer(ABC, APIScorerConfig):
         name: str,
         prompt: str,
         threshold: float = 0.5,
+        options: Optional[Dict[str, float]] = None,
         model: str = JUDGMENT_DEFAULT_GPT_MODEL,
+        description: Optional[str] = None,
         judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
         organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
     ):
@@ -150,7 +161,9 @@ class BasePromptScorer(ABC, APIScorerConfig):
             name,
             prompt,
             threshold,
+            options,
             model,
+            description,
             judgment_api_key,
             organization_id,
             is_trace,
@@ -161,7 +174,9 @@ class BasePromptScorer(ABC, APIScorerConfig):
             name=name,
             prompt=prompt,
             threshold=threshold,
+            options=options,
             model=model,
+            description=description,
             judgment_api_key=judgment_api_key,
             organization_id=organization_id,
         )
@@ -199,6 +214,22 @@ class BasePromptScorer(ABC, APIScorerConfig):
         self.push_prompt_scorer()
         judgeval_logger.info(f"Successfully updated model for {self.name}")
 
+    def set_options(self, options: Optional[Dict[str, float]]):
+        """
+        Updates the options of the scorer.
+        """
+        self.options = options
+        self.push_prompt_scorer()
+        judgeval_logger.info(f"Successfully updated options for {self.name}")
+
+    def set_description(self, description: Optional[str]):
+        """
+        Updates the description of the scorer.
+        """
+        self.description = description
+        self.push_prompt_scorer()
+        judgeval_logger.info(f"Successfully updated description for {self.name}")
+
     def append_to_prompt(self, prompt_addition: str):
         """
         Appends a string to the prompt.
@@ -226,7 +257,19 @@ class BasePromptScorer(ABC, APIScorerConfig):
         """
         return self.model
 
-    def get_name(self) -> str | None:
+    def get_options(self) -> Dict[str, float] | None:
+        """
+        Returns the options of the scorer.
+        """
+        return copy(self.options) if self.options is not None else None
+
+    def get_description(self) -> str | None:
+        """
+        Returns the description of the scorer.
+        """
+        return self.description
+
+    def get_name(self) -> str:
         """
         Returns the name of the scorer.
         """
@@ -241,6 +284,8 @@ class BasePromptScorer(ABC, APIScorerConfig):
             "model": self.model,
             "prompt": self.prompt,
             "threshold": self.threshold,
+            "options": self.options,
+            "description": self.description,
         }
 
     def push_prompt_scorer(self):
@@ -251,14 +296,16 @@ class BasePromptScorer(ABC, APIScorerConfig):
             self.name,
             self.prompt,
             self.threshold,
+            self.options,
             self.model,
+            self.description,
             self.judgment_api_key,
             self.organization_id,
             isinstance(self, TracePromptScorer),
         )
 
     def __str__(self):
-        return f"PromptScorer(name={self.name}, model={self.model}, prompt={self.prompt}, threshold={self.threshold})"
+        return f"PromptScorer(name={self.name}, model={self.model}, prompt={self.prompt}, threshold={self.threshold}, options={self.options}, description={self.description})"
 
     def model_dump(self, *args, **kwargs) -> Dict[str, Any]:
         base = super().model_dump(*args, **kwargs)
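
The new fields flow through push_prompt_scorer, whose full signature is visible above. A sketch of saving a scorer with the 0.15.0 fields; the exact semantics of options are not spelled out in this diff, but Dict[str, float] suggests a mapping from answer choices to scores:

from judgeval.scorers.judgeval_scorers.api_scorers.prompt_scorer import (
    push_prompt_scorer,
)

push_prompt_scorer(
    name="tone-check",
    prompt="Is the response polite? Answer yes or no.",
    threshold=0.5,
    options={"yes": 1.0, "no": 0.0},        # new in 0.15.0
    description="Binary politeness check",  # new in 0.15.0
)
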
judgeval/tracer/__init__.py CHANGED
@@ -255,6 +255,10 @@ class Tracer(metaclass=SingletonMeta):
     def get_current_agent_context(self):
         return self.agent_context
 
+    def get_span_processor(self) -> JudgmentSpanProcessor:
+        """Get the internal span processor of this tracer instance."""
+        return self.judgment_processor
+
     def set_customer_id(self, customer_id: str) -> None:
         span = self.get_current_span()
         if span and span.is_recording():
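
A short usage sketch for the new accessor. Tracer.get_instance() is grounded in the openlit integration above; force_flush() is the standard OpenTelemetry SpanProcessor method, and whether JudgmentSpanProcessor customizes it is not shown in this diff:

from judgeval.tracer import Tracer

tracer = Tracer.get_instance()
if tracer:
    processor = tracer.get_span_processor()
    processor.force_flush()  # flush pending spans before shutdown
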
judgeval/version.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.14.0"
+__version__ = "0.15.0"
 
 
 def get_version() -> str:
judgeval-0.14.0.dist-info/METADATA → judgeval-0.15.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.14.0
+Version: 0.15.0
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
judgeval-0.14.0.dist-info/RECORD → judgeval-0.15.0.dist-info/RECORD CHANGED
@@ -4,22 +4,23 @@ judgeval/constants.py,sha256=JZZJ1MqzZZDVk-5PRPRbmLnM8mXI-RDL5vxa1JFuscs,3408
 judgeval/env.py,sha256=37Mn4g0OkpFxXCZGlO_CLqKJnyX-jx_R24tC28XJzig,2112
 judgeval/exceptions.py,sha256=tTbfe4yoOtPXmn22UQz9-6a-5PT9uOko85xaRRwr0Sw,621
 judgeval/logger.py,sha256=ZWbp0QfT1CJnQIjV-Zle4n489nFCKEmD2-ukx--iiow,1553
-judgeval/version.py,sha256=qFLvC9WMqiLgjMPdAqGbBm-v0rO2ePVeM8dvl-2r7WA,74
+judgeval/version.py,sha256=1a6hS0-ubylneLxq8Pt0EqBRx0hSP1cO9JKaTmHazfo,74
 judgeval/warnings.py,sha256=LbGte14ppiFjrkp-JJYueZ40NWFvMkWRvPXr6r-fUWw,73
-judgeval/api/__init__.py,sha256=3Pm0qQ4ZQj76jUsJVrnuazRnYcqF3pzM_Wv_Z6lOv0w,13216
-judgeval/api/api_types.py,sha256=w45Rh2I31JaqWoUsxCK5_rr_wJ1QkWYxDLjRppXkruE,8785
+judgeval/api/__init__.py,sha256=_oDuEDBDmyPQkdfvWebvBSvrnlzg4vreETpt16frXEA,12468
+judgeval/api/api_types.py,sha256=hpUpVRCLIGF-lHHg1gIgdTaRfwS94Vh1E23vU9Z34js,8555
 judgeval/data/__init__.py,sha256=1tU0EN0ThIfQ1fad5I3dKxAfTcZ5U8cvTLcQ6qLVLU0,407
 judgeval/data/evaluation_run.py,sha256=O41p99wNAuCAf6lsLNKzkZ6W-kL9LlzCYxVls7IcKkA,4727
 judgeval/data/example.py,sha256=eGJpF-lyUH734Cg90B7WtU9f8iKoS3VFGeV6R-GVCCc,1039
-judgeval/data/judgment_types.py,sha256=rZ4Uq6Va94bHF0Obn3wKB7E5n77R5N5WTkK5o02EAzg,16285
+judgeval/data/judgment_types.py,sha256=u45rfHEtUNzXSQstJ4TcOo-yX9cZymma5W0hTtb5u34,15965
 judgeval/data/result.py,sha256=XufFGSAkBDfevPUmzSgsR9HEqytISkM0U5HkhJmsjpY,2102
 judgeval/data/scorer_data.py,sha256=HeP15ZgftFTJCF8JmDJCLWXRnZJIaGDJCzl7Hg6gWwE,2006
 judgeval/data/trace.py,sha256=zSiR3o6xt8Z46XA3M9fJBtViF0BsPO6yKp9jxdscOSc,3881
 judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
 judgeval/data/scripts/openapi_transform.py,sha256=Sm04JClzyP1ga8KA3gkIdsae8Hlx-XU7-x0gHCQYOhg,3877
-judgeval/dataset/__init__.py,sha256=4CiV7jQUiJ8_IXnD_E-vS5OfoEr0hghBe3-OSuVoBwE,8277
-judgeval/evaluation/__init__.py,sha256=6bSC1Sw-fpJN6OkZTv4UtAoYZqkjUy7OG17lxiRX5qE,13321
+judgeval/dataset/__init__.py,sha256=kL0_tIMP3qV6t4W17HQU91ybdXMZ5iDZzyUKzyfRdyY,8269
+judgeval/evaluation/__init__.py,sha256=WcqOgQdwgtc_BwEwDz6RDlF2RczyLrNjjIevQp-_NKE,12788
 judgeval/integrations/langgraph/__init__.py,sha256=HwXmtDxaO75Kn4KPErnMb6Ne6FcpRxV_SCYVuwFsve0,332
+judgeval/integrations/openlit/__init__.py,sha256=-8D4D6-fGsWPwoOojw82OaE9X5sUbmb16x1bF-WfOmg,1571
 judgeval/judges/__init__.py,sha256=e7JnTc1TG_SwqydDHTXHIP0EBazQxt-ydMQG7ghSU5A,228
 judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
 judgeval/judges/litellm_judge.py,sha256=5vEF0IUo7HVWnOF2ww-DMke8Xkarnz32B_qbgKjc0-I,4182
@@ -39,8 +40,8 @@ judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=WUeFy
 judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=ciiFBQQC4UDsk9qou9OiKbAR31s82eRUY1ZTt1gdM-0,407
 judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ucYOI6ztAjfoYmcgTDzN8u5RrehlVqrkeLEfss9b1fk,441
 judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=V3RdrWhnR_vLBrtWw7QbgN9K_A-Och7-v9I2fN4z8gY,506
-judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=UiX-qUVokx8W3EIr5i4_H6CGQT2pNBEYWSCtTUx1wLE,9196
-judgeval/tracer/__init__.py,sha256=iqFvWok4QBW-1bs2zCmkhw4Y_o2d2mVeiPUtQbG9Nvc,35995
+judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=zJ0n3HyZ1FFBnMnTYxBi37m_3Er7ENd4HpqLjNi5Eag,10902
+judgeval/tracer/__init__.py,sha256=uIOx-2P_FVwBKhwVkkIOyEQCv3gouCZ2I8-eApocnKU,36165
 judgeval/tracer/constants.py,sha256=ae8tivAW97awJQxdRB9OMqX50wOLX3zqChT_AGkPBu0,85
 judgeval/tracer/keys.py,sha256=ho4-_w4ngTVejdSKUH80sG6vtYt4c7FEKrYpFrDfPLs,2105
 judgeval/tracer/local_eval_queue.py,sha256=KZKvSSli7B-EVzdHa4-CmXUpv0uOjGLLRa2KTPg8lRc,7320
@@ -72,8 +73,8 @@ judgeval/utils/serialize.py,sha256=QXR-8Nj5rqOrI9zLx0oRLdk6DW6Bc7j8eyF4zQ7PLxA,6
 judgeval/utils/testing.py,sha256=m5Nexv65tmfSj1XvAPK5Ear7aJ7w5xjDtZN0tLZ_RBk,2939
 judgeval/utils/url.py,sha256=Shf0v3XcbaWpL0m1eGJEEO_z4TsQCnDB2Rl25OTUmiI,195
 judgeval/utils/version_check.py,sha256=ylZQSqV7kLzEOChxvav9SCHUU4OnaCp36tXHLjdzmw0,1072
-judgeval-0.14.0.dist-info/METADATA,sha256=PN_KgV2HavJDSdkoMAnusgLD1kiKNOfXMyWEVFyYx2s,8564
-judgeval-0.14.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.14.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
-judgeval-0.14.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.14.0.dist-info/RECORD,,
+judgeval-0.15.0.dist-info/METADATA,sha256=MT857VBF8qoWXiCu_NyK_JCBcrddN1kCSWxDd58D3g0,8564
+judgeval-0.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.15.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
+judgeval-0.15.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.15.0.dist-info/RECORD,,