judgeval 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- judgeval/__init__.py +3 -1
- judgeval/common/tracer.py +352 -117
- judgeval/constants.py +5 -3
- judgeval/data/__init__.py +4 -0
- judgeval/data/custom_example.py +18 -0
- judgeval/data/datasets/dataset.py +5 -1
- judgeval/data/datasets/eval_dataset_client.py +64 -5
- judgeval/data/example.py +1 -0
- judgeval/data/result.py +7 -6
- judgeval/data/sequence.py +55 -0
- judgeval/data/sequence_run.py +44 -0
- judgeval/evaluation_run.py +12 -7
- judgeval/integrations/langgraph.py +89 -72
- judgeval/judgment_client.py +70 -68
- judgeval/run_evaluation.py +87 -13
- judgeval/scorers/__init__.py +2 -0
- judgeval/scorers/judgeval_scorer.py +3 -0
- judgeval/scorers/judgeval_scorers/__init__.py +7 -0
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -1
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +21 -0
- judgeval/scorers/score.py +6 -5
- judgeval/version_check.py +22 -0
- {judgeval-0.0.30.dist-info → judgeval-0.0.32.dist-info}/METADATA +1 -1
- {judgeval-0.0.30.dist-info → judgeval-0.0.32.dist-info}/RECORD +26 -22
- judgeval/data/custom_api_example.py +0 -91
- {judgeval-0.0.30.dist-info → judgeval-0.0.32.dist-info}/WHEEL +0 -0
- {judgeval-0.0.30.dist-info → judgeval-0.0.32.dist-info}/licenses/LICENSE.md +0 -0
judgeval/constants.py
CHANGED
@@ -26,7 +26,8 @@ class APIScorer(str, Enum):
     JSON_CORRECTNESS = "json_correctness"
     COMPARISON = "comparison"
     GROUNDEDNESS = "groundedness"
-
+    DERAILMENT = "derailment"
+
     @classmethod
     def _missing_(cls, value):
         # Handle case-insensitive lookup
@@ -39,8 +40,10 @@ UNBOUNDED_SCORERS = set([APIScorer.COMPARISON]) # scorers whose scores are not
 ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
 # API URLs
 JUDGMENT_EVAL_API_URL = f"{ROOT_API}/evaluate/"
+JUDGMENT_SEQUENCE_EVAL_API_URL = f"{ROOT_API}/evaluate_sequence/"
 JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
-
+JUDGMENT_DATASETS_APPEND_API_URL = f"{ROOT_API}/datasets/insert_examples/"
+JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull_for_judgeval/"
 JUDGMENT_DATASETS_DELETE_API_URL = f"{ROOT_API}/datasets/delete/"
 JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
 JUDGMENT_DATASETS_PROJECT_STATS_API_URL = f"{ROOT_API}/datasets/fetch_stats_by_project/"
@@ -54,7 +57,6 @@ JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"
 JUDGMENT_TRACES_FETCH_API_URL = f"{ROOT_API}/traces/fetch/"
 JUDGMENT_TRACES_SAVE_API_URL = f"{ROOT_API}/traces/save/"
 JUDGMENT_TRACES_DELETE_API_URL = f"{ROOT_API}/traces/delete/"
-JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL = f"{ROOT_API}/traces/add_to_trace_eval_queue/"
 JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
 # RabbitMQ
 RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "rabbitmq-networklb-faa155df16ec9085.elb.us-west-1.amazonaws.com")
judgeval/data/__init__.py
CHANGED
@@ -1,12 +1,16 @@
 from judgeval.data.example import Example, ExampleParams
+from judgeval.data.custom_example import CustomExample
 from judgeval.data.scorer_data import ScorerData, create_scorer_data
 from judgeval.data.result import ScoringResult, generate_scoring_result
+from judgeval.data.sequence import Sequence
 
 __all__ = [
     "Example",
     "ExampleParams",
+    "CustomExample",
     "ScorerData",
     "create_scorer_data",
     "ScoringResult",
     "generate_scoring_result",
+    "Sequence",
 ]
judgeval/data/custom_example.py
ADDED
@@ -0,0 +1,18 @@
+from pydantic import BaseModel, Field
+from typing import Optional, Union, List, Dict, Any
+from uuid import uuid4
+
+class CustomExample(BaseModel):
+    input: Optional[Dict[str, Any]] = None
+    actual_output: Optional[Dict[str, Any]] = None
+    expected_output: Optional[Dict[str, Any]] = None
+    context: Optional[List[str]] = None
+    retrieval_context: Optional[List[str]] = None
+    additional_metadata: Optional[Dict[str, Any]] = None
+    tools_called: Optional[List[str]] = None
+    expected_tools: Optional[List[str]] = None
+    name: Optional[str] = None
+    example_id: str = Field(default_factory=lambda: str(uuid4()))
+    example_index: Optional[int] = None
+    timestamp: Optional[str] = None
+    trace_id: Optional[str] = None
judgeval/data/datasets/dataset.py
CHANGED
@@ -7,12 +7,13 @@ import yaml
 from dataclasses import dataclass, field
 from typing import List, Union, Literal
 
-from judgeval.data import Example
+from judgeval.data import Example, Sequence
 from judgeval.common.logger import debug, error, warning, info
 
 @dataclass
 class EvalDataset:
     examples: List[Example]
+    sequences: List[Sequence]
     _alias: Union[str, None] = field(default=None)
     _id: Union[str, None] = field(default=None)
     judgment_api_key: str = field(default="")
@@ -21,11 +22,13 @@ class EvalDataset:
         judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"),
         organization_id: str = os.getenv("JUDGMENT_ORG_ID"),
         examples: List[Example] = [],
+        sequences: List[Sequence] = []
     ):
         debug(f"Initializing EvalDataset with {len(examples)} examples")
         if not judgment_api_key:
             warning("No judgment_api_key provided")
         self.examples = examples
+        self.sequences = sequences
         self._alias = None
         self._id = None
         self.judgment_api_key = judgment_api_key
@@ -309,6 +312,7 @@ class EvalDataset:
         return (
             f"{self.__class__.__name__}("
             f"examples={self.examples}, "
+            f"sequences={self.sequences}, "
             f"_alias={self._alias}, "
             f"_id={self._id}"
             f")"
judgeval/data/datasets/eval_dataset_client.py
CHANGED
@@ -6,13 +6,14 @@ from rich.progress import Progress, SpinnerColumn, TextColumn
 from judgeval.common.logger import debug, error, warning, info
 from judgeval.constants import (
     JUDGMENT_DATASETS_PUSH_API_URL,
+    JUDGMENT_DATASETS_APPEND_API_URL,
     JUDGMENT_DATASETS_PULL_API_URL,
     JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
     JUDGMENT_DATASETS_DELETE_API_URL,
     JUDGMENT_DATASETS_INSERT_API_URL,
     JUDGMENT_DATASETS_EXPORT_JSONL_API_URL
 )
-from judgeval.data import Example
+from judgeval.data import Example, Sequence
 from judgeval.data.datasets import EvalDataset
 
 
@@ -70,9 +71,9 @@ class EvalDatasetClient:
                 },
                 verify=True
             )
-            if response.status_code
-                error(f"Server error during push: {
-
+            if response.status_code != 200:
+                error(f"Server error during push: {response.json()}")
+                raise Exception(f"Server error during push: {response.json()}")
             response.raise_for_status()
         except requests.exceptions.HTTPError as err:
             if response.status_code == 422:
@@ -90,6 +91,64 @@ class EvalDatasetClient:
             )
             return True
 
+
+    def append(self, alias: str, examples: List[Example], project_name: str) -> bool:
+        debug(f"Appending dataset with alias '{alias}'")
+        """
+        Appends the dataset to Judgment platform
+
+        Mock request:
+        dataset = {
+            "alias": alias,
+            "examples": [...],
+            "project_name": project_name
+        } ==>
+        {
+            "_alias": alias,
+            "_id": "..." # ID of the dataset
+        }
+        """
+        with Progress(
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            task_id = progress.add_task(
+                f"Appending [rgb(106,0,255)]'{alias}' to Judgment...",
+                total=100,
+            )
+            content = {
+                "dataset_alias": alias,
+                "project_name": project_name,
+                "examples": [e.to_dict() for e in examples],
+            }
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_APPEND_API_URL,
+                    json=content,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    },
+                    verify=True
+                )
+                if response.status_code != 200:
+                    error(f"Server error during append: {response.json()}")
+                    raise Exception(f"Server error during append: {response.json()}")
+                response.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                if response.status_code == 422:
+                    error(f"Validation error during append: {err.response.json()}")
+                else:
+                    error(f"HTTP error during append: {err}")
+
+            progress.update(
+                task_id,
+                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
+            )
+            return True
+
     def pull(self, alias: str, project_name: str) -> EvalDataset:
         debug(f"Pulling dataset with alias '{alias}'")
         """
@@ -142,8 +201,8 @@ class EvalDatasetClient:
 
             info(f"Successfully pulled dataset with alias '{alias}'")
             payload = response.json()
-
             dataset.examples = [Example(**e) for e in payload.get("examples", [])]
+            dataset.sequences = [Sequence(**s) for s in payload.get("sequences", [])]
             dataset._alias = payload.get("alias")
             dataset._id = payload.get("id")
             progress.update(
judgeval/data/example.py
CHANGED
judgeval/data/result.py
CHANGED
@@ -2,7 +2,8 @@ from dataclasses import dataclass
 from typing import List, Union, Optional, Dict, Any, Union
 from judgeval.common.logger import debug, error
 from pydantic import BaseModel
-from judgeval.data import ScorerData, Example
+from judgeval.data import ScorerData, Example, CustomExample
+from judgeval.data.sequence import Sequence
 
 
 class ScoringResult(BaseModel):
@@ -23,7 +24,7 @@ class ScoringResult(BaseModel):
     name: Optional[str] = None
 
     # The original example object that was used to create the ScoringResult
-    data_object: Optional[Example] = None
+    data_object: Optional[Union[Sequence, CustomExample, Example]] = None
     trace_id: Optional[str] = None
 
     # Additional fields for internal use
@@ -48,7 +49,7 @@ class ScoringResult(BaseModel):
 
 
 def generate_scoring_result(
-
+    data_object: Union[Example, Sequence],
     scorers_data: List[ScorerData],
     run_duration: float,
     success: bool,
@@ -59,15 +60,15 @@ def generate_scoring_result(
     When an LLMTestCase is executed, it turns into an LLMApiTestCase and the progress of the evaluation run is tracked.
     At the end of the evaluation run, we create a TestResult object out of the completed LLMApiTestCase.
     """
-    if
-        name =
+    if data_object.name is not None:
+        name = data_object.name
     else:
         name = "Test Case Placeholder"
         debug(f"No name provided for example, using default name: {name}")
     debug(f"Creating ScoringResult for: {name}")
     scoring_result = ScoringResult(
         name=name,
-        data_object=
+        data_object=data_object,
         success=success,
         scorers_data=scorers_data,
         run_duration=run_duration,
judgeval/data/sequence.py
ADDED
@@ -0,0 +1,55 @@
+from pydantic import BaseModel, Field, field_validator, model_validator
+from typing import List, Optional, Union, Any
+from judgeval.data.example import Example
+from judgeval.scorers import ScorerWrapper, JudgevalScorer
+from uuid import uuid4
+from datetime import datetime, timezone
+
+class Sequence(BaseModel):
+    """
+    A sequence is a list of either Examples or nested Sequence objects.
+    """
+    sequence_id: str = Field(default_factory=lambda: str(uuid4()))
+    name: Optional[str] = "Sequence"
+    created_at: str = Field(default_factory=lambda: datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S"))
+    items: List[Union["Sequence", Example]]
+    scorers: Optional[Any] = None
+    parent_sequence_id: Optional[str] = None
+    sequence_order: Optional[int] = 0
+    root_sequence_id: Optional[str] = None
+    inputs: Optional[str] = None
+    output: Optional[str] = None
+
+    @field_validator("scorers")
+    def validate_scorer(cls, v):
+        loaded_scorers = []
+        for scorer in v or []:
+            try:
+                if isinstance(scorer, ScorerWrapper):
+                    loaded_scorers.append(scorer.load_implementation())
+                else:
+                    loaded_scorers.append(scorer)
+            except Exception as e:
+                raise ValueError(f"Failed to load implementation for scorer {scorer}: {str(e)}")
+        return loaded_scorers
+
+    @model_validator(mode="after")
+    def populate_sequence_metadata(self) -> "Sequence":
+        """Recursively set parent_sequence_id, root_sequence_id, and sequence_order."""
+        # If root_sequence_id isn't already set, assign it to self
+        if self.root_sequence_id is None:
+            self.root_sequence_id = self.sequence_id
+
+        for idx, item in enumerate(self.items):
+            item.sequence_order = idx
+            if isinstance(item, Sequence):
+                item.parent_sequence_id = self.sequence_id
+                item.root_sequence_id = self.root_sequence_id
+                item.populate_sequence_metadata()
+        return self
+
+    class Config:
+        arbitrary_types_allowed = True
+
+# Update forward references so that "Sequence" inside items is resolved.
+Sequence.model_rebuild()
judgeval/data/sequence_run.py
ADDED
@@ -0,0 +1,44 @@
+
+from pydantic import BaseModel
+from typing import List, Optional, Dict, Any, Union
+from judgeval.data import Sequence
+from judgeval.scorers import APIJudgmentScorer, JudgevalScorer
+from judgeval.judges import JudgevalJudge
+from judgeval.rules import Rule
+
+
+class SequenceRun(BaseModel):
+    """
+    Stores example and evaluation scorers together for running an eval task
+
+    Args:
+        project_name (str): The name of the project the evaluation results belong to
+        eval_name (str): A name for this evaluation run
+        sequences (List[Sequence]): The sequences to evaluate
+        scorers (List[Union[JudgmentScorer, JudgevalScorer]]): A list of scorers to use for evaluation
+        model (str): The model used as a judge when using LLM as a Judge
+        aggregator (Optional[str]): The aggregator to use for evaluation if using Mixture of Judges
+        metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
+        judgment_api_key (Optional[str]): The API key for running evaluations on the Judgment API
+        rules (Optional[List[Rule]]): Rules to evaluate against scoring results
+        append (Optional[bool]): Whether to append to existing evaluation results
+    """
+
+    # The user will specify whether they want log_results when they call run_eval
+    log_results: bool = False  # NOTE: log_results has to be set first because it is used to validate project_name and eval_name
+    organization_id: Optional[str] = None
+    project_name: Optional[str] = None
+    eval_name: Optional[str] = None
+    sequences: List[Sequence]
+    model: Union[str, List[str], JudgevalJudge]
+    aggregator: Optional[str] = None
+    metadata: Optional[Dict[str, Any]] = None
+    trace_span_id: Optional[str] = None
+    append: Optional[bool] = False
+    # API Key will be "" until user calls client.run_eval(), then API Key will be set
+    judgment_api_key: Optional[str] = ""
+    override: Optional[bool] = False
+    rules: Optional[List[Rule]] = None
+
+    class Config:
+        arbitrary_types_allowed = True
judgeval/evaluation_run.py
CHANGED
@@ -1,7 +1,7 @@
 from typing import List, Optional, Dict, Any, Union
 from pydantic import BaseModel, field_validator
 
-from judgeval.data import Example
+from judgeval.data import Example, CustomExample
 from judgeval.scorers import JudgevalScorer, APIJudgmentScorer
 from judgeval.constants import ACCEPTABLE_MODELS
 from judgeval.common.logger import debug, error
@@ -15,7 +15,7 @@ class EvaluationRun(BaseModel):
     Args:
         project_name (str): The name of the project the evaluation results belong to
         eval_name (str): A name for this evaluation run
-        examples (List[Example]): The examples to evaluate
+        examples (Union[List[Example], List[CustomExample]]): The examples to evaluate
         scorers (List[Union[JudgmentScorer, JudgevalScorer]]): A list of scorers to use for evaluation
         model (str): The model used as a judge when using LLM as a Judge
         aggregator (Optional[str]): The aggregator to use for evaluation if using Mixture of Judges
@@ -29,7 +29,7 @@ class EvaluationRun(BaseModel):
     organization_id: Optional[str] = None
     project_name: Optional[str] = None
     eval_name: Optional[str] = None
-    examples: List[Example]
+    examples: Union[List[Example], List[CustomExample]]
     scorers: List[Union[APIJudgmentScorer, JudgevalScorer]]
     model: Union[str, List[str], JudgevalJudge]
     aggregator: Optional[str] = None
@@ -38,6 +38,7 @@ class EvaluationRun(BaseModel):
     # API Key will be "" until user calls client.run_eval(), then API Key will be set
     judgment_api_key: Optional[str] = ""
     override: Optional[bool] = False
+    append: Optional[bool] = False
     rules: Optional[List[Rule]] = None
 
     def model_dump(self, **kwargs):
@@ -78,13 +79,17 @@ class EvaluationRun(BaseModel):
             raise ValueError("Eval name is required when log_results is True. Please include the eval_run_name argument.")
         return v
 
-    @field_validator('examples')
+    @field_validator('examples', mode='before')
     def validate_examples(cls, v):
         if not v:
             raise ValueError("Examples cannot be empty.")
-
-
-
+
+        first_type = type(v[0])
+        if first_type not in (Example, CustomExample):
+            raise ValueError(f"Invalid type for Example/CustomExample: {first_type}")
+        if not all(isinstance(ex, first_type) for ex in v):
+            raise ValueError("All examples must be of the same type, either all Example or all CustomExample.")
+
         return v
 
     @field_validator('scorers')