judgeval 0.7.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/__init__.py +139 -12
- judgeval/api/__init__.py +501 -0
- judgeval/api/api_types.py +344 -0
- judgeval/cli.py +2 -4
- judgeval/constants.py +10 -26
- judgeval/data/evaluation_run.py +49 -26
- judgeval/data/example.py +2 -2
- judgeval/data/judgment_types.py +266 -82
- judgeval/data/result.py +4 -5
- judgeval/data/scorer_data.py +4 -2
- judgeval/data/tool.py +2 -2
- judgeval/data/trace.py +7 -50
- judgeval/data/trace_run.py +7 -4
- judgeval/{dataset.py → dataset/__init__.py} +43 -28
- judgeval/env.py +67 -0
- judgeval/{run_evaluation.py → evaluation/__init__.py} +29 -95
- judgeval/exceptions.py +27 -0
- judgeval/integrations/langgraph/__init__.py +788 -0
- judgeval/judges/__init__.py +2 -2
- judgeval/judges/litellm_judge.py +75 -15
- judgeval/judges/together_judge.py +86 -18
- judgeval/judges/utils.py +7 -21
- judgeval/{common/logger.py → logger.py} +8 -6
- judgeval/scorers/__init__.py +0 -4
- judgeval/scorers/agent_scorer.py +3 -7
- judgeval/scorers/api_scorer.py +8 -13
- judgeval/scorers/base_scorer.py +52 -32
- judgeval/scorers/example_scorer.py +1 -3
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -14
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +45 -20
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +2 -2
- judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +3 -3
- judgeval/scorers/score.py +21 -31
- judgeval/scorers/trace_api_scorer.py +5 -0
- judgeval/scorers/utils.py +1 -103
- judgeval/tracer/__init__.py +1075 -2
- judgeval/tracer/constants.py +1 -0
- judgeval/tracer/exporters/__init__.py +37 -0
- judgeval/tracer/exporters/s3.py +119 -0
- judgeval/tracer/exporters/store.py +43 -0
- judgeval/tracer/exporters/utils.py +32 -0
- judgeval/tracer/keys.py +67 -0
- judgeval/tracer/llm/__init__.py +1233 -0
- judgeval/{common/tracer → tracer/llm}/providers.py +5 -10
- judgeval/{local_eval_queue.py → tracer/local_eval_queue.py} +15 -10
- judgeval/tracer/managers.py +188 -0
- judgeval/tracer/processors/__init__.py +181 -0
- judgeval/tracer/utils.py +20 -0
- judgeval/trainer/__init__.py +5 -0
- judgeval/{common/trainer → trainer}/config.py +12 -9
- judgeval/{common/trainer → trainer}/console.py +2 -9
- judgeval/{common/trainer → trainer}/trainable_model.py +12 -7
- judgeval/{common/trainer → trainer}/trainer.py +119 -17
- judgeval/utils/async_utils.py +2 -3
- judgeval/utils/decorators.py +24 -0
- judgeval/utils/file_utils.py +37 -4
- judgeval/utils/guards.py +32 -0
- judgeval/utils/meta.py +14 -0
- judgeval/{common/api/json_encoder.py → utils/serialize.py} +7 -1
- judgeval/utils/testing.py +88 -0
- judgeval/utils/url.py +10 -0
- judgeval/{version_check.py → utils/version_check.py} +3 -3
- judgeval/version.py +5 -0
- judgeval/warnings.py +4 -0
- {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/METADATA +12 -14
- judgeval-0.9.0.dist-info/RECORD +80 -0
- judgeval/clients.py +0 -35
- judgeval/common/__init__.py +0 -13
- judgeval/common/api/__init__.py +0 -3
- judgeval/common/api/api.py +0 -375
- judgeval/common/api/constants.py +0 -186
- judgeval/common/exceptions.py +0 -27
- judgeval/common/storage/__init__.py +0 -6
- judgeval/common/storage/s3_storage.py +0 -97
- judgeval/common/tracer/__init__.py +0 -31
- judgeval/common/tracer/constants.py +0 -22
- judgeval/common/tracer/core.py +0 -2427
- judgeval/common/tracer/otel_exporter.py +0 -108
- judgeval/common/tracer/otel_span_processor.py +0 -188
- judgeval/common/tracer/span_processor.py +0 -37
- judgeval/common/tracer/span_transformer.py +0 -207
- judgeval/common/tracer/trace_manager.py +0 -101
- judgeval/common/trainer/__init__.py +0 -5
- judgeval/common/utils.py +0 -948
- judgeval/integrations/langgraph.py +0 -844
- judgeval/judges/mixture_of_judges.py +0 -287
- judgeval/judgment_client.py +0 -267
- judgeval/rules.py +0 -521
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
- judgeval/utils/alerts.py +0 -93
- judgeval/utils/requests.py +0 -50
- judgeval-0.7.1.dist-info/RECORD +0 -82
- {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/WHEEL +0 -0
- {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/entry_points.txt +0 -0
- {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/licenses/LICENSE.md +0 -0
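The bulk of this release is a package-layout reshuffle: modules under judgeval/common/ move to the top level (logger, exceptions, tracer, trainer), run_evaluation.py becomes evaluation/__init__.py, and dataset.py becomes dataset/__init__.py. A rough sketch of what that means for imports, based only on the renames above and the hunks below (the old logger path is inferred from the rename; the full 0.9.0 public API is not shown here):

```python
# Hedged sketch of the import-path migration implied by the renames above.
# Only the judgeval.exceptions and judgeval.logger destinations are confirmed
# by the hunks below; the 0.7.1 logger path is inferred from the file rename.

# judgeval 0.7.1
# from judgeval.common.exceptions import JudgmentAPIError
# from judgeval.common.logger import judgeval_logger

# judgeval 0.9.0
from judgeval.exceptions import JudgmentAPIError
from judgeval.logger import judgeval_logger
```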
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py CHANGED
@@ -1,11 +1,11 @@
 from judgeval.scorers.api_scorer import APIScorerConfig
 from judgeval.constants import APIScorerType
 from typing import Dict, Any, Optional
-from judgeval.
+from judgeval.api import JudgmentSyncClient
+from judgeval.exceptions import JudgmentAPIError
 import os
-from judgeval.common.exceptions import JudgmentAPIError
 from copy import copy
-from judgeval.
+from judgeval.logger import judgeval_logger


 def push_prompt_scorer(
@@ -16,15 +16,28 @@ def push_prompt_scorer(
     judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
     organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
 ) -> str:
-    client =
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
-        r = client.save_scorer(
-
+        r = client.save_scorer(
+            payload={
+                "name": name,
+                "prompt": prompt,
+                "threshold": threshold,
+                "options": options,
+            }
+        )
+    except JudgmentAPIError as e:
         if e.status_code == 500:
             raise JudgmentAPIError(
-
+                status_code=e.status_code,
+                detail=f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.detail}",
+                response=e.response,
             )
-        raise JudgmentAPIError(
+        raise JudgmentAPIError(
+            status_code=e.status_code,
+            detail=f"Failed to save prompt scorer: {e.detail}",
+            response=e.response,
+        )
     return r["name"]


@@ -33,19 +46,23 @@ def fetch_prompt_scorer(
     judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
     organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
 ):
-    client =
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
-        scorer_config = client.fetch_scorer(name)["scorer"]
+        scorer_config = client.fetch_scorer({"name": name})["scorer"]
         scorer_config.pop("created_at")
         scorer_config.pop("updated_at")
         return scorer_config
-    except
+    except JudgmentAPIError as e:
         if e.status_code == 500:
             raise JudgmentAPIError(
-
+                status_code=e.status_code,
+                detail=f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.detail}",
+                response=e.response,
             )
         raise JudgmentAPIError(
-
+            status_code=e.status_code,
+            detail=f"Failed to fetch prompt scorer '{name}': {e.detail}",
+            response=e.response,
         )


@@ -54,15 +71,21 @@ def scorer_exists(
     judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
     organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
 ):
-    client =
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
-        return client.scorer_exists(name)["exists"]
-    except
+        return client.scorer_exists({"name": name})["exists"]
+    except JudgmentAPIError as e:
         if e.status_code == 500:
             raise JudgmentAPIError(
-
+                status_code=e.status_code,
+                detail=f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.detail}",
+                response=e.response,
             )
-        raise JudgmentAPIError(
+        raise JudgmentAPIError(
+            status_code=e.status_code,
+            detail=f"Failed to check if scorer exists: {e.detail}",
+            response=e.response,
+        )


 class PromptScorer(APIScorerConfig):
@@ -102,7 +125,7 @@ class PromptScorer(APIScorerConfig):
         cls,
         name: str,
         prompt: str,
-        threshold:
+        threshold: float = 0.5,
         options: Optional[Dict[str, float]] = None,
         judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
         organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
@@ -122,7 +145,9 @@ class PromptScorer(APIScorerConfig):
             )
         else:
             raise JudgmentAPIError(
-
+                status_code=400,
+                detail=f"Scorer with name {name} already exists. Either use the existing scorer with the get() method or use a new name.",
+                response=None,  # type: ignore
             )

     # Setter functions. Each setter function pushes the scorer to the DB.
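The prompt-scorer helpers now construct a JudgmentSyncClient directly and re-raise JudgmentAPIError with explicit status_code/detail/response keyword arguments. A hedged usage sketch of that pattern, mirroring the hunks above (the scorer name and the surrounding script are illustrative):

```python
import os

# Sketch of the 0.9.0 client/error pattern shown in the hunks above.
from judgeval.api import JudgmentSyncClient
from judgeval.exceptions import JudgmentAPIError

client = JudgmentSyncClient(
    os.getenv("JUDGMENT_API_KEY") or "",
    os.getenv("JUDGMENT_ORG_ID") or "",
)

try:
    # Endpoints now take dict payloads, e.g. {"name": ...}, as in the diff.
    exists = client.scorer_exists({"name": "my-prompt-scorer"})["exists"]
except JudgmentAPIError as e:
    # The helpers above wrap 500s as "temporarily unavailable" and re-raise
    # everything else with the same status_code/detail/response attributes.
    print(e.status_code, e.detail)
else:
    print("scorer already exists" if exists else "scorer not found")
```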
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py CHANGED
@@ -3,12 +3,12 @@
 """

 # Internal imports
-from judgeval.scorers.
+from judgeval.scorers.trace_api_scorer import TraceAPIScorerConfig
 from judgeval.constants import APIScorerType
 from typing import Optional, Dict


-class ToolDependencyScorer(
+class ToolDependencyScorer(TraceAPIScorerConfig):
     kwargs: Optional[Dict] = None

     def __init__(self, threshold: float = 1.0, enable_param_checking: bool = True):
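ToolDependencyScorer now subclasses TraceAPIScorerConfig; its constructor signature is unchanged in the hunk above. A minimal instantiation sketch (how the scorer is then attached to an evaluation run is not shown in this diff):

```python
# Constructor arguments taken from the __init__ signature visible above.
from judgeval.scorers.judgeval_scorers.api_scorers.tool_dependency import ToolDependencyScorer

scorer = ToolDependencyScorer(threshold=1.0, enable_param_checking=True)
```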
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py CHANGED
@@ -3,19 +3,19 @@
 """

 # Internal imports
-from judgeval.scorers.
+from judgeval.scorers.trace_api_scorer import TraceAPIScorerConfig
 from judgeval.constants import APIScorerType
 from typing import Dict, Any


-class ToolOrderScorer(
+class ToolOrderScorer(TraceAPIScorerConfig):
     score_type: APIScorerType = APIScorerType.TOOL_ORDER
     threshold: float = 1.0
     exact_match: bool = False

     def model_dump(self, *args, **kwargs) -> Dict[str, Any]:
         base = super().model_dump(*args, **kwargs)
-        base_fields = set(
+        base_fields = set(TraceAPIScorerConfig.model_fields.keys())
         all_fields = set(self.__class__.model_fields.keys())

         extra_fields = all_fields - base_fields - {"kwargs"}
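ToolOrderScorer.model_dump now derives base_fields from TraceAPIScorerConfig.model_fields and separates the subclass-only fields. The hunk cuts off before showing what happens to extra_fields; the self-contained pydantic sketch below shows one plausible reading of the pattern, with illustrative class names:

```python
# Sketch of the "collect subclass-only fields" model_dump pattern above.
# BaseConfig/OrderConfig are illustrative stand-ins, not judgeval classes;
# folding the extras under "kwargs" is an assumption about the cut-off code.
from typing import Any, Dict
from pydantic import BaseModel


class BaseConfig(BaseModel):
    score_type: str
    threshold: float = 1.0


class OrderConfig(BaseConfig):
    exact_match: bool = False

    def model_dump(self, *args, **kwargs) -> Dict[str, Any]:
        base = super().model_dump(*args, **kwargs)
        base_fields = set(BaseConfig.model_fields.keys())
        all_fields = set(self.__class__.model_fields.keys())
        extra_fields = all_fields - base_fields
        # Move subclass-only fields under a single key.
        base["kwargs"] = {f: base.pop(f) for f in extra_fields}
        return base


print(OrderConfig(score_type="tool_order").model_dump())
# {'score_type': 'tool_order', 'threshold': 1.0, 'kwargs': {'exact_match': False}}
```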
judgeval/scorers/score.py CHANGED
@@ -1,5 +1,5 @@
 """
-Infrastructure for executing evaluations of `Example`s using one or more `
+Infrastructure for executing evaluations of `Example`s using one or more `ExampleScorer`s.
 """

 import asyncio
@@ -13,23 +13,23 @@ from judgeval.data import (
     generate_scoring_result,
     create_scorer_data,
 )
-from judgeval.scorers import
+from judgeval.scorers.example_scorer import ExampleScorer
 from judgeval.scorers.utils import clone_scorers
-from judgeval.
+from judgeval.logger import judgeval_logger
 from judgeval.judges import JudgevalJudge
-from judgeval.
+from judgeval.env import JUDGMENT_DEFAULT_GPT_MODEL


 async def safe_a_score_example(
-    scorer:
+    scorer: ExampleScorer,
     example: Example,
 ):
     """
     Scoring task function when not using a progress indicator!
-    "Safely" scores an `Example` using a `
+    "Safely" scores an `Example` using a `ExampleScorer` by gracefully handling any exceptions that may occur.

     Args:
-        scorer (
+        scorer (ExampleScorer): The `ExampleScorer` to use for scoring the example.
         example (Example): The `Example` to be scored.
     """
     try:
@@ -55,20 +55,20 @@ async def safe_a_score_example(

 async def a_execute_scoring(
     examples: List[Example],
-    scorers: List[
-    model: Optional[Union[str, List[str], JudgevalJudge]] =
+    scorers: List[ExampleScorer],
+    model: Optional[Union[str, List[str], JudgevalJudge]] = JUDGMENT_DEFAULT_GPT_MODEL,
     ignore_errors: bool = False,
     throttle_value: int = 0,
     max_concurrent: int = 100,
     show_progress: bool = True,
 ) -> List[ScoringResult]:
     """
-    Executes evaluations of `Example`s asynchronously using one or more `
-    Each `Example` will be evaluated by all of the `
+    Executes evaluations of `Example`s asynchronously using one or more `ExampleScorer`s.
+    Each `Example` will be evaluated by all of the `ExampleScorer`s in the `scorers` list.

     Args:
         examples (List[Example]): A list of `Example` objects to be evaluated.
-        scorers (List[
+        scorers (List[ExampleScorer]): A list of `ExampleScorer` objects to evaluate the examples.
         model (Union[str, List[str], JudgevalJudge]): The model to use for evaluation.
         ignore_errors (bool): Whether to ignore errors during evaluation.
         throttle_value (int): The amount of time to wait between starting each task.
@@ -88,19 +88,15 @@ async def a_execute_scoring(
         except Exception as e:
             judgeval_logger.error(f"Error executing function: {e}")
             if kwargs.get("ignore_errors", False):
-                # Simply return None when ignoring errors, as expected by the test
                 return None
-            # If we're not ignoring errors, propagate the exception
             raise

-    # Add model to scorers
     for scorer in scorers:
-        if not scorer.model:
+        if not scorer.model and isinstance(model, str):
             scorer._add_model(model)

-    scoring_results: List[ScoringResult] = [None for _ in examples]
+    scoring_results: List[Optional[ScoringResult]] = [None for _ in examples]
     tasks = []
-    cloned_scorers: List[BaseScorer]

     if show_progress:
         with tqdm_asyncio(
@@ -115,7 +111,7 @@
                     pbar.update(1)
                     continue

-                cloned_scorers = clone_scorers(scorers)
+                cloned_scorers = clone_scorers(scorers)  # type: ignore
                 task = execute_with_semaphore(
                     func=a_eval_examples_helper,
                     scorers=cloned_scorers,
@@ -135,7 +131,7 @@
             if len(scorers) == 0:
                 continue

-            cloned_scorers = clone_scorers(scorers)
+            cloned_scorers = clone_scorers(scorers)  # type: ignore
             task = execute_with_semaphore(
                 func=a_eval_examples_helper,
                 scorers=cloned_scorers,
@@ -149,13 +145,13 @@

             await asyncio.sleep(throttle_value)
         await asyncio.gather(*tasks)
-    return scoring_results
+    return [result for result in scoring_results if result is not None]


 async def a_eval_examples_helper(
-    scorers: List[
+    scorers: List[ExampleScorer],
     example: Example,
-    scoring_results: List[ScoringResult],
+    scoring_results: List[Optional[ScoringResult]],
     score_index: int,
     ignore_errors: bool,
     pbar: Optional[tqdm_asyncio] = None,
@@ -164,7 +160,7 @@ async def a_eval_examples_helper(
     Evaluate a single example asynchronously using a list of scorers.

     Args:
-        scorers (List[
+        scorers (List[ExampleScorer]): List of ExampleScorer objects to evaluate the example.
         example (Example): The example to be evaluated.
         scoring_results (List[ScoringResult]): List to store the scoring results.
         score_index (int): Index at which the result should be stored in scoring_results.
@@ -174,24 +170,18 @@
         None
     """

-    # scoring the Example
     scoring_start_time = time.perf_counter()

     tasks = [safe_a_score_example(scorer, example) for scorer in scorers]

     await asyncio.gather(*tasks)

-    # Now that all the scoring functions of each scorer have executed, we collect
-    # the results and update the ScoringResult with the scorer data
     success = True
     scorer_data_list = []
     for scorer in scorers:
-        # At this point, the scorer has been executed and already contains data.
         if getattr(scorer, "skipped", False):
             continue
-        scorer_data = create_scorer_data(
-            scorer
-        )  # Fetch scorer data from completed scorer evaluation
+        scorer_data = create_scorer_data(scorer)
         for s in scorer_data:
             success = success and s.success
         scorer_data_list.extend(scorer_data)
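score.py now types scoring_results as List[Optional[ScoringResult]] and filters out the None slots before returning. A generic, self-contained sketch of that concurrency pattern (semaphore-bounded tasks writing into preallocated slots, ignored errors leaving a None); this is illustrative, not judgeval code:

```python
# Sketch of the bounded-concurrency scoring pattern used by a_execute_scoring.
import asyncio
from typing import List, Optional


async def score_all(items: List[str], max_concurrent: int = 100) -> List[str]:
    semaphore = asyncio.Semaphore(max_concurrent)
    results: List[Optional[str]] = [None for _ in items]

    async def score_one(index: int, item: str) -> None:
        async with semaphore:
            try:
                results[index] = item.upper()  # stand-in for a scorer call
            except Exception:
                pass  # with ignore_errors=True the slot simply stays None

    await asyncio.gather(*(score_one(i, item) for i, item in enumerate(items)))
    return [r for r in results if r is not None]


print(asyncio.run(score_all(["a", "b", "c"])))  # ['A', 'B', 'C']
```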
judgeval/scorers/utils.py CHANGED
@@ -2,15 +2,9 @@
 Util functions for Scorer objects
 """

-import
-import nest_asyncio
-import orjson
-import re
-from typing import List, Optional
+from typing import List

 from judgeval.scorers import BaseScorer
-from judgeval.data import Example, ExampleParams
-from judgeval.scorers.exceptions import MissingExampleParamsError


 def clone_scorers(scorers: List[BaseScorer]) -> List[BaseScorer]:
@@ -21,99 +15,3 @@ def clone_scorers(scorers: List[BaseScorer]) -> List[BaseScorer]:
     for s in scorers:
         cloned_scorers.append(s.model_copy(deep=True))
     return cloned_scorers
-
-
-def parse_response_json(llm_response: str, scorer: Optional[BaseScorer] = None) -> dict:
-    """
-    Extracts JSON output from an LLM response and returns it as a dictionary.
-
-    If the JSON is invalid, the error is forwarded to the `scorer`, if provided.
-
-    Args:
-        llm_response (str): The response from an LLM.
-        scorer (BaseScorer, optional): The scorer object to forward errors to (if any).
-    """
-    start = llm_response.find("{")  # opening bracket
-    end = llm_response.rfind("}") + 1  # closing bracket
-
-    if end == 0 and start != -1:  # add the closing bracket if it's missing
-        llm_response = llm_response + "}"
-        end = len(llm_response)
-
-    json_str = (
-        llm_response[start:end] if start != -1 and end != 0 else ""
-    )  # extract the JSON string
-    json_str = re.sub(
-        r",\s*([\]}])", r"\1", json_str
-    )  # Remove trailing comma if present
-
-    try:
-        return orjson.loads(json_str)
-    except orjson.JSONDecodeError:
-        error_str = "Evaluation LLM outputted an invalid JSON. Please use a stronger evaluation model."
-        if scorer is not None:
-            scorer.error = error_str
-        raise ValueError(error_str)
-    except Exception as e:
-        raise Exception(f"An unexpected error occurred: {str(e)}")
-
-
-def get_or_create_event_loop() -> asyncio.AbstractEventLoop:
-    """
-    Get or create an asyncio event loop.
-
-    This function attempts to retrieve the current event loop using `asyncio.get_event_loop()`.
-    If the event loop is already running, it applies the `nest_asyncio` patch to allow nested
-    asynchronous execution. If the event loop is closed or not found, it creates a new event loop
-    and sets it as the current event loop.
-
-    Returns:
-        asyncio.AbstractEventLoop: The current or newly created event loop.
-
-    Raises:
-        RuntimeError: If the event loop is closed.
-    """
-    try:
-        loop = asyncio.get_event_loop()
-        if loop.is_running():
-            print(
-                "Event loop is already running. Applying nest_asyncio patch to allow async execution..."
-            )
-            nest_asyncio.apply()
-
-        if loop.is_closed():
-            raise RuntimeError
-    except RuntimeError:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-    return loop
-
-
-def check_example_params(
-    example: Example,
-    example_params: List[ExampleParams],
-    scorer: BaseScorer,
-):
-    if isinstance(example, Example) is False:
-        error_str = f"in check_example_params(): Expected example to be of type 'Example', but got {type(example)}"
-        scorer.error = error_str
-        raise MissingExampleParamsError(error_str)
-
-    missing_params = []
-    for param in example_params:
-        if getattr(example, param.value) is None:
-            missing_params.append(f"'{param.value}'")
-
-    if missing_params:
-        if len(missing_params) == 1:
-            missing_params_str = missing_params[0]
-        elif len(missing_params) == 2:
-            missing_params_str = " and ".join(missing_params)
-        else:
-            missing_params_str = (
-                ", ".join(missing_params[:-1]) + ", and " + missing_params[-1]
-            )
-
-        error_str = f"{missing_params_str} fields in example cannot be None for the '{scorer.__name__}' scorer"
-        scorer.error = error_str
-        raise MissingExampleParamsError(error_str)