judgeval 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
@@ -11,7 +11,7 @@ from pydantic import BaseModel
 
 from judgeval.common.api.json_encoder import json_encoder
 from judgeval.data import TraceSpan
-from judgeval.evaluation_run import EvaluationRun
+from judgeval.data.evaluation_run import EvaluationRun
 
 
 class SpanTransformer:
@@ -150,6 +150,7 @@ class SpanTransformer:
             "additional_metadata": judgment_data.get("additional_metadata"),
             "has_evaluation": judgment_data.get("has_evaluation", False),
             "agent_name": judgment_data.get("agent_name"),
+            "class_name": judgment_data.get("class_name"),
             "state_before": judgment_data.get("state_before"),
             "state_after": judgment_data.get("state_after"),
             "update_id": judgment_data.get("update_id", 1),
@@ -0,0 +1,104 @@
+from typing import List, Optional, Union
+from pydantic import field_validator, model_validator, Field
+from datetime import datetime, timezone
+import uuid
+
+from judgeval.data import Example
+from judgeval.scorers import BaseScorer, APIScorerConfig
+from judgeval.constants import ACCEPTABLE_MODELS
+from judgeval.data.judgment_types import EvaluationRunJudgmentType
+
+
+class EvaluationRun(EvaluationRunJudgmentType):
+    """
+    Stores example and evaluation scorers together for running an eval task
+
+    Args:
+        project_name (str): The name of the project the evaluation results belong to
+        eval_name (str): A name for this evaluation run
+        examples (List[Example]): The examples to evaluate
+        scorers (List[Union[BaseScorer, APIScorerConfig]]): A list of scorers to use for evaluation
+        model (str): The model used as a judge when using LLM as a Judge
+        metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
+    """
+
+    id: Optional[str] = Field(default_factory=lambda: str(uuid.uuid4()))
+    created_at: Optional[str] = Field(
+        default_factory=lambda: datetime.now(timezone.utc).isoformat()
+    )
+    custom_scorers: Optional[List[BaseScorer]] = None
+    judgment_scorers: Optional[List[APIScorerConfig]] = None
+    organization_id: Optional[str] = None
+
+    def __init__(
+        self,
+        scorers: Optional[List[Union[BaseScorer, APIScorerConfig]]] = None,
+        **kwargs,
+    ):
+        """
+        Initialize EvaluationRun with automatic scorer classification.
+
+        Args:
+            scorers: List of scorers that will be automatically sorted into custom_scorers or judgment_scorers
+            **kwargs: Other initialization arguments
+        """
+        if scorers is not None:
+            # Automatically sort scorers into appropriate fields
+            custom_scorers = [s for s in scorers if isinstance(s, BaseScorer)]
+            judgment_scorers = [s for s in scorers if isinstance(s, APIScorerConfig)]
+
+            # Always set both fields as lists (even if empty) to satisfy validation
+            kwargs["custom_scorers"] = custom_scorers
+            kwargs["judgment_scorers"] = judgment_scorers
+
+        super().__init__(**kwargs)
+
+    def model_dump(self, **kwargs):
+        data = super().model_dump(**kwargs)
+        data["custom_scorers"] = [s.model_dump() for s in self.custom_scorers]
+        data["judgment_scorers"] = [s.model_dump() for s in self.judgment_scorers]
+        data["examples"] = [example.model_dump() for example in self.examples]
+
+        return data
+
+    @field_validator("examples")
+    def validate_examples(cls, v):
+        if not v:
+            raise ValueError("Examples cannot be empty.")
+        for item in v:
+            if not isinstance(item, Example):
+                raise ValueError(f"Item of type {type(item)} is not a Example")
+        return v
+
+    @model_validator(mode="after")
+    @classmethod
+    def validate_scorer_lists(cls, values):
+        custom_scorers = values.custom_scorers
+        judgment_scorers = values.judgment_scorers
+
+        # Check that both lists are not empty
+        if not custom_scorers and not judgment_scorers:
+            raise ValueError(
+                "At least one of custom_scorers or judgment_scorers must be provided."
+            )
+
+        # Check that only one list is filled
+        if custom_scorers and judgment_scorers:
+            raise ValueError(
+                "Only one of custom_scorers or judgment_scorers can be provided, not both."
+            )
+
+        return values
+
+    @field_validator("model")
+    def validate_model(cls, v, values):
+        if not v:
+            raise ValueError("Model cannot be empty.")
+
+        # Check if model is string or list of strings
+        if isinstance(v, str):
+            if v not in ACCEPTABLE_MODELS:
+                raise ValueError(
+                    f"Model name {v} not recognized. Please select a valid model name.)"
+                )
+            return v
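The constructor above replaces the old single `scorers` field: a mixed list is split into `custom_scorers` (local `BaseScorer` subclasses) and `judgment_scorers` (`APIScorerConfig` instances), and the validators then require exactly one of the two lists to be non-empty. A minimal usage sketch of that classification, using illustrative `Example` and `APIScorerConfig` field values that are not taken from this diff:

    from judgeval.data import Example
    from judgeval.data.evaluation_run import EvaluationRun
    from judgeval.scorers import APIScorerConfig

    # Hypothetical field values; only the sorting behaviour is the point here.
    run = EvaluationRun(
        project_name="demo_project",
        eval_name="demo_run",
        examples=[Example(input="What is 2 + 2?", actual_output="4")],
        scorers=[APIScorerConfig(score_type="faithfulness")],
        model="gpt-4.1",  # must appear in ACCEPTABLE_MODELS
    )
    assert run.judgment_scorers and not run.custom_scorers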
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  openapi_new.json
-#   timestamp: 2025-08-01T22:19:19+00:00
+#   timestamp: 2025-08-08T18:50:51+00:00
 
 from __future__ import annotations
 
@@ -51,6 +51,31 @@ class ScorerConfigJudgmentType(BaseModel):
     kwargs: Annotated[Optional[Dict[str, Any]], Field(title="Kwargs")] = None
 
 
+class BaseScorerJudgmentType(BaseModel):
+    score_type: Annotated[str, Field(title="Score Type")]
+    threshold: Annotated[Optional[float], Field(title="Threshold")] = 0.5
+    name: Annotated[Optional[str], Field(title="Name")] = None
+    class_name: Annotated[Optional[str], Field(title="Class Name")] = None
+    score: Annotated[Optional[float], Field(title="Score")] = None
+    score_breakdown: Annotated[
+        Optional[Dict[str, Any]], Field(title="Score Breakdown")
+    ] = None
+    reason: Annotated[Optional[str], Field(title="Reason")] = ""
+    using_native_model: Annotated[Optional[bool], Field(title="Using Native Model")] = (
+        None
+    )
+    success: Annotated[Optional[bool], Field(title="Success")] = None
+    model: Annotated[Optional[str], Field(title="Model")] = None
+    model_client: Annotated[Any, Field(title="Model Client")] = None
+    strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = False
+    error: Annotated[Optional[str], Field(title="Error")] = None
+    additional_metadata: Annotated[
+        Optional[Dict[str, Any]], Field(title="Additional Metadata")
+    ] = None
+    user: Annotated[Optional[str], Field(title="User")] = None
+    server_hosted: Annotated[Optional[bool], Field(title="Server Hosted")] = False
+
+
 class TraceUsageJudgmentType(BaseModel):
     prompt_tokens: Annotated[Optional[int], Field(title="Prompt Tokens")] = None
     completion_tokens: Annotated[Optional[int], Field(title="Completion Tokens")] = None
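The new generated `BaseScorerJudgmentType` carries the serialized state of a locally-run scorer (score, reason, error, and so on). A small sketch, assuming the class is importable from `judgeval.data.judgment_types` like the other generated types referenced in this diff:

    from judgeval.data.judgment_types import BaseScorerJudgmentType

    # Only score_type is required; the remaining fields fall back to the defaults above.
    record = BaseScorerJudgmentType(score_type="faithfulness", threshold=0.7)
    print(record.strict_mode, record.server_hosted)  # False False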
@@ -90,16 +115,21 @@ class HTTPValidationErrorJudgmentType(BaseModel):
     ] = None
 
 
-class JudgmentEvalJudgmentType(BaseModel):
+class EvaluationRunJudgmentType(BaseModel):
+    id: Annotated[Optional[str], Field(title="Id")] = None
     project_name: Annotated[Optional[str], Field(title="Project Name")] = None
     eval_name: Annotated[Optional[str], Field(title="Eval Name")] = None
     examples: Annotated[List[ExampleJudgmentType], Field(title="Examples")]
-    scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
+    custom_scorers: Annotated[
+        Optional[List[BaseScorerJudgmentType]], Field(title="Custom Scorers")
+    ] = Field(default_factory=list)
+    judgment_scorers: Annotated[
+        Optional[List[ScorerConfigJudgmentType]], Field(title="Judgment Scorers")
+    ] = Field(default_factory=list)
     model: Annotated[str, Field(title="Model")]
-    append: Annotated[Optional[bool], Field(title="Append")] = False
-    override: Annotated[Optional[bool], Field(title="Override")] = False
     trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
     trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
+    created_at: Annotated[Optional[str], Field(title="Created At")] = None
 
 
 class TraceSpanJudgmentType(BaseModel):
@@ -123,6 +153,7 @@ class TraceSpanJudgmentType(BaseModel):
     ] = None
     has_evaluation: Annotated[Optional[bool], Field(title="Has Evaluation")] = False
     agent_name: Annotated[Optional[str], Field(title="Agent Name")] = None
+    class_name: Annotated[Optional[str], Field(title="Class Name")] = None
     state_before: Annotated[Optional[Dict[str, Any]], Field(title="State Before")] = (
         None
     )
@@ -172,8 +203,6 @@ class TraceRunJudgmentType(BaseModel):
     traces: Annotated[List[TraceJudgmentType], Field(title="Traces")]
     scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
     model: Annotated[str, Field(title="Model")]
-    append: Annotated[Optional[bool], Field(title="Append")] = False
-    override: Annotated[Optional[bool], Field(title="Override")] = False
     trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
     tools: Annotated[Optional[List[Dict[str, Any]]], Field(title="Tools")] = None
 
@@ -181,5 +210,5 @@ class TraceRunJudgmentType(BaseModel):
 class EvalResultsJudgmentType(BaseModel):
     results: Annotated[List[ScoringResultJudgmentType], Field(title="Results")]
     run: Annotated[
-        Union[TraceRunJudgmentType, JudgmentEvalJudgmentType], Field(title="Run")
+        Union[TraceRunJudgmentType, EvaluationRunJudgmentType], Field(title="Run")
     ]
judgeval/data/trace.py CHANGED
@@ -32,6 +32,7 @@ class TraceSpan(TraceSpanJudgmentType):
             "usage": self.usage.model_dump() if self.usage else None,
             "has_evaluation": self.has_evaluation,
             "agent_name": self.agent_name,
+            "class_name": self.class_name,
             "state_before": self.state_before,
             "state_after": self.state_after,
             "additional_metadata": json_encoder(self.additional_metadata),
@@ -29,8 +29,6 @@ class TraceRun(BaseModel):
     scorers: List[Union[APIScorerConfig, BaseScorer]]
     model: Optional[str] = DEFAULT_GPT_MODEL
     trace_span_id: Optional[str] = None
-    append: Optional[bool] = False
-    override: Optional[bool] = False
     rules: Optional[List[Rule]] = None
     tools: Optional[List[Dict[str, Any]]] = None
 
@@ -133,7 +133,8 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
         inputs: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Start tracking a span, ensuring trace client exists"""
-
+        if name.startswith("__") and name.endswith("__"):
+            return
         start_time = time.time()
         span_id = str(uuid.uuid4())
         parent_span_id: Optional[str] = None
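The new guard drops spans whose names are dunder-style identifiers before any tracking state is created. A hedged illustration of the check (the example names are assumptions about framework-internal nodes, not taken from this diff):

    def is_internal_span(name: str) -> bool:
        # Same predicate the handler now uses to skip a span early.
        return name.startswith("__") and name.endswith("__")

    print(is_internal_span("__start__"))  # True  -> span is not tracked
    print(is_internal_span("my_chain"))   # False -> span is tracked as before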
@@ -4,6 +4,8 @@ Implements the JudgmentClient to interact with the Judgment API.
 
 from __future__ import annotations
 import os
+import importlib.util
+from pathlib import Path
 from uuid import uuid4
 from typing import Optional, List, Dict, Any, Union, Callable, TYPE_CHECKING
 
@@ -16,7 +18,7 @@ from judgeval.scorers import (
     APIScorerConfig,
     BaseScorer,
 )
-from judgeval.evaluation_run import EvaluationRun
+from judgeval.data.evaluation_run import EvaluationRun
 from judgeval.run_evaluation import (
     run_eval,
     assert_test,
@@ -95,8 +97,6 @@ class JudgmentClient(metaclass=SingletonMeta):
         project_name: str = "default_project",
         eval_run_name: str = "default_eval_trace",
         model: Optional[str] = DEFAULT_GPT_MODEL,
-        append: bool = False,
-        override: bool = False,
     ) -> List[ScoringResult]:
         try:
             if examples and not function:
@@ -114,12 +114,11 @@ class JudgmentClient(metaclass=SingletonMeta):
                 traces=traces,
                 scorers=scorers,
                 model=model,
-                append=append,
                 organization_id=self.organization_id,
                 tools=tools,
             )
             return run_trace_eval(
-                trace_run, self.judgment_api_key, override, function, tracer, examples
+                trace_run, self.judgment_api_key, function, tracer, examples
             )
         except ValueError as e:
             raise ValueError(
@@ -135,8 +134,6 @@ class JudgmentClient(metaclass=SingletonMeta):
         model: Optional[str] = DEFAULT_GPT_MODEL,
         project_name: str = "default_project",
         eval_run_name: str = "default_eval_run",
-        override: bool = False,
-        append: bool = False,
     ) -> List[ScoringResult]:
         """
         Executes an evaluation of `Example`s using one or more `Scorer`s
@@ -147,21 +144,13 @@ class JudgmentClient(metaclass=SingletonMeta):
             model (str): The model used as a judge when using LLM as a Judge
             project_name (str): The name of the project the evaluation results belong to
             eval_run_name (str): A name for this evaluation run
-            override (bool): Whether to override an existing evaluation run with the same name
-            append (bool): Whether to append to an existing evaluation run with the same name
 
         Returns:
             List[ScoringResult]: The results of the evaluation
         """
-        if override and append:
-            raise ValueError(
-                "Cannot set both override and append to True. Please choose one."
-            )
 
         try:
             eval = EvaluationRun(
-                append=append,
-                override=override,
                 project_name=project_name,
                 eval_name=eval_run_name,
                 examples=examples,
@@ -172,7 +161,6 @@ class JudgmentClient(metaclass=SingletonMeta):
             return run_eval(
                 eval,
                 self.judgment_api_key,
-                override,
             )
         except ValueError as e:
             raise ValueError(
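With `append` and `override` removed, the client-side evaluation call reduces to the remaining keyword arguments shown above. A minimal sketch, assuming the enclosing method is `JudgmentClient.run_evaluation` (its name is not visible in this hunk) and that `JudgmentClient` is exported at the package top level; the example and scorer values are illustrative:

    from judgeval import JudgmentClient  # assumed top-level export
    from judgeval.data import Example
    from judgeval.scorers import APIScorerConfig

    client = JudgmentClient()
    results = client.run_evaluation(
        examples=[Example(input="What is 2 + 2?", actual_output="4")],  # illustrative fields
        scorers=[APIScorerConfig(score_type="faithfulness")],           # illustrative config
        model="gpt-4.1",                                                # must be in ACCEPTABLE_MODELS
        project_name="demo_project",
        eval_run_name="demo_run",
    )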
@@ -181,22 +169,6 @@ class JudgmentClient(metaclass=SingletonMeta):
         except Exception as e:
             raise Exception(f"An unexpected error occurred during evaluation: {str(e)}")
 
-    def pull_eval(
-        self, project_name: str, eval_run_name: str
-    ) -> List[Dict[str, Union[str, List[ScoringResult]]]]:
-        """Pull evaluation results from the server.
-
-        Args:
-            project_name (str): Name of the project
-            eval_run_name (str): Name of the evaluation run
-
-        Returns:
-            Dict[str, Union[str, List[ScoringResult]]]: Dictionary containing:
-                - id (str): The evaluation run ID
-                - results (List[ScoringResult]): List of scoring results
-        """
-        return self.api_client.fetch_evaluation_results(project_name, eval_run_name)
-
     def create_project(self, project_name: str) -> bool:
         """
         Creates a project on the server.
@@ -222,8 +194,6 @@ class JudgmentClient(metaclass=SingletonMeta):
         model: Optional[str] = DEFAULT_GPT_MODEL,
         project_name: str = "default_test",
         eval_run_name: str = str(uuid4()),
-        override: bool = False,
-        append: bool = False,
     ) -> None:
         """
         Asserts a test by running the evaluation and checking the results for success
@@ -234,9 +204,6 @@ class JudgmentClient(metaclass=SingletonMeta):
             model (str): The model used as a judge when using LLM as a Judge
             project_name (str): The name of the project the evaluation results belong to
             eval_run_name (str): A name for this evaluation run
-            override (bool): Whether to override an existing evaluation run with the same name
-            append (bool): Whether to append to an existing evaluation run with the same name
-            async_execution (bool): Whether to run the evaluation asynchronously
         """
 
         results: List[ScoringResult]
@@ -247,8 +214,6 @@ class JudgmentClient(metaclass=SingletonMeta):
             model=model,
             project_name=project_name,
             eval_run_name=eval_run_name,
-            override=override,
-            append=append,
         )
         assert_test(results)
 
@@ -263,9 +228,6 @@ class JudgmentClient(metaclass=SingletonMeta):
         model: Optional[str] = DEFAULT_GPT_MODEL,
         project_name: str = "default_test",
         eval_run_name: str = str(uuid4()),
-        override: bool = False,
-        append: bool = False,
-        async_execution: bool = False,
     ) -> None:
         """
         Asserts a test by running the evaluation and checking the results for success
@@ -276,12 +238,9 @@ class JudgmentClient(metaclass=SingletonMeta):
             model (str): The model used as a judge when using LLM as a Judge
             project_name (str): The name of the project the evaluation results belong to
             eval_run_name (str): A name for this evaluation run
-            override (bool): Whether to override an existing evaluation run with the same name
-            append (bool): Whether to append to an existing evaluation run with the same name
             function (Optional[Callable]): A function to use for evaluation
             tracer (Optional[Union[Tracer, BaseCallbackHandler]]): A tracer to use for evaluation
             tools (Optional[List[Dict[str, Any]]]): A list of tools to use for evaluation
-            async_execution (bool): Whether to run the evaluation asynchronously
         """
 
         # Check for enable_param_checking and tools
@@ -302,11 +261,107 @@ class JudgmentClient(metaclass=SingletonMeta):
             model=model,
             project_name=project_name,
             eval_run_name=eval_run_name,
-            override=override,
-            append=append,
             function=function,
             tracer=tracer,
             tools=tools,
         )
 
         assert_test(results)
+
+    def _extract_scorer_name(self, scorer_file_path: str) -> str:
+        """Extract scorer name from the scorer file by importing it."""
+        try:
+            spec = importlib.util.spec_from_file_location(
+                "scorer_module", scorer_file_path
+            )
+            if spec is None or spec.loader is None:
+                raise ImportError(f"Could not load spec from {scorer_file_path}")
+
+            module = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(module)
+
+            for attr_name in dir(module):
+                attr = getattr(module, attr_name)
+                if (
+                    isinstance(attr, type)
+                    and any("Scorer" in str(base) for base in attr.__mro__)
+                    and attr.__module__ == "scorer_module"
+                ):
+                    try:
+                        # Instantiate the scorer and get its name
+                        scorer_instance = attr()
+                        if hasattr(scorer_instance, "name"):
+                            return scorer_instance.name
+                    except Exception:
+                        # Skip if instantiation fails
+                        continue
+
+            raise AttributeError("No scorer class found or could be instantiated")
+        except Exception as e:
+            judgeval_logger.warning(f"Could not extract scorer name: {e}")
+            return Path(scorer_file_path).stem
+
+    def save_custom_scorer(
+        self,
+        scorer_file_path: str,
+        requirements_file_path: Optional[str] = None,
+        unique_name: Optional[str] = None,
+    ) -> bool:
+        """
+        Upload custom ExampleScorer from files to backend.
+
+        Args:
+            scorer_file_path: Path to Python file containing CustomScorer class
+            requirements_file_path: Optional path to requirements.txt
+            unique_name: Optional unique identifier (auto-detected from scorer.name if not provided)
+
+        Returns:
+            bool: True if upload successful
+
+        Raises:
+            ValueError: If scorer file is invalid
+            FileNotFoundError: If scorer file doesn't exist
+        """
+        import os
+
+        if not os.path.exists(scorer_file_path):
+            raise FileNotFoundError(f"Scorer file not found: {scorer_file_path}")
+
+        # Auto-detect scorer name if not provided
+        if unique_name is None:
+            unique_name = self._extract_scorer_name(scorer_file_path)
+            judgeval_logger.info(f"Auto-detected scorer name: '{unique_name}'")
+
+        # Read scorer code
+        with open(scorer_file_path, "r") as f:
+            scorer_code = f.read()
+
+        # Read requirements (optional)
+        requirements_text = ""
+        if requirements_file_path and os.path.exists(requirements_file_path):
+            with open(requirements_file_path, "r") as f:
+                requirements_text = f.read()
+
+        # Upload to backend
+        judgeval_logger.info(
+            f"Uploading custom scorer: {unique_name}, this can take a couple of minutes..."
+        )
+        try:
+            response = self.api_client.upload_custom_scorer(
+                scorer_name=unique_name,
+                scorer_code=scorer_code,
+                requirements_text=requirements_text,
+            )
+
+            if response.get("status") == "success":
+                judgeval_logger.info(
+                    f"Successfully uploaded custom scorer: {unique_name}"
+                )
+                return True
+            else:
+                judgeval_logger.error(f"Failed to upload custom scorer: {unique_name}")
+                return False
+
+        except Exception as e:
+            judgeval_logger.error(f"Error uploading custom scorer: {e}")
+            raise
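A short usage sketch for the new upload path, assuming a local `my_scorer.py` that defines a custom scorer class and that `JudgmentClient` is exported at the package top level (file names and that import are placeholders/assumptions):

    from judgeval import JudgmentClient  # assumed top-level export

    client = JudgmentClient()
    # unique_name is optional; when omitted it is auto-detected from the scorer's .name
    uploaded = client.save_custom_scorer(
        scorer_file_path="my_scorer.py",
        requirements_file_path="requirements.txt",
    )
    print("upload ok" if uploaded else "upload failed")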
@@ -13,9 +13,8 @@ import time
 from judgeval.common.logger import judgeval_logger
 from judgeval.constants import MAX_CONCURRENT_EVALUATIONS
 from judgeval.data import ScoringResult
-from judgeval.evaluation_run import EvaluationRun
+from judgeval.data.evaluation_run import EvaluationRun
 from judgeval.utils.async_utils import safe_run_async
-from judgeval.scorers import BaseScorer
 from judgeval.scorers.score import a_execute_scoring
 
 
@@ -43,9 +42,8 @@ class LocalEvaluationQueue:
 
     def _process_run(self, evaluation_run: EvaluationRun) -> List[ScoringResult]:
         """Execute evaluation run locally and return results."""
-        local_scorers = [s for s in evaluation_run.scorers if isinstance(s, BaseScorer)]
 
-        if not local_scorers:
+        if not evaluation_run.custom_scorers:
             raise ValueError(
                 "LocalEvaluationQueue only supports runs with local scorers (BaseScorer). "
                 "Found only APIScorerConfig instances."
@@ -54,7 +52,7 @@ class LocalEvaluationQueue:
         return safe_run_async(
             a_execute_scoring(
                 evaluation_run.examples,
-                local_scorers,
+                evaluation_run.custom_scorers,
                 model=evaluation_run.model,
                 throttle_value=0,
                 max_concurrent=self._max_concurrent // self._num_workers,