langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512040805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +7 -1
  3. langfun/core/agentic/__init__.py +8 -1
  4. langfun/core/agentic/action.py +740 -112
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +189 -24
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +11 -2
  20. langfun/core/data/conversion/gemini_test.py +48 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +48 -44
  24. langfun/core/eval/base_test.py +5 -5
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +2 -0
  29. langfun/core/eval/v2/checkpointing.py +76 -7
  30. langfun/core/eval/v2/checkpointing_test.py +9 -2
  31. langfun/core/eval/v2/config_saver.py +37 -0
  32. langfun/core/eval/v2/config_saver_test.py +36 -0
  33. langfun/core/eval/v2/eval_test_helper.py +104 -3
  34. langfun/core/eval/v2/evaluation.py +92 -17
  35. langfun/core/eval/v2/evaluation_test.py +9 -3
  36. langfun/core/eval/v2/example.py +50 -40
  37. langfun/core/eval/v2/example_test.py +16 -8
  38. langfun/core/eval/v2/experiment.py +84 -15
  39. langfun/core/eval/v2/experiment_test.py +19 -0
  40. langfun/core/eval/v2/metric_values.py +31 -3
  41. langfun/core/eval/v2/metric_values_test.py +32 -0
  42. langfun/core/eval/v2/metrics.py +157 -44
  43. langfun/core/eval/v2/metrics_test.py +39 -18
  44. langfun/core/eval/v2/progress.py +31 -1
  45. langfun/core/eval/v2/progress_test.py +27 -0
  46. langfun/core/eval/v2/progress_tracking.py +13 -5
  47. langfun/core/eval/v2/progress_tracking_test.py +9 -1
  48. langfun/core/eval/v2/reporting.py +90 -71
  49. langfun/core/eval/v2/reporting_test.py +24 -6
  50. langfun/core/eval/v2/runners/__init__.py +30 -0
  51. langfun/core/eval/v2/{runners.py → runners/base.py} +72 -180
  52. langfun/core/eval/v2/runners/beam.py +354 -0
  53. langfun/core/eval/v2/runners/beam_test.py +153 -0
  54. langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
  55. langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
  56. langfun/core/eval/v2/runners/debug.py +40 -0
  57. langfun/core/eval/v2/runners/debug_test.py +76 -0
  58. langfun/core/eval/v2/runners/parallel.py +243 -0
  59. langfun/core/eval/v2/runners/parallel_test.py +182 -0
  60. langfun/core/eval/v2/runners/sequential.py +47 -0
  61. langfun/core/eval/v2/runners/sequential_test.py +169 -0
  62. langfun/core/langfunc.py +45 -130
  63. langfun/core/langfunc_test.py +7 -5
  64. langfun/core/language_model.py +189 -36
  65. langfun/core/language_model_test.py +54 -3
  66. langfun/core/llms/__init__.py +12 -1
  67. langfun/core/llms/anthropic.py +157 -2
  68. langfun/core/llms/azure_openai.py +29 -17
  69. langfun/core/llms/cache/base.py +25 -3
  70. langfun/core/llms/cache/in_memory.py +48 -7
  71. langfun/core/llms/cache/in_memory_test.py +14 -4
  72. langfun/core/llms/compositional.py +25 -1
  73. langfun/core/llms/deepseek.py +30 -2
  74. langfun/core/llms/fake.py +32 -1
  75. langfun/core/llms/gemini.py +64 -12
  76. langfun/core/llms/gemini_test.py +110 -0
  77. langfun/core/llms/google_genai.py +34 -1
  78. langfun/core/llms/groq.py +28 -3
  79. langfun/core/llms/llama_cpp.py +23 -4
  80. langfun/core/llms/openai.py +120 -3
  81. langfun/core/llms/openai_compatible.py +148 -27
  82. langfun/core/llms/openai_compatible_test.py +207 -20
  83. langfun/core/llms/openai_test.py +0 -2
  84. langfun/core/llms/rest.py +16 -1
  85. langfun/core/llms/vertexai.py +58 -8
  86. langfun/core/logging.py +1 -1
  87. langfun/core/mcp/__init__.py +10 -0
  88. langfun/core/mcp/client.py +177 -0
  89. langfun/core/mcp/client_test.py +71 -0
  90. langfun/core/mcp/session.py +241 -0
  91. langfun/core/mcp/session_test.py +54 -0
  92. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  93. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  94. langfun/core/mcp/tool.py +254 -0
  95. langfun/core/mcp/tool_test.py +197 -0
  96. langfun/core/memory.py +1 -0
  97. langfun/core/message.py +160 -55
  98. langfun/core/message_test.py +65 -81
  99. langfun/core/modalities/__init__.py +8 -0
  100. langfun/core/modalities/audio.py +21 -1
  101. langfun/core/modalities/image.py +73 -3
  102. langfun/core/modalities/image_test.py +116 -0
  103. langfun/core/modalities/mime.py +64 -3
  104. langfun/core/modalities/mime_test.py +11 -0
  105. langfun/core/modalities/pdf.py +19 -1
  106. langfun/core/modalities/video.py +21 -1
  107. langfun/core/modality.py +167 -29
  108. langfun/core/modality_test.py +42 -12
  109. langfun/core/natural_language.py +1 -1
  110. langfun/core/sampling.py +4 -4
  111. langfun/core/sampling_test.py +20 -4
  112. langfun/core/structured/__init__.py +2 -24
  113. langfun/core/structured/completion.py +34 -44
  114. langfun/core/structured/completion_test.py +23 -43
  115. langfun/core/structured/description.py +54 -50
  116. langfun/core/structured/function_generation.py +29 -12
  117. langfun/core/structured/mapping.py +81 -37
  118. langfun/core/structured/parsing.py +95 -79
  119. langfun/core/structured/parsing_test.py +0 -3
  120. langfun/core/structured/querying.py +230 -154
  121. langfun/core/structured/querying_test.py +69 -33
  122. langfun/core/structured/schema/__init__.py +49 -0
  123. langfun/core/structured/schema/base.py +664 -0
  124. langfun/core/structured/schema/base_test.py +531 -0
  125. langfun/core/structured/schema/json.py +174 -0
  126. langfun/core/structured/schema/json_test.py +121 -0
  127. langfun/core/structured/schema/python.py +316 -0
  128. langfun/core/structured/schema/python_test.py +410 -0
  129. langfun/core/structured/schema_generation.py +33 -14
  130. langfun/core/structured/scoring.py +47 -36
  131. langfun/core/structured/tokenization.py +26 -11
  132. langfun/core/subscription.py +2 -2
  133. langfun/core/template.py +175 -50
  134. langfun/core/template_test.py +123 -17
  135. langfun/env/__init__.py +43 -0
  136. langfun/env/base_environment.py +827 -0
  137. langfun/env/base_environment_test.py +473 -0
  138. langfun/env/base_feature.py +304 -0
  139. langfun/env/base_feature_test.py +228 -0
  140. langfun/env/base_sandbox.py +842 -0
  141. langfun/env/base_sandbox_test.py +1235 -0
  142. langfun/env/event_handlers/__init__.py +14 -0
  143. langfun/env/event_handlers/chain.py +233 -0
  144. langfun/env/event_handlers/chain_test.py +253 -0
  145. langfun/env/event_handlers/event_logger.py +472 -0
  146. langfun/env/event_handlers/event_logger_test.py +304 -0
  147. langfun/env/event_handlers/metric_writer.py +726 -0
  148. langfun/env/event_handlers/metric_writer_test.py +214 -0
  149. langfun/env/interface.py +1640 -0
  150. langfun/env/interface_test.py +153 -0
  151. langfun/env/load_balancers.py +59 -0
  152. langfun/env/load_balancers_test.py +141 -0
  153. langfun/env/test_utils.py +507 -0
  154. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/METADATA +7 -3
  155. langfun-0.1.2.dev202512040805.dist-info/RECORD +217 -0
  156. langfun/core/eval/v2/runners_test.py +0 -343
  157. langfun/core/structured/schema.py +0 -987
  158. langfun/core/structured/schema_test.py +0 -982
  159. langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
  160. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/WHEEL +0 -0
  161. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/licenses/LICENSE +0 -0
  162. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/top_level.txt +0 -0
@@ -11,18 +11,18 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- """Evaluation experiment runners."""
14
+ """Base experiment runner."""
15
+
15
16
  import abc
16
- import collections
17
17
  import concurrent.futures
18
18
  import random
19
19
  import threading
20
- import time
21
20
  import traceback
22
- from typing import Any, Annotated, Callable, Iterator
21
+ from typing import Any, Annotated, Callable, Iterator, Literal
23
22
 
24
23
  from langfun import core as lf
25
24
  from langfun.core.eval.v2 import checkpointing
25
+ from langfun.core.eval.v2 import config_saver
26
26
  from langfun.core.eval.v2 import evaluation as evaluation_lib
27
27
  from langfun.core.eval.v2 import example as example_lib
28
28
  from langfun.core.eval.v2 import experiment as experiment_lib
@@ -38,35 +38,57 @@ Experiment = experiment_lib.Experiment
38
38
  Plugin = experiment_lib.Plugin
39
39
 
40
40
 
41
- _RUN_MANIFEST = 'run.json'
42
-
43
-
44
41
  class RunnerBase(Runner):
45
- """A simple runner that runs evaluations and their examples sequentially."""
42
+ """Base class for runners with plugin support and IO pooling.
43
+
44
+ `RunnerBase` provides the basic runner functionalities such as plugin
45
+ integration for checkpointing, reporting and progress tracking.
46
+ It also manages a thread pool for background IO operations.
47
+ Subclasses should implement `_run` and `_evaluate_items` for different
48
+ execution strategies.
49
+ """
46
50
 
47
- tqdm: Annotated[
48
- bool,
51
+ progress_tracker: Annotated[
52
+ Literal['tqdm', 'html', 'auto', None],
49
53
  (
50
- 'If True, force using tqdm for progress update. Otherwise, determine '
51
- 'it automatically based on the running environment (console vs. '
52
- 'notebook)'
54
+ 'If `tqdm`, force using tqdm for progress update. '
55
+ 'If `html`, force using html for progress update. '
56
+ 'If `auto`, determine it automatically based on the running '
57
+ 'environment (console vs. notebook)'
58
+ 'If `none`, disable progress update.'
53
59
  )
54
- ] = False
60
+ ] = 'auto'
55
61
 
56
62
  plugins = [
57
63
  checkpointing.BulkCheckpointer(),
58
64
  reporting.HtmlReporter(),
65
+ config_saver.RunConfigSaver(),
59
66
  ]
60
67
 
68
+ max_background_threads: Annotated[
69
+ int,
70
+ 'Max number of background threads for IO operations.'
71
+ ] = 128
72
+
61
73
  def _on_bound(self):
62
74
  super()._on_bound()
63
75
 
64
76
  # Install the tqdm plugin if needed.
65
- with pg.notify_on_change(False):
66
- self.plugins.append(progress_tracking.progress_tracker(self.tqdm))
77
+ if self.progress_tracker is not None:
78
+ with pg.notify_on_change(False):
79
+ self.plugins.append(
80
+ progress_tracking.progress_tracker(self.progress_tracker)
81
+ )
82
+
83
+ if self.max_background_threads > 0:
84
+ self._io_pool_lock = threading.Lock()
85
+ self._io_pool = concurrent.futures.ThreadPoolExecutor(
86
+ max_workers=self.max_background_threads
87
+ )
88
+ else:
89
+ self._io_pool_lock = None
90
+ self._io_pool = None
67
91
 
68
- self._io_pool_lock = threading.Lock()
69
- self._io_pool = concurrent.futures.ThreadPoolExecutor(max_workers=16)
70
92
  # TODO(daiyip): render background errors.
71
93
  self._background_last_error = None
72
94
 
@@ -78,9 +100,12 @@ class RunnerBase(Runner):
78
100
  except Exception as e: # pylint: disable=broad-except
79
101
  self._background_last_error = e
80
102
 
81
- with self._io_pool_lock:
82
- if self._io_pool is not None:
83
- self._io_pool.submit(_background_run, *args, **kwargs)
103
+ if self.max_background_threads > 0:
104
+ with self._io_pool_lock:
105
+ if self._io_pool is not None:
106
+ self._io_pool.submit(_background_run, *args, **kwargs)
107
+ else:
108
+ _background_run(*args, **kwargs)
84
109
 
85
110
  def _all_plugins(self, experiment: Experiment) -> Iterator[Plugin]:
86
111
  """Returns all plugins for the experiment."""
@@ -89,24 +114,8 @@ class RunnerBase(Runner):
89
114
  for plugin in experiment.plugins:
90
115
  yield plugin
91
116
 
92
- #
93
- # IO operations for saving running files.
94
- #
95
-
96
- def _save_run_manifest(self) -> None:
97
- def _save():
98
- pg.symbolic.deref(self.current_run.clone(), recursive=True).save(
99
- self.current_run.output_path_for(
100
- self.current_run.experiment, _RUN_MANIFEST
101
- ),
102
- hide_default_values=True
103
- )
104
- self.background_run(_save)
105
-
106
117
  def on_run_start(self) -> None:
107
118
  """Called when a runner is started."""
108
- self._save_run_manifest()
109
-
110
119
  for plugin in self._all_plugins(self.current_run.experiment):
111
120
  plugin.on_run_start(self, self.current_run.experiment)
112
121
 
@@ -126,9 +135,8 @@ class RunnerBase(Runner):
126
135
  num_examples_to_evaluate = 0
127
136
  if experiment.is_leaf:
128
137
  assert isinstance(experiment, Evaluation)
129
- num_examples_to_evaluate = (
130
- len(self.current_run.example_ids)
131
- if self.current_run.example_ids else experiment.num_examples
138
+ num_examples_to_evaluate = len(
139
+ self.current_run.examples_to_evaluate(experiment)
132
140
  )
133
141
  experiment.progress.start(total=num_examples_to_evaluate)
134
142
  else:
@@ -139,6 +147,7 @@ class RunnerBase(Runner):
139
147
  plugin.on_experiment_start(self, experiment)
140
148
 
141
149
  if experiment.is_leaf:
150
+ pg.io.mkdirs(self.current_run.output_dir(experiment))
142
151
  experiment.info(
143
152
  f'Starting evaluation {experiment.id!r} with '
144
153
  f'{num_examples_to_evaluate} examples to evaluate.'
@@ -180,10 +189,7 @@ class RunnerBase(Runner):
180
189
  self._log_experiment_completion(experiment)
181
190
 
182
191
  def _log_experiment_completion(self, experiment: Experiment):
183
- example_ids = (
184
- self.current_run.example_ids if self.current_run.example_ids else
185
- list(range(1, experiment.num_examples + 1))
186
- )
192
+ example_ids = sorted(self.current_run.examples_to_evaluate(experiment))
187
193
  num_from_checkpoint, num_processed = 0, 0
188
194
  for example_id in example_ids:
189
195
  status = experiment.state.get_status(example_id)
@@ -220,7 +226,7 @@ class RunnerBase(Runner):
220
226
  else:
221
227
  # An evaluation could be considered as done if it has processed all the
222
228
  # examples specified by `example_ids`.
223
- assert progress.is_completed
229
+ assert progress.is_completed, progress
224
230
  parent_progress.increment_processed()
225
231
 
226
232
  if parent_progress.is_completed:
@@ -235,6 +241,8 @@ class RunnerBase(Runner):
235
241
  example: Example
236
242
  ) -> None:
237
243
  """Called when an evaluation example is started."""
244
+ assert isinstance(experiment, Evaluation), experiment
245
+ experiment.state.update(example, in_progress=True)
238
246
  for plugin in self._all_plugins(experiment):
239
247
  plugin.on_example_start(self, experiment, example)
240
248
  experiment.info(f'Starting to evaluate example {example.id}.')
@@ -245,6 +253,8 @@ class RunnerBase(Runner):
245
253
  example: Example
246
254
  ) -> None:
247
255
  """Called when an evaluation example is complete."""
256
+ assert isinstance(experiment, Evaluation), experiment
257
+ experiment.state.update(example, in_progress=False)
248
258
  if example.newly_processed:
249
259
  if example.error is None:
250
260
  experiment.progress.increment_processed()
@@ -256,7 +266,7 @@ class RunnerBase(Runner):
256
266
  experiment.progress.increment_failed()
257
267
  experiment.error(
258
268
  (
259
- f'Failed to evaluate example {example.id} in'
269
+ f'Failed to evaluate example {example.id} in '
260
270
  f'{example.elapse:.2f} seconds.'
261
271
  ),
262
272
  error=example.error
@@ -316,7 +326,7 @@ class RunnerBase(Runner):
316
326
  self._run(targets)
317
327
 
318
328
  self.on_run_complete()
319
- except Exception as e: # pylint: disable=broad-except
329
+ except BaseException as e: # pylint: disable=broad-except
320
330
  self.on_run_abort(e)
321
331
  raise e
322
332
  finally:
@@ -324,9 +334,10 @@ class RunnerBase(Runner):
324
334
  self.background_run(cache.save)
325
335
 
326
336
  # Wait for the background tasks to finish.
327
- with self._io_pool_lock:
328
- self._io_pool, io_pool = None, self._io_pool
329
- io_pool.shutdown(wait=True)
337
+ if self.max_background_threads > 0:
338
+ with self._io_pool_lock:
339
+ self._io_pool, io_pool = None, self._io_pool
340
+ io_pool.shutdown(wait=True)
330
341
 
331
342
  @abc.abstractmethod
332
343
  def _run(self, evaluations: list[Evaluation]) -> None:
@@ -335,6 +346,7 @@ class RunnerBase(Runner):
335
346
  def run_evaluation(self, evaluation: Evaluation) -> None:
336
347
  """Runs the evaluation."""
337
348
  try:
349
+ evaluation.setup()
338
350
  self.on_experiment_start(evaluation)
339
351
 
340
352
  per_evaluation_settings = {}
@@ -344,18 +356,14 @@ class RunnerBase(Runner):
344
356
  per_evaluation_settings['cache'] = cache
345
357
 
346
358
  with lf.use_settings(**per_evaluation_settings):
347
- if self.current_run.example_ids is None:
348
- items = (
349
- Example(id=i + 1, input=ex) for i, ex in enumerate(
350
- evaluation.example_inputs)
351
- )
352
- else:
353
- items = (
354
- Example(
355
- id=example_id,
356
- input=evaluation.example_input_by_id(example_id)
357
- ) for example_id in self.current_run.example_ids
358
- )
359
+ items = (
360
+ Example(
361
+ id=example_id,
362
+ input=evaluation.example_input_by_id(example_id)
363
+ ) for example_id in sorted(
364
+ self.current_run.examples_to_evaluate(evaluation)
365
+ )
366
+ )
359
367
  if self.current_run.shuffle_inputs:
360
368
  items = list(items)
361
369
  random.shuffle(items)
@@ -367,6 +375,8 @@ class RunnerBase(Runner):
367
375
  except BaseException as e: # pylint: disable=broad-except
368
376
  self.on_experiment_abort(evaluation, e)
369
377
  raise e
378
+ finally:
379
+ evaluation.teardown()
370
380
 
371
381
  @abc.abstractmethod
372
382
  def _evaluate_items(
@@ -394,121 +404,3 @@ class RunnerBase(Runner):
394
404
  return in_memory.InMemory(
395
405
  self.current_run.output_path_for(experiment, 'cache.json')
396
406
  )
397
-
398
-
399
- class SequentialRunner(RunnerBase):
400
- """Sequential runner.
401
-
402
- Sequential runner runs all evaluations and their examples in sequence,
403
- as well as the background tasks, it allows the developer to catch all
404
- exceptions thrown from the background tasks, making it easier to debug.
405
- """
406
-
407
- NAME = 'sequential'
408
-
409
- def background_run(
410
- self, func: Callable[..., Any], *args: Any, **kwargs: Any
411
- ) -> None:
412
- """Runs the function with the IO pool."""
413
- func(*args, **kwargs)
414
-
415
- def _run(self, evaluations: list[Evaluation]) -> None:
416
- """Runs the experiment in sequence."""
417
- for e in evaluations:
418
- self.run_evaluation(e)
419
-
420
- def _evaluate_items(
421
- self, evaluation: Evaluation, items: Iterator[Example]
422
- ) -> None:
423
- """Runs the evaluation items in sequence."""
424
- for item in items:
425
- self.evaluate_item(evaluation, item)
426
-
427
-
428
- class DebugRunner(SequentialRunner):
429
- """Debug runner."""
430
-
431
- NAME = 'debug'
432
-
433
- # Do not use the checkpointer for debug runner.
434
- plugins = []
435
-
436
- def _on_bound(self):
437
- super()._on_bound()
438
- if self.current_run.example_ids is None:
439
- self.current_run.rebind(example_ids=[1], skip_notification=True)
440
- self.current_run.rebind(raise_if_has_error=True, skip_notification=True)
441
-
442
- def _save_run_manifest(self) -> None:
443
- """Do nothing to avoid overriden existing runs."""
444
-
445
-
446
- class ParallelRunner(RunnerBase):
447
- """Parallel runner."""
448
-
449
- NAME = 'parallel'
450
-
451
- timeout: Annotated[
452
- int | None,
453
- 'Timeout for each evaluation example.'
454
- ] = None
455
-
456
- concurrent_startup_delay: Annotated[
457
- tuple[int, int] | None,
458
- (
459
- 'A range of seconds to delay the initial evaluation of each thread '
460
- 'in the thread pool, helping to prevent a burst in LLM QPS at '
461
- 'startup. If set to None, no delay will be applied.'
462
- )
463
- ] = None
464
-
465
- def _run(self, evaluations: list[Evaluation]) -> None:
466
- """Runs the evaluations in parallel."""
467
- def _run_group(evaluation_group: list[Evaluation]):
468
- for e in evaluation_group:
469
- self.run_evaluation(e)
470
-
471
- # Run evaluations in parallel groupped by resource key.
472
- groups: dict[str, list[Evaluation]] = collections.defaultdict(list)
473
- for e in evaluations:
474
- resource_ids = e.resource_ids()
475
- if not resource_ids:
476
- group_id = e.id
477
- else:
478
- # TODO(daiyip): support group that requires multiple resources.
479
- group_id = resource_ids.pop()
480
- groups[group_id].append(e)
481
-
482
- for _, _, _ in lf.concurrent_map(
483
- _run_group,
484
- groups.values(),
485
- max_workers=max(64, len(groups)),
486
- timeout=self.timeout,
487
- silence_on_errors=None,
488
- ):
489
- pass
490
-
491
- def _evaluate_items(
492
- self, evaluation: Evaluation, items: Iterator[Example]
493
- ) -> None:
494
- """Override run items to run in parallel."""
495
- if self.concurrent_startup_delay is not None:
496
- thread_delayed = {}
497
- def _evaluate_item(item: Example):
498
- thread_id = threading.current_thread().ident
499
- if thread_id not in thread_delayed:
500
- thread_delayed[thread_id] = True
501
- time.sleep(random.randint(*self.concurrent_startup_delay))
502
- return self.evaluate_item(evaluation, item)
503
- else:
504
- def _evaluate_item(item: Example):
505
- return self.evaluate_item(evaluation, item)
506
-
507
- for _, _, _ in lf.concurrent_map(
508
- _evaluate_item,
509
- items,
510
- max_workers=evaluation.max_workers,
511
- timeout=self.timeout,
512
- silence_on_errors=None,
513
- ):
514
- pass