langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +7 -1
  3. langfun/core/agentic/__init__.py +8 -1
  4. langfun/core/agentic/action.py +740 -112
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +189 -24
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +11 -2
  20. langfun/core/data/conversion/gemini_test.py +48 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +48 -44
  24. langfun/core/eval/base_test.py +5 -5
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +3 -0
  29. langfun/core/eval/v2/checkpointing.py +148 -46
  30. langfun/core/eval/v2/checkpointing_test.py +9 -2
  31. langfun/core/eval/v2/config_saver.py +37 -0
  32. langfun/core/eval/v2/config_saver_test.py +36 -0
  33. langfun/core/eval/v2/eval_test_helper.py +104 -3
  34. langfun/core/eval/v2/evaluation.py +102 -19
  35. langfun/core/eval/v2/evaluation_test.py +9 -3
  36. langfun/core/eval/v2/example.py +50 -40
  37. langfun/core/eval/v2/example_test.py +16 -8
  38. langfun/core/eval/v2/experiment.py +95 -20
  39. langfun/core/eval/v2/experiment_test.py +19 -0
  40. langfun/core/eval/v2/metric_values.py +31 -3
  41. langfun/core/eval/v2/metric_values_test.py +32 -0
  42. langfun/core/eval/v2/metrics.py +157 -44
  43. langfun/core/eval/v2/metrics_test.py +39 -18
  44. langfun/core/eval/v2/progress.py +31 -1
  45. langfun/core/eval/v2/progress_test.py +27 -0
  46. langfun/core/eval/v2/progress_tracking.py +13 -5
  47. langfun/core/eval/v2/progress_tracking_test.py +9 -1
  48. langfun/core/eval/v2/reporting.py +88 -71
  49. langfun/core/eval/v2/reporting_test.py +24 -6
  50. langfun/core/eval/v2/runners/__init__.py +30 -0
  51. langfun/core/eval/v2/{runners.py → runners/base.py} +73 -180
  52. langfun/core/eval/v2/runners/beam.py +354 -0
  53. langfun/core/eval/v2/runners/beam_test.py +153 -0
  54. langfun/core/eval/v2/runners/ckpt_monitor.py +350 -0
  55. langfun/core/eval/v2/runners/ckpt_monitor_test.py +213 -0
  56. langfun/core/eval/v2/runners/debug.py +40 -0
  57. langfun/core/eval/v2/runners/debug_test.py +76 -0
  58. langfun/core/eval/v2/runners/parallel.py +243 -0
  59. langfun/core/eval/v2/runners/parallel_test.py +182 -0
  60. langfun/core/eval/v2/runners/sequential.py +47 -0
  61. langfun/core/eval/v2/runners/sequential_test.py +169 -0
  62. langfun/core/langfunc.py +45 -130
  63. langfun/core/langfunc_test.py +7 -5
  64. langfun/core/language_model.py +189 -36
  65. langfun/core/language_model_test.py +54 -3
  66. langfun/core/llms/__init__.py +14 -1
  67. langfun/core/llms/anthropic.py +157 -2
  68. langfun/core/llms/azure_openai.py +29 -17
  69. langfun/core/llms/cache/base.py +25 -3
  70. langfun/core/llms/cache/in_memory.py +48 -7
  71. langfun/core/llms/cache/in_memory_test.py +14 -4
  72. langfun/core/llms/compositional.py +25 -1
  73. langfun/core/llms/deepseek.py +30 -2
  74. langfun/core/llms/fake.py +32 -1
  75. langfun/core/llms/gemini.py +90 -12
  76. langfun/core/llms/gemini_test.py +110 -0
  77. langfun/core/llms/google_genai.py +52 -1
  78. langfun/core/llms/groq.py +28 -3
  79. langfun/core/llms/llama_cpp.py +23 -4
  80. langfun/core/llms/openai.py +120 -3
  81. langfun/core/llms/openai_compatible.py +148 -27
  82. langfun/core/llms/openai_compatible_test.py +207 -20
  83. langfun/core/llms/openai_test.py +0 -2
  84. langfun/core/llms/rest.py +16 -1
  85. langfun/core/llms/vertexai.py +78 -8
  86. langfun/core/logging.py +1 -1
  87. langfun/core/mcp/__init__.py +10 -0
  88. langfun/core/mcp/client.py +177 -0
  89. langfun/core/mcp/client_test.py +71 -0
  90. langfun/core/mcp/session.py +241 -0
  91. langfun/core/mcp/session_test.py +54 -0
  92. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  93. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  94. langfun/core/mcp/tool.py +254 -0
  95. langfun/core/mcp/tool_test.py +197 -0
  96. langfun/core/memory.py +1 -0
  97. langfun/core/message.py +160 -55
  98. langfun/core/message_test.py +65 -81
  99. langfun/core/modalities/__init__.py +8 -0
  100. langfun/core/modalities/audio.py +21 -1
  101. langfun/core/modalities/image.py +73 -3
  102. langfun/core/modalities/image_test.py +116 -0
  103. langfun/core/modalities/mime.py +78 -4
  104. langfun/core/modalities/mime_test.py +59 -0
  105. langfun/core/modalities/pdf.py +19 -1
  106. langfun/core/modalities/video.py +21 -1
  107. langfun/core/modality.py +167 -29
  108. langfun/core/modality_test.py +42 -12
  109. langfun/core/natural_language.py +1 -1
  110. langfun/core/sampling.py +4 -4
  111. langfun/core/sampling_test.py +20 -4
  112. langfun/core/structured/__init__.py +2 -24
  113. langfun/core/structured/completion.py +34 -44
  114. langfun/core/structured/completion_test.py +23 -43
  115. langfun/core/structured/description.py +54 -50
  116. langfun/core/structured/function_generation.py +29 -12
  117. langfun/core/structured/mapping.py +81 -37
  118. langfun/core/structured/parsing.py +95 -79
  119. langfun/core/structured/parsing_test.py +0 -3
  120. langfun/core/structured/querying.py +230 -154
  121. langfun/core/structured/querying_test.py +69 -33
  122. langfun/core/structured/schema/__init__.py +49 -0
  123. langfun/core/structured/schema/base.py +664 -0
  124. langfun/core/structured/schema/base_test.py +531 -0
  125. langfun/core/structured/schema/json.py +174 -0
  126. langfun/core/structured/schema/json_test.py +121 -0
  127. langfun/core/structured/schema/python.py +316 -0
  128. langfun/core/structured/schema/python_test.py +410 -0
  129. langfun/core/structured/schema_generation.py +33 -14
  130. langfun/core/structured/scoring.py +47 -36
  131. langfun/core/structured/tokenization.py +26 -11
  132. langfun/core/subscription.py +2 -2
  133. langfun/core/template.py +175 -50
  134. langfun/core/template_test.py +123 -17
  135. langfun/env/__init__.py +43 -0
  136. langfun/env/base_environment.py +827 -0
  137. langfun/env/base_environment_test.py +473 -0
  138. langfun/env/base_feature.py +304 -0
  139. langfun/env/base_feature_test.py +228 -0
  140. langfun/env/base_sandbox.py +842 -0
  141. langfun/env/base_sandbox_test.py +1235 -0
  142. langfun/env/event_handlers/__init__.py +14 -0
  143. langfun/env/event_handlers/chain.py +233 -0
  144. langfun/env/event_handlers/chain_test.py +253 -0
  145. langfun/env/event_handlers/event_logger.py +472 -0
  146. langfun/env/event_handlers/event_logger_test.py +304 -0
  147. langfun/env/event_handlers/metric_writer.py +726 -0
  148. langfun/env/event_handlers/metric_writer_test.py +214 -0
  149. langfun/env/interface.py +1640 -0
  150. langfun/env/interface_test.py +153 -0
  151. langfun/env/load_balancers.py +59 -0
  152. langfun/env/load_balancers_test.py +141 -0
  153. langfun/env/test_utils.py +507 -0
  154. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/METADATA +7 -3
  155. langfun-0.1.2.dev202512150805.dist-info/RECORD +217 -0
  156. langfun/core/eval/v2/runners_test.py +0 -343
  157. langfun/core/structured/schema.py +0 -987
  158. langfun/core/structured/schema_test.py +0 -982
  159. langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
  160. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/WHEEL +0 -0
  161. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/licenses/LICENSE +0 -0
  162. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/top_level.txt +0 -0
langfun/core/eval/v2/{runners.py → runners/base.py}
@@ -11,18 +11,18 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- """Evaluation experiment runners."""
+ """Base experiment runner."""
+
  import abc
- import collections
  import concurrent.futures
  import random
  import threading
- import time
  import traceback
- from typing import Any, Annotated, Callable, Iterator
+ from typing import Any, Annotated, Callable, Iterator, Literal

  from langfun import core as lf
  from langfun.core.eval.v2 import checkpointing
+ from langfun.core.eval.v2 import config_saver
  from langfun.core.eval.v2 import evaluation as evaluation_lib
  from langfun.core.eval.v2 import example as example_lib
  from langfun.core.eval.v2 import experiment as experiment_lib
@@ -38,35 +38,57 @@ Experiment = experiment_lib.Experiment
  Plugin = experiment_lib.Plugin


- _RUN_MANIFEST = 'run.json'
-
-
  class RunnerBase(Runner):
-   """A simple runner that runs evaluations and their examples sequentially."""
+   """Base class for runners with plugin support and IO pooling.
+
+   `RunnerBase` provides basic runner functionality such as plugin
+   integration for checkpointing, reporting and progress tracking.
+   It also manages a thread pool for background IO operations.
+   Subclasses should implement `_run` and `_evaluate_items` for different
+   execution strategies.
+   """

-   tqdm: Annotated[
-       bool,
+   progress_tracker: Annotated[
+       Literal['tqdm', 'html', 'auto', None],
        (
-           'If True, force using tqdm for progress update. Otherwise, determine '
-           'it automatically based on the running environment (console vs. '
-           'notebook)'
+           'If `tqdm`, force using tqdm for progress update. '
+           'If `html`, force using html for progress update. '
+           'If `auto`, determine it automatically based on the running '
+           'environment (console vs. notebook). '
+           'If None, disable progress update.'
        )
-   ] = False
+   ] = 'auto'

    plugins = [
        checkpointing.BulkCheckpointer(),
        reporting.HtmlReporter(),
+       config_saver.RunConfigSaver(),
    ]

+   max_background_threads: Annotated[
+       int,
+       'Max number of background threads for IO operations.'
+   ] = 128
+
    def _on_bound(self):
      super()._on_bound()

      # Install the tqdm plugin if needed.
-     with pg.notify_on_change(False):
-       self.plugins.append(progress_tracking.progress_tracker(self.tqdm))
+     if self.progress_tracker is not None:
+       with pg.notify_on_change(False):
+         self.plugins.append(
+             progress_tracking.progress_tracker(self.progress_tracker)
+         )
+
+     if self.max_background_threads > 0:
+       self._io_pool_lock = threading.Lock()
+       self._io_pool = concurrent.futures.ThreadPoolExecutor(
+           max_workers=self.max_background_threads
+       )
+     else:
+       self._io_pool_lock = None
+       self._io_pool = None

-     self._io_pool_lock = threading.Lock()
-     self._io_pool = concurrent.futures.ThreadPoolExecutor(max_workers=16)
      # TODO(daiyip): render background errors.
      self._background_last_error = None

@@ -78,9 +100,12 @@ class RunnerBase(Runner):
        except Exception as e:  # pylint: disable=broad-except
          self._background_last_error = e

-     with self._io_pool_lock:
-       if self._io_pool is not None:
-         self._io_pool.submit(_background_run, *args, **kwargs)
+     if self.max_background_threads > 0:
+       with self._io_pool_lock:
+         if self._io_pool is not None:
+           self._io_pool.submit(_background_run, *args, **kwargs)
+     else:
+       _background_run(*args, **kwargs)

    def _all_plugins(self, experiment: Experiment) -> Iterator[Plugin]:
      """Returns all plugins for the experiment."""
@@ -89,24 +114,8 @@
      for plugin in experiment.plugins:
        yield plugin

-   #
-   # IO operations for saving running files.
-   #
-
-   def _save_run_manifest(self) -> None:
-     def _save():
-       pg.symbolic.deref(self.current_run.clone(), recursive=True).save(
-           self.current_run.output_path_for(
-               self.current_run.experiment, _RUN_MANIFEST
-           ),
-           hide_default_values=True
-       )
-     self.background_run(_save)
-
    def on_run_start(self) -> None:
      """Called when a runner is started."""
-     self._save_run_manifest()
-
      for plugin in self._all_plugins(self.current_run.experiment):
        plugin.on_run_start(self, self.current_run.experiment)

@@ -126,11 +135,11 @@
      num_examples_to_evaluate = 0
      if experiment.is_leaf:
        assert isinstance(experiment, Evaluation)
-       num_examples_to_evaluate = (
-           len(self.current_run.example_ids)
-           if self.current_run.example_ids else experiment.num_examples
+       num_examples_to_evaluate = len(
+           self.current_run.examples_to_evaluate(experiment)
        )
        experiment.progress.start(total=num_examples_to_evaluate)
+       pg.io.mkdirs(self.current_run.output_dir(experiment))
      else:
        experiment.progress.start(total=len(experiment.leaf_nodes))

@@ -139,6 +148,7 @@
        plugin.on_experiment_start(self, experiment)

      if experiment.is_leaf:
+       pg.io.mkdirs(self.current_run.output_dir(experiment))
        experiment.info(
            f'Starting evaluation {experiment.id!r} with '
            f'{num_examples_to_evaluate} examples to evaluate.'
@@ -180,10 +190,7 @@
      self._log_experiment_completion(experiment)

    def _log_experiment_completion(self, experiment: Experiment):
-     example_ids = (
-         self.current_run.example_ids if self.current_run.example_ids else
-         list(range(1, experiment.num_examples + 1))
-     )
+     example_ids = sorted(self.current_run.examples_to_evaluate(experiment))
      num_from_checkpoint, num_processed = 0, 0
      for example_id in example_ids:
        status = experiment.state.get_status(example_id)
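Several hunks above and below replace ad-hoc branches on `current_run.example_ids` with a single `current_run.examples_to_evaluate(...)` call. That method is defined outside this file (see the `experiment.py` entry in the file list); a sketch of its presumed semantics, inferred purely from the branches it replaces and labeled as an assumption:

    # Assumption: inferred behavior, not the actual implementation.
    def examples_to_evaluate(self, experiment: Experiment) -> set[int]:
      """Returns the IDs of the examples this run should evaluate."""
      if self.example_ids:  # an explicit subset was requested
        return set(self.example_ids)
      return set(range(1, experiment.num_examples + 1))  # all examples

An unordered set would also explain why every call site in this diff wraps the result in `sorted(...)` or `len(...)`.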
@@ -220,7 +227,7 @@
      else:
        # An evaluation could be considered as done if it has processed all
        # the examples specified by `example_ids`.
-       assert progress.is_completed
+       assert progress.is_completed, progress
        parent_progress.increment_processed()

      if parent_progress.is_completed:
@@ -235,6 +242,8 @@
        example: Example
    ) -> None:
      """Called when an evaluation example is started."""
+     assert isinstance(experiment, Evaluation), experiment
+     experiment.state.update(example, in_progress=True)
      for plugin in self._all_plugins(experiment):
        plugin.on_example_start(self, experiment, example)
      experiment.info(f'Starting to evaluate example {example.id}.')
@@ -245,6 +254,8 @@
        example: Example
    ) -> None:
      """Called when an evaluation example is complete."""
+     assert isinstance(experiment, Evaluation), experiment
+     experiment.state.update(example, in_progress=False)
      if example.newly_processed:
        if example.error is None:
          experiment.progress.increment_processed()
@@ -256,7 +267,7 @@
          experiment.progress.increment_failed()
          experiment.error(
              (
-                 f'Failed to evaluate example {example.id} in'
+                 f'Failed to evaluate example {example.id} in '
                  f'{example.elapse:.2f} seconds.'
              ),
              error=example.error
@@ -316,7 +327,7 @@
        self._run(targets)

        self.on_run_complete()
-     except Exception as e:  # pylint: disable=broad-except
+     except BaseException as e:  # pylint: disable=broad-except
        self.on_run_abort(e)
        raise e
      finally:
@@ -324,9 +335,10 @@
        self.background_run(cache.save)

      # Wait for the background tasks to finish.
-     with self._io_pool_lock:
-       self._io_pool, io_pool = None, self._io_pool
-     io_pool.shutdown(wait=True)
+     if self.max_background_threads > 0:
+       with self._io_pool_lock:
+         self._io_pool, io_pool = None, self._io_pool
+       io_pool.shutdown(wait=True)

    @abc.abstractmethod
    def _run(self, evaluations: list[Evaluation]) -> None:
@@ -335,6 +347,7 @@
    def run_evaluation(self, evaluation: Evaluation) -> None:
      """Runs the evaluation."""
      try:
+       evaluation.setup()
        self.on_experiment_start(evaluation)

        per_evaluation_settings = {}
@@ -344,18 +357,14 @@
        per_evaluation_settings['cache'] = cache

      with lf.use_settings(**per_evaluation_settings):
-       if self.current_run.example_ids is None:
-         items = (
-             Example(id=i + 1, input=ex) for i, ex in enumerate(
-                 evaluation.example_inputs)
-         )
-       else:
-         items = (
-             Example(
-                 id=example_id,
-                 input=evaluation.example_input_by_id(example_id)
-             ) for example_id in self.current_run.example_ids
-         )
+       items = (
+           Example(
+               id=example_id,
+               input=evaluation.example_input_by_id(example_id)
+           ) for example_id in sorted(
+               self.current_run.examples_to_evaluate(evaluation)
+           )
+       )
        if self.current_run.shuffle_inputs:
          items = list(items)
          random.shuffle(items)
@@ -367,6 +376,8 @@
      except BaseException as e:  # pylint: disable=broad-except
        self.on_experiment_abort(evaluation, e)
        raise e
+     finally:
+       evaluation.teardown()

    @abc.abstractmethod
    def _evaluate_items(
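Because `evaluation.setup()` runs before `on_experiment_start` and `evaluation.teardown()` sits in a `finally` block, teardown executes on success, on abort, and on any raised exception. A sketch of an evaluation hooking this lifecycle; only the `setup`/`teardown` hook names come from this diff, and the subclass, `super()` calls, and resource handling are assumptions:

    import shutil
    import tempfile

    class MyEvaluation(Evaluation):  # hypothetical subclass
      def setup(self):
        super().setup()  # assumed base hook
        self._scratch_dir = tempfile.mkdtemp()  # assumed per-run resource

      def teardown(self):
        # Runs even if the evaluation aborts, since the runner calls
        # teardown() from a finally block.
        shutil.rmtree(self._scratch_dir, ignore_errors=True)
        super().teardown()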
@@ -394,121 +405,3 @@
      return in_memory.InMemory(
          self.current_run.output_path_for(experiment, 'cache.json')
      )
-
-
- class SequentialRunner(RunnerBase):
-   """Sequential runner.
-
-   Sequential runner runs all evaluations and their examples in sequence,
-   as well as the background tasks. This allows the developer to catch all
-   exceptions thrown from the background tasks, making it easier to debug.
-   """
-
-   NAME = 'sequential'
-
-   def background_run(
-       self, func: Callable[..., Any], *args: Any, **kwargs: Any
-   ) -> None:
-     """Runs the function with the IO pool."""
-     func(*args, **kwargs)
-
-   def _run(self, evaluations: list[Evaluation]) -> None:
-     """Runs the experiment in sequence."""
-     for e in evaluations:
-       self.run_evaluation(e)
-
-   def _evaluate_items(
-       self, evaluation: Evaluation, items: Iterator[Example]
-   ) -> None:
-     """Runs the evaluation items in sequence."""
-     for item in items:
-       self.evaluate_item(evaluation, item)
-
-
- class DebugRunner(SequentialRunner):
-   """Debug runner."""
-
-   NAME = 'debug'
-
-   # Do not use the checkpointer for debug runner.
-   plugins = []
-
-   def _on_bound(self):
-     super()._on_bound()
-     if self.current_run.example_ids is None:
-       self.current_run.rebind(example_ids=[1], skip_notification=True)
-     self.current_run.rebind(raise_if_has_error=True, skip_notification=True)
-
-   def _save_run_manifest(self) -> None:
-     """Do nothing to avoid overriding existing runs."""
-
-
- class ParallelRunner(RunnerBase):
-   """Parallel runner."""
-
-   NAME = 'parallel'
-
-   timeout: Annotated[
-       int | None,
-       'Timeout for each evaluation example.'
-   ] = None
-
-   concurrent_startup_delay: Annotated[
-       tuple[int, int] | None,
-       (
-           'A range of seconds to delay the initial evaluation of each thread '
-           'in the thread pool, helping to prevent a burst in LLM QPS at '
-           'startup. If set to None, no delay will be applied.'
-       )
-   ] = None
-
-   def _run(self, evaluations: list[Evaluation]) -> None:
-     """Runs the evaluations in parallel."""
-     def _run_group(evaluation_group: list[Evaluation]):
-       for e in evaluation_group:
-         self.run_evaluation(e)
-
-     # Run evaluations in parallel, grouped by resource key.
-     groups: dict[str, list[Evaluation]] = collections.defaultdict(list)
-     for e in evaluations:
-       resource_ids = e.resource_ids()
-       if not resource_ids:
-         group_id = e.id
-       else:
-         # TODO(daiyip): support group that requires multiple resources.
-         group_id = resource_ids.pop()
-       groups[group_id].append(e)
-
-     for _, _, _ in lf.concurrent_map(
-         _run_group,
-         groups.values(),
-         max_workers=max(64, len(groups)),
-         timeout=self.timeout,
-         silence_on_errors=None,
-     ):
-       pass
-
-   def _evaluate_items(
-       self, evaluation: Evaluation, items: Iterator[Example]
-   ) -> None:
-     """Override run items to run in parallel."""
-     if self.concurrent_startup_delay is not None:
-       thread_delayed = {}
-       def _evaluate_item(item: Example):
-         thread_id = threading.current_thread().ident
-         if thread_id not in thread_delayed:
-           thread_delayed[thread_id] = True
-           time.sleep(random.randint(*self.concurrent_startup_delay))
-         return self.evaluate_item(evaluation, item)
-     else:
-       def _evaluate_item(item: Example):
-         return self.evaluate_item(evaluation, item)
-
-     for _, _, _ in lf.concurrent_map(
-         _evaluate_item,
-         items,
-         max_workers=evaluation.max_workers,
-         timeout=self.timeout,
-         silence_on_errors=None,
-     ):
-       pass
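The `SequentialRunner`, `DebugRunner` and `ParallelRunner` deletions above are moves, not removals: per the file list, they now live in `runners/sequential.py`, `runners/debug.py` and `runners/parallel.py`, with new `beam.py` and `ckpt_monitor.py` runners added alongside. Under the new `RunnerBase` contract only the two abstract methods must be supplied; a minimal sketch mirroring the removed sequential implementation (the `NAME` registration pattern follows the deleted code; treat the rest as an assumption rather than the shipped API):

    class MySequentialRunner(RunnerBase):  # hypothetical subclass
      NAME = 'my_sequential'  # registration key, as in the deleted runners

      def _run(self, evaluations: list[Evaluation]) -> None:
        # Run evaluations one after another on the current thread.
        for e in evaluations:
          self.run_evaluation(e)

      def _evaluate_items(
          self, evaluation: Evaluation, items: Iterator[Example]
      ) -> None:
        # Evaluate examples sequentially; RunnerBase supplies plugin
        # dispatch, progress tracking and background IO around these calls.
        for item in items:
          self.evaluate_item(evaluation, item)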