inspect-ai 0.3.85__py3-none-any.whl → 0.3.87__py3-none-any.whl

@@ -221,13 +221,13 @@ class TaskMetrics(Widget):
         self.recompute_grid()

     def on_mount(self) -> None:
-        self.recompute_grid(True)
+        self.call_after_refresh(self.recompute_grid)

-    def recompute_grid(self, force: bool = False) -> None:
-        if not self.is_mounted and not force:
+    def recompute_grid(self) -> None:
+        if not self.is_mounted:
             return
-        grid = self.query_one(f"#{self.grid_id()}")

+        grid = self.query_one(f"#{self.grid_id()}")
         grid.remove_children()
         for metric in self.metrics:
             # Add the value static but keep it around
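This hunk follows a common Textual idiom: work that queries the widget tree is deferred from on_mount with call_after_refresh, so recompute_grid only ever runs on a mounted, laid-out widget and the force escape hatch becomes unnecessary. A minimal runnable sketch of the same pattern, assuming the textual package (the widget names and ids are illustrative, not from inspect-ai):

    from textual.app import App, ComposeResult
    from textual.containers import Grid
    from textual.widget import Widget
    from textual.widgets import Static


    class MetricsDemo(Widget):
        def compose(self) -> ComposeResult:
            yield Grid(id="metrics-grid")

        def on_mount(self) -> None:
            # Defer until after the first refresh: by then the widget is
            # mounted, so query_one() can find the grid safely.
            self.call_after_refresh(self.recompute_grid)

        def recompute_grid(self) -> None:
            if not self.is_mounted:
                return
            grid = self.query_one("#metrics-grid", Grid)
            grid.remove_children()
            grid.mount(Static("accuracy: 0.0"))


    class DemoApp(App[None]):
        def compose(self) -> ComposeResult:
            yield MetricsDemo()


    if __name__ == "__main__":
        DemoApp().run()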
inspect_ai/_eval/eval.py CHANGED
@@ -43,7 +43,7 @@ from inspect_ai.model import (
     GenerateConfigArgs,
     Model,
 )
-from inspect_ai.model._model import init_active_model, resolve_models
+from inspect_ai.model._model import get_model, init_active_model, resolve_models
 from inspect_ai.scorer._reducer import reducer_log_names
 from inspect_ai.solver._chain import chain
 from inspect_ai.solver._solver import Solver, SolverSpec
@@ -751,10 +751,15 @@ async def eval_retry_async(
         else None
     )

+    # resolve the model
+    model = get_model(
+        model=eval_log.eval.model,
+        config=eval_log.eval.model_generate_config,
+        base_url=eval_log.eval.model_base_url,
+        **eval_log.eval.model_args,
+    )
+
     # collect the rest of the params we need for the eval
-    model = eval_log.eval.model
-    model_base_url = eval_log.eval.model_base_url
-    model_args = eval_log.eval.model_args
     task_args = eval_log.eval.task_args
     tags = eval_log.eval.tags
     limit = eval_log.eval.config.limit
@@ -813,8 +818,6 @@ async def eval_retry_async(
             id=task_id, task=task, task_args=task_args, model=None, log=eval_log
         ),
         model=model,
-        model_base_url=model_base_url,
-        model_args=model_args,
         task_args=task_args,
         sandbox=eval_log.eval.sandbox,
         sandbox_cleanup=sandbox_cleanup,
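Taken together, the eval.py hunks move model resolution for retries up front: rather than threading the raw model name, base URL, and args through to the task runner, eval_retry_async now builds a concrete Model via get_model, using the generate config captured in the log header (see the TaskLogger hunk below). A hedged sketch of the same resolution against a saved log, using the public inspect_ai API (the log path is illustrative, and model_generate_config only exists in logs written by this version onward):

    from inspect_ai.log import read_eval_log
    from inspect_ai.model import get_model

    # Read the header of a prior eval run (path is illustrative).
    log = read_eval_log("logs/example-eval.json", header_only=True)

    # Rebuild the model the same way the retry path above does, from the
    # name, generate config, base URL, and args recorded in the log.
    model = get_model(
        model=log.eval.model,
        config=log.eval.model_generate_config,
        base_url=log.eval.model_base_url,
        **log.eval.model_args,
    )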
@@ -139,6 +139,7 @@ class TaskLogger:
             tags=tags,
             solver_args=solver.args if solver else None,
             model=str(ModelName(model)),
+            model_generate_config=model.config,
             model_base_url=model.api.base_url,
             dataset=EvalDataset(
                 name=dataset.name,
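This last hunk is the other half of the retry change: the model's GenerateConfig is now persisted in the eval log header as model_generate_config, which is what eval_retry_async reads back above. A small sketch of that round trip, using inspect-ai's built-in mockllm provider so it runs without API keys (the config values are illustrative):

    from inspect_ai.model import GenerateConfig, get_model

    # Build a model with explicit generation settings.
    config = GenerateConfig(temperature=0.0, max_tokens=1024)
    model = get_model("mockllm/model", config=config)

    # model.config is the value written to the log header above, so a retry
    # that passes it back to get_model reproduces the same settings.
    assert model.config.temperature == 0.0
    assert model.config.max_tokens == 1024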