inspect-ai 0.3.11__py3-none-any.whl → 0.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
inspect_ai/__init__.py CHANGED
@@ -6,7 +6,7 @@ from inspect_ai._eval.eval import eval, eval_async, eval_retry, eval_retry_async
6
6
  from inspect_ai._eval.list import list_tasks
7
7
  from inspect_ai._eval.registry import task
8
8
  from inspect_ai._eval.score import score, score_async
9
- from inspect_ai._eval.types import Task, TaskInfo, Tasks
9
+ from inspect_ai._eval.task import Task, TaskInfo, Tasks
10
10
  from inspect_ai._util.constants import PKG_NAME
11
11
 
12
12
  __version__ = importlib_version(PKG_NAME)
inspect_ai/_cli/list.py CHANGED
@@ -11,7 +11,7 @@ from typing_extensions import Unpack
11
11
  from inspect_ai._cli.common import CommonOptions, common_options, resolve_common_options
12
12
  from inspect_ai._cli.util import parse_cli_args
13
13
  from inspect_ai._eval.list import list_tasks
14
- from inspect_ai._eval.types import TaskInfo
14
+ from inspect_ai._eval.task import TaskInfo
15
15
  from inspect_ai.log import list_eval_logs
16
16
 
17
17
 
inspect_ai/_eval/eval.py CHANGED
@@ -26,10 +26,10 @@ from inspect_ai.solver import Solver
26
26
  from inspect_ai.util._context import init_async_context
27
27
 
28
28
  from .loader import resolve_tasks
29
+ from .task import Tasks, TaskSpec
29
30
  from .task.log import TaskLogger
30
31
  from .task.run import task_run
31
32
  from .task.util import task_file, task_run_dir
32
- from .types import Tasks, TaskSpec
33
33
 
34
34
  log = logging.getLogger(__name__)
35
35
 
inspect_ai/_eval/list.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Callable
8
8
  from inspect_ai._util.error import exception_message
9
9
  from inspect_ai._util.file import file
10
10
 
11
- from .types import TaskInfo
11
+ from .task import TaskInfo
12
12
 
13
13
  logger = getLogger(__name__)
14
14
 
@@ -18,8 +18,8 @@ from inspect_ai.model import Model, ModelName
18
18
 
19
19
  from .list import task_files
20
20
  from .registry import task_create
21
+ from .task import Task, TaskInfo, Tasks
21
22
  from .task.constants import TASK_FILE_ATTR, TASK_RUN_DIR_ATTR
22
- from .types import Task, TaskInfo, Tasks
23
23
 
24
24
 
25
25
  def resolve_tasks(
@@ -14,7 +14,7 @@ from inspect_ai._util.registry import (
14
14
  )
15
15
  from inspect_ai.model import ModelName
16
16
 
17
- from .types import Task
17
+ from .task import Task
18
18
 
19
19
  MODEL_PARAM = "model"
20
20
 
inspect_ai/_eval/score.py CHANGED
@@ -17,9 +17,9 @@ from inspect_ai.model import ModelName
17
17
  from inspect_ai.scorer import Metric, Score, Scorer, Target
18
18
  from inspect_ai.solver import TaskState
19
19
 
20
+ from .task import Task
20
21
  from .task.results import eval_results
21
22
  from .task.util import task_run_dir
22
- from .types import Task
23
23
 
24
24
 
25
25
  def score(log: EvalLog, scorer: Scorer) -> EvalLog:
@@ -0,0 +1,3 @@
1
+ from .task import Task, TaskInfo, TaskSpec, Tasks # noqa: I001, F401
2
+
3
+ __all__ = ["Task", "TaskInfo", "TaskSpec", "Tasks"]
@@ -23,6 +23,7 @@ from inspect_ai.log import (
23
23
  EvalConfig,
24
24
  EvalError,
25
25
  EvalLog,
26
+ EvalResults,
26
27
  EvalStats,
27
28
  )
28
29
  from inspect_ai.log._log import eval_error
@@ -34,7 +35,7 @@ from inspect_ai.model import (
34
35
  from inspect_ai.scorer import Score, Scorer, Target
35
36
  from inspect_ai.solver import Generate, Plan, Solver, TaskState
36
37
 
37
- from ..types import Task
38
+ from ..task import Task
38
39
  from .generate import task_generate
39
40
  from .images import samples_with_base64_images, states_with_base64_images
40
41
  from .log import TaskLogger, collect_eval_data, log_output, log_plan
@@ -101,7 +102,9 @@ async def task_run(
101
102
  plan = (
102
103
  plan
103
104
  if isinstance(plan, Plan)
104
- else Plan(plan) if plan is not None else task.plan
105
+ else Plan(plan)
106
+ if plan is not None
107
+ else task.plan
105
108
  )
106
109
  score = score and task.scorer is not None
107
110
  scorer: Scorer | None = task.scorer if (score and task.scorer) else None
@@ -132,7 +135,6 @@ async def task_run(
132
135
  len(plan.steps) + (1 if plan.finish else 0) + (1) # scorer
133
136
  )
134
137
  with td.progress(total=total_steps) as p:
135
-
136
138
  # forward progress
137
139
  def progress() -> None:
138
140
  p.update(1)
@@ -195,6 +197,8 @@ async def task_run(
195
197
  metrics=task.metrics,
196
198
  )
197
199
  logger.log_results(results)
200
+ else:
201
+ results = EvalResults()
198
202
 
199
203
  # collect eval data
200
204
  collect_eval_data(stats, logger)
@@ -295,7 +299,6 @@ async def resolve_dataset(
295
299
  epochs: int,
296
300
  log_images: bool,
297
301
  ) -> tuple[Dataset, list[Sample], list[TaskState]]:
298
-
299
302
  # apply limit to dataset
300
303
  dataset_limit = (
301
304
  slice(0, len(dataset))
@@ -7,7 +7,7 @@ from inspect_ai.dataset import Sample
7
7
  from inspect_ai.model import ChatMessage, ChatMessageUser
8
8
  from inspect_ai.solver import TaskState
9
9
 
10
- from ..types import Task
10
+ from ..task import Task
11
11
  from .constants import TASK_FILE_ATTR, TASK_RUN_DIR_ATTR
12
12
 
13
13
 
@@ -20,6 +20,7 @@ import { WorkSpace } from "./src/workspace/WorkSpace.mjs";
20
20
 
21
21
  export function App() {
22
22
  const [selected, setSelected] = useState(-1);
23
+ const [pendingLog, setPendingLog] = useState(undefined);
23
24
  const [logs, setLogs] = useState({ log_dir: "", files: [] });
24
25
  const [logHeaders, setLogHeaders] = useState({});
25
26
  const [offcanvas, setOffcanvas] = useState(false);
@@ -126,24 +127,32 @@ export function App() {
126
127
  // Ensure that we have a selected index when there is are
127
128
  // new logs
128
129
  useEffect(() => {
129
- setSelected(0);
130
- }, [logs])
130
+ if (logs && pendingLog) {
131
+ const index = logs.files.findIndex((val) => {
132
+ return pendingLog.endsWith(val.name);
133
+ });
134
+ if (index > -1) {
135
+ setSelected(index);
136
+ }
137
+ setPendingLog(undefined);
138
+ }
139
+ }, [logs, pendingLog])
131
140
 
132
141
  // listen for updateState messages from vscode
133
142
  useEffect(() => {
134
- const onMessage = (e) => {
143
+ const onMessage = async (e) => {
135
144
  switch (e.data.type || e.data.message) {
136
145
  case "updateState": {
137
146
  if (e.data.url) {
138
-
139
147
  const index = logs.files.findIndex((val) => {
140
- return val.name.endsWith(e.data.url);
148
+ return e.data.url.endsWith(val.name);
141
149
  });
142
150
  if (index > -1) {
143
151
  // Select the correct index
144
152
  setSelected(index);
145
153
  } else {
146
- // TODO: Error
154
+ await loadLogs();
155
+ setPendingLog(e.data.url);
147
156
  }
148
157
  }
149
158
  }
@@ -153,7 +162,7 @@ export function App() {
153
162
  return () => {
154
163
  window.removeEventListener("message", onMessage);
155
164
  };
156
- }, [setCurrentLog]);
165
+ }, [logs, setCurrentLog, setPendingLog]);
157
166
 
158
167
  useEffect(async () => {
159
168
  // See whether a specific task_file has been passed.
@@ -176,6 +185,9 @@ export function App() {
176
185
  // initial fetch of logs
177
186
  await load();
178
187
 
188
+ // Select the first log
189
+ setSelected(0);
190
+
179
191
  // poll every 1s for events
180
192
  setInterval(() => {
181
193
  api.client_events().then((events) => {
@@ -24,6 +24,7 @@ export const samplesDescriptor = (samples, epochs, context) => {
24
24
  const uniqScoreValues = [
25
25
  ...new Set(
26
26
  samples
27
+ .filter(sample => !!sample.score)
27
28
  .map((sample) => sample.score.value)
28
29
  .filter((value) => {
29
30
  return value !== null;
@@ -92,7 +92,7 @@ export const SamplesTab = (props) => {
92
92
  // Focus the sample list
93
93
  useEffect(() => {
94
94
  const listEl = sampleListRef.current;
95
- if (listEl) {
95
+ if (listEl && listEl.base) {
96
96
  listEl.base.focus();
97
97
  }
98
98
  }, [items]);
@@ -6,7 +6,8 @@ from ._prompt import (
6
6
  prompt_template,
7
7
  system_message,
8
8
  )
9
- from ._solver import Generate, Solver, TaskState, generate, solver
9
+ from ._solver import Generate, Solver, generate, solver
10
+ from ._task_state import TaskState
10
11
  from ._tool.tool import Tool, tool
11
12
  from ._tool.use_tools import use_tools
12
13
  from ._tool.web_search import web_search
@@ -5,7 +5,8 @@ from inspect_ai.model import (
5
5
  )
6
6
  from inspect_ai.util import resource
7
7
 
8
- from ._solver import Generate, Solver, TaskState, solver
8
+ from ._solver import Generate, Solver, solver
9
+ from ._task_state import TaskState
9
10
 
10
11
 
11
12
  @solver
@@ -4,7 +4,8 @@ from random import Random
4
4
 
5
5
  from inspect_ai.util import resource
6
6
 
7
- from ._solver import Generate, Solver, TaskState, solver
7
+ from ._solver import Generate, Solver, solver
8
+ from ._task_state import TaskState
8
9
 
9
10
  logger = logging.getLogger(__name__)
10
11
 
@@ -11,7 +11,8 @@ from inspect_ai._util.registry import (
11
11
  registry_tag,
12
12
  )
13
13
 
14
- from ._solver import Solver, TaskState
14
+ from ._solver import Solver
15
+ from ._task_state import TaskState
15
16
 
16
17
 
17
18
  class Plan:
@@ -3,7 +3,8 @@ from typing import Any
3
3
  from inspect_ai.model import ChatMessageSystem
4
4
  from inspect_ai.util import resource
5
5
 
6
- from ._solver import Generate, Solver, TaskState, solver
6
+ from ._solver import Generate, Solver, solver
7
+ from ._task_state import TaskState
7
8
  from ._util import append_system_message
8
9
 
9
10
 
@@ -18,110 +18,9 @@ from inspect_ai._util.registry import (
18
18
  registry_name,
19
19
  registry_tag,
20
20
  )
21
- from inspect_ai.model import (
22
- ChatMessage,
23
- ChatMessageUser,
24
- GenerateConfigArgs,
25
- ModelName,
26
- ModelOutput,
27
- ToolChoice,
28
- )
29
-
30
- from ._tool.tool import Tool
31
-
21
+ from inspect_ai.model import GenerateConfigArgs
32
22
 
33
- class TaskState:
34
- def __init__(
35
- self,
36
- model: ModelName,
37
- sample_id: int | str,
38
- epoch: int,
39
- input: str | list[ChatMessage],
40
- choices: list[str] | None,
41
- messages: list[ChatMessage],
42
- tools: list[Tool] = [],
43
- tool_choice: ToolChoice | None = None,
44
- output: ModelOutput | None = None,
45
- completed: bool = False,
46
- metadata: dict[str, Any] = {},
47
- ) -> None:
48
- self._model = model
49
-
50
- self.sample_id = sample_id
51
- """Unique id for sample."""
52
-
53
- self.epoch = epoch
54
- """Epoch number for sample."""
55
-
56
- self._input = input
57
-
58
- self.choices = choices
59
- """Sample choices."""
60
-
61
- self.messages = messages
62
- """Chat conversation history for sample."""
63
-
64
- self.tools = tools
65
- """Tools available to the model."""
66
-
67
- self.tool_choice = tool_choice
68
- """Tool choice directive."""
69
-
70
- self.output = output if output else ModelOutput(model=str(model), choices=[])
71
- """Model output."""
72
-
73
- self.completed = completed
74
- """Flag to indicate that the solver loop should terminate."""
75
-
76
- self.metadata = metadata
77
- """Additional task state metadata."""
78
-
79
- @property
80
- def model(self) -> ModelName:
81
- """Name of model being evaluated."""
82
- return self._model
83
-
84
- @property
85
- def input(self) -> str | list[ChatMessage]:
86
- """Sample input."""
87
- return self._input
88
-
89
- @property
90
- def input_text(self) -> str:
91
- """Sample input as text."""
92
- if isinstance(self._input, str):
93
- return self._input
94
- else:
95
- input = next(
96
- (message.text for message in self._input if message.role == "user"),
97
- None,
98
- )
99
- if input:
100
- return input
101
- else:
102
- raise ValueError(
103
- "input_text requested from TaskState but none available"
104
- )
105
-
106
- @property
107
- def user_prompt(self) -> ChatMessageUser:
108
- """User prompt for this state.
109
-
110
- Tasks are very general and can have may types of inputs.
111
- However, in many cases solvers assume they can interact with
112
- the state as a "chat" in a predictable fashion (e.g. prompt
113
- engineering solvers). This property enables easy read and
114
- write access to the user chat prompt. Raises an
115
- exception if there is no user prompt
116
-
117
- Returns:
118
- First user `ChatMessage` in the task state.
119
- """
120
- prompt = next((m for m in self.messages if m.role == "user"), None)
121
- if prompt:
122
- return prompt
123
- else:
124
- raise ValueError("user_prompt requested from TaskState but none available")
23
+ from ._task_state import TaskState
125
24
 
126
25
 
127
26
  @runtime_checkable
@@ -0,0 +1,145 @@
1
+ from typing import Any
2
+
3
+ from inspect_ai.model import (
4
+ ChatMessage,
5
+ ChatMessageUser,
6
+ ModelName,
7
+ ModelOutput,
8
+ ToolChoice,
9
+ )
10
+
11
+ from ._tool.tool import Tool
12
+
13
+
14
+ class TaskState:
15
+ """
16
+ The `TaskState` represents the internal state of the `Task` being run for a single `Sample`.
17
+
18
+ It's a mutable object that is updated by each solver during a sample's
19
+ evaluation. It allows us to maintain things like the message history between
20
+ the running `Task` and the model, the tools available to the model, the
21
+ final output of the model and whether or not it's completed yet.
22
+ """
23
+
24
+ def __init__(
25
+ self,
26
+ model: ModelName,
27
+ sample_id: int | str,
28
+ epoch: int,
29
+ input: str | list[ChatMessage],
30
+ choices: list[str] | None,
31
+ messages: list[ChatMessage],
32
+ tools: list[Tool] = [],
33
+ tool_choice: ToolChoice | None = None,
34
+ output: ModelOutput | None = None,
35
+ completed: bool = False,
36
+ metadata: dict[str, Any] = {},
37
+ ) -> None:
38
+ self._model = model
39
+ """Model name used for this task."""
40
+
41
+ self.sample_id = sample_id
42
+ """Unique id for sample."""
43
+
44
+ self.epoch = epoch
45
+ """Epoch number for sample."""
46
+
47
+ self._input = input
48
+ """
49
+ The original input from the `Sample` for this `TaskState`.
50
+
51
+ Should be treated as immutable and not changed during the run, so that
52
+ it can be referenced or checked wherever needed. Access through `input`
53
+ or `input_text` only
54
+ """
55
+
56
+ self.choices = choices
57
+ """
58
+ List of choices for the sample, specifically used by the `multiple_choice` scorer.
59
+
60
+ For example, if the sample was a multiple choice question like "What is
61
+ the capital of France? A) Paris B) London C) Berlin", we would store the
62
+ possible answers here.
63
+ """
64
+
65
+ self.messages = messages
66
+ """
67
+ Chat conversation history for sample.
68
+
69
+ This will generally get appended to every time a `generate` call is made
70
+ to the model. Useful for both debug and for solvers/scorers to assess
71
+ model performance or choose the next step.
72
+ """
73
+
74
+ self.tools = tools
75
+ """Tools available to the model."""
76
+
77
+ self.tool_choice = tool_choice
78
+ """Tool choice directive."""
79
+
80
+ self.output = output if output else ModelOutput(model=str(model), choices=[])
81
+ """
82
+ The 'final' model output once we've completed all solving.
83
+
84
+ For simple evals this may just be the last `message` from the
85
+ conversation history, but more complex solvers may generate this in
86
+ different ways depending on what solvers are used..
87
+ """
88
+
89
+ self.completed = completed
90
+ """Flag to indicate that the solver loop should terminate."""
91
+
92
+ self.metadata = metadata
93
+ """Additional task state metadata."""
94
+
95
+ @property
96
+ def model(self) -> ModelName:
97
+ """Name of model being evaluated."""
98
+ return self._model
99
+
100
+ @property
101
+ def input(self) -> str | list[ChatMessage]:
102
+ """Input from the `Sample`, should be considered immutable."""
103
+ return self._input
104
+
105
+ @property
106
+ def input_text(self) -> str:
107
+ """
108
+ Convenience function for accessing the initial input from the `Sample` as a string.
109
+
110
+ If the `input` is a `list[ChatMessage]`, this will return the text from
111
+ the first chat message
112
+ """
113
+ if isinstance(self._input, str):
114
+ return self._input
115
+ else:
116
+ input = next(
117
+ (message.text for message in self._input if message.role == "user"),
118
+ None,
119
+ )
120
+ if input:
121
+ return input
122
+ else:
123
+ raise ValueError(
124
+ "input_text requested from TaskState but none available"
125
+ )
126
+
127
+ @property
128
+ def user_prompt(self) -> ChatMessageUser:
129
+ """User prompt for this state.
130
+
131
+ Tasks are very general and can have may types of inputs.
132
+ However, in many cases solvers assume they can interact with
133
+ the state as a "chat" in a predictable fashion (e.g. prompt
134
+ engineering solvers). This property enables easy read and
135
+ write access to the user chat prompt. Raises an
136
+ exception if there is no user prompt
137
+
138
+ Returns:
139
+ First user `ChatMessage` in the task state.
140
+ """
141
+ prompt = next((m for m in self.messages if m.role == "user"), None)
142
+ if prompt:
143
+ return prompt
144
+ else:
145
+ raise ValueError("user_prompt requested from TaskState but none available")
@@ -3,7 +3,8 @@ from inspect_ai.model import (
3
3
  ToolChoice,
4
4
  )
5
5
 
6
- from .._solver import Generate, Solver, TaskState, solver
6
+ from .._solver import Generate, Solver, solver
7
+ from .._task_state import TaskState
7
8
  from .._util import append_system_message
8
9
  from .tool import Tool
9
10
  from .tool_def import tool_defs
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: inspect_ai
3
- Version: 0.3.11
3
+ Version: 0.13.3
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Safety Institute
6
6
  License: MIT License
@@ -1,10 +1,10 @@
1
- inspect_ai/__init__.py,sha256=eeAabo6ZQ6QvU3vVHOhJQK6h5sQsaqYqLNaoPIp_KIU,672
1
+ inspect_ai/__init__.py,sha256=laGXMK1BIoOC_x2D31Cmbs_3c335exZL0FByjAqRO-I,671
2
2
  inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
3
3
  inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  inspect_ai/_cli/common.py,sha256=CTEvRG2kAwsP7HxI5LjpdXh4RdVBwD0oMfD0Sn8Hl3A,1705
5
5
  inspect_ai/_cli/eval.py,sha256=LyW4Gc15Q_fBbvTXfJczhXyXAHi_MoImjrMkTLRRf9g,7886
6
6
  inspect_ai/_cli/info.py,sha256=K1SMxB8LiLONlwygU_BxcL1u2KJyYyFDpW4qkgMIcjE,1922
7
- inspect_ai/_cli/list.py,sha256=TZiIj-vP8rQ0qTueQniVYppFSIUSLb4asMQvOL01UQM,3736
7
+ inspect_ai/_cli/list.py,sha256=IIkqT1HFqTaYPn3wNxHT656-wHwCHvGA57Rtol2Mc4Y,3735
8
8
  inspect_ai/_cli/main.py,sha256=PjIGgPVAky8eNBX5N_O2EX4TpgIdkmFY1uYtsgSCvkQ,1028
9
9
  inspect_ai/_cli/score.py,sha256=6FRjaqYWY1XHWFFwxZyQJW1cPnIhr7e3ZotTvSZyTgg,2859
10
10
  inspect_ai/_cli/util.py,sha256=nT3W7uUGzslNOUsK95lp6ZhPmTRfqb9i0aHJ3Bx_L2Q,576
@@ -13,19 +13,20 @@ inspect_ai/_display/__init__.py,sha256=PPuC3ydm-duhpdTShtMqVpFMr2BNndGmIIOy0A-Y-
13
13
  inspect_ai/_display/_display.py,sha256=Jd2LB49jMANpLFpgmRFeAa3pIm5MKvROp3bW1x-p2O4,1417
14
14
  inspect_ai/_display/logger.py,sha256=57_5ToLTozASQGdRNTIPt8vmAKF7u3pJnhoXbWY3oAs,2720
15
15
  inspect_ai/_display/rich.py,sha256=Qe357AMXqlPADmW0yZCLYcI1g_b1Gu7Sa5wTZFZbkYg,11708
16
- inspect_ai/_eval/eval.py,sha256=eGsAfuOLDZO0gmhpsgu0lyHZpLzbVfC_MV_2A63QyfM,17293
17
- inspect_ai/_eval/list.py,sha256=AyWokTJzmjjFx0AVNE4TK3NGH2SdzT9AcHPuKNjbTos,6049
18
- inspect_ai/_eval/loader.py,sha256=HBmXTXPdoVneIcjeBdfV7mgzB_91c9PdG_HNIGyRYFA,7881
19
- inspect_ai/_eval/registry.py,sha256=-JiFnGjbckYYS7jPOUGwhOrqnstLhT8cHHV2hTZXh3A,3999
20
- inspect_ai/_eval/score.py,sha256=g2ahIqUeLT9ilaiPqzNGcqcGpJ1fd7bLn7zQypRTrZg,4334
21
- inspect_ai/_eval/types.py,sha256=tqBrdnKEay9KQ3pcX07CW8REaQgEFSsbqWGFLmyNAV8,4073
16
+ inspect_ai/_eval/eval.py,sha256=la4sfT6EYXtsTkJ2yMmF_b6Saz4q6vJYvhQ-26kpBlI,17292
17
+ inspect_ai/_eval/list.py,sha256=YsPclTQKfiafUu27QEpCve6IjDAHtGA-fv-7_-tCctM,6048
18
+ inspect_ai/_eval/loader.py,sha256=wKvMDdaMnLbJ3opKtKD8a8qrpE-y2huvv9p4EnXKF3Q,7880
19
+ inspect_ai/_eval/registry.py,sha256=vTDPgKnGOGohl-IKQ-I2adkpXaQBCKuhA_cx78f3qMI,3998
20
+ inspect_ai/_eval/score.py,sha256=g4sHYxg9ICfOfrS6ZjfWurMtNo0THRSIemONHyUfYKQ,4333
21
+ inspect_ai/_eval/task/__init__.py,sha256=KCAhe9afbgLqolxXw80QSWwUZvfXSoqtjL8pyj-WUOg,123
22
22
  inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
23
23
  inspect_ai/_eval/task/generate.py,sha256=53UNk9ReB7jf0FBwUkH_3IqSKg-vr5biQ3hLKbKwUic,3925
24
24
  inspect_ai/_eval/task/images.py,sha256=4bLN66eF2322r42FoINzGXMh1PTRjP_eCceZ8_msPeQ,2135
25
25
  inspect_ai/_eval/task/log.py,sha256=G0tj9QAB6BFTrSyuBEf6CFAZ6NpkLZfIWQUs1ZCYGNo,5255
26
26
  inspect_ai/_eval/task/results.py,sha256=eLfU7Dz4mmTGeKosBz4Iog1poakHdLgHkq2bs4Vw50s,2602
27
- inspect_ai/_eval/task/run.py,sha256=3If6SKM0vu1DD6eTrLvKeW8OgrjK8vxYGgHDw6f6iZY,11682
28
- inspect_ai/_eval/task/util.py,sha256=JOJR8PJF6xuLi5YxS4ZLz6xHBXZsmKmnG1SKwZtr74s,1174
27
+ inspect_ai/_eval/task/run.py,sha256=SDJz4KKxLdpOLT8yjQAwqbkaptSuCDLfZidPdkhgsWg,11786
28
+ inspect_ai/_eval/task/task.py,sha256=tqBrdnKEay9KQ3pcX07CW8REaQgEFSsbqWGFLmyNAV8,4073
29
+ inspect_ai/_eval/task/util.py,sha256=_HcCOdjypVqgvLWakBgKx0MjrEhSWoKu7SX85DfxpiA,1173
29
30
  inspect_ai/_util/_async.py,sha256=OuK_dnZrnGi1W2-72sbpObO4Knr5Q_cLxJL1kTY12t8,276
30
31
  inspect_ai/_util/appdirs.py,sha256=6OsSZ8JcN6Nkp739CxCXfJwx_g9TqTfo7iAKDhtw7SY,355
31
32
  inspect_ai/_util/constants.py,sha256=ky9MfU9zrIvRX1PrlIr89npFtleSPoHevbYt4_t57go,445
@@ -53,7 +54,7 @@ inspect_ai/_view/schema.py,sha256=SlBJ75EsCjEtSzcVMuP2yu2cI3ju1Z3amUWaWbqNZjM,14
53
54
  inspect_ai/_view/view.py,sha256=Gm2p5A8dvwFM2NWkz8GPF7pqSmcC4X1dtXxPvkw0Q_M,9624
54
55
  inspect_ai/_view/www/.gitignore,sha256=8amgmyJs-OmKQoYgDF2evVwokkcHrDMXlH-OzwarFns,13
55
56
  inspect_ai/_view/www/App.css,sha256=VZtFGBwW_JsKtiC4WY0lLT-mcOlGYdxRvqm1-KyeSyQ,13934
56
- inspect_ai/_view/www/App.mjs,sha256=OI9imy6AlyX-92yzqcUNLcXA3B_hHrhennY5K_C2pp4,7195
57
+ inspect_ai/_view/www/App.mjs,sha256=yEyCPMSH-wu7YMeNRTaZbdaSW3pvYYR_dF7jO_lQHWM,7605
57
58
  inspect_ai/_view/www/favicon.svg,sha256=b9AHYZaO2zBzeKH6G4PwXZMGGW_UxY0omKHam-c9MAs,1508
58
59
  inspect_ai/_view/www/index.html,sha256=HEUzotuCnX5GClv4feeppOhNrPN6CxXqp4N_Nsyp7oo,2158
59
60
  inspect_ai/_view/www/log-schema.json,sha256=pGcQiJEUqdzfsxKDqc-eEfRHXaP_FAVbWpUahpWaf9g,38118
@@ -113,8 +114,8 @@ inspect_ai/_view/www/src/samples/SampleDialog.mjs,sha256=ygslG6f8QOzRAwvRiSsKrPW
113
114
  inspect_ai/_view/www/src/samples/SampleDisplay.mjs,sha256=KZZB2YZkoSPDKK5facTwrlUccc23xqfsXO_W67QXN-Q,6525
114
115
  inspect_ai/_view/www/src/samples/SampleList.mjs,sha256=05TDU3dZFF1AOXDdtnddSNS6Y0qJDFtexFiDy4GnO-M,6598
115
116
  inspect_ai/_view/www/src/samples/SampleScoreView.mjs,sha256=Pm3rAV-W2dJ56aFxaxWD7ooaVJMbbSsTPe113wfFOPM,2989
116
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs,sha256=F4T_zxGQgaMj9HTAu2mhR2zMH91hGlVJR8lBezluXbo,7712
117
- inspect_ai/_view/www/src/samples/SamplesTab.mjs,sha256=GDg4fdfDrDN4gungaFcWrKuWicWSJB1NnGIV64P3Ctc,8514
117
+ inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs,sha256=itfKTMAds_Y7cLjFJqzs--j3x28dlSH_3oW1QhbZxeQ,7754
118
+ inspect_ai/_view/www/src/samples/SamplesTab.mjs,sha256=XSONY82wL02cyz9jye5osaJyd1U55zMvYA4mRK89Spg,8529
118
119
  inspect_ai/_view/www/src/samples/SamplesTools.mjs,sha256=A9qHXdMmFAfhGP2ceMxbmyWJBhWVKbi3qfNM3gGPDR0,854
119
120
  inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs,sha256=FMWN67tPNcWTtqd8qPbaFWtsBBzP9013ZhFCfGsajNQ,775
120
121
  inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs,sha256=f1FZBuhuFB_HUD4HoEASoY1cY4GUHIa0ZK9cAV_Hz4w,4834
@@ -177,16 +178,17 @@ inspect_ai/scorer/_metrics/__init__.py,sha256=NF9j0SoDIsGaf3Nl6pzGQYuycAc3gE-1af
177
178
  inspect_ai/scorer/_metrics/accuracy.py,sha256=U8H0iwA3qzqxanaRfYhhMmOV915VR6fbYTHNHyYES6g,943
178
179
  inspect_ai/scorer/_metrics/mean.py,sha256=gUMPBiWwnki9mYsEGVvme78dJCHqGoFVAEA9k7JHJ2E,555
179
180
  inspect_ai/scorer/_metrics/std.py,sha256=uwJW4V2NVDUqeyyn0rlHwguH7Z4jocYO_y7loLbqcbM,1250
180
- inspect_ai/solver/__init__.py,sha256=L8F7CJcS0u1CTvHmOihhJZ8nDcchd2AZzkPipRWg-bk,671
181
- inspect_ai/solver/_critique.py,sha256=0IK-iYyt8tN1C8la6lv-1syfyIb1ZMgwghHtyYE4W00,3100
182
- inspect_ai/solver/_multiple_choice.py,sha256=sm-xo24zOdJr-rG0RGwdhIfNvp18_as55KvJw7tuuN4,6245
183
- inspect_ai/solver/_plan.py,sha256=0WSR7saiGaatFCq1dVfprzpfs0aaA68EHsTfLPaZbOo,5190
184
- inspect_ai/solver/_prompt.py,sha256=-qlLgZiDqYnNxSTBx2eSL8dnUxBX9i5aNsWknvNbH-A,2292
185
- inspect_ai/solver/_solver.py,sha256=j6FhwsBVpX2SyWTCBZbVwFJ-DWU833GOS4o9VilCx8U,8435
181
+ inspect_ai/solver/__init__.py,sha256=MqNUCE2-zcICh79ZSzIKp7RdGR5auJ3OBAD0v17mQfw,695
182
+ inspect_ai/solver/_critique.py,sha256=87gYcwyF3j44IGkrSTXGVmerX7HQzOyowAuIcy2JHIo,3124
183
+ inspect_ai/solver/_multiple_choice.py,sha256=_qCNjeB5QiQgACBt621Xo65e5V9AU2natHD8DePrTXU,6269
184
+ inspect_ai/solver/_plan.py,sha256=geJU5BMKs9iM5vAQnT68_aiZNXmQNQ0CpuKkNdxNdGQ,5214
185
+ inspect_ai/solver/_prompt.py,sha256=eXby9F6OoQ5Ivq2lpQVMdsmEbpO6E8oo03gBl3zvn2o,2316
186
+ inspect_ai/solver/_solver.py,sha256=hyfKFFYAuQXfR69l-OMG-w5BN569IQRn06FzahCYN24,5572
187
+ inspect_ai/solver/_task_state.py,sha256=98e8vxxA94cJ14Ia2BkqoZU5uclvfMJqiw0ItCxOzH0,4700
186
188
  inspect_ai/solver/_util.py,sha256=pthrf-CzC6FnQYSUFLXTYM4wFEJptZrh5POTmV-Jtow,446
187
189
  inspect_ai/solver/_tool/tool.py,sha256=6qpx9Q4JoAa6_KvVm2ul-oXBtgA4z7FYCyz6c7fjJ5A,3839
188
190
  inspect_ai/solver/_tool/tool_def.py,sha256=PMEWdzMSSB1h1HWKQNv-O766KTz7hbXA37-o8qu-QLA,2434
189
- inspect_ai/solver/_tool/use_tools.py,sha256=SkbfspCbtnxDH_u9dDTKcgNAG4IJ9-eDCTQhsHlLRt0,1680
191
+ inspect_ai/solver/_tool/use_tools.py,sha256=fqqLtvD5_hMGnGCNTwrok315MPr6CIftwa6wio5b04k,1705
190
192
  inspect_ai/solver/_tool/web_search.py,sha256=ubdrbzMQoQuaNU-Qqc8VOaxOakWoo9R2uh3kLQvTTHU,7439
191
193
  inspect_ai/util/__init__.py,sha256=jc4QOrjjCggUmtiXSNQPBdzZv79a6158Ams-a3FEASI,247
192
194
  inspect_ai/util/_context/__init__.py,sha256=1D1hDT-u7xgIoqrdXo4SkBbBs69Kq2wLx2lqtlksoQY,280
@@ -194,9 +196,9 @@ inspect_ai/util/_context/concurrency.py,sha256=v5G57onvFRf2lktkPoNJavOBkheqrAglD
194
196
  inspect_ai/util/_context/logger.py,sha256=SODT-AQT-UcFzcRbjsrD0XWawoAF1nIKvgZ5LGqJOEs,690
195
197
  inspect_ai/util/_context/resource.py,sha256=6CDOos4izeGWfFh8Jq3BvonrsuH69JMXLtyAr7aUskE,3148
196
198
  inspect_ai/util/_context/subprocess.py,sha256=Md_1_tShPxMBFUEzNIJCOT8JJaNLFxKsfw1GeDp3CrU,4734
197
- inspect_ai-0.3.11.dist-info/LICENSE,sha256=aYPffOl9TwBXDQ8g33Jh6AsBhobb3A76qNm7r2HZsps,1079
198
- inspect_ai-0.3.11.dist-info/METADATA,sha256=DTFG0Ma1UjCoqcfC06s1r8ZNcniVcxIYshcoaKnUhcE,4185
199
- inspect_ai-0.3.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
200
- inspect_ai-0.3.11.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
201
- inspect_ai-0.3.11.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
202
- inspect_ai-0.3.11.dist-info/RECORD,,
199
+ inspect_ai-0.13.3.dist-info/LICENSE,sha256=aYPffOl9TwBXDQ8g33Jh6AsBhobb3A76qNm7r2HZsps,1079
200
+ inspect_ai-0.13.3.dist-info/METADATA,sha256=pclK1FtHKH1NR932xWpnfeDZk04qD0hYigH2FJebvu0,4185
201
+ inspect_ai-0.13.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
202
+ inspect_ai-0.13.3.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
203
+ inspect_ai-0.13.3.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
204
+ inspect_ai-0.13.3.dist-info/RECORD,,
File without changes