inspect-ai 0.3.11__py3-none-any.whl → 0.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +1 -1
- inspect_ai/_cli/list.py +1 -1
- inspect_ai/_eval/eval.py +1 -1
- inspect_ai/_eval/list.py +1 -1
- inspect_ai/_eval/loader.py +1 -1
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/score.py +1 -1
- inspect_ai/_eval/task/__init__.py +3 -0
- inspect_ai/_eval/task/run.py +7 -4
- inspect_ai/_eval/task/util.py +1 -1
- inspect_ai/_view/www/App.mjs +19 -7
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +1 -0
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +1 -1
- inspect_ai/solver/__init__.py +2 -1
- inspect_ai/solver/_critique.py +2 -1
- inspect_ai/solver/_multiple_choice.py +2 -1
- inspect_ai/solver/_plan.py +2 -1
- inspect_ai/solver/_prompt.py +2 -1
- inspect_ai/solver/_solver.py +2 -103
- inspect_ai/solver/_task_state.py +145 -0
- inspect_ai/solver/_tool/use_tools.py +2 -1
- {inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/RECORD +28 -26
- /inspect_ai/_eval/{types.py → task/task.py} +0 -0
- {inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/top_level.txt +0 -0
inspect_ai/__init__.py
CHANGED
@@ -6,7 +6,7 @@ from inspect_ai._eval.eval import eval, eval_async, eval_retry, eval_retry_async
 from inspect_ai._eval.list import list_tasks
 from inspect_ai._eval.registry import task
 from inspect_ai._eval.score import score, score_async
-from inspect_ai._eval.
+from inspect_ai._eval.task import Task, TaskInfo, Tasks
 from inspect_ai._util.constants import PKG_NAME

 __version__ = importlib_version(PKG_NAME)
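With this change `Task`, `TaskInfo`, and `Tasks` resolve from `inspect_ai._eval.task` (the old `types` module was moved), so the public `from inspect_ai import Task, task` surface is unchanged. A minimal sketch of a task defined against these exports (the sample, scorer, and commented-out model string are illustrative, not part of this diff):

```python
from inspect_ai import Task, task
from inspect_ai.dataset import Sample
from inspect_ai.scorer import match
from inspect_ai.solver import generate


@task
def hello_world():
    # single-sample dataset, generate-only plan, exact-match scorer
    return Task(
        dataset=[Sample(input="Just reply with the word hello.", target="hello")],
        plan=[generate()],
        scorer=match(),
    )


# run with e.g.:  eval(hello_world(), model="openai/gpt-4")
```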
inspect_ai/_cli/list.py
CHANGED
@@ -11,7 +11,7 @@ from typing_extensions import Unpack
 from inspect_ai._cli.common import CommonOptions, common_options, resolve_common_options
 from inspect_ai._cli.util import parse_cli_args
 from inspect_ai._eval.list import list_tasks
-from inspect_ai._eval.
+from inspect_ai._eval.task import TaskInfo
 from inspect_ai.log import list_eval_logs

inspect_ai/_eval/eval.py
CHANGED
@@ -26,10 +26,10 @@ from inspect_ai.solver import Solver
 from inspect_ai.util._context import init_async_context

 from .loader import resolve_tasks
+from .task import Tasks, TaskSpec
 from .task.log import TaskLogger
 from .task.run import task_run
 from .task.util import task_file, task_run_dir
-from .types import Tasks, TaskSpec

 log = logging.getLogger(__name__)
inspect_ai/_eval/list.py
CHANGED
inspect_ai/_eval/loader.py
CHANGED
@@ -18,8 +18,8 @@ from inspect_ai.model import Model, ModelName

 from .list import task_files
 from .registry import task_create
+from .task import Task, TaskInfo, Tasks
 from .task.constants import TASK_FILE_ATTR, TASK_RUN_DIR_ATTR
-from .types import Task, TaskInfo, Tasks


 def resolve_tasks(
inspect_ai/_eval/registry.py
CHANGED
inspect_ai/_eval/score.py
CHANGED
@@ -17,9 +17,9 @@ from inspect_ai.model import ModelName
 from inspect_ai.scorer import Metric, Score, Scorer, Target
 from inspect_ai.solver import TaskState

+from .task import Task
 from .task.results import eval_results
 from .task.util import task_run_dir
-from .types import Task


 def score(log: EvalLog, scorer: Scorer) -> EvalLog:
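Only the `Task` import moves here; the `score(log: EvalLog, scorer: Scorer) -> EvalLog` entry point shown in context is unchanged. A sketch of re-scoring an existing log with a different scorer (the log paths and the choice of `includes()` are illustrative):

```python
from inspect_ai import score
from inspect_ai.log import read_eval_log, write_eval_log
from inspect_ai.scorer import includes

log = read_eval_log("./logs/hello-world.json")  # a previously written eval log
rescored = score(log, includes())               # re-score with a different scorer
write_eval_log(rescored, "./logs/hello-world-rescored.json")
```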
inspect_ai/_eval/task/run.py
CHANGED
@@ -23,6 +23,7 @@ from inspect_ai.log import (
     EvalConfig,
     EvalError,
     EvalLog,
+    EvalResults,
     EvalStats,
 )
 from inspect_ai.log._log import eval_error
@@ -34,7 +35,7 @@ from inspect_ai.model import (
 from inspect_ai.scorer import Score, Scorer, Target
 from inspect_ai.solver import Generate, Plan, Solver, TaskState

-from ..
+from ..task import Task
 from .generate import task_generate
 from .images import samples_with_base64_images, states_with_base64_images
 from .log import TaskLogger, collect_eval_data, log_output, log_plan
@@ -101,7 +102,9 @@ async def task_run(
     plan = (
         plan
         if isinstance(plan, Plan)
-        else Plan(plan)
+        else Plan(plan)
+        if plan is not None
+        else task.plan
     )
     score = score and task.scorer is not None
     scorer: Scorer | None = task.scorer if (score and task.scorer) else None
@@ -132,7 +135,6 @@ async def task_run(
         len(plan.steps) + (1 if plan.finish else 0) + (1) # scorer
     )
     with td.progress(total=total_steps) as p:
-
         # forward progress
         def progress() -> None:
             p.update(1)
@@ -195,6 +197,8 @@ async def task_run(
             metrics=task.metrics,
         )
         logger.log_results(results)
+    else:
+        results = EvalResults()

     # collect eval data
     collect_eval_data(stats, logger)
@@ -295,7 +299,6 @@ async def resolve_dataset(
     epochs: int,
     log_images: bool,
 ) -> tuple[Dataset, list[Sample], list[TaskState]]:
-
     # apply limit to dataset
     dataset_limit = (
         slice(0, len(dataset))
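Two behavioral changes in `task_run()`: when no `plan` argument is supplied it now falls back to the task's own plan, and when scoring is skipped `results` is now set to an empty `EvalResults()` instead of being left unset. The chained conditional is equivalent to this sketch (names follow the hunk; it assumes `Plan` accepts a solver or list of solvers, as the `Plan(plan)` call above implies):

```python
from inspect_ai.solver import Plan, Solver


def resolve_plan(plan: Plan | Solver | list[Solver] | None, task_plan: Plan) -> Plan:
    # mirrors: plan if isinstance(plan, Plan) else Plan(plan) if plan is not None else task.plan
    if isinstance(plan, Plan):
        return plan          # already a Plan: use as-is
    elif plan is not None:
        return Plan(plan)    # a solver or list of solvers: wrap it
    else:
        return task_plan     # nothing passed: fall back to the task's plan
```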
inspect_ai/_eval/task/util.py
CHANGED
inspect_ai/_view/www/App.mjs
CHANGED
@@ -20,6 +20,7 @@ import { WorkSpace } from "./src/workspace/WorkSpace.mjs";

 export function App() {
   const [selected, setSelected] = useState(-1);
+  const [pendingLog, setPendingLog] = useState(undefined);
   const [logs, setLogs] = useState({ log_dir: "", files: [] });
   const [logHeaders, setLogHeaders] = useState({});
   const [offcanvas, setOffcanvas] = useState(false);
@@ -126,24 +127,32 @@ export function App() {
   // Ensure that we have a selected index when there is are
   // new logs
   useEffect(() => {
-
-
+    if (logs && pendingLog) {
+      const index = logs.files.findIndex((val) => {
+        return pendingLog.endsWith(val.name);
+      });
+      if (index > -1) {
+        setSelected(index);
+      }
+      setPendingLog(undefined);
+    }
+  }, [logs, pendingLog])

   // listen for updateState messages from vscode
   useEffect(() => {
-    const onMessage = (e) => {
+    const onMessage = async (e) => {
       switch (e.data.type || e.data.message) {
         case "updateState": {
           if (e.data.url) {
-
             const index = logs.files.findIndex((val) => {
-              return
+              return e.data.url.endsWith(val.name);
             });
             if (index > -1) {
               // Select the correct index
               setSelected(index);
             } else {
-
+              await loadLogs();
+              setPendingLog(e.data.url);
             }
           }
         }
@@ -153,7 +162,7 @@ export function App() {
     return () => {
       window.removeEventListener("message", onMessage);
     };
-  }, [setCurrentLog]);
+  }, [logs, setCurrentLog, setPendingLog]);

   useEffect(async () => {
     // See whether a specific task_file has been passed.
@@ -176,6 +185,9 @@ export function App() {
     // initial fetch of logs
     await load();

+    // Select the first log
+    setSelected(0);
+
     // poll every 1s for events
     setInterval(() => {
       api.client_events().then((events) => {
inspect_ai/solver/__init__.py
CHANGED
@@ -6,7 +6,8 @@ from ._prompt import (
     prompt_template,
     system_message,
 )
-from ._solver import Generate, Solver,
+from ._solver import Generate, Solver, generate, solver
+from ._task_state import TaskState
 from ._tool.tool import Tool, tool
 from ._tool.use_tools import use_tools
 from ._tool.web_search import web_search
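`TaskState` is now re-exported from `inspect_ai.solver` alongside `generate` and `solver`, so custom solvers keep importing everything from one place. A sketch of a solver written against these names (the hint-appending behavior is hypothetical, not part of this diff):

```python
from inspect_ai.model import ChatMessageUser
from inspect_ai.solver import Generate, TaskState, solver


@solver
def add_hint(hint: str):
    async def solve(state: TaskState, generate: Generate) -> TaskState:
        # state.messages is the mutable chat history described by TaskState
        state.messages.append(ChatMessageUser(content=f"Hint: {hint}"))
        return await generate(state)

    return solve
```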
inspect_ai/solver/_critique.py
CHANGED
inspect_ai/solver/_plan.py
CHANGED
inspect_ai/solver/_prompt.py
CHANGED
@@ -3,7 +3,8 @@ from typing import Any
 from inspect_ai.model import ChatMessageSystem
 from inspect_ai.util import resource

-from ._solver import Generate, Solver,
+from ._solver import Generate, Solver, solver
+from ._task_state import TaskState
 from ._util import append_system_message

inspect_ai/solver/_solver.py
CHANGED
@@ -18,110 +18,9 @@ from inspect_ai._util.registry import (
     registry_name,
     registry_tag,
 )
-from inspect_ai.model import
-    ChatMessage,
-    ChatMessageUser,
-    GenerateConfigArgs,
-    ModelName,
-    ModelOutput,
-    ToolChoice,
-)
-
-from ._tool.tool import Tool
-
+from inspect_ai.model import GenerateConfigArgs

-
-    def __init__(
-        self,
-        model: ModelName,
-        sample_id: int | str,
-        epoch: int,
-        input: str | list[ChatMessage],
-        choices: list[str] | None,
-        messages: list[ChatMessage],
-        tools: list[Tool] = [],
-        tool_choice: ToolChoice | None = None,
-        output: ModelOutput | None = None,
-        completed: bool = False,
-        metadata: dict[str, Any] = {},
-    ) -> None:
-        self._model = model
-
-        self.sample_id = sample_id
-        """Unique id for sample."""
-
-        self.epoch = epoch
-        """Epoch number for sample."""
-
-        self._input = input
-
-        self.choices = choices
-        """Sample choices."""
-
-        self.messages = messages
-        """Chat conversation history for sample."""
-
-        self.tools = tools
-        """Tools available to the model."""
-
-        self.tool_choice = tool_choice
-        """Tool choice directive."""
-
-        self.output = output if output else ModelOutput(model=str(model), choices=[])
-        """Model output."""
-
-        self.completed = completed
-        """Flag to indicate that the solver loop should terminate."""
-
-        self.metadata = metadata
-        """Additional task state metadata."""
-
-    @property
-    def model(self) -> ModelName:
-        """Name of model being evaluated."""
-        return self._model
-
-    @property
-    def input(self) -> str | list[ChatMessage]:
-        """Sample input."""
-        return self._input
-
-    @property
-    def input_text(self) -> str:
-        """Sample input as text."""
-        if isinstance(self._input, str):
-            return self._input
-        else:
-            input = next(
-                (message.text for message in self._input if message.role == "user"),
-                None,
-            )
-            if input:
-                return input
-            else:
-                raise ValueError(
-                    "input_text requested from TaskState but none available"
-                )
-
-    @property
-    def user_prompt(self) -> ChatMessageUser:
-        """User prompt for this state.
-
-        Tasks are very general and can have may types of inputs.
-        However, in many cases solvers assume they can interact with
-        the state as a "chat" in a predictable fashion (e.g. prompt
-        engineering solvers). This property enables easy read and
-        write access to the user chat prompt. Raises an
-        exception if there is no user prompt
-
-        Returns:
-            First user `ChatMessage` in the task state.
-        """
-        prompt = next((m for m in self.messages if m.role == "user"), None)
-        if prompt:
-            return prompt
-        else:
-            raise ValueError("user_prompt requested from TaskState but none available")
+from ._task_state import TaskState


 @runtime_checkable
inspect_ai/solver/_task_state.py
ADDED
@@ -0,0 +1,145 @@
+from typing import Any
+
+from inspect_ai.model import (
+    ChatMessage,
+    ChatMessageUser,
+    ModelName,
+    ModelOutput,
+    ToolChoice,
+)
+
+from ._tool.tool import Tool
+
+
+class TaskState:
+    """
+    The `TaskState` represents the internal state of the `Task` being run for a single `Sample`.
+
+    It's a mutable object that is updated by each solver during a sample's
+    evaluation. It allows us to maintain things like the message history between
+    the running `Task` and the model, the tools available to the model, the
+    final output of the model and whether or not it's completed yet.
+    """
+
+    def __init__(
+        self,
+        model: ModelName,
+        sample_id: int | str,
+        epoch: int,
+        input: str | list[ChatMessage],
+        choices: list[str] | None,
+        messages: list[ChatMessage],
+        tools: list[Tool] = [],
+        tool_choice: ToolChoice | None = None,
+        output: ModelOutput | None = None,
+        completed: bool = False,
+        metadata: dict[str, Any] = {},
+    ) -> None:
+        self._model = model
+        """Model name used for this task."""
+
+        self.sample_id = sample_id
+        """Unique id for sample."""
+
+        self.epoch = epoch
+        """Epoch number for sample."""
+
+        self._input = input
+        """
+        The original input from the `Sample` for this `TaskState`.
+
+        Should be treated as immutable and not changed during the run, so that
+        it can be referenced or checked wherever needed. Access through `input`
+        or `input_text` only
+        """
+
+        self.choices = choices
+        """
+        List of choices for the sample, specifically used by the `multiple_choice` scorer.
+
+        For example, if the sample was a multiple choice question like "What is
+        the capital of France? A) Paris B) London C) Berlin", we would store the
+        possible answers here.
+        """
+
+        self.messages = messages
+        """
+        Chat conversation history for sample.
+
+        This will generally get appended to every time a `generate` call is made
+        to the model. Useful for both debug and for solvers/scorers to assess
+        model performance or choose the next step.
+        """
+
+        self.tools = tools
+        """Tools available to the model."""
+
+        self.tool_choice = tool_choice
+        """Tool choice directive."""
+
+        self.output = output if output else ModelOutput(model=str(model), choices=[])
+        """
+        The 'final' model output once we've completed all solving.
+
+        For simple evals this may just be the last `message` from the
+        conversation history, but more complex solvers may generate this in
+        different ways depending on what solvers are used..
+        """
+
+        self.completed = completed
+        """Flag to indicate that the solver loop should terminate."""
+
+        self.metadata = metadata
+        """Additional task state metadata."""
+
+    @property
+    def model(self) -> ModelName:
+        """Name of model being evaluated."""
+        return self._model
+
+    @property
+    def input(self) -> str | list[ChatMessage]:
+        """Input from the `Sample`, should be considered immutable."""
+        return self._input
+
+    @property
+    def input_text(self) -> str:
+        """
+        Convenience function for accessing the initial input from the `Sample` as a string.
+
+        If the `input` is a `list[ChatMessage]`, this will return the text from
+        the first chat message
+        """
+        if isinstance(self._input, str):
+            return self._input
+        else:
+            input = next(
+                (message.text for message in self._input if message.role == "user"),
+                None,
+            )
+            if input:
+                return input
+            else:
+                raise ValueError(
+                    "input_text requested from TaskState but none available"
+                )
+
+    @property
+    def user_prompt(self) -> ChatMessageUser:
+        """User prompt for this state.
+
+        Tasks are very general and can have may types of inputs.
+        However, in many cases solvers assume they can interact with
+        the state as a "chat" in a predictable fashion (e.g. prompt
+        engineering solvers). This property enables easy read and
+        write access to the user chat prompt. Raises an
+        exception if there is no user prompt
+
+        Returns:
+            First user `ChatMessage` in the task state.
+        """
+        prompt = next((m for m in self.messages if m.role == "user"), None)
+        if prompt:
+            return prompt
+        else:
+            raise ValueError("user_prompt requested from TaskState but none available")
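A sketch of how the accessors above behave, assuming a `state: TaskState` handed to you by the framework (e.g. inside a solver or scorer); nothing here constructs a `TaskState` directly:

```python
from inspect_ai.solver import TaskState


def describe(state: TaskState) -> str:
    # input_text returns the str input directly, or falls back to the text of
    # the first user message when input is a list[ChatMessage]
    question = state.input_text
    # user_prompt is the first user ChatMessage in the mutable history
    prompt = state.user_prompt
    return f"sample {state.sample_id} (epoch {state.epoch}): {question!r} -> {prompt.text!r}"
```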
inspect_ai/solver/_tool/use_tools.py
CHANGED
@@ -3,7 +3,8 @@ from inspect_ai.model import (
     ToolChoice,
 )

-from .._solver import Generate, Solver,
+from .._solver import Generate, Solver, solver
+from .._task_state import TaskState
 from .._util import append_system_message
 from .tool import Tool
 from .tool_def import tool_defs
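`use_tools()` now types its solver against the relocated `TaskState`; usage is unchanged. A sketch of registering a tool in a plan, modeled on the package's stock addition-tool example (the `prompt` argument and docstring conventions are assumptions about this era of the API, not part of this diff):

```python
from inspect_ai.solver import generate, tool, use_tools


@tool(prompt="If given an arithmetic problem, use the add tool to compute the result.")
def add():
    async def execute(x: int, y: int):
        """
        Add two numbers.

        Args:
            x (int): First number to add.
            y (int): Second number to add.

        Returns:
            The sum of the two numbers.
        """
        return x + y

    return execute


# a plan that makes the tool available to the model before generating
plan = [use_tools(add()), generate()]
```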
{inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/RECORD
CHANGED
@@ -1,10 +1,10 @@
-inspect_ai/__init__.py,sha256=
+inspect_ai/__init__.py,sha256=laGXMK1BIoOC_x2D31Cmbs_3c335exZL0FByjAqRO-I,671
 inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
 inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/_cli/common.py,sha256=CTEvRG2kAwsP7HxI5LjpdXh4RdVBwD0oMfD0Sn8Hl3A,1705
 inspect_ai/_cli/eval.py,sha256=LyW4Gc15Q_fBbvTXfJczhXyXAHi_MoImjrMkTLRRf9g,7886
 inspect_ai/_cli/info.py,sha256=K1SMxB8LiLONlwygU_BxcL1u2KJyYyFDpW4qkgMIcjE,1922
-inspect_ai/_cli/list.py,sha256=
+inspect_ai/_cli/list.py,sha256=IIkqT1HFqTaYPn3wNxHT656-wHwCHvGA57Rtol2Mc4Y,3735
 inspect_ai/_cli/main.py,sha256=PjIGgPVAky8eNBX5N_O2EX4TpgIdkmFY1uYtsgSCvkQ,1028
 inspect_ai/_cli/score.py,sha256=6FRjaqYWY1XHWFFwxZyQJW1cPnIhr7e3ZotTvSZyTgg,2859
 inspect_ai/_cli/util.py,sha256=nT3W7uUGzslNOUsK95lp6ZhPmTRfqb9i0aHJ3Bx_L2Q,576
@@ -13,19 +13,20 @@ inspect_ai/_display/__init__.py,sha256=PPuC3ydm-duhpdTShtMqVpFMr2BNndGmIIOy0A-Y-
 inspect_ai/_display/_display.py,sha256=Jd2LB49jMANpLFpgmRFeAa3pIm5MKvROp3bW1x-p2O4,1417
 inspect_ai/_display/logger.py,sha256=57_5ToLTozASQGdRNTIPt8vmAKF7u3pJnhoXbWY3oAs,2720
 inspect_ai/_display/rich.py,sha256=Qe357AMXqlPADmW0yZCLYcI1g_b1Gu7Sa5wTZFZbkYg,11708
-inspect_ai/_eval/eval.py,sha256=
-inspect_ai/_eval/list.py,sha256=
-inspect_ai/_eval/loader.py,sha256=
-inspect_ai/_eval/registry.py,sha256
-inspect_ai/_eval/score.py,sha256=
-inspect_ai/_eval/
+inspect_ai/_eval/eval.py,sha256=la4sfT6EYXtsTkJ2yMmF_b6Saz4q6vJYvhQ-26kpBlI,17292
+inspect_ai/_eval/list.py,sha256=YsPclTQKfiafUu27QEpCve6IjDAHtGA-fv-7_-tCctM,6048
+inspect_ai/_eval/loader.py,sha256=wKvMDdaMnLbJ3opKtKD8a8qrpE-y2huvv9p4EnXKF3Q,7880
+inspect_ai/_eval/registry.py,sha256=vTDPgKnGOGohl-IKQ-I2adkpXaQBCKuhA_cx78f3qMI,3998
+inspect_ai/_eval/score.py,sha256=g4sHYxg9ICfOfrS6ZjfWurMtNo0THRSIemONHyUfYKQ,4333
+inspect_ai/_eval/task/__init__.py,sha256=KCAhe9afbgLqolxXw80QSWwUZvfXSoqtjL8pyj-WUOg,123
 inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
 inspect_ai/_eval/task/generate.py,sha256=53UNk9ReB7jf0FBwUkH_3IqSKg-vr5biQ3hLKbKwUic,3925
 inspect_ai/_eval/task/images.py,sha256=4bLN66eF2322r42FoINzGXMh1PTRjP_eCceZ8_msPeQ,2135
 inspect_ai/_eval/task/log.py,sha256=G0tj9QAB6BFTrSyuBEf6CFAZ6NpkLZfIWQUs1ZCYGNo,5255
 inspect_ai/_eval/task/results.py,sha256=eLfU7Dz4mmTGeKosBz4Iog1poakHdLgHkq2bs4Vw50s,2602
-inspect_ai/_eval/task/run.py,sha256=
-inspect_ai/_eval/task/
+inspect_ai/_eval/task/run.py,sha256=SDJz4KKxLdpOLT8yjQAwqbkaptSuCDLfZidPdkhgsWg,11786
+inspect_ai/_eval/task/task.py,sha256=tqBrdnKEay9KQ3pcX07CW8REaQgEFSsbqWGFLmyNAV8,4073
+inspect_ai/_eval/task/util.py,sha256=_HcCOdjypVqgvLWakBgKx0MjrEhSWoKu7SX85DfxpiA,1173
 inspect_ai/_util/_async.py,sha256=OuK_dnZrnGi1W2-72sbpObO4Knr5Q_cLxJL1kTY12t8,276
 inspect_ai/_util/appdirs.py,sha256=6OsSZ8JcN6Nkp739CxCXfJwx_g9TqTfo7iAKDhtw7SY,355
 inspect_ai/_util/constants.py,sha256=ky9MfU9zrIvRX1PrlIr89npFtleSPoHevbYt4_t57go,445
@@ -53,7 +54,7 @@ inspect_ai/_view/schema.py,sha256=SlBJ75EsCjEtSzcVMuP2yu2cI3ju1Z3amUWaWbqNZjM,14
 inspect_ai/_view/view.py,sha256=Gm2p5A8dvwFM2NWkz8GPF7pqSmcC4X1dtXxPvkw0Q_M,9624
 inspect_ai/_view/www/.gitignore,sha256=8amgmyJs-OmKQoYgDF2evVwokkcHrDMXlH-OzwarFns,13
 inspect_ai/_view/www/App.css,sha256=VZtFGBwW_JsKtiC4WY0lLT-mcOlGYdxRvqm1-KyeSyQ,13934
-inspect_ai/_view/www/App.mjs,sha256=
+inspect_ai/_view/www/App.mjs,sha256=yEyCPMSH-wu7YMeNRTaZbdaSW3pvYYR_dF7jO_lQHWM,7605
 inspect_ai/_view/www/favicon.svg,sha256=b9AHYZaO2zBzeKH6G4PwXZMGGW_UxY0omKHam-c9MAs,1508
 inspect_ai/_view/www/index.html,sha256=HEUzotuCnX5GClv4feeppOhNrPN6CxXqp4N_Nsyp7oo,2158
 inspect_ai/_view/www/log-schema.json,sha256=pGcQiJEUqdzfsxKDqc-eEfRHXaP_FAVbWpUahpWaf9g,38118
@@ -113,8 +114,8 @@ inspect_ai/_view/www/src/samples/SampleDialog.mjs,sha256=ygslG6f8QOzRAwvRiSsKrPW
 inspect_ai/_view/www/src/samples/SampleDisplay.mjs,sha256=KZZB2YZkoSPDKK5facTwrlUccc23xqfsXO_W67QXN-Q,6525
 inspect_ai/_view/www/src/samples/SampleList.mjs,sha256=05TDU3dZFF1AOXDdtnddSNS6Y0qJDFtexFiDy4GnO-M,6598
 inspect_ai/_view/www/src/samples/SampleScoreView.mjs,sha256=Pm3rAV-W2dJ56aFxaxWD7ooaVJMbbSsTPe113wfFOPM,2989
-inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs,sha256=
-inspect_ai/_view/www/src/samples/SamplesTab.mjs,sha256=
+inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs,sha256=itfKTMAds_Y7cLjFJqzs--j3x28dlSH_3oW1QhbZxeQ,7754
+inspect_ai/_view/www/src/samples/SamplesTab.mjs,sha256=XSONY82wL02cyz9jye5osaJyd1U55zMvYA4mRK89Spg,8529
 inspect_ai/_view/www/src/samples/SamplesTools.mjs,sha256=A9qHXdMmFAfhGP2ceMxbmyWJBhWVKbi3qfNM3gGPDR0,854
 inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs,sha256=FMWN67tPNcWTtqd8qPbaFWtsBBzP9013ZhFCfGsajNQ,775
 inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs,sha256=f1FZBuhuFB_HUD4HoEASoY1cY4GUHIa0ZK9cAV_Hz4w,4834
@@ -177,16 +178,17 @@ inspect_ai/scorer/_metrics/__init__.py,sha256=NF9j0SoDIsGaf3Nl6pzGQYuycAc3gE-1af
 inspect_ai/scorer/_metrics/accuracy.py,sha256=U8H0iwA3qzqxanaRfYhhMmOV915VR6fbYTHNHyYES6g,943
 inspect_ai/scorer/_metrics/mean.py,sha256=gUMPBiWwnki9mYsEGVvme78dJCHqGoFVAEA9k7JHJ2E,555
 inspect_ai/scorer/_metrics/std.py,sha256=uwJW4V2NVDUqeyyn0rlHwguH7Z4jocYO_y7loLbqcbM,1250
-inspect_ai/solver/__init__.py,sha256=
-inspect_ai/solver/_critique.py,sha256=
-inspect_ai/solver/_multiple_choice.py,sha256=
-inspect_ai/solver/_plan.py,sha256=
-inspect_ai/solver/_prompt.py,sha256
-inspect_ai/solver/_solver.py,sha256=
+inspect_ai/solver/__init__.py,sha256=MqNUCE2-zcICh79ZSzIKp7RdGR5auJ3OBAD0v17mQfw,695
+inspect_ai/solver/_critique.py,sha256=87gYcwyF3j44IGkrSTXGVmerX7HQzOyowAuIcy2JHIo,3124
+inspect_ai/solver/_multiple_choice.py,sha256=_qCNjeB5QiQgACBt621Xo65e5V9AU2natHD8DePrTXU,6269
+inspect_ai/solver/_plan.py,sha256=geJU5BMKs9iM5vAQnT68_aiZNXmQNQ0CpuKkNdxNdGQ,5214
+inspect_ai/solver/_prompt.py,sha256=eXby9F6OoQ5Ivq2lpQVMdsmEbpO6E8oo03gBl3zvn2o,2316
+inspect_ai/solver/_solver.py,sha256=hyfKFFYAuQXfR69l-OMG-w5BN569IQRn06FzahCYN24,5572
+inspect_ai/solver/_task_state.py,sha256=98e8vxxA94cJ14Ia2BkqoZU5uclvfMJqiw0ItCxOzH0,4700
 inspect_ai/solver/_util.py,sha256=pthrf-CzC6FnQYSUFLXTYM4wFEJptZrh5POTmV-Jtow,446
 inspect_ai/solver/_tool/tool.py,sha256=6qpx9Q4JoAa6_KvVm2ul-oXBtgA4z7FYCyz6c7fjJ5A,3839
 inspect_ai/solver/_tool/tool_def.py,sha256=PMEWdzMSSB1h1HWKQNv-O766KTz7hbXA37-o8qu-QLA,2434
-inspect_ai/solver/_tool/use_tools.py,sha256=
+inspect_ai/solver/_tool/use_tools.py,sha256=fqqLtvD5_hMGnGCNTwrok315MPr6CIftwa6wio5b04k,1705
 inspect_ai/solver/_tool/web_search.py,sha256=ubdrbzMQoQuaNU-Qqc8VOaxOakWoo9R2uh3kLQvTTHU,7439
 inspect_ai/util/__init__.py,sha256=jc4QOrjjCggUmtiXSNQPBdzZv79a6158Ams-a3FEASI,247
 inspect_ai/util/_context/__init__.py,sha256=1D1hDT-u7xgIoqrdXo4SkBbBs69Kq2wLx2lqtlksoQY,280
@@ -194,9 +196,9 @@ inspect_ai/util/_context/concurrency.py,sha256=v5G57onvFRf2lktkPoNJavOBkheqrAglD
 inspect_ai/util/_context/logger.py,sha256=SODT-AQT-UcFzcRbjsrD0XWawoAF1nIKvgZ5LGqJOEs,690
 inspect_ai/util/_context/resource.py,sha256=6CDOos4izeGWfFh8Jq3BvonrsuH69JMXLtyAr7aUskE,3148
 inspect_ai/util/_context/subprocess.py,sha256=Md_1_tShPxMBFUEzNIJCOT8JJaNLFxKsfw1GeDp3CrU,4734
-inspect_ai-0.3.
-inspect_ai-0.3.
-inspect_ai-0.3.
-inspect_ai-0.3.
-inspect_ai-0.3.
-inspect_ai-0.3.
+inspect_ai-0.13.3.dist-info/LICENSE,sha256=aYPffOl9TwBXDQ8g33Jh6AsBhobb3A76qNm7r2HZsps,1079
+inspect_ai-0.13.3.dist-info/METADATA,sha256=pclK1FtHKH1NR932xWpnfeDZk04qD0hYigH2FJebvu0,4185
+inspect_ai-0.13.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+inspect_ai-0.13.3.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
+inspect_ai-0.13.3.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
+inspect_ai-0.13.3.dist-info/RECORD,,
/inspect_ai/_eval/{types.py → task/task.py}
File without changes
{inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/LICENSE
File without changes
{inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/WHEEL
File without changes
{inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/entry_points.txt
File without changes
{inspect_ai-0.3.11.dist-info → inspect_ai-0.13.3.dist-info}/top_level.txt
File without changes