inspect-ai 0.3.73__py3-none-any.whl → 0.3.75__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +3 -2
- inspect_ai/_cli/cache.py +1 -1
- inspect_ai/_cli/common.py +15 -0
- inspect_ai/_cli/eval.py +4 -5
- inspect_ai/_cli/log.py +1 -1
- inspect_ai/_cli/sandbox.py +1 -1
- inspect_ai/_cli/trace.py +1 -1
- inspect_ai/_cli/view.py +1 -1
- inspect_ai/_display/core/config.py +3 -1
- inspect_ai/_eval/eval.py +55 -61
- inspect_ai/_eval/evalset.py +63 -154
- inspect_ai/_eval/loader.py +27 -54
- inspect_ai/_eval/registry.py +1 -10
- inspect_ai/_eval/run.py +3 -4
- inspect_ai/_eval/task/__init__.py +8 -2
- inspect_ai/_eval/task/log.py +9 -1
- inspect_ai/_eval/task/resolved.py +35 -0
- inspect_ai/_eval/task/task.py +50 -69
- inspect_ai/_eval/task/tasks.py +30 -0
- inspect_ai/_util/constants.py +3 -0
- inspect_ai/_util/dotenv.py +17 -0
- inspect_ai/_util/registry.py +43 -2
- inspect_ai/_view/server.py +28 -10
- inspect_ai/_view/www/dist/assets/index.css +4 -3
- inspect_ai/_view/www/dist/assets/index.js +13030 -25523
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/appearance/styles.ts +6 -5
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +2 -2
- inspect_ai/_view/www/src/constants.ts +3 -0
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +141 -20
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +2 -1
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +7 -5
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +1 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +3 -1
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +5 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +5 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +17 -12
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -1
- inspect_ai/_view/www/yarn.lock +12 -5
- inspect_ai/log/_log.py +10 -1
- inspect_ai/log/_recorders/eval.py +27 -8
- inspect_ai/log/_recorders/json.py +2 -2
- inspect_ai/model/_cache.py +3 -1
- inspect_ai/model/_chat_message.py +12 -1
- inspect_ai/model/_model.py +25 -11
- inspect_ai/model/_providers/anthropic.py +34 -2
- inspect_ai/model/_providers/google.py +6 -2
- inspect_ai/model/_providers/none.py +31 -0
- inspect_ai/model/_providers/providers.py +7 -0
- inspect_ai/solver/_bridge/bridge.py +1 -1
- inspect_ai/solver/_chain.py +7 -6
- inspect_ai/tool/_tools/_computer/_computer.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +1 -1
- inspect_ai/tool/_tools/_web_search.py +2 -2
- inspect_ai/util/_sandbox/context.py +2 -1
- inspect_ai/util/_sandbox/environment.py +17 -2
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/METADATA +4 -4
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/RECORD +63 -60
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.73.dist-info → inspect_ai-0.3.75.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/task.py
CHANGED
@@ -13,6 +13,7 @@ from inspect_ai.approval._policy import ApprovalPolicy, approval_policies_from_c
|
|
13
13
|
from inspect_ai.dataset import Dataset, MemoryDataset, Sample
|
14
14
|
from inspect_ai.log import EvalLog
|
15
15
|
from inspect_ai.model import GenerateConfig
|
16
|
+
from inspect_ai.model._model import Model, get_model
|
16
17
|
from inspect_ai.scorer import Metric, Scorer
|
17
18
|
from inspect_ai.scorer._reducer import ScoreReducers, create_reducers
|
18
19
|
from inspect_ai.solver import Plan, Solver, generate
|
@@ -50,6 +51,7 @@ class Task:
|
|
50
51
|
cleanup: Callable[[TaskState], Awaitable[None]] | None = None,
|
51
52
|
scorer: Scorer | list[Scorer] | None = None,
|
52
53
|
metrics: list[Metric] | dict[str, list[Metric]] | None = None,
|
54
|
+
model: str | Model | None = None,
|
53
55
|
config: GenerateConfig = GenerateConfig(),
|
54
56
|
sandbox: SandboxEnvironmentType | None = None,
|
55
57
|
approval: str | list[ApprovalPolicy] | None = None,
|
@@ -67,42 +69,38 @@ class Task:
|
|
67
69
|
"""Create a task.
|
68
70
|
|
69
71
|
Args:
|
70
|
-
dataset
|
71
|
-
setup: (
|
72
|
-
|
73
|
-
solver: (Solver | list[Solver]): Solver or list of solvers.
|
74
|
-
Defaults to generate(), a normal call to the model.
|
72
|
+
dataset: Dataset to evaluate
|
73
|
+
setup: Setup step (always run even when the main `solver` is replaced).
|
74
|
+
solver: Solver or list of solvers. Defaults to generate(), a normal call to the model.
|
75
75
|
cleanup: Optional cleanup function for task. Called after
|
76
76
|
all solvers have run for each sample (including if an
|
77
77
|
exception occurs during the run)
|
78
|
-
scorer:
|
79
|
-
metrics (
|
80
|
-
|
81
|
-
config
|
82
|
-
sandbox (
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
Defaults to no approval policy.
|
87
|
-
epochs (int | Epochs | None): Epochs to repeat samples for and optional score
|
78
|
+
scorer: Scorer used to evaluate model output.
|
79
|
+
metrics: Alternative metrics (overrides the metrics provided by the specified scorer).
|
80
|
+
model: Default model for task (Optional, defaults to eval model).
|
81
|
+
config: Model generation config.
|
82
|
+
sandbox: Sandbox environment type (or optionally a str or tuple with a shorthand spec)
|
83
|
+
approval: Tool use approval policies.
|
84
|
+
Either a path to an approval policy config file or a list of approval policies. Defaults to no approval policy.
|
85
|
+
epochs: Epochs to repeat samples for and optional score
|
88
86
|
reducer function(s) used to combine sample scores (defaults to "mean")
|
89
|
-
fail_on_error
|
87
|
+
fail_on_error: `True` to fail on first sample error
|
90
88
|
(default); `False` to never fail on sample errors; Value between 0 and 1
|
91
89
|
to fail if a proportion of total samples fails. Value greater than 1 to fail
|
92
90
|
eval if a count of samples fails.
|
93
|
-
message_limit
|
94
|
-
token_limit
|
91
|
+
message_limit: Limit on total messages used for each sample.
|
92
|
+
token_limit: Limit on total tokens used for each sample.
|
95
93
|
time_limit: Limit on clock time (in seconds) for samples.
|
96
94
|
working_limit: Limit on working time (in seconds) for sample. Working
|
97
95
|
time includes model generation, tool calls, etc. but does not include
|
98
96
|
time spent waiting on retries or shared resources.
|
99
|
-
name:
|
97
|
+
name: Task name. If not specified is automatically
|
100
98
|
determined based on the name of the task directory (or "task")
|
101
99
|
if its anonymous task (e.g. created in a notebook and passed to
|
102
100
|
eval() directly)
|
103
|
-
version:
|
101
|
+
version: Version of task (to distinguish evolutions
|
104
102
|
of the task spec or breaking changes to it)
|
105
|
-
metadata:
|
103
|
+
metadata: Additional metadata to associate with the task.
|
106
104
|
**kwargs: Deprecated arguments.
|
107
105
|
"""
|
108
106
|
# handle deprecated args
|
@@ -135,6 +133,7 @@ class Task:
|
|
135
133
|
self.cleanup = cleanup
|
136
134
|
self.scorer = resolve_scorer(scorer)
|
137
135
|
self.metrics = metrics
|
136
|
+
self.model = resolve_model(model)
|
138
137
|
self.config = config
|
139
138
|
self.sandbox = resolve_sandbox_environment(sandbox)
|
140
139
|
self.approval = resolve_approval(approval)
|
@@ -176,6 +175,7 @@ def task_with(
|
|
176
175
|
cleanup: Callable[[TaskState], Awaitable[None]] | None | NotGiven = NOT_GIVEN,
|
177
176
|
scorer: Scorer | list[Scorer] | None | NotGiven = NOT_GIVEN,
|
178
177
|
metrics: list[Metric] | dict[str, list[Metric]] | None | NotGiven = NOT_GIVEN,
|
178
|
+
model: str | Model | NotGiven = NOT_GIVEN,
|
179
179
|
config: GenerateConfig | NotGiven = NOT_GIVEN,
|
180
180
|
sandbox: SandboxEnvironmentType | None | NotGiven = NOT_GIVEN,
|
181
181
|
approval: str | list[ApprovalPolicy] | None | NotGiven = NOT_GIVEN,
|
@@ -192,43 +192,39 @@ def task_with(
|
|
192
192
|
"""Task adapted with alternate values for one or more options.
|
193
193
|
|
194
194
|
Args:
|
195
|
-
task
|
196
|
-
dataset
|
197
|
-
setup: (
|
198
|
-
|
199
|
-
solver: (Solver | list[Solver]): Solver or list of solvers.
|
200
|
-
Defaults to generate(), a normal call to the model.
|
195
|
+
task: Task to adapt (it is deep copied prior to mutating options)
|
196
|
+
dataset: Dataset to evaluate
|
197
|
+
setup: Setup step (always run even when the main `solver` is replaced).
|
198
|
+
solver: Solver or list of solvers. Defaults to generate(), a normal call to the model.
|
201
199
|
cleanup: Optional cleanup function for task. Called after
|
202
200
|
all solvers have run for each sample (including if an
|
203
201
|
exception occurs during the run)
|
204
|
-
scorer:
|
205
|
-
metrics (
|
206
|
-
|
207
|
-
config
|
208
|
-
sandbox (
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
Defaults to no approval policy.
|
213
|
-
epochs (int | Epochs | None): Epochs to repeat samples for and optional score
|
202
|
+
scorer: Scorer used to evaluate model output.
|
203
|
+
metrics: Alternative metrics (overrides the metrics provided by the specified scorer).
|
204
|
+
model: Default model for task (Optional, defaults to eval model).
|
205
|
+
config: Model generation config.
|
206
|
+
sandbox: Sandbox environment type (or optionally a str or tuple with a shorthand spec)
|
207
|
+
approval: Tool use approval policies.
|
208
|
+
Either a path to an approval policy config file or a list of approval policies. Defaults to no approval policy.
|
209
|
+
epochs: Epochs to repeat samples for and optional score
|
214
210
|
reducer function(s) used to combine sample scores (defaults to "mean")
|
215
|
-
fail_on_error
|
211
|
+
fail_on_error: `True` to fail on first sample error
|
216
212
|
(default); `False` to never fail on sample errors; Value between 0 and 1
|
217
213
|
to fail if a proportion of total samples fails. Value greater than 1 to fail
|
218
214
|
eval if a count of samples fails.
|
219
|
-
message_limit
|
220
|
-
token_limit
|
215
|
+
message_limit: Limit on total messages used for each sample.
|
216
|
+
token_limit: Limit on total tokens used for each sample.
|
221
217
|
time_limit: Limit on clock time (in seconds) for samples.
|
222
|
-
working_limit: Limit on
|
218
|
+
working_limit: Limit on working time (in seconds) for sample. Working
|
223
219
|
time includes model generation, tool calls, etc. but does not include
|
224
220
|
time spent waiting on retries or shared resources.
|
225
|
-
name:
|
221
|
+
name: Task name. If not specified is automatically
|
226
222
|
determined based on the name of the task directory (or "task")
|
227
223
|
if its anonymous task (e.g. created in a notebook and passed to
|
228
224
|
eval() directly)
|
229
|
-
version:
|
225
|
+
version: Version of task (to distinguish evolutions
|
230
226
|
of the task spec or breaking changes to it)
|
231
|
-
metadata:
|
227
|
+
metadata: Additional metadata to associate with the task.
|
232
228
|
|
233
229
|
Returns:
|
234
230
|
Task: Task adapted with alternate options.
|
@@ -248,6 +244,8 @@ def task_with(
|
|
248
244
|
task.scorer = resolve_scorer(scorer)
|
249
245
|
if not isinstance(metrics, NotGiven):
|
250
246
|
task.metrics = metrics
|
247
|
+
if not isinstance(model, NotGiven):
|
248
|
+
task.model = resolve_model(model)
|
251
249
|
if not isinstance(config, NotGiven):
|
252
250
|
task.config = config
|
253
251
|
if not isinstance(sandbox, NotGiven):
|
@@ -307,34 +305,10 @@ class PreviousTask:
|
|
307
305
|
id: str
|
308
306
|
task: str | Task
|
309
307
|
task_args: dict[str, Any]
|
308
|
+
model: Model | None
|
310
309
|
log: EvalLog
|
311
310
|
|
312
311
|
|
313
|
-
Tasks = (
|
314
|
-
str
|
315
|
-
| PreviousTask
|
316
|
-
| TaskInfo
|
317
|
-
| Task
|
318
|
-
| Callable[..., Task]
|
319
|
-
| type[Task]
|
320
|
-
| list[str]
|
321
|
-
| list[PreviousTask]
|
322
|
-
| list[TaskInfo]
|
323
|
-
| list[Task]
|
324
|
-
| list[Callable[..., Task]]
|
325
|
-
| list[type[Task]]
|
326
|
-
| None
|
327
|
-
)
|
328
|
-
r"""One or more tasks.
|
329
|
-
|
330
|
-
Tasks to be evaluated. Many forms of task specification are
|
331
|
-
supported including directory names, task functions, task
|
332
|
-
classes, and task instances (a single task or list of tasks
|
333
|
-
can be specified). None is a request to read a task out
|
334
|
-
of the current working directory.
|
335
|
-
"""
|
336
|
-
|
337
|
-
|
338
312
|
def resolve_approval(
|
339
313
|
approval: str | list[ApprovalPolicy] | None,
|
340
314
|
) -> list[ApprovalPolicy] | None:
|
@@ -370,6 +344,13 @@ def resolve_solver(solver: Solver | list[Solver]) -> Solver:
|
|
370
344
|
return chain(solver) if isinstance(solver, list) else solver
|
371
345
|
|
372
346
|
|
347
|
+
def resolve_model(model: str | Model | None) -> Model | None:
    """Resolve a task's model specification.

    Args:
        model: A model name, an existing `Model`, or `None`.

    Returns:
        The result of `get_model(model)` when a string is given; otherwise
        the argument unchanged (a `Model` instance or `None`).
    """
    # Only names need a lookup; Model instances and None pass straight through.
    return get_model(model) if isinstance(model, str) else model
|
352
|
+
|
353
|
+
|
373
354
|
def resolve_scorer(scorer: Scorer | list[Scorer] | None) -> list[Scorer] | None:
|
374
355
|
return (
|
375
356
|
scorer if isinstance(scorer, list) else [scorer] if scorer is not None else None
|
@@ -0,0 +1,30 @@
|
|
1
|
+
from typing import Callable, TypeAlias
|
2
|
+
|
3
|
+
from .resolved import ResolvedTask
|
4
|
+
from .task import PreviousTask, Task, TaskInfo
|
5
|
+
|
6
|
+
Tasks: TypeAlias = (
    # a single task specification
    str | PreviousTask | ResolvedTask | TaskInfo | Task
    | Callable[..., Task] | type[Task]
    # a list of task specifications
    | list[str] | list[PreviousTask] | list[ResolvedTask] | list[TaskInfo]
    | list[Task] | list[Callable[..., Task]] | list[type[Task]]
    # no specification at all
    | None
)
r"""One or more tasks.

Tasks to be evaluated. Many forms of task specification are
supported, including directory names, task functions, task
classes, and task instances (either a single task or a list of
tasks can be specified). None is a request to read a task out
of the current working directory.
"""
|
inspect_ai/_util/constants.py
CHANGED
inspect_ai/_util/dotenv.py
CHANGED
@@ -52,6 +52,9 @@ def init_dotenv() -> None:
|
|
52
52
|
if inspect_log_dir:
|
53
53
|
os.environ[INSPECT_LOG_DIR_VAR] = inspect_log_dir
|
54
54
|
|
55
|
+
# re-apply any env vars specified at the cli w/ --env
|
56
|
+
apply_cli_env()
|
57
|
+
|
55
58
|
|
56
59
|
@contextlib.contextmanager
|
57
60
|
def dotenv_environ(
|
@@ -76,3 +79,17 @@ def dotenv_environ(
|
|
76
79
|
finally:
|
77
80
|
os.environ.update(update_after)
|
78
81
|
[os.environ.pop(k) for k in remove_after]
|
82
|
+
|
83
|
+
|
84
|
+
# Environment variables supplied on the command line (--env). They are kept
# at module level so they can be re-applied after other code (for example
# init_dotenv) has written to os.environ.
_cli_env: dict[str, Any] = {}


def init_cli_env(env: dict[str, Any]) -> None:
    """Record the CLI-provided environment variables and apply them.

    Args:
        env: Mapping of variable name to value. Values are converted with
            `str()` when written to `os.environ`.
    """
    global _cli_env
    # Keep a snapshot rather than the caller's dict, so that later mutation
    # of `env` by the caller cannot change what apply_cli_env() re-applies.
    _cli_env = dict(env)
    apply_cli_env()


def apply_cli_env() -> None:
    """Write every recorded CLI environment variable into `os.environ`.

    Safe to call repeatedly; each call overwrites the current value of the
    recorded variables with the value recorded by `init_cli_env()`.
    """
    for var, value in _cli_env.items():
        # os.environ only accepts strings
        os.environ[var] = str(value)
|
inspect_ai/_util/registry.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Any, Callable, Literal, TypedDict, TypeGuard, cast
|
|
5
5
|
from pydantic import BaseModel, Field
|
6
6
|
from pydantic_core import to_jsonable_python
|
7
7
|
|
8
|
+
from inspect_ai._util.json import jsonable_python
|
8
9
|
from inspect_ai._util.package import get_installed_package_name
|
9
10
|
|
10
11
|
from .constants import PKG_NAME
|
@@ -198,13 +199,15 @@ def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object:
|
|
198
199
|
def with_registry_info(o: object) -> object:
|
199
200
|
return set_registry_info(o, registry_info(obj))
|
200
201
|
|
201
|
-
# instantiate registry objects
|
202
|
+
# instantiate registry and model objects
|
202
203
|
for param in kwargs.keys():
|
203
204
|
value = kwargs[param]
|
204
205
|
if is_registry_dict(value):
|
205
206
|
kwargs[param] = registry_create(
|
206
207
|
value["type"], value["name"], **value["params"]
|
207
208
|
)
|
209
|
+
elif is_model_dict(value):
|
210
|
+
kwargs[param] = model_create_from_dict(value)
|
208
211
|
|
209
212
|
if isclass(obj):
|
210
213
|
return with_registry_info(obj(**kwargs))
|
@@ -380,6 +383,8 @@ def is_registry_dict(o: object) -> TypeGuard[RegistryDict]:
|
|
380
383
|
|
381
384
|
|
382
385
|
def registry_value(o: object) -> Any:
|
386
|
+
from inspect_ai.model._model import Model
|
387
|
+
|
383
388
|
# treat tuple as list
|
384
389
|
if isinstance(o, tuple):
|
385
390
|
o = list(o)
|
@@ -390,14 +395,50 @@ def registry_value(o: object) -> Any:
|
|
390
395
|
elif isinstance(o, dict):
|
391
396
|
return {k: registry_value(v) for k, v in o.items()}
|
392
397
|
elif has_registry_params(o):
|
393
|
-
return
|
398
|
+
return RegistryDict(
|
394
399
|
type=registry_info(o).type,
|
395
400
|
name=registry_log_name(o),
|
396
401
|
params=registry_params(o),
|
397
402
|
)
|
403
|
+
elif isinstance(o, Model):
|
404
|
+
return ModelDict(
|
405
|
+
model=str(o),
|
406
|
+
config=jsonable_python(o.config),
|
407
|
+
base_url=o.api.base_url,
|
408
|
+
model_args=o.model_args,
|
409
|
+
)
|
398
410
|
else:
|
399
411
|
return o
|
400
412
|
|
401
413
|
|
402
414
|
def registry_create_from_dict(d: RegistryDict) -> object:
    """Create a registry object from its dictionary form.

    Args:
        d: Dictionary with the `type`, `name` and `params` of the object.

    Returns:
        The object returned by `registry_create()` for those values.
    """
    object_type = d["type"]
    object_name = d["name"]
    # `params` are forwarded as keyword arguments
    return registry_create(object_type, object_name, **d["params"])
|
416
|
+
|
417
|
+
|
418
|
+
class ModelDict(TypedDict):
|
419
|
+
model: str
|
420
|
+
config: dict[str, Any]
|
421
|
+
base_url: str | None
|
422
|
+
model_args: dict[str, Any]
|
423
|
+
|
424
|
+
|
425
|
+
def is_model_dict(o: object) -> TypeGuard[ModelDict]:
    """Check whether `o` has the shape of a `ModelDict`.

    Only the presence of the four expected keys is checked; the types of
    the values are not validated.

    Args:
        o: Any object.

    Returns:
        True if `o` is a dict containing the keys `model`, `config`,
        `base_url` and `model_args`.
    """
    if not isinstance(o, dict):
        return False
    required_keys = ("model", "config", "base_url", "model_args")
    return all(key in o for key in required_keys)
|
433
|
+
|
434
|
+
|
435
|
+
def model_create_from_dict(d: ModelDict) -> object:
    """Create a model from its dictionary form.

    Args:
        d: Dictionary with the model name, generation config, base URL
            and extra model arguments.

    Returns:
        The model returned by `get_model()` for those values.
    """
    # Imported at call time rather than module level
    # (presumably to avoid a circular import -- confirm).
    from inspect_ai.model._generate_config import GenerateConfig
    from inspect_ai.model._model import get_model

    model_name = d["model"]
    generate_config = GenerateConfig(**d["config"])
    api_base_url = d["base_url"]
    # `model_args` are forwarded as additional keyword arguments
    return get_model(
        model_name,
        config=generate_config,
        base_url=api_base_url,
        **d["model_args"],
    )
|
inspect_ai/_view/server.py
CHANGED
@@ -57,8 +57,7 @@ def view_server(
|
|
57
57
|
@routes.get("/api/logs/{log}")
|
58
58
|
async def api_log(request: web.Request) -> web.Response:
|
59
59
|
# log file requested
|
60
|
-
file = request.match_info["log"]
|
61
|
-
file = urllib.parse.unquote(file)
|
60
|
+
file = normalize_uri(request.match_info["log"])
|
62
61
|
validate_log_file_request(file)
|
63
62
|
|
64
63
|
# header_only is based on a size threshold
|
@@ -68,8 +67,7 @@ def view_server(
|
|
68
67
|
@routes.get("/api/log-size/{log}")
|
69
68
|
async def api_log_size(request: web.Request) -> web.Response:
|
70
69
|
# log file requested
|
71
|
-
file = request.match_info["log"]
|
72
|
-
file = urllib.parse.unquote(file)
|
70
|
+
file = normalize_uri(request.match_info["log"])
|
73
71
|
validate_log_file_request(file)
|
74
72
|
|
75
73
|
return await log_size_response(file)
|
@@ -77,8 +75,7 @@ def view_server(
|
|
77
75
|
@routes.get("/api/log-delete/{log}")
|
78
76
|
async def api_log_delete(request: web.Request) -> web.Response:
|
79
77
|
# log file requested
|
80
|
-
file = request.match_info["log"]
|
81
|
-
file = urllib.parse.unquote(file)
|
78
|
+
file = normalize_uri(request.match_info["log"])
|
82
79
|
validate_log_file_request(file)
|
83
80
|
|
84
81
|
return await log_delete_response(file)
|
@@ -86,8 +83,7 @@ def view_server(
|
|
86
83
|
@routes.get("/api/log-bytes/{log}")
|
87
84
|
async def api_log_bytes(request: web.Request) -> web.Response:
|
88
85
|
# log file requested
|
89
|
-
file = request.match_info["log"]
|
90
|
-
file = urllib.parse.unquote(file)
|
86
|
+
file = normalize_uri(request.match_info["log"])
|
91
87
|
validate_log_file_request(file)
|
92
88
|
|
93
89
|
# header_only is based on a size threshold
|
@@ -106,7 +102,7 @@ def view_server(
|
|
106
102
|
if authorization:
|
107
103
|
request_log_dir = request.query.getone("log_dir", None)
|
108
104
|
if request_log_dir:
|
109
|
-
request_log_dir =
|
105
|
+
request_log_dir = normalize_uri(request_log_dir)
|
110
106
|
else:
|
111
107
|
request_log_dir = log_dir
|
112
108
|
else:
|
@@ -121,7 +117,7 @@ def view_server(
|
|
121
117
|
@routes.get("/api/log-headers")
|
122
118
|
async def api_log_headers(request: web.Request) -> web.Response:
|
123
119
|
files = request.query.getall("file", [])
|
124
|
-
files = [
|
120
|
+
files = [normalize_uri(file) for file in files]
|
125
121
|
map(validate_log_file_request, files)
|
126
122
|
return await log_headers_response(files)
|
127
123
|
|
@@ -166,6 +162,28 @@ def view_server(
|
|
166
162
|
)
|
167
163
|
|
168
164
|
|
165
|
+
def normalize_uri(uri: str) -> str:
    """Normalize an incoming URI to a consistent, percent-decoded form.

    Non-`file` URIs (and plain paths) are only percent-decoded. For `file`
    URIs the result is rebuilt as `file://<path>`, and a Windows-style path
    such as `/C:/logs/x.eval` loses its leading slash, giving
    `file://C:/logs/x.eval`.

    Args:
        uri: URI or path as received in the request (possibly URL-encoded).

    Returns:
        The decoded, normalized URI.
    """
    # Decode percent-escapes once and reuse the result.
    decoded = urllib.parse.unquote(uri)
    # urlsplit (unlike urlparse) does not strip `;params` from the last
    # path segment, so a `;` in a file name is preserved.
    parts = urllib.parse.urlsplit(decoded)

    if parts.scheme != "file":
        # If this isn't a file uri, just return the decoded form
        return decoded

    path = parts.path

    # Detect and normalize Windows-style file URIs
    if path.startswith("/") and len(path) > 3 and path[2] == ":":
        # Strip leading `/` before drive letter
        path = path[1:]

    # After decoding, a file name may contain a literal `?` or `#`, which
    # urlsplit treats as the start of a query or fragment. Re-attach them so
    # the file name is not silently truncated.
    if parts.query:
        path = f"{path}?{parts.query}"
    if parts.fragment:
        path = f"{path}#{parts.fragment}"

    return f"file://{path}"
|
185
|
+
|
186
|
+
|
169
187
|
def log_listing_response(logs: list[EvalLogInfo], log_dir: str) -> web.Response:
|
170
188
|
response = dict(
|
171
189
|
log_dir=aliased_path(log_dir),
|
@@ -16346,7 +16346,7 @@ ul.jsondiffpatch-textdiff {
|
|
16346
16346
|
column-gap: 0.5em;
|
16347
16347
|
min-width: 200px;
|
16348
16348
|
}
|
16349
|
-
.
|
16349
|
+
._flatBody_1uw6w_1 {
|
16350
16350
|
color: var(--bs-danger);
|
16351
16351
|
display: grid;
|
16352
16352
|
grid-template-columns: max-content max-content;
|
@@ -16354,16 +16354,17 @@ ul.jsondiffpatch-textdiff {
|
|
16354
16354
|
margin-top: 0.4rem;
|
16355
16355
|
}
|
16356
16356
|
|
16357
|
-
.
|
16357
|
+
._iconSmall_1uw6w_9 {
|
16358
16358
|
font-size: var(--inspect-font-size-small);
|
16359
16359
|
line-height: var(--inspect-font-size-small);
|
16360
16360
|
height: var(--inspect-font-size-small);
|
16361
16361
|
}
|
16362
16362
|
|
16363
|
-
.
|
16363
|
+
._lineBase_1uw6w_15 {
|
16364
16364
|
font-size: var(--inspect-font-size-base);
|
16365
16365
|
line-height: var(--inspect-font-size-base);
|
16366
16366
|
height: var(--inspect-font-size-base);
|
16367
|
+
max-width: 30em;
|
16367
16368
|
}
|
16368
16369
|
._target_9qy4e_1 {
|
16369
16370
|
padding-left: 0;
|