inspect-ai 0.3.48__py3-none-any.whl → 0.3.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. inspect_ai/_cli/info.py +2 -2
  2. inspect_ai/_cli/log.py +2 -2
  3. inspect_ai/_cli/score.py +2 -2
  4. inspect_ai/_display/core/display.py +19 -0
  5. inspect_ai/_display/core/panel.py +37 -7
  6. inspect_ai/_display/core/progress.py +29 -2
  7. inspect_ai/_display/core/results.py +79 -40
  8. inspect_ai/_display/core/textual.py +21 -0
  9. inspect_ai/_display/rich/display.py +28 -8
  10. inspect_ai/_display/textual/app.py +112 -3
  11. inspect_ai/_display/textual/display.py +1 -1
  12. inspect_ai/_display/textual/widgets/samples.py +132 -91
  13. inspect_ai/_display/textual/widgets/task_detail.py +232 -0
  14. inspect_ai/_display/textual/widgets/tasks.py +74 -6
  15. inspect_ai/_display/textual/widgets/toggle.py +32 -0
  16. inspect_ai/_eval/context.py +2 -0
  17. inspect_ai/_eval/eval.py +4 -3
  18. inspect_ai/_eval/loader.py +1 -1
  19. inspect_ai/_eval/run.py +35 -2
  20. inspect_ai/_eval/task/log.py +13 -11
  21. inspect_ai/_eval/task/results.py +12 -3
  22. inspect_ai/_eval/task/run.py +139 -36
  23. inspect_ai/_eval/task/sandbox.py +2 -1
  24. inspect_ai/_util/_async.py +30 -1
  25. inspect_ai/_util/file.py +47 -5
  26. inspect_ai/_util/html.py +3 -0
  27. inspect_ai/_util/logger.py +6 -5
  28. inspect_ai/_util/platform.py +5 -6
  29. inspect_ai/_util/registry.py +1 -1
  30. inspect_ai/_view/server.py +9 -9
  31. inspect_ai/_view/www/App.css +2 -2
  32. inspect_ai/_view/www/dist/assets/index.css +2 -2
  33. inspect_ai/_view/www/dist/assets/index.js +395 -307
  34. inspect_ai/_view/www/log-schema.json +13 -0
  35. inspect_ai/_view/www/package.json +1 -0
  36. inspect_ai/_view/www/src/components/MessageBand.mjs +1 -1
  37. inspect_ai/_view/www/src/components/Tools.mjs +27 -16
  38. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -3
  39. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +52 -77
  40. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -13
  41. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +40 -18
  42. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +15 -2
  43. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +4 -2
  44. inspect_ai/_view/www/src/types/log.d.ts +2 -0
  45. inspect_ai/_view/www/src/utils/debugging.mjs +23 -0
  46. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +2 -0
  47. inspect_ai/_view/www/yarn.lock +9 -4
  48. inspect_ai/approval/__init__.py +1 -1
  49. inspect_ai/approval/_human/approver.py +35 -0
  50. inspect_ai/approval/_human/console.py +62 -0
  51. inspect_ai/approval/_human/manager.py +108 -0
  52. inspect_ai/approval/_human/panel.py +233 -0
  53. inspect_ai/approval/_human/util.py +51 -0
  54. inspect_ai/dataset/_sources/hf.py +2 -2
  55. inspect_ai/dataset/_sources/util.py +1 -1
  56. inspect_ai/log/_file.py +106 -36
  57. inspect_ai/log/_recorders/eval.py +226 -158
  58. inspect_ai/log/_recorders/file.py +9 -6
  59. inspect_ai/log/_recorders/json.py +35 -12
  60. inspect_ai/log/_recorders/recorder.py +15 -15
  61. inspect_ai/log/_samples.py +52 -0
  62. inspect_ai/model/_model.py +14 -0
  63. inspect_ai/model/_model_output.py +4 -0
  64. inspect_ai/model/_providers/azureai.py +1 -1
  65. inspect_ai/model/_providers/hf.py +106 -4
  66. inspect_ai/model/_providers/util/__init__.py +2 -0
  67. inspect_ai/model/_providers/util/hf_handler.py +200 -0
  68. inspect_ai/scorer/_common.py +1 -1
  69. inspect_ai/solver/_plan.py +0 -8
  70. inspect_ai/solver/_task_state.py +18 -1
  71. inspect_ai/solver/_use_tools.py +9 -1
  72. inspect_ai/tool/_tool_call.py +1 -1
  73. inspect_ai/tool/_tool_def.py +2 -2
  74. inspect_ai/tool/_tool_info.py +14 -2
  75. inspect_ai/tool/_tool_params.py +2 -1
  76. inspect_ai/tool/_tools/_execute.py +1 -1
  77. inspect_ai/tool/_tools/_web_browser/_web_browser.py +6 -0
  78. inspect_ai/util/__init__.py +5 -6
  79. inspect_ai/util/_panel.py +91 -0
  80. inspect_ai/util/_sandbox/__init__.py +2 -6
  81. inspect_ai/util/_sandbox/context.py +4 -3
  82. inspect_ai/util/_sandbox/docker/compose.py +12 -2
  83. inspect_ai/util/_sandbox/docker/docker.py +19 -9
  84. inspect_ai/util/_sandbox/docker/util.py +10 -2
  85. inspect_ai/util/_sandbox/environment.py +47 -41
  86. inspect_ai/util/_sandbox/local.py +15 -10
  87. inspect_ai/util/_sandbox/self_check.py +6 -3
  88. inspect_ai/util/_subprocess.py +43 -3
  89. {inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/METADATA +2 -2
  90. {inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/RECORD +94 -85
  91. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
  92. inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
  93. inspect_ai/approval/_human.py +0 -123
  94. {inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/LICENSE +0 -0
  95. {inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/WHEEL +0 -0
  96. {inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/entry_points.txt +0 -0
  97. {inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/top_level.txt +0 -0
inspect_ai/log/_file.py CHANGED
@@ -1,17 +1,15 @@
1
- import asyncio
2
1
  import os
3
2
  import re
4
3
  from logging import getLogger
5
4
  from typing import Any, Callable, Generator, Literal, cast
6
5
 
7
- import fsspec # type: ignore
8
- from fsspec.asyn import AsyncFileSystem # type: ignore
9
- from fsspec.core import split_protocol # type: ignore
10
6
  from pydantic_core import to_json
11
7
 
8
+ from inspect_ai._util._async import run_coroutine
12
9
  from inspect_ai._util.constants import ALL_LOG_FORMATS, EVAL_LOG_FORMAT
13
10
  from inspect_ai._util.file import (
14
11
  FileInfo,
12
+ async_fileystem,
15
13
  file,
16
14
  filesystem,
17
15
  )
@@ -110,25 +108,25 @@ async def list_eval_logs_async(
110
108
  # async filesystem if we can
111
109
  fs = filesystem(log_dir, fs_options)
112
110
  if fs.is_async():
113
- async_fs = async_fileystem(log_dir, fs_options=fs_options)
114
- if await async_fs._exists(log_dir):
115
- # prevent caching of listings
116
- async_fs.invalidate_cache(log_dir)
117
- # list logs
118
- if recursive:
119
- files: list[dict[str, Any]] = []
120
- async for _, _, filenames in async_fs._walk(log_dir, detail=True):
121
- files.extend(filenames.values())
111
+ async with async_fileystem(log_dir, fs_options=fs_options) as async_fs:
112
+ if await async_fs._exists(log_dir):
113
+ # prevent caching of listings
114
+ async_fs.invalidate_cache(log_dir)
115
+ # list logs
116
+ if recursive:
117
+ files: list[dict[str, Any]] = []
118
+ async for _, _, filenames in async_fs._walk(log_dir, detail=True):
119
+ files.extend(filenames.values())
120
+ else:
121
+ files = cast(
122
+ list[dict[str, Any]],
123
+ await async_fs._ls(log_dir, detail=True),
124
+ )
125
+ logs = [fs._file_info(file) for file in files]
126
+ # resolve to eval logs
127
+ return log_files_from_ls(logs, formats, descending)
122
128
  else:
123
- files = cast(
124
- list[dict[str, Any]],
125
- async_fs._ls(log_dir, detail=True),
126
- )
127
- logs = [fs._file_info(file) for file in files]
128
- # resolve to eval logs
129
- return log_files_from_ls(logs, formats, descending)
130
- else:
131
- return []
129
+ return []
132
130
  else:
133
131
  return list_eval_logs(
134
132
  log_dir=log_dir,
@@ -146,6 +144,22 @@ def write_eval_log(
146
144
  ) -> None:
147
145
  """Write an evaluation log.
148
146
 
147
+ Args:
148
+ log (EvalLog): Evaluation log to write.
149
+ location (str | FileInfo): Location to write log to.
150
+ format (Literal["eval", "json", "auto"]): Write to format
151
+ (defaults to 'auto' based on `location` extension)
152
+ """
153
+ run_coroutine(write_eval_log_async(log, location, format))
154
+
155
+
156
+ async def write_eval_log_async(
157
+ log: EvalLog,
158
+ location: str | FileInfo | None = None,
159
+ format: Literal["eval", "json", "auto"] = "auto",
160
+ ) -> None:
161
+ """Write an evaluation log.
162
+
149
163
  Args:
150
164
  log (EvalLog): Evaluation log to write.
151
165
  location (str | FileInfo): Location to write log to.
@@ -169,7 +183,7 @@ def write_eval_log(
169
183
  recorder_type = recorder_type_for_location(location)
170
184
  else:
171
185
  recorder_type = recorder_type_for_format(format)
172
- recorder_type.write_log(location, log)
186
+ await recorder_type.write_log(location, log)
173
187
 
174
188
  logger.debug(f"Writing eval log to {location} completed")
175
189
 
@@ -224,6 +238,31 @@ def read_eval_log(
224
238
  ) -> EvalLog:
225
239
  """Read an evaluation log.
226
240
 
241
+ Args:
242
+ log_file (str | FileInfo): Log file to read.
243
+ header_only (bool): Read only the header (i.e. exclude
244
+ the "samples" and "logging" fields). Defaults to False.
245
+ resolve_attachments (bool): Resolve attachments (e.g. images)
246
+ to their full content.
247
+ format (Literal["eval", "json", "auto"]): Read from format
248
+ (defaults to 'auto' based on `log_file` extension)
249
+
250
+ Returns:
251
+ EvalLog object read from file.
252
+ """
253
+ return run_coroutine(
254
+ read_eval_log_async(log_file, header_only, resolve_attachments, format)
255
+ )
256
+
257
+
258
+ async def read_eval_log_async(
259
+ log_file: str | FileInfo,
260
+ header_only: bool = False,
261
+ resolve_attachments: bool = False,
262
+ format: Literal["eval", "json", "auto"] = "auto",
263
+ ) -> EvalLog:
264
+ """Read an evaluation log.
265
+
227
266
  Args:
228
267
  log_file (str | FileInfo): Log file to read.
229
268
  header_only (bool): Read only the header (i.e. exclude
@@ -245,7 +284,7 @@ def read_eval_log(
245
284
  recorder_type = recorder_type_for_location(log_file)
246
285
  else:
247
286
  recorder_type = recorder_type_for_format(format)
248
- log = recorder_type.read_log(log_file, header_only)
287
+ log = await recorder_type.read_log(log_file, header_only)
249
288
 
250
289
  # resolve attachments if requested
251
290
  if resolve_attachments and log.samples:
@@ -267,7 +306,15 @@ def read_eval_log(
267
306
  def read_eval_log_headers(
268
307
  log_files: list[str] | list[FileInfo] | list[EvalLogInfo],
269
308
  ) -> list[EvalLog]:
270
- return [read_eval_log(log_file, header_only=True) for log_file in log_files]
309
+ return run_coroutine(read_eval_log_headers_async(log_files))
310
+
311
+
312
+ async def read_eval_log_headers_async(
313
+ log_files: list[str] | list[FileInfo] | list[EvalLogInfo],
314
+ ) -> list[EvalLog]:
315
+ return [
316
+ await read_eval_log_async(log_file, header_only=True) for log_file in log_files
317
+ ]
271
318
 
272
319
 
273
320
  def read_eval_log_sample(
@@ -279,6 +326,35 @@ def read_eval_log_sample(
279
326
  ) -> EvalSample:
280
327
  """Read a sample from an evaluation log.
281
328
 
329
+ Args:
330
+ log_file (str | FileInfo): Log file to read.
331
+ id (int | str): Sample id to read.
332
+ epoch (int): Epoch for sample id (defaults to 1)
333
+ resolve_attachments (bool): Resolve attachments (e.g. images)
334
+ to their full content.
335
+ format (Literal["eval", "json", "auto"]): Read from format
336
+ (defaults to 'auto' based on `log_file` extension)
337
+
338
+ Returns:
339
+ EvalSample object read from file.
340
+
341
+ Raises:
342
+ IndexError: If the passed id and epoch are not found.
343
+ """
344
+ return run_coroutine(
345
+ read_eval_log_sample_async(log_file, id, epoch, resolve_attachments, format)
346
+ )
347
+
348
+
349
+ async def read_eval_log_sample_async(
350
+ log_file: str | FileInfo,
351
+ id: int | str,
352
+ epoch: int = 1,
353
+ resolve_attachments: bool = False,
354
+ format: Literal["eval", "json", "auto"] = "auto",
355
+ ) -> EvalSample:
356
+ """Read a sample from an evaluation log.
357
+
282
358
  Args:
283
359
  log_file (str | FileInfo): Log file to read.
284
360
  id (int | str): Sample id to read.
@@ -301,7 +377,7 @@ def read_eval_log_sample(
301
377
  recorder_type = recorder_type_for_location(log_file)
302
378
  else:
303
379
  recorder_type = recorder_type_for_format(format)
304
- sample = recorder_type.read_log_sample(log_file, id, epoch)
380
+ sample = await recorder_type.read_log_sample(log_file, id, epoch)
305
381
 
306
382
  if resolve_attachments:
307
383
  sample = resolve_sample_attachments(sample)
@@ -442,7 +518,7 @@ def log_file_info(info: FileInfo) -> "EvalLogInfo":
442
518
  )
443
519
 
444
520
 
445
- def eval_log_json(log: EvalLog) -> str:
521
+ def eval_log_json(log: EvalLog) -> bytes:
446
522
  # serialize to json (ignore values that are unserializable)
447
523
  # these values often result from solvers using metadata to
448
524
  # pass around 'live' objects -- this is fine to do and we
@@ -452,14 +528,8 @@ def eval_log_json(log: EvalLog) -> str:
452
528
  indent=2,
453
529
  exclude_none=True,
454
530
  fallback=lambda _x: None,
455
- ).decode()
531
+ )
456
532
 
457
533
 
458
- def async_fileystem(log_file: str, fs_options: dict[str, Any] = {}) -> AsyncFileSystem:
459
- # determine protocol
460
- protocol, _ = split_protocol(log_file)
461
- protocol = protocol or "file"
462
- # create filesystem
463
- fs_options = fs_options.copy()
464
- fs_options.update({"asynchronous": True, "loop": asyncio.get_event_loop()})
465
- return fsspec.filesystem(protocol, **fs_options)
534
+ def eval_log_json_str(log: EvalLog) -> str:
535
+ return eval_log_json(log).decode()