inspect-ai 0.3.94__py3-none-any.whl → 0.3.96__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. inspect_ai/_eval/loader.py +1 -1
  2. inspect_ai/_eval/task/run.py +12 -6
  3. inspect_ai/_util/exception.py +4 -0
  4. inspect_ai/_util/hash.py +39 -0
  5. inspect_ai/_util/local_server.py +16 -0
  6. inspect_ai/_util/path.py +22 -0
  7. inspect_ai/_util/trace.py +1 -1
  8. inspect_ai/_util/working.py +4 -0
  9. inspect_ai/_view/www/dist/assets/index.css +9 -9
  10. inspect_ai/_view/www/dist/assets/index.js +117 -120
  11. inspect_ai/_view/www/package.json +1 -1
  12. inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
  13. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
  14. inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
  15. inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
  16. inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
  17. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
  18. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
  19. inspect_ai/_view/www/src/app/types.ts +12 -2
  20. inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
  21. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
  22. inspect_ai/_view/www/src/state/hooks.ts +19 -3
  23. inspect_ai/_view/www/src/state/logSlice.ts +23 -5
  24. inspect_ai/_view/www/yarn.lock +9 -9
  25. inspect_ai/agent/_bridge/patch.py +1 -3
  26. inspect_ai/agent/_types.py +1 -1
  27. inspect_ai/analysis/__init__.py +0 -0
  28. inspect_ai/analysis/beta/__init__.py +67 -0
  29. inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
  30. inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
  31. inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
  32. inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
  33. inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
  34. inspect_ai/analysis/beta/_dataframe/evals/table.py +177 -0
  35. inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
  36. inspect_ai/analysis/beta/_dataframe/events/columns.py +87 -0
  37. inspect_ai/analysis/beta/_dataframe/events/extract.py +26 -0
  38. inspect_ai/analysis/beta/_dataframe/events/table.py +100 -0
  39. inspect_ai/analysis/beta/_dataframe/extract.py +73 -0
  40. inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
  41. inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
  42. inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
  43. inspect_ai/analysis/beta/_dataframe/messages/table.py +79 -0
  44. inspect_ai/analysis/beta/_dataframe/progress.py +26 -0
  45. inspect_ai/analysis/beta/_dataframe/record.py +377 -0
  46. inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
  47. inspect_ai/analysis/beta/_dataframe/samples/columns.py +77 -0
  48. inspect_ai/analysis/beta/_dataframe/samples/extract.py +54 -0
  49. inspect_ai/analysis/beta/_dataframe/samples/table.py +370 -0
  50. inspect_ai/analysis/beta/_dataframe/util.py +160 -0
  51. inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
  52. inspect_ai/log/_file.py +10 -3
  53. inspect_ai/log/_log.py +21 -1
  54. inspect_ai/model/_call_tools.py +2 -1
  55. inspect_ai/model/_model.py +6 -4
  56. inspect_ai/model/_openai_responses.py +17 -18
  57. inspect_ai/model/_providers/anthropic.py +30 -5
  58. inspect_ai/model/_providers/providers.py +1 -1
  59. inspect_ai/solver/_multiple_choice.py +4 -1
  60. inspect_ai/solver/_task_state.py +8 -4
  61. inspect_ai/tool/_mcp/_context.py +3 -5
  62. inspect_ai/tool/_mcp/_sandbox.py +17 -14
  63. inspect_ai/tool/_mcp/server.py +1 -1
  64. inspect_ai/tool/_tools/_think.py +1 -1
  65. inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
  66. inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
  67. inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
  68. inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
  69. inspect_ai/util/_sandbox/events.py +3 -2
  70. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/METADATA +9 -2
  71. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/RECORD +75 -46
  72. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/WHEEL +1 -1
  73. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/entry_points.txt +0 -0
  74. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/licenses/LICENSE +0 -0
  75. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/top_level.txt +0 -0
@@ -428,7 +428,7 @@ def solver_from_spec(spec: SolverSpec) -> Solver:
428
428
  return as_solver(agent)
429
429
  else:
430
430
  raise ValueError(
431
- f"Unkonwn solver {solver_name} (not registered as a @solver or @agent)"
431
+ f"Unknown solver {solver_name} (not registered as a @solver or @agent)"
432
432
  )
433
433
 
434
434
  # we do have a solver file
@@ -27,6 +27,7 @@ from inspect_ai._util.constants import (
27
27
  )
28
28
  from inspect_ai._util.datetime import iso_now
29
29
  from inspect_ai._util.error import exception_message
30
+ from inspect_ai._util.exception import TerminateSampleError
30
31
  from inspect_ai._util.hooks import send_telemetry
31
32
  from inspect_ai._util.json import to_json_str_safe
32
33
  from inspect_ai._util.registry import (
@@ -35,6 +36,7 @@ from inspect_ai._util.registry import (
35
36
  registry_unqualified_name,
36
37
  )
37
38
  from inspect_ai._util.working import (
39
+ end_sample_working_limit,
38
40
  init_sample_working_limit,
39
41
  sample_waiting_time,
40
42
  )
@@ -639,10 +641,11 @@ async def task_run_sample(
639
641
  ) = contextlib.nullcontext()
640
642
  try:
641
643
  # update active sample with sandboxes now that we are initialised
642
- active.sandboxes = await sandbox_connections()
643
-
644
- # end init
645
- await init_span.__aexit__(None, None, None)
644
+ # (ensure that we still exit init context in presence of sandbox error)
645
+ try:
646
+ active.sandboxes = await sandbox_connections()
647
+ finally:
648
+ await init_span.__aexit__(None, None, None)
646
649
 
647
650
  # initialise timeout context manager
648
651
  timeout_cm = (
@@ -674,6 +677,9 @@ async def task_run_sample(
674
677
  # set progress for plan then run it
675
678
  state = await plan(state, generate)
676
679
 
680
+ # disable sample working limit after execution
681
+ end_sample_working_limit()
682
+
677
683
  except TimeoutError:
678
684
  if time_limit is not None:
679
685
  transcript()._event(
@@ -715,7 +721,7 @@ async def task_run_sample(
715
721
  # handle the cancel exception
716
722
  raise
717
723
 
718
- except LimitExceededError:
724
+ except (LimitExceededError, TerminateSampleError):
719
725
  # capture most recent state for scoring
720
726
  state = sample_state() or state
721
727
 
@@ -925,7 +931,7 @@ async def log_sample(
925
931
  input=sample.input,
926
932
  choices=sample.choices,
927
933
  target=sample.target,
928
- metadata=sample.metadata or {},
934
+ metadata=state.metadata or {},
929
935
  sandbox=sample.sandbox,
930
936
  files=list(sample.files.keys()) if sample.files else None,
931
937
  setup=sample.setup,
@@ -0,0 +1,4 @@
1
+ class TerminateSampleError(RuntimeError):
2
+ def __init__(self, reason: str) -> None:
3
+ self.reason = reason
4
+ super().__init__(reason)
inspect_ai/_util/hash.py CHANGED
@@ -1,3 +1,5 @@
1
+ import hashlib
2
+
1
3
  import mmh3
2
4
 
3
5
 
@@ -7,3 +9,40 @@ def mm3_hash(message: str) -> str:
7
9
 
8
10
  # Convert to unsigned integers and then to hexadecimal
9
11
  return f"{h1 & 0xFFFFFFFFFFFFFFFF:016x}{h2 & 0xFFFFFFFFFFFFFFFF:016x}"
12
+
13
+
14
+ def base57_id_hash(content: str) -> str:
15
+ """Generate base57 hash for content.
16
+
17
+ Hash the content to a 128-bit BLAKE2s digest and return it as a
18
+ 22-character Base-57 string. No bits are dropped: log₂(57^22) ≈ 128.3, so 22
19
+ characters hold the full digest. 50% collision odds need roughly 2×10^19 items.
20
+ """
21
+ digest_size = 16 # 128 bits
22
+ digest = hashlib.blake2s(content.encode(), digest_size=digest_size).digest()
23
+
24
+ # Encode as base57; 22 characters hold all 128 bits (log₂(57^22) ≈ 128.3)
25
+ as_int = int.from_bytes(digest, "big")
26
+ base57_str = to_base57(as_int)
27
+ if len(base57_str) > 22:
28
+ return base57_str[-22:] # Take last 22 chars if longer
29
+ else:
30
+ # This is unlikely with a 128-bit input
31
+ return base57_str.rjust(22, ALPHABET57[0])
32
+
33
+
34
+ # shortuuid uses these 57 characters (excluding similar-looking characters like 0/O, 1/I/l, etc.)
35
+ ALPHABET57 = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
36
+
37
+
38
+ def to_base57(n: int) -> str:
39
+ if n == 0:
40
+ return ALPHABET57[0]
41
+
42
+ out = []
43
+ while n:
44
+ n, rem = divmod(n, 57)
45
+ out.append(ALPHABET57[rem])
46
+
47
+ # reverse and return
48
+ return "".join(reversed(out))
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import logging
3
3
  import os
4
+ import platform
4
5
  import random
5
6
  import socket
6
7
  import subprocess
@@ -33,6 +34,21 @@ def reserve_port(
33
34
  Returns:
34
35
  A tuple (port, lock_socket) where `lock_socket` is kept open to hold the lock.
35
36
  """
37
+ is_macos = platform.system() == "Darwin"
38
+
39
+ if is_macos:
40
+ logger.info(
41
+ "MacOS system detected. A free binding port will be identified, but not reserved until the server binds to it."
42
+ )
43
+ # On macOS, let the OS pick a free port but not open it
44
+ # This leaves a small race-condition window until the port
45
+ # is actually opened by the llm server
46
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
47
+ s.bind((host, 0)) # Bind to any free port
48
+ port = s.getsockname()[1]
49
+ return port, s
50
+
51
+ # Non-macOS behavior: try ports in range
36
52
  candidates = list(range(start, end))
37
53
  random.shuffle(candidates)
38
54
 
inspect_ai/_util/path.py CHANGED
@@ -6,6 +6,10 @@ from copy import deepcopy
6
6
  from pathlib import PurePath
7
7
  from typing import Any, Iterator, overload
8
8
 
9
+ from fsspec.implementations.local import LocalFileSystem # type: ignore
10
+
11
+ from inspect_ai._util.file import filesystem
12
+
9
13
 
10
14
  @contextmanager
11
15
  def add_to_path(p: str) -> Iterator[None]:
@@ -98,6 +102,24 @@ def cwd_relative_path(file: str | None, walk_up: bool = False) -> str | None:
98
102
  return None
99
103
 
100
104
 
105
+ def pretty_path(file: str) -> str:
106
+ fs = filesystem(file)
107
+ if fs.is_local():
108
+ file = LocalFileSystem._strip_protocol(file)
109
+ return cwd_relative_path(file)
110
+ else:
111
+ return file
112
+
113
+
114
+ def native_path(file: str) -> str:
115
+ fs = filesystem(file)
116
+ if fs.is_local():
117
+ file = LocalFileSystem._strip_protocol(file)
118
+ return file
119
+ else:
120
+ return file
121
+
122
+
101
123
  # A slightly modified implementation of task_path.relative(d, walk_up=True)
102
124
  # since that wasn't introduced until python 3.12
103
125
  def relative_walk(from_path: PurePath, to_path: PurePath) -> str:
inspect_ai/_util/trace.py CHANGED
@@ -287,7 +287,7 @@ def rotate_trace_files() -> None:
287
287
  rotate_files = list_trace_files()[10:]
288
288
  for file in rotate_files:
289
289
  file.file.unlink(missing_ok=True)
290
- except FileNotFoundError:
290
+ except (FileNotFoundError, OSError):
291
291
  pass
292
292
 
293
293
 
@@ -10,6 +10,10 @@ def init_sample_working_limit(start_time: float, working_limit: float | None) ->
10
10
  _sample_waiting_time.set(0)
11
11
 
12
12
 
13
+ def end_sample_working_limit() -> None:
14
+ _sample_working_limit.set(None)
15
+
16
+
13
17
  def sample_waiting_time() -> float:
14
18
  return _sample_waiting_time.get()
15
19
 
@@ -15489,34 +15489,34 @@ pre[class*="language-"] {
15489
15489
  padding: 0.1rem 0.6rem;
15490
15490
  border-radius: var(--bs-border-radius);
15491
15491
  }
15492
- ._expandableBordered_1wpxz_1 {
15492
+ ._expandableBordered_59eal_1 {
15493
15493
  border: solid var(--bs-light-border-subtle) 1px;
15494
15494
  }
15495
15495
 
15496
- ._expandableTogglable_1wpxz_5 {
15496
+ ._expandableTogglable_59eal_5 {
15497
15497
  margin-bottom: 1em;
15498
15498
  }
15499
15499
 
15500
- ._expandableContents_1wpxz_9 {
15500
+ ._expandableContents_59eal_9 {
15501
15501
  font-size: var(--inspect-font-size-base);
15502
15502
  }
15503
15503
 
15504
- ._expandableCollapsed_1wpxz_13 {
15504
+ ._expandableCollapsed_59eal_13 {
15505
15505
  overflow: hidden;
15506
15506
  }
15507
15507
 
15508
- ._moreToggle_1wpxz_17 {
15508
+ ._moreToggle_59eal_17 {
15509
15509
  display: flex;
15510
15510
  margin-top: 0;
15511
15511
  position: relative;
15512
- height: 8px;
15512
+ height: 18px;
15513
15513
  }
15514
15514
 
15515
- ._moreToggle_1wpxz_17._bordered_1wpxz_24 {
15515
+ ._moreToggle_59eal_17._bordered_59eal_24 {
15516
15516
  border-top: solid var(--bs-light-border-subtle) 1px;
15517
15517
  }
15518
15518
 
15519
- ._moreToggleContainer_1wpxz_28 {
15519
+ ._moreToggleContainer_59eal_28 {
15520
15520
  position: absolute;
15521
15521
  top: -1px;
15522
15522
  right: 0;
@@ -15527,7 +15527,7 @@ pre[class*="language-"] {
15527
15527
  margin-right: 0;
15528
15528
  }
15529
15529
 
15530
- ._moreToggleButton_1wpxz_39 {
15530
+ ._moreToggleButton_59eal_39 {
15531
15531
  font-size: var(--inspect-font-size-smaller);
15532
15532
  border: none;
15533
15533
  padding: 0.1rem 0.5rem;