inspect-ai 0.3.94__py3-none-any.whl → 0.3.96__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. inspect_ai/_eval/loader.py +1 -1
  2. inspect_ai/_eval/task/run.py +12 -6
  3. inspect_ai/_util/exception.py +4 -0
  4. inspect_ai/_util/hash.py +39 -0
  5. inspect_ai/_util/local_server.py +16 -0
  6. inspect_ai/_util/path.py +22 -0
  7. inspect_ai/_util/trace.py +1 -1
  8. inspect_ai/_util/working.py +4 -0
  9. inspect_ai/_view/www/dist/assets/index.css +9 -9
  10. inspect_ai/_view/www/dist/assets/index.js +117 -120
  11. inspect_ai/_view/www/package.json +1 -1
  12. inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
  13. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
  14. inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
  15. inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
  16. inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
  17. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
  18. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
  19. inspect_ai/_view/www/src/app/types.ts +12 -2
  20. inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
  21. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
  22. inspect_ai/_view/www/src/state/hooks.ts +19 -3
  23. inspect_ai/_view/www/src/state/logSlice.ts +23 -5
  24. inspect_ai/_view/www/yarn.lock +9 -9
  25. inspect_ai/agent/_bridge/patch.py +1 -3
  26. inspect_ai/agent/_types.py +1 -1
  27. inspect_ai/analysis/__init__.py +0 -0
  28. inspect_ai/analysis/beta/__init__.py +67 -0
  29. inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
  30. inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
  31. inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
  32. inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
  33. inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
  34. inspect_ai/analysis/beta/_dataframe/evals/table.py +177 -0
  35. inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
  36. inspect_ai/analysis/beta/_dataframe/events/columns.py +87 -0
  37. inspect_ai/analysis/beta/_dataframe/events/extract.py +26 -0
  38. inspect_ai/analysis/beta/_dataframe/events/table.py +100 -0
  39. inspect_ai/analysis/beta/_dataframe/extract.py +73 -0
  40. inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
  41. inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
  42. inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
  43. inspect_ai/analysis/beta/_dataframe/messages/table.py +79 -0
  44. inspect_ai/analysis/beta/_dataframe/progress.py +26 -0
  45. inspect_ai/analysis/beta/_dataframe/record.py +377 -0
  46. inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
  47. inspect_ai/analysis/beta/_dataframe/samples/columns.py +77 -0
  48. inspect_ai/analysis/beta/_dataframe/samples/extract.py +54 -0
  49. inspect_ai/analysis/beta/_dataframe/samples/table.py +370 -0
  50. inspect_ai/analysis/beta/_dataframe/util.py +160 -0
  51. inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
  52. inspect_ai/log/_file.py +10 -3
  53. inspect_ai/log/_log.py +21 -1
  54. inspect_ai/model/_call_tools.py +2 -1
  55. inspect_ai/model/_model.py +6 -4
  56. inspect_ai/model/_openai_responses.py +17 -18
  57. inspect_ai/model/_providers/anthropic.py +30 -5
  58. inspect_ai/model/_providers/providers.py +1 -1
  59. inspect_ai/solver/_multiple_choice.py +4 -1
  60. inspect_ai/solver/_task_state.py +8 -4
  61. inspect_ai/tool/_mcp/_context.py +3 -5
  62. inspect_ai/tool/_mcp/_sandbox.py +17 -14
  63. inspect_ai/tool/_mcp/server.py +1 -1
  64. inspect_ai/tool/_tools/_think.py +1 -1
  65. inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
  66. inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
  67. inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
  68. inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
  69. inspect_ai/util/_sandbox/events.py +3 -2
  70. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/METADATA +9 -2
  71. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/RECORD +75 -46
  72. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/WHEEL +1 -1
  73. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/entry_points.txt +0 -0
  74. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/licenses/LICENSE +0 -0
  75. {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,87 @@
1
+ from datetime import datetime
2
+ from typing import Any, Callable, Mapping, Type
3
+
4
+ from jsonpath_ng import JSONPath # type: ignore
5
+ from pydantic import JsonValue
6
+ from typing_extensions import override
7
+
8
+ from inspect_ai.log._transcript import Event
9
+
10
+ from ..columns import Column, ColumnType
11
+ from .extract import (
12
+ completion_as_str,
13
+ model_event_input_as_str,
14
+ tool_choice_as_str,
15
+ tool_view_as_str,
16
+ )
17
+
18
+
19
class EventColumn(Column):
    """Column which maps to `Event`.

    The `path` argument does double duty: a string or `JSONPath` is handed to
    the base `Column` as its path, while a callable is kept on the instance as
    `_extract_event` and the base class receives `path=None`.
    """

    def __init__(
        self,
        name: str,
        *,
        path: str | JSONPath | Callable[[Event], JsonValue],
        required: bool = False,
        default: JsonValue | None = None,
        type: Type[ColumnType] | None = None,
        value: Callable[[JsonValue], JsonValue] | None = None,
    ) -> None:
        """Create an event column.

        Args:
            name: Column name (forwarded to `Column`).
            path: JSON path (string or `JSONPath`) or a callable that takes
                an `Event` and returns the column value.
            required: Forwarded unchanged to `Column`.
            default: Forwarded unchanged to `Column`.
            type: Forwarded unchanged to `Column`.
            value: Forwarded unchanged to `Column`.
        """
        super().__init__(
            name=name,
            # Only real paths go to the base class; extractor callables do not.
            path=None if callable(path) else path,
            required=required,
            default=default,
            type=type,
            value=value,
        )
        # Extractor callable (if any); None when a plain path was given.
        self._extract_event = path if callable(path) else None

    @override
    def path_schema(self) -> Mapping[str, Any] | None:
        # No schema is provided for event paths.
        # NOTE(review): presumably this opts out of path validation in the
        # base class -- confirm against `Column.path_schema`.
        return None
45
+
46
+
47
# Columns read from fields by path, unless an extractor callable is given.
EventInfo: list[Column] = [
    EventColumn(name="event", path="event"),
    EventColumn(name="span_id", path="span_id"),
]
"""Event basic information columns."""

EventTiming: list[Column] = [
    # The two timestamps are typed as datetimes; the working_* fields are not.
    EventColumn(name="timestamp", path="timestamp", type=datetime),
    EventColumn(name="completed", path="completed", type=datetime),
    EventColumn(name="working_start", path="working_start"),
    EventColumn(name="working_time", path="working_time"),
]
"""Event timing columns."""

ModelEventColumns: list[Column] = [
    EventColumn(name="model_event_model", path="model"),
    EventColumn(name="model_event_role", path="role"),
    # Input, tool choice and completion are rendered to strings by extractors.
    EventColumn(name="model_event_input", path=model_event_input_as_str),
    EventColumn(name="model_event_tools", path="tools"),
    EventColumn(name="model_event_tool_choice", path=tool_choice_as_str),
    EventColumn(name="model_event_config", path="config"),
    EventColumn(name="model_event_usage", path="output.usage"),
    EventColumn(name="model_event_time", path="output.time"),
    EventColumn(name="model_event_completion", path=completion_as_str),
    EventColumn(name="model_event_retries", path="retries"),
    EventColumn(name="model_event_error", path="error"),
    EventColumn(name="model_event_cache", path="cache"),
    EventColumn(name="model_event_call", path="call"),
]
"""Model event columns."""

ToolEventColumns: list[Column] = [
    EventColumn(name="tool_event_function", path="function"),
    EventColumn(name="tool_event_arguments", path="arguments"),
    # The tool view is flattened to a single string by its extractor.
    EventColumn(name="tool_event_view", path=tool_view_as_str),
    EventColumn(name="tool_event_result", path="result"),
    EventColumn(name="tool_event_truncated", path="truncated"),
    EventColumn(name="tool_event_error_type", path="error.type"),
    EventColumn(name="tool_event_error_message", path="error.message"),
]
"""Tool event columns."""
@@ -0,0 +1,26 @@
1
+ from inspect_ai.log._transcript import ModelEvent, ToolEvent
2
+
3
+ from ..extract import messages_as_str
4
+
5
+
6
def model_event_input_as_str(event: ModelEvent) -> str:
    """Render the input of a model event as a single string.

    Args:
        event: Model event whose `input` is rendered.

    Returns:
        The result of `messages_as_str()` applied to `event.input`.
    """
    model_input = event.input
    return messages_as_str(model_input)
8
+
9
+
10
def tool_choice_as_str(event: ModelEvent) -> str:
    """Return the tool choice of a model event as a string.

    Args:
        event: Model event whose `tool_choice` is read.

    Returns:
        `event.tool_choice` itself when it is already a string, otherwise
        its `name` attribute.
    """
    choice = event.tool_choice
    return choice if isinstance(choice, str) else choice.name
15
+
16
+
17
def completion_as_str(event: ModelEvent) -> str:
    """Return the completion text of a model event's output.

    Args:
        event: Model event whose `output.completion` is read.

    Returns:
        The value of `event.output.completion`.
    """
    output = event.output
    return output.completion
19
+
20
+
21
def tool_view_as_str(event: ToolEvent) -> str | None:
    """Flatten the view of a tool event into a single string.

    Args:
        event: Tool event whose `view` is rendered.

    Returns:
        `None` when the event has no view. Otherwise the view content,
        preceded by the view title and a blank line when a title is set.
    """
    view = event.view
    if view is None:
        return None

    heading = "" if view.title is None else f"{view.title}\n\n"
    return f"{heading}{view.content}"
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Callable, Literal, TypeAlias
4
+
5
+ from inspect_ai.analysis.beta._dataframe.events.columns import EventInfo
6
+ from inspect_ai.log._file import list_eval_logs
7
+ from inspect_ai.log._transcript import Event
8
+
9
+ if TYPE_CHECKING:
10
+ import pandas as pd
11
+
12
+ from typing_extensions import overload
13
+
14
+ from ..columns import Column, ColumnErrors
15
+ from ..samples.table import EventsDetail, _read_samples_df
16
+ from ..util import LogPaths, verify_prerequisites
17
+
18
# Either a list of event type names to keep (matched against `Event.event`
# by `events_df()`), or a predicate called with each event.
EventFilter: TypeAlias = (
    list[
        Literal[
            "sample_init",
            "sample_limit",
            "sandbox",
            "state",
            "store",
            "model",
            "tool",
            "approval",
            "input",
            "score",
            "error",
            "logger",
            "info",
            "span_begin",
            "span_end",
            "subtask",
        ]
    ]
    | Callable[[Event], bool]
)
"""Filter for `events_df()` rows."""
43
+
44
+
45
@overload
def events_df(
    logs: LogPaths | None = None,
    columns: list[Column] = EventInfo,
    filter: EventFilter | None = None,
    strict: Literal[True] = True,
) -> "pd.DataFrame": ...


@overload
def events_df(
    logs: LogPaths | None = None,
    columns: list[Column] = EventInfo,
    filter: EventFilter | None = None,
    strict: Literal[False] = False,
) -> tuple["pd.DataFrame", ColumnErrors]: ...


def events_df(
    logs: LogPaths | None = None,
    columns: list[Column] = EventInfo,
    filter: EventFilter | None = None,
    strict: bool = True,
) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
    """Read a dataframe containing events from a set of evals.

    Args:
        logs: One or more paths to log files or log directories.
            Defaults (when `None`) to the contents of the currently active
            log directory (e.g. ./logs or INSPECT_LOG_DIR), listed at call
            time.
        columns: Specification for what columns to read from log files.
        filter: List of event types to include or callable that performs the filter.
        strict: Raise import errors immediately. Defaults to `True`.
            If `False` then a tuple of `DataFrame` and errors is returned.

    Returns:
        For `strict`, a Pandas `DataFrame` with information for the specified logs.
        For `strict=False`, a tuple of Pandas `DataFrame` and a dictionary of errors
        encountered (by log file) during import.
    """
    verify_prerequisites()

    # Resolve the default on each call. A `list_eval_logs()` default in the
    # signature would run once at import time, doing log-directory I/O on
    # import and freezing a stale listing for every later call.
    if logs is None:
        logs = list_eval_logs()

    # resolve filter/detail
    if filter is None:
        detail = EventsDetail(filter=lambda e: True)
    elif callable(filter):
        detail = EventsDetail(filter=filter)
    else:
        # list of event type names: keep events whose `event` field is listed
        detail = EventsDetail(filter=lambda e: e.event in filter)

    return _read_samples_df(
        logs=logs,
        columns=columns,
        strict=strict,
        detail=detail,
    )
@@ -0,0 +1,73 @@
1
+ import hashlib
2
+ import uuid
3
+ from typing import Any, cast
4
+
5
+ import shortuuid
6
+ from pydantic import BaseModel, JsonValue
7
+
8
+ from inspect_ai.model._chat_message import (
9
+ ChatMessage,
10
+ ChatMessageAssistant,
11
+ ChatMessageTool,
12
+ ChatMessageUser,
13
+ )
14
+
15
+
16
def model_to_record(model: BaseModel) -> dict[str, JsonValue]:
    """Dump a pydantic model to a JSON-compatible record.

    Args:
        model: Model to dump.

    Returns:
        The result of `model.model_dump()` in JSON mode with `None` values
        excluded.
    """
    record = model.model_dump(mode="json", exclude_none=True)
    return cast(dict[str, JsonValue], record)
18
+
19
+
20
def list_as_str(x: JsonValue) -> str:
    """Join a value (or list of values) into a comma-separated string.

    Args:
        x: A list of values, or a single value which is treated as a
            one-element list.

    Returns:
        The `str()` of each element joined with "," (no spaces).
    """
    items = x if isinstance(x, list) else [x]
    return ",".join(map(str, items))
22
+
23
+
24
def score_values(x: JsonValue) -> dict[str, JsonValue]:
    """Reduce a mapping of scores to a mapping of their values.

    Args:
        x: Mapping from a key to a score record; each record must have a
            "value" entry.

    Returns:
        Mapping from the same keys to each record's "value".
    """
    values: dict[str, JsonValue] = {}
    for scorer, score in cast(dict[str, Any], x).items():
        values[scorer] = score["value"]
    return values
27
+
28
+
29
def auto_id(base: str, index: str) -> str:
    """Derive a deterministic short id from a base string and an index.

    Args:
        base: First component of the seed.
        index: Second component of the seed.

    Returns:
        The short-UUID encoding of a UUID built from the MD5 digest of
        `"{base}_{index}"`. The same inputs always give the same id.
    """
    seed = f"{base}_{index}"
    # MD5 is only a stable 16-byte fingerprint here, not a security measure.
    # Declaring that keeps the call working on FIPS-restricted Python builds,
    # where plain `hashlib.md5()` raises. The digest is unchanged.
    hash_bytes = hashlib.md5(seed.encode("utf-8"), usedforsecurity=False).digest()
    long_uuid = uuid.UUID(bytes=hash_bytes)
    return shortuuid.encode(long_uuid)
34
+
35
+
36
def messages_as_str(messages: str | list[ChatMessage]) -> str:
    """Render a conversation as a single string.

    Args:
        messages: List of chat messages, or a plain string which is treated
            as a single user message.

    Returns:
        Each message rendered with `message_as_str()`, separated by blank
        lines.
    """
    conversation = (
        [ChatMessageUser(content=messages)] if isinstance(messages, str) else messages
    )
    return "\n\n".join(message_as_str(item) for item in conversation)
40
+
41
+
42
def message_as_str(message: ChatMessage) -> str:
    """Render a single chat message as text.

    Every rendering starts with "<role>:" followed by the stripped message
    text. Assistant messages with tool calls then list each call and its
    arguments. Tool messages with an error append the error message.

    Args:
        message: Message to render.

    Returns:
        The text rendering of the message.
    """
    body = message.text.strip() if message.text else ""
    header = f"{message.role}:\n{body}\n"

    # assistant messages with tool calls
    if isinstance(message, ChatMessageAssistant) and message.tool_calls is not None:
        parts = [header]
        for call in message.tool_calls:
            arguments = call.arguments
            if isinstance(arguments, dict):
                # one "key: value" line per argument
                rendered = "\n".join(f"{key}: {val}" for key, val in arguments.items())
                parts.append(f"\nTool Call: {call.function}\nArguments:\n{rendered}")
            else:
                parts.append(f"\nTool Call: {call.function}\nArguments: {arguments}")
        return "".join(parts)

    # tool responses with errors
    if isinstance(message, ChatMessageTool) and message.error is not None:
        tool_name = message.function or "unknown"
        return f"{header}\nError in tool call '{tool_name}':\n{message.error.message}\n"

    # normal messages
    return header
@@ -0,0 +1,60 @@
1
+ from typing import Any, Callable, Mapping, Type
2
+
3
+ from jsonpath_ng import JSONPath # type: ignore
4
+ from pydantic import JsonValue
5
+ from typing_extensions import override
6
+
7
+ from inspect_ai.model._chat_message import ChatMessage
8
+
9
+ from ..columns import Column, ColumnType
10
+ from .extract import (
11
+ message_text,
12
+ message_tool_calls,
13
+ )
14
+
15
+
16
class MessageColumn(Column):
    """Column which maps to `ChatMessage`.

    The `path` argument does double duty: a string or `JSONPath` is handed to
    the base `Column` as its path, while a callable is kept on the instance as
    `_extract_message` and the base class receives `path=None`.
    """

    def __init__(
        self,
        name: str,
        *,
        path: str | JSONPath | Callable[[ChatMessage], JsonValue],
        required: bool = False,
        default: JsonValue | None = None,
        type: Type[ColumnType] | None = None,
        value: Callable[[JsonValue], JsonValue] | None = None,
    ) -> None:
        """Create a message column.

        Args:
            name: Column name (forwarded to `Column`).
            path: JSON path (string or `JSONPath`) or a callable that takes
                a `ChatMessage` and returns the column value.
            required: Forwarded unchanged to `Column`.
            default: Forwarded unchanged to `Column`.
            type: Forwarded unchanged to `Column`.
            value: Forwarded unchanged to `Column`.
        """
        super().__init__(
            name=name,
            # Only real paths go to the base class; extractor callables do not.
            path=None if callable(path) else path,
            required=required,
            default=default,
            type=type,
            value=value,
        )
        # Extractor callable (if any); None when a plain path was given.
        self._extract_message = path if callable(path) else None

    @override
    def path_schema(self) -> Mapping[str, Any] | None:
        # No schema is provided for message paths.
        # NOTE(review): presumably this opts out of path validation in the
        # base class -- confirm against `Column.path_schema`.
        return None
42
+
43
+
44
# Columns read from fields by path, unless an extractor callable is given.
MessageContent: list[Column] = [
    # `role` is the only required message column.
    MessageColumn(name="role", path="role", required=True),
    MessageColumn(name="source", path="source"),
    MessageColumn(name="content", path=message_text),
]
"""Message content columns."""

MessageToolCalls: list[Column] = [
    # Tool calls are rendered to a single string by the extractor.
    MessageColumn(name="tool_calls", path=message_tool_calls),
    MessageColumn(name="tool_call_id", path="tool_call_id"),
    MessageColumn(name="tool_call_function", path="function"),
    MessageColumn(name="tool_call_error", path="error.message"),
]
"""Message tool call columns."""

# Content columns first, then tool call columns (a new list).
MessageColumns: list[Column] = [*MessageContent, *MessageToolCalls]
"""Chat message columns."""
@@ -0,0 +1,21 @@
1
+ from inspect_ai._util.format import format_function_call
2
+ from inspect_ai.model._chat_message import ChatMessage, ChatMessageAssistant
3
+
4
+
5
def message_text(message: ChatMessage) -> str:
    """Return the text of a chat message.

    Args:
        message: Message whose `text` is read.

    Returns:
        The value of `message.text`.
    """
    text = message.text
    return text
7
+
8
+
9
def message_tool_calls(message: ChatMessage) -> str | None:
    """Render the tool calls of an assistant message as text.

    Args:
        message: Message to inspect.

    Returns:
        `None` unless the message is a `ChatMessageAssistant` whose
        `tool_calls` is not `None`. Otherwise one line-separated entry per
        tool call, each produced by `format_function_call()`.
    """
    if not isinstance(message, ChatMessageAssistant) or message.tool_calls is None:
        return None

    rendered_calls = (
        format_function_call(call.function, call.arguments, width=1000)
        for call in message.tool_calls
    )
    return "\n".join(rendered_calls)
@@ -0,0 +1,79 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Callable, Literal, TypeAlias
4
+
5
+ from inspect_ai.log._file import list_eval_logs
6
+ from inspect_ai.model._chat_message import ChatMessage
7
+
8
+ if TYPE_CHECKING:
9
+ import pandas as pd
10
+
11
+ from typing_extensions import overload
12
+
13
+ from ..columns import Column, ColumnErrors
14
+ from ..samples.table import MessagesDetail, _read_samples_df
15
+ from ..util import LogPaths, verify_prerequisites
16
+ from .columns import MessageColumns
17
+
18
# Either a list of message roles to keep (matched against `ChatMessage.role`
# by `messages_df()`), or a predicate called with each message.
MessageFilter: TypeAlias = (
    list[
        Literal[
            "system",
            "user",
            "assistant",
            "tool",
        ]
    ]
    | Callable[[ChatMessage], bool]
)
"""Filter for `messages_df()` rows."""
22
+
23
+
24
@overload
def messages_df(
    logs: LogPaths | None = None,
    columns: list[Column] = MessageColumns,
    filter: MessageFilter | None = None,
    strict: Literal[True] = True,
) -> "pd.DataFrame": ...


@overload
def messages_df(
    logs: LogPaths | None = None,
    columns: list[Column] = MessageColumns,
    filter: MessageFilter | None = None,
    strict: Literal[False] = False,
) -> tuple["pd.DataFrame", ColumnErrors]: ...


def messages_df(
    logs: LogPaths | None = None,
    columns: list[Column] = MessageColumns,
    filter: MessageFilter | None = None,
    strict: bool = True,
) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
    """Read a dataframe containing messages from a set of evals.

    Args:
        logs: One or more paths to log files or log directories.
            Defaults (when `None`) to the contents of the currently active
            log directory (e.g. ./logs or INSPECT_LOG_DIR), listed at call
            time.
        columns: Specification for what columns to read from log files.
        filter: List of message role types to include or callable that performs the filter.
        strict: Raise import errors immediately. Defaults to `True`.
            If `False` then a tuple of `DataFrame` and errors is returned.

    Returns:
        For `strict`, a Pandas `DataFrame` with information for the specified logs.
        For `strict=False`, a tuple of Pandas `DataFrame` and a dictionary of errors
        encountered (by log file) during import.
    """
    verify_prerequisites()

    # Resolve the default on each call. A `list_eval_logs()` default in the
    # signature would run once at import time, doing log-directory I/O on
    # import and freezing a stale listing for every later call.
    if logs is None:
        logs = list_eval_logs()

    # resolve filter/detail
    if filter is None:
        detail = MessagesDetail(filter=lambda m: True)
    elif callable(filter):
        detail = MessagesDetail(filter=filter)
    else:
        # list of roles: keep messages whose `role` is listed
        detail = MessagesDetail(filter=lambda m: m.role in filter)

    return _read_samples_df(
        logs=logs,
        columns=columns,
        strict=strict,
        detail=detail,
    )
@@ -0,0 +1,26 @@
1
+ from contextlib import contextmanager
2
+ from typing import Iterator
3
+
4
+ from rich.progress import (
5
+ BarColumn,
6
+ Progress,
7
+ TaskID,
8
+ TaskProgressColumn,
9
+ TextColumn,
10
+ TimeElapsedColumn,
11
+ )
12
+
13
+
14
@contextmanager
def import_progress(
    description: str, total: float | None
) -> Iterator[tuple[Progress, TaskID]]:
    """Show a transient rich progress bar for the duration of the context.

    Args:
        description: Task description passed to `Progress.add_task()`.
        total: Task total passed to `Progress.add_task()`.

    Yields:
        A tuple of the `Progress` instance and the id of the task that was
        added to it.
    """
    # Layout: description, bar, task progress, elapsed time.
    layout = (
        TextColumn("[progress.description]{task.description:<18}"),
        BarColumn(),
        TaskProgressColumn(),
        TimeElapsedColumn(),
    )
    with Progress(*layout, transient=True) as bar:
        task = bar.add_task(description, total=total)
        yield bar, task