inspect-ai 0.3.88__py3-none-any.whl → 0.3.89__py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (86)
  1. inspect_ai/_cli/eval.py +16 -0
  2. inspect_ai/_cli/score.py +1 -12
  3. inspect_ai/_cli/util.py +4 -2
  4. inspect_ai/_display/core/footer.py +2 -2
  5. inspect_ai/_display/plain/display.py +2 -2
  6. inspect_ai/_eval/context.py +7 -1
  7. inspect_ai/_eval/eval.py +51 -27
  8. inspect_ai/_eval/evalset.py +27 -10
  9. inspect_ai/_eval/loader.py +7 -8
  10. inspect_ai/_eval/run.py +23 -31
  11. inspect_ai/_eval/score.py +18 -1
  12. inspect_ai/_eval/task/log.py +5 -13
  13. inspect_ai/_eval/task/resolved.py +1 -0
  14. inspect_ai/_eval/task/run.py +231 -244
  15. inspect_ai/_eval/task/task.py +25 -2
  16. inspect_ai/_eval/task/util.py +1 -8
  17. inspect_ai/_util/constants.py +1 -0
  18. inspect_ai/_util/json.py +8 -3
  19. inspect_ai/_util/registry.py +30 -13
  20. inspect_ai/_view/www/App.css +5 -0
  21. inspect_ai/_view/www/dist/assets/index.css +55 -18
  22. inspect_ai/_view/www/dist/assets/index.js +550 -458
  23. inspect_ai/_view/www/log-schema.json +66 -0
  24. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +1 -1
  25. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +13 -8
  26. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +3 -0
  27. inspect_ai/_view/www/src/plan/ModelCard.module.css +16 -0
  28. inspect_ai/_view/www/src/plan/ModelCard.tsx +93 -0
  29. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +5 -1
  30. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +6 -29
  31. inspect_ai/_view/www/src/types/log.d.ts +24 -6
  32. inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.module.css +16 -0
  33. inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.tsx +43 -0
  34. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -1
  35. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +5 -0
  36. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -0
  37. inspect_ai/agent/_agent.py +12 -0
  38. inspect_ai/agent/_as_tool.py +1 -1
  39. inspect_ai/agent/_bridge/bridge.py +9 -2
  40. inspect_ai/agent/_react.py +142 -74
  41. inspect_ai/agent/_run.py +13 -2
  42. inspect_ai/agent/_types.py +6 -0
  43. inspect_ai/approval/_apply.py +6 -7
  44. inspect_ai/approval/_approver.py +3 -3
  45. inspect_ai/approval/_auto.py +2 -2
  46. inspect_ai/approval/_call.py +20 -4
  47. inspect_ai/approval/_human/approver.py +3 -3
  48. inspect_ai/approval/_human/manager.py +2 -2
  49. inspect_ai/approval/_human/panel.py +3 -3
  50. inspect_ai/approval/_policy.py +3 -3
  51. inspect_ai/log/__init__.py +2 -0
  52. inspect_ai/log/_log.py +23 -2
  53. inspect_ai/log/_model.py +58 -0
  54. inspect_ai/log/_recorders/file.py +14 -3
  55. inspect_ai/log/_transcript.py +3 -0
  56. inspect_ai/model/__init__.py +2 -0
  57. inspect_ai/model/_call_tools.py +4 -1
  58. inspect_ai/model/_model.py +49 -3
  59. inspect_ai/model/_openai.py +151 -21
  60. inspect_ai/model/_providers/anthropic.py +20 -12
  61. inspect_ai/model/_providers/bedrock.py +3 -3
  62. inspect_ai/model/_providers/cloudflare.py +29 -108
  63. inspect_ai/model/_providers/google.py +21 -10
  64. inspect_ai/model/_providers/grok.py +23 -17
  65. inspect_ai/model/_providers/groq.py +61 -37
  66. inspect_ai/model/_providers/llama_cpp_python.py +8 -9
  67. inspect_ai/model/_providers/mistral.py +8 -3
  68. inspect_ai/model/_providers/ollama.py +8 -9
  69. inspect_ai/model/_providers/openai.py +53 -157
  70. inspect_ai/model/_providers/openai_compatible.py +195 -0
  71. inspect_ai/model/_providers/openrouter.py +4 -15
  72. inspect_ai/model/_providers/providers.py +11 -0
  73. inspect_ai/model/_providers/together.py +25 -23
  74. inspect_ai/model/_trim.py +83 -0
  75. inspect_ai/solver/_plan.py +5 -3
  76. inspect_ai/tool/_tool_def.py +8 -2
  77. inspect_ai/util/__init__.py +3 -0
  78. inspect_ai/util/_concurrency.py +15 -2
  79. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.89.dist-info}/METADATA +1 -1
  80. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.89.dist-info}/RECORD +84 -79
  81. inspect_ai/_eval/task/rundir.py +0 -78
  82. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
  83. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.89.dist-info}/WHEEL +0 -0
  84. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.89.dist-info}/entry_points.txt +0 -0
  85. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.89.dist-info}/licenses/LICENSE +0 -0
  86. {inspect_ai-0.3.88.dist-info → inspect_ai-0.3.89.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/together.py

@@ -10,7 +10,6 @@ from openai.types.chat import (
 from typing_extensions import override
 
 from inspect_ai._util.constants import DEFAULT_MAX_TOKENS
-from inspect_ai.model._providers.util.chatapi import ChatAPIHandler
 from inspect_ai.tool._tool_choice import ToolChoice
 from inspect_ai.tool._tool_info import ToolInfo
 
@@ -27,16 +26,14 @@ from .._model_output import (
     as_stop_reason,
 )
 from .._openai import chat_message_assistant_from_openai
-from .openai import (
-    OpenAIAPI,
-)
+from .openai_compatible import OpenAICompatibleAPI
 from .util import (
     chat_api_input,
     chat_api_request,
-    environment_prerequisite_error,
     model_base_url,
     should_retry_chat_api_error,
 )
+from .util.chatapi import ChatAPIHandler
 
 
 def chat_choices_from_response_together(
@@ -78,10 +75,7 @@ def chat_choices_from_response_together(
     ]
 
 
-TOGETHER_API_KEY = "TOGETHER_API_KEY"
-
-
-class TogetherAIAPI(OpenAIAPI):
+class TogetherAIAPI(OpenAICompatibleAPI):
     def __init__(
         self,
         model_name: str,
@@ -89,14 +83,13 @@ class TogetherAIAPI(OpenAIAPI):
         api_key: str | None = None,
         config: GenerateConfig = GenerateConfig(),
     ) -> None:
-        if not api_key:
-            api_key = os.environ.get(TOGETHER_API_KEY, None)
-            if not api_key:
-                raise environment_prerequisite_error("TogetherAI", TOGETHER_API_KEY)
-        base_url = model_base_url(base_url, "TOGETHER_BASE_URL")
-        base_url = base_url if base_url else "https://api.together.xyz/v1"
         super().__init__(
-            model_name=model_name, base_url=base_url, api_key=api_key, config=config
+            model_name=model_name,
+            base_url=base_url,
+            api_key=api_key,
+            config=config,
+            service="Together",
+            service_base_url="https://api.together.xyz/v1",
         )
 
     # Together uses a default of 512 so we bump it up
@@ -119,22 +112,31 @@ class TogetherAIAPI(OpenAIAPI):
         return ex
 
     @override
-    def set_logprobs_params(
-        self, params: dict[str, Any], config: GenerateConfig
-    ) -> dict[str, Any]:
-        if config.logprobs is True:
+    def completion_params(self, config: GenerateConfig, tools: bool) -> dict[str, Any]:
+        params = super().completion_params(config, tools)
+        if "logprobs" in params:
             params["logprobs"] = 1
+        if "top_logprobs" in params:
+            del params["top_logprobs"]
+
+        # together requires temperature with num_choices
+        if config.num_choices is not None and config.temperature is None:
+            params["temperature"] = 1
+
         return params
 
     # Together has a slightly different logprobs structure to OpenAI, so we need to remap it.
-    def _chat_choices_from_response(
-        self, response: ChatCompletion, tools: list[ToolInfo]
+    @override
+    def chat_choices_from_completion(
+        self, completion: ChatCompletion, tools: list[ToolInfo]
     ) -> list[ChatCompletionChoice]:
-        return chat_choices_from_response_together(response, tools)
+        return chat_choices_from_response_together(completion, tools)
 
 
 # Implementation of REST client for Together (currently not used)
 
+TOGETHER_API_KEY = "TOGETHER_API_KEY"
+
 
 class TogetherRESTAPI(ModelAPI):
     def __init__(
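Together is now built on the new `OpenAICompatibleAPI` base class (`inspect_ai/model/_providers/openai_compatible.py`, +195 lines in this release). A minimal sketch of a provider following the same pattern, inferred only from the constructor call and overrides visible in the hunks above; the wider `OpenAICompatibleAPI` surface beyond these arguments is an assumption:

    from typing import Any

    from typing_extensions import override

    from inspect_ai.model import GenerateConfig
    from inspect_ai.model._providers.openai_compatible import OpenAICompatibleAPI


    class ExampleAPI(OpenAICompatibleAPI):  # hypothetical provider
        def __init__(
            self,
            model_name: str,
            base_url: str | None = None,
            api_key: str | None = None,
            config: GenerateConfig = GenerateConfig(),
        ) -> None:
            # service/service_base_url appear to drive API key lookup and
            # default endpoint resolution in the base class
            super().__init__(
                model_name=model_name,
                base_url=base_url,
                api_key=api_key,
                config=config,
                service="Example",  # hypothetical service name
                service_base_url="https://api.example.com/v1",  # hypothetical endpoint
            )

        @override
        def completion_params(self, config: GenerateConfig, tools: bool) -> dict[str, Any]:
            # providers can post-process the OpenAI-style request parameters,
            # as TogetherAIAPI does for logprobs above
            params = super().completion_params(config, tools)
            return params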
inspect_ai/model/_trim.py (new file)

@@ -0,0 +1,83 @@
+from dataclasses import dataclass, field
+
+from ._chat_message import ChatMessage
+
+
+def trim_messages(
+    messages: list[ChatMessage], preserve: float = 0.7
+) -> list[ChatMessage]:
+    """Trim message list to fit within model context.
+
+    Trim the list of messages by:
+    - Retaining all system messages.
+    - Retaining the 'input' messages from the sample.
+    - Preserving a proportion of the remaining messages (`preserve=0.7` by default).
+    - Ensuring that all assistant tool calls have corresponding tool messages.
+
+    Args:
+        messages: List of messages to trim.
+        preserve: Ratio of conversation messages to preserve
+            (defaults to 0.7)
+
+    Returns:
+        Trimmed messages.
+    """
+    # validate preserve
+    if not 0 <= preserve <= 1:
+        raise ValueError(f"preserve must be in range [0,1], got {preserve}")
+
+    # partition messages
+    partitioned = _partition_messages(messages)
+
+    # slice messages from the beginning of the conversation as-per preserve
+    start_idx = int(len(partitioned.conversation) * (1 - preserve))
+    preserved_messages = partitioned.conversation[start_idx:]
+
+    # one last step: many model apis require tool messages to have a parent assistant
+    # message with a corresponding tool_call_id. to ensure this, we build the
+    # final list of conversation messages by filtering out tool messages for which
+    # we haven't seen a corresponding assistant message with their id
+    conversation_messages: list[ChatMessage] = []
+    active_tool_ids = set()
+    for message in preserved_messages:
+        if message.role == "assistant":
+            active_tool_ids = {tc.id for tc in (message.tool_calls or [])}
+            conversation_messages.append(message)
+        elif message.role == "tool" and message.tool_call_id in active_tool_ids:
+            conversation_messages.append(message)
+        elif message.role == "user":
+            active_tool_ids = set()
+            conversation_messages.append(message)
+
+    # return trimmed messages
+    return partitioned.system + partitioned.input + conversation_messages
+
+
+@dataclass
+class PartitionedMessages:
+    system: list[ChatMessage] = field(default_factory=list)
+    input: list[ChatMessage] = field(default_factory=list)
+    conversation: list[ChatMessage] = field(default_factory=list)
+
+
+def _partition_messages(messages: list[ChatMessage]) -> PartitionedMessages:
+    # first pass at partitioning
+    partitioned = PartitionedMessages()
+    for message in messages:
+        if message.role == "system":
+            partitioned.system.append(message)
+        elif message.source == "input":
+            partitioned.input.append(message)
+        else:
+            partitioned.conversation.append(message)
+
+    # if there are no input messages then take up to the first user message
+    if len(partitioned.input) == 0:
+        while partitioned.conversation:
+            message = partitioned.conversation.pop(0)
+            partitioned.input.append(message)
+            if message.role == "user":
+                break
+
+    # all done!
+    return partitioned
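Per the file list, `inspect_ai/model/__init__.py` also gains two lines in this release, which plausibly export the new helper; importing from `inspect_ai.model._trim` directly is what the diff itself guarantees. A minimal usage sketch:

    from inspect_ai.model import ChatMessageAssistant, ChatMessageSystem, ChatMessageUser
    from inspect_ai.model._trim import trim_messages

    # a system prompt, an initial user message, and a long back-and-forth
    messages = [
        ChatMessageSystem(content="You are a helpful assistant."),
        ChatMessageUser(content="Summarize the report."),
    ]
    for i in range(20):
        messages.append(ChatMessageAssistant(content=f"draft {i}"))
        messages.append(ChatMessageUser(content=f"revise {i}"))

    # keeps the system message, the leading user message, and the most
    # recent half of the remaining conversation
    trimmed = trim_messages(messages, preserve=0.5)
    assert len(trimmed) < len(messages)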
inspect_ai/solver/_plan.py

@@ -164,7 +164,7 @@ def plan(*plan: PlanType | None, name: str | None = None, **attribs: Any) -> Any
         plan_type,
         plan,
         RegistryInfo(
-            type="plan",
+            type="plan",  # type: ignore[arg-type]
             name=plan_name,
             metadata=dict(attribs=attribs, params=params),
         ),
@@ -212,7 +212,9 @@ def plan_register(
     registry_add(
         plan,
         RegistryInfo(
-            type="plan", name=name, metadata=dict(attribs=attribs, params=params)
+            type="plan",  # type: ignore[arg-type]
+            name=name,
+            metadata=dict(attribs=attribs, params=params),
         ),
     )
     return plan
@@ -228,4 +230,4 @@ def plan_create(name: str, **kwargs: Any) -> Plan:
     Returns:
         Plan with registry info attribute
     """
-    return cast(Plan, registry_create("plan", name, **kwargs))
+    return cast(Plan, registry_create("plan", name, **kwargs))  # type: ignore[arg-type]
inspect_ai/tool/_tool_def.py

@@ -234,9 +234,15 @@ def validate_tool_parameters(tool_name: str, parameters: dict[str, ToolParam]) -
     # validate that we have types/descriptions for parameters
     for param_name, param in parameters.items():
 
-        def raise_not_provided_error(context: str) -> None:
+        def raise_not_provided_error(
+            context: str,
+            # Use the default value trick to avoid Python's late binding of
+            # closures issue.
+            # see: https://docs.python.org/3/faq/programming.html#why-do-lambdas-defined-in-a-loop-with-different-values-all-return-the-same-result
+            bound_name: str = param_name,
+        ) -> None:
             raise ValueError(
-                f"{context} provided for parameter '{param_name}' of function '{tool_name}'."
+                f"{context} provided for parameter '{bound_name}' of function '{tool_name}'."
             )
 
         if param.type is None and not param.anyOf and not param.enum:
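The comment added above references a well-known Python pitfall: closures capture variables, not values, so a function defined in a loop sees the loop variable's final value. A self-contained demonstration of both the pitfall and the default-argument fix used here:

    # late binding: every closure reads i after the loop has finished
    late = [lambda: i for i in range(3)]
    print([f() for f in late])  # [2, 2, 2]

    # default values are evaluated at definition time, freezing the
    # current value per iteration (the bound_name: str = param_name trick)
    early = [lambda i=i: i for i in range(3)]
    print([f() for f in early])  # [0, 1, 2]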
inspect_ai/util/__init__.py

@@ -1,3 +1,4 @@
+from inspect_ai._util.registry import RegistryType, registry_create
 from inspect_ai._util.trace import trace_action, trace_message
 
 from ._concurrency import concurrency
@@ -64,4 +65,6 @@ __all__ = [
     "throttle",
     "trace_action",
     "trace_message",
+    "RegistryType",
+    "registry_create",
 ]
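With `RegistryType` and `registry_create` now re-exported from `inspect_ai.util`, registered objects can be instantiated by name without reaching into the private `inspect_ai._util.registry` module. A hedged sketch; the solver name is illustrative and assumes you registered it yourself:

    from inspect_ai.util import RegistryType, registry_create

    # "solver" is one of the registry types; "my_solver" is assumed to have
    # been registered via the @solver decorator in your own code
    kind: RegistryType = "solver"
    my_solver = registry_create(kind, "my_solver")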
inspect_ai/util/_concurrency.py

@@ -56,10 +56,23 @@ async def concurrency(
         yield
 
 
-def concurrency_status() -> dict[str, tuple[int, int]]:
+def concurrency_status_display() -> dict[str, tuple[int, int]]:
     status: dict[str, tuple[int, int]] = {}
+    names = [c.name for c in _concurrency_semaphores.values()]
     for c in _concurrency_semaphores.values():
-        status[c.name] = (c.concurrency - c.semaphore.value, c.concurrency)
+        # compute name for status display. some resources (e.g. models) use
+        # a / prefix. if there are no duplicates of a given prefix then shorten
+        # it to be only the prefix (e.g. 'openai' rather than 'openai/gpt-4o')
+        prefix = c.name.split("/")[0]
+        prefix_count = sum([1 for name in names if name.startswith(prefix + "/")])
+        if prefix_count == 1:
+            name = prefix
+        else:
+            name = c.name
+
+        # status display entry
+        status[name] = (c.concurrency - c.semaphore.value, c.concurrency)
+
     return status
 
 
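The renamed `concurrency_status_display()` shortens `prefix/rest` resource names to the bare prefix when that is unambiguous. The naming logic is easy to check in isolation; a standalone sketch with invented semaphore names:

    def display_names(names: list[str]) -> list[str]:
        # mirrors the logic above: collapse 'prefix/rest' to 'prefix' when
        # exactly one registered name shares that prefix
        result = []
        for name in names:
            prefix = name.split("/")[0]
            prefix_count = sum(1 for n in names if n.startswith(prefix + "/"))
            result.append(prefix if prefix_count == 1 else name)
        return result

    print(display_names(["openai/gpt-4o", "subprocesses"]))
    # ['openai', 'subprocesses']

    print(display_names(["openai/gpt-4o", "openai/gpt-4o-mini"]))
    # ['openai/gpt-4o', 'openai/gpt-4o-mini']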
{inspect_ai-0.3.88.dist-info → inspect_ai-0.3.89.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inspect_ai
-Version: 0.3.88
+Version: 0.3.89
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License