inspect-ai 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. inspect_ai/_cli/common.py +2 -1
  2. inspect_ai/_cli/eval.py +2 -2
  3. inspect_ai/_display/core/active.py +3 -0
  4. inspect_ai/_display/core/config.py +1 -0
  5. inspect_ai/_display/core/panel.py +21 -13
  6. inspect_ai/_display/core/results.py +3 -7
  7. inspect_ai/_display/core/rich.py +3 -5
  8. inspect_ai/_display/log/__init__.py +0 -0
  9. inspect_ai/_display/log/display.py +173 -0
  10. inspect_ai/_display/plain/display.py +2 -2
  11. inspect_ai/_display/rich/display.py +2 -4
  12. inspect_ai/_display/textual/app.py +1 -6
  13. inspect_ai/_display/textual/widgets/task_detail.py +3 -14
  14. inspect_ai/_display/textual/widgets/tasks.py +1 -1
  15. inspect_ai/_eval/eval.py +1 -1
  16. inspect_ai/_eval/evalset.py +2 -2
  17. inspect_ai/_eval/registry.py +6 -1
  18. inspect_ai/_eval/run.py +5 -1
  19. inspect_ai/_eval/task/constants.py +1 -0
  20. inspect_ai/_eval/task/log.py +2 -0
  21. inspect_ai/_eval/task/run.py +1 -1
  22. inspect_ai/_util/citation.py +88 -0
  23. inspect_ai/_util/content.py +24 -2
  24. inspect_ai/_util/json.py +17 -2
  25. inspect_ai/_util/registry.py +19 -4
  26. inspect_ai/_view/schema.py +0 -6
  27. inspect_ai/_view/www/dist/assets/index.css +82 -24
  28. inspect_ai/_view/www/dist/assets/index.js +10124 -9808
  29. inspect_ai/_view/www/log-schema.json +418 -1
  30. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  31. inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
  32. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
  33. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
  34. inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
  35. inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
  36. inspect_ai/_view/www/package.json +2 -2
  37. inspect_ai/_view/www/src/@types/log.d.ts +140 -39
  38. inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
  39. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
  40. inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
  41. inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
  42. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
  43. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
  44. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
  45. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
  46. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
  47. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
  48. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
  49. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
  50. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
  51. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
  52. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
  53. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
  54. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
  55. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
  56. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
  57. inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
  58. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
  59. inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
  60. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  61. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
  62. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
  63. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
  64. inspect_ai/_view/www/src/tests/README.md +2 -2
  65. inspect_ai/_view/www/src/utils/git.ts +3 -1
  66. inspect_ai/_view/www/src/utils/html.ts +6 -0
  67. inspect_ai/agent/_handoff.py +3 -3
  68. inspect_ai/log/_condense.py +5 -0
  69. inspect_ai/log/_file.py +4 -1
  70. inspect_ai/log/_log.py +9 -4
  71. inspect_ai/log/_recorders/json.py +4 -2
  72. inspect_ai/log/_util.py +2 -0
  73. inspect_ai/model/__init__.py +14 -0
  74. inspect_ai/model/_call_tools.py +13 -4
  75. inspect_ai/model/_chat_message.py +3 -0
  76. inspect_ai/model/_openai_responses.py +80 -34
  77. inspect_ai/model/_providers/_anthropic_citations.py +158 -0
  78. inspect_ai/model/_providers/_google_citations.py +100 -0
  79. inspect_ai/model/_providers/anthropic.py +196 -34
  80. inspect_ai/model/_providers/google.py +94 -22
  81. inspect_ai/model/_providers/mistral.py +20 -7
  82. inspect_ai/model/_providers/openai.py +11 -10
  83. inspect_ai/model/_providers/openai_compatible.py +3 -2
  84. inspect_ai/model/_providers/openai_responses.py +2 -5
  85. inspect_ai/model/_providers/perplexity.py +123 -0
  86. inspect_ai/model/_providers/providers.py +13 -2
  87. inspect_ai/model/_providers/vertex.py +3 -0
  88. inspect_ai/model/_trim.py +5 -0
  89. inspect_ai/tool/__init__.py +14 -0
  90. inspect_ai/tool/_mcp/_mcp.py +5 -2
  91. inspect_ai/tool/_mcp/sampling.py +19 -3
  92. inspect_ai/tool/_mcp/server.py +1 -1
  93. inspect_ai/tool/_tool.py +10 -1
  94. inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
  95. inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
  96. inspect_ai/tool/_tools/_web_search/_google.py +22 -25
  97. inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
  98. inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
  99. inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
  100. inspect_ai/util/_display.py +11 -2
  101. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  102. inspect_ai/util/_span.py +12 -1
  103. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
  104. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +110 -86
  105. /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
  106. /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
  107. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
  108. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
  109. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
  110. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,88 @@
1
+ from typing import Annotated, Literal, TypeAlias, Union
2
+
3
+ from pydantic import BaseModel, Discriminator, Field, JsonValue
4
+
5
+
6
+ class CitationBase(BaseModel):
7
+ """Base class for citations."""
8
+
9
+ cited_text: str | tuple[int, int] | None = Field(
10
+ default=None,
11
+ # without helping the schema generator, this will turn into [unknown, unknown] in TypeScript
12
+ json_schema_extra={
13
+ "anyOf": [
14
+ {"type": "string"},
15
+ {
16
+ "type": "array",
17
+ "items": [{"type": "integer"}, {"type": "integer"}],
18
+ "additionalItems": False,
19
+ "minItems": 2,
20
+ "maxItems": 2,
21
+ },
22
+ {"type": "null"},
23
+ ]
24
+ },
25
+ )
26
+ """
27
+ The cited text
28
+
29
+ This can be the text itself or a start/end range of the text content within
30
+ the container that is the cited text.
31
+ """
32
+
33
+ title: str | None = None
34
+ """Title of the cited resource."""
35
+
36
+ internal: dict[str, JsonValue] | None = Field(default=None)
37
+ """Model provider specific payload - typically used to aid transformation back to model types."""
38
+
39
+
40
+ class ContentCitation(CitationBase):
41
+ """A generic content citation."""
42
+
43
+ type: Literal["content"] = Field(default="content")
44
+ """Type."""
45
+
46
+
47
+ class DocumentRange(BaseModel):
48
+ """A range specifying a section of a document."""
49
+
50
+ type: Literal["block", "page", "char"]
51
+ """The type of the document section specified by the range."""
52
+
53
+ start_index: int
54
+ """0 based index of the start of the range."""
55
+
56
+ end_index: int
57
+ """0 based index of the end of the range."""
58
+
59
+
60
+ class DocumentCitation(CitationBase):
61
+ """A citation that refers to a page range in a document."""
62
+
63
+ type: Literal["document"] = Field(default="document")
64
+ """Type."""
65
+
66
+ range: DocumentRange | None = Field(default=None)
67
+ """Range of the document that is cited."""
68
+
69
+
70
+ class UrlCitation(CitationBase):
71
+ """A citation that refers to a URL."""
72
+
73
+ type: Literal["url"] = Field(default="url")
74
+ """Type."""
75
+
76
+ url: str
77
+ """URL of the cited resource."""
78
+
79
+
80
+ Citation: TypeAlias = Annotated[
81
+ Union[
82
+ ContentCitation,
83
+ DocumentCitation,
84
+ UrlCitation,
85
+ ],
86
+ Discriminator("type"),
87
+ ]
88
+ """A citation sent to or received from a model."""
@@ -1,7 +1,9 @@
1
- from typing import Literal, Union
1
+ from typing import Literal, Sequence, Union
2
2
 
3
3
  from pydantic import BaseModel, Field, JsonValue
4
4
 
5
+ from inspect_ai._util.citation import Citation
6
+
5
7
 
6
8
  class ContentBase(BaseModel):
7
9
  internal: JsonValue | None = Field(default=None)
@@ -20,6 +22,9 @@ class ContentText(ContentBase):
20
22
  refusal: bool | None = Field(default=None)
21
23
  """Was this a refusal message?"""
22
24
 
25
+ citations: Sequence[Citation] | None = Field(default=None)
26
+ """Citations supporting the text block."""
27
+
23
28
 
24
29
  class ContentReasoning(ContentBase):
25
30
  """Reasoning content.
@@ -82,5 +87,22 @@ class ContentVideo(ContentBase):
82
87
  """Format of video data ('mp4', 'mpeg', or 'mov')"""
83
88
 
84
89
 
85
- Content = Union[ContentText, ContentReasoning, ContentImage, ContentAudio, ContentVideo]
90
+ class ContentData(ContentBase):
91
+ """Model internal."""
92
+
93
+ type: Literal["data"] = Field(default="data")
94
+ """Type."""
95
+
96
+ data: dict[str, JsonValue]
97
+ """Model provider specific payload - required for internal content."""
98
+
99
+
100
+ Content = Union[
101
+ ContentText,
102
+ ContentReasoning,
103
+ ContentImage,
104
+ ContentAudio,
105
+ ContentVideo,
106
+ ContentData,
107
+ ]
86
108
  """Content sent to or received from a model."""
inspect_ai/_util/json.py CHANGED
@@ -6,7 +6,7 @@ from typing import (
6
6
 
7
7
  import jsonpatch
8
8
  from pydantic import BaseModel, Field, JsonValue
9
- from pydantic_core import to_json, to_jsonable_python
9
+ from pydantic_core import PydanticSerializationError, to_json, to_jsonable_python
10
10
 
11
11
  JSONType = Literal["string", "integer", "number", "boolean", "array", "object", "null"]
12
12
  """Valid types within JSON schema."""
@@ -27,7 +27,22 @@ def jsonable_dict(x: Any) -> dict[str, JsonValue]:
27
27
 
28
28
 
29
29
  def to_json_safe(x: Any) -> bytes:
30
- return to_json(value=x, indent=2, exclude_none=True, fallback=lambda _x: None)
30
+ def clean_utf8_json(obj: Any) -> Any:
31
+ if isinstance(obj, str):
32
+ return obj.encode("utf-8", errors="replace").decode("utf-8")
33
+ elif isinstance(obj, dict):
34
+ return {k: clean_utf8_json(v) for k, v in obj.items()}
35
+ elif isinstance(obj, list):
36
+ return [clean_utf8_json(item) for item in obj]
37
+ return obj
38
+
39
+ try:
40
+ return to_json(value=x, indent=2, exclude_none=True, fallback=lambda _x: None)
41
+ except PydanticSerializationError as ex:
42
+ if "surrogates not allowed" in str(ex):
43
+ cleaned = clean_utf8_json(x)
44
+ return to_json(cleaned)
45
+ raise
31
46
 
32
47
 
33
48
  def to_json_str_safe(x: Any) -> str:
@@ -102,9 +102,26 @@ def registry_tag(
102
102
  *args (list[Any]): Creation arguments
103
103
  **kwargs (dict[str,Any]): Creation keyword arguments
104
104
  """
105
+ # bind arguments to params
106
+ named_params = extract_named_params(type, False, *args, **kwargs)
107
+
108
+ # set attribute
109
+ setattr(o, REGISTRY_INFO, info)
110
+ setattr(o, REGISTRY_PARAMS, named_params)
111
+
112
+
113
+ def extract_named_params(
114
+ type: Callable[..., Any], apply_defaults: bool, *args: Any, **kwargs: Any
115
+ ) -> dict[str, Any]:
105
116
  # bind arguments to params
106
117
  named_params: dict[str, Any] = {}
107
- bound_params = inspect.signature(type).bind(*args, **kwargs)
118
+
119
+ if apply_defaults:
120
+ bound_params = inspect.signature(type).bind_partial(*args, **kwargs)
121
+ bound_params.apply_defaults()
122
+ else:
123
+ bound_params = inspect.signature(type).bind(*args, **kwargs)
124
+
108
125
  for param, value in bound_params.arguments.items():
109
126
  named_params[param] = registry_value(value)
110
127
 
@@ -128,9 +145,7 @@ def registry_tag(
128
145
  or "<unknown>"
129
146
  )
130
147
 
131
- # set attribute
132
- setattr(o, REGISTRY_INFO, info)
133
- setattr(o, REGISTRY_PARAMS, named_params)
148
+ return named_params
134
149
 
135
150
 
136
151
  def registry_name(o: object, name: str) -> str:
@@ -1,6 +1,5 @@
1
1
  import json
2
2
  import os
3
- import shutil
4
3
  import subprocess
5
4
  from pathlib import Path
6
5
  from typing import Any
@@ -19,9 +18,6 @@ def sync_view_schema() -> None:
19
18
  # export schema file
20
19
  schema_path = Path(WWW_DIR, "log-schema.json")
21
20
  types_path = Path(WWW_DIR, "src", "@types", "log.d.ts")
22
- vs_code_types_path = Path(
23
- WWW_DIR, "..", "..", "..", "..", "tools", "vscode", "src", "@types", "log.d.ts"
24
- )
25
21
 
26
22
  with open(schema_path, "w", encoding="utf-8") as f:
27
23
  # make everything required
@@ -50,8 +46,6 @@ def sync_view_schema() -> None:
50
46
 
51
47
  subprocess.run(["yarn", "prettier:write"], cwd=types_path.parent, check=True)
52
48
 
53
- shutil.copyfile(types_path, vs_code_types_path)
54
-
55
49
 
56
50
  def schema_to_strict(schema: dict[str, Any]) -> dict[str, Any]:
57
51
  properties = schema.get("properties", None)
@@ -15692,18 +15692,98 @@ pre[class*="language-"] {
15692
15692
  ._copyLink_17kai_29:hover {
15693
15693
  opacity: 1;
15694
15694
  }
15695
- ._contentImage_61gdd_1 {
15695
+ ._webSearch_1376z_1 {
15696
+ display: grid;
15697
+ grid-template-columns: max-content 1fr;
15698
+ column-gap: 0.5em;
15699
+ align-items: baseline;
15700
+ }
15701
+
15702
+ ._query_1376z_8 {
15703
+ font-family: var(--bs-font-monospace);
15704
+ }
15705
+ ._keyPairContainer_1ltuo_1 {
15706
+ display: grid;
15707
+ grid-template-columns: max-content auto;
15708
+ column-gap: 0.5em;
15709
+ padding-top: 4px;
15710
+ padding-bottom: 4px;
15711
+ border-bottom: solid 1px var(--bs-border-color);
15712
+ }
15713
+
15714
+ ._key_1ltuo_1 {
15715
+ display: grid;
15716
+ grid-template-columns: 1em auto;
15717
+ cursor: pointer;
15718
+ }
15719
+
15720
+ ._pre_1ltuo_16 {
15721
+ margin-bottom: 0;
15722
+ }
15723
+
15724
+ ._treeIcon_1ltuo_20 {
15725
+ margin-top: -3px;
15726
+ }
15727
+ ._contentData_1sd1z_1 {
15728
+ border: solid var(--bs-light-border-subtle) 1px;
15729
+ padding: 0.5em;
15730
+ margin-bottom: 0.5em;
15731
+ margin-top: 0.5em;
15732
+ margin-left: 1em;
15733
+ }
15734
+ ._webSearch_1mixg_1 {
15735
+ display: grid;
15736
+ grid-template-columns: max-content 1fr;
15737
+ column-gap: 0.5em;
15738
+ align-items: baseline;
15739
+ }
15740
+
15741
+ ._query_1mixg_8 {
15742
+ font-family: var(--bs-font-monospace);
15743
+ }
15744
+
15745
+ ._result_1mixg_12 a:hover {
15746
+ text-decoration: underline;
15747
+ }
15748
+
15749
+ ._result_1mixg_12 a {
15750
+ opacity: 0.8;
15751
+ text-decoration: none;
15752
+ }
15753
+ ._citations_t2k1z_1 {
15754
+ margin-top: 1em;
15755
+ margin-bottom: 1em;
15756
+ display: grid;
15757
+ grid-template-columns: max-content 1fr;
15758
+ column-gap: 0.5em;
15759
+ }
15760
+
15761
+ a._citationLink_t2k1z_9 {
15762
+ display: block;
15763
+ color: var(--bs-body);
15764
+ text-decoration: none;
15765
+ }
15766
+ a._citationLink_t2k1z_9:hover {
15767
+ text-decoration: underline;
15768
+ }
15769
+ ._contentImage_8rgix_1 {
15696
15770
  max-width: 800px;
15697
15771
  border: solid var(--bs-border-color) 1px;
15698
15772
  }
15699
15773
 
15700
- ._reasoning_61gdd_6 {
15774
+ ._reasoning_8rgix_6 {
15701
15775
  border: solid var(--bs-light-border-subtle) 1px;
15702
15776
  padding: 1em;
15703
15777
  margin-bottom: 0.5em;
15704
15778
  background-color: var(--bs-light-bg-subtle);
15705
15779
  border-radius: var(--bs-border-radius);
15706
15780
  }
15781
+
15782
+ ._data_8rgix_14 {
15783
+ border: solid var(--bs-light-border-subtle) 1px;
15784
+ padding: 1em;
15785
+ margin-bottom: 0.5em;
15786
+ }
15707
15787
  ._toolImage_bv5nm_1 {
15708
15788
  max-width: 800px;
15709
15789
  border: solid var(--bs-border-color) 1px;
@@ -16741,28 +16821,6 @@ thead th {
16741
16821
  .card-body.card-no-padding {
16742
16822
  padding: 0;
16743
16823
  }
16744
- ._keyPairContainer_1ltuo_1 {
16745
- display: grid;
16746
- grid-template-columns: max-content auto;
16747
- column-gap: 0.5em;
16748
- padding-top: 4px;
16749
- padding-bottom: 4px;
16750
- border-bottom: solid 1px var(--bs-border-color);
16751
- }
16752
-
16753
- ._key_1ltuo_1 {
16754
- display: grid;
16755
- grid-template-columns: 1em auto;
16756
- cursor: pointer;
16757
- }
16758
-
16759
- ._pre_1ltuo_16 {
16760
- margin-bottom: 0;
16761
- }
16762
-
16763
- ._treeIcon_1ltuo_20 {
16764
- margin-top: -3px;
16765
- }
16766
16824
  ._item_1uzhd_1 {
16767
16825
  margin-bottom: 0em;
16768
16826
  }