inspect-ai 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_display/textual/app.py +14 -3
  3. inspect_ai/_display/textual/display.py +4 -0
  4. inspect_ai/_display/textual/widgets/samples.py +9 -3
  5. inspect_ai/_display/textual/widgets/task_detail.py +3 -4
  6. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  7. inspect_ai/_display/textual/widgets/vscode.py +48 -0
  8. inspect_ai/_eval/eval.py +36 -24
  9. inspect_ai/_eval/evalset.py +17 -18
  10. inspect_ai/_eval/loader.py +34 -11
  11. inspect_ai/_eval/run.py +8 -13
  12. inspect_ai/_eval/score.py +13 -3
  13. inspect_ai/_eval/task/generate.py +8 -9
  14. inspect_ai/_eval/task/log.py +2 -0
  15. inspect_ai/_eval/task/task.py +23 -9
  16. inspect_ai/_util/file.py +13 -0
  17. inspect_ai/_util/json.py +2 -1
  18. inspect_ai/_util/registry.py +1 -0
  19. inspect_ai/_util/vscode.py +37 -0
  20. inspect_ai/_view/www/App.css +6 -0
  21. inspect_ai/_view/www/dist/assets/index.css +304 -128
  22. inspect_ai/_view/www/dist/assets/index.js +47495 -27519
  23. inspect_ai/_view/www/log-schema.json +124 -31
  24. inspect_ai/_view/www/package.json +3 -0
  25. inspect_ai/_view/www/src/App.tsx +12 -0
  26. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  27. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  28. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  29. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  30. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
  31. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
  32. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  33. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  34. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  35. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  36. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  37. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
  38. inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
  39. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
  40. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
  41. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
  42. inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
  43. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
  44. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
  45. inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
  46. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
  47. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
  48. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  49. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  50. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  51. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
  52. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
  53. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
  54. inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
  55. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
  56. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
  57. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  58. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  59. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  60. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  61. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
  62. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
  63. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
  64. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
  65. inspect_ai/_view/www/src/state/hooks.ts +5 -3
  66. inspect_ai/_view/www/src/state/logPolling.ts +5 -1
  67. inspect_ai/_view/www/src/state/logSlice.ts +10 -0
  68. inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
  69. inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +34 -26
  71. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  72. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  73. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
  74. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
  75. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
  76. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  77. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  78. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
  79. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  80. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
  81. inspect_ai/_view/www/yarn.lock +94 -1
  82. inspect_ai/agent/__init__.py +36 -0
  83. inspect_ai/agent/_agent.py +268 -0
  84. inspect_ai/agent/_as_solver.py +72 -0
  85. inspect_ai/agent/_as_tool.py +122 -0
  86. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  87. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  88. inspect_ai/agent/_filter.py +46 -0
  89. inspect_ai/agent/_handoff.py +93 -0
  90. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  91. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  92. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  93. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  94. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  95. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  96. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  97. inspect_ai/agent/_react.py +241 -0
  98. inspect_ai/agent/_run.py +36 -0
  99. inspect_ai/agent/_types.py +81 -0
  100. inspect_ai/log/_log.py +11 -2
  101. inspect_ai/log/_transcript.py +13 -9
  102. inspect_ai/model/__init__.py +7 -1
  103. inspect_ai/model/_call_tools.py +256 -52
  104. inspect_ai/model/_chat_message.py +7 -4
  105. inspect_ai/model/_conversation.py +13 -62
  106. inspect_ai/model/_display.py +85 -0
  107. inspect_ai/model/_model.py +113 -14
  108. inspect_ai/model/_model_output.py +14 -9
  109. inspect_ai/model/_openai.py +16 -4
  110. inspect_ai/model/_openai_computer_use.py +162 -0
  111. inspect_ai/model/_openai_responses.py +319 -165
  112. inspect_ai/model/_providers/anthropic.py +20 -21
  113. inspect_ai/model/_providers/azureai.py +24 -13
  114. inspect_ai/model/_providers/bedrock.py +1 -7
  115. inspect_ai/model/_providers/cloudflare.py +3 -3
  116. inspect_ai/model/_providers/goodfire.py +2 -6
  117. inspect_ai/model/_providers/google.py +11 -10
  118. inspect_ai/model/_providers/groq.py +6 -3
  119. inspect_ai/model/_providers/hf.py +7 -3
  120. inspect_ai/model/_providers/mistral.py +7 -10
  121. inspect_ai/model/_providers/openai.py +47 -17
  122. inspect_ai/model/_providers/openai_o1.py +11 -4
  123. inspect_ai/model/_providers/openai_responses.py +12 -14
  124. inspect_ai/model/_providers/providers.py +2 -2
  125. inspect_ai/model/_providers/together.py +12 -2
  126. inspect_ai/model/_providers/util/chatapi.py +7 -2
  127. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  128. inspect_ai/model/_providers/util/llama31.py +4 -2
  129. inspect_ai/model/_providers/vertex.py +11 -9
  130. inspect_ai/model/_providers/vllm.py +4 -4
  131. inspect_ai/scorer/__init__.py +2 -0
  132. inspect_ai/scorer/_metrics/__init__.py +2 -0
  133. inspect_ai/scorer/_metrics/grouped.py +84 -0
  134. inspect_ai/scorer/_score.py +26 -6
  135. inspect_ai/solver/__init__.py +2 -2
  136. inspect_ai/solver/_basic_agent.py +22 -9
  137. inspect_ai/solver/_bridge.py +31 -0
  138. inspect_ai/solver/_chain.py +20 -12
  139. inspect_ai/solver/_fork.py +5 -1
  140. inspect_ai/solver/_human_agent.py +52 -0
  141. inspect_ai/solver/_prompt.py +3 -1
  142. inspect_ai/solver/_run.py +59 -0
  143. inspect_ai/solver/_solver.py +14 -4
  144. inspect_ai/solver/_task_state.py +5 -3
  145. inspect_ai/tool/_tool_call.py +15 -8
  146. inspect_ai/tool/_tool_def.py +17 -12
  147. inspect_ai/tool/_tool_support_helpers.py +2 -2
  148. inspect_ai/tool/_tool_with.py +14 -11
  149. inspect_ai/tool/_tools/_bash_session.py +11 -2
  150. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  151. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  152. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  153. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  154. inspect_ai/tool/_tools/_think.py +1 -1
  155. inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
  156. inspect_ai/util/__init__.py +2 -0
  157. inspect_ai/util/_anyio.py +27 -0
  158. inspect_ai/util/_sandbox/__init__.py +2 -1
  159. inspect_ai/util/_sandbox/context.py +32 -7
  160. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  161. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  162. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  163. inspect_ai/util/_store_model.py +30 -7
  164. inspect_ai/util/_subprocess.py +13 -3
  165. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/METADATA +1 -1
  166. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/RECORD +179 -153
  167. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
  168. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  169. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  170. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  171. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  172. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  173. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  174. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  175. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  176. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  177. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/WHEEL +0 -0
  178. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/entry_points.txt +0 -0
  179. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/licenses/LICENSE +0 -0
  180. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,117 @@
1
+ import clsx from "clsx";
2
+ import { FC, ReactNode } from "react";
3
+ import { formatPrettyDecimal } from "../../utils/format";
4
+ import { ResultsScorer } from "./ResultsPanel";
5
+
6
+ import styles from "./ScoreGrid.module.css";
7
+
8
// Props accepted by the ScoreGrid component.
interface ScoreGridProps {
  // Groups of scorers; each group is rendered with its own header row,
  // taken from the metrics of the group's first scorer.
  scoreGroups: ResultsScorer[][];
  // When true, a scorer's reducer (if it has one) is shown in parentheses
  // after the scorer name.
  showReducer?: boolean;
  // Additional class name(s) passed through to the <table> element.
  className?: string | string[];
  // When true, the "table-striped" class is added to the <table> element.
  striped?: boolean;
}
14
+
15
/**
 * Renders groups of scorer results as a single table.
 *
 * Each group contributes a header row (metric names taken from the first
 * scorer in the group) followed by one row per scorer. Groups after the
 * first are preceded by a full-width separator row. All groups share the
 * same number of columns: the largest metric count found on any group's
 * first scorer. Missing columns are padded with empty cells.
 */
export const ScoreGrid: FC<ScoreGridProps> = ({
  scoreGroups,
  showReducer,
  className,
  striped,
}) => {
  // A group is described by its first scorer, so an empty group has nothing
  // to render; skipping it also avoids dereferencing `group[0]` on `[]`.
  const groups = scoreGroups.filter((group) => group.length > 0);

  // The widest group header determines the number of metric columns.
  const columnCount = groups.reduce((prev, group) => {
    return Math.max(prev, group[0].metrics.length);
  }, 0);

  // Flat list of keyed elements placed directly inside the <table>.
  const subTables: ReactNode[] = [];

  groups.forEach((scoreGroup, groupIndex) => {
    const metrics = scoreGroup[0].metrics;

    // Header cells: one label per metric, padded out to columnCount.
    const headerCells: ReactNode[] = [];
    for (let i = 0; i < columnCount; i++) {
      if (metrics.length > i) {
        headerCells.push(
          <th
            key={`metric-${i}`}
            className={clsx(
              "text-style-label",
              "text-style-secondary",
              "text-size-small",
              styles.label,
            )}
          >
            {metrics[i].name}
          </th>,
        );
      } else {
        headerCells.push(<td key={`metric-${i}`}></td>);
      }
    }

    // One row per scorer in the group.
    const rows: ReactNode[] = scoreGroup.map((g, rowIndex) => {
      const cells: ReactNode[] = [];
      for (let i = 0; i < columnCount; i++) {
        // Only columns present in the group header carry values. A scorer
        // may also report fewer metrics than the group's first scorer, in
        // which case the cell is left empty rather than throwing.
        const metric = i < metrics.length ? g.metrics[i] : undefined;
        if (metric !== undefined) {
          cells.push(
            <td
              key={`value-${i}`}
              className={clsx(styles.value, "text-size-small")}
            >
              {formatPrettyDecimal(metric.value)}
            </td>,
          );
        } else {
          cells.push(
            <td key={`value-${i}`} className={clsx(styles.value)}></td>,
          );
        }
      }

      return (
        <tr key={`scorer-${rowIndex}`}>
          <th className={clsx(styles.scorer, "text-size-small")}>
            {g.scorer} {showReducer && g.reducer ? `(${g.reducer})` : undefined}
          </th>
          {cells}
        </tr>
      );
    });

    // Visual separator between consecutive groups (not before the first).
    if (groupIndex > 0) {
      subTables.push(
        <tr key={`separator-${groupIndex}`}>
          <td
            colSpan={columnCount + 1}
            className={clsx(styles.groupSeparator)}
          ></td>
        </tr>,
      );
    }

    // Header row; the leading empty cell sits above the scorer-name column.
    subTables.push(
      <tr key={`header-${groupIndex}`} className={clsx(styles.headerRow)}>
        <td></td>
        {headerCells}
      </tr>,
    );

    subTables.push(
      <tbody
        key={`body-${groupIndex}`}
        className={clsx("table-group-divider", styles.tableBody)}
      >
        {rows}
      </tbody>,
    );
  });

  return (
    <table
      className={clsx(
        className,
        "table",
        striped ? "table-striped" : undefined,
        styles.table,
        "table-bordered",
      )}
    >
      {subTables}
    </table>
  );
};
@@ -124,7 +124,7 @@ export const SecondaryBar: FC<SecondaryBarProps> = ({
124
124
  id={"secondary-nav-bar"}
125
125
  className={clsx(styles.container, "text-size-small")}
126
126
  collapse={true}
127
- lines={4}
127
+ lines={5}
128
128
  >
129
129
  <div
130
130
  className={styles.valueGrid}
@@ -54,8 +54,9 @@
54
54
 
55
55
  .backdrop {
56
56
  position: fixed;
57
- inset: 0; /* top: 0; left: 0; right: 0; bottom: 0; */
58
- background-color: rgba(0, 0, 0, 0.5);
57
+ inset: 0;
58
+ background-color: var(--inspect-glass-color);
59
+ opacity: var(--inspect-glass-opacity);
59
60
  z-index: 9998;
60
61
  }
61
62
 
@@ -38,6 +38,23 @@ export const SamplesTab: FC<SamplesTabProps> = ({ running }) => {
38
38
 
39
39
  const sampleSummaries = useFilteredSamples();
40
40
  const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
41
+
42
+ // Compute the limit to apply to the sample count (this is so
43
+ // we can provide a total expected sample count for this evaluation)
44
+ const evalSampleCount = useMemo(() => {
45
+ const limit = selectedLogSummary?.eval.config.limit;
46
+ const limitCount =
47
+ limit === null || limit === undefined
48
+ ? undefined
49
+ : typeof limit === "number"
50
+ ? limit
51
+ : (limit[1] as number) - (limit[0] as number);
52
+ return (
53
+ (limitCount || selectedLogSummary?.eval.dataset.samples || 0) *
54
+ (selectedLogSummary?.eval.config.epochs || 0)
55
+ );
56
+ }, [selectedLogSummary?.eval.config.limit]);
57
+
41
58
  const totalSampleCount = useTotalSampleCount();
42
59
 
43
60
  const samplesDescriptor = useSampleDescriptor();
@@ -176,6 +193,7 @@ export const SamplesTab: FC<SamplesTabProps> = ({ running }) => {
176
193
  <SampleList
177
194
  listHandle={sampleListHandle}
178
195
  items={items}
196
+ totalItemCount={evalSampleCount}
179
197
  running={running}
180
198
  nextSample={nextSample}
181
199
  prevSample={previousSample}
@@ -10,6 +10,15 @@
10
10
  "@jridgewell/gen-mapping" "^0.3.5"
11
11
  "@jridgewell/trace-mapping" "^0.3.24"
12
12
 
13
+ "@apidevtools/json-schema-ref-parser@^11.5.5":
14
+ version "11.9.3"
15
+ resolved "https://registry.yarnpkg.com/@apidevtools/json-schema-ref-parser/-/json-schema-ref-parser-11.9.3.tgz#0e0c9061fc41cf03737d499a4e6a8299fdd2bfa7"
16
+ integrity sha512-60vepv88RwcJtSHrD6MjIL6Ta3SOYbgfnkHb+ppAVK+o9mXprRtulx7VlRl3lN3bbvysAfCS7WMVfhUYemB0IQ==
17
+ dependencies:
18
+ "@jsdevtools/ono" "^7.1.3"
19
+ "@types/json-schema" "^7.0.15"
20
+ js-yaml "^4.1.0"
21
+
13
22
  "@babel/code-frame@^7.26.2":
14
23
  version "7.26.2"
15
24
  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.26.2.tgz#4b5fab97d33338eff916235055f0ebc21e573a85"
@@ -477,6 +486,11 @@
477
486
  "@jridgewell/resolve-uri" "^3.1.0"
478
487
  "@jridgewell/sourcemap-codec" "^1.4.14"
479
488
 
489
+ "@jsdevtools/ono@^7.1.3":
490
+ version "7.1.3"
491
+ resolved "https://registry.yarnpkg.com/@jsdevtools/ono/-/ono-7.1.3.tgz#9df03bbd7c696a5c58885c34aa06da41c8543796"
492
+ integrity sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg==
493
+
480
494
  "@lezer/common@^1.0.0", "@lezer/common@^1.1.0":
481
495
  version "1.2.3"
482
496
  resolved "https://registry.yarnpkg.com/@lezer/common/-/common-1.2.3.tgz#138fcddab157d83da557554851017c6c1e5667fd"
@@ -701,6 +715,11 @@
701
715
  resolved "https://registry.yarnpkg.com/@types/linkify-it/-/linkify-it-5.0.0.tgz#21413001973106cda1c3a9b91eedd4ccd5469d76"
702
716
  integrity sha512-sVDA58zAw4eWAffKOaQH5/5j3XeayukzDk+ewSsnv3p4yJEZHCCzMDiZM8e0OUrRvmpGZ85jf4yDHkHsgBNr9Q==
703
717
 
718
+ "@types/lodash@^4.17.7":
719
+ version "4.17.16"
720
+ resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.17.16.tgz#94ae78fab4a38d73086e962d0b65c30d816bfb0a"
721
+ integrity sha512-HX7Em5NYQAXKW+1T+FiuG27NGwzJfCX3s1GjOa7ujxZa52kjJLOr4FUxT+giF6Tgxv1e+/czV/iTtBw27WTU9g==
722
+
704
723
  "@types/markdown-it@^14.1.2":
705
724
  version "14.1.2"
706
725
  resolved "https://registry.yarnpkg.com/@types/markdown-it/-/markdown-it-14.1.2.tgz#57f2532a0800067d9b934f3521429a2e8bfb4c61"
@@ -984,6 +1003,11 @@ color-name@~1.1.4:
984
1003
  resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
985
1004
  integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
986
1005
 
1006
+ commander@^8.3.0:
1007
+ version "8.3.0"
1008
+ resolved "https://registry.yarnpkg.com/commander/-/commander-8.3.0.tgz#4837ea1b2da67b9c616a67afbb0fafee567bca66"
1009
+ integrity sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==
1010
+
987
1011
  concat-map@0.0.1:
988
1012
  version "0.0.1"
989
1013
  resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
@@ -1231,6 +1255,11 @@ fastq@^1.6.0:
1231
1255
  dependencies:
1232
1256
  reusify "^1.0.4"
1233
1257
 
1258
+ fdir@^6.4.3:
1259
+ version "6.4.3"
1260
+ resolved "https://registry.yarnpkg.com/fdir/-/fdir-6.4.3.tgz#011cdacf837eca9b811c89dbb902df714273db72"
1261
+ integrity sha512-PMXmW2y1hDDfTSRc9gaXIuCCRpuoz3Kaz8cUelp3smouvfT632ozg2vrT6lJsHKKOF59YLbOGfAWGUcKEfRMQw==
1262
+
1234
1263
  fflate@^0.8.2:
1235
1264
  version "0.8.2"
1236
1265
  resolved "https://registry.yarnpkg.com/fflate/-/fflate-0.8.2.tgz#fc8631f5347812ad6028bbe4a2308b2792aa1dea"
@@ -1404,6 +1433,21 @@ json-buffer@3.0.1:
1404
1433
  resolved "https://registry.yarnpkg.com/json-buffer/-/json-buffer-3.0.1.tgz#9338802a30d3b6605fbe0613e094008ca8c05a13"
1405
1434
  integrity sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==
1406
1435
 
1436
+ json-schema-to-typescript@^15.0.4:
1437
+ version "15.0.4"
1438
+ resolved "https://registry.yarnpkg.com/json-schema-to-typescript/-/json-schema-to-typescript-15.0.4.tgz#a530c7f17312503b262ae12233749732171840f3"
1439
+ integrity sha512-Su9oK8DR4xCmDsLlyvadkXzX6+GGXJpbhwoLtOGArAG61dvbW4YQmSEno2y66ahpIdmLMg6YUf/QHLgiwvkrHQ==
1440
+ dependencies:
1441
+ "@apidevtools/json-schema-ref-parser" "^11.5.5"
1442
+ "@types/json-schema" "^7.0.15"
1443
+ "@types/lodash" "^4.17.7"
1444
+ is-glob "^4.0.3"
1445
+ js-yaml "^4.1.0"
1446
+ lodash "^4.17.21"
1447
+ minimist "^1.2.8"
1448
+ prettier "^3.2.5"
1449
+ tinyglobby "^0.2.9"
1450
+
1407
1451
  json-schema-traverse@^0.4.1:
1408
1452
  version "0.4.1"
1409
1453
  resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660"
@@ -1433,6 +1477,20 @@ jsondiffpatch@^0.6.0:
1433
1477
  chalk "^5.3.0"
1434
1478
  diff-match-patch "^1.0.5"
1435
1479
 
1480
+ katex@^0.16.21:
1481
+ version "0.16.21"
1482
+ resolved "https://registry.yarnpkg.com/katex/-/katex-0.16.21.tgz#8f63c659e931b210139691f2cc7bb35166b792a3"
1483
+ integrity sha512-XvqR7FgOHtWupfMiigNzmh+MgUVmDGU2kXZm899ZkPfcuoPuFxyHmXsgATDpFZDAXCI8tvinaVcDo8PIIJSo4A==
1484
+ dependencies:
1485
+ commander "^8.3.0"
1486
+
1487
+ katex@^0.6.0:
1488
+ version "0.6.0"
1489
+ resolved "https://registry.yarnpkg.com/katex/-/katex-0.6.0.tgz#12418e09121c05c92041b6b3b9fb6bab213cb6f3"
1490
+ integrity sha512-rS4mY3SvHYg5LtQV6RBcK0if7ur6plyEukAOV+jGGPqFImuzu8fHL6M752iBmRGoUyF0bhZbAPoezehn7xYksA==
1491
+ dependencies:
1492
+ match-at "^0.1.0"
1493
+
1436
1494
  keyv@^4.5.4:
1437
1495
  version "4.5.4"
1438
1496
  resolved "https://registry.yarnpkg.com/keyv/-/keyv-4.5.4.tgz#a879a99e29452f942439f2a405e3af8b31d4de93"
@@ -1467,6 +1525,11 @@ lodash.merge@^4.6.2:
1467
1525
  resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a"
1468
1526
  integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==
1469
1527
 
1528
+ lodash@^4.17.21:
1529
+ version "4.17.21"
1530
+ resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
1531
+ integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
1532
+
1470
1533
  lru-cache@^5.1.1:
1471
1534
  version "5.1.1"
1472
1535
  resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-5.1.1.tgz#1da27e6710271947695daf6848e847f01d84b920"
@@ -1481,6 +1544,13 @@ make-dir@~3.1.0:
1481
1544
  dependencies:
1482
1545
  semver "^6.0.0"
1483
1546
 
1547
+ markdown-it-katex@^2.0.3:
1548
+ version "2.0.3"
1549
+ resolved "https://registry.yarnpkg.com/markdown-it-katex/-/markdown-it-katex-2.0.3.tgz#d7b86a1aea0b9d6496fab4e7919a18fdef589c39"
1550
+ integrity sha512-nUkkMtRWeg7OpdflamflE/Ho/pWl64Lk9wNBKOmaj33XkQdumhXAIYhI0WO03GeiycPCsxbmX536V5NEXpC3Ng==
1551
+ dependencies:
1552
+ katex "^0.6.0"
1553
+
1484
1554
  markdown-it@^14.1.0:
1485
1555
  version "14.1.0"
1486
1556
  resolved "https://registry.yarnpkg.com/markdown-it/-/markdown-it-14.1.0.tgz#3c3c5992883c633db4714ccb4d7b5935d98b7d45"
@@ -1493,6 +1563,11 @@ markdown-it@^14.1.0:
1493
1563
  punycode.js "^2.3.1"
1494
1564
  uc.micro "^2.1.0"
1495
1565
 
1566
+ match-at@^0.1.0:
1567
+ version "0.1.1"
1568
+ resolved "https://registry.yarnpkg.com/match-at/-/match-at-0.1.1.tgz#25d040d291777704d5e6556bbb79230ec2de0540"
1569
+ integrity sha512-h4Yd392z9mST+dzc+yjuybOGFNOZjmXIPKWjxBd1Bb23r4SmDOsk2NYCU2BMUBGbSpZqwVsZYNq26QS3xfaT3Q==
1570
+
1496
1571
  mdurl@^2.0.0:
1497
1572
  version "2.0.0"
1498
1573
  resolved "https://registry.yarnpkg.com/mdurl/-/mdurl-2.0.0.tgz#80676ec0433025dd3e17ee983d0fe8de5a2237e0"
@@ -1537,6 +1612,11 @@ minimatch@~3.0.4:
1537
1612
  dependencies:
1538
1613
  brace-expansion "^1.1.7"
1539
1614
 
1615
+ minimist@^1.2.8:
1616
+ version "1.2.8"
1617
+ resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.8.tgz#c1a464e7693302e082a075cee0c057741ac4772c"
1618
+ integrity sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==
1619
+
1540
1620
  ms@^2.1.3:
1541
1621
  version "2.1.3"
1542
1622
  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2"
@@ -1615,6 +1695,11 @@ picomatch@^2.3.1:
1615
1695
  resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42"
1616
1696
  integrity sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==
1617
1697
 
1698
+ picomatch@^4.0.2:
1699
+ version "4.0.2"
1700
+ resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-4.0.2.tgz#77c742931e8f3b8820946c76cd0c1f13730d1dab"
1701
+ integrity sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==
1702
+
1618
1703
  postcss-url@^10.1.3:
1619
1704
  version "10.1.3"
1620
1705
  resolved "https://registry.yarnpkg.com/postcss-url/-/postcss-url-10.1.3.tgz#54120cc910309e2475ec05c2cfa8f8a2deafdf1e"
@@ -1639,7 +1724,7 @@ prelude-ls@^1.2.1:
1639
1724
  resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396"
1640
1725
  integrity sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==
1641
1726
 
1642
- prettier@^3.3.3:
1727
+ prettier@^3.2.5, prettier@^3.3.3:
1643
1728
  version "3.5.3"
1644
1729
  resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.5.3.tgz#4fc2ce0d657e7a02e602549f053b239cb7dfe1b5"
1645
1730
  integrity sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw==
@@ -1814,6 +1899,14 @@ tiny-emitter@^2.0.0:
1814
1899
  resolved "https://registry.yarnpkg.com/tiny-emitter/-/tiny-emitter-2.1.0.tgz#1d1a56edfc51c43e863cbb5382a72330e3555423"
1815
1900
  integrity sha512-NB6Dk1A9xgQPMoGqC5CVXn123gWyte215ONT5Pp5a0yt4nlEoO1ZWeCwpncaekPHXO60i47ihFnZPiRPjRMq4Q==
1816
1901
 
1902
+ tinyglobby@^0.2.9:
1903
+ version "0.2.12"
1904
+ resolved "https://registry.yarnpkg.com/tinyglobby/-/tinyglobby-0.2.12.tgz#ac941a42e0c5773bd0b5d08f32de82e74a1a61b5"
1905
+ integrity sha512-qkf4trmKSIiMTs/E63cxH+ojC2unam7rJ0WrauAzpT3ECNTxGRMlaXxVbfxMUC/w0LaYk6jQ4y/nGR9uBO3tww==
1906
+ dependencies:
1907
+ fdir "^6.4.3"
1908
+ picomatch "^4.0.2"
1909
+
1817
1910
  to-regex-range@^5.0.1:
1818
1911
  version "5.0.1"
1819
1912
  resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4"
@@ -0,0 +1,36 @@
1
+ from ._agent import Agent, AgentState, agent, agent_with
2
+ from ._as_solver import as_solver
3
+ from ._as_tool import as_tool
4
+ from ._bridge.bridge import bridge
5
+ from ._filter import MessageFilter, last_message, remove_tools
6
+ from ._handoff import handoff
7
+ from ._human.agent import human_cli
8
+ from ._react import react
9
+ from ._run import run
10
+ from ._types import (
11
+ AgentAttempts,
12
+ AgentContinue,
13
+ AgentPrompt,
14
+ AgentSubmit,
15
+ )
16
+
17
+ __all__ = [
18
+ "react",
19
+ "bridge",
20
+ "human_cli",
21
+ "run",
22
+ "handoff",
23
+ "as_tool",
24
+ "as_solver",
25
+ "last_message",
26
+ "remove_tools",
27
+ "MessageFilter",
28
+ "Agent",
29
+ "AgentState",
30
+ "agent",
31
+ "agent_with",
32
+ "AgentPrompt",
33
+ "AgentAttempts",
34
+ "AgentContinue",
35
+ "AgentSubmit",
36
+ ]
@@ -0,0 +1,268 @@
1
+ from copy import copy, deepcopy
2
+ from functools import wraps
3
+ from typing import (
4
+ Any,
5
+ Callable,
6
+ ParamSpec,
7
+ Protocol,
8
+ TypeGuard,
9
+ cast,
10
+ overload,
11
+ runtime_checkable,
12
+ )
13
+
14
+ from inspect_ai._util.registry import (
15
+ RegistryInfo,
16
+ is_registry_object,
17
+ registry_add,
18
+ registry_info,
19
+ registry_name,
20
+ registry_tag,
21
+ set_registry_info,
22
+ )
23
+ from inspect_ai.model._chat_message import (
24
+ ChatMessage,
25
+ ChatMessageAssistant,
26
+ )
27
+ from inspect_ai.model._model_output import ChatCompletionChoice, ModelOutput
28
+
29
+
30
class AgentState:
    """Agent state.

    Holds the conversation history and the most recent model output.
    """

    def __init__(self, *, messages: list[ChatMessage]) -> None:
        """Create agent state.

        Args:
            messages: Conversation history. The list is stored as-is
                (not copied).
        """
        self._messages = messages
        # populated lazily by the `output` property (or explicitly via its setter)
        self._output: ModelOutput | None = None

    @property
    def messages(self) -> list[ChatMessage]:
        """Conversation history."""
        return self._messages

    @messages.setter
    def messages(self, messages: list[ChatMessage]) -> None:
        """Set the conversation history."""
        self._messages = messages

    @property
    def output(self) -> ModelOutput:
        """Model output.

        If no output has been set, one is synthesized from the last
        assistant message in the conversation history (or an empty
        `ModelOutput` if there is no assistant message). The synthesized
        value is cached, so later reads return the same object.
        """
        # if there is no output yet then synthesize it from the last assistant message
        if self._output is None:
            # look for the last assistant message
            for message in reversed(self.messages):
                if isinstance(message, ChatMessageAssistant):
                    self._output = ModelOutput(
                        model=message.model or "",
                        choices=[
                            ChatCompletionChoice(
                                message=message.model_copy(),
                                stop_reason="stop",
                            )
                        ],
                    )
                    # we iterate newest-to-oldest, so the first match is the
                    # last assistant message; without stopping here an older
                    # assistant message would overwrite it
                    break

            # no assistant message, so generate an empty model output
            if self._output is None:
                self._output = ModelOutput()

        return self._output

    @output.setter
    def output(self, output: ModelOutput) -> None:
        """Set the model output."""
        self._output = output

    def __copy__(self) -> "AgentState":
        """Shallow copy: new message list and a (shallow) copy of the output."""
        state = AgentState(messages=copy(self.messages))
        # reading `self.output` synthesizes the output on this instance if needed
        state.output = self.output.model_copy()
        return state

    def __deepcopy__(self, memo: dict[int, Any]) -> "AgentState":
        """Deep copy: messages and output are both deep-copied."""
        state = AgentState(messages=deepcopy(self.messages, memo))
        state.output = self.output.model_copy(deep=True)
        return state
85
+
86
+
87
@runtime_checkable
class Agent(Protocol):
    async def __call__(
        self,
        state: AgentState,
        *args: Any,
        **kwargs: Any,
    ) -> AgentState:
        """Perform a task as a participant in a conversation.

        Agents resemble tools, but unlike tools they take part in the
        conversation history: an agent may append messages and model
        output to the conversation state it is given.

        To let a model hand control off to an agent, expose the agent
        with the `handoff()` function. To expose an agent as a simple
        tool that takes a single string input, use `as_tool()`.

        Args:
            state: Agent state (conversation history and last model output)
            *args: Arguments for the agent.
            **kwargs: Keyword arguments for the agent.

        Returns:
            AgentState: Updated agent state.
        """
        ...
116
+
117
+
118
+ P = ParamSpec("P")
119
+
120
+
121
@overload
def agent(func: Callable[P, Agent]) -> Callable[P, Agent]: ...


@overload
def agent() -> Callable[[Callable[P, Agent]], Callable[P, Agent]]: ...


@overload
def agent(
    *,
    name: str | None = None,
    description: str | None = None,
) -> Callable[[Callable[P, Agent]], Callable[P, Agent]]: ...


def agent(
    func: Callable[P, Agent] | None = None,
    *,
    name: str | None = None,
    description: str | None = None,
) -> Callable[P, Agent] | Callable[[Callable[P, Agent]], Callable[P, Agent]]:
    r"""Decorator for registering agents.

    May be applied bare (`@agent`) or with keyword arguments
    (`@agent(name=..., description=...)`).

    Args:
        func: Agent creation function (supplied when the decorator is
            applied without parentheses).
        name: Optional name for agent. If not provided, the name of the
            agent creation function is used.
        description: Description for the agent when used as
            an ordinary tool or handoff tool.

    Returns:
        Agent creation function with registry attributes (or, when `func`
        is not provided, a decorator that produces one).
    """

    def create_agent_wrapper(agent_type: Callable[P, Agent]) -> Callable[P, Agent]:
        # explicit name wins, otherwise fall back to the function's own name
        agent_name = registry_name(agent_type, name or getattr(agent_type, "__name__"))

        # wrap the creation function so every agent it creates is tagged
        # with registry info
        @wraps(agent_type)
        def agent_wrapper(*args: P.args, **kwargs: P.kwargs) -> Agent:
            instance = agent_type(*args, **kwargs)

            # an agent that already carries registry info keeps its
            # existing name/description in preference to ours
            prior_name = None
            prior_description = None
            if is_registry_object(instance):
                prior = registry_info(instance)
                prior_name = prior.name
                prior_description = prior.metadata.get(AGENT_DESCRIPTION, None)

            tag = RegistryInfo(
                type="agent",
                name=prior_name or agent_name,
                metadata={AGENT_DESCRIPTION: prior_description or description},
            )
            registry_tag(agent_type, instance, tag, *args, **kwargs)
            return instance

        # add the wrapped creation function to the registry
        return agent_register(cast(Callable[P, Agent], agent_wrapper), agent_name)

    # called as @agent(...): hand back the decorator itself
    if func is None:
        return create_agent_wrapper

    # called as bare @agent: decorate immediately
    return create_agent_wrapper(func)
199
+
200
+
201
def agent_with(
    agent: Agent,
    *,
    name: str | None = None,
    description: str | None = None,
) -> Agent:
    """Agent with modifications to name and/or description

    The passed agent is modified in place and then returned. To create
    several variations of one agent with `agent_with()`, create the
    underlying agent once per variation.

    Args:
        agent: Agent instance to modify.
        name: Agent name (optional).
        description: Agent description (optional).

    Returns:
        The passed agent with the requested modifications.

    Raises:
        ValueError: If no name is provided and the agent has no
            existing registry name to fall back on.
    """
    # fall back to whatever the agent is already registered with
    if is_registry_object(agent):
        existing = registry_info(agent)
        name = name or existing.name
        description = description or existing.metadata.get(AGENT_DESCRIPTION, None)

    # a name is mandatory at this point
    if name is None:
        raise ValueError("You must provide a name to agent_with")

    # only record a description when there is one
    metadata: dict[str, Any] = (
        {} if description is None else {AGENT_DESCRIPTION: description}
    )
    set_registry_info(
        agent,
        RegistryInfo(type="agent", name=name, metadata=metadata),
    )

    return agent
245
+
246
+
247
def agent_register(agent: Callable[P, Agent], name: str) -> Callable[P, Agent]:
    r"""Register a function or class as an agent.

    Args:
        agent: Agent function or a class derived from Agent.
        name: Name to register the agent under.

    Returns:
        Agent with registry attributes.
    """
    info = RegistryInfo(type="agent", name=name)
    registry_add(agent, info)
    return agent
262
+
263
+
264
def is_agent(obj: Any) -> TypeGuard[Agent]:
    """Check whether `obj` is a registry object of type "agent"."""
    registered_as_agent = is_registry_object(obj, type="agent")
    return registered_as_agent
266
+
267
+
268
+ AGENT_DESCRIPTION = "description"