inspect-ai 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_display/textual/app.py +14 -3
  3. inspect_ai/_display/textual/display.py +4 -0
  4. inspect_ai/_display/textual/widgets/samples.py +9 -3
  5. inspect_ai/_display/textual/widgets/task_detail.py +3 -4
  6. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  7. inspect_ai/_display/textual/widgets/vscode.py +48 -0
  8. inspect_ai/_eval/eval.py +36 -24
  9. inspect_ai/_eval/evalset.py +17 -18
  10. inspect_ai/_eval/loader.py +34 -11
  11. inspect_ai/_eval/run.py +8 -13
  12. inspect_ai/_eval/score.py +13 -3
  13. inspect_ai/_eval/task/generate.py +8 -9
  14. inspect_ai/_eval/task/log.py +2 -0
  15. inspect_ai/_eval/task/task.py +23 -9
  16. inspect_ai/_util/file.py +13 -0
  17. inspect_ai/_util/json.py +2 -1
  18. inspect_ai/_util/registry.py +1 -0
  19. inspect_ai/_util/vscode.py +37 -0
  20. inspect_ai/_view/www/App.css +6 -0
  21. inspect_ai/_view/www/dist/assets/index.css +304 -128
  22. inspect_ai/_view/www/dist/assets/index.js +47495 -27519
  23. inspect_ai/_view/www/log-schema.json +124 -31
  24. inspect_ai/_view/www/package.json +3 -0
  25. inspect_ai/_view/www/src/App.tsx +12 -0
  26. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  27. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  28. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  29. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  30. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
  31. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
  32. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  33. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  34. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  35. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  36. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  37. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
  38. inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
  39. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
  40. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
  41. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
  42. inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
  43. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
  44. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
  45. inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
  46. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
  47. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
  48. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  49. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  50. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  51. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
  52. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
  53. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
  54. inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
  55. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
  56. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
  57. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  58. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  59. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  60. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  61. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
  62. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
  63. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
  64. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
  65. inspect_ai/_view/www/src/state/hooks.ts +5 -3
  66. inspect_ai/_view/www/src/state/logPolling.ts +5 -1
  67. inspect_ai/_view/www/src/state/logSlice.ts +10 -0
  68. inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
  69. inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +34 -26
  71. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  72. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  73. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
  74. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
  75. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
  76. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  77. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  78. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
  79. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  80. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
  81. inspect_ai/_view/www/yarn.lock +94 -1
  82. inspect_ai/agent/__init__.py +36 -0
  83. inspect_ai/agent/_agent.py +268 -0
  84. inspect_ai/agent/_as_solver.py +72 -0
  85. inspect_ai/agent/_as_tool.py +122 -0
  86. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  87. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  88. inspect_ai/agent/_filter.py +46 -0
  89. inspect_ai/agent/_handoff.py +93 -0
  90. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  91. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  92. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  93. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  94. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  95. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  96. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  97. inspect_ai/agent/_react.py +241 -0
  98. inspect_ai/agent/_run.py +36 -0
  99. inspect_ai/agent/_types.py +81 -0
  100. inspect_ai/log/_log.py +11 -2
  101. inspect_ai/log/_transcript.py +13 -9
  102. inspect_ai/model/__init__.py +7 -1
  103. inspect_ai/model/_call_tools.py +256 -52
  104. inspect_ai/model/_chat_message.py +7 -4
  105. inspect_ai/model/_conversation.py +13 -62
  106. inspect_ai/model/_display.py +85 -0
  107. inspect_ai/model/_model.py +113 -14
  108. inspect_ai/model/_model_output.py +14 -9
  109. inspect_ai/model/_openai.py +16 -4
  110. inspect_ai/model/_openai_computer_use.py +162 -0
  111. inspect_ai/model/_openai_responses.py +319 -165
  112. inspect_ai/model/_providers/anthropic.py +20 -21
  113. inspect_ai/model/_providers/azureai.py +24 -13
  114. inspect_ai/model/_providers/bedrock.py +1 -7
  115. inspect_ai/model/_providers/cloudflare.py +3 -3
  116. inspect_ai/model/_providers/goodfire.py +2 -6
  117. inspect_ai/model/_providers/google.py +11 -10
  118. inspect_ai/model/_providers/groq.py +6 -3
  119. inspect_ai/model/_providers/hf.py +7 -3
  120. inspect_ai/model/_providers/mistral.py +7 -10
  121. inspect_ai/model/_providers/openai.py +47 -17
  122. inspect_ai/model/_providers/openai_o1.py +11 -4
  123. inspect_ai/model/_providers/openai_responses.py +12 -14
  124. inspect_ai/model/_providers/providers.py +2 -2
  125. inspect_ai/model/_providers/together.py +12 -2
  126. inspect_ai/model/_providers/util/chatapi.py +7 -2
  127. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  128. inspect_ai/model/_providers/util/llama31.py +4 -2
  129. inspect_ai/model/_providers/vertex.py +11 -9
  130. inspect_ai/model/_providers/vllm.py +4 -4
  131. inspect_ai/scorer/__init__.py +2 -0
  132. inspect_ai/scorer/_metrics/__init__.py +2 -0
  133. inspect_ai/scorer/_metrics/grouped.py +84 -0
  134. inspect_ai/scorer/_score.py +26 -6
  135. inspect_ai/solver/__init__.py +2 -2
  136. inspect_ai/solver/_basic_agent.py +22 -9
  137. inspect_ai/solver/_bridge.py +31 -0
  138. inspect_ai/solver/_chain.py +20 -12
  139. inspect_ai/solver/_fork.py +5 -1
  140. inspect_ai/solver/_human_agent.py +52 -0
  141. inspect_ai/solver/_prompt.py +3 -1
  142. inspect_ai/solver/_run.py +59 -0
  143. inspect_ai/solver/_solver.py +14 -4
  144. inspect_ai/solver/_task_state.py +5 -3
  145. inspect_ai/tool/_tool_call.py +15 -8
  146. inspect_ai/tool/_tool_def.py +17 -12
  147. inspect_ai/tool/_tool_support_helpers.py +2 -2
  148. inspect_ai/tool/_tool_with.py +14 -11
  149. inspect_ai/tool/_tools/_bash_session.py +11 -2
  150. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  151. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  152. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  153. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  154. inspect_ai/tool/_tools/_think.py +1 -1
  155. inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
  156. inspect_ai/util/__init__.py +2 -0
  157. inspect_ai/util/_anyio.py +27 -0
  158. inspect_ai/util/_sandbox/__init__.py +2 -1
  159. inspect_ai/util/_sandbox/context.py +32 -7
  160. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  161. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  162. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  163. inspect_ai/util/_store_model.py +30 -7
  164. inspect_ai/util/_subprocess.py +13 -3
  165. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/METADATA +1 -1
  166. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/RECORD +179 -153
  167. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
  168. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  169. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  170. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  171. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  172. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  173. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  174. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  175. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  176. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  177. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/WHEEL +0 -0
  178. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/entry_points.txt +0 -0
  179. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/licenses/LICENSE +0 -0
  180. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,38 @@
1
+ .container {
2
+ display: grid;
3
+ grid-template-columns:
4
+ minmax(auto, 1fr) minmax(auto, 1fr) minmax(auto, 1fr)
5
+ 2fr;
6
+ column-gap: 0.75em;
7
+ }
8
+
9
+ .container .cell {
10
+ margin-bottom: 0.5em;
11
+ }
12
+
13
+ .fullWidth {
14
+ grid-column: 1 / -1;
15
+ }
16
+
17
+ .heading {
18
+ font-weight: 600;
19
+ }
20
+
21
+ .padded {
22
+ padding-bottom: 3em;
23
+ }
24
+
25
+ .separator {
26
+ height: 1px;
27
+ background-color: var(--bs-light-border-subtle);
28
+ }
29
+
30
+ .separatorPadded {
31
+ margin-top: 0.5em;
32
+ margin-bottom: 0.5em;
33
+ }
34
+
35
+ .headerSep {
36
+ margin-top: 0.1em;
37
+ margin-bottom: 0.2em;
38
+ }
@@ -0,0 +1,118 @@
1
+ import clsx from "clsx";
2
+ import { FC } from "react";
3
+ import { SampleSummary } from "../../api/types";
4
+ import { EmptyPanel } from "../../components/EmptyPanel";
5
+ import { MetaDataGrid } from "../../metadata/MetaDataGrid";
6
+ import { useEvalDescriptor } from "../../state/hooks";
7
+ import { EvalSample } from "../../types/log";
8
+ import { SampleScores } from "./SampleScores";
9
+ import styles from "./SampleScoresGrid.module.css";
10
+
11
+ interface SampleScoresGridProps {
12
+ evalSample: EvalSample;
13
+ className?: string | string[];
14
+ }
15
+
16
+ export const SampleScoresGrid: FC<SampleScoresGridProps> = ({
17
+ evalSample,
18
+ className,
19
+ }) => {
20
+ const evalDescriptor = useEvalDescriptor();
21
+ if (!evalDescriptor) {
22
+ return <EmptyPanel>No Sample Selected</EmptyPanel>;
23
+ }
24
+ return (
25
+ <div className={clsx(className, styles.container)}>
26
+ <div
27
+ className={clsx(
28
+ "text-size-smaller",
29
+ "text-style-label",
30
+ "text-style-secondary",
31
+ )}
32
+ >
33
+ Scorer
34
+ </div>
35
+ <div
36
+ className={clsx(
37
+ "text-size-smaller",
38
+ "text-style-label",
39
+ "text-style-secondary",
40
+ )}
41
+ >
42
+ Answer
43
+ </div>
44
+ <div
45
+ className={clsx(
46
+ "text-size-smaller",
47
+ "text-style-label",
48
+ "text-style-secondary",
49
+ )}
50
+ >
51
+ Score
52
+ </div>
53
+ <div
54
+ className={clsx(
55
+ "text-size-smaller",
56
+ "text-style-label",
57
+ "text-style-secondary",
58
+ )}
59
+ >
60
+ Explanation
61
+ </div>
62
+ <div
63
+ className={clsx(styles.separator, styles.fullWidth, styles.headerSep)}
64
+ ></div>
65
+
66
+ {Object.keys(evalSample.scores || {}).map((scorer) => {
67
+ if (!evalSample.scores) {
68
+ return undefined;
69
+ }
70
+ const scoreData = evalSample.scores[scorer];
71
+ const explanation = scoreData.explanation || "(No Explanation)";
72
+ const answer = scoreData.answer;
73
+ let metadata = scoreData.metadata || {};
74
+
75
+ return (
76
+ <>
77
+ <div className={clsx("text-size-base", styles.cell)}>{scorer}</div>
78
+ <div className={clsx(styles.cell, "text-size-base")}>{answer}</div>
79
+ <div className={clsx(styles.cell, "text-size-base")}>
80
+ <SampleScores
81
+ sample={evalSample as any as SampleSummary}
82
+ scorer={scorer}
83
+ />
84
+ </div>
85
+ <div className={clsx("text-size-base", styles.cell)}>
86
+ {explanation}
87
+ </div>
88
+
89
+ {Object.keys(metadata).length > 0 ? (
90
+ <>
91
+ <div
92
+ className={clsx(
93
+ "text-size-smaller",
94
+ "text-style-label",
95
+ "text-style-secondary",
96
+ styles.fullWidth,
97
+ )}
98
+ >
99
+ Metadata
100
+ </div>
101
+ <div className={clsx(styles.fullWidth)}>
102
+ <MetaDataGrid entries={metadata} />
103
+ </div>
104
+ <div
105
+ className={clsx(
106
+ styles.separator,
107
+ styles.separatorPadded,
108
+ styles.fullWidth,
109
+ )}
110
+ ></div>
111
+ </>
112
+ ) : undefined}
113
+ </>
114
+ );
115
+ })}
116
+ </div>
117
+ );
118
+ };
@@ -1,6 +1,7 @@
1
1
  .container {
2
2
  margin-top: 0.5em;
3
3
  padding-left: 0;
4
+ padding-right: 0;
4
5
  }
5
6
 
6
7
  .label {
@@ -12,7 +13,7 @@
12
13
  }
13
14
 
14
15
  .wordBreak {
15
- word-break: break-all;
16
+ word-break: break-word;
16
17
  }
17
18
 
18
19
  .scoreTable {
@@ -51,3 +52,11 @@
51
52
  .noTop {
52
53
  margin-top: 0 !important;
53
54
  }
55
+
56
+ .scoreCard {
57
+ padding-top: 0.5em;
58
+ }
59
+
60
+ .scores {
61
+ padding-top: 1em;
62
+ }
@@ -0,0 +1,78 @@
1
+ import clsx from "clsx";
2
+ import { Card, CardBody } from "../../components/Card";
3
+ import { MarkdownDiv } from "../../components/MarkdownDiv";
4
+ import { EvalSample } from "../../types/log";
5
+ import { inputString } from "../../utils/format";
6
+
7
+ import { FC } from "react";
8
+ import ExpandablePanel from "../../components/ExpandablePanel";
9
+ import { useEvalDescriptor } from "../../state/hooks";
10
+ import { SampleScoresGrid } from "./SampleScoresGrid";
11
+ import styles from "./SampleScoresView.module.css";
12
+
13
+ interface SampleScoresViewProps {
14
+ sample?: EvalSample;
15
+ className?: string | string[];
16
+ }
17
+
18
+ export const SampleScoresView: FC<SampleScoresViewProps> = ({
19
+ sample,
20
+ className,
21
+ }) => {
22
+ const evalDescriptor = useEvalDescriptor();
23
+ if (!evalDescriptor) {
24
+ return undefined;
25
+ }
26
+ if (!sample) {
27
+ return undefined;
28
+ }
29
+
30
+ const scoreInput = inputString(sample.input);
31
+ if (sample.choices && sample.choices.length > 0) {
32
+ scoreInput.push("");
33
+ scoreInput.push(
34
+ ...sample.choices.map((choice, index) => {
35
+ return `${String.fromCharCode(65 + index)}) ${choice}`;
36
+ }),
37
+ );
38
+ }
39
+
40
+ return (
41
+ <div
42
+ className={clsx(
43
+ "container-fluid",
44
+ className,
45
+ "font-size-base",
46
+ styles.container,
47
+ )}
48
+ >
49
+ <Card className={clsx(styles.scoreCard)}>
50
+ <CardBody>
51
+ <div
52
+ className={clsx(
53
+ "text-size-small",
54
+ "text-style-label",
55
+ "text-style-secondary",
56
+ )}
57
+ >
58
+ Input
59
+ </div>
60
+ <ExpandablePanel
61
+ lines={10}
62
+ id={`sample-score-${sample.id}-${sample.epoch}`}
63
+ collapse={true}
64
+ >
65
+ <MarkdownDiv
66
+ markdown={scoreInput.join("\n")}
67
+ className={clsx(styles.wordBreak, "text-size-base")}
68
+ />
69
+ </ExpandablePanel>
70
+ <SampleScoresGrid
71
+ evalSample={sample}
72
+ className={clsx(styles.scores)}
73
+ />
74
+ </CardBody>
75
+ </Card>
76
+ </div>
77
+ );
78
+ };
@@ -1,6 +1,6 @@
1
1
  import { FC } from "react";
2
2
  import { ApplicationIcons } from "../../appearance/icons";
3
- import { SampleLimitEvent, Type10 } from "../../types/log";
3
+ import { SampleLimitEvent, Type9 } from "../../types/log";
4
4
  import { EventPanel } from "./event/EventPanel";
5
5
 
6
6
  interface SampleLimitEventViewProps {
@@ -17,7 +17,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
17
17
  event,
18
18
  className,
19
19
  }) => {
20
- const resolve_title = (type: Type10) => {
20
+ const resolve_title = (type: Type9) => {
21
21
  switch (type) {
22
22
  case "custom":
23
23
  return "Custom Limit Exceeded";
@@ -34,7 +34,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
34
34
  }
35
35
  };
36
36
 
37
- const resolve_icon = (type: Type10) => {
37
+ const resolve_icon = (type: Type9) => {
38
38
  switch (type) {
39
39
  case "custom":
40
40
  return ApplicationIcons.limits.custom;
@@ -9,6 +9,7 @@ import { TranscriptView } from "./TranscriptView";
9
9
  import clsx from "clsx";
10
10
  import { FC, useMemo } from "react";
11
11
  import { PulsingDots } from "../../components/PulsingDots";
12
+ import { ChatView } from "../chat/ChatView";
12
13
  import { formatTiming, formatTitle } from "./event/utils";
13
14
  import styles from "./ToolEventView.module.css";
14
15
 
@@ -34,10 +35,19 @@ export const ToolEventView: FC<ToolEventViewProps> = ({
34
35
  [event.function, event.arguments],
35
36
  );
36
37
 
37
- // Find an approval if there is one
38
- const approvalEvent = event.events.find((e) => {
39
- return e.event === "approval";
40
- });
38
+ const { approvalEvent, lastModelEvent } = useMemo(() => {
39
+ // Find an approval if there is one
40
+ const approvalEvent = event.events.find((e) => {
41
+ return e.event === "approval";
42
+ });
43
+
44
+ // Find a model message to render, if there is one
45
+ const lastModelEvent = [...event.events].reverse().find((e) => {
46
+ return e.event === "model";
47
+ });
48
+
49
+ return { approvalEvent, lastModelEvent };
50
+ }, [event.events]);
41
51
 
42
52
  const title = `Tool: ${event.view?.title || event.function}`;
43
53
  return (
@@ -58,6 +68,16 @@ export const ToolEventView: FC<ToolEventViewProps> = ({
58
68
  mode="compact"
59
69
  view={event.view ? event.view : undefined}
60
70
  />
71
+
72
+ {lastModelEvent && lastModelEvent.event === "model" ? (
73
+ <ChatView
74
+ id={`${id}-toolcall-chatmessage`}
75
+ messages={lastModelEvent.output.choices.map((m) => m.message)}
76
+ numbered={false}
77
+ toolCallStyle="compact"
78
+ />
79
+ ) : undefined}
80
+
61
81
  {approvalEvent ? (
62
82
  <ApprovalEventView
63
83
  event={approvalEvent}
@@ -76,6 +96,7 @@ export const ToolEventView: FC<ToolEventViewProps> = ({
76
96
  <TranscriptView
77
97
  id={`${id}-subtask`}
78
98
  data-name="Transcript"
99
+ data-default={event.failed || event.agent ? true : null}
79
100
  events={event.events}
80
101
  depth={depth + 1}
81
102
  />
@@ -1,5 +1,11 @@
1
1
  import clsx from "clsx";
2
- import { FC, isValidElement, ReactNode, useCallback } from "react";
2
+ import {
3
+ FC,
4
+ isValidElement,
5
+ ReactElement,
6
+ ReactNode,
7
+ useCallback,
8
+ } from "react";
3
9
  import { ApplicationIcons } from "../../../appearance/icons";
4
10
  import { EventNavs } from "./EventNavs";
5
11
 
@@ -49,7 +55,11 @@ export const EventPanel: FC<EventPanelProps> = ({
49
55
  const filteredArrChildren = (
50
56
  Array.isArray(children) ? children : [children]
51
57
  ).filter((child) => !!child);
52
- const defaultPillId = pillId(0);
58
+
59
+ const defaultPill = filteredArrChildren.findIndex((node) => {
60
+ return hasDataDefault(node) && node.props["data-default"];
61
+ });
62
+ const defaultPillId = defaultPill !== -1 ? pillId(defaultPill) : pillId(0);
53
63
 
54
64
  const [selectedNav, setSelectedNav] = useProperty(id, "selectedNav", {
55
65
  defaultValue: defaultPillId,
@@ -186,3 +196,20 @@ export const EventPanel: FC<EventPanelProps> = ({
186
196
  );
187
197
  return card;
188
198
  };
199
+
200
+ // Typeguard for reading default value from pills
201
+ interface DataDefaultProps {
202
+ "data-default"?: boolean;
203
+ [key: string]: any;
204
+ }
205
+
206
+ function hasDataDefault(
207
+ node: ReactNode,
208
+ ): node is ReactElement<DataDefaultProps> {
209
+ return (
210
+ isValidElement(node) &&
211
+ node.props !== null &&
212
+ typeof node.props === "object" &&
213
+ "data-default" in node.props
214
+ );
215
+ }
@@ -216,7 +216,6 @@ const createMessageRenderer = (name: string, role: string): ChangeType => {
216
216
  return {
217
217
  type: name,
218
218
  match: (changes: JsonChange[]) => {
219
- console.log(changes);
220
219
  if (changes.length === 1) {
221
220
  const change = changes[0];
222
221
  if (change.op === "add" && change.path.match(/\/messages\/\d+/)) {
@@ -78,7 +78,9 @@ export const useScores = () => {
78
78
  return [];
79
79
  }
80
80
 
81
- return getAvailableScorers(selectedLogSummary, sampleSummaries) || [];
81
+ const result =
82
+ getAvailableScorers(selectedLogSummary, sampleSummaries) || [];
83
+ return result;
82
84
  }, [selectedLogSummary, sampleSummaries]);
83
85
  };
84
86
 
@@ -361,8 +363,8 @@ export const usePrismHighlight = (toolCallContent?: string) => {
361
363
  toolCallContent.length <= kPrismRenderMaxSize
362
364
  ) {
363
365
  requestAnimationFrame(() => {
364
- const codeBlocks = toolViewRef.current!.querySelectorAll("pre code");
365
- codeBlocks.forEach((block) => {
366
+ const codeBlocks = toolViewRef.current?.querySelectorAll("pre code");
367
+ codeBlocks?.forEach((block) => {
366
368
  if (block.className.includes("language-")) {
367
369
  block.classList.add("sourceCode");
368
370
  highlightElement(block as HTMLElement);
@@ -133,7 +133,11 @@ export function createLogPolling(
133
133
  log.debug(`Stop polling running samples: ${logFileName}`);
134
134
 
135
135
  // Clear pending summaries and refresh in one transaction
136
- if (loadedPendingSamples) {
136
+ if (
137
+ loadedPendingSamples ||
138
+ state.log.selectedLogSummary?.status === "started"
139
+ ) {
140
+ log.debug(`Refresh log: ${logFileName}`);
137
141
  await refreshLog(logFileName, true);
138
142
  }
139
143
 
@@ -45,6 +45,9 @@ export interface LogSlice {
45
45
 
46
46
  // Refresh the current log
47
47
  refreshLog: () => Promise<void>;
48
+
49
+ // Poll the currently selected log
50
+ pollLog: () => Promise<void>;
48
51
  };
49
52
  }
50
53
 
@@ -175,6 +178,13 @@ export const createLogSlice = (
175
178
  }
176
179
  },
177
180
 
181
+ pollLog: async () => {
182
+ const currentLog = get().log.loadedLog;
183
+ if (currentLog) {
184
+ logPolling.startPolling(currentLog);
185
+ }
186
+ },
187
+
178
188
  refreshLog: async () => {
179
189
  const state = get();
180
190
  const api = state.api;
@@ -123,7 +123,10 @@ export function createSamplePolling(
123
123
 
124
124
  // Also fetch a fresh sample and clear the runnning Events
125
125
  // (if there were ever running events)
126
- if (state.sample.runningEvents.length > 0) {
126
+ if (
127
+ state.sample.runningEvents.length > 0 ||
128
+ state.sample.sampleStatus === "streaming"
129
+ ) {
127
130
  try {
128
131
  log.debug(
129
132
  `LOADING COMPLETED SAMPLE AFTER FLUSH: ${summary.id}-${summary.epoch}`,
@@ -23,6 +23,11 @@ export interface SampleSlice {
23
23
  logFile: string,
24
24
  sampleSummary: SampleSummary,
25
25
  ) => Promise<void>;
26
+
27
+ pollSample: (
28
+ logFile: string,
29
+ sampleSummary: SampleSummary,
30
+ ) => Promise<void>;
26
31
  };
27
32
  }
28
33
 
@@ -68,6 +73,14 @@ export const createSampleSlice = (
68
73
  set((state) => {
69
74
  state.sample.sampleError = error;
70
75
  }),
76
+ pollSample: async (logFile: string, sampleSummary: SampleSummary) => {
77
+ // Poll running sample
78
+ const state = get();
79
+ if (state.log.loadedLog && state.sample.selectedSample) {
80
+ samplePolling.startPolling(logFile, sampleSummary);
81
+ state.sampleActions.setSampleStatus("streaming");
82
+ }
83
+ },
71
84
  loadSample: async (logFile: string, sampleSummary: SampleSummary) => {
72
85
  const sampleActions = get().sampleActions;
73
86