inspect-ai 0.3.82__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_display/textual/app.py +14 -3
  3. inspect_ai/_display/textual/display.py +4 -0
  4. inspect_ai/_display/textual/widgets/samples.py +9 -3
  5. inspect_ai/_display/textual/widgets/task_detail.py +3 -4
  6. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  7. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  8. inspect_ai/_eval/eval.py +36 -24
  9. inspect_ai/_eval/evalset.py +17 -18
  10. inspect_ai/_eval/loader.py +34 -11
  11. inspect_ai/_eval/run.py +8 -13
  12. inspect_ai/_eval/score.py +13 -3
  13. inspect_ai/_eval/task/generate.py +8 -9
  14. inspect_ai/_eval/task/log.py +2 -0
  15. inspect_ai/_eval/task/task.py +23 -9
  16. inspect_ai/_util/file.py +13 -0
  17. inspect_ai/_util/json.py +2 -1
  18. inspect_ai/_util/registry.py +1 -0
  19. inspect_ai/_util/vscode.py +37 -0
  20. inspect_ai/_view/www/App.css +6 -0
  21. inspect_ai/_view/www/dist/assets/index.css +304 -128
  22. inspect_ai/_view/www/dist/assets/index.js +47495 -27519
  23. inspect_ai/_view/www/log-schema.json +124 -31
  24. inspect_ai/_view/www/package.json +3 -0
  25. inspect_ai/_view/www/src/App.tsx +12 -0
  26. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  27. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  28. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  29. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  30. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
  31. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
  32. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  33. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  34. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  35. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  36. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  37. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
  38. inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
  39. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
  40. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
  41. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
  42. inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
  43. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
  44. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
  45. inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
  46. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
  47. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
  48. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  49. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  50. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  51. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
  52. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
  53. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
  54. inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
  55. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
  56. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
  57. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  58. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  59. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  60. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  61. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
  62. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
  63. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
  64. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
  65. inspect_ai/_view/www/src/state/hooks.ts +5 -3
  66. inspect_ai/_view/www/src/state/logPolling.ts +5 -1
  67. inspect_ai/_view/www/src/state/logSlice.ts +10 -0
  68. inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
  69. inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +34 -26
  71. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  72. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  73. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
  74. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
  75. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
  76. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  77. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  78. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
  79. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  80. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
  81. inspect_ai/_view/www/yarn.lock +94 -1
  82. inspect_ai/agent/__init__.py +36 -0
  83. inspect_ai/agent/_agent.py +268 -0
  84. inspect_ai/agent/_as_solver.py +72 -0
  85. inspect_ai/agent/_as_tool.py +122 -0
  86. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  87. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  88. inspect_ai/agent/_filter.py +46 -0
  89. inspect_ai/agent/_handoff.py +93 -0
  90. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  91. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  92. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  93. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  94. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  95. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  96. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  97. inspect_ai/agent/_react.py +241 -0
  98. inspect_ai/agent/_run.py +36 -0
  99. inspect_ai/agent/_types.py +81 -0
  100. inspect_ai/log/_log.py +11 -2
  101. inspect_ai/log/_transcript.py +13 -9
  102. inspect_ai/model/__init__.py +7 -1
  103. inspect_ai/model/_call_tools.py +256 -52
  104. inspect_ai/model/_chat_message.py +7 -4
  105. inspect_ai/model/_conversation.py +13 -62
  106. inspect_ai/model/_display.py +85 -0
  107. inspect_ai/model/_model.py +113 -14
  108. inspect_ai/model/_model_output.py +14 -9
  109. inspect_ai/model/_openai.py +16 -4
  110. inspect_ai/model/_openai_computer_use.py +162 -0
  111. inspect_ai/model/_openai_responses.py +319 -165
  112. inspect_ai/model/_providers/anthropic.py +20 -21
  113. inspect_ai/model/_providers/azureai.py +24 -13
  114. inspect_ai/model/_providers/bedrock.py +1 -7
  115. inspect_ai/model/_providers/cloudflare.py +3 -3
  116. inspect_ai/model/_providers/goodfire.py +2 -6
  117. inspect_ai/model/_providers/google.py +11 -10
  118. inspect_ai/model/_providers/groq.py +6 -3
  119. inspect_ai/model/_providers/hf.py +7 -3
  120. inspect_ai/model/_providers/mistral.py +7 -10
  121. inspect_ai/model/_providers/openai.py +47 -17
  122. inspect_ai/model/_providers/openai_o1.py +11 -4
  123. inspect_ai/model/_providers/openai_responses.py +12 -14
  124. inspect_ai/model/_providers/providers.py +2 -2
  125. inspect_ai/model/_providers/together.py +12 -2
  126. inspect_ai/model/_providers/util/chatapi.py +7 -2
  127. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  128. inspect_ai/model/_providers/util/llama31.py +4 -2
  129. inspect_ai/model/_providers/vertex.py +11 -9
  130. inspect_ai/model/_providers/vllm.py +4 -4
  131. inspect_ai/scorer/__init__.py +2 -0
  132. inspect_ai/scorer/_metrics/__init__.py +2 -0
  133. inspect_ai/scorer/_metrics/grouped.py +84 -0
  134. inspect_ai/scorer/_score.py +26 -6
  135. inspect_ai/solver/__init__.py +2 -2
  136. inspect_ai/solver/_basic_agent.py +22 -9
  137. inspect_ai/solver/_bridge.py +31 -0
  138. inspect_ai/solver/_chain.py +20 -12
  139. inspect_ai/solver/_fork.py +5 -1
  140. inspect_ai/solver/_human_agent.py +52 -0
  141. inspect_ai/solver/_prompt.py +3 -1
  142. inspect_ai/solver/_run.py +59 -0
  143. inspect_ai/solver/_solver.py +14 -4
  144. inspect_ai/solver/_task_state.py +5 -3
  145. inspect_ai/tool/_tool_call.py +15 -8
  146. inspect_ai/tool/_tool_def.py +17 -12
  147. inspect_ai/tool/_tool_support_helpers.py +2 -2
  148. inspect_ai/tool/_tool_with.py +14 -11
  149. inspect_ai/tool/_tools/_bash_session.py +11 -2
  150. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  151. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  152. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  153. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  154. inspect_ai/tool/_tools/_think.py +1 -1
  155. inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
  156. inspect_ai/util/__init__.py +2 -0
  157. inspect_ai/util/_anyio.py +27 -0
  158. inspect_ai/util/_sandbox/__init__.py +2 -1
  159. inspect_ai/util/_sandbox/context.py +32 -7
  160. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  161. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  162. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  163. inspect_ai/util/_store_model.py +30 -7
  164. inspect_ai/util/_subprocess.py +13 -3
  165. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  166. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +179 -153
  167. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
  168. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  169. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  170. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  171. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  172. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  173. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  174. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  175. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  176. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  177. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  178. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  179. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  180. {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,77 @@
1
+ import clsx from "clsx";
2
+ import { FC, ReactNode } from "react";
3
+ import styles from "./Modal.module.css";
4
+
5
+ interface ModalProps {
6
+ id: string;
7
+ showing: boolean;
8
+ setShowing: (showing: boolean) => void;
9
+ title?: string;
10
+ children: ReactNode;
11
+ className?: string | string[];
12
+ }
13
+
14
+ export const Modal: FC<ModalProps> = ({
15
+ id,
16
+ title,
17
+ showing,
18
+ setShowing,
19
+ children,
20
+ className,
21
+ }) => {
22
+ return (
23
+ <>
24
+ {showing && (
25
+ <div className={styles.backdrop} onClick={() => setShowing(false)} />
26
+ )}
27
+ <div
28
+ id={id}
29
+ className={clsx("modal", "fade", showing ? "show" : "", className)}
30
+ tabIndex={-1}
31
+ style={{ display: showing ? "block" : "none" }}
32
+ >
33
+ <div className={clsx("modal-dialog", styles.modal)}>
34
+ <div className="modal-content">
35
+ <div className={clsx("modal-header", styles.header)}>
36
+ <div
37
+ className={clsx(
38
+ "modal-title",
39
+ "text-size-base",
40
+ styles.modalTitle,
41
+ )}
42
+ >
43
+ {title}
44
+ </div>
45
+ <button
46
+ type="button"
47
+ className={clsx(
48
+ "btn-close",
49
+ "text-size-smaller",
50
+ styles.btnClose,
51
+ )}
52
+ data-bs-dismiss="modal"
53
+ aria-label="Close"
54
+ onClick={() => {
55
+ setShowing(!showing);
56
+ }}
57
+ ></button>
58
+ </div>
59
+ <div className="modal-body">{children}</div>
60
+ <div className="modal-footer">
61
+ <button
62
+ type="button"
63
+ className="btn btn-secondary"
64
+ data-bs-dismiss="modal"
65
+ onClick={() => {
66
+ setShowing(!showing);
67
+ }}
68
+ >
69
+ Close
70
+ </button>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </>
76
+ );
77
+ };
@@ -7,3 +7,7 @@
7
7
  margin-top: 0.2rem;
8
8
  margin-bottom: 0.3rem;
9
9
  }
10
+
11
+ .metadata {
12
+ margin-bottom: 0.75em;
13
+ }
@@ -1,7 +1,7 @@
1
1
  import clsx from "clsx";
2
2
  import { FC } from "react";
3
- import { MetaDataView } from "../metadata/MetaDataView";
4
- import styles from "./DatasetDetailView.module.css";
3
+ import { MetaDataGrid } from "../metadata/MetaDataGrid";
4
+ import styles from "./DetailStep.module.css";
5
5
 
6
6
  interface DetailStepProps {
7
7
  icon?: string;
@@ -22,7 +22,10 @@ export const DetailStep: FC<DetailStepProps> = ({
22
22
  {iconHtml} {name}
23
23
  <div className={styles.container}>
24
24
  {params ? (
25
- <MetaDataView entries={params} className={"text-size-small"} />
25
+ <MetaDataGrid
26
+ entries={params}
27
+ className={clsx("text-size-small", styles.metadata)}
28
+ />
26
29
  ) : (
27
30
  ""
28
31
  )}
@@ -1,6 +1,7 @@
1
1
  .container {
2
2
  display: flex;
3
- flex-direction: columns;
3
+ flex-direction: row;
4
+ flex-wrap: wrap;
4
5
  }
5
6
 
6
7
  .item {
@@ -25,8 +25,15 @@ export const InlineSampleDisplay: FC<InlineSampleDisplayProps> = ({
25
25
  // Sample hooks
26
26
  const sampleData = useSampleData();
27
27
  const loadSample = useStore((state) => state.sampleActions.loadSample);
28
+ const pollSample = useStore((state) => state.sampleActions.pollSample);
28
29
  const logSelection = useLogSelection();
29
30
 
31
+ useEffect(() => {
32
+ if (sampleData.running && logSelection.logFile && logSelection.sample) {
33
+ pollSample(logSelection.logFile, logSelection.sample);
34
+ }
35
+ }, []);
36
+
30
37
  // Sample Loading
31
38
  const prevCompleted = usePrevious(
32
39
  logSelection.sample?.completed !== undefined
@@ -39,8 +39,15 @@ export const SampleDialog: FC<SampleDialogProps> = ({
39
39
  // Sample hooks
40
40
  const sampleData = useSampleData();
41
41
  const loadSample = useStore((state) => state.sampleActions.loadSample);
42
+ const pollSample = useStore((state) => state.sampleActions.pollSample);
42
43
  const logSelection = useLogSelection();
43
44
 
45
+ useEffect(() => {
46
+ if (sampleData.running && logSelection.logFile && logSelection.sample) {
47
+ pollSample(logSelection.logFile, logSelection.sample);
48
+ }
49
+ }, []);
50
+
44
51
  // Load sample
45
52
  const prevCompleted = usePrevious(
46
53
  logSelection.sample?.completed !== undefined
@@ -7,7 +7,6 @@ import { isVscode } from "../utils/vscode";
7
7
  import { ApplicationIcons } from "../appearance/icons";
8
8
  import { ANSIDisplay } from "../components/AnsiDisplay";
9
9
  import { ToolButton } from "../components/ToolButton";
10
- import { SampleScoreView } from "./scores/SampleScoreView";
11
10
 
12
11
  import clsx from "clsx";
13
12
  import {
@@ -40,6 +39,7 @@ import { ChatViewVirtualList } from "./chat/ChatViewVirtualList";
40
39
  import { messagesFromEvents } from "./chat/messages";
41
40
  import styles from "./SampleDisplay.module.css";
42
41
  import { SampleSummaryView } from "./SampleSummaryView";
42
+ import { SampleScoresView } from "./scores/SampleScoresView";
43
43
  import { TranscriptVirtualList } from "./transcript/TranscriptView";
44
44
 
45
45
  interface SampleDisplayProps {
@@ -92,7 +92,6 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
92
92
  return false;
93
93
  };
94
94
 
95
- const scorerNames = Object.keys(sample?.scores || {});
96
95
  const sampleMetadatas = metadataViewsForSample(`${baseId}-${id}`, sample);
97
96
 
98
97
  const tabsetId = `task-sample-details-tab-${id}`;
@@ -166,38 +165,16 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
166
165
  running={running}
167
166
  />
168
167
  </TabPanel>
169
- {sample && scorerNames.length === 1 ? (
170
- <TabPanel
171
- key={kSampleScoringTabId}
172
- id={kSampleScoringTabId}
173
- className="sample-tab"
174
- title="Scoring"
175
- onSelected={onSelectedTab}
176
- selected={selectedTab === kSampleScoringTabId}
177
- >
178
- <SampleScoreView sample={sample} scorer={scorerNames[0]} />
179
- </TabPanel>
180
- ) : (
181
- <>
182
- {sample
183
- ? Object.keys(sample?.scores || {}).map((scorer) => {
184
- const tabId = `score-${scorer}`;
185
- return (
186
- <TabPanel
187
- key={tabId}
188
- id={tabId}
189
- className="sample-tab"
190
- title={scorer}
191
- onSelected={onSelectedTab}
192
- selected={selectedTab === tabId}
193
- >
194
- <SampleScoreView sample={sample} scorer={scorer} />
195
- </TabPanel>
196
- );
197
- })
198
- : undefined}
199
- </>
200
- )}
168
+ <TabPanel
169
+ key={kSampleScoringTabId}
170
+ id={kSampleScoringTabId}
171
+ className="sample-tab"
172
+ title="Scoring"
173
+ onSelected={onSelectedTab}
174
+ selected={selectedTab === kSampleScoringTabId}
175
+ >
176
+ <SampleScoresView sample={sample} />
177
+ </TabPanel>
201
178
  <TabPanel
202
179
  id={kSampleMetdataTabId}
203
180
  className={clsx("sample-tab")}
@@ -19,6 +19,11 @@
19
19
  justify-content: center;
20
20
  }
21
21
 
22
+ .centerValue {
23
+ display: flex;
24
+ align-items: center;
25
+ }
26
+
22
27
  .wrap {
23
28
  word-wrap: anywhere;
24
29
  }
@@ -29,4 +34,5 @@
29
34
 
30
35
  .value {
31
36
  flex-direction: column;
37
+ padding-top: 0.1em;
32
38
  }
@@ -123,7 +123,7 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
123
123
 
124
124
  columns.push({
125
125
  label: "Input",
126
- value: fields.input,
126
+ value: <MarkdownDiv markdown={fields.input.join(" ")} />,
127
127
  size: `${input}fr`,
128
128
  clamp: true,
129
129
  });
@@ -233,7 +233,7 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
233
233
  styles.value,
234
234
  styles.wrap,
235
235
  col.clamp ? "three-line-clamp" : undefined,
236
- col.center ? styles.centerLabel : undefined,
236
+ col.center ? styles.centerValue : undefined,
237
237
  )}
238
238
  >
239
239
  {col.value}
@@ -44,3 +44,15 @@ export const SampleTools: FC<SampleToolsProps> = ({ samples }) => {
44
44
  </Fragment>
45
45
  );
46
46
  };
47
+
48
+ interface ScoreFilterToolsProps {}
49
+
50
+ export const ScoreFilterTools: FC<ScoreFilterToolsProps> = () => {
51
+ const scores = useScores();
52
+ const score = useScore();
53
+ const setScore = useStore((state) => state.logActions.setScore);
54
+ if (scores.length <= 1) {
55
+ return undefined;
56
+ }
57
+ return <SelectScorer scores={scores} score={score} setScore={setScore} />;
58
+ };
@@ -52,6 +52,7 @@ export const MessageContent: FC<MessageContentProps> = ({ contents }) => {
52
52
  {
53
53
  type: "text",
54
54
  text: content,
55
+ refusal: null,
55
56
  },
56
57
  index === contents.length - 1,
57
58
  );
@@ -75,6 +76,7 @@ export const MessageContent: FC<MessageContentProps> = ({ contents }) => {
75
76
  const contentText: ContentText = {
76
77
  type: "text",
77
78
  text: contents,
79
+ refusal: null,
78
80
  };
79
81
  return messageRenderers["text"].render(
80
82
  "text-message-content",
@@ -101,6 +101,7 @@ const resolveToolMessage = (toolMessage?: ChatMessageTool): ContentTool[] => {
101
101
  {
102
102
  type: "text",
103
103
  text: content,
104
+ refusal: null,
104
105
  },
105
106
  ],
106
107
  },
@@ -115,6 +116,7 @@ const resolveToolMessage = (toolMessage?: ChatMessageTool): ContentTool[] => {
115
116
  {
116
117
  type: "text",
117
118
  text: con,
119
+ refusal: null,
118
120
  },
119
121
  ],
120
122
  } as ContentTool;
@@ -70,6 +70,7 @@ export const resolveMessages = (messages: Messages) => {
70
70
  role: "system",
71
71
  content: systemContent,
72
72
  source: "input",
73
+ internal: null,
73
74
  };
74
75
 
75
76
  // Converge them
@@ -120,6 +121,7 @@ const normalizeContent = (
120
121
  return {
121
122
  type: "text",
122
123
  text: content,
124
+ refusal: null,
123
125
  };
124
126
  } else {
125
127
  return content;
@@ -151,7 +153,7 @@ export const messagesFromEvents = (runningEvents: Events): Messages => {
151
153
  }
152
154
  });
153
155
 
154
- if (messages.entries.length > 0) {
156
+ if (messages.size > 0) {
155
157
  return messages.values().toArray();
156
158
  } else {
157
159
  return [];
@@ -175,6 +175,7 @@ const normalizeContent = (
175
175
  {
176
176
  type: "text",
177
177
  text: String(output),
178
+ refusal: null,
178
179
  },
179
180
  ],
180
181
  },
@@ -47,7 +47,13 @@ export const createEvalDescriptor = (
47
47
  sample.scores[scoreLabel.scorer] &&
48
48
  sample.scores[scoreLabel.scorer].value
49
49
  ) {
50
- return sample.scores[scoreLabel.scorer].value;
50
+ if (typeof sample.scores[scoreLabel.scorer].value === "object") {
51
+ return (
52
+ sample.scores[scoreLabel.scorer].value as Record<string, Value2>
53
+ )[scoreLabel.name];
54
+ } else {
55
+ return sample.scores[scoreLabel.scorer].value;
56
+ }
51
57
  } else if (sample.scores[scoreLabel.name]) {
52
58
  return sample.scores[scoreLabel.name].value;
53
59
  } else {
@@ -162,7 +168,7 @@ export const createEvalDescriptor = (
162
168
  return "null";
163
169
  } else if (score === undefined) {
164
170
  return "";
165
- } else if (score && descriptor && descriptor.render) {
171
+ } else if (descriptor && descriptor.render) {
166
172
  return descriptor.render(score);
167
173
  } else {
168
174
  return <span>{String(score)}</span>;
@@ -328,7 +334,7 @@ export const createSamplesDescriptor = (
328
334
  answer: Math.min(sizes[2], 300),
329
335
  limit: Math.min(sizes[3], 50),
330
336
  id: Math.min(sizes[4], 10),
331
- score: Math.min(sizes[4], 30),
337
+ score: Math.min(sizes[5], 30),
332
338
  };
333
339
  const base =
334
340
  maxSizes.input +
@@ -1,11 +1,11 @@
1
1
  .circle {
2
2
  font-family: "Consola Regular";
3
- width: 20px;
4
- height: 20px;
3
+ width: 40px;
4
+ height: 30px;
5
5
  display: inline-flex;
6
6
  justify-content: center;
7
7
  align-items: center;
8
- border-radius: 50%;
8
+ border-radius: 15px;
9
9
  padding-top: 1px;
10
10
  }
11
11
 
@@ -15,7 +15,7 @@ export const booleanScoreDescriptor = (): ScoreDescriptor => {
15
15
  className={clsx(
16
16
  styles.circle,
17
17
  "text-size-small",
18
- score ? "green" : "red",
18
+ score ? styles.green : styles.red,
19
19
  )}
20
20
  >
21
21
  {String(score)}
@@ -1,8 +1,8 @@
1
1
  .container {
2
- display: flex;
3
- flex-direction: column;
4
- align-items: center;
5
- margin-left: 0.5rem;
2
+ display: grid;
3
+ grid-template-columns: auto auto;
4
+ grid-auto-rows: auto;
5
+ column-gap: 1rem;
6
6
  }
7
7
 
8
8
  .padded {
@@ -35,7 +35,7 @@ export const objectScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
35
35
 
36
36
  const scores: JSX.Element[] = [];
37
37
  const keys = Object.keys(score);
38
- keys.forEach((key, index) => {
38
+ keys.forEach((key) => {
39
39
  if (typeof score !== "object" || Array.isArray(score)) {
40
40
  throw new Error(
41
41
  "Unexpected us of object score descriptor for non-score object",
@@ -50,23 +50,22 @@ export const objectScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
50
50
  : parseFloat(value === true ? "1" : value),
51
51
  )
52
52
  : String(value);
53
+
53
54
  scores.push(
54
- <div
55
- key={`score-value-${index}`}
56
- className={clsx(
57
- styles.container,
58
- index + 1 < keys.length ? styles.padded : undefined,
59
- )}
60
- >
55
+ <>
61
56
  <div className={clsx(styles.key, "text-size-smaller")}>{key}</div>
62
- <div className={clsx(styles.value, "text-size-large")}>
57
+ <div className={clsx(styles.value, "text-size-base")}>
63
58
  {formattedValue}
64
59
  </div>
65
- </div>,
60
+ </>,
66
61
  );
67
62
  });
68
63
 
69
- return scores;
64
+ return (
65
+ <div key={`score-value`} className={clsx(styles.container)}>
66
+ {scores}
67
+ </div>
68
+ );
70
69
  },
71
70
  };
72
71
  };
@@ -23,5 +23,6 @@
23
23
  }
24
24
 
25
25
  .label {
26
- margin-top: -4px;
26
+ margin-left: 0.1em;
27
+ margin-top: -3px;
27
28
  }
@@ -1,5 +1,6 @@
1
1
  interface SampleFooterProps {
2
2
  sampleCount: number;
3
+ totalSampleCount: number;
3
4
  running: boolean;
4
5
  }
5
6
 
@@ -9,6 +10,7 @@ import styles from "./SampleFooter.module.css";
9
10
 
10
11
  export const SampleFooter: FC<SampleFooterProps> = ({
11
12
  sampleCount,
13
+ totalSampleCount,
12
14
  running,
13
15
  }) => {
14
16
  return (
@@ -28,7 +30,11 @@ export const SampleFooter: FC<SampleFooterProps> = ({
28
30
  </div>
29
31
  ) : undefined}
30
32
  </div>
31
- <div>{sampleCount} Samples</div>
33
+ <div>
34
+ {sampleCount < totalSampleCount
35
+ ? `${sampleCount} / ${totalSampleCount} Samples`
36
+ : `${sampleCount} Samples`}
37
+ </div>
32
38
  </div>
33
39
  );
34
40
  };
@@ -29,6 +29,7 @@ const kSeparatorHeight = 24;
29
29
 
30
30
  interface SampleListProps {
31
31
  items: ListItem[];
32
+ totalItemCount: number;
32
33
  running: boolean;
33
34
  nextSample: () => void;
34
35
  prevSample: () => void;
@@ -37,9 +38,12 @@ interface SampleListProps {
37
38
  listHandle: RefObject<VirtuosoHandle | null>;
38
39
  }
39
40
 
41
+ export const kSampleFollowProp = "sample-list";
42
+
40
43
  export const SampleList: FC<SampleListProps> = memo((props) => {
41
44
  const {
42
45
  items,
46
+ totalItemCount,
43
47
  running,
44
48
  nextSample,
45
49
  prevSample,
@@ -57,9 +61,13 @@ export const SampleList: FC<SampleListProps> = memo((props) => {
57
61
  (state) => state.log.selectedSampleIndex,
58
62
  );
59
63
  const samplesDescriptor = useSampleDescriptor();
60
- const [followOutput, setFollowOutput] = useProperty("sample-list", "follow", {
61
- defaultValue: false,
62
- });
64
+ const [followOutput, setFollowOutput] = useProperty(
65
+ kSampleFollowProp,
66
+ "follow",
67
+ {
68
+ defaultValue: !!running,
69
+ },
70
+ );
63
71
 
64
72
  // Track whether we were previously running so we can
65
73
  // decide whether to pop up to the top
@@ -84,13 +92,15 @@ export const SampleList: FC<SampleListProps> = memo((props) => {
84
92
  prevRunningRef.current = running;
85
93
  }, [running, followOutput, listHandle]);
86
94
 
95
+ const loaded = useRef(false);
87
96
  const handleAtBottomStateChange = useCallback(
88
97
  (atBottom: boolean) => {
89
- if (running) {
98
+ if (loaded.current && running) {
90
99
  setFollowOutput(atBottom);
91
100
  }
101
+ loaded.current = true;
92
102
  },
93
- [running, setFollowOutput],
103
+ [running, setFollowOutput, followOutput],
94
104
  );
95
105
 
96
106
  const onkeydown = useCallback(
@@ -148,7 +158,7 @@ export const SampleList: FC<SampleListProps> = memo((props) => {
148
158
  return null;
149
159
  }
150
160
  },
151
- [showSample],
161
+ [showSample, gridColumnsTemplate],
152
162
  );
153
163
 
154
164
  const { input, limit, answer, target } = gridColumns(samplesDescriptor);
@@ -210,8 +220,11 @@ export const SampleList: FC<SampleListProps> = memo((props) => {
210
220
  data={items}
211
221
  defaultItemHeight={50}
212
222
  itemContent={renderRow}
213
- followOutput={followOutput}
223
+ followOutput={(_atBottom: boolean) => {
224
+ return followOutput;
225
+ }}
214
226
  atBottomStateChange={handleAtBottomStateChange}
227
+ atBottomThreshold={30}
215
228
  increaseViewportBy={{ top: 300, bottom: 300 }}
216
229
  overscan={{
217
230
  main: 10,
@@ -223,7 +236,11 @@ export const SampleList: FC<SampleListProps> = memo((props) => {
223
236
  isScrolling={isScrolling}
224
237
  restoreStateFrom={getRestoreState()}
225
238
  />
226
- <SampleFooter sampleCount={sampleCount} running={running} />
239
+ <SampleFooter
240
+ sampleCount={sampleCount}
241
+ totalSampleCount={totalItemCount}
242
+ running={running}
243
+ />
227
244
  </div>
228
245
  );
229
246
  });
@@ -69,7 +69,7 @@ export const SampleRow: FC<SampleRowProps> = ({
69
69
  styles.wrapAnywhere,
70
70
  )}
71
71
  >
72
- {inputString(sample.input).join(" ")}
72
+ <MarkdownDiv markdown={inputString(sample.input).join(" ")} />
73
73
  </div>
74
74
  <div className={clsx("sample-target", "three-line-clamp", styles.cell)}>
75
75
  <MarkdownDiv
@@ -1,8 +1,7 @@
1
- import { FC, Fragment } from "react";
1
+ import { FC } from "react";
2
2
  import { SampleSummary } from "../../api/types";
3
3
 
4
- import { useSampleDescriptor } from "../../state/hooks";
5
- import styles from "./SampleScores.module.css";
4
+ import { getScoreDescriptorForValues } from "../descriptor/score/ScoreDescriptor";
6
5
 
7
6
  interface SampleScoresProps {
8
7
  sample: SampleSummary;
@@ -10,24 +9,14 @@ interface SampleScoresProps {
10
9
  }
11
10
 
12
11
  export const SampleScores: FC<SampleScoresProps> = ({ sample, scorer }) => {
13
- const samplesDescriptor = useSampleDescriptor();
14
- const scores = scorer
15
- ? samplesDescriptor?.evalDescriptor
16
- .scorerDescriptor(sample, { scorer, name: scorer })
17
- .scores()
18
- : samplesDescriptor?.selectedScorerDescriptor(sample)?.scores();
19
-
20
- if (scores?.length === 1) {
21
- return scores[0].rendered();
22
- } else {
23
- const rows = scores?.map((score) => {
24
- return (
25
- <Fragment>
26
- <div style={{ opacity: "0.7" }}>{score.name}</div>
27
- <div>{score.rendered()}</div>
28
- </Fragment>
29
- );
30
- });
31
- return <div className={styles.grid}>{rows}</div>;
12
+ const scoreData = sample.scores?.[scorer];
13
+ if (!scoreData) {
14
+ return undefined;
32
15
  }
16
+
17
+ const scorerDescriptor = getScoreDescriptorForValues(
18
+ [scoreData.value],
19
+ [typeof scoreData.value],
20
+ );
21
+ return scorerDescriptor?.render(scoreData.value);
33
22
  };