inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -9
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +79 -12
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/eval.py +10 -1
  13. inspect_ai/_eval/loader.py +79 -19
  14. inspect_ai/_eval/registry.py +6 -0
  15. inspect_ai/_eval/score.py +3 -1
  16. inspect_ai/_eval/task/results.py +51 -22
  17. inspect_ai/_eval/task/run.py +47 -13
  18. inspect_ai/_eval/task/sandbox.py +10 -5
  19. inspect_ai/_util/constants.py +1 -0
  20. inspect_ai/_util/port_names.py +61 -0
  21. inspect_ai/_util/text.py +23 -0
  22. inspect_ai/_view/www/App.css +31 -1
  23. inspect_ai/_view/www/dist/assets/index.css +31 -1
  24. inspect_ai/_view/www/dist/assets/index.js +25498 -2044
  25. inspect_ai/_view/www/log-schema.json +32 -2
  26. inspect_ai/_view/www/package.json +2 -0
  27. inspect_ai/_view/www/src/App.mjs +14 -16
  28. inspect_ai/_view/www/src/Types.mjs +1 -2
  29. inspect_ai/_view/www/src/api/Types.ts +133 -0
  30. inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
  31. inspect_ai/_view/www/src/api/api-http.ts +219 -0
  32. inspect_ai/_view/www/src/api/api-shared.ts +47 -0
  33. inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
  34. inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
  35. inspect_ai/_view/www/src/api/index.ts +51 -0
  36. inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
  37. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  38. inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
  39. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  40. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  41. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  42. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  43. inspect_ai/_view/www/src/index.js +77 -4
  44. inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
  45. inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
  46. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
  47. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  48. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  49. inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
  50. inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
  51. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  52. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
  53. inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
  54. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  55. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  56. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
  76. inspect_ai/_view/www/src/utils/vscode.ts +36 -0
  77. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
  78. inspect_ai/_view/www/vite.config.js +7 -0
  79. inspect_ai/_view/www/yarn.lock +116 -0
  80. inspect_ai/approval/_human/__init__.py +0 -0
  81. inspect_ai/approval/_human/manager.py +1 -1
  82. inspect_ai/approval/_policy.py +12 -6
  83. inspect_ai/log/_log.py +1 -1
  84. inspect_ai/log/_samples.py +16 -0
  85. inspect_ai/log/_transcript.py +4 -1
  86. inspect_ai/model/_call_tools.py +59 -0
  87. inspect_ai/model/_conversation.py +16 -7
  88. inspect_ai/model/_generate_config.py +12 -12
  89. inspect_ai/model/_model.py +117 -18
  90. inspect_ai/model/_model_output.py +22 -2
  91. inspect_ai/model/_openai.py +383 -0
  92. inspect_ai/model/_providers/anthropic.py +152 -55
  93. inspect_ai/model/_providers/azureai.py +21 -21
  94. inspect_ai/model/_providers/bedrock.py +37 -40
  95. inspect_ai/model/_providers/goodfire.py +248 -0
  96. inspect_ai/model/_providers/google.py +46 -54
  97. inspect_ai/model/_providers/groq.py +7 -3
  98. inspect_ai/model/_providers/hf.py +6 -0
  99. inspect_ai/model/_providers/mistral.py +13 -12
  100. inspect_ai/model/_providers/openai.py +51 -218
  101. inspect_ai/model/_providers/openai_o1.py +11 -12
  102. inspect_ai/model/_providers/providers.py +23 -1
  103. inspect_ai/model/_providers/together.py +12 -12
  104. inspect_ai/model/_providers/util/__init__.py +2 -3
  105. inspect_ai/model/_providers/util/hf_handler.py +1 -1
  106. inspect_ai/model/_providers/util/llama31.py +1 -1
  107. inspect_ai/model/_providers/util/util.py +0 -76
  108. inspect_ai/model/_providers/vertex.py +1 -4
  109. inspect_ai/scorer/_metric.py +3 -0
  110. inspect_ai/scorer/_reducer/reducer.py +1 -1
  111. inspect_ai/scorer/_scorer.py +4 -3
  112. inspect_ai/solver/__init__.py +4 -5
  113. inspect_ai/solver/_basic_agent.py +1 -1
  114. inspect_ai/solver/_bridge/__init__.py +3 -0
  115. inspect_ai/solver/_bridge/bridge.py +100 -0
  116. inspect_ai/solver/_bridge/patch.py +170 -0
  117. inspect_ai/solver/_prompt.py +35 -5
  118. inspect_ai/solver/_solver.py +6 -0
  119. inspect_ai/solver/_task_state.py +80 -38
  120. inspect_ai/tool/__init__.py +2 -0
  121. inspect_ai/tool/_tool.py +12 -1
  122. inspect_ai/tool/_tool_call.py +10 -0
  123. inspect_ai/tool/_tool_def.py +16 -5
  124. inspect_ai/tool/_tool_with.py +21 -4
  125. inspect_ai/tool/beta/__init__.py +5 -0
  126. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  127. inspect_ai/tool/beta/_computer/_common.py +133 -0
  128. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  129. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  130. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  131. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  134. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  135. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  136. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  137. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  138. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  139. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  144. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  145. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  146. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  147. inspect_ai/util/__init__.py +2 -0
  148. inspect_ai/util/_display.py +5 -0
  149. inspect_ai/util/_limit.py +26 -0
  150. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
  153. inspect_ai/util/_sandbox/environment.py +14 -0
  154. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
  155. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
  156. inspect_ai/_view/www/src/api/Types.mjs +0 -117
  157. inspect_ai/_view/www/src/api/api-http.mjs +0 -300
  158. inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
  159. inspect_ai/_view/www/src/api/index.mjs +0 -49
  160. inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
  161. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  162. inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
  163. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
  164. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
  165. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
  166. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
@@ -2,15 +2,15 @@ import { render } from "preact";
2
2
  import { html } from "htm/preact";
3
3
 
4
4
  import { App } from "./App.mjs";
5
- import api from "./api/index.mjs";
6
- import { getVscodeApi } from "./utils/vscode.mjs";
5
+ import api from "./api/index";
6
+ import { getVscodeApi } from "./utils/vscode";
7
7
  import { throttle } from "./utils/sync.mjs";
8
8
 
9
9
  // Read any state from the page itself
10
10
  const vscode = getVscodeApi();
11
11
  let initialState = undefined;
12
12
  if (vscode) {
13
- initialState = vscode.getState();
13
+ initialState = filterState(vscode.getState());
14
14
  }
15
15
 
16
16
  render(
@@ -20,9 +20,82 @@ render(
20
20
  saveInitialState=${throttle((state) => {
21
21
  const vscode = getVscodeApi();
22
22
  if (vscode) {
23
- vscode.setState(state);
23
+ vscode.setState(filterState(state));
24
24
  }
25
25
  }, 1000)}
26
26
  />`,
27
27
  document.getElementById("app"),
28
28
  );
29
+
30
+ function filterState(state) {
31
+ if (!state) {
32
+ return state;
33
+ }
34
+
35
+ // When saving state, we can't store vast amounts of data (like a large sample)
36
+ const filters = [filterLargeSample, filterLargeSelectedLog];
37
+ return filters.reduce(
38
+ (filteredState, filter) => filter(filteredState),
39
+ state,
40
+ );
41
+ }
42
+
43
+ // Filters the selected Sample if it is large
44
+ function filterLargeSample(state) {
45
+ if (!state || !state.selectedSample) {
46
+ return state;
47
+ }
48
+
49
+ const estimatedTotalSize = estimateSize(state.selectedSample.messages);
50
+ if (estimatedTotalSize > 400000) {
51
+ const { selectedSample, ...filteredState } = state; // eslint-disable-line
52
+ return filteredState;
53
+ } else {
54
+ return state;
55
+ }
56
+ }
57
+
58
+ // Filters the selected log if it is too large
59
+ function filterLargeSelectedLog(state) {
60
+ if (!state || !state.selectedLog?.contents) {
61
+ return state;
62
+ }
63
+
64
+ const estimatedSize = estimateSize(
65
+ state.selectedLog.contents.sampleSummaries,
66
+ );
67
+ if (estimatedSize > 400000) {
68
+ const { selectedLog, ...filteredState } = state; // eslint-disable-line
69
+ return filteredState;
70
+ } else {
71
+ return state;
72
+ }
73
+ }
74
+
75
+ function estimateSize(list, frequency = 0.2) {
76
+ if (!list || list.len === 0) {
77
+ return 0;
78
+ }
79
+
80
+ // Total number of samples
81
+ const sampleSize = Math.ceil(list.length * frequency);
82
+
83
+ // Get a proper random sample without duplicates
84
+ const messageIndices = new Set();
85
+ while (
86
+ messageIndices.size < sampleSize &&
87
+ messageIndices.size < list.length
88
+ ) {
89
+ const randomIndex = Math.floor(Math.random() * list.length);
90
+ messageIndices.add(randomIndex);
91
+ }
92
+
93
+ // Calculate size from sampled messages
94
+ const totalSize = Array.from(messageIndices).reduce((size, index) => {
95
+ return size + JSON.stringify(list[index]).length;
96
+ }, 0);
97
+
98
+ // Estimate total size based on sample
99
+ const estimatedTotalSize = (totalSize / sampleSize) * list.length;
100
+ return estimatedTotalSize;
101
+ }
@@ -1,5 +1,12 @@
1
1
  //@ts-check
2
- import { asyncJsonParse } from "../utils/Json.mjs";
2
+ import {
3
+ EvalHeader,
4
+ EvalSummary,
5
+ LogViewAPI,
6
+ SampleSummary,
7
+ } from "../api/Types";
8
+ import { EvalLog, EvalPlan, EvalSample, EvalSpec } from "../types/log";
9
+ import { asyncJsonParse } from "../utils/json-worker";
3
10
  import { AsyncQueue } from "../utils/queue.mjs";
4
11
  import {
5
12
  FileSizeLimitError,
@@ -9,42 +16,46 @@ import {
9
16
  // don't try to load samples greater than 50mb
10
17
  const MAX_BYTES = 50 * 1024 * 1024;
11
18
 
12
- /**
13
- * @typedef {Object} SampleEntry
14
- * @property {string} sampleId
15
- * @property {number} epoch
16
- */
19
+ interface SampleEntry {
20
+ sampleId: string;
21
+ epoch: number;
22
+ }
17
23
 
18
- /**
19
- * @typedef {Object} RemoteLogFile
20
- * @property {() => Promise<Object>} readHeader - Reads the header of the log file.
21
- * @property {() => Promise<Object>} readLogSummary - Reads the log summary including header and sample summaries.
22
- * @property {(sampleId: string, epoch: number) => Promise<Object>} readSample - Reads a specific sample file.
23
- * @property {() => Promise<import("../types/log").EvalLog>} readCompleteLog - Reads the complete log file including all samples.
24
- */
24
+ export interface RemoteLogFile {
25
+ readHeader: () => Promise<EvalHeader>;
26
+ readLogSummary: () => Promise<EvalSummary>;
27
+ readSample: (sampleId: string, epoch: number) => Promise<EvalSample>;
28
+ readCompleteLog: () => Promise<EvalLog>;
29
+ }
30
+
31
+ interface LogStart {
32
+ version: number;
33
+ eval: EvalSpec;
34
+ plan: EvalPlan;
35
+ }
25
36
 
26
37
  /**
27
38
  * Opens a remote log file and provides methods to read its contents.
28
- * @param {import("../api/Types.mjs").LogViewAPI} api - The api
29
- * @param {string} url - The URL of the remote zip file.
30
- * @param {number} concurrency - The number of concurrent operations allowed.
31
- * @returns {Promise<RemoteLogFile>} An object with methods to read the log file.
32
39
  */
33
- export const openRemoteLogFile = async (api, url, concurrency) => {
40
+ export const openRemoteLogFile = async (
41
+ api: LogViewAPI,
42
+ url: string,
43
+ concurrency: number,
44
+ ): Promise<RemoteLogFile> => {
34
45
  const queue = new AsyncQueue(concurrency);
35
46
  const remoteZipFile = await openRemoteZipFile(
36
- `${encodeURIComponent(url)}`,
47
+ url,
37
48
  api.eval_log_size,
38
49
  api.eval_log_bytes,
39
50
  );
40
51
 
41
52
  /**
42
53
  * Reads and parses a JSON file from the zip.
43
- * @param {string} file - The name of the file to read.
44
- * @param {number} [maxBytes] - the max bytes
45
- * @returns {Promise<Object>} The parsed JSON content.
46
54
  */
47
- const readJSONFile = async (file, maxBytes) => {
55
+ const readJSONFile = async (
56
+ file: string,
57
+ maxBytes?: number,
58
+ ): Promise<Object> => {
48
59
  try {
49
60
  const data = await remoteZipFile.readFile(file, maxBytes);
50
61
  const textDecoder = new TextDecoder("utf-8");
@@ -53,19 +64,22 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
53
64
  } catch (error) {
54
65
  if (error instanceof FileSizeLimitError) {
55
66
  throw error;
56
- } else {
67
+ } else if (error instanceof Error) {
57
68
  throw new Error(
58
69
  `Failed to read or parse file ${file}: ${error.message}`,
59
70
  );
71
+ } else {
72
+ throw new Error(
73
+ `Failed to read or parse file ${file} - an unknown error occurred`,
74
+ );
60
75
  }
61
76
  }
62
77
  };
63
78
 
64
79
  /**
65
80
  * Lists all samples in the zip file.
66
- * @returns {Promise<SampleEntry[]>} An array of sample objects.
67
81
  */
68
- const listSamples = async () => {
82
+ const listSamples = async (): Promise<SampleEntry[]> => {
69
83
  return Array.from(remoteZipFile.centralDirectory.keys())
70
84
  .filter(
71
85
  (filename) =>
@@ -82,14 +96,14 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
82
96
 
83
97
  /**
84
98
  * Reads a specific sample file.
85
- * @param {string} sampleId - The ID of the sample.
86
- * @param {number} epoch - The epoch of the sample.
87
- * @returns {Promise<Object>} The content of the sample file.
88
99
  */
89
- const readSample = async (sampleId, epoch) => {
100
+ const readSample = async (
101
+ sampleId: string,
102
+ epoch: number,
103
+ ): Promise<EvalSample> => {
90
104
  const sampleFile = `samples/${sampleId}_epoch_${epoch}.json`;
91
105
  if (remoteZipFile.centralDirectory.has(sampleFile)) {
92
- return readJSONFile(sampleFile, MAX_BYTES);
106
+ return (await readJSONFile(sampleFile, MAX_BYTES)) as EvalSample;
93
107
  } else {
94
108
  console.log({ dir: remoteZipFile.centralDirectory });
95
109
  throw new Error(
@@ -100,13 +114,12 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
100
114
 
101
115
  /**
102
116
  * Reads the header.json file, falling back to _journal/start.json when it is absent.
103
- * @returns {Promise<Object>} The content of results.json.
104
117
  */
105
- const readHeader = async () => {
118
+ const readHeader = async (): Promise<EvalHeader> => {
106
119
  if (remoteZipFile.centralDirectory.has("header.json")) {
107
- return readJSONFile("header.json");
120
+ return (await readJSONFile("header.json")) as EvalHeader;
108
121
  } else {
109
- const evalSpec = await readJSONFile("_journal/start.json");
122
+ const evalSpec = (await readJSONFile("_journal/start.json")) as LogStart;
110
123
  return {
111
124
  status: "started",
112
125
  eval: evalSpec.eval,
@@ -117,9 +130,8 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
117
130
 
118
131
  /**
119
132
  * Reads individual summary files when summaries.json is not available.
120
- * @returns {Promise<Object>} Combined summaries from individual files.
121
133
  */
122
- const readFallbackSummaries = async () => {
134
+ const readFallbackSummaries = async (): Promise<SampleSummary[]> => {
123
135
  const summaryFiles = Array.from(
124
136
  remoteZipFile.centralDirectory.keys(),
125
137
  ).filter(
@@ -128,14 +140,16 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
128
140
  filename.endsWith(".json"),
129
141
  );
130
142
 
131
- const summaries = [];
132
- const errors = [];
143
+ const summaries: SampleSummary[] = [];
144
+ const errors: unknown[] = [];
133
145
 
134
146
  await Promise.all(
135
147
  summaryFiles.map((filename) =>
136
148
  queue.enqueue(async () => {
137
149
  try {
138
- const partialSummary = await readJSONFile(filename);
150
+ const partialSummary = (await readJSONFile(
151
+ filename,
152
+ )) as SampleSummary[];
139
153
  summaries.push(...partialSummary);
140
154
  } catch (error) {
141
155
  errors.push(error);
@@ -156,11 +170,10 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
156
170
 
157
171
  /**
158
172
  * Reads all summaries, falling back to individual files if necessary.
159
- * @returns {Promise<Object>} All summaries.
160
173
  */
161
- const readSampleSummaries = async () => {
174
+ const readSampleSummaries = async (): Promise<SampleSummary[]> => {
162
175
  if (remoteZipFile.centralDirectory.has("summaries.json")) {
163
- return await readJSONFile("summaries.json");
176
+ return (await readJSONFile("summaries.json")) as SampleSummary[];
164
177
  } else {
165
178
  return readFallbackSummaries();
166
179
  }
@@ -187,14 +200,17 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
187
200
  readSample,
188
201
  /**
189
202
  * Reads the complete log file.
190
- * @returns {Promise<import("../types/log").EvalLog>} The complete log data.
191
203
  */
192
- readCompleteLog: async () => {
204
+ readCompleteLog: async (): Promise<EvalLog> => {
193
205
  const [evalLog, samples] = await Promise.all([
194
206
  readHeader(),
195
207
  listSamples().then((sampleIds) =>
196
208
  Promise.all(
197
- sampleIds.map(({ sampleId, epoch }) => readSample(sampleId, epoch)),
209
+ sampleIds.map(({ sampleId, epoch }) =>
210
+ readSample(sampleId, epoch).then(
211
+ (sample) => sample as EvalSample,
212
+ ),
213
+ ),
198
214
  ),
199
215
  ),
200
216
  ]);
@@ -18,7 +18,8 @@ import { SecondaryBar } from "./SecondaryBar.mjs";
18
18
  * @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults
19
19
  * @param {import("../types/log").EvalPlan} [props.evalPlan] - The EvalPlan
20
20
  * @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
21
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
21
+ * @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
22
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
22
23
  * @param {string} [props.status] - the status
23
24
  * @param {boolean} props.offcanvas - Are we in offcanvas mode?
24
25
  * @param {boolean} props.showToggle - Should we show the toggle?
@@ -32,6 +33,7 @@ export const Navbar = ({
32
33
  evalResults,
33
34
  evalStats,
34
35
  samples,
36
+ evalDescriptor,
35
37
  showToggle,
36
38
  offcanvas,
37
39
  status,
@@ -182,6 +184,7 @@ export const Navbar = ({
182
184
  evalResults=${evalResults}
183
185
  evalStats=${evalStats}
184
186
  samples=${samples}
187
+ evalDescriptor=${evalDescriptor}
185
188
  status=${status}
186
189
  style=${{ gridColumn: "1/-1" }}
187
190
  />
@@ -3,6 +3,7 @@ import { html } from "htm/preact";
3
3
  import { LabeledValue } from "../components/LabeledValue.mjs";
4
4
  import { formatDataset, formatDuration } from "../utils/Format.mjs";
5
5
  import { ExpandablePanel } from "../components/ExpandablePanel.mjs";
6
+ import { scoreFilterItems } from "../samples/tools/filters.mjs";
6
7
 
7
8
  /**
8
9
  * Renders the SecondaryBar
@@ -12,7 +13,8 @@ import { ExpandablePanel } from "../components/ExpandablePanel.mjs";
12
13
  * @param {import("../types/log").EvalPlan} [props.evalPlan] - The EvalSpec
13
14
  * @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults
14
15
  * @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
15
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
16
+ * @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
17
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
16
18
  * @param {string} [props.status] - the status
17
19
  * @param {Map<string, string>} [props.style] - is this off canvas
18
20
  *
@@ -24,6 +26,7 @@ export const SecondaryBar = ({
24
26
  evalResults,
25
27
  evalStats,
26
28
  samples,
29
+ evalDescriptor,
27
30
  status,
28
31
  style,
29
32
  }) => {
@@ -60,8 +63,8 @@ export const SecondaryBar = ({
60
63
  values.push({
61
64
  size: "minmax(12%, auto)",
62
65
  value: html`<${LabeledValue} label="${label}" style=${staticColStyle} style=${{ justifySelf: hasConfig ? "left" : "center" }}>
63
- <${ScorerSummary}
64
- scorers=${evalResults?.scores} />
66
+ <${ScorerSummary}
67
+ evalDescriptor=${evalDescriptor} />
65
68
  </${LabeledValue}>`,
66
69
  });
67
70
 
@@ -124,17 +127,23 @@ const DatasetSummary = ({ dataset, samples, epochs, style }) => {
124
127
  `;
125
128
  };
126
129
 
127
- const ScorerSummary = ({ scorers }) => {
128
- if (!scorers) {
130
+ const ScorerSummary = ({ evalDescriptor }) => {
131
+ if (!evalDescriptor) {
129
132
  return "";
130
133
  }
131
134
 
132
- const uniqScorers = new Set();
133
- scorers.forEach((scorer) => {
134
- uniqScorers.add(scorer.name);
135
- });
135
+ const items = scoreFilterItems(evalDescriptor);
136
136
 
137
- return Array.from(uniqScorers).join(", ");
137
+ return html`
138
+ <span style=${{ position: "relative" }}>
139
+ ${Array.from(items).map(
140
+ (item, index) => html`
141
+ ${index > 0 ? ", " : ""}
142
+ <span title=${item.tooltip}>${item.canonicalName}</span>
143
+ `,
144
+ )}
145
+ </span>
146
+ `;
138
147
  };
139
148
 
140
149
  /**
@@ -1,5 +1,5 @@
1
1
  import { html } from "htm/preact";
2
- import { useCallback, useMemo } from "preact/hooks";
2
+ import { useCallback, useMemo, useRef } from "preact/hooks";
3
3
 
4
4
  import { ApplicationIcons } from "../appearance/Icons.mjs";
5
5
  import { LargeModal } from "../components/LargeModal.mjs";
@@ -43,6 +43,8 @@ export const SampleDialog = ({
43
43
  sampleScrollPositionRef,
44
44
  setSampleScrollPosition,
45
45
  }) => {
46
+ const scrollRef = useRef(/** @type {HTMLElement|null} */ (null));
47
+
46
48
  const tools = useMemo(() => {
47
49
  const nextTool = {
48
50
  label: "Next Sample",
@@ -94,6 +96,7 @@ export const SampleDialog = ({
94
96
  sampleDescriptor=${sampleDescriptor}
95
97
  selectedTab=${selectedTab}
96
98
  setSelectedTab=${setSelectedTab}
99
+ scrollRef=${scrollRef}
97
100
  />`;
98
101
  }, [id, sample, sampleDescriptor, selectedTab, setSelectedTab, sampleError]);
99
102
 
@@ -113,6 +116,7 @@ export const SampleDialog = ({
113
116
  showProgress=${sampleStatus === "loading"}
114
117
  initialScrollPositionRef=${sampleScrollPositionRef}
115
118
  setInitialScrollPosition=${setSampleScrollPosition}
119
+ scrollRef=${scrollRef}
116
120
  >
117
121
  ${children}
118
122
  </${LargeModal}>`;
@@ -1,6 +1,6 @@
1
1
  import { html } from "htm/preact";
2
2
 
3
- import { ChatView } from "../components/ChatView.mjs";
3
+ import { ChatViewVirtualList } from "../components/ChatView.mjs";
4
4
  import { MetaDataView } from "../components/MetaDataView.mjs";
5
5
  import { TabSet, TabPanel } from "../components/TabSet.mjs";
6
6
 
@@ -47,6 +47,7 @@ import {
47
47
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - the sample descriptor
48
48
  * @param {string} props.selectedTab - The selected tab
49
49
  * @param {(tab: string) => void} props.setSelectedTab - function to set the selected tab
50
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable element which contains this display
50
51
  * @returns {import("preact").JSX.Element} The TranscriptView component.
51
52
  */
52
53
  export const InlineSampleDisplay = ({
@@ -57,6 +58,7 @@ export const InlineSampleDisplay = ({
57
58
  sampleDescriptor,
58
59
  selectedTab,
59
60
  setSelectedTab,
61
+ scrollRef,
60
62
  }) => {
61
63
  return html`<div style=${{ flexDirection: "row", width: "100%" }}>
62
64
  <${ProgressBar}
@@ -77,6 +79,7 @@ export const InlineSampleDisplay = ({
77
79
  sampleDescriptor=${sampleDescriptor}
78
80
  selectedTab=${selectedTab}
79
81
  setSelectedTab=${setSelectedTab}
82
+ scrollRef=${scrollRef}
80
83
  />`}
81
84
  </div>
82
85
  </div>`;
@@ -91,6 +94,7 @@ export const InlineSampleDisplay = ({
91
94
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - the sample descriptor
92
95
  * @param {string} props.selectedTab - The selected tab
93
96
  * @param {(tab: string) => void} props.setSelectedTab - function to set the selected tab
97
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
94
98
  * @returns {import("preact").JSX.Element} The TranscriptView component.
95
99
  */
96
100
  export const SampleDisplay = ({
@@ -99,6 +103,7 @@ export const SampleDisplay = ({
99
103
  sampleDescriptor,
100
104
  selectedTab,
101
105
  setSelectedTab,
106
+ scrollRef,
102
107
  }) => {
103
108
  // Tab ids
104
109
  const baseId = `sample-dialog`;
@@ -120,13 +125,14 @@ export const SampleDisplay = ({
120
125
  html`
121
126
  <${TabPanel} id=${kSampleMessagesTabId} classes="sample-tab" title="Messages" onSelected=${onSelectedTab} selected=${
122
127
  selectedTab === kSampleMessagesTabId
123
- }>
124
- <${ChatView}
128
+ } scrollable=${false} style=${{ width: "100%" }}>
129
+ <${ChatViewVirtualList}
125
130
  key=${`${baseId}-chat-${id}`}
126
131
  id=${`${baseId}-chat-${id}`}
127
132
  messages=${sample.messages}
128
- style=${{ paddingLeft: ".8em", paddingTop: "1em" }}
133
+ style=${{ marginLeft: ".8em", marginTop: "1em" }}
129
134
  indented=${true}
135
+ scrollRef=${scrollRef}
130
136
  />
131
137
  </${TabPanel}>`,
132
138
  ];
@@ -136,7 +142,7 @@ export const SampleDisplay = ({
136
142
  <${TabPanel} id=${kSampleTranscriptTabId} classes="sample-tab" title="Transcript" onSelected=${onSelectedTab} selected=${
137
143
  selectedTab === kSampleTranscriptTabId || selectedTab === undefined
138
144
  } scrollable=${false}>
139
- <${SampleTranscript} key=${`${baseId}-transcript-display-${id}`} id=${`${baseId}-transcript-display-${id}`} evalEvents=${sample.events}/>
145
+ <${SampleTranscript} key=${`${baseId}-transcript-display-${id}`} id=${`${baseId}-transcript-display-${id}`} evalEvents=${sample.events} scrollRef=${scrollRef}/>
140
146
  </${TabPanel}>`);
141
147
  }
142
148
 
@@ -201,16 +207,18 @@ export const SampleDisplay = ({
201
207
  );
202
208
  }
203
209
 
204
- tabs.push(html`<${TabPanel}
205
- id=${kSampleJsonTabId}
206
- classes="sample-tab"
207
- title="JSON"
208
- onSelected=${onSelectedTab}
209
- selected=${selectedTab === kSampleJsonTabId}>
210
- <div style=${{ paddingLeft: "0.8em", marginTop: "0.4em" }}>
211
- <${JSONPanel} data=${sample} simple=${true}/>
212
- </div>
213
- </${TabPanel}>`);
210
+ if (sample.messages.length < 100) {
211
+ tabs.push(html`<${TabPanel}
212
+ id=${kSampleJsonTabId}
213
+ classes="sample-tab"
214
+ title="JSON"
215
+ onSelected=${onSelectedTab}
216
+ selected=${selectedTab === kSampleJsonTabId}>
217
+ <div style=${{ paddingLeft: "0.8em", marginTop: "0.4em" }}>
218
+ <${JSONPanel} data=${sample} simple=${true}/>
219
+ </div>
220
+ </${TabPanel}>`);
221
+ }
214
222
 
215
223
  const tabsetId = `task-sample-details-tab-${id}`;
216
224
  const targetId = `${tabsetId}-content`;
@@ -1,6 +1,6 @@
1
1
  import { html } from "htm/preact";
2
- import { useCallback, useState } from "preact/hooks";
3
- import { useEffect, useMemo } from "preact/hooks";
2
+ import { useCallback, useMemo, useState } from "preact/hooks";
3
+ import { useEffect, useRef } from "preact/hooks";
4
4
 
5
5
  import { ApplicationStyles } from "../appearance/Styles.mjs";
6
6
  import { FontSize } from "../appearance/Fonts.mjs";
@@ -56,57 +56,28 @@ export const SampleList = (props) => {
56
56
  setHidden(false);
57
57
  }, [items]);
58
58
 
59
- const heightForType = (type) => {
60
- return type === "sample" ? kSampleHeight : kSeparatorHeight;
61
- };
62
-
63
- // Compute the row arrangement
64
- const rowMap = useMemo(() => {
65
- return items.reduce((values, current, index) => {
66
- const height = heightForType(current.type);
67
- const previous =
68
- values.length > 0 ? values[values.length - 1] : undefined;
69
- const start =
70
- previous === undefined ? 0 : previous.start + previous.height;
71
- values.push({
72
- index,
73
- height,
74
- start,
75
- });
76
- return values;
77
- }, []);
59
+ // Keep a mapping of the indexes to items (skipping separators)
60
+ const itemRowMapping = useMemo(() => {
61
+ const rowIndexes = [];
62
+ items.forEach((item, index) => {
63
+ if (item.type === "sample") {
64
+ rowIndexes.push(index);
65
+ }
66
+ });
67
+ return rowIndexes;
78
68
  }, [items]);
79
69
 
70
+ const prevSelectedIndexRef = useRef(null);
80
71
  useEffect(() => {
81
72
  const listEl = listRef.current;
82
73
  if (listEl) {
83
- // Decide if we need to scroll the element into position
84
- const selected = rowMap[selectedIndex];
85
- if (selected) {
86
- const itemTop = selected.start;
87
- const itemBottom = selected.start + selected.height;
88
-
89
- const scrollTop = listEl.base.scrollTop;
90
- const scrollBottom = scrollTop + listEl.base.offsetHeight;
91
-
92
- // It is visible
93
- if (itemTop >= scrollTop && itemBottom <= scrollBottom) {
94
- return;
95
- }
96
-
97
- if (itemTop < scrollTop) {
98
- // Top is scrolled off
99
- listEl.base.scrollTo({ top: itemTop });
100
- return;
101
- }
102
-
103
- if (itemBottom > scrollBottom) {
104
- listEl.base.scrollTo({ top: itemBottom - listEl.base.offsetHeight });
105
- return;
106
- }
107
- }
74
+ const actualRowIndex = itemRowMapping[selectedIndex];
75
+ const direction =
76
+ actualRowIndex > prevSelectedIndexRef.current ? "down" : "up";
77
+ listRef.current?.scrollToIndex(actualRowIndex, direction);
78
+ prevSelectedIndexRef.current = actualRowIndex;
108
79
  }
109
- }, [selectedIndex, rowMap, listRef]);
80
+ }, [selectedIndex, listRef, itemRowMapping]);
110
81
 
111
82
  /** @param {import("./SamplesTab.mjs").ListItem} item */
112
83
  const renderRow = (item) => {
@@ -254,7 +225,6 @@ export const SampleList = (props) => {
254
225
  tabIndex="0"
255
226
  renderRow=${renderRow}
256
227
  onkeydown=${onkeydown}
257
- rowMap=${rowMap}
258
228
  style=${listStyle}
259
229
  />
260
230
  ${footerRow}
@@ -282,7 +252,7 @@ const SeparatorRow = ({ id, title, height }) => {
282
252
  * @param {Object} props - The parameters for the component.
283
253
  * @param {string} props.id - The unique identifier for the sample.
284
254
  * @param {number} props.index - The index of the sample.
285
- * @param {import("../api/Types.mjs").SampleSummary} props.sample - The sample.
255
+ * @param {import("../api/Types.ts").SampleSummary} props.sample - The sample.
286
256
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor.
287
257
  * @param {number} props.height - The height of the sample row.
288
258
  * @param {boolean} props.selected - Whether the sample is selected.
@@ -2,7 +2,7 @@ import { html } from "htm/preact";
2
2
 
3
3
  /**
4
4
  * @param {Object} props
5
- * @param {import("../api/Types.mjs").SampleSummary} props.sample
5
+ * @param {import("../api/Types.ts").SampleSummary} props.sample
6
6
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor
7
7
  * @param {string} props.scorer
8
8
  * @returns {import("preact").JSX.Element}
@@ -1,6 +1,6 @@
1
1
  // @ts-check
2
2
  import { html } from "htm/preact";
3
- import { TranscriptView } from "./transcript/TranscriptView.mjs";
3
+ import { TranscriptVirtualList } from "./transcript/TranscriptView.mjs";
4
4
 
5
5
  /**
6
6
  * Renders the SampleTranscript component.
@@ -8,8 +8,13 @@ import { TranscriptView } from "./transcript/TranscriptView.mjs";
8
8
  * @param {Object} props - The parameters for the component.
9
9
  * @param {string} props.id - The id of this component
10
10
  * @param {import("../types/log").Events} props.evalEvents - The transcript to display.
11
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
11
12
  * @returns {import("preact").JSX.Element} The SampleTranscript component.
12
13
  */
13
- export const SampleTranscript = ({ id, evalEvents }) => {
14
- return html`<${TranscriptView} id=${id} events=${evalEvents} />`;
14
+ export const SampleTranscript = ({ id, evalEvents, scrollRef }) => {
15
+ return html`<${TranscriptVirtualList}
16
+ id=${id}
17
+ events=${evalEvents}
18
+ scrollRef=${scrollRef}
19
+ />`;
15
20
  };