inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +3 -1
- inspect_ai/_cli/eval.py +15 -9
- inspect_ai/_display/core/active.py +4 -1
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +0 -5
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +79 -12
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +10 -1
- inspect_ai/_eval/loader.py +79 -19
- inspect_ai/_eval/registry.py +6 -0
- inspect_ai/_eval/score.py +3 -1
- inspect_ai/_eval/task/results.py +51 -22
- inspect_ai/_eval/task/run.py +47 -13
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25498 -2044
- inspect_ai/_view/www/log-schema.json +32 -2
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +14 -16
- inspect_ai/_view/www/src/Types.mjs +1 -2
- inspect_ai/_view/www/src/api/Types.ts +133 -0
- inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
- inspect_ai/_view/www/src/api/api-http.ts +219 -0
- inspect_ai/_view/www/src/api/api-shared.ts +47 -0
- inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
- inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
- inspect_ai/_view/www/src/api/index.ts +51 -0
- inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +77 -4
- inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
- inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
- inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +13 -2
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
- inspect_ai/_view/www/src/utils/vscode.ts +36 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/manager.py +1 -1
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/log/_log.py +1 -1
- inspect_ai/log/_samples.py +16 -0
- inspect_ai/log/_transcript.py +4 -1
- inspect_ai/model/_call_tools.py +59 -0
- inspect_ai/model/_conversation.py +16 -7
- inspect_ai/model/_generate_config.py +12 -12
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +22 -2
- inspect_ai/model/_openai.py +383 -0
- inspect_ai/model/_providers/anthropic.py +152 -55
- inspect_ai/model/_providers/azureai.py +21 -21
- inspect_ai/model/_providers/bedrock.py +37 -40
- inspect_ai/model/_providers/goodfire.py +248 -0
- inspect_ai/model/_providers/google.py +46 -54
- inspect_ai/model/_providers/groq.py +7 -3
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +13 -12
- inspect_ai/model/_providers/openai.py +51 -218
- inspect_ai/model/_providers/openai_o1.py +11 -12
- inspect_ai/model/_providers/providers.py +23 -1
- inspect_ai/model/_providers/together.py +12 -12
- inspect_ai/model/_providers/util/__init__.py +2 -3
- inspect_ai/model/_providers/util/hf_handler.py +1 -1
- inspect_ai/model/_providers/util/llama31.py +1 -1
- inspect_ai/model/_providers/util/util.py +0 -76
- inspect_ai/model/_providers/vertex.py +1 -4
- inspect_ai/scorer/_metric.py +3 -0
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +4 -3
- inspect_ai/solver/__init__.py +4 -5
- inspect_ai/solver/_basic_agent.py +1 -1
- inspect_ai/solver/_bridge/__init__.py +3 -0
- inspect_ai/solver/_bridge/bridge.py +100 -0
- inspect_ai/solver/_bridge/patch.py +170 -0
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_solver.py +6 -0
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +12 -1
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_display.py +5 -0
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/docker/docker.py +64 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
- inspect_ai/util/_sandbox/environment.py +14 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
- inspect_ai/_view/www/src/api/Types.mjs +0 -117
- inspect_ai/_view/www/src/api/api-http.mjs +0 -300
- inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
- inspect_ai/_view/www/src/api/index.mjs +0 -49
- inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
@@ -2,15 +2,15 @@ import { render } from "preact";
|
|
2
2
|
import { html } from "htm/preact";
|
3
3
|
|
4
4
|
import { App } from "./App.mjs";
|
5
|
-
import api from "./api/index
|
6
|
-
import { getVscodeApi } from "./utils/vscode
|
5
|
+
import api from "./api/index";
|
6
|
+
import { getVscodeApi } from "./utils/vscode";
|
7
7
|
import { throttle } from "./utils/sync.mjs";
|
8
8
|
|
9
9
|
// Read any state from the page itself
|
10
10
|
const vscode = getVscodeApi();
|
11
11
|
let initialState = undefined;
|
12
12
|
if (vscode) {
|
13
|
-
initialState = vscode.getState();
|
13
|
+
initialState = filterState(vscode.getState());
|
14
14
|
}
|
15
15
|
|
16
16
|
render(
|
@@ -20,9 +20,82 @@ render(
|
|
20
20
|
saveInitialState=${throttle((state) => {
|
21
21
|
const vscode = getVscodeApi();
|
22
22
|
if (vscode) {
|
23
|
-
vscode.setState(state);
|
23
|
+
vscode.setState(filterState(state));
|
24
24
|
}
|
25
25
|
}, 1000)}
|
26
26
|
/>`,
|
27
27
|
document.getElementById("app"),
|
28
28
|
);
|
29
|
+
|
30
|
+
/**
 * Prepares view state for persistence by running it through each size filter.
 *
 * Falsy state (nothing saved yet) is returned untouched. Otherwise every
 * filter receives the output of the previous one and the final result is
 * returned.
 *
 * @param {object|undefined|null} state - the state read from / written to the host
 * @returns {object|undefined|null} the state with oversized entries removed
 */
function filterState(state) {
  if (!state) {
    return state;
  }

  // When saving state, we can't store vast amounts of data (like a large sample)
  let result = state;
  for (const applyFilter of [filterLargeSample, filterLargeSelectedLog]) {
    result = applyFilter(result);
  }
  return result;
}
|
42
|
+
|
43
|
+
/**
 * Drops `selectedSample` from the state when its messages look too large.
 *
 * The size is estimated from `selectedSample.messages`; above 400000 a copy
 * of the state without `selectedSample` is returned. In every other case the
 * original state object is returned as-is.
 *
 * @param {object|undefined|null} state - the state being filtered
 * @returns {object|undefined|null} the original state, or a copy without `selectedSample`
 */
function filterLargeSample(state) {
  const sample = state ? state.selectedSample : undefined;
  if (!sample) {
    return state;
  }

  const oversized = estimateSize(sample.messages) > 400000;
  if (!oversized) {
    return state;
  }

  // Copy first so the caller's state object is never mutated
  const trimmed = { ...state };
  delete trimmed.selectedSample;
  return trimmed;
}
|
57
|
+
|
58
|
+
/**
 * Drops `selectedLog` from the state when its sample summaries look too large.
 *
 * The size is estimated from `selectedLog.contents.sampleSummaries`; above
 * 400000 a copy of the state without `selectedLog` is returned. State with no
 * selected log (or a log without contents) is returned unchanged.
 *
 * @param {object|undefined|null} state - the state being filtered
 * @returns {object|undefined|null} the original state, or a copy without `selectedLog`
 */
function filterLargeSelectedLog(state) {
  const contents = state?.selectedLog?.contents;
  if (!contents) {
    return state;
  }

  const oversized = estimateSize(contents.sampleSummaries) > 400000;
  if (!oversized) {
    return state;
  }

  // Copy first so the caller's state object is never mutated
  const trimmed = { ...state };
  delete trimmed.selectedLog;
  return trimmed;
}
|
74
|
+
|
75
|
+
/**
 * Estimates the JSON-serialized size of a list without serializing all of it.
 *
 * A random subset of entries (without duplicates) is serialized with
 * `JSON.stringify`, and the average serialized length of that subset is
 * scaled up to the full list length. The result is measured in string
 * length (characters), not bytes.
 *
 * @param {Array<unknown>|undefined|null} list - the entries to measure
 * @param {number} [frequency=0.2] - fraction of entries to sample; the number
 *   of sampled entries is clamped to between 1 and `list.length`
 * @returns {number} the estimated total serialized length; 0 when the list is
 *   missing or empty
 */
function estimateSize(list, frequency = 0.2) {
  // Arrays expose `length` (not `len`); an empty list must short-circuit here,
  // otherwise the average below becomes 0 / 0 = NaN.
  if (!list || !(list.length > 0)) {
    return 0;
  }

  // Number of entries to sample: at least one, at most the whole list.
  // (`requested || 1` also covers a NaN result from a bad frequency.)
  const requested = Math.ceil(list.length * frequency);
  const sampleSize = Math.min(list.length, Math.max(1, requested || 1));

  // Draw distinct random indices; terminates because sampleSize <= list.length
  const sampledIndices = new Set();
  while (sampledIndices.size < sampleSize) {
    sampledIndices.add(Math.floor(Math.random() * list.length));
  }

  // Total serialized length of the sampled entries. JSON.stringify returns
  // undefined for values it cannot serialize (e.g. undefined); count those as 0.
  let sampledLength = 0;
  for (const index of sampledIndices) {
    const serialized = JSON.stringify(list[index]);
    sampledLength += serialized ? serialized.length : 0;
  }

  // Scale the per-entry average (over what was actually sampled) to the full list
  return (sampledLength / sampledIndices.size) * list.length;
}
|
@@ -1,5 +1,12 @@
|
|
1
1
|
//@ts-check
|
2
|
-
import {
|
2
|
+
import {
|
3
|
+
EvalHeader,
|
4
|
+
EvalSummary,
|
5
|
+
LogViewAPI,
|
6
|
+
SampleSummary,
|
7
|
+
} from "../api/Types";
|
8
|
+
import { EvalLog, EvalPlan, EvalSample, EvalSpec } from "../types/log";
|
9
|
+
import { asyncJsonParse } from "../utils/json-worker";
|
3
10
|
import { AsyncQueue } from "../utils/queue.mjs";
|
4
11
|
import {
|
5
12
|
FileSizeLimitError,
|
@@ -9,42 +16,46 @@ import {
|
|
9
16
|
// don't try to load samples greater than 50mb
|
10
17
|
const MAX_BYTES = 50 * 1024 * 1024;
|
11
18
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
*/
|
19
|
+
/**
 * Identifies a single sample stored in the remote log file.
 * The pair is what `readSample` needs to build the sample's file name
 * (`samples/<sampleId>_epoch_<epoch>.json`).
 */
interface SampleEntry {
  /** The id of the sample */
  sampleId: string;
  /** The epoch of the sample */
  epoch: number;
}
|
17
23
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
24
|
+
/**
 * The set of read operations exposed for an opened remote log file
 * (the value `openRemoteLogFile` resolves to).
 */
export interface RemoteLogFile {
  /** Reads the log header */
  readHeader: () => Promise<EvalHeader>;
  /** Reads the log summary */
  readLogSummary: () => Promise<EvalSummary>;
  /** Reads one sample, addressed by its id and epoch */
  readSample: (sampleId: string, epoch: number) => Promise<EvalSample>;
  /** Reads the complete log (header together with every sample) */
  readCompleteLog: () => Promise<EvalLog>;
}
|
30
|
+
|
31
|
+
/**
 * Shape of `_journal/start.json`, which `readHeader` falls back to when the
 * archive contains no `header.json`.
 */
interface LogStart {
  /** Version number recorded in the start record */
  version: number;
  /** The eval spec */
  eval: EvalSpec;
  /** The eval plan */
  plan: EvalPlan;
}
|
25
36
|
|
26
37
|
/**
|
27
38
|
* Opens a remote log file and provides methods to read its contents.
|
28
|
-
* @param {import("../api/Types.mjs").LogViewAPI} api - The api
|
29
|
-
* @param {string} url - The URL of the remote zip file.
|
30
|
-
* @param {number} concurrency - The number of concurrent operations allowed.
|
31
|
-
* @returns {Promise<RemoteLogFile>} An object with methods to read the log file.
|
32
39
|
*/
|
33
|
-
export const openRemoteLogFile = async (
|
40
|
+
export const openRemoteLogFile = async (
|
41
|
+
api: LogViewAPI,
|
42
|
+
url: string,
|
43
|
+
concurrency: number,
|
44
|
+
): Promise<RemoteLogFile> => {
|
34
45
|
const queue = new AsyncQueue(concurrency);
|
35
46
|
const remoteZipFile = await openRemoteZipFile(
|
36
|
-
|
47
|
+
url,
|
37
48
|
api.eval_log_size,
|
38
49
|
api.eval_log_bytes,
|
39
50
|
);
|
40
51
|
|
41
52
|
/**
|
42
53
|
* Reads and parses a JSON file from the zip.
|
43
|
-
* @param {string} file - The name of the file to read.
|
44
|
-
* @param {number} [maxBytes] - the max bytes
|
45
|
-
* @returns {Promise<Object>} The parsed JSON content.
|
46
54
|
*/
|
47
|
-
const readJSONFile = async (
|
55
|
+
const readJSONFile = async (
|
56
|
+
file: string,
|
57
|
+
maxBytes?: number,
|
58
|
+
): Promise<Object> => {
|
48
59
|
try {
|
49
60
|
const data = await remoteZipFile.readFile(file, maxBytes);
|
50
61
|
const textDecoder = new TextDecoder("utf-8");
|
@@ -53,19 +64,22 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
|
|
53
64
|
} catch (error) {
|
54
65
|
if (error instanceof FileSizeLimitError) {
|
55
66
|
throw error;
|
56
|
-
} else {
|
67
|
+
} else if (error instanceof Error) {
|
57
68
|
throw new Error(
|
58
69
|
`Failed to read or parse file ${file}: ${error.message}`,
|
59
70
|
);
|
71
|
+
} else {
|
72
|
+
throw new Error(
|
73
|
+
`Failed to read or parse file ${file} - an unknown error occurred`,
|
74
|
+
);
|
60
75
|
}
|
61
76
|
}
|
62
77
|
};
|
63
78
|
|
64
79
|
/**
|
65
80
|
* Lists all samples in the zip file.
|
66
|
-
* @returns {Promise<SampleEntry[]>} An array of sample objects.
|
67
81
|
*/
|
68
|
-
const listSamples = async () => {
|
82
|
+
const listSamples = async (): Promise<SampleEntry[]> => {
|
69
83
|
return Array.from(remoteZipFile.centralDirectory.keys())
|
70
84
|
.filter(
|
71
85
|
(filename) =>
|
@@ -82,14 +96,14 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
|
|
82
96
|
|
83
97
|
/**
|
84
98
|
* Reads a specific sample file.
|
85
|
-
* @param {string} sampleId - The ID of the sample.
|
86
|
-
* @param {number} epoch - The epoch of the sample.
|
87
|
-
* @returns {Promise<Object>} The content of the sample file.
|
88
99
|
*/
|
89
|
-
const readSample = async (
|
100
|
+
const readSample = async (
|
101
|
+
sampleId: string,
|
102
|
+
epoch: number,
|
103
|
+
): Promise<EvalSample> => {
|
90
104
|
const sampleFile = `samples/${sampleId}_epoch_${epoch}.json`;
|
91
105
|
if (remoteZipFile.centralDirectory.has(sampleFile)) {
|
92
|
-
return readJSONFile(sampleFile, MAX_BYTES);
|
106
|
+
return (await readJSONFile(sampleFile, MAX_BYTES)) as EvalSample;
|
93
107
|
} else {
|
94
108
|
console.log({ dir: remoteZipFile.centralDirectory });
|
95
109
|
throw new Error(
|
@@ -100,13 +114,12 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
|
|
100
114
|
|
101
115
|
/**
|
102
116
|
* Reads the log header from header.json, falling back to _journal/start.json when no header is present.
|
103
|
-
* @returns {Promise<Object>} The content of results.json.
|
104
117
|
*/
|
105
|
-
const readHeader = async () => {
|
118
|
+
const readHeader = async (): Promise<EvalHeader> => {
|
106
119
|
if (remoteZipFile.centralDirectory.has("header.json")) {
|
107
|
-
return readJSONFile("header.json");
|
120
|
+
return (await readJSONFile("header.json")) as EvalHeader;
|
108
121
|
} else {
|
109
|
-
const evalSpec = await readJSONFile("_journal/start.json");
|
122
|
+
const evalSpec = (await readJSONFile("_journal/start.json")) as LogStart;
|
110
123
|
return {
|
111
124
|
status: "started",
|
112
125
|
eval: evalSpec.eval,
|
@@ -117,9 +130,8 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
|
|
117
130
|
|
118
131
|
/**
|
119
132
|
* Reads individual summary files when summaries.json is not available.
|
120
|
-
* @returns {Promise<Object>} Combined summaries from individual files.
|
121
133
|
*/
|
122
|
-
const readFallbackSummaries = async () => {
|
134
|
+
const readFallbackSummaries = async (): Promise<SampleSummary[]> => {
|
123
135
|
const summaryFiles = Array.from(
|
124
136
|
remoteZipFile.centralDirectory.keys(),
|
125
137
|
).filter(
|
@@ -128,14 +140,16 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
|
|
128
140
|
filename.endsWith(".json"),
|
129
141
|
);
|
130
142
|
|
131
|
-
const summaries = [];
|
132
|
-
const errors = [];
|
143
|
+
const summaries: SampleSummary[] = [];
|
144
|
+
const errors: unknown[] = [];
|
133
145
|
|
134
146
|
await Promise.all(
|
135
147
|
summaryFiles.map((filename) =>
|
136
148
|
queue.enqueue(async () => {
|
137
149
|
try {
|
138
|
-
const partialSummary = await readJSONFile(
|
150
|
+
const partialSummary = (await readJSONFile(
|
151
|
+
filename,
|
152
|
+
)) as SampleSummary[];
|
139
153
|
summaries.push(...partialSummary);
|
140
154
|
} catch (error) {
|
141
155
|
errors.push(error);
|
@@ -156,11 +170,10 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
|
|
156
170
|
|
157
171
|
/**
|
158
172
|
* Reads all summaries, falling back to individual files if necessary.
|
159
|
-
* @returns {Promise<Object>} All summaries.
|
160
173
|
*/
|
161
|
-
const readSampleSummaries = async () => {
|
174
|
+
const readSampleSummaries = async (): Promise<SampleSummary[]> => {
|
162
175
|
if (remoteZipFile.centralDirectory.has("summaries.json")) {
|
163
|
-
return await readJSONFile("summaries.json");
|
176
|
+
return (await readJSONFile("summaries.json")) as SampleSummary[];
|
164
177
|
} else {
|
165
178
|
return readFallbackSummaries();
|
166
179
|
}
|
@@ -187,14 +200,17 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
|
|
187
200
|
readSample,
|
188
201
|
/**
|
189
202
|
* Reads the complete log file.
|
190
|
-
* @returns {Promise<import("../types/log").EvalLog>} The complete log data.
|
191
203
|
*/
|
192
|
-
readCompleteLog: async () => {
|
204
|
+
readCompleteLog: async (): Promise<EvalLog> => {
|
193
205
|
const [evalLog, samples] = await Promise.all([
|
194
206
|
readHeader(),
|
195
207
|
listSamples().then((sampleIds) =>
|
196
208
|
Promise.all(
|
197
|
-
sampleIds.map(({ sampleId, epoch }) =>
|
209
|
+
sampleIds.map(({ sampleId, epoch }) =>
|
210
|
+
readSample(sampleId, epoch).then(
|
211
|
+
(sample) => sample as EvalSample,
|
212
|
+
),
|
213
|
+
),
|
198
214
|
),
|
199
215
|
),
|
200
216
|
]);
|
@@ -18,7 +18,8 @@ import { SecondaryBar } from "./SecondaryBar.mjs";
|
|
18
18
|
* @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults
|
19
19
|
* @param {import("../types/log").EvalPlan} [props.evalPlan] - The EvalPlan
|
20
20
|
* @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
|
21
|
-
* @param {import("../
|
21
|
+
* @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
|
22
|
+
* @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
|
22
23
|
* @param {string} [props.status] - the status
|
23
24
|
* @param {boolean} props.offcanvas - Are we in offcanvas mode?
|
24
25
|
* @param {boolean} props.showToggle - Should we show the toggle?
|
@@ -32,6 +33,7 @@ export const Navbar = ({
|
|
32
33
|
evalResults,
|
33
34
|
evalStats,
|
34
35
|
samples,
|
36
|
+
evalDescriptor,
|
35
37
|
showToggle,
|
36
38
|
offcanvas,
|
37
39
|
status,
|
@@ -182,6 +184,7 @@ export const Navbar = ({
|
|
182
184
|
evalResults=${evalResults}
|
183
185
|
evalStats=${evalStats}
|
184
186
|
samples=${samples}
|
187
|
+
evalDescriptor=${evalDescriptor}
|
185
188
|
status=${status}
|
186
189
|
style=${{ gridColumn: "1/-1" }}
|
187
190
|
/>
|
@@ -3,6 +3,7 @@ import { html } from "htm/preact";
|
|
3
3
|
import { LabeledValue } from "../components/LabeledValue.mjs";
|
4
4
|
import { formatDataset, formatDuration } from "../utils/Format.mjs";
|
5
5
|
import { ExpandablePanel } from "../components/ExpandablePanel.mjs";
|
6
|
+
import { scoreFilterItems } from "../samples/tools/filters.mjs";
|
6
7
|
|
7
8
|
/**
|
8
9
|
* Renders the Navbar
|
@@ -12,7 +13,8 @@ import { ExpandablePanel } from "../components/ExpandablePanel.mjs";
|
|
12
13
|
* @param {import("../types/log").EvalPlan} [props.evalPlan] - The EvalPlan
|
13
14
|
* @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults
|
14
15
|
* @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
|
15
|
-
* @param {import("../
|
16
|
+
* @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
|
17
|
+
* @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
|
16
18
|
* @param {string} [props.status] - the status
|
17
19
|
* @param {Map<string, string>} [props.style] - is this off canvas
|
18
20
|
*
|
@@ -24,6 +26,7 @@ export const SecondaryBar = ({
|
|
24
26
|
evalResults,
|
25
27
|
evalStats,
|
26
28
|
samples,
|
29
|
+
evalDescriptor,
|
27
30
|
status,
|
28
31
|
style,
|
29
32
|
}) => {
|
@@ -60,8 +63,8 @@ export const SecondaryBar = ({
|
|
60
63
|
values.push({
|
61
64
|
size: "minmax(12%, auto)",
|
62
65
|
value: html`<${LabeledValue} label="${label}" style=${staticColStyle} style=${{ justifySelf: hasConfig ? "left" : "center" }}>
|
63
|
-
<${ScorerSummary}
|
64
|
-
|
66
|
+
<${ScorerSummary}
|
67
|
+
evalDescriptor=${evalDescriptor} />
|
65
68
|
</${LabeledValue}>`,
|
66
69
|
});
|
67
70
|
|
@@ -124,17 +127,23 @@ const DatasetSummary = ({ dataset, samples, epochs, style }) => {
|
|
124
127
|
`;
|
125
128
|
};
|
126
129
|
|
127
|
-
const ScorerSummary = ({
|
128
|
-
if (!
|
130
|
+
const ScorerSummary = ({ evalDescriptor }) => {
|
131
|
+
if (!evalDescriptor) {
|
129
132
|
return "";
|
130
133
|
}
|
131
134
|
|
132
|
-
const
|
133
|
-
scorers.forEach((scorer) => {
|
134
|
-
uniqScorers.add(scorer.name);
|
135
|
-
});
|
135
|
+
const items = scoreFilterItems(evalDescriptor);
|
136
136
|
|
137
|
-
return
|
137
|
+
return html`
|
138
|
+
<span style=${{ position: "relative" }}>
|
139
|
+
${Array.from(items).map(
|
140
|
+
(item, index) => html`
|
141
|
+
${index > 0 ? ", " : ""}
|
142
|
+
<span title=${item.tooltip}>${item.canonicalName}</span>
|
143
|
+
`,
|
144
|
+
)}
|
145
|
+
</span>
|
146
|
+
`;
|
138
147
|
};
|
139
148
|
|
140
149
|
/**
|
@@ -1,5 +1,5 @@
|
|
1
1
|
import { html } from "htm/preact";
|
2
|
-
import { useCallback, useMemo } from "preact/hooks";
|
2
|
+
import { useCallback, useMemo, useRef } from "preact/hooks";
|
3
3
|
|
4
4
|
import { ApplicationIcons } from "../appearance/Icons.mjs";
|
5
5
|
import { LargeModal } from "../components/LargeModal.mjs";
|
@@ -43,6 +43,8 @@ export const SampleDialog = ({
|
|
43
43
|
sampleScrollPositionRef,
|
44
44
|
setSampleScrollPosition,
|
45
45
|
}) => {
|
46
|
+
const scrollRef = useRef(/** @type {HTMLElement|null} */ (null));
|
47
|
+
|
46
48
|
const tools = useMemo(() => {
|
47
49
|
const nextTool = {
|
48
50
|
label: "Next Sample",
|
@@ -94,6 +96,7 @@ export const SampleDialog = ({
|
|
94
96
|
sampleDescriptor=${sampleDescriptor}
|
95
97
|
selectedTab=${selectedTab}
|
96
98
|
setSelectedTab=${setSelectedTab}
|
99
|
+
scrollRef=${scrollRef}
|
97
100
|
/>`;
|
98
101
|
}, [id, sample, sampleDescriptor, selectedTab, setSelectedTab, sampleError]);
|
99
102
|
|
@@ -113,6 +116,7 @@ export const SampleDialog = ({
|
|
113
116
|
showProgress=${sampleStatus === "loading"}
|
114
117
|
initialScrollPositionRef=${sampleScrollPositionRef}
|
115
118
|
setInitialScrollPosition=${setSampleScrollPosition}
|
119
|
+
scrollRef=${scrollRef}
|
116
120
|
>
|
117
121
|
${children}
|
118
122
|
</${LargeModal}>`;
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import { html } from "htm/preact";
|
2
2
|
|
3
|
-
import {
|
3
|
+
import { ChatViewVirtualList } from "../components/ChatView.mjs";
|
4
4
|
import { MetaDataView } from "../components/MetaDataView.mjs";
|
5
5
|
import { TabSet, TabPanel } from "../components/TabSet.mjs";
|
6
6
|
|
@@ -47,6 +47,7 @@ import {
|
|
47
47
|
* @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - the sample descriptor
|
48
48
|
* @param {string} props.selectedTab - The selected tab
|
49
49
|
* @param {(tab: string) => void} props.setSelectedTab - function to set the selected tab
|
50
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable element which contains this display
|
50
51
|
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
51
52
|
*/
|
52
53
|
export const InlineSampleDisplay = ({
|
@@ -57,6 +58,7 @@ export const InlineSampleDisplay = ({
|
|
57
58
|
sampleDescriptor,
|
58
59
|
selectedTab,
|
59
60
|
setSelectedTab,
|
61
|
+
scrollRef,
|
60
62
|
}) => {
|
61
63
|
return html`<div style=${{ flexDirection: "row", width: "100%" }}>
|
62
64
|
<${ProgressBar}
|
@@ -77,6 +79,7 @@ export const InlineSampleDisplay = ({
|
|
77
79
|
sampleDescriptor=${sampleDescriptor}
|
78
80
|
selectedTab=${selectedTab}
|
79
81
|
setSelectedTab=${setSelectedTab}
|
82
|
+
scrollRef=${scrollRef}
|
80
83
|
/>`}
|
81
84
|
</div>
|
82
85
|
</div>`;
|
@@ -91,6 +94,7 @@ export const InlineSampleDisplay = ({
|
|
91
94
|
* @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - the sample descriptor
|
92
95
|
* @param {string} props.selectedTab - The selected tab
|
93
96
|
* @param {(tab: string) => void} props.setSelectedTab - function to set the selected tab
|
97
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
|
94
98
|
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
95
99
|
*/
|
96
100
|
export const SampleDisplay = ({
|
@@ -99,6 +103,7 @@ export const SampleDisplay = ({
|
|
99
103
|
sampleDescriptor,
|
100
104
|
selectedTab,
|
101
105
|
setSelectedTab,
|
106
|
+
scrollRef,
|
102
107
|
}) => {
|
103
108
|
// Tab ids
|
104
109
|
const baseId = `sample-dialog`;
|
@@ -120,13 +125,14 @@ export const SampleDisplay = ({
|
|
120
125
|
html`
|
121
126
|
<${TabPanel} id=${kSampleMessagesTabId} classes="sample-tab" title="Messages" onSelected=${onSelectedTab} selected=${
|
122
127
|
selectedTab === kSampleMessagesTabId
|
123
|
-
}>
|
124
|
-
<${
|
128
|
+
} scrollable=${false} style=${{ width: "100%" }}>
|
129
|
+
<${ChatViewVirtualList}
|
125
130
|
key=${`${baseId}-chat-${id}`}
|
126
131
|
id=${`${baseId}-chat-${id}`}
|
127
132
|
messages=${sample.messages}
|
128
|
-
style=${{
|
133
|
+
style=${{ marginLeft: ".8em", marginTop: "1em" }}
|
129
134
|
indented=${true}
|
135
|
+
scrollRef=${scrollRef}
|
130
136
|
/>
|
131
137
|
</${TabPanel}>`,
|
132
138
|
];
|
@@ -136,7 +142,7 @@ export const SampleDisplay = ({
|
|
136
142
|
<${TabPanel} id=${kSampleTranscriptTabId} classes="sample-tab" title="Transcript" onSelected=${onSelectedTab} selected=${
|
137
143
|
selectedTab === kSampleTranscriptTabId || selectedTab === undefined
|
138
144
|
} scrollable=${false}>
|
139
|
-
<${SampleTranscript} key=${`${baseId}-transcript-display-${id}`} id=${`${baseId}-transcript-display-${id}`} evalEvents=${sample.events}/>
|
145
|
+
<${SampleTranscript} key=${`${baseId}-transcript-display-${id}`} id=${`${baseId}-transcript-display-${id}`} evalEvents=${sample.events} scrollRef=${scrollRef}/>
|
140
146
|
</${TabPanel}>`);
|
141
147
|
}
|
142
148
|
|
@@ -201,16 +207,18 @@ export const SampleDisplay = ({
|
|
201
207
|
);
|
202
208
|
}
|
203
209
|
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
210
|
+
if (sample.messages.length < 100) {
|
211
|
+
tabs.push(html`<${TabPanel}
|
212
|
+
id=${kSampleJsonTabId}
|
213
|
+
classes="sample-tab"
|
214
|
+
title="JSON"
|
215
|
+
onSelected=${onSelectedTab}
|
216
|
+
selected=${selectedTab === kSampleJsonTabId}>
|
217
|
+
<div style=${{ paddingLeft: "0.8em", marginTop: "0.4em" }}>
|
218
|
+
<${JSONPanel} data=${sample} simple=${true}/>
|
219
|
+
</div>
|
220
|
+
</${TabPanel}>`);
|
221
|
+
}
|
214
222
|
|
215
223
|
const tabsetId = `task-sample-details-tab-${id}`;
|
216
224
|
const targetId = `${tabsetId}-content`;
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import { html } from "htm/preact";
|
2
|
-
import { useCallback, useState } from "preact/hooks";
|
3
|
-
import { useEffect,
|
2
|
+
import { useCallback, useMemo, useState } from "preact/hooks";
|
3
|
+
import { useEffect, useRef } from "preact/hooks";
|
4
4
|
|
5
5
|
import { ApplicationStyles } from "../appearance/Styles.mjs";
|
6
6
|
import { FontSize } from "../appearance/Fonts.mjs";
|
@@ -56,57 +56,28 @@ export const SampleList = (props) => {
|
|
56
56
|
setHidden(false);
|
57
57
|
}, [items]);
|
58
58
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
values.length > 0 ? values[values.length - 1] : undefined;
|
69
|
-
const start =
|
70
|
-
previous === undefined ? 0 : previous.start + previous.height;
|
71
|
-
values.push({
|
72
|
-
index,
|
73
|
-
height,
|
74
|
-
start,
|
75
|
-
});
|
76
|
-
return values;
|
77
|
-
}, []);
|
59
|
+
// Keep a mapping of the indexes to items (skipping separators)
|
60
|
+
const itemRowMapping = useMemo(() => {
|
61
|
+
const rowIndexes = [];
|
62
|
+
items.forEach((item, index) => {
|
63
|
+
if (item.type === "sample") {
|
64
|
+
rowIndexes.push(index);
|
65
|
+
}
|
66
|
+
});
|
67
|
+
return rowIndexes;
|
78
68
|
}, [items]);
|
79
69
|
|
70
|
+
const prevSelectedIndexRef = useRef(null);
|
80
71
|
useEffect(() => {
|
81
72
|
const listEl = listRef.current;
|
82
73
|
if (listEl) {
|
83
|
-
|
84
|
-
const
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
const scrollTop = listEl.base.scrollTop;
|
90
|
-
const scrollBottom = scrollTop + listEl.base.offsetHeight;
|
91
|
-
|
92
|
-
// It is visible
|
93
|
-
if (itemTop >= scrollTop && itemBottom <= scrollBottom) {
|
94
|
-
return;
|
95
|
-
}
|
96
|
-
|
97
|
-
if (itemTop < scrollTop) {
|
98
|
-
// Top is scrolled off
|
99
|
-
listEl.base.scrollTo({ top: itemTop });
|
100
|
-
return;
|
101
|
-
}
|
102
|
-
|
103
|
-
if (itemBottom > scrollBottom) {
|
104
|
-
listEl.base.scrollTo({ top: itemBottom - listEl.base.offsetHeight });
|
105
|
-
return;
|
106
|
-
}
|
107
|
-
}
|
74
|
+
const actualRowIndex = itemRowMapping[selectedIndex];
|
75
|
+
const direction =
|
76
|
+
actualRowIndex > prevSelectedIndexRef.current ? "down" : "up";
|
77
|
+
listRef.current?.scrollToIndex(actualRowIndex, direction);
|
78
|
+
prevSelectedIndexRef.current = actualRowIndex;
|
108
79
|
}
|
109
|
-
}, [selectedIndex,
|
80
|
+
}, [selectedIndex, listRef, itemRowMapping]);
|
110
81
|
|
111
82
|
/** @param {import("./SamplesTab.mjs").ListItem} item */
|
112
83
|
const renderRow = (item) => {
|
@@ -254,7 +225,6 @@ export const SampleList = (props) => {
|
|
254
225
|
tabIndex="0"
|
255
226
|
renderRow=${renderRow}
|
256
227
|
onkeydown=${onkeydown}
|
257
|
-
rowMap=${rowMap}
|
258
228
|
style=${listStyle}
|
259
229
|
/>
|
260
230
|
${footerRow}
|
@@ -282,7 +252,7 @@ const SeparatorRow = ({ id, title, height }) => {
|
|
282
252
|
* @param {Object} props - The parameters for the component.
|
283
253
|
* @param {string} props.id - The unique identifier for the sample.
|
284
254
|
* @param {number} props.index - The index of the sample.
|
285
|
-
* @param {import("../api/Types.
|
255
|
+
* @param {import("../api/Types.ts").SampleSummary} props.sample - The sample.
|
286
256
|
* @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor.
|
287
257
|
* @param {number} props.height - The height of the sample row.
|
288
258
|
* @param {boolean} props.selected - Whether the sample is selected.
|
@@ -2,7 +2,7 @@ import { html } from "htm/preact";
|
|
2
2
|
|
3
3
|
/**
|
4
4
|
* @param {Object} props
|
5
|
-
* @param {import("../api/Types.
|
5
|
+
* @param {import("../api/Types.ts").SampleSummary} props.sample
|
6
6
|
* @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor
|
7
7
|
* @param {string} props.scorer
|
8
8
|
* @returns {import("preact").JSX.Element}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
// @ts-check
|
2
2
|
import { html } from "htm/preact";
|
3
|
-
import {
|
3
|
+
import { TranscriptVirtualList } from "./transcript/TranscriptView.mjs";
|
4
4
|
|
5
5
|
/**
|
6
6
|
* Renders the SampleTranscript component.
|
@@ -8,8 +8,13 @@ import { TranscriptView } from "./transcript/TranscriptView.mjs";
|
|
8
8
|
* @param {Object} props - The parameters for the component.
|
9
9
|
* @param {string} props.id - The id of this component
|
10
10
|
* @param {import("../types/log").Events} props.evalEvents - The transcript to display.
|
11
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
|
11
12
|
* @returns {import("preact").JSX.Element} The SampleTranscript component.
|
12
13
|
*/
|
13
|
-
export const SampleTranscript = ({ id, evalEvents }) => {
|
14
|
-
return html`<${
|
14
|
+
export const SampleTranscript = ({ id, evalEvents, scrollRef }) => {
|
15
|
+
return html`<${TranscriptVirtualList}
|
16
|
+
id=${id}
|
17
|
+
events=${evalEvents}
|
18
|
+
scrollRef=${scrollRef}
|
19
|
+
/>`;
|
15
20
|
};
|