inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +3 -1
- inspect_ai/_cli/eval.py +15 -9
- inspect_ai/_display/core/active.py +4 -1
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +0 -5
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +79 -12
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +10 -1
- inspect_ai/_eval/loader.py +79 -19
- inspect_ai/_eval/registry.py +6 -0
- inspect_ai/_eval/score.py +3 -1
- inspect_ai/_eval/task/results.py +51 -22
- inspect_ai/_eval/task/run.py +47 -13
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25498 -2044
- inspect_ai/_view/www/log-schema.json +32 -2
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +14 -16
- inspect_ai/_view/www/src/Types.mjs +1 -2
- inspect_ai/_view/www/src/api/Types.ts +133 -0
- inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
- inspect_ai/_view/www/src/api/api-http.ts +219 -0
- inspect_ai/_view/www/src/api/api-shared.ts +47 -0
- inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
- inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
- inspect_ai/_view/www/src/api/index.ts +51 -0
- inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +77 -4
- inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
- inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
- inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +13 -2
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
- inspect_ai/_view/www/src/utils/vscode.ts +36 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/manager.py +1 -1
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/log/_log.py +1 -1
- inspect_ai/log/_samples.py +16 -0
- inspect_ai/log/_transcript.py +4 -1
- inspect_ai/model/_call_tools.py +59 -0
- inspect_ai/model/_conversation.py +16 -7
- inspect_ai/model/_generate_config.py +12 -12
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +22 -2
- inspect_ai/model/_openai.py +383 -0
- inspect_ai/model/_providers/anthropic.py +152 -55
- inspect_ai/model/_providers/azureai.py +21 -21
- inspect_ai/model/_providers/bedrock.py +37 -40
- inspect_ai/model/_providers/goodfire.py +248 -0
- inspect_ai/model/_providers/google.py +46 -54
- inspect_ai/model/_providers/groq.py +7 -3
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +13 -12
- inspect_ai/model/_providers/openai.py +51 -218
- inspect_ai/model/_providers/openai_o1.py +11 -12
- inspect_ai/model/_providers/providers.py +23 -1
- inspect_ai/model/_providers/together.py +12 -12
- inspect_ai/model/_providers/util/__init__.py +2 -3
- inspect_ai/model/_providers/util/hf_handler.py +1 -1
- inspect_ai/model/_providers/util/llama31.py +1 -1
- inspect_ai/model/_providers/util/util.py +0 -76
- inspect_ai/model/_providers/vertex.py +1 -4
- inspect_ai/scorer/_metric.py +3 -0
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +4 -3
- inspect_ai/solver/__init__.py +4 -5
- inspect_ai/solver/_basic_agent.py +1 -1
- inspect_ai/solver/_bridge/__init__.py +3 -0
- inspect_ai/solver/_bridge/bridge.py +100 -0
- inspect_ai/solver/_bridge/patch.py +170 -0
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_solver.py +6 -0
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +12 -1
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_display.py +5 -0
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/docker/docker.py +64 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
- inspect_ai/util/_sandbox/environment.py +14 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
- inspect_ai/_view/www/src/api/Types.mjs +0 -117
- inspect_ai/_view/www/src/api/api-http.mjs +0 -300
- inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
- inspect_ai/_view/www/src/api/index.mjs +0 -49
- inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
@@ -1137,6 +1137,7 @@
|
|
1137
1137
|
"logprobs": null,
|
1138
1138
|
"top_logprobs": null,
|
1139
1139
|
"parallel_tool_calls": null,
|
1140
|
+
"internal_tools": null,
|
1140
1141
|
"max_tool_output": null,
|
1141
1142
|
"cache_prompt": null,
|
1142
1143
|
"reasoning_effort": null
|
@@ -1516,7 +1517,8 @@
|
|
1516
1517
|
"time",
|
1517
1518
|
"message",
|
1518
1519
|
"token",
|
1519
|
-
"operator"
|
1520
|
+
"operator",
|
1521
|
+
"custom"
|
1520
1522
|
],
|
1521
1523
|
"title": "Type",
|
1522
1524
|
"type": "string"
|
@@ -2190,6 +2192,18 @@
|
|
2190
2192
|
"default": null,
|
2191
2193
|
"title": "Parallel Tool Calls"
|
2192
2194
|
},
|
2195
|
+
"internal_tools": {
|
2196
|
+
"anyOf": [
|
2197
|
+
{
|
2198
|
+
"type": "boolean"
|
2199
|
+
},
|
2200
|
+
{
|
2201
|
+
"type": "null"
|
2202
|
+
}
|
2203
|
+
],
|
2204
|
+
"default": null,
|
2205
|
+
"title": "Internal Tools"
|
2206
|
+
},
|
2193
2207
|
"max_tool_output": {
|
2194
2208
|
"anyOf": [
|
2195
2209
|
{
|
@@ -2258,6 +2272,7 @@
|
|
2258
2272
|
"logprobs",
|
2259
2273
|
"top_logprobs",
|
2260
2274
|
"parallel_tool_calls",
|
2275
|
+
"internal_tools",
|
2261
2276
|
"max_tool_output",
|
2262
2277
|
"cache_prompt",
|
2263
2278
|
"reasoning_effort"
|
@@ -2681,6 +2696,18 @@
|
|
2681
2696
|
"output": {
|
2682
2697
|
"$ref": "#/$defs/ModelOutput"
|
2683
2698
|
},
|
2699
|
+
"error": {
|
2700
|
+
"anyOf": [
|
2701
|
+
{
|
2702
|
+
"type": "string"
|
2703
|
+
},
|
2704
|
+
{
|
2705
|
+
"type": "null"
|
2706
|
+
}
|
2707
|
+
],
|
2708
|
+
"default": null,
|
2709
|
+
"title": "Error"
|
2710
|
+
},
|
2684
2711
|
"cache": {
|
2685
2712
|
"anyOf": [
|
2686
2713
|
{
|
@@ -2719,6 +2746,7 @@
|
|
2719
2746
|
"tool_choice",
|
2720
2747
|
"config",
|
2721
2748
|
"output",
|
2749
|
+
"error",
|
2722
2750
|
"cache",
|
2723
2751
|
"call"
|
2724
2752
|
],
|
@@ -3066,7 +3094,8 @@
|
|
3066
3094
|
"message",
|
3067
3095
|
"time",
|
3068
3096
|
"token",
|
3069
|
-
"operator"
|
3097
|
+
"operator",
|
3098
|
+
"custom"
|
3070
3099
|
],
|
3071
3100
|
"title": "Type",
|
3072
3101
|
"type": "string"
|
@@ -4207,6 +4236,7 @@
|
|
4207
4236
|
"best_of": null,
|
4208
4237
|
"cache_prompt": null,
|
4209
4238
|
"frequency_penalty": null,
|
4239
|
+
"internal_tools": null,
|
4210
4240
|
"logit_bias": null,
|
4211
4241
|
"logprobs": null,
|
4212
4242
|
"max_connections": null,
|
@@ -30,8 +30,10 @@
|
|
30
30
|
"bootstrap": "^5.3.3",
|
31
31
|
"bootstrap-icons": "^1.11.3",
|
32
32
|
"clipboard": "^2.0.11",
|
33
|
+
"codemirror": "^6.0.1",
|
33
34
|
"fast-json-patch": "^3.1.1",
|
34
35
|
"fflate": "^0.8.2",
|
36
|
+
"filtrex": "^3.1.0",
|
35
37
|
"htm": "^3.1.1",
|
36
38
|
"json": "^11.0.0",
|
37
39
|
"json5": "^2.2.3",
|
inspect_ai/_view/www/src/App.mjs
CHANGED
@@ -30,7 +30,7 @@ import { Sidebar } from "./sidebar/Sidebar.mjs";
|
|
30
30
|
import { WorkSpace } from "./workspace/WorkSpace.mjs";
|
31
31
|
import { FindBand } from "./components/FindBand.mjs";
|
32
32
|
import { isVscode } from "./utils/Html.mjs";
|
33
|
-
import { getVscodeApi } from "./utils/vscode.mjs";
|
33
|
+
import { getVscodeApi } from "./utils/vscode";
|
34
34
|
import { kDefaultSort } from "./constants.mjs";
|
35
35
|
import {
|
36
36
|
createEvalDescriptor,
|
@@ -38,7 +38,7 @@ import {
|
|
38
38
|
} from "./samples/SamplesDescriptor.mjs";
|
39
39
|
import { byEpoch, bySample, sortSamples } from "./samples/tools/SortFilter.mjs";
|
40
40
|
import { resolveAttachments } from "./utils/attachments.mjs";
|
41
|
-
import { filterFnForType } from "./samples/tools/filters.mjs";
|
41
|
+
import { filterSamples } from "./samples/tools/filters.mjs";
|
42
42
|
|
43
43
|
import {
|
44
44
|
kEvalWorkspaceTabId,
|
@@ -51,7 +51,7 @@ import {
|
|
51
51
|
* Renders the Main Application
|
52
52
|
*
|
53
53
|
* @param {Object} props - The parameters for the component.
|
54
|
-
* @param {import("./api/Types.mjs").ClientAPI} props.api - The api that this view should use
|
54
|
+
* @param {import("./api/Types.ts").ClientAPI} props.api - The api that this view should use
|
55
55
|
* @param {Object} [props.initialState] - Initial state for app (optional, used by VS Code extension)
|
56
56
|
* @param {(state: Object) => void} [props.saveInitialState] - Save initial state for app (optional, used by VS Code extension)
|
57
57
|
* @param {boolean} props.pollForLogs - Whether the application should poll for log changes
|
@@ -308,21 +308,19 @@ export function App({
|
|
308
308
|
|
309
309
|
useEffect(() => {
|
310
310
|
const samples = selectedLog?.contents?.sampleSummaries || [];
|
311
|
-
const filtered = samples.filter((sample) => {
|
311
|
+
const { result: prefiltered } = filterSamples(
|
312
|
+
evalDescriptor,
|
313
|
+
samples,
|
314
|
+
filter?.value,
|
315
|
+
);
|
316
|
+
const filtered = prefiltered.filter((sample) => {
|
312
317
|
// Filter by epoch if specified
|
313
318
|
if (epoch && epoch !== "all") {
|
314
319
|
if (epoch !== sample.epoch + "") {
|
315
320
|
return false;
|
316
321
|
}
|
317
322
|
}
|
318
|
-
|
319
|
-
// Apply the filter
|
320
|
-
const filterFn = filterFnForType(filter);
|
321
|
-
if (filterFn && filter.value) {
|
322
|
-
return filterFn(samplesDescriptor, sample, filter.value);
|
323
|
-
} else {
|
324
|
-
return true;
|
325
|
-
}
|
323
|
+
return true;
|
326
324
|
});
|
327
325
|
|
328
326
|
// Sort the samples
|
@@ -509,12 +507,12 @@ export function App({
|
|
509
507
|
* Determines whether the workspace tab should display samples or info,
|
510
508
|
* depending on the presence of samples and the log status.
|
511
509
|
*
|
512
|
-
* @param {import("./api/Types.mjs").EvalSummary} log - The log object containing sample summaries and status.
|
510
|
+
* @param {import("./api/Types.ts").EvalSummary} log - The log object containing sample summaries and status.
|
513
511
|
* @returns {void}
|
514
512
|
*/
|
515
513
|
const resetWorkspace = useCallback(
|
516
514
|
/**
|
517
|
-
* @param {import("./api/Types.mjs").EvalSummary} log
|
515
|
+
* @param {import("./api/Types.ts").EvalSummary} log
|
518
516
|
*/
|
519
517
|
(log) => {
|
520
518
|
// Reset the workspace tab
|
@@ -961,7 +959,7 @@ export function App({
|
|
961
959
|
/**
|
962
960
|
* Determines the default scorer for a log
|
963
961
|
*
|
964
|
-
* @param {import("./api/Types.mjs").EvalSummary} log - The log object containing sample summaries and status.
|
962
|
+
* @param {import("./api/Types.ts").EvalSummary} log - The log object containing sample summaries and status.
|
965
963
|
* @returns {{name: string, scorer: string} | undefined} A scorer object with name and scorer properties, or undefined
|
966
964
|
*/
|
967
965
|
const defaultScorer = (log) => {
|
@@ -983,7 +981,7 @@ const defaultScorer = (log) => {
|
|
983
981
|
/**
|
984
982
|
* Determines the default scorers for a log
|
985
983
|
*
|
986
|
-
* @param {import("./api/Types.mjs").EvalSummary} log - The log object containing sample summaries and status.
|
984
|
+
* @param {import("./api/Types.ts").EvalSummary} log - The log object containing sample summaries and status.
|
987
985
|
* @returns {Array<{name: string, scorer: string}>} An array of scorer objects with name and scorer properties, or an empty array if no scorers are found.
|
988
986
|
*/
|
989
987
|
const defaultScorers = (log) => {
|
@@ -7,7 +7,7 @@
|
|
7
7
|
/**
|
8
8
|
* @typedef {Object} CurrentLog
|
9
9
|
* @property {string} name
|
10
|
-
* @property {import("./api/Types.mjs").EvalSummary} contents
|
10
|
+
* @property {import("./api/Types.ts").EvalSummary} contents
|
11
11
|
*/
|
12
12
|
|
13
13
|
/**
|
@@ -25,7 +25,6 @@
|
|
25
25
|
/**
|
26
26
|
* @typedef {Object} ScoreFilter
|
27
27
|
* @property {string} [value]
|
28
|
-
* @property {string} [type]
|
29
28
|
*/
|
30
29
|
|
31
30
|
/**
|
@@ -0,0 +1,133 @@
|
|
1
|
+
import {
|
2
|
+
Version,
|
3
|
+
Status,
|
4
|
+
EvalSpec,
|
5
|
+
EvalPlan,
|
6
|
+
EvalResults,
|
7
|
+
EvalStats,
|
8
|
+
EvalError,
|
9
|
+
Input,
|
10
|
+
Target,
|
11
|
+
Scores1,
|
12
|
+
Type11,
|
13
|
+
EvalLog,
|
14
|
+
EvalSample,
|
15
|
+
} from "../types/log";
|
16
|
+
|
17
|
+
export interface EvalSummary {
|
18
|
+
version?: Version;
|
19
|
+
status?: Status;
|
20
|
+
eval: EvalSpec;
|
21
|
+
plan?: EvalPlan;
|
22
|
+
results?: EvalResults | null;
|
23
|
+
stats?: EvalStats;
|
24
|
+
error?: EvalError | null;
|
25
|
+
sampleSummaries: SampleSummary[];
|
26
|
+
}
|
27
|
+
|
28
|
+
export interface EvalLogHeader {
|
29
|
+
version?: Version;
|
30
|
+
status?: Status;
|
31
|
+
eval: EvalSpec;
|
32
|
+
plan?: EvalPlan;
|
33
|
+
results?: EvalResults;
|
34
|
+
stats?: EvalStats;
|
35
|
+
error?: EvalError;
|
36
|
+
}
|
37
|
+
|
38
|
+
export interface SampleSummary {
|
39
|
+
id: number | string;
|
40
|
+
epoch: number;
|
41
|
+
input: Input;
|
42
|
+
target: Target;
|
43
|
+
scores: Scores1;
|
44
|
+
error?: string;
|
45
|
+
limit?: Type11;
|
46
|
+
}
|
47
|
+
|
48
|
+
export interface BasicSampleData {
|
49
|
+
id: number | string;
|
50
|
+
epoch: number;
|
51
|
+
target: Target;
|
52
|
+
scores: Scores1;
|
53
|
+
}
|
54
|
+
|
55
|
+
export interface Capabilities {
|
56
|
+
downloadFiles: boolean;
|
57
|
+
webWorkers: boolean;
|
58
|
+
}
|
59
|
+
|
60
|
+
export interface LogViewAPI {
|
61
|
+
client_events: () => Promise<any[]>;
|
62
|
+
eval_logs: () => Promise<LogFiles | undefined>;
|
63
|
+
eval_log: (
|
64
|
+
log_file: string,
|
65
|
+
headerOnly?: number,
|
66
|
+
capabilities?: Capabilities,
|
67
|
+
) => Promise<LogContents>;
|
68
|
+
eval_log_size: (log_file: string) => Promise<number>;
|
69
|
+
eval_log_bytes: (
|
70
|
+
log_file: string,
|
71
|
+
start: number,
|
72
|
+
end: number,
|
73
|
+
) => Promise<Uint8Array>;
|
74
|
+
eval_log_headers: (log_files: string[]) => Promise<EvalLog[]>;
|
75
|
+
download_file: (
|
76
|
+
filename: string,
|
77
|
+
filecontents: string | Blob | ArrayBuffer | ArrayBufferView,
|
78
|
+
) => Promise<void>;
|
79
|
+
open_log_file: (logFile: string, log_dir: string) => Promise<void>;
|
80
|
+
}
|
81
|
+
|
82
|
+
export interface ClientAPI {
|
83
|
+
client_events: () => Promise<string[]>;
|
84
|
+
get_log_paths: () => Promise<LogFiles>;
|
85
|
+
get_log_headers: (log_files: string[]) => Promise<EvalLog[]>;
|
86
|
+
get_log_summary: (log_file: string) => Promise<EvalSummary>;
|
87
|
+
get_log_sample: (
|
88
|
+
log_file: string,
|
89
|
+
id: string | number,
|
90
|
+
epoch: number,
|
91
|
+
) => Promise<EvalSample | undefined>;
|
92
|
+
download_file: (
|
93
|
+
file_name: string,
|
94
|
+
file_contents: string | Blob | ArrayBuffer | ArrayBufferView,
|
95
|
+
) => Promise<void>;
|
96
|
+
open_log_file: (log_file: string, log_dir: string) => Promise<void>;
|
97
|
+
}
|
98
|
+
|
99
|
+
export interface FetchResponse {
|
100
|
+
raw: string;
|
101
|
+
parsed: Record<string, any>;
|
102
|
+
}
|
103
|
+
|
104
|
+
export interface EvalHeader {
|
105
|
+
version?: Version;
|
106
|
+
status?: Status;
|
107
|
+
eval: EvalSpec;
|
108
|
+
plan?: EvalPlan;
|
109
|
+
results?: EvalResults | null;
|
110
|
+
stats?: EvalStats;
|
111
|
+
error?: EvalError | null;
|
112
|
+
}
|
113
|
+
|
114
|
+
export interface LogFiles {
|
115
|
+
files: LogFile[];
|
116
|
+
log_dir?: string;
|
117
|
+
}
|
118
|
+
|
119
|
+
export interface LogFile {
|
120
|
+
name: string;
|
121
|
+
task: string;
|
122
|
+
task_id: string;
|
123
|
+
}
|
124
|
+
|
125
|
+
export interface LogContents {
|
126
|
+
raw: string;
|
127
|
+
parsed: EvalLog;
|
128
|
+
}
|
129
|
+
|
130
|
+
export interface LogFilesFetchResponse {
|
131
|
+
raw: string;
|
132
|
+
parsed: Record<string, EvalHeader>;
|
133
|
+
}
|
@@ -1,6 +1,7 @@
|
|
1
|
-
|
2
|
-
import { asyncJsonParse } from "../utils/Json.mjs";
|
3
|
-
import { download_file } from "./api-shared.mjs";
|
1
|
+
import { Capabilities } from "../Types.mjs";
|
2
|
+
import { asyncJsonParse } from "../utils/json-worker";
|
3
|
+
import { download_file } from "./api-shared";
|
4
|
+
import { LogContents, LogViewAPI } from "./Types";
|
4
5
|
|
5
6
|
const loaded_time = Date.now();
|
6
7
|
let last_eval_time = 0;
|
@@ -18,25 +19,29 @@ async function eval_logs() {
|
|
18
19
|
return logs.parsed;
|
19
20
|
}
|
20
21
|
|
21
|
-
async function eval_log(
|
22
|
+
async function eval_log(
|
23
|
+
file: string,
|
24
|
+
headerOnly?: number,
|
25
|
+
_capabilities?: Capabilities,
|
26
|
+
): Promise<LogContents> {
|
22
27
|
return await api(
|
23
28
|
"GET",
|
24
29
|
`/api/logs/${encodeURIComponent(file)}?header-only=${headerOnly}`,
|
25
30
|
);
|
26
31
|
}
|
27
32
|
|
28
|
-
async function eval_log_size(file) {
|
33
|
+
async function eval_log_size(file: string): Promise<number> {
|
29
34
|
return (await api("GET", `/api/log-size/${encodeURIComponent(file)}`)).parsed;
|
30
35
|
}
|
31
36
|
|
32
|
-
async function eval_log_bytes(file, start, end) {
|
37
|
+
async function eval_log_bytes(file: string, start: number, end: number) {
|
33
38
|
return await api_bytes(
|
34
39
|
"GET",
|
35
40
|
`/api/log-bytes/${encodeURIComponent(file)}?start=${start}&end=${end}`,
|
36
41
|
);
|
37
42
|
}
|
38
43
|
|
39
|
-
async function eval_log_headers(files) {
|
44
|
+
async function eval_log_headers(files: string[]) {
|
40
45
|
const params = new URLSearchParams();
|
41
46
|
for (const file of files) {
|
42
47
|
params.append("file", file);
|
@@ -44,9 +49,13 @@ async function eval_log_headers(files) {
|
|
44
49
|
return (await api("GET", `/api/log-headers?${params.toString()}`)).parsed;
|
45
50
|
}
|
46
51
|
|
47
|
-
async function api(method, path, body) {
|
52
|
+
async function api(
|
53
|
+
method: "GET" | "POST" | "PUT" | "DELETE",
|
54
|
+
path: string,
|
55
|
+
body?: string,
|
56
|
+
) {
|
48
57
|
// build headers
|
49
|
-
const headers = {
|
58
|
+
const headers: HeadersInit = {
|
50
59
|
Accept: "application/json",
|
51
60
|
Pragma: "no-cache",
|
52
61
|
Expires: "0",
|
@@ -73,9 +82,12 @@ async function api(method, path, body) {
|
|
73
82
|
}
|
74
83
|
}
|
75
84
|
|
76
|
-
async function api_bytes(
|
85
|
+
async function api_bytes(
|
86
|
+
method: "GET" | "POST" | "PUT" | "DELETE",
|
87
|
+
path: string,
|
88
|
+
) {
|
77
89
|
// build headers
|
78
|
-
const headers = {
|
90
|
+
const headers: HeadersInit = {
|
79
91
|
Accept: "application/octet-stream",
|
80
92
|
Pragma: "no-cache",
|
81
93
|
Expires: "0",
|
@@ -100,8 +112,7 @@ async function open_log_file() {
|
|
100
112
|
// No op
|
101
113
|
}
|
102
114
|
|
103
|
-
|
104
|
-
export default {
|
115
|
+
const browserApi: LogViewAPI = {
|
105
116
|
client_events,
|
106
117
|
eval_logs,
|
107
118
|
eval_log,
|
@@ -111,3 +122,4 @@ export default {
|
|
111
122
|
download_file,
|
112
123
|
open_log_file,
|
113
124
|
};
|
125
|
+
export default browserApi;
|
@@ -0,0 +1,219 @@
|
|
1
|
+
//@ts-check
|
2
|
+
import { asyncJsonParse } from "../utils/json-worker";
|
3
|
+
import { download_file, encodePathParts } from "./api-shared";
|
4
|
+
import { fetchRange, fetchSize } from "../utils/remoteZipFile.mjs";
|
5
|
+
import {
|
6
|
+
Capabilities,
|
7
|
+
LogContents,
|
8
|
+
LogFiles,
|
9
|
+
LogFilesFetchResponse,
|
10
|
+
LogViewAPI,
|
11
|
+
} from "./Types";
|
12
|
+
import { EvalLog } from "../types/log";
|
13
|
+
|
14
|
+
interface LogInfo {
|
15
|
+
log_dir?: string;
|
16
|
+
log_file?: string;
|
17
|
+
}
|
18
|
+
|
19
|
+
/**
|
20
|
+
* This provides an API implementation that will serve a single
|
21
|
+
* file using an http parameter, designed to be deployed
|
22
|
+
* to a webserver without inspect or the ability to enumerate log
|
23
|
+
* files
|
24
|
+
*/
|
25
|
+
export default function simpleHttpApi(
|
26
|
+
log_dir?: string,
|
27
|
+
log_file?: string,
|
28
|
+
): LogViewAPI {
|
29
|
+
const resolved_log_dir = log_dir?.replace(" ", "+");
|
30
|
+
const resolved_log_path = log_file ? log_file.replace(" ", "+") : undefined;
|
31
|
+
return simpleHttpAPI({
|
32
|
+
log_file: resolved_log_path,
|
33
|
+
log_dir: resolved_log_dir,
|
34
|
+
});
|
35
|
+
}
|
36
|
+
|
37
|
+
/**
|
38
|
+
* Fetches a file from the specified URL and parses its content.
|
39
|
+
*/
|
40
|
+
function simpleHttpAPI(logInfo: LogInfo): LogViewAPI {
|
41
|
+
const log_file = logInfo.log_file;
|
42
|
+
const log_dir = logInfo.log_dir;
|
43
|
+
|
44
|
+
async function open_log_file() {
|
45
|
+
// No op
|
46
|
+
}
|
47
|
+
return {
|
48
|
+
client_events: async () => {
|
49
|
+
// There are no client events in the case of serving via
|
50
|
+
// http
|
51
|
+
return Promise.resolve([]);
|
52
|
+
},
|
53
|
+
eval_logs: async (): Promise<LogFiles | undefined> => {
|
54
|
+
// First check based upon the log dir
|
55
|
+
if (log_dir) {
|
56
|
+
const headers = await fetchLogHeaders(log_dir);
|
57
|
+
if (headers) {
|
58
|
+
const logRecord = headers.parsed;
|
59
|
+
const logs = Object.keys(logRecord).map((key) => {
|
60
|
+
return {
|
61
|
+
name: joinURI(log_dir, key),
|
62
|
+
task: logRecord[key].eval.task,
|
63
|
+
task_id: logRecord[key].eval.task_id,
|
64
|
+
};
|
65
|
+
});
|
66
|
+
return Promise.resolve({
|
67
|
+
files: logs,
|
68
|
+
log_dir,
|
69
|
+
});
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
return undefined;
|
74
|
+
},
|
75
|
+
eval_log: async (
|
76
|
+
log_file: string,
|
77
|
+
_headerOnly?: number,
|
78
|
+
_capabilities?: Capabilities,
|
79
|
+
) => {
|
80
|
+
const response = await fetchLogFile(log_file);
|
81
|
+
if (response) {
|
82
|
+
return response;
|
83
|
+
} else {
|
84
|
+
throw new Error(`"Unable to load eval log ${log_file}`);
|
85
|
+
}
|
86
|
+
},
|
87
|
+
eval_log_size: async (log_file: string) => {
|
88
|
+
return await fetchSize(log_file);
|
89
|
+
},
|
90
|
+
eval_log_bytes: async (log_file: string, start: number, end: number) => {
|
91
|
+
return await fetchRange(log_file, start, end);
|
92
|
+
},
|
93
|
+
eval_log_headers: async (files: string[]) => {
|
94
|
+
if (files.length === 0) {
|
95
|
+
return [];
|
96
|
+
}
|
97
|
+
|
98
|
+
if (log_dir) {
|
99
|
+
const headers = await fetchLogHeaders(log_dir);
|
100
|
+
if (headers) {
|
101
|
+
const keys = Object.keys(headers.parsed);
|
102
|
+
const result: EvalLog[] = [];
|
103
|
+
files.forEach((file) => {
|
104
|
+
const fileKey = keys.find((key) => {
|
105
|
+
return file.endsWith(key);
|
106
|
+
});
|
107
|
+
if (fileKey) {
|
108
|
+
result.push(headers.parsed[fileKey]);
|
109
|
+
}
|
110
|
+
});
|
111
|
+
return result;
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
115
|
+
// No log.json could be found, and there isn't a log file,
|
116
|
+
throw new Error(
|
117
|
+
`Failed to load a manifest files using the directory: ${log_dir}. Please be sure you have deployed a manifest file (logs.json).`,
|
118
|
+
);
|
119
|
+
},
|
120
|
+
download_file,
|
121
|
+
open_log_file,
|
122
|
+
};
|
123
|
+
}
|
124
|
+
|
125
|
+
/**
|
126
|
+
* Fetches a file from the specified URL and parses its content.
|
127
|
+
*/
|
128
|
+
async function fetchFile<T>(
|
129
|
+
url: string,
|
130
|
+
parse: (text: string) => Promise<T>,
|
131
|
+
handleError?: (response: Response) => boolean,
|
132
|
+
): Promise<T | undefined> {
|
133
|
+
const safe_url = encodePathParts(url);
|
134
|
+
const response = await fetch(`${safe_url}`, { method: "GET" });
|
135
|
+
if (response.ok) {
|
136
|
+
const text = await response.text();
|
137
|
+
return await parse(text);
|
138
|
+
} else if (response.status !== 200) {
|
139
|
+
if (handleError && handleError(response)) {
|
140
|
+
return undefined;
|
141
|
+
}
|
142
|
+
const message = (await response.text()) || response.statusText;
|
143
|
+
const error = new Error(`${response.status}: ${message})`);
|
144
|
+
throw error;
|
145
|
+
} else {
|
146
|
+
throw new Error(`${response.status} - ${response.statusText} `);
|
147
|
+
}
|
148
|
+
}
|
149
|
+
|
150
|
+
/**
|
151
|
+
* Fetches a log file and parses its content, updating the log structure if necessary.
|
152
|
+
*/
|
153
|
+
const fetchLogFile = async (file: string): Promise<LogContents | undefined> => {
|
154
|
+
return fetchFile<LogContents>(file, async (text): Promise<LogContents> => {
|
155
|
+
const log = (await asyncJsonParse(text)) as EvalLog;
|
156
|
+
if (log.version === 1) {
|
157
|
+
if (log.results) {
|
158
|
+
const untypedLog = log as any;
|
159
|
+
log.results.scores = [];
|
160
|
+
untypedLog.results.scorer.scorer = untypedLog.results.scorer.name;
|
161
|
+
log.results.scores.push(untypedLog.results.scorer);
|
162
|
+
delete untypedLog.results.scorer;
|
163
|
+
log.results.scores[0].metrics = untypedLog.results.metrics;
|
164
|
+
delete untypedLog.results.metrics;
|
165
|
+
|
166
|
+
// migrate samples
|
167
|
+
const scorerName = log.results.scores[0].name;
|
168
|
+
log.samples?.forEach((sample) => {
|
169
|
+
const untypedSample = sample as any;
|
170
|
+
sample.scores = { [scorerName]: untypedSample.score };
|
171
|
+
delete untypedSample.score;
|
172
|
+
});
|
173
|
+
}
|
174
|
+
}
|
175
|
+
return {
|
176
|
+
raw: text,
|
177
|
+
parsed: log,
|
178
|
+
};
|
179
|
+
});
|
180
|
+
};
|
181
|
+
|
182
|
+
/**
|
183
|
+
* Fetches a log file and parses its content, updating the log structure if necessary.
|
184
|
+
*/
|
185
|
+
const fetchLogHeaders = async (
|
186
|
+
log_dir: string,
|
187
|
+
): Promise<LogFilesFetchResponse | undefined> => {
|
188
|
+
const logs = await fetchFile<LogFilesFetchResponse>(
|
189
|
+
log_dir + "/logs.json",
|
190
|
+
async (text) => {
|
191
|
+
const parsed = await asyncJsonParse(text);
|
192
|
+
return {
|
193
|
+
raw: text,
|
194
|
+
parsed,
|
195
|
+
};
|
196
|
+
},
|
197
|
+
(response) => {
|
198
|
+
if (response.status === 404) {
|
199
|
+
// Couldn't find a header file
|
200
|
+
return true;
|
201
|
+
} else {
|
202
|
+
return false;
|
203
|
+
}
|
204
|
+
},
|
205
|
+
);
|
206
|
+
return logs;
|
207
|
+
};
|
208
|
+
|
209
|
+
/**
|
210
|
+
* Joins multiple URI segments into a single URI string.
|
211
|
+
*
|
212
|
+
* This function removes any leading or trailing slashes from each segment
|
213
|
+
* and then joins them with a single slash (`/`).
|
214
|
+
*/
|
215
|
+
function joinURI(...segments: string[]): string {
|
216
|
+
return segments
|
217
|
+
.map((segment) => segment.replace(/(^\/+|\/+$)/g, "")) // Remove leading/trailing slashes from each segment
|
218
|
+
.join("/");
|
219
|
+
}
|
@@ -0,0 +1,47 @@
|
|
1
|
+
/**
|
2
|
+
* Downloads the provided content as a file using the browser's DOM API
|
3
|
+
*/
|
4
|
+
export async function download_file(
|
5
|
+
filename: string,
|
6
|
+
filecontents: string | Blob | ArrayBuffer | ArrayBufferView,
|
7
|
+
): Promise<void> {
|
8
|
+
const blob = new Blob([filecontents], { type: "text/plain" });
|
9
|
+
const link = document.createElement("a");
|
10
|
+
link.href = URL.createObjectURL(blob);
|
11
|
+
link.download = filename;
|
12
|
+
document.body.appendChild(link);
|
13
|
+
link.click();
|
14
|
+
document.body.removeChild(link);
|
15
|
+
}
|
16
|
+
|
17
|
+
/**
|
18
|
+
* Encodes the path segments of a URL or relative path to ensure special characters
|
19
|
+
* (like `+`, spaces, etc.) are properly encoded without affecting legal characters like `/`.
|
20
|
+
*
|
21
|
+
* This function will encode file names and path portions of both absolute URLs and
|
22
|
+
* relative paths. It ensures that components of a full URL, such as the protocol and
|
23
|
+
* query parameters, remain intact, while only encoding the path.
|
24
|
+
*/
|
25
|
+
export function encodePathParts(url: string): string {
|
26
|
+
if (!url) return url; // Handle empty strings
|
27
|
+
|
28
|
+
try {
|
29
|
+
// Parse a full Uri
|
30
|
+
const fullUrl = new URL(url);
|
31
|
+
fullUrl.pathname = fullUrl.pathname
|
32
|
+
.split("/")
|
33
|
+
.map((segment) =>
|
34
|
+
segment ? encodeURIComponent(decodeURIComponent(segment)) : "",
|
35
|
+
)
|
36
|
+
.join("/");
|
37
|
+
return fullUrl.toString();
|
38
|
+
} catch {
|
39
|
+
// This is a relative path that isn't parseable as Uri
|
40
|
+
return url
|
41
|
+
.split("/")
|
42
|
+
.map((segment) =>
|
43
|
+
segment ? encodeURIComponent(decodeURIComponent(segment)) : "",
|
44
|
+
)
|
45
|
+
.join("/");
|
46
|
+
}
|
47
|
+
}
|