inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -9
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +79 -12
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/eval.py +10 -1
  13. inspect_ai/_eval/loader.py +79 -19
  14. inspect_ai/_eval/registry.py +6 -0
  15. inspect_ai/_eval/score.py +3 -1
  16. inspect_ai/_eval/task/results.py +51 -22
  17. inspect_ai/_eval/task/run.py +47 -13
  18. inspect_ai/_eval/task/sandbox.py +10 -5
  19. inspect_ai/_util/constants.py +1 -0
  20. inspect_ai/_util/port_names.py +61 -0
  21. inspect_ai/_util/text.py +23 -0
  22. inspect_ai/_view/www/App.css +31 -1
  23. inspect_ai/_view/www/dist/assets/index.css +31 -1
  24. inspect_ai/_view/www/dist/assets/index.js +25498 -2044
  25. inspect_ai/_view/www/log-schema.json +32 -2
  26. inspect_ai/_view/www/package.json +2 -0
  27. inspect_ai/_view/www/src/App.mjs +14 -16
  28. inspect_ai/_view/www/src/Types.mjs +1 -2
  29. inspect_ai/_view/www/src/api/Types.ts +133 -0
  30. inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
  31. inspect_ai/_view/www/src/api/api-http.ts +219 -0
  32. inspect_ai/_view/www/src/api/api-shared.ts +47 -0
  33. inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
  34. inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
  35. inspect_ai/_view/www/src/api/index.ts +51 -0
  36. inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
  37. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  38. inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
  39. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  40. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  41. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  42. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  43. inspect_ai/_view/www/src/index.js +77 -4
  44. inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
  45. inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
  46. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
  47. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  48. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  49. inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
  50. inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
  51. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  52. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
  53. inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
  54. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  55. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  56. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
  76. inspect_ai/_view/www/src/utils/vscode.ts +36 -0
  77. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
  78. inspect_ai/_view/www/vite.config.js +7 -0
  79. inspect_ai/_view/www/yarn.lock +116 -0
  80. inspect_ai/approval/_human/__init__.py +0 -0
  81. inspect_ai/approval/_human/manager.py +1 -1
  82. inspect_ai/approval/_policy.py +12 -6
  83. inspect_ai/log/_log.py +1 -1
  84. inspect_ai/log/_samples.py +16 -0
  85. inspect_ai/log/_transcript.py +4 -1
  86. inspect_ai/model/_call_tools.py +59 -0
  87. inspect_ai/model/_conversation.py +16 -7
  88. inspect_ai/model/_generate_config.py +12 -12
  89. inspect_ai/model/_model.py +117 -18
  90. inspect_ai/model/_model_output.py +22 -2
  91. inspect_ai/model/_openai.py +383 -0
  92. inspect_ai/model/_providers/anthropic.py +152 -55
  93. inspect_ai/model/_providers/azureai.py +21 -21
  94. inspect_ai/model/_providers/bedrock.py +37 -40
  95. inspect_ai/model/_providers/goodfire.py +248 -0
  96. inspect_ai/model/_providers/google.py +46 -54
  97. inspect_ai/model/_providers/groq.py +7 -3
  98. inspect_ai/model/_providers/hf.py +6 -0
  99. inspect_ai/model/_providers/mistral.py +13 -12
  100. inspect_ai/model/_providers/openai.py +51 -218
  101. inspect_ai/model/_providers/openai_o1.py +11 -12
  102. inspect_ai/model/_providers/providers.py +23 -1
  103. inspect_ai/model/_providers/together.py +12 -12
  104. inspect_ai/model/_providers/util/__init__.py +2 -3
  105. inspect_ai/model/_providers/util/hf_handler.py +1 -1
  106. inspect_ai/model/_providers/util/llama31.py +1 -1
  107. inspect_ai/model/_providers/util/util.py +0 -76
  108. inspect_ai/model/_providers/vertex.py +1 -4
  109. inspect_ai/scorer/_metric.py +3 -0
  110. inspect_ai/scorer/_reducer/reducer.py +1 -1
  111. inspect_ai/scorer/_scorer.py +4 -3
  112. inspect_ai/solver/__init__.py +4 -5
  113. inspect_ai/solver/_basic_agent.py +1 -1
  114. inspect_ai/solver/_bridge/__init__.py +3 -0
  115. inspect_ai/solver/_bridge/bridge.py +100 -0
  116. inspect_ai/solver/_bridge/patch.py +170 -0
  117. inspect_ai/solver/_prompt.py +35 -5
  118. inspect_ai/solver/_solver.py +6 -0
  119. inspect_ai/solver/_task_state.py +80 -38
  120. inspect_ai/tool/__init__.py +2 -0
  121. inspect_ai/tool/_tool.py +12 -1
  122. inspect_ai/tool/_tool_call.py +10 -0
  123. inspect_ai/tool/_tool_def.py +16 -5
  124. inspect_ai/tool/_tool_with.py +21 -4
  125. inspect_ai/tool/beta/__init__.py +5 -0
  126. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  127. inspect_ai/tool/beta/_computer/_common.py +133 -0
  128. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  129. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  130. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  131. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  134. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  135. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  136. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  137. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  138. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  139. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  144. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  145. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  146. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  147. inspect_ai/util/__init__.py +2 -0
  148. inspect_ai/util/_display.py +5 -0
  149. inspect_ai/util/_limit.py +26 -0
  150. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
  153. inspect_ai/util/_sandbox/environment.py +14 -0
  154. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
  155. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
  156. inspect_ai/_view/www/src/api/Types.mjs +0 -117
  157. inspect_ai/_view/www/src/api/api-http.mjs +0 -300
  158. inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
  159. inspect_ai/_view/www/src/api/index.mjs +0 -49
  160. inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
  161. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  162. inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
  163. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
  164. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
  165. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
  166. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
@@ -1137,6 +1137,7 @@
1137
1137
  "logprobs": null,
1138
1138
  "top_logprobs": null,
1139
1139
  "parallel_tool_calls": null,
1140
+ "internal_tools": null,
1140
1141
  "max_tool_output": null,
1141
1142
  "cache_prompt": null,
1142
1143
  "reasoning_effort": null
@@ -1516,7 +1517,8 @@
1516
1517
  "time",
1517
1518
  "message",
1518
1519
  "token",
1519
- "operator"
1520
+ "operator",
1521
+ "custom"
1520
1522
  ],
1521
1523
  "title": "Type",
1522
1524
  "type": "string"
@@ -2190,6 +2192,18 @@
2190
2192
  "default": null,
2191
2193
  "title": "Parallel Tool Calls"
2192
2194
  },
2195
+ "internal_tools": {
2196
+ "anyOf": [
2197
+ {
2198
+ "type": "boolean"
2199
+ },
2200
+ {
2201
+ "type": "null"
2202
+ }
2203
+ ],
2204
+ "default": null,
2205
+ "title": "Internal Tools"
2206
+ },
2193
2207
  "max_tool_output": {
2194
2208
  "anyOf": [
2195
2209
  {
@@ -2258,6 +2272,7 @@
2258
2272
  "logprobs",
2259
2273
  "top_logprobs",
2260
2274
  "parallel_tool_calls",
2275
+ "internal_tools",
2261
2276
  "max_tool_output",
2262
2277
  "cache_prompt",
2263
2278
  "reasoning_effort"
@@ -2681,6 +2696,18 @@
2681
2696
  "output": {
2682
2697
  "$ref": "#/$defs/ModelOutput"
2683
2698
  },
2699
+ "error": {
2700
+ "anyOf": [
2701
+ {
2702
+ "type": "string"
2703
+ },
2704
+ {
2705
+ "type": "null"
2706
+ }
2707
+ ],
2708
+ "default": null,
2709
+ "title": "Error"
2710
+ },
2684
2711
  "cache": {
2685
2712
  "anyOf": [
2686
2713
  {
@@ -2719,6 +2746,7 @@
2719
2746
  "tool_choice",
2720
2747
  "config",
2721
2748
  "output",
2749
+ "error",
2722
2750
  "cache",
2723
2751
  "call"
2724
2752
  ],
@@ -3066,7 +3094,8 @@
3066
3094
  "message",
3067
3095
  "time",
3068
3096
  "token",
3069
- "operator"
3097
+ "operator",
3098
+ "custom"
3070
3099
  ],
3071
3100
  "title": "Type",
3072
3101
  "type": "string"
@@ -4207,6 +4236,7 @@
4207
4236
  "best_of": null,
4208
4237
  "cache_prompt": null,
4209
4238
  "frequency_penalty": null,
4239
+ "internal_tools": null,
4210
4240
  "logit_bias": null,
4211
4241
  "logprobs": null,
4212
4242
  "max_connections": null,
@@ -30,8 +30,10 @@
30
30
  "bootstrap": "^5.3.3",
31
31
  "bootstrap-icons": "^1.11.3",
32
32
  "clipboard": "^2.0.11",
33
+ "codemirror": "^6.0.1",
33
34
  "fast-json-patch": "^3.1.1",
34
35
  "fflate": "^0.8.2",
36
+ "filtrex": "^3.1.0",
35
37
  "htm": "^3.1.1",
36
38
  "json": "^11.0.0",
37
39
  "json5": "^2.2.3",
@@ -30,7 +30,7 @@ import { Sidebar } from "./sidebar/Sidebar.mjs";
30
30
  import { WorkSpace } from "./workspace/WorkSpace.mjs";
31
31
  import { FindBand } from "./components/FindBand.mjs";
32
32
  import { isVscode } from "./utils/Html.mjs";
33
- import { getVscodeApi } from "./utils/vscode.mjs";
33
+ import { getVscodeApi } from "./utils/vscode";
34
34
  import { kDefaultSort } from "./constants.mjs";
35
35
  import {
36
36
  createEvalDescriptor,
@@ -38,7 +38,7 @@ import {
38
38
  } from "./samples/SamplesDescriptor.mjs";
39
39
  import { byEpoch, bySample, sortSamples } from "./samples/tools/SortFilter.mjs";
40
40
  import { resolveAttachments } from "./utils/attachments.mjs";
41
- import { filterFnForType } from "./samples/tools/filters.mjs";
41
+ import { filterSamples } from "./samples/tools/filters.mjs";
42
42
 
43
43
  import {
44
44
  kEvalWorkspaceTabId,
@@ -51,7 +51,7 @@ import {
51
51
  * Renders the Main Application
52
52
  *
53
53
  * @param {Object} props - The parameters for the component.
54
- * @param {import("./api/Types.mjs").ClientAPI} props.api - The api that this view should use
54
+ * @param {import("./api/Types.ts").ClientAPI} props.api - The api that this view should use
55
55
  * @param {Object} [props.initialState] - Initial state for app (optional, used by VS Code extension)
56
56
  * @param {(state: Object) => void} [props.saveInitialState] - Save initial state for app (optional, used by VS Code extension)
57
57
  * @param {boolean} props.pollForLogs - Whether the application should poll for log changes
@@ -308,21 +308,19 @@ export function App({
308
308
 
309
309
  useEffect(() => {
310
310
  const samples = selectedLog?.contents?.sampleSummaries || [];
311
- const filtered = samples.filter((sample) => {
311
+ const { result: prefiltered } = filterSamples(
312
+ evalDescriptor,
313
+ samples,
314
+ filter?.value,
315
+ );
316
+ const filtered = prefiltered.filter((sample) => {
312
317
  // Filter by epoch if specified
313
318
  if (epoch && epoch !== "all") {
314
319
  if (epoch !== sample.epoch + "") {
315
320
  return false;
316
321
  }
317
322
  }
318
-
319
- // Apply the filter
320
- const filterFn = filterFnForType(filter);
321
- if (filterFn && filter.value) {
322
- return filterFn(samplesDescriptor, sample, filter.value);
323
- } else {
324
- return true;
325
- }
323
+ return true;
326
324
  });
327
325
 
328
326
  // Sort the samples
@@ -509,12 +507,12 @@ export function App({
509
507
  * Determines whether the workspace tab should display samples or info,
510
508
  * depending on the presence of samples and the log status.
511
509
  *
512
- * @param {import("./api/Types.mjs").EvalSummary} log - The log object containing sample summaries and status.
510
+ * @param {import("./api/Types.ts").EvalSummary} log - The log object containing sample summaries and status.
513
511
  * @returns {void}
514
512
  */
515
513
  const resetWorkspace = useCallback(
516
514
  /**
517
- * @param {import("./api/Types.mjs").EvalSummary} log
515
+ * @param {import("./api/Types.ts").EvalSummary} log
518
516
  */
519
517
  (log) => {
520
518
  // Reset the workspace tab
@@ -961,7 +959,7 @@ export function App({
961
959
  /**
962
960
  * Determines the default scorer for a log
963
961
  *
964
- * @param {import("./api/Types.mjs").EvalSummary} log - The log object containing sample summaries and status.
962
+ * @param {import("./api/Types.ts").EvalSummary} log - The log object containing sample summaries and status.
965
963
  * @returns {{name: string, scorer: string} | undefined} A scorer object with name and scorer properties, or undefined
966
964
  */
967
965
  const defaultScorer = (log) => {
@@ -983,7 +981,7 @@ const defaultScorer = (log) => {
983
981
  /**
984
982
  * Determines the default scorers for a log
985
983
  *
986
- * @param {import("./api/Types.mjs").EvalSummary} log - The log object containing sample summaries and status.
984
+ * @param {import("./api/Types.ts").EvalSummary} log - The log object containing sample summaries and status.
987
985
  * @returns {Array<{name: string, scorer: string}>} An array of scorer objects with name and scorer properties, or an empty array if no scorers are found.
988
986
  */
989
987
  const defaultScorers = (log) => {
@@ -7,7 +7,7 @@
7
7
  /**
8
8
  * @typedef {Object} CurrentLog
9
9
  * @property {string} name
10
- * @property {import("./api/Types.mjs").EvalSummary} contents
10
+ * @property {import("./api/Types.ts").EvalSummary} contents
11
11
  */
12
12
 
13
13
  /**
@@ -25,7 +25,6 @@
25
25
  /**
26
26
  * @typedef {Object} ScoreFilter
27
27
  * @property {string} [value]
28
- * @property {string} [type]
29
28
  */
30
29
 
31
30
  /**
@@ -0,0 +1,133 @@
1
+ import {
2
+ Version,
3
+ Status,
4
+ EvalSpec,
5
+ EvalPlan,
6
+ EvalResults,
7
+ EvalStats,
8
+ EvalError,
9
+ Input,
10
+ Target,
11
+ Scores1,
12
+ Type11,
13
+ EvalLog,
14
+ EvalSample,
15
+ } from "../types/log";
16
+
17
+ export interface EvalSummary {
18
+ version?: Version;
19
+ status?: Status;
20
+ eval: EvalSpec;
21
+ plan?: EvalPlan;
22
+ results?: EvalResults | null;
23
+ stats?: EvalStats;
24
+ error?: EvalError | null;
25
+ sampleSummaries: SampleSummary[];
26
+ }
27
+
28
+ export interface EvalLogHeader {
29
+ version?: Version;
30
+ status?: Status;
31
+ eval: EvalSpec;
32
+ plan?: EvalPlan;
33
+ results?: EvalResults;
34
+ stats?: EvalStats;
35
+ error?: EvalError;
36
+ }
37
+
38
+ export interface SampleSummary {
39
+ id: number | string;
40
+ epoch: number;
41
+ input: Input;
42
+ target: Target;
43
+ scores: Scores1;
44
+ error?: string;
45
+ limit?: Type11;
46
+ }
47
+
48
+ export interface BasicSampleData {
49
+ id: number | string;
50
+ epoch: number;
51
+ target: Target;
52
+ scores: Scores1;
53
+ }
54
+
55
+ export interface Capabilities {
56
+ downloadFiles: boolean;
57
+ webWorkers: boolean;
58
+ }
59
+
60
+ export interface LogViewAPI {
61
+ client_events: () => Promise<any[]>;
62
+ eval_logs: () => Promise<LogFiles | undefined>;
63
+ eval_log: (
64
+ log_file: string,
65
+ headerOnly?: number,
66
+ capabilities?: Capabilities,
67
+ ) => Promise<LogContents>;
68
+ eval_log_size: (log_file: string) => Promise<number>;
69
+ eval_log_bytes: (
70
+ log_file: string,
71
+ start: number,
72
+ end: number,
73
+ ) => Promise<Uint8Array>;
74
+ eval_log_headers: (log_files: string[]) => Promise<EvalLog[]>;
75
+ download_file: (
76
+ filename: string,
77
+ filecontents: string | Blob | ArrayBuffer | ArrayBufferView,
78
+ ) => Promise<void>;
79
+ open_log_file: (logFile: string, log_dir: string) => Promise<void>;
80
+ }
81
+
82
+ export interface ClientAPI {
83
+ client_events: () => Promise<string[]>;
84
+ get_log_paths: () => Promise<LogFiles>;
85
+ get_log_headers: (log_files: string[]) => Promise<EvalLog[]>;
86
+ get_log_summary: (log_file: string) => Promise<EvalSummary>;
87
+ get_log_sample: (
88
+ log_file: string,
89
+ id: string | number,
90
+ epoch: number,
91
+ ) => Promise<EvalSample | undefined>;
92
+ download_file: (
93
+ file_name: string,
94
+ file_contents: string | Blob | ArrayBuffer | ArrayBufferView,
95
+ ) => Promise<void>;
96
+ open_log_file: (log_file: string, log_dir: string) => Promise<void>;
97
+ }
98
+
99
+ export interface FetchResponse {
100
+ raw: string;
101
+ parsed: Record<string, any>;
102
+ }
103
+
104
+ export interface EvalHeader {
105
+ version?: Version;
106
+ status?: Status;
107
+ eval: EvalSpec;
108
+ plan?: EvalPlan;
109
+ results?: EvalResults | null;
110
+ stats?: EvalStats;
111
+ error?: EvalError | null;
112
+ }
113
+
114
+ export interface LogFiles {
115
+ files: LogFile[];
116
+ log_dir?: string;
117
+ }
118
+
119
+ export interface LogFile {
120
+ name: string;
121
+ task: string;
122
+ task_id: string;
123
+ }
124
+
125
+ export interface LogContents {
126
+ raw: string;
127
+ parsed: EvalLog;
128
+ }
129
+
130
+ export interface LogFilesFetchResponse {
131
+ raw: string;
132
+ parsed: Record<string, EvalHeader>;
133
+ }
@@ -1,6 +1,7 @@
1
- //@ts-check
2
- import { asyncJsonParse } from "../utils/Json.mjs";
3
- import { download_file } from "./api-shared.mjs";
1
+ import { Capabilities } from "../Types.mjs";
2
+ import { asyncJsonParse } from "../utils/json-worker";
3
+ import { download_file } from "./api-shared";
4
+ import { LogContents, LogViewAPI } from "./Types";
4
5
 
5
6
  const loaded_time = Date.now();
6
7
  let last_eval_time = 0;
@@ -18,25 +19,29 @@ async function eval_logs() {
18
19
  return logs.parsed;
19
20
  }
20
21
 
21
- async function eval_log(file, headerOnly) {
22
+ async function eval_log(
23
+ file: string,
24
+ headerOnly?: number,
25
+ _capabilities?: Capabilities,
26
+ ): Promise<LogContents> {
22
27
  return await api(
23
28
  "GET",
24
29
  `/api/logs/${encodeURIComponent(file)}?header-only=${headerOnly}`,
25
30
  );
26
31
  }
27
32
 
28
- async function eval_log_size(file) {
33
+ async function eval_log_size(file: string): Promise<number> {
29
34
  return (await api("GET", `/api/log-size/${encodeURIComponent(file)}`)).parsed;
30
35
  }
31
36
 
32
- async function eval_log_bytes(file, start, end) {
37
+ async function eval_log_bytes(file: string, start: number, end: number) {
33
38
  return await api_bytes(
34
39
  "GET",
35
40
  `/api/log-bytes/${encodeURIComponent(file)}?start=${start}&end=${end}`,
36
41
  );
37
42
  }
38
43
 
39
- async function eval_log_headers(files) {
44
+ async function eval_log_headers(files: string[]) {
40
45
  const params = new URLSearchParams();
41
46
  for (const file of files) {
42
47
  params.append("file", file);
@@ -44,9 +49,13 @@ async function eval_log_headers(files) {
44
49
  return (await api("GET", `/api/log-headers?${params.toString()}`)).parsed;
45
50
  }
46
51
 
47
- async function api(method, path, body) {
52
+ async function api(
53
+ method: "GET" | "POST" | "PUT" | "DELETE",
54
+ path: string,
55
+ body?: string,
56
+ ) {
48
57
  // build headers
49
- const headers = {
58
+ const headers: HeadersInit = {
50
59
  Accept: "application/json",
51
60
  Pragma: "no-cache",
52
61
  Expires: "0",
@@ -73,9 +82,12 @@ async function api(method, path, body) {
73
82
  }
74
83
  }
75
84
 
76
- async function api_bytes(method, path) {
85
+ async function api_bytes(
86
+ method: "GET" | "POST" | "PUT" | "DELETE",
87
+ path: string,
88
+ ) {
77
89
  // build headers
78
- const headers = {
90
+ const headers: HeadersInit = {
79
91
  Accept: "application/octet-stream",
80
92
  Pragma: "no-cache",
81
93
  Expires: "0",
@@ -100,8 +112,7 @@ async function open_log_file() {
100
112
  // No op
101
113
  }
102
114
 
103
- /** @type {import("./Types.mjs").LogViewAPI} */
104
- export default {
115
+ const browserApi: LogViewAPI = {
105
116
  client_events,
106
117
  eval_logs,
107
118
  eval_log,
@@ -111,3 +122,4 @@ export default {
111
122
  download_file,
112
123
  open_log_file,
113
124
  };
125
+ export default browserApi;
@@ -0,0 +1,219 @@
1
+ //@ts-check
2
+ import { asyncJsonParse } from "../utils/json-worker";
3
+ import { download_file, encodePathParts } from "./api-shared";
4
+ import { fetchRange, fetchSize } from "../utils/remoteZipFile.mjs";
5
+ import {
6
+ Capabilities,
7
+ LogContents,
8
+ LogFiles,
9
+ LogFilesFetchResponse,
10
+ LogViewAPI,
11
+ } from "./Types";
12
+ import { EvalLog } from "../types/log";
13
+
14
+ interface LogInfo {
15
+ log_dir?: string;
16
+ log_file?: string;
17
+ }
18
+
19
+ /**
20
+ * This provides an API implementation that will serve a single
21
+ * file using an http parameter, designed to be deployed
22
+ * to a webserver without inspect or the ability to enumerate log
23
+ * files
24
+ */
25
+ export default function simpleHttpApi(
26
+ log_dir?: string,
27
+ log_file?: string,
28
+ ): LogViewAPI {
29
+ const resolved_log_dir = log_dir?.replace(" ", "+");
30
+ const resolved_log_path = log_file ? log_file.replace(" ", "+") : undefined;
31
+ return simpleHttpAPI({
32
+ log_file: resolved_log_path,
33
+ log_dir: resolved_log_dir,
34
+ });
35
+ }
36
+
37
+ /**
38
+ * Fetches a file from the specified URL and parses its content.
39
+ */
40
+ function simpleHttpAPI(logInfo: LogInfo): LogViewAPI {
41
+ const log_file = logInfo.log_file;
42
+ const log_dir = logInfo.log_dir;
43
+
44
+ async function open_log_file() {
45
+ // No op
46
+ }
47
+ return {
48
+ client_events: async () => {
49
+ // There are no client events in the case of serving via
50
+ // http
51
+ return Promise.resolve([]);
52
+ },
53
+ eval_logs: async (): Promise<LogFiles | undefined> => {
54
+ // First check based upon the log dir
55
+ if (log_dir) {
56
+ const headers = await fetchLogHeaders(log_dir);
57
+ if (headers) {
58
+ const logRecord = headers.parsed;
59
+ const logs = Object.keys(logRecord).map((key) => {
60
+ return {
61
+ name: joinURI(log_dir, key),
62
+ task: logRecord[key].eval.task,
63
+ task_id: logRecord[key].eval.task_id,
64
+ };
65
+ });
66
+ return Promise.resolve({
67
+ files: logs,
68
+ log_dir,
69
+ });
70
+ }
71
+ }
72
+
73
+ return undefined;
74
+ },
75
+ eval_log: async (
76
+ log_file: string,
77
+ _headerOnly?: number,
78
+ _capabilities?: Capabilities,
79
+ ) => {
80
+ const response = await fetchLogFile(log_file);
81
+ if (response) {
82
+ return response;
83
+ } else {
84
+ throw new Error(`"Unable to load eval log ${log_file}`);
85
+ }
86
+ },
87
+ eval_log_size: async (log_file: string) => {
88
+ return await fetchSize(log_file);
89
+ },
90
+ eval_log_bytes: async (log_file: string, start: number, end: number) => {
91
+ return await fetchRange(log_file, start, end);
92
+ },
93
+ eval_log_headers: async (files: string[]) => {
94
+ if (files.length === 0) {
95
+ return [];
96
+ }
97
+
98
+ if (log_dir) {
99
+ const headers = await fetchLogHeaders(log_dir);
100
+ if (headers) {
101
+ const keys = Object.keys(headers.parsed);
102
+ const result: EvalLog[] = [];
103
+ files.forEach((file) => {
104
+ const fileKey = keys.find((key) => {
105
+ return file.endsWith(key);
106
+ });
107
+ if (fileKey) {
108
+ result.push(headers.parsed[fileKey]);
109
+ }
110
+ });
111
+ return result;
112
+ }
113
+ }
114
+
115
+ // No log.json could be found, and there isn't a log file,
116
+ throw new Error(
117
+ `Failed to load a manifest files using the directory: ${log_dir}. Please be sure you have deployed a manifest file (logs.json).`,
118
+ );
119
+ },
120
+ download_file,
121
+ open_log_file,
122
+ };
123
+ }
124
+
125
+ /**
126
+ * Fetches a file from the specified URL and parses its content.
127
+ */
128
+ async function fetchFile<T>(
129
+ url: string,
130
+ parse: (text: string) => Promise<T>,
131
+ handleError?: (response: Response) => boolean,
132
+ ): Promise<T | undefined> {
133
+ const safe_url = encodePathParts(url);
134
+ const response = await fetch(`${safe_url}`, { method: "GET" });
135
+ if (response.ok) {
136
+ const text = await response.text();
137
+ return await parse(text);
138
+ } else if (response.status !== 200) {
139
+ if (handleError && handleError(response)) {
140
+ return undefined;
141
+ }
142
+ const message = (await response.text()) || response.statusText;
143
+ const error = new Error(`${response.status}: ${message})`);
144
+ throw error;
145
+ } else {
146
+ throw new Error(`${response.status} - ${response.statusText} `);
147
+ }
148
+ }
149
+
150
+ /**
151
+ * Fetches a log file and parses its content, updating the log structure if necessary.
152
+ */
153
+ const fetchLogFile = async (file: string): Promise<LogContents | undefined> => {
154
+ return fetchFile<LogContents>(file, async (text): Promise<LogContents> => {
155
+ const log = (await asyncJsonParse(text)) as EvalLog;
156
+ if (log.version === 1) {
157
+ if (log.results) {
158
+ const untypedLog = log as any;
159
+ log.results.scores = [];
160
+ untypedLog.results.scorer.scorer = untypedLog.results.scorer.name;
161
+ log.results.scores.push(untypedLog.results.scorer);
162
+ delete untypedLog.results.scorer;
163
+ log.results.scores[0].metrics = untypedLog.results.metrics;
164
+ delete untypedLog.results.metrics;
165
+
166
+ // migrate samples
167
+ const scorerName = log.results.scores[0].name;
168
+ log.samples?.forEach((sample) => {
169
+ const untypedSample = sample as any;
170
+ sample.scores = { [scorerName]: untypedSample.score };
171
+ delete untypedSample.score;
172
+ });
173
+ }
174
+ }
175
+ return {
176
+ raw: text,
177
+ parsed: log,
178
+ };
179
+ });
180
+ };
181
+
182
+ /**
183
+ * Fetches a log file and parses its content, updating the log structure if necessary.
184
+ */
185
+ const fetchLogHeaders = async (
186
+ log_dir: string,
187
+ ): Promise<LogFilesFetchResponse | undefined> => {
188
+ const logs = await fetchFile<LogFilesFetchResponse>(
189
+ log_dir + "/logs.json",
190
+ async (text) => {
191
+ const parsed = await asyncJsonParse(text);
192
+ return {
193
+ raw: text,
194
+ parsed,
195
+ };
196
+ },
197
+ (response) => {
198
+ if (response.status === 404) {
199
+ // Couldn't find a header file
200
+ return true;
201
+ } else {
202
+ return false;
203
+ }
204
+ },
205
+ );
206
+ return logs;
207
+ };
208
+
209
+ /**
210
+ * Joins multiple URI segments into a single URI string.
211
+ *
212
+ * This function removes any leading or trailing slashes from each segment
213
+ * and then joins them with a single slash (`/`).
214
+ */
215
+ function joinURI(...segments: string[]): string {
216
+ return segments
217
+ .map((segment) => segment.replace(/(^\/+|\/+$)/g, "")) // Remove leading/trailing slashes from each segment
218
+ .join("/");
219
+ }
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Downloads the provided content as a file using the browser's DOM API
3
+ */
4
+ export async function download_file(
5
+ filename: string,
6
+ filecontents: string | Blob | ArrayBuffer | ArrayBufferView,
7
+ ): Promise<void> {
8
+ const blob = new Blob([filecontents], { type: "text/plain" });
9
+ const link = document.createElement("a");
10
+ link.href = URL.createObjectURL(blob);
11
+ link.download = filename;
12
+ document.body.appendChild(link);
13
+ link.click();
14
+ document.body.removeChild(link);
15
+ }
16
+
17
+ /**
18
+ * Encodes the path segments of a URL or relative path to ensure special characters
19
+ * (like `+`, spaces, etc.) are properly encoded without affecting legal characters like `/`.
20
+ *
21
+ * This function will encode file names and path portions of both absolute URLs and
22
+ * relative paths. It ensures that components of a full URL, such as the protocol and
23
+ * query parameters, remain intact, while only encoding the path.
24
+ */
25
+ export function encodePathParts(url: string): string {
26
+ if (!url) return url; // Handle empty strings
27
+
28
+ try {
29
+ // Parse a full Uri
30
+ const fullUrl = new URL(url);
31
+ fullUrl.pathname = fullUrl.pathname
32
+ .split("/")
33
+ .map((segment) =>
34
+ segment ? encodeURIComponent(decodeURIComponent(segment)) : "",
35
+ )
36
+ .join("/");
37
+ return fullUrl.toString();
38
+ } catch {
39
+ // This is a relative path that isn't parseable as Uri
40
+ return url
41
+ .split("/")
42
+ .map((segment) =>
43
+ segment ? encodeURIComponent(decodeURIComponent(segment)) : "",
44
+ )
45
+ .join("/");
46
+ }
47
+ }