inspect-ai 0.3.59__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. inspect_ai/_cli/eval.py +0 -7
  2. inspect_ai/_display/textual/widgets/samples.py +1 -1
  3. inspect_ai/_eval/eval.py +10 -1
  4. inspect_ai/_eval/loader.py +79 -19
  5. inspect_ai/_eval/registry.py +6 -0
  6. inspect_ai/_eval/score.py +2 -1
  7. inspect_ai/_eval/task/results.py +6 -5
  8. inspect_ai/_eval/task/run.py +11 -11
  9. inspect_ai/_view/www/dist/assets/index.js +262 -303
  10. inspect_ai/_view/www/src/App.mjs +6 -6
  11. inspect_ai/_view/www/src/Types.mjs +1 -1
  12. inspect_ai/_view/www/src/api/Types.ts +133 -0
  13. inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
  14. inspect_ai/_view/www/src/api/api-http.ts +219 -0
  15. inspect_ai/_view/www/src/api/api-shared.ts +47 -0
  16. inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
  17. inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
  18. inspect_ai/_view/www/src/api/index.ts +51 -0
  19. inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
  20. inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
  21. inspect_ai/_view/www/src/index.js +2 -2
  22. inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
  23. inspect_ai/_view/www/src/navbar/Navbar.mjs +1 -1
  24. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +1 -1
  25. inspect_ai/_view/www/src/samples/SampleList.mjs +1 -1
  26. inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
  27. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +14 -14
  28. inspect_ai/_view/www/src/samples/SamplesTab.mjs +10 -10
  29. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
  30. inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +1 -3
  31. inspect_ai/_view/www/src/utils/vscode.ts +36 -0
  32. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
  33. inspect_ai/approval/_human/manager.py +1 -1
  34. inspect_ai/model/_call_tools.py +55 -0
  35. inspect_ai/model/_conversation.py +1 -4
  36. inspect_ai/model/_generate_config.py +2 -8
  37. inspect_ai/model/_model_output.py +15 -0
  38. inspect_ai/model/_openai.py +383 -0
  39. inspect_ai/model/_providers/anthropic.py +52 -11
  40. inspect_ai/model/_providers/azureai.py +1 -1
  41. inspect_ai/model/_providers/goodfire.py +248 -0
  42. inspect_ai/model/_providers/groq.py +7 -3
  43. inspect_ai/model/_providers/hf.py +6 -0
  44. inspect_ai/model/_providers/mistral.py +2 -1
  45. inspect_ai/model/_providers/openai.py +36 -202
  46. inspect_ai/model/_providers/openai_o1.py +2 -4
  47. inspect_ai/model/_providers/providers.py +22 -0
  48. inspect_ai/model/_providers/together.py +4 -4
  49. inspect_ai/model/_providers/util/__init__.py +2 -3
  50. inspect_ai/model/_providers/util/hf_handler.py +1 -1
  51. inspect_ai/model/_providers/util/llama31.py +1 -1
  52. inspect_ai/model/_providers/util/util.py +0 -76
  53. inspect_ai/scorer/_metric.py +3 -0
  54. inspect_ai/scorer/_scorer.py +2 -1
  55. inspect_ai/solver/__init__.py +2 -0
  56. inspect_ai/solver/_basic_agent.py +1 -1
  57. inspect_ai/solver/_bridge/__init__.py +3 -0
  58. inspect_ai/solver/_bridge/bridge.py +100 -0
  59. inspect_ai/solver/_bridge/patch.py +170 -0
  60. inspect_ai/solver/_solver.py +6 -0
  61. inspect_ai/util/_display.py +5 -0
  62. inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
  63. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
  64. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +68 -63
  65. inspect_ai/_view/www/src/api/Types.mjs +0 -117
  66. inspect_ai/_view/www/src/api/api-http.mjs +0 -300
  67. inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
  68. inspect_ai/_view/www/src/api/index.mjs +0 -49
  69. inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
  70. inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
  71. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
  72. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
  73. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
  74. {inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,12 @@
1
1
  //@ts-check
2
- import { asyncJsonParse } from "../utils/Json.mjs";
2
+ import {
3
+ EvalHeader,
4
+ EvalSummary,
5
+ LogViewAPI,
6
+ SampleSummary,
7
+ } from "../api/Types";
8
+ import { EvalLog, EvalPlan, EvalSample, EvalSpec } from "../types/log";
9
+ import { asyncJsonParse } from "../utils/json-worker";
3
10
  import { AsyncQueue } from "../utils/queue.mjs";
4
11
  import {
5
12
  FileSizeLimitError,
@@ -9,42 +16,46 @@ import {
9
16
  // don't try to load samples greater than 50mb
10
17
  const MAX_BYTES = 50 * 1024 * 1024;
11
18
 
12
- /**
13
- * @typedef {Object} SampleEntry
14
- * @property {string} sampleId
15
- * @property {number} epoch
16
- */
19
+ interface SampleEntry {
20
+ sampleId: string;
21
+ epoch: number;
22
+ }
17
23
 
18
- /**
19
- * @typedef {Object} RemoteLogFile
20
- * @property {() => Promise<Object>} readHeader - Reads the header of the log file.
21
- * @property {() => Promise<Object>} readLogSummary - Reads the log summary including header and sample summaries.
22
- * @property {(sampleId: string, epoch: number) => Promise<Object>} readSample - Reads a specific sample file.
23
- * @property {() => Promise<import("../types/log").EvalLog>} readCompleteLog - Reads the complete log file including all samples.
24
- */
24
+ export interface RemoteLogFile {
25
+ readHeader: () => Promise<EvalHeader>;
26
+ readLogSummary: () => Promise<EvalSummary>;
27
+ readSample: (sampleId: string, epoch: number) => Promise<EvalSample>;
28
+ readCompleteLog: () => Promise<EvalLog>;
29
+ }
30
+
31
+ interface LogStart {
32
+ version: number;
33
+ eval: EvalSpec;
34
+ plan: EvalPlan;
35
+ }
25
36
 
26
37
  /**
27
38
  * Opens a remote log file and provides methods to read its contents.
28
- * @param {import("../api/Types.mjs").LogViewAPI} api - The api
29
- * @param {string} url - The URL of the remote zip file.
30
- * @param {number} concurrency - The number of concurrent operations allowed.
31
- * @returns {Promise<RemoteLogFile>} An object with methods to read the log file.
32
39
  */
33
- export const openRemoteLogFile = async (api, url, concurrency) => {
40
+ export const openRemoteLogFile = async (
41
+ api: LogViewAPI,
42
+ url: string,
43
+ concurrency: number,
44
+ ): Promise<RemoteLogFile> => {
34
45
  const queue = new AsyncQueue(concurrency);
35
46
  const remoteZipFile = await openRemoteZipFile(
36
- `${encodeURIComponent(url)}`,
47
+ url,
37
48
  api.eval_log_size,
38
49
  api.eval_log_bytes,
39
50
  );
40
51
 
41
52
  /**
42
53
  * Reads and parses a JSON file from the zip.
43
- * @param {string} file - The name of the file to read.
44
- * @param {number} [maxBytes] - the max bytes
45
- * @returns {Promise<Object>} The parsed JSON content.
46
54
  */
47
- const readJSONFile = async (file, maxBytes) => {
55
+ const readJSONFile = async (
56
+ file: string,
57
+ maxBytes?: number,
58
+ ): Promise<Object> => {
48
59
  try {
49
60
  const data = await remoteZipFile.readFile(file, maxBytes);
50
61
  const textDecoder = new TextDecoder("utf-8");
@@ -53,19 +64,22 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
53
64
  } catch (error) {
54
65
  if (error instanceof FileSizeLimitError) {
55
66
  throw error;
56
- } else {
67
+ } else if (error instanceof Error) {
57
68
  throw new Error(
58
69
  `Failed to read or parse file ${file}: ${error.message}`,
59
70
  );
71
+ } else {
72
+ throw new Error(
73
+ `Failed to read or parse file ${file} - an unknown error occurred`,
74
+ );
60
75
  }
61
76
  }
62
77
  };
63
78
 
64
79
  /**
65
80
  * Lists all samples in the zip file.
66
- * @returns {Promise<SampleEntry[]>} An array of sample objects.
67
81
  */
68
- const listSamples = async () => {
82
+ const listSamples = async (): Promise<SampleEntry[]> => {
69
83
  return Array.from(remoteZipFile.centralDirectory.keys())
70
84
  .filter(
71
85
  (filename) =>
@@ -82,14 +96,14 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
82
96
 
83
97
  /**
84
98
  * Reads a specific sample file.
85
- * @param {string} sampleId - The ID of the sample.
86
- * @param {number} epoch - The epoch of the sample.
87
- * @returns {Promise<Object>} The content of the sample file.
88
99
  */
89
- const readSample = async (sampleId, epoch) => {
100
+ const readSample = async (
101
+ sampleId: string,
102
+ epoch: number,
103
+ ): Promise<EvalSample> => {
90
104
  const sampleFile = `samples/${sampleId}_epoch_${epoch}.json`;
91
105
  if (remoteZipFile.centralDirectory.has(sampleFile)) {
92
- return readJSONFile(sampleFile, MAX_BYTES);
106
+ return (await readJSONFile(sampleFile, MAX_BYTES)) as EvalSample;
93
107
  } else {
94
108
  console.log({ dir: remoteZipFile.centralDirectory });
95
109
  throw new Error(
@@ -100,13 +114,12 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
100
114
 
101
115
  /**
102
116
  * Reads the results.json file.
103
- * @returns {Promise<Object>} The content of results.json.
104
117
  */
105
- const readHeader = async () => {
118
+ const readHeader = async (): Promise<EvalHeader> => {
106
119
  if (remoteZipFile.centralDirectory.has("header.json")) {
107
- return readJSONFile("header.json");
120
+ return (await readJSONFile("header.json")) as EvalHeader;
108
121
  } else {
109
- const evalSpec = await readJSONFile("_journal/start.json");
122
+ const evalSpec = (await readJSONFile("_journal/start.json")) as LogStart;
110
123
  return {
111
124
  status: "started",
112
125
  eval: evalSpec.eval,
@@ -117,9 +130,8 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
117
130
 
118
131
  /**
119
132
  * Reads individual summary files when summaries.json is not available.
120
- * @returns {Promise<Object>} Combined summaries from individual files.
121
133
  */
122
- const readFallbackSummaries = async () => {
134
+ const readFallbackSummaries = async (): Promise<SampleSummary[]> => {
123
135
  const summaryFiles = Array.from(
124
136
  remoteZipFile.centralDirectory.keys(),
125
137
  ).filter(
@@ -128,14 +140,16 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
128
140
  filename.endsWith(".json"),
129
141
  );
130
142
 
131
- const summaries = [];
132
- const errors = [];
143
+ const summaries: SampleSummary[] = [];
144
+ const errors: unknown[] = [];
133
145
 
134
146
  await Promise.all(
135
147
  summaryFiles.map((filename) =>
136
148
  queue.enqueue(async () => {
137
149
  try {
138
- const partialSummary = await readJSONFile(filename);
150
+ const partialSummary = (await readJSONFile(
151
+ filename,
152
+ )) as SampleSummary[];
139
153
  summaries.push(...partialSummary);
140
154
  } catch (error) {
141
155
  errors.push(error);
@@ -156,11 +170,10 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
156
170
 
157
171
  /**
158
172
  * Reads all summaries, falling back to individual files if necessary.
159
- * @returns {Promise<Object>} All summaries.
160
173
  */
161
- const readSampleSummaries = async () => {
174
+ const readSampleSummaries = async (): Promise<SampleSummary[]> => {
162
175
  if (remoteZipFile.centralDirectory.has("summaries.json")) {
163
- return await readJSONFile("summaries.json");
176
+ return (await readJSONFile("summaries.json")) as SampleSummary[];
164
177
  } else {
165
178
  return readFallbackSummaries();
166
179
  }
@@ -187,14 +200,17 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
187
200
  readSample,
188
201
  /**
189
202
  * Reads the complete log file.
190
- * @returns {Promise<import("../types/log").EvalLog>} The complete log data.
191
203
  */
192
- readCompleteLog: async () => {
204
+ readCompleteLog: async (): Promise<EvalLog> => {
193
205
  const [evalLog, samples] = await Promise.all([
194
206
  readHeader(),
195
207
  listSamples().then((sampleIds) =>
196
208
  Promise.all(
197
- sampleIds.map(({ sampleId, epoch }) => readSample(sampleId, epoch)),
209
+ sampleIds.map(({ sampleId, epoch }) =>
210
+ readSample(sampleId, epoch).then(
211
+ (sample) => sample as EvalSample,
212
+ ),
213
+ ),
198
214
  ),
199
215
  ),
200
216
  ]);
@@ -18,8 +18,8 @@ import { SecondaryBar } from "./SecondaryBar.mjs";
18
18
  * @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults
19
19
  * @param {import("../types/log").EvalPlan} [props.evalPlan] - The EvalSpec
20
20
  * @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
21
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
22
21
  * @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
22
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
23
23
  * @param {string} [props.status] - the status
24
24
  * @param {boolean} props.offcanvas - Are we in offcanvas mode?
25
25
  * @param {boolean} props.showToggle - Should we show the toggle?
@@ -13,8 +13,8 @@ import { scoreFilterItems } from "../samples/tools/filters.mjs";
13
13
  * @param {import("../types/log").EvalPlan} [props.evalPlan] - The EvalSpec
14
14
  * @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults
15
15
  * @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
16
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
17
16
  * @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
17
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
18
18
  * @param {string} [props.status] - the status
19
19
  * @param {Map<string, string>} [props.style] - is this off canvas
20
20
  *
@@ -252,7 +252,7 @@ const SeparatorRow = ({ id, title, height }) => {
252
252
  * @param {Object} props - The parameters for the component.
253
253
  * @param {string} props.id - The unique identifier for the sample.
254
254
  * @param {number} props.index - The index of the sample.
255
- * @param {import("../api/Types.mjs").SampleSummary} props.sample - The sample.
255
+ * @param {import("../api/Types.ts").SampleSummary} props.sample - The sample.
256
256
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor.
257
257
  * @param {number} props.height - The height of the sample row.
258
258
  * @param {boolean} props.selected - Whether the sample is selected.
@@ -2,7 +2,7 @@ import { html } from "htm/preact";
2
2
 
3
3
  /**
4
4
  * @param {Object} props
5
- * @param {import("../api/Types.mjs").SampleSummary} props.sample
5
+ * @param {import("../api/Types.ts").SampleSummary} props.sample
6
6
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor
7
7
  * @param {string} props.scorer
8
8
  * @returns {import("preact").JSX.Element}
@@ -21,12 +21,12 @@ import {
21
21
  * Represents a utility summary of the samples that doesn't change with the selected score.
22
22
  * @typedef {Object} EvalDescriptor
23
23
  * @property {number} epochs - The number of epochs.
24
- * @property {import("../api/Types.mjs").SampleSummary[]} samples - The list of sample summaries.
24
+ * @property {import("../api/Types.ts").SampleSummary[]} samples - The list of sample summaries.
25
25
  * @property {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
26
- * @property {(sample: import("../api/Types.mjs").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => ScorerDescriptor} scorerDescriptor - Returns the scorer descriptor for a sample and a specified scorer.
26
+ * @property {(sample: import("../api/Types.ts").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => ScorerDescriptor} scorerDescriptor - Returns the scorer descriptor for a sample and a specified scorer.
27
27
  * @property {(scoreLabel: import("../Types.mjs").ScoreLabel) => ScoreDescriptor} scoreDescriptor - Provides information about the score types and how to render them.
28
- * @property {(sample: import("../api/Types.mjs").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => SelectedScore} score - Returns information about a score for a sample.
29
- * @property {(sample: import("../api/Types.mjs").BasicSampleData, scorer: string) => string} scoreAnswer - Returns the answer for a sample and a specified scorer.
28
+ * @property {(sample: import("../api/Types.ts").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => SelectedScore} score - Returns information about a score for a sample.
29
+ * @property {(sample: import("../api/Types.ts").BasicSampleData, scorer: string) => string} scoreAnswer - Returns the answer for a sample and a specified scorer.
30
30
  */
31
31
 
32
32
  /**
@@ -35,8 +35,8 @@ import {
35
35
  * @property {EvalDescriptor} evalDescriptor - The EvalDescriptor.
36
36
  * @property {MessageShape} messageShape - The normalized sizes of input, target, and answer messages.
37
37
  * @property {ScoreDescriptor} selectedScoreDescriptor - Provides information about the score types and how to render them.
38
- * @property {(sample: import("../api/Types.mjs").BasicSampleData) => SelectedScore} selectedScore - Returns the selected score for a sample.
39
- * @property {(sample: import("../api/Types.mjs").BasicSampleData) => ScorerDescriptor} selectedScorerDescriptor - Returns the scorer descriptor for a sample using the selected scorer.
38
+ * @property {(sample: import("../api/Types.ts").BasicSampleData) => SelectedScore} selectedScore - Returns the selected score for a sample.
39
+ * @property {(sample: import("../api/Types.ts").BasicSampleData) => ScorerDescriptor} selectedScorerDescriptor - Returns the scorer descriptor for a sample using the selected scorer.
40
40
  */
41
41
 
42
42
  /**
@@ -108,7 +108,7 @@ export const parseScoreLabelKey = (key) => {
108
108
 
109
109
  /**
110
110
  * @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
111
- * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
111
+ * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
112
112
  * @param {number} epochs - The number of epochs
113
113
  * @returns {EvalDescriptor} The EvalDescriptor
114
114
  */
@@ -118,7 +118,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
118
118
  }
119
119
 
120
120
  /**
121
- * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
121
+ * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
122
122
  * @param {import("../Types.mjs").ScoreLabel} scoreLabel - the score label
123
123
  * @returns {import("../types/log").Value2} The Score
124
124
  */
@@ -142,7 +142,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
142
142
  };
143
143
 
144
144
  /**
145
- * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
145
+ * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
146
146
  * @param {string} scorer - the scorer name
147
147
  * @returns {string} The answer
148
148
  */
@@ -158,7 +158,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
158
158
  };
159
159
 
160
160
  /**
161
- * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
161
+ * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
162
162
  * @param {string} scorer - the scorer name
163
163
  * @returns {string} The explanation
164
164
  */
@@ -174,7 +174,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
174
174
 
175
175
  // Retrieve the metadata for a sample
176
176
  /**
177
- * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
177
+ * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
178
178
  * @param {string} scorer - the scorer name
179
179
  * @returns {Object} The explanation
180
180
  */
@@ -248,7 +248,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
248
248
  };
249
249
 
250
250
  /**
251
- * @param {import("../api/Types.mjs").BasicSampleData} sample
251
+ * @param {import("../api/Types.ts").BasicSampleData} sample
252
252
  * @param {import("../Types.mjs").ScoreLabel} scoreLabel
253
253
  * @returns {any}
254
254
  */
@@ -265,7 +265,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
265
265
  };
266
266
 
267
267
  /**
268
- * @param {import("../api/Types.mjs").BasicSampleData} sample
268
+ * @param {import("../api/Types.ts").BasicSampleData} sample
269
269
  * @param {import("../Types.mjs").ScoreLabel} scoreLabel
270
270
  * @returns {ScorerDescriptor}
271
271
  */
@@ -348,7 +348,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
348
348
  };
349
349
 
350
350
  /**
351
- * @param {import("../api/Types.mjs").BasicSampleData} sample
351
+ * @param {import("../api/Types.ts").BasicSampleData} sample
352
352
  * @param {import("../Types.mjs").ScoreLabel} scoreLabel
353
353
  * @returns {SelectedScore}
354
354
  */
@@ -13,7 +13,7 @@ import { EmptyPanel } from "../components/EmptyPanel.mjs";
13
13
  * @param {Object} props - The parameters for the component.
14
14
  * @param {import("../types/log").Sample} [props.sample] - The sample
15
15
  * @param {string} [props.task_id] - The task id
16
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
16
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
17
17
  * @param {import("../Types.mjs").SampleMode} props.sampleMode - the mode for displaying samples
18
18
  * @param {"epoch" | "sample" | "none" } props.groupBy - how to group items
19
19
  * @param {"asc" | "desc" } props.groupByOrder - whether grouping is ascending or descending
@@ -213,19 +213,19 @@ export const SamplesTab = ({
213
213
  * @property {string} label - The label for the sample, formatted as "Sample {group} (Epoch {item})".
214
214
  * @property {number} number - The current counter item value.
215
215
  * @property {number} index - The index of the sample.
216
- * @property {import("../api/Types.mjs").SampleSummary | string} data - The items data payload.
216
+ * @property {import("../api/Types.ts").SampleSummary | string} data - The items data payload.
217
217
  * @property {string} type - The type of the result, in this case, "sample". (or "separator")
218
218
  */
219
219
 
220
220
  /**
221
221
  * Perform any grouping of the samples
222
222
  *
223
- * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
223
+ * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
224
224
  * @param {"sample" | "epoch" | "none"} groupBy - how to group samples
225
225
  * @param {"asc" | "desc"} groupByOrder - how to order grouped samples
226
226
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} sampleDescriptor - the sample descriptor
227
227
 
228
- * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list items
228
+ * @returns {(sample: import("../api/Types.ts").SampleSummary, index: number, previousSample: import("../api/Types.ts").SampleSummary) => ListItem[]} The list items
229
229
  */
230
230
  const getSampleProcessor = (
231
231
  samples,
@@ -246,9 +246,9 @@ const getSampleProcessor = (
246
246
  /**
247
247
  * Performs no grouping
248
248
  *
249
- * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
249
+ * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
250
250
  * @param {string} order - the selected order
251
- * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list
251
+ * @returns {(sample: import("../api/Types.ts").SampleSummary, index: number, previousSample: import("../api/Types.ts").SampleSummary) => ListItem[]} The list
252
252
  */
253
253
  const noGrouping = (samples, order) => {
254
254
  const counter = getCounter(samples.length, 1, order);
@@ -270,10 +270,10 @@ const noGrouping = (samples, order) => {
270
270
  /**
271
271
  * Groups by sample (showing separators for Epochs)
272
272
  *
273
- * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
273
+ * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
274
274
  * @param {string} order - the selected order
275
275
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} sampleDescriptor - the sample descriptor
276
- * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list
276
+ * @returns {(sample: import("../api/Types.ts").SampleSummary, index: number, previousSample: import("../api/Types.ts").SampleSummary) => ListItem[]} The list
277
277
  */
278
278
  const groupBySample = (samples, sampleDescriptor, order) => {
279
279
  // ensure that we are sorted by id
@@ -327,10 +327,10 @@ const groupBySample = (samples, sampleDescriptor, order) => {
327
327
  /**
328
328
  * Groups by epoch (showing a separator for each sample)
329
329
  *
330
- * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
330
+ * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
331
331
  * @param {string} order - the selected order
332
332
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} sampleDescriptor - the sample descriptor
333
- * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list
333
+ * @returns {(sample: import("../api/Types.ts").SampleSummary, index: number, previousSample: import("../api/Types.ts").SampleSummary) => ListItem[]} The list
334
334
  */
335
335
  const groupByEpoch = (samples, sampleDescriptor, order) => {
336
336
  const groupCount = sampleDescriptor.evalDescriptor.epochs;
@@ -89,9 +89,9 @@ const sortId = (a, b) => {
89
89
  * Sorts a list of samples
90
90
  *
91
91
  * @param {string} sort - The sort direction
92
- * @param {import("../../api/Types.mjs").SampleSummary[]} samples - The samples
92
+ * @param {import("../../api/Types.ts").SampleSummary[]} samples - The samples
93
93
  * @param {import("../SamplesDescriptor.mjs").SamplesDescriptor} samplesDescriptor - The samples descriptor
94
- * @returns {{ sorted: import("../../api/Types.mjs").SampleSummary[], order: 'asc' | 'desc' }} An object with sorted samples and the sort order.
94
+ * @returns {{ sorted: import("../../api/Types.ts").SampleSummary[], order: 'asc' | 'desc' }} An object with sorted samples and the sort order.
95
95
  */
96
96
  export const sortSamples = (sort, samples, samplesDescriptor) => {
97
97
  const sortedSamples = samples.sort((a, b) => {
@@ -1,6 +1,4 @@
1
- // @ts-check
2
-
3
- export const asyncJsonParse = async (text) => {
1
+ export const asyncJsonParse = async (text: string): Promise<any> => {
4
2
  const encoder = new TextEncoder();
5
3
  const encodedText = encoder.encode(text);
6
4
  const blob = new Blob([kWorkerCode], { type: "application/javascript" });
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Type definition for the VS Code API object
3
+ * Note: This is a minimal definition - expand based on your needs
4
+ */
5
+ interface VSCodeApi {
6
+ postMessage(message: unknown): void;
7
+ getState(): unknown;
8
+ setState(state: unknown): void;
9
+ }
10
+
11
+ /**
12
+ * The cached instance of the VS Code API
13
+ */
14
+ let vscodeApi: VSCodeApi | undefined;
15
+
16
+ // Declare the acquireVsCodeApi function on the window object
17
+ declare global {
18
+ interface Window {
19
+ acquireVsCodeApi?: () => VSCodeApi;
20
+ }
21
+ }
22
+
23
+ /**
24
+ * Gets or initializes the VS Code API instance
25
+ * @returns {VSCodeApi | undefined} The VS Code API instance if in VS Code environment, undefined otherwise
26
+ */
27
+ export const getVscodeApi = (): VSCodeApi | undefined => {
28
+ if (window.acquireVsCodeApi) {
29
+ if (vscodeApi === undefined) {
30
+ vscodeApi = window.acquireVsCodeApi();
31
+ }
32
+ return vscodeApi;
33
+ } else {
34
+ return undefined;
35
+ }
36
+ };
@@ -43,7 +43,7 @@ import { debounce } from "../utils/sync.mjs";
43
43
  * @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats for this eval
44
44
  * @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults for this eval
45
45
  * @param {import("../Types.mjs").CurrentLog} [props.log] - the current log
46
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
46
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
47
47
  * @param {import("../Types.mjs").SampleMode} props.sampleMode - the mode for displaying samples
48
48
  * @param {string} props.groupBy - what to group by
49
49
  * @param {string} props.groupByOrder - the grouping order
@@ -40,7 +40,7 @@ class HumanApprovalManager:
40
40
  future = cast(Future[Approval], asyncio.get_event_loop().create_future())
41
41
  sample = sample_active()
42
42
  assert sample
43
- assert sample.sample.id
43
+ assert sample.sample.id is not None
44
44
  pending = PendingApprovalRequest(
45
45
  request=request,
46
46
  task=sample.task,
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import inspect
3
+ import json
3
4
  import types
4
5
  from dataclasses import is_dataclass
5
6
  from logging import getLogger
@@ -21,6 +22,7 @@ from typing import (
21
22
  is_typeddict,
22
23
  )
23
24
 
25
+ import yaml
24
26
  from jsonschema import Draft7Validator
25
27
  from pydantic import BaseModel
26
28
 
@@ -469,3 +471,56 @@ def truncate_tool_output(
469
471
  )
470
472
  else:
471
473
  return None
474
+
475
+
476
+ def tool_parse_error_message(arguments: str, ex: Exception) -> str:
477
+ return f"Error parsing the following tool call arguments:\n\n{arguments}\n\nError details: {ex}"
478
+
479
+
480
+ def parse_tool_call(
481
+ id: str, function: str, arguments: str, tools: list[ToolInfo] | None = None
482
+ ) -> ToolCall:
483
+ error: str | None = None
484
+ arguments_dict: dict[str, Any] = {}
485
+
486
+ def report_parse_error(ex: Exception) -> None:
487
+ nonlocal error
488
+ error = tool_parse_error_message(arguments, ex)
489
+ logger.info(error)
490
+
491
+ # if the arguments is a dict, then handle it with a plain json.loads
492
+ arguments = arguments.strip()
493
+ if arguments.startswith("{"):
494
+ try:
495
+ arguments_dict = json.loads(arguments)
496
+ except json.JSONDecodeError as ex:
497
+ report_parse_error(ex)
498
+
499
+ # otherwise parse it as yaml (which will pickup unquoted strings, numbers, and true/false)
500
+ # and then create a dict that maps it to the first function argument
501
+ elif function and tools:
502
+ tool_info = next(
503
+ (
504
+ tool
505
+ for tool in tools
506
+ if tool.name == function and len(tool.parameters.properties) > 0
507
+ ),
508
+ None,
509
+ )
510
+ if tool_info:
511
+ param_names = list(tool_info.parameters.properties.keys())
512
+ try:
513
+ value = yaml.safe_load(arguments)
514
+ arguments_dict[param_names[0]] = value
515
+ except yaml.error.YAMLError:
516
+ # If the yaml parser fails, we treat it as a string argument.
517
+ arguments_dict[param_names[0]] = arguments
518
+
519
+ # return ToolCall with error payload
520
+ return ToolCall(
521
+ id=id,
522
+ function=function,
523
+ arguments=arguments_dict,
524
+ type="function",
525
+ parse_error=error,
526
+ )
@@ -1,7 +1,6 @@
1
1
  from rich.console import RenderableType
2
2
  from rich.text import Text
3
3
 
4
- from inspect_ai._util.constants import NO_CONTENT
5
4
  from inspect_ai._util.rich import lines_display
6
5
  from inspect_ai._util.transcript import transcript_markdown
7
6
  from inspect_ai.util._conversation import conversation_panel
@@ -41,9 +40,7 @@ def conversation_assistant_message(
41
40
 
42
41
  # start with assistant content
43
42
  content: list[RenderableType] = (
44
- [transcript_markdown(message.text, escape=True)]
45
- if message.text and message.text != NO_CONTENT
46
- else []
43
+ [transcript_markdown(message.text, escape=True)] if message.text else []
47
44
  )
48
45
 
49
46
  # print tool calls
@@ -34,7 +34,7 @@ class GenerateConfigArgs(TypedDict, total=False):
34
34
  """Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."""
35
35
 
36
36
  best_of: int | None
37
- """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). OpenAI only."""
37
+ """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""
38
38
 
39
39
  frequency_penalty: float | None
40
40
  """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
@@ -48,9 +48,6 @@ class GenerateConfigArgs(TypedDict, total=False):
48
48
  seed: int | None
49
49
  """Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""
50
50
 
51
- suffix: str | None
52
- """The suffix that comes after a completion of inserted text. OpenAI only."""
53
-
54
51
  top_k: int | None
55
52
  """Randomly sample the next word from the top_k most likely next words. Anthropic, Google, and HuggingFace only."""
56
53
 
@@ -107,7 +104,7 @@ class GenerateConfig(BaseModel):
107
104
  """Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."""
108
105
 
109
106
  best_of: int | None = Field(default=None)
110
- """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). OpenAI and vLLM only."""
107
+ """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""
111
108
 
112
109
  frequency_penalty: float | None = Field(default=None)
113
110
  """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
@@ -121,9 +118,6 @@ class GenerateConfig(BaseModel):
121
118
  seed: int | None = Field(default=None)
122
119
  """Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""
123
120
 
124
- suffix: str | None = Field(default=None)
125
- """The suffix that comes after a completion of inserted text. OpenAI only."""
126
-
127
121
  top_k: int | None = Field(default=None)
128
122
  """Randomly sample the next word from the top_k most likely next words. Anthropic, Google, HuggingFace, and vLLM only."""
129
123
 
@@ -214,3 +214,18 @@ class ModelOutput(BaseModel):
214
214
  )
215
215
  ],
216
216
  )
217
+
218
+
219
def as_stop_reason(reason: str | None) -> StopReason:
    """Translate a common finish-reason string into a standard StopReason.

    Args:
        reason: Reason string to translate, or None.

    Returns:
        "stop" for "stop"/"eos", "max_tokens" for "length"/"max_tokens",
        "tool_calls" for "tool_calls"/"function_call", "content_filter" and
        "model_length" unchanged, and "unknown" for anything else
        (including None).
    """
    # lookup table from recognized reason strings to their standard value
    known_reasons: dict[str | None, StopReason] = {
        "stop": "stop",
        "eos": "stop",
        "length": "max_tokens",
        "tool_calls": "tool_calls",
        "function_call": "tool_calls",
        "content_filter": "content_filter",
        "model_length": "model_length",
        "max_tokens": "max_tokens",
    }
    return known_reasons.get(reason, "unknown")