inspect-ai 0.3.103__py3-none-any.whl → 0.3.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +2 -1
- inspect_ai/_cli/eval.py +2 -2
- inspect_ai/_display/core/active.py +3 -0
- inspect_ai/_display/core/config.py +1 -0
- inspect_ai/_display/core/panel.py +21 -13
- inspect_ai/_display/core/results.py +3 -7
- inspect_ai/_display/core/rich.py +3 -5
- inspect_ai/_display/log/__init__.py +0 -0
- inspect_ai/_display/log/display.py +173 -0
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_display/rich/display.py +2 -4
- inspect_ai/_display/textual/app.py +1 -6
- inspect_ai/_display/textual/widgets/task_detail.py +3 -14
- inspect_ai/_display/textual/widgets/tasks.py +1 -1
- inspect_ai/_eval/eval.py +1 -1
- inspect_ai/_eval/evalset.py +3 -3
- inspect_ai/_eval/registry.py +6 -1
- inspect_ai/_eval/run.py +5 -1
- inspect_ai/_eval/task/constants.py +1 -0
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/run.py +65 -39
- inspect_ai/_util/citation.py +88 -0
- inspect_ai/_util/content.py +24 -2
- inspect_ai/_util/json.py +17 -2
- inspect_ai/_util/registry.py +19 -4
- inspect_ai/_view/schema.py +0 -6
- inspect_ai/_view/server.py +17 -0
- inspect_ai/_view/www/dist/assets/index.css +93 -31
- inspect_ai/_view/www/dist/assets/index.js +10639 -10011
- inspect_ai/_view/www/log-schema.json +418 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/@types/log.d.ts +140 -39
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
- inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
- inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
- inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
- inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
- inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
- inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
- inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
- inspect_ai/_view/www/src/client/api/types.ts +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
- inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
- inspect_ai/_view/www/src/tests/README.md +2 -2
- inspect_ai/_view/www/src/utils/git.ts +3 -1
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/agent/_handoff.py +8 -5
- inspect_ai/agent/_react.py +5 -5
- inspect_ai/dataset/_dataset.py +1 -1
- inspect_ai/log/_condense.py +5 -0
- inspect_ai/log/_file.py +4 -1
- inspect_ai/log/_log.py +9 -4
- inspect_ai/log/_recorders/json.py +4 -2
- inspect_ai/log/_samples.py +5 -0
- inspect_ai/log/_util.py +2 -0
- inspect_ai/model/__init__.py +14 -0
- inspect_ai/model/_call_tools.py +17 -8
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_openai_responses.py +80 -34
- inspect_ai/model/_providers/_anthropic_citations.py +158 -0
- inspect_ai/model/_providers/_google_citations.py +100 -0
- inspect_ai/model/_providers/anthropic.py +219 -36
- inspect_ai/model/_providers/google.py +98 -22
- inspect_ai/model/_providers/mistral.py +20 -7
- inspect_ai/model/_providers/openai.py +11 -10
- inspect_ai/model/_providers/openai_compatible.py +3 -2
- inspect_ai/model/_providers/openai_responses.py +2 -5
- inspect_ai/model/_providers/perplexity.py +123 -0
- inspect_ai/model/_providers/providers.py +13 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/model/_trim.py +5 -0
- inspect_ai/tool/__init__.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +5 -2
- inspect_ai/tool/_mcp/sampling.py +19 -3
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tool.py +10 -1
- inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
- inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
- inspect_ai/tool/_tools/_web_search/_google.py +22 -25
- inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
- inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
- inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
- inspect_ai/util/__init__.py +8 -0
- inspect_ai/util/_background.py +64 -0
- inspect_ai/util/_display.py +11 -2
- inspect_ai/util/_limit.py +72 -5
- inspect_ai/util/_sandbox/__init__.py +2 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/service.py +28 -7
- inspect_ai/util/_span.py +12 -1
- inspect_ai/util/_subprocess.py +51 -38
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +134 -109
- /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
- /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
@@ -3,8 +3,10 @@ import {
|
|
3
3
|
CompletionContext,
|
4
4
|
CompletionResult,
|
5
5
|
CompletionSection,
|
6
|
+
startCompletion,
|
6
7
|
} from "@codemirror/autocomplete";
|
7
8
|
import { EditorView } from "codemirror";
|
9
|
+
import { SampleSummary } from "../../../../client/api/types";
|
8
10
|
import {
|
9
11
|
kScoreTypeBoolean,
|
10
12
|
kScoreTypeCategorical,
|
@@ -15,6 +17,8 @@ import {
|
|
15
17
|
import { SampleFilterItem } from "../filters";
|
16
18
|
import {
|
17
19
|
KEYWORDS,
|
20
|
+
kSampleIdVariable,
|
21
|
+
kSampleMetadataVariable,
|
18
22
|
MATH_FUNCTIONS,
|
19
23
|
SAMPLE_FUNCTIONS,
|
20
24
|
SAMPLE_VARIABLES,
|
@@ -53,6 +57,34 @@ const applyWithCall = (
|
|
53
57
|
});
|
54
58
|
};
|
55
59
|
|
60
|
+
const applyWithDot = (
|
61
|
+
view: EditorView,
|
62
|
+
completion: Completion,
|
63
|
+
from: number,
|
64
|
+
to: number,
|
65
|
+
): void => {
|
66
|
+
view.dispatch({
|
67
|
+
changes: { from, to, insert: `${completion.label}.` },
|
68
|
+
selection: { anchor: from + completion.label.length + 1 },
|
69
|
+
});
|
70
|
+
// trigger completion
|
71
|
+
setTimeout(() => startCompletion(view), 0);
|
72
|
+
};
|
73
|
+
|
74
|
+
const applyWithSpace = (
|
75
|
+
view: EditorView,
|
76
|
+
completion: Completion,
|
77
|
+
from: number,
|
78
|
+
to: number,
|
79
|
+
): void => {
|
80
|
+
view.dispatch({
|
81
|
+
changes: { from, to, insert: `${completion.label} ` },
|
82
|
+
selection: { anchor: from + completion.label.length + 1 },
|
83
|
+
});
|
84
|
+
// trigger completion
|
85
|
+
setTimeout(() => startCompletion(view), 0);
|
86
|
+
};
|
87
|
+
|
56
88
|
const makeKeywordCompletion = (k: string): Completion => ({
|
57
89
|
label: k,
|
58
90
|
type: "keyword",
|
@@ -88,6 +120,12 @@ const makeSampleVariableCompletion = ([label, info]: [
|
|
88
120
|
label,
|
89
121
|
type: "variable",
|
90
122
|
info,
|
123
|
+
apply:
|
124
|
+
label === kSampleMetadataVariable
|
125
|
+
? applyWithDot
|
126
|
+
: label === kSampleIdVariable
|
127
|
+
? applyWithSpace
|
128
|
+
: undefined,
|
91
129
|
boost: 10,
|
92
130
|
});
|
93
131
|
|
@@ -120,6 +158,210 @@ const getMemberScoreItems = (
|
|
120
158
|
): SampleFilterItem[] =>
|
121
159
|
filterItems.filter((item) => item?.qualifiedName?.startsWith(`${scorer}.`));
|
122
160
|
|
161
|
+
const getSampleIds = (samples: SampleSummary[]): Set<string | number> => {
|
162
|
+
const ids = new Set<string | number>();
|
163
|
+
for (const sample of samples) {
|
164
|
+
ids.add(sample.id);
|
165
|
+
}
|
166
|
+
return ids;
|
167
|
+
};
|
168
|
+
|
169
|
+
const getMetadataPropertyValues = (
|
170
|
+
samples: SampleSummary[],
|
171
|
+
propertyPath: string,
|
172
|
+
): Set<any> => {
|
173
|
+
const values = new Set<any>();
|
174
|
+
for (const sample of samples) {
|
175
|
+
if (sample.metadata) {
|
176
|
+
const value = getNestedProperty(sample.metadata, propertyPath);
|
177
|
+
if (value !== undefined && value !== null) {
|
178
|
+
values.add(value);
|
179
|
+
}
|
180
|
+
}
|
181
|
+
}
|
182
|
+
return values;
|
183
|
+
};
|
184
|
+
|
185
|
+
const getNestedProperty = (obj: any, path: string): any => {
|
186
|
+
const keys = path.split(".");
|
187
|
+
let current = obj;
|
188
|
+
for (const key of keys) {
|
189
|
+
if (current && typeof current === "object" && key in current) {
|
190
|
+
current = current[key];
|
191
|
+
} else {
|
192
|
+
return undefined;
|
193
|
+
}
|
194
|
+
}
|
195
|
+
return current;
|
196
|
+
};
|
197
|
+
|
198
|
+
const buildMetadataPath = (
|
199
|
+
tokens: Token[],
|
200
|
+
currentTokenIndex: number,
|
201
|
+
): string | null => {
|
202
|
+
// Walk backwards to build the metadata path
|
203
|
+
// For "metadata." return ""
|
204
|
+
// For "metadata.config." return "config"
|
205
|
+
// For "metadata.config.timeout." return "config.timeout"
|
206
|
+
|
207
|
+
const parts: string[] = [];
|
208
|
+
|
209
|
+
// Start after the first dot
|
210
|
+
let index = 2;
|
211
|
+
|
212
|
+
// Look for the metadata root by walking backwards
|
213
|
+
while (index <= currentTokenIndex) {
|
214
|
+
const token = tokens[currentTokenIndex - index];
|
215
|
+
|
216
|
+
if (token?.text === kSampleMetadataVariable) {
|
217
|
+
// Found metadata root, return the path
|
218
|
+
return parts.reverse().join(".");
|
219
|
+
} else if (token?.type === "variable") {
|
220
|
+
// Found a variable token, add to path
|
221
|
+
parts.push(token.text);
|
222
|
+
// Skip the expected dot
|
223
|
+
index++;
|
224
|
+
if (tokens[currentTokenIndex - index]?.text === ".") {
|
225
|
+
// Move past the dot
|
226
|
+
index++;
|
227
|
+
} else {
|
228
|
+
// No dot, not a valid path
|
229
|
+
break;
|
230
|
+
}
|
231
|
+
} else {
|
232
|
+
// Hit non-variable, non-metadata token
|
233
|
+
break;
|
234
|
+
}
|
235
|
+
}
|
236
|
+
|
237
|
+
// Didn't find metadata root
|
238
|
+
return null;
|
239
|
+
};
|
240
|
+
|
241
|
+
const getMetadataKeysForPath = (
|
242
|
+
samples: SampleSummary[],
|
243
|
+
parentPath: string,
|
244
|
+
): Set<string> => {
|
245
|
+
const keys = new Set<string>();
|
246
|
+
for (const sample of samples) {
|
247
|
+
if (sample.metadata) {
|
248
|
+
const parentObj = parentPath
|
249
|
+
? getNestedProperty(sample.metadata, parentPath)
|
250
|
+
: sample.metadata;
|
251
|
+
if (
|
252
|
+
parentObj &&
|
253
|
+
typeof parentObj === "object" &&
|
254
|
+
!Array.isArray(parentObj)
|
255
|
+
) {
|
256
|
+
for (const key of Object.keys(parentObj)) {
|
257
|
+
keys.add(key);
|
258
|
+
}
|
259
|
+
}
|
260
|
+
}
|
261
|
+
}
|
262
|
+
return keys;
|
263
|
+
};
|
264
|
+
|
265
|
+
const buildMetadataPropertyPath = (
|
266
|
+
tokens: Token[],
|
267
|
+
currentTokenIndex: number,
|
268
|
+
): string | null => {
|
269
|
+
// Walk backwards to build the full metadata property path
|
270
|
+
// e.g., for "metadata.difficulty ==" we want to return "difficulty"
|
271
|
+
// e.g., for "metadata.config.timeout ==" we want to return "config.timeout"
|
272
|
+
const parts: string[] = [];
|
273
|
+
|
274
|
+
// Start after the dot
|
275
|
+
let index = 2;
|
276
|
+
|
277
|
+
// Collect the property path by walking backwards
|
278
|
+
while (index <= currentTokenIndex) {
|
279
|
+
const token = tokens[currentTokenIndex - index];
|
280
|
+
if (!token) break;
|
281
|
+
|
282
|
+
if (token.type === "variable") {
|
283
|
+
if (token.text === kSampleMetadataVariable) {
|
284
|
+
// Found the metadata root, return the path
|
285
|
+
return parts.reverse().join(".");
|
286
|
+
} else {
|
287
|
+
parts.push(token.text);
|
288
|
+
}
|
289
|
+
} else if (token.text !== ".") {
|
290
|
+
// Hit a non-dot, non-variable token, not a metadata path
|
291
|
+
break;
|
292
|
+
}
|
293
|
+
index++;
|
294
|
+
}
|
295
|
+
|
296
|
+
return null;
|
297
|
+
};
|
298
|
+
|
299
|
+
const isMetadataProperty = (
|
300
|
+
tokens: Token[],
|
301
|
+
currentTokenIndex: number,
|
302
|
+
): boolean => {
|
303
|
+
// Check if the current variable is part of a metadata property access
|
304
|
+
// e.g., for "metadata.difficulty" return true
|
305
|
+
|
306
|
+
// For metadata.difficulty, tokens are: [metadata, ., difficulty]
|
307
|
+
// currentTokenIndex points after difficulty, so prevToken(1) = difficulty
|
308
|
+
// We need to check if we can trace back to metadata
|
309
|
+
|
310
|
+
// Start by looking at prevToken(2) which should be "."
|
311
|
+
let index = 2;
|
312
|
+
|
313
|
+
// Walk backwards looking for metadata root
|
314
|
+
while (index <= currentTokenIndex) {
|
315
|
+
const token = tokens[currentTokenIndex - index];
|
316
|
+
if (!token) break;
|
317
|
+
|
318
|
+
if (token.text === kSampleMetadataVariable) {
|
319
|
+
return true;
|
320
|
+
} else if (token.text === "." || token.type === "variable") {
|
321
|
+
index++;
|
322
|
+
} else {
|
323
|
+
break; // Hit a non-metadata token
|
324
|
+
}
|
325
|
+
}
|
326
|
+
|
327
|
+
return false;
|
328
|
+
};
|
329
|
+
|
330
|
+
const makeMetadataKeyCompletion = (key: string): Completion => ({
|
331
|
+
label: key,
|
332
|
+
type: "property",
|
333
|
+
info: `Metadata property: ${key}`,
|
334
|
+
boost: 25,
|
335
|
+
});
|
336
|
+
|
337
|
+
const makeSampleIdCompletion = (id: string | number): Completion => ({
|
338
|
+
label: typeof id === "string" ? `"${id}"` : String(id),
|
339
|
+
type: "text",
|
340
|
+
info: `Sample ID: ${id}`,
|
341
|
+
boost: 25,
|
342
|
+
});
|
343
|
+
|
344
|
+
const makeMetadataValueCompletion = (value: any): Completion => {
|
345
|
+
let label: string;
|
346
|
+
if (typeof value === "string") {
|
347
|
+
label = `"${value}"`;
|
348
|
+
} else if (typeof value === "boolean") {
|
349
|
+
// Use filter expression constants for booleans
|
350
|
+
label = value ? "True" : "False";
|
351
|
+
} else if (value === null) {
|
352
|
+
label = "None";
|
353
|
+
} else {
|
354
|
+
label = String(value);
|
355
|
+
}
|
356
|
+
|
357
|
+
return {
|
358
|
+
label,
|
359
|
+
type: "text",
|
360
|
+
info: `Metadata value: ${value}`,
|
361
|
+
boost: 25,
|
362
|
+
};
|
363
|
+
};
|
364
|
+
|
123
365
|
/**
|
124
366
|
* Generates completions for the filter expression. The main goal is to make the
|
125
367
|
* sample filter intuitive for beginners and to provide a smooth experience for
|
@@ -137,6 +379,7 @@ const getMemberScoreItems = (
|
|
137
379
|
export function getCompletions(
|
138
380
|
context: CompletionContext,
|
139
381
|
filterItems: SampleFilterItem[],
|
382
|
+
samples?: SampleSummary[],
|
140
383
|
): CompletionResult | null {
|
141
384
|
const keywordCompletionItems = KEYWORDS.map(makeKeywordCompletion);
|
142
385
|
const mathFunctionCompletionItems = MATH_FUNCTIONS.map(
|
@@ -145,7 +388,22 @@ export function getCompletions(
|
|
145
388
|
const sampleFunctionCompletionItems = SAMPLE_FUNCTIONS.map(
|
146
389
|
makeSampleFunctionCompletion,
|
147
390
|
);
|
148
|
-
|
391
|
+
// Filter sample variables based on available data
|
392
|
+
const availableSampleVariables = SAMPLE_VARIABLES.filter(([label]) => {
|
393
|
+
if (label === kSampleMetadataVariable) {
|
394
|
+
// Only include metadata if at least one sample has metadata
|
395
|
+
return (
|
396
|
+
samples &&
|
397
|
+
samples.some(
|
398
|
+
(sample) =>
|
399
|
+
sample.metadata && Object.keys(sample.metadata).length > 0,
|
400
|
+
)
|
401
|
+
);
|
402
|
+
}
|
403
|
+
return true;
|
404
|
+
});
|
405
|
+
|
406
|
+
const sampleVariableCompletionItems = availableSampleVariables.map(
|
149
407
|
makeSampleVariableCompletion,
|
150
408
|
);
|
151
409
|
const variableCompletionItems = filterItems.map((item) =>
|
@@ -279,7 +537,7 @@ export function getCompletions(
|
|
279
537
|
autoSpaceAfter: completingAtEnd,
|
280
538
|
});
|
281
539
|
|
282
|
-
const
|
540
|
+
const discreteRelationCompletions = () =>
|
283
541
|
makeCompletions(["==", "!=", "in", "not in"].map(makeKeywordCompletion), {
|
284
542
|
enforceOrder: true,
|
285
543
|
autoSpaceAfter: completingAtEnd,
|
@@ -305,9 +563,22 @@ export function getCompletions(
|
|
305
563
|
|
306
564
|
// Member access
|
307
565
|
if (prevToken(1)?.text === ".") {
|
308
|
-
const
|
309
|
-
|
310
|
-
|
566
|
+
const varName = prevToken(2)?.text;
|
567
|
+
|
568
|
+
// Check if this is metadata property access (metadata.* or metadata.*.*)
|
569
|
+
const metadataPath = buildMetadataPath(tokens, currentTokenIndex);
|
570
|
+
if (metadataPath !== null && samples) {
|
571
|
+
// Get completions for the current metadata path
|
572
|
+
const metadataKeys = Array.from(
|
573
|
+
getMetadataKeysForPath(samples, metadataPath),
|
574
|
+
);
|
575
|
+
const metadataCompletions = metadataKeys.map(makeMetadataKeyCompletion);
|
576
|
+
return makeCompletions(metadataCompletions, {
|
577
|
+
autocompleteInTheMiddle: true,
|
578
|
+
includeDefault: false,
|
579
|
+
});
|
580
|
+
} else if (varName) {
|
581
|
+
return memberAccessCompletions(getMemberScoreItems(filterItems, varName));
|
311
582
|
}
|
312
583
|
}
|
313
584
|
|
@@ -328,12 +599,31 @@ export function getCompletions(
|
|
328
599
|
|
329
600
|
// Variable type-based relation suggestions
|
330
601
|
if (prevToken(1)?.type === "variable") {
|
331
|
-
const
|
602
|
+
const varName = prevToken(1)?.text;
|
332
603
|
|
604
|
+
// Check if this is a metadata property access (metadata.property or metadata.nested.property)
|
605
|
+
if (isMetadataProperty(tokens, currentTokenIndex)) {
|
606
|
+
// This is metadata.property - provide custom relation completions
|
607
|
+
return customRelationCompletions();
|
608
|
+
}
|
609
|
+
|
610
|
+
// Handle sample variables specially
|
611
|
+
if (varName === kSampleIdVariable) {
|
612
|
+
return discreteRelationCompletions();
|
613
|
+
}
|
614
|
+
if (varName === kSampleMetadataVariable) {
|
615
|
+
return customRelationCompletions();
|
616
|
+
}
|
617
|
+
if (varName === "has_error" || varName === "has_retries") {
|
618
|
+
return logicalOpCompletions();
|
619
|
+
}
|
620
|
+
|
621
|
+
// Handle score variables
|
622
|
+
const scoreType = findFilterItem(1)?.scoreType || "";
|
333
623
|
switch (scoreType) {
|
334
624
|
case kScoreTypePassFail:
|
335
625
|
case kScoreTypeCategorical:
|
336
|
-
return
|
626
|
+
return discreteRelationCompletions();
|
337
627
|
case kScoreTypeNumeric:
|
338
628
|
return continuousRelationCompletions();
|
339
629
|
case kScoreTypeOther:
|
@@ -347,6 +637,68 @@ export function getCompletions(
|
|
347
637
|
|
348
638
|
// RHS comparison suggestions
|
349
639
|
if (prevToken(1)?.type === "relation") {
|
640
|
+
const varName = prevToken(2)?.text;
|
641
|
+
|
642
|
+
// Check if this is a metadata property comparison (relation after metadata.property or metadata.nested.property)
|
643
|
+
const metadataPropertyPath = buildMetadataPropertyPath(
|
644
|
+
tokens,
|
645
|
+
currentTokenIndex,
|
646
|
+
);
|
647
|
+
if (metadataPropertyPath !== null && samples) {
|
648
|
+
// This is metadata.property == ... - provide value completions for this property
|
649
|
+
const metadataValues = Array.from(
|
650
|
+
getMetadataPropertyValues(samples, metadataPropertyPath),
|
651
|
+
);
|
652
|
+
|
653
|
+
// Get the current query for prefix filtering
|
654
|
+
const currentQuery = currentToken?.text || "";
|
655
|
+
|
656
|
+
// Pre-filter values to only show prefix matches
|
657
|
+
const filteredValues = currentQuery
|
658
|
+
? metadataValues.filter((value) => {
|
659
|
+
const label =
|
660
|
+
typeof value === "string"
|
661
|
+
? `"${value}"`
|
662
|
+
: typeof value === "boolean"
|
663
|
+
? value
|
664
|
+
? "True"
|
665
|
+
: "False"
|
666
|
+
: value === null
|
667
|
+
? "None"
|
668
|
+
: String(value);
|
669
|
+
return label.toLowerCase().startsWith(currentQuery.toLowerCase());
|
670
|
+
})
|
671
|
+
: metadataValues;
|
672
|
+
|
673
|
+
const metadataValueCompletions = filteredValues.map(
|
674
|
+
makeMetadataValueCompletion,
|
675
|
+
);
|
676
|
+
return makeCompletions(metadataValueCompletions, {
|
677
|
+
includeDefault: false,
|
678
|
+
});
|
679
|
+
}
|
680
|
+
|
681
|
+
// Sample ID completions
|
682
|
+
if (varName === kSampleIdVariable && samples) {
|
683
|
+
const sampleIds = Array.from(getSampleIds(samples));
|
684
|
+
|
685
|
+
// Get the current query for prefix filtering
|
686
|
+
const currentQuery = currentToken?.text || "";
|
687
|
+
|
688
|
+
// Pre-filter IDs to only show prefix matches
|
689
|
+
const filteredIds = currentQuery
|
690
|
+
? sampleIds.filter((id) => {
|
691
|
+
const label = typeof id === "string" ? `"${id}"` : String(id);
|
692
|
+
return label.toLowerCase().startsWith(currentQuery.toLowerCase());
|
693
|
+
})
|
694
|
+
: sampleIds;
|
695
|
+
|
696
|
+
const sampleIdCompletions = filteredIds.map(makeSampleIdCompletion);
|
697
|
+
return makeCompletions(sampleIdCompletions, {
|
698
|
+
includeDefault: false,
|
699
|
+
});
|
700
|
+
}
|
701
|
+
|
350
702
|
const item = findFilterItem(2);
|
351
703
|
if (item?.categories?.length) {
|
352
704
|
return rhsCompletions(item.categories);
|
@@ -1,3 +1,7 @@
|
|
1
|
+
export const kSampleIdVariable = "id";
|
2
|
+
export const kSampleMetadataVariable = "metadata";
|
3
|
+
export const kSampleMetadataPrefix = kSampleMetadataVariable + ".";
|
4
|
+
|
1
5
|
export const KEYWORDS: string[] = ["and", "or", "not", "in", "not in", "mod"];
|
2
6
|
|
3
7
|
export const MATH_FUNCTIONS: [string, string][] = [
|
@@ -16,6 +20,8 @@ export const MATH_FUNCTIONS: [string, string][] = [
|
|
16
20
|
export const SAMPLE_VARIABLES: [string, string][] = [
|
17
21
|
["has_error", "Checks if the sample has an error"],
|
18
22
|
["has_retries", "Checks if the sample has been retried"],
|
23
|
+
[kSampleIdVariable, "The unique identifier of the sample"],
|
24
|
+
[kSampleMetadataVariable, "Metadata associated with the sample"],
|
19
25
|
];
|
20
26
|
|
21
27
|
export const SAMPLE_FUNCTIONS: [string, string][] = [
|
@@ -1,9 +1,9 @@
|
|
1
|
-
import {
|
1
|
+
import { Type21 } from "../../@types/log";
|
2
2
|
|
3
3
|
/**
|
4
4
|
* Formats a limit message
|
5
5
|
*/
|
6
|
-
export const sampleLimitMessage = (type:
|
6
|
+
export const sampleLimitMessage = (type: Type21): string => {
|
7
7
|
switch (type) {
|
8
8
|
case "operator":
|
9
9
|
return "Sample terminated due to operator limit.";
|
@@ -216,7 +216,7 @@ const ToolsConfig: FC<ToolConfigProps> = ({ tools, toolChoice }) => {
|
|
216
216
|
<div className={clsx(styles.toolConfig, "text-size-small")}>
|
217
217
|
{toolEls}
|
218
218
|
</div>
|
219
|
-
<div className={styles.toolChoice}>
|
219
|
+
<div className={clsx(styles.toolChoice, "text-size-small")}>
|
220
220
|
<div className={clsx("text-style-label", "text-style-secondary")}>
|
221
221
|
Tool Choice
|
222
222
|
</div>
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import clsx from "clsx";
|
2
2
|
import { FC } from "react";
|
3
|
-
import { SampleLimitEvent,
|
3
|
+
import { SampleLimitEvent, Type15 } from "../../../@types/log";
|
4
4
|
import { ApplicationIcons } from "../../appearance/icons";
|
5
5
|
import { EventPanel } from "./event/EventPanel";
|
6
6
|
import { EventNode } from "./types";
|
@@ -17,12 +17,12 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
|
|
17
17
|
eventNode,
|
18
18
|
className,
|
19
19
|
}) => {
|
20
|
-
const resolve_title = (type:
|
20
|
+
const resolve_title = (type: Type15) => {
|
21
21
|
switch (type) {
|
22
22
|
case "custom":
|
23
23
|
return "Custom Limit Exceeded";
|
24
24
|
case "time":
|
25
|
-
return "Time Limit
|
25
|
+
return "Time Limit Exceeded";
|
26
26
|
case "message":
|
27
27
|
return "Message Limit Exceeded";
|
28
28
|
case "token":
|
@@ -34,7 +34,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
|
|
34
34
|
}
|
35
35
|
};
|
36
36
|
|
37
|
-
const resolve_icon = (type:
|
37
|
+
const resolve_icon = (type: Type15) => {
|
38
38
|
switch (type) {
|
39
39
|
case "custom":
|
40
40
|
return ApplicationIcons.limits.custom;
|
@@ -16,7 +16,7 @@ import { useScrollTrack, useVirtuosoState } from "../../../../state/scrolling";
|
|
16
16
|
import { useStore } from "../../../../state/store";
|
17
17
|
import { flatTree } from "../transform/treeify";
|
18
18
|
|
19
|
-
import { useSampleDetailNavigation } from "../../../routing/
|
19
|
+
import { useSampleDetailNavigation } from "../../../routing/sampleNavigation";
|
20
20
|
import { kSandboxSignalName } from "../transform/fixups";
|
21
21
|
import { OutlineRow } from "./OutlineRow";
|
22
22
|
import styles from "./TranscriptOutline.module.css";
|
@@ -155,6 +155,29 @@ async function eval_log_sample_data(
|
|
155
155
|
return result;
|
156
156
|
}
|
157
157
|
|
158
|
+
async function log_message(log_file: string, message: string) {
|
159
|
+
const params = new URLSearchParams();
|
160
|
+
params.append("log_file", log_file);
|
161
|
+
params.append("message", message);
|
162
|
+
|
163
|
+
const request: Request<void> = {
|
164
|
+
headers: {
|
165
|
+
"Content-Type": "text/plain",
|
166
|
+
},
|
167
|
+
parse: async (text: string) => {
|
168
|
+
if (text !== "") {
|
169
|
+
throw new Error(`Unexpected response from log_message: ${text}`);
|
170
|
+
}
|
171
|
+
return;
|
172
|
+
},
|
173
|
+
};
|
174
|
+
await apiRequest<void>(
|
175
|
+
"GET",
|
176
|
+
`/api/log-message?${params.toString()}`,
|
177
|
+
request,
|
178
|
+
);
|
179
|
+
}
|
180
|
+
|
158
181
|
interface Request<T> {
|
159
182
|
headers?: Record<string, string>;
|
160
183
|
body?: string;
|
@@ -288,7 +311,9 @@ const browserApi: LogViewAPI = {
|
|
288
311
|
eval_log_size,
|
289
312
|
eval_log_bytes,
|
290
313
|
eval_log_headers,
|
314
|
+
log_message,
|
291
315
|
download_file,
|
316
|
+
|
292
317
|
open_log_file,
|
293
318
|
eval_pending_samples,
|
294
319
|
eval_log_sample_data,
|
@@ -70,6 +70,9 @@ function simpleHttpAPI(logInfo: LogInfo): LogViewAPI {
|
|
70
70
|
|
71
71
|
return undefined;
|
72
72
|
},
|
73
|
+
log_message: async (log_file: string, message: string) => {
|
74
|
+
console.log(`[CLIENT MESSAGE] (${log_file}): ${message}`);
|
75
|
+
},
|
73
76
|
eval_log: async (
|
74
77
|
log_file: string,
|
75
78
|
_headerOnly?: number,
|
@@ -8,6 +8,7 @@ import {
|
|
8
8
|
kMethodEvalLogHeaders,
|
9
9
|
kMethodEvalLogs,
|
10
10
|
kMethodEvalLogSize,
|
11
|
+
kMethodLogMessage,
|
11
12
|
kMethodPendingSamples,
|
12
13
|
kMethodSampleData,
|
13
14
|
webViewJsonRpcClient,
|
@@ -147,6 +148,10 @@ async function eval_log_sample_data(
|
|
147
148
|
}
|
148
149
|
}
|
149
150
|
|
151
|
+
async function log_message(log_file: string, message: string): Promise<void> {
|
152
|
+
await vscodeClient(kMethodLogMessage, [log_file, message]);
|
153
|
+
}
|
154
|
+
|
150
155
|
async function download_file() {
|
151
156
|
throw Error("Downloading files is not supported in VS Code");
|
152
157
|
}
|
@@ -167,6 +172,7 @@ const api: LogViewAPI = {
|
|
167
172
|
eval_log_size,
|
168
173
|
eval_log_bytes,
|
169
174
|
eval_log_headers,
|
175
|
+
log_message,
|
170
176
|
download_file,
|
171
177
|
open_log_file,
|
172
178
|
eval_pending_samples,
|
@@ -335,6 +335,9 @@ export const clientApi = (api: LogViewAPI, log_file?: string): ClientAPI => {
|
|
335
335
|
) => {
|
336
336
|
return api.download_file(download_file, file_contents);
|
337
337
|
},
|
338
|
+
log_message: (log_file: string, message: string) => {
|
339
|
+
return api.log_message(log_file, message);
|
340
|
+
},
|
338
341
|
get_log_pending_samples: api.eval_pending_samples
|
339
342
|
? get_log_pending_samples
|
340
343
|
: undefined,
|
@@ -41,6 +41,7 @@ export const kMethodEvalLogBytes = "eval_log_bytes";
|
|
41
41
|
export const kMethodEvalLogHeaders = "eval_log_headers";
|
42
42
|
export const kMethodPendingSamples = "eval_log_pending_samples";
|
43
43
|
export const kMethodSampleData = "eval_log_sample_data";
|
44
|
+
export const kMethodLogMessage = "log_message";
|
44
45
|
|
45
46
|
export const kJsonRpcParseError = -32700;
|
46
47
|
export const kJsonRpcInvalidRequest = -32600;
|
@@ -115,6 +115,7 @@ export interface SampleSummary {
|
|
115
115
|
scores: Scores1;
|
116
116
|
error?: string;
|
117
117
|
limit?: string;
|
118
|
+
metadata?: Record<string, any>;
|
118
119
|
completed?: boolean;
|
119
120
|
retries?: number;
|
120
121
|
}
|
@@ -149,6 +150,7 @@ export interface LogViewAPI {
|
|
149
150
|
end: number,
|
150
151
|
) => Promise<Uint8Array>;
|
151
152
|
eval_log_headers: (log_files: string[]) => Promise<EvalLog[]>;
|
153
|
+
log_message: (log_file: string, message: string) => Promise<void>;
|
152
154
|
download_file: (
|
153
155
|
filename: string,
|
154
156
|
filecontents: string | Blob | ArrayBuffer | ArrayBufferView,
|
@@ -177,6 +179,7 @@ export interface ClientAPI {
|
|
177
179
|
id: string | number,
|
178
180
|
epoch: number,
|
179
181
|
) => Promise<EvalSample | undefined>;
|
182
|
+
log_message?: (log_file: string, message: string) => Promise<void>;
|
180
183
|
download_file: (
|
181
184
|
file_name: string,
|
182
185
|
file_contents: string | Blob | ArrayBuffer | ArrayBufferView,
|