inspect-ai 0.3.103__py3-none-any.whl → 0.3.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. inspect_ai/_cli/common.py +2 -1
  2. inspect_ai/_cli/eval.py +2 -2
  3. inspect_ai/_display/core/active.py +3 -0
  4. inspect_ai/_display/core/config.py +1 -0
  5. inspect_ai/_display/core/panel.py +21 -13
  6. inspect_ai/_display/core/results.py +3 -7
  7. inspect_ai/_display/core/rich.py +3 -5
  8. inspect_ai/_display/log/__init__.py +0 -0
  9. inspect_ai/_display/log/display.py +173 -0
  10. inspect_ai/_display/plain/display.py +2 -2
  11. inspect_ai/_display/rich/display.py +2 -4
  12. inspect_ai/_display/textual/app.py +1 -6
  13. inspect_ai/_display/textual/widgets/task_detail.py +3 -14
  14. inspect_ai/_display/textual/widgets/tasks.py +1 -1
  15. inspect_ai/_eval/eval.py +1 -1
  16. inspect_ai/_eval/evalset.py +3 -3
  17. inspect_ai/_eval/registry.py +6 -1
  18. inspect_ai/_eval/run.py +5 -1
  19. inspect_ai/_eval/task/constants.py +1 -0
  20. inspect_ai/_eval/task/log.py +2 -0
  21. inspect_ai/_eval/task/run.py +65 -39
  22. inspect_ai/_util/citation.py +88 -0
  23. inspect_ai/_util/content.py +24 -2
  24. inspect_ai/_util/json.py +17 -2
  25. inspect_ai/_util/registry.py +19 -4
  26. inspect_ai/_view/schema.py +0 -6
  27. inspect_ai/_view/server.py +17 -0
  28. inspect_ai/_view/www/dist/assets/index.css +93 -31
  29. inspect_ai/_view/www/dist/assets/index.js +10639 -10011
  30. inspect_ai/_view/www/log-schema.json +418 -1
  31. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  32. inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
  33. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
  34. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
  35. inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
  36. inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
  37. inspect_ai/_view/www/package.json +2 -2
  38. inspect_ai/_view/www/src/@types/log.d.ts +140 -39
  39. inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
  40. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
  41. inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
  42. inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
  43. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
  44. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
  45. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
  46. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
  47. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
  48. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
  49. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
  50. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
  51. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
  52. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
  53. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
  54. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
  55. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
  56. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
  57. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
  58. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
  59. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
  60. inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
  61. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
  62. inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
  63. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
  64. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
  65. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
  66. inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
  67. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  68. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
  69. inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
  70. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
  71. inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
  72. inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
  73. inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
  74. inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
  75. inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
  76. inspect_ai/_view/www/src/client/api/types.ts +3 -0
  77. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
  78. inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
  79. inspect_ai/_view/www/src/tests/README.md +2 -2
  80. inspect_ai/_view/www/src/utils/git.ts +3 -1
  81. inspect_ai/_view/www/src/utils/html.ts +6 -0
  82. inspect_ai/agent/_handoff.py +8 -5
  83. inspect_ai/agent/_react.py +5 -5
  84. inspect_ai/dataset/_dataset.py +1 -1
  85. inspect_ai/log/_condense.py +5 -0
  86. inspect_ai/log/_file.py +4 -1
  87. inspect_ai/log/_log.py +9 -4
  88. inspect_ai/log/_recorders/json.py +4 -2
  89. inspect_ai/log/_samples.py +5 -0
  90. inspect_ai/log/_util.py +2 -0
  91. inspect_ai/model/__init__.py +14 -0
  92. inspect_ai/model/_call_tools.py +17 -8
  93. inspect_ai/model/_chat_message.py +3 -0
  94. inspect_ai/model/_openai_responses.py +80 -34
  95. inspect_ai/model/_providers/_anthropic_citations.py +158 -0
  96. inspect_ai/model/_providers/_google_citations.py +100 -0
  97. inspect_ai/model/_providers/anthropic.py +219 -36
  98. inspect_ai/model/_providers/google.py +98 -22
  99. inspect_ai/model/_providers/mistral.py +20 -7
  100. inspect_ai/model/_providers/openai.py +11 -10
  101. inspect_ai/model/_providers/openai_compatible.py +3 -2
  102. inspect_ai/model/_providers/openai_responses.py +2 -5
  103. inspect_ai/model/_providers/perplexity.py +123 -0
  104. inspect_ai/model/_providers/providers.py +13 -2
  105. inspect_ai/model/_providers/vertex.py +3 -0
  106. inspect_ai/model/_trim.py +5 -0
  107. inspect_ai/tool/__init__.py +14 -0
  108. inspect_ai/tool/_mcp/_mcp.py +5 -2
  109. inspect_ai/tool/_mcp/sampling.py +19 -3
  110. inspect_ai/tool/_mcp/server.py +1 -1
  111. inspect_ai/tool/_tool.py +10 -1
  112. inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
  113. inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
  114. inspect_ai/tool/_tools/_web_search/_google.py +22 -25
  115. inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
  116. inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
  117. inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
  118. inspect_ai/util/__init__.py +8 -0
  119. inspect_ai/util/_background.py +64 -0
  120. inspect_ai/util/_display.py +11 -2
  121. inspect_ai/util/_limit.py +72 -5
  122. inspect_ai/util/_sandbox/__init__.py +2 -0
  123. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  124. inspect_ai/util/_sandbox/service.py +28 -7
  125. inspect_ai/util/_span.py +12 -1
  126. inspect_ai/util/_subprocess.py +51 -38
  127. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +2 -2
  128. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +134 -109
  129. /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
  130. /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
  131. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
  132. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
  133. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
  134. {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
@@ -3,8 +3,10 @@ import {
3
3
  CompletionContext,
4
4
  CompletionResult,
5
5
  CompletionSection,
6
+ startCompletion,
6
7
  } from "@codemirror/autocomplete";
7
8
  import { EditorView } from "codemirror";
9
+ import { SampleSummary } from "../../../../client/api/types";
8
10
  import {
9
11
  kScoreTypeBoolean,
10
12
  kScoreTypeCategorical,
@@ -15,6 +17,8 @@ import {
15
17
  import { SampleFilterItem } from "../filters";
16
18
  import {
17
19
  KEYWORDS,
20
+ kSampleIdVariable,
21
+ kSampleMetadataVariable,
18
22
  MATH_FUNCTIONS,
19
23
  SAMPLE_FUNCTIONS,
20
24
  SAMPLE_VARIABLES,
@@ -53,6 +57,34 @@ const applyWithCall = (
53
57
  });
54
58
  };
55
59
 
60
+ const applyWithDot = (
61
+ view: EditorView,
62
+ completion: Completion,
63
+ from: number,
64
+ to: number,
65
+ ): void => {
66
+ view.dispatch({
67
+ changes: { from, to, insert: `${completion.label}.` },
68
+ selection: { anchor: from + completion.label.length + 1 },
69
+ });
70
+ // trigger completion
71
+ setTimeout(() => startCompletion(view), 0);
72
+ };
73
+
74
+ const applyWithSpace = (
75
+ view: EditorView,
76
+ completion: Completion,
77
+ from: number,
78
+ to: number,
79
+ ): void => {
80
+ view.dispatch({
81
+ changes: { from, to, insert: `${completion.label} ` },
82
+ selection: { anchor: from + completion.label.length + 1 },
83
+ });
84
+ // trigger completion
85
+ setTimeout(() => startCompletion(view), 0);
86
+ };
87
+
56
88
  const makeKeywordCompletion = (k: string): Completion => ({
57
89
  label: k,
58
90
  type: "keyword",
@@ -88,6 +120,12 @@ const makeSampleVariableCompletion = ([label, info]: [
88
120
  label,
89
121
  type: "variable",
90
122
  info,
123
+ apply:
124
+ label === kSampleMetadataVariable
125
+ ? applyWithDot
126
+ : label === kSampleIdVariable
127
+ ? applyWithSpace
128
+ : undefined,
91
129
  boost: 10,
92
130
  });
93
131
 
@@ -120,6 +158,210 @@ const getMemberScoreItems = (
120
158
  ): SampleFilterItem[] =>
121
159
  filterItems.filter((item) => item?.qualifiedName?.startsWith(`${scorer}.`));
122
160
 
161
+ const getSampleIds = (samples: SampleSummary[]): Set<string | number> => {
162
+ const ids = new Set<string | number>();
163
+ for (const sample of samples) {
164
+ ids.add(sample.id);
165
+ }
166
+ return ids;
167
+ };
168
+
169
+ const getMetadataPropertyValues = (
170
+ samples: SampleSummary[],
171
+ propertyPath: string,
172
+ ): Set<any> => {
173
+ const values = new Set<any>();
174
+ for (const sample of samples) {
175
+ if (sample.metadata) {
176
+ const value = getNestedProperty(sample.metadata, propertyPath);
177
+ if (value !== undefined && value !== null) {
178
+ values.add(value);
179
+ }
180
+ }
181
+ }
182
+ return values;
183
+ };
184
+
185
+ const getNestedProperty = (obj: any, path: string): any => {
186
+ const keys = path.split(".");
187
+ let current = obj;
188
+ for (const key of keys) {
189
+ if (current && typeof current === "object" && key in current) {
190
+ current = current[key];
191
+ } else {
192
+ return undefined;
193
+ }
194
+ }
195
+ return current;
196
+ };
197
+
198
+ const buildMetadataPath = (
199
+ tokens: Token[],
200
+ currentTokenIndex: number,
201
+ ): string | null => {
202
+ // Walk backwards to build the metadata path
203
+ // For "metadata." return ""
204
+ // For "metadata.config." return "config"
205
+ // For "metadata.config.timeout." return "config.timeout"
206
+
207
+ const parts: string[] = [];
208
+
209
+ // Start after the first dot
210
+ let index = 2;
211
+
212
+ // Look for the metadata root by walking backwards
213
+ while (index <= currentTokenIndex) {
214
+ const token = tokens[currentTokenIndex - index];
215
+
216
+ if (token?.text === kSampleMetadataVariable) {
217
+ // Found metadata root, return the path
218
+ return parts.reverse().join(".");
219
+ } else if (token?.type === "variable") {
220
+ // Found a variable token, add to path
221
+ parts.push(token.text);
222
+ // Skip the expected dot
223
+ index++;
224
+ if (tokens[currentTokenIndex - index]?.text === ".") {
225
+ // Move past the dot
226
+ index++;
227
+ } else {
228
+ // No dot, not a valid path
229
+ break;
230
+ }
231
+ } else {
232
+ // Hit non-variable, non-metadata token
233
+ break;
234
+ }
235
+ }
236
+
237
+ // Didn't find metadata root
238
+ return null;
239
+ };
240
+
241
+ const getMetadataKeysForPath = (
242
+ samples: SampleSummary[],
243
+ parentPath: string,
244
+ ): Set<string> => {
245
+ const keys = new Set<string>();
246
+ for (const sample of samples) {
247
+ if (sample.metadata) {
248
+ const parentObj = parentPath
249
+ ? getNestedProperty(sample.metadata, parentPath)
250
+ : sample.metadata;
251
+ if (
252
+ parentObj &&
253
+ typeof parentObj === "object" &&
254
+ !Array.isArray(parentObj)
255
+ ) {
256
+ for (const key of Object.keys(parentObj)) {
257
+ keys.add(key);
258
+ }
259
+ }
260
+ }
261
+ }
262
+ return keys;
263
+ };
264
+
265
+ const buildMetadataPropertyPath = (
266
+ tokens: Token[],
267
+ currentTokenIndex: number,
268
+ ): string | null => {
269
+ // Walk backwards to build the full metadata property path
270
+ // e.g., for "metadata.difficulty ==" we want to return "difficulty"
271
+ // e.g., for "metadata.config.timeout ==" we want to return "config.timeout"
272
+ const parts: string[] = [];
273
+
274
+ // Start after the dot
275
+ let index = 2;
276
+
277
+ // Collect the property path by walking backwards
278
+ while (index <= currentTokenIndex) {
279
+ const token = tokens[currentTokenIndex - index];
280
+ if (!token) break;
281
+
282
+ if (token.type === "variable") {
283
+ if (token.text === kSampleMetadataVariable) {
284
+ // Found the metadata root, return the path
285
+ return parts.reverse().join(".");
286
+ } else {
287
+ parts.push(token.text);
288
+ }
289
+ } else if (token.text !== ".") {
290
+ // Hit a non-dot, non-variable token, not a metadata path
291
+ break;
292
+ }
293
+ index++;
294
+ }
295
+
296
+ return null;
297
+ };
298
+
299
+ const isMetadataProperty = (
300
+ tokens: Token[],
301
+ currentTokenIndex: number,
302
+ ): boolean => {
303
+ // Check if the current variable is part of a metadata property access
304
+ // e.g., for "metadata.difficulty" return true
305
+
306
+ // For metadata.difficulty, tokens are: [metadata, ., difficulty]
307
+ // currentTokenIndex points after difficulty, so prevToken(1) = difficulty
308
+ // We need to check if we can trace back to metadata
309
+
310
+ // Start by looking at prevToken(2) which should be "."
311
+ let index = 2;
312
+
313
+ // Walk backwards looking for metadata root
314
+ while (index <= currentTokenIndex) {
315
+ const token = tokens[currentTokenIndex - index];
316
+ if (!token) break;
317
+
318
+ if (token.text === kSampleMetadataVariable) {
319
+ return true;
320
+ } else if (token.text === "." || token.type === "variable") {
321
+ index++;
322
+ } else {
323
+ break; // Hit a non-metadata token
324
+ }
325
+ }
326
+
327
+ return false;
328
+ };
329
+
330
+ const makeMetadataKeyCompletion = (key: string): Completion => ({
331
+ label: key,
332
+ type: "property",
333
+ info: `Metadata property: ${key}`,
334
+ boost: 25,
335
+ });
336
+
337
+ const makeSampleIdCompletion = (id: string | number): Completion => ({
338
+ label: typeof id === "string" ? `"${id}"` : String(id),
339
+ type: "text",
340
+ info: `Sample ID: ${id}`,
341
+ boost: 25,
342
+ });
343
+
344
+ const makeMetadataValueCompletion = (value: any): Completion => {
345
+ let label: string;
346
+ if (typeof value === "string") {
347
+ label = `"${value}"`;
348
+ } else if (typeof value === "boolean") {
349
+ // Use filter expression constants for booleans
350
+ label = value ? "True" : "False";
351
+ } else if (value === null) {
352
+ label = "None";
353
+ } else {
354
+ label = String(value);
355
+ }
356
+
357
+ return {
358
+ label,
359
+ type: "text",
360
+ info: `Metadata value: ${value}`,
361
+ boost: 25,
362
+ };
363
+ };
364
+
123
365
  /**
124
366
  * Generates completions for the filter expression. The main goal is to make the
125
367
  * sample filter intuitive for beginners and to provide a smooth experience for
@@ -137,6 +379,7 @@ const getMemberScoreItems = (
137
379
  export function getCompletions(
138
380
  context: CompletionContext,
139
381
  filterItems: SampleFilterItem[],
382
+ samples?: SampleSummary[],
140
383
  ): CompletionResult | null {
141
384
  const keywordCompletionItems = KEYWORDS.map(makeKeywordCompletion);
142
385
  const mathFunctionCompletionItems = MATH_FUNCTIONS.map(
@@ -145,7 +388,22 @@ export function getCompletions(
145
388
  const sampleFunctionCompletionItems = SAMPLE_FUNCTIONS.map(
146
389
  makeSampleFunctionCompletion,
147
390
  );
148
- const sampleVariableCompletionItems = SAMPLE_VARIABLES.map(
391
+ // Filter sample variables based on available data
392
+ const availableSampleVariables = SAMPLE_VARIABLES.filter(([label]) => {
393
+ if (label === kSampleMetadataVariable) {
394
+ // Only include metadata if at least one sample has metadata
395
+ return (
396
+ samples &&
397
+ samples.some(
398
+ (sample) =>
399
+ sample.metadata && Object.keys(sample.metadata).length > 0,
400
+ )
401
+ );
402
+ }
403
+ return true;
404
+ });
405
+
406
+ const sampleVariableCompletionItems = availableSampleVariables.map(
149
407
  makeSampleVariableCompletion,
150
408
  );
151
409
  const variableCompletionItems = filterItems.map((item) =>
@@ -279,7 +537,7 @@ export function getCompletions(
279
537
  autoSpaceAfter: completingAtEnd,
280
538
  });
281
539
 
282
- const descreteRelationCompletions = () =>
540
+ const discreteRelationCompletions = () =>
283
541
  makeCompletions(["==", "!=", "in", "not in"].map(makeKeywordCompletion), {
284
542
  enforceOrder: true,
285
543
  autoSpaceAfter: completingAtEnd,
@@ -305,9 +563,22 @@ export function getCompletions(
305
563
 
306
564
  // Member access
307
565
  if (prevToken(1)?.text === ".") {
308
- const scorer = prevToken(2)?.text;
309
- if (scorer) {
310
- return memberAccessCompletions(getMemberScoreItems(filterItems, scorer));
566
+ const varName = prevToken(2)?.text;
567
+
568
+ // Check if this is metadata property access (metadata.* or metadata.*.*)
569
+ const metadataPath = buildMetadataPath(tokens, currentTokenIndex);
570
+ if (metadataPath !== null && samples) {
571
+ // Get completions for the current metadata path
572
+ const metadataKeys = Array.from(
573
+ getMetadataKeysForPath(samples, metadataPath),
574
+ );
575
+ const metadataCompletions = metadataKeys.map(makeMetadataKeyCompletion);
576
+ return makeCompletions(metadataCompletions, {
577
+ autocompleteInTheMiddle: true,
578
+ includeDefault: false,
579
+ });
580
+ } else if (varName) {
581
+ return memberAccessCompletions(getMemberScoreItems(filterItems, varName));
311
582
  }
312
583
  }
313
584
 
@@ -328,12 +599,31 @@ export function getCompletions(
328
599
 
329
600
  // Variable type-based relation suggestions
330
601
  if (prevToken(1)?.type === "variable") {
331
- const scoreType = findFilterItem(1)?.scoreType || "";
602
+ const varName = prevToken(1)?.text;
332
603
 
604
+ // Check if this is a metadata property access (metadata.property or metadata.nested.property)
605
+ if (isMetadataProperty(tokens, currentTokenIndex)) {
606
+ // This is metadata.property - provide custom relation completions
607
+ return customRelationCompletions();
608
+ }
609
+
610
+ // Handle sample variables specially
611
+ if (varName === kSampleIdVariable) {
612
+ return discreteRelationCompletions();
613
+ }
614
+ if (varName === kSampleMetadataVariable) {
615
+ return customRelationCompletions();
616
+ }
617
+ if (varName === "has_error" || varName === "has_retries") {
618
+ return logicalOpCompletions();
619
+ }
620
+
621
+ // Handle score variables
622
+ const scoreType = findFilterItem(1)?.scoreType || "";
333
623
  switch (scoreType) {
334
624
  case kScoreTypePassFail:
335
625
  case kScoreTypeCategorical:
336
- return descreteRelationCompletions();
626
+ return discreteRelationCompletions();
337
627
  case kScoreTypeNumeric:
338
628
  return continuousRelationCompletions();
339
629
  case kScoreTypeOther:
@@ -347,6 +637,68 @@ export function getCompletions(
347
637
 
348
638
  // RHS comparison suggestions
349
639
  if (prevToken(1)?.type === "relation") {
640
+ const varName = prevToken(2)?.text;
641
+
642
+ // Check if this is a metadata property comparison (relation after metadata.property or metadata.nested.property)
643
+ const metadataPropertyPath = buildMetadataPropertyPath(
644
+ tokens,
645
+ currentTokenIndex,
646
+ );
647
+ if (metadataPropertyPath !== null && samples) {
648
+ // This is metadata.property == ... - provide value completions for this property
649
+ const metadataValues = Array.from(
650
+ getMetadataPropertyValues(samples, metadataPropertyPath),
651
+ );
652
+
653
+ // Get the current query for prefix filtering
654
+ const currentQuery = currentToken?.text || "";
655
+
656
+ // Pre-filter values to only show prefix matches
657
+ const filteredValues = currentQuery
658
+ ? metadataValues.filter((value) => {
659
+ const label =
660
+ typeof value === "string"
661
+ ? `"${value}"`
662
+ : typeof value === "boolean"
663
+ ? value
664
+ ? "True"
665
+ : "False"
666
+ : value === null
667
+ ? "None"
668
+ : String(value);
669
+ return label.toLowerCase().startsWith(currentQuery.toLowerCase());
670
+ })
671
+ : metadataValues;
672
+
673
+ const metadataValueCompletions = filteredValues.map(
674
+ makeMetadataValueCompletion,
675
+ );
676
+ return makeCompletions(metadataValueCompletions, {
677
+ includeDefault: false,
678
+ });
679
+ }
680
+
681
+ // Sample ID completions
682
+ if (varName === kSampleIdVariable && samples) {
683
+ const sampleIds = Array.from(getSampleIds(samples));
684
+
685
+ // Get the current query for prefix filtering
686
+ const currentQuery = currentToken?.text || "";
687
+
688
+ // Pre-filter IDs to only show prefix matches
689
+ const filteredIds = currentQuery
690
+ ? sampleIds.filter((id) => {
691
+ const label = typeof id === "string" ? `"${id}"` : String(id);
692
+ return label.toLowerCase().startsWith(currentQuery.toLowerCase());
693
+ })
694
+ : sampleIds;
695
+
696
+ const sampleIdCompletions = filteredIds.map(makeSampleIdCompletion);
697
+ return makeCompletions(sampleIdCompletions, {
698
+ includeDefault: false,
699
+ });
700
+ }
701
+
350
702
  const item = findFilterItem(2);
351
703
  if (item?.categories?.length) {
352
704
  return rhsCompletions(item.categories);
@@ -1,3 +1,7 @@
1
+ export const kSampleIdVariable = "id";
2
+ export const kSampleMetadataVariable = "metadata";
3
+ export const kSampleMetadataPrefix = kSampleMetadataVariable + ".";
4
+
1
5
  export const KEYWORDS: string[] = ["and", "or", "not", "in", "not in", "mod"];
2
6
 
3
7
  export const MATH_FUNCTIONS: [string, string][] = [
@@ -16,6 +20,8 @@ export const MATH_FUNCTIONS: [string, string][] = [
16
20
  export const SAMPLE_VARIABLES: [string, string][] = [
17
21
  ["has_error", "Checks if the sample has an error"],
18
22
  ["has_retries", "Checks if the sample has been retried"],
23
+ [kSampleIdVariable, "The unique identifier of the sample"],
24
+ [kSampleMetadataVariable, "Metadata associated with the sample"],
19
25
  ];
20
26
 
21
27
  export const SAMPLE_FUNCTIONS: [string, string][] = [
@@ -1,9 +1,9 @@
1
- import { Type14 } from "../../@types/log";
1
+ import { Type21 } from "../../@types/log";
2
2
 
3
3
  /**
4
4
  * Formats a limit message
5
5
  */
6
- export const sampleLimitMessage = (type: Type14): string => {
6
+ export const sampleLimitMessage = (type: Type21): string => {
7
7
  switch (type) {
8
8
  case "operator":
9
9
  return "Sample terminated due to operator limit.";
@@ -216,7 +216,7 @@ const ToolsConfig: FC<ToolConfigProps> = ({ tools, toolChoice }) => {
216
216
  <div className={clsx(styles.toolConfig, "text-size-small")}>
217
217
  {toolEls}
218
218
  </div>
219
- <div className={styles.toolChoice}>
219
+ <div className={clsx(styles.toolChoice, "text-size-small")}>
220
220
  <div className={clsx("text-style-label", "text-style-secondary")}>
221
221
  Tool Choice
222
222
  </div>
@@ -1,6 +1,6 @@
1
1
  import clsx from "clsx";
2
2
  import { FC } from "react";
3
- import { SampleLimitEvent, Type10 } from "../../../@types/log";
3
+ import { SampleLimitEvent, Type15 } from "../../../@types/log";
4
4
  import { ApplicationIcons } from "../../appearance/icons";
5
5
  import { EventPanel } from "./event/EventPanel";
6
6
  import { EventNode } from "./types";
@@ -17,12 +17,12 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
17
17
  eventNode,
18
18
  className,
19
19
  }) => {
20
- const resolve_title = (type: Type10) => {
20
+ const resolve_title = (type: Type15) => {
21
21
  switch (type) {
22
22
  case "custom":
23
23
  return "Custom Limit Exceeded";
24
24
  case "time":
25
- return "Time Limit Execeeded";
25
+ return "Time Limit Exceeded";
26
26
  case "message":
27
27
  return "Message Limit Exceeded";
28
28
  case "token":
@@ -34,7 +34,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
34
34
  }
35
35
  };
36
36
 
37
- const resolve_icon = (type: Type10) => {
37
+ const resolve_icon = (type: Type15) => {
38
38
  switch (type) {
39
39
  case "custom":
40
40
  return ApplicationIcons.limits.custom;
@@ -49,7 +49,7 @@ export const OutlineRow: FC<OutlineRowProps> = ({
49
49
  <div
50
50
  className={clsx(
51
51
  styles.eventRow,
52
- "text-size-smallest",
52
+ "text-size-smaller",
53
53
  selected ? styles.selected : "",
54
54
  )}
55
55
  style={{ paddingLeft: `${node.depth * 0.4}em` }}
@@ -16,7 +16,7 @@ import { useScrollTrack, useVirtuosoState } from "../../../../state/scrolling";
16
16
  import { useStore } from "../../../../state/store";
17
17
  import { flatTree } from "../transform/treeify";
18
18
 
19
- import { useSampleDetailNavigation } from "../../../routing/navigationHooks";
19
+ import { useSampleDetailNavigation } from "../../../routing/sampleNavigation";
20
20
  import { kSandboxSignalName } from "../transform/fixups";
21
21
  import { OutlineRow } from "./OutlineRow";
22
22
  import styles from "./TranscriptOutline.module.css";
@@ -155,6 +155,29 @@ async function eval_log_sample_data(
155
155
  return result;
156
156
  }
157
157
 
158
+ async function log_message(log_file: string, message: string) {
159
+ const params = new URLSearchParams();
160
+ params.append("log_file", log_file);
161
+ params.append("message", message);
162
+
163
+ const request: Request<void> = {
164
+ headers: {
165
+ "Content-Type": "text/plain",
166
+ },
167
+ parse: async (text: string) => {
168
+ if (text !== "") {
169
+ throw new Error(`Unexpected response from log_message: ${text}`);
170
+ }
171
+ return;
172
+ },
173
+ };
174
+ await apiRequest<void>(
175
+ "GET",
176
+ `/api/log-message?${params.toString()}`,
177
+ request,
178
+ );
179
+ }
180
+
158
181
  interface Request<T> {
159
182
  headers?: Record<string, string>;
160
183
  body?: string;
@@ -288,7 +311,9 @@ const browserApi: LogViewAPI = {
288
311
  eval_log_size,
289
312
  eval_log_bytes,
290
313
  eval_log_headers,
314
+ log_message,
291
315
  download_file,
316
+
292
317
  open_log_file,
293
318
  eval_pending_samples,
294
319
  eval_log_sample_data,
@@ -70,6 +70,9 @@ function simpleHttpAPI(logInfo: LogInfo): LogViewAPI {
70
70
 
71
71
  return undefined;
72
72
  },
73
+ log_message: async (log_file: string, message: string) => {
74
+ console.log(`[CLIENT MESSAGE] (${log_file}): ${message}`);
75
+ },
73
76
  eval_log: async (
74
77
  log_file: string,
75
78
  _headerOnly?: number,
@@ -8,6 +8,7 @@ import {
8
8
  kMethodEvalLogHeaders,
9
9
  kMethodEvalLogs,
10
10
  kMethodEvalLogSize,
11
+ kMethodLogMessage,
11
12
  kMethodPendingSamples,
12
13
  kMethodSampleData,
13
14
  webViewJsonRpcClient,
@@ -147,6 +148,10 @@ async function eval_log_sample_data(
147
148
  }
148
149
  }
149
150
 
151
+ async function log_message(log_file: string, message: string): Promise<void> {
152
+ await vscodeClient(kMethodLogMessage, [log_file, message]);
153
+ }
154
+
150
155
  async function download_file() {
151
156
  throw Error("Downloading files is not supported in VS Code");
152
157
  }
@@ -167,6 +172,7 @@ const api: LogViewAPI = {
167
172
  eval_log_size,
168
173
  eval_log_bytes,
169
174
  eval_log_headers,
175
+ log_message,
170
176
  download_file,
171
177
  open_log_file,
172
178
  eval_pending_samples,
@@ -335,6 +335,9 @@ export const clientApi = (api: LogViewAPI, log_file?: string): ClientAPI => {
335
335
  ) => {
336
336
  return api.download_file(download_file, file_contents);
337
337
  },
338
+ log_message: (log_file: string, message: string) => {
339
+ return api.log_message(log_file, message);
340
+ },
338
341
  get_log_pending_samples: api.eval_pending_samples
339
342
  ? get_log_pending_samples
340
343
  : undefined,
@@ -41,6 +41,7 @@ export const kMethodEvalLogBytes = "eval_log_bytes";
41
41
  export const kMethodEvalLogHeaders = "eval_log_headers";
42
42
  export const kMethodPendingSamples = "eval_log_pending_samples";
43
43
  export const kMethodSampleData = "eval_log_sample_data";
44
+ export const kMethodLogMessage = "log_message";
44
45
 
45
46
  export const kJsonRpcParseError = -32700;
46
47
  export const kJsonRpcInvalidRequest = -32600;
@@ -115,6 +115,7 @@ export interface SampleSummary {
115
115
  scores: Scores1;
116
116
  error?: string;
117
117
  limit?: string;
118
+ metadata?: Record<string, any>;
118
119
  completed?: boolean;
119
120
  retries?: number;
120
121
  }
@@ -149,6 +150,7 @@ export interface LogViewAPI {
149
150
  end: number,
150
151
  ) => Promise<Uint8Array>;
151
152
  eval_log_headers: (log_files: string[]) => Promise<EvalLog[]>;
153
+ log_message: (log_file: string, message: string) => Promise<void>;
152
154
  download_file: (
153
155
  filename: string,
154
156
  filecontents: string | Blob | ArrayBuffer | ArrayBufferView,
@@ -177,6 +179,7 @@ export interface ClientAPI {
177
179
  id: string | number,
178
180
  epoch: number,
179
181
  ) => Promise<EvalSample | undefined>;
182
+ log_message?: (log_file: string, message: string) => Promise<void>;
180
183
  download_file: (
181
184
  file_name: string,
182
185
  file_contents: string | Blob | ArrayBuffer | ArrayBufferView,