inspect-ai 0.3.51__py3-none-any.whl → 0.3.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. inspect_ai/_cli/eval.py +44 -2
  2. inspect_ai/_display/core/config.py +4 -0
  3. inspect_ai/_display/core/panel.py +1 -1
  4. inspect_ai/_display/core/progress.py +9 -3
  5. inspect_ai/_display/core/results.py +8 -4
  6. inspect_ai/_display/textual/widgets/task_detail.py +45 -13
  7. inspect_ai/_display/textual/widgets/tasks.py +86 -5
  8. inspect_ai/_display/textual/widgets/transcript.py +4 -17
  9. inspect_ai/_eval/eval.py +29 -1
  10. inspect_ai/_eval/evalset.py +7 -0
  11. inspect_ai/_eval/registry.py +2 -2
  12. inspect_ai/_eval/task/log.py +6 -1
  13. inspect_ai/_eval/task/results.py +22 -4
  14. inspect_ai/_eval/task/run.py +18 -12
  15. inspect_ai/_eval/task/sandbox.py +72 -43
  16. inspect_ai/_eval/task/task.py +4 -0
  17. inspect_ai/_eval/task/util.py +17 -6
  18. inspect_ai/_util/logger.py +10 -2
  19. inspect_ai/_util/samples.py +7 -0
  20. inspect_ai/_util/transcript.py +8 -0
  21. inspect_ai/_view/www/App.css +13 -0
  22. inspect_ai/_view/www/dist/assets/index.css +13 -0
  23. inspect_ai/_view/www/dist/assets/index.js +105 -55
  24. inspect_ai/_view/www/src/App.mjs +31 -6
  25. inspect_ai/_view/www/src/Types.mjs +6 -0
  26. inspect_ai/_view/www/src/components/JsonPanel.mjs +11 -17
  27. inspect_ai/_view/www/src/components/MessageContent.mjs +9 -2
  28. inspect_ai/_view/www/src/components/Tools.mjs +46 -18
  29. inspect_ai/_view/www/src/navbar/Navbar.mjs +12 -0
  30. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +18 -5
  31. inspect_ai/_view/www/src/samples/SampleList.mjs +2 -2
  32. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +2 -2
  33. inspect_ai/log/_log.py +6 -0
  34. inspect_ai/log/_recorders/eval.py +8 -7
  35. inspect_ai/model/_call_tools.py +2 -6
  36. inspect_ai/model/_generate_config.py +6 -0
  37. inspect_ai/model/_model.py +18 -4
  38. inspect_ai/model/_providers/azureai.py +22 -2
  39. inspect_ai/model/_providers/bedrock.py +17 -1
  40. inspect_ai/model/_providers/hf.py +1 -1
  41. inspect_ai/model/_providers/openai.py +32 -8
  42. inspect_ai/model/_providers/providers.py +1 -1
  43. inspect_ai/model/_providers/vllm.py +1 -1
  44. inspect_ai/model/_render.py +7 -6
  45. inspect_ai/model/_trace.py +1 -1
  46. inspect_ai/solver/_basic_agent.py +8 -1
  47. inspect_ai/tool/_tool_transcript.py +28 -0
  48. inspect_ai/util/_sandbox/context.py +1 -2
  49. inspect_ai/util/_sandbox/docker/config.py +8 -10
  50. inspect_ai/util/_sandbox/docker/docker.py +9 -5
  51. inspect_ai/util/_sandbox/docker/util.py +3 -3
  52. inspect_ai/util/_sandbox/environment.py +7 -2
  53. inspect_ai/util/_sandbox/limits.py +1 -1
  54. inspect_ai/util/_sandbox/local.py +8 -9
  55. {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/METADATA +2 -4
  56. {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/RECORD +60 -59
  57. {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/LICENSE +0 -0
  58. {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/WHEEL +0 -0
  59. {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/entry_points.txt +0 -0
  60. {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/top_level.txt +0 -0
@@ -711,6 +711,19 @@ pre[class*="language-"].tool-output,
711
711
  background-color: #333333;
712
712
  }
713
713
 
714
+ pre[class*="language-"].tool-output {
715
+ border: none !important;
716
+ box-shadow: none !important;
717
+ border-radius: var(--bs-border-radius) !important;
718
+ }
719
+
720
+ .vscode-dark pre.jsonPanel {
721
+ background: none !important;
722
+ border: none !important;
723
+ box-shadow: none !important;
724
+ border-radius: var(--bs-border-radius) !important;
725
+ }
726
+
714
727
 
715
728
  /* jsondiffpatch */
716
729
 
@@ -14984,6 +14984,19 @@ pre[class*="language-"].tool-output,
14984
14984
  background-color: #333333;
14985
14985
  }
14986
14986
 
14987
+ pre[class*="language-"].tool-output {
14988
+ border: none !important;
14989
+ box-shadow: none !important;
14990
+ border-radius: var(--bs-border-radius) !important;
14991
+ }
14992
+
14993
+ .vscode-dark pre.jsonPanel {
14994
+ background: none !important;
14995
+ border: none !important;
14996
+ box-shadow: none !important;
14997
+ border-radius: var(--bs-border-radius) !important;
14998
+ }
14999
+
14987
15000
 
14988
15001
  /* jsondiffpatch */
14989
15002
 
@@ -15251,21 +15251,10 @@ const ToolCallView = ({
15251
15251
  output,
15252
15252
  mode
15253
15253
  }) => {
15254
- const icon = mode === "compact" ? "" : m$1`<i
15255
- class="bi bi-tools"
15256
- style=${{
15257
- marginRight: "0.2rem",
15258
- opacity: "0.4"
15259
- }}
15260
- ></i>`;
15261
- const codeIndent = mode === "compact" ? "" : "";
15262
15254
  return m$1`<div>
15263
- ${icon}
15264
- ${!view || view.title ? m$1`<code style=${{ fontSize: FontSize.small }}
15265
- >${(view == null ? void 0 : view.title) || functionCall}</code
15266
- >` : ""}
15255
+ ${mode !== "compact" && (!view || view.title) ? m$1`<${ToolTitle} title=${(view == null ? void 0 : view.title) || functionCall} />` : ""}
15267
15256
  <div>
15268
- <div style=${{ marginLeft: `${codeIndent}` }}>
15257
+ <div>
15269
15258
  <${ToolInput}
15270
15259
  type=${inputType}
15271
15260
  contents=${input}
@@ -15274,12 +15263,39 @@ const ToolCallView = ({
15274
15263
  />
15275
15264
  ${output ? m$1`
15276
15265
  <${ExpandablePanel} collapse=${true} border=${true} lines=${15}>
15277
- <${MessageContent} contents=${output} />
15266
+ <${MessageContent} contents=${normalizeContent$1(output)} />
15278
15267
  </${ExpandablePanel}>` : ""}
15279
15268
  </div>
15280
15269
  </div>
15281
15270
  </div>`;
15282
15271
  };
15272
+ const ToolTitle = ({ title }) => {
15273
+ return m$1` <i
15274
+ class="bi bi-tools"
15275
+ style=${{
15276
+ marginRight: "0.2rem",
15277
+ opacity: "0.4"
15278
+ }}
15279
+ ></i>
15280
+ <code style=${{ fontSize: FontSize.small }}>${title}</code>`;
15281
+ };
15282
+ const normalizeContent$1 = (output) => {
15283
+ if (Array.isArray(output)) {
15284
+ return output;
15285
+ } else {
15286
+ return [
15287
+ {
15288
+ type: "tool",
15289
+ content: [
15290
+ {
15291
+ type: "text",
15292
+ text: String(output)
15293
+ }
15294
+ ]
15295
+ }
15296
+ ];
15297
+ }
15298
+ };
15283
15299
  const ToolInput = ({ type, contents, view, style }) => {
15284
15300
  if (!contents && !(view == null ? void 0 : view.content)) {
15285
15301
  return "";
@@ -15455,8 +15471,7 @@ const extractInput = (inputKey, args) => {
15455
15471
  args: []
15456
15472
  };
15457
15473
  };
15458
- const MessageContent = (props) => {
15459
- const { contents } = props;
15474
+ const MessageContent = ({ contents }) => {
15460
15475
  if (Array.isArray(contents)) {
15461
15476
  return contents.map((content, index) => {
15462
15477
  if (typeof content === "string") {
@@ -19422,21 +19437,15 @@ const LoggerEventView = ({ id, event, style }) => {
19422
19437
  };
19423
19438
  const kPrismRenderMaxSize = 25e4;
19424
19439
  const JSONPanel = ({ id, json, data, simple, style }) => {
19425
- const sourceCode = json || JSON.stringify(data, void 0, 2);
19426
19440
  const codeRef = A();
19427
- if (codeRef.current) {
19441
+ const sourceCode = T(() => {
19442
+ return json || JSON.stringify(data, void 0, 2);
19443
+ }, [json, data]);
19444
+ y(() => {
19428
19445
  if (sourceCode.length < kPrismRenderMaxSize) {
19429
- codeRef.current.innerHTML = Prism$1.highlight(
19430
- sourceCode,
19431
- Prism$1.languages.javascript,
19432
- "javacript"
19433
- );
19434
- } else {
19435
- const textNode = document.createTextNode(sourceCode);
19436
- codeRef.current.innerText = "";
19437
- codeRef.current.appendChild(textNode);
19446
+ Prism$1.highlightElement(codeRef.current);
19438
19447
  }
19439
- }
19448
+ }, [sourceCode]);
19440
19449
  return m$1`<div>
19441
19450
  <pre
19442
19451
  style=${{
@@ -19446,16 +19455,18 @@ const JSONPanel = ({ id, json, data, simple, style }) => {
19446
19455
  borderRadius: simple ? void 0 : "var(--bs-border-radius)",
19447
19456
  ...style
19448
19457
  }}
19458
+ class="jsonPanel"
19449
19459
  >
19450
19460
  <code
19451
19461
  id=${id}
19452
19462
  ref=${codeRef}
19453
- class="sourceCode-json"
19463
+ class="sourceCode language-javascript"
19454
19464
  style=${{
19455
19465
  fontSize: FontSize.small,
19456
19466
  whiteSpace: "pre-wrap",
19457
19467
  wordWrap: "anywhere"
19458
19468
  }}>
19469
+ ${sourceCode}
19459
19470
  </code>
19460
19471
  </pre>
19461
19472
  </div>`;
@@ -19569,6 +19580,7 @@ const decisionIcon = (decision) => {
19569
19580
  }
19570
19581
  };
19571
19582
  const ToolEventView = ({ id, event, style, depth }) => {
19583
+ var _a2;
19572
19584
  const { input, functionCall, inputType } = resolveToolInput(
19573
19585
  event.function,
19574
19586
  event.arguments
@@ -19576,10 +19588,10 @@ const ToolEventView = ({ id, event, style, depth }) => {
19576
19588
  const approvalEvent = event.events.find((e2) => {
19577
19589
  return e2.event === "approval";
19578
19590
  });
19579
- const title = `Tool: ${event.function}`;
19591
+ const title = `Tool: ${((_a2 = event.view) == null ? void 0 : _a2.title) || event.function}`;
19580
19592
  return m$1`
19581
19593
  <${EventPanel} id=${id} title="${title}" subTitle=${formatDateTime(new Date(event.timestamp))} icon=${ApplicationIcons.solvers.use_tools} style=${style}>
19582
- <div name="Summary" style=${{ margin: "0.5em 0" }}>
19594
+ <div name="Summary" style=${{ margin: "0.5em 0", width: "100%" }}>
19583
19595
  <${ToolCallView}
19584
19596
  functionCall=${functionCall}
19585
19597
  input=${input}
@@ -20225,7 +20237,7 @@ const metadataViewsForSample = (id, sample) => {
20225
20237
  }
20226
20238
  return sampleMetadatas;
20227
20239
  };
20228
- const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
20240
+ const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => {
20229
20241
  const input = (sampleDescriptor == null ? void 0 : sampleDescriptor.messageShape.normalized.input) > 0 ? Math.max(0.15, sampleDescriptor.messageShape.normalized.input) : 0;
20230
20242
  const target = (sampleDescriptor == null ? void 0 : sampleDescriptor.messageShape.normalized.target) > 0 ? Math.max(0.15, sampleDescriptor.messageShape.normalized.target) : 0;
20231
20243
  const answer = (sampleDescriptor == null ? void 0 : sampleDescriptor.messageShape.normalized.answer) > 0 ? Math.max(0.15, sampleDescriptor.messageShape.normalized.answer) : 0;
@@ -20246,7 +20258,7 @@ const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
20246
20258
  const columns = [];
20247
20259
  columns.push({
20248
20260
  label: "Id",
20249
- value: id,
20261
+ value: sample.id,
20250
20262
  size: `${idSize}em`
20251
20263
  });
20252
20264
  columns.push({
@@ -20267,7 +20279,10 @@ const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
20267
20279
  clamp: true
20268
20280
  });
20269
20281
  }
20270
- const fullAnswer = sample && sampleDescriptor ? sampleDescriptor.selectedScorer(sample).answer() : void 0;
20282
+ const fullAnswer = sample && sampleDescriptor ? (
20283
+ // @ts-ignore
20284
+ sampleDescriptor.selectedScorer(sample).answer()
20285
+ ) : void 0;
20271
20286
  if (fullAnswer) {
20272
20287
  columns.push({
20273
20288
  label: "Answer",
@@ -20293,13 +20308,17 @@ const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
20293
20308
  value: sample.error ? m$1`<${FlatSampleError}
20294
20309
  message=${sample.error.message}
20295
20310
  style=${{ marginTop: "0.4rem" }}
20296
- />` : sampleDescriptor == null ? void 0 : sampleDescriptor.selectedScore(sample).render(),
20311
+ />` : (
20312
+ // TODO: Cleanup once the PR lands which makes sample / sample summary share common interface
20313
+ // @ts-ignore
20314
+ sampleDescriptor == null ? void 0 : sampleDescriptor.selectedScore(sample).render()
20315
+ ),
20297
20316
  size: "minmax(2em, auto)",
20298
20317
  center: true
20299
20318
  });
20300
20319
  return m$1`
20301
20320
  <div
20302
- id=${`sample-heading-${id}`}
20321
+ id=${`sample-heading-${parent_id}`}
20303
20322
  style=${{
20304
20323
  display: "grid",
20305
20324
  gridTemplateColumns: `${columns.map((col) => {
@@ -20601,7 +20620,7 @@ const SampleList = (props) => {
20601
20620
  [selectedIndex]
20602
20621
  );
20603
20622
  const listStyle = { ...style, flex: "1", overflowY: "auto", outline: "none" };
20604
- const { limit, answer } = gridColumns(sampleDescriptor);
20623
+ const { limit, answer, target } = gridColumns(sampleDescriptor);
20605
20624
  const headerRow = m$1`<div
20606
20625
  style=${{
20607
20626
  display: "grid",
@@ -20616,7 +20635,7 @@ const SampleList = (props) => {
20616
20635
  >
20617
20636
  <div>Id</div>
20618
20637
  <div>Input</div>
20619
- <div>Target</div>
20638
+ <div>${target !== "0" ? "Target" : ""}</div>
20620
20639
  <div>${answer !== "0" ? "Answer" : ""}</div>
20621
20640
  <div>${limit !== "0" ? "Limit" : ""}</div>
20622
20641
  <div style=${{ justifySelf: "center" }}>Score</div>
@@ -24620,7 +24639,9 @@ const ResultsPanel = ({ results }) => {
24620
24639
  flexWrap: "wrap",
24621
24640
  justifyContent: "end",
24622
24641
  height: "100%",
24623
- alignItems: "center"
24642
+ alignItems: "center",
24643
+ maxHeight: "15em",
24644
+ overflow: "scroll"
24624
24645
  }}
24625
24646
  >
24626
24647
  ${metrics.map((metric, i) => {
@@ -24638,7 +24659,9 @@ const ResultsPanel = ({ results }) => {
24638
24659
  alignItems: "center",
24639
24660
  marginTop: "0.2rem",
24640
24661
  paddingBottom: "0.4rem",
24641
- rowGap: "1em"
24662
+ rowGap: "1em",
24663
+ maxHeight: "15em",
24664
+ overflow: "scroll"
24642
24665
  }}
24643
24666
  >
24644
24667
  ${(_b2 = results == null ? void 0 : results.scores) == null ? void 0 : _b2.map((score, index) => {
@@ -25449,22 +25472,35 @@ const createsSamplesDescriptor = (scorers, samples, epochs, selectedScore) => {
25449
25472
  if (!sample || !sample.scores) {
25450
25473
  return [];
25451
25474
  }
25452
- scorers.map((score2) => {
25475
+ const scoreNames = scorers.map((score2) => {
25453
25476
  return score2.name;
25454
25477
  });
25455
25478
  const sampleScorer = sample.scores[scorer];
25456
25479
  const scoreVal = sampleScorer.value;
25457
25480
  if (typeof scoreVal === "object") {
25458
25481
  const names = Object.keys(scoreVal);
25459
- const scores = names.map((name) => {
25460
- return {
25461
- name,
25462
- rendered: () => {
25463
- return scoreDescriptor.render(scoreVal[name]);
25482
+ if (names.find((name) => {
25483
+ return scoreNames.includes(name);
25484
+ })) {
25485
+ const scores = names.map((name) => {
25486
+ return {
25487
+ name,
25488
+ rendered: () => {
25489
+ return scoreDescriptor.render(scoreVal[name]);
25490
+ }
25491
+ };
25492
+ });
25493
+ return scores;
25494
+ } else {
25495
+ return [
25496
+ {
25497
+ name: scorer,
25498
+ rendered: () => {
25499
+ return scoreDescriptor.render(scoreVal);
25500
+ }
25464
25501
  }
25465
- };
25466
- });
25467
- return scores;
25502
+ ];
25503
+ }
25468
25504
  } else {
25469
25505
  return [
25470
25506
  {
@@ -26210,8 +26246,12 @@ function App({
26210
26246
  }
26211
26247
  }
26212
26248
  } catch (e2) {
26213
- console.log(e2);
26214
- setStatus({ loading: false, error: e2 });
26249
+ if (e2.message === "Load failed" || e2.message === "Failed to fetch") {
26250
+ setStatus({ loading: false });
26251
+ } else {
26252
+ console.log(e2);
26253
+ setStatus({ loading: false, error: e2 });
26254
+ }
26215
26255
  }
26216
26256
  setHeadersLoading(false);
26217
26257
  };
@@ -26439,8 +26479,11 @@ function App({
26439
26479
  }
26440
26480
  new ClipboardJS(".clipboard-button,.copy-button");
26441
26481
  if (pollForLogs) {
26442
- setInterval(() => {
26443
- api2.client_events().then(async (events) => {
26482
+ let retryDelay = 1e3;
26483
+ const maxRetryDelay = 6e4;
26484
+ const pollEvents = async () => {
26485
+ try {
26486
+ const events = await api2.client_events();
26444
26487
  if (events.includes("reload")) {
26445
26488
  window.location.reload();
26446
26489
  }
@@ -26449,8 +26492,15 @@ function App({
26449
26492
  setLogs(logs2);
26450
26493
  setSelectedLogIndex(0);
26451
26494
  }
26452
- });
26453
- }, 1e3);
26495
+ retryDelay = 1e3;
26496
+ } catch (error2) {
26497
+ console.error("Error fetching client events:", error2);
26498
+ retryDelay = Math.min(retryDelay * 2, maxRetryDelay);
26499
+ } finally {
26500
+ setTimeout(pollEvents, retryDelay);
26501
+ }
26502
+ };
26503
+ pollEvents();
26454
26504
  }
26455
26505
  };
26456
26506
  loadLogsAndState();
@@ -480,8 +480,13 @@ export function App({
480
480
  }
481
481
  }
482
482
  } catch (e) {
483
- console.log(e);
484
- setStatus({ loading: false, error: e });
483
+ if (e.message === "Load failed" || e.message === "Failed to fetch") {
484
+ // This will happen if the server disappears (e.g. inspect view is terminated)
485
+ setStatus({ loading: false });
486
+ } else {
487
+ console.log(e);
488
+ setStatus({ loading: false, error: e });
489
+ }
485
490
  }
486
491
 
487
492
  setHeadersLoading(false);
@@ -774,18 +779,38 @@ export function App({
774
779
  new ClipboardJS(".clipboard-button,.copy-button");
775
780
 
776
781
  if (pollForLogs) {
777
- setInterval(() => {
778
- api.client_events().then(async (events) => {
782
+ let retryDelay = 1000;
783
+ const maxRetryDelay = 60000;
784
+
785
+ const pollEvents = async () => {
786
+ try {
787
+ const events = await api.client_events();
788
+
779
789
  if (events.includes("reload")) {
780
790
  window.location.reload();
781
791
  }
792
+
782
793
  if (events.includes("refresh-evals")) {
783
794
  const logs = await load();
784
795
  setLogs(logs);
785
796
  setSelectedLogIndex(0);
786
797
  }
787
- });
788
- }, 1000);
798
+
799
+ // Reset delay after a successful call
800
+ retryDelay = 1000;
801
+ } catch (error) {
802
+ console.error("Error fetching client events:", error);
803
+
804
+ // Exponential backoff with capping
805
+ retryDelay = Math.min(retryDelay * 2, maxRetryDelay);
806
+ } finally {
807
+ // Schedule the next poll
808
+ setTimeout(pollEvents, retryDelay);
809
+ }
810
+ };
811
+
812
+ // Start polling
813
+ pollEvents();
789
814
  }
790
815
  };
791
816
 
@@ -32,3 +32,9 @@
32
32
  /**
33
33
  * @typedef {"none" | "single" | "many"} SampleMode
34
34
  */
35
+
36
+ /**
37
+ * @typedef {Object} ContentTool
38
+ * @property {"tool"} type
39
+ * @property {(import("./types/log").ContentImage | import("./types/log").ContentText)[]} content
40
+ */
@@ -4,7 +4,7 @@ import Prism from "prismjs";
4
4
  import "prismjs/components/prism-json";
5
5
 
6
6
  import { html } from "htm/preact";
7
- import { useRef } from "preact/hooks";
7
+ import { useEffect, useMemo, useRef } from "preact/hooks";
8
8
  import { FontSize } from "../appearance/Fonts.mjs";
9
9
 
10
10
  const kPrismRenderMaxSize = 250000;
@@ -22,25 +22,17 @@ const kPrismRenderMaxSize = 250000;
22
22
  * @returns {import('preact').JSX.Element} The rendered component.
23
23
  */
24
24
  export const JSONPanel = ({ id, json, data, simple, style }) => {
25
- const sourceCode = json || JSON.stringify(data, undefined, 2);
26
25
  const codeRef = useRef();
27
26
 
28
- if (codeRef.current) {
27
+ const sourceCode = useMemo(() => {
28
+ return json || JSON.stringify(data, undefined, 2);
29
+ }, [json, data]);
30
+
31
+ useEffect(() => {
29
32
  if (sourceCode.length < kPrismRenderMaxSize) {
30
- // @ts-ignore
31
- codeRef.current.innerHTML = Prism.highlight(
32
- sourceCode,
33
- Prism.languages.javascript,
34
- "javacript",
35
- );
36
- } else {
37
- const textNode = document.createTextNode(sourceCode);
38
- // @ts-ignore
39
- codeRef.current.innerText = "";
40
- // @ts-ignore
41
- codeRef.current.appendChild(textNode);
33
+ Prism.highlightElement(codeRef.current);
42
34
  }
43
- }
35
+ }, [sourceCode]);
44
36
 
45
37
  return html`<div>
46
38
  <pre
@@ -51,16 +43,18 @@ export const JSONPanel = ({ id, json, data, simple, style }) => {
51
43
  borderRadius: simple ? undefined : "var(--bs-border-radius)",
52
44
  ...style,
53
45
  }}
46
+ class="jsonPanel"
54
47
  >
55
48
  <code
56
49
  id=${id}
57
50
  ref=${codeRef}
58
- class="sourceCode-json"
51
+ class="sourceCode language-javascript"
59
52
  style=${{
60
53
  fontSize: FontSize.small,
61
54
  whiteSpace: "pre-wrap",
62
55
  wordWrap: "anywhere",
63
56
  }}>
57
+ ${sourceCode}
64
58
  </code>
65
59
  </pre>
66
60
  </div>`;
@@ -2,8 +2,15 @@ import { html } from "htm/preact";
2
2
  import { MarkdownDiv } from "./MarkdownDiv.mjs";
3
3
  import { ToolOutput } from "./Tools.mjs";
4
4
 
5
- export const MessageContent = (props) => {
6
- const { contents } = props;
5
+ /**
6
+ * Renders message content based on its type.
7
+ * Supports rendering strings, images, and tools using specific renderers.
8
+ *
9
+ * @param {Object} props - The props object.
10
+ * @param {string|string[]| (import("../types/log").ContentText | import("../types/log").ContentImage | import("../Types.mjs").ContentTool)[]} props.contents - The content or array of contents to render.
11
+ * @returns {import("preact").JSX.Element | import("preact").JSX.Element[]} The component.
12
+ */
13
+ export const MessageContent = ({ contents }) => {
7
14
  if (Array.isArray(contents)) {
8
15
  return contents.map((content, index) => {
9
16
  if (typeof content === "string") {
@@ -63,26 +63,12 @@ export const ToolCallView = ({
63
63
  output,
64
64
  mode,
65
65
  }) => {
66
- const icon =
67
- mode === "compact"
68
- ? ""
69
- : html`<i
70
- class="bi bi-tools"
71
- style=${{
72
- marginRight: "0.2rem",
73
- opacity: "0.4",
74
- }}
75
- ></i>`;
76
- const codeIndent = mode === "compact" ? "" : "";
77
66
  return html`<div>
78
- ${icon}
79
- ${!view || view.title
80
- ? html`<code style=${{ fontSize: FontSize.small }}
81
- >${view?.title || functionCall}</code
82
- >`
67
+ ${mode !== "compact" && (!view || view.title)
68
+ ? html`<${ToolTitle} title=${view?.title || functionCall} />`
83
69
  : ""}
84
70
  <div>
85
- <div style=${{ marginLeft: `${codeIndent}` }}>
71
+ <div>
86
72
  <${ToolInput}
87
73
  type=${inputType}
88
74
  contents=${input}
@@ -92,7 +78,7 @@ export const ToolCallView = ({
92
78
  ${output
93
79
  ? html`
94
80
  <${ExpandablePanel} collapse=${true} border=${true} lines=${15}>
95
- <${MessageContent} contents=${output} />
81
+ <${MessageContent} contents=${normalizeContent(output)} />
96
82
  </${ExpandablePanel}>`
97
83
  : ""}
98
84
  </div>
@@ -100,6 +86,48 @@ export const ToolCallView = ({
100
86
  </div>`;
101
87
  };
102
88
 
89
+ /**
90
+ * Renders the title for a tool call: a tools icon followed by the title text.
91
+ *
92
+ * @param {Object} props - The parameters for the component.
93
+ * @param {string} props.title - The title for the tool call
94
+ * @returns {import("preact").JSX.Element} The ToolTitle component.
95
+ */
96
+ const ToolTitle = ({ title }) => {
97
+ return html` <i
98
+ class="bi bi-tools"
99
+ style=${{
100
+ marginRight: "0.2rem",
101
+ opacity: "0.4",
102
+ }}
103
+ ></i>
104
+ <code style=${{ fontSize: FontSize.small }}>${title}</code>`;
105
+ };
106
+
107
+ /**
108
+ * Normalizes tool output into an array of content items; non-array output is wrapped as a single "tool" content item holding its text.
109
+ *
110
+ * @param {string | number | boolean | (import("../types/log").ContentText | import("../types/log").ContentImage)[]} output - The tool output
111
+ * @returns {(import("../Types.mjs").ContentTool | import("../types/log").ContentText | import("../types/log").ContentImage)[]} The output as an array of content items.
112
+ */
113
+ const normalizeContent = (output) => {
114
+ if (Array.isArray(output)) {
115
+ return output;
116
+ } else {
117
+ return [
118
+ {
119
+ type: "tool",
120
+ content: [
121
+ {
122
+ type: "text",
123
+ text: String(output),
124
+ },
125
+ ],
126
+ },
127
+ ];
128
+ }
129
+ };
130
+
103
131
  /**
104
132
  * Renders the ToolInput component.
105
133
  *
@@ -255,6 +255,8 @@ const ResultsPanel = ({ results }) => {
255
255
  justifyContent: "end",
256
256
  height: "100%",
257
257
  alignItems: "center",
258
+ maxHeight: "15em",
259
+ overflow: "scroll",
258
260
  }}
259
261
  >
260
262
  ${metrics.map((metric, i) => {
@@ -273,6 +275,8 @@ const ResultsPanel = ({ results }) => {
273
275
  marginTop: "0.2rem",
274
276
  paddingBottom: "0.4rem",
275
277
  rowGap: "1em",
278
+ maxHeight: "15em",
279
+ overflow: "scroll",
276
280
  }}
277
281
  >
278
282
  ${results?.scores?.map((score, index) => {
@@ -285,6 +289,14 @@ const ResultsPanel = ({ results }) => {
285
289
  }
286
290
  };
287
291
 
292
+ /** Renders a Vertical Metric
293
+ *
294
+ * @param {Object} props - The parameters for the component.
295
+ * @param {import("../types/log").EvalMetric} props.metric - The metric
296
+ * @param {boolean} props.isFirst - Whether this is the first metric
297
+ *
298
+ * @returns {import("preact").JSX.Element} The VerticalMetric component.
299
+ */
288
300
  const VerticalMetric = ({ metric, isFirst }) => {
289
301
  const reducer_component = metric.reducer
290
302
  ? html` <div