inspect-ai 0.3.52__py3-none-any.whl → 0.3.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. inspect_ai/_cli/eval.py +29 -0
  2. inspect_ai/_display/core/progress.py +9 -3
  3. inspect_ai/_display/core/results.py +8 -4
  4. inspect_ai/_display/textual/widgets/task_detail.py +3 -0
  5. inspect_ai/_display/textual/widgets/tasks.py +86 -5
  6. inspect_ai/_eval/eval.py +16 -0
  7. inspect_ai/_eval/evalset.py +4 -0
  8. inspect_ai/_eval/registry.py +2 -2
  9. inspect_ai/_eval/task/results.py +22 -4
  10. inspect_ai/_eval/task/run.py +14 -10
  11. inspect_ai/_eval/task/sandbox.py +72 -43
  12. inspect_ai/_eval/task/task.py +4 -0
  13. inspect_ai/_eval/task/util.py +2 -0
  14. inspect_ai/_view/www/App.css +13 -0
  15. inspect_ai/_view/www/dist/assets/index.css +13 -0
  16. inspect_ai/_view/www/dist/assets/index.js +80 -43
  17. inspect_ai/_view/www/src/App.mjs +31 -6
  18. inspect_ai/_view/www/src/Types.mjs +6 -0
  19. inspect_ai/_view/www/src/components/JsonPanel.mjs +11 -17
  20. inspect_ai/_view/www/src/components/MessageContent.mjs +9 -2
  21. inspect_ai/_view/www/src/components/Tools.mjs +46 -18
  22. inspect_ai/_view/www/src/navbar/Navbar.mjs +12 -0
  23. inspect_ai/_view/www/src/samples/SampleList.mjs +2 -2
  24. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +2 -2
  25. inspect_ai/log/_log.py +3 -0
  26. inspect_ai/log/_recorders/eval.py +8 -7
  27. inspect_ai/model/_generate_config.py +6 -0
  28. inspect_ai/model/_providers/azureai.py +1 -1
  29. inspect_ai/model/_providers/bedrock.py +17 -1
  30. inspect_ai/model/_providers/hf.py +1 -1
  31. inspect_ai/model/_providers/openai.py +32 -8
  32. inspect_ai/model/_providers/providers.py +1 -1
  33. inspect_ai/model/_providers/vllm.py +1 -1
  34. inspect_ai/util/_sandbox/context.py +1 -2
  35. inspect_ai/util/_sandbox/docker/config.py +8 -10
  36. inspect_ai/util/_sandbox/docker/docker.py +9 -5
  37. inspect_ai/util/_sandbox/docker/util.py +3 -3
  38. inspect_ai/util/_sandbox/environment.py +7 -2
  39. inspect_ai/util/_sandbox/limits.py +1 -1
  40. inspect_ai/util/_sandbox/local.py +8 -9
  41. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/METADATA +1 -3
  42. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/RECORD +46 -46
  43. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/LICENSE +0 -0
  44. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/WHEEL +0 -0
  45. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/entry_points.txt +0 -0
  46. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,8 @@
1
1
  import asyncio
2
2
  import base64
3
3
  import contextlib
4
- from typing import AsyncGenerator, NamedTuple
4
+ from random import random
5
+ from typing import AsyncGenerator, Callable, NamedTuple, cast
5
6
 
6
7
  from inspect_ai._eval.task.task import Task
7
8
  from inspect_ai._eval.task.util import task_run_dir
@@ -9,6 +10,7 @@ from inspect_ai._util.file import file, filesystem
9
10
  from inspect_ai._util.registry import registry_unqualified_name
10
11
  from inspect_ai._util.url import data_uri_to_base64, is_data_uri
11
12
  from inspect_ai.dataset import Sample
13
+ from inspect_ai.util._concurrency import concurrency
12
14
  from inspect_ai.util._sandbox.context import (
13
15
  cleanup_sandbox_environments_sample,
14
16
  init_sandbox_environments_sample,
@@ -18,12 +20,14 @@ from inspect_ai.util._sandbox.environment import (
18
20
  SandboxEnvironmentConfigType,
19
21
  SandboxEnvironmentSpec,
20
22
  )
23
+ from inspect_ai.util._sandbox.registry import registry_find_sandboxenv
21
24
 
22
25
 
23
26
  @contextlib.asynccontextmanager
24
27
  async def sandboxenv_context(
25
28
  task_name: str,
26
29
  sandbox: SandboxEnvironmentSpec | None,
30
+ max_sandboxes: int | None,
27
31
  cleanup: bool,
28
32
  sample: Sample,
29
33
  ) -> AsyncGenerator[None, None]:
@@ -32,52 +36,77 @@ async def sandboxenv_context(
32
36
  if not sandbox:
33
37
  raise ValueError("sandboxenv_context called with no sandbox specified")
34
38
 
35
- # read files from sample
36
- files: dict[str, bytes] = {}
37
- if sample.files:
38
- for path, contents in sample.files.items():
39
- files[path] = read_sandboxenv_file(contents)
40
-
41
- # read setup script from sample (add bash shebang if necessary)
42
- setup: bytes | None = None
43
- if sample.setup:
44
- setup = read_sandboxenv_file(sample.setup)
45
- setup_str = setup.decode(encoding="utf-8")
46
- if not setup_str.strip().startswith("#!"):
47
- setup_str = f"#!/usr/bin/env bash\n\n{setup_str}"
48
- setup = setup_str.encode(encoding="utf-8")
49
-
50
- interrupted = False
51
- environments: dict[str, SandboxEnvironment] | None = None
52
- try:
53
- # initialize sandbox environment,
54
- environments = await init_sandbox_environments_sample(
55
- type=sandbox.type,
56
- task_name=registry_unqualified_name(task_name),
57
- config=sandbox.config,
58
- files=files,
59
- setup=setup,
60
- metadata=sample.metadata if sample.metadata else {},
61
- )
62
-
63
- # run sample
64
- yield
65
-
66
- except asyncio.CancelledError as ex:
67
- interrupted = True
68
- raise ex
39
+ # get sandboxenv_type
40
+ sandboxenv_type = registry_find_sandboxenv(sandbox.type)
69
41
 
70
- finally:
71
- # cleanup sandbox environment
72
- if environments and cleanup:
73
- await cleanup_sandbox_environments_sample(
74
- type=sandbox.type,
75
- task_name=task_name,
42
+ # see if there is a max_sandboxes in play (passed or from type)
43
+ if max_sandboxes is None:
44
+ default_concurrency_fn = cast(
45
+ Callable[[], int | None], getattr(sandboxenv_type, "default_concurrency")
46
+ )
47
+ max_sandboxes = default_concurrency_fn()
48
+
49
+ # if we are enforcing max_sandboxes, then when samples are scheduled they may
50
+ # not get interleaved properly across tasks (because the first task will come
51
+ # in and grab all of the sandboxes). Therefore, in this case we wait a random
52
+ # delay so that all tasks/samples have an equal shot at getting scheduled.
53
+ if max_sandboxes is not None:
54
+ await asyncio.sleep(random())
55
+
56
+ # enforce concurrency if required
57
+ sandboxes_cm = (
58
+ concurrency(sandbox.type, max_sandboxes, f"sandboxes/{sandbox.type}")
59
+ if max_sandboxes is not None
60
+ else contextlib.nullcontext()
61
+ )
62
+
63
+ async with sandboxes_cm:
64
+ # read files from sample
65
+ files: dict[str, bytes] = {}
66
+ if sample.files:
67
+ for path, contents in sample.files.items():
68
+ files[path] = read_sandboxenv_file(contents)
69
+
70
+ # read setup script from sample (add bash shebang if necessary)
71
+ setup: bytes | None = None
72
+ if sample.setup:
73
+ setup = read_sandboxenv_file(sample.setup)
74
+ setup_str = setup.decode(encoding="utf-8")
75
+ if not setup_str.strip().startswith("#!"):
76
+ setup_str = f"#!/usr/bin/env bash\n\n{setup_str}"
77
+ setup = setup_str.encode(encoding="utf-8")
78
+
79
+ interrupted = False
80
+ environments: dict[str, SandboxEnvironment] | None = None
81
+ try:
82
+ # initialize sandbox environment,
83
+ environments = await init_sandbox_environments_sample(
84
+ sandboxenv_type=sandboxenv_type,
85
+ task_name=registry_unqualified_name(task_name),
76
86
  config=sandbox.config,
77
- environments=environments,
78
- interrupted=interrupted,
87
+ files=files,
88
+ setup=setup,
89
+ metadata=sample.metadata if sample.metadata else {},
79
90
  )
80
91
 
92
+ # run sample
93
+ yield
94
+
95
+ except asyncio.CancelledError as ex:
96
+ interrupted = True
97
+ raise ex
98
+
99
+ finally:
100
+ # cleanup sandbox environment
101
+ if environments and cleanup:
102
+ await cleanup_sandbox_environments_sample(
103
+ type=sandbox.type,
104
+ task_name=task_name,
105
+ config=sandbox.config,
106
+ environments=environments,
107
+ interrupted=interrupted,
108
+ )
109
+
81
110
 
82
111
  def read_sandboxenv_file(contents: str) -> bytes:
83
112
  if is_data_uri(contents):
@@ -39,6 +39,8 @@ class Task:
39
39
 
40
40
  Args:
41
41
  dataset (Dataset | Sequence[Sample]): Dataset to evaluate
42
+ setup: (Solver | list[Solver] | None): Setup step (always run
43
+ even when the main `solver` is replaced).
42
44
  solver: (Solver | list[Solver]): Solver or list of solvers.
43
45
  Defaults to generate(), a normal call to the model.
44
46
  scorer: (Scorer | list[Scorer] | None): Scorer used to evaluate model output.
@@ -68,6 +70,7 @@ class Task:
68
70
  def __init__(
69
71
  self,
70
72
  dataset: Dataset | Sequence[Sample] | None = None,
73
+ setup: Solver | list[Solver] | None = None,
71
74
  solver: Solver | list[Solver] = generate(),
72
75
  scorer: Scorer | list[Scorer] | None = None,
73
76
  metrics: list[Metric] | dict[str, list[Metric]] | None = None,
@@ -119,6 +122,7 @@ class Task:
119
122
  self.dataset: Dataset = (
120
123
  dataset if isinstance(dataset, Dataset) else MemoryDataset(list(dataset))
121
124
  )
125
+ self.setup = setup
122
126
  self.solver = chain(solver) if isinstance(solver, list) else solver
123
127
  self.scorer = (
124
128
  scorer
@@ -42,6 +42,8 @@ def slice_dataset(
42
42
  sample_id: str | int | list[str | int] | None,
43
43
  ) -> Dataset:
44
44
  def normalise(id: str | int | None) -> str:
45
+ if isinstance(id, str) and id.isdigit():
46
+ id = int(id)
45
47
  return id if isinstance(id, str) else str(id).zfill(20)
46
48
 
47
49
  if sample_id is not None:
@@ -711,6 +711,19 @@ pre[class*="language-"].tool-output,
711
711
  background-color: #333333;
712
712
  }
713
713
 
714
+ pre[class*="language-"].tool-output {
715
+ border: none !important;
716
+ box-shadow: none !important;
717
+ border-radius: var(--bs-border-radius) !important;
718
+ }
719
+
720
+ .vscode-dark pre.jsonPanel {
721
+ background: none !important;
722
+ border: none !important;
723
+ box-shadow: none !important;
724
+ border-radius: var(--bs-border-radius) !important;
725
+ }
726
+
714
727
 
715
728
  /* jsondiffpatch */
716
729
 
@@ -14984,6 +14984,19 @@ pre[class*="language-"].tool-output,
14984
14984
  background-color: #333333;
14985
14985
  }
14986
14986
 
14987
+ pre[class*="language-"].tool-output {
14988
+ border: none !important;
14989
+ box-shadow: none !important;
14990
+ border-radius: var(--bs-border-radius) !important;
14991
+ }
14992
+
14993
+ .vscode-dark pre.jsonPanel {
14994
+ background: none !important;
14995
+ border: none !important;
14996
+ box-shadow: none !important;
14997
+ border-radius: var(--bs-border-radius) !important;
14998
+ }
14999
+
14987
15000
 
14988
15001
  /* jsondiffpatch */
14989
15002
 
@@ -15251,21 +15251,10 @@ const ToolCallView = ({
15251
15251
  output,
15252
15252
  mode
15253
15253
  }) => {
15254
- const icon = mode === "compact" ? "" : m$1`<i
15255
- class="bi bi-tools"
15256
- style=${{
15257
- marginRight: "0.2rem",
15258
- opacity: "0.4"
15259
- }}
15260
- ></i>`;
15261
- const codeIndent = mode === "compact" ? "" : "";
15262
15254
  return m$1`<div>
15263
- ${icon}
15264
- ${!view || view.title ? m$1`<code style=${{ fontSize: FontSize.small }}
15265
- >${(view == null ? void 0 : view.title) || functionCall}</code
15266
- >` : ""}
15255
+ ${mode !== "compact" && (!view || view.title) ? m$1`<${ToolTitle} title=${(view == null ? void 0 : view.title) || functionCall} />` : ""}
15267
15256
  <div>
15268
- <div style=${{ marginLeft: `${codeIndent}` }}>
15257
+ <div>
15269
15258
  <${ToolInput}
15270
15259
  type=${inputType}
15271
15260
  contents=${input}
@@ -15274,12 +15263,39 @@ const ToolCallView = ({
15274
15263
  />
15275
15264
  ${output ? m$1`
15276
15265
  <${ExpandablePanel} collapse=${true} border=${true} lines=${15}>
15277
- <${MessageContent} contents=${output} />
15266
+ <${MessageContent} contents=${normalizeContent$1(output)} />
15278
15267
  </${ExpandablePanel}>` : ""}
15279
15268
  </div>
15280
15269
  </div>
15281
15270
  </div>`;
15282
15271
  };
15272
+ const ToolTitle = ({ title }) => {
15273
+ return m$1` <i
15274
+ class="bi bi-tools"
15275
+ style=${{
15276
+ marginRight: "0.2rem",
15277
+ opacity: "0.4"
15278
+ }}
15279
+ ></i>
15280
+ <code style=${{ fontSize: FontSize.small }}>${title}</code>`;
15281
+ };
15282
+ const normalizeContent$1 = (output) => {
15283
+ if (Array.isArray(output)) {
15284
+ return output;
15285
+ } else {
15286
+ return [
15287
+ {
15288
+ type: "tool",
15289
+ content: [
15290
+ {
15291
+ type: "text",
15292
+ text: String(output)
15293
+ }
15294
+ ]
15295
+ }
15296
+ ];
15297
+ }
15298
+ };
15283
15299
  const ToolInput = ({ type, contents, view, style }) => {
15284
15300
  if (!contents && !(view == null ? void 0 : view.content)) {
15285
15301
  return "";
@@ -15455,8 +15471,7 @@ const extractInput = (inputKey, args) => {
15455
15471
  args: []
15456
15472
  };
15457
15473
  };
15458
- const MessageContent = (props) => {
15459
- const { contents } = props;
15474
+ const MessageContent = ({ contents }) => {
15460
15475
  if (Array.isArray(contents)) {
15461
15476
  return contents.map((content, index) => {
15462
15477
  if (typeof content === "string") {
@@ -19422,21 +19437,15 @@ const LoggerEventView = ({ id, event, style }) => {
19422
19437
  };
19423
19438
  const kPrismRenderMaxSize = 25e4;
19424
19439
  const JSONPanel = ({ id, json, data, simple, style }) => {
19425
- const sourceCode = json || JSON.stringify(data, void 0, 2);
19426
19440
  const codeRef = A();
19427
- if (codeRef.current) {
19441
+ const sourceCode = T(() => {
19442
+ return json || JSON.stringify(data, void 0, 2);
19443
+ }, [json, data]);
19444
+ y(() => {
19428
19445
  if (sourceCode.length < kPrismRenderMaxSize) {
19429
- codeRef.current.innerHTML = Prism$1.highlight(
19430
- sourceCode,
19431
- Prism$1.languages.javascript,
19432
- "javacript"
19433
- );
19434
- } else {
19435
- const textNode = document.createTextNode(sourceCode);
19436
- codeRef.current.innerText = "";
19437
- codeRef.current.appendChild(textNode);
19446
+ Prism$1.highlightElement(codeRef.current);
19438
19447
  }
19439
- }
19448
+ }, [sourceCode]);
19440
19449
  return m$1`<div>
19441
19450
  <pre
19442
19451
  style=${{
@@ -19446,16 +19455,18 @@ const JSONPanel = ({ id, json, data, simple, style }) => {
19446
19455
  borderRadius: simple ? void 0 : "var(--bs-border-radius)",
19447
19456
  ...style
19448
19457
  }}
19458
+ class="jsonPanel"
19449
19459
  >
19450
19460
  <code
19451
19461
  id=${id}
19452
19462
  ref=${codeRef}
19453
- class="sourceCode-json"
19463
+ class="sourceCode language-javascript"
19454
19464
  style=${{
19455
19465
  fontSize: FontSize.small,
19456
19466
  whiteSpace: "pre-wrap",
19457
19467
  wordWrap: "anywhere"
19458
19468
  }}>
19469
+ ${sourceCode}
19459
19470
  </code>
19460
19471
  </pre>
19461
19472
  </div>`;
@@ -19569,6 +19580,7 @@ const decisionIcon = (decision) => {
19569
19580
  }
19570
19581
  };
19571
19582
  const ToolEventView = ({ id, event, style, depth }) => {
19583
+ var _a2;
19572
19584
  const { input, functionCall, inputType } = resolveToolInput(
19573
19585
  event.function,
19574
19586
  event.arguments
@@ -19576,10 +19588,10 @@ const ToolEventView = ({ id, event, style, depth }) => {
19576
19588
  const approvalEvent = event.events.find((e2) => {
19577
19589
  return e2.event === "approval";
19578
19590
  });
19579
- const title = `Tool: ${event.function}`;
19591
+ const title = `Tool: ${((_a2 = event.view) == null ? void 0 : _a2.title) || event.function}`;
19580
19592
  return m$1`
19581
19593
  <${EventPanel} id=${id} title="${title}" subTitle=${formatDateTime(new Date(event.timestamp))} icon=${ApplicationIcons.solvers.use_tools} style=${style}>
19582
- <div name="Summary" style=${{ margin: "0.5em 0" }}>
19594
+ <div name="Summary" style=${{ margin: "0.5em 0", width: "100%" }}>
19583
19595
  <${ToolCallView}
19584
19596
  functionCall=${functionCall}
19585
19597
  input=${input}
@@ -20267,7 +20279,10 @@ const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => {
20267
20279
  clamp: true
20268
20280
  });
20269
20281
  }
20270
- const fullAnswer = sample && sampleDescriptor ? sampleDescriptor.selectedScorer(sample).answer() : void 0;
20282
+ const fullAnswer = sample && sampleDescriptor ? (
20283
+ // @ts-ignore
20284
+ sampleDescriptor.selectedScorer(sample).answer()
20285
+ ) : void 0;
20271
20286
  if (fullAnswer) {
20272
20287
  columns.push({
20273
20288
  label: "Answer",
@@ -20293,7 +20308,11 @@ const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => {
20293
20308
  value: sample.error ? m$1`<${FlatSampleError}
20294
20309
  message=${sample.error.message}
20295
20310
  style=${{ marginTop: "0.4rem" }}
20296
- />` : sampleDescriptor == null ? void 0 : sampleDescriptor.selectedScore(sample).render(),
20311
+ />` : (
20312
+ // TODO: Cleanup once the PR lands which makes sample / sample summary share common interface
20313
+ // @ts-ignore
20314
+ sampleDescriptor == null ? void 0 : sampleDescriptor.selectedScore(sample).render()
20315
+ ),
20297
20316
  size: "minmax(2em, auto)",
20298
20317
  center: true
20299
20318
  });
@@ -20601,7 +20620,7 @@ const SampleList = (props) => {
20601
20620
  [selectedIndex]
20602
20621
  );
20603
20622
  const listStyle = { ...style, flex: "1", overflowY: "auto", outline: "none" };
20604
- const { limit, answer } = gridColumns(sampleDescriptor);
20623
+ const { limit, answer, target } = gridColumns(sampleDescriptor);
20605
20624
  const headerRow = m$1`<div
20606
20625
  style=${{
20607
20626
  display: "grid",
@@ -20616,7 +20635,7 @@ const SampleList = (props) => {
20616
20635
  >
20617
20636
  <div>Id</div>
20618
20637
  <div>Input</div>
20619
- <div>Target</div>
20638
+ <div>${target !== "0" ? "Target" : ""}</div>
20620
20639
  <div>${answer !== "0" ? "Answer" : ""}</div>
20621
20640
  <div>${limit !== "0" ? "Limit" : ""}</div>
20622
20641
  <div style=${{ justifySelf: "center" }}>Score</div>
@@ -24620,7 +24639,9 @@ const ResultsPanel = ({ results }) => {
24620
24639
  flexWrap: "wrap",
24621
24640
  justifyContent: "end",
24622
24641
  height: "100%",
24623
- alignItems: "center"
24642
+ alignItems: "center",
24643
+ maxHeight: "15em",
24644
+ overflow: "scroll"
24624
24645
  }}
24625
24646
  >
24626
24647
  ${metrics.map((metric, i) => {
@@ -24638,7 +24659,9 @@ const ResultsPanel = ({ results }) => {
24638
24659
  alignItems: "center",
24639
24660
  marginTop: "0.2rem",
24640
24661
  paddingBottom: "0.4rem",
24641
- rowGap: "1em"
24662
+ rowGap: "1em",
24663
+ maxHeight: "15em",
24664
+ overflow: "scroll"
24642
24665
  }}
24643
24666
  >
24644
24667
  ${(_b2 = results == null ? void 0 : results.scores) == null ? void 0 : _b2.map((score, index) => {
@@ -26223,8 +26246,12 @@ function App({
26223
26246
  }
26224
26247
  }
26225
26248
  } catch (e2) {
26226
- console.log(e2);
26227
- setStatus({ loading: false, error: e2 });
26249
+ if (e2.message === "Load failed" || e2.message === "Failed to fetch") {
26250
+ setStatus({ loading: false });
26251
+ } else {
26252
+ console.log(e2);
26253
+ setStatus({ loading: false, error: e2 });
26254
+ }
26228
26255
  }
26229
26256
  setHeadersLoading(false);
26230
26257
  };
@@ -26452,8 +26479,11 @@ function App({
26452
26479
  }
26453
26480
  new ClipboardJS(".clipboard-button,.copy-button");
26454
26481
  if (pollForLogs) {
26455
- setInterval(() => {
26456
- api2.client_events().then(async (events) => {
26482
+ let retryDelay = 1e3;
26483
+ const maxRetryDelay = 6e4;
26484
+ const pollEvents = async () => {
26485
+ try {
26486
+ const events = await api2.client_events();
26457
26487
  if (events.includes("reload")) {
26458
26488
  window.location.reload();
26459
26489
  }
@@ -26462,8 +26492,15 @@ function App({
26462
26492
  setLogs(logs2);
26463
26493
  setSelectedLogIndex(0);
26464
26494
  }
26465
- });
26466
- }, 1e3);
26495
+ retryDelay = 1e3;
26496
+ } catch (error2) {
26497
+ console.error("Error fetching client events:", error2);
26498
+ retryDelay = Math.min(retryDelay * 2, maxRetryDelay);
26499
+ } finally {
26500
+ setTimeout(pollEvents, retryDelay);
26501
+ }
26502
+ };
26503
+ pollEvents();
26467
26504
  }
26468
26505
  };
26469
26506
  loadLogsAndState();
@@ -480,8 +480,13 @@ export function App({
480
480
  }
481
481
  }
482
482
  } catch (e) {
483
- console.log(e);
484
- setStatus({ loading: false, error: e });
483
+ if (e.message === "Load failed" || e.message === "Failed to fetch") {
484
+ // This will happen if the server disappears (e.g. inspect view is terminated)
485
+ setStatus({ loading: false });
486
+ } else {
487
+ console.log(e);
488
+ setStatus({ loading: false, error: e });
489
+ }
485
490
  }
486
491
 
487
492
  setHeadersLoading(false);
@@ -774,18 +779,38 @@ export function App({
774
779
  new ClipboardJS(".clipboard-button,.copy-button");
775
780
 
776
781
  if (pollForLogs) {
777
- setInterval(() => {
778
- api.client_events().then(async (events) => {
782
+ let retryDelay = 1000;
783
+ const maxRetryDelay = 60000;
784
+
785
+ const pollEvents = async () => {
786
+ try {
787
+ const events = await api.client_events();
788
+
779
789
  if (events.includes("reload")) {
780
790
  window.location.reload();
781
791
  }
792
+
782
793
  if (events.includes("refresh-evals")) {
783
794
  const logs = await load();
784
795
  setLogs(logs);
785
796
  setSelectedLogIndex(0);
786
797
  }
787
- });
788
- }, 1000);
798
+
799
+ // Reset delay after a successful call
800
+ retryDelay = 1000;
801
+ } catch (error) {
802
+ console.error("Error fetching client events:", error);
803
+
804
+ // Exponential backoff with capping
805
+ retryDelay = Math.min(retryDelay * 2, maxRetryDelay);
806
+ } finally {
807
+ // Schedule the next poll
808
+ setTimeout(pollEvents, retryDelay);
809
+ }
810
+ };
811
+
812
+ // Start polling
813
+ pollEvents();
789
814
  }
790
815
  };
791
816
 
@@ -32,3 +32,9 @@
32
32
  /**
33
33
  * @typedef {"none" | "single" | "many"} SampleMode
34
34
  */
35
+
36
+ /**
37
+ * @typedef {Object} ContentTool
38
+ * @property {"tool"} type
39
+ * @property {(import("./types/log").ContentImage | import("./types/log").ContentText)[]} content
40
+ */
@@ -4,7 +4,7 @@ import Prism from "prismjs";
4
4
  import "prismjs/components/prism-json";
5
5
 
6
6
  import { html } from "htm/preact";
7
- import { useRef } from "preact/hooks";
7
+ import { useEffect, useMemo, useRef } from "preact/hooks";
8
8
  import { FontSize } from "../appearance/Fonts.mjs";
9
9
 
10
10
  const kPrismRenderMaxSize = 250000;
@@ -22,25 +22,17 @@ const kPrismRenderMaxSize = 250000;
22
22
  * @returns {import('preact').JSX.Element} The rendered component.
23
23
  */
24
24
  export const JSONPanel = ({ id, json, data, simple, style }) => {
25
- const sourceCode = json || JSON.stringify(data, undefined, 2);
26
25
  const codeRef = useRef();
27
26
 
28
- if (codeRef.current) {
27
+ const sourceCode = useMemo(() => {
28
+ return json || JSON.stringify(data, undefined, 2);
29
+ }, [json, data]);
30
+
31
+ useEffect(() => {
29
32
  if (sourceCode.length < kPrismRenderMaxSize) {
30
- // @ts-ignore
31
- codeRef.current.innerHTML = Prism.highlight(
32
- sourceCode,
33
- Prism.languages.javascript,
34
- "javacript",
35
- );
36
- } else {
37
- const textNode = document.createTextNode(sourceCode);
38
- // @ts-ignore
39
- codeRef.current.innerText = "";
40
- // @ts-ignore
41
- codeRef.current.appendChild(textNode);
33
+ Prism.highlightElement(codeRef.current);
42
34
  }
43
- }
35
+ }, [sourceCode]);
44
36
 
45
37
  return html`<div>
46
38
  <pre
@@ -51,16 +43,18 @@ export const JSONPanel = ({ id, json, data, simple, style }) => {
51
43
  borderRadius: simple ? undefined : "var(--bs-border-radius)",
52
44
  ...style,
53
45
  }}
46
+ class="jsonPanel"
54
47
  >
55
48
  <code
56
49
  id=${id}
57
50
  ref=${codeRef}
58
- class="sourceCode-json"
51
+ class="sourceCode language-javascript"
59
52
  style=${{
60
53
  fontSize: FontSize.small,
61
54
  whiteSpace: "pre-wrap",
62
55
  wordWrap: "anywhere",
63
56
  }}>
57
+ ${sourceCode}
64
58
  </code>
65
59
  </pre>
66
60
  </div>`;
@@ -2,8 +2,15 @@ import { html } from "htm/preact";
2
2
  import { MarkdownDiv } from "./MarkdownDiv.mjs";
3
3
  import { ToolOutput } from "./Tools.mjs";
4
4
 
5
- export const MessageContent = (props) => {
6
- const { contents } = props;
5
+ /**
6
+ * Renders message content based on its type.
7
+ * Supports rendering strings, images, and tools using specific renderers.
8
+ *
9
+ * @param {Object} props - The props object.
10
+ * @param {string|string[]| (import("../types/log").ContentText | import("../types/log").ContentImage | import("../Types.mjs").ContentTool)[]} props.contents - The content or array of contents to render.
11
+ * @returns {import("preact").JSX.Element | import("preact").JSX.Element[]} The component.
12
+ */
13
+ export const MessageContent = ({ contents }) => {
7
14
  if (Array.isArray(contents)) {
8
15
  return contents.map((content, index) => {
9
16
  if (typeof content === "string") {