inspect-ai 0.3.56__py3-none-any.whl → 0.3.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_display/core/panel.py +1 -1
- inspect_ai/_eval/run.py +16 -11
- inspect_ai/_util/datetime.py +1 -1
- inspect_ai/_util/deprecation.py +1 -1
- inspect_ai/_util/json.py +11 -1
- inspect_ai/_util/logger.py +2 -1
- inspect_ai/_util/trace.py +39 -3
- inspect_ai/_util/transcript.py +36 -7
- inspect_ai/_view/www/.prettierrc.js +12 -0
- inspect_ai/_view/www/dist/assets/index.js +286 -224
- inspect_ai/_view/www/log-schema.json +124 -125
- inspect_ai/_view/www/src/App.mjs +18 -9
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/api/Types.mjs +15 -4
- inspect_ai/_view/www/src/api/api-http.mjs +2 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
- inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
- inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
- inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
- inspect_ai/_view/www/src/components/Tools.mjs +18 -3
- inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
- inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
- inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
- inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
- inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
- inspect_ai/_view/www/src/types/log.d.ts +2 -8
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
- inspect_ai/log/_log.py +25 -0
- inspect_ai/log/_recorders/eval.py +2 -0
- inspect_ai/model/_call_tools.py +27 -5
- inspect_ai/model/_providers/google.py +24 -6
- inspect_ai/model/_providers/openai.py +17 -3
- inspect_ai/model/_providers/openai_o1.py +10 -12
- inspect_ai/tool/_tool_info.py +2 -1
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -3
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +1 -3
- inspect_ai/util/_sandbox/docker/util.py +2 -1
- inspect_ai/util/_sandbox/self_check.py +18 -18
- inspect_ai/util/_store.py +2 -2
- inspect_ai/util/_subprocess.py +3 -3
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +57 -56
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0
@@ -8499,7 +8499,10 @@ var require_assets = __commonJS({
|
|
8499
8499
|
children: children2
|
8500
8500
|
}) => {
|
8501
8501
|
const tabContentsId = computeTabContentsId(id, index);
|
8502
|
-
const tabContentsRef = A(
|
8502
|
+
const tabContentsRef = A(
|
8503
|
+
/** @type {HTMLElement|null} */
|
8504
|
+
null
|
8505
|
+
);
|
8503
8506
|
y(() => {
|
8504
8507
|
setTimeout(() => {
|
8505
8508
|
if (scrollPosition !== void 0 && tabContentsRef.current && tabContentsRef.current.scrollTop !== scrollPosition) {
|
@@ -15138,8 +15141,14 @@ var require_assets = __commonJS({
|
|
15138
15141
|
}) => {
|
15139
15142
|
const [collapsed, setCollapsed] = h(collapse);
|
15140
15143
|
const [showToggle, setShowToggle] = h(false);
|
15141
|
-
const contentsRef = A(
|
15142
|
-
|
15144
|
+
const contentsRef = A(
|
15145
|
+
/** @type {HTMLElement|null} */
|
15146
|
+
null
|
15147
|
+
);
|
15148
|
+
const observerRef = A(
|
15149
|
+
/** @type {IntersectionObserver|null} */
|
15150
|
+
null
|
15151
|
+
);
|
15143
15152
|
y(() => {
|
15144
15153
|
setCollapsed(collapse);
|
15145
15154
|
}, [children2, collapse]);
|
@@ -15259,7 +15268,16 @@ var require_assets = __commonJS({
|
|
15259
15268
|
mode
|
15260
15269
|
}) => {
|
15261
15270
|
function isContentImage(value) {
|
15262
|
-
|
15271
|
+
if (value && typeof value === "object") {
|
15272
|
+
if (value.type === "image") {
|
15273
|
+
return true;
|
15274
|
+
} else if (value.type === "tool") {
|
15275
|
+
if (Array.isArray(value.content) && value.content.some(isContentImage)) {
|
15276
|
+
return true;
|
15277
|
+
}
|
15278
|
+
}
|
15279
|
+
}
|
15280
|
+
return false;
|
15263
15281
|
}
|
15264
15282
|
const collapse = Array.isArray(output) ? output.every((item) => !isContentImage(item)) : !isContentImage(output);
|
15265
15283
|
return m$1`<div>
|
@@ -15313,7 +15331,7 @@ var require_assets = __commonJS({
|
|
15313
15331
|
}
|
15314
15332
|
if (view) {
|
15315
15333
|
const toolInputRef = A(
|
15316
|
-
/** @type {
|
15334
|
+
/** @type {import("preact").Component & { base: Element }} */
|
15317
15335
|
null
|
15318
15336
|
);
|
15319
15337
|
y(() => {
|
@@ -15389,7 +15407,7 @@ var require_assets = __commonJS({
|
|
15389
15407
|
m$1`<img
|
15390
15408
|
src="${out.image}"
|
15391
15409
|
style=${{
|
15392
|
-
maxWidth: "
|
15410
|
+
maxWidth: "800px",
|
15393
15411
|
border: "solid var(--bs-border-color) 1px",
|
15394
15412
|
...style2
|
15395
15413
|
}}
|
@@ -15520,7 +15538,7 @@ var require_assets = __commonJS({
|
|
15520
15538
|
return m$1`<img
|
15521
15539
|
src="${content.image}"
|
15522
15540
|
style=${{
|
15523
|
-
maxWidth: "
|
15541
|
+
maxWidth: "800px",
|
15524
15542
|
border: "solid var(--bs-border-color) 1px"
|
15525
15543
|
}}
|
15526
15544
|
/>`;
|
@@ -16531,7 +16549,10 @@ ${entry.value}</pre
|
|
16531
16549
|
setWarningHidden
|
16532
16550
|
} = props;
|
16533
16551
|
const modalFooter = footer ? m$1`<div class="modal-footer">${footer}</div>` : "";
|
16534
|
-
const scrollRef = A(
|
16552
|
+
const scrollRef = A(
|
16553
|
+
/** @type {HTMLElement|null} */
|
16554
|
+
null
|
16555
|
+
);
|
16535
16556
|
y(() => {
|
16536
16557
|
if (scrollRef.current) {
|
16537
16558
|
setTimeout(() => {
|
@@ -16681,7 +16702,7 @@ ${entry.value}</pre
|
|
16681
16702
|
});
|
16682
16703
|
};
|
16683
16704
|
const SampleScores = ({ sample, sampleDescriptor, scorer }) => {
|
16684
|
-
const scores = scorer ? sampleDescriptor.
|
16705
|
+
const scores = scorer ? sampleDescriptor.evalDescriptor.scorerDescriptor(sample, { scorer, name: scorer }).scores() : sampleDescriptor.selectedScorerDescriptor(sample).scores();
|
16685
16706
|
if (scores.length === 1) {
|
16686
16707
|
return scores[0].rendered();
|
16687
16708
|
} else {
|
@@ -16775,7 +16796,7 @@ ${entry.value}</pre
|
|
16775
16796
|
scorer
|
16776
16797
|
}) => {
|
16777
16798
|
if (!sampleDescriptor) {
|
16778
|
-
return
|
16799
|
+
return m$1``;
|
16779
16800
|
}
|
16780
16801
|
const scoreInput = inputString(sample.input);
|
16781
16802
|
if (sample.choices && sample.choices.length > 0) {
|
@@ -16786,7 +16807,10 @@ ${entry.value}</pre
|
|
16786
16807
|
})
|
16787
16808
|
);
|
16788
16809
|
}
|
16789
|
-
const scorerDescriptor = sampleDescriptor.
|
16810
|
+
const scorerDescriptor = sampleDescriptor.evalDescriptor.scorerDescriptor(
|
16811
|
+
sample,
|
16812
|
+
{ scorer, name: scorer }
|
16813
|
+
);
|
16790
16814
|
const explanation = scorerDescriptor.explanation() || "(No Explanation)";
|
16791
16815
|
const answer = scorerDescriptor.answer();
|
16792
16816
|
const metadata = scorerDescriptor.metadata();
|
@@ -24775,7 +24799,7 @@ ${events}
|
|
24775
24799
|
}
|
24776
24800
|
};
|
24777
24801
|
const ToolEventView = ({ id, event, style: style2, depth }) => {
|
24778
|
-
var _a2;
|
24802
|
+
var _a2, _b2;
|
24779
24803
|
const { input, functionCall, inputType } = resolveToolInput(
|
24780
24804
|
event.function,
|
24781
24805
|
event.arguments
|
@@ -24791,7 +24815,7 @@ ${events}
|
|
24791
24815
|
functionCall=${functionCall}
|
24792
24816
|
input=${input}
|
24793
24817
|
inputType=${inputType}
|
24794
|
-
output=${event.result}
|
24818
|
+
output=${((_b2 = event.error) == null ? void 0 : _b2.message) || event.result}
|
24795
24819
|
mode="compact"
|
24796
24820
|
view=${event.view}
|
24797
24821
|
/>
|
@@ -25475,10 +25499,7 @@ ${events}
|
|
25475
25499
|
clamp: true
|
25476
25500
|
});
|
25477
25501
|
}
|
25478
|
-
const fullAnswer = sample && sampleDescriptor ? (
|
25479
|
-
// @ts-ignore
|
25480
|
-
sampleDescriptor.selectedScorer(sample).answer()
|
25481
|
-
) : void 0;
|
25502
|
+
const fullAnswer = sample && sampleDescriptor ? sampleDescriptor.selectedScorerDescriptor(sample).answer() : void 0;
|
25482
25503
|
if (fullAnswer) {
|
25483
25504
|
columns.push({
|
25484
25505
|
label: "Answer",
|
@@ -25640,27 +25661,27 @@ ${events}
|
|
25640
25661
|
class VirtualList extends x$1 {
|
25641
25662
|
constructor(props) {
|
25642
25663
|
super(props);
|
25664
|
+
/** @type {HTMLElement} */
|
25665
|
+
__publicField(this, "base");
|
25643
25666
|
this.state = {
|
25644
25667
|
height: 0,
|
25645
25668
|
offset: 0
|
25646
25669
|
};
|
25647
|
-
this.resize =
|
25648
|
-
|
25670
|
+
this.resize = () => {
|
25671
|
+
if (this.state.height !== this.base.offsetHeight) {
|
25672
|
+
this.setState({ height: this.base.offsetHeight });
|
25673
|
+
}
|
25674
|
+
};
|
25675
|
+
this.handleScroll = throttle$1(() => {
|
25676
|
+
if (this.base) {
|
25677
|
+
this.setState({ offset: this.base.scrollTop });
|
25678
|
+
}
|
25679
|
+
if (this.props.sync) {
|
25680
|
+
this.forceUpdate();
|
25681
|
+
}
|
25682
|
+
}, 100);
|
25649
25683
|
this.containerRef = b();
|
25650
25684
|
}
|
25651
|
-
resize() {
|
25652
|
-
if (this.state.height !== this.base.offsetHeight) {
|
25653
|
-
this.setState({ height: this.base.offsetHeight });
|
25654
|
-
}
|
25655
|
-
}
|
25656
|
-
handleScroll() {
|
25657
|
-
if (this.base) {
|
25658
|
-
this.setState({ offset: this.base.scrollTop });
|
25659
|
-
}
|
25660
|
-
if (this.props.sync) {
|
25661
|
-
this.forceUpdate();
|
25662
|
-
}
|
25663
|
-
}
|
25664
25685
|
componentDidUpdate() {
|
25665
25686
|
this.resize();
|
25666
25687
|
}
|
@@ -25816,7 +25837,7 @@ ${events}
|
|
25816
25837
|
[selectedIndex]
|
25817
25838
|
);
|
25818
25839
|
const listStyle = { ...style2, flex: "1", overflowY: "auto", outline: "none" };
|
25819
|
-
const { limit, answer, target } = gridColumns(sampleDescriptor);
|
25840
|
+
const { input, limit, answer, target } = gridColumns(sampleDescriptor);
|
25820
25841
|
const headerRow = m$1`<div
|
25821
25842
|
style=${{
|
25822
25843
|
display: "grid",
|
@@ -25830,7 +25851,7 @@ ${events}
|
|
25830
25851
|
}}
|
25831
25852
|
>
|
25832
25853
|
<div>Id</div>
|
25833
|
-
<div
|
25854
|
+
<div>${input !== "0" ? "Input" : ""}</div>
|
25834
25855
|
<div>${target !== "0" ? "Target" : ""}</div>
|
25835
25856
|
<div>${answer !== "0" ? "Answer" : ""}</div>
|
25836
25857
|
<div>${limit !== "0" ? "Limit" : ""}</div>
|
@@ -25986,7 +26007,7 @@ ${events}
|
|
25986
26007
|
>
|
25987
26008
|
${sample ? m$1`
|
25988
26009
|
<${MarkdownDiv}
|
25989
|
-
markdown=${sampleDescriptor == null ? void 0 : sampleDescriptor.
|
26010
|
+
markdown=${sampleDescriptor == null ? void 0 : sampleDescriptor.selectedScorerDescriptor(sample).answer()}
|
25990
26011
|
style=${{ paddingLeft: "0" }}
|
25991
26012
|
class="no-last-para-padding"
|
25992
26013
|
/>
|
@@ -26241,7 +26262,7 @@ ${events}
|
|
26241
26262
|
}
|
26242
26263
|
}
|
26243
26264
|
});
|
26244
|
-
const groupCount = samples.length / sampleDescriptor.epochs;
|
26265
|
+
const groupCount = samples.length / sampleDescriptor.evalDescriptor.epochs;
|
26245
26266
|
const itemCount = samples.length / groupCount;
|
26246
26267
|
const counter = getCounter(itemCount, groupCount, order2);
|
26247
26268
|
return (sample, index, previousSample) => {
|
@@ -26270,7 +26291,7 @@ ${events}
|
|
26270
26291
|
};
|
26271
26292
|
};
|
26272
26293
|
const groupByEpoch = (samples, sampleDescriptor, order2) => {
|
26273
|
-
const groupCount = sampleDescriptor.epochs;
|
26294
|
+
const groupCount = sampleDescriptor.evalDescriptor.epochs;
|
26274
26295
|
const itemCount = samples.length / groupCount;
|
26275
26296
|
const counter = getCounter(itemCount, groupCount, order2);
|
26276
26297
|
return (sample, index, previousSample) => {
|
@@ -28418,7 +28439,8 @@ self.onmessage = function (e) {
|
|
28418
28439
|
};
|
28419
28440
|
});
|
28420
28441
|
return Promise.resolve({
|
28421
|
-
files: logs
|
28442
|
+
files: logs,
|
28443
|
+
log_dir
|
28422
28444
|
});
|
28423
28445
|
} else if (log_file) {
|
28424
28446
|
let evalLog = cache.get();
|
@@ -28433,7 +28455,8 @@ self.onmessage = function (e) {
|
|
28433
28455
|
task_id: evalLog.eval.task_id
|
28434
28456
|
};
|
28435
28457
|
return {
|
28436
|
-
files: [result]
|
28458
|
+
files: [result],
|
28459
|
+
log_dir
|
28437
28460
|
};
|
28438
28461
|
} else {
|
28439
28462
|
throw new Error(
|
@@ -28623,7 +28646,7 @@ self.onmessage = function (e) {
|
|
28623
28646
|
}
|
28624
28647
|
}
|
28625
28648
|
}
|
28626
|
-
const MAX_BYTES =
|
28649
|
+
const MAX_BYTES = 50 * 1024 * 1024;
|
28627
28650
|
const openRemoteLogFile = async (api2, url, concurrency) => {
|
28628
28651
|
const queue = new AsyncQueue(concurrency);
|
28629
28652
|
const remoteZipFile = await openRemoteZipFile(
|
@@ -29063,7 +29086,7 @@ self.onmessage = function (e) {
|
|
29063
29086
|
val: kEpochDescVal
|
29064
29087
|
});
|
29065
29088
|
}
|
29066
|
-
if ((_a2 = sampleDescriptor == null ? void 0 : sampleDescriptor.
|
29089
|
+
if ((_a2 = sampleDescriptor == null ? void 0 : sampleDescriptor.selectedScoreDescriptor) == null ? void 0 : _a2.compare) {
|
29067
29090
|
options.push({
|
29068
29091
|
label: "score asc",
|
29069
29092
|
val: kScoreAscVal
|
@@ -29152,12 +29175,12 @@ self.onmessage = function (e) {
|
|
29152
29175
|
}
|
29153
29176
|
}
|
29154
29177
|
case kScoreAscVal:
|
29155
|
-
return samplesDescriptor.
|
29178
|
+
return samplesDescriptor.selectedScoreDescriptor.compare(
|
29156
29179
|
samplesDescriptor.selectedScore(a2).value,
|
29157
29180
|
samplesDescriptor.selectedScore(b2).value
|
29158
29181
|
);
|
29159
29182
|
case kScoreDescVal:
|
29160
|
-
return samplesDescriptor.
|
29183
|
+
return samplesDescriptor.selectedScoreDescriptor.compare(
|
29161
29184
|
samplesDescriptor.selectedScore(b2).value,
|
29162
29185
|
samplesDescriptor.selectedScore(a2).value
|
29163
29186
|
);
|
@@ -29181,11 +29204,11 @@ self.onmessage = function (e) {
|
|
29181
29204
|
});
|
29182
29205
|
}
|
29183
29206
|
};
|
29184
|
-
switch ((_a2 = descriptor == null ? void 0 : descriptor.
|
29207
|
+
switch ((_a2 = descriptor == null ? void 0 : descriptor.selectedScoreDescriptor) == null ? void 0 : _a2.scoreType) {
|
29185
29208
|
case kScoreTypePassFail: {
|
29186
29209
|
const options = [{ text: "All", value: "all" }];
|
29187
29210
|
options.push(
|
29188
|
-
...descriptor.
|
29211
|
+
...descriptor.selectedScoreDescriptor.categories.map((cat) => {
|
29189
29212
|
return { text: cat.text, value: cat.val };
|
29190
29213
|
})
|
29191
29214
|
);
|
@@ -29198,7 +29221,7 @@ self.onmessage = function (e) {
|
|
29198
29221
|
case kScoreTypeCategorical: {
|
29199
29222
|
const options = [{ text: "All", value: "all" }];
|
29200
29223
|
options.push(
|
29201
|
-
...descriptor.
|
29224
|
+
...descriptor.selectedScoreDescriptor.categories.map((cat) => {
|
29202
29225
|
return { text: cat, value: cat };
|
29203
29226
|
})
|
29204
29227
|
);
|
@@ -29226,12 +29249,12 @@ self.onmessage = function (e) {
|
|
29226
29249
|
`;
|
29227
29250
|
}
|
29228
29251
|
case kScoreTypeObject: {
|
29229
|
-
if (!descriptor.
|
29252
|
+
if (!descriptor.selectedScoreDescriptor.categories) {
|
29230
29253
|
return "";
|
29231
29254
|
}
|
29232
29255
|
const options = [{ text: "All", value: "all" }];
|
29233
29256
|
options.push(
|
29234
|
-
...descriptor.
|
29257
|
+
...descriptor.selectedScoreDescriptor.categories.map((cat) => {
|
29235
29258
|
return { text: cat.text, value: cat.value };
|
29236
29259
|
})
|
29237
29260
|
);
|
@@ -29640,11 +29663,13 @@ self.onmessage = function (e) {
|
|
29640
29663
|
if (status === "success") {
|
29641
29664
|
statusPanel = m$1`<${ResultsPanel} results="${results}" />`;
|
29642
29665
|
} else if (status === "cancelled") {
|
29643
|
-
statusPanel = m$1`<${
|
29666
|
+
statusPanel = m$1`<${CancelledPanel}
|
29644
29667
|
sampleCount=${(samples == null ? void 0 : samples.length) || 0}
|
29645
29668
|
/>`;
|
29646
29669
|
} else if (status === "started") {
|
29647
|
-
statusPanel = m$1`<${RunningPanel} />`;
|
29670
|
+
statusPanel = m$1`<${RunningPanel} sampleCount=${(samples == null ? void 0 : samples.length) || 0} />`;
|
29671
|
+
} else if (status === "error") {
|
29672
|
+
statusPanel = m$1`<${ErroredPanel} sampleCount=${(samples == null ? void 0 : samples.length) || 0} />`;
|
29648
29673
|
}
|
29649
29674
|
const navbarContents = logFileName ? m$1` <div
|
29650
29675
|
class="navbar-brand navbar-text mb-0"
|
@@ -29771,47 +29796,51 @@ self.onmessage = function (e) {
|
|
29771
29796
|
</nav>
|
29772
29797
|
`;
|
29773
29798
|
};
|
29774
|
-
const
|
29799
|
+
const StatusPanel = ({ icon, status, sampleCount }) => {
|
29775
29800
|
return m$1`<div
|
29776
29801
|
style=${{
|
29777
29802
|
padding: "1em",
|
29778
29803
|
marginTop: "0.5em",
|
29779
29804
|
textTransform: "uppercase",
|
29780
|
-
fontSize: FontSize.smaller
|
29805
|
+
fontSize: FontSize.smaller,
|
29806
|
+
display: "grid",
|
29807
|
+
gridTemplateColumns: "auto auto"
|
29781
29808
|
}}
|
29782
29809
|
>
|
29783
29810
|
<i
|
29784
|
-
class="${
|
29785
|
-
style=${{ fontSize: FontSize.large, marginRight: "0.3em" }}
|
29786
|
-
/>
|
29787
|
-
cancelled (${sampleCount} ${sampleCount === 1 ? "sample" : "samples"})
|
29788
|
-
</div>`;
|
29789
|
-
};
|
29790
|
-
const RunningPanel = () => {
|
29791
|
-
return m$1`
|
29792
|
-
<div
|
29811
|
+
class="${icon}"
|
29793
29812
|
style=${{
|
29794
|
-
|
29795
|
-
|
29796
|
-
|
29797
|
-
}}
|
29798
|
-
>
|
29799
|
-
<div>
|
29800
|
-
<i class=${ApplicationIcons.running} />
|
29801
|
-
</div>
|
29802
|
-
<div
|
29803
|
-
style=${{
|
29804
|
-
marginLeft: "0.3em",
|
29805
|
-
paddingTop: "0.2em",
|
29806
|
-
fontSize: FontSize.smaller,
|
29807
|
-
...TextStyle.label,
|
29808
|
-
...TextStyle.secondary
|
29813
|
+
fontSize: FontSize.large,
|
29814
|
+
marginRight: "0.3em",
|
29815
|
+
marginTop: "-0.1em"
|
29809
29816
|
}}
|
29810
|
-
|
29811
|
-
|
29812
|
-
</div>
|
29817
|
+
/>
|
29818
|
+
<div>
|
29819
|
+
<div>${status}</div>
|
29820
|
+
<div>(${sampleCount} ${sampleCount === 1 ? "sample" : "samples"})</div>
|
29813
29821
|
</div>
|
29814
|
-
|
29822
|
+
</div>`;
|
29823
|
+
};
|
29824
|
+
const CancelledPanel = ({ sampleCount }) => {
|
29825
|
+
return m$1`<${StatusPanel}
|
29826
|
+
icon=${ApplicationIcons.logging.info}
|
29827
|
+
status="Cancelled"
|
29828
|
+
sampleCount=${sampleCount}
|
29829
|
+
/>`;
|
29830
|
+
};
|
29831
|
+
const ErroredPanel = ({ sampleCount }) => {
|
29832
|
+
return m$1`<${StatusPanel}
|
29833
|
+
icon=${ApplicationIcons.logging.error}
|
29834
|
+
status="Task Failed"
|
29835
|
+
sampleCount=${sampleCount}
|
29836
|
+
/>`;
|
29837
|
+
};
|
29838
|
+
const RunningPanel = ({ sampleCount }) => {
|
29839
|
+
return m$1`<${StatusPanel}
|
29840
|
+
icon=${ApplicationIcons.running}
|
29841
|
+
status="Running"
|
29842
|
+
sampleCount=${sampleCount}
|
29843
|
+
/>`;
|
29815
29844
|
};
|
29816
29845
|
const ResultsPanel = ({ results }) => {
|
29817
29846
|
var _a2, _b2;
|
@@ -29879,7 +29908,8 @@ self.onmessage = function (e) {
|
|
29879
29908
|
...TextStyle.secondary
|
29880
29909
|
}}
|
29881
29910
|
>
|
29882
|
-
${
|
29911
|
+
${// @ts-expect-error
|
29912
|
+
metric.reducer}
|
29883
29913
|
</div>` : "";
|
29884
29914
|
return m$1`<div style=${{ paddingLeft: isFirst ? "0" : "1em" }}>
|
29885
29915
|
<div
|
@@ -30032,7 +30062,7 @@ self.onmessage = function (e) {
|
|
30032
30062
|
}, [divRef, task_id]);
|
30033
30063
|
const resolvedTabs = T(() => {
|
30034
30064
|
const resolvedTabs2 = {};
|
30035
|
-
if (
|
30065
|
+
if (sampleMode !== "none") {
|
30036
30066
|
resolvedTabs2.samples = {
|
30037
30067
|
id: kEvalWorkspaceTabId,
|
30038
30068
|
scrollable: samples.length === 1,
|
@@ -30364,7 +30394,10 @@ self.onmessage = function (e) {
|
|
30364
30394
|
}
|
30365
30395
|
};
|
30366
30396
|
const FindBand = ({ hideBand }) => {
|
30367
|
-
const searchBoxRef = A(
|
30397
|
+
const searchBoxRef = A(
|
30398
|
+
/** @type {HTMLInputElement|null} */
|
30399
|
+
null
|
30400
|
+
);
|
30368
30401
|
y(() => {
|
30369
30402
|
searchBoxRef.current.focus();
|
30370
30403
|
}, []);
|
@@ -30384,13 +30417,16 @@ self.onmessage = function (e) {
|
|
30384
30417
|
}
|
30385
30418
|
return expandablePanelEl;
|
30386
30419
|
};
|
30387
|
-
const focusedElement =
|
30420
|
+
const focusedElement = (
|
30421
|
+
/** @type {HTMLElement} */
|
30422
|
+
document.activeElement
|
30423
|
+
);
|
30388
30424
|
const result = window.find(term, false, !!back, false, false, true, false);
|
30389
30425
|
const noResultEl = window.document.getElementById(
|
30390
30426
|
"inspect-find-no-results"
|
30391
30427
|
);
|
30392
30428
|
if (result) {
|
30393
|
-
noResultEl.style.opacity = 0;
|
30429
|
+
noResultEl.style.opacity = "0";
|
30394
30430
|
const selection = window.getSelection();
|
30395
30431
|
if (selection.rangeCount > 0) {
|
30396
30432
|
const parentPanel = parentExpandablePanel(selection);
|
@@ -30411,7 +30447,7 @@ self.onmessage = function (e) {
|
|
30411
30447
|
}, 100);
|
30412
30448
|
}
|
30413
30449
|
} else {
|
30414
|
-
noResultEl.style.opacity = 1;
|
30450
|
+
noResultEl.style.opacity = "1";
|
30415
30451
|
}
|
30416
30452
|
if (focusedElement) {
|
30417
30453
|
focusedElement.focus();
|
@@ -30504,32 +30540,25 @@ self.onmessage = function (e) {
|
|
30504
30540
|
</button>
|
30505
30541
|
</div>`;
|
30506
30542
|
};
|
30507
|
-
const
|
30543
|
+
const createEvalDescriptor = (scores, samples, epochs) => {
|
30508
30544
|
if (!samples) {
|
30509
30545
|
return void 0;
|
30510
30546
|
}
|
30511
|
-
const
|
30512
|
-
if (sample.scores
|
30513
|
-
return sample.scores[scorer];
|
30514
|
-
} else {
|
30547
|
+
const scoreValue = (sample, scoreLabel) => {
|
30548
|
+
if (Object.keys(sample.scores).length === 0 || !scoreLabel) {
|
30515
30549
|
return void 0;
|
30516
30550
|
}
|
30517
|
-
|
30518
|
-
|
30519
|
-
if (
|
30520
|
-
return
|
30521
|
-
}
|
30522
|
-
if (selectedScore.scorer !== selectedScore.name && sample.scores[selectedScore.scorer] && sample.scores[selectedScore.scorer].value) {
|
30523
|
-
return sample.scores[selectedScore.scorer].value[selectedScore.name];
|
30524
|
-
} else if (sample.scores[selectedScore.name]) {
|
30525
|
-
return sample.scores[selectedScore.name].value;
|
30551
|
+
if (scoreLabel.scorer !== scoreLabel.name && sample.scores[scoreLabel.scorer] && sample.scores[scoreLabel.scorer].value) {
|
30552
|
+
return sample.scores[scoreLabel.scorer].value[scoreLabel.name];
|
30553
|
+
} else if (sample.scores[scoreLabel.name]) {
|
30554
|
+
return sample.scores[scoreLabel.name].value;
|
30526
30555
|
} else {
|
30527
30556
|
return void 0;
|
30528
30557
|
}
|
30529
30558
|
};
|
30530
30559
|
const scoreAnswer = (sample, scorer) => {
|
30531
30560
|
if (sample) {
|
30532
|
-
const sampleScore =
|
30561
|
+
const sampleScore = sample.scores[scorer];
|
30533
30562
|
if (sampleScore && sampleScore.answer) {
|
30534
30563
|
return sampleScore.answer;
|
30535
30564
|
}
|
@@ -30539,7 +30568,7 @@ self.onmessage = function (e) {
|
|
30539
30568
|
};
|
30540
30569
|
const scoreExplanation = (sample, scorer) => {
|
30541
30570
|
if (sample) {
|
30542
|
-
const sampleScore =
|
30571
|
+
const sampleScore = sample.scores[scorer];
|
30543
30572
|
if (sampleScore && sampleScore.explanation) {
|
30544
30573
|
return sampleScore.explanation;
|
30545
30574
|
}
|
@@ -30548,48 +30577,158 @@ self.onmessage = function (e) {
|
|
30548
30577
|
};
|
30549
30578
|
const scoreMetadata = (sample, scorer) => {
|
30550
30579
|
if (sample) {
|
30551
|
-
const sampleScore =
|
30580
|
+
const sampleScore = sample.scores[scorer];
|
30552
30581
|
if (sampleScore && sampleScore.metadata) {
|
30553
30582
|
return sampleScore.metadata;
|
30554
30583
|
}
|
30555
30584
|
}
|
30556
30585
|
return void 0;
|
30557
30586
|
};
|
30558
|
-
const
|
30559
|
-
|
30560
|
-
|
30561
|
-
|
30562
|
-
|
30587
|
+
const scoreLabelKey = (scoreLabel) => {
|
30588
|
+
if (!scoreLabel) {
|
30589
|
+
return "No score key";
|
30590
|
+
}
|
30591
|
+
return `${scoreLabel.scorer}.${scoreLabel.name}`;
|
30592
|
+
};
|
30593
|
+
const scoreDescriptorMap = /* @__PURE__ */ new Map();
|
30594
|
+
for (const scoreLabel of scores) {
|
30595
|
+
const uniqScoreValues = [
|
30596
|
+
...new Set(
|
30597
|
+
samples.filter((sample) => !!sample.scores).filter((sample) => {
|
30598
|
+
if (!scoreLabel) {
|
30599
|
+
return true;
|
30600
|
+
}
|
30601
|
+
if (scoreLabel.scorer !== scoreLabel.name) {
|
30602
|
+
return Object.keys(sample.scores).includes(scoreLabel.scorer) && Object.keys(sample.scores[scoreLabel.scorer].value).includes(
|
30603
|
+
scoreLabel.name
|
30604
|
+
);
|
30605
|
+
} else {
|
30606
|
+
return Object.keys(sample.scores).includes(scoreLabel.name);
|
30607
|
+
}
|
30608
|
+
}).map((sample) => {
|
30609
|
+
return scoreValue(sample, scoreLabel);
|
30610
|
+
}).filter((value) => {
|
30611
|
+
return value !== null;
|
30612
|
+
})
|
30613
|
+
)
|
30614
|
+
];
|
30615
|
+
const uniqScoreTypes = [
|
30616
|
+
...new Set(uniqScoreValues.map((scoreValue2) => typeof scoreValue2))
|
30617
|
+
];
|
30618
|
+
for (const categorizer of scoreCategorizers) {
|
30619
|
+
const scoreDescriptor2 = categorizer.describe(
|
30620
|
+
uniqScoreValues,
|
30621
|
+
uniqScoreTypes
|
30622
|
+
);
|
30623
|
+
if (scoreDescriptor2) {
|
30624
|
+
scoreDescriptorMap.set(scoreLabelKey(scoreLabel), scoreDescriptor2);
|
30625
|
+
break;
|
30626
|
+
}
|
30627
|
+
}
|
30628
|
+
}
|
30629
|
+
const scoreDescriptor = (scoreLabel) => {
|
30630
|
+
return scoreDescriptorMap.get(scoreLabelKey(scoreLabel));
|
30631
|
+
};
|
30632
|
+
const scoreRendered = (sample, scoreLabel) => {
|
30633
|
+
const descriptor = scoreDescriptor(scoreLabel);
|
30634
|
+
const score2 = scoreValue(sample, scoreLabel);
|
30635
|
+
if (score2 === null || score2 === "undefined") {
|
30636
|
+
return "null";
|
30637
|
+
} else if (descriptor && descriptor.render) {
|
30638
|
+
return descriptor.render(score2);
|
30639
|
+
} else {
|
30640
|
+
return score2;
|
30641
|
+
}
|
30642
|
+
};
|
30643
|
+
const scorerDescriptor = (sample, scoreLabel) => {
|
30644
|
+
return {
|
30645
|
+
metadata: () => {
|
30646
|
+
return scoreMetadata(sample, scoreLabel.scorer);
|
30647
|
+
},
|
30648
|
+
explanation: () => {
|
30649
|
+
return scoreExplanation(sample, scoreLabel.scorer);
|
30650
|
+
},
|
30651
|
+
answer: () => {
|
30652
|
+
return scoreAnswer(sample, scoreLabel.scorer);
|
30653
|
+
},
|
30654
|
+
scores: () => {
|
30655
|
+
if (!sample || !sample.scores) {
|
30656
|
+
return [];
|
30563
30657
|
}
|
30564
|
-
|
30565
|
-
|
30566
|
-
|
30567
|
-
|
30658
|
+
const myScoreDescriptor = scoreDescriptor(scoreLabel);
|
30659
|
+
if (!myScoreDescriptor) {
|
30660
|
+
return [];
|
30661
|
+
}
|
30662
|
+
const scoreNames = scores.map((score2) => {
|
30663
|
+
return score2.name;
|
30664
|
+
});
|
30665
|
+
const sampleScorer = sample.scores[scoreLabel.scorer];
|
30666
|
+
const scoreVal = sampleScorer.value;
|
30667
|
+
if (typeof scoreVal === "object") {
|
30668
|
+
const names = Object.keys(scoreVal);
|
30669
|
+
if (names.find((name) => {
|
30670
|
+
return scoreNames.includes(name);
|
30671
|
+
})) {
|
30672
|
+
const scores2 = names.map((name) => {
|
30673
|
+
return {
|
30674
|
+
name,
|
30675
|
+
rendered: () => {
|
30676
|
+
return myScoreDescriptor.render(scoreVal[name]);
|
30677
|
+
}
|
30678
|
+
};
|
30679
|
+
});
|
30680
|
+
return scores2;
|
30681
|
+
} else {
|
30682
|
+
return [
|
30683
|
+
{
|
30684
|
+
name: scoreLabel.scorer,
|
30685
|
+
rendered: () => {
|
30686
|
+
return myScoreDescriptor.render(scoreVal);
|
30687
|
+
}
|
30688
|
+
}
|
30689
|
+
];
|
30690
|
+
}
|
30568
30691
|
} else {
|
30569
|
-
return
|
30692
|
+
return [
|
30693
|
+
{
|
30694
|
+
name: scoreLabel.scorer,
|
30695
|
+
rendered: () => {
|
30696
|
+
return myScoreDescriptor.render(scoreVal);
|
30697
|
+
}
|
30698
|
+
}
|
30699
|
+
];
|
30570
30700
|
}
|
30571
|
-
}
|
30572
|
-
|
30573
|
-
|
30574
|
-
|
30575
|
-
|
30576
|
-
|
30577
|
-
|
30578
|
-
|
30579
|
-
|
30580
|
-
|
30581
|
-
|
30582
|
-
|
30583
|
-
|
30584
|
-
|
30585
|
-
|
30586
|
-
|
30701
|
+
}
|
30702
|
+
};
|
30703
|
+
};
|
30704
|
+
const score = (sample, scoreLabel) => {
|
30705
|
+
return {
|
30706
|
+
value: scoreValue(sample, scoreLabel),
|
30707
|
+
render: () => {
|
30708
|
+
return scoreRendered(sample, scoreLabel);
|
30709
|
+
}
|
30710
|
+
};
|
30711
|
+
};
|
30712
|
+
return {
|
30713
|
+
epochs,
|
30714
|
+
samples,
|
30715
|
+
scores,
|
30716
|
+
scorerDescriptor,
|
30717
|
+
scoreDescriptor,
|
30718
|
+
score,
|
30719
|
+
scoreAnswer
|
30720
|
+
};
|
30721
|
+
};
|
30722
|
+
const createSamplesDescriptor = (evalDescriptor, selectedScore) => {
|
30723
|
+
if (!evalDescriptor) {
|
30724
|
+
return void 0;
|
30587
30725
|
}
|
30588
|
-
const sizes = samples.reduce(
|
30726
|
+
const sizes = evalDescriptor.samples.reduce(
|
30589
30727
|
(previous, current) => {
|
30590
30728
|
var _a2;
|
30591
30729
|
const text2 = inputString(current.input).join(" ");
|
30592
|
-
const
|
30730
|
+
const scoreValue = evalDescriptor.score(current, selectedScore).value;
|
30731
|
+
const scoreText = scoreValue ? String(scoreValue) : "";
|
30593
30732
|
previous[0] = Math.min(Math.max(previous[0], text2.length), 300);
|
30594
30733
|
previous[1] = Math.min(
|
30595
30734
|
Math.max(previous[1], arrayToString(current.target).length),
|
@@ -30598,7 +30737,7 @@ self.onmessage = function (e) {
|
|
30598
30737
|
previous[2] = Math.min(
|
30599
30738
|
Math.max(
|
30600
30739
|
previous[2],
|
30601
|
-
((_a2 = scoreAnswer(current, selectedScore == null ? void 0 : selectedScore.name)) == null ? void 0 : _a2.length) || 0
|
30740
|
+
((_a2 = evalDescriptor.scoreAnswer(current, selectedScore == null ? void 0 : selectedScore.name)) == null ? void 0 : _a2.length) || 0
|
30602
30741
|
),
|
30603
30742
|
300
|
30604
30743
|
);
|
@@ -30642,91 +30781,12 @@ self.onmessage = function (e) {
|
|
30642
30781
|
score: maxSizes.score / base2
|
30643
30782
|
}
|
30644
30783
|
};
|
30645
|
-
const scoreRendered = (sample) => {
|
30646
|
-
const score2 = scoreValue(sample);
|
30647
|
-
if (score2 === null || score2 === "undefined") {
|
30648
|
-
return "null";
|
30649
|
-
} else if (scoreDescriptor.render) {
|
30650
|
-
return scoreDescriptor.render(score2);
|
30651
|
-
} else {
|
30652
|
-
return score2;
|
30653
|
-
}
|
30654
|
-
};
|
30655
|
-
const scorerDescriptor = (sample, scorer) => {
|
30656
|
-
return {
|
30657
|
-
metadata: () => {
|
30658
|
-
return scoreMetadata(sample, scorer);
|
30659
|
-
},
|
30660
|
-
explanation: () => {
|
30661
|
-
return scoreExplanation(sample, scorer);
|
30662
|
-
},
|
30663
|
-
answer: () => {
|
30664
|
-
return scoreAnswer(sample, scorer);
|
30665
|
-
},
|
30666
|
-
scores: () => {
|
30667
|
-
if (!sample || !sample.scores) {
|
30668
|
-
return [];
|
30669
|
-
}
|
30670
|
-
const scoreNames = scorers.map((score2) => {
|
30671
|
-
return score2.name;
|
30672
|
-
});
|
30673
|
-
const sampleScorer = sample.scores[scorer];
|
30674
|
-
const scoreVal = sampleScorer.value;
|
30675
|
-
if (typeof scoreVal === "object") {
|
30676
|
-
const names = Object.keys(scoreVal);
|
30677
|
-
if (names.find((name) => {
|
30678
|
-
return scoreNames.includes(name);
|
30679
|
-
})) {
|
30680
|
-
const scores = names.map((name) => {
|
30681
|
-
return {
|
30682
|
-
name,
|
30683
|
-
rendered: () => {
|
30684
|
-
return scoreDescriptor.render(scoreVal[name]);
|
30685
|
-
}
|
30686
|
-
};
|
30687
|
-
});
|
30688
|
-
return scores;
|
30689
|
-
} else {
|
30690
|
-
return [
|
30691
|
-
{
|
30692
|
-
name: scorer,
|
30693
|
-
rendered: () => {
|
30694
|
-
return scoreDescriptor.render(scoreVal);
|
30695
|
-
}
|
30696
|
-
}
|
30697
|
-
];
|
30698
|
-
}
|
30699
|
-
} else {
|
30700
|
-
return [
|
30701
|
-
{
|
30702
|
-
name: scorer,
|
30703
|
-
rendered: () => {
|
30704
|
-
return scoreDescriptor.render(scoreVal);
|
30705
|
-
}
|
30706
|
-
}
|
30707
|
-
];
|
30708
|
-
}
|
30709
|
-
}
|
30710
|
-
};
|
30711
|
-
};
|
30712
30784
|
return {
|
30713
|
-
|
30714
|
-
epochs,
|
30785
|
+
evalDescriptor,
|
30715
30786
|
messageShape,
|
30716
|
-
|
30717
|
-
|
30718
|
-
|
30719
|
-
render: () => {
|
30720
|
-
return scoreRendered(sample);
|
30721
|
-
}
|
30722
|
-
};
|
30723
|
-
},
|
30724
|
-
scorer: (sample, scorer) => {
|
30725
|
-
return scorerDescriptor(sample, scorer);
|
30726
|
-
},
|
30727
|
-
selectedScorer: (sample) => {
|
30728
|
-
return scorerDescriptor(sample, selectedScore == null ? void 0 : selectedScore.scorer);
|
30729
|
-
}
|
30787
|
+
selectedScoreDescriptor: evalDescriptor.scoreDescriptor(selectedScore),
|
30788
|
+
selectedScore: (sample) => evalDescriptor.score(sample, selectedScore),
|
30789
|
+
selectedScorerDescriptor: (sample) => evalDescriptor.scorerDescriptor(sample, selectedScore)
|
30730
30790
|
};
|
30731
30791
|
};
|
30732
30792
|
const scoreCategorizers = [
|
@@ -31314,7 +31374,7 @@ self.onmessage = function (e) {
|
|
31314
31374
|
]
|
31315
31375
|
);
|
31316
31376
|
y(() => {
|
31317
|
-
var _a3;
|
31377
|
+
var _a3, _b3;
|
31318
31378
|
const samples = ((_a3 = selectedLog == null ? void 0 : selectedLog.contents) == null ? void 0 : _a3.sampleSummaries) || [];
|
31319
31379
|
const filtered = samples.filter((sample) => {
|
31320
31380
|
if (epoch && epoch !== "all") {
|
@@ -31331,7 +31391,7 @@ self.onmessage = function (e) {
|
|
31331
31391
|
});
|
31332
31392
|
const { sorted, order: order2 } = sortSamples(sort, filtered, samplesDescriptor);
|
31333
31393
|
let grouping = "none";
|
31334
|
-
if ((samplesDescriptor == null ? void 0 : samplesDescriptor.epochs) > 1) {
|
31394
|
+
if (((_b3 = samplesDescriptor == null ? void 0 : samplesDescriptor.evalDescriptor) == null ? void 0 : _b3.epochs) > 1) {
|
31335
31395
|
if (byEpoch(sort) || epoch !== "all") {
|
31336
31396
|
grouping = "epoch";
|
31337
31397
|
} else if (bySample(sort)) {
|
@@ -31342,15 +31402,17 @@ self.onmessage = function (e) {
|
|
31342
31402
|
setGroupBy(grouping);
|
31343
31403
|
setGroupByOrder(order2);
|
31344
31404
|
}, [selectedLog, filter, sort, epoch]);
|
31345
|
-
const
|
31405
|
+
const evalDescriptor = T(() => {
|
31346
31406
|
var _a3, _b3, _c2, _d2;
|
31347
|
-
return
|
31407
|
+
return createEvalDescriptor(
|
31348
31408
|
scores,
|
31349
31409
|
(_a3 = selectedLog.contents) == null ? void 0 : _a3.sampleSummaries,
|
31350
|
-
((_d2 = (_c2 = (_b3 = selectedLog.contents) == null ? void 0 : _b3.eval) == null ? void 0 : _c2.config) == null ? void 0 : _d2.epochs) || 1
|
31351
|
-
score
|
31410
|
+
((_d2 = (_c2 = (_b3 = selectedLog.contents) == null ? void 0 : _b3.eval) == null ? void 0 : _c2.config) == null ? void 0 : _d2.epochs) || 1
|
31352
31411
|
);
|
31353
|
-
}, [selectedLog, scores
|
31412
|
+
}, [selectedLog, scores]);
|
31413
|
+
const samplesDescriptor = T(() => {
|
31414
|
+
return createSamplesDescriptor(evalDescriptor, score);
|
31415
|
+
}, [evalDescriptor, score]);
|
31354
31416
|
const refreshSampleTab = q(
|
31355
31417
|
(sample) => {
|
31356
31418
|
if (selectedSampleTab === void 0) {
|
@@ -31458,9 +31520,9 @@ self.onmessage = function (e) {
|
|
31458
31520
|
*/
|
31459
31521
|
(log) => {
|
31460
31522
|
const hasSamples = !!log.sampleSummaries && log.sampleSummaries.length > 0;
|
31461
|
-
const showSamples =
|
31523
|
+
const showSamples = hasSamples;
|
31462
31524
|
setSelectedWorkspaceTab(
|
31463
|
-
|
31525
|
+
log.status !== "error" && hasSamples ? kEvalWorkspaceTabId : kInfoWorkspaceTabId
|
31464
31526
|
);
|
31465
31527
|
const scorer = defaultScorer(log);
|
31466
31528
|
const scorers = defaultScorers(log);
|