inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/common.py +7 -3
- inspect_ai/_cli/eval.py +17 -2
- inspect_ai/_cli/trace.py +21 -2
- inspect_ai/_display/core/active.py +4 -3
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +4 -9
- inspect_ai/_display/textual/app.py +4 -1
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +119 -16
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +32 -20
- inspect_ai/_eval/evalset.py +7 -5
- inspect_ai/_eval/score.py +1 -0
- inspect_ai/_eval/task/__init__.py +2 -2
- inspect_ai/_eval/task/images.py +40 -25
- inspect_ai/_eval/task/results.py +50 -22
- inspect_ai/_eval/task/run.py +180 -124
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_eval/task/task.py +140 -25
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/content.py +23 -1
- inspect_ai/_util/images.py +20 -17
- inspect_ai/_util/kvstore.py +73 -0
- inspect_ai/_util/notgiven.py +18 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_util/thread.py +5 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25375 -1846
- inspect_ai/_view/www/log-schema.json +129 -15
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +8 -10
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
- inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +75 -2
- inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +62 -27
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/Json.mjs +12 -6
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/util.py +2 -2
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/dataset/_sources/csv.py +2 -1
- inspect_ai/dataset/_sources/json.py +2 -1
- inspect_ai/dataset/_sources/util.py +15 -7
- inspect_ai/log/_condense.py +11 -1
- inspect_ai/log/_log.py +3 -6
- inspect_ai/log/_recorders/eval.py +19 -8
- inspect_ai/log/_samples.py +26 -5
- inspect_ai/log/_transcript.py +32 -2
- inspect_ai/model/__init__.py +10 -2
- inspect_ai/model/_call_tools.py +59 -12
- inspect_ai/model/_chat_message.py +2 -4
- inspect_ai/model/_conversation.py +61 -0
- inspect_ai/model/_generate_config.py +10 -4
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +7 -2
- inspect_ai/model/_providers/anthropic.py +109 -51
- inspect_ai/model/_providers/azureai.py +26 -24
- inspect_ai/model/_providers/bedrock.py +43 -44
- inspect_ai/model/_providers/google.py +121 -58
- inspect_ai/model/_providers/groq.py +7 -5
- inspect_ai/model/_providers/hf.py +11 -6
- inspect_ai/model/_providers/mistral.py +17 -20
- inspect_ai/model/_providers/openai.py +32 -21
- inspect_ai/model/_providers/openai_o1.py +9 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +8 -8
- inspect_ai/model/_providers/vertex.py +18 -8
- inspect_ai/scorer/__init__.py +13 -2
- inspect_ai/scorer/_metrics/__init__.py +2 -2
- inspect_ai/scorer/_metrics/std.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +2 -2
- inspect_ai/solver/__init__.py +2 -5
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +11 -1
- inspect_ai/tool/_tool.py +21 -3
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -3
- inspect_ai/util/{_trace.py → _conversation.py} +3 -17
- inspect_ai/util/_display.py +14 -4
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/context.py +12 -13
- inspect_ai/util/_sandbox/docker/compose.py +24 -11
- inspect_ai/util/_sandbox/docker/docker.py +84 -14
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/environment.py +27 -1
- inspect_ai/util/_sandbox/local.py +1 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/model/_trace.py +0 -48
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
@@ -1,98 +1,280 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
import {
|
4
|
-
import { createRef } from "preact";
|
1
|
+
import { html } from "htm/preact";
|
2
|
+
import { useRef, useState, useEffect, useMemo } from "preact/hooks";
|
3
|
+
import { forwardRef, useImperativeHandle } from "preact/compat";
|
5
4
|
import { throttle } from "../utils/sync.mjs";
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
/**
|
7
|
+
* A virtualized list component that efficiently renders large lists by only
|
8
|
+
* rendering the items that are currently visible in the viewport.
|
9
|
+
* Supports dynamic row heights that are measured after rendering.
|
10
|
+
*
|
11
|
+
* @template T
|
12
|
+
* @param {Object} props - The component props
|
13
|
+
* @param {T[]} props.data - Array of items to be rendered in the list
|
14
|
+
* @param {(item: T, index: number) => preact.VNode} props.renderRow - Function to render each row
|
15
|
+
* @param {number} [props.overscanCount=15] - Number of extra rows to render above and below the visible area
|
16
|
+
* @param {number} [props.estimatedRowHeight=50] - Estimated height of each row before measurement
|
17
|
+
* @param {boolean} [props.sync=false] - If true, forces a re-render on scroll
|
18
|
+
* @param {import("preact").RefObject<HTMLElement>} [props.scrollRef] - Optional ref for the scroll container
|
19
|
+
* @param {import("preact").Ref<{ scrollToIndex: (index: number) => void }>} ref - Ref object exposing the list's methods
|
20
|
+
* @returns {preact.VNode} The virtualized list component
|
21
|
+
*/
|
22
|
+
export const VirtualList = forwardRef(
|
23
|
+
(
|
24
|
+
/** @type {props} */ {
|
25
|
+
data,
|
26
|
+
renderRow,
|
27
|
+
overscanCount = 15,
|
28
|
+
estimatedRowHeight = 50,
|
29
|
+
sync = false,
|
30
|
+
scrollRef,
|
31
|
+
...props
|
32
|
+
},
|
33
|
+
ref,
|
34
|
+
) => {
|
35
|
+
const [height, setHeight] = useState(0);
|
36
|
+
const [offset, setOffset] = useState(0);
|
37
|
+
|
38
|
+
const [listMetrics, setListMetrics] = useState({
|
39
|
+
rowHeights: new Map(),
|
40
|
+
totalHeight: data.length * estimatedRowHeight,
|
41
|
+
});
|
42
|
+
|
43
|
+
const baseRef = useRef(null);
|
44
|
+
const containerRef = useRef(null);
|
45
|
+
const rowRefs = useRef(new Map());
|
46
|
+
|
47
|
+
// Function to get row height (measured or estimated)
|
48
|
+
const getRowHeight = (index) => {
|
49
|
+
return listMetrics.rowHeights.get(index) || estimatedRowHeight;
|
50
|
+
};
|
51
|
+
|
52
|
+
// Calculate row positions based on current heights
|
53
|
+
const rowPositions = useMemo(() => {
|
54
|
+
let currentPosition = 0;
|
55
|
+
const positions = new Map();
|
56
|
+
|
57
|
+
for (let i = 0; i < data.length; i++) {
|
58
|
+
positions.set(i, currentPosition);
|
59
|
+
currentPosition += getRowHeight(i);
|
60
|
+
}
|
61
|
+
|
62
|
+
return positions;
|
63
|
+
}, [listMetrics.rowHeights, data.length]);
|
64
|
+
|
65
|
+
// Expose scrollToIndex method via ref
|
66
|
+
useImperativeHandle(
|
67
|
+
ref,
|
68
|
+
() => ({
|
69
|
+
focus: () => {
|
70
|
+
baseRef.current;
|
71
|
+
},
|
72
|
+
scrollToIndex: (index, direction) => {
|
73
|
+
const scrollElement = scrollRef?.current || baseRef.current;
|
74
|
+
if (!scrollElement || index < 0 || index >= data.length) return;
|
11
75
|
|
12
|
-
|
13
|
-
|
76
|
+
const currentScrollTop = scrollElement.scrollTop;
|
77
|
+
const viewportHeight = scrollElement.offsetHeight;
|
14
78
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
79
|
+
// Get position and height of target row
|
80
|
+
const rowTop = rowPositions.get(index) || 0;
|
81
|
+
const rowHeight = getRowHeight(index);
|
82
|
+
const rowBottom = rowTop + rowHeight;
|
83
|
+
|
84
|
+
// If this is already visible, don't scroll
|
85
|
+
const isVisible =
|
86
|
+
rowTop >= currentScrollTop &&
|
87
|
+
rowBottom <= currentScrollTop + viewportHeight;
|
88
|
+
if (isVisible) {
|
89
|
+
return;
|
90
|
+
}
|
91
|
+
|
92
|
+
// Calculate new scroll position based on direction
|
93
|
+
let newScrollTop;
|
94
|
+
if (direction === "up") {
|
95
|
+
// Align top of element with top of viewport
|
96
|
+
newScrollTop = rowTop;
|
97
|
+
} else {
|
98
|
+
// Align bottom of element with bottom of viewport
|
99
|
+
newScrollTop = rowBottom - viewportHeight;
|
100
|
+
}
|
101
|
+
|
102
|
+
// Clamp scroll position to valid range
|
103
|
+
newScrollTop = Math.max(
|
104
|
+
0,
|
105
|
+
Math.min(newScrollTop, listMetrics.totalHeight - viewportHeight),
|
106
|
+
);
|
107
|
+
scrollElement.scrollTop = newScrollTop;
|
108
|
+
},
|
109
|
+
}),
|
110
|
+
[rowPositions, data.length],
|
111
|
+
);
|
112
|
+
|
113
|
+
// Measure rendered rows and update heights if needed
|
114
|
+
const measureRows = () => {
|
115
|
+
// Keep track of updated heights
|
116
|
+
let updates = [];
|
117
|
+
|
118
|
+
rowRefs.current.forEach((element, index) => {
|
119
|
+
if (element) {
|
120
|
+
const measuredHeight = element.offsetHeight;
|
121
|
+
// If the measured height is different, schedule an update
|
122
|
+
if (
|
123
|
+
measuredHeight &&
|
124
|
+
measuredHeight !== listMetrics.rowHeights.get(index)
|
125
|
+
) {
|
126
|
+
updates.push([index, measuredHeight]);
|
127
|
+
}
|
128
|
+
}
|
129
|
+
});
|
130
|
+
|
131
|
+
// If no rows changed, do nothing
|
132
|
+
if (updates.length === 0) return;
|
133
|
+
|
134
|
+
// Create a new Map of rowHeights so we don't mutate state directly
|
135
|
+
const newHeights = new Map(listMetrics.rowHeights);
|
136
|
+
updates.forEach(([index, height]) => {
|
137
|
+
newHeights.set(index, height);
|
138
|
+
});
|
139
|
+
|
140
|
+
// Recompute total height only once
|
141
|
+
let newTotalHeight = 0;
|
142
|
+
for (let i = 0; i < data.length; i++) {
|
143
|
+
newTotalHeight += newHeights.get(i) || estimatedRowHeight;
|
144
|
+
}
|
145
|
+
|
146
|
+
// Now update our single state object in one go:
|
147
|
+
setListMetrics({
|
148
|
+
rowHeights: newHeights,
|
149
|
+
totalHeight: newTotalHeight,
|
150
|
+
});
|
20
151
|
};
|
21
|
-
|
22
|
-
|
23
|
-
|
152
|
+
|
153
|
+
// Handle container resize
|
154
|
+
const resize = () => {
|
155
|
+
const scrollElement = scrollRef?.current || baseRef.current;
|
156
|
+
if (scrollElement && height !== scrollElement.offsetHeight) {
|
157
|
+
setHeight(scrollElement.offsetHeight);
|
24
158
|
}
|
25
159
|
};
|
26
|
-
|
27
|
-
|
28
|
-
|
160
|
+
|
161
|
+
// Handle scroll with throttling
|
162
|
+
const handleScroll = throttle(() => {
|
163
|
+
const scrollElement = scrollRef?.current || baseRef.current;
|
164
|
+
if (scrollElement) {
|
165
|
+
setOffset(scrollElement.scrollTop);
|
29
166
|
}
|
30
|
-
if (
|
31
|
-
|
167
|
+
if (sync) {
|
168
|
+
setOffset((prev) => prev);
|
32
169
|
}
|
33
170
|
}, 100);
|
34
|
-
this.containerRef = createRef();
|
35
|
-
}
|
36
|
-
|
37
|
-
componentDidUpdate() {
|
38
|
-
this.resize();
|
39
|
-
}
|
40
|
-
|
41
|
-
componentDidMount() {
|
42
|
-
this.resize();
|
43
|
-
window.addEventListener("resize", this.resize);
|
44
|
-
}
|
45
|
-
|
46
|
-
componentWillUnmount() {
|
47
|
-
window.removeEventListener("resize", this.resize);
|
48
|
-
}
|
49
|
-
|
50
|
-
render(
|
51
|
-
{ data, rowMap, renderRow, overscanCount = 10, ...props },
|
52
|
-
{ offset = 0, height = 0 },
|
53
|
-
) {
|
54
|
-
// Compute the start and ending rows
|
55
|
-
const firstVisibleIdx = rowMap.findIndex((row) => {
|
56
|
-
return row.start + row.height >= offset;
|
57
|
-
});
|
58
|
-
const firstIndex = firstVisibleIdx > -1 ? firstVisibleIdx : 0;
|
59
171
|
|
60
|
-
|
61
|
-
|
172
|
+
// Setup scroll and resize listeners
|
173
|
+
useEffect(() => {
|
174
|
+
resize();
|
175
|
+
const scrollElement = scrollRef?.current || baseRef.current;
|
176
|
+
|
177
|
+
if (scrollElement) {
|
178
|
+
scrollElement.addEventListener("scroll", handleScroll);
|
179
|
+
window.addEventListener("resize", resize);
|
180
|
+
|
181
|
+
return () => {
|
182
|
+
scrollElement.removeEventListener("scroll", handleScroll);
|
183
|
+
window.removeEventListener("resize", resize);
|
184
|
+
};
|
185
|
+
}
|
186
|
+
}, [scrollRef?.current]);
|
187
|
+
|
188
|
+
// Measure rows after render
|
189
|
+
useEffect(() => {
|
190
|
+
measureRows();
|
62
191
|
});
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
192
|
+
|
193
|
+
const findRowAtOffset = (targetOffset) => {
|
194
|
+
if (targetOffset <= 0) return 0;
|
195
|
+
if (targetOffset >= listMetrics.totalHeight) return data.length - 1;
|
196
|
+
|
197
|
+
let low = 0;
|
198
|
+
let high = data.length - 1;
|
199
|
+
let lastValid = 0;
|
200
|
+
|
201
|
+
while (low <= high) {
|
202
|
+
const mid = Math.floor((low + high) / 2);
|
203
|
+
const rowStart = rowPositions.get(mid) || 0;
|
204
|
+
|
205
|
+
if (rowStart <= targetOffset) {
|
206
|
+
lastValid = mid;
|
207
|
+
low = mid + 1;
|
208
|
+
} else {
|
209
|
+
high = mid - 1;
|
210
|
+
}
|
211
|
+
}
|
212
|
+
return lastValid;
|
213
|
+
};
|
214
|
+
|
215
|
+
const firstVisibleIdx = findRowAtOffset(offset);
|
216
|
+
const lastVisibleIdx = findRowAtOffset(offset + height);
|
217
|
+
|
218
|
+
// Calculate range of rows to render including overscan
|
219
|
+
const start = Math.max(0, firstVisibleIdx - overscanCount);
|
220
|
+
const end = Math.min(data.length, lastVisibleIdx + overscanCount);
|
221
|
+
|
222
|
+
// Memoize the rendered rows to prevent unnecessary re-renders
|
223
|
+
const renderedRows = useMemo(() => {
|
224
|
+
const selection = data.slice(start, end);
|
225
|
+
return selection.map((item, index) => {
|
226
|
+
const actualIndex = start + index;
|
227
|
+
return html`
|
228
|
+
<div
|
229
|
+
key=${`list-item-${actualIndex}`}
|
230
|
+
ref=${(el) => {
|
231
|
+
if (el) {
|
232
|
+
rowRefs.current.set(actualIndex, el);
|
233
|
+
} else {
|
234
|
+
rowRefs.current.delete(actualIndex);
|
235
|
+
}
|
236
|
+
}}
|
237
|
+
>
|
238
|
+
${renderRow(item, actualIndex)}
|
239
|
+
</div>
|
240
|
+
`;
|
241
|
+
});
|
242
|
+
}, [data, start, end, renderRow]);
|
243
|
+
|
244
|
+
const style_inner = {
|
245
|
+
position: "relative",
|
246
|
+
overflow: scrollRef?.current ? "visible" : "hidden",
|
247
|
+
width: "100%",
|
248
|
+
minHeight: "100%",
|
249
|
+
};
|
250
|
+
|
251
|
+
const style_content = {
|
252
|
+
position: "absolute",
|
253
|
+
top: 0,
|
254
|
+
left: 0,
|
255
|
+
height: "100%",
|
256
|
+
width: "100%",
|
257
|
+
overflow: "visible",
|
258
|
+
};
|
259
|
+
|
260
|
+
const top = rowPositions.get(start) || 0;
|
261
|
+
|
262
|
+
// Only attach onscroll to baseRef if no scrollRef is provided
|
263
|
+
const scrollProps = scrollRef ? {} : { onscroll: handleScroll };
|
264
|
+
|
265
|
+
return html`
|
266
|
+
<div ref=${baseRef} ...${props} ...${scrollProps}>
|
267
|
+
<div
|
268
|
+
style=${{ ...style_inner, height: `${listMetrics.totalHeight}px` }}
|
269
|
+
>
|
270
|
+
<div
|
271
|
+
style=${{ ...style_content, top: `${top}px` }}
|
272
|
+
ref=${containerRef}
|
273
|
+
>
|
274
|
+
${renderedRows}
|
275
|
+
</div>
|
93
276
|
</div>
|
94
277
|
</div>
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
}
|
278
|
+
`;
|
279
|
+
},
|
280
|
+
);
|
@@ -10,7 +10,7 @@ import { throttle } from "./utils/sync.mjs";
|
|
10
10
|
const vscode = getVscodeApi();
|
11
11
|
let initialState = undefined;
|
12
12
|
if (vscode) {
|
13
|
-
initialState = vscode.getState();
|
13
|
+
initialState = filterState(vscode.getState());
|
14
14
|
}
|
15
15
|
|
16
16
|
render(
|
@@ -20,9 +20,82 @@ render(
|
|
20
20
|
saveInitialState=${throttle((state) => {
|
21
21
|
const vscode = getVscodeApi();
|
22
22
|
if (vscode) {
|
23
|
-
vscode.setState(state);
|
23
|
+
vscode.setState(filterState(state));
|
24
24
|
}
|
25
25
|
}, 1000)}
|
26
26
|
/>`,
|
27
27
|
document.getElementById("app"),
|
28
28
|
);
|
29
|
+
|
30
|
+
function filterState(state) {
|
31
|
+
if (!state) {
|
32
|
+
return state;
|
33
|
+
}
|
34
|
+
|
35
|
+
// When saving state, we can't store vast amounts of data (like a large sample)
|
36
|
+
const filters = [filterLargeSample, filterLargeSelectedLog];
|
37
|
+
return filters.reduce(
|
38
|
+
(filteredState, filter) => filter(filteredState),
|
39
|
+
state,
|
40
|
+
);
|
41
|
+
}
|
42
|
+
|
43
|
+
// Filters the selected Sample if it is large
|
44
|
+
function filterLargeSample(state) {
|
45
|
+
if (!state || !state.selectedSample) {
|
46
|
+
return state;
|
47
|
+
}
|
48
|
+
|
49
|
+
const estimatedTotalSize = estimateSize(state.selectedSample.messages);
|
50
|
+
if (estimatedTotalSize > 400000) {
|
51
|
+
const { selectedSample, ...filteredState } = state; // eslint-disable-line
|
52
|
+
return filteredState;
|
53
|
+
} else {
|
54
|
+
return state;
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
// Filters the selectedlog if it is too large
|
59
|
+
function filterLargeSelectedLog(state) {
|
60
|
+
if (!state || !state.selectedLog?.contents) {
|
61
|
+
return state;
|
62
|
+
}
|
63
|
+
|
64
|
+
const estimatedSize = estimateSize(
|
65
|
+
state.selectedLog.contents.sampleSummaries,
|
66
|
+
);
|
67
|
+
if (estimatedSize > 400000) {
|
68
|
+
const { selectedLog, ...filteredState } = state; // eslint-disable-line
|
69
|
+
return filteredState;
|
70
|
+
} else {
|
71
|
+
return state;
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
function estimateSize(list, frequency = 0.2) {
|
76
|
+
if (!list || list.len === 0) {
|
77
|
+
return 0;
|
78
|
+
}
|
79
|
+
|
80
|
+
// Total number of samples
|
81
|
+
const sampleSize = Math.ceil(list.length * frequency);
|
82
|
+
|
83
|
+
// Get a proper random sample without duplicates
|
84
|
+
const messageIndices = new Set();
|
85
|
+
while (
|
86
|
+
messageIndices.size < sampleSize &&
|
87
|
+
messageIndices.size < list.length
|
88
|
+
) {
|
89
|
+
const randomIndex = Math.floor(Math.random() * list.length);
|
90
|
+
messageIndices.add(randomIndex);
|
91
|
+
}
|
92
|
+
|
93
|
+
// Calculate size from sampled messages
|
94
|
+
const totalSize = Array.from(messageIndices).reduce((size, index) => {
|
95
|
+
return size + JSON.stringify(list[index]).length;
|
96
|
+
}, 0);
|
97
|
+
|
98
|
+
// Estimate total size based on sample
|
99
|
+
const estimatedTotalSize = (totalSize / sampleSize) * list.length;
|
100
|
+
return estimatedTotalSize;
|
101
|
+
}
|
@@ -19,6 +19,7 @@ import { SecondaryBar } from "./SecondaryBar.mjs";
|
|
19
19
|
* @param {import("../types/log").EvalPlan} [props.evalPlan] - The EvalSpec
|
20
20
|
* @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
|
21
21
|
* @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
|
22
|
+
* @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
|
22
23
|
* @param {string} [props.status] - the status
|
23
24
|
* @param {boolean} props.offcanvas - Are we in offcanvas mode?
|
24
25
|
* @param {boolean} props.showToggle - Should we show the toggle?
|
@@ -32,6 +33,7 @@ export const Navbar = ({
|
|
32
33
|
evalResults,
|
33
34
|
evalStats,
|
34
35
|
samples,
|
36
|
+
evalDescriptor,
|
35
37
|
showToggle,
|
36
38
|
offcanvas,
|
37
39
|
status,
|
@@ -182,6 +184,7 @@ export const Navbar = ({
|
|
182
184
|
evalResults=${evalResults}
|
183
185
|
evalStats=${evalStats}
|
184
186
|
samples=${samples}
|
187
|
+
evalDescriptor=${evalDescriptor}
|
185
188
|
status=${status}
|
186
189
|
style=${{ gridColumn: "1/-1" }}
|
187
190
|
/>
|
@@ -3,6 +3,7 @@ import { html } from "htm/preact";
|
|
3
3
|
import { LabeledValue } from "../components/LabeledValue.mjs";
|
4
4
|
import { formatDataset, formatDuration } from "../utils/Format.mjs";
|
5
5
|
import { ExpandablePanel } from "../components/ExpandablePanel.mjs";
|
6
|
+
import { scoreFilterItems } from "../samples/tools/filters.mjs";
|
6
7
|
|
7
8
|
/**
|
8
9
|
* Renders the Navbar
|
@@ -13,6 +14,7 @@ import { ExpandablePanel } from "../components/ExpandablePanel.mjs";
|
|
13
14
|
* @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults
|
14
15
|
* @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
|
15
16
|
* @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
|
17
|
+
* @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
|
16
18
|
* @param {string} [props.status] - the status
|
17
19
|
* @param {Map<string, string>} [props.style] - is this off canvas
|
18
20
|
*
|
@@ -24,6 +26,7 @@ export const SecondaryBar = ({
|
|
24
26
|
evalResults,
|
25
27
|
evalStats,
|
26
28
|
samples,
|
29
|
+
evalDescriptor,
|
27
30
|
status,
|
28
31
|
style,
|
29
32
|
}) => {
|
@@ -60,8 +63,8 @@ export const SecondaryBar = ({
|
|
60
63
|
values.push({
|
61
64
|
size: "minmax(12%, auto)",
|
62
65
|
value: html`<${LabeledValue} label="${label}" style=${staticColStyle} style=${{ justifySelf: hasConfig ? "left" : "center" }}>
|
63
|
-
<${ScorerSummary}
|
64
|
-
|
66
|
+
<${ScorerSummary}
|
67
|
+
evalDescriptor=${evalDescriptor} />
|
65
68
|
</${LabeledValue}>`,
|
66
69
|
});
|
67
70
|
|
@@ -124,17 +127,23 @@ const DatasetSummary = ({ dataset, samples, epochs, style }) => {
|
|
124
127
|
`;
|
125
128
|
};
|
126
129
|
|
127
|
-
const ScorerSummary = ({
|
128
|
-
if (!
|
130
|
+
const ScorerSummary = ({ evalDescriptor }) => {
|
131
|
+
if (!evalDescriptor) {
|
129
132
|
return "";
|
130
133
|
}
|
131
134
|
|
132
|
-
const
|
133
|
-
scorers.forEach((scorer) => {
|
134
|
-
uniqScorers.add(scorer.name);
|
135
|
-
});
|
135
|
+
const items = scoreFilterItems(evalDescriptor);
|
136
136
|
|
137
|
-
return
|
137
|
+
return html`
|
138
|
+
<span style=${{ position: "relative" }}>
|
139
|
+
${Array.from(items).map(
|
140
|
+
(item, index) => html`
|
141
|
+
${index > 0 ? ", " : ""}
|
142
|
+
<span title=${item.tooltip}>${item.canonicalName}</span>
|
143
|
+
`,
|
144
|
+
)}
|
145
|
+
</span>
|
146
|
+
`;
|
138
147
|
};
|
139
148
|
|
140
149
|
/**
|
@@ -1,5 +1,5 @@
|
|
1
1
|
import { html } from "htm/preact";
|
2
|
-
import { useCallback, useMemo } from "preact/hooks";
|
2
|
+
import { useCallback, useMemo, useRef } from "preact/hooks";
|
3
3
|
|
4
4
|
import { ApplicationIcons } from "../appearance/Icons.mjs";
|
5
5
|
import { LargeModal } from "../components/LargeModal.mjs";
|
@@ -43,6 +43,8 @@ export const SampleDialog = ({
|
|
43
43
|
sampleScrollPositionRef,
|
44
44
|
setSampleScrollPosition,
|
45
45
|
}) => {
|
46
|
+
const scrollRef = useRef(/** @type {HTMLElement|null} */ (null));
|
47
|
+
|
46
48
|
const tools = useMemo(() => {
|
47
49
|
const nextTool = {
|
48
50
|
label: "Next Sample",
|
@@ -94,6 +96,7 @@ export const SampleDialog = ({
|
|
94
96
|
sampleDescriptor=${sampleDescriptor}
|
95
97
|
selectedTab=${selectedTab}
|
96
98
|
setSelectedTab=${setSelectedTab}
|
99
|
+
scrollRef=${scrollRef}
|
97
100
|
/>`;
|
98
101
|
}, [id, sample, sampleDescriptor, selectedTab, setSelectedTab, sampleError]);
|
99
102
|
|
@@ -113,6 +116,7 @@ export const SampleDialog = ({
|
|
113
116
|
showProgress=${sampleStatus === "loading"}
|
114
117
|
initialScrollPositionRef=${sampleScrollPositionRef}
|
115
118
|
setInitialScrollPosition=${setSampleScrollPosition}
|
119
|
+
scrollRef=${scrollRef}
|
116
120
|
>
|
117
121
|
${children}
|
118
122
|
</${LargeModal}>`;
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import { html } from "htm/preact";
|
2
2
|
|
3
|
-
import {
|
3
|
+
import { ChatViewVirtualList } from "../components/ChatView.mjs";
|
4
4
|
import { MetaDataView } from "../components/MetaDataView.mjs";
|
5
5
|
import { TabSet, TabPanel } from "../components/TabSet.mjs";
|
6
6
|
|
@@ -47,6 +47,7 @@ import {
|
|
47
47
|
* @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - the sample descriptor
|
48
48
|
* @param {string} props.selectedTab - The selected tab
|
49
49
|
* @param {(tab: string) => void} props.setSelectedTab - function to set the selected tab
|
50
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable element whic contains this display
|
50
51
|
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
51
52
|
*/
|
52
53
|
export const InlineSampleDisplay = ({
|
@@ -57,6 +58,7 @@ export const InlineSampleDisplay = ({
|
|
57
58
|
sampleDescriptor,
|
58
59
|
selectedTab,
|
59
60
|
setSelectedTab,
|
61
|
+
scrollRef,
|
60
62
|
}) => {
|
61
63
|
return html`<div style=${{ flexDirection: "row", width: "100%" }}>
|
62
64
|
<${ProgressBar}
|
@@ -77,6 +79,7 @@ export const InlineSampleDisplay = ({
|
|
77
79
|
sampleDescriptor=${sampleDescriptor}
|
78
80
|
selectedTab=${selectedTab}
|
79
81
|
setSelectedTab=${setSelectedTab}
|
82
|
+
scrollRef=${scrollRef}
|
80
83
|
/>`}
|
81
84
|
</div>
|
82
85
|
</div>`;
|
@@ -91,6 +94,7 @@ export const InlineSampleDisplay = ({
|
|
91
94
|
* @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - the sample descriptor
|
92
95
|
* @param {string} props.selectedTab - The selected tab
|
93
96
|
* @param {(tab: string) => void} props.setSelectedTab - function to set the selected tab
|
97
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
|
94
98
|
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
95
99
|
*/
|
96
100
|
export const SampleDisplay = ({
|
@@ -99,6 +103,7 @@ export const SampleDisplay = ({
|
|
99
103
|
sampleDescriptor,
|
100
104
|
selectedTab,
|
101
105
|
setSelectedTab,
|
106
|
+
scrollRef,
|
102
107
|
}) => {
|
103
108
|
// Tab ids
|
104
109
|
const baseId = `sample-dialog`;
|
@@ -120,13 +125,14 @@ export const SampleDisplay = ({
|
|
120
125
|
html`
|
121
126
|
<${TabPanel} id=${kSampleMessagesTabId} classes="sample-tab" title="Messages" onSelected=${onSelectedTab} selected=${
|
122
127
|
selectedTab === kSampleMessagesTabId
|
123
|
-
}>
|
124
|
-
<${
|
128
|
+
} scrollable=${false} style=${{ width: "100%" }}>
|
129
|
+
<${ChatViewVirtualList}
|
125
130
|
key=${`${baseId}-chat-${id}`}
|
126
131
|
id=${`${baseId}-chat-${id}`}
|
127
132
|
messages=${sample.messages}
|
128
|
-
style=${{
|
133
|
+
style=${{ marginLeft: ".8em", marginTop: "1em" }}
|
129
134
|
indented=${true}
|
135
|
+
scrollRef=${scrollRef}
|
130
136
|
/>
|
131
137
|
</${TabPanel}>`,
|
132
138
|
];
|
@@ -136,7 +142,7 @@ export const SampleDisplay = ({
|
|
136
142
|
<${TabPanel} id=${kSampleTranscriptTabId} classes="sample-tab" title="Transcript" onSelected=${onSelectedTab} selected=${
|
137
143
|
selectedTab === kSampleTranscriptTabId || selectedTab === undefined
|
138
144
|
} scrollable=${false}>
|
139
|
-
<${SampleTranscript} key=${`${baseId}-transcript-display-${id}`} id=${`${baseId}-transcript-display-${id}`} evalEvents=${sample.events}/>
|
145
|
+
<${SampleTranscript} key=${`${baseId}-transcript-display-${id}`} id=${`${baseId}-transcript-display-${id}`} evalEvents=${sample.events} scrollRef=${scrollRef}/>
|
140
146
|
</${TabPanel}>`);
|
141
147
|
}
|
142
148
|
|
@@ -201,16 +207,18 @@ export const SampleDisplay = ({
|
|
201
207
|
);
|
202
208
|
}
|
203
209
|
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
210
|
+
if (sample.messages.length < 100) {
|
211
|
+
tabs.push(html`<${TabPanel}
|
212
|
+
id=${kSampleJsonTabId}
|
213
|
+
classes="sample-tab"
|
214
|
+
title="JSON"
|
215
|
+
onSelected=${onSelectedTab}
|
216
|
+
selected=${selectedTab === kSampleJsonTabId}>
|
217
|
+
<div style=${{ paddingLeft: "0.8em", marginTop: "0.4em" }}>
|
218
|
+
<${JSONPanel} data=${sample} simple=${true}/>
|
219
|
+
</div>
|
220
|
+
</${TabPanel}>`);
|
221
|
+
}
|
214
222
|
|
215
223
|
const tabsetId = `task-sample-details-tab-${id}`;
|
216
224
|
const targetId = `${tabsetId}-content`;
|