inspect-ai 0.3.96__py3-none-any.whl → 0.3.97__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_eval/eval.py +10 -2
- inspect_ai/_eval/task/util.py +32 -3
- inspect_ai/_util/registry.py +7 -0
- inspect_ai/_util/timer.py +13 -0
- inspect_ai/_view/www/dist/assets/index.css +275 -195
- inspect_ai/_view/www/dist/assets/index.js +8568 -7376
- inspect_ai/_view/www/src/app/App.css +1 -0
- inspect_ai/_view/www/src/app/App.tsx +27 -10
- inspect_ai/_view/www/src/app/appearance/icons.ts +5 -0
- inspect_ai/_view/www/src/app/content/RecordTree.module.css +22 -0
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +370 -0
- inspect_ai/_view/www/src/app/content/RenderedContent.module.css +5 -0
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +32 -19
- inspect_ai/_view/www/src/app/content/record_processors/store.ts +101 -0
- inspect_ai/_view/www/src/app/content/record_processors/types.ts +3 -0
- inspect_ai/_view/www/src/app/content/types.ts +5 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -0
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +35 -28
- inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +1 -8
- inspect_ai/_view/www/src/app/log-view/navbar/PrimaryBar.tsx +2 -4
- inspect_ai/_view/www/src/app/log-view/navbar/ResultsPanel.tsx +13 -3
- inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.module.css +15 -0
- inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.tsx +14 -10
- inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +9 -3
- inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +1 -3
- inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +8 -2
- inspect_ai/_view/www/src/app/log-view/types.ts +1 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.module.css +7 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.tsx +5 -2
- inspect_ai/_view/www/src/app/plan/PlanCard.tsx +13 -8
- inspect_ai/_view/www/src/app/routing/navigationHooks.ts +63 -8
- inspect_ai/_view/www/src/app/routing/url.ts +45 -0
- inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +2 -1
- inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.tsx +15 -8
- inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +3 -0
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +16 -5
- inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +68 -31
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +12 -7
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -5
- inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.module.css +9 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +48 -18
- inspect_ai/_view/www/src/app/samples/chat/ChatView.tsx +0 -1
- inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.module.css +4 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +41 -1
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -0
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +0 -3
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolOutput.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +5 -1
- inspect_ai/_view/www/src/app/samples/descriptor/score/PassFailScoreDescriptor.tsx +11 -6
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +7 -0
- inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +5 -18
- inspect_ai/_view/www/src/app/samples/sample-tools/SortFilter.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.tsx +18 -5
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.module.css +0 -6
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.tsx +4 -1
- inspect_ai/_view/www/src/app/samples/transcript/ApprovalEventView.tsx +4 -2
- inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +6 -4
- inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +13 -6
- inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +6 -4
- inspect_ai/_view/www/src/app/samples/transcript/LoggerEventView.tsx +4 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +11 -8
- inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +14 -8
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +13 -8
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +25 -16
- inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +7 -5
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +11 -28
- inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +12 -20
- inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +12 -31
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +25 -29
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +297 -0
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +0 -8
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +43 -25
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +43 -0
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +109 -43
- inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +19 -8
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +128 -60
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +14 -4
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +6 -4
- inspect_ai/_view/www/src/app/types.ts +12 -1
- inspect_ai/_view/www/src/components/Card.css +6 -3
- inspect_ai/_view/www/src/components/Card.tsx +15 -2
- inspect_ai/_view/www/src/components/CopyButton.tsx +4 -6
- inspect_ai/_view/www/src/components/ExpandablePanel.module.css +20 -14
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +17 -22
- inspect_ai/_view/www/src/components/LargeModal.tsx +5 -1
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +25 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.css +4 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +2 -2
- inspect_ai/_view/www/src/components/TabSet.module.css +6 -1
- inspect_ai/_view/www/src/components/TabSet.tsx +8 -2
- inspect_ai/_view/www/src/state/hooks.ts +83 -13
- inspect_ai/_view/www/src/state/logPolling.ts +2 -2
- inspect_ai/_view/www/src/state/logSlice.ts +1 -2
- inspect_ai/_view/www/src/state/logsSlice.ts +9 -9
- inspect_ai/_view/www/src/state/samplePolling.ts +1 -1
- inspect_ai/_view/www/src/state/sampleSlice.ts +134 -7
- inspect_ai/_view/www/src/state/scoring.ts +1 -1
- inspect_ai/_view/www/src/state/scrolling.ts +39 -6
- inspect_ai/_view/www/src/state/store.ts +5 -0
- inspect_ai/_view/www/src/state/store_filter.ts +47 -44
- inspect_ai/_view/www/src/utils/debugging.ts +95 -0
- inspect_ai/_view/www/src/utils/format.ts +2 -2
- inspect_ai/_view/www/src/utils/json.ts +29 -0
- inspect_ai/agent/__init__.py +2 -1
- inspect_ai/agent/_agent.py +12 -0
- inspect_ai/agent/_react.py +184 -48
- inspect_ai/agent/_types.py +14 -1
- inspect_ai/analysis/beta/__init__.py +0 -2
- inspect_ai/analysis/beta/_dataframe/columns.py +11 -16
- inspect_ai/analysis/beta/_dataframe/evals/table.py +65 -40
- inspect_ai/analysis/beta/_dataframe/events/table.py +24 -36
- inspect_ai/analysis/beta/_dataframe/messages/table.py +24 -15
- inspect_ai/analysis/beta/_dataframe/progress.py +35 -5
- inspect_ai/analysis/beta/_dataframe/record.py +13 -9
- inspect_ai/analysis/beta/_dataframe/samples/columns.py +1 -1
- inspect_ai/analysis/beta/_dataframe/samples/table.py +156 -46
- inspect_ai/analysis/beta/_dataframe/util.py +14 -12
- inspect_ai/model/_call_tools.py +1 -1
- inspect_ai/model/_providers/anthropic.py +18 -5
- inspect_ai/model/_providers/azureai.py +7 -2
- inspect_ai/model/_providers/util/llama31.py +3 -3
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/RECORD +131 -126
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.module.css +0 -48
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +0 -276
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/top_level.txt +0 -0
@@ -4,20 +4,35 @@ import { SampleSummary } from "../client/api/types";
|
|
4
4
|
import { kSampleMessagesTabId } from "../constants";
|
5
5
|
import { createLogger } from "../utils/logger";
|
6
6
|
import { createSamplePolling } from "./samplePolling";
|
7
|
-
import { resolveSample } from "./sampleUtils";
|
7
|
+
import { resolveSample } from "./sampleUtils";
|
8
8
|
import { StoreState } from "./store";
|
9
|
+
import { isLargeSample } from "./store_filter";
|
9
10
|
|
10
11
|
const log = createLogger("sampleSlice");
|
11
12
|
|
13
|
+
// Create a module-level ref to store large sample objects
|
14
|
+
let selectedSampleRef: { current: EvalSample | undefined } = {
|
15
|
+
current: undefined,
|
16
|
+
};
|
17
|
+
|
12
18
|
export interface SampleSlice {
|
13
19
|
sample: SampleState;
|
14
20
|
sampleActions: {
|
15
21
|
// The actual sample data
|
16
22
|
setSelectedSample: (sample: EvalSample) => void;
|
23
|
+
getSelectedSample: () => EvalSample | undefined;
|
17
24
|
clearSelectedSample: () => void;
|
18
25
|
setSampleStatus: (status: SampleStatus) => void;
|
19
26
|
setSampleError: (error: Error | undefined) => void;
|
20
27
|
|
28
|
+
setCollapsedEvents: (collapsed: Record<string, true>) => void;
|
29
|
+
collapseEvent: (id: string, collapsed: boolean) => void;
|
30
|
+
clearCollapsedEvents: () => void;
|
31
|
+
|
32
|
+
setCollapsedIds: (key: string, collapsed: Record<string, true>) => void;
|
33
|
+
collapseId: (key: string, id: string, collapsed: boolean) => void;
|
34
|
+
clearCollapsedIds: (key: string) => void;
|
35
|
+
|
21
36
|
// Loading
|
22
37
|
loadSample: (
|
23
38
|
logFile: string,
|
@@ -32,12 +47,23 @@ export interface SampleSlice {
|
|
32
47
|
}
|
33
48
|
|
34
49
|
const initialState: SampleState = {
|
35
|
-
|
50
|
+
// Store ID for all samples (used for triggering renders)
|
51
|
+
sample_identifier: undefined,
|
52
|
+
// Store the actual sample object for small samples
|
53
|
+
selectedSampleObject: undefined,
|
54
|
+
// Flag to indicate where the sample is stored
|
55
|
+
sampleInState: false,
|
36
56
|
sampleStatus: "ok",
|
37
57
|
sampleError: undefined,
|
38
58
|
|
59
|
+
// signals that the sample needs to be reloaded
|
60
|
+
sampleNeedsReload: 0,
|
61
|
+
|
39
62
|
// The resolved events
|
40
63
|
runningEvents: [],
|
64
|
+
collapsedEvents: null,
|
65
|
+
|
66
|
+
collapsedIdBuckets: {},
|
41
67
|
};
|
42
68
|
|
43
69
|
export const createSampleSlice = (
|
@@ -53,18 +79,49 @@ export const createSampleSlice = (
|
|
53
79
|
sample: initialState,
|
54
80
|
sampleActions: {
|
55
81
|
setSelectedSample: (sample: EvalSample) => {
|
82
|
+
const isLarge = isLargeSample(sample);
|
83
|
+
|
84
|
+
// Update state based on sample size
|
56
85
|
set((state) => {
|
57
|
-
state.sample.
|
86
|
+
state.sample.sample_identifier = {
|
87
|
+
id: sample.id,
|
88
|
+
epoch: sample.epoch,
|
89
|
+
};
|
90
|
+
state.sample.sampleInState = !isLarge;
|
91
|
+
|
92
|
+
// Only store in state if it's small
|
93
|
+
if (!isLarge) {
|
94
|
+
state.sample.selectedSampleObject = sample;
|
95
|
+
// Clear ref if using state
|
96
|
+
selectedSampleRef.current = undefined;
|
97
|
+
} else {
|
98
|
+
// Use ref for large objects
|
99
|
+
state.sample.selectedSampleObject = undefined;
|
100
|
+
selectedSampleRef.current = sample;
|
101
|
+
}
|
58
102
|
});
|
103
|
+
|
59
104
|
if (sample.events.length < 1) {
|
60
105
|
// If there are no events, use the messages tab as the default
|
61
106
|
get().appActions.setSampleTab(kSampleMessagesTabId);
|
62
107
|
}
|
63
108
|
},
|
64
|
-
|
109
|
+
getSelectedSample: () => {
|
110
|
+
const state = get().sample;
|
111
|
+
// Return from state if stored there, otherwise from ref
|
112
|
+
return state.sampleInState
|
113
|
+
? state.selectedSampleObject
|
114
|
+
: selectedSampleRef.current;
|
115
|
+
},
|
116
|
+
clearSelectedSample: () => {
|
117
|
+
// Clear both the ref and the state
|
118
|
+
selectedSampleRef.current = undefined;
|
65
119
|
set((state) => {
|
66
|
-
state.sample.
|
67
|
-
|
120
|
+
state.sample.sample_identifier = undefined;
|
121
|
+
state.sample.selectedSampleObject = undefined;
|
122
|
+
state.sample.sampleInState = false;
|
123
|
+
});
|
124
|
+
},
|
68
125
|
setSampleStatus: (status: SampleStatus) =>
|
69
126
|
set((state) => {
|
70
127
|
state.sample.sampleStatus = status;
|
@@ -73,10 +130,59 @@ export const createSampleSlice = (
|
|
73
130
|
set((state) => {
|
74
131
|
state.sample.sampleError = error;
|
75
132
|
}),
|
133
|
+
setCollapsedEvents: (collapsed: Record<string, true>) => {
|
134
|
+
set((state) => {
|
135
|
+
state.sample.collapsedEvents = collapsed;
|
136
|
+
});
|
137
|
+
},
|
138
|
+
clearCollapsedEvents: () => {
|
139
|
+
set((state) => {
|
140
|
+
state.sample.collapsedEvents = null;
|
141
|
+
});
|
142
|
+
},
|
143
|
+
collapseEvent: (id: string, collapsed: boolean) => {
|
144
|
+
set((state) => {
|
145
|
+
if (state.sample.collapsedEvents === null) {
|
146
|
+
state.sample.collapsedEvents = {};
|
147
|
+
}
|
148
|
+
if (collapsed) {
|
149
|
+
state.sample.collapsedEvents[id] = true;
|
150
|
+
} else {
|
151
|
+
delete state.sample.collapsedEvents[id];
|
152
|
+
}
|
153
|
+
});
|
154
|
+
},
|
155
|
+
setCollapsedIds: (key: string, collapsed: Record<string, true>) => {
|
156
|
+
set((state) => {
|
157
|
+
state.sample.collapsedIdBuckets[key] = collapsed;
|
158
|
+
});
|
159
|
+
},
|
160
|
+
collapseId: (key: string, id: string, collapsed: boolean) => {
|
161
|
+
set((state) => {
|
162
|
+
if (state.sample.collapsedIdBuckets[key] === undefined) {
|
163
|
+
state.sample.collapsedIdBuckets[key] = {};
|
164
|
+
}
|
165
|
+
if (collapsed) {
|
166
|
+
state.sample.collapsedIdBuckets[key][id] = true;
|
167
|
+
} else {
|
168
|
+
delete state.sample.collapsedIdBuckets[key][id];
|
169
|
+
}
|
170
|
+
});
|
171
|
+
},
|
172
|
+
clearCollapsedIds: (key: string) => {
|
173
|
+
set((state) => {
|
174
|
+
delete state.sample.collapsedIdBuckets[key];
|
175
|
+
});
|
176
|
+
},
|
177
|
+
|
76
178
|
pollSample: async (logFile: string, sampleSummary: SampleSummary) => {
|
77
179
|
// Poll running sample
|
78
180
|
const state = get();
|
79
|
-
|
181
|
+
const sampleExists = state.sample.sampleInState
|
182
|
+
? !!state.sample.selectedSampleObject
|
183
|
+
: !!selectedSampleRef.current;
|
184
|
+
|
185
|
+
if (state.log.loadedLog && sampleExists) {
|
80
186
|
samplePolling.startPolling(logFile, sampleSummary);
|
81
187
|
}
|
82
188
|
},
|
@@ -85,6 +191,8 @@ export const createSampleSlice = (
|
|
85
191
|
|
86
192
|
sampleActions.setSampleError(undefined);
|
87
193
|
sampleActions.setSampleStatus("loading");
|
194
|
+
const state = get();
|
195
|
+
|
88
196
|
try {
|
89
197
|
if (sampleSummary.completed !== false) {
|
90
198
|
log.debug(
|
@@ -95,8 +203,18 @@ export const createSampleSlice = (
|
|
95
203
|
sampleSummary.id,
|
96
204
|
sampleSummary.epoch,
|
97
205
|
);
|
206
|
+
log.debug(
|
207
|
+
`LOADED COMPLETED SAMPLE: ${sampleSummary.id}-${sampleSummary.epoch}`,
|
208
|
+
);
|
98
209
|
if (sample) {
|
99
210
|
const migratedSample = resolveSample(sample);
|
211
|
+
|
212
|
+
if (
|
213
|
+
state.sample.sample_identifier?.id !== sample.id &&
|
214
|
+
state.sample.sample_identifier?.epoch !== sample.epoch
|
215
|
+
) {
|
216
|
+
sampleActions.clearCollapsedEvents();
|
217
|
+
}
|
100
218
|
sampleActions.setSelectedSample(migratedSample);
|
101
219
|
sampleActions.setSampleStatus("ok");
|
102
220
|
} else {
|
@@ -123,10 +241,19 @@ export const createSampleSlice = (
|
|
123
241
|
|
124
242
|
const cleanup = () => {
|
125
243
|
samplePolling.cleanup();
|
244
|
+
// Clear the ref when cleaning up
|
245
|
+
selectedSampleRef.current = undefined;
|
126
246
|
};
|
127
247
|
return [slice, cleanup];
|
128
248
|
};
|
129
249
|
|
250
|
+
export const handleRehydrate = (state: StoreState) => {
|
251
|
+
// Increment the reload counter if the sample is not in state
|
252
|
+
if (!state.sample.sampleInState) {
|
253
|
+
state.sample.sampleNeedsReload = state.sample.sampleNeedsReload + 1;
|
254
|
+
}
|
255
|
+
};
|
256
|
+
|
130
257
|
export const initializeSampleSlice = (
|
131
258
|
set: (fn: (state: StoreState) => void) => void,
|
132
259
|
) => {
|
@@ -37,7 +37,7 @@ const getScorersFromResults = (results?: EvalResults): ScorerInfo[] => {
|
|
37
37
|
const getScorersFromSamples = (samples: SampleSummary[]): ScorerInfo[] => {
|
38
38
|
// Find a sample with scores
|
39
39
|
const scoredSample = samples.find((sample) => {
|
40
|
-
return !!sample.scores;
|
40
|
+
return !sample.error && sample.completed && !!sample.scores;
|
41
41
|
});
|
42
42
|
|
43
43
|
return Object.keys(scoredSample?.scores || {}).map((key) => ({
|
@@ -11,7 +11,7 @@ export function useStatefulScrollPosition<
|
|
11
11
|
>(
|
12
12
|
elementRef: RefObject<T | null>,
|
13
13
|
elementKey: string,
|
14
|
-
delay =
|
14
|
+
delay = 1000,
|
15
15
|
scrollable = true,
|
16
16
|
) {
|
17
17
|
const getScrollPosition = useStore(
|
@@ -62,12 +62,45 @@ export function useStatefulScrollPosition<
|
|
62
62
|
const savedPosition = getScrollPosition(elementKey);
|
63
63
|
if (savedPosition !== undefined) {
|
64
64
|
log.debug(`Restoring scroll position`, savedPosition);
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
65
|
+
|
66
|
+
// Function to check and restore scroll position
|
67
|
+
const tryRestoreScroll = () => {
|
68
|
+
// Check if element has content to scroll (scrollHeight > clientHeight)
|
69
|
+
if (element.scrollHeight > element.clientHeight) {
|
70
|
+
if (element.scrollTop !== savedPosition) {
|
71
|
+
element.scrollTop = savedPosition;
|
72
|
+
log.debug(`Scroll position restored to ${savedPosition}`);
|
73
|
+
}
|
74
|
+
return true; // Successfully restored
|
69
75
|
}
|
70
|
-
|
76
|
+
return false; // Not ready yet
|
77
|
+
};
|
78
|
+
|
79
|
+
// Try immediately once
|
80
|
+
if (!tryRestoreScroll()) {
|
81
|
+
// If not successful, set up polling with setTimeout for 1-second intervals
|
82
|
+
let attempts = 0;
|
83
|
+
const maxAttempts = 5; // Fewer attempts since we're waiting longer
|
84
|
+
|
85
|
+
const pollForRender = () => {
|
86
|
+
if (tryRestoreScroll() || attempts >= maxAttempts) {
|
87
|
+
// Either success or max attempts reached
|
88
|
+
if (attempts >= maxAttempts) {
|
89
|
+
log.debug(
|
90
|
+
`Failed to restore scroll after ${maxAttempts} attempts`,
|
91
|
+
);
|
92
|
+
}
|
93
|
+
return;
|
94
|
+
}
|
95
|
+
|
96
|
+
attempts++;
|
97
|
+
// Wait 1 second before trying again
|
98
|
+
setTimeout(pollForRender, 1000);
|
99
|
+
};
|
100
|
+
|
101
|
+
// Start polling after 1 second
|
102
|
+
setTimeout(pollForRender, 1000);
|
103
|
+
}
|
71
104
|
}
|
72
105
|
|
73
106
|
// Set up scroll listener
|
@@ -1,3 +1,4 @@
|
|
1
|
+
import { enableMapSet } from "immer";
|
1
2
|
import { create, StoreApi, UseBoundStore } from "zustand";
|
2
3
|
import { devtools, persist } from "zustand/middleware";
|
3
4
|
import { immer } from "zustand/middleware/immer";
|
@@ -9,6 +10,7 @@ import { createLogSlice, initalializeLogSlice, LogSlice } from "./logSlice";
|
|
9
10
|
import { createLogsSlice, initializeLogsSlice, LogsSlice } from "./logsSlice";
|
10
11
|
import {
|
11
12
|
createSampleSlice,
|
13
|
+
handleRehydrate,
|
12
14
|
initializeSampleSlice,
|
13
15
|
SampleSlice,
|
14
16
|
} from "./sampleSlice";
|
@@ -52,6 +54,8 @@ export const initializeStore = (
|
|
52
54
|
capabilities: Capabilities,
|
53
55
|
storage?: ClientStorage,
|
54
56
|
) => {
|
57
|
+
enableMapSet();
|
58
|
+
|
55
59
|
// Create the storage implementation
|
56
60
|
const storageImplementation = {
|
57
61
|
getItem: <T>(name: string): T | null => {
|
@@ -151,6 +155,7 @@ export const initializeStore = (
|
|
151
155
|
version: 1,
|
152
156
|
onRehydrateStorage: (state: StoreState) => {
|
153
157
|
return (hydrationState, error) => {
|
158
|
+
handleRehydrate(state);
|
154
159
|
log.debug("REHYDRATING STATE");
|
155
160
|
if (error) {
|
156
161
|
log.debug("ERROR", { error });
|
@@ -1,3 +1,5 @@
|
|
1
|
+
import { EvalSample } from "../@types/log";
|
2
|
+
import { estimateSize } from "../utils/json";
|
1
3
|
import { PersistedState } from "./store";
|
2
4
|
|
3
5
|
export function filterState(state: PersistedState) {
|
@@ -6,31 +8,60 @@ export function filterState(state: PersistedState) {
|
|
6
8
|
}
|
7
9
|
|
8
10
|
// When saving state, we can't store vast amounts of data (like a large sample)
|
9
|
-
const filters = [
|
11
|
+
const filters = [filterLargeLogSummary];
|
10
12
|
return filters.reduce(
|
11
13
|
(filteredState, filter) => filter(filteredState),
|
12
14
|
state,
|
13
15
|
);
|
14
16
|
}
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
if (
|
19
|
-
return
|
18
|
+
export function isLargeSample(sample: EvalSample): boolean {
|
19
|
+
const storeKeys = countKeys(sample.store);
|
20
|
+
if (storeKeys > 5000) {
|
21
|
+
return true;
|
20
22
|
}
|
21
23
|
|
22
|
-
const
|
23
|
-
if (
|
24
|
-
return
|
25
|
-
...state,
|
26
|
-
sample: {
|
27
|
-
...state.sample,
|
28
|
-
selectedSample: undefined,
|
29
|
-
},
|
30
|
-
};
|
31
|
-
} else {
|
32
|
-
return state;
|
24
|
+
const estimatedMessageSize = estimateSize(sample.messages);
|
25
|
+
if (estimatedMessageSize > 250000) {
|
26
|
+
return true;
|
33
27
|
}
|
28
|
+
|
29
|
+
return true;
|
30
|
+
}
|
31
|
+
|
32
|
+
function countKeys(obj: unknown, options = { countArrayIndices: false }) {
|
33
|
+
// Base case: not an object or null
|
34
|
+
if (obj === null || typeof obj !== "object") {
|
35
|
+
return 0;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Handle arrays
|
39
|
+
if (Array.isArray(obj)) {
|
40
|
+
let count = 0;
|
41
|
+
// Count array indices as keys if option is set
|
42
|
+
if (options.countArrayIndices) {
|
43
|
+
count += obj.length;
|
44
|
+
}
|
45
|
+
// Count keys in array elements that are objects
|
46
|
+
for (const item of obj) {
|
47
|
+
count += countKeys(item, options);
|
48
|
+
}
|
49
|
+
return count;
|
50
|
+
}
|
51
|
+
|
52
|
+
// For regular objects, count all own properties
|
53
|
+
let count = Object.keys(obj).length;
|
54
|
+
|
55
|
+
// Recursively count keys in nested objects
|
56
|
+
for (const key in obj) {
|
57
|
+
// Use type assertion to tell TypeScript that the key is valid
|
58
|
+
if (Object.prototype.hasOwnProperty.call(obj, key)) {
|
59
|
+
// Use type assertion (obj as Record<string, unknown>)
|
60
|
+
count += countKeys((obj as Record<string, unknown>)[key], options);
|
61
|
+
}
|
62
|
+
}
|
63
|
+
|
64
|
+
return count;
|
34
65
|
}
|
35
66
|
|
36
67
|
// Filters the selectedlog if it is too large
|
@@ -54,31 +85,3 @@ function filterLargeLogSummary(state: PersistedState): PersistedState {
|
|
54
85
|
return state;
|
55
86
|
}
|
56
87
|
}
|
57
|
-
|
58
|
-
function estimateSize(list: unknown[], frequency = 0.2) {
|
59
|
-
if (!list || list.length === 0) {
|
60
|
-
return 0;
|
61
|
-
}
|
62
|
-
|
63
|
-
// Total number of samples
|
64
|
-
const sampleSize = Math.ceil(list.length * frequency);
|
65
|
-
|
66
|
-
// Get a proper random sample without duplicates
|
67
|
-
const messageIndices = new Set<number>();
|
68
|
-
while (
|
69
|
-
messageIndices.size < sampleSize &&
|
70
|
-
messageIndices.size < list.length
|
71
|
-
) {
|
72
|
-
const randomIndex = Math.floor(Math.random() * list.length);
|
73
|
-
messageIndices.add(randomIndex);
|
74
|
-
}
|
75
|
-
|
76
|
-
// Calculate size from sampled messages
|
77
|
-
const totalSize = Array.from(messageIndices).reduce((size, index) => {
|
78
|
-
return size + JSON.stringify(list[index]).length;
|
79
|
-
}, 0);
|
80
|
-
|
81
|
-
// Estimate total size based on sample
|
82
|
-
const estimatedTotalSize = (totalSize / sampleSize) * list.length;
|
83
|
-
return estimatedTotalSize;
|
84
|
-
}
|
@@ -26,3 +26,98 @@ export function printCircularReferences(obj: Record<string, unknown>): void {
|
|
26
26
|
|
27
27
|
detect(obj, "root");
|
28
28
|
}
|
29
|
+
|
30
|
+
export function findDifferences(
|
31
|
+
obj1: unknown,
|
32
|
+
obj2: unknown,
|
33
|
+
path = "",
|
34
|
+
): string[] {
|
35
|
+
// Helper to build a readable path string
|
36
|
+
const makePath = (parent: string, key: string | number, isIndex = false) =>
|
37
|
+
parent
|
38
|
+
? isIndex
|
39
|
+
? `${parent}[${key}]`
|
40
|
+
: `${parent}.${key}`
|
41
|
+
: isIndex
|
42
|
+
? `[${key}]`
|
43
|
+
: `${key}`;
|
44
|
+
|
45
|
+
// Primitive / simple equality check (Object.is handles NaN)
|
46
|
+
if (Object.is(obj1, obj2)) return [];
|
47
|
+
|
48
|
+
// Primitives or null → direct difference
|
49
|
+
if (
|
50
|
+
obj1 === null ||
|
51
|
+
obj2 === null ||
|
52
|
+
typeof obj1 !== "object" ||
|
53
|
+
typeof obj2 !== "object"
|
54
|
+
) {
|
55
|
+
return [
|
56
|
+
`${path || "<root>"}: ${JSON.stringify(obj1)} → ${JSON.stringify(obj2)}`,
|
57
|
+
];
|
58
|
+
}
|
59
|
+
|
60
|
+
// --- Arrays --------------------------------------------------------------
|
61
|
+
const isArr1 = Array.isArray(obj1);
|
62
|
+
const isArr2 = Array.isArray(obj2);
|
63
|
+
if (isArr1 || isArr2) {
|
64
|
+
if (isArr1 !== isArr2) {
|
65
|
+
return [`${path || "<root>"}: one is an array, the other is not`];
|
66
|
+
}
|
67
|
+
|
68
|
+
const diff: string[] = [];
|
69
|
+
const maxLen = Math.max(
|
70
|
+
(obj1 as unknown[]).length,
|
71
|
+
(obj2 as unknown[]).length,
|
72
|
+
);
|
73
|
+
|
74
|
+
if ((obj1 as unknown[]).length !== (obj2 as unknown[]).length) {
|
75
|
+
diff.push(
|
76
|
+
`${path || "<root>"}: array length ${
|
77
|
+
(obj1 as unknown[]).length
|
78
|
+
} vs ${(obj2 as unknown[]).length}`,
|
79
|
+
);
|
80
|
+
}
|
81
|
+
|
82
|
+
for (let i = 0; i < maxLen; i++) {
|
83
|
+
diff.push(
|
84
|
+
...findDifferences(
|
85
|
+
(obj1 as unknown[])[i],
|
86
|
+
(obj2 as unknown[])[i],
|
87
|
+
makePath(path, i, true),
|
88
|
+
),
|
89
|
+
);
|
90
|
+
}
|
91
|
+
return diff;
|
92
|
+
}
|
93
|
+
|
94
|
+
// --- Plain objects -------------------------------------------------------
|
95
|
+
const allKeys = new Set([
|
96
|
+
...Object.keys(obj1 as Record<string, unknown>),
|
97
|
+
...Object.keys(obj2 as Record<string, unknown>),
|
98
|
+
]);
|
99
|
+
|
100
|
+
const diff: string[] = [];
|
101
|
+
|
102
|
+
for (const key of allKeys) {
|
103
|
+
const has1 = Object.prototype.hasOwnProperty.call(obj1, key);
|
104
|
+
const has2 = Object.prototype.hasOwnProperty.call(obj2, key);
|
105
|
+
const newPath = makePath(path, key);
|
106
|
+
|
107
|
+
if (!has1) {
|
108
|
+
diff.push(`${newPath}: property missing in first object`);
|
109
|
+
} else if (!has2) {
|
110
|
+
diff.push(`${newPath}: property missing in second object`);
|
111
|
+
} else {
|
112
|
+
diff.push(
|
113
|
+
...findDifferences(
|
114
|
+
(obj1 as Record<string, unknown>)[key],
|
115
|
+
(obj2 as Record<string, unknown>)[key],
|
116
|
+
newPath,
|
117
|
+
),
|
118
|
+
);
|
119
|
+
}
|
120
|
+
}
|
121
|
+
|
122
|
+
return diff;
|
123
|
+
}
|
@@ -84,13 +84,13 @@ export const formatTime = (seconds: number): string => {
|
|
84
84
|
const hours = Math.floor(seconds / (60 * 60));
|
85
85
|
const minutes = Math.floor((seconds % (60 * 60)) / 60);
|
86
86
|
const remainingSeconds = seconds % 60;
|
87
|
-
return `${hours} hr ${minutes} min ${remainingSeconds} sec`;
|
87
|
+
return `${hours} hr ${minutes} min ${Math.floor(remainingSeconds)} sec`;
|
88
88
|
} else {
|
89
89
|
const days = Math.floor(seconds / (60 * 60 * 24));
|
90
90
|
const hours = Math.floor((seconds % (60 * 60 * 24)) / (60 * 60));
|
91
91
|
const minutes = Math.floor((seconds % (60 * 60)) / 60);
|
92
92
|
const remainingSeconds = seconds % 60;
|
93
|
-
return `${days} days ${hours} hr ${minutes} min ${remainingSeconds} sec`;
|
93
|
+
return `${days} days ${hours} hr ${minutes} min ${Math.floor(remainingSeconds)} sec`;
|
94
94
|
}
|
95
95
|
};
|
96
96
|
|
@@ -22,3 +22,32 @@ export const parsedJson = (text: string): unknown | undefined => {
|
|
22
22
|
}
|
23
23
|
return undefined;
|
24
24
|
};
|
25
|
+
|
26
|
+
// Estimates the size of a list of objects by sampling a subset of the list.
|
27
|
+
export function estimateSize(list: unknown[], frequency = 0.2) {
|
28
|
+
if (!list || list.length === 0) {
|
29
|
+
return 0;
|
30
|
+
}
|
31
|
+
|
32
|
+
// Total number of samples
|
33
|
+
const sampleSize = Math.ceil(list.length * frequency);
|
34
|
+
|
35
|
+
// Get a proper random sample without duplicates
|
36
|
+
const messageIndices = new Set<number>();
|
37
|
+
while (
|
38
|
+
messageIndices.size < sampleSize &&
|
39
|
+
messageIndices.size < list.length
|
40
|
+
) {
|
41
|
+
const randomIndex = Math.floor(Math.random() * list.length);
|
42
|
+
messageIndices.add(randomIndex);
|
43
|
+
}
|
44
|
+
|
45
|
+
// Calculate size from sampled messages
|
46
|
+
const totalSize = Array.from(messageIndices).reduce((size, index) => {
|
47
|
+
return size + JSON.stringify(list[index]).length;
|
48
|
+
}, 0);
|
49
|
+
|
50
|
+
// Estimate total size based on sample
|
51
|
+
const estimatedTotalSize = (totalSize / sampleSize) * list.length;
|
52
|
+
return estimatedTotalSize;
|
53
|
+
}
|
inspect_ai/agent/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from ._agent import Agent, AgentState, agent, agent_with
|
1
|
+
from ._agent import Agent, AgentState, agent, agent_with, is_agent
|
2
2
|
from ._as_solver import as_solver
|
3
3
|
from ._as_tool import as_tool
|
4
4
|
from ._bridge.bridge import bridge
|
@@ -29,6 +29,7 @@ __all__ = [
|
|
29
29
|
"AgentState",
|
30
30
|
"agent",
|
31
31
|
"agent_with",
|
32
|
+
"is_agent",
|
32
33
|
"AgentPrompt",
|
33
34
|
"AgentAttempts",
|
34
35
|
"AgentContinue",
|
inspect_ai/agent/_agent.py
CHANGED
@@ -270,6 +270,18 @@ def agent_register(agent: Callable[P, Agent], name: str) -> Callable[P, Agent]:
|
|
270
270
|
|
271
271
|
|
272
272
|
def is_agent(obj: Any) -> TypeGuard[Agent]:
|
273
|
+
"""Check if an object is an Agent.
|
274
|
+
|
275
|
+
Determines if the provided object is registered as an Agent in the system registry.
|
276
|
+
When this function returns True, type checkers will recognize 'obj' as an Agent type.
|
277
|
+
|
278
|
+
Args:
|
279
|
+
obj: Object to check against the registry.
|
280
|
+
|
281
|
+
Returns:
|
282
|
+
True if the object is a registered Agent, False otherwise.
|
283
|
+
Acts as a TypeGuard to provide type narrowing for static type checkers.
|
284
|
+
"""
|
273
285
|
return is_registry_object(obj, type="agent")
|
274
286
|
|
275
287
|
|