inspect-ai 0.3.81__py3-none-any.whl → 0.3.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +35 -2
- inspect_ai/_cli/util.py +44 -1
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +13 -4
- inspect_ai/_display/core/results.py +1 -1
- inspect_ai/_display/textual/widgets/task_detail.py +5 -4
- inspect_ai/_eval/eval.py +38 -1
- inspect_ai/_eval/evalset.py +5 -0
- inspect_ai/_eval/run.py +5 -2
- inspect_ai/_eval/task/log.py +53 -6
- inspect_ai/_eval/task/run.py +51 -10
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/file.py +17 -1
- inspect_ai/_util/json.py +36 -1
- inspect_ai/_view/server.py +113 -1
- inspect_ai/_view/www/App.css +1 -1
- inspect_ai/_view/www/dist/assets/index.css +518 -296
- inspect_ai/_view/www/dist/assets/index.js +38803 -36307
- inspect_ai/_view/www/eslint.config.mjs +1 -1
- inspect_ai/_view/www/log-schema.json +13 -0
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/package.json +8 -2
- inspect_ai/_view/www/src/App.tsx +151 -855
- inspect_ai/_view/www/src/api/api-browser.ts +176 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
- inspect_ai/_view/www/src/api/client-api.ts +66 -10
- inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
- inspect_ai/_view/www/src/api/types.ts +107 -2
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
- inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
- inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
- inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -3
- inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
- inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
- inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
- inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
- inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
- inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
- inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
- inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
- inspect_ai/_view/www/src/index.tsx +26 -94
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +67 -28
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +51 -22
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +144 -90
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +82 -35
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +23 -30
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +4 -1
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +3 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +34 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +10 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +25 -17
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +21 -3
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +20 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +105 -85
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +27 -14
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
- inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
- inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +7 -9
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +7 -11
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +8 -13
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +52 -58
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +30 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
- inspect_ai/_view/www/src/scoring/utils.ts +87 -0
- inspect_ai/_view/www/src/state/appSlice.ts +244 -0
- inspect_ai/_view/www/src/state/hooks.ts +397 -0
- inspect_ai/_view/www/src/state/logPolling.ts +196 -0
- inspect_ai/_view/www/src/state/logSlice.ts +214 -0
- inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +311 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +127 -0
- inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
- inspect_ai/_view/www/src/state/scrolling.ts +206 -0
- inspect_ai/_view/www/src/state/store.ts +168 -0
- inspect_ai/_view/www/src/state/store_filter.ts +84 -0
- inspect_ai/_view/www/src/state/utils.ts +23 -0
- inspect_ai/_view/www/src/storage/index.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +2 -0
- inspect_ai/_view/www/src/types.ts +94 -32
- inspect_ai/_view/www/src/utils/attachments.ts +58 -23
- inspect_ai/_view/www/src/utils/logger.ts +52 -0
- inspect_ai/_view/www/src/utils/polling.ts +100 -0
- inspect_ai/_view/www/src/utils/react.ts +30 -0
- inspect_ai/_view/www/src/utils/vscode.ts +1 -1
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +181 -216
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +0 -1
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +98 -39
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +11 -13
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +110 -115
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
- inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
- inspect_ai/_view/www/src/workspace/types.ts +4 -3
- inspect_ai/_view/www/src/workspace/utils.ts +4 -4
- inspect_ai/_view/www/vite.config.js +6 -0
- inspect_ai/_view/www/yarn.lock +370 -354
- inspect_ai/log/_condense.py +26 -0
- inspect_ai/log/_log.py +6 -3
- inspect_ai/log/_recorders/buffer/__init__.py +14 -0
- inspect_ai/log/_recorders/buffer/buffer.py +30 -0
- inspect_ai/log/_recorders/buffer/database.py +685 -0
- inspect_ai/log/_recorders/buffer/filestore.py +259 -0
- inspect_ai/log/_recorders/buffer/types.py +84 -0
- inspect_ai/log/_recorders/eval.py +2 -11
- inspect_ai/log/_recorders/types.py +30 -0
- inspect_ai/log/_transcript.py +27 -1
- inspect_ai/model/_call_tools.py +1 -0
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +1 -0
- inspect_ai/tool/_tool_support_helpers.py +4 -4
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
- inspect_ai/util/_subtask.py +1 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/RECORD +178 -138
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,168 @@
|
|
1
|
+
import { create, StoreApi, UseBoundStore } from "zustand";
|
2
|
+
import { devtools, persist } from "zustand/middleware";
|
3
|
+
import { immer } from "zustand/middleware/immer";
|
4
|
+
import { Capabilities, ClientAPI, ClientStorage } from "../api/types";
|
5
|
+
import { createLogger } from "../utils/logger";
|
6
|
+
import { AppSlice, createAppSlice, initializeAppSlice } from "./appSlice";
|
7
|
+
import { createLogSlice, initalializeLogSlice, LogSlice } from "./logSlice";
|
8
|
+
import { createLogsSlice, initializeLogsSlice, LogsSlice } from "./logsSlice";
|
9
|
+
import {
|
10
|
+
createSampleSlice,
|
11
|
+
initializeSampleSlice,
|
12
|
+
SampleSlice,
|
13
|
+
} from "./sampleSlice";
|
14
|
+
import { filterState } from "./store_filter";
|
15
|
+
|
16
|
+
const log = createLogger("store");
|
17
|
+
|
18
|
+
/**
 * Full shape of the application store: the app, logs, log and sample slices
 * merged together, plus the shared API handle and the store-wide lifecycle
 * actions declared here.
 */
export interface StoreState extends AppSlice, LogsSlice, LogSlice, SampleSlice {
  // The shared api (null until `initialize` stores one)
  api?: ClientAPI | null;

  // Global actions
  // Stores the api and runs each slice's initializer
  initialize: (api: ClientAPI, capabilities: Capabilities) => void;
  // Runs the cleanup callback returned by each slice factory
  cleanup: () => void;
}
|
26
|
+
|
27
|
+
// The data that will actually be persisted: only the `app`, `log`, `logs` and
// `sample` members of the store (actions and the api handle are left out).
export type PersistedState = {
  app: AppSlice["app"];
  log: LogSlice["log"];
  logs: LogsSlice["logs"];
  sample: SampleSlice["sample"];
};
|
34
|
+
|
35
|
+
// The store implementation (this will be set when the store is initialized)
|
36
|
+
let storeImplementation: UseBoundStore<StoreApi<StoreState>> | null = null;
|
37
|
+
|
38
|
+
/**
 * Returns the real store, or throws if `initializeStore` has not run yet.
 */
const requireStoreImplementation = (): UseBoundStore<StoreApi<StoreState>> => {
  if (!storeImplementation) {
    throw new Error(
      "Store accessed before initialization. Call initializeStore first.",
    );
  }
  return storeImplementation;
};

// Create a proxy store that forwards calls to the real store once initialized.
// Calling it (with or without a selector) behaves like calling the bound store.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const useStoreHook = (selector?: any) => {
  const implementation = requireStoreImplementation();
  return selector ? implementation(selector) : implementation();
};

// The proxy is typed as a full UseBoundStore, so it must also expose the store
// API and not only be callable; otherwise `useStore.getState()` type-checks but
// fails at runtime because the property is undefined. Each accessor is resolved
// lazily so it always targets the store created by the latest
// `initializeStore` call, and raises the same "not initialized" error as the
// hook when accessed too early.
const forwardedStoreMethods = ["getState", "setState", "subscribe"] as const;
for (const method of forwardedStoreMethods) {
  Object.defineProperty(useStoreHook, method, {
    get: () => requireStoreImplementation()[method],
  });
}

export const useStore = useStoreHook as UseBoundStore<StoreApi<StoreState>>;
|
47
|
+
|
48
|
+
/**
 * Creates the application store and installs it behind `useStore`.
 *
 * The store is built from the app, logs, log and sample slices and wrapped in
 * the immer, persist and devtools middlewares. Once created it becomes the
 * module's store implementation and its `initialize` action is invoked.
 *
 * @param api - Client API stored on the state as `api`.
 * @param capabilities - Passed to the app slice initializer.
 * @param storage - Optional backing storage for persisted state; when omitted
 *   reads return null and writes/removals are no-ops.
 */
export const initializeStore = (
  api: ClientAPI,
  capabilities: Capabilities,
  storage?: ClientStorage,
) => {
  // Create the storage implementation (tolerates a missing backing storage)
  const storageImplementation = {
    getItem: <T>(name: string): T | null => {
      return storage ? (storage.getItem(name) as T) : null;
    },
    setItem: <T>(name: string, value: T): void => {
      if (storage) {
        storage.setItem(name, value);
      }
    },
    removeItem: (name: string): void => {
      if (storage) {
        storage.removeItem(name);
      }
    },
  };

  // Create the actual store
  const store = create<StoreState>()(
    devtools(
      persist(
        immer((set, get, storeApi) => {
          // The slices take a draft-mutating setter; cast once and reuse it.
          const setState = set as (fn: (state: StoreState) => void) => void;

          const [appSlice, appCleanup] = createAppSlice(
            setState,
            get,
            storeApi,
          );
          const [logsSlice, logsCleanup] = createLogsSlice(
            setState,
            get,
            storeApi,
          );
          const [logSlice, logCleanup] = createLogSlice(
            setState,
            get,
            storeApi,
          );
          const [sampleSlice, sampleCleanup] = createSampleSlice(
            setState,
            get,
            storeApi,
          );

          return {
            // Shared state
            api: null,

            // Initialize
            initialize: (api, capabilities) => {
              set((state) => {
                state.api = api;
              });

              // Initialize application slices
              initializeAppSlice(setState, capabilities);
              initializeLogsSlice(setState);
              initalializeLogSlice(setState);
              initializeSampleSlice(setState);
            },

            // Create the slices and merge them in
            ...appSlice,
            ...logsSlice,
            ...logSlice,
            ...sampleSlice,

            cleanup: () => {
              appCleanup();
              logsCleanup();
              logCleanup();
              sampleCleanup();
            },
          };
        }),
        {
          name: "app-storage",
          storage: storageImplementation,
          partialize: (state) => {
            // Persist only the slice data, with oversized members filtered out
            const persisted: PersistedState = filterState({
              app: state.app,
              log: state.log,
              logs: state.logs,
              sample: state.sample,
            });
            return persisted as unknown as StoreState;
          },
          version: 1,
          onRehydrateStorage: (state: StoreState) => {
            return (hydrationState, error) => {
              log.debug("REHYDRATING STATE");
              if (error) {
                // Debug output is only emitted in dev-watch builds; report a
                // failed rehydration through the always-on error channel so it
                // is not silently dropped.
                log.error("Failed to rehydrate persisted state", { error });
              } else {
                log.debug("STATE", { state, hydrationState });
              }
            };
          },
        },
      ),
    ),
  );

  // Set the implementation and initialize it
  storeImplementation = store as UseBoundStore<StoreApi<StoreState>>;
  store.getState().initialize(api, capabilities);
};
|
@@ -0,0 +1,84 @@
|
|
1
|
+
import { PersistedState } from "./store";
|
2
|
+
|
3
|
+
/**
 * Runs the state about to be persisted through every size filter in turn.
 *
 * A falsy state is returned untouched. Otherwise the large-sample filter runs
 * first and the large-log-summary filter runs on its result.
 */
export function filterState(state: PersistedState) {
  if (!state) {
    return state;
  }

  // When saving state, we can't store vast amounts of data (like a large sample)
  let filteredState = state;
  for (const applyFilter of [filterLargeSample, filterLargeLogSummary]) {
    filteredState = applyFilter(filteredState);
  }
  return filteredState;
}
|
15
|
+
|
16
|
+
// Drops the selected sample from the state when the estimated serialized size
// of its messages exceeds 250000; otherwise the state is returned as-is.
function filterLargeSample(state: PersistedState): PersistedState {
  const selectedSample = state?.sample?.selectedSample;
  if (!selectedSample) {
    return state;
  }

  const isTooLarge = estimateSize(selectedSample.messages) > 250000;
  return isTooLarge
    ? { ...state, sample: { ...state.sample, selectedSample: undefined } }
    : state;
}
|
35
|
+
|
36
|
+
// Drops the selected log summary from the state when the estimated serialized
// size of its sample summaries exceeds 250000; otherwise the state is
// returned as-is.
function filterLargeLogSummary(state: PersistedState): PersistedState {
  const selectedLogSummary = state?.log?.selectedLogSummary;
  if (!selectedLogSummary) {
    return state;
  }

  const isTooLarge = estimateSize(selectedLogSummary.sampleSummaries) > 250000;
  return isTooLarge
    ? { ...state, log: { ...state.log, selectedLogSummary: undefined } }
    : state;
}
|
57
|
+
|
58
|
+
/**
 * Estimates the total JSON-serialized length of `list` by serializing a random
 * subset of its elements and scaling the average up to the full list.
 *
 * @param list - Elements to measure. A missing or empty list yields 0.
 * @param frequency - Fraction of elements to sample (default 0.2). The sample
 *   always contains at least one element and at most every element, so values
 *   outside (0, 1] are clamped rather than producing NaN or a skewed estimate.
 * @returns The estimated summed length, in string characters, of
 *   `JSON.stringify` applied to every element.
 */
function estimateSize(list: unknown[], frequency = 0.2) {
  if (!list || list.length === 0) {
    return 0;
  }

  // Number of elements to sample, clamped to [1, list.length]. The comparison
  // also maps a NaN request to a single-element sample.
  const requestedSize = Math.ceil(list.length * frequency);
  const sampleSize =
    requestedSize >= 1 ? Math.min(requestedSize, list.length) : 1;

  // Pick distinct indices. When every element is requested, take them all
  // directly instead of waiting for random draws to cover the whole list.
  const sampledIndices = new Set<number>();
  if (sampleSize === list.length) {
    for (let index = 0; index < list.length; index++) {
      sampledIndices.add(index);
    }
  } else {
    while (sampledIndices.size < sampleSize) {
      sampledIndices.add(Math.floor(Math.random() * list.length));
    }
  }

  // Sum the serialized length of the sampled elements. JSON.stringify returns
  // undefined for values such as undefined or functions; count those as empty
  // instead of throwing on `.length`.
  let sampledLength = 0;
  for (const index of sampledIndices) {
    const serialized: string | undefined = JSON.stringify(list[index]);
    sampledLength += serialized?.length ?? 0;
  }

  // Scale the average of the elements actually sampled up to the whole list
  return (sampledLength / sampledIndices.size) * list.length;
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
import { SampleSummary } from "../api/types";
|
2
|
+
|
3
|
+
/**
 * Merges the samples already written to the log with the pending samples.
 *
 * Samples are identified by their `id` and `epoch`. Log samples are returned
 * first and unchanged; a pending sample is appended only when the log has no
 * sample with the same identity, and is always marked `completed: false`.
 */
export const mergeSampleSummaries = (
  logSamples: SampleSummary[],
  pendingSamples: SampleSummary[],
) => {
  const sampleKey = (sample: SampleSummary) => `${sample.id}-${sample.epoch}`;

  // Identities of the samples that are already in the log
  const loggedKeys = new Set(logSamples.map(sampleKey));
  const isMissingFromLog = (sample: SampleSummary) =>
    !loggedKeys.has(sampleKey(sample));

  // Always mark pending items as incomplete to be sure we trigger polling
  const stillPending = pendingSamples
    .filter(isMissingFromLog)
    .map((sample) => ({ ...sample, completed: false }));

  return [...logSamples, ...stillPending];
};
|
@@ -0,0 +1,26 @@
|
|
1
|
+
import { ClientStorage } from "../api/types";
|
2
|
+
import { PersistedState } from "../state/store";
|
3
|
+
import { getVscodeApi } from "../utils/vscode";
|
4
|
+
|
5
|
+
/**
 * Builds the client storage backed by the VS Code webview state, when the
 * VS Code API is available; returns undefined otherwise.
 *
 * The storage holds a single value: item names are ignored, writes replace the
 * whole webview state and removal resets it to null.
 */
const resolveStorage = (): ClientStorage | undefined => {
  const vscodeApi = getVscodeApi();
  if (!vscodeApi) {
    return undefined;
  }

  return {
    getItem: (_name: string) => vscodeApi.getState() as PersistedState,
    setItem: (_name: string, value: unknown) => {
      // TODO: This is pretty gnarly type hijinks
      vscodeApi.setState(value as { state: PersistedState; version: number });
    },
    removeItem: (_name: string) => {
      vscodeApi.setState(null);
    },
  };
};
|
25
|
+
|
26
|
+
export default resolveStorage();
|
@@ -44,6 +44,7 @@ export type SandboxCleanup = boolean | null;
|
|
44
44
|
export type LogSamples = boolean | null;
|
45
45
|
export type LogImages = boolean | null;
|
46
46
|
export type LogBuffer = number | null;
|
47
|
+
export type LogShared = number | null;
|
47
48
|
export type ScoreDisplay = boolean | null;
|
48
49
|
export type Type1 = "git";
|
49
50
|
export type Origin = string;
|
@@ -627,6 +628,7 @@ export interface EvalConfig {
|
|
627
628
|
log_samples: LogSamples;
|
628
629
|
log_images: LogImages;
|
629
630
|
log_buffer: LogBuffer;
|
631
|
+
log_shared: LogShared;
|
630
632
|
score_display: ScoreDisplay;
|
631
633
|
}
|
632
634
|
export interface ApprovalPolicyConfig {
|
@@ -1,49 +1,105 @@
|
|
1
|
+
import { StateSnapshot } from "react-virtuoso";
|
1
2
|
import {
|
3
|
+
AttachmentData,
|
2
4
|
EvalLogHeader,
|
3
5
|
EvalSummary,
|
6
|
+
EventData,
|
4
7
|
LogFiles,
|
8
|
+
PendingSamples,
|
5
9
|
SampleSummary,
|
6
10
|
} from "./api/types";
|
7
|
-
import {
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
11
|
+
import { ScorerInfo } from "./scoring/utils";
|
12
|
+
import {
  ApprovalEvent,
  ContentImage,
  ContentText,
  ErrorEvent,
  EvalSample,
  InfoEvent,
  InputEvent,
  LoggerEvent,
  ModelEvent,
  SampleInitEvent,
  SampleLimitEvent,
  SandboxEvent,
  ScoreEvent,
  StateEvent,
  StepEvent,
  StoreEvent,
  SubtaskEvent,
  ToolEvent,
} from "./types/log";
|
30
|
+
|
31
|
+
/**
 * Application-level UI state.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * and types only; confirm against the app slice that reads and writes them.
 */
export interface AppState {
  // Loading flag and optional error (see AppStatus)
  status: AppStatus;
  offcanvas: boolean;
  showFind: boolean;
  // Presumably the id of the selected tab in each tab set
  tabs: {
    workspace: string;
    sample: string;
  };
  // Presumably whether each dialog is currently shown
  dialogs: {
    sample: boolean;
  };
  // Numeric scroll positions, keyed by string
  scrollPositions: Record<string, number>;
  // react-virtuoso list state snapshots, keyed by string
  listPositions: Record<string, StateSnapshot>;
  // Boolean flags keyed by string
  collapsed: Record<string, boolean>;
  messages: Record<string, boolean>;
  // Free-form property maps, keyed by string
  propertyBags: Record<string, Record<string, unknown>>;
}
|
48
|
+
|
49
|
+
/**
 * State for the collection of log files.
 *
 * NOTE(review): field semantics are inferred from names and types; confirm
 * against the logs slice.
 */
export interface LogsState {
  logs: LogFiles;
  // Log headers keyed by string (presumably the log file name; verify)
  logHeaders: Record<string, EvalLogHeader>;
  headersLoading: boolean;
  selectedLogIndex: number;
}
|
55
|
+
|
56
|
+
export interface LogState {
|
57
|
+
loadedLog?: string;
|
58
|
+
|
59
|
+
selectedSampleIndex: number;
|
60
|
+
selectedLogSummary?: EvalSummary;
|
61
|
+
pendingSampleSummaries?: PendingSamples;
|
62
|
+
|
63
|
+
filter: ScoreFilter;
|
64
|
+
epoch: string;
|
65
|
+
sort: string;
|
30
66
|
score?: ScoreLabel;
|
31
|
-
|
32
|
-
groupBy?: "none" | "epoch" | "sample";
|
33
|
-
groupByOrder?: "asc" | "desc";
|
34
|
-
workspaceTabScrollPosition?: Record<string, number>;
|
67
|
+
scores?: ScorerInfo[];
|
35
68
|
}
|
36
69
|
|
70
|
+
// The states a sample can be in, as reported by SampleState.sampleStatus
export type SampleStatus = "ok" | "loading" | "streaming" | "error";

/**
 * State for the currently selected sample.
 *
 * NOTE(review): field semantics are inferred from names and types; confirm
 * against the sample slice.
 */
export interface SampleState {
  selectedSample: EvalSample | undefined;
  sampleStatus: SampleStatus;
  sampleError: Error | undefined;

  // Events and attachments
  // Uses the transcript `Event` union declared in this module
  runningEvents: Event[];
}
|
80
|
+
|
81
|
+
/**
 * Union of every transcript event type.
 *
 * Every member must be the log-schema type imported from "./types/log".
 * `InputEvent` and `ErrorEvent` share their names with DOM globals: if they
 * are missing from that import the union still compiles, but silently picks
 * up the browser event classes instead of the log-schema types.
 *
 * Inside this module the alias also shadows the DOM `Event` type.
 */
export type Event =
  | SampleInitEvent
  | SampleLimitEvent
  | SandboxEvent
  | StateEvent
  | StoreEvent
  | ModelEvent
  | ToolEvent
  | ApprovalEvent
  | InputEvent
  | ScoreEvent
  | ErrorEvent
  | LoggerEvent
  | InfoEvent
  | StepEvent
  | SubtaskEvent;
|
97
|
+
|
37
98
|
export interface AppStatus {
|
38
99
|
loading: boolean;
|
39
100
|
error?: Error;
|
40
101
|
}
|
41
102
|
|
42
|
-
export interface Capabilities {
|
43
|
-
downloadFiles: boolean;
|
44
|
-
webWorkers: boolean;
|
45
|
-
}
|
46
|
-
|
47
103
|
export interface CurrentLog {
|
48
104
|
name: string;
|
49
105
|
contents: EvalSummary;
|
@@ -69,3 +125,9 @@ export interface ContentTool {
|
|
69
125
|
type: "tool";
|
70
126
|
content: (ContentImage | ContentText)[];
|
71
127
|
}
|
128
|
+
|
129
|
+
/**
 * Data held for a sample that is still running.
 *
 * NOTE(review): what the map keys identify is not visible here; confirm
 * against the sample polling code.
 */
export interface RunningSampleData {
  // Event records keyed by string
  events: Map<string, EventData>;
  // Attachment records keyed by string
  attachments: Map<string, AttachmentData>;
  // Optional summary of the sample
  summary?: SampleSummary;
}
|
@@ -1,40 +1,75 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
*/
|
4
|
-
export const resolveAttachments = (
|
5
|
-
value: any,
|
1
|
+
export const resolveAttachments = <T>(
|
2
|
+
value: T,
|
6
3
|
attachments: Record<string, string>,
|
7
|
-
):
|
8
|
-
const
|
9
|
-
const
|
4
|
+
): T => {
|
5
|
+
const CONTENT_PROTOCOL = "tc://";
|
6
|
+
const ATTACHMENT_PROTOCOL = "attachment://";
|
7
|
+
|
8
|
+
// Handle null or undefined early
|
9
|
+
if (value === null || value === undefined) {
|
10
|
+
return value;
|
11
|
+
}
|
10
12
|
|
11
13
|
// Handle arrays recursively
|
12
14
|
if (Array.isArray(value)) {
|
13
|
-
|
15
|
+
let hasChanged = false;
|
16
|
+
const resolvedArray = value.map((v) => {
|
17
|
+
const resolved = resolveAttachments(v, attachments);
|
18
|
+
if (resolved !== v) hasChanged = true;
|
19
|
+
return resolved;
|
20
|
+
});
|
21
|
+
|
22
|
+
// Only return the new array if something actually changed
|
23
|
+
return hasChanged ? (resolvedArray as unknown as T) : value;
|
14
24
|
}
|
15
25
|
|
16
|
-
// Handle objects recursively
|
17
|
-
if (
|
26
|
+
// Handle objects recursively, but skip Date instances and other special object types
|
27
|
+
if (
|
28
|
+
typeof value === "object" &&
|
29
|
+
!(value instanceof Date) &&
|
30
|
+
!(value instanceof RegExp)
|
31
|
+
) {
|
32
|
+
let hasChanged = false;
|
18
33
|
const resolvedObject: Record<string, unknown> = {};
|
19
|
-
|
20
|
-
|
34
|
+
|
35
|
+
for (const [key, val] of Object.entries(value)) {
|
36
|
+
const resolved = resolveAttachments(val, attachments);
|
37
|
+
resolvedObject[key] = resolved;
|
38
|
+
|
39
|
+
// Track if anything changed
|
40
|
+
if (resolved !== val) hasChanged = true;
|
21
41
|
}
|
22
|
-
|
42
|
+
|
43
|
+
// Only return the new object if something actually changed
|
44
|
+
return hasChanged ? (resolvedObject as unknown as T) : value;
|
23
45
|
}
|
24
46
|
|
25
47
|
// Handle string values with protocol references
|
26
48
|
if (typeof value === "string") {
|
27
|
-
|
28
|
-
if (
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
)
|
49
|
+
// Check if the string starts with the content protocol
|
50
|
+
if (value.startsWith(CONTENT_PROTOCOL)) {
|
51
|
+
const updatedValue = value.replace(CONTENT_PROTOCOL, ATTACHMENT_PROTOCOL);
|
52
|
+
|
53
|
+
// Now check if it's an attachment reference
|
54
|
+
if (updatedValue.startsWith(ATTACHMENT_PROTOCOL)) {
|
55
|
+
const attachmentId = updatedValue.slice(ATTACHMENT_PROTOCOL.length);
|
56
|
+
const attachment = attachments[attachmentId];
|
57
|
+
|
58
|
+
// Return the attachment content if it exists, otherwise return the original string
|
59
|
+
return (attachment !== undefined ? attachment : value) as unknown as T;
|
60
|
+
}
|
61
|
+
|
62
|
+
return updatedValue as unknown as T;
|
33
63
|
}
|
34
|
-
|
35
|
-
|
64
|
+
|
65
|
+
// Check if it's directly an attachment reference
|
66
|
+
if (value.startsWith(ATTACHMENT_PROTOCOL)) {
|
67
|
+
const attachmentId = value.slice(ATTACHMENT_PROTOCOL.length);
|
68
|
+
const attachment = attachments[attachmentId];
|
69
|
+
|
70
|
+
// Return the attachment content if it exists, otherwise return the original string
|
71
|
+
return (attachment !== undefined ? attachment : value) as unknown as T;
|
36
72
|
}
|
37
|
-
return resolvedValue;
|
38
73
|
}
|
39
74
|
|
40
75
|
// Return unchanged for other types
|
@@ -0,0 +1,52 @@
|
|
1
|
+
// This will be replaced at build time with a boolean value
|
2
|
+
declare const __DEV_WATCH__: boolean;
|
3
|
+
declare const __LOGGING_FILTER__: string;
|
4
|
+
|
5
|
+
/**
 * Parses the build-time logging filter into the list of namespace names it
 * contains: comma separated entries, trimmed, with empty entries dropped.
 */
const getEnabledNamespaces = () => {
  const namespaces: string[] = [];
  for (const entry of __LOGGING_FILTER__.split(",")) {
    const name = entry.trim();
    if (name) {
      namespaces.push(name);
    }
  }
  return namespaces;
};
|
12
|
+
|
13
|
+
const ENABLED_NAMESPACES = new Set<string>(getEnabledNamespaces());
|
14
|
+
// True when logging is enabled for the namespace: either the wildcard "*" is
// among the enabled namespaces or the namespace itself is.
const filterNameSpace = (namespace: string) =>
  ENABLED_NAMESPACES.has("*") || ENABLED_NAMESPACES.has(namespace);
|
19
|
+
|
20
|
+
/**
 * Creates a logger whose output is prefixed with `[namespace]`.
 *
 * `debug`, `info`, `warn` and `debugIf` only write when the build is in
 * dev-watch mode and the namespace passes the logging filter; `error` always
 * writes.
 */
export const createLogger = (namespace: string) => {
  const withPrefix = (message: string) => `[${namespace}] ${message}`;
  const isEnabled = () => __DEV_WATCH__ && filterNameSpace(namespace);

  return {
    debug: (message: string, ...args: any[]) => {
      if (isEnabled()) {
        console.debug(withPrefix(message), ...args);
      }
    },

    info: (message: string, ...args: any[]) => {
      if (isEnabled()) {
        console.info(withPrefix(message), ...args);
      }
    },

    warn: (message: string, ...args: any[]) => {
      if (isEnabled()) {
        console.warn(withPrefix(message), ...args);
      }
    },

    // Always log errors, even in production
    error: (message: string, ...args: any[]) => {
      console.error(withPrefix(message), ...args);
    },

    // Lazy evaluation for expensive logs: `fn` only runs when logging is on
    debugIf: (fn: () => string) => {
      if (isEnabled()) {
        console.debug(withPrefix(fn()));
      }
    },
  };
};
|