inspect-ai 0.3.80__py3-none-any.whl → 0.3.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179)
  1. inspect_ai/_cli/eval.py +35 -2
  2. inspect_ai/_cli/util.py +44 -1
  3. inspect_ai/_display/core/config.py +1 -1
  4. inspect_ai/_display/core/display.py +13 -4
  5. inspect_ai/_display/core/results.py +1 -1
  6. inspect_ai/_display/textual/widgets/task_detail.py +5 -4
  7. inspect_ai/_eval/eval.py +38 -1
  8. inspect_ai/_eval/evalset.py +5 -0
  9. inspect_ai/_eval/run.py +5 -2
  10. inspect_ai/_eval/task/log.py +53 -6
  11. inspect_ai/_eval/task/run.py +51 -10
  12. inspect_ai/_util/constants.py +2 -0
  13. inspect_ai/_util/file.py +17 -1
  14. inspect_ai/_util/json.py +36 -1
  15. inspect_ai/_view/server.py +113 -1
  16. inspect_ai/_view/www/App.css +1 -1
  17. inspect_ai/_view/www/dist/assets/index.css +518 -296
  18. inspect_ai/_view/www/dist/assets/index.js +38803 -36307
  19. inspect_ai/_view/www/eslint.config.mjs +1 -1
  20. inspect_ai/_view/www/log-schema.json +13 -0
  21. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  22. inspect_ai/_view/www/package.json +8 -2
  23. inspect_ai/_view/www/src/App.tsx +151 -855
  24. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  25. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  26. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  27. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  28. inspect_ai/_view/www/src/api/types.ts +107 -2
  29. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  30. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  31. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  32. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  33. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  34. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  35. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  36. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  37. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  38. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  39. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  40. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -3
  41. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  42. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  43. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  44. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  45. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  46. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  47. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  48. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  49. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  50. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  51. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  52. inspect_ai/_view/www/src/index.tsx +26 -94
  53. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  54. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  55. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  56. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  57. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  58. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +67 -28
  59. inspect_ai/_view/www/src/samples/SampleDialog.tsx +51 -22
  60. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  61. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +144 -90
  62. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  63. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +82 -35
  64. inspect_ai/_view/www/src/samples/SamplesTools.tsx +23 -30
  65. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  66. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  67. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  68. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +4 -1
  69. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +3 -0
  70. inspect_ai/_view/www/src/samples/chat/messages.ts +34 -0
  71. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  72. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +10 -1
  73. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  74. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +25 -17
  75. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +2 -1
  76. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  77. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +21 -3
  78. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +20 -1
  79. inspect_ai/_view/www/src/samples/list/SampleList.tsx +105 -85
  80. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  81. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +27 -14
  82. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  83. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  84. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  85. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  86. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  87. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +7 -9
  88. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +7 -11
  89. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  90. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  91. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  92. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  93. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  94. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  95. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  96. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  97. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  98. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  99. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  100. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  101. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +8 -13
  102. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  103. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  104. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  105. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  106. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +52 -58
  107. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  108. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  109. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +30 -1
  110. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  111. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  112. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  113. inspect_ai/_view/www/src/state/hooks.ts +397 -0
  114. inspect_ai/_view/www/src/state/logPolling.ts +196 -0
  115. inspect_ai/_view/www/src/state/logSlice.ts +214 -0
  116. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  117. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  118. inspect_ai/_view/www/src/state/samplePolling.ts +311 -0
  119. inspect_ai/_view/www/src/state/sampleSlice.ts +127 -0
  120. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  121. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  122. inspect_ai/_view/www/src/state/store.ts +168 -0
  123. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  124. inspect_ai/_view/www/src/state/utils.ts +23 -0
  125. inspect_ai/_view/www/src/storage/index.ts +26 -0
  126. inspect_ai/_view/www/src/types/log.d.ts +2 -0
  127. inspect_ai/_view/www/src/types.ts +94 -32
  128. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  129. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  130. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  131. inspect_ai/_view/www/src/utils/react.ts +30 -0
  132. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  133. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +181 -216
  134. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  135. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  136. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  137. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  138. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +0 -1
  139. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +98 -39
  140. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  141. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  142. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +11 -13
  143. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  144. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  145. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  146. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  147. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  148. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  149. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  150. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +110 -115
  151. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  152. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  153. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  154. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  155. inspect_ai/_view/www/vite.config.js +6 -0
  156. inspect_ai/_view/www/yarn.lock +370 -354
  157. inspect_ai/log/_condense.py +26 -0
  158. inspect_ai/log/_log.py +6 -3
  159. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  160. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  161. inspect_ai/log/_recorders/buffer/database.py +685 -0
  162. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  163. inspect_ai/log/_recorders/buffer/types.py +84 -0
  164. inspect_ai/log/_recorders/eval.py +2 -11
  165. inspect_ai/log/_recorders/types.py +30 -0
  166. inspect_ai/log/_transcript.py +27 -1
  167. inspect_ai/model/_call_tools.py +1 -0
  168. inspect_ai/model/_generate_config.py +2 -2
  169. inspect_ai/model/_model.py +1 -0
  170. inspect_ai/tool/_tool_support_helpers.py +4 -4
  171. inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
  172. inspect_ai/util/_subtask.py +1 -0
  173. {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/METADATA +2 -2
  174. {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/RECORD +178 -138
  175. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  176. {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/WHEEL +0 -0
  177. {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/entry_points.txt +0 -0
  178. {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/licenses/LICENSE +0 -0
  179. {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,168 @@
1
+ import { create, StoreApi, UseBoundStore } from "zustand";
2
+ import { devtools, persist } from "zustand/middleware";
3
+ import { immer } from "zustand/middleware/immer";
4
+ import { Capabilities, ClientAPI, ClientStorage } from "../api/types";
5
+ import { createLogger } from "../utils/logger";
6
+ import { AppSlice, createAppSlice, initializeAppSlice } from "./appSlice";
7
+ import { createLogSlice, initalializeLogSlice, LogSlice } from "./logSlice";
8
+ import { createLogsSlice, initializeLogsSlice, LogsSlice } from "./logsSlice";
9
+ import {
10
+ createSampleSlice,
11
+ initializeSampleSlice,
12
+ SampleSlice,
13
+ } from "./sampleSlice";
14
+ import { filterState } from "./store_filter";
15
+
// Logger for the "store" namespace; used below to trace state rehydration.
const log = createLogger("store");
17
+
/**
 * Complete shape of the application store: the app, logs, log and sample
 * slices merged together, plus the shared client API and two global actions.
 */
export interface StoreState extends AppSlice, LogsSlice, LogSlice, SampleSlice {
  // The shared api (starts out as null and is assigned by `initialize`)
  api?: ClientAPI | null;

  // Global actions
  // `initialize` stores the api and runs each slice's initializer;
  // `cleanup` invokes the cleanup function returned by each slice factory.
  initialize: (api: ClientAPI, capabilities: Capabilities) => void;
  cleanup: () => void;
}
26
+
// The data that will actually be persisted.
// Only the four slice data objects are written to storage; the api and the
// store actions are left out (see `partialize` in `initializeStore`).
export type PersistedState = {
  app: AppSlice["app"];
  log: LogSlice["log"];
  logs: LogsSlice["logs"];
  sample: SampleSlice["sample"];
};
34
+
/**
 * The concrete zustand store. It stays `null` until `initializeStore` runs
 * and assigns the bound store to it.
 */
let storeImplementation: UseBoundStore<StoreApi<StoreState>> | null = null;

/**
 * Hook-shaped stand-in for the store that can be imported before the real
 * store exists. Every call is forwarded to the bound store created by
 * `initializeStore`; calling it before then throws.
 *
 * NOTE(review): only the call signature is forwarded. The wrapper is cast to
 * `UseBoundStore`, but no static members (such as `getState`) are attached
 * to it here - confirm that callers only ever invoke it as a hook.
 */
export const useStore = ((selector?: any) => {
  const boundStore = storeImplementation;
  if (!boundStore) {
    throw new Error(
      "Store accessed before initialization. Call initializeStore first.",
    );
  }

  // With a selector return the selected value, otherwise the whole state.
  if (selector) {
    return boundStore(selector);
  }
  return boundStore();
}) as UseBoundStore<StoreApi<StoreState>>;
47
+
/**
 * Builds the application store and installs it behind `useStore`.
 *
 * The store is assembled from the app, logs, log and sample slices and is
 * wrapped (innermost first) in the immer, persist and devtools middleware.
 * Once created it becomes the active store implementation and its
 * `initialize` action is invoked with the supplied api and capabilities.
 *
 * @param api - Client API that `initialize` stores on the state as `api`.
 * @param capabilities - Forwarded to the app slice initializer.
 * @param storage - Optional backing storage for persistence. When omitted,
 *   reads yield `null` and writes / removals do nothing.
 */
export const initializeStore = (
  api: ClientAPI,
  capabilities: Capabilities,
  storage?: ClientStorage,
) => {
  // Setter shape handed to the slice factories and slice initializers.
  type SliceSetter = (fn: (state: StoreState) => void) => void;

  // Adapter between the persist middleware and the (optional) client storage.
  const storageImplementation = {
    getItem: <T>(name: string): T | null =>
      storage ? (storage.getItem(name) as T) : null,
    setItem: <T>(name: string, value: T): void => {
      storage?.setItem(name, value);
    },
    removeItem: (name: string): void => {
      storage?.removeItem(name);
    },
  };

  // Create the actual store
  const store = create<StoreState>()(
    devtools(
      persist(
        immer((set, get, storeApi) => {
          const setSlice = set as SliceSetter;

          // Each factory returns the slice plus a cleanup function.
          const [appSlice, appCleanup] = createAppSlice(
            setSlice,
            get,
            storeApi,
          );
          const [logsSlice, logsCleanup] = createLogsSlice(
            setSlice,
            get,
            storeApi,
          );
          const [logSlice, logCleanup] = createLogSlice(
            setSlice,
            get,
            storeApi,
          );
          const [sampleSlice, sampleCleanup] = createSampleSlice(
            setSlice,
            get,
            storeApi,
          );

          return {
            // Shared state
            api: null,

            // Record the api, then let every slice initialize itself.
            initialize: (api, capabilities) => {
              set((state) => {
                state.api = api;
              });

              initializeAppSlice(setSlice, capabilities);
              initializeLogsSlice(setSlice);
              initalializeLogSlice(setSlice);
              initializeSampleSlice(setSlice);
            },

            // Merge the slices in (order matters for overlapping keys)
            ...appSlice,
            ...logsSlice,
            ...logSlice,
            ...sampleSlice,

            // Run every slice's cleanup function.
            cleanup: () => {
              appCleanup();
              logsCleanup();
              logCleanup();
              sampleCleanup();
            },
          };
        }),
        {
          name: "app-storage",
          storage: storageImplementation,
          // Persist only the slice data, after large payloads are filtered out.
          partialize: (state) => {
            const persistable: PersistedState = filterState({
              app: state.app,
              log: state.log,
              logs: state.logs,
              sample: state.sample,
            });
            return persistable as unknown as StoreState;
          },
          version: 1,
          // Purely diagnostic: logs the outcome of rehydration.
          onRehydrateStorage: (state: StoreState) => {
            return (hydrationState, error) => {
              log.debug("REHYDRATING STATE");
              if (error) {
                log.debug("ERROR", { error });
              } else {
                log.debug("STATE", { state, hydrationState });
              }
            };
          },
        },
      ),
    ),
  );

  // Set the implementation and initialize it
  storeImplementation = store as UseBoundStore<StoreApi<StoreState>>;
  store.getState().initialize(api, capabilities);
};
@@ -0,0 +1,84 @@
1
+ import { PersistedState } from "./store";
2
+
/**
 * Prepares state for persistence by running it through each size filter in
 * turn (selected sample first, then selected log summary).
 *
 * @param state - The state about to be persisted; a falsy value is returned
 *   untouched.
 * @returns The state with any oversized entries removed.
 */
export function filterState(state: PersistedState) {
  if (!state) {
    return state;
  }

  // When saving state, we can't store vast amounts of data (like a large sample)
  let filtered = state;
  for (const applyFilter of [filterLargeSample, filterLargeLogSummary]) {
    filtered = applyFilter(filtered);
  }
  return filtered;
}
15
+
/**
 * Drops the selected sample from the state when the estimated serialized
 * size of its messages exceeds 250000.
 *
 * @param state - State to inspect; returned as-is when there is no selected
 *   sample or the estimate is within the limit.
 * @returns Either the same state object or a shallow copy whose
 *   `sample.selectedSample` is `undefined`.
 */
function filterLargeSample(state: PersistedState): PersistedState {
  const selectedSample = state?.sample?.selectedSample;
  if (!selectedSample) {
    return state;
  }

  const sizeLimit = 250000;
  const isTooLarge = estimateSize(selectedSample.messages) > sizeLimit;
  if (!isTooLarge) {
    return state;
  }

  return {
    ...state,
    sample: {
      ...state.sample,
      selectedSample: undefined,
    },
  };
}
35
+
/**
 * Drops the selected log summary from the state when the estimated
 * serialized size of its sample summaries exceeds 250000.
 *
 * @param state - State to inspect; returned as-is when there is no selected
 *   log summary or the estimate is within the limit.
 * @returns Either the same state object or a shallow copy whose
 *   `log.selectedLogSummary` is `undefined`.
 */
function filterLargeLogSummary(state: PersistedState): PersistedState {
  const selectedLogSummary = state?.log?.selectedLogSummary;
  if (!selectedLogSummary) {
    return state;
  }

  const sizeLimit = 250000;
  const isTooLarge =
    estimateSize(selectedLogSummary.sampleSummaries) > sizeLimit;
  if (!isTooLarge) {
    return state;
  }

  return {
    ...state,
    log: {
      ...state.log,
      selectedLogSummary: undefined,
    },
  };
}
57
+
/**
 * Estimates the total JSON-serialized length of a list by serializing a
 * random subset of its entries and extrapolating to the full list.
 *
 * @param list - Entries to measure. A missing or empty list yields 0.
 * @param frequency - Fraction of the entries to sample. The resulting sample
 *   size is clamped to between 1 and `list.length`, so values outside (0, 1]
 *   still produce a usable estimate.
 * @returns The estimated combined length of the serialized entries.
 */
function estimateSize(list: unknown[], frequency = 0.2): number {
  if (!list || list.length === 0) {
    return 0;
  }

  // Clamp the sample size to [1, list.length]. `|| 1` also covers a NaN
  // result, so the extrapolation below never divides by zero and never
  // divides by more entries than were actually measured.
  const requestedSize = Math.ceil(list.length * frequency);
  const sampleSize = Math.min(list.length, Math.max(1, requestedSize || 1));

  // Entries that JSON cannot represent (undefined, functions, symbols)
  // serialize to `undefined`; count those as contributing nothing.
  const serializedLength = (index: number): number =>
    JSON.stringify(list[index])?.length ?? 0;

  let sampledTotal = 0;
  if (sampleSize === list.length) {
    // Measuring everything: no need to draw random indices.
    for (let index = 0; index < list.length; index++) {
      sampledTotal += serializedLength(index);
    }
  } else {
    // Draw distinct random indices until the sample is full.
    const sampledIndices = new Set<number>();
    while (sampledIndices.size < sampleSize) {
      sampledIndices.add(Math.floor(Math.random() * list.length));
    }
    for (const index of sampledIndices) {
      sampledTotal += serializedLength(index);
    }
  }

  // Extrapolate the sampled average to the whole list.
  return (sampledTotal / sampleSize) * list.length;
}
@@ -0,0 +1,23 @@
1
+ import { SampleSummary } from "../api/types";
2
+
/**
 * Combines the sample summaries already present in a log with pending ones.
 *
 * A pending summary is skipped when a log summary with the same id and epoch
 * exists. Every pending summary that is kept is copied with `completed` set
 * to `false`, so that it is always treated as incomplete (to be sure polling
 * is triggered).
 *
 * @param logSamples - Summaries from the log; kept first and unchanged.
 * @param pendingSamples - Pending summaries to append when not already logged.
 * @returns A new array: the log summaries followed by the kept pending ones.
 */
export const mergeSampleSummaries = (
  logSamples: SampleSummary[],
  pendingSamples: SampleSummary[],
) => {
  // Identity of a summary is its id / epoch pair.
  const identityOf = (summary: SampleSummary) =>
    `${summary.id}-${summary.epoch}`;

  const loggedIdentities = new Set<string>();
  for (const logged of logSamples) {
    loggedIdentities.add(identityOf(logged));
  }

  const pendingOnly: SampleSummary[] = [];
  for (const pending of pendingSamples) {
    if (loggedIdentities.has(identityOf(pending))) {
      continue;
    }
    // Always mark pending items as incomplete to be sure we trigger polling
    pendingOnly.push({ ...pending, completed: false });
  }

  return [...logSamples, ...pendingOnly];
};
@@ -0,0 +1,26 @@
1
+ import { ClientStorage } from "../api/types";
2
+ import { PersistedState } from "../state/store";
3
+ import { getVscodeApi } from "../utils/vscode";
4
+
/**
 * Picks the storage backend used to persist application state.
 *
 * When the VS Code API is available, its state slot is used as the store:
 * the item name is ignored and the whole slot is read, replaced, or reset
 * to `null`. Otherwise no storage is provided.
 *
 * @returns A storage backed by the VS Code state, or `undefined`.
 */
const resolveStorage = (): ClientStorage | undefined => {
  const vscodeApi = getVscodeApi();
  if (!vscodeApi) {
    return undefined;
  }

  const vscodeStorage: ClientStorage = {
    getItem: (_name: string) => {
      return vscodeApi.getState() as PersistedState;
    },
    setItem: (_name: string, value: unknown) => {
      // TODO: This is pretty gnarly type hijinks
      // The value is treated as a { state, version } wrapper and is stored
      // as a whole.
      vscodeApi.setState(value as { state: PersistedState; version: number });
    },
    removeItem: (_name: string) => {
      vscodeApi.setState(null);
    },
  };
  return vscodeStorage;
};

// Resolved once, when this module is first evaluated.
export default resolveStorage();
@@ -44,6 +44,7 @@ export type SandboxCleanup = boolean | null;
44
44
  export type LogSamples = boolean | null;
45
45
  export type LogImages = boolean | null;
46
46
  export type LogBuffer = number | null;
47
+ export type LogShared = number | null;
47
48
  export type ScoreDisplay = boolean | null;
48
49
  export type Type1 = "git";
49
50
  export type Origin = string;
@@ -627,6 +628,7 @@ export interface EvalConfig {
627
628
  log_samples: LogSamples;
628
629
  log_images: LogImages;
629
630
  log_buffer: LogBuffer;
631
+ log_shared: LogShared;
630
632
  score_display: ScoreDisplay;
631
633
  }
632
634
  export interface ApprovalPolicyConfig {
@@ -1,49 +1,105 @@
1
+ import { StateSnapshot } from "react-virtuoso";
1
2
  import {
3
+ AttachmentData,
2
4
  EvalLogHeader,
3
5
  EvalSummary,
6
+ EventData,
4
7
  LogFiles,
8
+ PendingSamples,
5
9
  SampleSummary,
6
10
  } from "./api/types";
7
- import { ContentImage, ContentText, EvalSample } from "./types/log";
8
-
9
- export interface ApplicationState {
10
- logs?: LogFiles;
11
- selectedLogIndex?: number;
12
- logHeaders?: Record<string, EvalLogHeader>;
13
- headersLoading?: boolean;
14
- selectedLog?: CurrentLog;
15
- selectedWorkspaceTab?: string;
16
- selectedSampleIndex?: number;
17
- selectedSample?: EvalSample;
18
- sampleStatus?: "loading" | "ok" | "error";
19
- sampleError?: Error;
20
- selectedSampleTab?: string;
21
- sampleScrollPosition?: number;
22
- showingSampleDialog?: boolean;
23
- status?: AppStatus;
24
- offcanvas?: boolean;
25
- showFind?: boolean;
26
- filter?: ScoreFilter;
27
- epoch?: string;
28
- sort?: string;
29
- scores?: ScoreLabel[];
11
+ import { ScorerInfo } from "./scoring/utils";
12
+ import {
13
+ ApprovalEvent,
14
+ ContentImage,
15
+ ContentText,
16
+ EvalSample,
17
+ InfoEvent,
18
+ LoggerEvent,
19
+ ModelEvent,
20
+ SampleInitEvent,
21
+ SampleLimitEvent,
22
+ SandboxEvent,
23
+ ScoreEvent,
24
+ StateEvent,
25
+ StepEvent,
26
+ StoreEvent,
27
+ SubtaskEvent,
28
+ ToolEvent,
29
+ } from "./types/log";
30
+
31
+ export interface AppState {
32
+ status: AppStatus;
33
+ offcanvas: boolean;
34
+ showFind: boolean;
35
+ tabs: {
36
+ workspace: string;
37
+ sample: string;
38
+ };
39
+ dialogs: {
40
+ sample: boolean;
41
+ };
42
+ scrollPositions: Record<string, number>;
43
+ listPositions: Record<string, StateSnapshot>;
44
+ collapsed: Record<string, boolean>;
45
+ messages: Record<string, boolean>;
46
+ propertyBags: Record<string, Record<string, unknown>>;
47
+ }
48
+
49
+ export interface LogsState {
50
+ logs: LogFiles;
51
+ logHeaders: Record<string, EvalLogHeader>;
52
+ headersLoading: boolean;
53
+ selectedLogIndex: number;
54
+ }
55
+
56
+ export interface LogState {
57
+ loadedLog?: string;
58
+
59
+ selectedSampleIndex: number;
60
+ selectedLogSummary?: EvalSummary;
61
+ pendingSampleSummaries?: PendingSamples;
62
+
63
+ filter: ScoreFilter;
64
+ epoch: string;
65
+ sort: string;
30
66
  score?: ScoreLabel;
31
- filteredSamples?: SampleSummary[];
32
- groupBy?: "none" | "epoch" | "sample";
33
- groupByOrder?: "asc" | "desc";
34
- workspaceTabScrollPosition?: Record<string, number>;
67
+ scores?: ScorerInfo[];
35
68
  }
36
69
 
70
+ export type SampleStatus = "ok" | "loading" | "streaming" | "error";
71
+
72
+ export interface SampleState {
73
+ selectedSample: EvalSample | undefined;
74
+ sampleStatus: SampleStatus;
75
+ sampleError: Error | undefined;
76
+
77
+ // Events and attachments
78
+ runningEvents: Event[];
79
+ }
80
+
/**
 * Union of the log event types that can appear in `SampleState.runningEvents`.
 *
 * `InputEvent` and `ErrorEvent` are referenced through inline import types
 * because they are not part of this file's `./types/log` import list; as
 * bare names they would silently resolve to the global DOM `InputEvent` and
 * `ErrorEvent` types instead of the log event types.
 *
 * NOTE(review): assumes `./types/log` exports `InputEvent` and `ErrorEvent`
 * alongside the other event interfaces - confirm.
 */
export type Event =
  | SampleInitEvent
  | SampleLimitEvent
  | SandboxEvent
  | StateEvent
  | StoreEvent
  | ModelEvent
  | ToolEvent
  | ApprovalEvent
  | import("./types/log").InputEvent
  | ScoreEvent
  | import("./types/log").ErrorEvent
  | LoggerEvent
  | InfoEvent
  | StepEvent
  | SubtaskEvent;
97
+
37
98
  export interface AppStatus {
38
99
  loading: boolean;
39
100
  error?: Error;
40
101
  }
41
102
 
42
- export interface Capabilities {
43
- downloadFiles: boolean;
44
- webWorkers: boolean;
45
- }
46
-
47
103
  export interface CurrentLog {
48
104
  name: string;
49
105
  contents: EvalSummary;
@@ -69,3 +125,9 @@ export interface ContentTool {
69
125
  type: "tool";
70
126
  content: (ContentImage | ContentText)[];
71
127
  }
128
+
129
+ export interface RunningSampleData {
130
+ events: Map<string, EventData>;
131
+ attachments: Map<string, AttachmentData>;
132
+ summary?: SampleSummary;
133
+ }
@@ -1,40 +1,75 @@
1
- /**
2
- * Resolves individual value by replacing protocol references with attachment content
3
- */
4
- export const resolveAttachments = (
5
- value: any,
1
+ export const resolveAttachments = <T>(
2
+ value: T,
6
3
  attachments: Record<string, string>,
7
- ): any => {
8
- const kContentProtocol = "tc://";
9
- const kAttachmentProtocol = "attachment://";
4
+ ): T => {
5
+ const CONTENT_PROTOCOL = "tc://";
6
+ const ATTACHMENT_PROTOCOL = "attachment://";
7
+
8
+ // Handle null or undefined early
9
+ if (value === null || value === undefined) {
10
+ return value;
11
+ }
10
12
 
11
13
  // Handle arrays recursively
12
14
  if (Array.isArray(value)) {
13
- return value.map((v) => resolveAttachments(v, attachments));
15
+ let hasChanged = false;
16
+ const resolvedArray = value.map((v) => {
17
+ const resolved = resolveAttachments(v, attachments);
18
+ if (resolved !== v) hasChanged = true;
19
+ return resolved;
20
+ });
21
+
22
+ // Only return the new array if something actually changed
23
+ return hasChanged ? (resolvedArray as unknown as T) : value;
14
24
  }
15
25
 
16
- // Handle objects recursively
17
- if (value && typeof value === "object") {
26
+ // Handle objects recursively, but skip Date instances and other special object types
27
+ if (
28
+ typeof value === "object" &&
29
+ !(value instanceof Date) &&
30
+ !(value instanceof RegExp)
31
+ ) {
32
+ let hasChanged = false;
18
33
  const resolvedObject: Record<string, unknown> = {};
19
- for (const key of Object.keys(value)) {
20
- resolvedObject[key] = resolveAttachments(value[key], attachments);
34
+
35
+ for (const [key, val] of Object.entries(value)) {
36
+ const resolved = resolveAttachments(val, attachments);
37
+ resolvedObject[key] = resolved;
38
+
39
+ // Track if anything changed
40
+ if (resolved !== val) hasChanged = true;
21
41
  }
22
- return resolvedObject;
42
+
43
+ // Only return the new object if something actually changed
44
+ return hasChanged ? (resolvedObject as unknown as T) : value;
23
45
  }
24
46
 
25
47
  // Handle string values with protocol references
26
48
  if (typeof value === "string") {
27
- let resolvedValue = value;
28
- if (resolvedValue.startsWith(kContentProtocol)) {
29
- resolvedValue = resolvedValue.replace(
30
- kContentProtocol,
31
- kAttachmentProtocol,
32
- );
49
+ // Check if the string starts with the content protocol
50
+ if (value.startsWith(CONTENT_PROTOCOL)) {
51
+ const updatedValue = value.replace(CONTENT_PROTOCOL, ATTACHMENT_PROTOCOL);
52
+
53
+ // Now check if it's an attachment reference
54
+ if (updatedValue.startsWith(ATTACHMENT_PROTOCOL)) {
55
+ const attachmentId = updatedValue.slice(ATTACHMENT_PROTOCOL.length);
56
+ const attachment = attachments[attachmentId];
57
+
58
+ // Return the attachment content if it exists, otherwise return the original string
59
+ return (attachment !== undefined ? attachment : value) as unknown as T;
60
+ }
61
+
62
+ return updatedValue as unknown as T;
33
63
  }
34
- if (resolvedValue.startsWith(kAttachmentProtocol)) {
35
- return attachments[resolvedValue.replace(kAttachmentProtocol, "")];
64
+
65
+ // Check if it's directly an attachment reference
66
+ if (value.startsWith(ATTACHMENT_PROTOCOL)) {
67
+ const attachmentId = value.slice(ATTACHMENT_PROTOCOL.length);
68
+ const attachment = attachments[attachmentId];
69
+
70
+ // Return the attachment content if it exists, otherwise return the original string
71
+ return (attachment !== undefined ? attachment : value) as unknown as T;
36
72
  }
37
- return resolvedValue;
38
73
  }
39
74
 
40
75
  // Return unchanged for other types
@@ -0,0 +1,52 @@
// These identifiers are expected to be replaced at build time:
// `__DEV_WATCH__` with a boolean value and `__LOGGING_FILTER__` with a
// comma separated list of namespaces.
declare const __DEV_WATCH__: boolean;
declare const __LOGGING_FILTER__: string;

// Read the build-time values once, tolerating builds where they were not
// injected. A bare reference to a missing identifier throws a ReferenceError
// (here that would happen while this module is being loaded); `typeof` does not.
const DEV_WATCH_ENABLED =
  typeof __DEV_WATCH__ !== "undefined" && Boolean(__DEV_WATCH__);
const LOGGING_FILTER =
  typeof __LOGGING_FILTER__ === "string" ? __LOGGING_FILTER__ : "";

/**
 * Parses the logging filter into the list of enabled namespaces.
 *
 * @returns The trimmed, non-empty entries of the comma separated filter;
 *   an empty list when no filter was provided.
 */
const getEnabledNamespaces = () => {
  // Split by comma and filter out empty strings
  return LOGGING_FILTER.split(",")
    .map((ns) => ns.trim())
    .filter(Boolean);
};

const ENABLED_NAMESPACES = new Set<string>(getEnabledNamespaces());

/**
 * Reports whether a namespace passes the logging filter: either the filter
 * contains "*" or it lists the namespace itself.
 */
const filterNameSpace = (namespace: string) => {
  if (ENABLED_NAMESPACES.has("*")) return true;

  return ENABLED_NAMESPACES.has(namespace);
};

/**
 * Creates a logger whose output is prefixed with `[namespace]`.
 *
 * `debug`, `info`, `warn` and `debugIf` only write to the console in
 * dev-watch builds and only when the namespace passes the logging filter.
 * `error` always writes.
 *
 * @param namespace - Prefix for every message and the key checked against
 *   the logging filter.
 * @returns An object with `debug`, `info`, `warn`, `error` and `debugIf`.
 */
export const createLogger = (namespace: string) => {
  // Single gate shared by every non-error method.
  const isEnabled = () => DEV_WATCH_ENABLED && filterNameSpace(namespace);

  const logger = {
    debug: (message: string, ...args: unknown[]) => {
      if (isEnabled()) console.debug(`[${namespace}] ${message}`, ...args);
    },

    info: (message: string, ...args: unknown[]) => {
      if (isEnabled()) console.info(`[${namespace}] ${message}`, ...args);
    },

    warn: (message: string, ...args: unknown[]) => {
      if (isEnabled()) console.warn(`[${namespace}] ${message}`, ...args);
    },

    // Always log errors, even in production
    error: (message: string, ...args: unknown[]) => {
      console.error(`[${namespace}] ${message}`, ...args);
    },

    // Lazy evaluation for expensive logs: `fn` only runs when logging is enabled
    debugIf: (fn: () => string) => {
      if (isEnabled()) console.debug(`[${namespace}] ${fn()}`);
    },
  };

  return logger;
};