inspect-ai 0.3.96__py3-none-any.whl → 0.3.97__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. inspect_ai/_eval/eval.py +10 -2
  2. inspect_ai/_eval/task/util.py +32 -3
  3. inspect_ai/_util/registry.py +7 -0
  4. inspect_ai/_util/timer.py +13 -0
  5. inspect_ai/_view/www/dist/assets/index.css +275 -195
  6. inspect_ai/_view/www/dist/assets/index.js +8568 -7376
  7. inspect_ai/_view/www/src/app/App.css +1 -0
  8. inspect_ai/_view/www/src/app/App.tsx +27 -10
  9. inspect_ai/_view/www/src/app/appearance/icons.ts +5 -0
  10. inspect_ai/_view/www/src/app/content/RecordTree.module.css +22 -0
  11. inspect_ai/_view/www/src/app/content/RecordTree.tsx +370 -0
  12. inspect_ai/_view/www/src/app/content/RenderedContent.module.css +5 -0
  13. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +32 -19
  14. inspect_ai/_view/www/src/app/content/record_processors/store.ts +101 -0
  15. inspect_ai/_view/www/src/app/content/record_processors/types.ts +3 -0
  16. inspect_ai/_view/www/src/app/content/types.ts +5 -0
  17. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -0
  18. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +35 -28
  19. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +1 -8
  20. inspect_ai/_view/www/src/app/log-view/navbar/PrimaryBar.tsx +2 -4
  21. inspect_ai/_view/www/src/app/log-view/navbar/ResultsPanel.tsx +13 -3
  22. inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.module.css +15 -0
  23. inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.tsx +14 -10
  24. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +9 -3
  25. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +1 -3
  26. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +8 -2
  27. inspect_ai/_view/www/src/app/log-view/types.ts +1 -0
  28. inspect_ai/_view/www/src/app/plan/ModelCard.module.css +7 -0
  29. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +5 -2
  30. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +13 -8
  31. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +63 -8
  32. inspect_ai/_view/www/src/app/routing/url.ts +45 -0
  33. inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +2 -1
  34. inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.tsx +15 -8
  35. inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +3 -0
  36. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +16 -5
  37. inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +9 -1
  38. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +68 -31
  39. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +12 -7
  40. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -5
  41. inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.module.css +9 -0
  42. inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +48 -18
  43. inspect_ai/_view/www/src/app/samples/chat/ChatView.tsx +0 -1
  44. inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.module.css +4 -0
  45. inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +41 -1
  46. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -0
  47. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +0 -3
  48. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -1
  49. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +1 -1
  50. inspect_ai/_view/www/src/app/samples/chat/tools/ToolOutput.module.css +1 -1
  51. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +5 -1
  52. inspect_ai/_view/www/src/app/samples/descriptor/score/PassFailScoreDescriptor.tsx +11 -6
  53. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +7 -0
  54. inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +5 -18
  55. inspect_ai/_view/www/src/app/samples/sample-tools/SortFilter.tsx +1 -1
  56. inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.tsx +18 -5
  57. inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.module.css +0 -6
  58. inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.tsx +4 -1
  59. inspect_ai/_view/www/src/app/samples/transcript/ApprovalEventView.tsx +4 -2
  60. inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +6 -4
  61. inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.module.css +1 -1
  62. inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +13 -6
  63. inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +6 -4
  64. inspect_ai/_view/www/src/app/samples/transcript/LoggerEventView.tsx +4 -2
  65. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +11 -8
  66. inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +14 -8
  67. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +13 -8
  68. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +25 -16
  69. inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +7 -5
  70. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +11 -28
  71. inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +12 -20
  72. inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +12 -31
  73. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +25 -29
  74. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +297 -0
  75. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +0 -8
  76. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +43 -25
  77. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +43 -0
  78. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +109 -43
  79. inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +19 -8
  80. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +128 -60
  81. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +14 -4
  82. inspect_ai/_view/www/src/app/samples/transcript/types.ts +6 -4
  83. inspect_ai/_view/www/src/app/types.ts +12 -1
  84. inspect_ai/_view/www/src/components/Card.css +6 -3
  85. inspect_ai/_view/www/src/components/Card.tsx +15 -2
  86. inspect_ai/_view/www/src/components/CopyButton.tsx +4 -6
  87. inspect_ai/_view/www/src/components/ExpandablePanel.module.css +20 -14
  88. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +17 -22
  89. inspect_ai/_view/www/src/components/LargeModal.tsx +5 -1
  90. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +25 -1
  91. inspect_ai/_view/www/src/components/MarkdownDiv.css +4 -0
  92. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +2 -2
  93. inspect_ai/_view/www/src/components/TabSet.module.css +6 -1
  94. inspect_ai/_view/www/src/components/TabSet.tsx +8 -2
  95. inspect_ai/_view/www/src/state/hooks.ts +83 -13
  96. inspect_ai/_view/www/src/state/logPolling.ts +2 -2
  97. inspect_ai/_view/www/src/state/logSlice.ts +1 -2
  98. inspect_ai/_view/www/src/state/logsSlice.ts +9 -9
  99. inspect_ai/_view/www/src/state/samplePolling.ts +1 -1
  100. inspect_ai/_view/www/src/state/sampleSlice.ts +134 -7
  101. inspect_ai/_view/www/src/state/scoring.ts +1 -1
  102. inspect_ai/_view/www/src/state/scrolling.ts +39 -6
  103. inspect_ai/_view/www/src/state/store.ts +5 -0
  104. inspect_ai/_view/www/src/state/store_filter.ts +47 -44
  105. inspect_ai/_view/www/src/utils/debugging.ts +95 -0
  106. inspect_ai/_view/www/src/utils/format.ts +2 -2
  107. inspect_ai/_view/www/src/utils/json.ts +29 -0
  108. inspect_ai/agent/__init__.py +2 -1
  109. inspect_ai/agent/_agent.py +12 -0
  110. inspect_ai/agent/_react.py +184 -48
  111. inspect_ai/agent/_types.py +14 -1
  112. inspect_ai/analysis/beta/__init__.py +0 -2
  113. inspect_ai/analysis/beta/_dataframe/columns.py +11 -16
  114. inspect_ai/analysis/beta/_dataframe/evals/table.py +65 -40
  115. inspect_ai/analysis/beta/_dataframe/events/table.py +24 -36
  116. inspect_ai/analysis/beta/_dataframe/messages/table.py +24 -15
  117. inspect_ai/analysis/beta/_dataframe/progress.py +35 -5
  118. inspect_ai/analysis/beta/_dataframe/record.py +13 -9
  119. inspect_ai/analysis/beta/_dataframe/samples/columns.py +1 -1
  120. inspect_ai/analysis/beta/_dataframe/samples/table.py +156 -46
  121. inspect_ai/analysis/beta/_dataframe/util.py +14 -12
  122. inspect_ai/model/_call_tools.py +1 -1
  123. inspect_ai/model/_providers/anthropic.py +18 -5
  124. inspect_ai/model/_providers/azureai.py +7 -2
  125. inspect_ai/model/_providers/util/llama31.py +3 -3
  126. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/METADATA +1 -1
  127. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/RECORD +131 -126
  128. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/WHEEL +1 -1
  129. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.module.css +0 -48
  130. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +0 -276
  131. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/entry_points.txt +0 -0
  132. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/licenses/LICENSE +0 -0
  133. {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.97.dist-info}/top_level.txt +0 -0
@@ -4,20 +4,35 @@ import { SampleSummary } from "../client/api/types";
4
4
  import { kSampleMessagesTabId } from "../constants";
5
5
  import { createLogger } from "../utils/logger";
6
6
  import { createSamplePolling } from "./samplePolling";
7
- import { resolveSample } from "./sampleUtils"; // Import the shared utility
7
+ import { resolveSample } from "./sampleUtils";
8
8
  import { StoreState } from "./store";
9
+ import { isLargeSample } from "./store_filter";
9
10
 
10
11
  const log = createLogger("sampleSlice");
11
12
 
13
+ // Create a module-level ref to store large sample objects
14
+ let selectedSampleRef: { current: EvalSample | undefined } = {
15
+ current: undefined,
16
+ };
17
+
12
18
  export interface SampleSlice {
13
19
  sample: SampleState;
14
20
  sampleActions: {
15
21
  // The actual sample data
16
22
  setSelectedSample: (sample: EvalSample) => void;
23
+ getSelectedSample: () => EvalSample | undefined;
17
24
  clearSelectedSample: () => void;
18
25
  setSampleStatus: (status: SampleStatus) => void;
19
26
  setSampleError: (error: Error | undefined) => void;
20
27
 
28
+ setCollapsedEvents: (collapsed: Record<string, true>) => void;
29
+ collapseEvent: (id: string, collapsed: boolean) => void;
30
+ clearCollapsedEvents: () => void;
31
+
32
+ setCollapsedIds: (key: string, collapsed: Record<string, true>) => void;
33
+ collapseId: (key: string, id: string, collapsed: boolean) => void;
34
+ clearCollapsedIds: (key: string) => void;
35
+
21
36
  // Loading
22
37
  loadSample: (
23
38
  logFile: string,
@@ -32,12 +47,23 @@ export interface SampleSlice {
32
47
  }
33
48
 
34
49
  const initialState: SampleState = {
35
- selectedSample: undefined,
50
+ // Store ID for all samples (used for triggering renders)
51
+ sample_identifier: undefined,
52
+ // Store the actual sample object for small samples
53
+ selectedSampleObject: undefined,
54
+ // Flag to indicate where the sample is stored
55
+ sampleInState: false,
36
56
  sampleStatus: "ok",
37
57
  sampleError: undefined,
38
58
 
59
+ // signals that the sample needs to be reloaded
60
+ sampleNeedsReload: 0,
61
+
39
62
  // The resolved events
40
63
  runningEvents: [],
64
+ collapsedEvents: null,
65
+
66
+ collapsedIdBuckets: {},
41
67
  };
42
68
 
43
69
  export const createSampleSlice = (
@@ -53,18 +79,49 @@ export const createSampleSlice = (
53
79
  sample: initialState,
54
80
  sampleActions: {
55
81
  setSelectedSample: (sample: EvalSample) => {
82
+ const isLarge = isLargeSample(sample);
83
+
84
+ // Update state based on sample size
56
85
  set((state) => {
57
- state.sample.selectedSample = sample;
86
+ state.sample.sample_identifier = {
87
+ id: sample.id,
88
+ epoch: sample.epoch,
89
+ };
90
+ state.sample.sampleInState = !isLarge;
91
+
92
+ // Only store in state if it's small
93
+ if (!isLarge) {
94
+ state.sample.selectedSampleObject = sample;
95
+ // Clear ref if using state
96
+ selectedSampleRef.current = undefined;
97
+ } else {
98
+ // Use ref for large objects
99
+ state.sample.selectedSampleObject = undefined;
100
+ selectedSampleRef.current = sample;
101
+ }
58
102
  });
103
+
59
104
  if (sample.events.length < 1) {
60
105
  // If there are no events, use the messages tab as the default
61
106
  get().appActions.setSampleTab(kSampleMessagesTabId);
62
107
  }
63
108
  },
64
- clearSelectedSample: () =>
109
+ getSelectedSample: () => {
110
+ const state = get().sample;
111
+ // Return from state if stored there, otherwise from ref
112
+ return state.sampleInState
113
+ ? state.selectedSampleObject
114
+ : selectedSampleRef.current;
115
+ },
116
+ clearSelectedSample: () => {
117
+ // Clear both the ref and the state
118
+ selectedSampleRef.current = undefined;
65
119
  set((state) => {
66
- state.sample.selectedSample = undefined;
67
- }),
120
+ state.sample.sample_identifier = undefined;
121
+ state.sample.selectedSampleObject = undefined;
122
+ state.sample.sampleInState = false;
123
+ });
124
+ },
68
125
  setSampleStatus: (status: SampleStatus) =>
69
126
  set((state) => {
70
127
  state.sample.sampleStatus = status;
@@ -73,10 +130,59 @@ export const createSampleSlice = (
73
130
  set((state) => {
74
131
  state.sample.sampleError = error;
75
132
  }),
133
+ setCollapsedEvents: (collapsed: Record<string, true>) => {
134
+ set((state) => {
135
+ state.sample.collapsedEvents = collapsed;
136
+ });
137
+ },
138
+ clearCollapsedEvents: () => {
139
+ set((state) => {
140
+ state.sample.collapsedEvents = null;
141
+ });
142
+ },
143
+ collapseEvent: (id: string, collapsed: boolean) => {
144
+ set((state) => {
145
+ if (state.sample.collapsedEvents === null) {
146
+ state.sample.collapsedEvents = {};
147
+ }
148
+ if (collapsed) {
149
+ state.sample.collapsedEvents[id] = true;
150
+ } else {
151
+ delete state.sample.collapsedEvents[id];
152
+ }
153
+ });
154
+ },
155
+ setCollapsedIds: (key: string, collapsed: Record<string, true>) => {
156
+ set((state) => {
157
+ state.sample.collapsedIdBuckets[key] = collapsed;
158
+ });
159
+ },
160
+ collapseId: (key: string, id: string, collapsed: boolean) => {
161
+ set((state) => {
162
+ if (state.sample.collapsedIdBuckets[key] === undefined) {
163
+ state.sample.collapsedIdBuckets[key] = {};
164
+ }
165
+ if (collapsed) {
166
+ state.sample.collapsedIdBuckets[key][id] = true;
167
+ } else {
168
+ delete state.sample.collapsedIdBuckets[key][id];
169
+ }
170
+ });
171
+ },
172
+ clearCollapsedIds: (key: string) => {
173
+ set((state) => {
174
+ delete state.sample.collapsedIdBuckets[key];
175
+ });
176
+ },
177
+
76
178
  pollSample: async (logFile: string, sampleSummary: SampleSummary) => {
77
179
  // Poll running sample
78
180
  const state = get();
79
- if (state.log.loadedLog && state.sample.selectedSample) {
181
+ const sampleExists = state.sample.sampleInState
182
+ ? !!state.sample.selectedSampleObject
183
+ : !!selectedSampleRef.current;
184
+
185
+ if (state.log.loadedLog && sampleExists) {
80
186
  samplePolling.startPolling(logFile, sampleSummary);
81
187
  }
82
188
  },
@@ -85,6 +191,8 @@ export const createSampleSlice = (
85
191
 
86
192
  sampleActions.setSampleError(undefined);
87
193
  sampleActions.setSampleStatus("loading");
194
+ const state = get();
195
+
88
196
  try {
89
197
  if (sampleSummary.completed !== false) {
90
198
  log.debug(
@@ -95,8 +203,18 @@ export const createSampleSlice = (
95
203
  sampleSummary.id,
96
204
  sampleSummary.epoch,
97
205
  );
206
+ log.debug(
207
+ `LOADED COMPLETED SAMPLE: ${sampleSummary.id}-${sampleSummary.epoch}`,
208
+ );
98
209
  if (sample) {
99
210
  const migratedSample = resolveSample(sample);
211
+
212
+ if (
213
+ state.sample.sample_identifier?.id !== sample.id &&
214
+ state.sample.sample_identifier?.epoch !== sample.epoch
215
+ ) {
216
+ sampleActions.clearCollapsedEvents();
217
+ }
100
218
  sampleActions.setSelectedSample(migratedSample);
101
219
  sampleActions.setSampleStatus("ok");
102
220
  } else {
@@ -123,10 +241,19 @@ export const createSampleSlice = (
123
241
 
124
242
  const cleanup = () => {
125
243
  samplePolling.cleanup();
244
+ // Clear the ref when cleaning up
245
+ selectedSampleRef.current = undefined;
126
246
  };
127
247
  return [slice, cleanup];
128
248
  };
129
249
 
250
+ export const handleRehydrate = (state: StoreState) => {
251
+ // Increment the reload counter if the sample is not in state
252
+ if (!state.sample.sampleInState) {
253
+ state.sample.sampleNeedsReload = state.sample.sampleNeedsReload + 1;
254
+ }
255
+ };
256
+
130
257
  export const initializeSampleSlice = (
131
258
  set: (fn: (state: StoreState) => void) => void,
132
259
  ) => {
@@ -37,7 +37,7 @@ const getScorersFromResults = (results?: EvalResults): ScorerInfo[] => {
37
37
  const getScorersFromSamples = (samples: SampleSummary[]): ScorerInfo[] => {
38
38
  // Find a sample with scores
39
39
  const scoredSample = samples.find((sample) => {
40
- return !!sample.scores;
40
+ return !sample.error && sample.completed && !!sample.scores;
41
41
  });
42
42
 
43
43
  return Object.keys(scoredSample?.scores || {}).map((key) => ({
@@ -11,7 +11,7 @@ export function useStatefulScrollPosition<
11
11
  >(
12
12
  elementRef: RefObject<T | null>,
13
13
  elementKey: string,
14
- delay = 500,
14
+ delay = 1000,
15
15
  scrollable = true,
16
16
  ) {
17
17
  const getScrollPosition = useStore(
@@ -62,12 +62,45 @@ export function useStatefulScrollPosition<
62
62
  const savedPosition = getScrollPosition(elementKey);
63
63
  if (savedPosition !== undefined) {
64
64
  log.debug(`Restoring scroll position`, savedPosition);
65
- // Ensure the element has fully rendered
66
- requestAnimationFrame(() => {
67
- if (element.scrollTop !== savedPosition) {
68
- element.scrollTop = savedPosition;
65
+
66
+ // Function to check and restore scroll position
67
+ const tryRestoreScroll = () => {
68
+ // Check if element has content to scroll (scrollHeight > clientHeight)
69
+ if (element.scrollHeight > element.clientHeight) {
70
+ if (element.scrollTop !== savedPosition) {
71
+ element.scrollTop = savedPosition;
72
+ log.debug(`Scroll position restored to ${savedPosition}`);
73
+ }
74
+ return true; // Successfully restored
69
75
  }
70
- });
76
+ return false; // Not ready yet
77
+ };
78
+
79
+ // Try immediately once
80
+ if (!tryRestoreScroll()) {
81
+ // If not successful, set up polling with setTimeout for 1-second intervals
82
+ let attempts = 0;
83
+ const maxAttempts = 5; // Fewer attempts since we're waiting longer
84
+
85
+ const pollForRender = () => {
86
+ if (tryRestoreScroll() || attempts >= maxAttempts) {
87
+ // Either success or max attempts reached
88
+ if (attempts >= maxAttempts) {
89
+ log.debug(
90
+ `Failed to restore scroll after ${maxAttempts} attempts`,
91
+ );
92
+ }
93
+ return;
94
+ }
95
+
96
+ attempts++;
97
+ // Wait 1 second before trying again
98
+ setTimeout(pollForRender, 1000);
99
+ };
100
+
101
+ // Start polling after 1 second
102
+ setTimeout(pollForRender, 1000);
103
+ }
71
104
  }
72
105
 
73
106
  // Set up scroll listener
@@ -1,3 +1,4 @@
1
+ import { enableMapSet } from "immer";
1
2
  import { create, StoreApi, UseBoundStore } from "zustand";
2
3
  import { devtools, persist } from "zustand/middleware";
3
4
  import { immer } from "zustand/middleware/immer";
@@ -9,6 +10,7 @@ import { createLogSlice, initalializeLogSlice, LogSlice } from "./logSlice";
9
10
  import { createLogsSlice, initializeLogsSlice, LogsSlice } from "./logsSlice";
10
11
  import {
11
12
  createSampleSlice,
13
+ handleRehydrate,
12
14
  initializeSampleSlice,
13
15
  SampleSlice,
14
16
  } from "./sampleSlice";
@@ -52,6 +54,8 @@ export const initializeStore = (
52
54
  capabilities: Capabilities,
53
55
  storage?: ClientStorage,
54
56
  ) => {
57
+ enableMapSet();
58
+
55
59
  // Create the storage implementation
56
60
  const storageImplementation = {
57
61
  getItem: <T>(name: string): T | null => {
@@ -151,6 +155,7 @@ export const initializeStore = (
151
155
  version: 1,
152
156
  onRehydrateStorage: (state: StoreState) => {
153
157
  return (hydrationState, error) => {
158
+ handleRehydrate(state);
154
159
  log.debug("REHYDRATING STATE");
155
160
  if (error) {
156
161
  log.debug("ERROR", { error });
@@ -1,3 +1,5 @@
1
+ import { EvalSample } from "../@types/log";
2
+ import { estimateSize } from "../utils/json";
1
3
  import { PersistedState } from "./store";
2
4
 
3
5
  export function filterState(state: PersistedState) {
@@ -6,31 +8,60 @@ export function filterState(state: PersistedState) {
6
8
  }
7
9
 
8
10
  // When saving state, we can't store vast amounts of data (like a large sample)
9
- const filters = [filterLargeSample, filterLargeLogSummary];
11
+ const filters = [filterLargeLogSummary];
10
12
  return filters.reduce(
11
13
  (filteredState, filter) => filter(filteredState),
12
14
  state,
13
15
  );
14
16
  }
15
17
 
16
- // Filters the selected Sample if it is large
17
- function filterLargeSample(state: PersistedState): PersistedState {
18
- if (!state || !state.sample || !state.sample.selectedSample) {
19
- return state;
18
+ export function isLargeSample(sample: EvalSample): boolean {
19
+ const storeKeys = countKeys(sample.store);
20
+ if (storeKeys > 5000) {
21
+ return true;
20
22
  }
21
23
 
22
- const estimatedTotalSize = estimateSize(state.sample.selectedSample.messages);
23
- if (estimatedTotalSize > 250000) {
24
- return {
25
- ...state,
26
- sample: {
27
- ...state.sample,
28
- selectedSample: undefined,
29
- },
30
- };
31
- } else {
32
- return state;
24
+ const estimatedMessageSize = estimateSize(sample.messages);
25
+ if (estimatedMessageSize > 250000) {
26
+ return true;
33
27
  }
28
+
29
+ return true;
30
+ }
31
+
32
+ function countKeys(obj: unknown, options = { countArrayIndices: false }) {
33
+ // Base case: not an object or null
34
+ if (obj === null || typeof obj !== "object") {
35
+ return 0;
36
+ }
37
+
38
+ // Handle arrays
39
+ if (Array.isArray(obj)) {
40
+ let count = 0;
41
+ // Count array indices as keys if option is set
42
+ if (options.countArrayIndices) {
43
+ count += obj.length;
44
+ }
45
+ // Count keys in array elements that are objects
46
+ for (const item of obj) {
47
+ count += countKeys(item, options);
48
+ }
49
+ return count;
50
+ }
51
+
52
+ // For regular objects, count all own properties
53
+ let count = Object.keys(obj).length;
54
+
55
+ // Recursively count keys in nested objects
56
+ for (const key in obj) {
57
+ // Use type assertion to tell TypeScript that the key is valid
58
+ if (Object.prototype.hasOwnProperty.call(obj, key)) {
59
+ // Use type assertion (obj as Record<string, unknown>)
60
+ count += countKeys((obj as Record<string, unknown>)[key], options);
61
+ }
62
+ }
63
+
64
+ return count;
34
65
  }
35
66
 
36
67
  // Filters the selectedlog if it is too large
@@ -54,31 +85,3 @@ function filterLargeLogSummary(state: PersistedState): PersistedState {
54
85
  return state;
55
86
  }
56
87
  }
57
-
58
- function estimateSize(list: unknown[], frequency = 0.2) {
59
- if (!list || list.length === 0) {
60
- return 0;
61
- }
62
-
63
- // Total number of samples
64
- const sampleSize = Math.ceil(list.length * frequency);
65
-
66
- // Get a proper random sample without duplicates
67
- const messageIndices = new Set<number>();
68
- while (
69
- messageIndices.size < sampleSize &&
70
- messageIndices.size < list.length
71
- ) {
72
- const randomIndex = Math.floor(Math.random() * list.length);
73
- messageIndices.add(randomIndex);
74
- }
75
-
76
- // Calculate size from sampled messages
77
- const totalSize = Array.from(messageIndices).reduce((size, index) => {
78
- return size + JSON.stringify(list[index]).length;
79
- }, 0);
80
-
81
- // Estimate total size based on sample
82
- const estimatedTotalSize = (totalSize / sampleSize) * list.length;
83
- return estimatedTotalSize;
84
- }
@@ -26,3 +26,98 @@ export function printCircularReferences(obj: Record<string, unknown>): void {
26
26
 
27
27
  detect(obj, "root");
28
28
  }
29
+
30
+ export function findDifferences(
31
+ obj1: unknown,
32
+ obj2: unknown,
33
+ path = "",
34
+ ): string[] {
35
+ // Helper to build a readable path string
36
+ const makePath = (parent: string, key: string | number, isIndex = false) =>
37
+ parent
38
+ ? isIndex
39
+ ? `${parent}[${key}]`
40
+ : `${parent}.${key}`
41
+ : isIndex
42
+ ? `[${key}]`
43
+ : `${key}`;
44
+
45
+ // Primitive / simple equality check (Object.is handles NaN)
46
+ if (Object.is(obj1, obj2)) return [];
47
+
48
+ // Primitives or null → direct difference
49
+ if (
50
+ obj1 === null ||
51
+ obj2 === null ||
52
+ typeof obj1 !== "object" ||
53
+ typeof obj2 !== "object"
54
+ ) {
55
+ return [
56
+ `${path || "<root>"}: ${JSON.stringify(obj1)} → ${JSON.stringify(obj2)}`,
57
+ ];
58
+ }
59
+
60
+ // --- Arrays --------------------------------------------------------------
61
+ const isArr1 = Array.isArray(obj1);
62
+ const isArr2 = Array.isArray(obj2);
63
+ if (isArr1 || isArr2) {
64
+ if (isArr1 !== isArr2) {
65
+ return [`${path || "<root>"}: one is an array, the other is not`];
66
+ }
67
+
68
+ const diff: string[] = [];
69
+ const maxLen = Math.max(
70
+ (obj1 as unknown[]).length,
71
+ (obj2 as unknown[]).length,
72
+ );
73
+
74
+ if ((obj1 as unknown[]).length !== (obj2 as unknown[]).length) {
75
+ diff.push(
76
+ `${path || "<root>"}: array length ${
77
+ (obj1 as unknown[]).length
78
+ } vs ${(obj2 as unknown[]).length}`,
79
+ );
80
+ }
81
+
82
+ for (let i = 0; i < maxLen; i++) {
83
+ diff.push(
84
+ ...findDifferences(
85
+ (obj1 as unknown[])[i],
86
+ (obj2 as unknown[])[i],
87
+ makePath(path, i, true),
88
+ ),
89
+ );
90
+ }
91
+ return diff;
92
+ }
93
+
94
+ // --- Plain objects -------------------------------------------------------
95
+ const allKeys = new Set([
96
+ ...Object.keys(obj1 as Record<string, unknown>),
97
+ ...Object.keys(obj2 as Record<string, unknown>),
98
+ ]);
99
+
100
+ const diff: string[] = [];
101
+
102
+ for (const key of allKeys) {
103
+ const has1 = Object.prototype.hasOwnProperty.call(obj1, key);
104
+ const has2 = Object.prototype.hasOwnProperty.call(obj2, key);
105
+ const newPath = makePath(path, key);
106
+
107
+ if (!has1) {
108
+ diff.push(`${newPath}: property missing in first object`);
109
+ } else if (!has2) {
110
+ diff.push(`${newPath}: property missing in second object`);
111
+ } else {
112
+ diff.push(
113
+ ...findDifferences(
114
+ (obj1 as Record<string, unknown>)[key],
115
+ (obj2 as Record<string, unknown>)[key],
116
+ newPath,
117
+ ),
118
+ );
119
+ }
120
+ }
121
+
122
+ return diff;
123
+ }
@@ -84,13 +84,13 @@ export const formatTime = (seconds: number): string => {
84
84
  const hours = Math.floor(seconds / (60 * 60));
85
85
  const minutes = Math.floor((seconds % (60 * 60)) / 60);
86
86
  const remainingSeconds = seconds % 60;
87
- return `${hours} hr ${minutes} min ${remainingSeconds} sec`;
87
+ return `${hours} hr ${minutes} min ${Math.floor(remainingSeconds)} sec`;
88
88
  } else {
89
89
  const days = Math.floor(seconds / (60 * 60 * 24));
90
90
  const hours = Math.floor((seconds % (60 * 60 * 24)) / (60 * 60));
91
91
  const minutes = Math.floor((seconds % (60 * 60)) / 60);
92
92
  const remainingSeconds = seconds % 60;
93
- return `${days} days ${hours} hr ${minutes} min ${remainingSeconds} sec`;
93
+ return `${days} days ${hours} hr ${minutes} min ${Math.floor(remainingSeconds)} sec`;
94
94
  }
95
95
  };
96
96
 
@@ -22,3 +22,32 @@ export const parsedJson = (text: string): unknown | undefined => {
22
22
  }
23
23
  return undefined;
24
24
  };
25
+
26
+ // Estimates the size of a list of objects by sampling a subset of the list.
27
+ export function estimateSize(list: unknown[], frequency = 0.2) {
28
+ if (!list || list.length === 0) {
29
+ return 0;
30
+ }
31
+
32
+ // Total number of samples
33
+ const sampleSize = Math.ceil(list.length * frequency);
34
+
35
+ // Get a proper random sample without duplicates
36
+ const messageIndices = new Set<number>();
37
+ while (
38
+ messageIndices.size < sampleSize &&
39
+ messageIndices.size < list.length
40
+ ) {
41
+ const randomIndex = Math.floor(Math.random() * list.length);
42
+ messageIndices.add(randomIndex);
43
+ }
44
+
45
+ // Calculate size from sampled messages
46
+ const totalSize = Array.from(messageIndices).reduce((size, index) => {
47
+ return size + JSON.stringify(list[index]).length;
48
+ }, 0);
49
+
50
+ // Estimate total size based on sample
51
+ const estimatedTotalSize = (totalSize / sampleSize) * list.length;
52
+ return estimatedTotalSize;
53
+ }
@@ -1,4 +1,4 @@
1
- from ._agent import Agent, AgentState, agent, agent_with
1
+ from ._agent import Agent, AgentState, agent, agent_with, is_agent
2
2
  from ._as_solver import as_solver
3
3
  from ._as_tool import as_tool
4
4
  from ._bridge.bridge import bridge
@@ -29,6 +29,7 @@ __all__ = [
29
29
  "AgentState",
30
30
  "agent",
31
31
  "agent_with",
32
+ "is_agent",
32
33
  "AgentPrompt",
33
34
  "AgentAttempts",
34
35
  "AgentContinue",
@@ -270,6 +270,18 @@ def agent_register(agent: Callable[P, Agent], name: str) -> Callable[P, Agent]:
270
270
 
271
271
 
272
272
  def is_agent(obj: Any) -> TypeGuard[Agent]:
273
+ """Check if an object is an Agent.
274
+
275
+ Determines if the provided object is registered as an Agent in the system registry.
276
+ When this function returns True, type checkers will recognize 'obj' as an Agent type.
277
+
278
+ Args:
279
+ obj: Object to check against the registry.
280
+
281
+ Returns:
282
+ True if the object is a registered Agent, False otherwise.
283
+ Acts as a TypeGuard to provide type narrowing for static type checkers.
284
+ """
273
285
  return is_registry_object(obj, type="agent")
274
286
 
275
287