inspect-ai 0.3.81__py3-none-any.whl → 0.3.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. inspect_ai/_cli/eval.py +35 -2
  2. inspect_ai/_cli/util.py +44 -1
  3. inspect_ai/_display/core/config.py +1 -1
  4. inspect_ai/_display/core/display.py +13 -4
  5. inspect_ai/_display/core/results.py +1 -1
  6. inspect_ai/_display/textual/widgets/task_detail.py +5 -4
  7. inspect_ai/_eval/eval.py +38 -1
  8. inspect_ai/_eval/evalset.py +5 -0
  9. inspect_ai/_eval/run.py +5 -2
  10. inspect_ai/_eval/task/log.py +53 -6
  11. inspect_ai/_eval/task/run.py +51 -10
  12. inspect_ai/_util/constants.py +2 -0
  13. inspect_ai/_util/file.py +17 -1
  14. inspect_ai/_util/json.py +36 -1
  15. inspect_ai/_view/server.py +113 -1
  16. inspect_ai/_view/www/App.css +1 -1
  17. inspect_ai/_view/www/dist/assets/index.css +518 -296
  18. inspect_ai/_view/www/dist/assets/index.js +38803 -36307
  19. inspect_ai/_view/www/eslint.config.mjs +1 -1
  20. inspect_ai/_view/www/log-schema.json +13 -0
  21. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  22. inspect_ai/_view/www/package.json +8 -2
  23. inspect_ai/_view/www/src/App.tsx +151 -855
  24. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  25. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  26. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  27. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  28. inspect_ai/_view/www/src/api/types.ts +107 -2
  29. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  30. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  31. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  32. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  33. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  34. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  35. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  36. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  37. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  38. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  39. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  40. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -3
  41. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  42. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  43. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  44. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  45. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  46. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  47. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  48. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  49. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  50. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  51. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  52. inspect_ai/_view/www/src/index.tsx +26 -94
  53. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  54. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  55. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  56. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  57. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  58. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +67 -28
  59. inspect_ai/_view/www/src/samples/SampleDialog.tsx +51 -22
  60. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  61. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +144 -90
  62. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  63. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +82 -35
  64. inspect_ai/_view/www/src/samples/SamplesTools.tsx +23 -30
  65. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  66. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  67. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  68. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +4 -1
  69. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +3 -0
  70. inspect_ai/_view/www/src/samples/chat/messages.ts +34 -0
  71. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  72. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +10 -1
  73. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  74. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +25 -17
  75. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +2 -1
  76. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  77. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +21 -3
  78. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +20 -1
  79. inspect_ai/_view/www/src/samples/list/SampleList.tsx +105 -85
  80. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  81. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +27 -14
  82. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  83. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  84. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  85. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  86. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  87. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +7 -9
  88. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +7 -11
  89. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  90. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  91. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  92. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  93. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  94. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  95. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  96. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  97. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  98. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  99. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  100. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  101. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +8 -13
  102. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  103. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  104. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  105. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  106. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +52 -58
  107. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  108. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  109. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +30 -1
  110. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  111. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  112. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  113. inspect_ai/_view/www/src/state/hooks.ts +397 -0
  114. inspect_ai/_view/www/src/state/logPolling.ts +196 -0
  115. inspect_ai/_view/www/src/state/logSlice.ts +214 -0
  116. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  117. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  118. inspect_ai/_view/www/src/state/samplePolling.ts +311 -0
  119. inspect_ai/_view/www/src/state/sampleSlice.ts +127 -0
  120. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  121. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  122. inspect_ai/_view/www/src/state/store.ts +168 -0
  123. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  124. inspect_ai/_view/www/src/state/utils.ts +23 -0
  125. inspect_ai/_view/www/src/storage/index.ts +26 -0
  126. inspect_ai/_view/www/src/types/log.d.ts +2 -0
  127. inspect_ai/_view/www/src/types.ts +94 -32
  128. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  129. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  130. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  131. inspect_ai/_view/www/src/utils/react.ts +30 -0
  132. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  133. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +181 -216
  134. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  135. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  136. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  137. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  138. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +0 -1
  139. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +98 -39
  140. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  141. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  142. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +11 -13
  143. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  144. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  145. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  146. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  147. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  148. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  149. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  150. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +110 -115
  151. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  152. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  153. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  154. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  155. inspect_ai/_view/www/vite.config.js +6 -0
  156. inspect_ai/_view/www/yarn.lock +370 -354
  157. inspect_ai/log/_condense.py +26 -0
  158. inspect_ai/log/_log.py +6 -3
  159. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  160. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  161. inspect_ai/log/_recorders/buffer/database.py +685 -0
  162. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  163. inspect_ai/log/_recorders/buffer/types.py +84 -0
  164. inspect_ai/log/_recorders/eval.py +2 -11
  165. inspect_ai/log/_recorders/types.py +30 -0
  166. inspect_ai/log/_transcript.py +27 -1
  167. inspect_ai/model/_call_tools.py +1 -0
  168. inspect_ai/model/_generate_config.py +2 -2
  169. inspect_ai/model/_model.py +1 -0
  170. inspect_ai/tool/_tool_support_helpers.py +4 -4
  171. inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
  172. inspect_ai/util/_subtask.py +1 -0
  173. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/METADATA +1 -1
  174. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/RECORD +178 -138
  175. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  176. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/WHEEL +0 -0
  177. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/entry_points.txt +0 -0
  178. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/licenses/LICENSE +0 -0
  179. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,685 @@
1
+ import hashlib
2
+ import json
3
+ import os
4
+ import sqlite3
5
+ import time
6
+ from contextlib import contextmanager
7
+ from logging import getLogger
8
+ from pathlib import Path
9
+ from sqlite3 import Connection
10
+ from typing import Callable, Iterator, Literal
11
+
12
+ import psutil
13
+ from pydantic import BaseModel
14
+ from typing_extensions import override
15
+
16
+ from inspect_ai._display.core.display import TaskDisplayMetric
17
+ from inspect_ai._util.appdirs import inspect_data_dir
18
+ from inspect_ai._util.file import basename, dirname, filesystem
19
+ from inspect_ai._util.json import to_json_str_safe
20
+ from inspect_ai._util.trace import trace_action
21
+
22
+ from ..._condense import (
23
+ ATTACHMENT_PROTOCOL,
24
+ attachments_content_fn,
25
+ walk_events,
26
+ walk_input,
27
+ walk_json_dict,
28
+ )
29
+ from ..types import SampleEvent, SampleSummary
30
+ from .filestore import (
31
+ Manifest,
32
+ SampleBufferFilestore,
33
+ SampleManifest,
34
+ Segment,
35
+ SegmentFile,
36
+ )
37
+ from .types import (
38
+ AttachmentData,
39
+ EventData,
40
+ JsonData,
41
+ SampleBuffer,
42
+ SampleData,
43
+ Samples,
44
+ )
45
+
46
+ logger = getLogger(__name__)
47
+
48
+
49
class TaskData(BaseModel):
    """Task-level state read back from the single `task_database` row."""

    # value of the `version` column (bumped after every write connection)
    version: int
    # metrics decoded from the JSON text stored in the `metrics` column
    metrics: list[TaskDisplayMetric]
52
+
53
+
54
class SampleBufferDatabase(SampleBuffer):
    """Sample buffer backed by a per-process SQLite database.

    Sample summaries, events, and attachments are stored in separate tables.
    A single `task_database` row carries a `version` counter that is bumped
    after every write connection and is used as the etag for `get_samples`.
    When `log_shared` is set, writes are also periodically synced to a
    `SampleBufferFilestore`.
    """

    SCHEMA = """

    CREATE TABLE IF NOT EXISTS task_database (
        version INTEGER PRIMARY KEY DEFAULT 1,
        metrics TEXT DEFAULT '[]',
        last_updated DATETIME DEFAULT CURRENT_TIMESTAMP
    );

    CREATE TABLE samples (
        id TEXT,
        epoch INTEGER,
        data TEXT, -- JSON containing all other sample fields
        PRIMARY KEY (id, epoch)
    );

    CREATE TABLE events (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        event_id TEXT,
        sample_id TEXT,
        sample_epoch INTEGER,
        data TEXT -- JSON containing full event
    );

    CREATE TABLE attachments (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        sample_id TEXT,
        sample_epoch INTEGER,
        hash TEXT UNIQUE,
        content TEXT
    );

    -- Indices for foreign keys and common queries
    CREATE INDEX IF NOT EXISTS idx_events_sample ON events(sample_id, sample_epoch);
    CREATE INDEX IF NOT EXISTS idx_attachments_hash ON attachments(hash);

    -- Note the version
    INSERT INTO task_database (version) VALUES (1);
    """

    # maximum number of (id, epoch) pairs deleted per statement in
    # remove_samples (keeps bound-variable counts and expression depth small)
    _REMOVE_BATCH_SIZE = 100

    def __init__(
        self,
        location: str,
        *,
        create: bool = True,
        log_images: bool = True,
        log_shared: int | None = None,
        update_interval: int = 2,
        db_dir: Path | None = None,
    ):
        """Open (or create) the buffer database for a log location.

        Args:
            location: Log file location; normalized to a URI.
            create: If True, create the database (named with this process's
                pid) and initialize its schema. If False, attach to an
                existing database whose name matches the log file.
            log_images: Passed through to the attachment content function.
            log_shared: Sync interval for the shared filestore; when falsy
                no filestore is created.
            update_interval: Value reported as `refresh` by `get_samples`.
            db_dir: Root directory for buffer databases (defaults to the
                result of `resolve_db_dir()`).

        Raises:
            FileNotFoundError: If `create` is False and no matching database
                exists.
        """
        self.location = filesystem(location).path_as_uri(location)
        self.log_images = log_images
        self.log_shared = log_shared
        self.update_interval = update_interval

        # location subdir and file
        subdir_name, file_name = location_dir_and_file(self.location)

        # establish dirs
        db_dir = resolve_db_dir(db_dir)
        log_subdir = db_dir / subdir_name

        # if we are creating then create dirs, use filename w/pid,
        # and create the database as required
        if create:
            log_subdir.mkdir(parents=True, exist_ok=True)
            self.db_path = log_subdir / f"{file_name}.{os.getpid()}.db"

            # initialize the database schema
            with self._get_connection() as conn:
                conn.executescript(self.SCHEMA)
                conn.commit()

        # if we are not creating then find a log in an existing directory
        # which matches the base filename (it will also have a pid)
        else:
            logs = list(log_subdir.glob(f"{file_name}.*.db"))
            if len(logs) > 0:
                self.db_path = logs[0]
            else:
                # f-string so the location is actually interpolated
                raise FileNotFoundError(f"Log database for '{location}' not found.")

        # create sync filestore if log_shared
        self._sync_filestore = (
            SampleBufferFilestore(location, update_interval=log_shared)
            if log_shared
            else None
        )
        self._sync_time = time.monotonic()

    def start_sample(self, sample: SampleSummary) -> None:
        """Insert a new sample row (attachments in its input are aliased first)."""
        with self._get_connection(write=True) as conn:
            sample = self._consense_sample(conn, sample)
            conn.execute(
                """
                INSERT INTO samples (id, epoch, data)
                VALUES (?, ?, ?)
                """,
                (str(sample.id), sample.epoch, to_json_str_safe(sample)),
            )

    def log_events(self, events: list[SampleEvent]) -> None:
        """Append events to the events table, in the order given.

        An empty list is a no-op (no connection is opened and the version
        is not bumped).
        """
        # nothing to insert (a multi-row VALUES clause with zero rows
        # would be a SQL syntax error)
        if not events:
            return

        with self._get_connection(write=True) as conn:
            # collect one parameter row per event
            rows: list[tuple[str | int, ...]] = []
            for event in events:
                event = self._consense_event(conn, event)
                rows.append(
                    (
                        event.event.id_,
                        str(event.id),
                        event.epoch,
                        to_json_str_safe(event.event),
                    )
                )

            # one parameterized statement executed per row; this keeps the
            # bound-variable count per statement fixed regardless of how
            # many events are logged at once
            conn.executemany(
                """
                INSERT INTO events (event_id, sample_id, sample_epoch, data)
                VALUES (?, ?, ?, ?)
                """,
                rows,
            )

    def complete_sample(self, summary: SampleSummary) -> None:
        """Overwrite the stored data for an existing sample row."""
        with self._get_connection(write=True) as conn:
            summary = self._consense_sample(conn, summary)
            conn.execute(
                """
                UPDATE samples SET data = ? WHERE id = ? and epoch = ?
                """,
                (to_json_str_safe(summary), str(summary.id), summary.epoch),
            )

    def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
        """Store the current metrics (as JSON) on the task_database row."""
        with self._get_connection(write=True) as conn:
            conn.execute(
                """
                UPDATE task_database
                SET metrics = ?,
                last_updated = CURRENT_TIMESTAMP;
                """,
                [to_json_str_safe(metrics)],
            )

    def remove_samples(self, samples: list[tuple[str | int, int]]) -> None:
        """Delete the given (id, epoch) samples and their events.

        Ids are bound as parameters (never interpolated into the SQL text)
        and are matched with plain column comparisons rather than a
        row-value IN list. An empty list is a no-op.
        """
        if not samples:
            return

        with self._get_connection(write=True) as conn:
            cursor = conn.cursor()
            try:
                for start in range(0, len(samples), self._REMOVE_BATCH_SIZE):
                    batch = samples[start : start + self._REMOVE_BATCH_SIZE]

                    # flat parameter list: id1, epoch1, id2, epoch2, ...
                    # (ids are stored as text so bind them as str)
                    params: list[str | int] = []
                    for sid, epoch in batch:
                        params.extend((str(sid), epoch))

                    # delete associated events first
                    events_where = " OR ".join(
                        ["(sample_id = ? AND sample_epoch = ?)"] * len(batch)
                    )
                    cursor.execute(
                        f"DELETE FROM events WHERE {events_where}", params
                    )

                    # then delete the samples
                    samples_where = " OR ".join(
                        ["(id = ? AND epoch = ?)"] * len(batch)
                    )
                    cursor.execute(
                        f"DELETE FROM samples WHERE {samples_where}", params
                    )
            finally:
                cursor.close()

    def cleanup(self) -> None:
        """Remove the database file and clean up the sync filestore (if any)."""
        cleanup_sample_buffer_db(self.db_path)
        if self._sync_filestore is not None:
            self._sync_filestore.cleanup()

    @classmethod
    @override
    def running_tasks(cls, log_dir: str) -> list[str] | None:
        """List log file names that have a buffer database for `log_dir`.

        Returns None when no buffer directory exists for `log_dir`.
        """
        log_subdir = log_dir_hash(log_dir)
        db_dir = resolve_db_dir() / log_subdir

        if db_dir.exists():
            # strip the trailing ".<pid>.db" from each database file name
            logs = [log.name.rsplit(".", 2)[0] for log in db_dir.glob("*.*.db")]
            return logs
        else:
            return None

    @override
    def get_samples(
        self, etag: str | None = None
    ) -> Samples | Literal["NotModified"] | None:
        """Read all sample summaries plus current metrics.

        Returns:
            None if the database file does not exist, "NotModified" if
            `etag` equals the current version, otherwise a `Samples` whose
            etag is the current version.
        """
        if not self.db_path.exists():
            return None

        try:
            with self._get_connection() as conn:
                # note version
                task_data = self._get_task_data(conn)

                # apply etag if requested
                if etag == str(task_data.version):
                    return "NotModified"

                # fetch data
                return Samples(
                    samples=list(self._get_samples(conn)),
                    metrics=task_data.metrics,
                    refresh=self.update_interval,
                    etag=str(task_data.version),
                )
        except FileNotFoundError:
            return None

    @override
    def get_sample_data(
        self,
        id: str | int,
        epoch: int,
        after_event_id: int | None = None,
        after_attachment_id: int | None = None,
    ) -> SampleData | None:
        """Read events and attachments for one sample.

        `after_event_id` / `after_attachment_id` restrict results to rows
        whose database id is greater than the given value. Returns None if
        the database file does not exist.
        """
        if not self.db_path.exists():
            return None

        try:
            with self._get_connection() as conn:
                return SampleData(
                    events=list(self._get_events(conn, id, epoch, after_event_id)),
                    attachments=list(
                        self._get_attachments(conn, id, epoch, after_attachment_id)
                    ),
                )
        except FileNotFoundError:
            return None

    @contextmanager
    def _get_connection(self, *, write: bool = False) -> Iterator[Connection]:
        """Get a database connection.

        Commits on success and rolls back on any exception. When `write` is
        True the version is bumped before the commit, and a (throttled) sync
        to the filestore is attempted after the connection is closed.
        """
        conn = sqlite3.connect(self.db_path, timeout=10)
        conn.row_factory = sqlite3.Row  # Enable row factory for named columns
        try:
            # Enable foreign key constraints
            conn.execute("PRAGMA foreign_keys = ON")

            # concurrency setup
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA busy_timeout=10000")
            conn.execute("PRAGMA synchronous=NORMAL")

            # do work
            yield conn

            # if this was for a write then bump the version
            if write:
                self._increment_version(conn)

            # commit
            conn.commit()

        except Exception:
            # rollback on any error
            conn.rollback()
            raise
        finally:
            # close the connection
            conn.close()

            # if this was for write then sync (throttled)
            if write:
                self._sync()

    def _sync(self) -> None:
        """Sync to the filestore if more than `log_shared` seconds have elapsed."""
        if self.log_shared is not None and self._sync_filestore is not None:
            if (time.monotonic() - self._sync_time) > self.log_shared:
                with trace_action(logger, "Log Sync", self.location):
                    sync_to_filestore(self, self._sync_filestore)

                self._sync_time = time.monotonic()

    def _increment_version(self, conn: Connection) -> None:
        """Bump the task version and refresh its last_updated timestamp."""
        conn.execute("""
            UPDATE task_database
            SET version = version + 1,
            last_updated = CURRENT_TIMESTAMP;
        """)

    def _get_task_data(self, conn: Connection) -> TaskData:
        """Read version and (JSON-decoded) metrics from task_database."""
        row = conn.execute("SELECT version, metrics FROM task_database").fetchone()
        task_data = dict(version=row["version"], metrics=json.loads(row["metrics"]))
        return TaskData(**task_data)

    def _get_samples(
        self, conn: Connection, resolve_attachments: bool = False
    ) -> Iterator[SampleSummary]:
        """Yield sample summaries ordered by id, optionally resolving attachments."""
        cursor = conn.execute(
            """
            SELECT s.data as sample_data
            FROM samples s
            ORDER BY s.id
            """
        )

        for row in cursor:
            summary = SampleSummary.model_validate_json(row["sample_data"])
            if resolve_attachments:
                summary = self._resolve_sample_attachments(conn, summary)
            yield summary

    def _get_events(
        self,
        conn: Connection,
        id: str | int,
        epoch: int,
        after_event_id: int | None = None,
        resolve_attachments: bool = False,
    ) -> Iterator[EventData]:
        """Yield events for a sample in ascending database-id order."""
        query = """
            SELECT id, event_id, data
            FROM events e WHERE sample_id = ? AND sample_epoch = ?
        """
        params: list[str | int] = [str(id), epoch]

        if after_event_id is not None:
            query += " AND e.id > ?"
            params.append(after_event_id)

        query += " ORDER BY e.id"

        cursor = conn.execute(query, params)

        for row in cursor:
            event = json.loads(row["data"])
            if resolve_attachments:
                event = self._resolve_event_attachments(conn, event)
            yield EventData(
                id=row["id"],
                event_id=row["event_id"],
                sample_id=str(id),
                epoch=epoch,
                event=event,
            )

    def _get_attachments(
        self,
        conn: Connection,
        id: str | int,
        epoch: int,
        after_attachment_id: int | None = None,
    ) -> Iterator[AttachmentData]:
        """Yield attachments for a sample in ascending database-id order."""
        query = """
            SELECT id, hash, content FROM attachments
            WHERE sample_id = ? AND sample_epoch = ?
        """
        # bind the id as text, matching how _get_events queries sample_id
        params: list[str | int] = [str(id), epoch]

        if after_attachment_id is not None:
            query += " AND id > ?"
            params.append(after_attachment_id)

        # explicit ordering: callers track the highest id they have seen
        query += " ORDER BY id"

        cursor = conn.execute(query, params)

        for row in cursor:
            yield AttachmentData(
                id=row["id"],
                sample_id=str(id),
                epoch=epoch,
                hash=row["hash"],
                content=row["content"],
            )

    def _consense_sample(
        self, conn: Connection, sample: SampleSummary
    ) -> SampleSummary:
        """Return a copy of `sample` with input attachments aliased and stored."""
        # alias attachments
        attachments: dict[str, str] = {}
        sample = sample.model_copy(
            update={
                "input": walk_input(
                    sample.input, self._create_attachments_content_fn(attachments)
                )
            }
        )

        # insert attachments
        self._insert_attachments(conn, sample.id, sample.epoch, attachments)

        # return sample with aliases
        return sample

    def _resolve_sample_attachments(
        self, conn: Connection, sample: SampleSummary
    ) -> SampleSummary:
        """Return a copy of `sample` with attachment aliases in its input resolved."""
        return sample.model_copy(
            update={
                "input": walk_input(
                    sample.input, self._resolve_attachments_content_fn(conn)
                )
            }
        )

    def _consense_event(self, conn: Connection, event: SampleEvent) -> SampleEvent:
        """Alias attachments in `event.event` (updated in place) and store them."""
        # alias attachments
        attachments: dict[str, str] = {}
        event.event = walk_events(
            [event.event], self._create_attachments_content_fn(attachments)
        )[0]

        # insert attachments
        self._insert_attachments(conn, event.id, event.epoch, attachments)

        # return events with aliases
        return event

    def _resolve_event_attachments(self, conn: Connection, event: JsonData) -> JsonData:
        """Resolve attachment aliases within an event's JSON data."""
        return walk_json_dict(event, self._resolve_attachments_content_fn(conn))

    def _create_attachments_content_fn(
        self, attachments: dict[str, str]
    ) -> Callable[[str], str]:
        """Build the content function used to collect content into `attachments`."""
        return attachments_content_fn(self.log_images, 100, attachments)

    def _resolve_attachments_content_fn(self, conn: Connection) -> Callable[[str], str]:
        """Build a function mapping attachment aliases back to stored content.

        Text that is not an alias, or whose hash is not found, is returned
        unchanged.
        """

        def content_fn(text: str) -> str:
            if text.startswith(ATTACHMENT_PROTOCOL):
                hash = text.replace(ATTACHMENT_PROTOCOL, "", 1)
                attachments = self._get_attachments_content(conn, [hash])
                content = attachments.get(hash, None)
                if content is not None:
                    return content
                else:
                    return text
            else:
                return text

        return content_fn

    def _insert_attachments(
        self, conn: Connection, id: int | str, epoch: int, attachments: dict[str, str]
    ) -> None:
        """Insert attachments for a sample, ignoring rows that conflict."""
        parameters: list[list[int | str]] = []
        for k, v in attachments.items():
            parameters.append([id, epoch, k, v])

        conn.executemany(
            """
            INSERT OR IGNORE INTO attachments (sample_id, sample_epoch, hash, content)
            VALUES (?, ?, ?, ?)
            """,
            parameters,
        )

    def _get_attachments_content(
        self, conn: Connection, hashes: list[str]
    ) -> dict[str, str | None]:
        """Look up content by hash; hashes that are not found map to None."""
        # Create placeholders for the IN clause
        placeholders = ",".join("?" * len(hashes))

        cursor = conn.execute(
            f"""
            SELECT hash, content
            FROM attachments
            WHERE hash IN ({placeholders})
            """,
            hashes,
        )

        # Create result dictionary with all requested hashes initialized to None
        results: dict[str, str | None] = {hash_: None for hash_ in hashes}

        # Update with found values
        for row in cursor:
            results[row["hash"]] = row["content"]

        return results
535
+
536
+
537
def sync_to_filestore(
    db: SampleBufferDatabase, filestore: SampleBufferFilestore
) -> None:
    """Write new buffered sample data from `db` to `filestore`.

    The manifest's sample list is rebuilt from the samples currently in the
    database (keeping each sample's existing segment list). Events and
    attachments not covered by a sample's last segment are written to one new
    segment file, and the updated manifest is always written at the end.
    """
    # start from the stored manifest, or an empty one when none exists yet
    manifest = filestore.read_manifest() or Manifest()

    # samples currently buffered in the database
    buffered = db.get_samples()
    if buffered is None:
        return
    assert isinstance(buffered, Samples)

    # rebuild the sample manifests so they mirror the database contents,
    # propagating segment lists for samples the manifest already knows about
    rebuilt: list[SampleManifest] = []
    for summary in buffered.samples:
        known_segments: list[int] = next(
            (
                existing.segments
                for existing in manifest.samples
                if existing.summary.id == summary.id
                and existing.summary.epoch == summary.epoch
            ),
            [],
        )
        rebuilt.append(SampleManifest(summary=summary, segments=known_segments))

    manifest.metrics = buffered.metrics
    manifest.samples = rebuilt

    # the new segment gets the id following the last one in the manifest
    previous_segment_id = manifest.segments[-1].id if manifest.segments else 0
    segment_id = previous_segment_id + 1

    # collect per-sample data for the new segment, tracking the largest
    # event and attachment ids seen along the way
    last_event_id = 0
    last_attachment_id = 0
    segment_files: list[SegmentFile] = []
    for entry in manifest.samples:
        # find the segment record (if any) that last included this sample
        entry_last_id = entry.segments[-1] if entry.segments else None
        entry_last_segment = next(
            (seg for seg in manifest.segments if seg.id == entry_last_id),
            None,
        )
        if entry_last_segment is None:
            after_event_id, after_attachment_id = (0, 0)
        else:
            after_event_id = entry_last_segment.last_event_id
            after_attachment_id = entry_last_segment.last_attachment_id

        # read only data newer than what that segment recorded
        sample_data = db.get_sample_data(
            id=entry.summary.id,
            epoch=entry.summary.epoch,
            after_event_id=after_event_id,
            after_attachment_id=after_attachment_id,
        )

        # skip samples with nothing new
        if sample_data is None:
            continue
        if not (len(sample_data.events) > 0 or len(sample_data.attachments) > 0):
            continue

        # queue for the segment file and record the segment on the sample
        segment_files.append(
            SegmentFile(
                id=entry.summary.id,
                epoch=entry.summary.epoch,
                data=sample_data,
            )
        )
        entry.segments.append(segment_id)

        # update maximums
        last_event_id, last_attachment_id = maximum_ids(
            last_event_id, last_attachment_id, sample_data
        )

    # write the segment file (only when there is new data) and register it
    if segment_files:
        filestore.write_segment(segment_id, segment_files)
        new_segment = Segment(
            id=segment_id,
            last_event_id=last_event_id,
            last_attachment_id=last_attachment_id,
        )
        manifest.segments.append(new_segment)

    # always write the manifest so sample adds/deletes are picked up
    filestore.write_manifest(manifest)
644
+
645
+
646
def maximum_ids(
    event_id: int, attachment_id: int, sample_data: SampleData
) -> tuple[int, int]:
    """Return running maximum event and attachment ids including `sample_data`.

    Args:
        event_id: Largest event id seen so far.
        attachment_id: Largest attachment id seen so far.
        sample_data: Data whose `events` and `attachments` ids are folded in.

    Returns:
        Tuple of (event id, attachment id), each the larger of the incoming
        value and the largest id found in `sample_data`.
    """
    # take the maximum over every row rather than trusting the last element,
    # so the result does not depend on the rows arriving sorted by id
    if sample_data.events:
        event_id = max(event_id, max(event.id for event in sample_data.events))
    if sample_data.attachments:
        attachment_id = max(
            attachment_id,
            max(attachment.id for attachment in sample_data.attachments),
        )
    return event_id, attachment_id
654
+
655
+
656
def cleanup_sample_buffer_databases(db_dir: Path | None = None) -> None:
    """Remove buffer databases whose owning process no longer exists.

    Database files are named `<file>.<pid>.db`. Any such file whose pid is
    not a running process is removed.

    Args:
        db_dir: Root directory to scan (defaults to the result of
            `resolve_db_dir()`).
    """
    db_dir = resolve_db_dir(db_dir)
    # databases are created inside per-log-dir subdirectories of the root,
    # so search recursively (this also matches files directly in db_dir)
    for db in db_dir.rglob("*.*.db"):
        _, pid_str, _ = db.name.rsplit(".", 2)
        if pid_str.isdigit() and not psutil.pid_exists(int(pid_str)):
            cleanup_sample_buffer_db(db)
664
+
665
+
666
def cleanup_sample_buffer_db(path: Path) -> None:
    """Delete a buffer database file and its WAL sidecar files.

    Missing files are ignored. Any other failure is logged as a warning
    rather than raised (cleanup is best-effort).

    Args:
        path: Path to the database file.
    """
    try:
        path.unlink(missing_ok=True)
        # connections use WAL journal mode, which keeps "-wal" and "-shm"
        # files next to the database; these can be left behind if the
        # owning process exited without closing its connection
        for suffix in ("-wal", "-shm"):
            path.with_name(path.name + suffix).unlink(missing_ok=True)
    except Exception as ex:
        logger.warning(f"Error cleaning up sample buffer database at {path}: {ex}")
671
+
672
+
673
+ def resolve_db_dir(db_dir: Path | None = None) -> Path:
674
+ return db_dir or inspect_data_dir("samplebuffer")
675
+
676
+
677
def location_dir_and_file(location: str) -> tuple[str, str]:
    """Split a log location into (hash of its directory, its base file name)."""
    hashed_dir = log_dir_hash(dirname(location))
    file_name = basename(location)
    return (hashed_dir, file_name)
681
+
682
+
683
def log_dir_hash(log_dir: str) -> str:
    """Return the SHA-256 hex digest of `log_dir` with trailing slashes removed.

    Both "/" and "\\" are stripped from the end before hashing, so a
    directory hashes the same with or without a trailing separator.
    """
    trimmed = log_dir.rstrip("/\\")
    digest = hashlib.sha256(trimmed.encode())
    return digest.hexdigest()