inspect-ai 0.3.81__py3-none-any.whl → 0.3.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +35 -2
- inspect_ai/_cli/util.py +44 -1
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +13 -4
- inspect_ai/_display/core/results.py +1 -1
- inspect_ai/_display/textual/widgets/task_detail.py +5 -4
- inspect_ai/_eval/eval.py +38 -1
- inspect_ai/_eval/evalset.py +5 -0
- inspect_ai/_eval/run.py +5 -2
- inspect_ai/_eval/task/log.py +53 -6
- inspect_ai/_eval/task/run.py +51 -10
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/file.py +17 -1
- inspect_ai/_util/json.py +36 -1
- inspect_ai/_view/server.py +113 -1
- inspect_ai/_view/www/App.css +1 -1
- inspect_ai/_view/www/dist/assets/index.css +518 -296
- inspect_ai/_view/www/dist/assets/index.js +38803 -36307
- inspect_ai/_view/www/eslint.config.mjs +1 -1
- inspect_ai/_view/www/log-schema.json +13 -0
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/package.json +8 -2
- inspect_ai/_view/www/src/App.tsx +151 -855
- inspect_ai/_view/www/src/api/api-browser.ts +176 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
- inspect_ai/_view/www/src/api/client-api.ts +66 -10
- inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
- inspect_ai/_view/www/src/api/types.ts +107 -2
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
- inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
- inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
- inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -3
- inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
- inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
- inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
- inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
- inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
- inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
- inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
- inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
- inspect_ai/_view/www/src/index.tsx +26 -94
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +67 -28
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +51 -22
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +144 -90
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +82 -35
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +23 -30
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +4 -1
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +3 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +34 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +10 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +25 -17
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +21 -3
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +20 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +105 -85
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +27 -14
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
- inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
- inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +7 -9
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +7 -11
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +8 -13
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +52 -58
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +30 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
- inspect_ai/_view/www/src/scoring/utils.ts +87 -0
- inspect_ai/_view/www/src/state/appSlice.ts +244 -0
- inspect_ai/_view/www/src/state/hooks.ts +397 -0
- inspect_ai/_view/www/src/state/logPolling.ts +196 -0
- inspect_ai/_view/www/src/state/logSlice.ts +214 -0
- inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +311 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +127 -0
- inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
- inspect_ai/_view/www/src/state/scrolling.ts +206 -0
- inspect_ai/_view/www/src/state/store.ts +168 -0
- inspect_ai/_view/www/src/state/store_filter.ts +84 -0
- inspect_ai/_view/www/src/state/utils.ts +23 -0
- inspect_ai/_view/www/src/storage/index.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +2 -0
- inspect_ai/_view/www/src/types.ts +94 -32
- inspect_ai/_view/www/src/utils/attachments.ts +58 -23
- inspect_ai/_view/www/src/utils/logger.ts +52 -0
- inspect_ai/_view/www/src/utils/polling.ts +100 -0
- inspect_ai/_view/www/src/utils/react.ts +30 -0
- inspect_ai/_view/www/src/utils/vscode.ts +1 -1
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +181 -216
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +0 -1
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +98 -39
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +11 -13
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +110 -115
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
- inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
- inspect_ai/_view/www/src/workspace/types.ts +4 -3
- inspect_ai/_view/www/src/workspace/utils.ts +4 -4
- inspect_ai/_view/www/vite.config.js +6 -0
- inspect_ai/_view/www/yarn.lock +370 -354
- inspect_ai/log/_condense.py +26 -0
- inspect_ai/log/_log.py +6 -3
- inspect_ai/log/_recorders/buffer/__init__.py +14 -0
- inspect_ai/log/_recorders/buffer/buffer.py +30 -0
- inspect_ai/log/_recorders/buffer/database.py +685 -0
- inspect_ai/log/_recorders/buffer/filestore.py +259 -0
- inspect_ai/log/_recorders/buffer/types.py +84 -0
- inspect_ai/log/_recorders/eval.py +2 -11
- inspect_ai/log/_recorders/types.py +30 -0
- inspect_ai/log/_transcript.py +27 -1
- inspect_ai/model/_call_tools.py +1 -0
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +1 -0
- inspect_ai/tool/_tool_support_helpers.py +4 -4
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
- inspect_ai/util/_subtask.py +1 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/RECORD +178 -138
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,685 @@
|
|
1
|
+
import hashlib
|
2
|
+
import json
|
3
|
+
import os
|
4
|
+
import sqlite3
|
5
|
+
import time
|
6
|
+
from contextlib import contextmanager
|
7
|
+
from logging import getLogger
|
8
|
+
from pathlib import Path
|
9
|
+
from sqlite3 import Connection
|
10
|
+
from typing import Callable, Iterator, Literal
|
11
|
+
|
12
|
+
import psutil
|
13
|
+
from pydantic import BaseModel
|
14
|
+
from typing_extensions import override
|
15
|
+
|
16
|
+
from inspect_ai._display.core.display import TaskDisplayMetric
|
17
|
+
from inspect_ai._util.appdirs import inspect_data_dir
|
18
|
+
from inspect_ai._util.file import basename, dirname, filesystem
|
19
|
+
from inspect_ai._util.json import to_json_str_safe
|
20
|
+
from inspect_ai._util.trace import trace_action
|
21
|
+
|
22
|
+
from ..._condense import (
|
23
|
+
ATTACHMENT_PROTOCOL,
|
24
|
+
attachments_content_fn,
|
25
|
+
walk_events,
|
26
|
+
walk_input,
|
27
|
+
walk_json_dict,
|
28
|
+
)
|
29
|
+
from ..types import SampleEvent, SampleSummary
|
30
|
+
from .filestore import (
|
31
|
+
Manifest,
|
32
|
+
SampleBufferFilestore,
|
33
|
+
SampleManifest,
|
34
|
+
Segment,
|
35
|
+
SegmentFile,
|
36
|
+
)
|
37
|
+
from .types import (
|
38
|
+
AttachmentData,
|
39
|
+
EventData,
|
40
|
+
JsonData,
|
41
|
+
SampleBuffer,
|
42
|
+
SampleData,
|
43
|
+
Samples,
|
44
|
+
)
|
45
|
+
|
46
|
+
logger = getLogger(__name__)
|
47
|
+
|
48
|
+
|
49
|
+
class TaskData(BaseModel):
    """Task-level state persisted in the 'task_database' table."""

    # version counter bumped on every write transaction (doubles as an etag
    # for change detection in get_samples)
    version: int
    # latest task display metrics (stored as JSON in the 'metrics' column)
    metrics: list[TaskDisplayMetric]
|
52
|
+
|
53
|
+
|
54
|
+
class SampleBufferDatabase(SampleBuffer):
    """SQLite-backed buffer of in-flight sample data for a running eval task.

    Samples, their events, and large content "attachments" are written to a
    per-process SQLite database named '<logfile>.<pid>.db' inside a hashed
    log-dir subdirectory. Every write transaction bumps the `version` column
    of `task_database`, which clients use as an etag for cheap change
    detection. When `log_shared` is set, writes are periodically mirrored to
    a `SampleBufferFilestore` (throttled to at most once per `log_shared`
    seconds).
    """

    SCHEMA = """
    CREATE TABLE IF NOT EXISTS task_database (
        version INTEGER PRIMARY KEY DEFAULT 1,
        metrics TEXT DEFAULT '[]',
        last_updated DATETIME DEFAULT CURRENT_TIMESTAMP
    );

    CREATE TABLE samples (
        id TEXT,
        epoch INTEGER,
        data TEXT, -- JSON containing all other sample fields
        PRIMARY KEY (id, epoch)
    );

    CREATE TABLE events (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        event_id TEXT,
        sample_id TEXT,
        sample_epoch INTEGER,
        data TEXT -- JSON containing full event
    );

    CREATE TABLE attachments (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        sample_id TEXT,
        sample_epoch INTEGER,
        hash TEXT UNIQUE,
        content TEXT
    );

    -- Indices for foreign keys and common queries
    CREATE INDEX IF NOT EXISTS idx_events_sample ON events(sample_id, sample_epoch);
    CREATE INDEX IF NOT EXISTS idx_attachments_hash ON attachments(hash);

    -- Note the version
    INSERT INTO task_database (version) VALUES (1);
    """

    def __init__(
        self,
        location: str,
        *,
        create: bool = True,
        log_images: bool = True,
        log_shared: int | None = None,
        update_interval: int = 2,
        db_dir: Path | None = None,
    ):
        """Open (or create) the buffer database for `location`.

        Args:
            location: Log file location (normalized to a URI).
            create: Create a fresh database for this process (True) or attach
                to an existing one created by another process (False).
            log_images: Pass image content through when aliasing attachments.
            log_shared: Seconds between filestore syncs (None disables syncing).
            update_interval: Suggested client refresh interval (seconds).
            db_dir: Override the default database directory.

        Raises:
            FileNotFoundError: If `create` is False and no matching database
                file exists.
        """
        self.location = filesystem(location).path_as_uri(location)
        self.log_images = log_images
        self.log_shared = log_shared
        self.update_interval = update_interval

        # location subdir and file
        dir, file = location_dir_and_file(self.location)

        # establish dirs
        db_dir = resolve_db_dir(db_dir)
        log_subdir = db_dir / dir

        # if we are creating then create dirs, use filename w/pid,
        # and create the database as required
        if create:
            log_subdir.mkdir(parents=True, exist_ok=True)
            self.db_path = log_subdir / f"{file}.{os.getpid()}.db"

            # initialize the database schema
            with self._get_connection() as conn:
                conn.executescript(self.SCHEMA)
                conn.commit()

        # if we are not creating then find a log in an existing directory
        # which matches the base filename (it will also have a pid)
        else:
            logs = list(log_subdir.glob(f"{file}.*.db"))
            if len(logs) > 0:
                self.db_path = logs[0]
            else:
                # NOTE: fixed missing f-prefix (the message previously rendered
                # the literal text "{location}")
                raise FileNotFoundError(f"Log database for '{location}' not found.")

        # create sync filestore if log_shared
        self._sync_filestore = (
            SampleBufferFilestore(location, update_interval=log_shared)
            if log_shared
            else None
        )
        self._sync_time = time.monotonic()

    def start_sample(self, sample: SampleSummary) -> None:
        """Insert a newly started sample (attachment content aliased out)."""
        with self._get_connection(write=True) as conn:
            sample = self._consense_sample(conn, sample)
            conn.execute(
                """
                INSERT INTO samples (id, epoch, data)
                VALUES (?, ?, ?)
                """,
                (str(sample.id), sample.epoch, to_json_str_safe(sample)),
            )

    def log_events(self, events: list[SampleEvent]) -> None:
        """Append a batch of events in a single INSERT statement."""
        # guard empty batch (zero placeholder groups would be invalid SQL)
        if not events:
            return

        with self._get_connection(write=True) as conn:
            # collect the values for all events
            values: list[str | int] = []
            for event in events:
                event = self._consense_event(conn, event)
                values.extend(
                    (
                        event.event.id_,
                        str(event.id),
                        event.epoch,
                        to_json_str_safe(event.event),
                    )
                )

            # dynamically create the SQL query (one placeholder group per event)
            placeholders = ", ".join(["(?, ?, ?, ?)"] * len(events))
            sql = f"""
            INSERT INTO events (event_id, sample_id, sample_epoch, data)
            VALUES {placeholders}
            """

            # Insert all rows
            conn.execute(sql, values)

    def complete_sample(self, summary: SampleSummary) -> None:
        """Replace a sample's stored summary with its completed state."""
        with self._get_connection(write=True) as conn:
            summary = self._consense_sample(conn, summary)
            conn.execute(
                """
                UPDATE samples SET data = ? WHERE id = ? and epoch = ?
                """,
                (to_json_str_safe(summary), str(summary.id), summary.epoch),
            )

    def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
        """Store the latest task metrics (serialized to JSON)."""
        with self._get_connection(write=True) as conn:
            conn.execute(
                """
                UPDATE task_database
                SET metrics = ?,
                    last_updated = CURRENT_TIMESTAMP;
                """,
                [to_json_str_safe(metrics)],
            )

    def remove_samples(self, samples: list[tuple[str | int, int]]) -> None:
        """Remove the given (id, epoch) samples and their events.

        Uses parameterized deletes rather than interpolating ids into the
        SQL text (the previous form broke on ids containing quotes and
        produced invalid SQL for an empty list).
        """
        with self._get_connection(write=True) as conn:
            cursor = conn.cursor()
            try:
                # str(id) matches how sample ids are stored (TEXT columns)
                params = [(str(sid), epoch) for sid, epoch in samples]

                # Delete associated events first due to foreign key constraint
                cursor.executemany(
                    "DELETE FROM events WHERE sample_id = ? AND sample_epoch = ?",
                    params,
                )

                # Then delete the samples
                cursor.executemany(
                    "DELETE FROM samples WHERE id = ? AND epoch = ?",
                    params,
                )
            finally:
                cursor.close()

    def cleanup(self) -> None:
        """Remove the database file (and any filestore artifacts)."""
        cleanup_sample_buffer_db(self.db_path)
        if self._sync_filestore is not None:
            self._sync_filestore.cleanup()

    @classmethod
    @override
    def running_tasks(cls, log_dir: str) -> list[str] | None:
        """List base log filenames with buffer databases under `log_dir`.

        Returns None when no buffer directory exists for the log dir.
        """
        log_subdir = log_dir_hash(log_dir)
        db_dir = resolve_db_dir() / log_subdir

        if db_dir.exists():
            # filenames are '<name>.<pid>.db' -- strip the pid and extension
            logs = [log.name.rsplit(".", 2)[0] for log in db_dir.glob("*.*.db")]
            return logs
        else:
            return None

    @override
    def get_samples(
        self, etag: str | None = None
    ) -> Samples | Literal["NotModified"] | None:
        """Read all sample summaries plus task metrics.

        Returns "NotModified" when `etag` matches the current db version,
        or None if the database no longer exists (e.g. removed by cleanup).
        """
        if not self.db_path.exists():
            return None

        try:
            with self._get_connection() as conn:
                # note version
                task_data = self._get_task_data(conn)

                # apply etag if requested
                if etag == str(task_data.version):
                    return "NotModified"

                # fetch data
                return Samples(
                    samples=list(self._get_samples(conn)),
                    metrics=task_data.metrics,
                    refresh=self.update_interval,
                    etag=str(task_data.version),
                )
        except FileNotFoundError:
            # db removed out from under us -- treat as no data
            return None

    @override
    def get_sample_data(
        self,
        id: str | int,
        epoch: int,
        after_event_id: int | None = None,
        after_attachment_id: int | None = None,
    ) -> SampleData | None:
        """Read events/attachments for a sample (optionally incremental).

        The `after_*` ids enable incremental reads: only rows with larger
        auto-increment ids are returned. Returns None if the database no
        longer exists.
        """
        if not self.db_path.exists():
            return None

        try:
            with self._get_connection() as conn:
                return SampleData(
                    events=list(self._get_events(conn, id, epoch, after_event_id)),
                    attachments=list(
                        self._get_attachments(conn, id, epoch, after_attachment_id)
                    ),
                )
        except FileNotFoundError:
            return None

    @contextmanager
    def _get_connection(self, *, write: bool = False) -> Iterator[Connection]:
        """Get a database connection.

        On successful completion of a `write=True` block the db version is
        bumped and the transaction committed; on error everything is rolled
        back. After a write, a (throttled) filestore sync is attempted.
        """
        conn = sqlite3.connect(self.db_path, timeout=10)
        conn.row_factory = sqlite3.Row  # Enable row factory for named columns
        try:
            # Enable foreign key constraints
            conn.execute("PRAGMA foreign_keys = ON")

            # concurrency setup
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA busy_timeout=10000")
            conn.execute("PRAGMA synchronous=NORMAL")

            # do work
            yield conn

            # if this was for a write then bump the version
            if write:
                conn.execute("""
                    UPDATE task_database
                    SET version = version + 1,
                        last_updated = CURRENT_TIMESTAMP;
                """)

            # commit
            conn.commit()

        except Exception:
            # rollback on any error
            conn.rollback()
            raise
        finally:
            # close the connection
            conn.close()

        # if this was for write then sync (throttled)
        if write:
            self._sync()

    def _sync(self) -> None:
        """Mirror the db to the filestore at most every `log_shared` seconds."""
        if self.log_shared is not None and self._sync_filestore is not None:
            if (time.monotonic() - self._sync_time) > self.log_shared:
                with trace_action(logger, "Log Sync", self.location):
                    sync_to_filestore(self, self._sync_filestore)

                self._sync_time = time.monotonic()

    def _increment_version(self, conn: Connection) -> None:
        """Bump the db version/timestamp (same statement used by write blocks)."""
        conn.execute("""
            UPDATE task_database
            SET version = version + 1,
                last_updated = CURRENT_TIMESTAMP;
        """)

    def _get_task_data(self, conn: Connection) -> TaskData:
        """Read the version and metrics row from task_database."""
        row = conn.execute("SELECT version, metrics FROM task_database").fetchone()
        task_data = dict(version=row["version"], metrics=json.loads(row["metrics"]))
        return TaskData(**task_data)

    def _get_samples(
        self, conn: Connection, resolve_attachments: bool = False
    ) -> Iterator[SampleSummary]:
        """Yield all sample summaries (ordered by id)."""
        cursor = conn.execute(
            """
            SELECT s.data as sample_data
            FROM samples s
            ORDER BY s.id
        """
        )

        for row in cursor:
            summary = SampleSummary.model_validate_json(row["sample_data"])
            if resolve_attachments:
                summary = self._resolve_sample_attachments(conn, summary)
            yield summary

    def _get_events(
        self,
        conn: Connection,
        id: str | int,
        epoch: int,
        after_event_id: int | None = None,
        resolve_attachments: bool = False,
    ) -> Iterator[EventData]:
        """Yield events for a sample, optionally only after a given row id."""
        query = """
            SELECT id, event_id, data
            FROM events e WHERE sample_id = ? AND sample_epoch = ?
        """
        params: list[str | int] = [str(id), epoch]

        if after_event_id is not None:
            query += " AND e.id > ?"
            params.append(after_event_id)

        query += " ORDER BY e.id"

        cursor = conn.execute(query, params)

        for row in cursor:
            event = json.loads(row["data"])
            if resolve_attachments:
                event = self._resolve_event_attachments(conn, event)
            yield EventData(
                id=row["id"],
                event_id=row["event_id"],
                sample_id=str(id),
                epoch=epoch,
                event=event,
            )

    def _get_attachments(
        self,
        conn: Connection,
        id: str | int,
        epoch: int,
        after_attachment_id: int | None = None,
    ) -> Iterator[AttachmentData]:
        """Yield attachments for a sample, optionally only after a row id."""
        query = """
            SELECT id, hash, content FROM attachments
            WHERE sample_id = ? AND sample_epoch = ?
        """
        # str(id) for consistency with how sample ids are written elsewhere
        params: list[str | int] = [str(id), epoch]

        if after_attachment_id is not None:
            query += " AND id > ?"
            params.append(after_attachment_id)

        cursor = conn.execute(query, params)

        for row in cursor:
            yield AttachmentData(
                id=row["id"],
                sample_id=str(id),
                epoch=epoch,
                hash=row["hash"],
                content=row["content"],
            )

    def _consense_sample(
        self, conn: Connection, sample: SampleSummary
    ) -> SampleSummary:
        """Alias attachment content out of the sample input and store it."""
        # alias attachments
        attachments: dict[str, str] = {}
        sample = sample.model_copy(
            update={
                "input": walk_input(
                    sample.input, self._create_attachments_content_fn(attachments)
                )
            }
        )

        # insert attachments
        self._insert_attachments(conn, sample.id, sample.epoch, attachments)

        # return sample with aliases
        return sample

    def _resolve_sample_attachments(
        self, conn: Connection, sample: SampleSummary
    ) -> SampleSummary:
        """Inverse of _consense_sample: swap aliases back for content."""
        return sample.model_copy(
            update={
                "input": walk_input(
                    sample.input, self._resolve_attachments_content_fn(conn)
                )
            }
        )

    def _consense_event(self, conn: Connection, event: SampleEvent) -> SampleEvent:
        """Alias attachment content out of the event and store it."""
        # alias attachments
        attachments: dict[str, str] = {}
        event.event = walk_events(
            [event.event], self._create_attachments_content_fn(attachments)
        )[0]

        # insert attachments
        self._insert_attachments(conn, event.id, event.epoch, attachments)

        # return events with aliases
        return event

    def _resolve_event_attachments(self, conn: Connection, event: JsonData) -> JsonData:
        """Inverse of _consense_event: swap aliases back for content."""
        return walk_json_dict(event, self._resolve_attachments_content_fn(conn))

    def _create_attachments_content_fn(
        self, attachments: dict[str, str]
    ) -> Callable[[str], str]:
        """Content fn that collects aliased content into `attachments`.

        NOTE(review): 100 is presumably the minimum content length that
        triggers aliasing -- confirm against attachments_content_fn.
        """
        return attachments_content_fn(self.log_images, 100, attachments)

    def _resolve_attachments_content_fn(self, conn: Connection) -> Callable[[str], str]:
        """Content fn that resolves 'attachment://<hash>' refs from the db."""

        def content_fn(text: str) -> str:
            if text.startswith(ATTACHMENT_PROTOCOL):
                hash = text.replace(ATTACHMENT_PROTOCOL, "", 1)
                attachments = self._get_attachments_content(conn, [hash])
                content = attachments.get(hash, None)
                if content is not None:
                    return content
                else:
                    # unknown hash -- leave the alias in place
                    return text
            else:
                return text

        return content_fn

    def _insert_attachments(
        self, conn: Connection, id: int | str, epoch: int, attachments: dict[str, str]
    ) -> None:
        """Insert attachment content keyed by hash (duplicates ignored)."""
        parameters: list[list[int | str]] = []
        for k, v in attachments.items():
            # str(id) for consistency with how sample ids are written elsewhere
            parameters.append([str(id), epoch, k, v])

        conn.executemany(
            """
            INSERT OR IGNORE INTO attachments (sample_id, sample_epoch, hash, content)
            VALUES (?, ?, ?, ?)
            """,
            parameters,
        )

    def _get_attachments_content(
        self, conn: Connection, hashes: list[str]
    ) -> dict[str, str | None]:
        """Look up attachment content by hash (None for misses)."""
        # guard empty input ('IN ()' is invalid SQL)
        if not hashes:
            return {}

        # Create placeholders for the IN clause
        placeholders = ",".join("?" * len(hashes))

        cursor = conn.execute(
            f"""
            SELECT hash, content
            FROM attachments
            WHERE hash IN ({placeholders})
            """,
            hashes,
        )

        # Create result dictionary with all requested hashes initialized to None
        results: dict[str, str | None] = {hash_: None for hash_ in hashes}

        # Update with found values
        for row in cursor:
            results[row["hash"]] = row["content"]

        return results
|
535
|
+
|
536
|
+
|
537
|
+
def sync_to_filestore(
    db: SampleBufferDatabase, filestore: SampleBufferFilestore
) -> None:
    """Incrementally mirror a sample buffer database to a filestore.

    Reads the filestore's manifest, rebuilds its sample list from the
    current db contents (preserving each sample's existing segment list),
    then writes one new segment containing any events/attachments added
    since each sample's last recorded segment. The manifest is always
    rewritten, even when no new segment was produced, so sample
    additions/removals still propagate.
    """
    # read existing manifest (create an empty one if there is none)
    manifest = filestore.read_manifest() or Manifest()

    # prepare a list of buffered samples from the db
    samples = db.get_samples()
    if samples is None:
        return
    # no etag was passed, so "NotModified" cannot occur here
    assert isinstance(samples, Samples)

    # at the end of the sync, the manifest should contain only the samples
    # in the db -- create a new list of sample manifests propagating the
    # segment lists from the existing sample manifests
    sample_manifests: list[SampleManifest] = []
    for sample in samples.samples:
        # lookup sample segments in the existing manifest
        segments: list[int] = next(
            (
                s.segments
                for s in manifest.samples
                if s.summary.id == sample.id and s.summary.epoch == sample.epoch
            ),
            [],
        )
        # add to manifests
        sample_manifests.append(SampleManifest(summary=sample, segments=segments))

    # draft of new manifest has the new sample list and the existing segments
    manifest.metrics = samples.metrics
    manifest.samples = sample_manifests

    # determine what segment data we already have so we can limit
    # sample queries accordingly
    if len(manifest.segments) > 0:
        last_segment = manifest.segments[-1]
        last_segment_id = last_segment.id
    else:
        last_segment_id = 0

    # work through samples and create segment files for those that need it
    # (update the manifest with the segment id). track the largest event
    # and attachment ids we've seen
    segment_id = last_segment_id + 1
    last_event_id = 0
    last_attachment_id = 0
    segment_files: list[SegmentFile] = []
    for manifest_sample in manifest.samples:
        # get last ids we've seen for this sample
        sample_last_segment_id = (
            manifest_sample.segments[-1] if manifest_sample.segments else None
        )
        sample_last_segment = next(
            (
                segment
                for segment in manifest.segments
                if segment.id == sample_last_segment_id
            ),
            None,
        )
        if sample_last_segment is not None:
            after_event_id = sample_last_segment.last_event_id
            after_attachment_id = sample_last_segment.last_attachment_id
        else:
            after_event_id, after_attachment_id = (0, 0)

        # get sample data
        sample_data = db.get_sample_data(
            id=manifest_sample.summary.id,
            epoch=manifest_sample.summary.epoch,
            after_event_id=after_event_id,
            after_attachment_id=after_attachment_id,
        )
        # if we got sample data....
        if sample_data is not None and (
            len(sample_data.events) > 0 or len(sample_data.attachments) > 0
        ):
            # add to segment file
            segment_files.append(
                SegmentFile(
                    id=manifest_sample.summary.id,
                    epoch=manifest_sample.summary.epoch,
                    data=sample_data,
                )
            )
            # update manifest
            manifest_sample.segments.append(segment_id)

            # update maximums
            last_event_id, last_attachment_id = maximum_ids(
                last_event_id, last_attachment_id, sample_data
            )

    # write the segment file and update the manifest
    if len(segment_files) > 0:
        filestore.write_segment(segment_id, segment_files)
        manifest.segments.append(
            Segment(
                id=segment_id,
                last_event_id=last_event_id,
                last_attachment_id=last_attachment_id,
            )
        )

    # write the manifest (do this even if we had no segments to pickup adds/deletes)
    filestore.write_manifest(manifest)
|
644
|
+
|
645
|
+
|
646
|
+
def maximum_ids(
    event_id: int, attachment_id: int, sample_data: SampleData
) -> tuple[int, int]:
    """Fold the largest event/attachment row ids from `sample_data` into the
    running maximums and return the updated (event_id, attachment_id) pair.

    Rows are ordered by id, so the last element carries the largest id.
    """
    events = sample_data.events
    if events:
        candidate = events[-1].id
        if candidate > event_id:
            event_id = candidate

    attachments = sample_data.attachments
    if attachments:
        candidate = attachments[-1].id
        if candidate > attachment_id:
            attachment_id = candidate

    return event_id, attachment_id
|
654
|
+
|
655
|
+
|
656
|
+
def cleanup_sample_buffer_databases(db_dir: Path | None = None) -> None:
    """Delete buffer databases whose owning process is no longer running.

    Database files are named '<name>.<pid>.db'; the embedded pid is checked
    against the live process table and orphaned files are removed.
    """
    base_dir = resolve_db_dir(db_dir)
    for candidate in base_dir.glob("*.*.db"):
        _, pid_token, _ = candidate.name.rsplit(".", 2)
        if not pid_token.isdigit():
            continue
        if psutil.pid_exists(int(pid_token)):
            continue
        cleanup_sample_buffer_db(candidate)
|
664
|
+
|
665
|
+
|
666
|
+
def cleanup_sample_buffer_db(path: Path) -> None:
    """Remove a sample buffer database file, swallowing any failure.

    Deletion is best-effort: a file that cannot be removed is logged as a
    warning rather than raised. A missing file is not an error.
    """
    try:
        path.unlink(missing_ok=True)
    except Exception as ex:
        logger.warning(f"Error cleaning up sample buffer database at {path}: {ex}")
|
671
|
+
|
672
|
+
|
673
|
+
def resolve_db_dir(db_dir: Path | None = None) -> Path:
|
674
|
+
return db_dir or inspect_data_dir("samplebuffer")
|
675
|
+
|
676
|
+
|
677
|
+
def location_dir_and_file(location: str) -> tuple[str, str]:
    """Split a log location into its hashed parent directory and base name."""
    parent_hash = log_dir_hash(dirname(location))
    return parent_hash, basename(location)
|
681
|
+
|
682
|
+
|
683
|
+
def log_dir_hash(log_dir: str) -> str:
    """Return a stable sha256 hex digest identifying a log directory.

    Trailing slashes/backslashes are stripped first so that 'logs' and
    'logs/' map to the same hash.
    """
    normalized = log_dir.rstrip("/\\")
    return hashlib.sha256(normalized.encode()).hexdigest()
|