honker 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +193 -2
- data/ext/honker/extconf.rb +69 -0
- data/ext/honker/honker-core/Cargo.toml +49 -0
- data/ext/honker/honker-core/LICENSE +6 -0
- data/ext/honker/honker-core/LICENSE-APACHE +176 -0
- data/ext/honker/honker-core/LICENSE-MIT +21 -0
- data/ext/honker/honker-core/README.md +30 -0
- data/ext/honker/honker-core/src/cron.rs +473 -0
- data/ext/honker/honker-core/src/honker_ops.rs +1518 -0
- data/ext/honker/honker-core/src/kernel_watcher.rs +434 -0
- data/ext/honker/honker-core/src/lib.rs +3116 -0
- data/ext/honker/honker-core/src/shm_watcher.rs +250 -0
- data/ext/honker/honker-extension/Cargo.toml +36 -0
- data/ext/honker/honker-extension/LICENSE +6 -0
- data/ext/honker/honker-extension/LICENSE-APACHE +176 -0
- data/ext/honker/honker-extension/LICENSE-MIT +21 -0
- data/ext/honker/honker-extension/README.md +41 -0
- data/ext/honker/honker-extension/src/lib.rs +330 -0
- data/honker.gemspec +24 -1
- data/lib/honker/README.md +28 -0
- data/lib/honker/railtie.rb +11 -0
- data/lib/honker/scheduler.rb +59 -0
- data/lib/honker/version.rb +1 -1
- data/lib/honker.rb +111 -3
- metadata +39 -8
|
@@ -0,0 +1,3116 @@
|
|
|
1
|
+
//! Shared Rust core for the honker bindings.
|
|
2
|
+
//!
|
|
3
|
+
//! This crate is NOT intended for direct use. It's the plain-Rust
|
|
4
|
+
//! foundation that three binding crates depend on:
|
|
5
|
+
//!
|
|
6
|
+
//! * `honker` — PyO3 Python extension
|
|
7
|
+
//! * `honker-extension` — SQLite loadable extension (cdylib)
|
|
8
|
+
//! * `honker-node` — napi-rs Node.js binding
|
|
9
|
+
//!
|
|
10
|
+
//! Moving this code here once avoids the three-copies-of-the-same-SQL
|
|
11
|
+
//! problem every binding would otherwise suffer. Behavioral drift
|
|
12
|
+
//! between the three bindings was a real risk — one would get a new
|
|
13
|
+
//! PRAGMA, one wouldn't, and silent inconsistencies would surface only
|
|
14
|
+
//! when a Python process and a Node process tried to share a `.db`
|
|
15
|
+
//! file.
|
|
16
|
+
//!
|
|
17
|
+
//! What's here:
|
|
18
|
+
//!
|
|
19
|
+
//! - [`open_conn`] — open a SQLite connection with the library's
|
|
20
|
+
//! PRAGMA defaults (WAL, synchronous=NORMAL, 32MB cache, etc.).
|
|
21
|
+
//! - [`attach_notify`] — create `_honker_notifications` and
|
|
22
|
+
//! register the `notify(channel, payload)` SQL scalar function.
|
|
23
|
+
//! - [`Writer`] — single-connection write slot with blocking
|
|
24
|
+
//! acquire, non-blocking try_acquire, and release.
|
|
25
|
+
//! - [`Readers`] — bounded pool of reader connections that open
|
|
26
|
+
//! lazily up to a max.
|
|
27
|
+
//! - [`UpdateWatcher`] — 1 ms PRAGMA-polling thread that fires a
|
|
28
|
+
//! callback on every database commit. Uses `PRAGMA data_version`
|
|
29
|
+
//! for precise change detection, with a periodic stat identity check
|
|
30
|
+
//! to detect file replacement. Bindings wrap this to surface wake
|
|
31
|
+
//! events to their language's async primitive.
|
|
32
|
+
//!
|
|
33
|
+
//! Anything language-specific — PyO3 classes, napi classes, SQLite
|
|
34
|
+
//! entry-point symbols, row-materialization into Python dicts or JS
|
|
35
|
+
//! objects — stays in the respective binding crate.
|
|
36
|
+
|
|
37
|
+
pub mod cron;
|
|
38
|
+
mod honker_ops;
|
|
39
|
+
#[cfg(feature = "kernel-watcher")]
|
|
40
|
+
mod kernel_watcher;
|
|
41
|
+
#[cfg(feature = "shm-fast-path")]
|
|
42
|
+
mod shm_watcher;
|
|
43
|
+
|
|
44
|
+
pub use honker_ops::attach_honker_functions;
|
|
45
|
+
|
|
46
|
+
use parking_lot::{Condvar, Mutex};
|
|
47
|
+
use rusqlite::functions::FunctionFlags;
|
|
48
|
+
use rusqlite::{Connection, OpenFlags, ffi};
|
|
49
|
+
use std::collections::HashMap;
|
|
50
|
+
use std::path::{Path, PathBuf};
|
|
51
|
+
use std::sync::Arc;
|
|
52
|
+
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
|
53
|
+
use std::sync::mpsc::{SyncSender, TrySendError};
|
|
54
|
+
use std::time::{Duration, Instant};
|
|
55
|
+
|
|
56
|
+
// ---------------------------------------------------------------------
|
|
57
|
+
// Watcher backend configuration
|
|
58
|
+
// ---------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
/// Selects the mechanism that detects commits to the database file.
///
/// `Polling` is the default: a 1 ms `PRAGMA data_version` loop, proven
/// correct across all platforms. The optional backends are
/// **experimental** — they must first prove equivalence to the polling
/// path before being relied on for correctness.
///
/// The enum is comparable (`PartialEq`/`Eq`) so bindings and tests can
/// check which backend a parse or a config resolved to.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum WatcherBackend {
    /// Default: 1 ms `PRAGMA data_version` polling loop.
    #[default]
    Polling,
    /// OS kernel filesystem notifications (experimental).
    ///
    /// Fires `on_change()` on every non-Access filesystem event in the
    /// db's parent directory plus per-file events on `-wal`/`-shm`.
    /// Spurious wakes are possible (consumers re-read state and dedupe).
    /// Missed wakes are possible if the OS drops events; the consumer's
    /// `idle_poll_s` is the only backstop. Setup failures log and
    /// silently disable — there is no fall-back to polling.
    #[cfg(feature = "kernel-watcher")]
    KernelWatch,
    /// mmap `-shm` WAL index fast path (experimental).
    ///
    /// Reads `iChange` (offset 8 in the WAL index header) at 100 µs
    /// cadence and fires `on_change()` when it advances. WAL mode only.
    /// Trusts the on-disk shm layout (verified via the equivalence
    /// test at build time). If the layout changes or the `-shm` file
    /// is recreated mid-flight, wakes silently stop until restart.
    #[cfg(feature = "shm-fast-path")]
    ShmFastPath,
}
|
|
91
|
+
|
|
92
|
+
/// Configuration passed to [`UpdateWatcher::spawn_with_config`] and
|
|
93
|
+
/// [`SharedUpdateWatcher::new_with_config`].
|
|
94
|
+
#[derive(Debug, Clone, Default)]
|
|
95
|
+
pub struct WatcherConfig {
|
|
96
|
+
pub backend: WatcherBackend,
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
impl WatcherBackend {
|
|
100
|
+
/// Parse a binding-level string into a backend. Shared across
|
|
101
|
+
/// bindings so the accepted aliases stay in lockstep. If the
|
|
102
|
+
/// requested backend is not compiled in, returns an error; callers
|
|
103
|
+
/// must not silently substitute polling after an explicit opt-in.
|
|
104
|
+
///
|
|
105
|
+
/// Accepted: `None` / `"polling"` / `"poll"`,
|
|
106
|
+
/// `"kernel"` / `"kernel-watcher"`, `"shm"` / `"shm-fast-path"`.
|
|
107
|
+
pub fn parse(name: Option<&str>) -> Result<Self, String> {
|
|
108
|
+
match name {
|
|
109
|
+
None | Some("polling" | "poll") => Ok(WatcherBackend::Polling),
|
|
110
|
+
Some("kernel" | "kernel-watcher") => {
|
|
111
|
+
#[cfg(feature = "kernel-watcher")]
|
|
112
|
+
{
|
|
113
|
+
Ok(WatcherBackend::KernelWatch)
|
|
114
|
+
}
|
|
115
|
+
#[cfg(not(feature = "kernel-watcher"))]
|
|
116
|
+
{
|
|
117
|
+
Err(
|
|
118
|
+
"watcher backend 'kernel' requires the kernel-watcher Cargo feature"
|
|
119
|
+
.to_string(),
|
|
120
|
+
)
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
Some("shm" | "shm-fast-path") => {
|
|
124
|
+
#[cfg(feature = "shm-fast-path")]
|
|
125
|
+
{
|
|
126
|
+
Ok(WatcherBackend::ShmFastPath)
|
|
127
|
+
}
|
|
128
|
+
#[cfg(not(feature = "shm-fast-path"))]
|
|
129
|
+
{
|
|
130
|
+
Err(
|
|
131
|
+
"watcher backend 'shm' requires the shm-fast-path Cargo feature"
|
|
132
|
+
.to_string(),
|
|
133
|
+
)
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
Some(other) => Err(format!(
|
|
137
|
+
"unknown watcher backend {other:?}; valid: None, 'polling', 'kernel', 'shm'"
|
|
138
|
+
)),
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/// Verify the backend can actually initialize for `db_path`. Bindings
|
|
143
|
+
/// call this at `honker.open()` time so a backend that can't run
|
|
144
|
+
/// errors loudly instead of silently producing no wakes. Returns a
|
|
145
|
+
/// human-readable reason on failure.
|
|
146
|
+
pub fn probe(&self, db_path: &Path) -> Result<(), String> {
|
|
147
|
+
match self {
|
|
148
|
+
WatcherBackend::Polling => {
|
|
149
|
+
let _ = db_path;
|
|
150
|
+
Ok(())
|
|
151
|
+
}
|
|
152
|
+
#[cfg(feature = "kernel-watcher")]
|
|
153
|
+
WatcherBackend::KernelWatch => kernel_watcher::probe(db_path),
|
|
154
|
+
#[cfg(feature = "shm-fast-path")]
|
|
155
|
+
WatcherBackend::ShmFastPath => shm_watcher::probe(db_path),
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
#[derive(thiserror::Error, Debug)]
|
|
161
|
+
pub enum Error {
|
|
162
|
+
#[error("Database error: {0}")]
|
|
163
|
+
Sqlite(#[from] rusqlite::Error),
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
// ---------------------------------------------------------------------
|
|
167
|
+
// PRAGMAs
|
|
168
|
+
// ---------------------------------------------------------------------
|
|
169
|
+
|
|
170
|
+
/// PRAGMA batch run on every connection the library opens.
///
/// Why each setting is here:
///
/// * `journal_mode = WAL` — concurrent readers alongside one writer.
/// * `synchronous = NORMAL` — the WAL is fsynced at checkpoint rather
///   than on every commit. Safe against application crashes; an OS
///   crash may lose the last few un-checkpointed transactions.
/// * `busy_timeout = 5000` — wait up to 5 s for the writer lock before
///   returning `SQLITE_BUSY`.
/// * `foreign_keys = ON` — enforce FK constraints (off by default in
///   SQLite, a real footgun).
/// * `cache_size = -32000` — roughly 32 MB of page cache (the SQLite
///   default is about 2 MB).
/// * `temp_store = MEMORY` — temp B-trees live in RAM, not on disk.
/// * `wal_autocheckpoint = 10000` — checkpoint every 10k WAL pages,
///   cutting fsync frequency 10× versus the default of 1k.
pub const DEFAULT_PRAGMAS: &str = "PRAGMA journal_mode = WAL;
PRAGMA synchronous = NORMAL;
PRAGMA busy_timeout = 5000;
PRAGMA foreign_keys = ON;
PRAGMA cache_size = -32000;
PRAGMA temp_store = MEMORY;
PRAGMA wal_autocheckpoint = 10000;";
|
|
191
|
+
|
|
192
|
+
/// Apply the library's default PRAGMAs to an already-open connection.
|
|
193
|
+
/// Idempotent.
|
|
194
|
+
pub fn apply_default_pragmas(conn: &Connection) -> rusqlite::Result<()> {
|
|
195
|
+
conn.execute_batch(DEFAULT_PRAGMAS)
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
// ---------------------------------------------------------------------
|
|
199
|
+
// notify() SQL function + notifications schema
|
|
200
|
+
// ---------------------------------------------------------------------
|
|
201
|
+
|
|
202
|
+
/// Install the `_honker_notifications` table and the
|
|
203
|
+
/// `notify(channel, payload)` SQL scalar function on `conn`. Idempotent.
|
|
204
|
+
///
|
|
205
|
+
/// `notify()` is the public cross-process primitive. Callers do:
|
|
206
|
+
///
|
|
207
|
+
/// ```sql
|
|
208
|
+
/// BEGIN IMMEDIATE;
|
|
209
|
+
/// INSERT INTO orders ...;
|
|
210
|
+
/// SELECT notify('orders', '{"id":42}');
|
|
211
|
+
/// COMMIT;
|
|
212
|
+
/// ```
|
|
213
|
+
///
|
|
214
|
+
/// The scalar function returns the INSERTed row id. Listeners watch
|
|
215
|
+
/// database updates and SELECT new rows by channel.
|
|
216
|
+
///
|
|
217
|
+
/// Pruning is NOT done here. Callers invoke
|
|
218
|
+
/// `Database.prune_notifications(older_than_s, max_keep)` when they want
|
|
219
|
+
/// to trim the table. No magic timer.
|
|
220
|
+
pub fn attach_notify(conn: &Connection) -> Result<(), Error> {
|
|
221
|
+
conn.execute_batch(
|
|
222
|
+
"CREATE TABLE IF NOT EXISTS _honker_notifications (
|
|
223
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
224
|
+
channel TEXT NOT NULL,
|
|
225
|
+
payload TEXT NOT NULL,
|
|
226
|
+
created_at INTEGER NOT NULL DEFAULT (unixepoch())
|
|
227
|
+
);
|
|
228
|
+
CREATE INDEX IF NOT EXISTS _honker_notifications_recent
|
|
229
|
+
ON _honker_notifications(channel, id);",
|
|
230
|
+
)?;
|
|
231
|
+
|
|
232
|
+
conn.create_scalar_function("notify", 2, FunctionFlags::SQLITE_UTF8, |ctx| {
|
|
233
|
+
let channel: String = ctx.get(0)?;
|
|
234
|
+
let payload: String = ctx.get(1)?;
|
|
235
|
+
let db = unsafe { ctx.get_connection() }?;
|
|
236
|
+
let mut ins = db.prepare_cached(
|
|
237
|
+
"INSERT INTO _honker_notifications (channel, payload) VALUES (?1, ?2)",
|
|
238
|
+
)?;
|
|
239
|
+
let id = ins.insert(rusqlite::params![channel, payload])?;
|
|
240
|
+
Ok(id)
|
|
241
|
+
})?;
|
|
242
|
+
|
|
243
|
+
Ok(())
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// ---------------------------------------------------------------------
|
|
247
|
+
// honker queue schema
|
|
248
|
+
// ---------------------------------------------------------------------
|
|
249
|
+
|
|
250
|
+
/// The one authoritative DDL script for the honker schema. Keeping it
/// here means the Python binding's `Queue._init_schema`, the SQLite
/// loadable extension's `honker_bootstrap()`, and any future binding
/// all create exactly the same tables.
///
/// Tables and indexes created:
///
/// * `_honker_live` — pending + processing jobs. The partial index
///   `_honker_live_claim` is restricted to those two states so dead-row
///   history never slows down the claim hot path. Two further partial
///   indexes cover pending rows by `run_at` and processing rows by
///   `claim_expires_at`.
/// * `_honker_dead` — terminal rows (retry-exhausted or explicitly
///   failed). Never scanned by the claim path; retention policy is the
///   user's problem.
/// * `_honker_locks` — named locks with an owner and an expiry.
/// * `_honker_rate_limits` — per-name, per-window counters.
/// * `_honker_scheduler_tasks` — cron-style task definitions.
/// * `_honker_results` — per-job result values with optional expiry.
/// * `_honker_stream` / `_honker_stream_consumers` — append-only topic
///   log and per-consumer offsets.
///
/// Every statement uses `IF NOT EXISTS`, so the script is idempotent.
/// Views and schema-version cleanup live in the language binding, not
/// here — they're caller-specific.
pub const BOOTSTRAP_HONKER_SQL: &str = "
CREATE TABLE IF NOT EXISTS _honker_live (
id INTEGER PRIMARY KEY AUTOINCREMENT,
queue TEXT NOT NULL,
payload TEXT NOT NULL,
state TEXT NOT NULL DEFAULT 'pending',
priority INTEGER NOT NULL DEFAULT 0,
run_at INTEGER NOT NULL DEFAULT (unixepoch()),
worker_id TEXT,
claim_expires_at INTEGER,
attempts INTEGER NOT NULL DEFAULT 0,
max_attempts INTEGER NOT NULL DEFAULT 3,
created_at INTEGER NOT NULL DEFAULT (unixepoch()),
expires_at INTEGER
);
CREATE INDEX IF NOT EXISTS _honker_live_claim
ON _honker_live(queue, priority DESC, run_at, id)
WHERE state IN ('pending', 'processing');
CREATE INDEX IF NOT EXISTS _honker_live_pending_deadline
ON _honker_live(queue, run_at)
WHERE state = 'pending';
CREATE INDEX IF NOT EXISTS _honker_live_processing_deadline
ON _honker_live(queue, claim_expires_at)
WHERE state = 'processing';
CREATE TABLE IF NOT EXISTS _honker_dead (
id INTEGER PRIMARY KEY,
queue TEXT NOT NULL,
payload TEXT NOT NULL,
priority INTEGER NOT NULL DEFAULT 0,
run_at INTEGER NOT NULL DEFAULT 0,
attempts INTEGER NOT NULL DEFAULT 0,
max_attempts INTEGER NOT NULL DEFAULT 0,
last_error TEXT,
created_at INTEGER NOT NULL DEFAULT (unixepoch()),
died_at INTEGER NOT NULL DEFAULT (unixepoch())
);
CREATE TABLE IF NOT EXISTS _honker_locks (
name TEXT PRIMARY KEY,
owner TEXT NOT NULL,
expires_at INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS _honker_rate_limits (
name TEXT NOT NULL,
window_start INTEGER NOT NULL,
count INTEGER NOT NULL DEFAULT 0,
PRIMARY KEY (name, window_start)
);
CREATE TABLE IF NOT EXISTS _honker_scheduler_tasks (
name TEXT PRIMARY KEY,
queue TEXT NOT NULL,
cron_expr TEXT NOT NULL,
payload TEXT NOT NULL,
priority INTEGER NOT NULL DEFAULT 0,
expires_s INTEGER,
next_fire_at INTEGER NOT NULL,
enabled INTEGER NOT NULL DEFAULT 1
);
CREATE TABLE IF NOT EXISTS _honker_results (
job_id INTEGER PRIMARY KEY,
value TEXT,
created_at INTEGER NOT NULL DEFAULT (unixepoch()),
expires_at INTEGER
);
CREATE TABLE IF NOT EXISTS _honker_stream (
offset INTEGER PRIMARY KEY AUTOINCREMENT,
topic TEXT NOT NULL,
key TEXT,
payload TEXT NOT NULL,
created_at INTEGER NOT NULL DEFAULT (unixepoch())
);
CREATE INDEX IF NOT EXISTS _honker_stream_topic
ON _honker_stream(topic, offset);
CREATE TABLE IF NOT EXISTS _honker_stream_consumers (
name TEXT NOT NULL,
topic TEXT NOT NULL,
offset INTEGER NOT NULL DEFAULT 0,
PRIMARY KEY (name, topic)
);
";
|
|
345
|
+
|
|
346
|
+
/// Install the honker queue schema on `conn`. Idempotent. See
|
|
347
|
+
/// [`BOOTSTRAP_HONKER_SQL`] for the DDL and rationale.
|
|
348
|
+
///
|
|
349
|
+
/// Works in any journal mode. WAL mode is still the recommended
|
|
350
|
+
/// default (concurrent readers, one writer, efficient fsync), but
|
|
351
|
+
/// callers who know what they're doing can run honker tables on a
|
|
352
|
+
/// DELETE-journal database. Cross-process wake is their responsibility.
|
|
353
|
+
pub fn bootstrap_honker_schema(conn: &Connection) -> Result<(), Error> {
|
|
354
|
+
conn.execute_batch(BOOTSTRAP_HONKER_SQL)?;
|
|
355
|
+
// Migration: pre-Mantle databases lack `enabled` on
|
|
356
|
+
// _honker_scheduler_tasks. ADD COLUMN if absent.
|
|
357
|
+
//
|
|
358
|
+
// Race: two processes bootstrapping concurrently could both see
|
|
359
|
+
// "missing" and both attempt the ALTER. SQLite serializes writes
|
|
360
|
+
// file-wide, so they don't actually run at once — the second
|
|
361
|
+
// ALTER errors with "duplicate column" because by then the first
|
|
362
|
+
// has committed. Swallow that specific error; bubble anything else.
|
|
363
|
+
let has_enabled: bool = {
|
|
364
|
+
let mut stmt = conn.prepare(
|
|
365
|
+
"SELECT 1 FROM pragma_table_info('_honker_scheduler_tasks') WHERE name='enabled'",
|
|
366
|
+
)?;
|
|
367
|
+
stmt.query_row([], |_| Ok(true)).unwrap_or(false)
|
|
368
|
+
};
|
|
369
|
+
if !has_enabled {
|
|
370
|
+
match conn.execute(
|
|
371
|
+
"ALTER TABLE _honker_scheduler_tasks ADD COLUMN enabled INTEGER NOT NULL DEFAULT 1",
|
|
372
|
+
[],
|
|
373
|
+
) {
|
|
374
|
+
Ok(_) => {}
|
|
375
|
+
Err(e) if e.to_string().to_lowercase().contains("duplicate column") => {
|
|
376
|
+
// Lost the race; the other process added it. Fine.
|
|
377
|
+
}
|
|
378
|
+
Err(e) => return Err(e.into()),
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
Ok(())
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
// ---------------------------------------------------------------------
|
|
385
|
+
// Opening connections
|
|
386
|
+
// ---------------------------------------------------------------------
|
|
387
|
+
|
|
388
|
+
/// Open a SQLite connection at `path` with the library's PRAGMA
|
|
389
|
+
/// defaults. If `install_notify` is true, also attach the notifications
|
|
390
|
+
/// table + `notify()` SQL function. Readers don't need it; only the
|
|
391
|
+
/// writer connection does.
|
|
392
|
+
pub fn open_conn(path: &str, install_notify: bool) -> Result<Connection, Error> {
|
|
393
|
+
let conn = Connection::open_with_flags(
|
|
394
|
+
path,
|
|
395
|
+
OpenFlags::SQLITE_OPEN_READ_WRITE
|
|
396
|
+
| OpenFlags::SQLITE_OPEN_CREATE
|
|
397
|
+
| OpenFlags::SQLITE_OPEN_URI,
|
|
398
|
+
)?;
|
|
399
|
+
apply_default_pragmas(&conn)?;
|
|
400
|
+
if install_notify {
|
|
401
|
+
attach_notify(&conn)?;
|
|
402
|
+
}
|
|
403
|
+
Ok(conn)
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
// ---------------------------------------------------------------------
|
|
407
|
+
// Writer slot
|
|
408
|
+
// ---------------------------------------------------------------------
|
|
409
|
+
|
|
410
|
+
/// Single-connection write slot. Writers serialize through one
|
|
411
|
+
/// rusqlite `Connection` because WAL mode allows only one writer at a
|
|
412
|
+
/// time anyway; doing it in user space avoids busy-timeout retries.
|
|
413
|
+
///
|
|
414
|
+
/// Provides explicit [`close`](Self::close) so bindings can release the
|
|
415
|
+
/// underlying SQLite handle independent of `Arc<Writer>` reference
|
|
416
|
+
/// count. Without this, an outstanding `Arc<Writer>` clone (kept alive
|
|
417
|
+
/// by a not-yet-GC'd Transaction object on the JS/Python side) would
|
|
418
|
+
/// keep the connection open and the `.db` file locked. On Windows that
|
|
419
|
+
/// blocks `rmdir`/`unlink` of the temp directory until GC runs; on
|
|
420
|
+
/// Linux/macOS the unlink succeeds but the file descriptor leaks.
|
|
421
|
+
pub struct Writer {
|
|
422
|
+
slot: Mutex<Option<Connection>>,
|
|
423
|
+
available: Condvar,
|
|
424
|
+
closed: AtomicBool,
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
impl Writer {
|
|
428
|
+
pub fn new(conn: Connection) -> Self {
|
|
429
|
+
Self {
|
|
430
|
+
slot: Mutex::new(Some(conn)),
|
|
431
|
+
available: Condvar::new(),
|
|
432
|
+
closed: AtomicBool::new(false),
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
/// Blocking acquire. Waits on a condvar if the slot is held.
|
|
437
|
+
/// Returns `None` if the writer has been [closed](Self::close).
|
|
438
|
+
pub fn acquire(&self) -> Option<Connection> {
|
|
439
|
+
let mut guard = self.slot.lock();
|
|
440
|
+
loop {
|
|
441
|
+
if self.closed.load(Ordering::Acquire) {
|
|
442
|
+
return None;
|
|
443
|
+
}
|
|
444
|
+
if let Some(c) = guard.take() {
|
|
445
|
+
return Some(c);
|
|
446
|
+
}
|
|
447
|
+
self.available.wait(&mut guard);
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
/// Non-blocking. Returns `Some(conn)` if the slot was immediately
|
|
452
|
+
/// free, else `None`. Bindings use this for a fast path that
|
|
453
|
+
/// avoids GIL release (Python) or async thread-hops (Node) when
|
|
454
|
+
/// the slot is uncontended. Also returns `None` if closed.
|
|
455
|
+
pub fn try_acquire(&self) -> Option<Connection> {
|
|
456
|
+
if self.closed.load(Ordering::Acquire) {
|
|
457
|
+
return None;
|
|
458
|
+
}
|
|
459
|
+
self.slot.lock().take()
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
/// Return a connection to the slot. After [close](Self::close), the
|
|
463
|
+
/// connection is dropped instead of being returned to the pool.
|
|
464
|
+
pub fn release(&self, conn: Connection) {
|
|
465
|
+
if self.closed.load(Ordering::Acquire) {
|
|
466
|
+
// Drop conn instead of returning it to a closed pool.
|
|
467
|
+
return;
|
|
468
|
+
}
|
|
469
|
+
let mut guard = self.slot.lock();
|
|
470
|
+
*guard = Some(conn);
|
|
471
|
+
self.available.notify_one();
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
/// Drop the underlying connection and refuse further acquisitions.
|
|
475
|
+
/// Idempotent. Wakes any blocked `acquire()` callers; they observe
|
|
476
|
+
/// the closed flag and return `None`.
|
|
477
|
+
///
|
|
478
|
+
/// If a transaction is currently holding the connection (i.e. the
|
|
479
|
+
/// slot is empty), it stays out — the transaction's eventual
|
|
480
|
+
/// `release` will see `closed == true` and drop the connection
|
|
481
|
+
/// itself. So the file handle is released either way; what
|
|
482
|
+
/// matters is that no further writes happen after `close`.
|
|
483
|
+
pub fn close(&self) {
|
|
484
|
+
self.closed.store(true, Ordering::Release);
|
|
485
|
+
let mut guard = self.slot.lock();
|
|
486
|
+
guard.take(); // drops the connection if the slot is occupied
|
|
487
|
+
self.available.notify_all();
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
// ---------------------------------------------------------------------
|
|
492
|
+
// Reader pool
|
|
493
|
+
// ---------------------------------------------------------------------
|
|
494
|
+
|
|
495
|
+
/// Bounded pool of reader connections. Readers are cheap (one file
|
|
496
|
+
/// descriptor + a page cache) and WAL mode allows any number to run
|
|
497
|
+
/// concurrently with the writer.
|
|
498
|
+
///
|
|
499
|
+
/// Provides explicit [`close`](Self::close) for the same reason as
|
|
500
|
+
/// [`Writer::close`] — see that doc.
|
|
501
|
+
pub struct Readers {
|
|
502
|
+
pool: Mutex<Vec<Connection>>,
|
|
503
|
+
outstanding: Mutex<usize>,
|
|
504
|
+
available: Condvar,
|
|
505
|
+
path: String,
|
|
506
|
+
max: usize,
|
|
507
|
+
closed: AtomicBool,
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
impl Readers {
|
|
511
|
+
pub fn new(path: String, max: usize) -> Self {
|
|
512
|
+
Self {
|
|
513
|
+
pool: Mutex::new(Vec::new()),
|
|
514
|
+
outstanding: Mutex::new(0),
|
|
515
|
+
available: Condvar::new(),
|
|
516
|
+
path,
|
|
517
|
+
max: max.max(1),
|
|
518
|
+
closed: AtomicBool::new(false),
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
/// Acquire a reader. Pops a pooled one if available; otherwise
|
|
523
|
+
/// opens a new connection up to `max`. Above `max`, waits on the
|
|
524
|
+
/// condvar. After [`close`](Self::close), returns
|
|
525
|
+
/// `Err(rusqlite::Error::ExecuteReturnedResults)` as a sentinel —
|
|
526
|
+
/// bindings should map this to "Database is closed".
|
|
527
|
+
pub fn acquire(&self) -> Result<Connection, Error> {
|
|
528
|
+
loop {
|
|
529
|
+
if self.closed.load(Ordering::Acquire) {
|
|
530
|
+
return Err(closed_err());
|
|
531
|
+
}
|
|
532
|
+
let mut pool = self.pool.lock();
|
|
533
|
+
if let Some(c) = pool.pop() {
|
|
534
|
+
return Ok(c);
|
|
535
|
+
}
|
|
536
|
+
let mut out = self.outstanding.lock();
|
|
537
|
+
if *out < self.max {
|
|
538
|
+
*out += 1;
|
|
539
|
+
drop(out);
|
|
540
|
+
drop(pool);
|
|
541
|
+
let conn = open_conn(&self.path, false)?;
|
|
542
|
+
// Re-check: if close() raced us, drop the brand-new
|
|
543
|
+
// connection instead of handing it out.
|
|
544
|
+
if self.closed.load(Ordering::Acquire) {
|
|
545
|
+
drop(conn);
|
|
546
|
+
return Err(closed_err());
|
|
547
|
+
}
|
|
548
|
+
return Ok(conn);
|
|
549
|
+
}
|
|
550
|
+
drop(out);
|
|
551
|
+
self.available.wait(&mut pool);
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
/// Return a connection to the pool. After [close](Self::close), the
|
|
556
|
+
/// connection is dropped instead of pooled.
|
|
557
|
+
pub fn release(&self, conn: Connection) {
|
|
558
|
+
if self.closed.load(Ordering::Acquire) {
|
|
559
|
+
return;
|
|
560
|
+
}
|
|
561
|
+
let mut pool = self.pool.lock();
|
|
562
|
+
pool.push(conn);
|
|
563
|
+
self.available.notify_one();
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
/// Drop all pooled connections and refuse further acquisitions.
|
|
567
|
+
/// Idempotent. Wakes any blocked `acquire()` callers; they observe
|
|
568
|
+
/// the closed flag and return the closed sentinel.
|
|
569
|
+
pub fn close(&self) {
|
|
570
|
+
self.closed.store(true, Ordering::Release);
|
|
571
|
+
self.pool.lock().clear(); // drops pooled connections
|
|
572
|
+
self.available.notify_all();
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
/// Sentinel error for "pool closed". Bindings can match the inner
|
|
577
|
+
/// `rusqlite::Error::SqliteFailure` with code `SQLITE_MISUSE` and
|
|
578
|
+
/// message containing "Database is closed" to surface a clean error
|
|
579
|
+
/// to user code. `SQLITE_MISUSE` is appropriate here — calling
|
|
580
|
+
/// acquire on a closed pool is a misuse of the API.
|
|
581
|
+
fn closed_err() -> Error {
|
|
582
|
+
Error::Sqlite(rusqlite::Error::SqliteFailure(
|
|
583
|
+
rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_MISUSE),
|
|
584
|
+
Some("Database is closed".to_string()),
|
|
585
|
+
))
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
// ---------------------------------------------------------------------
|
|
589
|
+
// Database file watcher
|
|
590
|
+
// ---------------------------------------------------------------------
|
|
591
|
+
|
|
592
|
+
/// Platform-specific file identity: `(dev, ino)` on Unix,
|
|
593
|
+
/// `(volume_serial, file_index)` on Windows. Used to detect when the
|
|
594
|
+
/// database file has been replaced underneath us (atomic rename,
|
|
595
|
+
/// litestream restore, volume remount).
|
|
596
|
+
///
|
|
597
|
+
/// Uses the `file-id` crate on unix and windows for stable Rust
|
|
598
|
+
/// support without nightly features. Falls back to `(0, 0)` on other
|
|
599
|
+
/// targets (WASI, Redox, illumos, etc.) — same behavior as the
|
|
600
|
+
/// pre-`file-id` `#[cfg(not(any(unix, windows)))]` branch. On those
|
|
601
|
+
/// targets the dead-man's switch is a no-op (every `stat_identity`
|
|
602
|
+
/// returns `(0, 0)` so the equality check never trips); replacement
|
|
603
|
+
/// detection is disabled but the watcher still functions. Nobody is
|
|
604
|
+
/// known to deploy honker there today.
|
|
605
|
+
#[cfg(any(unix, windows))]
|
|
606
|
+
pub(crate) fn stat_identity(path: &Path) -> std::io::Result<(u64, u64)> {
|
|
607
|
+
let id = file_id::get_file_id(path)?;
|
|
608
|
+
match id {
|
|
609
|
+
file_id::FileId::Inode {
|
|
610
|
+
device_id,
|
|
611
|
+
inode_number,
|
|
612
|
+
} => Ok((device_id, inode_number)),
|
|
613
|
+
file_id::FileId::LowRes {
|
|
614
|
+
volume_serial_number,
|
|
615
|
+
file_index,
|
|
616
|
+
} => Ok((volume_serial_number as u64, file_index)),
|
|
617
|
+
file_id::FileId::HighRes {
|
|
618
|
+
volume_serial_number,
|
|
619
|
+
file_id,
|
|
620
|
+
} => Ok(fold_high_res(volume_serial_number, file_id)),
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
/// Squeeze a 128-bit ReFS / `FILE_ID_INFO` `file_id` into 64 bits so the
/// pair fits the `(u64, u64)` shape returned by [`stat_identity`].
///
/// The two 64-bit halves are XORed together. NTFS leaves the upper half
/// at 0, so there the result is simply the lower 64 bits — bit-for-bit
/// the same as truncation. ReFS can populate both halves; the XOR lets
/// both contribute.
///
/// For the "did this file get atomically renamed?" check that
/// `UpdateWatcher` performs, truncation and XOR-fold are equally good:
/// ReFS file_ids change wholesale on rename, so the lower 64 bits change
/// too. The practical collision probability matches truncation (~2⁻⁶⁴)
/// and is acceptable for this use.
///
/// Returns `(volume_serial_number, folded_file_id)`.
#[cfg(any(unix, windows))]
fn fold_high_res(volume_serial_number: u64, file_id: u128) -> (u64, u64) {
    let upper_half = (file_id >> 64) as u64;
    // Intentional truncation: keeps only the low 64 bits.
    let lower_half = file_id as u64;
    (volume_serial_number, upper_half ^ lower_half)
}
|
|
643
|
+
|
|
644
|
+
/// Fallback for targets that are neither unix nor windows: every path
/// reports the same `(0, 0)` identity and the call never fails, so an
/// identity comparison can never detect a replaced file.
#[cfg(not(any(unix, windows)))]
pub(crate) fn stat_identity(_path: &Path) -> std::io::Result<(u64, u64)> {
    const NO_IDENTITY: (u64, u64) = (0, 0);
    Ok(NO_IDENTITY)
}
|
|
648
|
+
|
|
649
|
+
/// Read the pager's `data_version` counter via `PRAGMA data_version`.
|
|
650
|
+
/// Returns a monotonic u32 incremented on every commit by any
|
|
651
|
+
/// connection (and on checkpoint). Empirically verified to detect
|
|
652
|
+
/// cross-connection database updates on all SQLite versions tested.
|
|
653
|
+
/// Cost: ~3.5 µs/call = ~3.5 ms/sec at 1 kHz.
|
|
654
|
+
pub(crate) fn poll_data_version(conn: &Connection) -> Result<u32, String> {
|
|
655
|
+
conn.pragma_query_value(None, "data_version", |row| row.get(0))
|
|
656
|
+
.map_err(|e| e.to_string())
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
/// Returns true if `e` is a transient lock conflict (SQLITE_BUSY /
|
|
660
|
+
/// SQLITE_LOCKED). On non-WAL journal modes the writer holds an
|
|
661
|
+
/// exclusive lock during commit, so the watcher's PRAGMA frequently
|
|
662
|
+
/// races into one of these. Treat as "try again next tick", not as a
|
|
663
|
+
/// connection failure — dropping and re-opening would silently re-
|
|
664
|
+
/// baseline `last_version` and skip pending wakes.
|
|
665
|
+
fn is_transient_lock_error(e: &rusqlite::Error) -> bool {
|
|
666
|
+
matches!(
|
|
667
|
+
e,
|
|
668
|
+
rusqlite::Error::SqliteFailure(
|
|
669
|
+
ffi::Error {
|
|
670
|
+
code: ffi::ErrorCode::DatabaseBusy | ffi::ErrorCode::DatabaseLocked,
|
|
671
|
+
..
|
|
672
|
+
},
|
|
673
|
+
_,
|
|
674
|
+
)
|
|
675
|
+
)
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
/// Polling loop body shared by [`UpdateWatcher`] (polling backend) and
|
|
679
|
+
/// the fallback path inside [`kernel_watcher`] / [`shm_watcher`].
|
|
680
|
+
///
|
|
681
|
+
/// Three-layer defensive architecture:
|
|
682
|
+
///
|
|
683
|
+
/// 1. **Fast path (every 1 ms):** `PRAGMA data_version`. Compare the
|
|
684
|
+
/// integer to last seen value. Notify on change. (~3.5 µs/call.)
|
|
685
|
+
/// 2. **Error recovery (every 1 ms on failure):** If the query fails,
|
|
686
|
+
/// reconnect the SQLite connection and force one wake.
|
|
687
|
+
/// 3. **Identity check (about every 100 ms):** `stat(db_path)` to compare
|
|
688
|
+
/// `(dev, ino)`. If the file was replaced, panic with a clear
|
|
689
|
+
/// message — continuing would silently watch stale data.
|
|
690
|
+
pub(crate) fn run_poll_loop<F>(
|
|
691
|
+
db_path: PathBuf,
|
|
692
|
+
on_change: F,
|
|
693
|
+
stop: Arc<AtomicBool>,
|
|
694
|
+
ready: std::sync::mpsc::SyncSender<()>,
|
|
695
|
+
) where
|
|
696
|
+
F: Fn(),
|
|
697
|
+
{
|
|
698
|
+
let mut conn = match Connection::open_with_flags(
|
|
699
|
+
&db_path,
|
|
700
|
+
OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX,
|
|
701
|
+
) {
|
|
702
|
+
Ok(c) => Some(c),
|
|
703
|
+
Err(e) => {
|
|
704
|
+
eprintln!("honker: failed to open watcher connection: {e}");
|
|
705
|
+
None
|
|
706
|
+
}
|
|
707
|
+
};
|
|
708
|
+
let mut last_version = conn
|
|
709
|
+
.as_ref()
|
|
710
|
+
.and_then(|c| poll_data_version(c).ok())
|
|
711
|
+
.unwrap_or(0);
|
|
712
|
+
let initial_identity = match stat_identity(&db_path) {
|
|
713
|
+
Ok(id) => id,
|
|
714
|
+
Err(e) => {
|
|
715
|
+
eprintln!("honker: failed to stat database for identity check: {e}");
|
|
716
|
+
(0, 0)
|
|
717
|
+
}
|
|
718
|
+
};
|
|
719
|
+
// Wall-clock cadence: tick counting drifts on Windows where 1 ms
|
|
720
|
+
// sleeps round up to ~15 ms.
|
|
721
|
+
let mut next_identity_check = Instant::now() + UPDATE_WATCHER_IDENTITY_INTERVAL;
|
|
722
|
+
// Baseline captured; signal the spawner that it's safe to return.
|
|
723
|
+
let _ = ready.send(());
|
|
724
|
+
drop(ready);
|
|
725
|
+
|
|
726
|
+
while !stop.load(Ordering::Acquire) {
|
|
727
|
+
std::thread::sleep(Duration::from_millis(1));
|
|
728
|
+
|
|
729
|
+
// Path 1: PRAGMA data_version (fast path)
|
|
730
|
+
if let Some(ref c) = conn {
|
|
731
|
+
match c.pragma_query_value(None, "data_version", |row| row.get::<_, u32>(0)) {
|
|
732
|
+
Ok(version) => {
|
|
733
|
+
if version != last_version {
|
|
734
|
+
last_version = version;
|
|
735
|
+
on_change();
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
Err(e) if is_transient_lock_error(&e) => {
|
|
739
|
+
// Writer holds the db lock (mid-commit on a
|
|
740
|
+
// non-WAL journal mode). Don't drop the connection
|
|
741
|
+
// — that would silently re-baseline last_version
|
|
742
|
+
// and skip pending wakes. Just retry next tick.
|
|
743
|
+
}
|
|
744
|
+
Err(e) => {
|
|
745
|
+
eprintln!("honker: data_version poll failed: {e}");
|
|
746
|
+
conn = None;
|
|
747
|
+
on_change(); // conservative wake
|
|
748
|
+
}
|
|
749
|
+
}
|
|
750
|
+
} else {
|
|
751
|
+
// Path 2: reconnect after transient failure
|
|
752
|
+
match Connection::open_with_flags(
|
|
753
|
+
&db_path,
|
|
754
|
+
OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX,
|
|
755
|
+
) {
|
|
756
|
+
Ok(c) => {
|
|
757
|
+
last_version = poll_data_version(&c).unwrap_or(0);
|
|
758
|
+
conn = Some(c);
|
|
759
|
+
}
|
|
760
|
+
Err(e) => {
|
|
761
|
+
eprintln!("honker: reconnect failed: {e}");
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
// Path 3: dead-man's switch — panic if db inode changed
|
|
767
|
+
// (atomic rename, litestream restore, volume remount, NFS).
|
|
768
|
+
// Effectively a no-op on Windows: the kernel rejects
|
|
769
|
+
// rename-over-open files.
|
|
770
|
+
let now = Instant::now();
|
|
771
|
+
if now >= next_identity_check {
|
|
772
|
+
next_identity_check = now + UPDATE_WATCHER_IDENTITY_INTERVAL;
|
|
773
|
+
match stat_identity(&db_path) {
|
|
774
|
+
Ok(current) => {
|
|
775
|
+
if current != initial_identity {
|
|
776
|
+
panic!(
|
|
777
|
+
"honker: database file replaced: \
|
|
778
|
+
expected (dev={}, ino={}), \
|
|
779
|
+
found (dev={}, ino={}) at {:?}. \
|
|
780
|
+
The watcher cannot recover; \
|
|
781
|
+
close the Database and reopen with honker.open().",
|
|
782
|
+
initial_identity.0, initial_identity.1, current.0, current.1, db_path
|
|
783
|
+
);
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
Err(e) => {
|
|
787
|
+
eprintln!("honker: stat identity check failed: {e}");
|
|
788
|
+
conn = None;
|
|
789
|
+
on_change();
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
/// Background thread that polls a SQLite database file for changes.
/// Dispatches to the backend selected in [`WatcherConfig`].
/// See [`run_poll_loop`] for the default polling backend's architecture.
pub struct UpdateWatcher {
    /// Stop flag shared with the watcher thread. `stop()` stores `true`
    /// with `Release` ordering; the poll loop loads it with `Acquire`.
    stop: Arc<AtomicBool>,
    /// Handle of the spawned watcher thread. Wrapped in `Option` so
    /// `join()` can take it out of `self` before joining.
    handle: Option<std::thread::JoinHandle<()>>,
}
|
|
803
|
+
|
|
804
|
+
/// Minimum wall-clock time between two file-identity (dead-man's switch)
/// checks performed by [`run_poll_loop`].
const UPDATE_WATCHER_IDENTITY_INTERVAL: Duration = Duration::from_millis(100);
|
|
805
|
+
|
|
806
|
+
impl UpdateWatcher {
|
|
807
|
+
/// Spawn a watcher thread on `db_path` using the default polling
|
|
808
|
+
/// backend. `on_change` is called once per observed commit. The
|
|
809
|
+
/// thread runs until [`UpdateWatcher`] is dropped or
|
|
810
|
+
/// [`stop`](Self::stop) is called.
|
|
811
|
+
pub fn spawn<F>(db_path: PathBuf, on_change: F) -> Self
|
|
812
|
+
where
|
|
813
|
+
F: Fn() + Send + 'static,
|
|
814
|
+
{
|
|
815
|
+
Self::spawn_with_config(db_path, on_change, WatcherConfig::default())
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
/// Like [`spawn`](Self::spawn) but with an explicit watcher backend.
|
|
819
|
+
/// The optional `KernelWatch` and `ShmFastPath` backends are
|
|
820
|
+
/// experimental — see [`WatcherBackend`] for the safety contracts.
|
|
821
|
+
pub fn spawn_with_config<F>(db_path: PathBuf, on_change: F, config: WatcherConfig) -> Self
|
|
822
|
+
where
|
|
823
|
+
F: Fn() + Send + 'static,
|
|
824
|
+
{
|
|
825
|
+
let stop = Arc::new(AtomicBool::new(false));
|
|
826
|
+
let stop_t = stop.clone();
|
|
827
|
+
// The thread signals `ready` once it has captured its baseline
|
|
828
|
+
// (initial inode for the dead-man's switch, initial iChange for
|
|
829
|
+
// shm, etc.). spawn_with_config blocks on `ready` so the caller
|
|
830
|
+
// can do anything that mutates the file (rename, write) right
|
|
831
|
+
// after spawn without racing the baseline capture. If the
|
|
832
|
+
// thread fails to init, the sender drops and recv() returns
|
|
833
|
+
// Err — we still return so the caller can use the (no-op)
|
|
834
|
+
// watcher; the eprintln from the backend explains the failure.
|
|
835
|
+
let (ready_tx, ready_rx) = std::sync::mpsc::sync_channel::<()>(1);
|
|
836
|
+
let handle = std::thread::Builder::new()
|
|
837
|
+
.name("honker-update-poll".into())
|
|
838
|
+
.spawn(move || match config.backend {
|
|
839
|
+
WatcherBackend::Polling => run_poll_loop(db_path, on_change, stop_t, ready_tx),
|
|
840
|
+
#[cfg(feature = "kernel-watcher")]
|
|
841
|
+
WatcherBackend::KernelWatch => {
|
|
842
|
+
kernel_watcher::run_kernel_watch_loop(db_path, on_change, stop_t, ready_tx);
|
|
843
|
+
}
|
|
844
|
+
#[cfg(feature = "shm-fast-path")]
|
|
845
|
+
WatcherBackend::ShmFastPath => {
|
|
846
|
+
shm_watcher::run_shm_fast_path_loop(db_path, on_change, stop_t, ready_tx);
|
|
847
|
+
}
|
|
848
|
+
})
|
|
849
|
+
.expect("spawn update-poll thread");
|
|
850
|
+
let _ = ready_rx.recv();
|
|
851
|
+
Self {
|
|
852
|
+
stop,
|
|
853
|
+
handle: Some(handle),
|
|
854
|
+
}
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
/// Request the watcher thread to stop. Idempotent. Dropping the
|
|
858
|
+
/// `UpdateWatcher` also stops the thread.
|
|
859
|
+
pub fn stop(&self) {
|
|
860
|
+
self.stop.store(true, Ordering::Release);
|
|
861
|
+
}
|
|
862
|
+
|
|
863
|
+
/// Stop the watcher and wait for the thread to exit. Returns the
|
|
864
|
+
/// thread's result — `Ok(())` on clean shutdown, `Err(payload)`
|
|
865
|
+
/// if the thread panicked (e.g. the dead-man's switch detected
|
|
866
|
+
/// file replacement). Consumes `self` so the watcher can't be
|
|
867
|
+
/// used after joining.
|
|
868
|
+
pub fn join(mut self) -> std::thread::Result<()> {
|
|
869
|
+
self.stop();
|
|
870
|
+
match self.handle.take() {
|
|
871
|
+
Some(h) => h.join(),
|
|
872
|
+
None => Ok(()),
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
impl Drop for UpdateWatcher {
|
|
878
|
+
fn drop(&mut self) {
|
|
879
|
+
self.stop();
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
|
|
883
|
+
// ---------------------------------------------------------------------
|
|
884
|
+
// Shared update watcher (one thread per Database, N subscribers)
|
|
885
|
+
// ---------------------------------------------------------------------
|
|
886
|
+
|
|
887
|
+
/// Shared database-file watcher: one PRAGMA-poll thread per database
|
|
888
|
+
/// path, N subscribers. Each [`subscribe`](Self::subscribe) returns a
|
|
889
|
+
/// fresh `Receiver<()>` that sees a tick on every observed commit.
|
|
890
|
+
///
|
|
891
|
+
/// Previously every call to `db.update_events()` spawned its own
|
|
892
|
+
/// update watcher thread, so N listeners in one process meant N threads
|
|
893
|
+
/// hammering `stat(2)` on the same file at 1 ms cadence. A web
|
|
894
|
+
/// process with 100 active SSE subscribers was doing ~200k stat
|
|
895
|
+
/// syscalls/sec against one file. Now a single shared thread
|
|
896
|
+
/// services all subscribers — 1 ms cadence, same kernel cost
|
|
897
|
+
/// regardless of N.
|
|
898
|
+
///
|
|
899
|
+
/// Subscriber channels are bounded; on overflow, additional ticks for
|
|
900
|
+
/// that subscriber are dropped. Wakes are idempotent signals — the
|
|
901
|
+
/// consumer re-reads state from SQLite on each wake — so dropping
|
|
902
|
+
/// during backpressure is safe. A disconnected subscriber (receiver
|
|
903
|
+
/// dropped) gets pruned on the next wake via `TrySendError::Disconnected`.
|
|
904
|
+
/// Lives in the watcher thread's closure. Closure drops on clean exit
|
|
905
|
+
/// or panic; this Drop clears every subscriber's sender so their next
|
|
906
|
+
/// `recv()` returns Err. Without it, a panicking watcher leaves
|
|
907
|
+
/// subscribers blocking forever.
|
|
908
|
+
struct WatcherDeathGuard {
|
|
909
|
+
senders: Arc<Mutex<HashMap<u64, SyncSender<()>>>>,
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
impl Drop for WatcherDeathGuard {
|
|
913
|
+
fn drop(&mut self) {
|
|
914
|
+
self.senders.lock().clear();
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
pub struct SharedUpdateWatcher {
|
|
919
|
+
/// Hold the underlying poll thread alive. Dropping or
|
|
920
|
+
/// [`close`](Self::close)ing this stops it. Wrapped in
|
|
921
|
+
/// `Mutex<Option<...>>` so `close()` can take the watcher out and
|
|
922
|
+
/// `join()` it synchronously — required to release the watcher's
|
|
923
|
+
/// read-only `Connection` before a `db.close()` consumer tries to
|
|
924
|
+
/// `unlink` the database file (Windows: `EBUSY` until every
|
|
925
|
+
/// handle is dropped).
|
|
926
|
+
watcher: Mutex<Option<UpdateWatcher>>,
|
|
927
|
+
/// Shared with the watcher closure so it can fan out to every
|
|
928
|
+
/// subscriber and prune disconnected ones opportunistically.
|
|
929
|
+
senders: Arc<Mutex<HashMap<u64, SyncSender<()>>>>,
|
|
930
|
+
next_id: AtomicU64,
|
|
931
|
+
}
|
|
932
|
+
|
|
933
|
+
impl SharedUpdateWatcher {
|
|
934
|
+
/// Spawn the shared poll thread for `db_path` using the default
|
|
935
|
+
/// polling backend.
|
|
936
|
+
pub fn new(db_path: PathBuf) -> Self {
|
|
937
|
+
Self::new_with_config(db_path, WatcherConfig::default())
|
|
938
|
+
}
|
|
939
|
+
|
|
940
|
+
/// Like [`new`](Self::new) but with an explicit watcher backend.
|
|
941
|
+
pub fn new_with_config(db_path: PathBuf, config: WatcherConfig) -> Self {
|
|
942
|
+
let senders: Arc<Mutex<HashMap<u64, SyncSender<()>>>> =
|
|
943
|
+
Arc::new(Mutex::new(HashMap::new()));
|
|
944
|
+
let senders_t = senders.clone();
|
|
945
|
+
// Watcher thread exits → closure drops → this guard drops →
|
|
946
|
+
// every subscriber's sender is cleared. Their next `recv()`
|
|
947
|
+
// returns Err instead of blocking forever. Subscribers learn
|
|
948
|
+
// the watcher died programmatically, not via stderr.
|
|
949
|
+
let death_guard = WatcherDeathGuard {
|
|
950
|
+
senders: senders.clone(),
|
|
951
|
+
};
|
|
952
|
+
let watcher = UpdateWatcher::spawn_with_config(
|
|
953
|
+
db_path,
|
|
954
|
+
move || {
|
|
955
|
+
let _ = &death_guard;
|
|
956
|
+
let mut list = senders_t.lock();
|
|
957
|
+
list.retain(|_id, s| match s.try_send(()) {
|
|
958
|
+
Ok(()) | Err(TrySendError::Full(_)) => true,
|
|
959
|
+
Err(TrySendError::Disconnected(_)) => false,
|
|
960
|
+
});
|
|
961
|
+
},
|
|
962
|
+
config,
|
|
963
|
+
);
|
|
964
|
+
Self {
|
|
965
|
+
watcher: Mutex::new(Some(watcher)),
|
|
966
|
+
senders,
|
|
967
|
+
next_id: AtomicU64::new(0),
|
|
968
|
+
}
|
|
969
|
+
}
|
|
970
|
+
|
|
971
|
+
/// Subscribe. Returns a subscriber id and a [`Receiver<()>`] that
|
|
972
|
+
/// sees one tick per observed database update. Callers MUST
|
|
973
|
+
/// [`unsubscribe`](Self::unsubscribe) the returned id when done —
|
|
974
|
+
/// otherwise the sender stays in the map and a bridge thread
|
|
975
|
+
/// blocking on `recv()` will never see a disconnect.
|
|
976
|
+
///
|
|
977
|
+
/// Channel capacity is 1: bursts coalesce into one wake per drain
|
|
978
|
+
/// cycle. Wakes are "go re-read state" signals — the consumer's
|
|
979
|
+
/// SQL query reads current state regardless of how many wakes
|
|
980
|
+
/// were dropped, so dropped redundant wakes never cost data, only
|
|
981
|
+
/// signal redundancy. The kernel-watcher backend in particular
|
|
982
|
+
/// fires one event per filesystem write (multiple per commit);
|
|
983
|
+
/// without coalescing, consumers would run N redundant queries
|
|
984
|
+
/// per commit burst. With cap=1 they run ~1.
|
|
985
|
+
pub fn subscribe(&self) -> (u64, std::sync::mpsc::Receiver<()>) {
|
|
986
|
+
let id = self.next_id.fetch_add(1, Ordering::Relaxed);
|
|
987
|
+
let (tx, rx) = std::sync::mpsc::sync_channel(1);
|
|
988
|
+
self.senders.lock().insert(id, tx);
|
|
989
|
+
(id, rx)
|
|
990
|
+
}
|
|
991
|
+
|
|
992
|
+
/// Remove a subscriber. The corresponding receiver sees
|
|
993
|
+
/// `Err(RecvError)` on its next blocking `recv()`, letting a
|
|
994
|
+
/// bridge thread exit cleanly.
|
|
995
|
+
pub fn unsubscribe(&self, id: u64) {
|
|
996
|
+
self.senders.lock().remove(&id);
|
|
997
|
+
}
|
|
998
|
+
|
|
999
|
+
/// Current subscriber count. Test/introspection helper.
|
|
1000
|
+
pub fn subscriber_count(&self) -> usize {
|
|
1001
|
+
self.senders.lock().len()
|
|
1002
|
+
}
|
|
1003
|
+
|
|
1004
|
+
/// Disconnect all subscribers and synchronously join the poll
|
|
1005
|
+
/// thread. The thread owns the watcher's read-only `Connection`;
|
|
1006
|
+
/// joining drops that connection and releases the file handle.
|
|
1007
|
+
/// Idempotent — safe to call more than once.
|
|
1008
|
+
pub fn close(&self) -> std::thread::Result<()> {
|
|
1009
|
+
self.senders.lock().clear();
|
|
1010
|
+
match self.watcher.lock().take() {
|
|
1011
|
+
Some(watcher) => watcher.join(),
|
|
1012
|
+
None => Ok(()),
|
|
1013
|
+
}
|
|
1014
|
+
}
|
|
1015
|
+
}
|
|
1016
|
+
|
|
1017
|
+
impl Drop for SharedUpdateWatcher {
|
|
1018
|
+
fn drop(&mut self) {
|
|
1019
|
+
// Best-effort: signal stop. We don't synchronously join here
|
|
1020
|
+
// because Drop runs from arbitrary contexts (including async
|
|
1021
|
+
// executors) where blocking on a thread join is unsafe.
|
|
1022
|
+
// Bindings that need a synchronous release should call
|
|
1023
|
+
// `close()` explicitly.
|
|
1024
|
+
self.senders.lock().clear();
|
|
1025
|
+
if let Some(watcher) = self.watcher.get_mut().take() {
|
|
1026
|
+
// Dropping UpdateWatcher signals stop; the thread exits
|
|
1027
|
+
// shortly after and its Connection drops then.
|
|
1028
|
+
drop(watcher);
|
|
1029
|
+
}
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
|
|
1033
|
+
// ---------------------------------------------------------------------
|
|
1034
|
+
// Tests
|
|
1035
|
+
// ---------------------------------------------------------------------
|
|
1036
|
+
|
|
1037
|
+
#[cfg(test)]
|
|
1038
|
+
mod tests {
|
|
1039
|
+
use super::*;
|
|
1040
|
+
use rusqlite::Connection;
|
|
1041
|
+
use std::collections::HashSet;
|
|
1042
|
+
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
1043
|
+
use std::sync::{Arc, Barrier};
|
|
1044
|
+
|
|
1045
|
+
fn mem() -> Connection {
|
|
1046
|
+
Connection::open_in_memory().unwrap()
|
|
1047
|
+
}
|
|
1048
|
+
|
|
1049
|
+
/// Build a per-process, per-thread database path inside the system
/// temp directory and delete any leftover database, `-wal` and `-shm`
/// files at that path. The file itself is not created.
fn temp_db(name: &str) -> PathBuf {
    let file_name = format!(
        "honker-{name}-{}-{:?}.db",
        std::process::id(),
        std::thread::current().id()
    );
    let db = std::env::temp_dir().join(file_name);
    // Removal failures (typically "not found") are irrelevant here.
    let _ = std::fs::remove_file(&db);
    for suffix in ["-wal", "-shm"] {
        let _ = std::fs::remove_file(format!("{}{suffix}", db.display()));
    }
    db
}
|
|
1060
|
+
|
|
1061
|
+
fn open_core_test_conn(path: &Path) -> Connection {
|
|
1062
|
+
let conn = open_conn(path.to_str().unwrap(), true).unwrap();
|
|
1063
|
+
attach_honker_functions(&conn).unwrap();
|
|
1064
|
+
conn.query_row("SELECT honker_bootstrap()", [], |_| Ok(()))
|
|
1065
|
+
.unwrap();
|
|
1066
|
+
conn
|
|
1067
|
+
}
|
|
1068
|
+
|
|
1069
|
+
/// Stress test: 4 producer threads and 6 worker threads hammer the same
/// database file through separate connections. Producers enqueue jobs,
/// publish stream events and send notifications; workers claim and ack
/// job batches. Afterwards the test checks that every job was processed
/// exactly once, that the bookkeeping tables hold the expected row
/// counts, and that `PRAGMA integrity_check` reports `ok`.
#[test]
fn core_sql_functions_survive_concurrent_queue_stream_notify_pressure() {
    let path = temp_db("core-pressure");
    let producer_count = 4usize;
    let jobs_per_producer = 75usize;
    let worker_count = 6usize;
    let total_jobs = producer_count * jobs_per_producer;

    // Bootstrap the schema once and confirm the database is in WAL mode.
    {
        let conn = open_core_test_conn(&path);
        let mode: String = conn
            .pragma_query_value(None, "journal_mode", |r| r.get(0))
            .unwrap();
        assert_eq!(mode.to_ascii_uppercase(), "WAL");
    }

    // The barrier releases all producers and workers together, after
    // each thread has opened its own connection.
    let start = Arc::new(Barrier::new(producer_count + worker_count));
    let producers_done = Arc::new(AtomicUsize::new(0));
    // (job id, logical key) of every job a worker has acked.
    let processed = Arc::new(Mutex::new(Vec::<(i64, String)>::new()));
    let mut handles = Vec::new();

    for producer in 0..producer_count {
        let path = path.clone();
        let start = start.clone();
        let producers_done = producers_done.clone();
        handles.push(std::thread::spawn(move || {
            let conn = open_core_test_conn(&path);
            start.wait();
            for seq in 0..jobs_per_producer {
                // Key is unique per (producer, seq) and is embedded in
                // the payload so workers can report it back.
                let key = format!("p{producer}-{seq:03}");
                let payload = format!(r#"{{"producer":{producer},"seq":{seq},"key":"{key}"}}"#);
                // NOTE(review): the positional arguments after the
                // payload are defined by `honker_enqueue`, which is not
                // visible here.
                conn.query_row(
                    "SELECT honker_enqueue('pressure', ?1, NULL, NULL, ?2, 3, NULL)",
                    rusqlite::params![payload, (seq % 7) as i64],
                    |r| r.get::<_, i64>(0),
                )
                .unwrap();
                conn.query_row(
                    "SELECT honker_stream_publish('pressure-events', ?1, ?2)",
                    rusqlite::params![key, payload],
                    |r| r.get::<_, i64>(0),
                )
                .unwrap();
                conn.query_row(
                    "SELECT notify('pressure-note', ?1)",
                    rusqlite::params![format!(r#"{{"key":"{key}"}}"#)],
                    |r| r.get::<_, i64>(0),
                )
                .unwrap();
                // Brief pause on every 11th job.
                if seq % 11 == 0 {
                    std::thread::sleep(Duration::from_millis(1));
                }
            }
            // Tell workers this producer has nothing more to enqueue.
            producers_done.fetch_add(1, Ordering::Release);
        }));
    }

    for worker in 0..worker_count {
        let path = path.clone();
        let start = start.clone();
        let producers_done = producers_done.clone();
        let processed = processed.clone();
        handles.push(std::thread::spawn(move || {
            let conn = open_core_test_conn(&path);
            start.wait();
            let worker_id = format!("core-worker-{worker}");
            // Hard upper bound so a stuck worker fails instead of hanging.
            let deadline = Instant::now() + Duration::from_secs(20);
            // Set when the queue first looks empty after all producers
            // finished; cleared again whenever a batch is claimed.
            let mut idle_since: Option<Instant> = None;
            loop {
                assert!(Instant::now() < deadline, "{worker_id} timed out");
                // NOTE(review): the trailing `7, 30` arguments are
                // defined by `honker_claim_batch`, not visible here.
                let rows_json: String = conn
                    .query_row(
                        "SELECT honker_claim_batch('pressure', ?1, 7, 30)",
                        rusqlite::params![worker_id],
                        |r| r.get(0),
                    )
                    .unwrap();
                // Unpack (id, payload.key) from the returned JSON array
                // using SQLite's json_each.
                let mut stmt = conn
                    .prepare(
                        "SELECT
json_extract(value, '$.id'),
json_extract(json_extract(value, '$.payload'), '$.key')
FROM json_each(?1)",
                    )
                    .unwrap();
                let claimed = stmt
                    .query_map(rusqlite::params![rows_json], |r| {
                        Ok((r.get::<_, i64>(0)?, r.get::<_, String>(1)?))
                    })
                    .unwrap()
                    .collect::<Result<Vec<_>, _>>()
                    .unwrap();

                if claimed.is_empty() {
                    // Only start the exit countdown once every producer
                    // is done; exit after 500 ms of continuous idleness.
                    if producers_done.load(Ordering::Acquire) == producer_count {
                        match idle_since {
                            Some(t) if t.elapsed() >= Duration::from_millis(500) => break,
                            Some(_) => {}
                            None => idle_since = Some(Instant::now()),
                        }
                    }
                    std::thread::sleep(Duration::from_millis(5));
                    continue;
                }

                idle_since = None;
                // Ack the whole batch in one call, as a JSON array of ids.
                let ids_json = format!(
                    "[{}]",
                    claimed
                        .iter()
                        .map(|(id, _)| id.to_string())
                        .collect::<Vec<_>>()
                        .join(",")
                );
                let acked: i64 = conn
                    .query_row(
                        "SELECT honker_ack_batch(?1, ?2)",
                        rusqlite::params![ids_json, worker_id],
                        |r| r.get(0),
                    )
                    .unwrap();
                assert_eq!(acked as usize, claimed.len());
                processed.lock().extend(claimed);
                std::thread::sleep(Duration::from_millis(2));
            }
        }));
    }

    // A panic in any producer or worker fails the test here.
    for handle in handles {
        handle.join().unwrap();
    }

    // Every job was processed exactly once: no duplicate ids, no
    // duplicate keys, and no missing (producer, seq) pair.
    let processed = processed.lock();
    assert_eq!(processed.len(), total_jobs);
    let unique_ids: HashSet<i64> = processed.iter().map(|(id, _)| *id).collect();
    assert_eq!(unique_ids.len(), total_jobs, "job id claimed twice");
    let unique_keys: HashSet<String> = processed.iter().map(|(_, k)| k.clone()).collect();
    assert_eq!(unique_keys.len(), total_jobs, "logical key claimed twice");
    for producer in 0..producer_count {
        for seq in 0..jobs_per_producer {
            assert!(
                unique_keys.contains(&format!("p{producer}-{seq:03}")),
                "missing key p{producer}-{seq:03}"
            );
        }
    }
    drop(processed);

    // Final table-level checks through a fresh connection.
    let conn = open_core_test_conn(&path);
    let live: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM _honker_live WHERE queue='pressure'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    let dead: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM _honker_dead WHERE queue='pressure'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    let stream_rows: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM _honker_stream WHERE topic='pressure-events'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    let notes: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM _honker_notifications WHERE channel='pressure-note'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    let enqueue_wakes: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM _honker_notifications WHERE channel='honker:pressure'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    let integrity: String = conn
        .query_row("PRAGMA integrity_check", [], |r| r.get(0))
        .unwrap();
    assert_eq!(live, 0);
    assert_eq!(dead, 0);
    assert_eq!(stream_rows as usize, total_jobs);
    assert_eq!(notes as usize, total_jobs);
    assert_eq!(enqueue_wakes as usize, total_jobs);
    assert_eq!(integrity, "ok");

    // Close the connection before removing the database and its sidecars.
    drop(conn);
    let _ = std::fs::remove_file(&path);
    let _ = std::fs::remove_file(format!("{}-wal", path.display()));
    let _ = std::fs::remove_file(format!("{}-shm", path.display()));
}
|
|
1268
|
+
|
|
1269
|
+
/// A `notify()` call inside a committed transaction leaves exactly one
/// row for its channel in `_honker_notifications`.
#[test]
fn notify_inserts_row() {
    let conn = mem();
    attach_notify(&conn).unwrap();

    conn.execute_batch("BEGIN IMMEDIATE;").unwrap();
    let sent = conn.query_row("SELECT notify('orders', 'new')", [], |_| Ok(()));
    sent.unwrap();
    conn.execute_batch("COMMIT;").unwrap();

    let count_sql = "SELECT COUNT(*) FROM _honker_notifications WHERE channel='orders'";
    let n: i64 = conn.query_row(count_sql, [], |row| row.get(0)).unwrap();
    assert_eq!(n, 1);
}
|
|
1287
|
+
|
|
1288
|
+
/// A `notify()` call inside a rolled-back transaction leaves no row in
/// `_honker_notifications`.
#[test]
fn rollback_drops_notification() {
    let conn = mem();
    attach_notify(&conn).unwrap();

    conn.execute_batch("BEGIN IMMEDIATE;").unwrap();
    let sent = conn.query_row("SELECT notify('x', 'y')", [], |_| Ok(()));
    sent.unwrap();
    conn.execute_batch("ROLLBACK;").unwrap();

    let count_sql = "SELECT COUNT(*) FROM _honker_notifications";
    let n: i64 = conn.query_row(count_sql, [], |row| row.get(0)).unwrap();
    assert_eq!(n, 0);
}
|
|
1304
|
+
|
|
1305
|
+
/// While the writer connection is checked out, `try_acquire` yields
/// nothing; after `release` it succeeds again.
#[test]
fn writer_try_acquire_returns_none_when_held() {
    let writer = Writer::new(Connection::open_in_memory().unwrap());
    let held = writer.acquire().expect("acquire on fresh Writer");
    assert!(writer.try_acquire().is_none());
    writer.release(held);
    assert!(writer.try_acquire().is_some());
}
|
|
1313
|
+
|
|
1314
|
+
/// Closing a `Writer` whose connection is idle makes both `acquire` and
/// `try_acquire` return `None` afterwards.
#[test]
fn writer_close_drops_idle_connection() {
    // Fresh Writer: the slot is occupied (Some(conn)).
    let writer = Writer::new(Connection::open_in_memory().unwrap());
    writer.close();
    // Even though the slot was free at close time, nothing can be
    // acquired any more — the connection was dropped.
    let blocking = writer.acquire();
    assert!(blocking.is_none());
    let non_blocking = writer.try_acquire();
    assert!(non_blocking.is_none());
}
|
|
1324
|
+
|
|
1325
|
+
/// Closing a `Writer` while its connection is checked out: the later
/// `release` does not put the connection back, so `try_acquire` still
/// returns `None`.
#[test]
fn writer_close_drops_returned_connection() {
    let writer = Writer::new(Connection::open_in_memory().unwrap());
    let held = writer.acquire().expect("acquire on fresh Writer");
    // Close while a transaction is "holding" the connection.
    writer.close();
    // Releasing after close drops the connection instead of returning
    // it to the pool.
    writer.release(held);
    let after_release = writer.try_acquire();
    assert!(after_release.is_none());
}
|
|
1336
|
+
|
|
1337
|
+
/// After `Readers::close`, `acquire` fails with a `SqliteFailure` whose
/// message contains "Database is closed".
#[test]
fn readers_close_returns_closed_err() {
    let db_file = std::env::temp_dir().join(format!("honker-readers-close-{}", std::process::id()));
    let _ = std::fs::remove_file(&db_file);
    // Create the file so open_conn succeeds.
    let seed = Connection::open(&db_file).unwrap();
    seed.execute_batch("PRAGMA journal_mode=WAL;").unwrap();
    drop(seed);

    let readers = Readers::new(db_file.to_string_lossy().into_owned(), 4);
    // Acquire one to populate the pool indirectly via outstanding count.
    let first = readers.acquire().expect("first acquire");
    readers.release(first);
    readers.close();
    // After close, acquire returns the closed sentinel.
    match readers.acquire() {
        Err(Error::Sqlite(rusqlite::Error::SqliteFailure(_, Some(msg)))) => {
            assert!(msg.contains("Database is closed"));
        }
        other => panic!("expected closed err, got {other:?}"),
    }
    let _ = std::fs::remove_file(&db_file);
}
|
|
1361
|
+
|
|
1362
|
+
/// One shared watcher must deliver at least one tick to each of 50
/// subscribers after a handful of commits from a separate connection.
#[test]
fn shared_update_watcher_fans_out_to_many_subscribers() {
    let tmp = std::env::temp_dir().join(format!("honker-shared-test-{}", std::process::id()));
    let _ = std::fs::remove_file(&tmp);
    // Create a real SQLite database in WAL mode so the watcher can
    // open its own connection and poll data_version.
    {
        let conn = Connection::open(&tmp).unwrap();
        conn.execute_batch("PRAGMA journal_mode = WAL;").unwrap();
    }

    let shared = SharedUpdateWatcher::new(tmp.clone());
    let subs: Vec<(u64, std::sync::mpsc::Receiver<()>)> =
        (0..50).map(|_| shared.subscribe()).collect();

    // Open a separate writer connection to trigger commits.
    let writer = Connection::open(&tmp).unwrap();
    writer
        .execute(
            "CREATE TABLE IF NOT EXISTS _test_trigger(id INTEGER PRIMARY KEY)",
            [],
        )
        .unwrap();
    for i in 0..5 {
        std::thread::sleep(Duration::from_millis(5));
        writer
            .execute("INSERT INTO _test_trigger(id) VALUES (?)", [i])
            .unwrap();
    }
    // Give the poll thread time to observe the last commit.
    std::thread::sleep(Duration::from_millis(50));

    for (i, (_id, rx)) in subs.iter().enumerate() {
        // Channels have capacity 1, so drain whatever is buffered and
        // only require that something arrived.
        let mut got_any = false;
        while rx.try_recv().is_ok() {
            got_any = true;
        }
        assert!(got_any, "subscriber {} saw no ticks", i);
    }

    // Release every handle on the database before deleting it: an open
    // handle blocks deletion on Windows, and a still-running watcher
    // would log a stat failure once the file is gone.
    drop(writer);
    let _ = shared.close();
    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));
}
|
|
1403
|
+
|
|
1404
|
+
/// `unsubscribe` removes the subscriber from the map, and the
/// receiver's next blocking `recv()` then fails.
#[test]
fn shared_update_watcher_explicit_unsubscribe_disconnects_receiver() {
    let tmp = std::env::temp_dir().join(format!("honker-unsub-test-{}", std::process::id()));
    let _ = std::fs::remove_file(&tmp);
    {
        let conn = Connection::open(&tmp).unwrap();
        conn.execute_batch("PRAGMA journal_mode = WAL;").unwrap();
    }

    let shared = SharedUpdateWatcher::new(tmp.clone());
    let (id, rx) = shared.subscribe();
    assert_eq!(shared.subscriber_count(), 1);

    shared.unsubscribe(id);
    assert_eq!(shared.subscriber_count(), 0);

    // Receiver now sees Err on blocking recv — the contract that
    // lets a bridge thread exit cleanly when its UpdateEvents drops.
    assert!(rx.recv().is_err());

    // Stop the watcher and release its connection before deleting the
    // database and any WAL sidecar files.
    let _ = shared.close();
    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));
}
|
|
1426
|
+
|
|
1427
|
+
/// Subscribers must learn that the watcher thread died — not just
/// stop receiving wakes silently. We force the watcher to panic via
/// the dead-man's switch (file replacement) and assert that an
/// already-subscribed receiver observes a disconnected channel.
/// Without WatcherDeathGuard the channel never disconnects and this
/// test fails at its 2 s deadline.
#[test]
#[cfg(unix)]
fn shared_update_watcher_signals_subscribers_on_watcher_death() {
    use std::sync::mpsc::RecvTimeoutError;

    let tmp = std::env::temp_dir().join(format!(
        "honker-death-signal-{}-{}",
        std::process::id(),
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .subsec_nanos()
    ));
    let _ = std::fs::remove_file(&tmp);
    {
        let conn = Connection::open(&tmp).unwrap();
        conn.execute_batch("PRAGMA journal_mode = WAL;").unwrap();
    }

    let shared = SharedUpdateWatcher::new(tmp.clone());
    let (_id, rx) = shared.subscribe();

    // Let the watcher snapshot the initial inode.
    std::thread::sleep(Duration::from_millis(200));

    // Replace the file with a different inode. Triggers the
    // dead-man's switch on the next 100 ms tick.
    let other = std::env::temp_dir().join(format!(
        "honker-death-other-{}-{}",
        std::process::id(),
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .subsec_nanos()
    ));
    let _ = std::fs::remove_file(&other);
    std::fs::File::create(&other).unwrap();
    std::fs::rename(&other, &tmp).unwrap();

    // Within ~150 ms the watcher's identity check fires and panics;
    // WatcherDeathGuard's Drop clears senders, which disconnects `rx`.
    // Use a generous timeout — give the watcher up to 2 s to die
    // and the guard to fire.
    let deadline = std::time::Instant::now() + Duration::from_secs(2);
    loop {
        // One blocking wait per iteration: it sleeps while the channel
        // is idle and returns immediately once the sender side is gone.
        match rx.recv_timeout(Duration::from_millis(100)) {
            Err(RecvTimeoutError::Disconnected) => break,
            // A wake or an idle timeout both mean the channel is still
            // connected; keep waiting until the deadline.
            Ok(()) | Err(RecvTimeoutError::Timeout) => {}
        }
        if std::time::Instant::now() > deadline {
            panic!(
                "watcher died but subscriber's channel never disconnected — \
                 WatcherDeathGuard didn't fire?"
            );
        }
    }

    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));
}
|
|
1497
|
+
|
|
1498
|
+
/// Subscribers whose receivers were dropped must eventually disappear
/// from `subscriber_count()` once commits make the watcher try to send
/// to them.
#[test]
fn shared_update_watcher_prunes_subscribers_when_receiver_dropped() {
    let db_path = std::env::temp_dir().join(format!("honker-prune-test-{}", std::process::id()));
    let _ = std::fs::remove_file(&db_path);
    {
        let bootstrap = Connection::open(&db_path).unwrap();
        bootstrap
            .execute_batch("PRAGMA journal_mode = WAL;")
            .unwrap();
    }

    let shared = SharedUpdateWatcher::new(db_path.clone());

    // Register ten subscribers, confirm they are counted, then drop
    // every receiver at once.
    let abandoned = (0..10).map(|_| shared.subscribe()).collect::<Vec<_>>();
    assert_eq!(shared.subscriber_count(), 10);
    drop(abandoned);

    // Trigger commits so the watcher attempts to send on each
    // dropped receiver and prunes them.
    let writer = Connection::open(&db_path).unwrap();
    writer
        .execute(
            "CREATE TABLE IF NOT EXISTS _test_prune(id INTEGER PRIMARY KEY)",
            [],
        )
        .unwrap();

    // Poll for pruning instead of sleeping a fixed duration — the
    // poll thread has to notice the commit, attempt a send on each
    // dropped receiver, and prune. Under parallel test load a short
    // fixed sleep is not enough.
    let give_up_at = std::time::Instant::now() + Duration::from_secs(2);
    loop {
        if shared.subscriber_count() == 0 || std::time::Instant::now() >= give_up_at {
            break;
        }
        std::thread::sleep(Duration::from_millis(5));
        writer
            .execute("INSERT INTO _test_prune(id) VALUES (random())", [])
            .unwrap();
    }
    assert_eq!(shared.subscriber_count(), 0);

    let _ = std::fs::remove_file(&db_path);
}
|
|
1537
|
+
|
|
1538
|
+
/// `poll_data_version` on one connection must increase after a commit
/// made by another connection, and must stay unchanged after that other
/// connection rolls a transaction back.
#[test]
fn data_version_detects_commits_and_ignores_rollbacks() {
    let tmp = std::env::temp_dir().join(format!("honker-dv-test-{}", std::process::id()));
    let _ = std::fs::remove_file(&tmp);
    // PRAGMA data_version detects changes from OTHER connections.
    let watcher = Connection::open(&tmp).unwrap();
    watcher.execute_batch("PRAGMA journal_mode = WAL;").unwrap();
    let writer = Connection::open(&tmp).unwrap();

    let v0 = poll_data_version(&watcher).unwrap();

    // Commit increments data_version (observed by watcher).
    writer.execute("CREATE TABLE t(x INTEGER)", []).unwrap();
    let v1 = poll_data_version(&watcher).unwrap();
    assert!(v1 > v0, "commit should increment data_version");

    // Rollback does NOT increment data_version.
    writer.execute_batch("BEGIN IMMEDIATE;").unwrap();
    writer.execute("INSERT INTO t VALUES (1)", []).unwrap();
    writer.execute_batch("ROLLBACK;").unwrap();
    let v2 = poll_data_version(&watcher).unwrap();
    assert_eq!(v2, v1, "rollback should not increment data_version");

    // Close both connections before cleanup — Windows can't unlink
    // open files, so removing while they are live leaks the temp DB.
    drop(writer);
    drop(watcher);

    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));
}
|
|
1563
|
+
|
|
1564
|
+
/// `poll_data_version` must keep advancing across a
/// `wal_checkpoint(TRUNCATE)` issued by another connection: after the
/// initial commit, after the checkpoint itself, and after a commit that
/// follows the checkpoint.
#[test]
fn data_version_survives_wal_checkpoint() {
    let tmp = std::env::temp_dir().join(format!("honker-dv-ckpt-test-{}", std::process::id()));
    let _ = std::fs::remove_file(&tmp);
    // Watcher connection — observe changes from the writer.
    let watcher = Connection::open(&tmp).unwrap();
    watcher.execute_batch("PRAGMA journal_mode = WAL;").unwrap();
    let w0 = poll_data_version(&watcher).unwrap();

    // Writer connection — make changes.
    let writer = Connection::open(&tmp).unwrap();
    writer.execute("CREATE TABLE t(x INTEGER)", []).unwrap();
    let w1 = poll_data_version(&watcher).unwrap();
    assert!(
        w1 > w0,
        "commit from other conn should increment data_version"
    );

    // Checkpoint truncates WAL; watcher should still see the change.
    writer
        .execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")
        .unwrap();
    let w2 = poll_data_version(&watcher).unwrap();
    assert!(
        w2 > w1,
        "checkpoint from other conn should increment data_version"
    );

    // Post-checkpoint commit still detected.
    writer.execute("INSERT INTO t VALUES (1)", []).unwrap();
    let w3 = poll_data_version(&watcher).unwrap();
    assert!(
        w3 > w2,
        "post-checkpoint commit should increment data_version"
    );

    // Close both connections before cleanup — Windows can't unlink
    // open files, so removing while they are live leaks the temp DB.
    drop(writer);
    drop(watcher);

    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));
}
|
|
1602
|
+
|
|
1603
|
+
// Only compiled where stat_identity returns real values. On other
// targets the function returns (0, 0) for every call, so the
// assert_ne! below would fire.
/// Two distinct files must have different identities, and after one is
/// renamed over the other the surviving path must report the identity
/// of the file that was moved in.
#[cfg(any(unix, windows))]
#[test]
fn stat_identity_detects_file_replacement() {
    let original = std::env::temp_dir().join(format!("honker-id-test-{}", std::process::id()));
    let replacement = std::env::temp_dir().join(format!("honker-id-test2-{}", std::process::id()));
    for stale in [&original, &replacement] {
        let _ = std::fs::remove_file(stale);
    }

    // Two separate files on disk.
    std::fs::write(&original, b"original").unwrap();
    std::fs::write(&replacement, b"replacement").unwrap();

    let original_id = stat_identity(&original).unwrap();
    let replacement_id = stat_identity(&replacement).unwrap();
    assert_ne!(
        original_id, replacement_id,
        "different files should have different identities"
    );

    // Rename the replacement over the original path; that path must now
    // report the replacement's identity.
    std::fs::rename(&replacement, &original).unwrap();
    let after_rename_id = stat_identity(&original).unwrap();
    assert_eq!(
        after_rename_id, replacement_id,
        "renamed file should carry the replacement's identity"
    );

    let _ = std::fs::remove_file(&original);
}
|
|
1632
|
+
|
|
1633
|
+
/// Exercises `fold_high_res` directly with hand-built 128-bit file
/// ids. CI runners use NTFS, so the live `stat_identity` test never
/// reaches the `HighRes` arm with non-zero upper bits; these synthetic
/// inputs do.
#[cfg(any(unix, windows))]
#[test]
fn fold_high_res_uses_both_halves() {
    // NTFS-shaped id: the upper 64 bits are zero, so the folded index
    // is just the lower half and the volume serial passes through.
    let ntfs_file_id = 0x0000_0000_0000_0000_DEAD_BEEF_CAFE_F00D;
    let (ntfs_vsn, ntfs_idx) = fold_high_res(0xAABB, ntfs_file_id);
    assert_eq!(ntfs_vsn, 0xAABB);
    assert_eq!(ntfs_idx, 0xDEAD_BEEF_CAFE_F00D);

    // ReFS-shaped id: both halves are populated, so the folded index
    // is upper XOR lower.
    let upper = 0x1111_2222_3333_4444u64;
    let lower = 0x5555_6666_7777_8888u64;
    let refs_file_id = (u128::from(upper) << 64) | u128::from(lower);
    let (refs_vsn, refs_idx) = fold_high_res(0xCCDD, refs_file_id);
    assert_eq!(refs_vsn, 0xCCDD);
    assert_eq!(refs_idx, upper ^ lower);

    // Adversarial: identical halves cancel to 0. This is the known
    // XOR weakness; documented and acceptable because ReFS file_ids
    // aren't constructed to satisfy this property.
    let half = 0xDEAD_BEEF_CAFE_F00Du64;
    let mirrored_file_id = (u128::from(half) << 64) | u128::from(half);
    let (_, mirrored_idx) = fold_high_res(0, mirrored_file_id);
    assert_eq!(mirrored_idx, 0);
}
|
|
1661
|
+
|
|
1662
|
+
/// Watcher's dead-man's switch panics when the db file is
/// replaced under it. Unix-only: rename-over-open works on
/// Linux/macOS (the litestream / NFS-remount scenario) but
/// Windows rejects it even with FILE_SHARE_DELETE, so the
/// trigger is unreachable there. Windows behavior intentionally
/// untested — replacement isn't a typical Windows pattern.
///
/// The test spawns an `UpdateWatcher`, renames a second database over
/// the watched path, then joins the watcher and checks that the join
/// result is `Err` with a panic message containing
/// "database file replaced".
#[cfg(unix)]
#[test]
fn update_watcher_panics_on_file_replacement() {
    let tmp =
        std::env::temp_dir().join(format!("honker-watcher-replace-{}", std::process::id()));
    let _ = std::fs::remove_file(&tmp);

    // Create the DB so the watcher can open + stat it.
    {
        let conn = Connection::open(&tmp).unwrap();
        conn.execute_batch("PRAGMA journal_mode = WAL;").unwrap();
    }

    let watcher = UpdateWatcher::spawn(tmp.clone(), || {});

    // Give the watcher thread time to open and capture the initial
    // file identity before we replace the file.
    std::thread::sleep(Duration::from_millis(200));

    // Replace the file with an atomic rename rather than
    // delete+create: a different file is swapped in under the same
    // path while the watcher still has the old one open. (This is the
    // step Windows rejects, hence the `#[cfg(unix)]` gate above.)
    let tmp2 =
        std::env::temp_dir().join(format!("honker-watcher-replace-new-{}", std::process::id()));
    let _ = std::fs::remove_file(&tmp2);
    {
        let conn = Connection::open(&tmp2).unwrap();
        conn.execute_batch("PRAGMA journal_mode = WAL;").unwrap();
    }
    std::fs::rename(&tmp2, &tmp).unwrap();

    // Wait for the next time-based identity check to fire and
    // panic.
    std::thread::sleep(Duration::from_millis(500));

    // Stop and join. Should be Err because the thread panicked.
    let result = watcher.join();
    assert!(
        result.is_err(),
        "watcher should have panicked on file replacement, instead got Ok"
    );
    let payload = result.unwrap_err();
    // Panic payloads are `String` for formatted panics and `&str` for
    // literal ones; accept either so the message check below works.
    let msg = if let Some(s) = payload.downcast_ref::<String>() {
        s.clone()
    } else if let Some(s) = payload.downcast_ref::<&str>() {
        (*s).to_string()
    } else {
        String::from("<panic payload not a string>")
    };
    assert!(
        msg.contains("database file replaced"),
        "panic message should mention replacement; got: {msg}"
    );

    // Remove the database and any WAL sidecar files left next to it,
    // matching the cleanup of the sibling death-signal test.
    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));
}
|
|
1725
|
+
|
|
1726
|
+
/// Verify `poll_data_version` detects cross-connection commits in
|
|
1727
|
+
/// every supported journal mode. WAL was the only mode that had
|
|
1728
|
+
/// explicit coverage before; the bootstrap-without-database update in
|
|
1729
|
+
/// commit `c6716d5` made the watcher work in any mode but never
|
|
1730
|
+
/// added tests for the others. This closes that gap.
|
|
1731
|
+
fn poll_data_version_works_in_journal_mode(mode: &str) {
|
|
1732
|
+
let tmp = std::env::temp_dir().join(format!(
|
|
1733
|
+
"honker-jm-{}-{}",
|
|
1734
|
+
mode.to_ascii_lowercase(),
|
|
1735
|
+
std::process::id()
|
|
1736
|
+
));
|
|
1737
|
+
let _ = std::fs::remove_file(&tmp);
|
|
1738
|
+
|
|
1739
|
+
let watcher = Connection::open(&tmp).unwrap();
|
|
1740
|
+
watcher
|
|
1741
|
+
.execute_batch(&format!("PRAGMA journal_mode = {mode};"))
|
|
1742
|
+
.unwrap();
|
|
1743
|
+
|
|
1744
|
+
// Verify the mode actually took effect. SQLite returns the
|
|
1745
|
+
// resulting mode from the PRAGMA, but `execute_batch`
|
|
1746
|
+
// discards the result — without this assertion, a silent
|
|
1747
|
+
// fallback (e.g., to `MEMORY` for `:memory:` databases, or
|
|
1748
|
+
// a sticky setting that won't change) would leave the test
|
|
1749
|
+
// green while exercising a different mode entirely.
|
|
1750
|
+
let actual: String = watcher
|
|
1751
|
+
.pragma_query_value(None, "journal_mode", |r| r.get(0))
|
|
1752
|
+
.unwrap();
|
|
1753
|
+
assert_eq!(
|
|
1754
|
+
actual.to_ascii_uppercase(),
|
|
1755
|
+
mode.to_ascii_uppercase(),
|
|
1756
|
+
"PRAGMA journal_mode = {mode} silently fell back to {actual}"
|
|
1757
|
+
);
|
|
1758
|
+
|
|
1759
|
+
let writer = Connection::open(&tmp).unwrap();
|
|
1760
|
+
|
|
1761
|
+
let v0 = poll_data_version(&watcher).unwrap();
|
|
1762
|
+
|
|
1763
|
+
// Commit increments data_version (observed across connections).
|
|
1764
|
+
writer.execute("CREATE TABLE t(x INTEGER)", []).unwrap();
|
|
1765
|
+
let v1 = poll_data_version(&watcher).unwrap();
|
|
1766
|
+
assert!(
|
|
1767
|
+
v1 > v0,
|
|
1768
|
+
"journal_mode={mode}: cross-conn commit should bump \
|
|
1769
|
+
data_version; saw {v0} -> {v1}"
|
|
1770
|
+
);
|
|
1771
|
+
|
|
1772
|
+
// Rollback should NOT increment data_version (still true in
|
|
1773
|
+
// non-WAL modes — the docs are journal-mode-agnostic on this).
|
|
1774
|
+
writer.execute_batch("BEGIN IMMEDIATE;").unwrap();
|
|
1775
|
+
writer.execute("INSERT INTO t VALUES (1)", []).unwrap();
|
|
1776
|
+
writer.execute_batch("ROLLBACK;").unwrap();
|
|
1777
|
+
let v2 = poll_data_version(&watcher).unwrap();
|
|
1778
|
+
assert_eq!(
|
|
1779
|
+
v2, v1,
|
|
1780
|
+
"journal_mode={mode}: rollback should not bump data_version"
|
|
1781
|
+
);
|
|
1782
|
+
|
|
1783
|
+
let _ = std::fs::remove_file(&tmp);
|
|
1784
|
+
let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
|
|
1785
|
+
let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));
|
|
1786
|
+
let _ = std::fs::remove_file(format!("{}-journal", tmp.display()));
|
|
1787
|
+
}
|
|
1788
|
+
|
|
1789
|
+
/// Runs the shared journal-mode check with `journal_mode = WAL`.
#[test]
fn poll_data_version_works_in_wal() {
    let journal_mode = "WAL";
    poll_data_version_works_in_journal_mode(journal_mode);
}
|
|
1793
|
+
|
|
1794
|
+
/// Runs the shared journal-mode check with `journal_mode = DELETE`.
#[test]
fn poll_data_version_works_in_delete() {
    let journal_mode = "DELETE";
    poll_data_version_works_in_journal_mode(journal_mode);
}
|
|
1798
|
+
|
|
1799
|
+
/// Runs the shared journal-mode check with `journal_mode = TRUNCATE`.
#[test]
fn poll_data_version_works_in_truncate() {
    let journal_mode = "TRUNCATE";
    poll_data_version_works_in_journal_mode(journal_mode);
}
|
|
1803
|
+
|
|
1804
|
+
/// Runs the shared journal-mode check with `journal_mode = PERSIST`.
#[test]
fn poll_data_version_works_in_persist() {
    let journal_mode = "PERSIST";
    poll_data_version_works_in_journal_mode(journal_mode);
}
|
|
1808
|
+
|
|
1809
|
+
// MEMORY journal mode is per-connection (the journal lives in
|
|
1810
|
+
// RAM, not a file), so cross-connection rollback semantics are
|
|
1811
|
+
// different. SQLite's docs are clear that MEMORY is intended for
|
|
1812
|
+
// single-process use. honker doesn't promise MEMORY support, so
|
|
1813
|
+
// we don't test it here — flagging in case it ever becomes a
|
|
1814
|
+
// user-visible question.
|
|
1815
|
+
|
|
1816
|
+
/// Crash-recovery: python3 child commits in a loop, parent
/// SIGKILLs it mid-flight, reopens, asserts integrity_check=ok,
/// committed rows survive, and reopen works (WAL replay).
/// Cross-platform — Windows tests that file-handle release on
/// kill is clean enough for reopen to succeed.
///
/// The test is skipped (with a message on stderr) when neither
/// `python3` nor `python` can be run.
#[test]
fn writer_killed_mid_workload_leaves_db_consistent() {
    use std::process::{Command, Stdio};

    // Try `python3` then `python`. CI always has one; dev boxes
    // may not. Skip loudly rather than fail silently.
    let python = ["python3", "python"].iter().copied().find(|cmd| {
        Command::new(cmd)
            .arg("--version")
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .map(|s| s.success())
            .unwrap_or(false)
    });
    let Some(python) = python else {
        eprintln!(
            "writer_killed_mid_workload_leaves_db_consistent: \
             no `python3` or `python` on PATH; skipping (set up Python \
             to exercise the crash-recovery path)"
        );
        return;
    };

    let tmp = std::env::temp_dir().join(format!("honker-crash-{}", std::process::id()));
    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));

    // Bootstrap schema + WAL mode in the parent.
    {
        let conn = Connection::open(&tmp).unwrap();
        conn.execute_batch(
            "PRAGMA journal_mode = WAL;
             PRAGMA synchronous = NORMAL;
             CREATE TABLE q(id INTEGER PRIMARY KEY AUTOINCREMENT, v INTEGER);",
        )
        .unwrap();
    }

    // Open the polling connection BEFORE spawning the child: if this
    // open fails the test panics with no orphaned writer process left
    // looping in the background.
    let read_conn = Connection::open(&tmp).unwrap();

    // Python child that writes committed rows in a tight loop. Opens
    // the DB in WAL mode + synchronous=NORMAL to match honker's
    // default. Each iteration is its own committed transaction. The
    // database path is handed over as a process argument
    // (`sys.argv[1]`) instead of being spliced into the script text, so
    // no quoting or escaping of the path is involved on any platform.
    let writer_script = r#"
import sqlite3
import sys
conn = sqlite3.connect(sys.argv[1])
conn.execute("PRAGMA journal_mode = WAL")
conn.execute("PRAGMA synchronous = NORMAL")
i = 0
while True:
    conn.execute("INSERT INTO q(v) VALUES (?)", (i,))
    conn.commit()
    i += 1
"#;

    let mut child = Command::new(python)
        .arg("-c")
        .arg(writer_script)
        .arg(&tmp)
        .stdout(Stdio::null())
        .stderr(Stdio::piped())
        .spawn()
        .unwrap_or_else(|e| panic!("spawn {python} child writer: {e}"));

    // Poll the database from a separate connection until we see
    // at least one committed row. This turns a timing-fragile
    // "sleep N ms and hope" into a deterministic "kill once
    // we've observed a commit" — robust across slow-Python
    // startup on Windows, loaded CI runners, etc.
    let deadline = std::time::Instant::now() + Duration::from_secs(15);
    let mut high_water: i64 = 0;
    while std::time::Instant::now() < deadline {
        // Bail early if the child died — surface its stderr
        // rather than the downstream "got 0 rows" symptom.
        if let Ok(Some(status)) = child.try_wait() {
            let mut stderr = String::new();
            if let Some(mut s) = child.stderr.take() {
                use std::io::Read;
                let _ = s.read_to_string(&mut stderr);
            }
            panic!(
                "python child exited before kill (status={status:?}); \
                 stderr: {stderr}"
            );
        }
        if let Ok(c) = read_conn.query_row("SELECT count(*) FROM q", [], |r| r.get::<_, i64>(0))
        {
            if c > 0 {
                high_water = c;
                break;
            }
        }
        std::thread::sleep(Duration::from_millis(50));
    }
    // Drop the read connection before kill so we don't hold any
    // shared lock when the child's process is reaped.
    drop(read_conn);

    // Let a few more commits accumulate so we test "lots of
    // committed transactions, then sudden death" rather than
    // "exactly one commit" — gives the WAL-replay path
    // something more interesting to recover.
    std::thread::sleep(Duration::from_millis(200));

    // Hard kill. `Child::kill` sends SIGKILL on unix and
    // `TerminateProcess` on Windows. No chance for graceful
    // close — file handles are released by the OS, and any
    // outstanding writes-since-last-fsync are lost.
    let _ = child.kill();
    let _ = child.wait();

    // Reopen and verify. On Windows the OS may take a moment
    // to fully release the killed process's file locks; retrying
    // the open with a growing back-off absorbs that without flaking.
    let conn = (0..20)
        .find_map(|i| match Connection::open(&tmp) {
            Ok(c) => Some(c),
            Err(_) => {
                std::thread::sleep(Duration::from_millis(50 * (i + 1)));
                None
            }
        })
        .unwrap_or_else(|| {
            Connection::open(&tmp).expect("reopen after retry budget exhausted")
        });
    let integrity: String = conn
        .query_row("PRAGMA integrity_check", [], |r| r.get(0))
        .unwrap();
    assert_eq!(
        integrity, "ok",
        "DB should be intact after writer hard-kill during WAL writes"
    );

    let count: i64 = conn
        .query_row("SELECT count(*) FROM q", [], |r| r.get(0))
        .unwrap();
    // Stronger durability assertion: at least the rows we
    // observed before kill must still be there. (Likely many
    // more committed in the +200ms window before the kill —
    // we're checking the floor, not the exact count.)
    assert!(
        count >= high_water,
        "lost committed rows: observed {high_water} before kill, \
         only {count} present after reopen"
    );
    assert!(
        count > 0,
        "expected the child to commit some rows in the 15s window \
         before timeout; got {count}"
    );

    // Drop the connection before cleanup — Windows can't unlink
    // open files. (Linux/macOS tolerate this either way.)
    drop(conn);

    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));
}
|
|
1989
|
+
|
|
1990
|
+
/// Long-running soak: watcher + committer for
/// `HONKER_SOAK_DURATION_SECS` (default 1h). Asserts
/// integrity_check=ok, exact row count, and ≥10% of expected
/// wake rate. Doesn't track leaks (run under valgrind/heaptrack
/// for that — issue #12). Ignored by default; CI never runs it.
///
/// The wait ends early if the writer thread finishes on its own (which
/// only happens when it panicked), so a broken run is reported right
/// away instead of after the full soak duration.
///
/// ```sh
/// HONKER_SOAK_DURATION_SECS=600 \
///   cargo test -p honker-core --release --lib \
///   soak_watcher_durability -- --ignored --nocapture
/// ```
#[test]
#[ignore]
fn soak_watcher_durability() {
    use std::sync::Arc;
    use std::sync::atomic::{AtomicU64, Ordering};

    // Unset or unparsable values fall back to one hour.
    let duration_secs: u64 = std::env::var("HONKER_SOAK_DURATION_SECS")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(3600);

    eprintln!("soak: running for {duration_secs} seconds");

    let tmp = std::env::temp_dir().join(format!("honker-soak-{}", std::process::id()));
    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));

    {
        let conn = Connection::open(&tmp).unwrap();
        conn.execute_batch(
            "PRAGMA journal_mode = WAL;
             PRAGMA synchronous = NORMAL;
             CREATE TABLE q(id INTEGER PRIMARY KEY AUTOINCREMENT, v INTEGER);",
        )
        .unwrap();
    }

    // Counts how many times the watcher callback fired.
    let observed = Arc::new(AtomicU64::new(0));
    let observed_w = observed.clone();
    let watcher = UpdateWatcher::spawn(tmp.clone(), move || {
        observed_w.fetch_add(1, Ordering::Relaxed);
    });

    // Committer thread. Commits ~100/sec — pacing keeps WAL from
    // growing unboundedly between checkpoints and gives the
    // watcher time to actually observe each change.
    let stop = Arc::new(std::sync::atomic::AtomicBool::new(false));
    let stop_w = stop.clone();
    let tmp_w = tmp.clone();
    let writer_handle = std::thread::Builder::new()
        .name("soak-writer".into())
        .spawn(move || {
            let conn = Connection::open(&tmp_w).unwrap();
            let mut i: i64 = 0;
            while !stop_w.load(Ordering::Acquire) {
                conn.execute("INSERT INTO q(v) VALUES (?1)", [i]).unwrap();
                i += 1;
                std::thread::sleep(Duration::from_millis(10));
            }
            // Number of rows this thread committed.
            i
        })
        .unwrap();

    // Run the soak. Sleep in slices of at most one second so a writer
    // that died early is noticed promptly; `stop` is still false here,
    // so the writer can only have finished by panicking.
    let soak_started = std::time::Instant::now();
    let soak_total = Duration::from_secs(duration_secs);
    loop {
        let elapsed = soak_started.elapsed();
        if elapsed >= soak_total || writer_handle.is_finished() {
            break;
        }
        std::thread::sleep((soak_total - elapsed).min(Duration::from_secs(1)));
    }

    // Stop the writer and join. join() returns Err if the
    // thread panicked; surface that explicitly rather than the
    // opaque `unwrap` panic-on-Err message.
    stop.store(true, Ordering::Release);
    let writer_result = writer_handle.join();
    assert!(
        writer_result.is_ok(),
        "writer thread panicked during soak: {writer_result:?}"
    );
    let writes = writer_result.unwrap();

    // Stop the watcher and join. join() returns Err if it
    // panicked; for a clean soak we expect Ok.
    let watcher_result = watcher.join();
    assert!(
        watcher_result.is_ok(),
        "watcher thread panicked during soak: {watcher_result:?}"
    );

    // Verify integrity, row count, and that the watcher observed
    // a reasonable fraction of the writes.
    let conn = Connection::open(&tmp).unwrap();
    let integrity: String = conn
        .query_row("PRAGMA integrity_check", [], |r| r.get(0))
        .unwrap();
    assert_eq!(integrity, "ok", "soak ended with corrupt DB");

    let count: i64 = conn
        .query_row("SELECT count(*) FROM q", [], |r| r.get(0))
        .unwrap();
    assert_eq!(
        count, writes,
        "row count {count} should match writer's reported {writes}"
    );

    let observed_count = observed.load(Ordering::Relaxed);
    // The committer commits every 10ms → ~100 wakes/sec
    // expected. Floor at 10% of that absorbs runner jitter,
    // merged ticks (multiple commits in one watcher poll), and
    // initial-warmup time. Anything below this floor means the
    // watcher silently stalled or fired far below the commit
    // rate — both real regressions worth catching.
    let expected = duration_secs.saturating_mul(100);
    let floor = expected / 10;
    assert!(
        observed_count >= floor,
        "watcher saw only {observed_count} wakes in {duration_secs}s; \
         expected ≥ {floor} (10% of theoretical {expected}; writer committed {writes})"
    );

    eprintln!(
        "soak: {duration_secs}s, {writes} writes, {observed_count} observed wakes, integrity ok"
    );

    // Drop the connection before cleanup — Windows can't unlink
    // open files.
    drop(conn);

    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));
}
|
|
2116
|
+
|
|
2117
|
+
/// Migration check for databases created before the `enabled` column existed.
///
/// Builds `_honker_scheduler_tasks` by hand without `enabled`, inserts one
/// row, runs `bootstrap_honker_schema`, and then verifies that the column is
/// present, the pre-existing row is still there with `enabled = 1`, and a
/// second bootstrap call succeeds.
#[test]
fn bootstrap_pre_mantle_database_gets_enabled_column() {
    const LEGACY_SCHEMA: &str = "CREATE TABLE _honker_scheduler_tasks (
           name TEXT PRIMARY KEY,
           queue TEXT NOT NULL,
           cron_expr TEXT NOT NULL,
           payload TEXT NOT NULL,
           priority INTEGER NOT NULL DEFAULT 0,
           expires_s INTEGER,
           next_fire_at INTEGER NOT NULL
         );";
    const LEGACY_ROW: &str = "INSERT INTO _honker_scheduler_tasks
           (name, queue, cron_expr, payload, priority, expires_s, next_fire_at)
         VALUES ('legacy', 'q', '0 9 * * *', '{}', 0, NULL, 1)";

    let db = mem();

    // Old-style table plus one row, so we can prove data survives the upgrade.
    db.execute_batch(LEGACY_SCHEMA).unwrap();
    db.execute(LEGACY_ROW, []).unwrap();

    bootstrap_honker_schema(&db).unwrap();

    // Any failure here (including "no rows") counts as "column missing".
    let column_present = db
        .query_row(
            "SELECT 1 FROM pragma_table_info('_honker_scheduler_tasks') WHERE name='enabled'",
            [],
            |_| Ok(()),
        )
        .is_ok();
    assert!(
        column_present,
        "enabled column should be present after bootstrap"
    );

    // The legacy row must still exist and must have picked up the default.
    let (row_count, enabled_flag) = db
        .query_row(
            "SELECT COUNT(*), COALESCE(MAX(enabled), -1) FROM _honker_scheduler_tasks WHERE name='legacy'",
            [],
            |row| {
                let rows: i64 = row.get(0)?;
                let flag: i64 = row.get(1)?;
                Ok((rows, flag))
            },
        )
        .unwrap();
    assert_eq!(row_count, 1, "existing row must survive migration");
    assert_eq!(enabled_flag, 1, "existing row must default to enabled=1");

    // Running bootstrap again must not fail (idempotent).
    bootstrap_honker_schema(&db).unwrap();
}
|
|
2170
|
+
|
|
2171
|
+
/// Pins the schema produced by `bootstrap_honker_schema` on an empty database:
/// column counts / names of every `_honker_*` table checked below and the
/// presence of the `_honker_live_claim` index. Bootstrap is run twice first to
/// confirm that a repeated call succeeds.
#[test]
fn bootstrap_honker_schema_creates_tables_and_index() {
    let conn = mem();
    bootstrap_honker_schema(&conn).unwrap();
    // Second call must also succeed (idempotent).
    bootstrap_honker_schema(&conn).unwrap();

    // Column names of `table`, in the order `pragma_table_info` reports them.
    let columns_of = |table: &str| -> Vec<String> {
        let names = conn
            .prepare(&format!("SELECT name FROM pragma_table_info('{table}')"))
            .unwrap()
            .query_map([], |row| row.get::<_, String>(0))
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        names
    };

    // _honker_live has the 12 columns we expect (Python binding and the
    // extension have historically disagreed on _honker_dead column count;
    // this pins both).
    let live = columns_of("_honker_live");
    assert_eq!(live.len(), 12);
    assert!(live.contains(&"expires_at".to_string()));

    let dead = columns_of("_honker_dead");
    assert_eq!(dead.len(), 10);
    assert!(dead.contains(&"priority".to_string()));
    assert!(dead.contains(&"run_at".to_string()));
    assert!(dead.contains(&"max_attempts".to_string()));
    assert!(dead.contains(&"created_at".to_string()));

    // Partial index present.
    let claim_indexes: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM sqlite_master
             WHERE type='index' AND name='_honker_live_claim'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(claim_indexes, 1);

    // _honker_locks table present for db.lock() support.
    assert_eq!(
        columns_of("_honker_locks"),
        vec!["name", "owner", "expires_at"]
    );

    // _honker_rate_limits table present for db.try_rate_limit().
    assert_eq!(
        columns_of("_honker_rate_limits"),
        vec!["name", "window_start", "count"]
    );

    // _honker_scheduler_tasks table present for Scheduler's per-task
    // registration + next_fire_at persistence.
    assert_eq!(
        columns_of("_honker_scheduler_tasks"),
        vec![
            "name",
            "queue",
            "cron_expr",
            "payload",
            "priority",
            "expires_s",
            "next_fire_at",
            "enabled",
        ],
    );

    assert_eq!(
        columns_of("_honker_results"),
        vec!["job_id", "value", "created_at", "expires_at"]
    );

    // _honker_stream + _honker_stream_consumers tables for durable
    // pub/sub streams.
    assert_eq!(
        columns_of("_honker_stream"),
        vec!["offset", "topic", "key", "payload", "created_at"]
    );
    assert_eq!(
        columns_of("_honker_stream_consumers"),
        vec!["name", "topic", "offset"]
    );
}
|
|
2292
|
+
|
|
2293
|
+
// -----------------------------------------------------------------
|
|
2294
|
+
// Optional backend tests
|
|
2295
|
+
// -----------------------------------------------------------------
|
|
2296
|
+
|
|
2297
|
+
/// Verify that the kernel-watch backend reports every commit.
///
/// Five commits are issued 30 ms apart so the watcher has time to handle
/// each one before the next lands. Extra (spurious) wakes are allowed; a
/// missed commit is not, so the test asserts `observed >= n`.
#[test]
#[cfg(feature = "kernel-watcher")]
fn kernel_watcher_detects_all_commits() {
    use std::sync::atomic::{AtomicU32, Ordering as AO};

    let tmp = std::env::temp_dir().join(format!(
        "honker-kernel-watcher-{}-{}",
        std::process::id(),
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .subsec_nanos()
    ));
    let _ = std::fs::remove_file(&tmp);

    let writer = open_conn(tmp.to_str().unwrap(), false).unwrap();
    writer.execute_batch("CREATE TABLE t (x INT)").unwrap();
    // One initial write ensures the -wal file exists so the watcher
    // can attach a per-file watch at startup (kqueue watches the file
    // descriptor, not the directory, for write events).
    writer.execute("INSERT INTO t VALUES (0)", []).unwrap();
    std::thread::sleep(Duration::from_millis(20));

    let count = Arc::new(AtomicU32::new(0));
    let count_t = count.clone();
    let watcher = UpdateWatcher::spawn_with_config(
        tmp.clone(),
        move || {
            count_t.fetch_add(1, AO::Relaxed);
        },
        WatcherConfig {
            backend: WatcherBackend::KernelWatch,
        },
    );

    // Drain any initialization wakes.
    std::thread::sleep(Duration::from_millis(50));
    count.store(0, AO::SeqCst);

    // n commits spaced 30 ms apart — gives the event loop time to
    // process each event individually before the next arrives.
    let n: u32 = 5;
    for i in 1..=n {
        writer
            .execute(&format!("INSERT INTO t VALUES ({i})"), [])
            .unwrap();
        std::thread::sleep(Duration::from_millis(30));
    }
    // Wait longer than both the event delivery latency and the
    // safety-net interval to drain any pending events.
    std::thread::sleep(Duration::from_millis(600));

    let observed = count.load(AO::SeqCst);
    drop(watcher);
    // Close the writer before deleting the database files, as the sibling
    // watcher tests do, so cleanup never runs against a still-open database
    // (deleting an open file is presumably what fails on Windows).
    drop(writer);
    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));

    // Experimental contract: spurious wakes are allowed (the backend
    // fires on every filesystem event, and SQLite produces several
    // events per commit). The thing that must not happen is a *missed*
    // commit — assert at least n wakes.
    assert!(
        observed >= n,
        "kernel watcher detected {observed} wakes for {n} commits — \
         missed at least one"
    );
}
|
|
2368
|
+
|
|
2369
|
+
/// Equivalence gate between the shm fast path and the baseline
/// `PRAGMA data_version` poller.
///
/// Both watchers observe the same database while five commits are issued
/// 20 ms apart; each watcher must report exactly five wakes.
#[test]
#[cfg(feature = "shm-fast-path")]
fn shm_fast_path_equivalence_with_pragma_baseline() {
    use std::sync::atomic::{AtomicU32, Ordering as AO};

    let db_file = {
        let pid = std::process::id();
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .subsec_nanos();
        std::env::temp_dir().join(format!("honker-shm-equiv-{}-{}", pid, nanos))
    };
    let _ = std::fs::remove_file(&db_file);

    let writer = open_conn(db_file.to_str().unwrap(), false).unwrap();
    writer.execute_batch("CREATE TABLE t (x INT)").unwrap();
    // One write ensures the -shm file exists before spawning the shm watcher.
    writer.execute("INSERT INTO t VALUES (0)", []).unwrap();
    std::thread::sleep(Duration::from_millis(20));

    // Baseline watcher (default backend) is started first.
    let baseline_hits = Arc::new(AtomicU32::new(0));
    let baseline = {
        let hits = Arc::clone(&baseline_hits);
        UpdateWatcher::spawn(db_file.clone(), move || {
            hits.fetch_add(1, AO::Relaxed);
        })
    };

    // Then the shm fast-path watcher on the same file.
    let shm_hits = Arc::new(AtomicU32::new(0));
    let shm = {
        let hits = Arc::clone(&shm_hits);
        UpdateWatcher::spawn_with_config(
            db_file.clone(),
            move || {
                hits.fetch_add(1, AO::Relaxed);
            },
            WatcherConfig {
                backend: WatcherBackend::ShmFastPath,
            },
        )
    };

    // Drain initialization wakes.
    std::thread::sleep(Duration::from_millis(30));
    baseline_hits.store(0, AO::SeqCst);
    shm_hits.store(0, AO::SeqCst);

    // Commits spaced 20 ms apart — well above both polling intervals.
    let commits: u32 = 5;
    let spacing = Duration::from_millis(20);
    for value in 1..=commits {
        let sql = format!("INSERT INTO t VALUES ({value})");
        writer.execute(&sql, []).unwrap();
        std::thread::sleep(spacing);
    }
    std::thread::sleep(Duration::from_millis(100));

    let baseline_seen = baseline_hits.load(AO::SeqCst);
    let shm_seen = shm_hits.load(AO::SeqCst);

    // Shut everything down and clean up before asserting.
    drop(baseline);
    drop(shm);
    drop(writer);
    let _ = std::fs::remove_file(&db_file);
    for suffix in ["-wal", "-shm"] {
        let _ = std::fs::remove_file(format!("{}{suffix}", db_file.display()));
    }

    assert_eq!(
        baseline_seen, commits,
        "baseline detected {} wakes, expected {}",
        baseline_seen, commits
    );
    assert_eq!(
        shm_seen, commits,
        "shm fast path detected {} wakes, expected {} (same as baseline {})",
        shm_seen, commits, baseline_seen
    );
}
|
|
2444
|
+
|
|
2445
|
+
// -----------------------------------------------------------------
|
|
2446
|
+
// Journal-mode coverage for the experimental backends
|
|
2447
|
+
//
|
|
2448
|
+
// honker's `open_conn` always sets WAL, so the public Python/Node
|
|
2449
|
+
// surface is WAL-only. These tests poke the watchers directly at
|
|
2450
|
+
// databases pre-set to non-WAL modes so we can prove behavior when
|
|
2451
|
+
// the file is in DELETE / TRUNCATE / PERSIST. Justification per
|
|
2452
|
+
// backend:
|
|
2453
|
+
//
|
|
2454
|
+
// - Polling — universally works because `PRAGMA data_version`
|
|
2455
|
+
// advances on every commit regardless of journal mode. Already
|
|
2456
|
+
// exercised by `poll_data_version_works_in_*`.
|
|
2457
|
+
//
|
|
2458
|
+
// - Kernel watcher — in non-WAL modes there is no `-wal` file to
|
|
2459
|
+
// watch directly; we must rely on the parent-directory watch to
|
|
2460
|
+
// pick up `-journal` create / modify / delete events around each
|
|
2461
|
+
// commit. The PRAGMA verification step still gates `on_change()`,
|
|
2462
|
+
// so spurious events (e.g. another file in the dir) just produce
|
|
2463
|
+
// harmless no-op checks.
|
|
2464
|
+
//
|
|
2465
|
+
// - SHM fast path — in non-WAL modes there is no `-shm` file;
|
|
2466
|
+
// `read_ichange` returns `None` and the loop falls back to the
|
|
2467
|
+
// PRAGMA check on every iteration. Effectively becomes a 100 µs
|
|
2468
|
+
// PRAGMA poller — correct, just CPU-heavier than the polling
|
|
2469
|
+
// backend.
|
|
2470
|
+
// -----------------------------------------------------------------
|
|
2471
|
+
|
|
2472
|
+
/// Drive `n` committed inserts through `writer`, spaced
|
|
2473
|
+
/// `spacing_ms` apart, and return how many `on_change()` calls the
|
|
2474
|
+
/// watcher observed (with the initial drain already deducted).
|
|
2475
|
+
fn drive_and_count_wakes(
|
|
2476
|
+
backend: WatcherBackend,
|
|
2477
|
+
db_path: PathBuf,
|
|
2478
|
+
n: u32,
|
|
2479
|
+
spacing_ms: u64,
|
|
2480
|
+
) -> u32 {
|
|
2481
|
+
use std::sync::atomic::{AtomicU32, Ordering as AO};
|
|
2482
|
+
|
|
2483
|
+
let count = Arc::new(AtomicU32::new(0));
|
|
2484
|
+
let count_t = count.clone();
|
|
2485
|
+
let watcher = UpdateWatcher::spawn_with_config(
|
|
2486
|
+
db_path.clone(),
|
|
2487
|
+
move || {
|
|
2488
|
+
count_t.fetch_add(1, AO::Relaxed);
|
|
2489
|
+
},
|
|
2490
|
+
WatcherConfig { backend },
|
|
2491
|
+
);
|
|
2492
|
+
|
|
2493
|
+
// Drain init wakes (covers shm + kernel setup) before baseline.
|
|
2494
|
+
std::thread::sleep(Duration::from_millis(80));
|
|
2495
|
+
count.store(0, AO::SeqCst);
|
|
2496
|
+
|
|
2497
|
+
let writer = Connection::open(&db_path).unwrap();
|
|
2498
|
+
for i in 1..=n {
|
|
2499
|
+
writer
|
|
2500
|
+
.execute(&format!("INSERT INTO t VALUES ({i})"), [])
|
|
2501
|
+
.unwrap();
|
|
2502
|
+
std::thread::sleep(Duration::from_millis(spacing_ms));
|
|
2503
|
+
}
|
|
2504
|
+
// Drain the slowest safety net (kernel = 500 ms) + one cycle.
|
|
2505
|
+
std::thread::sleep(Duration::from_millis(700));
|
|
2506
|
+
|
|
2507
|
+
let observed = count.load(AO::SeqCst);
|
|
2508
|
+
drop(watcher);
|
|
2509
|
+
drop(writer);
|
|
2510
|
+
observed
|
|
2511
|
+
}
|
|
2512
|
+
|
|
2513
|
+
/// Set up a fresh database file in `mode` and verify the watcher
|
|
2514
|
+
/// detects every committed insert. Tolerates +1 wake (a commit
|
|
2515
|
+
/// straddling the drain boundary) but does not tolerate misses.
|
|
2516
|
+
fn watcher_works_in_journal_mode(backend: WatcherBackend, mode: &str) {
|
|
2517
|
+
let tmp = std::env::temp_dir().join(format!(
|
|
2518
|
+
"honker-watcher-{}-{}-{}-{}",
|
|
2519
|
+
mode.to_ascii_lowercase(),
|
|
2520
|
+
std::process::id(),
|
|
2521
|
+
std::time::SystemTime::now()
|
|
2522
|
+
.duration_since(std::time::UNIX_EPOCH)
|
|
2523
|
+
.unwrap()
|
|
2524
|
+
.subsec_nanos(),
|
|
2525
|
+
match backend {
|
|
2526
|
+
WatcherBackend::Polling => "poll",
|
|
2527
|
+
#[cfg(feature = "kernel-watcher")]
|
|
2528
|
+
WatcherBackend::KernelWatch => "kw",
|
|
2529
|
+
#[cfg(feature = "shm-fast-path")]
|
|
2530
|
+
WatcherBackend::ShmFastPath => "shm",
|
|
2531
|
+
},
|
|
2532
|
+
));
|
|
2533
|
+
let _ = std::fs::remove_file(&tmp);
|
|
2534
|
+
|
|
2535
|
+
// Watcher inherits the file's journal mode, so set it before opening.
|
|
2536
|
+
let setup = Connection::open(&tmp).unwrap();
|
|
2537
|
+
setup
|
|
2538
|
+
.execute_batch(&format!("PRAGMA journal_mode = {mode};"))
|
|
2539
|
+
.unwrap();
|
|
2540
|
+
let actual: String = setup
|
|
2541
|
+
.pragma_query_value(None, "journal_mode", |r| r.get(0))
|
|
2542
|
+
.unwrap();
|
|
2543
|
+
assert_eq!(
|
|
2544
|
+
actual.to_ascii_uppercase(),
|
|
2545
|
+
mode.to_ascii_uppercase(),
|
|
2546
|
+
"PRAGMA journal_mode = {mode} silently fell back to {actual}"
|
|
2547
|
+
);
|
|
2548
|
+
setup.execute("CREATE TABLE t (x INTEGER)", []).unwrap();
|
|
2549
|
+
// One prior write so -shm exists at watcher startup (shm fast path
|
|
2550
|
+
// needs it; harmless otherwise).
|
|
2551
|
+
setup.execute("INSERT INTO t VALUES (0)", []).unwrap();
|
|
2552
|
+
// Pin -shm only for shm+WAL: Linux/Windows reap -shm on last close.
|
|
2553
|
+
// Other modes must drop setup or Windows errors on shared non-WAL db.
|
|
2554
|
+
#[cfg(feature = "shm-fast-path")]
|
|
2555
|
+
let keep_setup_open =
|
|
2556
|
+
matches!(backend, WatcherBackend::ShmFastPath) && mode.eq_ignore_ascii_case("WAL");
|
|
2557
|
+
#[cfg(not(feature = "shm-fast-path"))]
|
|
2558
|
+
let keep_setup_open = false;
|
|
2559
|
+
let _pinning = if keep_setup_open {
|
|
2560
|
+
Some(setup)
|
|
2561
|
+
} else {
|
|
2562
|
+
drop(setup);
|
|
2563
|
+
None
|
|
2564
|
+
};
|
|
2565
|
+
|
|
2566
|
+
let n: u32 = 5;
|
|
2567
|
+
let observed = drive_and_count_wakes(backend.clone(), tmp.clone(), n, 30);
|
|
2568
|
+
|
|
2569
|
+
drop(_pinning);
|
|
2570
|
+
let _ = std::fs::remove_file(&tmp);
|
|
2571
|
+
let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
|
|
2572
|
+
let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));
|
|
2573
|
+
let _ = std::fs::remove_file(format!("{}-journal", tmp.display()));
|
|
2574
|
+
|
|
2575
|
+
// Polling/shm dedupe → ~1 wake per commit. Kernel fires per
|
|
2576
|
+
// filesystem event (inotify is granular) → upper bound is just
|
|
2577
|
+
// a runaway-watcher guard, not a precise expectation.
|
|
2578
|
+
let upper = match backend {
|
|
2579
|
+
WatcherBackend::Polling => n + 1,
|
|
2580
|
+
#[cfg(feature = "kernel-watcher")]
|
|
2581
|
+
WatcherBackend::KernelWatch => n * 200,
|
|
2582
|
+
#[cfg(feature = "shm-fast-path")]
|
|
2583
|
+
WatcherBackend::ShmFastPath => n + 1,
|
|
2584
|
+
};
|
|
2585
|
+
assert!(
|
|
2586
|
+
observed >= n,
|
|
2587
|
+
"journal_mode={mode}: observed {observed} wakes for {n} commits \
|
|
2588
|
+
(missed at least one)"
|
|
2589
|
+
);
|
|
2590
|
+
assert!(
|
|
2591
|
+
observed <= upper,
|
|
2592
|
+
"journal_mode={mode}: observed {observed} wakes for {n} commits, \
|
|
2593
|
+
upper bound {upper} (runaway watcher?)"
|
|
2594
|
+
);
|
|
2595
|
+
}
|
|
2596
|
+
|
|
2597
|
+
// ---- Polling × every supported journal mode (regression coverage) ----
|
|
2598
|
+
|
|
2599
|
+
/// Polling backend must see every commit on a WAL-mode database.
#[test]
fn polling_watcher_works_in_wal() {
    let journal_mode = "WAL";
    watcher_works_in_journal_mode(WatcherBackend::Polling, journal_mode);
}
|
|
2603
|
+
|
|
2604
|
+
/// Polling backend must see every commit on a DELETE-journal database.
#[test]
fn polling_watcher_works_in_delete() {
    let journal_mode = "DELETE";
    watcher_works_in_journal_mode(WatcherBackend::Polling, journal_mode);
}
|
|
2608
|
+
|
|
2609
|
+
/// Polling backend must see every commit on a TRUNCATE-journal database.
#[test]
fn polling_watcher_works_in_truncate() {
    let journal_mode = "TRUNCATE";
    watcher_works_in_journal_mode(WatcherBackend::Polling, journal_mode);
}
|
|
2613
|
+
|
|
2614
|
+
/// Polling backend must see every commit on a PERSIST-journal database.
#[test]
fn polling_watcher_works_in_persist() {
    let journal_mode = "PERSIST";
    watcher_works_in_journal_mode(WatcherBackend::Polling, journal_mode);
}
|
|
2618
|
+
|
|
2619
|
+
// ---- Kernel watcher × every supported journal mode ----
|
|
2620
|
+
|
|
2621
|
+
// macOS kqueue limitation: directory-level watches do NOT fire on
|
|
2622
|
+
// writes within existing files (only on entry create/delete/rename).
|
|
2623
|
+
// Per-file watches fire on regular-file writes, but rollback-journal
|
|
2624
|
+
// commit dances on a loaded macOS CI runner produce so few kqueue
|
|
2625
|
+
// events that the wake count is unreliable for non-WAL modes. We
|
|
2626
|
+
// attach to db + journal + dir to maximize coverage, and ship the
|
|
2627
|
+
// backend with documented "missed wakes possible" semantics, but
|
|
2628
|
+
// we don't gate CI on a behavior the kernel won't reliably deliver.
|
|
2629
|
+
// WAL-mode kernel coverage stays mandatory (kernel_watcher_works_in_wal).
|
|
2630
|
+
/// Kernel-watch backend against a DELETE-journal database (ignored on macOS).
#[test]
#[cfg(feature = "kernel-watcher")]
#[cfg_attr(
    target_os = "macos",
    ignore = "kqueue: in-place writes don't fire dir events"
)]
fn kernel_watcher_works_in_delete() {
    let journal_mode = "DELETE";
    watcher_works_in_journal_mode(WatcherBackend::KernelWatch, journal_mode);
}
|
|
2639
|
+
|
|
2640
|
+
/// Kernel-watch backend against a TRUNCATE-journal database (ignored on macOS).
#[test]
#[cfg(feature = "kernel-watcher")]
#[cfg_attr(
    target_os = "macos",
    ignore = "kqueue: in-place writes don't fire dir events"
)]
fn kernel_watcher_works_in_truncate() {
    let journal_mode = "TRUNCATE";
    watcher_works_in_journal_mode(WatcherBackend::KernelWatch, journal_mode);
}
|
|
2649
|
+
|
|
2650
|
+
/// Kernel-watch backend against a PERSIST-journal database (ignored on macOS).
#[test]
#[cfg(feature = "kernel-watcher")]
#[cfg_attr(
    target_os = "macos",
    ignore = "kqueue: in-place writes don't fire dir events"
)]
fn kernel_watcher_works_in_persist() {
    let journal_mode = "PERSIST";
    watcher_works_in_journal_mode(WatcherBackend::KernelWatch, journal_mode);
}
|
|
2659
|
+
|
|
2660
|
+
// ---- SHM fast path: WAL only (it's experimental — non-WAL is
|
|
2661
|
+
// explicitly out of scope, the backend logs and disables itself
|
|
2662
|
+
// when -shm doesn't exist). ----
|
|
2663
|
+
|
|
2664
|
+
/// SHM fast-path backend must see every commit on a WAL-mode database.
#[test]
#[cfg(feature = "shm-fast-path")]
fn shm_fast_path_works_in_wal() {
    let journal_mode = "WAL";
    watcher_works_in_journal_mode(WatcherBackend::ShmFastPath, journal_mode);
}
|
|
2669
|
+
|
|
2670
|
+
// -----------------------------------------------------------------
|
|
2671
|
+
// Wake-latency invariants — proves the experimental backends
|
|
2672
|
+
// actually deliver wakes via their fast paths (kernel events /
|
|
2673
|
+
// mmap reads), not via some accidental fallback. The simplified
|
|
2674
|
+
// backends have no safety nets, so a missed wake just doesn't
|
|
2675
|
+
// fire — these tests would catch that immediately.
|
|
2676
|
+
// -----------------------------------------------------------------
|
|
2677
|
+
|
|
2678
|
+
/// Latency probe shared by the wake-latency tests.
///
/// Opens `db_path` via `open_conn`, creates table `t`, spawns a watcher with
/// `backend`, then performs `n` inserts `spacing_ms` milliseconds apart.
/// Returns one entry per insert: the time in milliseconds between the
/// instant taken just before that insert and the first not-yet-consumed wake
/// at or after it, or `f64::INFINITY` when no such wake was recorded.
#[cfg(any(feature = "kernel-watcher", feature = "shm-fast-path"))]
fn measure_wake_latencies_ms(
    backend: WatcherBackend,
    db_path: PathBuf,
    n: usize,
    spacing_ms: u64,
) -> Vec<f64> {
    use std::sync::Mutex as StdMutex;
    use std::time::Instant;

    let writer = open_conn(db_path.to_str().unwrap(), false).unwrap();
    writer.execute_batch("CREATE TABLE t (x INTEGER)").unwrap();
    // First write so -wal exists at watcher startup.
    writer.execute("INSERT INTO t VALUES (0)", []).unwrap();
    std::thread::sleep(Duration::from_millis(20));

    // Every callback appends its arrival time to this shared log.
    let wake_log: Arc<StdMutex<Vec<Instant>>> = Arc::new(StdMutex::new(Vec::new()));
    let watcher = {
        let log = Arc::clone(&wake_log);
        UpdateWatcher::spawn_with_config(
            db_path.clone(),
            move || {
                let mut guard = log.lock().expect("wake_times mutex poisoned");
                guard.push(Instant::now());
            },
            WatcherConfig { backend },
        )
    };

    // Drain initialization wakes.
    std::thread::sleep(Duration::from_millis(100));
    wake_log.lock().expect("wake_times").clear();

    // Timestamp each write just before it is issued; the timestamps are
    // paired with wake times further down.
    let pause = Duration::from_millis(spacing_ms);
    let mut commit_times: Vec<Instant> = Vec::with_capacity(n);
    for value in 1..=n {
        let started = Instant::now();
        let sql = format!("INSERT INTO t VALUES ({value})");
        writer.execute(&sql, []).unwrap();
        commit_times.push(started);
        std::thread::sleep(pause);
    }
    // Wait long enough for any in-flight wakes to land. The
    // backend-specific safety nets are 500 ms (kernel-watcher) and
    // 100 ms (shm-fast-path); 700 ms covers either.
    std::thread::sleep(Duration::from_millis(700));

    let wakes = wake_log.lock().expect("wake_times").clone();
    drop(watcher);
    drop(writer);

    // Both sequences are in time order, so one forward sweep pairs each
    // commit with the first unused wake that is not earlier than it.
    let mut pending: &[Instant] = &wakes;
    let mut latencies = Vec::with_capacity(n);
    for &commit_t in &commit_times {
        let stale = pending.iter().take_while(|&&wake| wake < commit_t).count();
        pending = &pending[stale..];
        let latency_ms = match pending.split_first() {
            Some((&wake, rest)) => {
                pending = rest;
                wake.duration_since(commit_t).as_secs_f64() * 1000.0
            }
            // missed wake — caller will assert
            None => f64::INFINITY,
        };
        latencies.push(latency_ms);
    }
    latencies
}
|
|
2753
|
+
|
|
2754
|
+
#[cfg(any(feature = "kernel-watcher", feature = "shm-fast-path"))]
|
|
2755
|
+
/// Returns the sample at rank `floor(len * pct)` of the sorted `samples`,
/// clamped to the last element (so `pct = 1.0` yields the maximum).
///
/// Samples are ordered with `f64::total_cmp`, so NaN and infinite values are
/// sorted rather than causing a panic (positive NaN sorts after `+∞`).
///
/// # Panics
///
/// Panics if `samples` is empty.
fn percentile(mut samples: Vec<f64>, pct: f64) -> f64 {
    assert!(
        !samples.is_empty(),
        "percentile requires at least one sample"
    );
    samples.sort_unstable_by(f64::total_cmp);
    // Float-to-usize `as` casts saturate, so a negative or NaN `pct` maps to
    // rank 0 and an oversized one is clamped below.
    let rank = ((samples.len() as f64) * pct) as usize;
    samples[rank.min(samples.len() - 1)]
}
|
|
2760
|
+
|
|
2761
|
+
/// Kernel watcher: wakes must be driven by kernel events rather than by a
/// slow periodic fallback.
///
/// Ten commits are issued 50 ms apart. Missed wakes are tolerated, so the
/// test only requires that at least one wake arrives and that the median
/// (p50) latency over the wakes that did arrive is below 500 ms.
#[test]
#[cfg(feature = "kernel-watcher")]
#[cfg_attr(
    target_os = "macos",
    ignore = "notify/kqueue can drop the watcher thread under CI load; functional kernel watcher tests still run"
)]
fn kernel_watcher_wake_latency_is_event_driven() {
    let tmp = std::env::temp_dir().join(format!(
        "honker-kw-lat-{}-{}",
        std::process::id(),
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .subsec_nanos()
    ));
    let _ = std::fs::remove_file(&tmp);

    let lats = measure_wake_latencies_ms(WatcherBackend::KernelWatch, tmp.clone(), 10, 50);

    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-wal", tmp.display()));
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));

    // Contract allows missed wakes (kqueue/inotify/ReadDir can
    // coalesce). Assert: some wakes arrived AND p50 is well below
    // the 5 s `idle_poll_s` fallback — proves we're event-driven,
    // not riding the paranoia poll. Windows ReadDirectoryChangesW
    // under CI load can stretch past 100 ms; 500 ms threshold
    // still rules out the fallback.
    let arrived: Vec<f64> = lats.iter().copied().filter(|l| l.is_finite()).collect();
    assert!(
        !arrived.is_empty(),
        "kernel watcher delivered zero wakes for 10 commits — events \
         aren't being delivered at all on this platform: {lats:?}"
    );
    // `arrived` is still needed for the failure message, hence the clone.
    let p50 = percentile(arrived.clone(), 0.50);
    assert!(
        p50 < 500.0,
        "kernel watcher p50 wake latency = {p50:.1} ms, expected < 500 \
         (high median latency means events arrive but slowly — possibly \
         a stuck-thread fallback). Arrived: {arrived:?}, all samples \
         (inf = no wake): {lats:?}"
    );
}
|
|
2810
|
+
|
|
2811
|
+
/// SHM fast path: wakes must come via the mmap tickle.
///
/// Ten commits are issued 50 ms apart; at least one wake must arrive and
/// the median latency of the wakes that arrived must be under 50 ms.
#[test]
#[cfg(feature = "shm-fast-path")]
fn shm_fast_path_wake_latency_is_event_driven() {
    let db_file = {
        let pid = std::process::id();
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .subsec_nanos();
        std::env::temp_dir().join(format!("honker-shm-lat-{}-{}", pid, nanos))
    };
    let _ = std::fs::remove_file(&db_file);

    let lats = measure_wake_latencies_ms(WatcherBackend::ShmFastPath, db_file.clone(), 10, 50);

    let _ = std::fs::remove_file(&db_file);
    for suffix in ["-wal", "-shm"] {
        let _ = std::fs::remove_file(format!("{}{suffix}", db_file.display()));
    }

    // Same shape as the kernel-watcher latency test: assert that
    // *some* wakes arrived and that they were fast. Missed wakes
    // are part of the documented experimental contract.
    let arrived: Vec<f64> = lats
        .iter()
        .copied()
        .filter(|latency| latency.is_finite())
        .collect();
    assert!(
        !arrived.is_empty(),
        "shm fast path delivered zero wakes for 10 commits: {lats:?}"
    );
    let p50 = percentile(arrived, 0.50);
    assert!(
        p50 < 50.0,
        "shm fast path p50 wake latency (over arrived wakes only) = {p50:.1} ms, expected < 50 \
         (high latency means iChange isn't \
         being read via mmap). Samples: {lats:?}"
    );
}
|
|
2847
|
+
|
|
2848
|
+
/// Graceful shutdown latency. Bounded by `RX_POLL_MS = 50 ms`.
///
/// Spawns a kernel-watch `UpdateWatcher` on a fresh temp database, lets
/// it settle for 200 ms, then times `join()` and requires it to return
/// in under 150 ms.
#[test]
#[cfg_attr(
    target_os = "macos",
    ignore = "notify/kqueue shutdown can hang under CI load; functional kernel watcher tests still run"
)]
#[cfg(feature = "kernel-watcher")]
fn kernel_watcher_shutdown_is_responsive() {
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .subsec_nanos();
    let db_path =
        std::env::temp_dir().join(format!("honker-kw-shutdown-{}-{}", std::process::id(), nanos));
    let _ = std::fs::remove_file(&db_path);

    // Create the database with one table and one row; the connection is
    // held open until after the shutdown has been timed.
    let conn = open_conn(db_path.to_str().unwrap(), false).unwrap();
    conn.execute_batch("CREATE TABLE t (x INTEGER)").unwrap();
    conn.execute("INSERT INTO t VALUES (0)", []).unwrap();

    let config = WatcherConfig {
        backend: WatcherBackend::KernelWatch,
    };
    let handle = UpdateWatcher::spawn_with_config(db_path.clone(), || {}, config);

    // Let the watcher reach steady state (in its recv_timeout block).
    std::thread::sleep(Duration::from_millis(200));

    // Only the join itself is timed.
    let started = std::time::Instant::now();
    let _ = handle.join();
    let elapsed = started.elapsed();

    // Tear everything down before asserting so a failure leaves no files.
    drop(conn);
    let _ = std::fs::remove_file(&db_path);
    for suffix in ["wal", "shm"] {
        let _ = std::fs::remove_file(format!("{}-{}", db_path.display(), suffix));
    }

    let limit = Duration::from_millis(150);
    assert!(
        elapsed < limit,
        "kernel watcher shutdown took {elapsed:?}, expected < 150 ms \
         (RX_POLL_MS = 50 ms; if this exceeds 500 ms the recv_timeout \
         is blocking on the safety-net interval again)"
    );
}
|
|
2897
|
+
|
|
2898
|
+
// -----------------------------------------------------------------
|
|
2899
|
+
// Probe failures must surface as Err — proving "no silent fallback"
|
|
2900
|
+
// when an experimental backend can't initialize.
|
|
2901
|
+
// -----------------------------------------------------------------
|
|
2902
|
+
|
|
2903
|
+
#[test]
fn watcher_backend_polling_probe_always_succeeds() {
    // The polling probe is expected to succeed for any path, including
    // one whose parent directories do not exist.
    let missing = std::path::PathBuf::from("/nonexistent/no/way/this/exists.db");
    let probed = WatcherBackend::Polling.probe(&missing);
    assert!(probed.is_ok());
}
|
|
2909
|
+
|
|
2910
|
+
#[test]
fn watcher_backend_parse_rejects_unknown_names() {
    // A made-up name, a wrong-case name, and a name padded with spaces
    // must all be rejected with the "unknown watcher backend" error.
    const UNKNOWN: [&str; 3] = ["bogus", "KERNEL", " polling "];
    UNKNOWN.iter().for_each(|&name| {
        let err = WatcherBackend::parse(Some(name)).unwrap_err();
        let is_unknown_error = err.contains("unknown watcher backend");
        assert!(is_unknown_error, "got: {err}");
    });
}
|
|
2917
|
+
|
|
2918
|
+
#[test]
fn watcher_backend_parse_accepts_polling_aliases() {
    // No name at all selects the polling backend.
    let from_default = WatcherBackend::parse(None);
    assert!(matches!(from_default, Ok(WatcherBackend::Polling)));

    // Short alias.
    let from_short = WatcherBackend::parse(Some("poll"));
    assert!(matches!(from_short, Ok(WatcherBackend::Polling)));

    // Full name.
    let from_full = WatcherBackend::parse(Some("polling"));
    assert!(matches!(from_full, Ok(WatcherBackend::Polling)));
}
|
|
2933
|
+
|
|
2934
|
+
#[test]
#[cfg(not(feature = "kernel-watcher"))]
fn watcher_backend_parse_rejects_uncompiled_kernel() {
    // Without the Cargo feature, asking for the kernel backend must fail
    // with an error that names the missing feature.
    let err = WatcherBackend::parse(Some("kernel")).unwrap_err();
    let names_feature = err.contains("requires the kernel-watcher Cargo feature");
    assert!(names_feature, "got: {err}");
}
|
|
2943
|
+
|
|
2944
|
+
#[test]
#[cfg(not(feature = "shm-fast-path"))]
fn watcher_backend_parse_rejects_uncompiled_shm() {
    // Without the Cargo feature, asking for the shm backend must fail
    // with an error that names the missing feature.
    let err = WatcherBackend::parse(Some("shm")).unwrap_err();
    let names_feature = err.contains("requires the shm-fast-path Cargo feature");
    assert!(names_feature, "got: {err}");
}
|
|
2953
|
+
|
|
2954
|
+
#[test]
#[cfg(feature = "kernel-watcher")]
fn watcher_backend_parse_accepts_compiled_kernel_aliases() {
    // Short alias.
    let from_short = WatcherBackend::parse(Some("kernel"));
    assert!(matches!(from_short, Ok(WatcherBackend::KernelWatch)));

    // Alias matching the Cargo feature name.
    let from_feature_name = WatcherBackend::parse(Some("kernel-watcher"));
    assert!(matches!(from_feature_name, Ok(WatcherBackend::KernelWatch)));
}
|
|
2966
|
+
|
|
2967
|
+
#[test]
#[cfg(feature = "shm-fast-path")]
fn watcher_backend_parse_accepts_compiled_shm_aliases() {
    // Short alias.
    let from_short = WatcherBackend::parse(Some("shm"));
    assert!(matches!(from_short, Ok(WatcherBackend::ShmFastPath)));

    // Alias matching the Cargo feature name.
    let from_feature_name = WatcherBackend::parse(Some("shm-fast-path"));
    assert!(matches!(from_feature_name, Ok(WatcherBackend::ShmFastPath)));
}
|
|
2979
|
+
|
|
2980
|
+
/// The SHM fast-path probe must return an error, with a message that
/// explains why, when the database has no `-shm` file next to it.
#[test]
#[cfg(feature = "shm-fast-path")]
fn watcher_backend_shm_probe_fails_when_shm_missing() {
    // Path with no -shm file — probe must report it, not silently
    // disable the backend at runtime.
    let tmp = std::env::temp_dir().join(format!(
        "honker-shm-probe-{}-{}",
        std::process::id(),
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .subsec_nanos()
    ));
    // Best-effort removal of both the db path and its `-shm` sidecar, so
    // the "missing -shm" precondition holds even if a stale file with the
    // same name was left behind.
    let _ = std::fs::remove_file(&tmp);
    let _ = std::fs::remove_file(format!("{}-shm", tmp.display()));

    let result = WatcherBackend::ShmFastPath.probe(&tmp);
    assert!(result.is_err(), "expected probe to fail for missing -shm");
    let msg = result.unwrap_err();
    assert!(
        msg.contains("-shm unavailable"),
        "probe error message should explain why; got: {msg}"
    );
}
|
|
3002
|
+
|
|
3003
|
+
/// Kernel-watcher counterpart of `update_watcher_panics_on_file_replacement`.
///
/// The polling backend panics when the db file is replaced underneath
/// it. The kernel watcher has to behave the same way, so that a stale
/// per-file watch fails loudly rather than silently dropping wakes after
/// a litestream-style restore.
///
/// Only the polling backend opens its own SQLite connection; the
/// experimental backends do not. The shared helper therefore uses a
/// plain empty file at `db_path`, which sidesteps Windows' "can't rename
/// over a file SQLite has open" restriction and lets this test run on
/// every platform. The polling test still needs a real SQLite db and
/// remains `#[cfg(unix)]`.
#[test]
#[cfg(feature = "kernel-watcher")]
fn kernel_watcher_panics_on_file_replacement() {
    let backend = WatcherBackend::KernelWatch;
    replacement_panic_test(backend);
}
|
|
3020
|
+
|
|
3021
|
+
/// SHM fast-path counterpart of the file-replacement panic test.
///
/// SQLite may recreate the `-shm` file as part of normal WAL lifecycle
/// churn, so that path is reopened and rebased. Replacing the database
/// file itself remains a dead-man condition.
#[test]
#[cfg_attr(
    windows,
    ignore = "Windows prevents replacing the watched db path while the SHM watcher is open"
)]
#[cfg(feature = "shm-fast-path")]
fn shm_fast_path_panics_on_file_replacement() {
    let backend = WatcherBackend::ShmFastPath;
    replacement_panic_test(backend);
}
|
|
3033
|
+
|
|
3034
|
+
/// Shared body for the file-replacement tests of the experimental backends.
///
/// Creates an empty file at a fresh temp path (plus a fake `-shm` for the
/// SHM backend), spawns a watcher with `backend`, swaps a different file
/// into the db path, and asserts that joining the watcher yields `Err`.
#[cfg(any(feature = "kernel-watcher", feature = "shm-fast-path"))]
fn replacement_panic_test(backend: WatcherBackend) {
    let db_nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .subsec_nanos();
    let tmp =
        std::env::temp_dir().join(format!("honker-replace-{}-{}", std::process::id(), db_nanos));
    let _ = std::fs::remove_file(&tmp);
    // Plain empty file at the db path. The kernel/shm watchers
    // don't open a SQLite connection — they just stat / watch /
    // mmap files. So a real SQLite db isn't needed and we avoid
    // Windows' inability to rename over a SQLite-held file.
    std::fs::File::create(&tmp).unwrap();

    // For the SHM backend, also write a fake -shm. macOS mmap can
    // be finicky about tiny files; write at least a page (4 KiB)
    // with the valid WAL index header up front.
    let needs_shm = matches!(backend, WatcherBackend::ShmFastPath);
    if needs_shm {
        let shm_path = std::path::PathBuf::from(format!("{}-shm", tmp.display()));
        let mut page = [0u8; 4096];
        page[..4].copy_from_slice(&3_007_000u32.to_ne_bytes()); // WALINDEX_MAX_VERSION
        // iChange (offset 8) starts at 0; doesn't matter for this test.
        std::fs::write(&shm_path, &page).unwrap();
    }

    let config = WatcherConfig { backend };
    let watcher = UpdateWatcher::spawn_with_config(tmp.clone(), || {}, config);
    // Generous initial wait so the watcher has snapshotted the
    // initial inode under CI scheduling pressure.
    std::thread::sleep(Duration::from_millis(300));

    // Replace the db file with a different inode. Atomic rename
    // works on every platform when no SQLite handle is held open.
    let other_nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .subsec_nanos();
    let other = std::env::temp_dir().join(format!(
        "honker-replace-other-{}-{}",
        std::process::id(),
        other_nanos
    ));
    let _ = std::fs::remove_file(&other);
    std::fs::File::create(&other).unwrap();
    #[cfg(windows)]
    {
        // Windows does not replace an existing destination with
        // rename(). Remove first; the watcher will still observe the
        // replacement when the new file appears with a different id.
        std::fs::remove_file(&tmp).unwrap();
    }
    std::fs::rename(&other, &tmp).unwrap();
    // Wait long enough for the dead-man's switch to fire on a
    // slow CI runner. Identity check is 100 ms; give it 10 cycles.
    std::thread::sleep(Duration::from_millis(1000));

    let result = watcher.join();

    // Clean up before asserting so a failure leaves no files behind.
    let _ = std::fs::remove_file(&tmp);
    for suffix in ["wal", "shm"] {
        let _ = std::fs::remove_file(format!("{}-{}", tmp.display(), suffix));
    }

    assert!(
        result.is_err(),
        "expected watcher thread to panic on db file replacement"
    );
}
|
|
3104
|
+
|
|
3105
|
+
#[test]
#[cfg(feature = "kernel-watcher")]
fn watcher_backend_kernel_probe_fails_for_inaccessible_dir() {
    // The parent directory chain does not exist, so notify has nothing
    // it can attach a watch to.
    let missing =
        std::path::PathBuf::from("/this/parent/does/not/exist/honker-kernel-probe.db");
    let probed = WatcherBackend::KernelWatch.probe(&missing);
    assert!(
        probed.is_err(),
        "expected probe to fail for inaccessible dir, got Ok"
    );
}
|
|
3116
|
+
}
|