sweet-search 2.5.2 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -0,0 +1,380 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Maintenance worker entry point.
4
+ *
5
+ * Plan § 10.2, § 13 Phase 0, § 34.7: maintenance jobs (Float HNSW clean
6
+ * replacement, Binary HNSW replacement, LI per-segment recompaction,
7
+ * sparse-gram delta compaction, FTS5 watermark merges) run in a separate
8
+ * process so they have predictable low-priority CPU scheduling and cannot
9
+ * interfere with the daemon's event loop.
10
+ *
11
+ * **CPU-only assertion (mandatory).** This worker MUST NEVER arm the GPU.
12
+ * The reconcile path inherits the ORT INT8 CPU encoder unconditionally
13
+ * (plan § 3.1, § 34.7). Importing or invoking the GPU model pool from this
14
+ * file is a hard error that surfaces in the dead-letter queue. The
15
+ * assertions below trip if a future change accidentally pulls in the
16
+ * `core/indexing/model-pool.js` GPU arming path or sets
17
+ * `process.env.SWEET_SEARCH_GPU` / `INDEX_GPU_BACKEND` to a value other
18
+ * than the CPU defaults.
19
+ *
20
+ * The worker drains `rebuild-queue.jsonl` (legacy filename retained per
21
+ * plan § 13 Phase 0). Jobs are acknowledged only after their tier handler
22
+ * succeeds; unknown tiers remain queued; repeated failures move to the
23
+ * dead-letter file.
24
+ *
25
+ * Process model:
26
+ * - The daemon spawns this worker via `child_process.spawn` with
27
+ * `process.platform === 'win32' ? 'start /BELOW_NORMAL' : 'nice -n 10'`.
28
+ * - The worker polls the JSONL queue every 30 s (plan § 10.2 step 1).
29
+ * - On startup, the worker asserts CPU-only state and refuses to proceed
30
+ * otherwise.
31
+ */
32
+
33
+ import fs from 'node:fs';
34
+ import path from 'node:path';
35
+ import process from 'node:process';
36
+ import Database from 'better-sqlite3';
37
+ import { fts5Merge } from '../infrastructure/sqlite-fts5.mjs';
38
+ import { reclamationHandlers } from './maintenance-handlers.mjs';
39
+
40
const FORBIDDEN_GPU_FLAGS = [
  'SWEET_SEARCH_GPU', // sweet-search canonical knob
  'INDEX_GPU_BACKEND', // pre-existing flag in core/indexing
];

/**
 * Reject any environment that asks for GPU execution.
 *
 * Each flag in `FORBIDDEN_GPU_FLAGS` is inspected in order. A flag is
 * accepted when it is unset/empty, or when its lower-cased value is one of
 * `0`, `false`, `cpu`, `off`. The first flag holding any other value aborts
 * startup (plan § 3.1 / § 6.1 / § 34.7: the maintenance worker belongs to
 * the reconcile context and is CPU-only).
 *
 * @param {object} [env=process.env] Environment map to inspect.
 * @returns {void}
 * @throws {Error} When a forbidden flag carries a GPU-arming value.
 */
export function assertCpuOnlyEnvironment(env = process.env) {
  const cpuSafeValues = new Set(['0', 'false', 'cpu', 'off']);
  const offendingFlag = FORBIDDEN_GPU_FLAGS.find((flag) => {
    const raw = env[flag];
    // Unset and empty values never arm the GPU.
    return Boolean(raw) && !cpuSafeValues.has(String(raw).toLowerCase());
  });
  if (offendingFlag === undefined) return;

  throw new Error(
    `maintenance-worker: ${offendingFlag}=${env[offendingFlag]} is incompatible with the CPU-only ` +
    `reconcile constraint (plan § 3.1 / § 34.7). Refusing to start.`,
  );
}
64
+
65
/**
 * Placeholder for a module-load guard against GPU-arming dependencies.
 *
 * This function currently performs no work: it installs no listeners and
 * inspects no modules. It exists so that there is a single, already-wired
 * call site where a module-resolution guard can be added if a maintenance
 * handler ever acquires a transitive dependency on the GPU model pool.
 *
 * The CPU-only policy for this file still holds by convention: never
 * statically import `core/indexing/model-pool.js`,
 * `core/indexing/indexer-pool.js`, or any inference adapter here; per-tier
 * work must be implemented by CPU-only domain helpers.
 *
 * @returns {void}
 */
export function installGpuLoadGuard() {
  // Intentionally empty: today's maintenance handlers are CPU-only, so there
  // is nothing to guard against at module-load time.
}
85
+
86
/**
 * File names used by the maintenance queue inside the state directory
 * (`.sweet-search/`). The `rebuild-queue` name is a legacy one retained for
 * compatibility (plan § 13 Phase 0).
 *
 * The queue is JSONL — one job descriptor per line:
 *
 *   {
 *     "tier": "float_hnsw" | "binary_hnsw" | "li_segment" | "sparse_gram" | "fts5",
 *     "reason": "tombstone_watermark" | "dead_doc_ratio" | "stale_doc_ratio" | "delta_size_ratio" | "fts5_segment_count" | "crash_recovery",
 *     "epoch": <int>,
 *     "createdAt": <ISO-8601>,
 *     "payload": {...tier-specific}
 *   }
 */
export const QUEUE_FILENAME = 'rebuild-queue.jsonl';
export const DEAD_LETTER_FILENAME = 'rebuild-queue.dead-letter.jsonl';

/** Resolve the pending-job queue file inside `stateDir`. */
function defaultQueuePath(stateDir) {
  const queueFile = path.join(stateDir, QUEUE_FILENAME);
  return queueFile;
}

/** Resolve the dead-letter file inside `stateDir`. */
function defaultDeadLetterPath(stateDir) {
  const deadLetterFile = path.join(stateDir, DEAD_LETTER_FILENAME);
  return deadLetterFile;
}
109
+
110
/**
 * Overwrite the queue file with `jobs` (one JSON document per line),
 * followed verbatim by `appendedRaw`.
 *
 * @param {string} stateDir Directory holding the queue; created if missing.
 * @param {object[]} jobs Job descriptors to persist, in order.
 * @param {string} [appendedRaw=''] Raw text written after the serialised
 *   jobs, unmodified.
 * @returns {void}
 */
function writeMaintenanceQueue(stateDir, jobs, appendedRaw = '') {
  fs.mkdirSync(stateDir, { recursive: true });
  const serialisedJobs = jobs.map((entry) => JSON.stringify(entry));
  const joined = serialisedJobs.join('\n');
  // Only terminate the job section with a newline when it has content.
  const jobSection = joined === '' ? '' : `${joined}\n`;
  fs.writeFileSync(defaultQueuePath(stateDir), jobSection + appendedRaw);
}
115
+
116
/**
 * Capture the queue file as it is right now.
 *
 * @param {string} stateDir Directory holding the queue file.
 * @returns {{raw:string, lines:string[]}} `raw` is the exact file content
 *   (empty string when the file does not exist); `lines` holds the
 *   non-blank lines of `raw`, unparsed and untrimmed.
 */
function readQueueSnapshot(stateDir) {
  const queueFile = defaultQueuePath(stateDir);
  if (fs.existsSync(queueFile)) {
    const raw = fs.readFileSync(queueFile, 'utf-8');
    const lines = [];
    for (const candidate of raw.split('\n')) {
      // Keep the original text of every line that is not pure whitespace.
      if (candidate.trim()) lines.push(candidate);
    }
    return { raw, lines };
  }
  return { raw: '', lines: [] };
}
122
+
123
/**
 * Return whatever has been appended to the queue file since `originalRaw`
 * was read.
 *
 * @param {string} stateDir Directory holding the queue file.
 * @param {string} originalRaw Queue content captured earlier.
 * @returns {string} The text following `originalRaw` in the current file.
 *   Empty when the file is missing or when its content no longer starts
 *   with `originalRaw`.
 */
function appendedQueueTail(stateDir, originalRaw) {
  const queueFile = defaultQueuePath(stateDir);
  if (!fs.existsSync(queueFile)) return '';

  const latest = fs.readFileSync(queueFile, 'utf-8');
  if (!latest.startsWith(originalRaw)) return '';
  return latest.slice(originalRaw.length);
}
129
+
130
/**
 * Decide whether two pending jobs describe the same work, so that only one
 * of them needs to sit in the queue. Coalescing keeps the queue bounded when
 * the scheduler emits the same tier/reason on every tick.
 *
 * Rules, applied in order:
 *   - a missing job on either side never coalesces;
 *   - `tier` and `reason` must both match;
 *   - a job that has already been retried (`attempts > 0`) never coalesces,
 *     because it carries prior failure context and the dead-letter path
 *     relies on per-job attempt counts;
 *   - `li_segment` jobs must additionally target the same
 *     `payload.segmentId` (a missing id is treated as `null`).
 *
 * @param {object} a First job descriptor.
 * @param {object} b Second job descriptor.
 * @returns {boolean} `true` when the two jobs are interchangeable.
 */
export function jobsAreCoalescible(a, b) {
  if (!a || !b) return false;

  const sameKind = a.tier === b.tier && a.reason === b.reason;
  if (!sameKind) return false;

  const alreadyRetried = [a, b].some((job) => (job.attempts || 0) > 0);
  if (alreadyRetried) return false;

  if (a.tier !== 'li_segment') return true;

  const segmentOf = (job) => job.payload?.segmentId ?? null;
  return segmentOf(a) === segmentOf(b);
}
155
+
156
/**
 * Add a job descriptor to the rebuild queue.
 *
 * The descriptor is written with a single `fs.appendFileSync` call as one
 * newline-terminated JSON line. A `createdAt` timestamp is stamped on the
 * written copy when the caller did not provide one; the caller's object is
 * not modified.
 *
 * Unless `opts.coalesce` is exactly `false`, the current queue is read first
 * and the job is dropped when an equivalent pending job already exists (see
 * `jobsAreCoalescible`). Pass `{ coalesce: false }` to force-append, e.g.
 * when intentionally retrying the same tier work.
 *
 * @param {string} stateDir `.sweet-search/` directory; created if missing.
 * @param {object} job Job descriptor to enqueue.
 * @param {{coalesce?:boolean}} [opts]
 * @returns {{enqueued:boolean, coalescedWith?:object}} `enqueued: false`
 *   together with the matching pending job when the request was coalesced.
 */
export function enqueueMaintenanceJob(stateDir, job, opts = {}) {
  fs.mkdirSync(stateDir, { recursive: true });

  if (opts.coalesce !== false) {
    const duplicate = readMaintenanceQueue(stateDir)
      .find((pending) => jobsAreCoalescible(pending, job));
    if (duplicate) return { enqueued: false, coalescedWith: duplicate };
  }

  const stamped = { ...job, createdAt: job.createdAt ?? new Date().toISOString() };
  fs.appendFileSync(defaultQueuePath(stateDir), `${JSON.stringify(stamped)}\n`);
  return { enqueued: true };
}
182
+
183
/**
 * Parse the queue file and return its jobs in file order. Read-only: the
 * file is never modified here (`processMaintenanceQueue` is what rewrites
 * it after acknowledging jobs).
 *
 * Blank lines are ignored and lines that are not valid JSON are skipped
 * silently.
 *
 * @param {string} stateDir Directory holding the queue file.
 * @returns {object[]} Parsed job descriptors; empty when the file is absent.
 */
export function readMaintenanceQueue(stateDir) {
  const queueFile = defaultQueuePath(stateDir);
  if (!fs.existsSync(queueFile)) return [];

  const candidates = fs.readFileSync(queueFile, 'utf-8')
    .split('\n')
    .map((text) => text.trim())
    .filter(Boolean);

  const jobs = [];
  for (const text of candidates) {
    try {
      jobs.push(JSON.parse(text));
    } catch {
      // Skip malformed lines; Phase 6 surfaces these via `reconcile inspect`.
    }
  }
  return jobs;
}
207
+
208
/**
 * Record a job in the dead-letter file. Plan § 13 Phase 6 surfaces the dead
 * letter via the operator CLI.
 *
 * One JSON line is appended containing the job, the error's message and
 * stack (the stringified value is used as the message when `err` has no
 * `message`; `stack` falls back to `null`), and a `deadAt` ISO timestamp.
 *
 * @param {string} stateDir Directory holding the dead-letter file; created
 *   if missing.
 * @param {object} job The job being retired.
 * @param {Error} err The failure that caused the retirement.
 * @returns {void}
 */
export function appendDeadLetter(stateDir, job, err) {
  fs.mkdirSync(stateDir, { recursive: true });

  const error = {
    message: err?.message ?? String(err),
    stack: err?.stack ?? null,
  };
  const record = { job, error, deadAt: new Date().toISOString() };
  fs.appendFileSync(defaultDeadLetterPath(stateDir), `${JSON.stringify(record)}\n`);
}
225
+
226
/**
 * Build the tier → handler map used by the worker: everything returned by
 * `reclamationHandlers(stateDir)` plus an `fts5` handler defined here.
 *
 * The `fts5` handler reads these optional payload fields:
 *   - `dbPath` / `databasePath`: database file to open; otherwise
 *     `<stateDir>/<dbFile>` with `dbFile` defaulting to `code-graph.db`;
 *   - `tableName` / `table`: a single table to merge; otherwise both
 *     `entities_fts` and `entities_trigram` are tried;
 *   - `pages`: positive finite number passed to `fts5Merge`, default 500.
 *
 * Tables that are not present in `sqlite_master` are skipped. The handler
 * throws when the database file is missing or when none of the candidate
 * tables exists. The database handle is always closed.
 *
 * @param {string} stateDir `.sweet-search/` directory.
 * @returns {object} Handler map keyed by tier name.
 */
export function defaultMaintenanceHandlers(stateDir) {
  const fts5 = async (job) => {
    const payload = job?.payload || {};
    const dbPath = payload.dbPath
      || payload.databasePath
      || path.join(stateDir, payload.dbFile || 'code-graph.db');
    const requestedTable = payload.tableName || payload.table;
    const tableNames = requestedTable
      ? [requestedTable]
      : ['entities_fts', 'entities_trigram'];
    const hasValidPages = Number.isFinite(payload.pages) && payload.pages > 0;
    const pages = hasValidPages ? payload.pages : 500;

    if (!fs.existsSync(dbPath)) {
      throw new Error(`fts5 maintenance database not found: ${dbPath}`);
    }

    const db = new Database(dbPath);
    try {
      const merged = [];
      for (const tableName of tableNames) {
        const present = db
          .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name = ?")
          .get(tableName);
        if (present) {
          fts5Merge(db, tableName, pages);
          merged.push(tableName);
        }
      }
      if (merged.length === 0) {
        throw new Error(`fts5 maintenance found no FTS5 tables in ${dbPath}`);
      }
      return { dbPath, tableNames: merged, pages };
    } finally {
      db.close();
    }
  };

  return {
    ...reclamationHandlers(stateDir),
    fts5,
  };
}
258
+
259
/**
 * Drain maintenance jobs in insertion order.
 *
 * Per queue line:
 *   - A line that is not valid JSON, or that parses to something other than
 *     a plain JSON object (`null`, a number, a string, an array), is counted
 *     as `malformed`, written to the dead-letter file and dropped from the
 *     queue. Without the object check a bare `null` line would throw on
 *     `job.tier` and abort every drain without ever being removed.
 *   - A job with no handler (`handlers[job.tier]`, falling back to
 *     `handlers.default`) stays queued and is counted as `deferred`.
 *   - A job beyond the `maxJobs` cap or past the `budgetMs` wall-clock
 *     budget stays queued and is counted as `deferred`. The budget is only
 *     checked once at least one job has been attempted, so a budget shorter
 *     than a single job still makes forward progress.
 *   - A job whose handler resolves is removed from the queue (`succeeded`).
 *   - A job whose handler throws is re-queued with `attempts`, `lastError`
 *     and `lastTriedAt` updated (`retried`), or dead-lettered once
 *     `attempts` reaches `maxAttempts` (`deadLettered`).
 *
 * After the loop the queue file is rewritten with the surviving jobs plus
 * any text appended to the file since the initial snapshot was taken.
 *
 * An error whose `name` is `MaintainerLifecycleAbort` is rethrown
 * immediately and the queue file is NOT rewritten.
 * NOTE(review): that means jobs which succeeded earlier in the same drain
 * stay queued after an abort — presumably intentional so an aborting worker
 * touches no state; confirm with the caller.
 *
 * @param {string} stateDir
 * @param {{handlers?:object, maxJobs?:number, maxAttempts?:number, budgetMs?:number, now?:()=>number, onProgress?:(phase:string)=>void}} [options]
 *   `maxJobs` defaults to unlimited, `maxAttempts` to 3, `budgetMs` to
 *   unlimited, `now` to `Date.now`. Non-positive or non-numeric values fall
 *   back to those defaults.
 * @returns {Promise<{seen:number, succeeded:number, failed:number, retried:number, deadLettered:number, deferred:number, malformed:number, remaining:number}>}
 *   Drain counters; `remaining` is the number of parseable jobs in the
 *   queue file after the rewrite.
 * @throws {Error} Rethrows a handler/progress error named
 *   `MaintainerLifecycleAbort`.
 */
export async function processMaintenanceQueue(stateDir, options = {}) {
  const handlers = options.handlers || {};
  const maxJobs = Number.isInteger(options.maxJobs) && options.maxJobs > 0 ? options.maxJobs : Infinity;
  const maxAttempts = Number.isInteger(options.maxAttempts) && options.maxAttempts > 0 ? options.maxAttempts : 3;
  const budgetMs = Number.isFinite(options.budgetMs) && options.budgetMs > 0 ? options.budgetMs : Infinity;
  const clock = typeof options.now === 'function' ? options.now : Date.now;
  const onProgress = typeof options.onProgress === 'function'
    ? (phase) => { options.onProgress(phase); }
    : () => {};
  const startMs = clock();
  const remaining = [];
  const summary = {
    seen: 0,
    succeeded: 0,
    failed: 0,
    retried: 0,
    deadLettered: 0,
    deferred: 0,
    malformed: 0,
    remaining: 0,
  };

  let attempted = 0;
  const snapshot = readQueueSnapshot(stateDir);
  for (const line of snapshot.lines) {
    let job;
    try {
      job = JSON.parse(line);
      // Valid JSON is not necessarily a job: `null`, primitives and arrays
      // cannot carry a tier, so route them through the malformed path
      // instead of letting `job.tier` throw below.
      if (job === null || typeof job !== 'object' || Array.isArray(job)) {
        throw new TypeError('maintenance job descriptor must be a JSON object');
      }
    } catch (err) {
      summary.malformed += 1;
      appendDeadLetter(stateDir, { tier: 'malformed', raw: line }, err);
      continue;
    }

    summary.seen += 1;
    const handler = handlers[job.tier] || handlers.default;
    // `attempted > 0` guarantees the first eligible job always runs.
    const overBudget = budgetMs !== Infinity && attempted > 0 && (clock() - startMs) >= budgetMs;
    if (!handler || attempted >= maxJobs || overBudget) {
      summary.deferred += 1;
      remaining.push(job);
      continue;
    }

    attempted += 1;
    try {
      onProgress(`maintenance:${job.tier || 'unknown'}:start`);
      await handler(job, { stateDir, onProgress });
      onProgress(`maintenance:${job.tier || 'unknown'}:done`);
      summary.succeeded += 1;
    } catch (err) {
      // Lifecycle aborts are not job failures; propagate them untouched.
      if (err?.name === 'MaintainerLifecycleAbort') throw err;
      summary.failed += 1;
      const attempts = Number.isInteger(job.attempts) ? job.attempts + 1 : 1;
      const next = {
        ...job,
        attempts,
        lastError: err?.message ?? String(err),
        lastTriedAt: new Date().toISOString(),
      };
      if (attempts >= maxAttempts) {
        summary.deadLettered += 1;
        appendDeadLetter(stateDir, next, err);
      } else {
        summary.retried += 1;
        remaining.push(next);
      }
    }
  }

  // Preserve anything enqueued while the drain was running.
  writeMaintenanceQueue(stateDir, remaining, appendedQueueTail(stateDir, snapshot.raw));
  summary.remaining = readMaintenanceQueue(stateDir).length;
  return summary;
}
347
+
348
/**
 * Worker entry point: verify the CPU-only preconditions, drain the
 * maintenance queue once with the default handlers, print a one-line JSON
 * summary to stdout and exit with status 0.
 *
 * Environment variables read:
 *   - `SWEET_SEARCH_STATE_DIR`: state directory (default `./.sweet-search`
 *     resolved against the current working directory);
 *   - `SWEET_SEARCH_MAINTENANCE_BUDGET_MS`: wall-clock budget, ignored
 *     unless it parses to a positive integer;
 *   - `SWEET_SEARCH_MAINTENANCE_MAX_JOBS`: job cap (default `50`);
 *   - `SWEET_SEARCH_MAINTENANCE_MAX_ATTEMPTS`: retry cap (default `3`).
 *
 * @returns {Promise<void>}
 */
async function main() {
  assertCpuOnlyEnvironment();
  installGpuLoadGuard();

  const { env } = process;
  const readIntEnv = (name, fallback) => Number.parseInt(env[name] || fallback, 10);

  const stateDir = env.SWEET_SEARCH_STATE_DIR
    || path.resolve(process.cwd(), '.sweet-search');
  const budgetRaw = readIntEnv('SWEET_SEARCH_MAINTENANCE_BUDGET_MS', '');
  const hasBudget = Number.isFinite(budgetRaw) && budgetRaw > 0;

  const drain = await processMaintenanceQueue(stateDir, {
    handlers: defaultMaintenanceHandlers(stateDir),
    maxJobs: readIntEnv('SWEET_SEARCH_MAINTENANCE_MAX_JOBS', '50'),
    budgetMs: hasBudget ? budgetRaw : undefined,
    maxAttempts: readIntEnv('SWEET_SEARCH_MAINTENANCE_MAX_ATTEMPTS', '3'),
  });

  const report = {
    worker: 'maintenance',
    phase: 'queue-drain',
    stateDir,
    pendingJobs: drain.remaining,
    drain,
    cpuOnly: true,
  };
  console.log(JSON.stringify(report));
  process.exit(0);
}
371
+
372
// Run `main()` only when this file is the process entry script, not when it
// is imported for its exports.
//
// Two checks are combined:
//   1. exact match between argv[1] and this module's URL pathname (the
//      common POSIX case);
//   2. a basename comparison. `path.basename` accepts both `/` and `\`
//      separators on Windows, where the URL pathname looks like `/C:/...`
//      and argv[1] uses backslashes, so neither the exact match nor a
//      `'/maintenance-worker.mjs'` suffix test can succeed there. It also
//      covers paths whose URL form is percent-encoded (e.g. spaces).
const entryScript = process.argv[1];
const invokedDirectly = Boolean(entryScript)
  && (entryScript === new URL(import.meta.url).pathname
    || path.basename(entryScript) === 'maintenance-worker.mjs');
if (invokedDirectly) {
  main().catch((err) => {
    console.error('[maintenance-worker]', err);
    process.exit(1);
  });
}