sweet-search 2.5.2 → 2.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +24 -3
- package/core/graph/graph-expansion.js +215 -36
- package/core/graph/graph-extractor.js +196 -11
- package/core/graph/graph-search.js +395 -92
- package/core/graph/hcgs-generator.js +2 -1
- package/core/graph/index.js +2 -0
- package/core/graph/repo-map.js +28 -6
- package/core/graph/structural-answer-cues.js +168 -0
- package/core/graph/structural-callsite-hints.js +40 -0
- package/core/graph/structural-context-format.js +40 -0
- package/core/graph/structural-context.js +450 -0
- package/core/graph/structural-forward-push.js +156 -0
- package/core/graph/structural-header-context.js +19 -0
- package/core/graph/structural-importance.js +148 -0
- package/core/graph/structural-pagerank.js +197 -0
- package/core/graph/summary-manager.js +13 -9
- package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
- package/core/incremental-indexing/application/file-watcher.mjs +197 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
- package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
- package/core/incremental-indexing/application/operator-cli.mjs +554 -0
- package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
- package/core/incremental-indexing/application/reconciler.mjs +477 -0
- package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
- package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
- package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
- package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
- package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
- package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
- package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
- package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
- package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
- package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
- package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
- package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
- package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
- package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
- package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
- package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
- package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
- package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
- package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
- package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
- package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
- package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
- package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
- package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
- package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
- package/core/indexing/admission-policy.js +139 -0
- package/core/indexing/artifact-builder.js +29 -12
- package/core/indexing/ast-chunker.js +107 -30
- package/core/indexing/dedup/exemplar-selector.js +19 -1
- package/core/indexing/gitignore-filter.js +223 -0
- package/core/indexing/incremental-tracker.js +99 -30
- package/core/indexing/index-codebase-v21.js +6 -5
- package/core/indexing/index-maintainer.mjs +698 -6
- package/core/indexing/indexer-ann.js +99 -15
- package/core/indexing/indexer-build.js +158 -45
- package/core/indexing/indexer-empty-baseline.js +80 -0
- package/core/indexing/indexer-manifest.js +66 -0
- package/core/indexing/indexer-phases.js +56 -23
- package/core/indexing/indexer-sparse-gram.js +54 -13
- package/core/indexing/indexer-utils.js +26 -208
- package/core/indexing/indexing-file-policy.js +32 -7
- package/core/indexing/maintainer-launcher.mjs +137 -0
- package/core/indexing/merkle-tracker.js +251 -244
- package/core/indexing/model-pool.js +46 -5
- package/core/infrastructure/code-graph-repository.js +758 -6
- package/core/infrastructure/code-graph-visibility.js +157 -0
- package/core/infrastructure/codebase-repository.js +100 -13
- package/core/infrastructure/config/search.js +1 -1
- package/core/infrastructure/db-utils.js +118 -0
- package/core/infrastructure/dedup-hashing.js +10 -13
- package/core/infrastructure/hardware-capability.js +17 -7
- package/core/infrastructure/index.js +8 -2
- package/core/infrastructure/language-patterns/maps.js +4 -1
- package/core/infrastructure/language-patterns/registry-core.js +56 -17
- package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
- package/core/infrastructure/language-patterns.js +69 -0
- package/core/infrastructure/model-registry.js +20 -0
- package/core/infrastructure/native-inference.js +7 -12
- package/core/infrastructure/native-resolver.js +52 -37
- package/core/infrastructure/native-sparse-gram.js +261 -20
- package/core/infrastructure/native-tokenizer.js +6 -15
- package/core/infrastructure/simd-distance.js +10 -16
- package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
- package/core/infrastructure/structural-alias-resolver.js +122 -0
- package/core/infrastructure/structural-candidate-ranker.js +34 -0
- package/core/infrastructure/structural-context-repository.js +472 -0
- package/core/infrastructure/structural-context-utils.js +51 -0
- package/core/infrastructure/structural-graph-signals.js +121 -0
- package/core/infrastructure/structural-qualified-resolution.js +15 -0
- package/core/infrastructure/structural-source-definitions.js +100 -0
- package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
- package/core/infrastructure/tree-sitter-provider.js +811 -37
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
- package/core/query/query-router.js +55 -5
- package/core/ranking/file-kind-ranking.js +2192 -15
- package/core/ranking/late-interaction-index.js +87 -12
- package/core/search/cli-decoration.js +290 -0
- package/core/search/context-expander.js +988 -78
- package/core/search/index.js +1 -0
- package/core/search/output-policy.js +275 -0
- package/core/search/search-anchor.js +499 -0
- package/core/search/search-boost.js +93 -1
- package/core/search/search-cli.js +61 -204
- package/core/search/search-hybrid.js +250 -10
- package/core/search/search-pattern-chunks.js +57 -8
- package/core/search/search-pattern-planner.js +68 -9
- package/core/search/search-pattern-prefilter.js +30 -10
- package/core/search/search-pattern-ripgrep.js +40 -4
- package/core/search/search-pattern-sparse-overlay.js +256 -0
- package/core/search/search-pattern.js +117 -29
- package/core/search/search-postprocess.js +479 -5
- package/core/search/search-read-semantic.js +260 -23
- package/core/search/search-read.js +82 -64
- package/core/search/search-reader-pin.js +71 -0
- package/core/search/search-rrf.js +279 -0
- package/core/search/search-semantic.js +110 -5
- package/core/search/search-server.js +130 -57
- package/core/search/search-trace.js +107 -0
- package/core/search/server-identity.js +93 -0
- package/core/search/session-daemon-prewarm.mjs +33 -10
- package/core/search/sweet-search.js +399 -7
- package/core/skills/sweet-index/SKILL.md +8 -6
- package/core/vector-store/binary-hnsw-index.js +194 -30
- package/core/vector-store/float-vector-store.js +96 -6
- package/core/vector-store/hnsw-index.js +220 -49
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
- package/eval/agent-read-workflows/bin/ss-find +15 -0
- package/eval/agent-read-workflows/bin/ss-grep +12 -0
- package/eval/agent-read-workflows/bin/ss-read +14 -0
- package/eval/agent-read-workflows/bin/ss-search +18 -0
- package/eval/agent-read-workflows/bin/ss-semantic +12 -0
- package/eval/agent-read-workflows/bin/ss-trace +11 -0
- package/mcp/read-tool.js +109 -0
- package/mcp/server.js +55 -15
- package/mcp/tool-handlers.js +14 -124
- package/mcp/trace-tool.js +81 -0
- package/package.json +25 -10
- package/scripts/hooks/intercept-read.mjs +55 -0
- package/scripts/hooks/remind-tools.mjs +40 -0
- package/scripts/init.js +698 -54
- package/scripts/inject-agent-instructions.js +431 -0
- package/scripts/install-prompt-reminders.js +188 -0
- package/scripts/install-tool-enforcement.js +220 -0
- package/scripts/smoke-test.js +12 -9
- package/scripts/uninstall.js +276 -18
- package/scripts/write-claude-rules.js +110 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Maintenance worker entry point.
|
|
4
|
+
*
|
|
5
|
+
* Plan § 10.2, § 13 Phase 0, § 34.7: maintenance jobs (Float HNSW clean
|
|
6
|
+
* replacement, Binary HNSW replacement, LI per-segment recompaction,
|
|
7
|
+
* sparse-gram delta compaction, FTS5 watermark merges) run in a separate
|
|
8
|
+
* process so they have predictable low-priority CPU scheduling and cannot
|
|
9
|
+
* interfere with the daemon's event loop.
|
|
10
|
+
*
|
|
11
|
+
* **CPU-only assertion (mandatory).** This worker MUST NEVER arm the GPU.
|
|
12
|
+
* The reconcile path inherits the ORT INT8 CPU encoder unconditionally
|
|
13
|
+
* (plan § 3.1, § 34.7). Importing or invoking the GPU model pool from this
|
|
14
|
+
* file is a hard error that surfaces in the dead-letter queue. The
|
|
15
|
+
* assertions below trip if a future change accidentally pulls in the
|
|
16
|
+
* `core/indexing/model-pool.js` GPU arming path or sets
|
|
17
|
+
* `process.env.SWEET_SEARCH_GPU` / `INDEX_GPU_BACKEND` to a value other
|
|
18
|
+
* than the CPU defaults.
|
|
19
|
+
*
|
|
20
|
+
* The worker drains `rebuild-queue.jsonl` (legacy filename retained per
|
|
21
|
+
* plan § 13 Phase 0). Jobs are acknowledged only after their tier handler
|
|
22
|
+
* succeeds; unknown tiers remain queued; repeated failures move to the
|
|
23
|
+
* dead-letter file.
|
|
24
|
+
*
|
|
25
|
+
* Process model:
|
|
26
|
+
* - The daemon spawns this worker via `child_process.spawn` with
|
|
27
|
+
* `process.platform === 'win32' ? 'start /BELOW_NORMAL' : 'nice -n 10'`.
|
|
28
|
+
* - The worker polls the JSONL queue every 30 s (plan § 10.2 step 1).
|
|
29
|
+
* - On startup, the worker asserts CPU-only state and refuses to proceed
|
|
30
|
+
* otherwise.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import fs from 'node:fs';
import path from 'node:path';
import process from 'node:process';
import { fileURLToPath } from 'node:url';
import Database from 'better-sqlite3';
import { fts5Merge } from '../infrastructure/sqlite-fts5.mjs';
import { reclamationHandlers } from './maintenance-handlers.mjs';
|
|
39
|
+
|
|
40
|
+
// Environment variables that must not request a GPU backend in this process.
const FORBIDDEN_GPU_FLAGS = Object.freeze([
  'SWEET_SEARCH_GPU', // sweet-search canonical knob
  'INDEX_GPU_BACKEND', // pre-existing flag in core/indexing
]);

// Flag values (compared after trimming and lower-casing) that keep the process on CPU.
const CPU_SAFE_FLAG_VALUES = new Set(['0', 'false', 'cpu', 'off']);

/**
 * Throw if any environment flag would arm the GPU in this process.
 * Plan § 3.1 / § 6.1 / § 34.7. The maintenance worker is part of the
 * reconcile context and therefore inherits the CPU-only constraint.
 *
 * A flag is accepted when it is unset/empty, consists only of whitespace, or
 * equals `0`, `false`, `cpu` or `off` (case-insensitive, surrounding
 * whitespace ignored). Any other value is rejected.
 *
 * @param {Record<string, string|undefined>} [env] Environment to inspect;
 *   defaults to `process.env`.
 * @returns {void}
 * @throws {Error} When a forbidden flag carries a non-CPU value. The message
 *   names the offending variable and its original (untrimmed) value.
 */
export function assertCpuOnlyEnvironment(env = process.env) {
  for (const key of FORBIDDEN_GPU_FLAGS) {
    const value = env[key];
    if (!value) continue;
    // Trim before comparing: shells such as cmd.exe (`set VAR=0 && node ...`)
    // and hand-edited .env files commonly leave trailing whitespace, which
    // must not turn a CPU setting into a startup failure.
    const normalised = String(value).trim().toLowerCase();
    if (normalised === '' || CPU_SAFE_FLAG_VALUES.has(normalised)) continue;
    throw new Error(
      `maintenance-worker: ${key}=${value} is incompatible with the CPU-only ` +
        `reconcile constraint (plan § 3.1 / § 34.7). Refusing to start.`,
    );
  }
}
|
|
64
|
+
|
|
65
|
+
/**
 * Extension point for runtime guards against loading GPU-arming modules.
 *
 * This function is currently a deliberate no-op: it installs no listeners and
 * performs no checks. The worker's handlers are expected to be CPU-only, and
 * this file itself does not statically import `core/indexing/model-pool.js`,
 * `core/indexing/indexer-pool.js`, or any inference adapter.
 *
 * It is kept (and called from `main`) so that module-resolution guards have a
 * single, obvious home if a future handler ever gains a transitive model
 * dependency.
 *
 * @returns {void}
 */
export function installGpuLoadGuard() {
  // Intentionally empty — see the doc comment. Add module-resolution guards
  // here if a maintenance handler ever pulls in a model dependency.
}
|
|
85
|
+
|
|
86
|
+
/**
 * File names used for the maintenance queue inside the state directory
 * (`.sweet-search/`). `rebuild-queue.jsonl` is a legacy name retained for
 * compatibility (plan § 13 Phase 0).
 *
 * The queue is JSON Lines; each line is one job descriptor:
 *
 *   {
 *     "tier": "float_hnsw" | "binary_hnsw" | "li_segment" | "sparse_gram" | "fts5",
 *     "reason": "tombstone_watermark" | "dead_doc_ratio" | "stale_doc_ratio" | "delta_size_ratio" | "fts5_segment_count" | "crash_recovery",
 *     "epoch": <int>,
 *     "createdAt": <ISO-8601>,
 *     "payload": {...tier-specific}
 *   }
 */
export const QUEUE_FILENAME = 'rebuild-queue.jsonl';
export const DEAD_LETTER_FILENAME = 'rebuild-queue.dead-letter.jsonl';

/**
 * Location of the pending-job queue file within `stateDir`.
 *
 * @param {string} stateDir
 * @returns {string}
 */
function defaultQueuePath(stateDir) {
  const queueFile = path.join(stateDir, QUEUE_FILENAME);
  return queueFile;
}

/**
 * Location of the dead-letter file within `stateDir`.
 *
 * @param {string} stateDir
 * @returns {string}
 */
function defaultDeadLetterPath(stateDir) {
  const deadLetterFile = path.join(stateDir, DEAD_LETTER_FILENAME);
  return deadLetterFile;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Overwrite the queue file with `jobs` (one JSON document per line) followed
 * by `appendedRaw`, creating `stateDir` first if necessary.
 *
 * @param {string} stateDir
 * @param {object[]} jobs Jobs to keep, in order.
 * @param {string} [appendedRaw] Raw text written verbatim after the jobs.
 * @returns {void}
 */
function writeMaintenanceQueue(stateDir, jobs, appendedRaw = '') {
  fs.mkdirSync(stateDir, { recursive: true });
  const encodedJobs = jobs.map((job) => JSON.stringify(job));
  const body = encodedJobs.join('\n');
  // Only terminate the job section with a newline when it is non-empty, so an
  // empty queue does not start with a blank line.
  const contents = body ? `${body}\n${appendedRaw}` : `${appendedRaw}`;
  fs.writeFileSync(defaultQueuePath(stateDir), contents);
}
|
|
115
|
+
|
|
116
|
+
/**
 * Read the queue file once and return both its exact text and its non-blank
 * lines. A missing file yields an empty snapshot.
 *
 * @param {string} stateDir
 * @returns {{raw: string, lines: string[]}}
 */
function readQueueSnapshot(stateDir) {
  const queueFile = defaultQueuePath(stateDir);
  if (fs.existsSync(queueFile)) {
    const raw = fs.readFileSync(queueFile, 'utf-8');
    const lines = [];
    for (const candidate of raw.split('\n')) {
      // Whitespace-only lines are dropped; kept lines are NOT trimmed.
      if (candidate.trim()) lines.push(candidate);
    }
    return { raw, lines };
  }
  return { raw: '', lines: [] };
}
|
|
122
|
+
|
|
123
|
+
/**
 * Return whatever text has been appended to the queue file since
 * `originalRaw` was read.
 *
 * Returns `''` when the file no longer exists or when its current content
 * does not begin with `originalRaw` (i.e. it was not purely appended to).
 *
 * @param {string} stateDir
 * @param {string} originalRaw Queue text captured earlier.
 * @returns {string}
 */
function appendedQueueTail(stateDir, originalRaw) {
  const queueFile = defaultQueuePath(stateDir);
  if (!fs.existsSync(queueFile)) {
    return '';
  }
  const latest = fs.readFileSync(queueFile, 'utf-8');
  if (!latest.startsWith(originalRaw)) {
    return '';
  }
  return latest.slice(originalRaw.length);
}
|
|
129
|
+
|
|
130
|
+
/**
 * Decide whether two pending jobs would do the same work, so that only one
 * of them needs to sit in the queue. This keeps the queue bounded when the
 * scheduler emits the same tier/reason on every tick.
 *
 * Rules:
 *   - both jobs must be present and share `tier` and `reason`;
 *   - a job that has already been attempted (`attempts > 0`) is never
 *     coalesced, so its failure context and attempt count are preserved;
 *   - `li_segment` jobs must additionally target the same
 *     `payload.segmentId` (a missing id is treated as `null`).
 *
 * @param {object|null|undefined} a
 * @param {object|null|undefined} b
 * @returns {boolean}
 */
export function jobsAreCoalescible(a, b) {
  const bothPresent = Boolean(a) && Boolean(b);
  if (!bothPresent) return false;

  const sameKind = a.tier === b.tier && a.reason === b.reason;
  const alreadyRetried = [a, b].some((job) => (job.attempts || 0) > 0);
  if (!sameKind || alreadyRetried) return false;

  if (a.tier !== 'li_segment') return true;

  const segmentOf = (job) => job.payload?.segmentId ?? null;
  return segmentOf(a) === segmentOf(b);
}
|
|
155
|
+
|
|
156
|
+
/**
 * Add a job descriptor to the rebuild queue.
 *
 * The write itself is a single `fs.appendFileSync` call of one complete
 * line. Unless `opts.coalesce` is `false`, the current queue is read first
 * and the job is dropped when an equivalent pending job already exists (see
 * `jobsAreCoalescible`). Pass `{ coalesce: false }` to force-append, e.g.
 * when the caller deliberately wants the same tier work to run again.
 *
 * `createdAt` is filled in with the current time when the job has none.
 *
 * @param {string} stateDir `.sweet-search/` directory (created if missing)
 * @param {object} job
 * @param {{coalesce?:boolean}} [opts]
 * @returns {{enqueued:boolean, coalescedWith?:object}} `coalescedWith` is the
 *   already-queued job that made this one redundant.
 */
export function enqueueMaintenanceJob(stateDir, job, opts = {}) {
  fs.mkdirSync(stateDir, { recursive: true });

  if (opts.coalesce !== false) {
    for (const pending of readMaintenanceQueue(stateDir)) {
      if (jobsAreCoalescible(pending, job)) {
        return { enqueued: false, coalescedWith: pending };
      }
    }
  }

  const stamped = { ...job, createdAt: job.createdAt ?? new Date().toISOString() };
  fs.appendFileSync(defaultQueuePath(stateDir), `${JSON.stringify(stamped)}\n`);
  return { enqueued: true };
}
|
|
182
|
+
|
|
183
|
+
/**
 * Parse the queue file and return every well-formed job in file order.
 * Read-only: the file is never modified here (`processMaintenanceQueue` is
 * what rewrites it after acknowledging jobs). A missing file yields `[]`.
 *
 * @param {string} stateDir
 * @returns {object[]}
 */
export function readMaintenanceQueue(stateDir) {
  const queueFile = defaultQueuePath(stateDir);
  if (!fs.existsSync(queueFile)) return [];

  const jobs = [];
  const rows = fs.readFileSync(queueFile, 'utf-8').split('\n');
  rows.forEach((row) => {
    const text = row.trim();
    if (text === '') return;
    try {
      jobs.push(JSON.parse(text));
    } catch {
      // Skip malformed lines; Phase 6 surfaces these via `reconcile inspect`.
    }
  });
  return jobs;
}
|
|
207
|
+
|
|
208
|
+
/**
 * Record a job in the dead-letter file together with the error that put it
 * there and the time of death. Plan § 13 Phase 6 surfaces the dead letter
 * via the operator CLI.
 *
 * One JSON line is appended: `{ job, error: { message, stack }, deadAt }`.
 * When `err` has no `message`, `String(err)` is used; a missing `stack` is
 * stored as `null`.
 *
 * @param {string} stateDir Created if it does not exist.
 * @param {object} job
 * @param {Error} err
 * @returns {void}
 */
export function appendDeadLetter(stateDir, job, err) {
  fs.mkdirSync(stateDir, { recursive: true });

  const message = err?.message ?? String(err);
  const stack = err?.stack ?? null;
  const record = {
    job,
    error: { message, stack },
    deadAt: new Date().toISOString(),
  };

  fs.appendFileSync(defaultDeadLetterPath(stateDir), `${JSON.stringify(record)}\n`);
}
|
|
225
|
+
|
|
226
|
+
/**
 * Build the tier → handler table used when the worker runs standalone.
 *
 * The table is whatever `reclamationHandlers(stateDir)` provides plus an
 * `fts5` handler (which takes precedence over any `fts5` entry from the
 * reclamation set).
 *
 * The `fts5` handler reads these optional `job.payload` fields:
 *   - `dbPath` / `databasePath`: database file; otherwise
 *     `<stateDir>/<dbFile>` with `dbFile` defaulting to `code-graph.db`.
 *   - `tableName` / `table`: single table to merge; otherwise both
 *     `entities_fts` and `entities_trigram`.
 *   - `pages`: positive finite number passed to `fts5Merge`; otherwise 500.
 *
 * Tables absent from `sqlite_master` are skipped. The handler rejects when
 * the database file is missing or when none of the candidate tables exist.
 * It resolves with `{ dbPath, tableNames, pages }`, where `tableNames` lists
 * the tables that were actually merged.
 *
 * @param {string} stateDir
 * @returns {Record<string, (job: object, ctx?: object) => Promise<unknown>>}
 */
export function defaultMaintenanceHandlers(stateDir) {
  const fallbackTables = ['entities_fts', 'entities_trigram'];

  async function mergeFts5(job) {
    const payload = job?.payload || {};

    let dbPath = payload.dbPath || payload.databasePath;
    if (!dbPath) {
      dbPath = path.join(stateDir, payload.dbFile || 'code-graph.db');
    }

    const requestedTable = payload.tableName || payload.table;
    const tableNames = requestedTable ? [requestedTable] : fallbackTables;

    let pages = 500;
    if (Number.isFinite(payload.pages) && payload.pages > 0) {
      pages = payload.pages;
    }

    if (!fs.existsSync(dbPath)) {
      throw new Error(`fts5 maintenance database not found: ${dbPath}`);
    }

    const db = new Database(dbPath);
    try {
      const merged = [];
      for (const tableName of tableNames) {
        const row = db
          .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name = ?")
          .get(tableName);
        if (row) {
          fts5Merge(db, tableName, pages);
          merged.push(tableName);
        }
      }
      if (merged.length === 0) {
        throw new Error(`fts5 maintenance found no FTS5 tables in ${dbPath}`);
      }
      return { dbPath, tableNames: merged, pages };
    } finally {
      // Always release the connection, including on merge failure.
      db.close();
    }
  }

  return {
    ...reclamationHandlers(stateDir),
    fts5: async (job) => mergeFts5(job),
  };
}
|
|
258
|
+
|
|
259
|
+
/**
 * Drain maintenance jobs in insertion order. Success removes a job from the
 * queue; handler failure retries until `maxAttempts`, then dead-letters.
 * Jobs without a registered handler remain queued for a future worker.
 *
 * Draining is bounded by EITHER a job cap (`maxJobs`, a hard ceiling when
 * set) OR a wall-clock budget (`budgetMs`). The budget lets the daemon keep
 * pace with a growing backlog without starving the reconcile tick: it
 * drains as many jobs as fit in the window rather than a fixed tiny count.
 * The first eligible job always runs (so a sub-job-duration budget still
 * makes forward progress); the budget gates only subsequent jobs.
 *
 * Queue lines that are not valid JSON, or that parse to something other than
 * a JSON object (`null`, a number, a string, an array, ...), are counted as
 * `malformed`, written to the dead-letter file, and dropped from the queue.
 *
 * Handlers are looked up by `job.tier`, falling back to `handlers.default`.
 * Only function values count as handlers, and members inherited from
 * `Object.prototype` (e.g. a tier named "constructor") are never treated as
 * handlers.
 *
 * After the loop the queue file is rewritten with the jobs that remain plus
 * any text appended to the file while the drain was running.
 *
 * If a handler throws an error whose `name` is `MaintainerLifecycleAbort`,
 * it is rethrown immediately and the queue file is left as it was on disk
 * (jobs completed earlier in the same drain are therefore still queued).
 *
 * @param {string} stateDir
 * @param {{handlers?:object, maxJobs?:number, maxAttempts?:number, budgetMs?:number, now?:()=>number, onProgress?:(phase:string)=>void}} [options]
 *   - `handlers`: map of tier name → `async (job, {stateDir, onProgress})`.
 *   - `maxJobs`: positive integer cap on attempted jobs; otherwise unlimited.
 *   - `maxAttempts`: positive integer; otherwise 3.
 *   - `budgetMs`: positive finite number; otherwise unlimited.
 *   - `now`: clock returning milliseconds; defaults to `Date.now`.
 *   - `onProgress`: called with `maintenance:<tier>:start` / `:done`.
 * @returns {Promise<{seen:number, succeeded:number, failed:number, retried:number, deadLettered:number, deferred:number, malformed:number, remaining:number}>}
 */
export async function processMaintenanceQueue(stateDir, options = {}) {
  const handlers = options.handlers || {};
  const maxJobs = Number.isInteger(options.maxJobs) && options.maxJobs > 0 ? options.maxJobs : Infinity;
  const maxAttempts = Number.isInteger(options.maxAttempts) && options.maxAttempts > 0 ? options.maxAttempts : 3;
  const budgetMs = Number.isFinite(options.budgetMs) && options.budgetMs > 0 ? options.budgetMs : Infinity;
  const clock = typeof options.now === 'function' ? options.now : Date.now;
  const onProgress = typeof options.onProgress === 'function'
    ? (phase) => { options.onProgress(phase); }
    : () => {};

  // Resolve a handler without being fooled by Object.prototype members: with
  // a plain `handlers[tier]` lookup a job whose tier is "constructor" would
  // dispatch to `Object` and be acknowledged as if real work had been done.
  const resolveHandler = (key) => {
    const candidate = handlers[key];
    if (typeof candidate !== 'function') return undefined;
    if (candidate === Object.prototype[key]) return undefined;
    return candidate;
  };

  const startMs = clock();
  const remaining = [];
  const summary = {
    seen: 0,
    succeeded: 0,
    failed: 0,
    retried: 0,
    deadLettered: 0,
    deferred: 0,
    malformed: 0,
    remaining: 0,
  };

  let attempted = 0;
  const snapshot = readQueueSnapshot(stateDir);
  for (const line of snapshot.lines) {
    let job;
    try {
      job = JSON.parse(line);
      // `null` (and other non-object JSON) parses fine but is not a job
      // descriptor; reading `job.tier` on `null` would throw outside the
      // handler try/catch and abort every drain until the line is removed.
      if (job === null || typeof job !== 'object' || Array.isArray(job)) {
        throw new TypeError('maintenance job descriptor must be a JSON object');
      }
    } catch (err) {
      summary.malformed += 1;
      appendDeadLetter(stateDir, { tier: 'malformed', raw: line }, err);
      continue;
    }

    summary.seen += 1;
    const handler = resolveHandler(job.tier) || resolveHandler('default');
    // `attempted > 0` guarantees the first eligible job runs even when the
    // budget is already exhausted.
    const overBudget = budgetMs !== Infinity && attempted > 0 && (clock() - startMs) >= budgetMs;
    if (!handler || attempted >= maxJobs || overBudget) {
      summary.deferred += 1;
      remaining.push(job);
      continue;
    }

    attempted += 1;
    try {
      onProgress(`maintenance:${job.tier || 'unknown'}:start`);
      await handler(job, { stateDir, onProgress });
      onProgress(`maintenance:${job.tier || 'unknown'}:done`);
      summary.succeeded += 1;
    } catch (err) {
      // Lifecycle aborts are not job failures; propagate them untouched.
      if (err?.name === 'MaintainerLifecycleAbort') throw err;
      summary.failed += 1;
      const attempts = Number.isInteger(job.attempts) ? job.attempts + 1 : 1;
      const next = {
        ...job,
        attempts,
        lastError: err?.message ?? String(err),
        lastTriedAt: new Date().toISOString(),
      };
      if (attempts >= maxAttempts) {
        summary.deadLettered += 1;
        appendDeadLetter(stateDir, next, err);
      } else {
        summary.retried += 1;
        remaining.push(next);
      }
    }
  }

  // Preserve anything enqueued while the drain was running.
  writeMaintenanceQueue(stateDir, remaining, appendedQueueTail(stateDir, snapshot.raw));
  summary.remaining = readMaintenanceQueue(stateDir).length;
  return summary;
}
|
|
347
|
+
|
|
348
|
+
/**
 * Worker entry point: assert the CPU-only environment, drain the maintenance
 * queue once with the default handlers, print a one-line JSON summary to
 * stdout, and exit with status 0.
 *
 * Environment knobs:
 *   - `SWEET_SEARCH_STATE_DIR`: state directory (default `<cwd>/.sweet-search`).
 *   - `SWEET_SEARCH_MAINTENANCE_MAX_JOBS`: job cap per drain (default 50).
 *   - `SWEET_SEARCH_MAINTENANCE_MAX_ATTEMPTS`: attempts before dead-lettering
 *     (default 3).
 *   - `SWEET_SEARCH_MAINTENANCE_BUDGET_MS`: wall-clock budget; ignored unless
 *     it parses to a positive integer.
 *
 * An unset, empty, or non-numeric `MAX_JOBS` / `MAX_ATTEMPTS` value falls
 * back to its default instead of being forwarded as `NaN`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  assertCpuOnlyEnvironment();
  installGpuLoadGuard();

  // Parse an integer knob. A non-numeric value must not reach
  // `processMaintenanceQueue` as NaN, because there a non-integer `maxJobs`
  // means "no cap" rather than "use the default".
  const intFromEnv = (name, fallback) => {
    const parsed = Number.parseInt(process.env[name] || '', 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  };

  const stateDir = process.env.SWEET_SEARCH_STATE_DIR
    || path.resolve(process.cwd(), '.sweet-search');
  const budgetRaw = Number.parseInt(process.env.SWEET_SEARCH_MAINTENANCE_BUDGET_MS || '', 10);
  const drain = await processMaintenanceQueue(stateDir, {
    handlers: defaultMaintenanceHandlers(stateDir),
    maxJobs: intFromEnv('SWEET_SEARCH_MAINTENANCE_MAX_JOBS', 50),
    budgetMs: Number.isFinite(budgetRaw) && budgetRaw > 0 ? budgetRaw : undefined,
    maxAttempts: intFromEnv('SWEET_SEARCH_MAINTENANCE_MAX_ATTEMPTS', 3),
  });
  console.log(JSON.stringify({
    worker: 'maintenance',
    phase: 'queue-drain',
    stateDir,
    pendingJobs: drain.remaining,
    drain,
    cpuOnly: true,
  }));
  process.exit(0);
}
|
|
371
|
+
|
|
372
|
+
// True when this module is the script Node was started with (as opposed to
// being imported, e.g. by the daemon or by tests).
//
// The module URL is converted with `fileURLToPath` rather than read from
// `URL#pathname`: `pathname` stays percent-encoded (a space becomes `%20`)
// and on Windows looks like `/C:/...`, so it never equals `process.argv[1]`
// there. The basename fallback covers launches through a symlink or a
// differently-spelled path, and works with both `/` and `\` separators.
const invokedDirectly = (() => {
  const entry = process.argv[1];
  if (!entry) return false;
  if (import.meta.url.startsWith('file:')
    && path.resolve(entry) === fileURLToPath(import.meta.url)) {
    return true;
  }
  return path.basename(entry) === 'maintenance-worker.mjs';
})();
if (invokedDirectly) {
  main().catch((err) => {
    console.error('[maintenance-worker]', err);
    process.exit(1);
  });
}
|