sweet-search 2.5.2 → 2.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +24 -3
- package/core/graph/graph-expansion.js +215 -36
- package/core/graph/graph-extractor.js +196 -11
- package/core/graph/graph-search.js +395 -92
- package/core/graph/hcgs-generator.js +2 -1
- package/core/graph/index.js +2 -0
- package/core/graph/repo-map.js +28 -6
- package/core/graph/structural-answer-cues.js +168 -0
- package/core/graph/structural-callsite-hints.js +40 -0
- package/core/graph/structural-context-format.js +40 -0
- package/core/graph/structural-context.js +450 -0
- package/core/graph/structural-forward-push.js +156 -0
- package/core/graph/structural-header-context.js +19 -0
- package/core/graph/structural-importance.js +148 -0
- package/core/graph/structural-pagerank.js +197 -0
- package/core/graph/summary-manager.js +13 -9
- package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
- package/core/incremental-indexing/application/file-watcher.mjs +197 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
- package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
- package/core/incremental-indexing/application/operator-cli.mjs +554 -0
- package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
- package/core/incremental-indexing/application/reconciler.mjs +477 -0
- package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
- package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
- package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
- package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
- package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
- package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
- package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
- package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
- package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
- package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
- package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
- package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
- package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
- package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
- package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
- package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
- package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
- package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
- package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
- package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
- package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
- package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
- package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
- package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
- package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
- package/core/indexing/admission-policy.js +139 -0
- package/core/indexing/artifact-builder.js +29 -12
- package/core/indexing/ast-chunker.js +107 -30
- package/core/indexing/dedup/exemplar-selector.js +19 -1
- package/core/indexing/gitignore-filter.js +223 -0
- package/core/indexing/incremental-tracker.js +99 -30
- package/core/indexing/index-codebase-v21.js +6 -5
- package/core/indexing/index-maintainer.mjs +698 -6
- package/core/indexing/indexer-ann.js +99 -15
- package/core/indexing/indexer-build.js +158 -45
- package/core/indexing/indexer-empty-baseline.js +80 -0
- package/core/indexing/indexer-manifest.js +66 -0
- package/core/indexing/indexer-phases.js +56 -23
- package/core/indexing/indexer-sparse-gram.js +54 -13
- package/core/indexing/indexer-utils.js +26 -208
- package/core/indexing/indexing-file-policy.js +32 -7
- package/core/indexing/maintainer-launcher.mjs +137 -0
- package/core/indexing/merkle-tracker.js +251 -244
- package/core/indexing/model-pool.js +46 -5
- package/core/infrastructure/code-graph-repository.js +758 -6
- package/core/infrastructure/code-graph-visibility.js +157 -0
- package/core/infrastructure/codebase-repository.js +100 -13
- package/core/infrastructure/config/search.js +1 -1
- package/core/infrastructure/db-utils.js +118 -0
- package/core/infrastructure/dedup-hashing.js +10 -13
- package/core/infrastructure/hardware-capability.js +17 -7
- package/core/infrastructure/index.js +8 -2
- package/core/infrastructure/language-patterns/maps.js +4 -1
- package/core/infrastructure/language-patterns/registry-core.js +56 -17
- package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
- package/core/infrastructure/language-patterns.js +69 -0
- package/core/infrastructure/model-registry.js +20 -0
- package/core/infrastructure/native-inference.js +7 -12
- package/core/infrastructure/native-resolver.js +52 -37
- package/core/infrastructure/native-sparse-gram.js +261 -20
- package/core/infrastructure/native-tokenizer.js +6 -15
- package/core/infrastructure/simd-distance.js +10 -16
- package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
- package/core/infrastructure/structural-alias-resolver.js +122 -0
- package/core/infrastructure/structural-candidate-ranker.js +34 -0
- package/core/infrastructure/structural-context-repository.js +472 -0
- package/core/infrastructure/structural-context-utils.js +51 -0
- package/core/infrastructure/structural-graph-signals.js +121 -0
- package/core/infrastructure/structural-qualified-resolution.js +15 -0
- package/core/infrastructure/structural-source-definitions.js +100 -0
- package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
- package/core/infrastructure/tree-sitter-provider.js +811 -37
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
- package/core/query/query-router.js +55 -5
- package/core/ranking/file-kind-ranking.js +2192 -15
- package/core/ranking/late-interaction-index.js +87 -12
- package/core/search/cli-decoration.js +290 -0
- package/core/search/context-expander.js +988 -78
- package/core/search/index.js +1 -0
- package/core/search/output-policy.js +275 -0
- package/core/search/search-anchor.js +499 -0
- package/core/search/search-boost.js +93 -1
- package/core/search/search-cli.js +61 -204
- package/core/search/search-hybrid.js +250 -10
- package/core/search/search-pattern-chunks.js +57 -8
- package/core/search/search-pattern-planner.js +68 -9
- package/core/search/search-pattern-prefilter.js +30 -10
- package/core/search/search-pattern-ripgrep.js +40 -4
- package/core/search/search-pattern-sparse-overlay.js +256 -0
- package/core/search/search-pattern.js +117 -29
- package/core/search/search-postprocess.js +479 -5
- package/core/search/search-read-semantic.js +260 -23
- package/core/search/search-read.js +82 -64
- package/core/search/search-reader-pin.js +71 -0
- package/core/search/search-rrf.js +279 -0
- package/core/search/search-semantic.js +110 -5
- package/core/search/search-server.js +130 -57
- package/core/search/search-trace.js +107 -0
- package/core/search/server-identity.js +93 -0
- package/core/search/session-daemon-prewarm.mjs +33 -10
- package/core/search/sweet-search.js +399 -7
- package/core/skills/sweet-index/SKILL.md +8 -6
- package/core/vector-store/binary-hnsw-index.js +194 -30
- package/core/vector-store/float-vector-store.js +96 -6
- package/core/vector-store/hnsw-index.js +220 -49
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
- package/eval/agent-read-workflows/bin/ss-find +15 -0
- package/eval/agent-read-workflows/bin/ss-grep +12 -0
- package/eval/agent-read-workflows/bin/ss-read +14 -0
- package/eval/agent-read-workflows/bin/ss-search +18 -0
- package/eval/agent-read-workflows/bin/ss-semantic +12 -0
- package/eval/agent-read-workflows/bin/ss-trace +11 -0
- package/mcp/read-tool.js +109 -0
- package/mcp/server.js +55 -15
- package/mcp/tool-handlers.js +14 -124
- package/mcp/trace-tool.js +81 -0
- package/package.json +25 -10
- package/scripts/hooks/intercept-read.mjs +55 -0
- package/scripts/hooks/remind-tools.mjs +40 -0
- package/scripts/init.js +698 -54
- package/scripts/inject-agent-instructions.js +431 -0
- package/scripts/install-prompt-reminders.js +188 -0
- package/scripts/install-tool-enforcement.js +220 -0
- package/scripts/smoke-test.js +12 -9
- package/scripts/uninstall.js +276 -18
- package/scripts/write-claude-rules.js +110 -0
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Features:
|
|
9
9
|
* - Deferred merkle check (7s delay, ZERO startup latency)
|
|
10
|
-
* - 45-second periodic merkle check (mtime/size fast-path)
|
|
10
|
+
* - 45-second periodic merkle check (mtime/size/inode fast-path)
|
|
11
11
|
* - Full incremental index: FTS5, HNSW, Binary HNSW, Code Graph (full), HCGS
|
|
12
12
|
* - Global lock file prevents race with manual /index-codebase
|
|
13
13
|
* - Soft delete for removed files (handles branch switches, prune after 30d)
|
|
@@ -37,11 +37,16 @@
|
|
|
37
37
|
* Started by: session-preheat.sh (alongside search infrastructure)
|
|
38
38
|
*/
|
|
39
39
|
|
|
40
|
-
import { readFileSync, writeFileSync, existsSync, unlinkSync, renameSync, appendFileSync, mkdirSync, openSync, closeSync, constants } from 'node:fs';
|
|
40
|
+
import { readFileSync, writeFileSync, existsSync, unlinkSync, renameSync, appendFileSync, mkdirSync, openSync, closeSync, constants, ftruncateSync, fsyncSync, writeSync } from 'node:fs';
|
|
41
41
|
import fs from 'node:fs/promises';
|
|
42
|
-
import { dirname, join, relative, isAbsolute } from 'node:path';
|
|
42
|
+
import { dirname, join, relative, isAbsolute, resolve } from 'node:path';
|
|
43
43
|
import { fileURLToPath } from 'node:url';
|
|
44
44
|
import { spawn } from 'node:child_process';
|
|
45
|
+
import { randomUUID } from 'node:crypto';
|
|
46
|
+
import { startupInterval, tierForHardware, reconcileEnablement } from '../incremental-indexing/domain/interval-autotune.mjs';
|
|
47
|
+
import { detectHardwareCapability } from '../infrastructure/hardware-capability.js';
|
|
48
|
+
import { sweepStaleArtifactTemps, DEFAULT_TMP_SWEEP_MAX_AGE_MS } from '../incremental-indexing/infrastructure/artifact-temp-sweep.mjs';
|
|
49
|
+
import { hasCompleteBaseIndex, WAITING_FOR_INITIAL_INDEX } from '../incremental-indexing/infrastructure/baseline-readiness.mjs';
|
|
45
50
|
|
|
46
51
|
const __filename = fileURLToPath(import.meta.url);
|
|
47
52
|
const __dirname = dirname(__filename);
|
|
@@ -149,6 +154,7 @@ const QUEUE_FILE = join(DATA_DIR, 'index-maintainer-queue.jsonl');
|
|
|
149
154
|
const PROCESSING_FILE = join(DATA_DIR, 'index-maintainer-queue.processing.jsonl');
|
|
150
155
|
const LOCK_FILE = join(DATA_DIR, 'index-maintainer.lock');
|
|
151
156
|
const DEADLETTER_FILE = join(DATA_DIR, 'index-maintainer-deadletter.jsonl');
|
|
157
|
+
const PAUSE_FILE = join(DATA_DIR, 'reconcile-pause.json');
|
|
152
158
|
|
|
153
159
|
// Export configuration for testing
|
|
154
160
|
export const CONFIG = {
|
|
@@ -158,6 +164,7 @@ export const CONFIG = {
|
|
|
158
164
|
PROCESSING_FILE,
|
|
159
165
|
LOCK_FILE,
|
|
160
166
|
DEADLETTER_FILE,
|
|
167
|
+
PAUSE_FILE,
|
|
161
168
|
};
|
|
162
169
|
|
|
163
170
|
// Indexer paths
|
|
@@ -251,6 +258,40 @@ async function loadBetterSqlite3() {
|
|
|
251
258
|
const POLL_INTERVAL = 30000; // 30 seconds between queue checks
|
|
252
259
|
const LOCK_REFRESH_INTERVAL = 30000; // 30 seconds between lock refreshes (M5: was 60s)
|
|
253
260
|
const LOCK_STALE_THRESHOLD = 180000; // 3 minutes (M5: was 5 min, ratio 6:1 with refresh)
|
|
261
|
+
// Lifecycle fix v2 — progress-aware takeover. The legacy 3-min pure-timestamp
|
|
262
|
+
// takeover ("lock looks stale ⇒ steal it") produced stealth co-owner orphans
|
|
263
|
+
// when a busy daemon's heartbeat aged past the threshold; the v1 interim fix
|
|
264
|
+
// raised the threshold to 30 min, which traded faster wedge recovery for
|
|
265
|
+
// safety. v2 reverts the threshold to 3 min and adds a second signal so we
|
|
266
|
+
// keep both: orphan-free AND fast recovery, without false-positives on long
|
|
267
|
+
// async work.
|
|
268
|
+
//
|
|
269
|
+
// The lockfile now carries TWO timestamps:
|
|
270
|
+
// - `timestamp` — the heartbeat, refreshed every 30 s by setInterval
|
|
271
|
+
// (event-loop bound, like before).
|
|
272
|
+
// - `progressTimestamp` — refreshed by recordProgress() at known work
|
|
273
|
+
// checkpoints inside the reconcile loop.
|
|
274
|
+
//
|
|
275
|
+
// acquireStateLock combines them:
|
|
276
|
+
//
|
|
277
|
+
// heartbeat fresh AND progress fresh → busy + progressing → REFUSE
|
|
278
|
+
// heartbeat fresh AND progress stale → alive-but-stuck → SIGTERM + steal
|
|
279
|
+
// heartbeat stale AND progress fresh → recent progress → REFUSE (timer lag)
|
|
280
|
+
// heartbeat stale AND progress stale → genuinely wedged → SIGTERM + steal
|
|
281
|
+
// dead pid → crashed → immediate takeover
|
|
282
|
+
//
|
|
283
|
+
// Backwards-compat: a lockfile without `progressTimestamp` falls back to
|
|
284
|
+
// heartbeat-only (progressAge := heartbeatAge), reverting to classic 3-min
|
|
285
|
+
// behaviour. The SIGTERM-before-steal hardening means even this legacy path
|
|
286
|
+
// never leaks orphans.
|
|
287
|
+
//
|
|
288
|
+
// Caveat: progress IS still recorded from the main event loop, so a daemon
|
|
289
|
+
// blocked by pure synchronous CPU/native work shows both signals stale and
|
|
290
|
+
// will be SIGTERMed at 3 min. With async napi (see
|
|
291
|
+
// project_native_metal_inference_status) this case should not arise in
|
|
292
|
+
// practice; the natural escalation if it does is a worker_threads-based
|
|
293
|
+
// progress beacon — left as future work.
|
|
294
|
+
export const WEDGED_KILL_GRACE_MS = 5000; // SIGTERM grace before declaring takeover complete
|
|
254
295
|
|
|
255
296
|
// Retry configuration
|
|
256
297
|
const MAX_RETRIES = 3;
|
|
@@ -526,6 +567,620 @@ export function ensureDataDir() {
|
|
|
526
567
|
}
|
|
527
568
|
}
|
|
528
569
|
|
|
570
|
+
/**
 * Read the reconcile pause marker (`reconcile-pause.json`) from a state dir.
 *
 * A readable marker counts as paused unless it explicitly carries
 * `paused: false`. A missing marker means "not paused"; any other read or
 * parse failure is logged at WARN and also treated as "not paused".
 *
 * @param {string} [stateDir] Directory holding the marker (defaults to DATA_DIR).
 * @returns {{paused:boolean, pausedAt?:*, reason?:*, filePath:string}}
 *   `pausedAt` / `reason` are only present when the marker could be parsed.
 */
export function isReconcilePaused(stateDir = DATA_DIR) {
  const filePath = join(stateDir, 'reconcile-pause.json');

  let marker;
  try {
    marker = JSON.parse(readFileSync(filePath, 'utf-8'));
  } catch (err) {
    const markerMissing = err.code === 'ENOENT';
    if (!markerMissing) {
      log('WARN', `Ignoring unreadable reconcile pause state: ${err.message}`);
    }
    return { paused: false, filePath };
  }

  // Anything other than an explicit `false` keeps the pause in force.
  const paused = marker?.paused !== false;
  const pausedAt = marker?.pausedAt || null;
  const reason = marker?.reason || null;
  return { paused, pausedAt, reason, filePath };
}
|
|
587
|
+
|
|
588
|
+
/**
 * Enablement status of the reconcile-v2 incremental indexer.
 *
 * This is a pure pass-through to `reconcileEnablement` from the
 * incremental-indexing domain, so the daemon and the operator `status`
 * surface read the same policy. The default-on / opt-out rules for
 * `SWEET_SEARCH_RECONCILE_V2` live in that policy, not here.
 *
 * @param {NodeJS.ProcessEnv} [env] Environment to evaluate (defaults to process.env).
 * @returns {{enabled:boolean, source:string, raw:string|null}}
 */
export function reconcileV2Status(env = process.env) {
  const status = reconcileEnablement(env);
  return status;
}
|
|
602
|
+
|
|
603
|
+
/**
 * Boolean shortcut for the reconcile-v2 enablement policy.
 *
 * @param {NodeJS.ProcessEnv} [env] Environment to evaluate (defaults to process.env).
 * @returns {boolean} The `enabled` field reported by `reconcileEnablement`.
 */
export function reconcileV2Requested(env = process.env) {
  const { enabled } = reconcileEnablement(env);
  return enabled;
}
|
|
606
|
+
|
|
607
|
+
/**
 * Evaluates the reconcile-v2 enablement policy and returns `undefined`.
 *
 * NOTE(review): as written this never throws or logs in either branch, so it
 * is effectively a no-op kept for callers that still invoke it — confirm
 * whether an assertion is meant to live here.
 *
 * @param {NodeJS.ProcessEnv} [env] Environment to evaluate (defaults to process.env).
 * @returns {void}
 */
export function assertReconcileV2NotSilentlyIgnored(env = process.env) {
  const requested = reconcileV2Requested(env);
  if (requested) {
    // Nothing to assert when reconcile-v2 is requested.
  }
}
|
|
610
|
+
|
|
611
|
+
/**
 * Resolve the absolute project root and state directory for reconcile-v2.
 *
 * `SWEET_SEARCH_PROJECT_ROOT` overrides the module-level PROJECT_ROOT, and
 * `SWEET_SEARCH_STATE_DIR` overrides the default `<projectRoot>/.sweet-search`.
 *
 * @param {NodeJS.ProcessEnv} [env] Environment to read overrides from.
 * @returns {{projectRoot:string, stateDir:string}} Both paths are absolute.
 */
function reconcileV2Context(env = process.env) {
  const rootOverride = env.SWEET_SEARCH_PROJECT_ROOT;
  const projectRoot = resolve(rootOverride || PROJECT_ROOT);

  const stateOverride = env.SWEET_SEARCH_STATE_DIR;
  const stateDir = stateOverride
    ? resolve(stateOverride)
    : resolve(join(projectRoot, '.sweet-search'));

  return { projectRoot, stateDir };
}
|
|
616
|
+
|
|
617
|
+
/**
 * Resolve the reconcile-v2 tick interval at daemon startup.
 *
 * The decision itself is made by `startupInterval` in the incremental-indexing
 * domain; this wrapper only gathers its inputs:
 *   - `opts.env` falls back to `process.env`;
 *   - `opts.hardware`, when left `undefined`, is probed with
 *     `detectHardwareCapability()` (a probe failure yields `null`);
 *   - `opts.tier`, when truthy, is used as-is; otherwise the tier is derived
 *     from the hardware via `tierForHardware`, or is `null` without hardware.
 *
 * @param {{env?:NodeJS.ProcessEnv, hardware?:object, tier?:'low'|'mid'|'high'}} [opts]
 * @returns {{intervalMs:number, pinned:boolean, source:string, tier:string|null}}
 *   The `startupInterval` result with the resolved `tier` merged in.
 */
export function resolveReconcileV2Interval(opts = {}) {
  const { env: envOverride, hardware: suppliedHardware, tier: tierOverride } = opts;
  const env = envOverride || process.env;

  let hardware = suppliedHardware;
  if (hardware === undefined) {
    try {
      hardware = detectHardwareCapability();
    } catch {
      // Detection is best-effort; continue without hardware information.
      hardware = null;
    }
  }

  let tier = tierOverride;
  if (!tier) {
    tier = hardware ? tierForHardware(hardware) : null;
  }

  const resolved = startupInterval({ tier: tier || undefined, env, hardware });
  return { ...resolved, tier };
}
|
|
642
|
+
|
|
643
|
+
/**
 * Convenience accessor: just the resolved reconcile-v2 interval.
 *
 * @param {NodeJS.ProcessEnv} [env] Environment forwarded to the resolver.
 * @returns {number} The `intervalMs` field of `resolveReconcileV2Interval`.
 */
function reconcileV2IntervalMs(env = process.env) {
  const { intervalMs } = resolveReconcileV2Interval({ env });
  return intervalMs;
}
|
|
646
|
+
|
|
647
|
+
/**
 * Load and parse a state lockfile by path.
 *
 * @param {string} lockFile Path of the lockfile.
 * @returns {object|null} The parsed JSON when it carries an integer `pid`;
 *   `null` when the file is missing, unreadable, not JSON, or has no integer pid.
 */
function readStateLock(lockFile) {
  let candidate;
  try {
    candidate = JSON.parse(readFileSync(lockFile, 'utf-8'));
    // A JSON `null` payload throws on `.pid` here and lands in the catch below.
    if (!Number.isInteger(candidate.pid)) return null;
  } catch {
    return null;
  }
  return candidate;
}
|
|
655
|
+
|
|
656
|
+
// Module-level lockfile state — populated by acquireStateLock on success,
|
|
657
|
+
// mutated by writeStateLock (heartbeat tick) AND recordProgress (work
|
|
658
|
+
// checkpoint), cleared by releaseStateLock. Both writers share this object
|
|
659
|
+
// so a heartbeat write never clobbers progress fields and vice-versa.
|
|
660
|
+
let lockState = null;
|
|
661
|
+
|
|
662
|
+
/**
 * Decide whether a parsed lock record belongs to this process's current
 * acquisition.
 *
 * The pid must equal `process.pid`. When the in-memory `lockState` holds an
 * `ownerToken`, the record's token must equal it as well; with no active
 * token, a pid match alone is sufficient.
 *
 * @param {object|null|undefined} existing Parsed lockfile contents.
 * @returns {boolean}
 */
function lockMatchesCurrentOwner(existing) {
  const samePid = existing?.pid === process.pid;
  if (!samePid) return false;

  const activeToken = lockState?.ownerToken;
  return activeToken ? existing.ownerToken === activeToken : true;
}
|
|
673
|
+
|
|
674
|
+
/**
 * Load and parse a state lockfile through an already-open file descriptor.
 *
 * @param {number} fd Open, readable file descriptor.
 * @returns {object|null} The parsed JSON when it carries an integer `pid`;
 *   `null` on any read/parse failure or when the pid is not an integer.
 */
function readStateLockFromFd(fd) {
  let candidate;
  try {
    candidate = JSON.parse(readFileSync(fd, 'utf-8'));
    if (!Number.isInteger(candidate.pid)) return null;
  } catch {
    return null;
  }
  return candidate;
}
|
|
682
|
+
|
|
683
|
+
/**
 * Re-validate ownership through an open fd, mutate the in-memory lockState,
 * then write it back through that same fd. This deliberately avoids
 * temp+rename: if another daemon unlinks/recreates the path after our open,
 * our write lands on the old unlinked inode, not on the successor's lockfile.
 *
 * The payload is written at offset 0 FIRST and the file is truncated to the
 * payload length AFTERWARDS. Truncating to zero before writing would expose a
 * window in which a concurrent reader sees an empty file; acquireStateLock
 * treats an unparseable lock as corrupt and unlinks it, so that window could
 * let a candidate steal the lock from a live, healthy owner.
 *
 * No-op when this process holds no lock state, when the file cannot be
 * opened, or when the on-disk record no longer matches the current owner.
 * All I/O errors are swallowed: a displaced or missing lock is detected by
 * the caller's ownership check, not here.
 *
 * @param {string} lockFile Path of the lockfile to refresh.
 * @param {(state: object) => void} mutator Applies the field updates to the
 *   shared in-memory lock state before it is serialised.
 * @returns {void}
 */
function persistLockState(lockFile, mutator) {
  if (!lockState) return;
  let fd = null;
  try {
    fd = openSync(lockFile, constants.O_RDWR);
    const existing = readStateLockFromFd(fd);
    if (!lockMatchesCurrentOwner(existing)) return;
    mutator(lockState);
    const bytes = Buffer.from(JSON.stringify(lockState), 'utf-8');
    // writeSync may report a short write; keep going until everything is out.
    let written = 0;
    while (written < bytes.length) {
      written += writeSync(fd, bytes, written, bytes.length - written, written);
    }
    // Drop any tail left over from a previously longer payload.
    ftruncateSync(fd, bytes.length);
    try { fsyncSync(fd); } catch { /* best-effort durability for heartbeat */ }
  } catch {
    // Missing/corrupt/displaced locks are handled by the main ownership check.
  } finally {
    if (fd != null) {
      try { closeSync(fd); } catch {}
    }
  }
}
|
|
710
|
+
|
|
711
|
+
/**
 * Heartbeat refresh: stamp `timestamp` with the current time and persist the
 * lock state through `persistLockState`.
 *
 * @param {string} lockFile Path of the lockfile to refresh.
 * @returns {void}
 */
function writeStateLock(lockFile) {
  const touchHeartbeat = (state) => {
    state.timestamp = Date.now();
  };
  persistLockState(lockFile, touchHeartbeat);
}
|
|
714
|
+
|
|
715
|
+
/**
 * Record a work-progress checkpoint in the lockfile.
 *
 * Bumps `progressCounter` (starting from 0 when absent) and stamps
 * `progressTimestamp` with the current time, then persists the lock state
 * via `persistLockState`. acquireStateLock reads `progressTimestamp` to tell
 * a holder that is still making progress from one that is not.
 *
 * The write happens synchronously on the calling thread, so a fully blocked
 * event loop produces neither heartbeat nor progress updates.
 *
 * @param {string} lockFile Path of the lockfile to update.
 * @returns {void}
 */
export function recordProgress(lockFile) {
  const markCheckpoint = (state) => {
    const previous = state.progressCounter ?? 0;
    state.progressCounter = previous + 1;
    state.progressTimestamp = Date.now();
  };
  persistLockState(lockFile, markCheckpoint);
}
|
|
735
|
+
|
|
736
|
+
/**
 * Ask the previous lock holder to exit, then remove its lockfile.
 *
 * Sends SIGTERM (errors from `process.kill` are ignored), polls every 200 ms
 * for up to WEDGED_KILL_GRACE_MS while the holder is still running, logs a
 * warning if it outlived the grace period, and finally unlinks the lockfile
 * regardless of whether the holder exited.
 *
 * @param {{pid:number, startTime?:*}} existing Parsed record of the current holder.
 * @param {string} lockFile Path of the lockfile to remove.
 * @param {string} reason Human-readable diagnosis interpolated into the log line.
 * @returns {Promise<void>}
 */
async function sigtermAndStealLock(existing, lockFile, reason) {
  const { pid, startTime } = existing;
  const holderAlive = () => isPidRunning(pid, startTime);

  log('WARN', `Existing maintainer pid=${pid} appears ${reason}; sending SIGTERM before takeover.`);
  try {
    process.kill(pid, 'SIGTERM');
  } catch {
    /* ESRCH/EPERM — fine, we'll steal anyway */
  }

  const graceEndsAt = Date.now() + WEDGED_KILL_GRACE_MS;
  while (Date.now() < graceEndsAt && holderAlive()) {
    await new Promise((wake) => setTimeout(wake, 200));
  }

  if (holderAlive()) {
    log('WARN', `pid=${pid} still alive after SIGTERM+${WEDGED_KILL_GRACE_MS}ms grace; proceeding (it will self-exit at its next loop tick).`);
  }

  try {
    unlinkSync(lockFile);
  } catch {}
}
|
|
756
|
+
|
|
757
|
+
/**
 * Acquire the reconcile-v2 state lock atomically (O_CREAT | O_EXCL).
 *
 * Progress-aware single-owner takeover. Decision matrix on an existing
 * lockfile:
 *
 *   no / unparseable lock                   → unlink + retry create
 *   dead holder                             → unlink + retry create
 *   alive, heartbeat fresh, progress fresh  → REFUSE
 *   alive, heartbeat stale, progress fresh  → REFUSE (timer-lag tolerance)
 *   alive, heartbeat fresh, progress stale  → SIGTERM + steal (alive-but-stuck)
 *   alive, both stale                       → SIGTERM + steal (wedged)
 *
 * "Fresh" means younger than LOCK_STALE_THRESHOLD. A lockfile without
 * `progressTimestamp` falls back to the heartbeat age for both signals.
 *
 * At most two create attempts are made. On success the module-level
 * `lockState` is initialised with identical heartbeat and progress
 * timestamps, so a fresh owner is never judged stale on either signal.
 *
 * If initialising or writing the new lock fails after the exclusive create,
 * the descriptor is closed and `lockState` is reset before the error is
 * rethrown, so a failed acquisition neither leaks an fd nor leaves this
 * process believing it owns the lock. The empty file left behind is reclaimed
 * by the unparseable-lock path on the next attempt.
 *
 * @param {string} stateDir Directory holding the lockfile (created if missing).
 * @returns {Promise<{acquired:boolean, lockFile:string}>}
 * @throws Any filesystem error other than EEXIST from the exclusive create,
 *   and any error raised while writing the freshly created lock.
 */
export async function acquireStateLock(stateDir) {
  mkdirSync(stateDir, { recursive: true });
  const lockFile = join(stateDir, 'index-maintainer.lock');
  for (let attempt = 0; attempt < 2; attempt++) {
    let fd = null;
    try {
      fd = openSync(lockFile, constants.O_CREAT | constants.O_EXCL | constants.O_WRONLY, 0o600);
    } catch (err) {
      if (err.code !== 'EEXIST') throw err;
    }

    if (fd !== null) {
      try {
        // progressTimestamp starts equal to timestamp so a fresh owner is
        // never falsely declared stale on either signal by a candidate
        // maintainer that races our first tick.
        const nowMs = Date.now();
        lockState = {
          pid: process.pid,
          timestamp: nowMs,
          startTime: getProcessStartTime(),
          ownerToken: randomUUID(),
          progressCounter: 0,
          progressTimestamp: nowMs,
        };
        writeFileSync(fd, JSON.stringify(lockState));
      } catch (err) {
        // Failed acquisition: do not keep claiming ownership or leak the fd.
        lockState = null;
        try { closeSync(fd); } catch {}
        throw err;
      }
      closeSync(fd);
      return { acquired: true, lockFile };
    }

    // The lockfile already exists — inspect its holder.
    const existing = readStateLock(lockFile);
    if (!existing) {
      // Corrupt / unparseable lock — unlink and retry the O_EXCL create.
      try { unlinkSync(lockFile); } catch {}
      continue;
    }
    const holderAlive = isPidRunning(existing.pid, existing.startTime);
    if (!holderAlive) {
      // Crashed daemon — safe to reclaim.
      try { unlinkSync(lockFile); } catch {}
      continue;
    }
    const now = Date.now();
    const heartbeatAge = now - existing.timestamp;
    // Backwards-compat: a legacy lockfile without progressTimestamp falls
    // back to heartbeat-only mode.
    const progressAge = existing.progressTimestamp != null
      ? now - existing.progressTimestamp
      : heartbeatAge;
    const heartbeatFresh = heartbeatAge < LOCK_STALE_THRESHOLD;
    const progressFresh = progressAge < LOCK_STALE_THRESHOLD;
    if (heartbeatFresh && progressFresh) {
      // Busy AND progressing — single-owner invariant: refuse takeover.
      return { acquired: false, lockFile };
    }
    if (progressFresh) {
      // Progress was recorded recently even though the heartbeat lagged;
      // trust the progress signal and refuse.
      return { acquired: false, lockFile };
    }
    const reason = heartbeatFresh
      ? `alive but not progressing (progress age=${Math.round(progressAge / 1000)}s)`
      : `wedged (heartbeat age=${Math.round(heartbeatAge / 1000)}s, progress age=${Math.round(progressAge / 1000)}s)`;
    await sigtermAndStealLock(existing, lockFile, reason);
  }
  return { acquired: false, lockFile };
}
|
|
841
|
+
|
|
842
|
+
/**
 * Release the state lock held by this process.
 *
 * The lockfile is unlinked only when its on-disk record still matches the
 * current owner; any error along the way is ignored. The module-level
 * `lockState` is always cleared afterwards so a later acquire starts clean.
 *
 * @param {string} lockFile Path of the lockfile to release.
 * @returns {void}
 */
export function releaseStateLock(lockFile) {
  try {
    const onDisk = readStateLock(lockFile);
    const stillOurs = lockMatchesCurrentOwner(onDisk);
    if (stillOurs) {
      unlinkSync(lockFile);
    }
  } catch {}

  lockState = null;
}
|
|
851
|
+
|
|
852
|
+
/**
 * Report whether the state lockfile exists, parses, and still names this
 * process's current acquisition.
 *
 * A missing or unparseable lockfile counts as "not ours".
 *
 * @param {string} lockFile Path of the lockfile to inspect.
 * @returns {boolean}
 */
export function stillOwnsLock(lockFile) {
  const onDisk = readStateLock(lockFile);
  return onDisk ? lockMatchesCurrentOwner(onDisk) : false;
}
|
|
869
|
+
|
|
870
|
+
/**
 * Error thrown to abort in-flight maintainer work for lifecycle reasons.
 * Carries a dedicated `name` so it can be told apart from ordinary failures.
 */
class MaintainerLifecycleAbort extends Error {
  // Own-property override of the inherited `Error.prototype.name`.
  name = 'MaintainerLifecycleAbort';

  /** @param {string} message Why the work is being aborted. */
  constructor(message) {
    super(message);
  }
}
|
|
876
|
+
|
|
877
|
+
/**
 * Build the progress callback handed to long-running reconcile work.
 *
 * Each call to the returned function:
 *   1. throws MaintainerLifecycleAbort('shutdown requested') when the
 *      module-level `shutdownRequested` flag is set;
 *   2. otherwise throws MaintainerLifecycleAbort('lock ownership lost') when
 *      this process no longer owns `lockFile`;
 *   3. otherwise records a progress checkpoint in the lockfile.
 *
 * @param {string} lockFile Path of the state lockfile.
 * @returns {() => void}
 */
function createLifecycleProgress(lockFile) {
  const checkpoint = () => {
    let abortReason = null;
    if (shutdownRequested) {
      abortReason = 'shutdown requested';
    } else if (!stillOwnsLock(lockFile)) {
      abortReason = 'lock ownership lost';
    }
    if (abortReason !== null) {
      throw new MaintainerLifecycleAbort(abortReason);
    }
    recordProgress(lockFile);
  };
  return checkpoint;
}
|
|
888
|
+
|
|
889
|
+
/**
 * Runs a single reconcile-v2 tick: baseline gate, best-effort dirty scan, then
 * the production reconcile pass.
 *
 * @param {object} ctx - Tick context. This function reads `ctx.stateDir`,
 *   `ctx.projectRoot` and, when it is a function, `ctx.onProgress` (invoked
 *   with a phase label and also forwarded to the scan and reconcile steps).
 * @returns {Promise<object>} `{ skipped: true, reason, baseline }` while no
 *   complete baseline exists; otherwise the counters returned by
 *   `runProductionReconcileTick`.
 * @throws {MaintainerLifecycleAbort} Re-thrown when raised during the dirty
 *   scan. Errors from the production reconcile step are not caught here.
 */
export async function runReconcileV2Tick(ctx) {
  const onProgress = typeof ctx.onProgress === 'function' ? ctx.onProgress : null;

  function progress(phase) {
    if (onProgress) onProgress(phase);
  }

  progress('tick:start');

  // Baseline gate. Per the product contract the incremental reconciler must
  // never be the first index builder for a non-empty repo. Until the regular
  // full indexing path has produced a complete baseline this tick is a no-op:
  // neither the dirty-scan producer (which would enqueue the whole tree) nor
  // the reconcile consumer (which would create partial codebase.db /
  // code-graph.db / HNSW / LI / sparse artifacts that make search believe the
  // repo is indexed) runs, and no queue or artifact is mutated. The launcher
  // still spawns the daemon; ticks stay dormant until `sweet-search index`
  // lands a baseline.
  const baseline = hasCompleteBaseIndex(ctx.stateDir);
  if (!baseline.ready) {
    log('INFO', `${WAITING_FOR_INITIAL_INDEX}: no complete baseline yet (${baseline.reason}); run "sweet-search index" first — reconcile dormant`);
    return { skipped: true, reason: WAITING_FOR_INITIAL_INDEX, baseline: baseline.reason };
  }

  // Producer step: diff the working tree against merkle-state.json and
  // enqueue add/modify/delete hints so ordinary edits get reconciled without
  // `sweet-search index --add` or an editor hook (release-gate finding C1).
  const enqueueDirtyFiles = async () => {
    const { dirtyScanEnabled, scanDirtyAndEnqueue } = await import('../incremental-indexing/application/dirty-scan.mjs');
    if (!dirtyScanEnabled()) return;

    const { createAdmissionPolicy } = await import('../indexing/admission-policy.js');
    const admissionPolicy = createAdmissionPolicy({ projectRoot: ctx.projectRoot });
    progress('dirty-scan:start');
    const scan = await scanDirtyAndEnqueue({ projectRoot: ctx.projectRoot, stateDir: ctx.stateDir, admissionPolicy, onProgress });
    progress('dirty-scan:done');
    if (scan.enqueued > 0) {
      log('INFO', `Dirty scan enqueued ${scan.enqueued} file(s) (added=${scan.added}, modified=${scan.modified}, deleted=${scan.deleted}, retired=${scan.retired})`);
    }
  };

  // The scan runs before the consume step and is best-effort: a scan failure
  // must not block reconcile of already-queued work. Lifecycle aborts are the
  // one exception and always propagate.
  try {
    await enqueueDirtyFiles();
  } catch (err) {
    if (err instanceof MaintainerLifecycleAbort) throw err;
    log('WARN', `Dirty scan failed (continuing with queued hints): ${err?.message ?? err}`);
  }

  // Consumer step: hand queued hints to the production reconciler.
  const { runProductionReconcileTick } = await import('../incremental-indexing/application/production-reconciler.mjs');
  const logger = {
    info: (msg) => log('INFO', msg),
    warn: (msg) => log('WARN', msg),
    error: (msg) => log('ERROR', msg),
  };
  const counters = await runProductionReconcileTick({
    projectRoot: ctx.projectRoot,
    stateDir: ctx.stateDir,
    logger,
    onProgress,
  });

  progress('tick:done');
  log('INFO', `Reconcile v2 tick complete: epoch=${counters.epoch}, processed=${counters.files_processed}, unchanged=${counters.content_unchanged}`);
  return counters;
}
|
|
944
|
+
|
|
945
|
+
/**
 * Decides whether the reconcile daemon drains the maintenance queue inline.
 *
 * Inline draining is on by default. The operator turns it off by setting
 * `SWEET_SEARCH_MAINTENANCE_INLINE` to `0`, `false` or `off` (surrounding
 * whitespace and letter case are ignored). An unset or empty variable, and
 * any other value, keeps it on. Because the daemon holds the only
 * `index-maintainer.lock` for its state dir, one inline drain inside the
 * daemon process is the simplest "no two workers racing" topology — no
 * child-process supervisor is needed.
 *
 * @param {NodeJS.ProcessEnv} [env] - Environment to read; defaults to
 *   `process.env`.
 * @returns {boolean} `false` only for an explicit opt-out value.
 */
export function maintenanceInlineEnabled(env = process.env) {
  const setting = env.SWEET_SEARCH_MAINTENANCE_INLINE;
  if (setting == null || setting === '') {
    return true;
  }
  switch (String(setting).trim().toLowerCase()) {
    case '0':
    case 'false':
    case 'off':
      return false;
    default:
      return true;
  }
}
|
|
962
|
+
|
|
963
|
+
/**
 * Resolves the per-tick job cap for the inline maintenance drain.
 *
 * A positive integer in `SWEET_SEARCH_MAINTENANCE_MAX_JOBS_PER_TICK` is
 * honored as a hard ceiling. When the variable is unset, empty, non-numeric
 * or not positive, no cap applies and the drain is bounded by the wall-clock
 * budget (`maintenanceInlineBudgetMs`) instead of a fixed tiny count, so it
 * can keep pace with a growing backlog.
 *
 * @param {NodeJS.ProcessEnv} [env] - Environment to read; defaults to
 *   `process.env`.
 * @returns {number|undefined} The configured cap, or `undefined` for "no cap".
 */
function maintenanceInlineMaxJobs(env = process.env) {
  const configured = env.SWEET_SEARCH_MAINTENANCE_MAX_JOBS_PER_TICK || '';
  const cap = Number.parseInt(configured, 10);
  const usable = Number.isFinite(cap) && cap > 0;
  return usable ? cap : undefined;
}
|
|
978
|
+
|
|
979
|
+
/**
 * Resolves the wall-clock budget, in milliseconds, for one inline maintenance
 * drain.
 *
 * The budget limits how long a drain may run after a reconcile tick so that
 * maintenance never starves reconcile while still adapting to backlog.
 * Operators tune it with `SWEET_SEARCH_MAINTENANCE_BUDGET_MS`; anything that
 * does not parse to a positive integer falls back to 1500.
 *
 * @param {NodeJS.ProcessEnv} [env] - Environment to read; defaults to
 *   `process.env`.
 * @returns {number} Budget in milliseconds.
 */
function maintenanceInlineBudgetMs(env = process.env) {
  const configured = env.SWEET_SEARCH_MAINTENANCE_BUDGET_MS || '';
  const budget = Number.parseInt(configured, 10);
  const usable = Number.isFinite(budget) && budget > 0;
  return usable ? budget : 1500;
}
|
|
992
|
+
|
|
993
|
+
/**
 * Resolves the `maxAttempts` value passed to the inline maintenance drain.
 *
 * Reads `SWEET_SEARCH_MAINTENANCE_MAX_ATTEMPTS`; anything that does not parse
 * to a positive integer falls back to 3.
 *
 * @param {NodeJS.ProcessEnv} [env] - Environment to read; defaults to
 *   `process.env`.
 * @returns {number} Configured attempt limit, or 3 by default.
 */
function maintenanceInlineMaxAttempts(env = process.env) {
  const configured = env.SWEET_SEARCH_MAINTENANCE_MAX_ATTEMPTS || '';
  const attempts = Number.parseInt(configured, 10);
  const usable = Number.isFinite(attempts) && attempts > 0;
  return usable ? attempts : 3;
}
|
|
998
|
+
|
|
999
|
+
/**
 * Resolves the grace window, in milliseconds, for the startup orphan-temp
 * sweep.
 *
 * A staging temp older than the window is treated as a crash orphan (the
 * rename that would have published it never happened); a younger one may
 * belong to a concurrent in-flight writer and is left alone. Operators tune
 * the window with `SWEET_SEARCH_TMP_SWEEP_MAX_AGE_MS`. Unlike the other
 * maintenance knobs, `0` is a valid setting: it disables the age gate so
 * every matching temp is swept. Unset, empty, non-numeric or negative values
 * fall back to `DEFAULT_TMP_SWEEP_MAX_AGE_MS`.
 *
 * @param {NodeJS.ProcessEnv} [env] - Environment to read; defaults to
 *   `process.env`.
 * @returns {number} Grace window in milliseconds.
 */
function tmpSweepMaxAgeMs(env = process.env) {
  const configured = env.SWEET_SEARCH_TMP_SWEEP_MAX_AGE_MS || '';
  const maxAge = Number.parseInt(configured, 10);
  const usable = Number.isFinite(maxAge) && maxAge >= 0;
  return usable ? maxAge : DEFAULT_TMP_SWEEP_MAX_AGE_MS;
}
|
|
1013
|
+
|
|
1014
|
+
/**
 * Runs one bounded inline drain of the maintenance queue; intended to be
 * called after a successful reconcile tick.
 *
 * Maintenance failures never crash the daemon: when inline mode is disabled,
 * the worker module cannot be imported, or the worker call throws, a
 * `{ skipped: true, reason }` object is returned instead. The single exception
 * is `MaintainerLifecycleAbort`, which is re-thrown so the caller can stop.
 *
 * Exported so tests and the daemon main loop share the same code path.
 *
 * @param {{stateDir: string, env?: NodeJS.ProcessEnv, onProgress?: Function}} ctx
 *   `stateDir` is handed to the worker and its handlers; `env` (default
 *   `process.env`) supplies the tuning knobs; `onProgress`, when it is a
 *   function, is forwarded to the worker.
 * @returns {Promise<object>} The worker's drain summary, or
 *   `{ skipped: true, reason }` with reason `'inline-disabled'`,
 *   `'import-failed'` or `'drain-error'` (the latter also carries `error`).
 * @throws {MaintainerLifecycleAbort} When raised from within the drain.
 */
export async function drainMaintenanceInline(ctx) {
  const onProgress = typeof ctx.onProgress === 'function' ? ctx.onProgress : null;
  const env = ctx.env || process.env;

  if (!maintenanceInlineEnabled(env)) {
    return { skipped: true, reason: 'inline-disabled' };
  }

  // Load the worker lazily; a missing or broken module only skips the drain.
  let worker;
  try {
    worker = await import(
      '../incremental-indexing/application/maintenance-worker.mjs'
    );
  } catch (err) {
    log('WARN', `Maintenance worker import failed: ${err?.message ?? err}`);
    return { skipped: true, reason: 'import-failed' };
  }
  const { processMaintenanceQueue, defaultMaintenanceHandlers } = worker;

  try {
    const summary = await processMaintenanceQueue(ctx.stateDir, {
      handlers: defaultMaintenanceHandlers(ctx.stateDir),
      maxJobs: maintenanceInlineMaxJobs(env),
      budgetMs: maintenanceInlineBudgetMs(env),
      maxAttempts: maintenanceInlineMaxAttempts(env),
      onProgress,
    });

    // Stay quiet when the queue had nothing to offer.
    if (summary.seen > 0) {
      log('INFO',
        `Maintenance drain: seen=${summary.seen}, succeeded=${summary.succeeded}, ` +
        `deferred=${summary.deferred}, retried=${summary.retried}, ` +
        `deadLettered=${summary.deadLettered}, remaining=${summary.remaining}`);
    }
    return summary;
  } catch (err) {
    // Lifecycle aborts must reach the daemon loop; everything else is
    // downgraded to a warning so reconcile keeps running.
    if (err instanceof MaintainerLifecycleAbort) throw err;
    log('WARN', `Maintenance drain failed (continuing reconcile): ${err?.message ?? err}`);
    return { skipped: true, reason: 'drain-error', error: err?.message ?? String(err) };
  }
}
|
|
1061
|
+
|
|
1062
|
+
/**
 * Sleeps for up to `totalMs` in slices of at most `LOCK_REFRESH_INTERVAL`,
 * running a lifecycle checkpoint after every slice.
 *
 * Returns once the deadline has passed, or as soon as a shutdown request is
 * observed at a slice boundary.
 *
 * @param {number} totalMs - Total time to sleep, in milliseconds.
 * @param {string} lockFile - Lock file path used for the ownership checks and
 *   progress records.
 * @returns {Promise<void>}
 * @throws {MaintainerLifecycleAbort} When lock ownership is lost while
 *   sleeping.
 */
async function sleepWithProgress(totalMs, lockFile) {
  const wakeAt = Date.now() + totalMs;
  const checkpoint = createLifecycleProgress(lockFile);
  const pause = (ms) => new Promise((resolveSleep) => setTimeout(resolveSleep, ms));

  while (!shutdownRequested) {
    if (!stillOwnsLock(lockFile)) {
      throw new MaintainerLifecycleAbort('lock ownership lost during sleep');
    }

    const remaining = wakeAt - Date.now();
    if (remaining <= 0) {
      return;
    }

    await pause(Math.min(LOCK_REFRESH_INTERVAL, remaining));

    // A shutdown that arrived during the slice ends the sleep quietly via the
    // loop condition instead of surfacing as a checkpoint abort.
    if (!shutdownRequested) {
      checkpoint();
    }
  }
}
|
|
1074
|
+
|
|
1075
|
+
/**
 * Entry point for the reconcile-v2 maintainer.
 *
 * In one-shot mode (`runOnce` or `merkleOnce`) it runs a single tick plus an
 * inline maintenance drain without taking the state lock, then returns.
 * Otherwise it acquires the state lock, sweeps crash-orphaned staging temps,
 * and loops tick → drain → sleep until shutdown is requested or lock
 * ownership is lost.
 *
 * @param {{runOnce?: boolean, merkleOnce?: boolean}} options - One-shot flags.
 * @returns {Promise<void>}
 */
async function runReconcileV2Main({ runOnce, merkleOnce }) {
  const ctx = reconcileV2Context();
  mkdirSync(ctx.stateDir, { recursive: true });
  if (runOnce || merkleOnce) {
    await runReconcileV2Tick(ctx);
    await drainMaintenanceInline(ctx);
    return;
  }

  const lock = await acquireStateLock(ctx.stateDir);
  if (!lock.acquired) {
    log('INFO', `Another reconcile v2 maintainer is running for ${ctx.stateDir}, exiting.`);
    return;
  }
  log('INFO', `Reconcile v2 lock acquired (PID: ${process.pid})`);

  // Crash-orphan sweep. We hold the exclusive state lock, so any staging
  // temp left over (`*.tmp.<pid>`, `*.compacting.tmp`, `*.json.tmp`,
  // `*.bin.tmp`, `*.selfheal.tmp`) is from a writer that died before its
  // rename. Age-gated; never touches canonical artifacts, queues, WAL, or
  // the lockfile (see artifact-temp-sweep.mjs). Best-effort: never let
  // cleanup failure stop the daemon from starting.
  try {
    const sweep = sweepStaleArtifactTemps(ctx.stateDir, { maxAgeMs: tmpSweepMaxAgeMs() });
    if (sweep.removed > 0) {
      log('INFO', `Swept ${sweep.removed} orphaned staging temp(s) (${sweep.bytesReclaimed} bytes) from ${ctx.stateDir}`);
    }
  } catch (err) {
    log('WARN', `Artifact temp sweep failed (continuing startup): ${err?.message ?? err}`);
  }

  const resolved = resolveReconcileV2Interval();
  const intervalMs = resolved.intervalMs;
  log('INFO', `Reconcile v2 interval ${intervalMs}ms (source=${resolved.source}${resolved.tier ? `, tier=${resolved.tier}` : ''})`);

  // Lifecycle fix: only refresh the heartbeat if we still own the lock. If a
  // wedged-backstop takeover stole it, the lockfile now names another pid —
  // we must NOT clobber that successor with our pid. The main loop's
  // ownership check will end this maintainer at the next iteration.
  const refresh = setInterval(() => {
    if (stillOwnsLock(lock.lockFile)) writeStateLock(lock.lockFile);
  }, LOCK_REFRESH_INTERVAL);

  // Same rule for release: a displaced maintainer must not remove a lockfile
  // that now names its successor. releaseStateLock's own ownership handling
  // is not assumed here, so the check is made explicitly. It also makes the
  // 'exit' handler a no-op once the `finally` below has released the lock.
  const releaseIfOwned = () => {
    if (stillOwnsLock(lock.lockFile)) releaseStateLock(lock.lockFile);
  };

  const shutdown = () => { shutdownRequested = true; };
  process.on('SIGTERM', shutdown);
  process.on('SIGINT', shutdown);
  process.on('exit', releaseIfOwned);

  // Log pause/resume transitions once instead of on every loop iteration,
  // matching the legacy maintainer loop's pause logging.
  let pauseLogged = false;

  try {
    while (!shutdownRequested) {
      // Lifecycle fix: bail out if the lock no longer names us. This is the
      // backstop that ensures any displacement path (wedged-takeover,
      // alive-but-stuck takeover, manual unlink) never leaves an immortal
      // twin maintainer behind — the displaced daemon self-exits at the
      // next loop iteration instead of looping forever as a PPID=1 orphan.
      // Checked BEFORE the tick (not after) so a displaced daemon never
      // starts new work while another maintainer owns the lock.
      if (!stillOwnsLock(lock.lockFile)) {
        log('WARN', `Lock no longer owned by pid=${process.pid}; another maintainer has taken over. Exiting cleanly.`);
        shutdownRequested = true;
        break;
      }

      // Lifecycle fix v2: progress checkpoint at the top of each iteration.
      // Combined with the post-tick / post-drain checkpoints below this lets
      // acquireStateLock distinguish a busy-but-progressing daemon from one
      // hung on a never-resolving await — see the WEDGED_KILL_GRACE_MS block.
      recordProgress(lock.lockFile);

      const pause = isReconcilePaused(ctx.stateDir);
      if (pause.paused) {
        if (!pauseLogged) {
          log('INFO', `Automatic reconcile v2 work paused${pause.pausedAt ? ` since ${pause.pausedAt}` : ''}`);
          pauseLogged = true;
        }
      } else {
        if (pauseLogged) {
          log('INFO', 'Automatic reconcile v2 work resumed');
          pauseLogged = false;
        }
        try {
          const onProgress = createLifecycleProgress(lock.lockFile);
          await runReconcileV2Tick({ ...ctx, onProgress });
          onProgress('tick:post'); // post-tick checkpoint
          await drainMaintenanceInline({ ...ctx, onProgress });
          onProgress('drain:post'); // post-drain checkpoint
        } catch (err) {
          if (err instanceof MaintainerLifecycleAbort) {
            log('WARN', `Reconcile v2 lifecycle abort: ${err.message}. Cleaning up cancellation-orphaned temps and exiting cleanly.`);
            // Caveat-1 fix: on cancellation, immediately remove our own
            // staging temps so they don't sit on disk waiting for the next
            // daemon startup to sweep them. Safe to use maxAgeMs=0 here
            // because (a) sweepStaleArtifactTemps uses a strict allowlist
            // of staging-temp suffixes — it never touches canonical
            // artifacts, queues, WAL, or the lockfile — and (b) maintenance
            // handlers run sequentially via processMaintenanceQueue, so no
            // concurrent writer is mid-rename when this fires.
            try {
              const sweep = sweepStaleArtifactTemps(ctx.stateDir, { maxAgeMs: 0 });
              if (sweep.removed > 0) {
                log('INFO', `Cancellation cleanup swept ${sweep.removed} orphaned staging temp(s) (${sweep.bytesReclaimed} bytes)`);
              }
            } catch (sweepErr) {
              log('WARN', `Cancellation cleanup sweep failed (non-fatal): ${sweepErr?.message ?? sweepErr}`);
            }
            shutdownRequested = true;
            break;
          }
          // Any other failure is logged and the loop carries on to the next
          // interval.
          log('ERROR', `Reconcile v2 tick failed: ${err?.message ?? err}`);
        }
      }
      await sleepWithProgress(intervalMs, lock.lockFile);
    }
  } finally {
    clearInterval(refresh);
    releaseIfOwned();
    log('INFO', 'Reconcile v2 shutdown complete');
  }
}
|
|
1183
|
+
|
|
529
1184
|
/**
|
|
530
1185
|
* Normalize file path to project-relative format with cross-platform support.
|
|
531
1186
|
* Handles Windows paths (C:\Users\...), UNC paths (\\server\share), and converts
|
|
@@ -962,7 +1617,7 @@ function releaseGlobalIndexLock() {
|
|
|
962
1617
|
|
|
963
1618
|
/**
|
|
964
1619
|
* Perform merkle-state check for ALL file changes (internal + external).
|
|
965
|
-
* Uses mtime/size fast-path for efficiency (~0.1ms per unchanged file).
|
|
1620
|
+
* Uses mtime/size/inode fast-path for efficiency (~0.1ms per unchanged file).
|
|
966
1621
|
*
|
|
967
1622
|
* @returns {Promise<{checked: boolean, toIndex: string[], toRemove: string[], stats: Object}>}
|
|
968
1623
|
*/
|
|
@@ -972,7 +1627,7 @@ async function performMerkleCheck() {
|
|
|
972
1627
|
|
|
973
1628
|
try {
|
|
974
1629
|
// Dynamically import incremental tracker
|
|
975
|
-
const { getChangedFiles, updateState } = await import('
|
|
1630
|
+
const { getChangedFiles, updateState } = await import('./incremental-tracker.js');
|
|
976
1631
|
|
|
977
1632
|
// H3 FIX: Use dynamic loader with fallback paths
|
|
978
1633
|
const fg = await loadFastGlob();
|
|
@@ -987,7 +1642,7 @@ async function performMerkleCheck() {
|
|
|
987
1642
|
return { checked: true, toIndex: [], toRemove: [], stats: { totalFiles: 0 } };
|
|
988
1643
|
}
|
|
989
1644
|
|
|
990
|
-
// Use incremental tracker to detect changes (mtime/size fast-path)
|
|
1645
|
+
// Use incremental tracker to detect changes (mtime/size/inode fast-path)
|
|
991
1646
|
const { toIndex, toRemove, currentHashes, fastPathStats } = await getChangedFiles(allFiles, PROJECT_ROOT);
|
|
992
1647
|
|
|
993
1648
|
const duration = Date.now() - startTime;
|
|
@@ -1521,6 +2176,19 @@ async function main() {
|
|
|
1521
2176
|
|
|
1522
2177
|
// L1 FIX: Updated version to v3
|
|
1523
2178
|
log('INFO', 'Starting index maintainer daemon v3...');
|
|
2179
|
+
const v2 = reconcileV2Status();
|
|
2180
|
+
if (v2.enabled) {
|
|
2181
|
+
if (v2.source === 'env-enabled') {
|
|
2182
|
+
log('INFO', 'SWEET_SEARCH_RECONCILE_V2 enabled; using production Reconciler adapters');
|
|
2183
|
+
} else if (v2.source === 'env-enabled-permissive') {
|
|
2184
|
+
log('WARN', `SWEET_SEARCH_RECONCILE_V2="${v2.raw}" not recognized; treating as enabled (use 0/false/off to disable). Using production Reconciler adapters`);
|
|
2185
|
+
} else {
|
|
2186
|
+
log('INFO', 'Incremental reconcile v2 enabled by default (opt out with SWEET_SEARCH_RECONCILE_V2=0); using production Reconciler adapters');
|
|
2187
|
+
}
|
|
2188
|
+
await runReconcileV2Main({ runOnce, merkleOnce });
|
|
2189
|
+
return;
|
|
2190
|
+
}
|
|
2191
|
+
log('INFO', 'Incremental reconcile v2 disabled via SWEET_SEARCH_RECONCILE_V2; using legacy queue/merkle path');
|
|
1524
2192
|
|
|
1525
2193
|
// Ensure .sweet-search directory exists
|
|
1526
2194
|
ensureDataDir();
|
|
@@ -1580,6 +2248,23 @@ async function main() {
|
|
|
1580
2248
|
log('INFO', 'Lock released, goodbye.');
|
|
1581
2249
|
});
|
|
1582
2250
|
|
|
2251
|
+
let pauseLogged = false;
|
|
2252
|
+
// Reports whether automatic reconcile work is currently paused, logging only
// on transitions: once when a pause is first observed and once when work
// resumes. `pauseLogged` (declared alongside) remembers whether the pause
// message has already been emitted.
const automaticWorkPaused = () => {
  const state = isReconcilePaused();

  if (!state.paused) {
    if (pauseLogged) {
      log('INFO', 'Automatic reconcile work resumed');
      pauseLogged = false;
    }
    return false;
  }

  if (!pauseLogged) {
    log('INFO', `Automatic reconcile work paused${state.pausedAt ? ` since ${state.pausedAt}` : ''}`);
    pauseLogged = true;
  }
  return true;
};
|
|
2267
|
+
|
|
1583
2268
|
// Refresh lock periodically to prevent stale detection
|
|
1584
2269
|
const lockRefreshInterval = setInterval(() => {
|
|
1585
2270
|
if (!shutdownRequested) {
|
|
@@ -1600,6 +2285,7 @@ async function main() {
|
|
|
1600
2285
|
try {
|
|
1601
2286
|
if (shutdownRequested) return;
|
|
1602
2287
|
startupTimeout = null; // Clear reference after execution
|
|
2288
|
+
if (automaticWorkPaused()) return;
|
|
1603
2289
|
log('INFO', `Running deferred first merkle check (after ${STARTUP_DELAY}ms delay)...`);
|
|
1604
2290
|
await runMerkleCheckAndIndex();
|
|
1605
2291
|
} catch (err) {
|
|
@@ -1614,6 +2300,7 @@ async function main() {
|
|
|
1614
2300
|
const merkleCheckInterval = setInterval(async () => {
|
|
1615
2301
|
try {
|
|
1616
2302
|
if (shutdownRequested) return;
|
|
2303
|
+
if (automaticWorkPaused()) return;
|
|
1617
2304
|
log('INFO', 'Running periodic merkle check...');
|
|
1618
2305
|
await runMerkleCheckAndIndex();
|
|
1619
2306
|
} catch (err) {
|
|
@@ -1626,6 +2313,11 @@ async function main() {
|
|
|
1626
2313
|
let consecutiveEmptyPolls = 0;
|
|
1627
2314
|
|
|
1628
2315
|
while (!shutdownRequested) {
|
|
2316
|
+
if (automaticWorkPaused()) {
|
|
2317
|
+
await new Promise(r => setTimeout(r, POLL_INTERVAL));
|
|
2318
|
+
continue;
|
|
2319
|
+
}
|
|
2320
|
+
|
|
1629
2321
|
// H5 FIX: Atomic queue check and process (prevents race between peek and acquire)
|
|
1630
2322
|
const result = await atomicCheckAndProcessQueue({ dryRun });
|
|
1631
2323
|
|