sweet-search 2.5.2 → 2.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +24 -3
- package/core/graph/graph-expansion.js +215 -36
- package/core/graph/graph-extractor.js +196 -11
- package/core/graph/graph-search.js +395 -92
- package/core/graph/hcgs-generator.js +2 -1
- package/core/graph/index.js +2 -0
- package/core/graph/repo-map.js +28 -6
- package/core/graph/structural-answer-cues.js +168 -0
- package/core/graph/structural-callsite-hints.js +40 -0
- package/core/graph/structural-context-format.js +40 -0
- package/core/graph/structural-context.js +450 -0
- package/core/graph/structural-forward-push.js +156 -0
- package/core/graph/structural-header-context.js +19 -0
- package/core/graph/structural-importance.js +148 -0
- package/core/graph/structural-pagerank.js +197 -0
- package/core/graph/summary-manager.js +13 -9
- package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
- package/core/incremental-indexing/application/file-watcher.mjs +197 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
- package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
- package/core/incremental-indexing/application/operator-cli.mjs +554 -0
- package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
- package/core/incremental-indexing/application/reconciler.mjs +477 -0
- package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
- package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
- package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
- package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
- package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
- package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
- package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
- package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
- package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
- package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
- package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
- package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
- package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
- package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
- package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
- package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
- package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
- package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
- package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
- package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
- package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
- package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
- package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
- package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
- package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
- package/core/indexing/admission-policy.js +139 -0
- package/core/indexing/artifact-builder.js +29 -12
- package/core/indexing/ast-chunker.js +107 -30
- package/core/indexing/dedup/exemplar-selector.js +19 -1
- package/core/indexing/gitignore-filter.js +223 -0
- package/core/indexing/incremental-tracker.js +99 -30
- package/core/indexing/index-codebase-v21.js +6 -5
- package/core/indexing/index-maintainer.mjs +698 -6
- package/core/indexing/indexer-ann.js +99 -15
- package/core/indexing/indexer-build.js +158 -45
- package/core/indexing/indexer-empty-baseline.js +80 -0
- package/core/indexing/indexer-manifest.js +66 -0
- package/core/indexing/indexer-phases.js +56 -23
- package/core/indexing/indexer-sparse-gram.js +54 -13
- package/core/indexing/indexer-utils.js +26 -208
- package/core/indexing/indexing-file-policy.js +32 -7
- package/core/indexing/maintainer-launcher.mjs +137 -0
- package/core/indexing/merkle-tracker.js +251 -244
- package/core/indexing/model-pool.js +46 -5
- package/core/infrastructure/code-graph-repository.js +758 -6
- package/core/infrastructure/code-graph-visibility.js +157 -0
- package/core/infrastructure/codebase-repository.js +100 -13
- package/core/infrastructure/config/search.js +1 -1
- package/core/infrastructure/db-utils.js +118 -0
- package/core/infrastructure/dedup-hashing.js +10 -13
- package/core/infrastructure/hardware-capability.js +17 -7
- package/core/infrastructure/index.js +8 -2
- package/core/infrastructure/language-patterns/maps.js +4 -1
- package/core/infrastructure/language-patterns/registry-core.js +56 -17
- package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
- package/core/infrastructure/language-patterns.js +69 -0
- package/core/infrastructure/model-registry.js +20 -0
- package/core/infrastructure/native-inference.js +7 -12
- package/core/infrastructure/native-resolver.js +52 -37
- package/core/infrastructure/native-sparse-gram.js +261 -20
- package/core/infrastructure/native-tokenizer.js +6 -15
- package/core/infrastructure/simd-distance.js +10 -16
- package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
- package/core/infrastructure/structural-alias-resolver.js +122 -0
- package/core/infrastructure/structural-candidate-ranker.js +34 -0
- package/core/infrastructure/structural-context-repository.js +472 -0
- package/core/infrastructure/structural-context-utils.js +51 -0
- package/core/infrastructure/structural-graph-signals.js +121 -0
- package/core/infrastructure/structural-qualified-resolution.js +15 -0
- package/core/infrastructure/structural-source-definitions.js +100 -0
- package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
- package/core/infrastructure/tree-sitter-provider.js +811 -37
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
- package/core/query/query-router.js +55 -5
- package/core/ranking/file-kind-ranking.js +2192 -15
- package/core/ranking/late-interaction-index.js +87 -12
- package/core/search/cli-decoration.js +290 -0
- package/core/search/context-expander.js +988 -78
- package/core/search/index.js +1 -0
- package/core/search/output-policy.js +275 -0
- package/core/search/search-anchor.js +499 -0
- package/core/search/search-boost.js +93 -1
- package/core/search/search-cli.js +61 -204
- package/core/search/search-hybrid.js +250 -10
- package/core/search/search-pattern-chunks.js +57 -8
- package/core/search/search-pattern-planner.js +68 -9
- package/core/search/search-pattern-prefilter.js +30 -10
- package/core/search/search-pattern-ripgrep.js +40 -4
- package/core/search/search-pattern-sparse-overlay.js +256 -0
- package/core/search/search-pattern.js +117 -29
- package/core/search/search-postprocess.js +479 -5
- package/core/search/search-read-semantic.js +260 -23
- package/core/search/search-read.js +82 -64
- package/core/search/search-reader-pin.js +71 -0
- package/core/search/search-rrf.js +279 -0
- package/core/search/search-semantic.js +110 -5
- package/core/search/search-server.js +130 -57
- package/core/search/search-trace.js +107 -0
- package/core/search/server-identity.js +93 -0
- package/core/search/session-daemon-prewarm.mjs +33 -10
- package/core/search/sweet-search.js +399 -7
- package/core/skills/sweet-index/SKILL.md +8 -6
- package/core/vector-store/binary-hnsw-index.js +194 -30
- package/core/vector-store/float-vector-store.js +96 -6
- package/core/vector-store/hnsw-index.js +220 -49
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
- package/eval/agent-read-workflows/bin/ss-find +15 -0
- package/eval/agent-read-workflows/bin/ss-grep +12 -0
- package/eval/agent-read-workflows/bin/ss-read +14 -0
- package/eval/agent-read-workflows/bin/ss-search +18 -0
- package/eval/agent-read-workflows/bin/ss-semantic +12 -0
- package/eval/agent-read-workflows/bin/ss-trace +11 -0
- package/mcp/read-tool.js +109 -0
- package/mcp/server.js +55 -15
- package/mcp/tool-handlers.js +14 -124
- package/mcp/trace-tool.js +81 -0
- package/package.json +25 -10
- package/scripts/hooks/intercept-read.mjs +55 -0
- package/scripts/hooks/remind-tools.mjs +40 -0
- package/scripts/init.js +698 -54
- package/scripts/inject-agent-instructions.js +431 -0
- package/scripts/install-prompt-reminders.js +188 -0
- package/scripts/install-tool-enforcement.js +220 -0
- package/scripts/smoke-test.js +12 -9
- package/scripts/uninstall.js +276 -18
- package/scripts/write-claude-rules.js +110 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { DB_PATHS } from '../infrastructure/config/index.js';
|
|
3
|
+
import {
|
|
4
|
+
buildNextManifest,
|
|
5
|
+
readManifest,
|
|
6
|
+
writeManifest,
|
|
7
|
+
zeroManifest,
|
|
8
|
+
} from '../incremental-indexing/infrastructure/manifest.mjs';
|
|
9
|
+
import { FALLBACK_WEIGHTS_ID } from '../incremental-indexing/infrastructure/sparse-gram-delta.mjs';
|
|
10
|
+
|
|
11
|
+
/**
 * Return the final segment of a path.
 *
 * Thin wrapper over `path.basename`, used to reduce configured artifact
 * paths to bare file names.
 *
 * @param {string} filePath - Path to reduce.
 * @returns {string} The last path segment (file name including extension).
 */
function basename(filePath) {
  return path.basename(filePath);
}
|
|
14
|
+
|
|
15
|
+
/**
 * Pick the sparse-gram weights identifier out of a build result.
 *
 * Checked in order: a non-empty string `weightsId`, a non-empty string
 * `weights_id`, then the `usedFallbackWeights` flag, which maps to
 * `FALLBACK_WEIGHTS_ID`.
 *
 * @param {object|null|undefined} result - Sparse-gram build result, if any.
 * @returns {string|null} The weights id, or null when none can be derived.
 */
function sparseGramWeightsIdFromResult(result) {
  // Both spellings are accepted; camelCase wins when both are present.
  for (const key of ['weightsId', 'weights_id']) {
    const candidate = result?.[key];
    if (typeof candidate === 'string' && candidate) return candidate;
  }
  return result?.usedFallbackWeights ? FALLBACK_WEIGHTS_ID : null;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Build the table of artifact names handed to `zeroManifest`.
 *
 * Every entry is the basename of the matching `DB_PATHS` value. The two
 * derived entries are `hnswStale` (HNSW name plus `.stale.bin`) and
 * `liManifest` (`manifest.json` inside the `<late-interaction name>.segments`
 * directory).
 *
 * @returns {{codeGraph: string, vectors: string, hnsw: string,
 *   hnswStale: string, binaryHnsw: string, liManifest: string,
 *   sparseBase: string}} Artifact names keyed by manifest slot.
 */
export function defaultIndexerManifestPaths() {
  const hnswName = basename(DB_PATHS.hnswIndex);
  const liName = basename(DB_PATHS.lateInteraction);
  return {
    codeGraph: basename(DB_PATHS.codeGraph),
    vectors: basename(DB_PATHS.codebase),
    hnsw: hnswName,
    hnswStale: `${hnswName}.stale.bin`,
    binaryHnsw: basename(DB_PATHS.binaryHnswIndex),
    liManifest: `${liName}.segments/manifest.json`,
    sparseBase: basename(DB_PATHS.sparseGramIndex),
  };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Default directory for the indexer manifest: the directory that contains
 * the codebase database (`DB_PATHS.codebase`).
 *
 * @returns {string} Directory portion of `DB_PATHS.codebase`.
 */
export function defaultIndexerStateDir() {
  return path.dirname(DB_PATHS.codebase);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build the next indexer manifest and write it to the state directory.
 *
 * Tier descriptors start from the zero manifest built from the default
 * artifact paths; `options.tiers[tier]` is shallow-merged on top of each.
 * The previously stored manifest (when one can be read) supplies the epoch
 * baseline and is passed to `buildNextManifest`.
 *
 * @param {object} [options]
 * @param {string} [options.stateDir] - Directory holding the manifest;
 *   defaults to `defaultIndexerStateDir()`.
 * @param {number} [options.epoch] - Explicit epoch; when not an integer the
 *   previous epoch (or 0) plus one is used.
 * @param {object} [options.sparseGramResult] - Sparse-gram build result used
 *   to derive `sparseGram.weightsId`.
 * @param {object} [options.tiers] - Per-tier descriptor overrides.
 * @returns {object} The manifest that was written.
 */
export function publishIndexerManifest(options = {}) {
  // The parameter default only covers `undefined`; tolerate an explicit null.
  const opts = options ?? {};
  const stateDir = opts.stateDir || defaultIndexerStateDir();
  const defaultManifest = zeroManifest(defaultIndexerManifestPaths());
  const previous = readManifest(stateDir) || defaultManifest;
  const epoch = Number.isInteger(opts.epoch) ? opts.epoch : (previous.epoch ?? 0) + 1;

  // Only stamp a weights id when the build result yields one.
  const sparseGram = { ...defaultManifest.sparseGram };
  const sparseWeightsId = sparseGramWeightsIdFromResult(opts.sparseGramResult);
  if (sparseWeightsId) sparseGram.weightsId = sparseWeightsId;

  const defaultTiers = {
    codeGraph: defaultManifest.codeGraph,
    vectors: defaultManifest.vectors,
    hnsw: defaultManifest.hnsw,
    binaryHnsw: defaultManifest.binaryHnsw,
    lateInteraction: defaultManifest.lateInteraction,
    sparseGram,
  };
  const tiers = Object.fromEntries(
    Object.entries(defaultTiers).map(([tier, descriptor]) => [
      tier,
      { ...descriptor, ...(opts.tiers?.[tier] || {}) },
    ]),
  );

  const manifest = buildNextManifest(previous, { epoch, tiers });
  writeManifest(stateDir, manifest);
  return manifest;
}
|
|
@@ -16,12 +16,14 @@ import { runDedupPhase, formatDedupSummary } from './dedup/dedup-phase.js';
|
|
|
16
16
|
import { DEDUP_CONFIG } from '../infrastructure/config/index.js';
|
|
17
17
|
import { incrementalUpdateHNSW, buildHNSWIndex, buildLateInteractionIndex, buildQuantizedArtifactsPhase } from './indexer-ann.js';
|
|
18
18
|
import { buildSparseGramArtifact } from './indexer-sparse-gram.js';
|
|
19
|
+
import { publishIndexerManifest } from './indexer-manifest.js';
|
|
20
|
+
import { contentHashSync } from '../incremental-indexing/infrastructure/hashing.mjs';
|
|
19
21
|
import {
|
|
20
22
|
configureLocalModelRuntime,
|
|
21
23
|
resetLocalModelRuntime,
|
|
22
24
|
} from '../embedding/embedding-local-model.js';
|
|
23
25
|
import { isNativeInferenceAvailable } from '../infrastructure/native-inference.js';
|
|
24
|
-
import { teardownAllModels, initIndexGpuPool, teardownIndexGpuPool, warmupQueryCpuModels, GPU_ARMING_MIN_FILES } from './model-pool.js';
|
|
26
|
+
import { teardownAllModels, initIndexGpuPool, teardownIndexGpuPool, warmupQueryCpuModels, GPU_ARMING_MIN_FILES, isIndexAcceleratorAvailable } from './model-pool.js';
|
|
25
27
|
import {
|
|
26
28
|
configureLateInteractionRuntime,
|
|
27
29
|
resetLateInteractionRuntime,
|
|
@@ -423,34 +425,47 @@ export async function buildVectorsAndArtifactsPhase(options = {}) {
|
|
|
423
425
|
}
|
|
424
426
|
|
|
425
427
|
// The embedding worker pool uses ORT INT8 CPU in each worker. It must only
|
|
426
|
-
// be active when the
|
|
428
|
+
// be active when the index-time encoder ALSO uses ORT INT8 CPU, otherwise
|
|
427
429
|
// the stored index and the query vectors live in different embedding spaces
|
|
428
|
-
// (gencodesearchnet 83% → 58% MRR regression
|
|
430
|
+
// (gencodesearchnet 83% → 58% MRR regression — queries are always ORT INT8
|
|
431
|
+
// CPU). Three cases where index-time embed is ORT INT8 CPU:
|
|
429
432
|
// 1. Native inference isn't available at all (pre-native hosts).
|
|
430
|
-
// 2.
|
|
433
|
+
// 2. No usable accelerator (Metal/CoreML/CUDA) — even if the native addon
|
|
434
|
+
// is installed, the native model is never loaded on a no-accelerator
|
|
435
|
+
// host (see model-pool.initIndexGpuPool), so embed dispatch falls to
|
|
436
|
+
// ORT INT8. Running the pool here makes that path multi-threaded
|
|
437
|
+
// instead of inline.
|
|
438
|
+
// 3. SWEET_SEARCH_EMBED_USE_CPU=1 — the user opted into CPU embed on
|
|
431
439
|
// both sides (index + query), so pool ORT embed matches dispatcher
|
|
432
|
-
// ORT embed. This is the "ORT embed on CPU ‖ native LI on
|
|
440
|
+
// ORT embed. This is the "ORT embed on CPU ‖ native LI on accelerator"
|
|
433
441
|
// pipeline that maximises index throughput by running embed and LI
|
|
434
442
|
// on different devices.
|
|
435
443
|
//
|
|
436
444
|
// The historical `!shouldParallelLI` gate existed for the all-CPU era where
|
|
437
445
|
// pool workers and parallel LI both wanted CPU and fought. In the CPU-embed
|
|
438
|
-
// +
|
|
439
|
-
// cores, the main thread drives
|
|
440
|
-
// contention. So when `SWEET_SEARCH_EMBED_USE_CPU=1`
|
|
441
|
-
// let the pool run alongside
|
|
446
|
+
// + accelerator-LI world, that conflict goes away — pool workers do ORT on
|
|
447
|
+
// CPU cores, the main thread drives accelerator LI dispatches (negligible
|
|
448
|
+
// CPU), no contention. So when `SWEET_SEARCH_EMBED_USE_CPU=1` (and LI is on
|
|
449
|
+
// a real accelerator) we lift the gate and let the pool run alongside
|
|
450
|
+
// parallel LI. On a no-accelerator host LI is also on ORT CPU, so the gate
|
|
451
|
+
// stays in force and pool + parallel LI take turns rather than contend.
|
|
442
452
|
const forceEmbedCpu = process.env.SWEET_SEARCH_EMBED_USE_CPU === '1';
|
|
443
|
-
const
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
453
|
+
const indexTimeEmbedIsCpu = !isNativeInferenceAvailable()
|
|
454
|
+
|| !isIndexAcceleratorAvailable()
|
|
455
|
+
|| forceEmbedCpu;
|
|
456
|
+
// LI runs on a native accelerator only when one is actually armed. When it
|
|
457
|
+
// is, pool + parallelLI is safe — the LI driver is just dispatching GPU
|
|
458
|
+
// commands, not competing for CPU cores.
|
|
459
|
+
const liOnAccelerator = isNativeInferenceAvailable()
|
|
460
|
+
&& isIndexAcceleratorAvailable()
|
|
461
|
+
&& !noLateInteraction;
|
|
462
|
+
const allowPoolWithParallelLi = forceEmbedCpu && liOnAccelerator;
|
|
448
463
|
const useEmbeddingPool = !dryRun
|
|
449
464
|
&& filesToIndex.length > 0
|
|
450
465
|
&& EMBEDDING_CONFIG.provider === 'local'
|
|
451
466
|
&& resourcePlan.useWorkerPool
|
|
452
467
|
&& (!shouldParallelLI || allowPoolWithParallelLi)
|
|
453
|
-
&&
|
|
468
|
+
&& indexTimeEmbedIsCpu;
|
|
454
469
|
|
|
455
470
|
if (!dryRun && EMBEDDING_CONFIG.provider === 'local' && filesToIndex.length > 0) {
|
|
456
471
|
configureLocalModelRuntime({ intraOpThreads: embeddingThreads });
|
|
@@ -500,15 +515,26 @@ export async function buildVectorsAndArtifactsPhase(options = {}) {
|
|
|
500
515
|
// run a dummy forward pass to compile Metal pipelines / CoreML variants
|
|
501
516
|
// / BLAS threads.
|
|
502
517
|
//
|
|
518
|
+
// No-accelerator skip: a host with no usable Metal / CoreML / CUDA
|
|
519
|
+
// accelerator indexes on the optimized ORT INT8 CPU path and never arms
|
|
520
|
+
// candle/native. `isIndexAcceleratorAvailable()` gates this even when the
|
|
521
|
+
// optional native addon is installed (e.g. Linux + the CUDA package but a
|
|
522
|
+
// failed/absent CUDA runtime, or SWEET_SEARCH_CUDA=0) — the JS layer is the
|
|
523
|
+
// authoritative selector; we never lean on Rust degrading loadWithDevice()
|
|
524
|
+
// to CPU. Skipping arming also skips the teardown/CPU-rewarm lifecycle in
|
|
525
|
+
// the `finally` below, so a CPU-only full reindex simply runs on ORT CPU.
|
|
526
|
+
//
|
|
503
527
|
// Small-changeset skip: incremental runs with fewer than
|
|
504
528
|
// GPU_ARMING_MIN_FILES files keep the ORT CPU path. The GPU load +
|
|
505
529
|
// warmup + teardown + CPU rewarm round-trip costs 5–15s on M3 class
|
|
506
530
|
// hardware and would dwarf the actual work (<1s per file on CPU).
|
|
507
|
-
// Full reindex always arms the GPU regardless of file count
|
|
531
|
+
// Full reindex always arms the GPU regardless of file count — but only
|
|
532
|
+
// when an accelerator exists.
|
|
508
533
|
const shouldArmGpu = !dryRun
|
|
509
534
|
&& filesToIndex.length > 0
|
|
510
535
|
&& EMBEDDING_CONFIG.provider === 'local'
|
|
511
536
|
&& isNativeInferenceAvailable()
|
|
537
|
+
&& isIndexAcceleratorAvailable()
|
|
512
538
|
&& (fullReindex || filesToIndex.length >= GPU_ARMING_MIN_FILES);
|
|
513
539
|
|
|
514
540
|
if (shouldArmGpu) {
|
|
@@ -531,6 +557,8 @@ export async function buildVectorsAndArtifactsPhase(options = {}) {
|
|
|
531
557
|
}
|
|
532
558
|
} else if (!dryRun && filesToIndex.length > 0 && filesToIndex.length < GPU_ARMING_MIN_FILES) {
|
|
533
559
|
log(`Small changeset (${filesToIndex.length} < ${GPU_ARMING_MIN_FILES} files) — using ORT CPU`, 'dim');
|
|
560
|
+
} else if (!dryRun && filesToIndex.length > 0 && !isIndexAcceleratorAvailable()) {
|
|
561
|
+
log('No inference accelerator detected — indexing on ORT INT8 CPU', 'dim');
|
|
534
562
|
}
|
|
535
563
|
|
|
536
564
|
try {
|
|
@@ -696,7 +724,7 @@ export async function buildVectorsAndArtifactsPhase(options = {}) {
|
|
|
696
724
|
}
|
|
697
725
|
|
|
698
726
|
export async function updateIncrementalStatePhase(options = {}) {
|
|
699
|
-
const { dryRun, fullReindex, incrementalInfo, allFiles, vectorStats, graphStats } = options;
|
|
727
|
+
const { dryRun, fullReindex, incrementalInfo, allFiles, vectorStats, graphStats, manifestStateDir, sparseGramResult } = options;
|
|
700
728
|
|
|
701
729
|
if (dryRun) return;
|
|
702
730
|
|
|
@@ -709,18 +737,18 @@ export async function updateIncrementalStatePhase(options = {}) {
|
|
|
709
737
|
log('\nIncremental state updated', 'green');
|
|
710
738
|
} else if (fullReindex) {
|
|
711
739
|
const hashes = {};
|
|
712
|
-
const crypto = await import('crypto');
|
|
713
740
|
for (const file of allFiles) {
|
|
714
741
|
try {
|
|
715
742
|
const fullPath = path.join(PROJECT_ROOT, file);
|
|
716
743
|
const [content, stat] = await Promise.all([
|
|
717
|
-
fs.readFile(fullPath
|
|
718
|
-
fs.stat(fullPath).catch(() => null),
|
|
744
|
+
fs.readFile(fullPath),
|
|
745
|
+
fs.stat(fullPath, { bigint: true }).catch(() => null),
|
|
719
746
|
]);
|
|
720
747
|
hashes[file] = {
|
|
721
|
-
hash:
|
|
722
|
-
size: stat
|
|
723
|
-
mtime_ns: stat ?
|
|
748
|
+
hash: contentHashSync(content),
|
|
749
|
+
size: stat ? stat.size.toString() : null,
|
|
750
|
+
mtime_ns: stat ? stat.mtimeNs.toString() : null,
|
|
751
|
+
inode: stat ? stat.ino.toString() : null,
|
|
724
752
|
};
|
|
725
753
|
} catch (e) { /* skip */ }
|
|
726
754
|
}
|
|
@@ -731,6 +759,11 @@ export async function updateIncrementalStatePhase(options = {}) {
|
|
|
731
759
|
});
|
|
732
760
|
log('\nIncremental state saved', 'green');
|
|
733
761
|
}
|
|
762
|
+
publishIndexerManifest({
|
|
763
|
+
...(manifestStateDir ? { stateDir: manifestStateDir } : {}),
|
|
764
|
+
...(sparseGramResult ? { sparseGramResult } : {}),
|
|
765
|
+
});
|
|
766
|
+
log('Reconcile manifest published', 'green');
|
|
734
767
|
}
|
|
735
768
|
|
|
736
769
|
export function printSummaryPhase(options) {
|
|
@@ -9,6 +9,8 @@ import {
|
|
|
9
9
|
hasNativeSparseGramSupport,
|
|
10
10
|
resolveSparseSymbolMask,
|
|
11
11
|
} from '../infrastructure/native-sparse-gram.js';
|
|
12
|
+
import { contentHash } from '../incremental-indexing/infrastructure/hashing.mjs';
|
|
13
|
+
import { FALLBACK_WEIGHTS_ID } from '../incremental-indexing/infrastructure/sparse-gram-delta.mjs';
|
|
12
14
|
import { atomicSwapDatabase, log } from './indexer-utils.js';
|
|
13
15
|
|
|
14
16
|
async function unlinkIfExists(filePath) {
|
|
@@ -28,18 +30,7 @@ async function collectFileSymbolMasks(codeFiles) {
|
|
|
28
30
|
const db = new Database(DB_PATHS.codebase, { readonly: true });
|
|
29
31
|
|
|
30
32
|
try {
|
|
31
|
-
|
|
32
|
-
for (const row of rows) {
|
|
33
|
-
if (!masks.has(row.file_path)) continue;
|
|
34
|
-
try {
|
|
35
|
-
const metadata = JSON.parse(row.metadata || '{}');
|
|
36
|
-
const typeMask = resolveSparseSymbolMask(metadata.type);
|
|
37
|
-
if (typeMask === 0) continue;
|
|
38
|
-
masks.set(row.file_path, masks.get(row.file_path) | typeMask);
|
|
39
|
-
} catch {
|
|
40
|
-
// Ignore malformed metadata rows; sparse gram build is best effort.
|
|
41
|
-
}
|
|
42
|
-
}
|
|
33
|
+
collectFileSymbolMasksFromDb(db, codeFiles, masks);
|
|
43
34
|
} finally {
|
|
44
35
|
db.close();
|
|
45
36
|
}
|
|
@@ -47,6 +38,48 @@ async function collectFileSymbolMasks(codeFiles) {
|
|
|
47
38
|
return codeFiles.map((filePath) => masks.get(filePath) || 0);
|
|
48
39
|
}
|
|
49
40
|
|
|
41
|
+
/**
 * Resolve the weights identifier for a freshly built sparse-gram artifact.
 *
 * Uses, in order: a non-empty string `result.weightsId`, a non-empty string
 * `result.weights_id`, `FALLBACK_WEIGHTS_ID` when `result.usedFallbackWeights`
 * is set, and otherwise an id derived from the content hash of the artifact
 * file itself.
 *
 * @param {object|null|undefined} result - Result returned by the builder.
 * @param {string} artifactPath - Path of the artifact to hash as last resort.
 * @returns {Promise<string>} The resolved weights id.
 */
async function resolveSparseGramWeightsId(result, artifactPath) {
  for (const key of ['weightsId', 'weights_id']) {
    const candidate = result?.[key];
    if (typeof candidate === 'string' && candidate) return candidate;
  }
  if (result?.usedFallbackWeights) return FALLBACK_WEIGHTS_ID;

  // No id reported by the builder: derive one from the artifact bytes.
  const artifactBytes = await fs.readFile(artifactPath);
  const digest = await contentHash(artifactBytes);
  return `corpus-bigram-v1-${digest}`;
}
|
|
52
|
+
|
|
53
|
+
/**
 * OR together the sparse symbol masks of every live vector row per file.
 *
 * Rows whose `file_path` is not a key of the mask map are ignored. When
 * `existingMasks` is supplied it is updated in place; otherwise a fresh map
 * seeded with 0 for each entry of `codeFiles` is used.
 *
 * @param {object} db - Database handle exposing `prepare(sql).iterate()`.
 * @param {string[]} codeFiles - File paths, in the order masks are returned.
 * @param {Map<string, number>|null} [existingMasks] - Optional map to update.
 * @returns {number[]} One mask per entry of `codeFiles` (0 when none found).
 */
function collectFileSymbolMasksFromDb(db, codeFiles, existingMasks = null) {
  const masks = existingMasks || new Map(codeFiles.map((filePath) => [filePath, 0]));
  const statement = db.prepare(`SELECT file_path, metadata FROM vectors WHERE ${liveVectorSql(db)}`);
  for (const row of statement.iterate()) {
    if (!masks.has(row.file_path)) continue;
    try {
      const { type } = JSON.parse(row.metadata || '{}');
      const typeMask = resolveSparseSymbolMask(type);
      if (typeMask !== 0) {
        masks.set(row.file_path, masks.get(row.file_path) | typeMask);
      }
    } catch {
      // Ignore malformed metadata rows; sparse gram build is best effort.
    }
  }
  return codeFiles.map((filePath) => masks.get(filePath) || 0);
}
|
|
70
|
+
|
|
71
|
+
/**
 * Report whether the `vectors` table has a column with the given name.
 *
 * Any error raised while querying the table info is treated as "no".
 *
 * @param {object} db - Database handle exposing `prepare(sql).all()`.
 * @param {string} column - Column name to look for.
 * @returns {boolean} True when the column is listed, false otherwise.
 */
function hasVectorColumn(db, column) {
  try {
    const columns = db.prepare('PRAGMA table_info(vectors)').all();
    return columns.some(({ name }) => name === column);
  } catch (_err) {
    return false;
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * SQL predicate selecting live rows of the `vectors` table.
 *
 * When the table has an `epoch_retired` column, only rows where it is NULL
 * qualify; without that column every row qualifies.
 *
 * @param {object} db - Database handle passed through to `hasVectorColumn`.
 * @returns {string} A WHERE-clause fragment.
 */
function liveVectorSql(db) {
  if (hasVectorColumn(db, 'epoch_retired')) return 'epoch_retired IS NULL';
  return '1=1';
}
|
|
82
|
+
|
|
50
83
|
export async function buildSparseGramArtifact(allFiles, dryRun) {
|
|
51
84
|
if (dryRun) {
|
|
52
85
|
log('DRY RUN: Skipping sparse gram artifact build', 'magenta');
|
|
@@ -85,14 +118,22 @@ export async function buildSparseGramArtifact(allFiles, dryRun) {
|
|
|
85
118
|
fileSymbolMasks,
|
|
86
119
|
outputPath: stagedPath,
|
|
87
120
|
});
|
|
121
|
+
const weightsId = await resolveSparseGramWeightsId(result, stagedPath);
|
|
88
122
|
await atomicSwapDatabase(stagedPath, DB_PATHS.sparseGramIndex);
|
|
89
123
|
log(
|
|
90
124
|
`Sparse gram artifact promoted (${result.filesIndexed} files, ${result.grams} grams, ${result.postings} postings)`,
|
|
91
125
|
'green'
|
|
92
126
|
);
|
|
93
|
-
return result;
|
|
127
|
+
return { ...result, weightsId };
|
|
94
128
|
} catch (err) {
|
|
95
129
|
await unlinkIfExists(stagedPath);
|
|
96
130
|
throw err;
|
|
97
131
|
}
|
|
98
132
|
}
|
|
133
|
+
|
|
134
|
+
export const __TEST__ = {
|
|
135
|
+
collectFileSymbolMasks,
|
|
136
|
+
collectFileSymbolMasksFromDb,
|
|
137
|
+
liveVectorSql,
|
|
138
|
+
resolveSparseGramWeightsId,
|
|
139
|
+
};
|
|
@@ -5,11 +5,21 @@
|
|
|
5
5
|
|
|
6
6
|
import fs from 'fs/promises';
|
|
7
7
|
import { existsSync } from 'fs';
|
|
8
|
-
import { spawn } from 'child_process';
|
|
9
8
|
import path from 'path';
|
|
10
9
|
import fg from 'fast-glob';
|
|
11
10
|
|
|
12
|
-
import { PROJECT_ROOT, setQuietMode as setGlobalQuietMode
|
|
11
|
+
import { PROJECT_ROOT, setQuietMode as setGlobalQuietMode } from '../infrastructure/config/index.js';
|
|
12
|
+
import { createAdmissionPolicy } from './admission-policy.js';
|
|
13
|
+
|
|
14
|
+
// `.gitignore` alignment now lives in gitignore-filter.js (shared with the
|
|
15
|
+
// incremental admission policy). Re-exported here so existing
|
|
16
|
+
// `import { ... } from indexer-utils` / barrel call sites keep working.
|
|
17
|
+
export {
|
|
18
|
+
toPosixPath,
|
|
19
|
+
isGitignoreAllowlistedAgenticPath,
|
|
20
|
+
getGitIgnoredPathSet,
|
|
21
|
+
applyGitignoreAlignment,
|
|
22
|
+
} from './gitignore-filter.js';
|
|
13
23
|
|
|
14
24
|
const glob = fg.glob || fg;
|
|
15
25
|
|
|
@@ -205,10 +215,6 @@ export function stripWslUncPrefix(filePath) {
|
|
|
205
215
|
return filePath;
|
|
206
216
|
}
|
|
207
217
|
|
|
208
|
-
export function toPosixPath(filePath) {
|
|
209
|
-
return filePath.replace(/\\/g, '/');
|
|
210
|
-
}
|
|
211
|
-
|
|
212
218
|
export async function readFilesFromStdin() {
|
|
213
219
|
return new Promise((resolve, reject) => {
|
|
214
220
|
let data = '';
|
|
@@ -278,201 +284,6 @@ export async function readFilesFromStdin() {
|
|
|
278
284
|
});
|
|
279
285
|
}
|
|
280
286
|
|
|
281
|
-
// =============================================================================
|
|
282
|
-
// GITIGNORE ALIGNMENT
|
|
283
|
-
// =============================================================================
|
|
284
|
-
|
|
285
|
-
/**
 * Decide whether a path matches `AGENTIC_GITIGNORE_ALLOWLIST`.
 *
 * The path is converted to POSIX separators and a leading `./` is dropped.
 * It matches when its file name is listed in `files`, when its file name
 * starts with one of `filePrefixes`, or when one of `directories` appears at
 * the start of the path or right after any `/`.
 *
 * @param {string} relativePath - Path to test.
 * @returns {boolean} True when the path is allowlisted.
 */
export function isGitignoreAllowlistedAgenticPath(relativePath) {
  const normalized = toPosixPath(relativePath).replace(/^\.\//, '');
  const fileName = path.posix.basename(normalized);
  const { files, filePrefixes, directories } = AGENTIC_GITIGNORE_ALLOWLIST;

  if (files.includes(fileName)) return true;
  if (filePrefixes.some((prefix) => fileName.startsWith(prefix))) return true;

  return directories.some(
    (dirPrefix) => normalized.startsWith(dirPrefix) || normalized.includes(`/${dirPrefix}`),
  );
}
|
|
301
|
-
|
|
302
|
-
/**
 * Run `git check-ignore -z --stdin` on a single batch of paths.
 *
 * @param {string[]} batch - Paths to test; they are sent NUL-separated with
 *   POSIX separators.
 * @param {(message: string) => void} reportError - Called when the git
 *   process cannot be run at all.
 * @returns {Promise<string[]|null>} The ignored paths (POSIX separators), or
 *   null when git could not be run or exited with an unexpected code without
 *   producing any output. The promise never rejects.
 */
function checkIgnoreBatch(batch, reportError) {
  return new Promise((resolve) => {
    const ignoredChunks = [];
    // 'error' and 'close' can both fire; only the first one settles.
    let settled = false;

    const git = spawn('git', ['check-ignore', '-z', '--stdin'], { cwd: PROJECT_ROOT });

    git.stdout.on('data', (chunk) => ignoredChunks.push(chunk));
    git.stderr.on('data', () => {}); // Suppress — batched caller handles partial failures

    git.on('error', (err) => {
      if (settled) return;
      settled = true;
      reportError(`WARN: Unable to run git check-ignore (${err.message})`);
      resolve(null);
    });

    git.on('close', (code) => {
      if (settled) return;
      settled = true;

      // code 0 = some ignored, code 1 = none ignored, both valid.
      // code 128 = fatal (e.g. path beyond symlink) — still use partial stdout.
      if (code !== 0 && code !== 1 && ignoredChunks.length === 0) {
        resolve(null);
        return;
      }

      const ignored = Buffer.concat(ignoredChunks)
        .toString('utf8')
        .split('\0')
        .filter(Boolean)
        .map(toPosixPath);

      resolve(ignored);
    });

    const stdinPayload = `${batch.map(toPosixPath).join('\0')}\0`;
    git.stdin.on('error', () => {}); // Suppress EPIPE if git exits early
    git.stdin.end(stdinPayload);
  });
}
|
|
348
|
-
|
|
349
|
-
const CHECK_IGNORE_BATCH_SIZE = 5000;
|
|
350
|
-
|
|
351
|
-
/**
 * Find directory components that are symlinks, so callers can filter out
 * paths that traverse them (git check-ignore fatals on "beyond a symbolic
 * link").
 *
 * Every ancestor directory of every path is `lstat`ed at most once, relative
 * to `PROJECT_ROOT`. Directories that cannot be stat'ed count as non-links.
 *
 * @param {string[]} paths - `/`-separated paths; the last segment of each is
 *   treated as the file name and is not checked.
 * @returns {Promise<string[]>} Symlinked directories, each with a trailing
 *   `/`, in the order they were first encountered.
 */
async function findSymlinkDirs(paths) {
  const checked = new Set();
  const symlinkPrefixes = [];

  for (const p of paths) {
    let dir = '';
    for (const segment of p.split('/').slice(0, -1)) {
      dir = dir ? `${dir}/${segment}` : segment;
      if (checked.has(dir)) continue;
      checked.add(dir);
      try {
        const stat = await fs.lstat(path.join(PROJECT_ROOT, dir));
        if (stat.isSymbolicLink()) symlinkPrefixes.push(`${dir}/`);
      } catch {
        // Missing or unreadable directory: treated as not a symlink.
      }
    }
  }

  return symlinkPrefixes;
}
|
|
378
|
-
|
|
379
|
-
/**
 * Work out which of `paths` git considers ignored.
 *
 * Paths that sit below a symlinked directory are never handed to
 * checkIgnoreBatch (git check-ignore fatals on them); they are marked ignored
 * exactly when the symlinked directory itself is reported as ignored. All
 * remaining paths are checked in batches of CHECK_IGNORE_BATCH_SIZE.
 *
 * @param {string[]} paths - Candidate paths to test.
 * @param {{silent?: boolean}} [options] - `silent: true` suppresses error reporting.
 * @returns {Promise<Set<string>|null>} The ignored paths, or `null` when at
 *   least one batch was attempted and every batch failed.
 */
export async function getGitIgnoredPathSet(paths, options = {}) {
  const reportError = (options.silent ?? false) ? () => {} : logError;

  const ignored = new Set();
  if (paths.length === 0) {
    return ignored;
  }

  // Split off everything that traverses a symlinked directory.
  const symlinkPrefixes = await findSymlinkDirs(paths);
  let safePaths = paths;
  if (symlinkPrefixes.length > 0) {
    // Ask git about the symlink directories themselves (trailing '/' removed).
    const symlinkDirs = symlinkPrefixes.map((prefix) => prefix.slice(0, -1));
    const ignoredSymlinks = new Set((await checkIgnoreBatch(symlinkDirs, reportError)) || []);

    safePaths = [];
    for (const candidate of paths) {
      const owner = symlinkPrefixes.find((prefix) => candidate.startsWith(prefix));
      if (!owner) {
        safePaths.push(candidate);
        continue;
      }
      // Beyond a symlink: inherit the symlink directory's ignored status and
      // never pass the path to git check-ignore.
      if (ignoredSymlinks.has(owner.slice(0, -1))) {
        ignored.add(toPosixPath(candidate));
      }
    }
  }

  const totalBatches = Math.ceil(safePaths.length / CHECK_IGNORE_BATCH_SIZE);
  let failedBatches = 0;
  for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) {
    const start = batchIndex * CHECK_IGNORE_BATCH_SIZE;
    const batch = safePaths.slice(start, start + CHECK_IGNORE_BATCH_SIZE);
    const result = await checkIgnoreBatch(batch, reportError);
    if (!result) {
      // A falsy result marks the whole batch as failed.
      failedBatches++;
      continue;
    }
    for (const hit of result) ignored.add(hit);
  }

  if (totalBatches > 0 && failedBatches === totalBatches) {
    reportError('WARN: git check-ignore failed on all batches — gitignore filtering disabled');
    return null;
  }

  return ignored;
}
|
|
436
|
-
|
|
437
|
-
/**
 * Drop gitignored entries from `files`.
 *
 * The input is returned untouched when `respectGitignore` is falsy, when
 * PROJECT_ROOT has no `.git` entry, or when getGitIgnoredPathSet yields no
 * usable set. Files accepted by isGitignoreAllowlistedAgenticPath are always
 * kept and are never checked against git.
 *
 * @param {string[]} files - Candidate file paths.
 * @param {boolean} respectGitignore - Whether gitignore filtering is enabled.
 * @param {object} [options] - Forwarded unchanged to getGitIgnoredPathSet.
 * @returns {Promise<{files: string[], gitignored: number}>} The surviving
 *   files (input order preserved) and the number of files dropped.
 */
export async function applyGitignoreAlignment(files, respectGitignore, options = {}) {
  const unchanged = () => ({ files, gitignored: 0 });

  if (!respectGitignore) return unchanged();
  if (!existsSync(path.join(PROJECT_ROOT, '.git'))) return unchanged();

  // Partition: allowlisted files bypass git entirely, the rest are candidates.
  const bypassGitignore = new Set();
  const candidates = [];
  for (const file of files) {
    const isAllowlisted = isGitignoreAllowlistedAgenticPath(file);
    (isAllowlisted ? bypassGitignore : null)?.add(file);
    if (!isAllowlisted) candidates.push(file);
  }

  const ignoredSet = await getGitIgnoredPathSet(candidates, options);
  if (!ignoredSet) return unchanged();

  // The ignored set is looked up by POSIX-normalised path.
  const kept = files.filter(
    (file) => bypassGitignore.has(file) || !ignoredSet.has(toPosixPath(file)),
  );

  return { files: kept, gitignored: files.length - kept.length };
}
|
|
475
|
-
|
|
476
287
|
// =============================================================================
|
|
477
288
|
// FILE DISCOVERY
|
|
478
289
|
// =============================================================================
|
|
@@ -487,19 +298,26 @@ export async function discoverFiles(options = {}) {
|
|
|
487
298
|
|
|
488
299
|
writeLog('\n━━━ Discovering Files ━━━', 'bright');
|
|
489
300
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
const
|
|
495
|
-
|
|
301
|
+
// Single shared admission policy — the same include allowlist / deny-list /
|
|
302
|
+
// `.sweet-search-ignore` / `.gitignore` / size gates the incremental
|
|
303
|
+
// maintainer uses, so a fresh full index and an incrementally-maintained
|
|
304
|
+
// index admit exactly the same files.
|
|
305
|
+
const policy = createAdmissionPolicy({ projectRoot });
|
|
306
|
+
const maxFileSize = policy.maxFileSize;
|
|
307
|
+
|
|
308
|
+
// Enumerate via the include globs (with the exclude globs pruning big dirs
|
|
309
|
+
// during traversal), then apply the policy's shape gate so `.sweet-search-ignore`
|
|
310
|
+
// is honoured here too — the one rule full discovery did not previously apply.
|
|
311
|
+
const discovered = await glob(policy.includeGlobs, {
|
|
312
|
+
ignore: policy.excludeGlobs,
|
|
496
313
|
cwd: projectRoot,
|
|
497
314
|
absolute: false,
|
|
498
315
|
onlyFiles: true,
|
|
499
316
|
dot: true,
|
|
500
317
|
});
|
|
318
|
+
const shaped = discovered.filter((rel) => policy.admitsShape(rel));
|
|
501
319
|
|
|
502
|
-
const { files: allFiles, gitignored } = await
|
|
320
|
+
const { files: allFiles, gitignored } = await policy.applyGitignore(shaped, { silent });
|
|
503
321
|
|
|
504
322
|
const files = [];
|
|
505
323
|
let oversized = 0;
|
|
@@ -17,6 +17,28 @@ function normalizePath(p) {
|
|
|
17
17
|
return p.replace(/\\/g, '/');
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
+
/**
 * Resolve the file path a chunk belongs to.
 *
 * Candidates are offered to firstSafeRelativePath in this order:
 * metadata.relative_path, metadata.path, metadata.file_path, chunk.file,
 * metadata.file. A missing chunk or missing metadata is tolerated.
 *
 * @param {object|null|undefined} chunk - Chunk record to inspect.
 * @returns {string} The chosen path, or '' when no candidate is accepted.
 */
function chunkPath(chunk) {
  const meta = chunk?.metadata;
  const candidates = [
    meta?.relative_path,
    meta?.path,
    meta?.file_path,
    chunk?.file,
    meta?.file,
  ];
  const resolved = firstSafeRelativePath(...candidates);
  return resolved ? resolved : '';
}
|
|
29
|
+
|
|
30
|
+
/**
 * Return the first candidate that is a usable project-relative path.
 *
 * Each string candidate is normalised (backslashes -> '/', runs of '/'
 * collapsed, leading './' segments removed) and then skipped when it is:
 *   - empty or '.',
 *   - absolute ('/...' or a drive-letter path such as 'C:/...'),
 *   - or contains a '..' path segment anywhere.
 * Non-string candidates are skipped.
 *
 * @param {...*} candidates - Values to try, in priority order.
 * @returns {string|null} The normalised path of the first accepted candidate,
 *   or null when none qualifies.
 */
function firstSafeRelativePath(...candidates) {
  for (const candidate of candidates) {
    if (typeof candidate !== 'string') continue;

    // Collapse duplicate slashes BEFORE stripping './' so that './/src/a.js'
    // becomes 'src/a.js' rather than '/src/a.js' (which would then be
    // mistaken for an absolute path). All leading './' segments are removed.
    const normalized = candidate
      .replace(/\\/g, '/')
      .replace(/\/+/g, '/')
      .replace(/^(?:\.\/)+/, '');

    if (!normalized || normalized === '.' || normalized.startsWith('/')) continue;
    if (/^[A-Za-z]:\//.test(normalized)) continue;
    // Segment-wise check: rejects '..' at the start, middle, or end.
    if (normalized.split('/').includes('..')) continue;

    return normalized;
  }
  return null;
}
|
|
41
|
+
|
|
20
42
|
const _excludesByRoot = new Map();
|
|
21
43
|
|
|
22
44
|
function getExcludes(projectRoot) {
|
|
@@ -93,24 +115,26 @@ export function applyIndexingChunkPolicy(chunks, options = {}) {
|
|
|
93
115
|
const fileFirstReason = new Map();
|
|
94
116
|
|
|
95
117
|
for (const chunk of chunks) {
|
|
96
|
-
|
|
97
|
-
if (
|
|
118
|
+
const file = chunkPath(chunk);
|
|
119
|
+
if (!file) continue;
|
|
120
|
+
if (fileFirstReason.has(file)) continue;
|
|
98
121
|
|
|
99
122
|
let reason = null;
|
|
100
|
-
if (isExcludedByConfig(
|
|
123
|
+
if (isExcludedByConfig(file, projectRoot)) {
|
|
101
124
|
reason = 'excluded';
|
|
102
125
|
} else {
|
|
103
126
|
const text = chunk.text || chunk.content || '';
|
|
104
127
|
if (chunkLooksGenerated(text)) reason = 'generated';
|
|
105
128
|
}
|
|
106
|
-
fileFirstReason.set(
|
|
129
|
+
fileFirstReason.set(file, reason);
|
|
107
130
|
}
|
|
108
131
|
|
|
109
132
|
const kept = [];
|
|
110
133
|
const skipped = [];
|
|
111
134
|
const stats = emptyStats();
|
|
112
135
|
for (const chunk of chunks) {
|
|
113
|
-
const
|
|
136
|
+
const file = chunkPath(chunk);
|
|
137
|
+
const reason = file ? fileFirstReason.get(file) : null;
|
|
114
138
|
if (reason) {
|
|
115
139
|
skipped.push(chunk);
|
|
116
140
|
stats[reason]++;
|
|
@@ -119,8 +143,8 @@ export function applyIndexingChunkPolicy(chunks, options = {}) {
|
|
|
119
143
|
kept.push(chunk);
|
|
120
144
|
}
|
|
121
145
|
}
|
|
122
|
-
stats.skippedFiles = new Set(skipped.map(
|
|
123
|
-
stats.keptFiles = new Set(kept.map(
|
|
146
|
+
stats.skippedFiles = new Set(skipped.map(chunkPath).filter(Boolean)).size;
|
|
147
|
+
stats.keptFiles = new Set(kept.map(chunkPath).filter(Boolean)).size;
|
|
124
148
|
return { kept, skipped, stats };
|
|
125
149
|
}
|
|
126
150
|
|
|
@@ -136,5 +160,6 @@ function emptyStats() {
|
|
|
136
160
|
|
|
137
161
|
export const _internals = {
|
|
138
162
|
GENERATED_MARKERS,
|
|
163
|
+
chunkPath,
|
|
139
164
|
resetCache,
|
|
140
165
|
};
|