npm - @ghcrawl/api-core - Versions diffs - 0.7.0 → 0.8.0-beta.1 - Mend

@ghcrawl/api-core 0.7.0 → 0.8.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/dist/cluster/build.d.ts +30 -0
package/dist/cluster/build.d.ts.map +1 -1
package/dist/cluster/build.js +178 -7
package/dist/cluster/build.js.map +1 -1
package/dist/cluster/perf.integration.js +186 -20
package/dist/cluster/perf.integration.js.map +1 -1
package/dist/config.d.ts +9 -1
package/dist/config.d.ts.map +1 -1
package/dist/config.js +29 -2
package/dist/config.js.map +1 -1
package/dist/db/migrate.d.ts.map +1 -1
package/dist/db/migrate.js +37 -0
package/dist/db/migrate.js.map +1 -1
package/dist/index.d.ts +2 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +2 -0
package/dist/index.js.map +1 -1
package/dist/openai/provider.d.ts +2 -0
package/dist/openai/provider.d.ts.map +1 -1
package/dist/openai/provider.js +15 -1
package/dist/openai/provider.js.map +1 -1
package/dist/service.d.ts +101 -3
package/dist/service.d.ts.map +1 -1
package/dist/service.js +1058 -134
package/dist/service.js.map +1 -1
package/dist/vector/store.d.ts +38 -0
package/dist/vector/store.d.ts.map +1 -0
package/dist/vector/store.js +2 -0
package/dist/vector/store.js.map +1 -0
package/dist/vector/vectorlite-store.d.ts +34 -0
package/dist/vector/vectorlite-store.d.ts.map +1 -0
package/dist/vector/vectorlite-store.js +124 -0
package/dist/vector/vectorlite-store.js.map +1 -0
package/package.json +7 -6

package/dist/service.js CHANGED Viewed

@@ -1,12 +1,15 @@
 import http from 'node:http';
 import crypto from 'node:crypto';
+import fs from 'node:fs';
 import { existsSync } from 'node:fs';
+import { createRequire } from 'node:module';
 import os from 'node:os';
+import path from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { Worker } from 'node:worker_threads';
 import { IterableMapper } from '@shutterstock/p-map-iterable';
 import { actionResponseSchema, authorThreadsResponseSchema, closeResponseSchema, clusterDetailResponseSchema, clusterResultSchema, clusterSummariesResponseSchema, clustersResponseSchema, embedResultSchema, healthResponseSchema, neighborsResponseSchema, refreshResponseSchema, repositoriesResponseSchema, searchResponseSchema, syncResultSchema, threadsResponseSchema, } from '@ghcrawl/api-contract';
-import { buildClusters } from './cluster/build.js';
+import { buildClusters, buildRefinedClusters, buildSizeBoundedClusters } from './cluster/build.js';
 import { buildSourceKindEdges } from './cluster/exact-edges.js';
 import { ensureRuntimeDirs, isLikelyGitHubToken, isLikelyOpenAiApiKey, loadConfig, requireGithubToken, requireOpenAiKey, } from './config.js';
 import { migrate } from './db/migrate.js';
@@ -14,7 +17,8 @@ import { openDb } from './db/sqlite.js';
 import { buildCanonicalDocument, isBotLikeAuthor } from './documents/normalize.js';
 import { makeGitHubClient } from './github/client.js';
 import { OpenAiProvider } from './openai/provider.js';
-import { cosineSimilarity, normalizeEmbedding, rankNearestNeighbors } from './search/exact.js';
+import { cosineSimilarity, dotProduct, normalizeEmbedding, rankNearestNeighbors, rankNearestNeighborsByScore } from './search/exact.js';
+import { VectorliteStore } from './vector/vectorlite-store.js';
 const SYNC_BATCH_SIZE = 100;
 const SYNC_BATCH_DELAY_MS = 5000;
 const STALE_CLOSED_SWEEP_LIMIT = 1000;
@@ -23,10 +27,31 @@ const CLUSTER_PARALLEL_MIN_EMBEDDINGS = 5000;
 const EMBED_ESTIMATED_CHARS_PER_TOKEN = 3;
 const EMBED_MAX_ITEM_TOKENS = 7000;
 const EMBED_MAX_BATCH_TOKENS = 250000;
+const requireFromHere = createRequire(import.meta.url);
 const EMBED_TRUNCATION_MARKER = '\n\n[truncated for embedding]';
 const EMBED_CONTEXT_RETRY_ATTEMPTS = 5;
 const EMBED_CONTEXT_RETRY_FALLBACK_SHRINK_RATIO = 0.9;
 const EMBED_CONTEXT_RETRY_TARGET_BUFFER_RATIO = 0.95;
+const SUMMARY_PROMPT_VERSION = 'v1';
+const ACTIVE_EMBED_DIMENSIONS = 1024;
+const ACTIVE_EMBED_PIPELINE_VERSION = 'vectorlite-1024-v1';
+const DEFAULT_CLUSTER_MIN_SCORE = 0.78;
+const VECTORLITE_CLUSTER_EXPANDED_K = 24;
+const VECTORLITE_CLUSTER_EXPANDED_MULTIPLIER = 4;
+const VECTORLITE_CLUSTER_EXPANDED_CANDIDATE_K = 512;
+const VECTORLITE_CLUSTER_EXPANDED_EF_SEARCH = 1024;
+const SUMMARY_MODEL_PRICING = {
+    'gpt-5-mini': {
+        inputCostPerM: 0.25,
+        cachedInputCostPerM: 0.025,
+        outputCostPerM: 2.0,
+    },
+    'gpt-5.4-mini': {
+        inputCostPerM: 0.75,
+        cachedInputCostPerM: 0.075,
+        outputCostPerM: 4.5,
+    },
+};
 function nowIso() {
     return new Date().toISOString();
 }
@@ -180,7 +205,7 @@ export class GHCrawlService {
     db;
     github;
     ai;
-    parsedEmbeddingCache = new Map();
+    vectorStore;
     constructor(options = {}) {
         this.config = options.config ?? loadConfig();
         ensureRuntimeDirs(this.config);
@@ -188,9 +213,10 @@ export class GHCrawlService {
         migrate(this.db);
         this.github = options.github ?? (this.config.githubToken ? makeGitHubClient({ token: this.config.githubToken }) : undefined);
         this.ai = options.ai ?? (this.config.openaiApiKey ? new OpenAiProvider(this.config.openaiApiKey) : undefined);
+        this.vectorStore = options.vectorStore ?? new VectorliteStore();
     }
     close() {
-        this.parsedEmbeddingCache.clear();
+        this.vectorStore.close();
         this.db.close();
     }
     init() {
@@ -257,7 +283,17 @@ export class GHCrawlService {
                 }
             }
         }
-        return { health, github, openai };
+        const vectorliteHealth = this.vectorStore.checkRuntime();
+        return {
+            health,
+            github,
+            openai,
+            vectorlite: {
+                configured: this.config.vectorBackend === 'vectorlite',
+                runtimeOk: vectorliteHealth.ok,
+                error: vectorliteHealth.error,
+            },
+        };
     }
     listRepositories() {
         const rows = this.db.prepare('select * from repositories order by full_name asc').all();
@@ -409,7 +445,6 @@ export class GHCrawlService {
              updated_at = ?
          where id = ?`)
             .run(closedAt, closedAt, row.id);
-        this.parsedEmbeddingCache.delete(repository.id);
         const clusterIds = this.getLatestRunClusterIdsForThread(repository.id, row.id);
         const clusterClosed = this.reconcileClusterCloseState(repository.id, clusterIds) > 0;
         const updated = this.db.prepare('select * from threads where id = ? limit 1').get(row.id);
@@ -543,7 +578,6 @@ export class GHCrawlService {
                 })
                 : 0;
             const threadsClosed = threadsClosedFromClosedSweep + threadsClosedFromDirectReconcile;
-            this.parsedEmbeddingCache.delete(repoId);
             if (threadsClosed > 0) {
                 this.reconcileClusterCloseState(repoId);
             }
@@ -606,31 +640,69 @@ export class GHCrawlService {
             });
             const pending = sources.filter((row) => {
                 const latest = this.db
-                    .prepare('select content_hash from document_summaries where thread_id = ? and summary_kind = ? and model = ? limit 1')
+                    .prepare('select content_hash, prompt_version from document_summaries where thread_id = ? and summary_kind = ? and model = ? limit 1')
                     .get(row.id, 'dedupe_summary', this.config.summaryModel);
-                return latest?.content_hash !== row.summaryContentHash;
+                return latest?.content_hash !== row.summaryContentHash || latest?.prompt_version !== SUMMARY_PROMPT_VERSION;
             });
             params.onProgress?.(`[summarize] pending=${pending.length} skipped=${rows.length - pending.length} model=${this.config.summaryModel}`);
             let summarized = 0;
             let inputTokens = 0;
             let outputTokens = 0;
             let totalTokens = 0;
-            for (const [index, row] of pending.entries()) {
-                params.onProgress?.(`[summarize] ${index + 1}/${pending.length} thread #${row.number}`);
+            let cachedInputTokens = 0;
+            const startTime = Date.now();
+            const pricing = SUMMARY_MODEL_PRICING[this.config.summaryModel] ?? null;
+            // Stage 1: concurrent API calls
+            const fetcher = new IterableMapper(pending, async (row) => {
                 const result = await ai.summarizeThread({
                     model: this.config.summaryModel,
                     text: row.summaryInput,
                 });
+                return { row, result };
+            }, { concurrency: 5 });
+            // Stage 2: sequential DB writes — consumes from fetcher without blocking API completions
+            const writer = new IterableMapper(fetcher, async ({ row, result }) => {
                 const summary = result.summary;
                 this.upsertSummary(row.id, row.summaryContentHash, 'problem_summary', summary.problemSummary);
                 this.upsertSummary(row.id, row.summaryContentHash, 'solution_summary', summary.solutionSummary);
                 this.upsertSummary(row.id, row.summaryContentHash, 'maintainer_signal_summary', summary.maintainerSignalSummary);
                 this.upsertSummary(row.id, row.summaryContentHash, 'dedupe_summary', summary.dedupeSummary);
-                if (result.usage) {
-                    inputTokens += result.usage.inputTokens;
-                    outputTokens += result.usage.outputTokens;
-                    totalTokens += result.usage.totalTokens;
-                    params.onProgress?.(`[summarize] tokens thread #${row.number} in=${result.usage.inputTokens} out=${result.usage.outputTokens} total=${result.usage.totalTokens} cached_in=${result.usage.cachedInputTokens} reasoning=${result.usage.reasoningTokens}`);
+                return { row, usage: result.usage };
+            }, { concurrency: 1 });
+            let index = 0;
+            for await (const { row, usage } of writer) {
+                index += 1;
+                if (usage) {
+                    inputTokens += usage.inputTokens;
+                    outputTokens += usage.outputTokens;
+                    totalTokens += usage.totalTokens;
+                    cachedInputTokens += usage.cachedInputTokens;
+                }
+                // Compute cost and ETA every 10 items or on the last item
+                if (index % 10 === 0 || index === pending.length) {
+                    const remaining = pending.length - index;
+                    const avgIn = inputTokens / index;
+                    const avgOut = outputTokens / index;
+                    const avgCachedIn = cachedInputTokens / index;
+                    const elapsedSec = (Date.now() - startTime) / 1000;
+                    const secPerItem = elapsedSec / index;
+                    const etaSec = remaining * secPerItem;
+                    const etaMin = Math.round(etaSec / 60);
+                    const etaStr = etaMin >= 60 ? `${Math.floor(etaMin / 60)}h${etaMin % 60}m` : `${etaMin}m`;
+                    if (pricing) {
+                        const uncachedInput = inputTokens - cachedInputTokens;
+                        const costSoFar = (uncachedInput / 1_000_000) * pricing.inputCostPerM +
+                            (cachedInputTokens / 1_000_000) * pricing.cachedInputCostPerM +
+                            (outputTokens / 1_000_000) * pricing.outputCostPerM;
+                        const estTotalCost = costSoFar +
+                            ((remaining * (avgIn - avgCachedIn)) / 1_000_000) * pricing.inputCostPerM +
+                            ((remaining * avgCachedIn) / 1_000_000) * pricing.cachedInputCostPerM +
+                            ((remaining * avgOut) / 1_000_000) * pricing.outputCostPerM;
+                        params.onProgress?.(`[summarize] ${index}/${pending.length} thread #${row.number} | cost=$${costSoFar.toFixed(2)} est_total=$${estTotalCost.toFixed(2)} | avg_in=${Math.round(avgIn)} avg_out=${Math.round(avgOut)} | ETA ${etaStr}`);
+                    }
+                    else {
+                        params.onProgress?.(`[summarize] ${index}/${pending.length} thread #${row.number} | avg_in=${Math.round(avgIn)} avg_out=${Math.round(avgOut)} | ETA ${etaStr}`);
+                    }
                 }
                 summarized += 1;
             }
@@ -674,11 +746,25 @@ export class GHCrawlService {
         const repository = this.requireRepository(params.owner, params.repo);
         const runId = this.startRun('embedding_runs', repository.id, params.threadNumber ? `thread:${params.threadNumber}` : repository.fullName);
         try {
-            const { rows, tasks, pending } = this.getEmbeddingWorkset(repository.id, params.threadNumber);
+            if (params.threadNumber === undefined) {
+                if (!this.isRepoVectorStateCurrent(repository.id)) {
+                    this.resetRepositoryVectors(repository.id, repository.fullName);
+                }
+                else {
+                    const pruned = this.pruneInactiveRepositoryVectors(repository.id, repository.fullName);
+                    if (pruned > 0) {
+                        params.onProgress?.(`[embed] pruned ${pruned} closed or inactive vector(s) before refresh`);
+                    }
+                }
+            }
+            const { rows, tasks, pending, missingSummaryThreadNumbers } = this.getEmbeddingWorkset(repository.id, params.threadNumber);
             const skipped = tasks.length - pending.length;
             const truncated = tasks.filter((task) => task.wasTruncated).length;
-            params.onProgress?.(`[embed] loaded ${rows.length} open thread(s) and ${tasks.length} embedding source(s) for ${repository.fullName}`);
-            params.onProgress?.(`[embed] pending=${pending.length} skipped=${skipped} truncated=${truncated} model=${this.config.embedModel} batch_size=${this.config.embedBatchSize} concurrency=${this.config.embedConcurrency} max_unread=${this.config.embedMaxUnread} max_batch_tokens=${EMBED_MAX_BATCH_TOKENS}`);
+            if (missingSummaryThreadNumbers.length > 0) {
+                throw new Error(`Embedding basis ${this.config.embeddingBasis} requires summaries before embedding. Missing summaries for thread(s): ${missingSummaryThreadNumbers.slice(0, 10).join(', ')}${missingSummaryThreadNumbers.length > 10 ? ', …' : ''}.`);
+            }
+            params.onProgress?.(`[embed] loaded ${rows.length} open thread(s) and ${tasks.length} active vector task(s) for ${repository.fullName}`);
+            params.onProgress?.(`[embed] pending=${pending.length} skipped=${skipped} truncated=${truncated} model=${this.config.embedModel} dimensions=${ACTIVE_EMBED_DIMENSIONS} basis=${this.config.embeddingBasis} batch_size=${this.config.embedBatchSize} concurrency=${this.config.embedConcurrency} max_unread=${this.config.embedMaxUnread} max_batch_tokens=${EMBED_MAX_BATCH_TOKENS}`);
             let embedded = 0;
             const batches = this.chunkEmbeddingTasks(pending, this.config.embedBatchSize, EMBED_MAX_BATCH_TOKENS);
             const mapper = new IterableMapper(batches, async (batch) => {
@@ -690,14 +776,15 @@ export class GHCrawlService {
             let completedBatches = 0;
             for await (const batchResult of mapper) {
                 completedBatches += 1;
-                const numbers = batchResult.map(({ task }) => `#${task.threadNumber}:${task.sourceKind}`);
+                const numbers = batchResult.map(({ task }) => `#${task.threadNumber}:${task.basis}`);
                 const estimatedTokens = batchResult.reduce((sum, { task }) => sum + task.estimatedTokens, 0);
                 params.onProgress?.(`[embed] batch ${completedBatches}/${Math.max(batches.length, 1)} size=${batchResult.length} est_tokens=${estimatedTokens} items=${numbers.join(',')}`);
                 for (const { task, embedding } of batchResult) {
-                    this.upsertEmbedding(task.threadId, task.sourceKind, task.contentHash, embedding);
+                    this.upsertActiveVector(repository.id, repository.fullName, task.threadId, task.basis, task.contentHash, embedding);
                     embedded += 1;
                 }
             }
+            this.markRepoVectorsCurrent(repository.id);
             this.finishRun('embedding_runs', runId, 'completed', { embedded });
             return embedResultSchema.parse({ runId, embedded });
         }
@@ -709,16 +796,70 @@ export class GHCrawlService {
     async clusterRepository(params) {
         const repository = this.requireRepository(params.owner, params.repo);
         const runId = this.startRun('cluster_runs', repository.id, repository.fullName);
-        const minScore = params.minScore ?? 0.82;
+        const minScore = params.minScore ?? DEFAULT_CLUSTER_MIN_SCORE;
         const k = params.k ?? 6;
         try {
-            const { items, sourceKinds } = this.loadClusterableThreadMeta(repository.id);
-            params.onProgress?.(`[cluster] loaded ${items.length} embedded thread(s) across ${sourceKinds.length} source kind(s) for ${repository.fullName} k=${k} minScore=${minScore}`);
-            const aggregatedEdges = await this.aggregateRepositoryEdges(repository.id, sourceKinds, {
-                limit: k,
-                minScore,
-                onProgress: params.onProgress,
-            });
+            let items;
+            let aggregatedEdges;
+            if (this.isRepoVectorStateCurrent(repository.id)) {
+                const vectorItems = this.loadClusterableActiveVectorMeta(repository.id, repository.fullName);
+                const activeIds = new Set(vectorItems.map((item) => item.id));
+                const annQuery = this.getVectorliteClusterQuery(vectorItems.length, k);
+                aggregatedEdges = new Map();
+                let processed = 0;
+                let lastProgressAt = Date.now();
+                params.onProgress?.(`[cluster] loaded ${vectorItems.length} active vector(s) for ${repository.fullName} backend=${this.config.vectorBackend} k=${k} query_limit=${annQuery.limit} candidateK=${annQuery.candidateK} efSearch=${annQuery.efSearch ?? 'default'} minScore=${minScore}`);
+                for (const item of vectorItems) {
+                    const neighbors = this.vectorStore.queryNearest({
+                        storePath: this.repoVectorStorePath(repository.fullName),
+                        dimensions: ACTIVE_EMBED_DIMENSIONS,
+                        vector: item.embedding,
+                        limit: annQuery.limit,
+                        candidateK: annQuery.candidateK + 1,
+                        efSearch: annQuery.efSearch,
+                        excludeThreadId: item.id,
+                    });
+                    for (const neighbor of neighbors) {
+                        if (!activeIds.has(neighbor.threadId))
+                            continue;
+                        if (neighbor.score < minScore)
+                            continue;
+                        const key = this.edgeKey(item.id, neighbor.threadId);
+                        const existing = aggregatedEdges.get(key);
+                        if (existing) {
+                            existing.score = Math.max(existing.score, neighbor.score);
+                        }
+                        else {
+                            aggregatedEdges.set(key, {
+                                leftThreadId: Math.min(item.id, neighbor.threadId),
+                                rightThreadId: Math.max(item.id, neighbor.threadId),
+                                score: neighbor.score,
+                                sourceKinds: new Set(['dedupe_summary']),
+                            });
+                        }
+                    }
+                    processed += 1;
+                    const now = Date.now();
+                    if (params.onProgress && now - lastProgressAt >= CLUSTER_PROGRESS_INTERVAL_MS) {
+                        params.onProgress(`[cluster] queried ${processed}/${vectorItems.length} vectors current_edges=${aggregatedEdges.size}`);
+                        lastProgressAt = now;
+                    }
+                }
+                items = vectorItems;
+            }
+            else if (this.hasLegacyEmbeddings(repository.id)) {
+                const legacy = this.loadClusterableThreadMeta(repository.id);
+                items = legacy.items;
+                params.onProgress?.(`[cluster] loaded ${items.length} legacy embedded thread(s) across ${legacy.sourceKinds.length} source kind(s) for ${repository.fullName} k=${k} minScore=${minScore}`);
+                aggregatedEdges = await this.aggregateRepositoryEdges(repository.id, legacy.sourceKinds, {
+                    limit: k,
+                    minScore,
+                    onProgress: params.onProgress,
+                });
+            }
+            else {
+                throw new Error(`Vectors for ${repository.fullName} are stale or missing. Run refresh or embed first.`);
+            }
             const edges = Array.from(aggregatedEdges.values()).map((entry) => ({
                 leftThreadId: entry.leftThreadId,
                 rightThreadId: entry.rightThreadId,
@@ -728,6 +869,10 @@ export class GHCrawlService {
             const clusters = buildClusters(items.map((item) => ({ threadId: item.id, number: item.number, title: item.title })), edges);
             this.persistClusterRun(repository.id, runId, aggregatedEdges, clusters);
             this.pruneOldClusterRuns(repository.id, runId);
+            if (this.isRepoVectorStateCurrent(repository.id)) {
+                this.markRepoClustersCurrent(repository.id);
+                this.cleanupMigratedRepositoryArtifacts(repository.id, repository.fullName, params.onProgress);
+            }
             params.onProgress?.(`[cluster] persisted ${clusters.length} cluster(s) and pruned older cluster runs`);
             this.finishRun('cluster_runs', runId, 'completed', { edges: edges.length, clusters: clusters.length });
             return clusterResultSchema.parse({ runId, edges: edges.length, clusters: clusters.length });
@@ -737,6 +882,263 @@ export class GHCrawlService {
             throw error;
         }
     }
+    clusterExperiment(params) {
+        const backend = params.backend ?? 'vectorlite';
+        const repository = this.requireRepository(params.owner, params.repo);
+        const loaded = this.loadClusterableThreadMeta(repository.id);
+        const activeVectors = this.isRepoVectorStateCurrent(repository.id) ? this.loadNormalizedActiveVectors(repository.id) : [];
+        const activeSourceKind = this.config.embeddingBasis === 'title_summary' ? 'dedupe_summary' : 'body';
+        const useActiveVectors = activeVectors.length > 0 && (params.sourceKinds === undefined || loaded.items.length === 0);
+        const sourceKinds = useActiveVectors ? [activeSourceKind] : (params.sourceKinds ?? loaded.sourceKinds);
+        const items = useActiveVectors
+            ? activeVectors.map((item) => ({ id: item.id, number: item.number, title: item.title }))
+            : loaded.items;
+        const aggregation = params.aggregation ?? 'max';
+        const minScore = params.minScore ?? DEFAULT_CLUSTER_MIN_SCORE;
+        const k = params.k ?? 6;
+        const candidateK = Math.max(k, params.candidateK ?? Math.max(k * 16, 64));
+        const efSearch = params.efSearch;
+        const startedAt = Date.now();
+        const memoryBefore = process.memoryUsage();
+        let peakRssBytes = memoryBefore.rss;
+        let peakHeapUsedBytes = memoryBefore.heapUsed;
+        const recordMemory = () => {
+            const usage = process.memoryUsage();
+            peakRssBytes = Math.max(peakRssBytes, usage.rss);
+            peakHeapUsedBytes = Math.max(peakHeapUsedBytes, usage.heapUsed);
+        };
+        recordMemory();
+        if (useActiveVectors && params.sourceKinds && loaded.items.length === 0) {
+            params.onProgress?.(`[cluster-experiment] legacy source embeddings are unavailable for ${repository.fullName}; falling back to active ${this.config.embeddingBasis} vectors`);
+        }
+        params.onProgress?.(`[cluster-experiment] loaded ${items.length} embedded thread(s) across ${sourceKinds.length} source kind(s) for ${repository.fullName} backend=${backend} k=${k} candidateK=${candidateK} minScore=${minScore} aggregation=${aggregation}`);
+        const perSourceScores = new Map();
+        let loadMs = 0;
+        let setupMs = 0;
+        let edgeBuildMs = 0;
+        let indexBuildMs = 0;
+        let queryMs = 0;
+        let clusterBuildMs = 0;
+        let tempDbPath = null;
+        let tempDb = null;
+        let tempDir = null;
+        try {
+            if (backend === 'exact') {
+                if (useActiveVectors) {
+                    const loadStartedAt = Date.now();
+                    const normalizedRows = activeVectors.map(({ id, embedding }) => ({ id, normalizedEmbedding: embedding }));
+                    loadMs += Date.now() - loadStartedAt;
+                    recordMemory();
+                    const edgesStartedAt = Date.now();
+                    const edges = buildSourceKindEdges(normalizedRows, {
+                        limit: k,
+                        minScore,
+                        progressIntervalMs: CLUSTER_PROGRESS_INTERVAL_MS,
+                        onProgress: (progress) => {
+                            recordMemory();
+                            if (!params.onProgress)
+                                return;
+                            params.onProgress(`[cluster-experiment] exact ${progress.processedItems}/${normalizedRows.length} active vectors processed current_edges~=${perSourceScores.size + progress.currentEdgeEstimate}`);
+                        },
+                    });
+                    edgeBuildMs += Date.now() - edgesStartedAt;
+                    this.collectSourceKindScores(perSourceScores, edges, activeSourceKind);
+                    recordMemory();
+                }
+                else {
+                    const totalItems = sourceKinds.reduce((sum, sourceKind) => sum + this.countEmbeddingsForSourceKind(repository.id, sourceKind), 0);
+                    let processedItems = 0;
+                    for (const sourceKind of sourceKinds) {
+                        const loadStartedAt = Date.now();
+                        const normalizedRows = this.loadNormalizedEmbeddingsForSourceKind(repository.id, sourceKind);
+                        loadMs += Date.now() - loadStartedAt;
+                        recordMemory();
+                        const edgesStartedAt = Date.now();
+                        const edges = buildSourceKindEdges(normalizedRows, {
+                            limit: k,
+                            minScore,
+                            progressIntervalMs: CLUSTER_PROGRESS_INTERVAL_MS,
+                            onProgress: (progress) => {
+                                recordMemory();
+                                if (!params.onProgress)
+                                    return;
+                                params.onProgress(`[cluster-experiment] exact ${processedItems + progress.processedItems}/${totalItems} source embeddings processed current_edges~=${perSourceScores.size + progress.currentEdgeEstimate}`);
+                            },
+                        });
+                        edgeBuildMs += Date.now() - edgesStartedAt;
+                        processedItems += normalizedRows.length;
+                        this.collectSourceKindScores(perSourceScores, edges, sourceKind);
+                        recordMemory();
+                    }
+                }
+            }
+            else {
+                const setupStartedAt = Date.now();
+                tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'ghcrawl-vectorlite-'));
+                tempDbPath = path.join(tempDir, 'cluster-experiment.db');
+                tempDb = openDb(tempDbPath);
+                tempDb.pragma('journal_mode = MEMORY');
+                tempDb.pragma('synchronous = OFF');
+                tempDb.pragma('temp_store = MEMORY');
+                const vectorlite = requireFromHere('vectorlite');
+                tempDb.loadExtension(vectorlite.vectorlitePath());
+                setupMs += Date.now() - setupStartedAt;
+                recordMemory();
+                const vectorSources = useActiveVectors
+                    ? [
+                        {
+                            sourceKind: activeSourceKind,
+                            rows: activeVectors.map(({ id, embedding }) => ({ id, normalizedEmbedding: embedding })),
+                        },
+                    ]
+                    : sourceKinds.map((sourceKind) => ({
+                        sourceKind,
+                        rows: this.loadNormalizedEmbeddingsForSourceKind(repository.id, sourceKind).map((row) => ({
+                            id: row.id,
+                            normalizedEmbedding: row.normalizedEmbedding,
+                        })),
+                    }));
+                for (const source of vectorSources) {
+                    const sourceRowCount = source.rows.length;
+                    if (sourceRowCount === 0) {
+                        continue;
+                    }
+                    const dimension = source.rows[0].normalizedEmbedding.length;
+                    const safeCandidateK = Math.min(candidateK, Math.max(1, sourceRowCount - 1));
+                    const tableName = `vector_${source.sourceKind}`;
+                    params.onProgress?.(`[cluster-experiment] building ${source.sourceKind} HNSW index with ${sourceRowCount} vector(s)`);
+                    const indexStartedAt = Date.now();
+                    tempDb.exec(`create virtual table ${tableName} using vectorlite(vec float32[${dimension}], hnsw(max_elements=${sourceRowCount}));`);
+                    const insert = tempDb.prepare(`insert into ${tableName}(rowid, vec) values (?, ?)`);
+                    tempDb.transaction(() => {
+                        const loadStartedAt = Date.now();
+                        for (const row of source.rows) {
+                            insert.run(row.id, this.normalizedEmbeddingBuffer(row.normalizedEmbedding));
+                        }
+                        loadMs += Date.now() - loadStartedAt;
+                    })();
+                    indexBuildMs += Date.now() - indexStartedAt;
+                    recordMemory();
+                    const queryStartedAt = Date.now();
+                    const querySql = efSearch !== undefined
+                        ? `select rowid, distance from ${tableName} where knn_search(vec, knn_param(?, ${safeCandidateK + 1}, ${efSearch}))`
+                        : `select rowid, distance from ${tableName} where knn_search(vec, knn_param(?, ${safeCandidateK + 1}))`;
+                    const query = tempDb.prepare(querySql);
+                    let processed = 0;
+                    let lastProgressAt = Date.now();
+                    const queryLoadStartedAt = Date.now();
+                    for (const row of source.rows) {
+                        const candidates = query.all(this.normalizedEmbeddingBuffer(row.normalizedEmbedding));
+                        const ranked = rankNearestNeighborsByScore(candidates, {
+                            limit: k,
+                            minScore,
+                            score: (candidate) => {
+                                if (candidate.rowid === row.id) {
+                                    return -1;
+                                }
+                                return this.normalizedDistanceToScore(candidate.distance);
+                            },
+                        });
+                        let addedThisRow = 0;
+                        for (const candidate of ranked) {
+                            const score = candidate.score;
+                            const key = this.edgeKey(row.id, candidate.item.rowid);
+                            const existing = perSourceScores.get(key);
+                            if (existing) {
+                                existing.scores.set(source.sourceKind, Math.max(existing.scores.get(source.sourceKind) ?? -1, score));
+                                continue;
+                            }
+                            const scores = new Map();
+                            scores.set(source.sourceKind, score);
+                            perSourceScores.set(key, {
+                                leftThreadId: Math.min(row.id, candidate.item.rowid),
+                                rightThreadId: Math.max(row.id, candidate.item.rowid),
+                                scores,
+                            });
+                            addedThisRow += 1;
+                        }
+                        processed += 1;
+                        const now = Date.now();
+                        if (params.onProgress && now - lastProgressAt >= CLUSTER_PROGRESS_INTERVAL_MS) {
+                            recordMemory();
+                            params.onProgress(`[cluster-experiment] querying ${source.sourceKind} index ${processed}/${sourceRowCount} current_edges=${perSourceScores.size} added_this_step=${addedThisRow}`);
+                            lastProgressAt = now;
+                        }
+                    }
+                    loadMs += Date.now() - queryLoadStartedAt;
+                    queryMs += Date.now() - queryStartedAt;
+                    tempDb.exec(`drop table ${tableName}`);
+                    recordMemory();
+                }
+            }
+            // Finalize edge scores using the configured aggregation method
+            const defaultWeights = { dedupe_summary: 0.5, title: 0.3, body: 0.2 };
+            const weights = { ...defaultWeights, ...(params.aggregationWeights ?? {}) };
+            const aggregated = this.finalizeEdgeScores(perSourceScores, aggregation, weights, minScore);
+            params.onProgress?.(`[cluster-experiment] finalized ${aggregated.length} edges from ${perSourceScores.size} candidate pairs using ${aggregation} aggregation`);
+            const clusterStartedAt = Date.now();
+            const clusterNodes = items.map((item) => ({ threadId: item.id, number: item.number, title: item.title }));
+            const clusterEdges = aggregated;
+            const clusterMode = params.clusterMode ?? (params.maxClusterSize !== undefined ? 'refine' : 'basic');
+            const clusters = clusterMode === 'bounded'
+                ? buildSizeBoundedClusters(clusterNodes, clusterEdges, {
+                    maxClusterSize: params.maxClusterSize ?? 200,
+                })
+                : clusterMode === 'refine'
+                    ? buildRefinedClusters(clusterNodes, clusterEdges, {
+                        maxClusterSize: params.maxClusterSize ?? 200,
+                        refineStep: params.refineStep ?? 0.02,
+                    })
+                    : buildClusters(clusterNodes, clusterEdges);
+            clusterBuildMs += Date.now() - clusterStartedAt;
+            recordMemory();
+            const memoryAfter = process.memoryUsage();
+            const durationMs = backend === 'vectorlite'
+                ? indexBuildMs + queryMs + clusterBuildMs
+                : edgeBuildMs + clusterBuildMs;
+            const totalDurationMs = Date.now() - startedAt;
+            return {
+                backend,
+                repository,
+                tempDbPath,
+                threads: items.length,
+                sourceKinds: sourceKinds.length,
+                edges: aggregated.length,
+                clusters: clusters.length,
+                timingBasis: 'cluster-only',
+                durationMs,
+                totalDurationMs,
+                loadMs,
+                setupMs,
+                edgeBuildMs,
+                indexBuildMs,
+                queryMs,
+                clusterBuildMs,
+                candidateK,
+                memory: {
+                    rssBeforeBytes: memoryBefore.rss,
+                    rssAfterBytes: memoryAfter.rss,
+                    peakRssBytes,
+                    heapUsedBeforeBytes: memoryBefore.heapUsed,
+                    heapUsedAfterBytes: memoryAfter.heapUsed,
+                    peakHeapUsedBytes,
+                },
+                clusterSizes: this.summarizeClusterSizes(clusters),
+                clustersDetail: params.includeClusters
+                    ? clusters.map((cluster) => ({
+                        representativeThreadId: cluster.representativeThreadId,
+                        memberThreadIds: [...cluster.members],
+                    }))
+                    : null,
+            };
+        }
+        finally {
+            tempDb?.close();
+            if (tempDir) {
+                fs.rmSync(tempDir, { recursive: true, force: true });
+            }
+        }
+    }
     async searchRepository(params) {
         const mode = params.mode ?? 'hybrid';
         const repository = this.requireRepository(params.owner, params.repo);
@@ -758,13 +1160,33 @@ export class GHCrawlService {
             }
         }
         if (mode !== 'keyword' && this.ai) {
-            const [queryEmbedding] = await this.ai.embedTexts({ model: this.config.embedModel, texts: [params.query] });
-            const rows = this.loadParsedStoredEmbeddings(repository.id);
-            for (const row of rows) {
-                const score = cosineSimilarity(queryEmbedding, row.embedding);
-                if (score < 0.2)
-                    continue;
-                semanticScores.set(row.id, Math.max(semanticScores.get(row.id) ?? -1, score));
+            if (this.isRepoVectorStateCurrent(repository.id)) {
+                const [queryEmbedding] = await this.ai.embedTexts({
+                    model: this.config.embedModel,
+                    texts: [params.query],
+                    dimensions: ACTIVE_EMBED_DIMENSIONS,
+                });
+                const neighbors = this.vectorStore.queryNearest({
+                    storePath: this.repoVectorStorePath(repository.fullName),
+                    dimensions: ACTIVE_EMBED_DIMENSIONS,
+                    vector: queryEmbedding,
+                    limit: limit * 2,
+                    candidateK: Math.max(limit * 8, 64),
+                });
+                for (const neighbor of neighbors) {
+                    if (neighbor.score < 0.2)
+                        continue;
+                    semanticScores.set(neighbor.threadId, Math.max(semanticScores.get(neighbor.threadId) ?? -1, neighbor.score));
+                }
+            }
+            else if (this.hasLegacyEmbeddings(repository.id)) {
+                const [queryEmbedding] = await this.ai.embedTexts({ model: this.config.embedModel, texts: [params.query] });
+                for (const row of this.iterateStoredEmbeddings(repository.id)) {
+                    const score = cosineSimilarity(queryEmbedding, JSON.parse(row.embedding_json));
+                    if (score < 0.2)
+                        continue;
+                    semanticScores.set(row.id, Math.max(semanticScores.get(row.id) ?? -1, score));
+                }
             }
         }
         const candidateIds = new Set([...keywordScores.keys(), ...semanticScores.keys()]);
@@ -832,44 +1254,97 @@ export class GHCrawlService {
         const repository = this.requireRepository(params.owner, params.repo);
         const limit = params.limit ?? 10;
         const minScore = params.minScore ?? 0.2;
-        const rows = this.loadParsedStoredEmbeddings(repository.id);
-        const targetRows = rows.filter((row) => row.number === params.threadNumber);
-        if (targetRows.length === 0) {
-            throw new Error(`Thread #${params.threadNumber} for ${repository.fullName} was not found with an embedding. Run embed first.`);
-        }
-        const targetRow = targetRows[0];
-        const targetBySource = new Map();
-        for (const row of targetRows) {
-            targetBySource.set(row.source_kind, row.embedding);
+        const targetRow = this.db
+            .prepare(`select t.*, tv.basis, tv.model, tv.dimensions, tv.content_hash, tv.vector_json, tv.vector_backend
+         from threads t
+         join thread_vectors tv on tv.thread_id = t.id
+         where t.repo_id = ?
+           and t.number = ?
+           and t.state = 'open'
+           and t.closed_at_local is null
+           and tv.model = ?
+           and tv.basis = ?
+           and tv.dimensions = ?
+         limit 1`)
+            .get(repository.id, params.threadNumber, this.config.embedModel, this.config.embeddingBasis, ACTIVE_EMBED_DIMENSIONS);
+        let responseThread;
+        let neighbors;
+        if (targetRow) {
+            responseThread = targetRow;
+            const candidateRows = this.vectorStore
+                .queryNearest({
+                storePath: this.repoVectorStorePath(repository.fullName),
+                dimensions: ACTIVE_EMBED_DIMENSIONS,
+                vector: this.parseStoredVector(targetRow.vector_json),
+                limit: limit * 2,
+                candidateK: Math.max(limit * 8, 64),
+                excludeThreadId: targetRow.id,
+            })
+                .filter((row) => row.score >= minScore);
+            const candidateIds = candidateRows.map((row) => row.threadId);
+            const neighborMeta = candidateIds.length
+                ? this.db
+                    .prepare(`select * from threads
+               where repo_id = ? and state = 'open' and closed_at_local is null and id in (${candidateIds.map(() => '?').join(',')})`)
+                    .all(repository.id, ...candidateIds)
+                : [];
+            const metaById = new Map(neighborMeta.map((row) => [row.id, row]));
+            neighbors = candidateRows
+                .map((row) => {
+                const meta = metaById.get(row.threadId);
+                if (!meta) {
+                    return null;
+                }
+                return {
+                    threadId: row.threadId,
+                    number: meta.number,
+                    kind: meta.kind,
+                    title: meta.title,
+                    score: row.score,
+                };
+            })
+                .filter((row) => row !== null)
+                .slice(0, limit);
         }
-        const aggregated = new Map();
-        for (const row of rows) {
-            if (row.id === targetRow.id)
-                continue;
-            const targetEmbedding = targetBySource.get(row.source_kind);
-            if (!targetEmbedding)
-                continue;
-            const score = cosineSimilarity(targetEmbedding, row.embedding);
-            if (score < minScore)
-                continue;
-            const previous = aggregated.get(row.id);
-            if (!previous || score > previous.score) {
-                aggregated.set(row.id, { number: row.number, kind: row.kind, title: row.title, score });
+        else {
+            const targetRows = this.loadStoredEmbeddingsForThreadNumber(repository.id, params.threadNumber);
+            if (targetRows.length === 0) {
+                throw new Error(`Thread #${params.threadNumber} for ${repository.fullName} was not found with an embedding. Run embed first.`);
             }
+            responseThread = targetRows[0];
+            const targetBySource = new Map();
+            for (const row of targetRows) {
+                targetBySource.set(row.source_kind, JSON.parse(row.embedding_json));
+            }
+            const aggregated = new Map();
+            for (const row of this.iterateStoredEmbeddings(repository.id)) {
+                if (row.id === responseThread.id)
+                    continue;
+                const targetEmbedding = targetBySource.get(row.source_kind);
+                if (!targetEmbedding)
+                    continue;
+                const score = cosineSimilarity(targetEmbedding, JSON.parse(row.embedding_json));
+                if (score < minScore)
+                    continue;
+                const previous = aggregated.get(row.id);
+                if (!previous || score > previous.score) {
+                    aggregated.set(row.id, { number: row.number, kind: row.kind, title: row.title, score });
+                }
+            }
+            neighbors = Array.from(aggregated.entries())
+                .map(([threadId, value]) => ({
+                threadId,
+                number: value.number,
+                kind: value.kind,
+                title: value.title,
+                score: value.score,
+            }))
+                .sort((left, right) => right.score - left.score)
+                .slice(0, limit);
         }
-        const neighbors = Array.from(aggregated.entries())
-            .map(([threadId, value]) => ({
-            threadId,
-            number: value.number,
-            kind: value.kind,
-            title: value.title,
-            score: value.score,
-        }))
-            .sort((left, right) => right.score - left.score)
-            .slice(0, limit);
         return neighborsResponseSchema.parse({
             repository,
-            thread: threadToDto(targetRow),
+            thread: threadToDto(responseThread),
             neighbors,
         });
     }
@@ -946,6 +1421,14 @@ export class GHCrawlService {
                 onProgress: params.onProgress,
             });
         }
+        if (selected.embed && this.config.embeddingBasis === 'title_summary') {
+            params.onProgress?.(`[refresh] embedding basis ${this.config.embeddingBasis} requires summaries; running summarize before embed`);
+            await this.summarizeRepository({
+                owner: params.owner,
+                repo: params.repo,
+                onProgress: params.onProgress,
+            });
+        }
         if (selected.embed) {
             embed = await this.embedRepository({
                 owner: params.owner,
@@ -1150,9 +1633,9 @@ export class GHCrawlService {
         const summaryRows = this.db
             .prepare(`select summary_kind, summary_text
          from document_summaries
-         where thread_id = ? and model = ?
+         where thread_id = ? and model = ? and prompt_version = ?
          order by summary_kind asc`)
-            .all(row.id, this.config.summaryModel);
+            .all(row.id, this.config.summaryModel, SUMMARY_PROMPT_VERSION);
         const summaries = {};
         for (const summary of summaryRows) {
             if (summary.summary_kind === 'problem_summary' ||
@@ -1314,7 +1797,178 @@ export class GHCrawlService {
             latestClusterRunFinishedAt: latestRun?.finished_at ?? null,
         };
     }
+    /**
+     * Describe the pipeline configuration this service instance targets.
+     * The snake_case keys are the same fields isRepoVectorStateCurrent compares
+     * against a stored repo_pipeline_state row.
+     */
+    getDesiredPipelineState() {
+        const { summaryModel, embeddingBasis, embedModel, vectorBackend } = this.config;
+        const desired = {
+            summary_model: summaryModel,
+            summary_prompt_version: SUMMARY_PROMPT_VERSION,
+            embedding_basis: embeddingBasis,
+            embed_model: embedModel,
+            embed_dimensions: ACTIVE_EMBED_DIMENSIONS,
+            embed_pipeline_version: ACTIVE_EMBED_PIPELINE_VERSION,
+            vector_backend: vectorBackend,
+        };
+        return desired;
+    }
+    /**
+     * Fetch the repo_pipeline_state row for a repository.
+     * @param repoId - value bound to the repo_id filter.
+     * @returns the row, or null when the query yields a nullish result.
+     */
+    getRepoPipelineState(repoId) {
+        const lookup = this.db.prepare('select * from repo_pipeline_state where repo_id = ? limit 1');
+        const row = lookup.get(repoId);
+        return row ?? null;
+    }
+    /**
+     * Report whether the stored pipeline state for a repository matches the
+     * desired pipeline configuration and carries a vectors_current_at value.
+     * @param repoId - repository id to look up.
+     * @returns false when no state row exists, vectors_current_at is falsy, or
+     *   any compared field differs; true otherwise.
+     */
+    isRepoVectorStateCurrent(repoId) {
+        const stored = this.getRepoPipelineState(repoId);
+        if (!stored?.vectors_current_at) {
+            return false;
+        }
+        const desired = this.getDesiredPipelineState();
+        // Compared in this order with strict equality; the first mismatch ends the check.
+        const comparedFields = [
+            'summary_model',
+            'summary_prompt_version',
+            'embedding_basis',
+            'embed_model',
+            'embed_dimensions',
+            'embed_pipeline_version',
+            'vector_backend',
+        ];
+        return comparedFields.every((field) => stored[field] === desired[field]);
+    }
+    /**
+     * Report whether cluster data can be treated as current: the vector state
+     * must be current and the stored row must have a truthy clusters_current_at.
+     * @param repoId - repository id to look up.
+     */
+    isRepoClusterStateCurrent(repoId) {
+        const stored = this.getRepoPipelineState(repoId);
+        if (!this.isRepoVectorStateCurrent(repoId)) {
+            return false;
+        }
+        return Boolean(stored?.clusters_current_at);
+    }
+    /**
+     * Check whether document_embeddings holds at least one row for an open,
+     * not-locally-closed thread of the repository under the configured embed model.
+     * @param repoId - repository id bound to t.repo_id.
+     * @returns true when the counted rows exceed zero.
+     */
+    hasLegacyEmbeddings(repoId) {
+        const countStatement = this.db.prepare(`select count(*) as count
+         from document_embeddings e
+         join threads t on t.id = e.thread_id
+         where t.repo_id = ?
+           and t.state = 'open'
+           and t.closed_at_local is null
+           and e.model = ?`);
+        const { count } = countStatement.get(repoId, this.config.embedModel);
+        return count > 0;
+    }
+    /**
+     * Upsert the repo_pipeline_state row for a repository, stamping it with the
+     * currently desired pipeline configuration.
+     *
+     * @param repoId - repository id; also the conflict key of the upsert.
+     * @param overrides - object that may carry `vectors_current_at` and/or
+     *   `clusters_current_at`. A key left undefined keeps the stored value (or
+     *   null when no row exists yet); any other value, including an explicit
+     *   null, is written as given.
+     */
+    writeRepoPipelineState(repoId, overrides) {
+        const desired = this.getDesiredPipelineState();
+        const current = this.getRepoPipelineState(repoId);
+        // An explicit null must win over the stored value: resetRepositoryVectors and
+        // markRepoVectorsCurrent pass null to invalidate a timestamp. A plain `??`
+        // chain cannot express that, because it treats null exactly like a missing
+        // key and silently falls back to the stale stored timestamp.
+        const resolveTimestamp = (key) => {
+            const requested = overrides[key];
+            if (requested !== undefined) {
+                return requested;
+            }
+            return current?.[key] ?? null;
+        };
+        this.db
+            .prepare(`insert into repo_pipeline_state (
+            repo_id,
+            summary_model,
+            summary_prompt_version,
+            embedding_basis,
+            embed_model,
+            embed_dimensions,
+            embed_pipeline_version,
+            vector_backend,
+            vectors_current_at,
+            clusters_current_at,
+            updated_at
+         ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+         on conflict(repo_id) do update set
+           summary_model = excluded.summary_model,
+           summary_prompt_version = excluded.summary_prompt_version,
+           embedding_basis = excluded.embedding_basis,
+           embed_model = excluded.embed_model,
+           embed_dimensions = excluded.embed_dimensions,
+           embed_pipeline_version = excluded.embed_pipeline_version,
+           vector_backend = excluded.vector_backend,
+           vectors_current_at = excluded.vectors_current_at,
+           clusters_current_at = excluded.clusters_current_at,
+           updated_at = excluded.updated_at`)
+            .run(repoId, desired.summary_model, desired.summary_prompt_version, desired.embedding_basis, desired.embed_model, desired.embed_dimensions, desired.embed_pipeline_version, desired.vector_backend, resolveTimestamp('vectors_current_at'), resolveTimestamp('clusters_current_at'), nowIso());
+    }
+    /**
+     * Record the current time as vectors_current_at for a repository and pass
+     * null for clusters_current_at; writeRepoPipelineState decides how each
+     * value is persisted.
+     * @param repoId - repository id to update.
+     */
+    markRepoVectorsCurrent(repoId) {
+        const overrides = {
+            vectors_current_at: nowIso(),
+            clusters_current_at: null,
+        };
+        this.writeRepoPipelineState(repoId, overrides);
+    }
+    /**
+     * Record the current time as clusters_current_at for a repository. The
+     * stored vectors_current_at is passed through unchanged; when it is nullish,
+     * the current time is used for it as well.
+     * @param repoId - repository id to update.
+     */
+    markRepoClustersCurrent(repoId) {
+        const existing = this.getRepoPipelineState(repoId);
+        const vectorsCurrentAt = existing?.vectors_current_at ?? nowIso();
+        const clustersCurrentAt = nowIso();
+        this.writeRepoPipelineState(repoId, {
+            vectors_current_at: vectorsCurrentAt,
+            clusters_current_at: clustersCurrentAt,
+        });
+    }
+    /**
+     * Build the path of the per-repository vector store file under
+     * `<configDir>/vectors`.
+     * @param repoFullName - repository name; every run of characters outside
+     *   [a-zA-Z0-9._-] is replaced by a double underscore.
+     * @returns the joined path ending in `.sqlite`.
+     */
+    repoVectorStorePath(repoFullName) {
+        const fileName = `${repoFullName.replace(/[^a-zA-Z0-9._-]+/g, '__')}.sqlite`;
+        return path.join(this.config.configDir, 'vectors', fileName);
+    }
+    /**
+     * Drop all vector data for a repository: delete its thread_vectors rows,
+     * reset the repository's vector store, then write the pipeline state with
+     * null for both timestamps.
+     * @param repoId - repository id whose thread vectors are deleted.
+     * @param repoFullName - repository name used to locate the vector store file.
+     */
+    resetRepositoryVectors(repoId, repoFullName) {
+        const deleteThreadVectors = this.db.prepare(`delete from thread_vectors
+         where thread_id in (select id from threads where repo_id = ?)`);
+        deleteThreadVectors.run(repoId);
+        // The store path is resolved only after the SQL delete has run.
+        const storePath = this.repoVectorStorePath(repoFullName);
+        this.vectorStore.resetRepository({ storePath, dimensions: ACTIVE_EMBED_DIMENSIONS });
+        const clearedTimestamps = { vectors_current_at: null, clusters_current_at: null };
+        this.writeRepoPipelineState(repoId, clearedTimestamps);
+    }
+    /**
+     * Remove vectors that belong to threads which are no longer open, or which
+     * have a local close timestamp, from both thread_vectors and the vector store.
+     * @param repoId - repository id whose threads are inspected.
+     * @param repoFullName - repository name used to locate the vector store file.
+     * @returns the number of thread vectors selected for removal (0 when none).
+     */
+    pruneInactiveRepositoryVectors(repoId, repoFullName) {
+        const rows = this.db
+            .prepare(`select tv.thread_id
+         from thread_vectors tv
+         join threads t on t.id = tv.thread_id
+         where t.repo_id = ?
+           and (t.state != 'open' or t.closed_at_local is not null)`)
+            .all(repoId);
+        if (rows.length === 0) {
+            return 0;
+        }
+        // The store path depends only on the repository, so resolve it once
+        // instead of once per deleted row.
+        const storePath = this.repoVectorStorePath(repoFullName);
+        const deleteVectorRow = this.db.prepare('delete from thread_vectors where thread_id = ?');
+        // NOTE(review): the vector store deletions run inside the SQL transaction
+        // callback; whether they are undone if the transaction fails depends on the
+        // store implementation — confirm.
+        this.db.transaction(() => {
+            for (const row of rows) {
+                deleteVectorRow.run(row.thread_id);
+                this.vectorStore.deleteVector({
+                    storePath,
+                    dimensions: ACTIVE_EMBED_DIMENSIONS,
+                    threadId: row.thread_id,
+                });
+            }
+        })();
+        return rows.length;
+    }
+    /**
+     * Clean up leftovers of the vector migration for one repository.
+     * In order: delete legacy document_embeddings rows, rewrite text-typed
+     * thread_vectors.vector_json payloads through vectorBlob, and, unless the
+     * database path is ':memory:', checkpoint the WAL and VACUUM.
+     * Returns immediately when both counts are zero.
+     * @param repoId - repository id whose rows are cleaned.
+     * @param repoFullName - repository name, used only in the progress message.
+     * @param onProgress - optional callback receiving progress strings.
+     */
+    cleanupMigratedRepositoryArtifacts(repoId, repoFullName, onProgress) {
+        const legacyEmbeddingCount = this.countLegacyEmbeddings(repoId);
+        const inlineJsonVectorCount = this.countInlineJsonThreadVectors(repoId);
+        const nothingToClean = legacyEmbeddingCount === 0 && inlineJsonVectorCount === 0;
+        if (nothingToClean) {
+            return;
+        }
+        if (legacyEmbeddingCount > 0) {
+            const deleteLegacy = this.db.prepare(`delete from document_embeddings
+           where thread_id in (select id from threads where repo_id = ?)`);
+            deleteLegacy.run(repoId);
+            onProgress?.(`[cleanup] removed ${legacyEmbeddingCount} legacy document embedding row(s) after vector migration`);
+        }
+        if (inlineJsonVectorCount > 0) {
+            const selectInlineJson = this.db.prepare(`select tv.thread_id, tv.vector_json
+           from thread_vectors tv
+           join threads t on t.id = tv.thread_id
+           where t.repo_id = ?
+             and typeof(tv.vector_json) = 'text'
+             and tv.vector_json != ''`);
+            const inlineRows = selectInlineJson.all(repoId);
+            const rewriteVector = this.db.prepare('update thread_vectors set vector_json = ?, updated_at = ? where thread_id = ?');
+            const compactAll = this.db.transaction(() => {
+                for (const { thread_id, vector_json } of inlineRows) {
+                    rewriteVector.run(this.vectorBlob(JSON.parse(vector_json)), nowIso(), thread_id);
+                }
+            });
+            compactAll();
+            onProgress?.(`[cleanup] compacted ${inlineJsonVectorCount} inline SQLite vector payload(s) from JSON to binary blobs`);
+        }
+        if (this.config.dbPath !== ':memory:') {
+            onProgress?.(`[cleanup] checkpointing WAL and vacuuming ${repoFullName} migration changes`);
+            // Checkpoint, vacuum, then checkpoint again.
+            this.db.pragma('wal_checkpoint(TRUNCATE)');
+            this.db.exec('VACUUM');
+            this.db.pragma('wal_checkpoint(TRUNCATE)');
+        }
+    }
     getLatestClusterRun(repoId) {
+        const state = this.getRepoPipelineState(repoId);
+        if (state && !this.isRepoClusterStateCurrent(repoId)) {
+            return null;
+        }
         return (this.db
             .prepare("select id, finished_at from cluster_runs where repo_id = ? and status = 'completed' order by id desc limit 1")
             .get(repoId) ?? null);
@@ -1784,7 +2438,7 @@ export class GHCrawlService {
             }
         }
         const summaryInput = parts.join('\n\n');
-        const summaryContentHash = stableContentHash(`summary:${includeComments ? 'with-comments' : 'metadata-only'}\n${summaryInput}`);
+        const summaryContentHash = stableContentHash(`summary:${SUMMARY_PROMPT_VERSION}:${includeComments ? 'with-comments' : 'metadata-only'}\n${summaryInput}`);
         return { summaryInput, summaryContentHash };
     }
     buildEmbeddingTasks(params) {
@@ -1827,6 +2481,35 @@ export class GHCrawlService {
         }
         return tasks;
     }
+    /**
+     * Build the single embedding task for a thread under the configured basis.
+     * The text always starts with a `title:` section. With basis 'title_summary'
+     * a non-empty normalized dedupe summary is required and appended as
+     * `summary:`; with any other basis a non-empty normalized body is appended
+     * as `body:`.
+     * @param params - object read for threadId, threadNumber, title, body and
+     *   dedupeSummary.
+     * @returns the task object, or null when the required summary is empty or
+     *   prepareEmbeddingText yields a falsy result.
+     */
+    buildActiveVectorTask(params) {
+        const parts = [`title: ${normalizeSummaryText(params.title)}`];
+        const basis = this.config.embeddingBasis;
+        if (basis === 'title_summary') {
+            const summaryText = normalizeSummaryText(params.dedupeSummary ?? '');
+            if (!summaryText) {
+                return null;
+            }
+            parts.push(`summary: ${summaryText}`);
+        }
+        else {
+            const bodyText = normalizeSummaryText(params.body ?? '');
+            if (bodyText) {
+                parts.push(`body: ${bodyText}`);
+            }
+        }
+        const prepared = this.prepareEmbeddingText(parts.join('\n\n'), EMBED_MAX_ITEM_TOKENS);
+        if (!prepared) {
+            return null;
+        }
+        const { text, estimatedTokens, wasTruncated } = prepared;
+        // The hash input includes pipeline version, basis, model and dimensions
+        // ahead of the prepared text.
+        const hashInput = `embedding:${ACTIVE_EMBED_PIPELINE_VERSION}:${basis}:${this.config.embedModel}:${ACTIVE_EMBED_DIMENSIONS}\n${text}`;
+        return {
+            threadId: params.threadId,
+            threadNumber: params.threadNumber,
+            basis,
+            text,
+            contentHash: stableContentHash(hashInput),
+            estimatedTokens,
+            wasTruncated,
+        };
+    }
     prepareEmbeddingText(text, maxEstimatedTokens) {
         if (!text) {
             return null;
@@ -1868,6 +2551,7 @@ export class GHCrawlService {
             const embeddings = await ai.embedTexts({
                 model: this.config.embedModel,
                 texts: batch.map((task) => task.text),
+                dimensions: ACTIVE_EMBED_DIMENSIONS,
             });
             return batch.map((task, index) => ({ task, embedding: embeddings[index] }));
         }
@@ -1894,6 +2578,7 @@ export class GHCrawlService {
                 const [embedding] = await ai.embedTexts({
                     model: this.config.embedModel,
                     texts: [current.text],
+                    dimensions: ACTIVE_EMBED_DIMENSIONS,
                 });
                 return { task: current, embedding };
             }
@@ -1906,11 +2591,11 @@ export class GHCrawlService {
                 if (!next || next.text === current.text) {
                     throw error;
                 }
-                onProgress?.(`[embed] shortened #${current.threadNumber}:${current.sourceKind} after context error est_tokens=${current.estimatedTokens}->${next.estimatedTokens}`);
+                onProgress?.(`[embed] shortened #${current.threadNumber}:${current.basis} after context error est_tokens=${current.estimatedTokens}->${next.estimatedTokens}`);
                 current = next;
             }
         }
-        throw new Error(`Unable to shrink embedding input for #${task.threadNumber}:${task.sourceKind} below model limits`);
+        throw new Error(`Unable to shrink embedding input for #${task.threadNumber}:${task.basis} below model limits`);
     }
     shrinkEmbeddingTask(task, context) {
         const withoutMarker = task.text.endsWith(EMBED_TRUNCATION_MARKER)
@@ -1927,7 +2612,7 @@ export class GHCrawlService {
         return {
             ...task,
             text: nextText,
-            contentHash: stableContentHash(`embedding:${task.sourceKind}\n${nextText}`),
+            contentHash: stableContentHash(`embedding:${ACTIVE_EMBED_PIPELINE_VERSION}:${task.basis}:${this.config.embedModel}:${ACTIVE_EMBED_DIMENSIONS}\n${nextText}`),
             estimatedTokens: this.estimateEmbeddingTokens(nextText),
             wasTruncated: true,
         };
@@ -1976,23 +2661,71 @@ export class GHCrawlService {
          order by t.number asc, e.source_kind asc`)
             .all(repoId, this.config.embedModel);
     }
-    loadParsedStoredEmbeddings(repoId) {
-        const cached = this.parsedEmbeddingCache.get(repoId);
-        if (cached) {
-            return cached;
+    /**
+     * Load every document_embeddings row, joined with its thread columns, for
+     * one open and not-locally-closed thread number under the configured embed model.
+     * @param repoId - repository id bound to t.repo_id.
+     * @param threadNumber - thread number bound to t.number.
+     * @returns an array of rows ordered by source_kind ascending.
+     */
+    loadStoredEmbeddingsForThreadNumber(repoId, threadNumber) {
+        const statement = this.db.prepare(`select t.id, t.repo_id, t.number, t.kind, t.state, t.closed_at_gh, t.closed_at_local, t.close_reason_local,
+                t.title, t.body, t.author_login, t.html_url, t.labels_json,
+                t.updated_at_gh, t.first_pulled_at, t.last_pulled_at, e.source_kind, e.embedding_json
+         from threads t
+         join document_embeddings e on e.thread_id = t.id
+         where t.repo_id = ?
+           and t.number = ?
+           and t.state = 'open'
+           and t.closed_at_local is null
+           and e.model = ?
+         order by e.source_kind asc`);
+        const rows = statement.all(repoId, threadNumber, this.config.embedModel);
+        return rows;
+    }
+    /**
+     * Iterate document_embeddings rows, joined with their thread columns, for all
+     * open and not-locally-closed threads of a repository under the configured
+     * embed model. Rows come from the statement's `iterate` call instead of
+     * being collected with `all`.
+     * @param repoId - repository id bound to t.repo_id.
+     * @returns the iterator produced by the prepared statement, ordered by
+     *   thread number and then source_kind.
+     */
+    iterateStoredEmbeddings(repoId) {
+        const statement = this.db.prepare(`select t.id, t.repo_id, t.number, t.kind, t.state, t.closed_at_gh, t.closed_at_local, t.close_reason_local,
+                t.title, t.body, t.author_login, t.html_url, t.labels_json,
+                t.updated_at_gh, t.first_pulled_at, t.last_pulled_at, e.source_kind, e.embedding_json
+         from threads t
+         join document_embeddings e on e.thread_id = t.id
+         where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and e.model = ?
+         order by t.number asc, e.source_kind asc`);
+        return statement.iterate(repoId, this.config.embedModel);
+    }
+    loadNormalizedEmbeddingForSourceKindHead(repoId, sourceKind) {
+        const row = this.db
+            .prepare(`select t.id, e.embedding_json
+         from threads t
+         join document_embeddings e on e.thread_id = t.id
+         where t.repo_id = ?
+           and t.state = 'open'
+           and t.closed_at_local is null
+           and e.model = ?
+           and e.source_kind = ?
+         order by t.number asc
+         limit 1`)
+            .get(repoId, this.config.embedModel, sourceKind);
+        if (!row) {
+            return null;
         }
-        const parsed = this.loadStoredEmbeddings(repoId).map((row) => {
-            const embedding = JSON.parse(row.embedding_json);
-            const normalized = normalizeEmbedding(embedding);
-            return {
-                ...row,
-                embedding,
-                normalizedEmbedding: normalized.normalized,
-                embeddingNorm: normalized.norm,
+        return {
+            id: row.id,
+            normalizedEmbedding: normalizeEmbedding(JSON.parse(row.embedding_json)).normalized,
+        };
+    }
+    *iterateNormalizedEmbeddingsForSourceKind(repoId, sourceKind) {
+        const rows = this.db
+            .prepare(`select t.id, e.embedding_json
+         from threads t
+         join document_embeddings e on e.thread_id = t.id
+         where t.repo_id = ?
+           and t.state = 'open'
+           and t.closed_at_local is null
+           and e.model = ?
+           and e.source_kind = ?
+         order by t.number asc`)
+            .iterate(repoId, this.config.embedModel, sourceKind);
+        for (const row of rows) {
+            yield {
+                id: row.id,
+                normalizedEmbedding: normalizeEmbedding(JSON.parse(row.embedding_json)).normalized,
             };
-        });
-        this.parsedEmbeddingCache.set(repoId, parsed);
-        return parsed;
+        }
     }
     loadNormalizedEmbeddingsForSourceKind(repoId, sourceKind) {
         const rows = this.db
@@ -2011,6 +2744,12 @@ export class GHCrawlService {
             normalizedEmbedding: normalizeEmbedding(JSON.parse(row.embedding_json)).normalized,
         }));
     }
+    /**
+     * Pack numeric values into 32-bit floats and expose the bytes as a Buffer.
+     * @param values - source accepted by Float32Array.from.
+     * @returns a Buffer over the freshly created Float32Array's ArrayBuffer.
+     */
+    normalizedEmbeddingBuffer(values) {
+        const packed = Float32Array.from(values);
+        return Buffer.from(packed.buffer);
+    }
+    /**
+     * Convert a distance into a score using `1 - distance / 2`.
+     * NOTE(review): this equals cosine similarity if `distance` is the squared
+     * L2 distance between unit-length vectors — confirm against the vector backend.
+     * @param distance - numeric distance value.
+     */
+    normalizedDistanceToScore(distance) {
+        const halfDistance = distance / 2;
+        return 1 - halfDistance;
+    }
     loadClusterableThreadMeta(repoId) {
         const rows = this.db
             .prepare(`select t.id, t.number, t.title, e.source_kind
@@ -2031,6 +2770,34 @@ export class GHCrawlService {
             sourceKinds: Array.from(sourceKinds.values()),
         };
     }
+    loadClusterableActiveVectorMeta(repoId, _repoFullName) {
+        const rows = this.db
+            .prepare(`select t.id, t.number, t.title, tv.vector_json
+         from threads t
+         join thread_vectors tv on tv.thread_id = t.id
+         where t.repo_id = ?
+           and t.state = 'open'
+           and t.closed_at_local is null
+           and tv.model = ?
+           and tv.basis = ?
+           and tv.dimensions = ?
+         order by t.number asc`)
+            .all(repoId, this.config.embedModel, this.config.embeddingBasis, ACTIVE_EMBED_DIMENSIONS);
+        return rows.map((row) => ({
+            id: row.id,
+            number: row.number,
+            title: row.title,
+            embedding: this.parseStoredVector(row.vector_json),
+        }));
+    }
+    loadNormalizedActiveVectors(repoId) {
+        return this.loadClusterableActiveVectorMeta(repoId, '').map((row) => ({
+            id: row.id,
+            number: row.number,
+            title: row.title,
+            embedding: normalizeEmbedding(row.embedding).normalized,
+        }));
+    }
     listStoredClusterNeighbors(repoId, threadId, limit) {
         const latestRun = this.getLatestClusterRun(repoId);
         if (!latestRun) {
@@ -2087,56 +2854,65 @@ export class GHCrawlService {
         }
         sql += ' order by t.number asc';
         const rows = this.db.prepare(sql).all(...args);
-        const summaryTexts = this.loadCombinedSummaryTextMap(repoId, threadNumber);
-        const tasks = rows.flatMap((row) => this.buildEmbeddingTasks({
-            threadId: row.id,
-            threadNumber: row.number,
-            title: row.title,
-            body: row.body,
-            dedupeSummary: summaryTexts.get(row.id) ?? null,
-        }));
+        const summaryTexts = this.loadDedupeSummaryTextMap(repoId, threadNumber);
+        const missingSummaryThreadNumbers = [];
+        const tasks = rows.flatMap((row) => {
+            const task = this.buildActiveVectorTask({
+                threadId: row.id,
+                threadNumber: row.number,
+                title: row.title,
+                body: row.body,
+                dedupeSummary: summaryTexts.get(row.id) ?? null,
+            });
+            if (task) {
+                return [task];
+            }
+            if (this.config.embeddingBasis === 'title_summary') {
+                missingSummaryThreadNumbers.push(row.number);
+            }
+            return [];
+        });
+        const pipelineCurrent = this.isRepoVectorStateCurrent(repoId);
         const existingRows = this.db
-            .prepare(`select e.thread_id, e.source_kind, e.content_hash
-         from document_embeddings e
-         join threads t on t.id = e.thread_id
-         where t.repo_id = ? and e.model = ?`)
-            .all(repoId, this.config.embedModel);
+            .prepare(`select tv.thread_id, tv.content_hash
+         from thread_vectors tv
+         join threads t on t.id = tv.thread_id
+         where t.repo_id = ?
+           and tv.model = ?
+           and tv.basis = ?
+           and tv.dimensions = ?`)
+            .all(repoId, this.config.embedModel, this.config.embeddingBasis, ACTIVE_EMBED_DIMENSIONS);
         const existing = new Map();
         for (const row of existingRows) {
-            existing.set(`${row.thread_id}:${row.source_kind}`, row.content_hash);
+            existing.set(String(row.thread_id), row.content_hash);
         }
-        const pending = tasks.filter((task) => existing.get(`${task.threadId}:${task.sourceKind}`) !== task.contentHash);
-        return { rows, tasks, existing, pending };
+        const pending = pipelineCurrent
+            ? tasks.filter((task) => existing.get(String(task.threadId)) !== task.contentHash)
+            : tasks;
+        return { rows, tasks, existing, pending, missingSummaryThreadNumbers };
     }
-    loadCombinedSummaryTextMap(repoId, threadNumber) {
-        let sql = `select s.thread_id, s.summary_kind, s.summary_text
+    loadDedupeSummaryTextMap(repoId, threadNumber) {
+        let sql = `select s.thread_id, s.summary_text
        from document_summaries s
        join threads t on t.id = s.thread_id
-       where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and s.model = ?`;
-        const args = [repoId, this.config.summaryModel];
+       where t.repo_id = ?
+         and t.state = 'open'
+         and t.closed_at_local is null
+         and s.model = ?
+         and s.summary_kind = 'dedupe_summary'
+         and s.prompt_version = ?`;
+        const args = [repoId, this.config.summaryModel, SUMMARY_PROMPT_VERSION];
         if (threadNumber) {
             sql += ' and t.number = ?';
             args.push(threadNumber);
         }
-        sql += ' order by t.number asc, s.summary_kind asc';
+        sql += ' order by t.number asc';
         const rows = this.db.prepare(sql).all(...args);
-        const byThread = new Map();
-        for (const row of rows) {
-            const entry = byThread.get(row.thread_id) ?? new Map();
-            entry.set(row.summary_kind, normalizeSummaryText(row.summary_text));
-            byThread.set(row.thread_id, entry);
-        }
         const combined = new Map();
-        const order = ['problem_summary', 'solution_summary', 'maintainer_signal_summary', 'dedupe_summary'];
-        for (const [threadId, entry] of byThread.entries()) {
-            const parts = order
-                .map((summaryKind) => {
-                const text = entry.get(summaryKind);
-                return text ? `${summaryKind}: ${text}` : '';
-            })
-                .filter(Boolean);
-            if (parts.length > 0) {
-                combined.set(threadId, parts.join('\n\n'));
+        for (const row of rows) {
+            const text = normalizeSummaryText(row.summary_text);
+            if (text) {
+                combined.set(row.thread_id, text);
             }
         }
         return combined;
@@ -2233,6 +3009,71 @@ export class GHCrawlService {
             });
         }
     }
+    collectSourceKindScores(perSourceScores, edges, sourceKind) {
+        for (const edge of edges) {
+            const key = this.edgeKey(edge.leftThreadId, edge.rightThreadId);
+            const existing = perSourceScores.get(key);
+            if (existing) {
+                existing.scores.set(sourceKind, Math.max(existing.scores.get(sourceKind) ?? -1, edge.score));
+                continue;
+            }
+            const scores = new Map();
+            scores.set(sourceKind, edge.score);
+            perSourceScores.set(key, {
+                leftThreadId: edge.leftThreadId,
+                rightThreadId: edge.rightThreadId,
+                scores,
+            });
+        }
+    }
+    finalizeEdgeScores(perSourceScores, aggregation, weights, minScore) {
+        const result = [];
+        for (const entry of perSourceScores.values()) {
+            const scoreValues = Array.from(entry.scores.values());
+            let finalScore;
+            switch (aggregation) {
+                case 'max':
+                    finalScore = Math.max(...scoreValues);
+                    break;
+                case 'mean':
+                    finalScore = scoreValues.reduce((a, b) => a + b, 0) / scoreValues.length;
+                    break;
+                case 'weighted': {
+                    let weightedSum = 0;
+                    let weightSum = 0;
+                    for (const [kind, score] of entry.scores) {
+                        const w = weights[kind] ?? 0.1;
+                        weightedSum += score * w;
+                        weightSum += w;
+                    }
+                    finalScore = weightSum > 0 ? weightedSum / weightSum : 0;
+                    break;
+                }
+                case 'min-of-2':
+                    // Require at least 2 source kinds to agree (both above minScore)
+                    if (scoreValues.length < 2) {
+                        continue; // Skip edges with only 1 source kind
+                    }
+                    finalScore = Math.max(...scoreValues);
+                    break;
+                case 'boost': {
+                    // Best score + bonus per additional agreeing source
+                    const best = Math.max(...scoreValues);
+                    const bonusSources = scoreValues.length - 1;
+                    finalScore = Math.min(1.0, best + bonusSources * 0.05);
+                    break;
+                }
+            }
+            if (finalScore >= minScore) {
+                result.push({
+                    leftThreadId: entry.leftThreadId,
+                    rightThreadId: entry.rightThreadId,
+                    score: finalScore,
+                });
+            }
+        }
+        return result;
+    }
     countEmbeddingsForSourceKind(repoId, sourceKind) {
         const row = this.db
             .prepare(`select count(*) as count
@@ -2280,15 +3121,102 @@ export class GHCrawlService {
     pruneOldClusterRuns(repoId, keepRunId) {
         this.db.prepare('delete from cluster_runs where repo_id = ? and id <> ?').run(repoId, keepRunId);
     }
+    summarizeClusterSizes(clusters) {
+        const histogramCounts = new Map();
+        const topClusterSizes = clusters.map((cluster) => cluster.members.length).sort((left, right) => right - left);
+        let soloClusters = 0;
+        for (const cluster of clusters) {
+            const size = cluster.members.length;
+            histogramCounts.set(size, (histogramCounts.get(size) ?? 0) + 1);
+            if (size === 1) {
+                soloClusters += 1;
+            }
+        }
+        return {
+            soloClusters,
+            maxClusterSize: topClusterSizes[0] ?? 0,
+            topClusterSizes: topClusterSizes.slice(0, 50),
+            histogram: Array.from(histogramCounts.entries())
+                .map(([size, count]) => ({ size, count }))
+                .sort((left, right) => left.size - right.size),
+        };
+    }
     /**
      * Insert or update one row of `document_summaries`.
      *
      * The row is keyed by (thread_id, summary_kind, model), with the model taken
      * from `this.config.summaryModel`. On conflict the prompt version, content
      * hash, summary text and `updated_at` are overwritten; `created_at` is left
      * as stored. The prompt version written is always SUMMARY_PROMPT_VERSION.
      *
      * @param threadId - Thread the summary belongs to.
      * @param contentHash - Stored in `content_hash`.
      * @param summaryKind - Stored in `summary_kind`.
      * @param summaryText - Stored in `summary_text`.
      */
     upsertSummary(threadId, contentHash, summaryKind, summaryText) {
         this.db
-            .prepare(`insert into document_summaries (thread_id, summary_kind, model, content_hash, summary_text, created_at, updated_at)
-         values (?, ?, ?, ?, ?, ?, ?)
+            .prepare(`insert into document_summaries (thread_id, summary_kind, model, prompt_version, content_hash, summary_text, created_at, updated_at)
+         values (?, ?, ?, ?, ?, ?, ?, ?)
          on conflict(thread_id, summary_kind, model) do update set
+           prompt_version = excluded.prompt_version,
            content_hash = excluded.content_hash,
            summary_text = excluded.summary_text,
            updated_at = excluded.updated_at`)
-            .run(threadId, summaryKind, this.config.summaryModel, contentHash, summaryText, nowIso(), nowIso());
+            .run(threadId, summaryKind, this.config.summaryModel, SUMMARY_PROMPT_VERSION, contentHash, summaryText, nowIso(), nowIso());
+    }
+    upsertActiveVector(repoId, repoFullName, threadId, basis, contentHash, embedding) {
+        this.db
+            .prepare(`insert into thread_vectors (thread_id, basis, model, dimensions, content_hash, vector_json, vector_backend, created_at, updated_at)
+         values (?, ?, ?, ?, ?, ?, ?, ?, ?)
+         on conflict(thread_id) do update set
+           basis = excluded.basis,
+           model = excluded.model,
+           dimensions = excluded.dimensions,
+           content_hash = excluded.content_hash,
+           vector_json = excluded.vector_json,
+           vector_backend = excluded.vector_backend,
+           updated_at = excluded.updated_at`)
+            .run(threadId, basis, this.config.embedModel, embedding.length, contentHash, this.vectorBlob(embedding), this.config.vectorBackend, nowIso(), nowIso());
+        this.vectorStore.upsertVector({
+            storePath: this.repoVectorStorePath(repoFullName),
+            dimensions: ACTIVE_EMBED_DIMENSIONS,
+            threadId,
+            vector: embedding,
+        });
+    }
+    countLegacyEmbeddings(repoId) {
+        const row = this.db
+            .prepare(`select count(*) as count
+         from document_embeddings
+         where thread_id in (select id from threads where repo_id = ?)`)
+            .get(repoId);
+        return row.count;
+    }
+    countInlineJsonThreadVectors(repoId) {
+        const row = this.db
+            .prepare(`select count(*) as count
+         from thread_vectors
+         where thread_id in (select id from threads where repo_id = ?)
+           and typeof(vector_json) = 'text'
+           and vector_json != ''`)
+            .get(repoId);
+        return row.count;
+    }
+    getVectorliteClusterQuery(totalItems, requestedK) {
+        if (totalItems < CLUSTER_PARALLEL_MIN_EMBEDDINGS) {
+            return {
+                limit: requestedK,
+                candidateK: Math.max(requestedK * 16, 64),
+            };
+        }
+        const limit = Math.min(Math.max(requestedK * VECTORLITE_CLUSTER_EXPANDED_MULTIPLIER, VECTORLITE_CLUSTER_EXPANDED_K), Math.max(1, totalItems - 1));
+        const candidateK = Math.min(Math.max(limit * 16, VECTORLITE_CLUSTER_EXPANDED_CANDIDATE_K), Math.max(limit, totalItems - 1));
+        return {
+            limit,
+            candidateK,
+            efSearch: Math.max(candidateK * 2, VECTORLITE_CLUSTER_EXPANDED_EF_SEARCH),
+        };
+    }
+    vectorBlob(values) {
+        return Buffer.from(Float32Array.from(values).buffer);
+    }
+    parseStoredVector(value) {
+        if (typeof value === 'string') {
+            if (!value) {
+                throw new Error('Stored vector payload is empty. Run refresh or embed first.');
+            }
+            return JSON.parse(value);
+        }
+        const floats = new Float32Array(value.buffer, value.byteOffset, Math.floor(value.byteLength / Float32Array.BYTES_PER_ELEMENT));
+        return Array.from(floats);
     }
     upsertEmbedding(threadId, sourceKind, contentHash, embedding) {
         this.db
@@ -2300,10 +3228,6 @@ export class GHCrawlService {
            embedding_json = excluded.embedding_json,
            updated_at = excluded.updated_at`)
             .run(threadId, sourceKind, this.config.embedModel, embedding.length, contentHash, asJson(embedding), nowIso(), nowIso());
-        const row = this.db.prepare('select repo_id from threads where id = ? limit 1').get(threadId);
-        if (row) {
-            this.parsedEmbeddingCache.delete(row.repo_id);
-        }
     }
     startRun(table, repoId, scope) {
         const result = this.db