@et0and/ovid 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,11 +1,8 @@
1
1
  {
2
2
  "name": "@et0and/ovid",
3
- "version": "0.0.2",
3
+ "version": "0.0.4",
4
4
  "description": "Browse a repository's files by semantic meaning",
5
5
  "type": "module",
6
- "bin": {
7
- "ovid": "./bin/semantic-navigator.js"
8
- },
9
6
  "files": [
10
7
  "bin",
11
8
  "src",
package/src/auth.ts CHANGED
@@ -74,9 +74,9 @@ async function fetchCopilotToken(githubToken: string): Promise<{ token: string;
74
74
  method: "GET",
75
75
  headers: {
76
76
  Authorization: `token ${githubToken}`,
77
- "Editor-Version": "semantic-navigator/1.0.0",
78
- "Editor-Plugin-Version": "semantic-navigator/1.0.0",
79
- "User-Agent": "semantic-navigator",
77
+ "Editor-Version": "vscode/1.95.0",
78
+ "Editor-Plugin-Version": "copilot/1.246.0",
79
+ "User-Agent": "GitHubCopilotChat/0.22.4",
80
80
  },
81
81
  })
82
82
 
package/src/cache.ts ADDED
@@ -0,0 +1,119 @@
1
+ /**
2
+ * Two-layer result cache for semantic-navigator.
3
+ *
4
+ * Layer 1 — Embedding cache (~/.cache/semantic-navigator/embeddings.json)
5
+ * Content-addressed: maps sha256(text)[0:16] → number[]
6
+ * Per-entry granularity: only re-embed chunks whose text changed.
7
+ *
8
+ * Layer 2 — Tree cache (~/.cache/semantic-navigator/trees/<fingerprint>.json)
9
+ * Keyed by sha256(model + sorted(path:contentHash pairs)).
10
+ * A single changed file invalidates the whole tree, forcing a fresh
11
+ * cluster + label run, but embeddings are still reused from layer 1.
12
+ */
13
+
14
+ import { createHash } from "node:crypto"
15
+ import { mkdirSync, existsSync, readFileSync, writeFileSync } from "node:fs"
16
+ import { join } from "node:path"
17
+ import { homedir } from "node:os"
18
+ import type { Tree } from "./tree.ts"
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Paths
22
+ // ---------------------------------------------------------------------------
23
+
24
+ const CACHE_DIR = join(homedir(), ".cache", "semantic-navigator")
25
+ const EMBED_CACHE_PATH = join(CACHE_DIR, "embeddings.json")
26
+ const TREES_DIR = join(CACHE_DIR, "trees")
27
+
28
+ function ensureDirs(): void {
29
+ mkdirSync(CACHE_DIR, { recursive: true })
30
+ mkdirSync(TREES_DIR, { recursive: true })
31
+ }
32
+
33
+ // ---------------------------------------------------------------------------
34
+ // Helpers
35
+ // ---------------------------------------------------------------------------
36
+
37
+ /** Short hex digest: sha256(text).slice(0, 16) */
38
+ export function textHash(text: string): string {
39
+ return createHash("sha256").update(text).digest("hex").slice(0, 16)
40
+ }
41
+
42
+ /**
43
+ * Fingerprint for the tree cache: sha256(model + sorted path:contentHash pairs).
44
+ * `fileHashes` is a map from relative path to sha256(file content).
45
+ */
46
+ export function treeFingerprint(model: string, fileHashes: Map<string, string>): string {
47
+ const entries = Array.from(fileHashes.entries())
48
+ .map(([p, h]) => `${p}:${h}`)
49
+ .sort()
50
+ .join("\n")
51
+ return createHash("sha256").update(model + "\n" + entries).digest("hex")
52
+ }
53
+
54
+ // ---------------------------------------------------------------------------
55
+ // Layer 1: Embedding cache
56
+ // ---------------------------------------------------------------------------
57
+
58
+ type EmbedCacheMap = Record<string, number[]>
59
+
60
+ let _embedCache: EmbedCacheMap | null = null
61
+
62
+ function loadEmbedCache(): EmbedCacheMap {
63
+ if (_embedCache !== null) return _embedCache
64
+ if (existsSync(EMBED_CACHE_PATH)) {
65
+ try {
66
+ _embedCache = JSON.parse(readFileSync(EMBED_CACHE_PATH, "utf-8")) as EmbedCacheMap
67
+ } catch {
68
+ _embedCache = {}
69
+ }
70
+ } else {
71
+ _embedCache = {}
72
+ }
73
+ return _embedCache
74
+ }
75
+
76
+ /** Look up a cached embedding by text content. Returns null on miss. */
77
+ export function getCachedEmbedding(text: string): Float32Array | null {
78
+ const cache = loadEmbedCache()
79
+ const key = textHash(text)
80
+ const vec = cache[key]
81
+ if (vec === undefined) return null
82
+ return Float32Array.from(vec)
83
+ }
84
+
85
+ /** Store an embedding in the in-memory cache (call flushEmbedCache to persist). */
86
+ export function setCachedEmbedding(text: string, embedding: Float32Array): void {
87
+ const cache = loadEmbedCache()
88
+ const key = textHash(text)
89
+ cache[key] = Array.from(embedding)
90
+ }
91
+
92
+ /** Persist the in-memory embedding cache to disk. */
93
+ export function flushEmbedCache(): void {
94
+ if (_embedCache === null) return
95
+ ensureDirs()
96
+ writeFileSync(EMBED_CACHE_PATH, JSON.stringify(_embedCache), "utf-8")
97
+ }
98
+
99
+ // ---------------------------------------------------------------------------
100
+ // Layer 2: Tree cache
101
+ // ---------------------------------------------------------------------------
102
+
103
+ /** Look up a cached Tree by fingerprint. Returns null on miss. */
104
+ export function getCachedTree(fingerprint: string): Tree | null {
105
+ const treePath = join(TREES_DIR, `${fingerprint}.json`)
106
+ if (!existsSync(treePath)) return null
107
+ try {
108
+ return JSON.parse(readFileSync(treePath, "utf-8")) as Tree
109
+ } catch {
110
+ return null
111
+ }
112
+ }
113
+
114
+ /** Persist a Tree to the tree cache. */
115
+ export function setCachedTree(fingerprint: string, tree: Tree): void {
116
+ ensureDirs()
117
+ const treePath = join(TREES_DIR, `${fingerprint}.json`)
118
+ writeFileSync(treePath, JSON.stringify(tree), "utf-8")
119
+ }
package/src/cluster.ts CHANGED
@@ -95,19 +95,24 @@ function connectedComponents(indices: Int32Array[], N: number): number {
95
95
  }
96
96
 
97
97
  /**
98
- * Build the (dense) normalised Laplacian from the affinity matrix (stored as
99
- * a list of sparse {row,col,val} triples) and return it as a dense matrix
100
- * plus the degree diagonal `dd`.
98
+ * Build the degree diagonal `dd` from sparse affinity triplets, then return
99
+ * a sparse matvec closure for the *negated* normalised Laplacian (-L_norm).
100
+ *
101
+ * L_norm = I - D^{-1/2} A D^{-1/2}
102
+ * -L_norm = D^{-1/2} A D^{-1/2} - I
103
+ *
104
+ * The matvec avoids allocating an N×N dense matrix — at N=2000, k_sparse≈7,
105
+ * this reduces the cost per multiply from O(N²)=4M to O(N·k)=14k ops.
101
106
  */
102
- function buildNormalisedLaplacian(
107
+ function buildNormLaplacianSparseMatvec(
103
108
  sparseAffinity: Array<{ i: number; j: number; v: number }>,
104
109
  N: number
105
- ): { L: Matrix; dd: Float64Array } {
110
+ ): { matvec: (v: Float64Array) => Float64Array; dd: Float64Array } {
106
111
  // Accumulate row sums (degree) for normalisation
107
112
  const degree = new Float64Array(N)
108
113
  for (const { i, j, v } of sparseAffinity) {
109
- degree[i] = (degree[i] ?? 0) + v
110
- if (i !== j) degree[j] = (degree[j] ?? 0) + v
114
+ degree[i]! += v
115
+ if (i !== j) degree[j]! += v
111
116
  }
112
117
 
113
118
  const dd = new Float64Array(N)
@@ -115,31 +120,29 @@ function buildNormalisedLaplacian(
115
120
  dd[i] = degree[i]! > 1e-12 ? 1 / Math.sqrt(degree[i]!) : 0
116
121
  }
117
122
 
118
- // L_norm = I - D^{-1/2} A D^{-1/2}
119
- // We start from identity
120
- const L: Matrix = Array.from({ length: N }, (_, i) => {
121
- const row = new Float64Array(N)
122
- row[i] = 1
123
- return row
124
- })
125
-
126
- // Subtract normalised affinity contributions
127
- for (const { i, j, v } of sparseAffinity) {
128
- const w = v * dd[i]! * dd[j]!
129
- const rowI = L[i]!
130
- rowI[j] = (rowI[j] ?? 0) - w
131
- if (i !== j) {
132
- const rowJ = L[j]!
133
- rowJ[i] = (rowJ[i] ?? 0) - w
123
+ // Pre-compute normalised weights once so the closure stays cheap.
124
+ // w_ij = v * dd[i] * dd[j] (the off-diagonal contribution to A_norm)
125
+ const normAffinity = sparseAffinity.map(({ i, j, v }) => ({
126
+ i,
127
+ j,
128
+ w: v * dd[i]! * dd[j]!,
129
+ }))
130
+
131
+ // Matvec for -L_norm = A_norm - I
132
+ // result[i] = -v[i] + sum_j w_ij * v[j] (using symmetry)
133
+ const matvecFn = (vec: Float64Array): Float64Array => {
134
+ const out = new Float64Array(N)
135
+ // Start from -I · vec
136
+ for (let i = 0; i < N; i++) out[i] = -vec[i]!
137
+ // Add symmetric A_norm contributions
138
+ for (const { i, j, w } of normAffinity) {
139
+ out[i]! += w * vec[j]!
140
+ if (i !== j) out[j]! += w * vec[i]!
134
141
  }
142
+ return out
135
143
  }
136
144
 
137
- // Clamp diagonal to 1 (matches scipy behaviour after set_diag)
138
- for (let i = 0; i < N; i++) {
139
- L[i]![i] = 1
140
- }
141
-
142
- return { L, dd }
145
+ return { matvec: matvecFn, dd }
143
146
  }
144
147
 
145
148
  /**
@@ -171,35 +174,24 @@ function normaliseVec(v: Float64Array): number {
171
174
  return n
172
175
  }
173
176
 
174
- /** Multiply matrix M by vector v */
175
- function matvec(M: Matrix, v: Float64Array): Float64Array<ArrayBuffer> {
176
- const N = M.length
177
- const out = new Float64Array(N) as Float64Array<ArrayBuffer>
178
- for (let i = 0; i < N; i++) {
179
- out[i] = dot(M[i]!, v)
180
- }
181
- return out
182
- }
183
-
184
177
  /**
185
178
  * Randomised power-iteration with deflation to extract the `k` eigenpairs
186
- * corresponding to the *smallest* eigenvalues of a symmetric matrix M.
187
- *
188
- * M is the **negated** Laplacian (M = -L), so its *largest* eigenvalues
189
- * correspond to L's smallest — matching the Python code which does `laplacian *= -1`.
179
+ * corresponding to the *smallest* eigenvalues of a symmetric matrix.
190
180
  *
191
- * We use shifted inverse iteration: to find small eigenvalues of L we find
192
- * large eigenvalues of (-L + shift*I) where shift ≈ 1 (the diagonal was set
193
- * to 1 above). We iterate on M = -L and take the top-k eigenvectors, then
194
- * negate the eigenvalues back.
181
+ * Instead of a dense matrix, accepts a sparse `matvecFn` closure so that the
182
+ * per-iteration cost is O(N·k_sparse) rather than O(N²). The closure should
183
+ * implement multiplication by the *negated* Laplacian (-L_norm), whose top
184
+ * eigenvalues correspond to L_norm's bottom ones (matching the Python code
185
+ * which does `laplacian *= -1`).
195
186
  */
196
187
  function topKEigenpairs(
197
- negL: Matrix,
188
+ matvecFn: (v: Float64Array) => Float64Array,
189
+ n: number,
198
190
  k: number,
199
191
  maxIter = 300,
200
192
  tol = 1e-6
201
193
  ): { values: Float64Array<ArrayBuffer>; vectors: Float64Array<ArrayBuffer>[] } {
202
- const N = negL.length
194
+ const N = n
203
195
  const rng = seededRng(42)
204
196
 
205
197
  const vectors: Float64Array<ArrayBuffer>[] = []
@@ -207,7 +199,7 @@ function topKEigenpairs(
207
199
 
208
200
  for (let idx = 0; idx < k; idx++) {
209
201
  // Random start
210
- let v = Float64Array.from({ length: N }, () => rng() * 2 - 1)
202
+ let v = Float64Array.from({ length: N }, () => rng() * 2 - 1) as Float64Array<ArrayBuffer>
211
203
  normaliseVec(v)
212
204
 
213
205
  // Deflate against already-found vectors
@@ -216,7 +208,7 @@ function topKEigenpairs(
216
208
 
217
209
  let lambda = 0
218
210
  for (let iter = 0; iter < maxIter; iter++) {
219
- const Mv = matvec(negL, v)
211
+ const Mv = matvecFn(v) as Float64Array<ArrayBuffer>
220
212
 
221
213
  // Deflate
222
214
  for (const u of vectors) subtractProjection(Mv, u)
@@ -364,6 +356,29 @@ export function splitCluster(input: Cluster): Cluster[] {
364
356
 
365
357
  const normalized = normaliseRows(matFromEmbeds(input.entries))
366
358
 
359
+ // --- Precompute all pairwise distances once (O(N²)) ---
360
+ // Each row is sorted ascending so we can slice any k cheaply.
361
+ const allDistances: Array<Array<[number, number]>> = Array.from({ length: N }, () => [])
362
+ for (let i = 0; i < N; i++) {
363
+ for (let j = 0; j < N; j++) {
364
+ if (j === i) continue
365
+ allDistances[i]!.push([cosDist(normalized[i]!, normalized[j]!), j])
366
+ }
367
+ allDistances[i]!.sort((a, b) => a[0] - b[0])
368
+ }
369
+
370
+ /** Slice sorted rows to get k-NN result for any k in O(N·k). */
371
+ function knnFromPrecomputed(k: number): { distances: Float64Array[]; indices: Int32Array[] } {
372
+ const distances: Float64Array[] = []
373
+ const indices: Int32Array[] = []
374
+ for (let i = 0; i < N; i++) {
375
+ const row = allDistances[i]!.slice(0, k)
376
+ distances.push(Float64Array.from(row.map((x) => x[0])))
377
+ indices.push(Int32Array.from(row.map((x) => x[1])))
378
+ }
379
+ return { distances, indices }
380
+ }
381
+
367
382
  // --- Adaptive k-NN: find smallest k that gives 1 connected component ---
368
383
  const candidateKs: number[] = []
369
384
  for (let n = 0; ; n++) {
@@ -377,7 +392,7 @@ export function splitCluster(input: Cluster): Cluster[] {
377
392
  let chosenKnnResult: { distances: Float64Array[]; indices: Int32Array[] } | null = null
378
393
 
379
394
  for (const k of candidateKs) {
380
- const knnResult = knn(normalized, k)
395
+ const knnResult = knnFromPrecomputed(k)
381
396
  const nComponents = connectedComponents(knnResult.indices, N)
382
397
  if (nComponents === 1) {
383
398
  chosenK = k
@@ -387,8 +402,7 @@ export function splitCluster(input: Cluster): Cluster[] {
387
402
  }
388
403
 
389
404
  if (chosenKnnResult === null) {
390
- // Fallback: compute for the last candidate (floor(N/2))
391
- chosenKnnResult = knn(normalized, chosenK)
405
+ chosenKnnResult = knnFromPrecomputed(chosenK)
392
406
  }
393
407
 
394
408
  const { distances, indices } = chosenKnnResult
@@ -411,19 +425,12 @@ export function splitCluster(input: Cluster): Cluster[] {
411
425
  }
412
426
  }
413
427
 
414
- // --- Normalised Laplacian ---
415
- const { L, dd } = buildNormalisedLaplacian(sparseAffinity, N)
416
-
417
- // Negate L (as Python does `laplacian *= -1`) so power iteration finds
418
- // eigenvectors of -L, whose top eigenvalues correspond to L's bottom ones.
419
- const negL: Matrix = L.map((row) => {
420
- const r = Float64Array.from(row)
421
- for (let i = 0; i < r.length; i++) r[i]! *= -1
422
- return r
423
- })
428
+ // --- Sparse normalised Laplacian matvec ---
429
+ // Avoids building an N×N dense matrix; matvec cost is O(N·k_sparse) vs O(N²).
430
+ const { matvec: negLMatvec, dd } = buildNormLaplacianSparseMatvec(sparseAffinity, N)
424
431
 
425
432
  const k = Math.min(MAX_CLUSTERS + 1, N)
426
- const { values: rawValues, vectors } = topKEigenpairs(negL, k)
433
+ const { values: rawValues, vectors } = topKEigenpairs(negLMatvec, N, k)
427
434
 
428
435
  // Eigenvalues were of -L; flip sign back to get L eigenvalues
429
436
  const eigenvalues = Float64Array.from(rawValues, (v) => -v)
package/src/embed.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { pipeline, type FeatureExtractionPipeline } from "@huggingface/transformers"
2
2
  import type { Chunk } from "./tokenize.ts"
3
+ import { getCachedEmbedding, setCachedEmbedding, flushEmbedCache } from "./cache.ts"
3
4
 
4
5
  export const DEFAULT_EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2"
5
6
 
@@ -19,6 +20,8 @@ export interface EmbedOptions {
19
20
  model: string
20
21
  batchSize: number
21
22
  concurrency: number
23
+ /** When true, skip reading from cache (but still write to it). */
24
+ noCache?: boolean
22
25
  }
23
26
 
24
27
  let _pipe: FeatureExtractionPipeline | null = null
@@ -81,6 +84,7 @@ async function embedBatch(
81
84
 
82
85
  /**
83
86
  * Embed all chunks using the local model, with batching + concurrency limits.
87
+ * Chunks whose text is already in the embedding cache are skipped.
84
88
  * Calls `onProgress(done, total)` after each batch completes.
85
89
  */
86
90
  export async function embedChunks(
@@ -90,44 +94,60 @@ export async function embedChunks(
90
94
  ): Promise<EmbedEntry[]> {
91
95
  if (chunks.length === 0) return []
92
96
 
93
- const pipe = await getEmbedPipeline(opts.model)
94
-
95
- const batches: Chunk[][] = []
96
- for (let i = 0; i < chunks.length; i += opts.batchSize) {
97
- batches.push(chunks.slice(i, i + opts.batchSize))
97
+ // --- Cache pass: resolve hits immediately, collect misses for the model ---
98
+ const entries: EmbedEntry[] = new Array(chunks.length)
99
+ const missIndices: number[] = []
100
+
101
+ if (!opts.noCache) {
102
+ for (let i = 0; i < chunks.length; i++) {
103
+ const cached = getCachedEmbedding(chunks[i]!.text)
104
+ if (cached !== null) {
105
+ entries[i] = { path: chunks[i]!.path, text: chunks[i]!.text, embedding: cached }
106
+ } else {
107
+ missIndices.push(i)
108
+ }
109
+ }
110
+ } else {
111
+ for (let i = 0; i < chunks.length; i++) missIndices.push(i)
98
112
  }
99
113
 
100
- const entries: EmbedEntry[] = new Array(chunks.length)
101
- let chunkIndex = 0
102
- let done = 0
114
+ let done = chunks.length - missIndices.length
115
+ onProgress?.(done, chunks.length)
103
116
 
104
- for (let i = 0; i < batches.length; i += opts.concurrency) {
105
- const concurrentBatches = batches.slice(i, i + opts.concurrency)
106
- const startIndex = chunkIndex
117
+ if (missIndices.length > 0) {
118
+ const pipe = await getEmbedPipeline(opts.model)
119
+ const missChunks = missIndices.map((i) => chunks[i]!)
107
120
 
108
- const batchResults = await Promise.all(
109
- concurrentBatches.map((batch) =>
110
- embedBatch(pipe, batch.map((c) => c.text))
121
+ const batches: number[][] = []
122
+ for (let i = 0; i < missChunks.length; i += opts.batchSize) {
123
+ batches.push(missIndices.slice(i, i + opts.batchSize))
124
+ }
125
+
126
+ for (let i = 0; i < batches.length; i += opts.concurrency) {
127
+ const concurrentBatches = batches.slice(i, i + opts.concurrency)
128
+
129
+ const batchResults = await Promise.all(
130
+ concurrentBatches.map((idxBatch) =>
131
+ embedBatch(pipe, idxBatch.map((ci) => chunks[ci]!.text))
132
+ )
111
133
  )
112
- )
113
-
114
- let offset = startIndex
115
- for (let b = 0; b < concurrentBatches.length; b++) {
116
- const batch = concurrentBatches[b]!
117
- const embeddings = batchResults[b]!
118
- for (let j = 0; j < batch.length; j++) {
119
- const chunk = batch[j]!
120
- entries[offset] = {
121
- path: chunk.path,
122
- text: chunk.text,
123
- embedding: embeddings[j]!,
134
+
135
+ for (let b = 0; b < concurrentBatches.length; b++) {
136
+ const idxBatch = concurrentBatches[b]!
137
+ const embeddings = batchResults[b]!
138
+ for (let j = 0; j < idxBatch.length; j++) {
139
+ const ci = idxBatch[j]!
140
+ const chunk = chunks[ci]!
141
+ const embedding = embeddings[j]!
142
+ entries[ci] = { path: chunk.path, text: chunk.text, embedding }
143
+ setCachedEmbedding(chunk.text, embedding)
124
144
  }
125
- offset++
145
+ done += idxBatch.length
146
+ onProgress?.(done, chunks.length)
126
147
  }
127
- chunkIndex += batch.length
128
- done += batch.length
129
- onProgress?.(done, chunks.length)
130
148
  }
149
+
150
+ flushEmbedCache()
131
151
  }
132
152
 
133
153
  return entries
package/src/labels.ts CHANGED
@@ -106,7 +106,8 @@ async function chatComplete(
106
106
  })
107
107
 
108
108
  if (!resp.ok) {
109
- lastError = new Error(`Copilot API error: HTTP ${resp.status} ${resp.statusText}`)
109
+ const errBody = await resp.text().catch(() => "")
110
+ lastError = new Error(`Copilot API error: HTTP ${resp.status} ${resp.statusText} — ${errBody}`)
110
111
  // Retry on 5xx
111
112
  if (resp.status >= 500) {
112
113
  await Bun.sleep(1000 * (attempt + 1))
package/src/main.ts CHANGED
@@ -15,6 +15,7 @@
15
15
 
16
16
  import { Command } from "commander"
17
17
  import path from "node:path"
18
+ import { createHash } from "node:crypto"
18
19
 
19
20
  import { discoverFiles, DEFAULT_EXCLUDES, type FsOptions } from "./fs.ts"
20
21
  import { chunkFile } from "./tokenize.ts"
@@ -24,22 +25,24 @@ import { buildTree } from "./tree.ts"
24
25
  import { clearAuthCache, getCopilotToken } from "./auth.ts"
25
26
  import { SemanticNavigatorUI, type ProgressState } from "./ui.ts"
26
27
  import type { CopilotConfig } from "./labels.ts"
28
+ import { treeFingerprint, getCachedTree, setCachedTree } from "./cache.ts"
27
29
 
28
30
  // ---------------------------------------------------------------------------
29
31
  // CLI definition
30
32
  // ---------------------------------------------------------------------------
31
33
 
32
34
  const program = new Command()
33
- .name("semantic-navigator")
35
+ .name("ovid")
34
36
  .description("Browse a repository's files by semantic meaning")
35
37
  .argument("[directory]", "Directory to analyse (default: current working directory)", ".")
36
- .option("--completion-model <model>", "Copilot model to use for labelling", "gpt-4o-mini")
38
+ .option("--completion-model <model>", "Copilot model to use for labelling", "gpt-5-mini")
37
39
  .option("--max-files <n>", "Maximum number of files to index", (v) => parseInt(v, 10), 2000)
38
40
  .option("--max-file-bytes <n>", "Skip files larger than this many bytes", (v) => parseInt(v, 10), 1_000_000)
39
41
  .option("--exclude-glob <pattern...>", "Glob patterns to exclude (repeatable)", DEFAULT_EXCLUDES)
40
42
  .option("--read-concurrency <n>", "Concurrent file reads", (v) => parseInt(v, 10), 64)
41
43
  .option("--embed-batch-size <n>", "Chunks per embedding batch", (v) => parseInt(v, 10), 32)
42
44
  .option("--embed-concurrency <n>", "Concurrent embedding batches", (v) => parseInt(v, 10), 2)
45
+ .option("--no-cache", "Skip reading from cache; force re-embed and re-label")
43
46
  .option("--logout", "Clear cached GitHub / Copilot credentials and exit")
44
47
  .helpOption("-h, --help", "Show help")
45
48
 
@@ -60,6 +63,7 @@ async function main(): Promise<void> {
60
63
  readConcurrency: number
61
64
  embedBatchSize: number
62
65
  embedConcurrency: number
66
+ cache: boolean
63
67
  logout: boolean | undefined
64
68
  }>()
65
69
 
@@ -167,6 +171,7 @@ async function main(): Promise<void> {
167
171
  model: DEFAULT_EMBEDDING_MODEL,
168
172
  batchSize: opts.embedBatchSize,
169
173
  concurrency: opts.embedConcurrency,
174
+ noCache: !opts.cache,
170
175
  }
171
176
 
172
177
  let embedEntriesRaw: EmbedEntry[] | undefined
@@ -184,7 +189,32 @@ async function main(): Promise<void> {
184
189
  }
185
190
 
186
191
  // ---------------------------------------------------------------------------
187
- // Step 5: Spectral clustering (CPU-bound, synchronous)
192
+ // Step 5: Compute tree fingerprint and check tree cache
193
+ // ---------------------------------------------------------------------------
194
+
195
+ // Build a map of relativePath → sha256(content) for all discovered files.
196
+ const fileHashes = new Map<string, string>()
197
+ for (const f of resolvedFiles) {
198
+ fileHashes.set(
199
+ f.relativePath,
200
+ createHash("sha256").update(f.content).digest("hex").slice(0, 16)
201
+ )
202
+ }
203
+
204
+ const fingerprint = treeFingerprint(opts.completionModel, fileHashes)
205
+ const noCache = !opts.cache
206
+
207
+ if (!noCache) {
208
+ const cached = getCachedTree(fingerprint)
209
+ if (cached !== null) {
210
+ ui.setTree(cached)
211
+ // The UI event loop keeps the process alive until the user presses q/Esc.
212
+ return
213
+ }
214
+ }
215
+
216
+ // ---------------------------------------------------------------------------
217
+ // Step 6: Spectral clustering (CPU-bound, synchronous)
188
218
  // ---------------------------------------------------------------------------
189
219
 
190
220
  ui.updateProgress({
@@ -201,7 +231,7 @@ async function main(): Promise<void> {
201
231
  await Bun.sleep(0)
202
232
 
203
233
  // ---------------------------------------------------------------------------
204
- // Step 6: Build labelled tree
234
+ // Step 7: Build labelled tree
205
235
  // ---------------------------------------------------------------------------
206
236
 
207
237
  ui.updateProgress({
@@ -223,8 +253,11 @@ async function main(): Promise<void> {
223
253
  }
224
254
  const tree = treeRaw!
225
255
 
256
+ // Persist to tree cache for future runs
257
+ setCachedTree(fingerprint, tree)
258
+
226
259
  // ---------------------------------------------------------------------------
227
- // Step 7: Hand the tree to the UI
260
+ // Step 8: Hand the tree to the UI
228
261
  // ---------------------------------------------------------------------------
229
262
 
230
263
  ui.setTree(tree)
package/src/tree.ts CHANGED
@@ -93,25 +93,30 @@ export async function labelNodes(
93
93
  const children = splitCluster(cluster)
94
94
 
95
95
  if (children.length === 1) {
96
- // Leaf cluster: label each file individually
96
+ // Leaf cluster: label each file individually, in batches of 5 to stay
97
+ // within the model's prompt token limit.
97
98
  const entries = cluster.entries
98
- let labels = await labelFiles(config, entries)
99
-
100
- // Guard: align label count with entry count (Copilot may return fewer)
101
- if (labels.length < entries.length) {
102
- const missing = entries.length - labels.length
103
- labels = [
104
- ...labels,
105
- ...Array.from({ length: missing }, () => ({
106
- overarchingTheme: "",
107
- distinguishingFeature: "",
108
- label: "unlabelled",
109
- })),
110
- ]
99
+ const BATCH = 5
100
+ const allLabels: import("./labels.ts").Label[] = []
101
+ for (let i = 0; i < entries.length; i += BATCH) {
102
+ const batch = entries.slice(i, i + BATCH)
103
+ let batchLabels = await labelFiles(config, batch)
104
+ if (batchLabels.length < batch.length) {
105
+ const missing = batch.length - batchLabels.length
106
+ batchLabels = [
107
+ ...batchLabels,
108
+ ...Array.from({ length: missing }, () => ({
109
+ overarchingTheme: "",
110
+ distinguishingFeature: "",
111
+ label: "unlabelled",
112
+ })),
113
+ ]
114
+ }
115
+ allLabels.push(...batchLabels)
111
116
  }
112
117
 
113
118
  return entries.map((entry, i) => ({
114
- label: `${entry.path}: ${labels[i]!.label}`,
119
+ label: `${entry.path}: ${allLabels[i]!.label}`,
115
120
  files: [entry.path],
116
121
  children: [],
117
122
  }))
package/src/ui.ts CHANGED
@@ -42,6 +42,8 @@ const THEME = {
42
42
  statusColor: "#bb9af7",
43
43
  }
44
44
 
45
+ const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
46
+
45
47
  // ---------------------------------------------------------------------------
46
48
  // Flat node model for scroll-based rendering
47
49
  // ---------------------------------------------------------------------------
@@ -79,7 +81,7 @@ function nodeKey(tree: Tree, depth: number): string {
79
81
  // ---------------------------------------------------------------------------
80
82
 
81
83
  export interface ProgressState {
82
- phase: "reading" | "embedding" | "clustering" | "labelling" | "done"
84
+ phase: "reading" | "embedding" | "clustering" | "labelling" | "working" | "done"
83
85
  done: number
84
86
  total: number
85
87
  message?: string
@@ -104,6 +106,8 @@ export class SemanticNavigatorUI {
104
106
 
105
107
  // Progress state (shown before tree is ready)
106
108
  private progress: ProgressState = { phase: "reading", done: 0, total: 0 }
109
+ private spinnerTimer: ReturnType<typeof setInterval> | null = null
110
+ private spinnerIndex = 0
107
111
 
108
112
  // Row renderables (reused by rebuildRows)
109
113
  private rowRenderables: TextRenderable[] = []
@@ -144,7 +148,7 @@ export class SemanticNavigatorUI {
144
148
  })
145
149
  this.headerText = new TextRenderable(this.renderer, {
146
150
  id: "header-text",
147
- content: " semantic-navigator ",
151
+ content: " ovid ",
148
152
  fg: THEME.statusColor,
149
153
  })
150
154
  headerBox.add(this.headerText)
@@ -277,6 +281,11 @@ export class SemanticNavigatorUI {
277
281
 
278
282
  updateProgress(state: ProgressState): void {
279
283
  this.progress = state
284
+ if (state.phase === "working") {
285
+ this.startSpinner()
286
+ } else {
287
+ this.stopSpinner()
288
+ }
280
289
  this.renderProgress()
281
290
  }
282
291
 
@@ -290,6 +299,7 @@ export class SemanticNavigatorUI {
290
299
  case "embedding": phaseLabel = "Embedding"; break
291
300
  case "clustering": phaseLabel = "Clustering"; break
292
301
  case "labelling": phaseLabel = "Labelling"; break
302
+ case "working": phaseLabel = "Working"; break
293
303
  default: phaseLabel = "Done"; break
294
304
  }
295
305
 
@@ -297,14 +307,17 @@ export class SemanticNavigatorUI {
297
307
  ? `${phaseLabel}: ${done}/${total} (${pct}%)`
298
308
  : message ?? phaseLabel
299
309
 
300
- this.headerText.content = ` semantic-navigator ${statusLine}`
310
+ this.headerText.content = ` ovid ${statusLine}`
301
311
 
302
312
  // Clear rows and show a single status line
303
313
  this.clearRows()
304
314
  const id = "status-line"
315
+ const spinner = phase === "working"
316
+ ? `${SPINNER_FRAMES[this.spinnerIndex % SPINNER_FRAMES.length] ?? "•"} `
317
+ : ""
305
318
  const statusText = new TextRenderable(this.renderer, {
306
319
  id,
307
- content: ` ${statusLine}…`,
320
+ content: ` ${spinner}${statusLine}…`,
308
321
  fg: THEME.statusColor,
309
322
  })
310
323
  this.scrollBox.add(statusText)
@@ -312,11 +325,28 @@ export class SemanticNavigatorUI {
312
325
  this.rowIds = [id]
313
326
  }
314
327
 
328
+ private startSpinner(): void {
329
+ if (this.spinnerTimer !== null) return
330
+ this.spinnerTimer = setInterval(() => {
331
+ if (this.progress.phase !== "working") return
332
+ this.spinnerIndex = (this.spinnerIndex + 1) % SPINNER_FRAMES.length
333
+ this.renderProgress()
334
+ }, 80)
335
+ }
336
+
337
+ private stopSpinner(): void {
338
+ if (this.spinnerTimer === null) return
339
+ clearInterval(this.spinnerTimer)
340
+ this.spinnerTimer = null
341
+ this.spinnerIndex = 0
342
+ }
343
+
315
344
  // ---------------------------------------------------------------------------
316
345
  // Tree display (after tree is ready)
317
346
  // ---------------------------------------------------------------------------
318
347
 
319
348
  setTree(tree: Tree): void {
349
+ this.stopSpinner()
320
350
  this.tree = tree
321
351
 
322
352
  // Auto-expand root and its direct children
@@ -326,7 +356,7 @@ export class SemanticNavigatorUI {
326
356
  this.renderRows()
327
357
 
328
358
  this.headerText.content =
329
- ` semantic-navigator ${tree.label} (${tree.files.length} files)`
359
+ ` ovid ${tree.label} (${tree.files.length} files)`
330
360
  }
331
361
 
332
362
  private rebuildFlatList(): void {
@@ -455,6 +485,7 @@ export class SemanticNavigatorUI {
455
485
  // ---------------------------------------------------------------------------
456
486
 
457
487
  destroy(): void {
488
+ this.stopSpinner()
458
489
  this.renderer.destroy()
459
490
  }
460
491
  }