@comfanion/usethis_search 4.1.0-dev.3 → 4.2.0-dev.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cache/manager.ts CHANGED
@@ -1,21 +1,29 @@
1
1
  /**
2
2
  * Workspace Cache Manager
3
3
  *
4
- * In-memory cache of files attached to the AI's workspace context.
5
- * search() attaches top results + graph relations here.
6
- * message.before hook injects cached content into every LLM request.
4
+ * In-memory cache of chunks attached to the AI's workspace context.
5
+ * search() attaches top result chunks + graph relation chunks here.
6
+ * message.before hook injects cached chunks into every LLM request.
7
+ *
8
+ * Chunk-based architecture:
9
+ * - Each file can be split into multiple chunks (e.g. "src/auth.ts:chunk-5")
10
+ * - Chunks are keyed by chunkId (not path)
11
+ * - getChunksByPath(path) groups chunks by file
12
+ * - freshen() marks chunks as needing refresh if file modified
7
13
  *
8
14
  * Persistence:
9
15
  * .opencode/.workspace/{sessionId}.json — per-session workspace state
10
16
  * .opencode/.workspace/latest.json — fallback (current session)
11
17
  *
12
18
  * Only metadata + paths are saved by default; chunk content is persisted only when persistContent=true.
19
+ * Note: Chunks cannot be re-read from disk (chunk boundaries not stored).
20
+ * Restore uses saved content if available, otherwise skips chunk.
13
21
  *
14
22
  * Lifecycle:
15
- * search("auth") -> attach top N main files + graph relations
16
- * message.before -> inject all cached files into user message
23
+ * search("auth") -> attach top N chunks (from multiple files)
24
+ * message.before -> inject all cached chunks into user message
17
25
  * workspace.detach / workspace.clear -> manual cleanup
18
- * ensureBudget() -> auto-evict oldest low-score files when over limit
26
+ * ensureBudget() -> auto-evict oldest low-score chunks when over limit
19
27
  * save() / restore() -> persist to / load from .opencode/.workspace/
20
28
  */
21
29
 
@@ -26,15 +34,19 @@ import crypto from "crypto"
26
34
  // ── Types ───────────────────────────────────────────────────────────────────
27
35
 
28
36
  export interface WorkspaceEntry {
37
+ /** Unique chunk identifier (e.g. "src/auth/login.ts:chunk-5") */
38
+ chunkId: string
29
39
  /** Relative file path (e.g. "src/auth/login.ts") */
30
40
  path: string
31
- /** Full file content (loaded in memory, NOT persisted to JSON) */
41
+ /** Chunk content (NOT the full file — just this chunk) */
32
42
  content: string
33
- /** Estimated token count (~content.length / 4) */
43
+ /** Estimated token count for this chunk (~content.length / 4) */
34
44
  tokens: number
35
- /** MD5 hash of content — used by freshen() to detect disk changes */
45
+ /** Chunk index within file (0-based) */
46
+ chunkIndex: number
47
+ /** MD5 hash of chunk content — used by freshen() to detect changes */
36
48
  contentHash: string
37
- /** How this file got into workspace */
49
+ /** How this chunk got into workspace */
38
50
  role: "search-main" | "search-graph" | "manual"
39
51
  /** Timestamp when attached */
40
52
  attachedAt: number
@@ -42,7 +54,7 @@ export interface WorkspaceEntry {
42
54
  attachedBy: string
43
55
  /** Search relevance score (0-1) */
44
56
  score?: number
45
- /** File metadata from search results */
57
+ /** Chunk metadata from search results */
46
58
  metadata?: {
47
59
  language?: string
48
60
  function_name?: string
@@ -50,15 +62,20 @@ export interface WorkspaceEntry {
50
62
  heading_context?: string
51
63
  /** Graph relation type (for role=search-graph) */
52
64
  relation?: string
53
- /** Which main file pulled this graph file in */
54
- mainFile?: string
65
+ /** Which main chunk pulled this graph chunk in */
66
+ mainChunkId?: string
67
+ /** Start/end line numbers in original file */
68
+ startLine?: number
69
+ endLine?: number
55
70
  }
56
71
  }
57
72
 
58
73
  /** Serialized entry — content included only when persistContent=true */
59
74
  interface PersistedEntry {
75
+ chunkId: string
60
76
  path: string
61
77
  tokens: number
78
+ chunkIndex: number
62
79
  role: WorkspaceEntry["role"]
63
80
  attachedAt: number
64
81
  attachedBy: string
@@ -66,6 +83,8 @@ interface PersistedEntry {
66
83
  metadata?: WorkspaceEntry["metadata"]
67
84
  /** Only present when config.persistContent=true (debug mode) */
68
85
  content?: string
86
+ /** MD5 hash of chunk content (for freshen detection) */
87
+ contentHash?: string
69
88
  }
70
89
 
71
90
  /** On-disk workspace snapshot */
@@ -73,28 +92,30 @@ interface WorkspaceSnapshot {
73
92
  sessionId: string
74
93
  savedAt: string
75
94
  totalTokens: number
76
- fileCount: number
95
+ chunkCount: number
77
96
  /** true if content is included in entries */
78
97
  debug: boolean
79
98
  entries: PersistedEntry[]
99
+ /** Deprecated: old fileCount field (for backward compatibility) */
100
+ fileCount?: number
80
101
  }
81
102
 
82
103
  export interface WorkspaceConfig {
83
- /** Max total tokens across all files (default: 50000) */
104
+ /** Max total tokens across all chunks (default: 50000) */
84
105
  maxTokens: number
85
- /** Max number of files (default: 30) */
86
- maxFiles: number
87
- /** How many top search results to attach (default: 5) */
106
+ /** Max number of chunks (default: 100) */
107
+ maxChunks: number
108
+ /** How many top search chunks to attach (default: 10) */
88
109
  attachTopN: number
89
- /** Max graph relations per main file (default: 3) */
90
- attachRelatedPerFile: number
91
- /** Min score for main files to be attached (default: 0.65) */
110
+ /** Max graph relation chunks per main chunk (default: 3) */
111
+ attachRelatedPerChunk: number
112
+ /** Min score for main chunks to be attached (default: 0.65) */
92
113
  minScoreMain: number
93
- /** Min score for graph relations to be attached (default: 0.5) */
114
+ /** Min score for graph relation chunks to be attached (default: 0.5) */
94
115
  minScoreRelated: number
95
116
  /**
96
- * Save full file content in workspace snapshots (default: false).
97
- * When true: .workspace/{sessionId}.json includes content for every file.
117
+ * Save full chunk content in workspace snapshots (default: false).
118
+ * When true: .workspace/{sessionId}.json includes content for every chunk.
98
119
  * Useful for debugging — see exactly what the AI saw.
99
120
  * Warning: snapshots can be 10-50MB+ with content enabled.
100
121
  */
@@ -102,23 +123,23 @@ export interface WorkspaceConfig {
102
123
  /**
103
124
  * Auto-prune old search tool outputs from chat history (default: true).
104
125
  * When true: message.before hook replaces old search outputs with 1-line summaries.
105
- * Files are already in workspace injection — keeping big outputs wastes tokens.
126
+ * Chunks are already in workspace injection — keeping big outputs wastes tokens.
106
127
  */
107
128
  autoPruneSearch: boolean
108
129
  /**
109
- * Substitute tool outputs when files are in workspace (default: true).
110
- * When true: read(), grep(), glob() outputs are replaced with compact messages
111
- * if all matched files are in workspace cache.
112
- * Saves tokens since files are already visible in workspace injection.
130
+ * Substitute tool outputs when chunks are in workspace (default: true).
131
+ * When true: read() outputs are replaced with compact messages
132
+ * if file has chunks in workspace cache.
133
+ * Saves tokens since chunks are already visible in workspace injection.
113
134
  */
114
135
  substituteToolOutputs: boolean
115
136
  }
116
137
 
117
138
  export const DEFAULT_WORKSPACE_CONFIG: WorkspaceConfig = {
118
139
  maxTokens: 50_000,
119
- maxFiles: 30,
120
- attachTopN: 5,
121
- attachRelatedPerFile: 3,
140
+ maxChunks: 100,
141
+ attachTopN: 10,
142
+ attachRelatedPerChunk: 3,
122
143
  minScoreMain: 0.65,
123
144
  minScoreRelated: 0.5,
124
145
  persistContent: false,
@@ -129,6 +150,7 @@ export const DEFAULT_WORKSPACE_CONFIG: WorkspaceConfig = {
129
150
  // ── Implementation ──────────────────────────────────────────────────────────
130
151
 
131
152
  class WorkspaceCache {
153
+ /** Map of chunks keyed by chunkId (e.g. "src/auth.ts:chunk-5") */
132
154
  private entries = new Map<string, WorkspaceEntry>()
133
155
  private _totalTokens = 0
134
156
  private config: WorkspaceConfig
@@ -165,31 +187,31 @@ class WorkspaceCache {
165
187
  * Safe to call multiple times — second call with a different sessionId
166
188
  * will save current state, then restore for the new session.
167
189
  */
168
- async init(projectRoot: string, sessionId?: string): Promise<void> {
169
- // If re-initializing with a new session — flush current state first
170
- if (this.projectRoot && this.sessionId && sessionId && sessionId !== this.sessionId) {
171
- await this.save()
172
- }
173
-
174
- this.projectRoot = projectRoot
175
- this.sessionId = sessionId || `session-${Date.now()}`
176
-
177
- // Try to restore existing workspace for this session
178
- const restored = await this.restore(this.sessionId)
179
- if (restored) {
180
- console.log(`[workspace] Restored session ${this.sessionId} (${this.entries.size} files, ${this._totalTokens} tokens)`)
181
- } else if (!sessionId) {
182
- // Only fall back to latest.json when no specific session requested
183
- // (avoids clobbering a fresh session with stale data)
184
- const restoredLatest = await this.restore("latest")
185
- if (restoredLatest) {
186
- console.log(`[workspace] Restored from latest (${this.entries.size} files, ${this._totalTokens} tokens)`)
187
- }
188
- }
189
-
190
- // Register process exit handler (once)
191
- this.registerShutdownHook()
192
- }
190
+ async init(projectRoot: string, sessionId?: string): Promise<void> {
191
+ // If re-initializing with a new session — flush current state first
192
+ if (this.projectRoot && this.sessionId && sessionId && sessionId !== this.sessionId) {
193
+ await this.save()
194
+ }
195
+
196
+ this.projectRoot = projectRoot
197
+ this.sessionId = sessionId || `session-${Date.now()}`
198
+
199
+ // Try to restore existing workspace for this session
200
+ const restored = await this.restore(this.sessionId)
201
+ if (restored) {
202
+ console.log(`[workspace] Restored session ${this.sessionId} (${this.entries.size} chunks, ${this._totalTokens} tokens)`)
203
+ } else if (!sessionId) {
204
+ // Only fall back to latest.json when no specific session requested
205
+ // (avoids clobbering a fresh session with stale data)
206
+ const restoredLatest = await this.restore("latest")
207
+ if (restoredLatest) {
208
+ console.log(`[workspace] Restored from latest (${this.entries.size} chunks, ${this._totalTokens} tokens)`)
209
+ }
210
+ }
211
+
212
+ // Register process exit handler (once)
213
+ this.registerShutdownHook()
214
+ }
193
215
 
194
216
  /** Whether shutdown hook is registered */
195
217
  private _shutdownRegistered = false
@@ -202,48 +224,51 @@ class WorkspaceCache {
202
224
  if (this._shutdownRegistered) return
203
225
  this._shutdownRegistered = true
204
226
 
205
- const flush = () => {
206
- // Synchronous-ish flush — best effort
207
- // Node process is exiting, so we can't await.
208
- // Use writeFileSync as last resort.
209
- try {
210
- const dir = this.getWorkspaceDir()
211
- if (!dir || this.entries.size === 0) return
212
-
213
- const id = this.sessionId || "latest"
214
- const includeContent = this.config.persistContent
215
-
216
- const snapshot = {
217
- sessionId: id,
218
- savedAt: new Date().toISOString(),
219
- totalTokens: this._totalTokens,
220
- fileCount: this.entries.size,
221
- debug: includeContent,
222
- entries: Array.from(this.entries.values()).map(e => {
223
- const p: any = {
224
- path: e.path,
225
- tokens: e.tokens,
226
- role: e.role,
227
- attachedAt: e.attachedAt,
228
- attachedBy: e.attachedBy,
229
- score: e.score,
230
- metadata: e.metadata,
231
- }
232
- if (includeContent) p.content = e.content
233
- return p
234
- }),
235
- }
236
-
237
- const data = JSON.stringify(snapshot, null, 2)
238
- const fsSync = require("fs")
239
-
240
- fsSync.mkdirSync(dir, { recursive: true })
241
- fsSync.writeFileSync(path.join(dir, `${id}.json`), data, "utf-8")
242
- fsSync.writeFileSync(path.join(dir, "latest.json"), data, "utf-8")
243
- } catch {
244
- // Best effort — process is dying
245
- }
246
- }
227
+ const flush = () => {
228
+ // Synchronous-ish flush — best effort
229
+ // Node process is exiting, so we can't await.
230
+ // Use writeFileSync as last resort.
231
+ try {
232
+ const dir = this.getWorkspaceDir()
233
+ if (!dir || this.entries.size === 0) return
234
+
235
+ const id = this.sessionId || "latest"
236
+ const includeContent = this.config.persistContent
237
+
238
+ const snapshot = {
239
+ sessionId: id,
240
+ savedAt: new Date().toISOString(),
241
+ totalTokens: this._totalTokens,
242
+ chunkCount: this.entries.size,
243
+ debug: includeContent,
244
+ entries: Array.from(this.entries.values()).map(e => {
245
+ const p: any = {
246
+ chunkId: e.chunkId,
247
+ path: e.path,
248
+ tokens: e.tokens,
249
+ chunkIndex: e.chunkIndex,
250
+ role: e.role,
251
+ attachedAt: e.attachedAt,
252
+ attachedBy: e.attachedBy,
253
+ score: e.score,
254
+ metadata: e.metadata,
255
+ contentHash: e.contentHash,
256
+ }
257
+ if (includeContent) p.content = e.content
258
+ return p
259
+ }),
260
+ }
261
+
262
+ const data = JSON.stringify(snapshot, null, 2)
263
+ const fsSync = require("fs")
264
+
265
+ fsSync.mkdirSync(dir, { recursive: true })
266
+ fsSync.writeFileSync(path.join(dir, `${id}.json`), data, "utf-8")
267
+ fsSync.writeFileSync(path.join(dir, "latest.json"), data, "utf-8")
268
+ } catch {
269
+ // Best effort — process is dying
270
+ }
271
+ }
247
272
 
248
273
  process.on("exit", flush)
249
274
  process.on("SIGINT", () => { flush(); process.exit(0) })
@@ -268,165 +293,162 @@ class WorkspaceCache {
268
293
  }
269
294
  }
270
295
 
271
- /**
272
- * Save workspace metadata to disk immediately.
273
- */
274
- async save(snapshotId?: string): Promise<void> {
275
- const dir = this.getWorkspaceDir()
276
- if (!dir) return
277
-
278
- const id = snapshotId || this.sessionId || "latest"
279
-
280
- const includeContent = this.config.persistContent
281
-
282
- const snapshot: WorkspaceSnapshot = {
283
- sessionId: id,
284
- savedAt: new Date().toISOString(),
285
- totalTokens: this._totalTokens,
286
- fileCount: this.entries.size,
287
- debug: includeContent,
288
- entries: Array.from(this.entries.values()).map(e => {
289
- const persisted: PersistedEntry = {
290
- path: e.path,
291
- tokens: e.tokens,
292
- role: e.role,
293
- attachedAt: e.attachedAt,
294
- attachedBy: e.attachedBy,
295
- score: e.score,
296
- metadata: e.metadata,
297
- }
298
- if (includeContent) {
299
- persisted.content = e.content
300
- }
301
- return persisted
302
- }),
303
- }
304
-
305
- try {
306
- await fs.mkdir(dir, { recursive: true })
307
-
308
- // Save session-specific file
309
- const sessionFile = path.join(dir, `${id}.json`)
310
- await fs.writeFile(sessionFile, JSON.stringify(snapshot, null, 2), "utf-8")
311
-
312
- // Also save as latest.json (always keep current state accessible)
313
- const latestFile = path.join(dir, "latest.json")
314
- await fs.writeFile(latestFile, JSON.stringify(snapshot, null, 2), "utf-8")
315
- } catch (error) {
316
- console.error("[workspace] Failed to save:", error)
317
- }
318
- }
319
-
320
- /**
321
- * Restore workspace from disk.
322
- * Re-reads file content from project files (NOT from snapshot).
323
- * Files that no longer exist on disk are skipped.
324
- *
325
- * Returns true if snapshot was found and loaded.
326
- */
327
- async restore(snapshotId?: string): Promise<boolean> {
328
- const dir = this.getWorkspaceDir()
329
- if (!dir || !this.projectRoot) return false
330
-
331
- const id = snapshotId || this.sessionId || "latest"
332
- const snapshotFile = path.join(dir, `${id}.json`)
333
-
334
- let snapshot: WorkspaceSnapshot
335
- try {
336
- const raw = await fs.readFile(snapshotFile, "utf-8")
337
- snapshot = JSON.parse(raw)
338
- } catch {
339
- return false // No snapshot found
340
- }
341
-
342
- // Clear current state
343
- this.entries.clear()
344
- this._totalTokens = 0
345
-
346
- // Re-read file content from disk (or use saved content in debug mode)
347
- let loaded = 0
348
- let skipped = 0
349
-
350
- for (const entry of snapshot.entries) {
351
- let content: string | null = null
352
-
353
- // If snapshot has content (debug mode) — use it directly
354
- if (entry.content) {
355
- content = entry.content
356
- } else {
357
- // Otherwise re-read from disk
358
- try {
359
- const fullPath = path.join(this.projectRoot, entry.path)
360
- content = await fs.readFile(fullPath, "utf-8")
361
- } catch {
362
- // File no longer exists or unreadable
363
- }
364
- }
365
-
366
- if (content === null) {
367
- skipped++
368
- continue
369
- }
370
-
371
- const tokens = this.countTokens(content)
372
-
373
- this.entries.set(entry.path, {
374
- path: entry.path,
375
- content,
376
- tokens,
377
- contentHash: this.hashContent(content),
378
- role: entry.role,
379
- attachedAt: entry.attachedAt,
380
- attachedBy: entry.attachedBy,
381
- score: entry.score,
382
- metadata: entry.metadata,
383
- })
384
- this._totalTokens += tokens
385
- loaded++
386
- }
387
-
388
- if (skipped > 0) {
389
- console.log(`[workspace] Restore: ${loaded} loaded, ${skipped} skipped (files missing)`)
390
- }
391
-
392
- return loaded > 0
393
- }
394
-
395
- /**
396
- * List all saved workspace snapshots.
397
- */
398
- async listSnapshots(): Promise<{ id: string; savedAt: string; fileCount: number; totalTokens: number }[]> {
399
- const dir = this.getWorkspaceDir()
400
- if (!dir) return []
401
-
402
- try {
403
- const files = await fs.readdir(dir)
404
- const snapshots: { id: string; savedAt: string; fileCount: number; totalTokens: number }[] = []
405
-
406
- for (const file of files) {
407
- if (!file.endsWith(".json") || file === "latest.json") continue
408
-
409
- try {
410
- const raw = await fs.readFile(path.join(dir, file), "utf-8")
411
- const snap: WorkspaceSnapshot = JSON.parse(raw)
412
- snapshots.push({
413
- id: snap.sessionId,
414
- savedAt: snap.savedAt,
415
- fileCount: snap.fileCount,
416
- totalTokens: snap.totalTokens,
417
- })
418
- } catch {
419
- // Corrupt file — skip
420
- }
421
- }
422
-
423
- // Sort by savedAt descending
424
- snapshots.sort((a, b) => b.savedAt.localeCompare(a.savedAt))
425
- return snapshots
426
- } catch {
427
- return []
428
- }
429
- }
296
+ /**
297
+ * Save workspace metadata to disk immediately.
298
+ * Chunks are keyed by chunkId in the snapshot.
299
+ */
300
+ async save(snapshotId?: string): Promise<void> {
301
+ const dir = this.getWorkspaceDir()
302
+ if (!dir) return
303
+
304
+ const id = snapshotId || this.sessionId || "latest"
305
+
306
+ const includeContent = this.config.persistContent
307
+
308
+ const snapshot: WorkspaceSnapshot = {
309
+ sessionId: id,
310
+ savedAt: new Date().toISOString(),
311
+ totalTokens: this._totalTokens,
312
+ chunkCount: this.entries.size,
313
+ debug: includeContent,
314
+ entries: Array.from(this.entries.values()).map(e => {
315
+ const persisted: PersistedEntry = {
316
+ chunkId: e.chunkId,
317
+ path: e.path,
318
+ tokens: e.tokens,
319
+ chunkIndex: e.chunkIndex,
320
+ role: e.role,
321
+ attachedAt: e.attachedAt,
322
+ attachedBy: e.attachedBy,
323
+ score: e.score,
324
+ metadata: e.metadata,
325
+ contentHash: e.contentHash,
326
+ }
327
+ if (includeContent) {
328
+ persisted.content = e.content
329
+ }
330
+ return persisted
331
+ }),
332
+ }
333
+
334
+ try {
335
+ await fs.mkdir(dir, { recursive: true })
336
+
337
+ // Save session-specific file
338
+ const sessionFile = path.join(dir, `${id}.json`)
339
+ await fs.writeFile(sessionFile, JSON.stringify(snapshot, null, 2), "utf-8")
340
+
341
+ // Also save as latest.json (always keep current state accessible)
342
+ const latestFile = path.join(dir, "latest.json")
343
+ await fs.writeFile(latestFile, JSON.stringify(snapshot, null, 2), "utf-8")
344
+ } catch (error) {
345
+ console.error("[workspace] Failed to save:", error)
346
+ }
347
+ }
348
+
349
+ /**
350
+ * Restore workspace from disk.
351
+ * Chunks are keyed by chunkId.
352
+ *
353
+ * For chunks: uses saved content if available (debug mode).
354
+ * Cannot re-read chunks from disk since chunk boundaries aren't stored.
355
+ * Chunks without saved content are skipped.
356
+ *
357
+ * Returns true if snapshot was found and loaded.
358
+ */
359
+ async restore(snapshotId?: string): Promise<boolean> {
360
+ const dir = this.getWorkspaceDir()
361
+ if (!dir || !this.projectRoot) return false
362
+
363
+ const id = snapshotId || this.sessionId || "latest"
364
+ const snapshotFile = path.join(dir, `${id}.json`)
365
+
366
+ let snapshot: WorkspaceSnapshot
367
+ try {
368
+ const raw = await fs.readFile(snapshotFile, "utf-8")
369
+ snapshot = JSON.parse(raw)
370
+ } catch {
371
+ return false // No snapshot found
372
+ }
373
+
374
+ // Clear current state
375
+ this.entries.clear()
376
+ this._totalTokens = 0
377
+
378
+ // Restore chunks from snapshot
379
+ // Note: Chunks can only be restored if content was saved (debug mode)
380
+ let loaded = 0
381
+ let skipped = 0
382
+
383
+ for (const entry of snapshot.entries) {
384
+ // Chunks require saved content — cannot re-read from disk
385
+ if (!entry.content) {
386
+ skipped++
387
+ continue
388
+ }
389
+
390
+ const content = entry.content
391
+ const tokens = this.countTokens(content)
392
+
393
+ this.entries.set(entry.chunkId, {
394
+ chunkId: entry.chunkId,
395
+ path: entry.path,
396
+ content,
397
+ tokens,
398
+ chunkIndex: entry.chunkIndex,
399
+ contentHash: entry.contentHash || this.hashContent(content),
400
+ role: entry.role,
401
+ attachedAt: entry.attachedAt,
402
+ attachedBy: entry.attachedBy,
403
+ score: entry.score,
404
+ metadata: entry.metadata,
405
+ })
406
+ this._totalTokens += tokens
407
+ loaded++
408
+ }
409
+
410
+ if (skipped > 0) {
411
+ console.log(`[workspace] Restore: ${loaded} chunks loaded, ${skipped} skipped (no saved content)`)
412
+ }
413
+
414
+ return loaded > 0
415
+ }
416
+
417
+ /**
418
+ * List all saved workspace snapshots.
419
+ */
420
+ async listSnapshots(): Promise<{ id: string; savedAt: string; chunkCount: number; totalTokens: number }[]> {
421
+ const dir = this.getWorkspaceDir()
422
+ if (!dir) return []
423
+
424
+ try {
425
+ const files = await fs.readdir(dir)
426
+ const snapshots: { id: string; savedAt: string; chunkCount: number; totalTokens: number }[] = []
427
+
428
+ for (const file of files) {
429
+ if (!file.endsWith(".json") || file === "latest.json") continue
430
+
431
+ try {
432
+ const raw = await fs.readFile(path.join(dir, file), "utf-8")
433
+ const snap: WorkspaceSnapshot = JSON.parse(raw)
434
+ snapshots.push({
435
+ id: snap.sessionId,
436
+ savedAt: snap.savedAt,
437
+ chunkCount: snap.chunkCount || snap.fileCount || 0,
438
+ totalTokens: snap.totalTokens,
439
+ })
440
+ } catch {
441
+ // Corrupt file — skip
442
+ }
443
+ }
444
+
445
+ // Sort by savedAt descending
446
+ snapshots.sort((a, b) => b.savedAt.localeCompare(a.savedAt))
447
+ return snapshots
448
+ } catch {
449
+ return []
450
+ }
451
+ }
430
452
 
431
453
  /**
432
454
  * Delete a saved snapshot.
@@ -443,145 +465,170 @@ class WorkspaceCache {
443
465
  }
444
466
  }
445
467
 
446
- // ── Core operations ─────────────────────────────────────────────────────
447
-
448
- /**
449
- * Attach a file to workspace.
450
- * If file already exists, updates it (replaces content/score).
451
- * Auto-saves to disk (debounced).
452
- */
453
- attach(entry: Omit<WorkspaceEntry, "tokens" | "contentHash">): void {
454
- const tokens = this.countTokens(entry.content)
455
- const contentHash = this.hashContent(entry.content)
456
-
457
- // Update existing — subtract old tokens first
458
- const existing = this.entries.get(entry.path)
459
- if (existing) {
460
- this._totalTokens -= existing.tokens
461
- }
462
-
463
- this.entries.set(entry.path, { ...entry, tokens, contentHash })
464
- this._totalTokens += tokens
465
-
466
- // Auto-evict if over budget
467
- this.ensureBudget()
468
-
469
- // Auto-save (debounced)
470
- this.scheduleSave()
471
- }
472
-
473
- /**
474
- * Remove a single file from workspace.
475
- * Returns true if file was found and removed.
476
- */
477
- detach(filePath: string): boolean {
478
- const entry = this.entries.get(filePath)
479
- if (!entry) return false
480
-
481
- this.entries.delete(filePath)
482
- this._totalTokens -= entry.tokens
483
-
484
- // Auto-save (debounced)
485
- this.scheduleSave()
486
- return true
487
- }
488
-
489
- /**
490
- * Remove all files older than `ms` milliseconds.
491
- * Returns number of files removed.
492
- */
493
- detachOlderThan(ms: number): number {
494
- const cutoff = Date.now() - ms
495
- let removed = 0
496
-
497
- for (const [filePath, entry] of this.entries) {
498
- if (entry.attachedAt < cutoff) {
499
- this.entries.delete(filePath)
500
- this._totalTokens -= entry.tokens
501
- removed++
502
- }
503
- }
504
-
505
- if (removed > 0) this.scheduleSave()
506
- return removed
507
- }
508
-
509
- /**
510
- * Remove all files that were attached by a specific search query.
511
- * Returns number of files removed.
512
- */
513
- detachByQuery(query: string): number {
514
- let removed = 0
515
-
516
- for (const [filePath, entry] of this.entries) {
517
- if (entry.attachedBy === query || entry.attachedBy.startsWith(`${query} (`)) {
518
- this.entries.delete(filePath)
519
- this._totalTokens -= entry.tokens
520
- removed++
521
- }
522
- }
523
-
524
- if (removed > 0) this.scheduleSave()
525
- return removed
526
- }
527
-
528
- /**
529
- * Get all entries sorted by: search-main first (by score desc), then search-graph, then manual.
530
- */
531
- getAll(): WorkspaceEntry[] {
532
- return Array.from(this.entries.values()).sort((a, b) => {
533
- // Main files first
534
- const roleOrder = { "search-main": 0, "search-graph": 1, manual: 2 }
535
- const ra = roleOrder[a.role] ?? 2
536
- const rb = roleOrder[b.role] ?? 2
537
- if (ra !== rb) return ra - rb
538
-
539
- // Within same role: higher score first
540
- if (a.score !== undefined && b.score !== undefined) {
541
- return b.score - a.score
542
- }
543
-
544
- // Fallback: alphabetical
545
- return a.path.localeCompare(b.path)
546
- })
547
- }
548
-
549
- /**
550
- * Check if a file is in workspace.
551
- */
552
- has(filePath: string): boolean {
553
- return this.entries.has(filePath)
554
- }
555
-
556
- /**
557
- * Get a single entry by path.
558
- */
559
- get(filePath: string): WorkspaceEntry | undefined {
560
- return this.entries.get(filePath)
561
- }
562
-
563
- /**
564
- * Remove all files from workspace.
565
- */
566
- clear(): void {
567
- this.entries.clear()
568
- this._totalTokens = 0
569
- this.scheduleSave()
570
- }
571
-
572
- /**
573
- * Total tokens across all cached files.
574
- */
575
- get totalTokens(): number {
576
- return this._totalTokens
577
- }
578
-
579
- /**
580
- * Number of files in workspace.
581
- */
582
- get size(): number {
583
- return this.entries.size
584
- }
468
+ // ── Core operations ─────────────────────────────────────────────────────
469
+
470
+ /**
471
+ * Attach a chunk to workspace.
472
+ * If chunk with same chunkId already exists, updates it (replaces content/score).
473
+ * Auto-saves to disk (debounced).
474
+ */
475
+ attach(entry: Omit<WorkspaceEntry, "tokens" | "contentHash">): void {
476
+ const tokens = this.countTokens(entry.content)
477
+ const contentHash = this.hashContent(entry.content)
478
+
479
+ // Update existing chunk — subtract old tokens first
480
+ const existing = this.entries.get(entry.chunkId)
481
+ if (existing) {
482
+ this._totalTokens -= existing.tokens
483
+ }
484
+
485
+ this.entries.set(entry.chunkId, { ...entry, tokens, contentHash })
486
+ this._totalTokens += tokens
487
+
488
+ // Auto-evict if over budget
489
+ this.ensureBudget()
490
+
491
+ // Auto-save (debounced)
492
+ this.scheduleSave()
493
+ }
494
+
495
+ /**
496
+ * Remove a single chunk from workspace by chunkId.
497
+ * Returns true if chunk was found and removed.
498
+ */
499
+ detach(chunkId: string): boolean {
500
+ const entry = this.entries.get(chunkId)
501
+ if (!entry) return false
502
+
503
+ this.entries.delete(chunkId)
504
+ this._totalTokens -= entry.tokens
505
+
506
+ // Auto-save (debounced)
507
+ this.scheduleSave()
508
+ return true
509
+ }
510
+
511
+ /**
512
+ * Remove all chunks older than `ms` milliseconds.
513
+ * Returns number of chunks removed.
514
+ */
515
+ detachOlderThan(ms: number): number {
516
+ const cutoff = Date.now() - ms
517
+ let removed = 0
518
+
519
+ for (const [chunkId, entry] of this.entries) {
520
+ if (entry.attachedAt < cutoff) {
521
+ this.entries.delete(chunkId)
522
+ this._totalTokens -= entry.tokens
523
+ removed++
524
+ }
525
+ }
526
+
527
+ if (removed > 0) this.scheduleSave()
528
+ return removed
529
+ }
530
+
531
+ /**
532
+ * Remove all chunks that were attached by a specific search query.
533
+ * Returns number of chunks removed.
534
+ */
535
+ detachByQuery(query: string): number {
536
+ let removed = 0
537
+
538
+ for (const [chunkId, entry] of this.entries) {
539
+ if (entry.attachedBy === query || entry.attachedBy.startsWith(`${query} (`)) {
540
+ this.entries.delete(chunkId)
541
+ this._totalTokens -= entry.tokens
542
+ removed++
543
+ }
544
+ }
545
+
546
+ if (removed > 0) this.scheduleSave()
547
+ return removed
548
+ }
549
+
550
+ /**
551
+ * Get all chunks sorted by: search-main first (by score desc), then search-graph, then manual.
552
+ */
553
+ getAll(): WorkspaceEntry[] {
554
+ return Array.from(this.entries.values()).sort((a, b) => {
555
+ // Main chunks first
556
+ const roleOrder = { "search-main": 0, "search-graph": 1, manual: 2 }
557
+ const ra = roleOrder[a.role] ?? 2
558
+ const rb = roleOrder[b.role] ?? 2
559
+ if (ra !== rb) return ra - rb
560
+
561
+ // Within same role: higher score first
562
+ if (a.score !== undefined && b.score !== undefined) {
563
+ return b.score - a.score
564
+ }
565
+
566
+ // Fallback: alphabetical by chunkId
567
+ return a.chunkId.localeCompare(b.chunkId)
568
+ })
569
+ }
570
+
571
+ /**
572
+ * Check if a chunk is in workspace by chunkId.
573
+ * If path is given instead, checks if ANY chunks with that path exist.
574
+ */
575
+ has(chunkIdOrPath: string): boolean {
576
+ // If it looks like a chunkId (contains ":chunk-"), check directly
577
+ if (chunkIdOrPath.includes(":chunk-")) {
578
+ return this.entries.has(chunkIdOrPath)
579
+ }
580
+ // Otherwise treat as path — check if any chunks have this path
581
+ for (const entry of this.entries.values()) {
582
+ if (entry.path === chunkIdOrPath) return true
583
+ }
584
+ return false
585
+ }
586
+
587
+ /**
588
+ * Get a single chunk by chunkId.
589
+ */
590
+ get(chunkId: string): WorkspaceEntry | undefined {
591
+ return this.entries.get(chunkId)
592
+ }
593
+
594
+ /**
595
+ * Get all chunks for a given file path.
596
+ * Returns chunks sorted by chunkIndex (ascending).
597
+ */
598
+ getChunksByPath(filePath: string): WorkspaceEntry[] {
599
+ const chunks: WorkspaceEntry[] = []
600
+ for (const entry of this.entries.values()) {
601
+ if (entry.path === filePath) {
602
+ chunks.push(entry)
603
+ }
604
+ }
605
+ // Sort by chunk index to maintain order
606
+ chunks.sort((a, b) => a.chunkIndex - b.chunkIndex)
607
+ return chunks
608
+ }
609
+
610
+ /**
611
+ * Remove all chunks from workspace.
612
+ */
613
+ clear(): void {
614
+ this.entries.clear()
615
+ this._totalTokens = 0
616
+ this.scheduleSave()
617
+ }
618
+
619
+ /**
620
+ * Total tokens across all cached chunks.
621
+ */
622
+ get totalTokens(): number {
623
+ return this._totalTokens
624
+ }
625
+
626
+ /**
627
+ * Number of chunks in workspace.
628
+ */
629
+ get size(): number {
630
+ return this.entries.size
631
+ }
585
632
 
586
633
  /**
587
634
  * Current session ID.
@@ -605,127 +652,140 @@ class WorkspaceCache {
605
652
  this.ensureBudget()
606
653
  }
607
654
 
608
- /**
609
- * Mark file as dirty (modified by edit/write tool).
610
- * Dirty files bypass read() substitution until next freshen().
611
- */
612
- markDirty(filePath: string): void {
613
- this.dirtyFiles.add(filePath)
614
- }
615
-
616
- /**
617
- * Check if file is dirty (modified since last freshen).
618
- */
619
- isDirty(filePath: string): boolean {
620
- return this.dirtyFiles.has(filePath)
621
- }
622
-
623
- /**
624
- * Clear all dirty flags (called by freshen after re-reading files).
625
- */
626
- clearDirty(): void {
627
- this.dirtyFiles.clear()
628
- }
655
+ /**
656
+ * Mark file as dirty (modified by edit/write tool).
657
+ * Dirty files bypass read() substitution until next freshen().
658
+ * Note: Marks by path, not chunkId (all chunks in file are dirty).
659
+ */
660
+ markDirty(filePath: string): void {
661
+ this.dirtyFiles.add(filePath)
662
+ }
663
+
664
+ /**
665
+ * Check if file is dirty (modified since last freshen).
666
+ */
667
+ isDirty(filePath: string): boolean {
668
+ return this.dirtyFiles.has(filePath)
669
+ }
670
+
671
+ /**
672
+ * Clear all dirty flags (called by freshen after re-reading files).
673
+ */
674
+ clearDirty(): void {
675
+ this.dirtyFiles.clear()
676
+ }
629
677
 
630
678
  // ── Freshness ────────────────────────────────────────────────────────────
631
679
 
632
- /**
633
- * Re-read workspace files from disk if they changed.
634
- * Compares stored contentHash with current file hash.
635
- * Removes entries whose files no longer exist on disk.
636
- *
637
- * Called by message.before hook ensures AI always sees fresh content.
638
- * Cost: ~2ms for 30 files (stat + optional readFile).
639
- *
640
- * Returns { updated, removed } counts.
641
- */
642
- async freshen(): Promise<{ updated: number; removed: number }> {
643
- if (!this.projectRoot) return { updated: 0, removed: 0 }
644
-
645
- let updated = 0
646
- let removed = 0
647
-
648
- for (const [filePath, entry] of this.entries) {
649
- try {
650
- const fullPath = path.join(this.projectRoot, filePath)
651
- const content = await fs.readFile(fullPath, "utf-8")
652
- const newHash = this.hashContent(content)
653
-
654
- if (newHash !== entry.contentHash) {
655
- // File changed on disk — update in-memory
656
- const oldTokens = entry.tokens
657
- const newTokens = this.countTokens(content)
658
-
659
- entry.content = content
660
- entry.contentHash = newHash
661
- entry.tokens = newTokens
662
- this._totalTokens += newTokens - oldTokens
663
-
664
- updated++
665
- }
666
- } catch {
667
- // File no longer exists — remove from workspace
668
- this.entries.delete(filePath)
669
- this._totalTokens -= entry.tokens
670
- removed++
671
- }
672
- }
673
-
674
- if (updated > 0 || removed > 0) {
675
- this.scheduleSave()
676
- }
677
-
678
- // Clear dirty flags all files are now fresh from disk
679
- this.clearDirty()
680
-
681
- return { updated, removed }
682
- }
680
+ /**
681
+ * Detect chunks whose source file changed on disk (counted as stale; no flag is set yet).
682
+ * Removes chunks whose source files no longer exist.
683
+ *
684
+ * NOTE: Chunks cannot be re-read from disk (chunk boundaries not stored).
685
+ * This method only detects changes and removes orphaned chunks.
686
+ * For full content refresh, chunks must be re-attached from search.
687
+ *
688
+ * Called by the message.before hook — ensures AI sees fresh file status.
689
+ * Cost: one readFile + hash per unique file path (chunks sharing a path are checked once).
690
+ *
691
+ * Returns { updated, removed } counts.
692
+ */
693
+ async freshen(): Promise<{ updated: number; removed: number }> {
694
+ if (!this.projectRoot) return { updated: 0, removed: 0 }
695
+
696
+ let stale = 0
697
+ let removed = 0
698
+
699
+ // Group chunks by path for efficient checking
700
+ const pathToChunks = new Map<string, WorkspaceEntry[]>()
701
+ for (const entry of this.entries.values()) {
702
+ if (!pathToChunks.has(entry.path)) {
703
+ pathToChunks.set(entry.path, [])
704
+ }
705
+ pathToChunks.get(entry.path)!.push(entry)
706
+ }
707
+
708
+ // Check each unique file path
709
+ for (const [filePath, chunks] of pathToChunks) {
710
+ try {
711
+ const fullPath = path.join(this.projectRoot, filePath)
712
+ const content = await fs.readFile(fullPath, "utf-8")
713
+ const newHash = this.hashContent(content)
714
+
715
+ // Check if file changed
716
+ const fileChanged = chunks.some(c => c.contentHash !== newHash)
717
+ if (fileChanged) {
718
+ // Mark all chunks from this file as stale
719
+ // (they need re-attachment from search to get fresh content)
720
+ stale += chunks.length
721
+ // TODO(chunk-7): Implement stale flag on WorkspaceEntry
722
+ // For now, chunks remain in cache unchanged and are only counted as stale
723
+ }
724
+ } catch {
725
+ // File no longer exists — remove all chunks from this file
726
+ for (const chunk of chunks) {
727
+ this.entries.delete(chunk.chunkId)
728
+ this._totalTokens -= chunk.tokens
729
+ removed++
730
+ }
731
+ }
732
+ }
733
+
734
+ if (stale > 0 || removed > 0) {
735
+ this.scheduleSave()
736
+ }
737
+
738
+ // Clear dirty flags — all files are now checked
739
+ this.clearDirty()
740
+
741
+ return { updated: stale, removed }
742
+ }
683
743
 
684
744
  // ── Budget management ───────────────────────────────────────────────────
685
745
 
686
- /**
687
- * Evict oldest / lowest-score files until within budget.
688
- * Priority: evict search-graph first, then search-main, then manual.
689
- * Within same role: lowest score first, then oldest first.
690
- */
691
- private ensureBudget(): void {
692
- // Check both token and file count limits
693
- if (
694
- this._totalTokens <= this.config.maxTokens &&
695
- this.entries.size <= this.config.maxFiles
696
- ) {
697
- return
698
- }
699
-
700
- // Build eviction priority list
701
- const candidates = Array.from(this.entries.values()).sort((a, b) => {
702
- // Evict graph files before main files before manual files
703
- const roleOrder = { "search-graph": 0, "search-main": 1, manual: 2 }
704
- const ra = roleOrder[a.role] ?? 2
705
- const rb = roleOrder[b.role] ?? 2
706
- if (ra !== rb) return ra - rb
707
-
708
- // Within same role: lowest score first
709
- const sa = a.score ?? 0
710
- const sb = b.score ?? 0
711
- if (sa !== sb) return sa - sb
712
-
713
- // Then oldest first
714
- return a.attachedAt - b.attachedAt
715
- })
716
-
717
- for (const entry of candidates) {
718
- if (
719
- this._totalTokens <= this.config.maxTokens &&
720
- this.entries.size <= this.config.maxFiles
721
- ) {
722
- break
723
- }
724
-
725
- this.entries.delete(entry.path)
726
- this._totalTokens -= entry.tokens
727
- }
728
- }
746
+ /**
747
+ * Evict oldest / lowest-score chunks until within budget.
748
+ * Priority: evict search-graph first, then search-main, then manual.
749
+ * Within same role: lowest score first, then oldest first.
750
+ */
751
+ private ensureBudget(): void {
752
+ // Check both token and chunk count limits
753
+ if (
754
+ this._totalTokens <= this.config.maxTokens &&
755
+ this.entries.size <= this.config.maxChunks
756
+ ) {
757
+ return
758
+ }
759
+
760
+ // Build eviction priority list
761
+ const candidates = Array.from(this.entries.values()).sort((a, b) => {
762
+ // Evict graph chunks before main chunks before manual chunks
763
+ const roleOrder = { "search-graph": 0, "search-main": 1, manual: 2 }
764
+ const ra = roleOrder[a.role] ?? 2
765
+ const rb = roleOrder[b.role] ?? 2
766
+ if (ra !== rb) return ra - rb
767
+
768
+ // Within same role: lowest score first
769
+ const sa = a.score ?? 0
770
+ const sb = b.score ?? 0
771
+ if (sa !== sb) return sa - sb
772
+
773
+ // Then oldest first
774
+ return a.attachedAt - b.attachedAt
775
+ })
776
+
777
+ for (const entry of candidates) {
778
+ if (
779
+ this._totalTokens <= this.config.maxTokens &&
780
+ this.entries.size <= this.config.maxChunks
781
+ ) {
782
+ break
783
+ }
784
+
785
+ this.entries.delete(entry.chunkId)
786
+ this._totalTokens -= entry.tokens
787
+ }
788
+ }
729
789
 
730
790
  // ── Helpers ─────────────────────────────────────────────────────────────
731
791