@comfanion/usethis_search 4.2.0-dev.3 → 4.3.0-dev.0
- package/hooks/message-before.ts +92 -9
- package/hooks/tool-substitution.ts +167 -11
- package/index.ts +2 -3
- package/package.json +5 -3
- package/tools/read-interceptor.ts +149 -0
- package/tools/search.ts +140 -75
- package/tools/workspace.ts +52 -77
- package/vectorizer/chunkers/lsp-chunker.ts +316 -0
- package/vectorizer/chunkers/markdown-chunker.ts +70 -4
package/hooks/message-before.ts
CHANGED
@@ -57,13 +57,14 @@ export function createWorkspaceInjectionHandler(state: SessionState) {
   // Don't inject or prune for sub-agents (title generation, etc.)
   if (state.isSubAgent) return

-  // ── Prune: replace old
-  // Files are already in workspace injection — no need for big
-  //
-  // (handles case where workspace was cleared but old
+  // ── Prune: replace old tool outputs with compact summaries ────────────
+  // Files are already in workspace injection — no need for big outputs
+  // in chat history. This runs even when workspace is empty
+  // (handles case where workspace was cleared but old outputs remain).
   const wsConfig = workspaceCache.getConfig()
   if (wsConfig.autoPruneSearch !== false) {
     pruneSearchToolOutputs(output.messages)
+    pruneReadToolOutputs(output.messages)
   }

   let entries = workspaceCache.getAll()
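Note: the prune pass above is gated on a single config flag that defaults to on. A minimal sketch of that opt-out semantics, assuming only that getConfig() returns an object whose optional autoPruneSearch field matches the check in the hunk (everything else here is illustrative, not part of the package):

```ts
// Sketch only — the flag name comes from the diff; the shape around it is assumed.
interface WorkspaceConfig {
  autoPruneSearch?: boolean // undefined or true → pruning on; false → off
}

function shouldPrune(cfg: WorkspaceConfig): boolean {
  // Mirrors `wsConfig.autoPruneSearch !== false`: pruning is on by default.
  return cfg.autoPruneSearch !== false
}

console.log(shouldPrune({}))                         // true (default on)
console.log(shouldPrune({ autoPruneSearch: true }))  // true
console.log(shouldPrune({ autoPruneSearch: false })) // false
```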
@@ -225,7 +226,8 @@ function formatFileWithChunks(
 }

 /**
- * Format a single chunk with metadata.
+ * Format a single chunk with metadata and line numbers (cat -n style).
+ * This allows the agent to see exact line numbers without needing grep.
  */
 function formatChunk(entry: ReturnType<typeof workspaceCache.getAll>[0]): string {
   let block = ""
@@ -251,11 +253,21 @@ function formatChunk(entry: ReturnType<typeof workspaceCache.getAll>[0]): string
     block += `<!-- ${meta.join(" | ")} -->\n`
   }

-  // Chunk content
+  // Chunk content WITH LINE NUMBERS (cat -n style)
+  // This allows agent to reference exact lines without grep
+  const startLine = entry.metadata?.startLine ?? 1
+  const lines = entry.content.split("\n")
   const lang = entry.metadata?.language || ""
+
   block += `\`\`\`${lang}\n`
-
-
+
+  for (let i = 0; i < lines.length; i++) {
+    const lineNum = startLine + i
+    const lineContent = lines[i]
+    // Format: "  123| line content" (line number padded to 5 chars, then "| ")
+    block += `${lineNum.toString().padStart(5, " ")}| ${lineContent}\n`
+  }
+
   block += `\`\`\`\n`

   return block
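For reference, a self-contained sketch of the numbering scheme this hunk introduces, assuming a chunk whose metadata reports a start line of 57 (the helper name is illustrative, not part of the package):

```ts
// Sketch of the cat -n style rendering used by formatChunk above.
function numberLines(content: string, startLine: number): string {
  return content
    .split("\n")
    .map((line, i) => `${(startLine + i).toString().padStart(5, " ")}| ${line}`)
    .join("\n")
}

console.log(numberLines("if (state.isSubAgent) return\nreturn entries", 57))
//    57| if (state.isSubAgent) return
//    58| return entries
```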
@@ -270,7 +282,7 @@ function findLastUserMessage(messages: Message[]): Message | null {
   return null
 }

-// ──
+// ── Tool output pruning ─────────────────────────────────────────────────────

 /**
  * Minimum output length to consider pruning.
@@ -344,3 +356,74 @@ export function pruneSearchToolOutputs(messages: Message[]): void {
       `${attachedCount} attached to workspace. Full content available via workspace context.]`
   }
 }
+
+/**
+ * Replace read() tool outputs in chat history with compact summaries.
+ *
+ * Why: read() returns full file content or large chunks.
+ * After workspace injection (or auto-attach), the content is already in context.
+ * Keeping the read output wastes tokens — replace it with a 1-line summary.
+ *
+ * Only prunes completed read calls with output longer than MIN_PRUNE_LENGTH.
+ * The last read output is kept (the agent may still be referencing it).
+ */
+export function pruneReadToolOutputs(messages: Message[]): void {
+  // Find all read tool parts (completed, with long output)
+  const readParts: { msgIdx: number; partIdx: number; part: MessagePart }[] = []
+
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i]
+    const parts = Array.isArray(msg.parts) ? msg.parts : []
+
+    for (let j = 0; j < parts.length; j++) {
+      const part = parts[j]
+      if (
+        part.type === "tool" &&
+        (part.tool === "read" || part.tool === "Read") &&
+        part.state?.status === "completed" &&
+        typeof part.state?.output === "string" &&
+        part.state.output.length > MIN_PRUNE_LENGTH
+      ) {
+        readParts.push({ msgIdx: i, partIdx: j, part })
+      }
+    }
+  }
+
+  // Keep the last read output (agent may reference it) — prune the rest
+  if (readParts.length <= 1) return
+
+  const toPrune = readParts.slice(0, -1)
+
+  for (const { part } of toPrune) {
+    const output = part.state.output as string
+
+    // Extract file path from output or input
+    const filePath = part.input?.filePath || extractFilePathFromOutput(output)
+
+    // Check if it's a substituted output (already compact)
+    if (output.startsWith("[File ") || output.startsWith("[Lines ") || output.startsWith("✓ Attached chunk")) {
+      // Already substituted — keep as-is
+      continue
+    }
+
+    // Replace with compact summary
+    part.state.output = `[Read "${filePath || "file"}" — content available in workspace context]`
+  }
+}
+
+/**
+ * Extract file path from read() output.
+ * Output usually starts with file path or has markers.
+ */
+function extractFilePathFromOutput(output: string): string | null {
+  // Try to find file path in first line
+  const firstLine = output.split("\n")[0]
+
+  // Pattern: "## path/to/file.ts" or "path/to/file.ts"
+  const pathMatch = firstLine.match(/##?\s*(.+?\.(ts|js|go|py|md|txt|yaml|json|tsx|jsx|rs|java|kt|swift|c|cpp|h|cs|rb|php))/)
+  if (pathMatch) {
+    return pathMatch[1].trim()
+  }
+
+  return null
+}
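A usage sketch of the new pruning pass, under stated assumptions: the mock below uses only the minimal part shape the function inspects (the real Message/MessagePart types come from the plugin SDK), and the outputs are made long enough to clear MIN_PRUNE_LENGTH, whose exact value is not shown in this diff:

```ts
import { pruneReadToolOutputs } from "./hooks/message-before.ts"

// Two completed read() calls with large outputs.
const messages: any[] = [
  { parts: [{ type: "tool", tool: "read", input: { filePath: "src/auth.ts" },
              state: { status: "completed", output: "## src/auth.ts\n" + "x".repeat(10_000) } }] },
  { parts: [{ type: "tool", tool: "read", input: { filePath: "src/db.ts" },
              state: { status: "completed", output: "## src/db.ts\n" + "y".repeat(10_000) } }] },
]

pruneReadToolOutputs(messages)

console.log(messages[0].parts[0].state.output)
// [Read "src/auth.ts" — content available in workspace context]
console.log(messages[1].parts[0].state.output.length) // last read kept intact
```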
package/hooks/tool-substitution.ts
CHANGED

@@ -21,6 +21,75 @@

 import type { SessionState } from "./types.ts"
 import { workspaceCache, WorkspaceCache } from "../cache/manager.ts"
+import { getIndexer, releaseIndexer } from "../vectorizer/index.ts"
+
+// ── Chunk Detection Helpers ─────────────────────────────────────────────────
+
+/**
+ * Find which chunk contains the given line offset.
+ *
+ * @param filePath Relative file path
+ * @param offset Line number (0-based as used by read())
+ * @param limit Number of lines to read
+ * @returns Chunk metadata if found, null otherwise
+ */
+async function findChunkByOffset(
+  filePath: string,
+  offset: number,
+  limit?: number
+): Promise<{
+  chunk_id: string
+  content: string
+  chunk_index: number
+  start_line: number
+  end_line: number
+  language?: string
+  function_name?: string
+  class_name?: string
+} | null> {
+  try {
+    const projectRoot = process.cwd()
+    const indexer = await getIndexer(projectRoot, "code")
+
+    try {
+      // Get all chunks for this file
+      const chunks = await indexer.findChunksByPath(filePath)
+
+      if (chunks.length === 0) return null
+
+      // Find chunk that contains this offset
+      // offset is 0-based line number from read()
+      const targetLine = offset
+      const endLine = limit ? offset + limit : offset + 100
+
+      for (const chunk of chunks) {
+        const chunkStart = chunk.start_line ?? 0
+        const chunkEnd = chunk.end_line ?? Number.MAX_SAFE_INTEGER
+
+        // Check if offset falls within this chunk
+        if (targetLine >= chunkStart && targetLine <= chunkEnd) {
+          return {
+            chunk_id: chunk.chunk_id || `${filePath}:chunk-${chunk.chunk_index ?? 0}`,
+            content: chunk.content,
+            chunk_index: chunk.chunk_index ?? 0,
+            start_line: chunkStart,
+            end_line: chunkEnd,
+            language: chunk.language,
+            function_name: chunk.function_name,
+            class_name: chunk.class_name,
+          }
+        }
+      }

+      return null
+    } finally {
+      releaseIndexer(projectRoot, "code")
+    }
+  } catch (error) {
+    // Index not available or error — return null
+    return null
+  }
+}

 /**
  * Create the tool output substitution handler.
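The heart of findChunkByOffset is a containment test over indexed line ranges. A self-contained sketch of that test against an in-memory chunk list (the real lookup goes through getIndexer/findChunksByPath; the helper here is illustrative):

```ts
interface ChunkRange { chunk_index: number; start_line: number; end_line: number }

// read() offsets are 0-based line numbers; a chunk matches when the
// requested line falls inside its [start_line, end_line] span.
function chunkContaining(chunks: ChunkRange[], offset: number): ChunkRange | null {
  return chunks.find((c) => offset >= c.start_line && offset <= c.end_line) ?? null
}

const chunks: ChunkRange[] = [
  { chunk_index: 0, start_line: 0, end_line: 120 },
  { chunk_index: 1, start_line: 121, end_line: 260 },
]
console.log(chunkContaining(chunks, 150)?.chunk_index) // 1
console.log(chunkContaining(chunks, 999))              // null
```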
@@ -75,7 +144,8 @@ export function createToolSubstitutionHandler(state: SessionState, cache?: Works
       // is valuable for AI navigation. Only read() is substituted.
       switch (input.tool) {
         case "read":
-
+        case "Read":
+          await handleReadSubstitution(output, wsCache)
           break
         // case "grep": // Disabled — AI needs line numbers and match context
         // case "glob": // Disabled — discovery tool, paths are metadata not content
@@ -84,16 +154,106 @@ export function createToolSubstitutionHandler(state: SessionState, cache?: Works
 }

 /**
- *
+ * Handle read() substitution with smart chunk detection.
+ *
+ * Two modes:
+ * 1. Full read (no offset) → substitute if file in workspace
+ * 2. Partial read (with offset) → auto-attach chunk, then substitute
+ */
+async function handleReadSubstitution(
+  output: { title: string; output: string; metadata: any },
+  cache: WorkspaceCache
+): Promise<void> {
+  const filePath = output.metadata?.filePath || extractFilePathFromTitle(output.title)
+  if (!filePath) return
+
+  const offset = output.metadata?.offset
+  const limit = output.metadata?.limit
+  const isPartialRead = offset !== undefined
+
+  // MODE 1: Partial read with offset → auto-attach chunk
+  if (isPartialRead) {
+    await handlePartialReadAttach(filePath, offset, limit, output, cache)
+    return
+  }
+
+  // MODE 2: Full read → standard substitution
+  substituteReadOutput(output, cache)
+}
+
+/**
+ * Auto-attach chunk for partial read() with offset.
+ *
+ * When agent does read({ filePath: "src/auth.ts", offset: 150, limit: 50 }),
+ * we find which chunk contains lines 150-200 and attach it to workspace.
+ */
+async function handlePartialReadAttach(
+  filePath: string,
+  offset: number,
+  limit: number | undefined,
+  output: { title: string; output: string; metadata: any },
+  cache: WorkspaceCache
+): Promise<void> {
+  try {
+    // Find which chunk contains this offset
+    const chunk = await findChunkByOffset(filePath, offset, limit)
+
+    if (!chunk) {
+      // Chunk not found (file not indexed or offset out of range)
+      // Keep original output
+      return
+    }
+
+    // Check if chunk already in workspace
+    const existing = cache.get(chunk.chunk_id)
+    if (existing) {
+      // Already attached → replace output with reference
+      output.output = `[Lines ${chunk.start_line}-${chunk.end_line} (chunk ${chunk.chunk_index}) already in workspace — see <workspace_context>]`
+      return
+    }
+
+    // Attach chunk to workspace
+    cache.attach({
+      chunkId: chunk.chunk_id,
+      path: filePath,
+      content: chunk.content,
+      chunkIndex: chunk.chunk_index,
+      role: "manual",
+      attachedAt: Date.now(),
+      attachedBy: `read(offset:${offset})`,
+      metadata: {
+        language: chunk.language,
+        function_name: chunk.function_name,
+        class_name: chunk.class_name,
+        startLine: chunk.start_line,
+        endLine: chunk.end_line,
+      },
+    })
+
+    // Replace output with compact message
+    const meta: string[] = []
+    if (chunk.function_name) meta.push(`fn: ${chunk.function_name}`)
+    if (chunk.class_name) meta.push(`class: ${chunk.class_name}`)
+    const metaStr = meta.length > 0 ? ` (${meta.join(", ")})` : ""
+
+    output.output = `✓ Attached chunk ${chunk.chunk_index} to workspace${metaStr}\n\nLines ${chunk.start_line}-${chunk.end_line} — see <workspace_context> for content.\n\nWorkspace: ${cache.size} chunks, ${cache.totalTokens.toLocaleString()} tokens`
+
+    // Save workspace asynchronously
+    cache.save().catch(() => {})
+  } catch (error) {
+    // Auto-attach failed — keep original output
+    // Silent failure (don't break read())
+  }
+}
+
+/**
+ * Substitute read() output if file has chunks in workspace (full reads only).
  *
- * Input: { filePath: "src/auth.ts"
+ * Input: { filePath: "src/auth.ts" } (no offset/limit)
  * Output: "export function login(...)\n..."
  *
- * If file has chunks in workspace
+ * If file has chunks in workspace:
  * Replace with: "[File "src/auth.ts" has N chunks in workspace (chunks: 2, 5, 7) — see <workspace_context>]"
- *
- * If offset/limit present (partial read):
- * Keep original (partial reads are not in workspace injection)
  */
 function substituteReadOutput(output: { title: string; output: string; metadata: any }, cache: WorkspaceCache): void {
   try {
@@ -101,10 +261,6 @@ function substituteReadOutput(output: { title: string; output: string; metadata:
     const filePath = output.metadata?.filePath || extractFilePathFromTitle(output.title)
     if (!filePath) return

-    // Check if this is a partial read (offset/limit present)
-    const isPartialRead = output.metadata?.offset !== undefined || output.metadata?.limit !== undefined
-    if (isPartialRead) return
-
     // Don't substitute if file was modified (dirty) — workspace has stale content
     if (cache.isDirty(filePath)) return

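Taken together, these two hunks move the partial-read branch out of substituteReadOutput and key it off offset alone. A sketch of the resulting dispatch, assuming only that the tool metadata carries the optional offset/limit fields shown above (the helper name is illustrative):

```ts
interface ReadMeta { filePath?: string; offset?: number; limit?: number }

// An offset — even offset: 0 — now marks a partial read; a bare limit no
// longer does, since handleReadSubstitution checks offset only.
function readMode(meta: ReadMeta): "partial" | "full" {
  return meta.offset !== undefined ? "partial" : "full"
}

console.log(readMode({ filePath: "src/auth.ts" }))                         // "full"
console.log(readMode({ filePath: "src/auth.ts", offset: 150, limit: 50 })) // "partial"
console.log(readMode({ filePath: "src/auth.ts", limit: 50 }))              // "full" (changed behavior)
```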
package/index.ts
CHANGED
@@ -1,7 +1,7 @@
 import type { Plugin } from "@opencode-ai/plugin"

 import search from "./tools/search"
-import { workspace_list,
+import { workspace_list, workspace_forget, workspace_clear, workspace_restore } from "./tools/workspace"
 import FileIndexerPlugin from "./file-indexer"
 import { workspaceCache } from "./cache/manager"
 import { createWorkspaceInjectionHandler } from "./hooks/message-before"

@@ -39,8 +39,7 @@ const UsethisSearchPlugin: Plugin = async ({ directory, client }) => {
     tool: {
       search,
       workspace_list,
-
-      workspace_detach,
+      workspace_forget,
       workspace_clear,
       workspace_restore,
     },

package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "4.
-  "description": "OpenCode plugin: semantic search with
+  "version": "4.3.0-dev.0",
+  "description": "OpenCode plugin: semantic search with auto-attach, line numbers in workspace, simplified API (v4.3: auto-detect modes, read() caching, 99% token reduction, no grep needed)",
   "type": "module",
   "main": "./index.ts",
   "exports": {

@@ -15,7 +15,7 @@
     "index:clear": "bun run cli.ts clear"
   },
   "bin": {
-    "usethis-search": "
+    "usethis-search": "cli.ts"
   },
   "files": [
     "index.ts",

@@ -25,6 +25,7 @@
     "tools/search.ts",
     "tools/codeindex.ts",
     "tools/workspace.ts",
+    "tools/read-interceptor.ts",
     "cache/manager.ts",
     "hooks/message-before.ts",
     "hooks/tool-substitution.ts",

@@ -45,6 +46,7 @@
     "vectorizer/analyzers/lsp-client.ts",
     "vectorizer/chunkers/markdown-chunker.ts",
     "vectorizer/chunkers/code-chunker.ts",
+    "vectorizer/chunkers/lsp-chunker.ts",
     "vectorizer/chunkers/chunker-factory.ts",
     "vectorizer.yaml",
     "README.md",
package/tools/read-interceptor.ts
ADDED

@@ -0,0 +1,149 @@
+import { tool } from "@opencode-ai/plugin"
+import path from "path"
+import fs from "fs/promises"
+
+import { CodebaseIndexer } from "../vectorizer/index.ts"
+
+// FR-043: Logging for intercepted Read() calls
+const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*"
+
+interface ReadLogEntry {
+  timestamp: number
+  filePath: string
+  relPath: string
+  chunksFound: number
+  relatedContextCount: number
+  durationMs: number
+  fallback: boolean
+}
+
+const LOG_MAX_ENTRIES = 500
+
+/**
+ * Append a log entry to the Read() interception log file.
+ * Non-blocking, non-fatal — errors are silently ignored.
+ */
+async function logReadInterception(projectRoot: string, entry: ReadLogEntry): Promise<void> {
+  try {
+    const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
+    await fs.mkdir(path.dirname(logPath), { recursive: true })
+
+    let entries: ReadLogEntry[] = []
+    try {
+      const raw = await fs.readFile(logPath, "utf-8")
+      entries = JSON.parse(raw)
+    } catch {
+      // file doesn't exist or is invalid — start fresh
+    }
+
+    entries.push(entry)
+    // Cap log size to avoid unbounded growth
+    if (entries.length > LOG_MAX_ENTRIES) {
+      entries = entries.slice(-LOG_MAX_ENTRIES)
+    }
+
+    await fs.writeFile(logPath, JSON.stringify(entries, null, 2), "utf-8")
+  } catch {
+    // non-fatal — logging must never break Read
+  }
+}
+
+export default tool({
+  description: `Read file with graph-aware context attachment. When available, this tool searches the file in the index and returns content + related context from the graph (imports, links, etc.).
+
+Use this instead of the standard Read tool for better context awareness.`,
+
+  args: {
+    filePath: tool.schema.string().describe("Path to the file to read"),
+  },
+
+  async execute(args) {
+    const startTime = Date.now()
+    const projectRoot = process.cwd()
+    const filePath = path.isAbsolute(args.filePath) ? args.filePath : path.join(projectRoot, args.filePath)
+
+    const relPath = path.relative(projectRoot, filePath)
+
+    if (DEBUG) {
+      console.log(`[read-interceptor] Intercepted Read("${relPath}")`)
+    }
+
+    // Resilient search: if vector index is corrupted or unavailable, fall back gracefully
+    let fileChunks: any[] = []
+    let allRelated: any[] = []
+    let searchFailed = false
+
+    try {
+      const indexer = await new CodebaseIndexer(projectRoot, "code").init()
+      try {
+        const results = await indexer.search(relPath, 20, false, {})
+        fileChunks = results.filter((r: any) => r.file === relPath)
+
+        allRelated = fileChunks
+          .flatMap((c: any) => c.relatedContext || [])
+          .filter((r: any, i: number, arr: any[]) => arr.findIndex((x: any) => x.chunk_id === r.chunk_id) === i)
+      } catch (searchErr: any) {
+        if (DEBUG) {
+          console.log(`[read-interceptor] Search failed for "${relPath}": ${searchErr.message}`)
+        }
+        searchFailed = true
+      }
+      await indexer.unloadModel()
+    } catch (initErr: any) {
+      if (DEBUG) {
+        console.log(`[read-interceptor] Indexer init failed: ${initErr.message}`)
+      }
+      searchFailed = true
+    }
+
+    const durationMs = Date.now() - startTime
+    const fallback = fileChunks.length === 0
+
+    // FR-043: Log the interception asynchronously (non-blocking)
+    logReadInterception(projectRoot, {
+      timestamp: startTime,
+      filePath: args.filePath,
+      relPath,
+      chunksFound: fileChunks.length,
+      relatedContextCount: allRelated.length,
+      durationMs,
+      fallback,
+    }).catch(() => {})
+
+    if (DEBUG) {
+      console.log(
+        `[read-interceptor] ${relPath}: ${fileChunks.length} chunks, ${allRelated.length} related, ${durationMs}ms${fallback ? " (fallback)" : ""}${searchFailed ? " (search error)" : ""}`
+      )
+    }
+
+    if (fallback) {
+      const reason = searchFailed
+        ? `Search index unavailable (possibly corrupted). Run codeindex({ action: "reindex", index: "code" }) to rebuild.`
+        : `File "${relPath}" not indexed. Use original Read tool or run codeindex({ action: "reindex", index: "code" })`
+      return reason
+    }
+
+    let output = `## ${relPath}\n\n`
+
+    output += `### Content\n\n`
+    for (const chunk of fileChunks) {
+      output += chunk.content + "\n\n"
+    }
+
+    if (allRelated.length > 0) {
+      output += `### Related Context\n\n`
+      for (const rel of allRelated) {
+        const snippet = rel.content.length > 300
+          ? rel.content.substring(0, 300) + "..."
+          : rel.content
+        output += `**${rel.file}** (${rel.relation})\n`
+        output += `\`\`\`\n${snippet}\n\`\`\`\n\n`
+      }
+    }
+
+    return output
+  },
+})
+
+// Export for testing
+export { logReadInterception, ReadLogEntry }
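Since the interceptor appends every call to .opencode/vectors/read-intercept.log.json, the fallback rate can be checked offline. A small companion sketch (not part of the package; the log path and entry shape come from the file above):

```ts
import path from "path"
import fs from "fs/promises"

// Fraction of intercepted reads that fell back to the original Read tool.
async function fallbackRate(projectRoot: string): Promise<number> {
  const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
  const entries: { fallback: boolean }[] = JSON.parse(await fs.readFile(logPath, "utf-8"))
  if (entries.length === 0) return 0
  return entries.filter((e) => e.fallback).length / entries.length
}

fallbackRate(process.cwd())
  .then((rate) => console.log(`fallback rate: ${(rate * 100).toFixed(1)}%`))
  .catch(() => console.log("no intercept log yet"))
```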