@comfanion/usethis_search 4.2.0-dev.4 → 4.3.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/message-before.ts +92 -9
- package/hooks/tool-substitution.ts +167 -11
- package/index.ts +2 -3
- package/package.json +3 -2
- package/tools/read-interceptor.ts +149 -0
- package/tools/search.ts +140 -75
- package/tools/workspace.ts +52 -77
- package/vectorizer/chunkers/markdown-chunker.ts +70 -4
package/hooks/message-before.ts
CHANGED
@@ -57,13 +57,14 @@ export function createWorkspaceInjectionHandler(state: SessionState) {
   // Don't inject or prune for sub-agents (title generation, etc.)
   if (state.isSubAgent) return

-  // ── Prune: replace old
-  // Files are already in workspace injection — no need for big
-  //
-  // (handles case where workspace was cleared but old
+  // ── Prune: replace old tool outputs with compact summaries ────────────
+  // Files are already in workspace injection — no need for big outputs
+  // in chat history. This runs even when workspace is empty
+  // (handles case where workspace was cleared but old outputs remain).
   const wsConfig = workspaceCache.getConfig()
   if (wsConfig.autoPruneSearch !== false) {
     pruneSearchToolOutputs(output.messages)
+    pruneReadToolOutputs(output.messages)
   }

   let entries = workspaceCache.getAll()
@@ -225,7 +226,8 @@ function formatFileWithChunks(
 }

 /**
- * Format a single chunk with metadata.
+ * Format a single chunk with metadata and line numbers (cat -n style).
+ * This allows the agent to see exact line numbers without needing grep.
  */
 function formatChunk(entry: ReturnType<typeof workspaceCache.getAll>[0]): string {
   let block = ""
@@ -251,11 +253,21 @@ function formatChunk(entry: ReturnType<typeof workspaceCache.getAll>[0]): string
     block += `<!-- ${meta.join(" | ")} -->\n`
   }

-  // Chunk content
+  // Chunk content WITH LINE NUMBERS (cat -n style)
+  // This allows agent to reference exact lines without grep
+  const startLine = entry.metadata?.startLine ?? 1
+  const lines = entry.content.split("\n")
   const lang = entry.metadata?.language || ""
+
   block += `\`\`\`${lang}\n`
-
-
+
+  for (let i = 0; i < lines.length; i++) {
+    const lineNum = startLine + i
+    const lineContent = lines[i]
+    // Format: "  123| line content" (5 chars for line number + tab)
+    block += `${lineNum.toString().padStart(5, " ")}| ${lineContent}\n`
+  }
+
   block += `\`\`\`\n`

   return block
@@ -270,7 +282,7 @@ function findLastUserMessage(messages: Message[]): Message | null {
   return null
 }

-// ──
+// ── Tool output pruning ─────────────────────────────────────────────────────

 /**
  * Minimum output length to consider pruning.
@@ -344,3 +356,74 @@ export function pruneSearchToolOutputs(messages: Message[]): void {
       `${attachedCount} attached to workspace. Full content available via workspace context.]`
   }
 }
+
+/**
+ * Replace read() tool outputs in chat history with compact summaries.
+ *
+ * Why: read() returns full file content or large chunks.
+ * After workspace injection (or auto-attach), the content is already in context.
+ * Keeping the read output wastes tokens — replace it with a 1-line summary.
+ *
+ * Only prunes completed read calls with output longer than MIN_PRUNE_LENGTH.
+ * The last read output is kept (the agent may still be referencing it).
+ */
+export function pruneReadToolOutputs(messages: Message[]): void {
+  // Find all read tool parts (completed, with long output)
+  const readParts: { msgIdx: number; partIdx: number; part: MessagePart }[] = []
+
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i]
+    const parts = Array.isArray(msg.parts) ? msg.parts : []
+
+    for (let j = 0; j < parts.length; j++) {
+      const part = parts[j]
+      if (
+        part.type === "tool" &&
+        (part.tool === "read" || part.tool === "Read") &&
+        part.state?.status === "completed" &&
+        typeof part.state?.output === "string" &&
+        part.state.output.length > MIN_PRUNE_LENGTH
+      ) {
+        readParts.push({ msgIdx: i, partIdx: j, part })
+      }
+    }
+  }
+
+  // Keep the last read output (agent may reference it) — prune the rest
+  if (readParts.length <= 1) return
+
+  const toPrune = readParts.slice(0, -1)
+
+  for (const { part } of toPrune) {
+    const output = part.state.output as string
+
+    // Extract file path from output or input
+    const filePath = part.input?.filePath || extractFilePathFromOutput(output)
+
+    // Check if it's a substituted output (already compact)
+    if (output.startsWith("[File ") || output.startsWith("[Lines ") || output.startsWith("✓ Attached chunk")) {
+      // Already substituted — keep as-is
+      continue
+    }
+
+    // Replace with compact summary
+    part.state.output = `[Read "${filePath || "file"}" — content available in workspace context]`
+  }
+}
+
+/**
+ * Extract file path from read() output.
+ * Output usually starts with file path or has markers.
+ */
+function extractFilePathFromOutput(output: string): string | null {
+  // Try to find file path in first line
+  const firstLine = output.split("\n")[0]
+
+  // Pattern: "## path/to/file.ts" or "path/to/file.ts"
+  const pathMatch = firstLine.match(/##?\s*(.+?\.(ts|js|go|py|md|txt|yaml|json|tsx|jsx|rs|java|kt|swift|c|cpp|h|cs|rb|php))/)
+  if (pathMatch) {
+    return pathMatch[1].trim()
+  }
+
+  return null
+}
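
The numbering format added to `formatChunk` can be exercised in isolation. A minimal sketch, where a plain string stands in for `entry.content` and `startLine` for `entry.metadata?.startLine`:

```ts
// Minimal sketch of the cat -n style numbering added to formatChunk() above.
function renderNumbered(content: string, startLine = 1): string {
  return content
    .split("\n")
    .map((line, i) => `${(startLine + i).toString().padStart(5, " ")}| ${line}`)
    .join("\n")
}

// renderNumbered("const a = 1\nconst b = 2", 258)
// → "  258| const a = 1\n  259| const b = 2"
```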

package/hooks/tool-substitution.ts
CHANGED

@@ -21,6 +21,75 @@

 import type { SessionState } from "./types.ts"
 import { workspaceCache, WorkspaceCache } from "../cache/manager.ts"
+import { getIndexer, releaseIndexer } from "../vectorizer/index.ts"
+
+// ── Chunk Detection Helpers ─────────────────────────────────────────────────
+
+/**
+ * Find which chunk contains the given line offset.
+ *
+ * @param filePath Relative file path
+ * @param offset Line number (0-based as used by read())
+ * @param limit Number of lines to read
+ * @returns Chunk metadata if found, null otherwise
+ */
+async function findChunkByOffset(
+  filePath: string,
+  offset: number,
+  limit?: number
+): Promise<{
+  chunk_id: string
+  content: string
+  chunk_index: number
+  start_line: number
+  end_line: number
+  language?: string
+  function_name?: string
+  class_name?: string
+} | null> {
+  try {
+    const projectRoot = process.cwd()
+    const indexer = await getIndexer(projectRoot, "code")
+
+    try {
+      // Get all chunks for this file
+      const chunks = await indexer.findChunksByPath(filePath)
+
+      if (chunks.length === 0) return null
+
+      // Find chunk that contains this offset
+      // offset is 0-based line number from read()
+      const targetLine = offset
+      const endLine = limit ? offset + limit : offset + 100
+
+      for (const chunk of chunks) {
+        const chunkStart = chunk.start_line ?? 0
+        const chunkEnd = chunk.end_line ?? Number.MAX_SAFE_INTEGER
+
+        // Check if offset falls within this chunk
+        if (targetLine >= chunkStart && targetLine <= chunkEnd) {
+          return {
+            chunk_id: chunk.chunk_id || `${filePath}:chunk-${chunk.chunk_index ?? 0}`,
+            content: chunk.content,
+            chunk_index: chunk.chunk_index ?? 0,
+            start_line: chunkStart,
+            end_line: chunkEnd,
+            language: chunk.language,
+            function_name: chunk.function_name,
+            class_name: chunk.class_name,
+          }
+        }
+      }
+
+      return null
+    } finally {
+      releaseIndexer(projectRoot, "code")
+    }
+  } catch (error) {
+    // Index not available or error — return null
+    return null
+  }
+}

 /**
  * Create the tool output substitution handler.
@@ -75,7 +144,8 @@ export function createToolSubstitutionHandler(state: SessionState, cache?: Works
     // is valuable for AI navigation. Only read() is substituted.
     switch (input.tool) {
       case "read":
-
+      case "Read":
+        await handleReadSubstitution(output, wsCache)
         break
       // case "grep": // Disabled — AI needs line numbers and match context
       // case "glob": // Disabled — discovery tool, paths are metadata not content
@@ -84,16 +154,106 @@ export function createToolSubstitutionHandler(state: SessionState, cache?: Works
 }

 /**
- *
+ * Handle read() substitution with smart chunk detection.
+ *
+ * Two modes:
+ * 1. Full read (no offset) → substitute if file in workspace
+ * 2. Partial read (with offset) → auto-attach chunk, then substitute
+ */
+async function handleReadSubstitution(
+  output: { title: string; output: string; metadata: any },
+  cache: WorkspaceCache
+): Promise<void> {
+  const filePath = output.metadata?.filePath || extractFilePathFromTitle(output.title)
+  if (!filePath) return
+
+  const offset = output.metadata?.offset
+  const limit = output.metadata?.limit
+  const isPartialRead = offset !== undefined
+
+  // MODE 1: Partial read with offset → auto-attach chunk
+  if (isPartialRead) {
+    await handlePartialReadAttach(filePath, offset, limit, output, cache)
+    return
+  }
+
+  // MODE 2: Full read → standard substitution
+  substituteReadOutput(output, cache)
+}
+
+/**
+ * Auto-attach chunk for partial read() with offset.
+ *
+ * When agent does read({ filePath: "src/auth.ts", offset: 150, limit: 50 }),
+ * we find which chunk contains lines 150-200 and attach it to workspace.
+ */
+async function handlePartialReadAttach(
+  filePath: string,
+  offset: number,
+  limit: number | undefined,
+  output: { title: string; output: string; metadata: any },
+  cache: WorkspaceCache
+): Promise<void> {
+  try {
+    // Find which chunk contains this offset
+    const chunk = await findChunkByOffset(filePath, offset, limit)
+
+    if (!chunk) {
+      // Chunk not found (file not indexed or offset out of range)
+      // Keep original output
+      return
+    }
+
+    // Check if chunk already in workspace
+    const existing = cache.get(chunk.chunk_id)
+    if (existing) {
+      // Already attached → replace output with reference
+      output.output = `[Lines ${chunk.start_line}-${chunk.end_line} (chunk ${chunk.chunk_index}) already in workspace — see <workspace_context>]`
+      return
+    }
+
+    // Attach chunk to workspace
+    cache.attach({
+      chunkId: chunk.chunk_id,
+      path: filePath,
+      content: chunk.content,
+      chunkIndex: chunk.chunk_index,
+      role: "manual",
+      attachedAt: Date.now(),
+      attachedBy: `read(offset:${offset})`,
+      metadata: {
+        language: chunk.language,
+        function_name: chunk.function_name,
+        class_name: chunk.class_name,
+        startLine: chunk.start_line,
+        endLine: chunk.end_line,
+      },
+    })
+
+    // Replace output with compact message
+    const meta: string[] = []
+    if (chunk.function_name) meta.push(`fn: ${chunk.function_name}`)
+    if (chunk.class_name) meta.push(`class: ${chunk.class_name}`)
+    const metaStr = meta.length > 0 ? ` (${meta.join(", ")})` : ""
+
+    output.output = `✓ Attached chunk ${chunk.chunk_index} to workspace${metaStr}\n\nLines ${chunk.start_line}-${chunk.end_line} — see <workspace_context> for content.\n\nWorkspace: ${cache.size} chunks, ${cache.totalTokens.toLocaleString()} tokens`
+
+    // Save workspace asynchronously
+    cache.save().catch(() => {})
+  } catch (error) {
+    // Auto-attach failed — keep original output
+    // Silent failure (don't break read())
+  }
+}
+
+/**
+ * Substitute read() output if file has chunks in workspace (full reads only).
  *
- * Input: { filePath: "src/auth.ts"
+ * Input: { filePath: "src/auth.ts" } (no offset/limit)
  * Output: "export function login(...)\n..."
  *
- * If file has chunks in workspace
+ * If file has chunks in workspace:
  * Replace with: "[File "src/auth.ts" has N chunks in workspace (chunks: 2, 5, 7) — see <workspace_context>]"
- *
- * If offset/limit present (partial read):
- * Keep original (partial reads are not in workspace injection)
  */
 function substituteReadOutput(output: { title: string; output: string; metadata: any }, cache: WorkspaceCache): void {
   try {
@@ -101,10 +261,6 @@ function substituteReadOutput(output: { title: string; output: string; metadata:
     const filePath = output.metadata?.filePath || extractFilePathFromTitle(output.title)
     if (!filePath) return

-    // Check if this is a partial read (offset/limit present)
-    const isPartialRead = output.metadata?.offset !== undefined || output.metadata?.limit !== undefined
-    if (isPartialRead) return
-
     // Don't substitute if file was modified (dirty) — workspace has stale content
     if (cache.isDirty(filePath)) return

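
For orientation, the partial-read path above reduces to a range lookup over the file's indexed chunks. A self-contained sketch with a hypothetical two-chunk layout (real chunks come from `indexer.findChunksByPath()`):

```ts
// Hypothetical chunk layout for illustration only.
const chunks = [
  { chunk_id: "src/auth.ts:chunk-0", start_line: 0, end_line: 120 },
  { chunk_id: "src/auth.ts:chunk-1", start_line: 121, end_line: 240 },
]

// Same containment test as findChunkByOffset(): the first chunk whose
// [start_line, end_line] range includes the read() offset wins.
function chunkForOffset(offset: number) {
  return chunks.find(c => offset >= c.start_line && offset <= c.end_line) ?? null
}

// read({ filePath: "src/auth.ts", offset: 150, limit: 50 })
// → chunkForOffset(150)?.chunk_id === "src/auth.ts:chunk-1"
// That chunk is attached to the workspace and the tool output is replaced
// with a one-line "[Lines 121-240 ...]" reference.
```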
package/index.ts
CHANGED
@@ -1,7 +1,7 @@
 import type { Plugin } from "@opencode-ai/plugin"

 import search from "./tools/search"
-import { workspace_list,
+import { workspace_list, workspace_forget, workspace_clear, workspace_restore } from "./tools/workspace"
 import FileIndexerPlugin from "./file-indexer"
 import { workspaceCache } from "./cache/manager"
 import { createWorkspaceInjectionHandler } from "./hooks/message-before"
@@ -39,8 +39,7 @@ const UsethisSearchPlugin: Plugin = async ({ directory, client }) => {
     tool: {
       search,
       workspace_list,
-
-      workspace_detach,
+      workspace_forget,
       workspace_clear,
       workspace_restore,
     },
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "4.
-  "description": "OpenCode plugin: semantic search with
+  "version": "4.3.0-dev.0",
+  "description": "OpenCode plugin: semantic search with auto-attach, line numbers in workspace, simplified API (v4.3: auto-detect modes, read() caching, 99% token reduction, no grep needed)",
   "type": "module",
   "main": "./index.ts",
   "exports": {
@@ -25,6 +25,7 @@
     "tools/search.ts",
     "tools/codeindex.ts",
     "tools/workspace.ts",
+    "tools/read-interceptor.ts",
     "cache/manager.ts",
     "hooks/message-before.ts",
     "hooks/tool-substitution.ts",
package/tools/read-interceptor.ts
ADDED

@@ -0,0 +1,149 @@
+import { tool } from "@opencode-ai/plugin"
+import path from "path"
+import fs from "fs/promises"
+
+import { CodebaseIndexer } from "../vectorizer/index.ts"
+
+// FR-043: Logging for intercepted Read() calls
+const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*"
+
+interface ReadLogEntry {
+  timestamp: number
+  filePath: string
+  relPath: string
+  chunksFound: number
+  relatedContextCount: number
+  durationMs: number
+  fallback: boolean
+}
+
+const LOG_MAX_ENTRIES = 500
+
+/**
+ * Append a log entry to the Read() interception log file.
+ * Non-blocking, non-fatal — errors are silently ignored.
+ */
+async function logReadInterception(projectRoot: string, entry: ReadLogEntry): Promise<void> {
+  try {
+    const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
+    await fs.mkdir(path.dirname(logPath), { recursive: true })
+
+    let entries: ReadLogEntry[] = []
+    try {
+      const raw = await fs.readFile(logPath, "utf-8")
+      entries = JSON.parse(raw)
+    } catch {
+      // file doesn't exist or is invalid — start fresh
+    }
+
+    entries.push(entry)
+    // Cap log size to avoid unbounded growth
+    if (entries.length > LOG_MAX_ENTRIES) {
+      entries = entries.slice(-LOG_MAX_ENTRIES)
+    }
+
+    await fs.writeFile(logPath, JSON.stringify(entries, null, 2), "utf-8")
+  } catch {
+    // non-fatal — logging must never break Read
+  }
+}
+
+export default tool({
+  description: `Read file with graph-aware context attachment. When available, this tool searches the file in the index and returns content + related context from the graph (imports, links, etc.).
+
+Use this instead of the standard Read tool for better context awareness.`,
+
+  args: {
+    filePath: tool.schema.string().describe("Path to the file to read"),
+  },
+
+  async execute(args) {
+    const startTime = Date.now()
+    const projectRoot = process.cwd()
+    const filePath = path.isAbsolute(args.filePath) ? args.filePath : path.join(projectRoot, args.filePath)
+
+    const relPath = path.relative(projectRoot, filePath)
+
+    if (DEBUG) {
+      console.log(`[read-interceptor] Intercepted Read("${relPath}")`)
+    }
+
+    // Resilient search: if vector index is corrupted or unavailable, fall back gracefully
+    let fileChunks: any[] = []
+    let allRelated: any[] = []
+    let searchFailed = false
+
+    try {
+      const indexer = await new CodebaseIndexer(projectRoot, "code").init()
+      try {
+        const results = await indexer.search(relPath, 20, false, {})
+        fileChunks = results.filter((r: any) => r.file === relPath)
+
+        allRelated = fileChunks
+          .flatMap((c: any) => c.relatedContext || [])
+          .filter((r: any, i: number, arr: any[]) => arr.findIndex((x: any) => x.chunk_id === r.chunk_id) === i)
+      } catch (searchErr: any) {
+        if (DEBUG) {
+          console.log(`[read-interceptor] Search failed for "${relPath}": ${searchErr.message}`)
+        }
+        searchFailed = true
+      }
+      await indexer.unloadModel()
+    } catch (initErr: any) {
+      if (DEBUG) {
+        console.log(`[read-interceptor] Indexer init failed: ${initErr.message}`)
+      }
+      searchFailed = true
+    }
+
+    const durationMs = Date.now() - startTime
+    const fallback = fileChunks.length === 0
+
+    // FR-043: Log the interception asynchronously (non-blocking)
+    logReadInterception(projectRoot, {
+      timestamp: startTime,
+      filePath: args.filePath,
+      relPath,
+      chunksFound: fileChunks.length,
+      relatedContextCount: allRelated.length,
+      durationMs,
+      fallback,
+    }).catch(() => {})
+
+    if (DEBUG) {
+      console.log(
+        `[read-interceptor] ${relPath}: ${fileChunks.length} chunks, ${allRelated.length} related, ${durationMs}ms${fallback ? " (fallback)" : ""}${searchFailed ? " (search error)" : ""}`
+      )
+    }
+
+    if (fallback) {
+      const reason = searchFailed
+        ? `Search index unavailable (possibly corrupted). Run codeindex({ action: "reindex", index: "code" }) to rebuild.`
+        : `File "${relPath}" not indexed. Use original Read tool or run codeindex({ action: "reindex", index: "code" })`
+      return reason
+    }
+
+    let output = `## ${relPath}\n\n`
+
+    output += `### Content\n\n`
+    for (const chunk of fileChunks) {
+      output += chunk.content + "\n\n"
+    }
+
+    if (allRelated.length > 0) {
+      output += `### Related Context\n\n`
+      for (const rel of allRelated) {
+        const snippet = rel.content.length > 300
+          ? rel.content.substring(0, 300) + "..."
+          : rel.content
+        output += `**${rel.file}** (${rel.relation})\n`
+        output += `\`\`\`\n${snippet}\n\`\`\`\n\n`
+      }
+    }
+
+    return output
+  },
+})
+
+// Export for testing
+export { logReadInterception, ReadLogEntry }
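
The log file written by `logReadInterception` is a JSON array of `ReadLogEntry` objects, capped at 500 entries. An illustrative entry (field names from the interface above; values invented):

```ts
// One entry in .opencode/vectors/read-intercept.log.json — values are made up.
const exampleEntry = {
  timestamp: 1718000000000,  // Date.now() at interception start
  filePath: "src/auth.ts",   // as passed to the tool
  relPath: "src/auth.ts",    // relative to the project root
  chunksFound: 3,
  relatedContextCount: 5,
  durationMs: 42,
  fallback: false,           // true when no chunks were found for the file
}
```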
package/tools/search.ts
CHANGED
@@ -178,47 +178,70 @@ function parseFilter(filter: string): {
 }

 export default tool({
-  description: `Search
+  description: `Search codebase and automatically attach relevant context to workspace.

-
-
-
-
+Accepts any query - semantic search, file path, or chunk ID:
+- "authentication logic" → finds relevant code
+- "docs/architecture.md" → attaches file
+- "src/auth.ts:chunk-5" → attaches specific chunk

-
-
-
-
+Results are optimized for context - top chunks auto-attached with expanded context
+(related code, imports, class methods).
+
+IMPORTANT: Workspace has limited token budget. Use workspace_forget() to remove
+irrelevant files or old searches before adding new context.
+
+Choose index based on what you're looking for:
+- index: "code" → search source code
+- index: "docs" → search documentation
+- searchAll: true → search everywhere

 Examples:
 - search({ query: "authentication logic" })
 - search({ query: "how to deploy", index: "docs" })
-- search({ query: "
-- search({
-- search({ path: "docs/architecture.md" })
-- search({ path: "src/auth.ts", index: "code" })`,
+- search({ query: "docs/prd.md" }) // attach file
+- search({ query: "internal/domain/", filter: "*.go" })`,

   args: {
-    query: tool.schema.string().
-
-
-
-
-    searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of just one"),
-    filter: tool.schema.string().optional().describe("Filter results by path or language. Examples: 'internal/domain/', '*.go', 'internal/**/*.go', 'service'"),
+    query: tool.schema.string().describe("What to search: semantic query, file path, or chunk ID"),
+    index: tool.schema.string().optional().default("code").describe("Where to search: 'code', 'docs', or leave empty for auto-detect"),
+    limit: tool.schema.number().optional().describe("Max results (default: 10)"),
+    searchAll: tool.schema.boolean().optional().default(false).describe("Search all indexes instead of one"),
+    filter: tool.schema.string().optional().describe("Filter by path/language: 'internal/domain/', '*.go', 'service'"),
   },

   async execute(args) {
     const projectRoot = process.cwd()

     try {
-
-
-
-
+      if (!args.query) {
+        return `Error: query is required\n\nExamples:\n- search({ query: "authentication logic" })\n- search({ query: "docs/architecture.md" })\n- search({ query: "src/auth.ts:chunk-5" })`
+      }
+
+      // Auto-detect mode from query
+      let mode: "chunkId" | "path" | "semantic"
+      let chunkId: string | undefined
+      let filePath: string | undefined
+      let semanticQuery: string | undefined
+
+      // 1. Check if it's a chunk ID (contains ":chunk-")
+      if (args.query.includes(":chunk-")) {
+        mode = "chunkId"
+        chunkId = args.query
+      }
+      // 2. Check if it's a file path (has extension or starts with common paths)
+      else if (
+        args.query.match(/\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i) ||
+        args.query.match(/^(src|docs|internal|pkg|lib|app|pages|components|api)\//i) ||
+        args.query.includes("/")
+      ) {
+        mode = "path"
+        filePath = args.query
       }
-
-
+      // 3. Otherwise, it's a semantic search
+      else {
+        mode = "semantic"
+        semanticQuery = args.query
       }

       // Load config defaults (parsed from vectorizer.yaml)
@@ -234,23 +257,23 @@ Examples:
       // ══════════════════════════════════════════════════════════════════════
       // MODE 1: Direct chunk attach by chunkId
       // ══════════════════════════════════════════════════════════════════════
-      if (
+      if (mode === "chunkId") {
         const indexer = await getIndexer(projectRoot, indexName)
         try {
-          const chunk = await indexer.findChunkById(
+          const chunk = await indexer.findChunkById(chunkId!)
           if (!chunk) {
-            return `Chunk "${
+            return `Chunk "${chunkId}" not found in index "${indexName}".\n\nMake sure:\n1. The file is indexed\n2. The chunk ID is correct (format: "path:chunk-N")\n3. You're searching the right index`
           }

           // Attach to workspace
           workspaceCache.attach({
-            chunkId:
+            chunkId: chunkId!,
             path: chunk.file,
             content: chunk.content,
             chunkIndex: chunk.chunk_index ?? 0,
             role: "manual",
             attachedAt: Date.now(),
-            attachedBy: `direct:${
+            attachedBy: `direct:${chunkId}`,
             metadata: {
               language: chunk.language,
               function_name: chunk.function_name,
@@ -263,8 +286,8 @@ Examples:

           workspaceCache.save().catch(() => {})

-          const entry = workspaceCache.get(
-          return `✓ Attached chunk to workspace\n\nChunk: ${
+          const entry = workspaceCache.get(chunkId!)!
+          return `✓ Attached chunk to workspace\n\nChunk: ${chunkId}\nFile: ${chunk.file}\nTokens: ${entry.tokens.toLocaleString()}\nLanguage: ${chunk.language}\nLines: ${chunk.start_line}-${chunk.end_line}\n\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
         } finally {
           releaseIndexer(projectRoot, indexName)
         }
@@ -273,27 +296,27 @@ Examples:
       // ══════════════════════════════════════════════════════════════════════
       // MODE 2: File attach by path (all chunks)
       // ══════════════════════════════════════════════════════════════════════
-      if (
+      if (mode === "path") {
         const indexer = await getIndexer(projectRoot, indexName)
         try {
-          const chunks = await indexer.findChunksByPath(
+          const chunks = await indexer.findChunksByPath(filePath!)
           if (chunks.length === 0) {
-            return `No chunks found for file "${
+            return `No chunks found for file "${filePath}" in index "${indexName}".\n\nMake sure:\n1. The file exists and is indexed\n2. The path is correct (relative to project root)\n3. You're searching the right index\n\nRun: bunx usethis_search reindex`
           }

           // Attach all chunks to workspace
           let totalTokens = 0
           for (const chunk of chunks) {
-            const
+            const chunkIdForChunk = chunk.chunk_id || `${filePath}:chunk-${chunk.chunk_index ?? 0}`

             workspaceCache.attach({
-              chunkId,
-              path:
+              chunkId: chunkIdForChunk,
+              path: filePath!,
               content: chunk.content,
               chunkIndex: chunk.chunk_index ?? 0,
               role: "manual",
               attachedAt: Date.now(),
-              attachedBy: `file:${
+              attachedBy: `file:${filePath}`,
               metadata: {
                 language: chunk.language,
                 function_name: chunk.function_name,
@@ -304,13 +327,13 @@ Examples:
               },
             })

-            const entry = workspaceCache.get(
+            const entry = workspaceCache.get(chunkIdForChunk)!
             totalTokens += entry.tokens
           }

           workspaceCache.save().catch(() => {})

-          return `✓ Attached file to workspace\n\nFile: ${
+          return `✓ Attached file to workspace\n\nFile: ${filePath}\nChunks: ${chunks.length}\nTokens: ${totalTokens.toLocaleString()}\nLanguage: ${chunks[0].language}\n\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
         } finally {
           releaseIndexer(projectRoot, indexName)
         }
@@ -352,7 +375,7 @@ Examples:
         for (const idx of indexes) {
           const indexer = await getIndexer(projectRoot, idx)
           try {
-            const results = await indexer.search(
+            const results = await indexer.search(semanticQuery!, limit, includeArchived, searchOptions)
             allResults.push(...results.map((r: any) => ({ ...r, _index: idx })))
           } finally {
             releaseIndexer(projectRoot, idx)
@@ -390,14 +413,14 @@ Examples:

         if (available.length > 0) {
           const list = available.map(i => `"${i}"`).join(", ")
-          return `Index "${indexName}" not found. Available indexes: ${list}.\n\nTry: search({ query: "${
+          return `Index "${indexName}" not found. Available indexes: ${list}.\n\nTry: search({ query: "${semanticQuery}", index: "${available[0]}" })\nOr search all: search({ query: "${semanticQuery}", searchAll: true })`
         }
         return `No indexes found. The codebase needs to be indexed first.\n\nRun the CLI: bunx usethis_search reindex`
       }

       const indexer = await getIndexer(projectRoot, indexName)
       try {
-        const results = await indexer.search(
+        const results = await indexer.search(semanticQuery!, limit, includeArchived, searchOptions)
         allResults = results.map((r: any) => ({ ...r, _index: indexName }))
       } finally {
         releaseIndexer(projectRoot, indexName)
@@ -411,20 +434,58 @@ Examples:
       })

       // ── Filter — apply path/language constraints from `filter` param ───────
+      // Strategy: Try strict filter first, fallback to relaxed if too few results
+      const unfilteredResults = [...allResults]
+      let filterApplied = false
+      let filterRelaxed = false
+
       if (filterParsed.pathPrefix) {
         const prefix = filterParsed.pathPrefix
-
+        const strictFiltered = allResults.filter(r => r.file && r.file.startsWith(prefix))
+
+        // Fallback: if strict gives < 3 results, try "contains" instead of "startsWith"
+        if (strictFiltered.length < 3 && allResults.length > strictFiltered.length) {
+          const relaxedFiltered = allResults.filter(r => r.file && r.file.includes(prefix))
+          if (relaxedFiltered.length > strictFiltered.length) {
+            allResults = relaxedFiltered
+            filterRelaxed = true
+          } else {
+            allResults = strictFiltered
+          }
+        } else {
+          allResults = strictFiltered
+        }
+        filterApplied = true
       }
+
       if (filterParsed.pathContains) {
         const needle = filterParsed.pathContains.toLowerCase()
         allResults = allResults.filter(r => r.file && r.file.toLowerCase().includes(needle))
+        filterApplied = true
       }
+
       if (filterParsed.language) {
-
+        const strictFiltered = allResults.filter(r => r.language === filterParsed.language)
+
+        // Fallback: if strict language filter gives < 3 results, include "unknown" language
+        if (strictFiltered.length < 3 && allResults.length > strictFiltered.length) {
+          const relaxedFiltered = allResults.filter(r =>
+            !r.language || r.language === filterParsed.language || r.language === "unknown"
+          )
+          if (relaxedFiltered.length > strictFiltered.length) {
+            allResults = relaxedFiltered
+            filterRelaxed = true
+          } else {
+            allResults = strictFiltered
+          }
+        } else {
+          allResults = strictFiltered
+        }
+        filterApplied = true
       }

       // ── Reranking — boost results where query keywords appear in text ──────
-      const queryKeywords =
+      const queryKeywords = semanticQuery!.toLowerCase().split(/\s+/).filter((w: string) => w.length > 2)
       for (const r of allResults) {
         const isBM25Only = !!r._bm25Only
         const vectorScore = r._distance != null ? Math.max(0, 1 - r._distance / 2) : 0
@@ -454,7 +515,7 @@ Examples:
       if (topChunks.length === 0) {
         const scope = args.searchAll ? "any index" : `index "${indexName}"`
         const filterNote = args.filter ? ` with filter "${args.filter}"` : ""
-        return `No results found in ${scope}${filterNote} for: "${
+        return `No results found in ${scope}${filterNote} for: "${semanticQuery}" (min score: ${minScore})\n\nTry:\n- Different keywords or phrasing\n- Remove or broaden the filter\n- search({ query: "...", searchAll: true })`
       }

       // ══════════════════════════════════════════════════════════════════════
@@ -479,15 +540,15 @@ Examples:
         // Attach main chunk
         const chunkId = chunk.chunkId || `${chunk.file}:chunk-${chunk.index ?? 0}`

-
-
-
-
-
-
-
-
-
+        workspaceCache.attach({
+          chunkId,
+          path: chunk.file,
+          content: chunk.content,
+          chunkIndex: chunk.index ?? 0,
+          role: "search-main",
+          attachedAt: Date.now(),
+          attachedBy: semanticQuery!,
+          score: chunk._finalScore,
           metadata: {
             language: chunk.language,
             function_name: chunk.function_name,
@@ -511,14 +572,14 @@ Examples:
           // Check budget before adding
           if (workspaceCache.size >= wsConfig.maxChunks) break

-
-
-
-
-
-
-
-
+          workspaceCache.attach({
+            chunkId: expChunkId,
+            path: expChunk.file,
+            content: expChunk.content,
+            chunkIndex: expChunk.chunk_index ?? 0,
+            role: "search-context",
+            attachedAt: Date.now(),
+            attachedBy: `${semanticQuery} (${reason})`,
             score: chunk._finalScore * 0.9, // Slightly lower score than main
             metadata: {
               language: expChunk.language,
@@ -550,14 +611,14 @@ Examples:
           const relChunkId = rel.chunkId || `${rel.file}:chunk-${rel.index ?? 0}`
           if (alreadyAttached.has(relChunkId)) continue

-
-
-
-
-
-
-
-
+          workspaceCache.attach({
+            chunkId: relChunkId,
+            path: rel.file,
+            content: rel.content,
+            chunkIndex: rel.index ?? 0,
+            role: "search-graph",
+            attachedAt: Date.now(),
+            attachedBy: `${semanticQuery} (${rel.relation} from ${chunkId})`,
             score: rel.score,
             metadata: {
               language: rel.language,
@@ -590,12 +651,16 @@ Examples:
       const hasBM25Only = allResults.some((r: any) => r._bm25Only)
       const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
       const filterLabel = args.filter ? ` filter:"${args.filter}"` : ""
-      let output = `## Search: "${
+      let output = `## Search: "${semanticQuery}" (${scope}${filterLabel})\n\n`

       if (hasBM25Only) {
         output += `> **BM25-only mode** -- vector embeddings not yet available. Quality will improve after embedding completes.\n\n`
       }

+      if (filterRelaxed) {
+        output += `> **Filter relaxed.** Strict filter gave too few results. Showing broader matches.\n\n`
+      }
+
       if (topScore < 0.45) {
         output += `> **Low confidence.** Best score: ${topScore.toFixed(3)}. Try more specific keywords.\n\n`
       }
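
The v4.3 API collapses the old `query`/`path`/chunk-ID parameters into the single `query` argument. A self-contained sketch of the classification order, with the regexes copied from the diff above:

```ts
type SearchMode = "chunkId" | "path" | "semantic"

// Order matters: chunk IDs also contain "/", so they are checked first.
function detectMode(query: string): SearchMode {
  if (query.includes(":chunk-")) return "chunkId"
  if (
    /\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i.test(query) ||
    /^(src|docs|internal|pkg|lib|app|pages|components|api)\//i.test(query) ||
    query.includes("/")
  ) return "path"
  return "semantic"
}

detectMode("src/auth.ts:chunk-5")  // "chunkId"
detectMode("docs/architecture.md") // "path"
detectMode("authentication logic") // "semantic"
```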
package/tools/workspace.ts
CHANGED
@@ -155,102 +155,77 @@ export const workspace_list = tool({
   },
 })

-// ── workspace.
+// ── workspace.forget ────────────────────────────────────────────────────────

-export const
-  description: `
+export const workspace_forget = tool({
+  description: `Remove chunks from workspace context to optimize context size and focus.

-
-
-  },
+IMPORTANT: Regularly clean up workspace by removing irrelevant files or old search results.
+This keeps context focused and prevents token budget overflow.

-
-
-
-
-
-      const fullPath = path.join(projectRoot, args.filePath)
-      const content = await fs.readFile(fullPath, "utf-8")
-
-      // Generate chunkId for manual attachment: "path:chunk-0"
-      const chunkId = `${args.filePath}:chunk-0`
-
-      // Check if already attached
-      if (workspaceCache.has(args.filePath)) {
-        const existing = workspaceCache.getChunksByPath(args.filePath)
-        if (existing.length > 0) {
-          const first = existing[0]
-          const totalTokens = existing.reduce((sum, c) => sum + c.tokens, 0)
-          return `File "${args.filePath}" is already in workspace (${existing.length} chunk${existing.length > 1 ? "s" : ""}).\nTokens: ${totalTokens.toLocaleString()} | Role: ${first.role} | Score: ${first.score?.toFixed(3) ?? "n/a"}`
-        }
-      }
+Auto-detects what to remove based on input:
+- Chunk ID: "src/auth.ts:chunk-5"
+- File path: "docs/architecture.md" (removes ALL chunks)
+- Search query: "authentication logic" (removes chunks from this search)
+- Age: "5" (removes chunks older than 5 minutes)

-
-
-
-
-        chunkIndex: 0,
-        role: "manual",
-        attachedAt: Date.now(),
-        attachedBy: "manual",
-      })
-
-      const entry = workspaceCache.get(chunkId)!
-      return `Attached "${args.filePath}" to workspace as single chunk.\nChunkId: ${chunkId}\nTokens: ${entry.tokens.toLocaleString()}\nWorkspace total: ${workspaceCache.totalTokens.toLocaleString()} tokens (${workspaceCache.size} chunks)`
-    } catch (error: any) {
-      return `Failed to attach "${args.filePath}": ${error.message || String(error)}`
-    }
-  },
-})
-
-// ── workspace.detach ────────────────────────────────────────────────────────
-
-export const workspace_detach = tool({
-  description: `Remove chunks from workspace context. Can detach by chunkId, by file path (removes ALL chunks of that file), by search query, or by age.`,
+Examples:
+- workspace_forget({ what: "docs/prd.md" })
+- workspace_forget({ what: "5" }) // older than 5 min
+- workspace_forget({ what: "src/auth.ts:chunk-3" })`,

   args: {
-
-    filePath: tool.schema.string().optional().describe("File path to remove (removes ALL chunks of that file)"),
-    query: tool.schema.string().optional().describe("Remove all chunks attached by this search query"),
-    olderThan: tool.schema.number().optional().describe("Remove chunks older than N minutes"),
+    what: tool.schema.string().describe("What to forget: chunk ID, file path, search query, or age in minutes"),
   },

   async execute(args) {
     let removed = 0
-
-
-
-
-
+
+    // Auto-detect what to remove
+    // 1. Check if it's a chunk ID (contains ":chunk-")
+    if (args.what.includes(":chunk-")) {
+      const entry = workspaceCache.get(args.what)
       if (!entry) {
-        return `Chunk "${args.
+        return `Chunk "${args.what}" not found in workspace.`
       }
-
-      removed = workspaceCache.detach(args.chunkId) ? 1 : 0
+      removed = workspaceCache.detach(args.what) ? 1 : 0
       if (removed === 0) {
-        return `Failed to remove chunk "${args.
+        return `Failed to remove chunk "${args.what}".`
       }
-
-
-
-
+      return `Removed chunk "${args.what}" from workspace.\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
+    }
+
+    // 2. Check if it's a number (age in minutes)
+    const ageMatch = args.what.match(/^(\d+)$/)
+    if (ageMatch) {
+      const minutes = parseInt(ageMatch[1], 10)
+      removed = workspaceCache.detachOlderThan(minutes * 60 * 1000)
+      return `Removed ${removed} chunk(s) older than ${minutes} minutes.\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
+    }
+
+    // 3. Check if it's a file path (has extension or common path prefixes)
+    if (
+      args.what.match(/\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i) ||
+      args.what.match(/^(src|docs|internal|pkg|lib|app|pages|components|api)\//i) ||
+      args.what.includes("/")
+    ) {
+      const fileChunks = workspaceCache.getChunksByPath(args.what)
       if (fileChunks.length === 0) {
-        return `File "${args.
+        return `File "${args.what}" not found in workspace.`
       }
-
-      removed = workspaceCache.detachByPath(args.filePath)
+      removed = workspaceCache.detachByPath(args.what)
       if (removed === 0) {
-        return `Failed to remove chunks from "${args.
+        return `Failed to remove chunks from "${args.what}".`
       }
-
-      removed = workspaceCache.detachByQuery(args.query)
-    } else if (args.olderThan) {
-      removed = workspaceCache.detachOlderThan(args.olderThan * 60 * 1000)
-    } else {
-      return `Specify chunkId, filePath, query, or olderThan to detach chunks.`
+      return `Removed ${removed} chunk(s) from "${args.what}".\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
     }
-
-
+
+    // 4. Otherwise, treat as search query
+    removed = workspaceCache.detachByQuery(args.what)
+    if (removed === 0) {
+      return `No chunks found attached by query "${args.what}".\n\nTip: Use workspace_list() to see what's in workspace.`
+    }
+    return `Removed ${removed} chunk(s) from search "${args.what}".\nWorkspace: ${workspaceCache.size} chunks, ${workspaceCache.totalTokens.toLocaleString()} tokens`
   },
 })

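
`workspace_forget` replaces `workspace_detach`'s four optional parameters with a single `what` string and the same style of auto-detection. A sketch of the dispatch order (the extension regex is abbreviated here; the real list is in the diff above):

```ts
// Dispatch order from the execute() body above.
function classifyForgetInput(what: string): "chunk" | "age" | "file" | "query" {
  if (what.includes(":chunk-")) return "chunk" // "src/auth.ts:chunk-3"
  if (/^\d+$/.test(what)) return "age"         // "5" → older than 5 minutes
  if (/\.(md|ts|js|go|py)$/i.test(what) || what.includes("/")) return "file"
  return "query"                               // "authentication logic"
}
```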
package/vectorizer/chunkers/markdown-chunker.ts
CHANGED

@@ -10,6 +10,7 @@ export interface MarkdownChunkConfig {
   max_chunk_size: number // split sections larger than this (chars)
   split_by_headings: boolean
   preserve_heading_hierarchy: boolean
+  skip_low_priority: boolean // Skip low-priority sections (SQL, aggregates, etc.)
 }

 export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
@@ -17,6 +18,7 @@ export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
   max_chunk_size: 8000, // Large chunks for docs (SQL schemas, API specs, etc.)
   split_by_headings: true,
   preserve_heading_hierarchy: true,
+  skip_low_priority: true, // Skip SQL schemas, aggregates, views by default
 }

 export interface MarkdownChunk {
@@ -24,6 +26,7 @@ export interface MarkdownChunk {
   heading_context: string // "H1 > H2 > H3"
   start_line?: number
   end_line?: number
+  priority?: "high" | "normal" | "low" // Chunk priority for ranking
 }

 // ── Internal types ──────────────────────────────────────────────────────────
@@ -34,6 +37,55 @@ interface Section {
   body: string
   start_line: number
   end_line: number
+  priority: "high" | "normal" | "low"
+}
+
+// ── Priority detection ──────────────────────────────────────────────────────
+
+/**
+ * Detect if heading indicates low-priority content (SQL schemas, aggregates, etc.)
+ * These sections are often "noise" when searching for business logic.
+ */
+function isLowPriorityHeading(heading: string): boolean {
+  const lower = heading.toLowerCase()
+
+  // SQL-related sections (schemas, DDL, migrations)
+  if (lower.includes("sql schema") ||
+      lower.includes("database schema") ||
+      lower.includes("continuous aggregate") ||
+      lower.includes("materialized view") ||
+      lower.includes("ddl") ||
+      lower.includes("migration")) {
+    return true
+  }
+
+  // Generated/auto-generated content
+  if (lower.includes("auto-generated") ||
+      lower.includes("generated schema") ||
+      lower.includes("api reference") && lower.includes("generated")) {
+    return true
+  }
+
+  // Large reference tables (often boilerplate)
+  if (lower.includes("full reference") ||
+      lower.includes("complete list") ||
+      lower.includes("all endpoints")) {
+    return true
+  }
+
+  return false
+}
+
+/** Determine priority level for a section based on heading and context. */
+function getSectionPriority(heading: string, body: string): "high" | "normal" | "low" {
+  if (isLowPriorityHeading(heading)) return "low"
+
+  // High-priority: short sections with code examples (tutorials, guides)
+  if (body.includes("```") && body.length < 2000) {
+    return "high"
+  }
+
+  return "normal"
 }

 // ── Parsing ─────────────────────────────────────────────────────────────────
@@ -42,15 +94,16 @@ interface Section {
 function parseSections(content: string): Section[] {
   const lines = content.split("\n")
   const sections: Section[] = []
-  let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0 }
+  let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0, priority: "normal" }

   for (let i = 0; i < lines.length; i++) {
     const line = lines[i]
     const headingMatch = line.match(/^(#{1,6})\s+(.+)$/)
     if (headingMatch) {
-      // Push previous section
+      // Push previous section (with priority calculated)
       if (currentSection.body.trim() || currentSection.heading) {
         currentSection.end_line = i - 1
+        currentSection.priority = getSectionPriority(currentSection.heading, currentSection.body)
         sections.push(currentSection)
       }
       currentSection = {
@@ -59,15 +112,17 @@ function parseSections(content: string): Section[] {
         body: "",
         start_line: i,
         end_line: 0,
+        priority: "normal", // Will be calculated when section ends
       }
     } else {
       currentSection.body += line + "\n"
     }
   }

-  // Push last section
+  // Push last section (with priority calculated)
   if (currentSection.body.trim() || currentSection.heading) {
     currentSection.end_line = lines.length - 1
+    currentSection.priority = getSectionPriority(currentSection.heading, currentSection.body)
     sections.push(currentSection)
   }

@@ -191,12 +246,18 @@ export function chunkMarkdown(
       heading_context: headingContext,
       start_line: section.start_line,
       end_line: section.end_line,
+      priority: section.priority,
     })
   }

+  // Filter low-priority sections if configured
+  const filteredChunks = config.skip_low_priority
+    ? rawChunks.filter(chunk => chunk.priority !== "low")
+    : rawChunks
+
   // Merge small sections with previous
   const merged: MarkdownChunk[] = []
-  for (const chunk of
+  for (const chunk of filteredChunks) {
     if (
       merged.length > 0 &&
       chunk.content.length < config.min_chunk_size
@@ -211,6 +272,10 @@ export function chunkMarkdown(
       if (chunk.heading_context) {
         prev.heading_context = chunk.heading_context
       }
+      // Keep highest priority (high > normal > low)
+      if (chunk.priority === "high" || (chunk.priority === "normal" && prev.priority === "low")) {
+        prev.priority = chunk.priority
+      }
     } else {
       merged.push({ ...chunk })
     }
@@ -227,6 +292,7 @@ export function chunkMarkdown(
         heading_context: chunk.heading_context,
         start_line: part.start_line,
         end_line: part.end_line,
+        priority: chunk.priority, // Inherit priority from parent chunk
       })
     }
   } else {
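
Expected behavior of the new priority heuristics on a few sample headings — a sketch; the calls mirror the functions added above:

```ts
isLowPriorityHeading("SQL Schema: events table") // true  → dropped when skip_low_priority
isLowPriorityHeading("Continuous Aggregates")    // true
isLowPriorityHeading("Authentication Flow")      // false

getSectionPriority("Quick Start", "```ts\nconst x = 1\n```") // "high" (code fence, < 2000 chars)
getSectionPriority("Overview", "Plain prose, no code.")      // "normal"
```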