@comfanion/usethis_search 4.2.0-dev.3 → 4.3.0-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,13 +57,14 @@ export function createWorkspaceInjectionHandler(state: SessionState) {
   // Don't inject or prune for sub-agents (title generation, etc.)
   if (state.isSubAgent) return
 
-  // ── Prune: replace old search tool outputs with compact summaries ────
-  // Files are already in workspace injection — no need for big search
-  // output in chat history. This runs even when workspace is empty
-  // (handles case where workspace was cleared but old search outputs remain).
+  // ── Prune: replace old tool outputs with compact summaries ────────────
+  // Files are already in workspace injection — no need for big outputs
+  // in chat history. This runs even when workspace is empty
+  // (handles case where workspace was cleared but old outputs remain).
   const wsConfig = workspaceCache.getConfig()
   if (wsConfig.autoPruneSearch !== false) {
     pruneSearchToolOutputs(output.messages)
+    pruneReadToolOutputs(output.messages)
   }
 
   let entries = workspaceCache.getAll()
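Worth noting: the `autoPruneSearch !== false` guard makes pruning opt-out rather than opt-in. A minimal TypeScript sketch of that gating (`shouldPrune` and `WsConfig` are hypothetical illustrations, not part of the package):

```ts
// Sketch: prune gating is opt-out; config shape inferred from the hunk above
type WsConfig = { autoPruneSearch?: boolean }

function shouldPrune(cfg: WsConfig): boolean {
  // `!== false` means both `undefined` and `true` enable pruning
  return cfg.autoPruneSearch !== false
}

console.log(shouldPrune({}))                         // true: default on
console.log(shouldPrune({ autoPruneSearch: true }))  // true
console.log(shouldPrune({ autoPruneSearch: false })) // false: explicit opt-out
```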
@@ -225,7 +226,8 @@ function formatFileWithChunks(
 }
 
 /**
- * Format a single chunk with metadata.
+ * Format a single chunk with metadata and line numbers (cat -n style).
+ * This allows the agent to see exact line numbers without needing grep.
  */
 function formatChunk(entry: ReturnType<typeof workspaceCache.getAll>[0]): string {
   let block = ""
@@ -251,11 +253,21 @@ function formatChunk(entry: ReturnType<typeof workspaceCache.getAll>[0]): string
     block += `<!-- ${meta.join(" | ")} -->\n`
   }
 
-  // Chunk content
+  // Chunk content WITH LINE NUMBERS (cat -n style)
+  // This allows the agent to reference exact lines without grep
+  const startLine = entry.metadata?.startLine ?? 1
+  const lines = entry.content.split("\n")
   const lang = entry.metadata?.language || ""
+
   block += `\`\`\`${lang}\n`
-  block += entry.content
-  if (!entry.content.endsWith("\n")) block += "\n"
+
+  for (let i = 0; i < lines.length; i++) {
+    const lineNum = startLine + i
+    const lineContent = lines[i]
+    // Format: "  123| line content" (5-char right-aligned line number + "| ")
+    block += `${lineNum.toString().padStart(5, " ")}| ${lineContent}\n`
+  }
+
   block += `\`\`\`\n`
 
   return block
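A standalone sketch of the gutter format the new loop produces (`numberLines` is a hypothetical helper that mirrors the `padStart(5)` plus `| ` separator above):

```ts
// Hypothetical helper mirroring the numbering loop in formatChunk
function numberLines(content: string, startLine = 1): string {
  return content
    .split("\n")
    .map((line, i) => `${(startLine + i).toString().padStart(5, " ")}| ${line}`)
    .join("\n")
}

console.log(numberLines("export function login() {\n  return token\n}", 42))
//    42| export function login() {
//    43|   return token
//    44| }
```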
@@ -270,7 +282,7 @@ function findLastUserMessage(messages: Message[]): Message | null {
   return null
 }
 
-// ── Search output pruning ────────────────────────────────────────────────────
+// ── Tool output pruning ─────────────────────────────────────────────────────
 
 /**
  * Minimum output length to consider pruning.
@@ -344,3 +356,74 @@ export function pruneSearchToolOutputs(messages: Message[]): void {
       `${attachedCount} attached to workspace. Full content available via workspace context.]`
   }
 }
+
+/**
+ * Replace read() tool outputs in chat history with compact summaries.
+ *
+ * Why: read() returns full file content or large chunks.
+ * After workspace injection (or auto-attach), the content is already in context.
+ * Keeping the read output wastes tokens — replace it with a 1-line summary.
+ *
+ * Only prunes completed read calls with output longer than MIN_PRUNE_LENGTH.
+ * The last read output is kept (the agent may still be referencing it).
+ */
+export function pruneReadToolOutputs(messages: Message[]): void {
+  // Find all read tool parts (completed, with long output)
+  const readParts: { msgIdx: number; partIdx: number; part: MessagePart }[] = []
+
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i]
+    const parts = Array.isArray(msg.parts) ? msg.parts : []
+
+    for (let j = 0; j < parts.length; j++) {
+      const part = parts[j]
+      if (
+        part.type === "tool" &&
+        (part.tool === "read" || part.tool === "Read") &&
+        part.state?.status === "completed" &&
+        typeof part.state?.output === "string" &&
+        part.state.output.length > MIN_PRUNE_LENGTH
+      ) {
+        readParts.push({ msgIdx: i, partIdx: j, part })
+      }
+    }
+  }
+
+  // Keep the last read output (agent may reference it) — prune the rest
+  if (readParts.length <= 1) return
+
+  const toPrune = readParts.slice(0, -1)
+
+  for (const { part } of toPrune) {
+    const output = part.state.output as string
+
+    // Extract file path from output or input
+    const filePath = part.input?.filePath || extractFilePathFromOutput(output)
+
+    // Check if it's a substituted output (already compact)
+    if (output.startsWith("[File ") || output.startsWith("[Lines ") || output.startsWith("✓ Attached chunk")) {
+      // Already substituted — keep as-is
+      continue
+    }
+
+    // Replace with compact summary
+    part.state.output = `[Read "${filePath || "file"}" — content available in workspace context]`
+  }
+}
+
+/**
+ * Extract file path from read() output.
+ * Output usually starts with file path or has markers.
+ */
+function extractFilePathFromOutput(output: string): string | null {
+  // Try to find file path in first line
+  const firstLine = output.split("\n")[0]
+
+  // Pattern: "## path/to/file.ts" or "path/to/file.ts"
+  const pathMatch = firstLine.match(/##?\s*(.+?\.(ts|js|go|py|md|txt|yaml|json|tsx|jsx|rs|java|kt|swift|c|cpp|h|cs|rb|php))/)
+  if (pathMatch) {
+    return pathMatch[1].trim()
+  }
+
+  return null
+}
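To illustrate the pruning rule, here is a sketch with deliberately simplified message shapes (the real `Message`/`MessagePart` types come from the plugin API; `Part`, `Msg`, and the `read` factory are hypothetical):

```ts
// Simplified shapes for illustration; real types come from the plugin API
type Part = {
  type: string
  tool?: string
  input?: { filePath?: string }
  state?: { status?: string; output?: string }
}
type Msg = { parts: Part[] }

// Hypothetical factory for a completed read() tool part
const read = (filePath: string, output: string): Part => ({
  type: "tool",
  tool: "read",
  input: { filePath },
  state: { status: "completed", output },
})

const messages: Msg[] = [
  { parts: [read("src/a.ts", "x".repeat(5000))] }, // long: candidate for pruning
  { parts: [read("src/b.ts", "y".repeat(5000))] }, // long, but last: kept
]

// After pruneReadToolOutputs(messages):
//   a.ts output -> `[Read "src/a.ts" — content available in workspace context]`
//   b.ts output -> unchanged (the last read is always kept)
```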
@@ -21,6 +21,75 @@
 
 import type { SessionState } from "./types.ts"
 import { workspaceCache, WorkspaceCache } from "../cache/manager.ts"
+import { getIndexer, releaseIndexer } from "../vectorizer/index.ts"
+
+// ── Chunk Detection Helpers ─────────────────────────────────────────────────
+
+/**
+ * Find which chunk contains the given line offset.
+ *
+ * @param filePath Relative file path
+ * @param offset Line number (0-based as used by read())
+ * @param limit Number of lines to read
+ * @returns Chunk metadata if found, null otherwise
+ */
+async function findChunkByOffset(
+  filePath: string,
+  offset: number,
+  limit?: number
+): Promise<{
+  chunk_id: string
+  content: string
+  chunk_index: number
+  start_line: number
+  end_line: number
+  language?: string
+  function_name?: string
+  class_name?: string
+} | null> {
+  try {
+    const projectRoot = process.cwd()
+    const indexer = await getIndexer(projectRoot, "code")
+
+    try {
+      // Get all chunks for this file
+      const chunks = await indexer.findChunksByPath(filePath)
+
+      if (chunks.length === 0) return null
+
+      // Find chunk that contains this offset
+      // offset is 0-based line number from read()
+      const targetLine = offset
+      const endLine = limit ? offset + limit : offset + 100
+
+      for (const chunk of chunks) {
+        const chunkStart = chunk.start_line ?? 0
+        const chunkEnd = chunk.end_line ?? Number.MAX_SAFE_INTEGER
+
+        // Check if offset falls within this chunk
+        if (targetLine >= chunkStart && targetLine <= chunkEnd) {
+          return {
+            chunk_id: chunk.chunk_id || `${filePath}:chunk-${chunk.chunk_index ?? 0}`,
+            content: chunk.content,
+            chunk_index: chunk.chunk_index ?? 0,
+            start_line: chunkStart,
+            end_line: chunkEnd,
+            language: chunk.language,
+            function_name: chunk.function_name,
+            class_name: chunk.class_name,
+          }
+        }
+      }
+
+      return null
+    } finally {
+      releaseIndexer(projectRoot, "code")
+    }
+  } catch (error) {
+    // Index not available or error — return null
+    return null
+  }
+}
 
 /**
  * Create the tool output substitution handler.
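The heart of `findChunkByOffset` is the start/end containment test. A self-contained sketch with the indexer replaced by an in-memory chunk list (`Chunk` and `locate` are hypothetical stand-ins for the loop above):

```ts
// Hypothetical stand-in for the containment loop, with the indexer
// calls replaced by an in-memory chunk list
type Chunk = { chunk_index: number; start_line: number; end_line: number }

function locate(chunks: Chunk[], offset: number): Chunk | null {
  for (const chunk of chunks) {
    // Same test as above: does the requested line fall inside this chunk?
    if (offset >= chunk.start_line && offset <= chunk.end_line) return chunk
  }
  return null
}

const chunks: Chunk[] = [
  { chunk_index: 0, start_line: 0, end_line: 120 },
  { chunk_index: 1, start_line: 121, end_line: 260 },
]

console.log(locate(chunks, 150)?.chunk_index) // 1: read(offset: 150) lands in chunk 1
console.log(locate(chunks, 999))              // null: offset past the last chunk
```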
@@ -75,7 +144,8 @@ export function createToolSubstitutionHandler(state: SessionState, cache?: Works
   // is valuable for AI navigation. Only read() is substituted.
   switch (input.tool) {
     case "read":
-      substituteReadOutput(output, wsCache)
+    case "Read":
+      await handleReadSubstitution(output, wsCache)
       break
     // case "grep": // Disabled — AI needs line numbers and match context
     // case "glob": // Disabled — discovery tool, paths are metadata not content
@@ -84,16 +154,106 @@
 }
 
 /**
- * Substitute read() output if file has chunks in workspace.
+ * Handle read() substitution with smart chunk detection.
+ *
+ * Two modes:
+ * 1. Full read (no offset) → substitute if file in workspace
+ * 2. Partial read (with offset) → auto-attach chunk, then substitute
+ */
+async function handleReadSubstitution(
+  output: { title: string; output: string; metadata: any },
+  cache: WorkspaceCache
+): Promise<void> {
+  const filePath = output.metadata?.filePath || extractFilePathFromTitle(output.title)
+  if (!filePath) return
+
+  const offset = output.metadata?.offset
+  const limit = output.metadata?.limit
+  const isPartialRead = offset !== undefined
+
+  // MODE 1: Partial read with offset → auto-attach chunk
+  if (isPartialRead) {
+    await handlePartialReadAttach(filePath, offset, limit, output, cache)
+    return
+  }
+
+  // MODE 2: Full read → standard substitution
+  substituteReadOutput(output, cache)
+}
+
+/**
+ * Auto-attach chunk for partial read() with offset.
+ *
+ * When the agent does read({ filePath: "src/auth.ts", offset: 150, limit: 50 }),
+ * we find which chunk contains lines 150-200 and attach it to workspace.
+ */
+async function handlePartialReadAttach(
+  filePath: string,
+  offset: number,
+  limit: number | undefined,
+  output: { title: string; output: string; metadata: any },
+  cache: WorkspaceCache
+): Promise<void> {
+  try {
+    // Find which chunk contains this offset
+    const chunk = await findChunkByOffset(filePath, offset, limit)
+
+    if (!chunk) {
+      // Chunk not found (file not indexed or offset out of range)
+      // Keep original output
+      return
+    }
+
+    // Check if chunk already in workspace
+    const existing = cache.get(chunk.chunk_id)
+    if (existing) {
+      // Already attached → replace output with reference
+      output.output = `[Lines ${chunk.start_line}-${chunk.end_line} (chunk ${chunk.chunk_index}) already in workspace — see <workspace_context>]`
+      return
+    }
+
+    // Attach chunk to workspace
+    cache.attach({
+      chunkId: chunk.chunk_id,
+      path: filePath,
+      content: chunk.content,
+      chunkIndex: chunk.chunk_index,
+      role: "manual",
+      attachedAt: Date.now(),
+      attachedBy: `read(offset:${offset})`,
+      metadata: {
+        language: chunk.language,
+        function_name: chunk.function_name,
+        class_name: chunk.class_name,
+        startLine: chunk.start_line,
+        endLine: chunk.end_line,
+      },
+    })
+
+    // Replace output with compact message
+    const meta: string[] = []
+    if (chunk.function_name) meta.push(`fn: ${chunk.function_name}`)
+    if (chunk.class_name) meta.push(`class: ${chunk.class_name}`)
+    const metaStr = meta.length > 0 ? ` (${meta.join(", ")})` : ""
+
+    output.output = `✓ Attached chunk ${chunk.chunk_index} to workspace${metaStr}\n\nLines ${chunk.start_line}-${chunk.end_line} — see <workspace_context> for content.\n\nWorkspace: ${cache.size} chunks, ${cache.totalTokens.toLocaleString()} tokens`
+
+    // Save workspace asynchronously
+    cache.save().catch(() => {})
+  } catch (error) {
+    // Auto-attach failed — keep original output
+    // Silent failure (don't break read())
+  }
+}
+
+/**
+ * Substitute read() output if file has chunks in workspace (full reads only).
  *
- * Input: { filePath: "src/auth.ts", offset?: 0, limit?: 100 }
+ * Input: { filePath: "src/auth.ts" } (no offset/limit)
  * Output: "export function login(...)\n..."
  *
- * If file has chunks in workspace AND no offset/limit (full read):
+ * If file has chunks in workspace:
  * Replace with: "[File "src/auth.ts" has N chunks in workspace (chunks: 2, 5, 7) — see <workspace_context>]"
- *
- * If offset/limit present (partial read):
- * Keep original (partial reads are not in workspace injection)
  */
 function substituteReadOutput(output: { title: string; output: string; metadata: any }, cache: WorkspaceCache): void {
   try {
@@ -101,10 +261,6 @@ function substituteReadOutput(output: { title: string; output: string; metadata:
     const filePath = output.metadata?.filePath || extractFilePathFromTitle(output.title)
     if (!filePath) return
 
-    // Check if this is a partial read (offset/limit present)
-    const isPartialRead = output.metadata?.offset !== undefined || output.metadata?.limit !== undefined
-    if (isPartialRead) return
-
     // Don't substitute if file was modified (dirty) — workspace has stale content
     if (cache.isDirty(filePath)) return
 
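With the old offset/limit guard removed here, mode selection lives entirely in `handleReadSubstitution`: only `offset` decides the mode, so a `limit` without an `offset` is still treated as a full read. A small sketch of that rule (`ReadMeta` and `mode` are hypothetical helpers):

```ts
// Hypothetical helper mirroring the dispatch in handleReadSubstitution
type ReadMeta = { filePath?: string; offset?: number; limit?: number }

function mode(meta: ReadMeta): "attach-chunk" | "substitute" {
  // Only `offset` matters; `limit` alone still counts as a full read
  return meta.offset !== undefined ? "attach-chunk" : "substitute"
}

console.log(mode({ filePath: "src/auth.ts" }))                         // "substitute"
console.log(mode({ filePath: "src/auth.ts", offset: 150, limit: 50 })) // "attach-chunk"
console.log(mode({ filePath: "src/auth.ts", limit: 50 }))              // "substitute"
```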
package/index.ts CHANGED
@@ -1,7 +1,7 @@
 import type { Plugin } from "@opencode-ai/plugin"
 
 import search from "./tools/search"
-import { workspace_list, workspace_attach, workspace_detach, workspace_clear, workspace_restore } from "./tools/workspace"
+import { workspace_list, workspace_forget, workspace_clear, workspace_restore } from "./tools/workspace"
 import FileIndexerPlugin from "./file-indexer"
 import { workspaceCache } from "./cache/manager"
 import { createWorkspaceInjectionHandler } from "./hooks/message-before"
@@ -39,8 +39,7 @@ const UsethisSearchPlugin: Plugin = async ({ directory, client }) => {
     tool: {
       search,
       workspace_list,
-      workspace_attach,
-      workspace_detach,
+      workspace_forget,
       workspace_clear,
       workspace_restore,
     },
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "4.2.0-dev.3",
-  "description": "OpenCode plugin: semantic search with chunk-based workspace injection (v4.2-dev: chunk-level context, granular detach, improved token efficiency)",
+  "version": "4.3.0-dev.0",
+  "description": "OpenCode plugin: semantic search with auto-attach, line numbers in workspace, simplified API (v4.3: auto-detect modes, read() caching, 99% token reduction, no grep needed)",
   "type": "module",
   "main": "./index.ts",
   "exports": {
@@ -15,7 +15,7 @@
     "index:clear": "bun run cli.ts clear"
   },
   "bin": {
-    "usethis-search": "./cli.ts"
+    "usethis-search": "cli.ts"
   },
   "files": [
     "index.ts",
@@ -25,6 +25,7 @@
     "tools/search.ts",
     "tools/codeindex.ts",
     "tools/workspace.ts",
+    "tools/read-interceptor.ts",
     "cache/manager.ts",
     "hooks/message-before.ts",
     "hooks/tool-substitution.ts",
@@ -45,6 +46,7 @@
     "vectorizer/analyzers/lsp-client.ts",
     "vectorizer/chunkers/markdown-chunker.ts",
     "vectorizer/chunkers/code-chunker.ts",
+    "vectorizer/chunkers/lsp-chunker.ts",
     "vectorizer/chunkers/chunker-factory.ts",
     "vectorizer.yaml",
     "README.md",
@@ -0,0 +1,149 @@
+import { tool } from "@opencode-ai/plugin"
+import path from "path"
+import fs from "fs/promises"
+
+import { CodebaseIndexer } from "../vectorizer/index.ts"
+
+// FR-043: Logging for intercepted Read() calls
+const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*"
+
+interface ReadLogEntry {
+  timestamp: number
+  filePath: string
+  relPath: string
+  chunksFound: number
+  relatedContextCount: number
+  durationMs: number
+  fallback: boolean
+}
+
+const LOG_MAX_ENTRIES = 500
+
+/**
+ * Append a log entry to the Read() interception log file.
+ * Non-blocking, non-fatal — errors are silently ignored.
+ */
+async function logReadInterception(projectRoot: string, entry: ReadLogEntry): Promise<void> {
+  try {
+    const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
+    await fs.mkdir(path.dirname(logPath), { recursive: true })
+
+    let entries: ReadLogEntry[] = []
+    try {
+      const raw = await fs.readFile(logPath, "utf-8")
+      entries = JSON.parse(raw)
+    } catch {
+      // file doesn't exist or is invalid — start fresh
+    }
+
+    entries.push(entry)
+    // Cap log size to avoid unbounded growth
+    if (entries.length > LOG_MAX_ENTRIES) {
+      entries = entries.slice(-LOG_MAX_ENTRIES)
+    }
+
+    await fs.writeFile(logPath, JSON.stringify(entries, null, 2), "utf-8")
+  } catch {
+    // non-fatal — logging must never break Read
+  }
+}
+
+export default tool({
+  description: `Read file with graph-aware context attachment. When available, this tool searches the file in the index and returns content + related context from the graph (imports, links, etc.).
+
+Use this instead of the standard Read tool for better context awareness.`,
+
+  args: {
+    filePath: tool.schema.string().describe("Path to the file to read"),
+  },
+
+  async execute(args) {
+    const startTime = Date.now()
+    const projectRoot = process.cwd()
+    const filePath = path.isAbsolute(args.filePath) ? args.filePath : path.join(projectRoot, args.filePath)
+
+    const relPath = path.relative(projectRoot, filePath)
+
+    if (DEBUG) {
+      console.log(`[read-interceptor] Intercepted Read("${relPath}")`)
+    }
+
+    // Resilient search: if vector index is corrupted or unavailable, fall back gracefully
+    let fileChunks: any[] = []
+    let allRelated: any[] = []
+    let searchFailed = false
+
+    try {
+      const indexer = await new CodebaseIndexer(projectRoot, "code").init()
+      try {
+        const results = await indexer.search(relPath, 20, false, {})
+        fileChunks = results.filter((r: any) => r.file === relPath)
+
+        allRelated = fileChunks
+          .flatMap((c: any) => c.relatedContext || [])
+          .filter((r: any, i: number, arr: any[]) => arr.findIndex((x: any) => x.chunk_id === r.chunk_id) === i)
+      } catch (searchErr: any) {
+        if (DEBUG) {
+          console.log(`[read-interceptor] Search failed for "${relPath}": ${searchErr.message}`)
+        }
+        searchFailed = true
+      }
+      await indexer.unloadModel()
+    } catch (initErr: any) {
+      if (DEBUG) {
+        console.log(`[read-interceptor] Indexer init failed: ${initErr.message}`)
+      }
+      searchFailed = true
+    }
+
+    const durationMs = Date.now() - startTime
+    const fallback = fileChunks.length === 0
+
+    // FR-043: Log the interception asynchronously (non-blocking)
+    logReadInterception(projectRoot, {
+      timestamp: startTime,
+      filePath: args.filePath,
+      relPath,
+      chunksFound: fileChunks.length,
+      relatedContextCount: allRelated.length,
+      durationMs,
+      fallback,
+    }).catch(() => {})
+
+    if (DEBUG) {
+      console.log(
+        `[read-interceptor] ${relPath}: ${fileChunks.length} chunks, ${allRelated.length} related, ${durationMs}ms${fallback ? " (fallback)" : ""}${searchFailed ? " (search error)" : ""}`
+      )
+    }
+
+    if (fallback) {
+      const reason = searchFailed
+        ? `Search index unavailable (possibly corrupted). Run codeindex({ action: "reindex", index: "code" }) to rebuild.`
+        : `File "${relPath}" not indexed. Use original Read tool or run codeindex({ action: "reindex", index: "code" })`
+      return reason
+    }
+
+    let output = `## ${relPath}\n\n`
+
+    output += `### Content\n\n`
+    for (const chunk of fileChunks) {
+      output += chunk.content + "\n\n"
+    }
+
+    if (allRelated.length > 0) {
+      output += `### Related Context\n\n`
+      for (const rel of allRelated) {
+        const snippet = rel.content.length > 300
+          ? rel.content.substring(0, 300) + "..."
+          : rel.content
+        output += `**${rel.file}** (${rel.relation})\n`
+        output += `\`\`\`\n${snippet}\n\`\`\`\n\n`
+      }
+    }
+
+    return output
+  },
+})
+
+// Export for testing
+export { logReadInterception, type ReadLogEntry }
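Since every interception is appended to `read-intercept.log.json`, the log can be mined for fallback rate and latency. A hypothetical consumer sketch (`summarize` is not part of the package; field names and the log path are taken from `ReadLogEntry` and `logReadInterception` above):

```ts
// Hypothetical consumer of the FR-043 log; field names and the log path
// are assumed from ReadLogEntry and logReadInterception above
import fs from "fs/promises"
import path from "path"

async function summarize(projectRoot: string): Promise<void> {
  const logPath = path.join(projectRoot, ".opencode", "vectors", "read-intercept.log.json")
  const entries: { durationMs: number; fallback: boolean }[] = JSON.parse(
    await fs.readFile(logPath, "utf-8")
  )
  const fallbacks = entries.filter((e) => e.fallback).length
  const avgMs = entries.reduce((sum, e) => sum + e.durationMs, 0) / (entries.length || 1)
  console.log(`${entries.length} reads, ${fallbacks} fallbacks, avg ${avgMs.toFixed(1)} ms`)
}

summarize(process.cwd()).catch(console.error)
```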