@comfanion/usethis_search 3.0.1 → 4.1.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cache/manager.ts +751 -0
- package/hooks/message-before.ts +261 -0
- package/hooks/types.ts +23 -0
- package/index.ts +63 -1
- package/package.json +6 -2
- package/tools/search.ts +154 -63
- package/tools/workspace.ts +210 -0
- package/vectorizer/index.ts +47 -1
- package/vectorizer.yaml +11 -0
|
/**
 * Workspace Context Injection Hook
 *
 * Uses "experimental.chat.messages.transform" to inject workspace files
 * into the conversation context. The AI sees attached files as part of
 * the message stream — no read() needed.
 *
 * Architecture:
 *   search("auth") → workspaceCache.attach(files)
 *   [this hook] → inject cached files into messages
 *   AI sees: full file content in context
 *   Chat history: search outputs auto-pruned (files already in workspace)
 *
 * Two responsibilities:
 *   1. INJECT: synthetic <workspace_context> message before last user message
 *   2. PRUNE: replace old search tool outputs with compact summaries
 *      (the full content is already in workspace injection — no need to keep
 *      the big search output in chat history)
 *
 * Injection strategy:
 *   - Injects a synthetic user message with <workspace_context> BEFORE
 *     the last user message (so AI sees files as "already known" context)
 *   - Uses cache_control: ephemeral for Anthropic prompt caching (90% savings)
 *   - Groups files: search-main first, then search-graph, then manual
 */

import type { SessionState } from "./types.ts"
import { workspaceCache } from "../cache/manager.ts"

// ── Types matching OpenCode plugin message format ───────────────────────────

// Minimal structural view of one part of a chat message. The open index
// signature keeps provider-specific fields (tool state, cache_control, …)
// accessible without modeling them here.
interface MessagePart {
  type: string
  // NOTE(review): both `content` and `text` exist on parts; the injection in
  // this file writes `content` — confirm which field the host actually reads.
  content?: string
  text?: string
  [key: string]: any
}

// Minimal structural view of a chat message as delivered to the
// "experimental.chat.messages.transform" hook.
interface Message {
  info: {
    role: "user" | "assistant" | "tool"
    [key: string]: any
  }
  parts: MessagePart[]
  [key: string]: any
}
+
|
|
48
|
+
// ── Hook ────────────────────────────────────────────────────────────────────
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Create the messages transform handler that injects workspace context.
|
|
52
|
+
*/
|
|
53
|
+
export function createWorkspaceInjectionHandler(state: SessionState) {
|
|
54
|
+
return async (_input: {}, output: { messages: Message[] }) => {
|
|
55
|
+
// Don't inject or prune for sub-agents (title generation, etc.)
|
|
56
|
+
if (state.isSubAgent) return
|
|
57
|
+
|
|
58
|
+
// ── Prune: replace old search tool outputs with compact summaries ────
|
|
59
|
+
// Files are already in workspace injection — no need for big search
|
|
60
|
+
// output in chat history. This runs even when workspace is empty
|
|
61
|
+
// (handles case where workspace was cleared but old search outputs remain).
|
|
62
|
+
const wsConfig = workspaceCache.getConfig()
|
|
63
|
+
if (wsConfig.autoPruneSearch !== false) {
|
|
64
|
+
pruneSearchToolOutputs(output.messages)
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
let entries = workspaceCache.getAll()
|
|
68
|
+
|
|
69
|
+
// Nothing in workspace — skip injection (but pruning already happened)
|
|
70
|
+
if (entries.length === 0) return
|
|
71
|
+
|
|
72
|
+
// ── Freshen: re-read changed files from disk ──────────────────────────
|
|
73
|
+
const { updated, removed } = await workspaceCache.freshen()
|
|
74
|
+
if (updated > 0 || removed > 0) {
|
|
75
|
+
// Re-fetch entries after freshen (some may be removed)
|
|
76
|
+
entries = workspaceCache.getAll()
|
|
77
|
+
if (entries.length === 0) return
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// ── Build workspace context block ─────────────────────────────────────
|
|
81
|
+
const totalTokens = workspaceCache.totalTokens
|
|
82
|
+
const fileCount = workspaceCache.size
|
|
83
|
+
|
|
84
|
+
let workspace = `<workspace_context files="${fileCount}" tokens="${totalTokens}">\n`
|
|
85
|
+
|
|
86
|
+
// Group by role for clear structure
|
|
87
|
+
const mainFiles = entries.filter(e => e.role === "search-main")
|
|
88
|
+
const graphFiles = entries.filter(e => e.role === "search-graph")
|
|
89
|
+
const manualFiles = entries.filter(e => e.role === "manual")
|
|
90
|
+
|
|
91
|
+
// Main search results
|
|
92
|
+
if (mainFiles.length > 0) {
|
|
93
|
+
for (const entry of mainFiles) {
|
|
94
|
+
workspace += formatFileEntry(entry)
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Graph relations (imports, extends, used_by)
|
|
99
|
+
if (graphFiles.length > 0) {
|
|
100
|
+
workspace += `\n<!-- Graph relations -->\n`
|
|
101
|
+
for (const entry of graphFiles) {
|
|
102
|
+
workspace += formatFileEntry(entry)
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Manually attached files
|
|
107
|
+
if (manualFiles.length > 0) {
|
|
108
|
+
workspace += `\n<!-- Manually attached -->\n`
|
|
109
|
+
for (const entry of manualFiles) {
|
|
110
|
+
workspace += formatFileEntry(entry)
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
workspace += `</workspace_context>`
|
|
115
|
+
|
|
116
|
+
// ── Inject into messages ──────────────────────────────────────────────
|
|
117
|
+
// Find the last real user message and inject workspace BEFORE it
|
|
118
|
+
// This way AI sees files as "already available context"
|
|
119
|
+
|
|
120
|
+
const lastUserIdx = findLastUserMessageIndex(output.messages)
|
|
121
|
+
if (lastUserIdx === -1) return
|
|
122
|
+
|
|
123
|
+
// Create synthetic workspace message (inserted before last user message)
|
|
124
|
+
const workspaceMessage: Message = {
|
|
125
|
+
info: {
|
|
126
|
+
role: "user",
|
|
127
|
+
// Mark as synthetic so DCP doesn't prune it
|
|
128
|
+
_synthetic: true,
|
|
129
|
+
_workspace_injection: true,
|
|
130
|
+
},
|
|
131
|
+
parts: [
|
|
132
|
+
{
|
|
133
|
+
type: "text",
|
|
134
|
+
content: workspace,
|
|
135
|
+
// Anthropic prompt caching — content that doesn't change
|
|
136
|
+
// between turns gets cached at 90% discount
|
|
137
|
+
cache_control: { type: "ephemeral" },
|
|
138
|
+
},
|
|
139
|
+
],
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Insert before last user message
|
|
143
|
+
output.messages.splice(lastUserIdx, 0, workspaceMessage)
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
148
|
+
|
|
149
|
+
function formatFileEntry(entry: ReturnType<typeof workspaceCache.getAll>[0]): string {
|
|
150
|
+
let block = `\n## ${entry.path}\n`
|
|
151
|
+
|
|
152
|
+
// Metadata line
|
|
153
|
+
const meta: string[] = []
|
|
154
|
+
if (entry.score !== undefined) meta.push(`score: ${entry.score.toFixed(3)}`)
|
|
155
|
+
if (entry.metadata?.language) meta.push(entry.metadata.language)
|
|
156
|
+
if (entry.metadata?.function_name) meta.push(`fn: ${entry.metadata.function_name}`)
|
|
157
|
+
if (entry.metadata?.class_name) meta.push(`class: ${entry.metadata.class_name}`)
|
|
158
|
+
if (entry.metadata?.relation) {
|
|
159
|
+
const mainBase = entry.metadata.mainFile?.split("/").pop() || "?"
|
|
160
|
+
meta.push(`${entry.metadata.relation} from ${mainBase}`)
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
if (meta.length > 0) {
|
|
164
|
+
block += `<!-- ${meta.join(" | ")} -->\n`
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// File content
|
|
168
|
+
const lang = entry.metadata?.language || ""
|
|
169
|
+
block += `\`\`\`${lang}\n`
|
|
170
|
+
block += entry.content
|
|
171
|
+
if (!entry.content.endsWith("\n")) block += "\n"
|
|
172
|
+
block += `\`\`\`\n`
|
|
173
|
+
|
|
174
|
+
return block
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
function findLastUserMessageIndex(messages: Message[]): number {
|
|
178
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
179
|
+
if (messages[i]?.info?.role === "user") {
|
|
180
|
+
// Skip synthetic messages
|
|
181
|
+
if ((messages[i].info as any)._synthetic) continue
|
|
182
|
+
return i
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
return -1
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
// ── Search output pruning ────────────────────────────────────────────────────
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Minimum output length to consider pruning.
|
|
192
|
+
* Short outputs (errors, "no results") are kept as-is.
|
|
193
|
+
*/
|
|
194
|
+
const MIN_PRUNE_LENGTH = 500
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* Marker prefix that search tool outputs start with.
|
|
198
|
+
* Used to identify search results in chat history.
|
|
199
|
+
*/
|
|
200
|
+
const SEARCH_OUTPUT_MARKER = '## Search: "'
|
|
201
|
+
|
|
202
|
+
/**
|
|
203
|
+
* Replace search tool outputs in chat history with compact summaries.
|
|
204
|
+
*
|
|
205
|
+
* Why: search() returns a big markdown block with file listings, scores, etc.
|
|
206
|
+
* After workspace injection, the full file content is already in context.
|
|
207
|
+
* Keeping the search output wastes tokens — replace it with a 1-line summary.
|
|
208
|
+
*
|
|
209
|
+
* Only prunes completed search calls with output longer than MIN_PRUNE_LENGTH.
|
|
210
|
+
* The last search output is kept (the agent may still be referencing it).
|
|
211
|
+
*/
|
|
212
|
+
export function pruneSearchToolOutputs(messages: Message[]): void {
|
|
213
|
+
// Find all search tool parts (completed, with long output)
|
|
214
|
+
const searchParts: { msgIdx: number; partIdx: number; part: MessagePart }[] = []
|
|
215
|
+
|
|
216
|
+
for (let i = 0; i < messages.length; i++) {
|
|
217
|
+
const msg = messages[i]
|
|
218
|
+
const parts = Array.isArray(msg.parts) ? msg.parts : []
|
|
219
|
+
|
|
220
|
+
for (let j = 0; j < parts.length; j++) {
|
|
221
|
+
const part = parts[j]
|
|
222
|
+
if (
|
|
223
|
+
part.type === "tool" &&
|
|
224
|
+
part.tool === "search" &&
|
|
225
|
+
part.state?.status === "completed" &&
|
|
226
|
+
typeof part.state?.output === "string" &&
|
|
227
|
+
part.state.output.length > MIN_PRUNE_LENGTH &&
|
|
228
|
+
part.state.output.startsWith(SEARCH_OUTPUT_MARKER)
|
|
229
|
+
) {
|
|
230
|
+
searchParts.push({ msgIdx: i, partIdx: j, part })
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
// Keep the last search output (agent may reference it) — prune the rest
|
|
236
|
+
if (searchParts.length <= 1) return
|
|
237
|
+
|
|
238
|
+
const toPrune = searchParts.slice(0, -1)
|
|
239
|
+
|
|
240
|
+
for (const { part } of toPrune) {
|
|
241
|
+
const output = part.state.output as string
|
|
242
|
+
|
|
243
|
+
// Extract query from output: ## Search: "query" (...)
|
|
244
|
+
const queryMatch = output.match(/^## Search: "([^"]+)"/)
|
|
245
|
+
const query = queryMatch?.[1] || "?"
|
|
246
|
+
|
|
247
|
+
// Extract file count from output: *N files (M chunks)...*
|
|
248
|
+
const filesMatch = output.match(/\*(\d+) files? \((\d+) chunks?\)/)
|
|
249
|
+
const fileCount = filesMatch?.[1] || "?"
|
|
250
|
+
const chunkCount = filesMatch?.[2] || "?"
|
|
251
|
+
|
|
252
|
+
// Extract attached count: ### Attached to workspace (N files)
|
|
253
|
+
const attachedMatch = output.match(/### Attached to workspace \((\d+) files?\)/)
|
|
254
|
+
const attachedCount = attachedMatch?.[1] || "0"
|
|
255
|
+
|
|
256
|
+
// Replace with compact summary
|
|
257
|
+
part.state.output =
|
|
258
|
+
`[Search "${query}" — ${fileCount} files (${chunkCount} chunks), ` +
|
|
259
|
+
`${attachedCount} attached to workspace. Full content available via workspace context.]`
|
|
260
|
+
}
|
|
261
|
+
}
|
package/hooks/types.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared types for hooks.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors the minimal SessionState needed by workspace hooks.
|
|
5
|
+
* We avoid importing from DCP directly to keep usethis_search independent.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
export interface SessionState {
|
|
9
|
+
/** Current session ID (from "chat.message" hook) */
|
|
10
|
+
sessionId: string | null
|
|
11
|
+
/** Is this a sub-agent (title gen, summarizer, etc.)? */
|
|
12
|
+
isSubAgent: boolean
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Create initial session state.
|
|
17
|
+
*/
|
|
18
|
+
export function createSessionState(): SessionState {
|
|
19
|
+
return {
|
|
20
|
+
sessionId: null,
|
|
21
|
+
isSubAgent: false,
|
|
22
|
+
}
|
|
23
|
+
}
|
package/index.ts
CHANGED
|
@@ -1,10 +1,31 @@
|
|
|
1
1
|
import type { Plugin } from "@opencode-ai/plugin"
|
|
2
2
|
|
|
3
3
|
import search from "./tools/search"
|
|
4
|
+
import { workspace_list, workspace_attach, workspace_detach, workspace_clear, workspace_restore } from "./tools/workspace"
|
|
4
5
|
import FileIndexerPlugin from "./file-indexer"
|
|
6
|
+
import { workspaceCache } from "./cache/manager"
|
|
7
|
+
import { createWorkspaceInjectionHandler } from "./hooks/message-before"
|
|
8
|
+
import { createToolSubstitutionHandler } from "./hooks/tool-substitution"
|
|
9
|
+
import { createSessionState } from "./hooks/types"
|
|
10
|
+
import { getWorkspaceConfig } from "./vectorizer/index.ts"
|
|
11
|
+
|
|
12
|
+
const INTERNAL_AGENT_SIGNATURES = [
|
|
13
|
+
"You are a title generator",
|
|
14
|
+
"You are a helpful AI assistant tasked with summarizing conversations",
|
|
15
|
+
"Summarize what was done in this conversation",
|
|
16
|
+
]
|
|
5
17
|
|
|
6
18
|
const UsethisSearchPlugin: Plugin = async ({ directory, client }) => {
|
|
7
|
-
//
|
|
19
|
+
// ── Initialize workspace cache with project root + config from yaml ─────
|
|
20
|
+
// sessionId will be set on first "chat.message" event
|
|
21
|
+
const wsConfig = getWorkspaceConfig()
|
|
22
|
+
workspaceCache.updateConfig(wsConfig)
|
|
23
|
+
await workspaceCache.init(directory)
|
|
24
|
+
|
|
25
|
+
// ── Session state (tracks sessionId, sub-agent detection) ───────────────
|
|
26
|
+
const state = createSessionState()
|
|
27
|
+
|
|
28
|
+
// ── Start file indexer (background indexing + event handling) ────────────
|
|
8
29
|
let fileIndexerEvent: ((args: any) => Promise<void>) | null = null
|
|
9
30
|
try {
|
|
10
31
|
const hooks = await FileIndexerPlugin({ directory, client } as any)
|
|
@@ -14,10 +35,51 @@ const UsethisSearchPlugin: Plugin = async ({ directory, client }) => {
|
|
|
14
35
|
}
|
|
15
36
|
|
|
16
37
|
return {
|
|
38
|
+
// ── Tools ───────────────────────────────────────────────────────────
|
|
17
39
|
tool: {
|
|
18
40
|
search,
|
|
41
|
+
workspace_list,
|
|
42
|
+
workspace_attach,
|
|
43
|
+
workspace_detach,
|
|
44
|
+
workspace_clear,
|
|
45
|
+
workspace_restore,
|
|
46
|
+
},
|
|
47
|
+
|
|
48
|
+
// ── Hooks ───────────────────────────────────────────────────────────
|
|
49
|
+
|
|
50
|
+
// Inject workspace files into message context (before LLM sees them)
|
|
51
|
+
"experimental.chat.messages.transform": createWorkspaceInjectionHandler(state),
|
|
52
|
+
|
|
53
|
+
// Substitute tool outputs when files are in workspace
|
|
54
|
+
"tool.execute.after": createToolSubstitutionHandler(state),
|
|
55
|
+
|
|
56
|
+
// Detect sub-agents (title gen, summarizer) via system prompt
|
|
57
|
+
"experimental.chat.system.transform": async (_input: unknown, output: { system: string[] }) => {
|
|
58
|
+
const systemText = output.system.join("\n")
|
|
59
|
+
if (INTERNAL_AGENT_SIGNATURES.some(sig => systemText.includes(sig))) {
|
|
60
|
+
state.isSubAgent = true
|
|
61
|
+
} else {
|
|
62
|
+
state.isSubAgent = false
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
|
|
66
|
+
// Track session ID + init workspace per-session persistence
|
|
67
|
+
"chat.message": async (input: {
|
|
68
|
+
sessionID: string
|
|
69
|
+
agent?: string
|
|
70
|
+
model?: { providerID: string; modelID: string }
|
|
71
|
+
messageID?: string
|
|
72
|
+
variant?: string
|
|
73
|
+
}) => {
|
|
74
|
+
if (input.sessionID && input.sessionID !== state.sessionId) {
|
|
75
|
+
state.sessionId = input.sessionID
|
|
76
|
+
|
|
77
|
+
// Re-init workspace with actual session ID (first time only)
|
|
78
|
+
await workspaceCache.init(directory, input.sessionID)
|
|
79
|
+
}
|
|
19
80
|
},
|
|
20
81
|
|
|
82
|
+
// ── Events ──────────────────────────────────────────────────────────
|
|
21
83
|
event: async (args: any) => {
|
|
22
84
|
if (fileIndexerEvent) {
|
|
23
85
|
try {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@comfanion/usethis_search",
|
|
3
|
-
"version": "
|
|
4
|
-
"description": "OpenCode plugin: semantic search with
|
|
3
|
+
"version": "4.1.0-dev.1",
|
|
4
|
+
"description": "OpenCode plugin: semantic search with workspace injection + tool output substitution (v4.1-dev: read() substitution, dirty file tracking)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
7
7
|
"exports": {
|
|
@@ -24,6 +24,10 @@
|
|
|
24
24
|
"file-indexer.ts",
|
|
25
25
|
"tools/search.ts",
|
|
26
26
|
"tools/codeindex.ts",
|
|
27
|
+
"tools/workspace.ts",
|
|
28
|
+
"cache/manager.ts",
|
|
29
|
+
"hooks/message-before.ts",
|
|
30
|
+
"hooks/types.ts",
|
|
27
31
|
"vectorizer/index.ts",
|
|
28
32
|
"vectorizer/content-cleaner.ts",
|
|
29
33
|
"vectorizer/metadata-extractor.ts",
|
package/tools/search.ts
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Semantic Code Search Tool (
|
|
2
|
+
* Semantic Code Search Tool (v4 — workspace injection)
|
|
3
3
|
*
|
|
4
4
|
* Uses local embeddings + LanceDB vector store via bundled vectorizer.
|
|
5
|
-
*
|
|
5
|
+
* v4: Top results + graph relations attached to workspace with full content.
|
|
6
|
+
* Rest returned as summary only.
|
|
7
|
+
* AI sees full files via message.before injection — no read() needed.
|
|
8
|
+
*
|
|
6
9
|
* Index data is stored in `.opencode/vectors/<index>/`.
|
|
7
10
|
*/
|
|
8
11
|
|
|
@@ -11,6 +14,7 @@ import path from "path"
|
|
|
11
14
|
import fs from "fs/promises"
|
|
12
15
|
|
|
13
16
|
import { CodebaseIndexer, getSearchConfig, getIndexer, releaseIndexer } from "../vectorizer/index.ts"
|
|
17
|
+
import { workspaceCache } from "../cache/manager.ts"
|
|
14
18
|
|
|
15
19
|
// ── Extension → language mapping (for filter parsing) ─────────────────────
|
|
16
20
|
const EXT_TO_LANG: Record<string, string> = {
|
|
@@ -88,7 +92,7 @@ function parseFilter(filter: string): {
|
|
|
88
92
|
}
|
|
89
93
|
|
|
90
94
|
export default tool({
|
|
91
|
-
description: `Search the codebase semantically.
|
|
95
|
+
description: `Search the codebase semantically. Top results are attached to workspace with full content (visible via context injection). Rest returned as summary.
|
|
92
96
|
|
|
93
97
|
Available indexes:
|
|
94
98
|
- "code" (default) - Source code files (*.js, *.ts, *.py, *.go, etc.)
|
|
@@ -123,6 +127,9 @@ Examples:
|
|
|
123
127
|
const minScore = cfg.min_score ?? 0.35
|
|
124
128
|
const includeArchived = cfg.include_archived ?? false
|
|
125
129
|
|
|
130
|
+
// Workspace config
|
|
131
|
+
const wsConfig = workspaceCache.getConfig()
|
|
132
|
+
|
|
126
133
|
// Parse filter into path/language constraints
|
|
127
134
|
const filterParsed = args.filter ? parseFilter(args.filter) : {}
|
|
128
135
|
|
|
@@ -209,8 +216,6 @@ Examples:
|
|
|
209
216
|
const needle = filterParsed.pathContains.toLowerCase()
|
|
210
217
|
allResults = allResults.filter(r => r.file && r.file.toLowerCase().includes(needle))
|
|
211
218
|
}
|
|
212
|
-
// Language filter is already passed to searchOptions above, but double-check
|
|
213
|
-
// in case vectorizer didn't filter (e.g. docs index has no language field)
|
|
214
219
|
if (filterParsed.language) {
|
|
215
220
|
allResults = allResults.filter(r => !r.language || r.language === filterParsed.language || r.language === "unknown")
|
|
216
221
|
}
|
|
@@ -265,84 +270,170 @@ Examples:
|
|
|
265
270
|
return `No results found in ${scope}${filterNote} for: "${args.query}" (min score: ${minScore})\n\nTry:\n- Different keywords or phrasing\n- Remove or broaden the filter\n- search({ query: "...", searchAll: true })`
|
|
266
271
|
}
|
|
267
272
|
|
|
268
|
-
//
|
|
273
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
274
|
+
// WORKSPACE ATTACH: Top N main files + graph relations (FULL CONTENT)
|
|
275
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
276
|
+
|
|
277
|
+
const topGroups = sortedGroups.slice(0, wsConfig.attachTopN)
|
|
278
|
+
const restGroups = sortedGroups.slice(wsConfig.attachTopN)
|
|
279
|
+
|
|
280
|
+
const attachedMain: string[] = []
|
|
281
|
+
const attachedGraph: string[] = []
|
|
282
|
+
const alreadyAttached = new Set<string>()
|
|
283
|
+
|
|
284
|
+
for (const { best: r } of topGroups) {
|
|
285
|
+
// Skip if score too low
|
|
286
|
+
if ((r._finalScore ?? 0) < wsConfig.minScoreMain) continue
|
|
287
|
+
|
|
288
|
+
// Read full file and attach
|
|
289
|
+
try {
|
|
290
|
+
const fullPath = path.join(projectRoot, r.file)
|
|
291
|
+
const content = await fs.readFile(fullPath, "utf-8")
|
|
292
|
+
|
|
293
|
+
workspaceCache.attach({
|
|
294
|
+
path: r.file,
|
|
295
|
+
content,
|
|
296
|
+
role: "search-main",
|
|
297
|
+
attachedAt: Date.now(),
|
|
298
|
+
attachedBy: args.query,
|
|
299
|
+
score: r._finalScore,
|
|
300
|
+
metadata: {
|
|
301
|
+
language: r.language,
|
|
302
|
+
function_name: r.function_name,
|
|
303
|
+
class_name: r.class_name,
|
|
304
|
+
heading_context: r.heading_context,
|
|
305
|
+
},
|
|
306
|
+
})
|
|
307
|
+
|
|
308
|
+
attachedMain.push(r.file)
|
|
309
|
+
alreadyAttached.add(r.file)
|
|
310
|
+
} catch {
|
|
311
|
+
// File read failed — skip
|
|
312
|
+
continue
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
// Attach graph relations (imports, extends, used_by)
|
|
316
|
+
if (r.relatedContext && r.relatedContext.length > 0) {
|
|
317
|
+
const topRelated = r.relatedContext
|
|
318
|
+
.filter((rel: any) => rel.score >= wsConfig.minScoreRelated)
|
|
319
|
+
.sort((a: any, b: any) => b.score - a.score)
|
|
320
|
+
.slice(0, wsConfig.attachRelatedPerFile)
|
|
321
|
+
|
|
322
|
+
for (const rel of topRelated) {
|
|
323
|
+
if (alreadyAttached.has(rel.file)) continue
|
|
324
|
+
|
|
325
|
+
try {
|
|
326
|
+
const relFullPath = path.join(projectRoot, rel.file)
|
|
327
|
+
const relContent = await fs.readFile(relFullPath, "utf-8")
|
|
328
|
+
|
|
329
|
+
workspaceCache.attach({
|
|
330
|
+
path: rel.file,
|
|
331
|
+
content: relContent,
|
|
332
|
+
role: "search-graph",
|
|
333
|
+
attachedAt: Date.now(),
|
|
334
|
+
attachedBy: `${args.query} (${rel.relation} from ${r.file})`,
|
|
335
|
+
score: rel.score,
|
|
336
|
+
metadata: {
|
|
337
|
+
language: rel.language,
|
|
338
|
+
relation: rel.relation,
|
|
339
|
+
mainFile: r.file,
|
|
340
|
+
},
|
|
341
|
+
})
|
|
342
|
+
|
|
343
|
+
attachedGraph.push(rel.file)
|
|
344
|
+
alreadyAttached.add(rel.file)
|
|
345
|
+
} catch {
|
|
346
|
+
// Related file read failed — skip
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
// ── Flush workspace to disk immediately (don't rely on debounce) ─────
|
|
353
|
+
if (attachedMain.length > 0 || attachedGraph.length > 0) {
|
|
354
|
+
workspaceCache.save().catch(() => {})
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
358
|
+
// BUILD OUTPUT: Attached (summary) + Rest (summary only)
|
|
359
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
360
|
+
|
|
269
361
|
const topScore = sortedGroups[0].best._finalScore ?? 0
|
|
270
362
|
const hasBM25Only = allResults.some((r: any) => r._bm25Only)
|
|
271
363
|
const scope = args.searchAll ? "all indexes" : `index "${indexName}"`
|
|
272
364
|
const filterLabel = args.filter ? ` filter:"${args.filter}"` : ""
|
|
273
|
-
let output = `## Search
|
|
365
|
+
let output = `## Search: "${args.query}" (${scope}${filterLabel})\n\n`
|
|
274
366
|
|
|
275
367
|
if (hasBM25Only) {
|
|
276
|
-
output += `> **BM25-only mode**
|
|
368
|
+
output += `> **BM25-only mode** -- vector embeddings not yet available. Quality will improve after embedding completes.\n\n`
|
|
277
369
|
}
|
|
278
370
|
|
|
279
371
|
if (topScore < 0.45) {
|
|
280
|
-
output += `> **Low confidence
|
|
372
|
+
output += `> **Low confidence.** Best score: ${topScore.toFixed(3)}. Try more specific keywords.\n\n`
|
|
281
373
|
}
|
|
282
374
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
const
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
?
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
if (r._keywordBonus > 0.005) breakdownParts.push(`kw: +${r._keywordBonus.toFixed(2)}`)
|
|
303
|
-
const breakdown = breakdownParts.join(", ")
|
|
304
|
-
|
|
305
|
-
// Matched keywords
|
|
306
|
-
const kwDisplay = r._matchedKeywords && r._matchedKeywords.length > 0
|
|
307
|
-
? ` | matched: "${r._matchedKeywords.join('", "')}"`
|
|
308
|
-
: ""
|
|
309
|
-
|
|
310
|
-
output += `### ${i + 1}. ${r.file}${indexLabel}${chunkNote}\n`
|
|
311
|
-
output += `**Score:** ${score} (${breakdown}${kwDisplay})${metaLine}\n\n`
|
|
312
|
-
output += "```\n"
|
|
313
|
-
const content = r.content.length > 500 ? r.content.substring(0, 500) + "\n... (truncated)" : r.content
|
|
314
|
-
output += content
|
|
315
|
-
output += "\n```\n"
|
|
316
|
-
|
|
317
|
-
// Second-best chunk hint
|
|
318
|
-
if (chunks.length > 1) {
|
|
319
|
-
const second = chunks.find((c: any) => c !== r)
|
|
320
|
-
if (second) {
|
|
321
|
-
const secMeta: string[] = []
|
|
322
|
-
if (second.function_name) secMeta.push(`fn: ${second.function_name}`)
|
|
323
|
-
if (second.heading_context) secMeta.push(`"${second.heading_context}"`)
|
|
324
|
-
const secLabel = secMeta.length > 0 ? ` ${secMeta.join(", ")}` : ""
|
|
325
|
-
output += `\n*Also:${secLabel}*\n`
|
|
326
|
-
}
|
|
375
|
+
// ── Attached files (summary — full content in workspace injection) ─────
|
|
376
|
+
if (attachedMain.length > 0) {
|
|
377
|
+
const totalAttached = attachedMain.length + attachedGraph.length
|
|
378
|
+
output += `### Attached to workspace (${totalAttached} files)\n\n`
|
|
379
|
+
|
|
380
|
+
for (let i = 0; i < attachedMain.length; i++) {
|
|
381
|
+
const group = topGroups.find(g => g.best.file === attachedMain[i])
|
|
382
|
+
if (!group) continue
|
|
383
|
+
const r = group.best
|
|
384
|
+
const score = (r._finalScore ?? 0).toFixed(3)
|
|
385
|
+
const chunkNote = group.chunks.length > 1 ? ` (${group.chunks.length} sections)` : ""
|
|
386
|
+
|
|
387
|
+
const metaParts: string[] = []
|
|
388
|
+
if (r.language && r.language !== "unknown") metaParts.push(r.language)
|
|
389
|
+
if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
|
|
390
|
+
if (r.class_name) metaParts.push(`class: ${r.class_name}`)
|
|
391
|
+
const metaLine = metaParts.length > 0 ? ` — ${metaParts.join(", ")}` : ""
|
|
392
|
+
|
|
393
|
+
output += `${i + 1}. **${r.file}** score: ${score}${chunkNote}${metaLine}\n`
|
|
327
394
|
}
|
|
328
395
|
|
|
329
|
-
if (
|
|
330
|
-
output +=
|
|
331
|
-
for (const
|
|
332
|
-
const
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
output +=
|
|
396
|
+
if (attachedGraph.length > 0) {
|
|
397
|
+
output += `\n**Graph relations:**\n`
|
|
398
|
+
for (const graphFile of attachedGraph) {
|
|
399
|
+
const entry = workspaceCache.get(graphFile)
|
|
400
|
+
const relation = entry?.metadata?.relation || "related"
|
|
401
|
+
const mainFile = entry?.metadata?.mainFile
|
|
402
|
+
const mainBasename = mainFile ? path.basename(mainFile) : "?"
|
|
403
|
+
output += `- ${graphFile} (${relation} from ${mainBasename})\n`
|
|
337
404
|
}
|
|
338
405
|
}
|
|
406
|
+
output += `\n`
|
|
407
|
+
}
|
|
339
408
|
|
|
340
|
-
|
|
409
|
+
// ── Rest files (summary only — not attached) ──────────────────────────
|
|
410
|
+
if (restGroups.length > 0) {
|
|
411
|
+
output += `### Additional results (summary only)\n\n`
|
|
412
|
+
for (let i = 0; i < restGroups.length; i++) {
|
|
413
|
+
const { best: r, chunks } = restGroups[i]
|
|
414
|
+
const score = (r._finalScore ?? 0).toFixed(3)
|
|
415
|
+
const chunkNote = chunks.length > 1 ? ` (${chunks.length} sections)` : ""
|
|
416
|
+
const indexLabel = args.searchAll ? ` [${r._index}]` : ""
|
|
417
|
+
|
|
418
|
+
const metaParts: string[] = []
|
|
419
|
+
if (r.language && r.language !== "unknown") metaParts.push(r.language)
|
|
420
|
+
if (r.function_name) metaParts.push(`fn: ${r.function_name}`)
|
|
421
|
+
if (r.class_name) metaParts.push(`class: ${r.class_name}`)
|
|
422
|
+
const metaLine = metaParts.length > 0 ? ` — ${metaParts.join(", ")}` : ""
|
|
423
|
+
|
|
424
|
+
output += `${attachedMain.length + i + 1}. ${r.file}${indexLabel} score: ${score}${chunkNote}${metaLine}\n`
|
|
425
|
+
}
|
|
426
|
+
output += `\nUse \`workspace.attach("path")\` to attach additional files.\n`
|
|
341
427
|
}
|
|
342
428
|
|
|
429
|
+
// ── Footer ────────────────────────────────────────────────────────────
|
|
343
430
|
const totalChunks = allResults.length
|
|
344
431
|
const uniqueFiles = sortedGroups.length
|
|
345
|
-
output +=
|
|
432
|
+
output += `\n---\n`
|
|
433
|
+
output += `*${uniqueFiles} files (${totalChunks} chunks) | `
|
|
434
|
+
output += `Workspace: ${workspaceCache.size} files, ${workspaceCache.totalTokens.toLocaleString()} tokens*\n`
|
|
435
|
+
output += `*Attached files are in workspace context — reference them directly without read().*`
|
|
436
|
+
|
|
346
437
|
return output
|
|
347
438
|
} catch (error: any) {
|
|
348
439
|
return `Search failed: ${error.message || String(error)}`
|