npm - @comfanion/usethis_search - Versions diffs - 4.3.0-dev.3 → 4.3.1 - Mend

@comfanion/usethis_search 4.3.0-dev.3 → 4.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/hooks/message-before.ts +23 -193
package/package.json +2 -2
package/tools/search.ts +42 -20
package/tools/workspace.ts +44 -15

package/hooks/message-before.ts CHANGED Viewed

@@ -1,21 +1,25 @@
 /**
- * History Pruning Hook (v2 — no injection)
+ * History Pruning Hook (v3 — workspace-only, DCP handles the rest)
  *
  * Uses "experimental.chat.messages.transform" to prune old workspace tool
  * outputs from chat history. Only the LAST workspace state is kept in context.
  *
- * v2: Removed workspace injection entirely.
- *     Each tool (search, workspace_list, etc.) now returns full workspace
- *     state inline. This hook only prunes previous outputs to prevent
- *     context bloat.
+ * v3: Removed read pruning and tool compaction — delegated to DCP plugin
+ *     (@tarquinen/opencode-dcp) which handles deduplication, supersede-writes,
+ *     error purging, and agent-facing discard/extract tools.
+ *
+ *     This hook ONLY handles workspace state pruning — something DCP can't do
+ *     because it doesn't understand that different search queries produce
+ *     workspace state blocks that supersede each other.
  *
  * Pruning strategy:
- *   1. WORKSPACE TOOLS: Find all outputs from search/workspace_* tools.
- *      Keep only the LAST one (it has the latest workspace state).
- *      Replace the rest with compact 1-line summaries.
- *   2. READ TOOLS: Replace old read() outputs with compact summaries.
- *      Keep the last read output (agent may reference it).
- *   3. COMPACT: Remove old tool call parts entirely (keep last N turns).
+ *   Find all outputs from search/workspace_* tools that contain
+ *   <workspace_state> blocks. Keep only the LAST one. Replace the rest
+ *   with compact 1-line summaries.
+ *
+ * DCP companion config (.opencode/dcp.jsonc):
+ *   Our workspace tools are added to DCP's protectedTools so DCP
+ *   doesn't try to prune them (we handle them ourselves).
  */
 import type { SessionState } from "./types.ts"
@@ -56,17 +60,11 @@ const WORKSPACE_TOOLS = new Set([
 /** Minimum output length to consider pruning. Short outputs are kept as-is. */
 const MIN_PRUNE_LENGTH = 500
-/** Keep last N turns intact (don't compact recent tool calls). */
-const KEEP_LAST_N_TURNS = 5
-/** Tools eligible for compaction (removing old call + output parts). */
-const COMPACT_TOOLS = new Set(["search", "read", "Read", "workspace_list", "workspace_forget", "workspace_clear", "workspace_restore"])
 // ── Hook ────────────────────────────────────────────────────────────────────
 /**
  * Create the history pruning handler.
- * No injection — only prunes old tool outputs from chat history.
+ * Only prunes old workspace state outputs — DCP handles everything else.
  */
 export function createWorkspaceInjectionHandler(state: SessionState) {
   return async (_input: {}, output: { messages: Message[] }) => {
@@ -74,8 +72,6 @@ export function createWorkspaceInjectionHandler(state: SessionState) {
     if (state.isSubAgent) return
     pruneWorkspaceToolOutputs(output.messages)
-    pruneReadToolOutputs(output.messages)
-    compactOldToolCalls(output.messages)
   }
 }
@@ -84,13 +80,18 @@ export function createWorkspaceInjectionHandler(state: SessionState) {
 /**
  * Replace old workspace tool outputs with compact summaries.
  *
- * Workspace tools (search, workspace_list, etc.) now return full workspace
+ * Workspace tools (search, workspace_list, etc.) return full workspace
  * state in their output. Only the LAST such output is kept — all previous
  * ones are replaced with a 1-line summary.
  *
  * This ensures only ONE copy of workspace state is in context at any time.
+ *
+ * Note: DCP's deduplication only prunes IDENTICAL tool calls (same params).
+ * Two different search queries wouldn't be deduplicated by DCP, but both
+ * contain workspace state that supersedes each other. That's why we need
+ * this workspace-specific pruning.
  */
-function pruneWorkspaceToolOutputs(messages: Message[]): void {
+export function pruneWorkspaceToolOutputs(messages: Message[]): void {
   const wsParts: { msgIdx: number; partIdx: number; part: MessagePart }[] = []
   for (let i = 0; i < messages.length; i++) {
@@ -139,174 +140,3 @@ function pruneWorkspaceToolOutputs(messages: Message[]): void {
     }
   }
 }
-// ── Read Tool Pruning ───────────────────────────────────────────────────────
-/**
- * Replace old read() tool outputs with compact summaries.
- * Keep the last read output (agent may reference it).
- */
-function pruneReadToolOutputs(messages: Message[]): void {
-  const readParts: { msgIdx: number; partIdx: number; part: MessagePart }[] = []
-  for (let i = 0; i < messages.length; i++) {
-    const msg = messages[i]
-    const parts = Array.isArray(msg.parts) ? msg.parts : []
-    for (let j = 0; j < parts.length; j++) {
-      const part = parts[j]
-      if (
-        part.type === "tool" &&
-        (part.tool === "read" || part.tool === "Read") &&
-        part.state?.status === "completed" &&
-        typeof part.state?.output === "string" &&
-        part.state.output.length > MIN_PRUNE_LENGTH
-      ) {
-        readParts.push({ msgIdx: i, partIdx: j, part })
-      }
-    }
-  }
-  // Keep the last read output — prune the rest
-  if (readParts.length <= 1) return
-  const toPrune = readParts.slice(0, -1)
-  for (const { part } of toPrune) {
-    const output = part.state!.output as string
-    // Skip already-pruned outputs
-    if (output.startsWith("[") || output.startsWith("✓")) continue
-    // Extract file path from input or output
-    const filePath = part.input?.filePath || extractFilePathFromOutput(output)
-    part.state!.output = `[Read "${filePath || "file"}" — content pruned from history]`
-  }
-}
-/**
- * Extract file path from read() output.
- */
-function extractFilePathFromOutput(output: string): string | null {
-  const firstLine = output.split("\n")[0]
-  const pathMatch = firstLine.match(/##?\s*(.+?\.(ts|js|go|py|md|txt|yaml|json|tsx|jsx|rs|java|kt|swift|c|cpp|h|cs|rb|php))/)
-  if (pathMatch) {
-    return pathMatch[1].trim()
-  }
-  return null
-}
-// ── Tool Call Compaction ────────────────────────────────────────────────────
-/**
- * Remove old tool call parts from chat history.
- *
- * Strategy:
- * - Keep last N turns intact
- * - Only compact search/read/workspace tools
- * - Only compact completed calls with pruned outputs
- * - Add compact marker showing how many calls removed
- */
-function compactOldToolCalls(messages: Message[]): void {
-  const toolPairs = findToolCallPairs(messages)
-  if (toolPairs.length === 0) return
-  const totalTurns = messages.length
-  // Filter: only old, completed, compactable tools with pruned outputs
-  const toCompact = toolPairs.filter(pair => {
-    const turnsFromEnd = totalTurns - pair.msgIndex
-    return (
-      turnsFromEnd > KEEP_LAST_N_TURNS &&
-      pair.status === "completed" &&
-      COMPACT_TOOLS.has(pair.tool) &&
-      pair.outputPart &&
-      isPrunedOutput(pair.outputPart.state?.output || "")
-    )
-  })
-  if (toCompact.length === 0) return
-  // Remove tool parts from messages
-  const removedIds = new Set<string>()
-  for (const pair of toCompact) {
-    if (pair.callPart.id) removedIds.add(pair.callPart.id)
-    if (pair.outputPart?.id) removedIds.add(pair.outputPart.id)
-  }
-  for (const msg of messages) {
-    if (!msg.parts || !Array.isArray(msg.parts)) continue
-    msg.parts = msg.parts.filter(part => !part.id || !removedIds.has(part.id))
-  }
-  // Add compact marker to first user message
-  const firstUserMsg = messages.find(m => m?.info?.role === "user")
-  if (firstUserMsg && firstUserMsg.parts) {
-    const marker = {
-      type: "text",
-      text: `<!-- ${toCompact.length} tool calls compacted (search/read/workspace results pruned) -->`,
-      id: "compact-marker-" + Date.now(),
-    }
-    firstUserMsg.parts.unshift(marker)
-  }
-}
-interface ToolCallPair {
-  msgIndex: number
-  callPart: MessagePart
-  outputPart?: MessagePart
-  tool: string
-  status: string
-}
-/**
- * Find all tool call + output pairs in messages.
- */
-function findToolCallPairs(messages: Message[]): ToolCallPair[] {
-  const pairs: ToolCallPair[] = []
-  for (let i = 0; i < messages.length; i++) {
-    const msg = messages[i]
-    if (!msg.parts || !Array.isArray(msg.parts)) continue
-    for (const part of msg.parts) {
-      if (part.type === "tool" && part.tool) {
-        const status = part.state?.status || "unknown"
-        // Find matching output part (usually in same message)
-        let outputPart: MessagePart | undefined
-        for (const p of msg.parts) {
-          if (p.type === "tool" && p.tool === part.tool && p.state?.output && p.id !== part.id) {
-            outputPart = p
-            break
-          }
-        }
-        pairs.push({
-          msgIndex: i,
-          callPart: part,
-          outputPart,
-          tool: part.tool,
-          status,
-        })
-      }
-    }
-  }
-  return pairs
-}
-/**
- * Check if output is pruned (compact format).
- */
-function isPrunedOutput(output: string): boolean {
-  if (!output) return false
-  return output.startsWith("[") || output.startsWith("✓")
-}
-// ── Exports for testing ─────────────────────────────────────────────────────
-export { pruneWorkspaceToolOutputs, pruneReadToolOutputs, compactOldToolCalls }

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "4.3.0-dev.3",
-  "description": "OpenCode plugin: semantic search with context-efficient workspace state (v4.3: no injection, each tool returns full state inline, auto-prune history, auto-detect modes, line numbers, LSP memory leak fixed)",
+  "version": "4.3.1",
+  "description": "OpenCode plugin: semantic search with context-efficient workspace state (v4.3.1: improved tool description - chunks contain direct file content, no verification needed)",
   "type": "module",
   "main": "./index.ts",
   "exports": {

package/tools/search.ts CHANGED Viewed

@@ -179,30 +179,52 @@ function parseFilter(filter: string): {
 }
 export default tool({
-  description: `Search codebase and automatically attach relevant context to workspace.
-Accepts any query - semantic search, file path, or chunk ID:
-- "authentication logic" → finds relevant code
-- "docs/architecture.md" → attaches file
-- "src/auth.ts:chunk-5" → attaches specific chunk
-Results are optimized for context - top chunks auto-attached with expanded context
-(related code, imports, class methods). Returns full workspace state inline.
-Previous search outputs are automatically pruned from history.
-IMPORTANT: Workspace has limited token budget. Use workspace_forget() to remove
-irrelevant files or old searches before adding new context.
-Choose index based on what you're looking for:
-- index: "code" → search source code
-- index: "docs" → search documentation
-- searchAll: true → search everywhere
+  description: `Search the codebase semantically. Use this to find relevant code snippets, functions, or files based on meaning, not just text matching.
+Available indexes:
+- "code" (default) - Source code files (*.js, *.ts, *.py, *.go, etc.)
+- "docs" - Documentation files (*.md, *.txt, etc.)
+- "config" - Configuration files (*.yaml, *.json, etc.)
+- searchAll: true - Search across all indexes
+Auto-detects query type:
+- Semantic: "authentication logic" → vector search for relevant code
+- File path: "docs/architecture.md" → attaches entire file to workspace
+- Chunk ID: "src/auth.ts:chunk-5" → attaches specific chunk
+How workspace works:
+- Top results are AUTO-ATTACHED to workspace with expanded context (class methods, imports, related code via graph)
+- Workspace has a TOKEN BUDGET (~50K tokens, ~100 chunks). When full, oldest chunks are evicted
+- Each search call returns full <workspace_state> with all chunk contents inline
+- Only the LATEST search/workspace output is kept in chat history — older ones are auto-pruned
+- Workspace persists across searches — new results ADD to existing workspace
+IMPORTANT: Chunks contain DIRECT file content dumps (raw code/text from files).
+- You DO NOT need to verify chunk content with grep/read tools
+- Chunks are already the actual file content, not summaries or references
+- Trust the chunk content as the source of truth
+- Use Read tool only if you need content OUTSIDE the indexed chunks
+Context management:
+- BEFORE searching a new topic, use workspace_forget() to remove irrelevant old context
+- Use workspace_clear() when switching to a completely different task
+- The workspace is your working memory — keep it focused on the current task
+Filter narrows results by path or language:
+- "internal/domain/" → only files under that path
+- "*.go" → only Go files
+- "internal/**/*.go" → path + language combined
+- "service" → files containing "service" in path
 Examples:
 - search({ query: "authentication logic" })
 - search({ query: "how to deploy", index: "docs" })
-- search({ query: "docs/prd.md" })  // attach file
-- search({ query: "internal/domain/", filter: "*.go" })`,
+- search({ query: "tenant management", filter: "internal/domain/" })
+- search({ query: "event handling", filter: "*.go" })
+- search({ query: "API routes", filter: "internal/**/*.go" })
+- search({ query: "metrics", searchAll: true })
+- search({ query: "docs/prd.md" })
+- search({ query: "src/auth.ts:chunk-5" })`,
   args: {
     query: tool.schema.string().describe("What to search: semantic query, file path, or chunk ID"),

package/tools/workspace.ts CHANGED Viewed

@@ -20,7 +20,15 @@ import { buildWorkspaceOutput } from "./workspace-state.ts"
 // ── workspace.list ──────────────────────────────────────────────────────────
 export const workspace_list = tool({
-  description: `Show full workspace state with all chunk content. Returns file listing and inline content for every attached chunk.`,
+  description: `Show current workspace contents — all attached code chunks with full source code, line numbers, and metadata.
+Use this to:
+- Check what context is currently loaded after compaction or session restore
+- Verify workspace contents before starting implementation
+- See token budget usage (how much space is left for new searches)
+Returns <workspace_state> with every chunk's full content. This is the same state appended to every search() call.
+Only the LATEST workspace tool output is kept in chat — older outputs are auto-pruned.`,
   args: {},
@@ -32,21 +40,26 @@ export const workspace_list = tool({
 // ── workspace.forget ────────────────────────────────────────────────────────
 export const workspace_forget = tool({
-  description: `Remove chunks from workspace context to optimize context size and focus.
+  description: `Remove chunks from workspace to free token budget and keep context focused on the current task.
-IMPORTANT: Regularly clean up workspace by removing irrelevant files or old search results.
-This keeps context focused and prevents token budget overflow.
+WHEN TO USE:
+- Before searching a new topic — remove old irrelevant context first
+- When workspace is near budget limit — free space for new results
+- After finishing a subtask — remove code you no longer need
+- When context has stale chunks from files you've since edited
-Auto-detects what to remove based on input:
-- Chunk ID: "src/auth.ts:chunk-5"
-- File path: "docs/architecture.md" (removes ALL chunks)
-- Search query: "authentication logic" (removes chunks from this search)
-- Age: "5" (removes chunks older than 5 minutes)
+Auto-detects what to remove based on input format:
+- File path: "docs/architecture.md" → removes ALL chunks from that file
+- Chunk ID: "src/auth.ts:chunk-5" → removes one specific chunk
+- Search query: "authentication logic" → removes all chunks attached by that search
+- Age in minutes: "5" → removes all chunks older than 5 minutes
-Examples:
-- workspace_forget({ what: "docs/prd.md" })
-- workspace_forget({ what: "5" })  // older than 5 min
-- workspace_forget({ what: "src/auth.ts:chunk-3" })`,
+Best practices:
+- Remove by file path when done with a file: workspace_forget({ what: "docs/prd.md" })
+- Remove stale context by age: workspace_forget({ what: "10" })
+- Remove results from previous search: workspace_forget({ what: "old query text" })
+Returns updated workspace state after removal.`,
   args: {
     what: tool.schema.string().describe("What to forget: chunk ID, file path, search query, or age in minutes"),
@@ -110,7 +123,15 @@ Examples:
 // ── workspace.clear ─────────────────────────────────────────────────────────
 export const workspace_clear = tool({
-  description: `Remove ALL chunks from workspace context. Use when switching tasks or starting fresh.`,
+  description: `Remove ALL chunks from workspace — complete reset. Frees entire token budget.
+Use when:
+- Switching to a completely different task or topic
+- Workspace is cluttered with irrelevant context from many searches
+- Starting a fresh investigation from scratch
+Prefer workspace_forget() for selective cleanup. Use workspace_clear() only for full reset.
+Returns empty workspace state.`,
   args: {},
@@ -126,7 +147,15 @@ export const workspace_clear = tool({
 // ── workspace.restore ───────────────────────────────────────────────────────
 export const workspace_restore = tool({
-  description: `Restore workspace from a saved session snapshot. Use after compaction or to switch context.`,
+  description: `Restore workspace from a previously saved session snapshot.
+Use when:
+- After compaction — restore the workspace context from before compaction
+- Resuming work on a previous task — switch back to that context
+- After workspace_clear() — if you need the old context back
+Call without sessionId to list available snapshots with their chunk counts and token sizes.
+Call with sessionId to restore a specific snapshot. Replaces current workspace entirely.`,
   args: {
     sessionId: tool.schema.string().optional().describe("Session ID to restore. If not provided, lists available snapshots."),