npm - @comfanion/usethis_search - Versions diffs - 4.4.0 → 4.5.1 - Mend

@comfanion/usethis_search 4.4.0 → 4.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/api.ts +34 -17
package/cache/manager.ts +30 -19
package/cli.ts +8 -5
package/file-indexer.ts +28 -11
package/hooks/message-before.ts +5 -5
package/hooks/tool-substitution.ts +4 -120
package/index.ts +17 -6
package/package.json +3 -2
package/tools/codeindex.ts +192 -184
package/tools/graph.ts +265 -0
package/tools/read-interceptor.ts +7 -3
package/tools/search.ts +268 -190
package/tools/workspace-state.ts +1 -2
package/tools/workspace.ts +76 -108
package/vectorizer/analyzers/lsp-client.ts +52 -6
package/vectorizer/chunkers/chunker-factory.ts +6 -0
package/vectorizer/chunkers/code-chunker.ts +73 -16
package/vectorizer/chunkers/lsp-chunker.ts +313 -191
package/vectorizer/graph-db.ts +6 -4
package/vectorizer/index.ts +329 -134
package/vectorizer/usage-tracker.ts +36 -0
package/vectorizer.yaml +2 -2

package/tools/workspace-state.ts CHANGED

@@ -16,8 +16,7 @@ import { workspaceCache, type WorkspaceEntry } from "../cache/manager.ts"
  * Build the full workspace state output.
  * Contains all chunks grouped by file with full content and metadata.
  *
- * Called by search(), workspace_list(), workspace_forget(),
- * workspace_clear(), workspace_restore().
+ * Called by search(), list(), forget(), clear(), explore().
  *
  * Returns a <workspace_state> XML block that the agent can reference.
  * The block is self-contained — all chunk content is inline.

package/tools/workspace.ts CHANGED

@@ -1,13 +1,13 @@
 /**
- * Workspace Management Tools (v2 — context-efficient)
+ * Workspace Management Tools (v3 — short names)
  *
  * Manual control over the workspace cache:
- *   workspace_list    — show full workspace state with chunk content
- *   workspace_forget  — remove chunks, return updated state
- *   workspace_clear   — remove all chunks, return empty state
- *   workspace_restore — restore a saved session snapshot, return state
+ *   list   — show full workspace state with chunk content
+ *   forget — remove chunks, return updated state
+ *   clear  — remove all chunks, return empty state
  *
- * v2: Each tool returns full workspace state inline (via buildWorkspaceOutput).
+ * v3: Short tool names (dropped "workspace_" prefix for brevity).
+ *     Each tool returns full workspace state inline (via buildWorkspaceOutput).
  *     Previous tool outputs are pruned from history by message-before hook.
  *     No injection — workspace lives only in the latest tool output.
  */
@@ -19,7 +19,7 @@ import { buildWorkspaceOutput } from "./workspace-state.ts"
 // ── workspace.list ──────────────────────────────────────────────────────────
-export const workspace_list = tool({
+export const list = tool({
   description: `Show current workspace contents — all attached code chunks with full source code, line numbers, and metadata.
 Use this to:
@@ -39,7 +39,7 @@ Only the LATEST workspace tool output is kept in chat — older outputs are auto
 // ── workspace.forget ────────────────────────────────────────────────────────
-export const workspace_forget = tool({
+export const forget = tool({
   description: `Remove chunks from workspace context to optimize context size and focus.
 IMPORTANT: Regularly clean up workspace by removing irrelevant files or old search results.
@@ -47,85 +47,97 @@ This keeps context focused and prevents token budget overflow.
 WHEN TO CLEAN UP:
 - BEFORE searching a new topic — forget the previous search results first:
-  workspace_forget({ what: "previous search query" }) → then search({ query: "new topic" })
+  forget({ queries: ["previous search query"] }) → then search({ query: "new topic" })
 - AFTER finishing a subtask — forget files you no longer need
-- WHEN budget >60% — evict old chunks: workspace_forget({ what: "5" })
+- WHEN budget >60% — evict old chunks: forget({ queries: ["5"] })
 - AFTER editing files — workspace chunks become stale, forget and re-search
-Auto-detects what to remove based on input:
+Auto-detects what to remove based on each item:
 - Chunk ID: "src/auth.ts:chunk-5"
-- File path: "docs/architecture.md" (removes ALL chunks)
+- File path: "docs/architecture.md" (removes ALL chunks from file)
 - Search query: "authentication logic" (removes chunks from this search)
 - Age: "5" (removes chunks older than 5 minutes)
+Supports multiple items in one call — forget several files/queries at once.
 Examples:
-- workspace_forget({ what: "docs/prd.md" })
-- workspace_forget({ what: "5" })  // older than 5 min
-- workspace_forget({ what: "src/auth.ts:chunk-3" })
-- workspace_forget({ what: "authentication logic" })  // forget previous search`,
+- forget({ queries: ["docs/prd.md"] })
+- forget({ queries: ["5"] })  // older than 5 min
+- forget({ queries: ["src/auth.ts", "src/types/User.ts"] })  // forget two files
+- forget({ queries: ["authentication logic", "old-file.ts", "5"] })  // mix types`,
   args: {
-    what: tool.schema.string().describe("What to forget: chunk ID, file path, search query, or age in minutes"),
+    queries: tool.schema.array(tool.schema.string()).describe("Items to forget: chunk IDs, file paths, search queries, or age in minutes"),
   },
   async execute(args) {
-    let removed = 0
-    let summary = ""
-    // Auto-detect what to remove
-    // 1. Check if it's a chunk ID (contains ":chunk-")
-    if (args.what.includes(":chunk-")) {
-      const entry = workspaceCache.get(args.what)
-      if (!entry) {
-        return `Chunk "${args.what}" not found in workspace.` + buildWorkspaceOutput()
-      }
-      removed = workspaceCache.detach(args.what) ? 1 : 0
-      if (removed === 0) {
-        return `Failed to remove chunk "${args.what}".` + buildWorkspaceOutput()
-      }
-      summary = `Removed chunk "${args.what}" from workspace.`
-    }
-    // 2. Check if it's a number (age in minutes)
-    else if (args.what.match(/^(\d+)$/)) {
-      const minutes = parseInt(args.what, 10)
-      removed = workspaceCache.detachOlderThan(minutes * 60 * 1000)
-      summary = `Removed ${removed} chunk(s) older than ${minutes} minutes.`
+    const items: string[] = args.queries && args.queries.length > 0
+      ? args.queries
+      : []
+    if (items.length === 0) {
+      return `Error: queries is required` + buildWorkspaceOutput()
     }
-    // 3. Check if it's a file path (has extension or common path prefixes)
-    else if (
-      args.what.match(/\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i) ||
-      args.what.match(/^(src|docs|internal|pkg|lib|app|pages|components|api)\//i) ||
-      args.what.includes("/")
-    ) {
-      const fileChunks = workspaceCache.getChunksByPath(args.what)
-      if (fileChunks.length === 0) {
-        return `File "${args.what}" not found in workspace.` + buildWorkspaceOutput()
+    const summaries: string[] = []
+    let totalRemoved = 0
+    for (const item of items) {
+      let removed = 0
+      // 1. Chunk ID (contains ":chunk-" or starts with "chunk:")
+      if (item.includes(":chunk-") || item.startsWith("chunk:")) {
+        removed = workspaceCache.detach(item) ? 1 : 0
+        if (removed > 0) {
+          summaries.push(`"${item}" — removed`)
+        } else {
+          summaries.push(`"${item}" — not found`)
+        }
       }
-      removed = workspaceCache.detachByPath(args.what)
-      if (removed === 0) {
-        return `Failed to remove chunks from "${args.what}".` + buildWorkspaceOutput()
+      // 2. Number (age in minutes)
+      else if (item.match(/^(\d+)$/)) {
+        const minutes = parseInt(item, 10)
+        removed = workspaceCache.detachOlderThan(minutes * 60 * 1000)
+        summaries.push(`older than ${minutes}min — ${removed} removed`)
       }
-      summary = `Removed ${removed} chunk(s) from "${args.what}".`
-    }
-    // 4. Otherwise, treat as search query
-    else {
-      removed = workspaceCache.detachByQuery(args.what)
-      if (removed === 0) {
-        return `No chunks found attached by query "${args.what}".` + buildWorkspaceOutput()
+      // 3. File path
+      else if (
+        item.match(/\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i) ||
+        item.match(/^(src|docs|internal|pkg|lib|app|pages|components|api)\//i) ||
+        item.includes("/")
+      ) {
+        removed = workspaceCache.detachByPath(item)
+        if (removed > 0) {
+          summaries.push(`"${item}" — ${removed} chunk(s) removed`)
+        } else {
+          summaries.push(`"${item}" — not in workspace`)
+        }
+      }
+      // 4. Search query
+      else {
+        removed = workspaceCache.detachByQuery(item)
+        if (removed > 0) {
+          summaries.push(`search "${item}" — ${removed} chunk(s) removed`)
+        } else {
+          summaries.push(`search "${item}" — no matches`)
+        }
       }
-      summary = `Removed ${removed} chunk(s) from search "${args.what}".`
+      totalRemoved += removed
     }
-    return summary + buildWorkspaceOutput()
+    let output = `Removed ${totalRemoved} chunk(s):\n`
+    for (const s of summaries) {
+      output += `- ${s}\n`
+    }
+    output += buildWorkspaceOutput()
+    return output
   },
 })
 // ── workspace.clear ─────────────────────────────────────────────────────────
-export const workspace_clear = tool({
+export const clear = tool({
   description: `Remove ALL chunks from workspace context. Use when switching tasks or starting fresh.
 Use when:
@@ -133,7 +145,7 @@ Use when:
 - Workspace is cluttered with irrelevant context from many searches
 - Starting a fresh investigation from scratch
-Prefer workspace_forget() for selective cleanup. Use workspace_clear() only for full reset.
+Prefer forget() for selective cleanup. Use clear() only for full reset.
 Returns empty workspace state.`,
   args: {},
@@ -147,48 +159,4 @@ Returns empty workspace state.`,
   },
 })
-// ── workspace.restore ───────────────────────────────────────────────────────
-export const workspace_restore = tool({
-  description: `Restore workspace from a previously saved session snapshot.
-Use when:
-- After compaction — restore the workspace context from before compaction
-- Resuming work on a previous task — switch back to that context
-- After workspace_clear() — if you need the old context back
-Call without sessionId to list available snapshots with their chunk counts and token sizes.
-Call with sessionId to restore a specific snapshot. Replaces current workspace entirely.`,
-  args: {
-    sessionId: tool.schema.string().optional().describe("Session ID to restore. If not provided, lists available snapshots."),
-  },
-  async execute(args) {
-    if (!args.sessionId) {
-      // List available snapshots (no workspace state needed — just metadata)
-      const snapshots = await workspaceCache.listSnapshots()
-      if (snapshots.length === 0) {
-        return `No saved workspace snapshots found.`
-      }
-      let output = `## Saved Workspace Snapshots\n\n`
-      for (const snap of snapshots) {
-        const date = new Date(snap.savedAt).toLocaleString()
-        output += `- **${snap.id}** — ${snap.chunkCount} chunks, ${snap.totalTokens.toLocaleString()} tokens — ${date}\n`
-      }
-      output += `\nUse \`workspace_restore("session-id")\` to restore.`
-      return output
-    }
-    // Restore specific snapshot
-    const restored = await workspaceCache.restore(args.sessionId)
-    if (!restored) {
-      return `Snapshot "${args.sessionId}" not found or empty.`
-    }
-    return `Restored workspace from "${args.sessionId}".` + buildWorkspaceOutput()
-  },
-})

package/vectorizer/analyzers/lsp-client.ts CHANGED

@@ -129,7 +129,7 @@ function encodeMessage(body: object): Buffer {
 export class LSPClient {
   private proc: ChildProcess | null = null
   private requestId = 0
-  private pending = new Map<number, { resolve: (v: any) => void; reject: (e: Error) => void }>()
+  private pending = new Map<number, { resolve: (v: any) => void; reject: (e: Error) => void; timer: ReturnType<typeof setTimeout> }>()
   private buffer = Buffer.alloc(0)
   private initialized = false
   private serverConfig: ServerConfig | null = null
@@ -176,14 +176,24 @@ export class LSPClient {
       // Silently consume stderr — language servers are chatty
     })
     this.proc.on("error", (err) => {
-      // Reject all pending
-      for (const p of this.pending.values()) p.reject(err)
+      // Clear all timers + reject all pending
+      for (const p of this.pending.values()) {
+        clearTimeout(p.timer)
+        p.reject(err)
+      }
       this.pending.clear()
     })
     this.proc.on("exit", () => {
-      for (const p of this.pending.values()) p.reject(new Error("LSP server exited"))
+      for (const p of this.pending.values()) {
+        clearTimeout(p.timer)
+        p.reject(new Error("LSP server exited"))
+      }
       this.pending.clear()
       this.initialized = false
+      // Release buffer memory on exit
+      this.buffer = Buffer.alloc(0)
+      this._pendingChunks = []
+      this._pendingLen = 0
     })
     // LSP initialize handshake
@@ -205,16 +215,28 @@ export class LSPClient {
     this.initialized = true
   }
-  /** Shut down gracefully. */
+  /** Shut down gracefully — clears all timers, pending requests, and buffers. */
   async stop(): Promise<void> {
     if (!this.proc || !this.initialized) return
     try {
       await this.sendRequest("shutdown", null)
       this.sendNotification("exit", null)
     } catch { /* best effort */ }
+    // Clear all pending request timers to prevent leaks
+    for (const p of this.pending.values()) {
+      clearTimeout(p.timer)
+    }
+    this.pending.clear()
     this.proc.kill()
     this.proc = null
     this.initialized = false
+    // Release buffer memory
+    this.buffer = Buffer.alloc(0)
+    this._pendingChunks = []
+    this._pendingLen = 0
   }
   // ---- LSP helpers --------------------------------------------------------
@@ -293,6 +315,7 @@ export class LSPClient {
       }, this.timeoutMs)
       this.pending.set(id, {
+        timer,
         resolve: (v: any) => { clearTimeout(timer); resolve(v) },
         reject: (e: Error) => { clearTimeout(timer); reject(e) },
       })
@@ -308,11 +331,33 @@ export class LSPClient {
   }
   private onData(chunk: Buffer): void {
-    this.buffer = Buffer.concat([this.buffer, chunk])
+    // Accumulate incoming chunks in a list — avoids Buffer.concat on every data event
+    this._pendingChunks.push(chunk)
+    this._pendingLen += chunk.length
     this.processBuffer()
   }
+  /** Pending incoming chunks not yet merged into main buffer */
+  private _pendingChunks: Buffer[] = []
+  private _pendingLen = 0
+  /** Merge pending chunks into main buffer only when we need to parse */
+  private compactBuffer(): void {
+    if (this._pendingChunks.length === 0) return
+    if (this.buffer.length === 0) {
+      this.buffer = this._pendingChunks.length === 1
+        ? this._pendingChunks[0]
+        : Buffer.concat(this._pendingChunks, this._pendingLen)
+    } else {
+      this.buffer = Buffer.concat([this.buffer, ...this._pendingChunks], this.buffer.length + this._pendingLen)
+    }
+    this._pendingChunks = []
+    this._pendingLen = 0
+  }
   private processBuffer(): void {
+    this.compactBuffer()
     while (true) {
       // Look for Content-Length header
       const headerEnd = this.buffer.indexOf("\r\n\r\n")
@@ -349,6 +394,7 @@ export class LSPClient {
     if (msg.id != null && this.pending.has(msg.id)) {
       const p = this.pending.get(msg.id)!
       this.pending.delete(msg.id)
+      clearTimeout(p.timer)
       if (msg.error) {
         p.reject(new Error(`LSP error ${msg.error.code}: ${msg.error.message}`))
       } else {

package/vectorizer/chunkers/chunker-factory.ts CHANGED

@@ -36,6 +36,8 @@ export interface UnifiedChunk {
   heading_context?: string
   function_name?: string
   class_name?: string
+  start_line?: number
+  end_line?: number
 }
 // ── Fixed chunker (legacy) ──────────────────────────────────────────────────
@@ -103,6 +105,8 @@ export async function chunkContent(
             content: c.content,
             function_name: c.function_name,
             class_name: c.class_name,
+            start_line: c.start_line,
+            end_line: c.end_line,
           }))
         }
       } catch (error) {
@@ -119,6 +123,8 @@ export async function chunkContent(
       content: c.content,
       function_name: c.function_name,
       class_name: c.class_name,
+      start_line: c.start_line,
+      end_line: c.end_line,
     }))
   }

package/vectorizer/chunkers/code-chunker.ts CHANGED

@@ -272,21 +272,38 @@ export function chunkCode(
     // If there is class-level block, skip individual method-level duplicate
     if (block.type === "method") continue
-    // Gap before this block
+    // Gap before this block — check if it's JSDoc/comments that belong to the block
+    let blockStartLine = block.startLine
     if (block.startLine > lastEnd + 1) {
-      const gapContent = lines.slice(lastEnd + 1, block.startLine).join("\n").trim()
-      if (gapContent.length >= config.min_chunk_size) {
+      const gapLines = lines.slice(lastEnd + 1, block.startLine)
+      const gapContent = gapLines.join("\n").trim()
+      // Check if gap has real code (not just comments/whitespace/braces)
+      const hasCode = gapLines.some(l => {
+        const t = l.trim()
+        return t.length > 0
+          && !t.startsWith("//") && !t.startsWith("/*") && !t.startsWith("*") && !t.startsWith("*/")
+          && !t.startsWith("#") && !t.startsWith("<!--")
+          && !/^[{}()\[\];,]+$/.test(t)
+      })
+      if (hasCode && gapContent.length >= config.min_chunk_size) {
+        // Gap has real code — keep as separate chunk
         chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
-      } else if (gapContent.length > 0 && chunks.length > 0) {
-        // Merge small gap with previous chunk
+      } else if (hasCode && gapContent.length > 0 && chunks.length > 0) {
+        // Small gap with code — merge with previous chunk
         chunks[chunks.length - 1].content += "\n\n" + gapContent
         chunks[chunks.length - 1].end_line = block.startLine - 1
-      } else if (gapContent.length > 0) {
+      } else if (hasCode && gapContent.length > 0) {
+        // First chunk, small gap with code
         chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
+      } else if (gapContent.length > 0) {
+        // Gap is only comments/JSDoc — merge with the block below
+        blockStartLine = lastEnd + 1
       }
     }
-    const blockContent = lines.slice(block.startLine, block.endLine + 1).join("\n")
+    const blockContent = lines.slice(blockStartLine, block.endLine + 1).join("\n")
     if (blockContent.length > config.max_chunk_size && block.type === "class") {
       // Split class into methods
@@ -295,27 +312,43 @@ export function chunkCode(
       )
       if (methods.length > 0) {
-        let classLastEnd = block.startLine
+        let classLastEnd = blockStartLine
         for (const method of methods) {
-          // Class preamble / gap before method
+          // Gap before method — check if it's JSDoc/comments that belong to the method
+          let methodStartLine = method.startLine
           if (method.startLine > classLastEnd + 1) {
-            const gap = lines.slice(classLastEnd + 1, method.startLine).join("\n").trim()
-            if (gap) {
+            const gapLines = lines.slice(classLastEnd + 1, method.startLine)
+            const gapContent = gapLines.join("\n").trim()
+            // Check if gap is ONLY comments/whitespace — if so, merge with method
+            const hasCode = gapLines.some(l => {
+              const t = l.trim()
+              return t.length > 0
+                && !t.startsWith("//") && !t.startsWith("/*") && !t.startsWith("*") && !t.startsWith("*/")
+                && !t.startsWith("#") && !t.startsWith("<!--")
+                && !/^[{}()\[\];,]+$/.test(t)
+            })
+            if (hasCode && gapContent.length >= config.min_chunk_size) {
+              // Gap has real code (not just comments) — keep as separate chunk
               chunks.push({
-                content: gap,
+                content: gapContent,
                 class_name: block.name,
                 start_line: classLastEnd + 1,
                 end_line: method.startLine - 1,
               })
+            } else {
+              // Gap is JSDoc/comments — merge with method by extending start
+              methodStartLine = classLastEnd + 1
             }
           }
           chunks.push({
-            content: lines.slice(method.startLine, method.endLine + 1).join("\n"),
+            content: lines.slice(methodStartLine, method.endLine + 1).join("\n"),
             function_name: method.name,
             class_name: block.name,
-            start_line: method.startLine,
+            start_line: methodStartLine,
             end_line: method.endLine,
           })
           classLastEnd = method.endLine
@@ -344,6 +377,8 @@ export function chunkCode(
         content: blockContent,
         function_name: block.type === "function" ? block.name : undefined,
         class_name: block.type === "class" ? block.name : block.className,
+        start_line: blockStartLine,
+        end_line: block.endLine,
       })
     }
@@ -354,7 +389,7 @@ export function chunkCode(
   if (lastEnd < lines.length - 1) {
     const trailing = lines.slice(lastEnd + 1).join("\n").trim()
     if (trailing.length > 0) {
-      chunks.push({ content: trailing })
+      chunks.push({ content: trailing, start_line: lastEnd + 1, end_line: lines.length - 1 })
     }
   }
@@ -371,5 +406,27 @@ export function chunkCode(
     }
   }
-  return result.filter((c) => c.content.trim().length > 0)
+  // Filter out empty chunks and trivial ones (single-line comments, separators)
+  // Keep chunks with function/class names regardless of size (they're meaningful)
+  return result.filter((c) => {
+    const trimmed = c.content.trim()
+    if (trimmed.length === 0) return false
+    // Keep any chunk with a function or class name — it's a real code block
+    if (c.function_name || c.class_name) return true
+    // Filter out tiny "gap" chunks (comment separators, blank lines, single imports)
+    // These are noise that pollute search results
+    if (trimmed.length < 50) return false
+    // Filter out chunks that are ONLY comments, braces, and whitespace (no real code)
+    const meaningfulLines = trimmed.split("\n").filter(l => {
+      const t = l.trim()
+      if (t.length === 0) return false
+      // Skip comment lines
+      if (t.startsWith("//") || t.startsWith("/*") || t.startsWith("*") || t.startsWith("#") || t.startsWith("<!--")) return false
+      // Skip lines that are only braces/punctuation (closing }, ], ), etc.)
+      if (/^[{}()\[\];,]+$/.test(t)) return false
+      return true
+    })
+    if (meaningfulLines.length === 0 && trimmed.length < 300) return false
+    return true
+  })
 }