npm - @comfanion/usethis_search - Versions diffs - 3.0.0-dev.15 → 3.0.0-dev.17 - Mend

@comfanion/usethis_search 3.0.0-dev.15 → 3.0.0-dev.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json +1 -1
package/vectorizer/chunkers/code-chunker.ts +74 -24
package/vectorizer/chunkers/markdown-chunker.ts +69 -7
package/vectorizer/graph-builder.ts +2 -1
package/vectorizer/index.ts +20 -1

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "3.0.0-dev.15",
+  "version": "3.0.0-dev.17",
   "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
   "type": "module",
   "main": "./index.ts",

package/vectorizer/chunkers/code-chunker.ts CHANGED

@@ -23,6 +23,8 @@ export interface CodeChunk {
   content: string
   function_name?: string
   class_name?: string
+  start_line?: number
+  end_line?: number
 }
 // ── Block detection ─────────────────────────────────────────────────────────
@@ -172,31 +174,74 @@ function findPythonBlockEnd(lines: string[], startLine: number): number {
   return lines.length - 1
 }
-// ── Fallback: line-based splitting ──────────────────────────────────────────
+  // ── Fallback: line-based splitting ──────────────────────────────────────────
+  function splitByLines(lines: string[], maxChars: number): CodeChunk[] {
+    const chunks: CodeChunk[] = []
+    let current: string[] = []
+    let currentLen = 0
+    let startLine = 0
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i]
+      if (currentLen + line.length + 1 > maxChars && current.length > 0) {
+        chunks.push({ content: current.join("\n"), start_line: startLine, end_line: i - 1 })
+        current = []
+        currentLen = 0
+        startLine = i
+      }
+      current.push(line)
+      currentLen += line.length + 1
+    }
-function splitByLines(lines: string[], maxChars: number): CodeChunk[] {
-  const chunks: CodeChunk[] = []
-  let current: string[] = []
-  let currentLen = 0
-  for (const line of lines) {
-    if (currentLen + line.length + 1 > maxChars && current.length > 0) {
-      chunks.push({ content: current.join("\n") })
-      current = []
-      currentLen = 0
+    if (current.length > 0) {
+      chunks.push({ content: current.join("\n"), start_line: startLine, end_line: lines.length - 1 })
     }
-    current.push(line)
-    currentLen += line.length + 1
-  }
-  if (current.length > 0) {
-    chunks.push({ content: current.join("\n") })
+    return chunks
   }
-  return chunks
-}
+  // ── Split large chunks preserving line numbers ────────────────────────────
+  function splitChunkByLines(chunk: CodeChunk, maxChars: number): CodeChunk[] {
+    const lines = chunk.content.split("\n")
+    const baseLine = chunk.start_line || 0
+    const parts: CodeChunk[] = []
+    let current: string[] = []
+    let currentLen = 0
+    let startLine = baseLine
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i]
+      if (currentLen + line.length + 1 > maxChars && current.length > 0) {
+        parts.push({
+          ...chunk,
+          content: current.join("\n"),
+          start_line: startLine,
+          end_line: baseLine + i - 1,
+        })
+        current = []
+        currentLen = 0
+        startLine = baseLine + i
+      }
+      current.push(line)
+      currentLen += line.length + 1
+    }
+    if (current.length > 0) {
+      parts.push({
+        ...chunk,
+        content: current.join("\n"),
+        start_line: startLine,
+        end_line: baseLine + lines.length - 1,
+      })
+    }
+    return parts
+  }
-// ── Public API ──────────────────────────────────────────────────────────────
+  // ── Public API ──────────────────────────────────────────────────────────────
 /**
  * Chunk source code by functions/classes.
@@ -231,12 +276,13 @@ export function chunkCode(
     if (block.startLine > lastEnd + 1) {
       const gapContent = lines.slice(lastEnd + 1, block.startLine).join("\n").trim()
       if (gapContent.length >= config.min_chunk_size) {
-        chunks.push({ content: gapContent })
+        chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
       } else if (gapContent.length > 0 && chunks.length > 0) {
         // Merge small gap with previous chunk
         chunks[chunks.length - 1].content += "\n\n" + gapContent
+        chunks[chunks.length - 1].end_line = block.startLine - 1
       } else if (gapContent.length > 0) {
-        chunks.push({ content: gapContent })
+        chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
       }
     }
@@ -259,6 +305,8 @@ export function chunkCode(
               chunks.push({
                 content: gap,
                 class_name: block.name,
+                start_line: classLastEnd + 1,
+                end_line: method.startLine - 1,
               })
             }
           }
@@ -267,6 +315,8 @@ export function chunkCode(
             content: lines.slice(method.startLine, method.endLine + 1).join("\n"),
             function_name: method.name,
             class_name: block.name,
+            start_line: method.startLine,
+            end_line: method.endLine,
           })
           classLastEnd = method.endLine
         }
@@ -275,7 +325,7 @@ export function chunkCode(
         if (classLastEnd < block.endLine) {
           const tail = lines.slice(classLastEnd + 1, block.endLine + 1).join("\n").trim()
           if (tail) {
-            chunks.push({ content: tail, class_name: block.name })
+            chunks.push({ content: tail, class_name: block.name, start_line: classLastEnd + 1, end_line: block.endLine })
           }
         }
       } else {
@@ -312,9 +362,9 @@ export function chunkCode(
   const result: CodeChunk[] = []
   for (const chunk of chunks) {
     if (chunk.content.length > config.max_chunk_size) {
-      const parts = splitByLines(chunk.content.split("\n"), config.max_chunk_size)
+      const parts = splitChunkByLines(chunk, config.max_chunk_size)
       for (const p of parts) {
-        result.push({ ...chunk, content: p.content })
+        result.push(p)
       }
     } else {
       result.push(chunk)

package/vectorizer/chunkers/markdown-chunker.ts CHANGED

@@ -22,14 +22,18 @@ export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
 export interface MarkdownChunk {
   content: string
   heading_context: string   // "H1 > H2 > H3"
+  start_line?: number
+  end_line?: number
 }
 // ── Internal types ──────────────────────────────────────────────────────────
 interface Section {
-  level: number       // 1-6 for headings, 0 for preamble
+  level: number       //1-6 for headings, 0 for preamble
   heading: string
   body: string
+  start_line: number
+  end_line: number
 }
 // ── Parsing ─────────────────────────────────────────────────────────────────
@@ -38,19 +42,23 @@ interface Section {
 function parseSections(content: string): Section[] {
   const lines = content.split("\n")
   const sections: Section[] = []
-  let currentSection: Section = { level: 0, heading: "", body: "" }
+  let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0 }
-  for (const line of lines) {
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i]
     const headingMatch = line.match(/^(#{1,6})\s+(.+)$/)
     if (headingMatch) {
       // Push previous section
       if (currentSection.body.trim() || currentSection.heading) {
+        currentSection.end_line = i - 1
         sections.push(currentSection)
       }
       currentSection = {
         level: headingMatch[1].length,
         heading: headingMatch[2].trim(),
         body: "",
+        start_line: i,
+        end_line: 0,
       }
     } else {
       currentSection.body += line + "\n"
@@ -59,6 +67,7 @@ function parseSections(content: string): Section[] {
   // Push last section
   if (currentSection.body.trim() || currentSection.heading) {
+    currentSection.end_line = lines.length - 1
     sections.push(currentSection)
   }
@@ -97,6 +106,45 @@ function splitLargeText(text: string, maxSize: number): string[] {
   return chunks
 }
+function splitLargeTextWithLines(text: string, maxSize: number, startLine: number): Array<{ content: string; start_line: number; end_line: number }> {
+  if (text.length <= maxSize) {
+    const lines = text.split("\n")
+    return [{ content: text, start_line: startLine, end_line: startLine + lines.length - 1 }]
+  }
+  const chunks: Array<{ content: string; start_line: number; end_line: number }> = []
+  const lines = text.split("\n")
+  let current: string[] = []
+  let currentLen = 0
+  let chunkStartLine = startLine
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i]
+    if (currentLen + line.length + 1 > maxSize && current.length > 0) {
+      chunks.push({
+        content: current.join("\n"),
+        start_line: chunkStartLine,
+        end_line: startLine + i - 1,
+      })
+      current = []
+      currentLen = 0
+      chunkStartLine = startLine + i
+    }
+    current.push(line)
+    currentLen += line.length + 1
+  }
+  if (current.length > 0) {
+    chunks.push({
+      content: current.join("\n"),
+      start_line: chunkStartLine,
+      end_line: startLine + lines.length - 1,
+    })
+  }
+  return chunks
+}
 // ── Public API ──────────────────────────────────────────────────────────────
 /**
@@ -138,7 +186,12 @@ export function chunkMarkdown(
       ? `${"#".repeat(section.level)} ${section.heading}\n${section.body}`
       : section.body
-    rawChunks.push({ content: sectionText.trim(), heading_context: headingContext })
+    rawChunks.push({
+      content: sectionText.trim(),
+      heading_context: headingContext,
+      start_line: section.start_line,
+      end_line: section.end_line,
+    })
   }
   // Merge small sections with previous
@@ -150,7 +203,11 @@ export function chunkMarkdown(
     ) {
       const prev = merged[merged.length - 1]
       prev.content += "\n\n" + chunk.content
-      // Keep the deepest heading context
+      // Merge end_line
+      if (chunk.end_line !== undefined) {
+        prev.end_line = chunk.end_line
+      }
+      // Keep deepest heading context
       if (chunk.heading_context) {
         prev.heading_context = chunk.heading_context
       }
@@ -163,9 +220,14 @@ export function chunkMarkdown(
   const result: MarkdownChunk[] = []
   for (const chunk of merged) {
     if (chunk.content.length > config.max_chunk_size) {
-      const parts = splitLargeText(chunk.content, config.max_chunk_size)
+      const parts = splitLargeTextWithLines(chunk.content, config.max_chunk_size, chunk.start_line || 0)
       for (const part of parts) {
-        result.push({ content: part, heading_context: chunk.heading_context })
+        result.push({
+          content: part.content,
+          heading_context: chunk.heading_context,
+          start_line: part.start_line,
+          end_line: part.end_line,
+        })
       }
     } else {
       result.push(chunk)

package/vectorizer/graph-builder.ts CHANGED

@@ -41,7 +41,7 @@ export class GraphBuilder {
     content: string,
     chunks: ChunkWithId[],
     fileType: "code" | "docs"
-  ): Promise<void> {
+  ): Promise<number> {
     let relations: Array<RegexRelation | LSPRelation> = []
     if (fileType === "docs") {
@@ -74,6 +74,7 @@ export class GraphBuilder {
     }))
     await this.graphDB.putEdges(triples)
+    return triples.length
   }
   resolveChunkId(filePath: string, line: number): string | null {

package/vectorizer/index.ts CHANGED

@@ -562,7 +562,23 @@ class CodebaseIndexer {
     // v3: Delete old edges for this file and build new ones
     await this.graphDB.deleteByFile(relPath);
-    await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
+    const graphEdgesBuilt = await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
+    // Log graph creation to indexer.log
+    if (graphEdgesBuilt > 0 || DEBUG) {
+      const timestamp = new Date().toISOString().slice(11, 19);
+      const logMsg = `${timestamp} Graph built: ${relPath} (${chunksWithIds.length} chunks)`;
+      if (DEBUG) console.log(`[vectorizer] ${logMsg}`);
+      // Write to indexer.log in .opencode directory
+      try {
+        const logPath = path.join(this.root, ".opencode", "indexer.log");
+        const fsSync = await import("fs");
+        fsSync.appendFileSync(logPath, `${logMsg}\n`);
+      } catch {
+        // non-fatal — logging is advisory
+      }
+    }
     // FR-054: Store graph build timestamp + file hash as metadata triple
     try {
@@ -590,6 +606,9 @@ class CodebaseIndexer {
         function_name: chunksWithIds[i].function_name || "",
         class_name: chunksWithIds[i].class_name || "",
         tags: (fileMeta.tags || []).join(","),
+        // Line numbers for "from-to" extraction
+        start_line: chunksWithIds[i].start_line,
+        end_line: chunksWithIds[i].end_line,
       });
     }