npm - @comfanion/usethis_search - Versions diffs - 4.2.0-dev.4 → 4.3.0-dev.1 - Mend

@comfanion/usethis_search 4.2.0-dev.4 → 4.3.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/hooks/message-before.ts +229 -9
package/hooks/tool-substitution.ts +167 -11
package/index.ts +2 -3
package/package.json +3 -2
package/tools/read-interceptor.ts +149 -0
package/tools/search.ts +140 -75
package/tools/workspace.ts +52 -77
package/vectorizer/chunkers/markdown-chunker.ts +70 -4
package/vectorizer.yaml +1 -0

package/vectorizer/chunkers/markdown-chunker.ts CHANGED Viewed

@@ -10,6 +10,7 @@ export interface MarkdownChunkConfig {
   max_chunk_size: number   // split sections larger than this (chars)
   split_by_headings: boolean
   preserve_heading_hierarchy: boolean
+  skip_low_priority: boolean  // Skip low-priority sections (SQL, aggregates, etc.)
 }
 export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
@@ -17,6 +18,7 @@ export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
   max_chunk_size: 8000,  // Large chunks for docs (SQL schemas, API specs, etc.)
   split_by_headings: true,
   preserve_heading_hierarchy: true,
+  skip_low_priority: true,  // Skip SQL schemas, aggregates, views by default
 }
 export interface MarkdownChunk {
@@ -24,6 +26,7 @@ export interface MarkdownChunk {
   heading_context: string   // "H1 > H2 > H3"
   start_line?: number
   end_line?: number
+  priority?: "high" | "normal" | "low"  // Chunk priority for ranking
 }
 // ── Internal types ──────────────────────────────────────────────────────────
@@ -34,6 +37,55 @@ interface Section {
   body: string
   start_line: number
   end_line: number
+  priority: "high" | "normal" | "low"
+}
+// ── Priority detection ──────────────────────────────────────────────────────
+/**
+ * Detect if heading indicates low-priority content (SQL schemas, aggregates, etc.)
+ * These sections are often "noise" when searching for business logic.
+ *
+ * Matching is case-insensitive (the heading is lowercased first) and purely
+ * substring-based: a keyword matches anywhere inside the heading text, not
+ * only on word boundaries.
+ *
+ * @param heading - Heading text to classify.
+ * @returns `true` if the heading contains any of the low-priority keywords
+ *   below, otherwise `false`.
+ */
+function isLowPriorityHeading(heading: string): boolean {
+  const lower = heading.toLowerCase()
+  // SQL-related sections (schemas, DDL, migrations, aggregates, materialized views)
+  if (lower.includes("sql schema") ||
+      lower.includes("database schema") ||
+      lower.includes("continuous aggregate") ||
+      lower.includes("materialized view") ||
+      lower.includes("ddl") ||  // NOTE(review): bare substring, so it also matches "Middleware" / "Middle"; confirm this is intended
+      lower.includes("migration")) {  // NOTE(review): also matches headings such as "Migration Guide"; confirm
+    return true
+  }
+  // Generated/auto-generated content
+  if (lower.includes("auto-generated") ||
+      lower.includes("generated schema") ||
+      lower.includes("api reference") && lower.includes("generated")) {  // && binds tighter than ||: "api reference" counts only together with "generated"
+    return true
+  }
+  // Large reference tables (often boilerplate)
+  if (lower.includes("full reference") ||
+      lower.includes("complete list") ||
+      lower.includes("all endpoints")) {
+    return true
+  }
+  return false
+}
+/**
+ * Determine the priority level for a section from its heading and body.
+ *
+ * Checks run in order and the first match wins:
+ *   1. "low"    - the heading is classified by isLowPriorityHeading().
+ *   2. "high"   - the body contains a triple-backtick code fence and is
+ *                 shorter than 2000 characters.
+ *   3. "normal" - everything else.
+ *
+ * @param heading - Heading text of the section.
+ * @param body - Body text of the section.
+ * @returns The priority label for the section.
+ */
+function getSectionPriority(heading: string, body: string): "high" | "normal" | "low" {
+  if (isLowPriorityHeading(heading)) return "low"  // heading check takes precedence over the body heuristic below
+  // High-priority: short sections (< 2000 chars) containing a code fence, presumably tutorial/guide snippets
+  if (body.includes("```") && body.length < 2000) {
+    return "high"
+  }
+  return "normal"
 }
 // ── Parsing ─────────────────────────────────────────────────────────────────
@@ -42,15 +94,16 @@ interface Section {
 function parseSections(content: string): Section[] {
   const lines = content.split("\n")
   const sections: Section[] = []
-  let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0 }
+  let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0, priority: "normal" }
   for (let i = 0; i < lines.length; i++) {
     const line = lines[i]
     const headingMatch = line.match(/^(#{1,6})\s+(.+)$/)
     if (headingMatch) {
-      // Push previous section
+      // Push previous section (with priority calculated)
       if (currentSection.body.trim() || currentSection.heading) {
         currentSection.end_line = i - 1
+        currentSection.priority = getSectionPriority(currentSection.heading, currentSection.body)
         sections.push(currentSection)
       }
       currentSection = {
@@ -59,15 +112,17 @@ function parseSections(content: string): Section[] {
         body: "",
         start_line: i,
         end_line: 0,
+        priority: "normal",  // Will be calculated when section ends
       }
     } else {
       currentSection.body += line + "\n"
     }
   }
-  // Push last section
+  // Push last section (with priority calculated)
   if (currentSection.body.trim() || currentSection.heading) {
     currentSection.end_line = lines.length - 1
+    currentSection.priority = getSectionPriority(currentSection.heading, currentSection.body)
     sections.push(currentSection)
   }
@@ -191,12 +246,18 @@ export function chunkMarkdown(
       heading_context: headingContext,
       start_line: section.start_line,
       end_line: section.end_line,
+      priority: section.priority,
     })
   }
+  // Filter low-priority sections if configured
+  const filteredChunks = config.skip_low_priority
+    ? rawChunks.filter(chunk => chunk.priority !== "low")
+    : rawChunks
   // Merge small sections with previous
   const merged: MarkdownChunk[] = []
-  for (const chunk of rawChunks) {
+  for (const chunk of filteredChunks) {
     if (
       merged.length > 0 &&
       chunk.content.length < config.min_chunk_size
@@ -211,6 +272,10 @@ export function chunkMarkdown(
       if (chunk.heading_context) {
         prev.heading_context = chunk.heading_context
       }
+      // Keep highest priority (high > normal > low)
+      if (chunk.priority === "high" || (chunk.priority === "normal" && prev.priority === "low")) {
+        prev.priority = chunk.priority
+      }
     } else {
       merged.push({ ...chunk })
     }
@@ -227,6 +292,7 @@ export function chunkMarkdown(
           heading_context: chunk.heading_context,
           start_line: part.start_line,
           end_line: part.end_line,
+          priority: chunk.priority,  // Inherit priority from parent chunk
         })
       }
     } else {

package/vectorizer.yaml CHANGED Viewed

@@ -26,6 +26,7 @@ vectorizer:
       min_chunk_size: 1000  # Merge small sections (avoid header-only chunks)
       max_chunk_size: 8000  # Large chunks for docs (SQL schemas, API specs, etc.)
       preserve_heading_hierarchy: true
+      skip_low_priority: true  # Skip SQL schemas, continuous aggregates (default: true)
     code:
       split_by_functions: true
       include_function_signature: true