@comfanion/usethis_search 4.2.0-dev.4 → 4.3.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/message-before.ts +229 -9
- package/hooks/tool-substitution.ts +167 -11
- package/index.ts +2 -3
- package/package.json +3 -2
- package/tools/read-interceptor.ts +149 -0
- package/tools/search.ts +140 -75
- package/tools/workspace.ts +52 -77
- package/vectorizer/chunkers/markdown-chunker.ts +70 -4
- package/vectorizer.yaml +1 -0
|
@@ -10,6 +10,7 @@ export interface MarkdownChunkConfig {
|
|
|
10
10
|
max_chunk_size: number // split sections larger than this (chars)
|
|
11
11
|
split_by_headings: boolean
|
|
12
12
|
preserve_heading_hierarchy: boolean
|
|
13
|
+
skip_low_priority: boolean // Skip low-priority sections (SQL, aggregates, etc.)
|
|
13
14
|
}
|
|
14
15
|
|
|
15
16
|
export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
|
|
@@ -17,6 +18,7 @@ export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
|
|
|
17
18
|
max_chunk_size: 8000, // Large chunks for docs (SQL schemas, API specs, etc.)
|
|
18
19
|
split_by_headings: true,
|
|
19
20
|
preserve_heading_hierarchy: true,
|
|
21
|
+
skip_low_priority: true, // Skip SQL schemas, aggregates, views by default
|
|
20
22
|
}
|
|
21
23
|
|
|
22
24
|
export interface MarkdownChunk {
|
|
@@ -24,6 +26,7 @@ export interface MarkdownChunk {
|
|
|
24
26
|
heading_context: string // "H1 > H2 > H3"
|
|
25
27
|
start_line?: number
|
|
26
28
|
end_line?: number
|
|
29
|
+
priority?: "high" | "normal" | "low" // Chunk priority for ranking
|
|
27
30
|
}
|
|
28
31
|
|
|
29
32
|
// ── Internal types ──────────────────────────────────────────────────────────
|
|
@@ -34,6 +37,55 @@ interface Section {
|
|
|
34
37
|
body: string
|
|
35
38
|
start_line: number
|
|
36
39
|
end_line: number
|
|
40
|
+
priority: "high" | "normal" | "low"
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// ── Priority detection ──────────────────────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
/**
 * Detect if heading indicates low-priority content (SQL schemas, aggregates, etc.)
 * These sections are often "noise" when searching for business logic.
 *
 * Matching is case-insensitive. Multi-word phrases are matched as substrings.
 * The acronym "DDL" is matched only as a standalone token (not surrounded by
 * other letters), so headings such as "Middleware" — which merely contain the
 * letters "ddl" — are not misclassified as SQL content.
 *
 * @param heading - Heading text of a markdown section (without the leading `#`s).
 * @returns `true` when the heading matches one of the low-priority patterns.
 */
function isLowPriorityHeading(heading: string): boolean {
  const lower = heading.toLowerCase()
  const containsAny = (phrases: readonly string[]): boolean =>
    phrases.some(phrase => lower.includes(phrase))

  // SQL-related sections (schemas, DDL, migrations)
  if (containsAny([
    "sql schema",
    "database schema",
    "continuous aggregate",
    "materialized view",
    "migration",
  ])) {
    return true
  }

  // "ddl" must not be embedded in a longer word ("middleware", "saddle", ...).
  if (/(?:^|[^a-z])ddl(?![a-z])/.test(lower)) {
    return true
  }

  // Generated/auto-generated content
  if (containsAny(["auto-generated", "generated schema"]) ||
      (lower.includes("api reference") && lower.includes("generated"))) {
    return true
  }

  // Large reference tables (often boilerplate)
  return containsAny(["full reference", "complete list", "all endpoints"])
}
|
|
78
|
+
|
|
79
|
+
/**
 * Classify a section as "low", "high" or "normal" priority.
 *
 * A heading flagged by `isLowPriorityHeading` always yields "low". Otherwise a
 * body that contains a fenced-code marker and is shorter than 2000 characters
 * yields "high"; everything else is "normal".
 *
 * @param heading - Heading text of the section.
 * @param body - Section text below the heading.
 * @returns The priority level assigned to the section.
 */
function getSectionPriority(heading: string, body: string): "high" | "normal" | "low" {
  if (isLowPriorityHeading(heading)) {
    return "low"
  }

  // Short sections carrying a code fence (tutorials, guides) rank highest.
  const hasCodeFence = body.indexOf("```") !== -1
  const isShort = body.length < 2000
  return hasCodeFence && isShort ? "high" : "normal"
}
|
|
38
90
|
|
|
39
91
|
// ── Parsing ─────────────────────────────────────────────────────────────────
|
|
@@ -42,15 +94,16 @@ interface Section {
|
|
|
42
94
|
function parseSections(content: string): Section[] {
|
|
43
95
|
const lines = content.split("\n")
|
|
44
96
|
const sections: Section[] = []
|
|
45
|
-
let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0 }
|
|
97
|
+
let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0, priority: "normal" }
|
|
46
98
|
|
|
47
99
|
for (let i = 0; i < lines.length; i++) {
|
|
48
100
|
const line = lines[i]
|
|
49
101
|
const headingMatch = line.match(/^(#{1,6})\s+(.+)$/)
|
|
50
102
|
if (headingMatch) {
|
|
51
|
-
// Push previous section
|
|
103
|
+
// Push previous section (with priority calculated)
|
|
52
104
|
if (currentSection.body.trim() || currentSection.heading) {
|
|
53
105
|
currentSection.end_line = i - 1
|
|
106
|
+
currentSection.priority = getSectionPriority(currentSection.heading, currentSection.body)
|
|
54
107
|
sections.push(currentSection)
|
|
55
108
|
}
|
|
56
109
|
currentSection = {
|
|
@@ -59,15 +112,17 @@ function parseSections(content: string): Section[] {
|
|
|
59
112
|
body: "",
|
|
60
113
|
start_line: i,
|
|
61
114
|
end_line: 0,
|
|
115
|
+
priority: "normal", // Will be calculated when section ends
|
|
62
116
|
}
|
|
63
117
|
} else {
|
|
64
118
|
currentSection.body += line + "\n"
|
|
65
119
|
}
|
|
66
120
|
}
|
|
67
121
|
|
|
68
|
-
// Push last section
|
|
122
|
+
// Push last section (with priority calculated)
|
|
69
123
|
if (currentSection.body.trim() || currentSection.heading) {
|
|
70
124
|
currentSection.end_line = lines.length - 1
|
|
125
|
+
currentSection.priority = getSectionPriority(currentSection.heading, currentSection.body)
|
|
71
126
|
sections.push(currentSection)
|
|
72
127
|
}
|
|
73
128
|
|
|
@@ -191,12 +246,18 @@ export function chunkMarkdown(
|
|
|
191
246
|
heading_context: headingContext,
|
|
192
247
|
start_line: section.start_line,
|
|
193
248
|
end_line: section.end_line,
|
|
249
|
+
priority: section.priority,
|
|
194
250
|
})
|
|
195
251
|
}
|
|
196
252
|
|
|
253
|
+
// Filter low-priority sections if configured
|
|
254
|
+
const filteredChunks = config.skip_low_priority
|
|
255
|
+
? rawChunks.filter(chunk => chunk.priority !== "low")
|
|
256
|
+
: rawChunks
|
|
257
|
+
|
|
197
258
|
// Merge small sections with previous
|
|
198
259
|
const merged: MarkdownChunk[] = []
|
|
199
|
-
for (const chunk of
|
|
260
|
+
for (const chunk of filteredChunks) {
|
|
200
261
|
if (
|
|
201
262
|
merged.length > 0 &&
|
|
202
263
|
chunk.content.length < config.min_chunk_size
|
|
@@ -211,6 +272,10 @@ export function chunkMarkdown(
|
|
|
211
272
|
if (chunk.heading_context) {
|
|
212
273
|
prev.heading_context = chunk.heading_context
|
|
213
274
|
}
|
|
275
|
+
// Keep highest priority (high > normal > low)
|
|
276
|
+
if (chunk.priority === "high" || (chunk.priority === "normal" && prev.priority === "low")) {
|
|
277
|
+
prev.priority = chunk.priority
|
|
278
|
+
}
|
|
214
279
|
} else {
|
|
215
280
|
merged.push({ ...chunk })
|
|
216
281
|
}
|
|
@@ -227,6 +292,7 @@ export function chunkMarkdown(
|
|
|
227
292
|
heading_context: chunk.heading_context,
|
|
228
293
|
start_line: part.start_line,
|
|
229
294
|
end_line: part.end_line,
|
|
295
|
+
priority: chunk.priority, // Inherit priority from parent chunk
|
|
230
296
|
})
|
|
231
297
|
}
|
|
232
298
|
} else {
|
package/vectorizer.yaml
CHANGED
|
@@ -26,6 +26,7 @@ vectorizer:
|
|
|
26
26
|
min_chunk_size: 1000 # Merge small sections (avoid header-only chunks)
|
|
27
27
|
max_chunk_size: 8000 # Large chunks for docs (SQL schemas, API specs, etc.)
|
|
28
28
|
preserve_heading_hierarchy: true
|
|
29
|
+
skip_low_priority: true # Skip SQL schemas, continuous aggregates (default: true)
|
|
29
30
|
code:
|
|
30
31
|
split_by_functions: true
|
|
31
32
|
include_function_signature: true
|