@comfanion/usethis_search 3.0.0-dev.15 → 3.0.0-dev.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@comfanion/usethis_search",
|
|
3
|
-
"version": "3.0.0-dev.
|
|
3
|
+
"version": "3.0.0-dev.17",
|
|
4
4
|
"description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
@@ -23,6 +23,8 @@ export interface CodeChunk {
|
|
|
23
23
|
content: string
|
|
24
24
|
function_name?: string
|
|
25
25
|
class_name?: string
|
|
26
|
+
start_line?: number
|
|
27
|
+
end_line?: number
|
|
26
28
|
}
|
|
27
29
|
|
|
28
30
|
// ── Block detection ─────────────────────────────────────────────────────────
|
|
@@ -172,31 +174,74 @@ function findPythonBlockEnd(lines: string[], startLine: number): number {
|
|
|
172
174
|
return lines.length - 1
|
|
173
175
|
}
|
|
174
176
|
|
|
175
|
-
// ── Fallback: line-based splitting ──────────────────────────────────────────
|
|
177
|
+
// ── Fallback: line-based splitting ──────────────────────────────────────────
|
|
178
|
+
|
|
179
|
+
function splitByLines(lines: string[], maxChars: number): CodeChunk[] {
|
|
180
|
+
const chunks: CodeChunk[] = []
|
|
181
|
+
let current: string[] = []
|
|
182
|
+
let currentLen = 0
|
|
183
|
+
let startLine = 0
|
|
184
|
+
|
|
185
|
+
for (let i = 0; i < lines.length; i++) {
|
|
186
|
+
const line = lines[i]
|
|
187
|
+
if (currentLen + line.length + 1 > maxChars && current.length > 0) {
|
|
188
|
+
chunks.push({ content: current.join("\n"), start_line: startLine, end_line: i - 1 })
|
|
189
|
+
current = []
|
|
190
|
+
currentLen = 0
|
|
191
|
+
startLine = i
|
|
192
|
+
}
|
|
193
|
+
current.push(line)
|
|
194
|
+
currentLen += line.length + 1
|
|
195
|
+
}
|
|
176
196
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
let current: string[] = []
|
|
180
|
-
let currentLen = 0
|
|
181
|
-
|
|
182
|
-
for (const line of lines) {
|
|
183
|
-
if (currentLen + line.length + 1 > maxChars && current.length > 0) {
|
|
184
|
-
chunks.push({ content: current.join("\n") })
|
|
185
|
-
current = []
|
|
186
|
-
currentLen = 0
|
|
197
|
+
if (current.length > 0) {
|
|
198
|
+
chunks.push({ content: current.join("\n"), start_line: startLine, end_line: lines.length - 1 })
|
|
187
199
|
}
|
|
188
|
-
current.push(line)
|
|
189
|
-
currentLen += line.length + 1
|
|
190
|
-
}
|
|
191
200
|
|
|
192
|
-
|
|
193
|
-
chunks.push({ content: current.join("\n") })
|
|
201
|
+
return chunks
|
|
194
202
|
}
|
|
195
203
|
|
|
196
|
-
|
|
197
|
-
|
|
204
|
+
// ── Split large chunks preserving line numbers ────────────────────────────
|
|
205
|
+
|
|
206
|
+
function splitChunkByLines(chunk: CodeChunk, maxChars: number): CodeChunk[] {
|
|
207
|
+
const lines = chunk.content.split("\n")
|
|
208
|
+
const baseLine = chunk.start_line || 0
|
|
209
|
+
|
|
210
|
+
const parts: CodeChunk[] = []
|
|
211
|
+
let current: string[] = []
|
|
212
|
+
let currentLen = 0
|
|
213
|
+
let startLine = baseLine
|
|
214
|
+
|
|
215
|
+
for (let i = 0; i < lines.length; i++) {
|
|
216
|
+
const line = lines[i]
|
|
217
|
+
if (currentLen + line.length + 1 > maxChars && current.length > 0) {
|
|
218
|
+
parts.push({
|
|
219
|
+
...chunk,
|
|
220
|
+
content: current.join("\n"),
|
|
221
|
+
start_line: startLine,
|
|
222
|
+
end_line: baseLine + i - 1,
|
|
223
|
+
})
|
|
224
|
+
current = []
|
|
225
|
+
currentLen = 0
|
|
226
|
+
startLine = baseLine + i
|
|
227
|
+
}
|
|
228
|
+
current.push(line)
|
|
229
|
+
currentLen += line.length + 1
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
if (current.length > 0) {
|
|
233
|
+
parts.push({
|
|
234
|
+
...chunk,
|
|
235
|
+
content: current.join("\n"),
|
|
236
|
+
start_line: startLine,
|
|
237
|
+
end_line: baseLine + lines.length - 1,
|
|
238
|
+
})
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
return parts
|
|
242
|
+
}
|
|
198
243
|
|
|
199
|
-
// ── Public API ──────────────────────────────────────────────────────────────
|
|
244
|
+
// ── Public API ──────────────────────────────────────────────────────────────
|
|
200
245
|
|
|
201
246
|
/**
|
|
202
247
|
* Chunk source code by functions/classes.
|
|
@@ -231,12 +276,13 @@ export function chunkCode(
|
|
|
231
276
|
if (block.startLine > lastEnd + 1) {
|
|
232
277
|
const gapContent = lines.slice(lastEnd + 1, block.startLine).join("\n").trim()
|
|
233
278
|
if (gapContent.length >= config.min_chunk_size) {
|
|
234
|
-
chunks.push({ content: gapContent })
|
|
279
|
+
chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
|
|
235
280
|
} else if (gapContent.length > 0 && chunks.length > 0) {
|
|
236
281
|
// Merge small gap with previous chunk
|
|
237
282
|
chunks[chunks.length - 1].content += "\n\n" + gapContent
|
|
283
|
+
chunks[chunks.length - 1].end_line = block.startLine - 1
|
|
238
284
|
} else if (gapContent.length > 0) {
|
|
239
|
-
chunks.push({ content: gapContent })
|
|
285
|
+
chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
|
|
240
286
|
}
|
|
241
287
|
}
|
|
242
288
|
|
|
@@ -259,6 +305,8 @@ export function chunkCode(
|
|
|
259
305
|
chunks.push({
|
|
260
306
|
content: gap,
|
|
261
307
|
class_name: block.name,
|
|
308
|
+
start_line: classLastEnd + 1,
|
|
309
|
+
end_line: method.startLine - 1,
|
|
262
310
|
})
|
|
263
311
|
}
|
|
264
312
|
}
|
|
@@ -267,6 +315,8 @@ export function chunkCode(
|
|
|
267
315
|
content: lines.slice(method.startLine, method.endLine + 1).join("\n"),
|
|
268
316
|
function_name: method.name,
|
|
269
317
|
class_name: block.name,
|
|
318
|
+
start_line: method.startLine,
|
|
319
|
+
end_line: method.endLine,
|
|
270
320
|
})
|
|
271
321
|
classLastEnd = method.endLine
|
|
272
322
|
}
|
|
@@ -275,7 +325,7 @@ export function chunkCode(
|
|
|
275
325
|
if (classLastEnd < block.endLine) {
|
|
276
326
|
const tail = lines.slice(classLastEnd + 1, block.endLine + 1).join("\n").trim()
|
|
277
327
|
if (tail) {
|
|
278
|
-
chunks.push({ content: tail, class_name: block.name })
|
|
328
|
+
chunks.push({ content: tail, class_name: block.name, start_line: classLastEnd + 1, end_line: block.endLine })
|
|
279
329
|
}
|
|
280
330
|
}
|
|
281
331
|
} else {
|
|
@@ -312,9 +362,9 @@ export function chunkCode(
|
|
|
312
362
|
const result: CodeChunk[] = []
|
|
313
363
|
for (const chunk of chunks) {
|
|
314
364
|
if (chunk.content.length > config.max_chunk_size) {
|
|
315
|
-
const parts =
|
|
365
|
+
const parts = splitChunkByLines(chunk, config.max_chunk_size)
|
|
316
366
|
for (const p of parts) {
|
|
317
|
-
result.push(
|
|
367
|
+
result.push(p)
|
|
318
368
|
}
|
|
319
369
|
} else {
|
|
320
370
|
result.push(chunk)
|
|
@@ -22,14 +22,18 @@ export const DEFAULT_MD_CONFIG: MarkdownChunkConfig = {
|
|
|
22
22
|
export interface MarkdownChunk {
|
|
23
23
|
content: string
|
|
24
24
|
heading_context: string // "H1 > H2 > H3"
|
|
25
|
+
start_line?: number
|
|
26
|
+
end_line?: number
|
|
25
27
|
}
|
|
26
28
|
|
|
27
29
|
// ── Internal types ──────────────────────────────────────────────────────────
|
|
28
30
|
|
|
29
31
|
interface Section {
|
|
30
|
-
level: number //
|
|
32
|
+
level: number //1-6 for headings, 0 for preamble
|
|
31
33
|
heading: string
|
|
32
34
|
body: string
|
|
35
|
+
start_line: number
|
|
36
|
+
end_line: number
|
|
33
37
|
}
|
|
34
38
|
|
|
35
39
|
// ── Parsing ─────────────────────────────────────────────────────────────────
|
|
@@ -38,19 +42,23 @@ interface Section {
|
|
|
38
42
|
function parseSections(content: string): Section[] {
|
|
39
43
|
const lines = content.split("\n")
|
|
40
44
|
const sections: Section[] = []
|
|
41
|
-
let currentSection: Section = { level: 0, heading: "", body: "" }
|
|
45
|
+
let currentSection: Section = { level: 0, heading: "", body: "", start_line: 0, end_line: 0 }
|
|
42
46
|
|
|
43
|
-
for (
|
|
47
|
+
for (let i = 0; i < lines.length; i++) {
|
|
48
|
+
const line = lines[i]
|
|
44
49
|
const headingMatch = line.match(/^(#{1,6})\s+(.+)$/)
|
|
45
50
|
if (headingMatch) {
|
|
46
51
|
// Push previous section
|
|
47
52
|
if (currentSection.body.trim() || currentSection.heading) {
|
|
53
|
+
currentSection.end_line = i - 1
|
|
48
54
|
sections.push(currentSection)
|
|
49
55
|
}
|
|
50
56
|
currentSection = {
|
|
51
57
|
level: headingMatch[1].length,
|
|
52
58
|
heading: headingMatch[2].trim(),
|
|
53
59
|
body: "",
|
|
60
|
+
start_line: i,
|
|
61
|
+
end_line: 0,
|
|
54
62
|
}
|
|
55
63
|
} else {
|
|
56
64
|
currentSection.body += line + "\n"
|
|
@@ -59,6 +67,7 @@ function parseSections(content: string): Section[] {
|
|
|
59
67
|
|
|
60
68
|
// Push last section
|
|
61
69
|
if (currentSection.body.trim() || currentSection.heading) {
|
|
70
|
+
currentSection.end_line = lines.length - 1
|
|
62
71
|
sections.push(currentSection)
|
|
63
72
|
}
|
|
64
73
|
|
|
@@ -97,6 +106,45 @@ function splitLargeText(text: string, maxSize: number): string[] {
|
|
|
97
106
|
return chunks
|
|
98
107
|
}
|
|
99
108
|
|
|
109
|
+
function splitLargeTextWithLines(text: string, maxSize: number, startLine: number): Array<{ content: string; start_line: number; end_line: number }> {
|
|
110
|
+
if (text.length <= maxSize) {
|
|
111
|
+
const lines = text.split("\n")
|
|
112
|
+
return [{ content: text, start_line: startLine, end_line: startLine + lines.length - 1 }]
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
const chunks: Array<{ content: string; start_line: number; end_line: number }> = []
|
|
116
|
+
const lines = text.split("\n")
|
|
117
|
+
let current: string[] = []
|
|
118
|
+
let currentLen = 0
|
|
119
|
+
let chunkStartLine = startLine
|
|
120
|
+
|
|
121
|
+
for (let i = 0; i < lines.length; i++) {
|
|
122
|
+
const line = lines[i]
|
|
123
|
+
if (currentLen + line.length + 1 > maxSize && current.length > 0) {
|
|
124
|
+
chunks.push({
|
|
125
|
+
content: current.join("\n"),
|
|
126
|
+
start_line: chunkStartLine,
|
|
127
|
+
end_line: startLine + i - 1,
|
|
128
|
+
})
|
|
129
|
+
current = []
|
|
130
|
+
currentLen = 0
|
|
131
|
+
chunkStartLine = startLine + i
|
|
132
|
+
}
|
|
133
|
+
current.push(line)
|
|
134
|
+
currentLen += line.length + 1
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
if (current.length > 0) {
|
|
138
|
+
chunks.push({
|
|
139
|
+
content: current.join("\n"),
|
|
140
|
+
start_line: chunkStartLine,
|
|
141
|
+
end_line: startLine + lines.length - 1,
|
|
142
|
+
})
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
return chunks
|
|
146
|
+
}
|
|
147
|
+
|
|
100
148
|
// ── Public API ──────────────────────────────────────────────────────────────
|
|
101
149
|
|
|
102
150
|
/**
|
|
@@ -138,7 +186,12 @@ export function chunkMarkdown(
|
|
|
138
186
|
? `${"#".repeat(section.level)} ${section.heading}\n${section.body}`
|
|
139
187
|
: section.body
|
|
140
188
|
|
|
141
|
-
rawChunks.push({
|
|
189
|
+
rawChunks.push({
|
|
190
|
+
content: sectionText.trim(),
|
|
191
|
+
heading_context: headingContext,
|
|
192
|
+
start_line: section.start_line,
|
|
193
|
+
end_line: section.end_line,
|
|
194
|
+
})
|
|
142
195
|
}
|
|
143
196
|
|
|
144
197
|
// Merge small sections with previous
|
|
@@ -150,7 +203,11 @@ export function chunkMarkdown(
|
|
|
150
203
|
) {
|
|
151
204
|
const prev = merged[merged.length - 1]
|
|
152
205
|
prev.content += "\n\n" + chunk.content
|
|
153
|
-
//
|
|
206
|
+
// Merge end_line
|
|
207
|
+
if (chunk.end_line !== undefined) {
|
|
208
|
+
prev.end_line = chunk.end_line
|
|
209
|
+
}
|
|
210
|
+
// Keep deepest heading context
|
|
154
211
|
if (chunk.heading_context) {
|
|
155
212
|
prev.heading_context = chunk.heading_context
|
|
156
213
|
}
|
|
@@ -163,9 +220,14 @@ export function chunkMarkdown(
|
|
|
163
220
|
const result: MarkdownChunk[] = []
|
|
164
221
|
for (const chunk of merged) {
|
|
165
222
|
if (chunk.content.length > config.max_chunk_size) {
|
|
166
|
-
const parts =
|
|
223
|
+
const parts = splitLargeTextWithLines(chunk.content, config.max_chunk_size, chunk.start_line || 0)
|
|
167
224
|
for (const part of parts) {
|
|
168
|
-
result.push({
|
|
225
|
+
result.push({
|
|
226
|
+
content: part.content,
|
|
227
|
+
heading_context: chunk.heading_context,
|
|
228
|
+
start_line: part.start_line,
|
|
229
|
+
end_line: part.end_line,
|
|
230
|
+
})
|
|
169
231
|
}
|
|
170
232
|
} else {
|
|
171
233
|
result.push(chunk)
|
|
@@ -41,7 +41,7 @@ export class GraphBuilder {
|
|
|
41
41
|
content: string,
|
|
42
42
|
chunks: ChunkWithId[],
|
|
43
43
|
fileType: "code" | "docs"
|
|
44
|
-
): Promise<
|
|
44
|
+
): Promise<number> {
|
|
45
45
|
let relations: Array<RegexRelation | LSPRelation> = []
|
|
46
46
|
|
|
47
47
|
if (fileType === "docs") {
|
|
@@ -74,6 +74,7 @@ export class GraphBuilder {
|
|
|
74
74
|
}))
|
|
75
75
|
|
|
76
76
|
await this.graphDB.putEdges(triples)
|
|
77
|
+
return triples.length
|
|
77
78
|
}
|
|
78
79
|
|
|
79
80
|
resolveChunkId(filePath: string, line: number): string | null {
|
package/vectorizer/index.ts
CHANGED
|
@@ -562,7 +562,23 @@ class CodebaseIndexer {
|
|
|
562
562
|
|
|
563
563
|
// v3: Delete old edges for this file and build new ones
|
|
564
564
|
await this.graphDB.deleteByFile(relPath);
|
|
565
|
-
await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
|
|
565
|
+
const graphEdgesBuilt = await this.graphBuilder.buildEdges(relPath, content, chunksWithIds, fileMeta.file_type);
|
|
566
|
+
|
|
567
|
+
// Log graph creation to indexer.log
|
|
568
|
+
if (graphEdgesBuilt > 0 || DEBUG) {
|
|
569
|
+
const timestamp = new Date().toISOString().slice(11, 19);
|
|
570
|
+
const logMsg = `${timestamp} Graph built: ${relPath} (${chunksWithIds.length} chunks)`;
|
|
571
|
+
if (DEBUG) console.log(`[vectorizer] ${logMsg}`);
|
|
572
|
+
|
|
573
|
+
// Write to indexer.log in .opencode directory
|
|
574
|
+
try {
|
|
575
|
+
const logPath = path.join(this.root, ".opencode", "indexer.log");
|
|
576
|
+
const fsSync = await import("fs");
|
|
577
|
+
fsSync.appendFileSync(logPath, `${logMsg}\n`);
|
|
578
|
+
} catch {
|
|
579
|
+
// non-fatal — logging is advisory
|
|
580
|
+
}
|
|
581
|
+
}
|
|
566
582
|
|
|
567
583
|
// FR-054: Store graph build timestamp + file hash as metadata triple
|
|
568
584
|
try {
|
|
@@ -590,6 +606,9 @@ class CodebaseIndexer {
|
|
|
590
606
|
function_name: chunksWithIds[i].function_name || "",
|
|
591
607
|
class_name: chunksWithIds[i].class_name || "",
|
|
592
608
|
tags: (fileMeta.tags || []).join(","),
|
|
609
|
+
// Line numbers for "from-to" extraction
|
|
610
|
+
start_line: chunksWithIds[i].start_line,
|
|
611
|
+
end_line: chunksWithIds[i].end_line,
|
|
593
612
|
});
|
|
594
613
|
}
|
|
595
614
|
|