@comfanion/usethis_search 3.0.0-dev.16 → 3.0.0-dev.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/vectorizer/analyzers/lsp-analyzer.ts +7 -7
- package/vectorizer/analyzers/regex-analyzer.ts +173 -67
- package/vectorizer/chunkers/code-chunker.ts +74 -24
- package/vectorizer/chunkers/markdown-chunker.ts +69 -7
- package/vectorizer/graph-builder.ts +207 -15
- package/vectorizer/graph-db.ts +70 -47
- package/vectorizer/index.ts +111 -23
- package/vectorizer.yaml +16 -0
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@comfanion/usethis_search",
-  "version": "3.0.0-dev.16",
+  "version": "3.0.0-dev.18",
   "description": "OpenCode plugin: semantic search with graph-based context (v3: graph relations, 1-hop context, LSP + regex analyzers)",
   "type": "module",
   "main": "./index.ts",

package/vectorizer/analyzers/lsp-analyzer.ts
CHANGED

@@ -12,7 +12,7 @@

 import path from "path"
 import fs from "fs/promises"
-import { ChunkWithId } from "../graph-builder"
+import { ChunkWithId, buildDefaultChunkId } from "../graph-builder"
 import { LSPClient, LSPSymbolInformation, SymbolKind } from "./lsp-client"

 export interface Relation {

@@ -252,7 +252,9 @@ export class LSPAnalyzer {
     return result
   }

-  /** Convert LSP location URI + line → chunk_id.
+  /** Convert LSP location URI + line → chunk_id.
+   * For same-file refs, resolves to exact chunk by line.
+   * For cross-file refs, returns the default (first) chunk of the target file. */
   private locationToChunkId(currentFile: string, uri: string, line: number, root: string): string | null {
     // uri = file:///absolute/path/to/file.ts
     const filePath = uri.startsWith("file://") ? uri.slice(7) : uri

@@ -261,11 +263,9 @@ export class LSPAnalyzer {
     // Skip external files (node_modules, etc.)
     if (relPath.startsWith("..") || relPath.includes("node_modules")) return null

-
-
-
-    // For same-file, we could be more precise but chunk 0 is sufficient for graph
-    return `chunk_${normalized}_0`
+    // Same file → use findChunkForPosition (called separately with chunks)
+    // Cross-file → default chunk
+    return buildDefaultChunkId(relPath)
   }

   private findChunkForPosition(chunks: ChunkWithId[], line: number): string | null {
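
The new buildDefaultChunkId helper is imported from ../graph-builder, whose diff is not part of this excerpt. Judging from the inline logic it replaces here (and from the removed findFirstChunkInFile further down in regex-analyzer.ts), it most likely just centralises the chunk-0 id construction; a minimal sketch under that assumption:

// Hypothetical reconstruction of buildDefaultChunkId (graph-builder.ts is outside this excerpt);
// it mirrors the removed inline logic: sanitise the project-relative path and point at chunk 0.
export function buildDefaultChunkId(relPath: string): string {
  const normalized = relPath.replace(/[^a-zA-Z0-9]/g, "_")
  return `chunk_${normalized}_0`
}

// buildDefaultChunkId("src/utils.ts") → "chunk_src_utils_ts_0"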

package/vectorizer/analyzers/regex-analyzer.ts
CHANGED

@@ -1,5 +1,6 @@
 import path from "path"
-import { ChunkWithId } from "../graph-builder"
+import fs from "fs"
+import { ChunkWithId, buildDefaultChunkId } from "../graph-builder"

 export interface Relation {
   from: string

@@ -10,14 +11,127 @@ export interface Relation {
   line?: number
 }

+// ── Module resolution ─────────────────────────────────────────────────────────
+
+/** Extensions to try when resolving JS/TS imports (in order). */
+const JS_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"]
+
+/** Extensions to try when resolving Python imports. */
+const PY_EXTENSIONS = [".py"]
+
+/**
+ * Resolve a relative import to an actual file on disk.
+ *
+ * Follows Node.js / TypeScript module resolution:
+ * 1. Exact path (has extension) → check exists
+ * 2. Try each extension: `target.ts`, `target.tsx`, ...
+ * 3. Try directory index: `target/index.ts`, `target/index.tsx`, ...
+ *
+ * For Python:
+ * 1. `target.py`
+ * 2. `target/__init__.py`
+ *
+ * Fallback: if nothing exists on disk, infer extension from the source file
+ * (so offline / unit-test scenarios still produce useful edges).
+ *
+ * Returns a project-relative path (e.g. `src/utils.ts`) or null.
+ */
+function resolveModulePath(
+  projectRoot: string,
+  sourceFile: string,
+  importSpecifier: string,
+  language: "js" | "python" | "markdown",
+): string | null {
+  const dir = path.dirname(path.resolve(projectRoot, sourceFile))
+  const base = path.resolve(dir, importSpecifier)
+
+  // Security: must stay inside project root
+  if (!base.startsWith(projectRoot)) return null
+
+  const hasExtension = !!path.extname(base)
+
+  // 1. If specifier already has an extension, check it directly
+  if (hasExtension) {
+    if (fileExists(base)) return path.relative(projectRoot, base)
+    // Even if the exact file doesn't exist, return the relative path so we
+    // can still build a "best effort" edge (e.g. markdown link to ./api.md
+    // in a test without real files).
+    return path.relative(projectRoot, base)
+  }
+
+  const exts = language === "python" ? PY_EXTENSIONS : JS_EXTENSIONS
+
+  // 2. Try appending each extension
+  for (const ext of exts) {
+    const candidate = base + ext
+    if (fileExists(candidate)) return path.relative(projectRoot, candidate)
+  }
+
+  // 3. Try directory index files
+  const indexNames = language === "python" ? ["__init__.py"] : exts.map(e => "index" + e)
+  for (const idx of indexNames) {
+    const candidate = path.join(base, idx)
+    if (fileExists(candidate)) return path.relative(projectRoot, candidate)
+  }
+
+  // 4. Fallback: infer extension from source file
+  // `app.ts` imports `./utils` → assume `utils.ts`
+  const sourceExt = path.extname(sourceFile)
+  if (sourceExt && exts.includes(sourceExt)) {
+    return path.relative(projectRoot, base + sourceExt)
+  }
+
+  // Last resort for Python: assume .py
+  if (language === "python") {
+    return path.relative(projectRoot, base + ".py")
+  }
+
+  return null
+}
+
+/** Synchronous file-exists check (cheap for module resolution). */
+function fileExists(absPath: string): boolean {
+  try {
+    return fs.statSync(absPath).isFile()
+  } catch {
+    return false
+  }
+}
+
+/**
+ * Convert Python relative import specifier to a path.
+ * `.utils` → `./utils`
+ * `..utils` → `../utils`
+ * `...pkg` → `../../pkg`
+ */
+function pythonRelativeToPath(spec: string): string {
+  const match = spec.match(/^(\.+)(.*)$/)
+  if (!match) return spec
+  const dots = match[1].length   // number of leading dots
+  const module = match[2]        // remainder, e.g. "utils"
+  // 1 dot = current dir "./", 2 dots = "../", 3 = "../../", ...
+  const prefix = dots === 1 ? "./" : "../".repeat(dots - 1)
+  // Module part: dots→slashes (e.g. "pkg.sub" → "pkg/sub")
+  const modulePath = module.replace(/\./g, "/")
+  return prefix + modulePath
+}
+
+// ── RegexAnalyzer ────────────────────────────────────────────────────────────
+
 export class RegexAnalyzer {
+  private projectRoot: string
+
   private readonly patterns = {
     jsImports: /import\s+(?:\{[^}]+\}|\w+)\s+from\s+['"]([^'"]+)['"]/g,
     pythonFromImport: /from\s+(\S+)\s+import/g,
     pythonImport: /import\s+(\S+)/g,
     extends: /class\s+\w+\s+extends\s+(\w+)/g,
     implements: /class\s+\w+\s+implements\s+([^{]+)/g,
-    markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g
+    markdownLink: /\[([^\]]+)\]\(([^)]+)\)/g,
+  }
+
+  constructor(projectRoot?: string) {
+    this.projectRoot = projectRoot || process.cwd()
   }

   analyzeCode(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
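
The resolution order documented above can be traced on a made-up layout; the paths below are illustrative only and not part of the package:

// Hypothetical project layout, used only to trace resolveModulePath's fallback order:
//   /repo/src/app.ts
//   /repo/src/utils/index.ts
resolveModulePath("/repo", "src/app.ts", "./utils", "js")
//   → "src/utils/index.ts"   (step 3: no src/utils.<ext> exists, so the directory index wins)
resolveModulePath("/repo", "src/app.ts", "./missing", "js")
//   → "src/missing.ts"       (step 4: nothing on disk, extension inferred from app.ts)
pythonRelativeToPath("..pkg.utils")
//   → "../pkg/utils"         (two leading dots → one "../", remaining dots become "/")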

@@ -25,9 +139,9 @@
     const ext = path.extname(filePath)
     const lines = content.split("\n")

-    if ([".js", ".ts", ".jsx", ".tsx"].includes(ext)) {
+    if ([".js", ".ts", ".jsx", ".tsx", ".mjs", ".cjs"].includes(ext)) {
       this.analyzeJSCode(content, lines, filePath, chunks, relations)
-    } else if (
+    } else if (ext === ".py") {
       this.analyzePythonCode(content, lines, filePath, chunks, relations)
     }

@@ -37,17 +151,14 @@
   analyzeMarkdown(filePath: string, content: string, chunks: ChunkWithId[]): Relation[] {
     const relations: Relation[] = []
     const lines = content.split("\n")
-    const dir = path.dirname(filePath)

     let match
     this.patterns.markdownLink.lastIndex = 0
     while ((match = this.patterns.markdownLink.exec(content)) !== null) {
-      const linkText = match[1]
       const linkTarget = match[2]
       const lineIndex = content.substring(0, match.index).split("\n").length - 1
-      const line = lines[lineIndex]

-      const targetPath = this.
+      const targetPath = this.resolveMarkdownLink(filePath, linkTarget)
       if (!targetPath) continue

       const fromChunkId = this.findChunkForLine(chunks, lineIndex)

@@ -61,7 +172,7 @@
         predicate: "links_to",
         weight: 1.0,
         source: "markdown",
-        line: lineIndex
+        line: lineIndex,
       })
     }
   }

@@ -69,33 +180,34 @@ export class RegexAnalyzer {
     return relations
   }

-
+  // ── JS / TS ───────────────────────────────────────────────────────────────
+
+  private analyzeJSCode(
+    content: string, lines: string[], filePath: string,
+    chunks: ChunkWithId[], relations: Relation[],
+  ) {
     let match

     this.patterns.jsImports.lastIndex = 0
     while ((match = this.patterns.jsImports.exec(content)) !== null) {
       const importPath = match[1]
       const lineIndex = content.substring(0, match.index).split("\n").length - 1
-      const line = lines[lineIndex]

       if (importPath.startsWith(".")) {
-        const targetPath = this.
+        const targetPath = resolveModulePath(this.projectRoot, filePath, importPath, "js")
         if (!targetPath) continue

         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
         if (!fromChunkId) continue

-
-
-
-
-
-
-
-
-          line: lineIndex
-        })
-      }
+        relations.push({
+          from: fromChunkId,
+          to: buildDefaultChunkId(targetPath),
+          predicate: "imports",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex,
+        })
       }
     }

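
With the loop above, every relative JS/TS import is routed through resolveModulePath and buildDefaultChunkId. On the hypothetical layout from the earlier sketch, a single import line produces roughly this edge (the exact chunk-id format comes from graph-builder.ts, outside this excerpt):

// Illustrative edge only; chunk ids are assumptions based on the removed chunk_<sanitized>_0 scheme.
const edge: Relation = {
  from: "chunk_src_app_ts_0",                     // chunk containing the import statement
  to: buildDefaultChunkId("src/utils/index.ts"),  // e.g. "chunk_src_utils_index_ts_0"
  predicate: "imports",
  weight: 0.8,
  source: "regex",
  line: 0,
}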

@@ -115,7 +227,7 @@
         predicate: "extends",
         weight: 0.8,
         source: "regex",
-        line: lineIndex
+        line: lineIndex,
       })
     }
   }

@@ -137,14 +249,19 @@
             predicate: "implements",
             weight: 0.8,
             source: "regex",
-            line: lineIndex
+            line: lineIndex,
           })
         }
       }
     }
   }

-
+  // ── Python ────────────────────────────────────────────────────────────────
+
+  private analyzePythonCode(
+    content: string, lines: string[], filePath: string,
+    chunks: ChunkWithId[], relations: Relation[],
+  ) {
     let match

     this.patterns.pythonFromImport.lastIndex = 0

@@ -153,23 +270,21 @@
       const lineIndex = content.substring(0, match.index).split("\n").length - 1

       if (importPath.startsWith(".")) {
-        const
+        const pyPath = pythonRelativeToPath(importPath)
+        const targetPath = resolveModulePath(this.projectRoot, filePath, pyPath, "python")
         if (!targetPath) continue

         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
         if (!fromChunkId) continue

-
-
-
-
-
-
-
-
-          line: lineIndex
-        })
-      }
+        relations.push({
+          from: fromChunkId,
+          to: buildDefaultChunkId(targetPath),
+          predicate: "imports",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex,
+        })
       }
     }
   }

@@ -179,42 +294,38 @@
       const lineIndex = content.substring(0, match.index).split("\n").length - 1

       if (importPath.startsWith(".")) {
-        const
+        const pyPath = pythonRelativeToPath(importPath)
+        const targetPath = resolveModulePath(this.projectRoot, filePath, pyPath, "python")
         if (!targetPath) continue

         const fromChunkId = this.findChunkForLine(chunks, lineIndex)
         if (!fromChunkId) continue

-
-
-
-
-
-
-
-
-          line: lineIndex
-        })
-      }
+        relations.push({
+          from: fromChunkId,
+          to: buildDefaultChunkId(targetPath),
+          predicate: "imports",
+          weight: 0.8,
+          source: "regex",
+          line: lineIndex,
+        })
       }
     }
   }

-
-    try {
-      const dir = path.dirname(filePath)
-      const absoluteTarget = path.resolve(dir, target)
+  // ── Markdown link resolution ──────────────────────────────────────────────

-
-
-
+  private resolveMarkdownLink(filePath: string, target: string): string | null {
+    // Strip anchor (#section)
+    const hashIdx = target.indexOf("#")
+    const cleanTarget = hashIdx >= 0 ? target.substring(0, hashIdx) : target
+    if (!cleanTarget) return null

-
-    } catch {
-      return null
-    }
+    return resolveModulePath(this.projectRoot, filePath, cleanTarget, "markdown")
   }

+  // ── Chunk lookup helpers ──────────────────────────────────────────────────
+
   private findChunkForLine(chunks: ChunkWithId[], lineIndex: number): string | null {
     for (const chunk of chunks) {
       if (chunk.start_line !== undefined && chunk.end_line !== undefined) {
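
resolveMarkdownLink strips any anchor and then defers to resolveModulePath, so markdown links resolve the same way imports do. A short trace on made-up docs files (illustrative only):

// Hypothetical files:
//   /repo/docs/guide.md   (contains the link)
//   /repo/docs/api.md     (link target)
// While analyzing docs/guide.md, the link "[API](./api.md#endpoints)" resolves as:
//   resolveMarkdownLink("docs/guide.md", "./api.md#endpoints")
//   → "docs/api.md"   (anchor stripped; the .md extension hits step 1 of resolveModulePath,
//                      which returns the relative path even if the file is absent on disk)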

@@ -226,11 +337,6 @@ export class RegexAnalyzer {
     return null
   }

-  private findFirstChunkInFile(targetPath: string): string | null {
-    const normalized = targetPath.replace(/[^a-zA-Z0-9]/g, "_")
-    return `chunk_${normalized}_0`
-  }
-
   private findChunkContainingSymbol(chunks: ChunkWithId[], symbol: string): string | null {
     for (const chunk of chunks) {
       if (chunk.content.includes(symbol)) {

@@ -250,6 +356,6 @@ export class RegexAnalyzer {
        }
      }
    }
-    return
+    return buildDefaultChunkId(targetPath)
   }
 }
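
Taken together, the analyzer now owns a project root and does its own path resolution. A rough usage sketch (illustrative only; ChunkWithId's full field set lives in graph-builder.ts, so the chunk object below is simplified and its id field name is an assumption):

import { RegexAnalyzer } from "./vectorizer/analyzers/regex-analyzer"

const analyzer = new RegexAnalyzer("/repo")
const source = 'import { greet } from "./utils"\n\ngreet("hi")\n'
const chunks = [
  { id: "chunk_src_app_ts_0", content: source, start_line: 0, end_line: 2 },
]

const edges = analyzer.analyzeCode("src/app.ts", source, chunks as any)
// If /repo/src/utils.ts exists (or via the extension-inference fallback),
// edges holds one "imports" relation from the chunk above to the default chunk of src/utils.ts.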

package/vectorizer/chunkers/code-chunker.ts
CHANGED

@@ -23,6 +23,8 @@ export interface CodeChunk {
   content: string
   function_name?: string
   class_name?: string
+  start_line?: number
+  end_line?: number
 }

 // ── Block detection ─────────────────────────────────────────────────────────

@@ -172,31 +174,74 @@ function findPythonBlockEnd(lines: string[], startLine: number): number {
   return lines.length - 1
 }

-// ── Fallback: line-based splitting ──────────────────────────────────────────
+// ── Fallback: line-based splitting ──────────────────────────────────────────
+
+function splitByLines(lines: string[], maxChars: number): CodeChunk[] {
+  const chunks: CodeChunk[] = []
+  let current: string[] = []
+  let currentLen = 0
+  let startLine = 0
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i]
+    if (currentLen + line.length + 1 > maxChars && current.length > 0) {
+      chunks.push({ content: current.join("\n"), start_line: startLine, end_line: i - 1 })
+      current = []
+      currentLen = 0
+      startLine = i
+    }
+    current.push(line)
+    currentLen += line.length + 1
+  }

-
-
-  let current: string[] = []
-  let currentLen = 0
-
-  for (const line of lines) {
-    if (currentLen + line.length + 1 > maxChars && current.length > 0) {
-      chunks.push({ content: current.join("\n") })
-      current = []
-      currentLen = 0
+  if (current.length > 0) {
+    chunks.push({ content: current.join("\n"), start_line: startLine, end_line: lines.length - 1 })
   }
-    current.push(line)
-    currentLen += line.length + 1
-  }

-
-  chunks.push({ content: current.join("\n") })
+  return chunks
 }

-
-
+// ── Split large chunks preserving line numbers ────────────────────────────
+
+function splitChunkByLines(chunk: CodeChunk, maxChars: number): CodeChunk[] {
+  const lines = chunk.content.split("\n")
+  const baseLine = chunk.start_line || 0
+
+  const parts: CodeChunk[] = []
+  let current: string[] = []
+  let currentLen = 0
+  let startLine = baseLine
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i]
+    if (currentLen + line.length + 1 > maxChars && current.length > 0) {
+      parts.push({
+        ...chunk,
+        content: current.join("\n"),
+        start_line: startLine,
+        end_line: baseLine + i - 1,
+      })
+      current = []
+      currentLen = 0
+      startLine = baseLine + i
+    }
+    current.push(line)
+    currentLen += line.length + 1
+  }
+
+  if (current.length > 0) {
+    parts.push({
+      ...chunk,
+      content: current.join("\n"),
+      start_line: startLine,
+      end_line: baseLine + lines.length - 1,
+    })
+  }
+
+  return parts
+}

-// ── Public API ──────────────────────────────────────────────────────────────
+// ── Public API ──────────────────────────────────────────────────────────────

 /**
  * Chunk source code by functions/classes.
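
splitChunkByLines keeps absolute line numbers stable when an oversized chunk is cut: each part's start_line/end_line are offsets from the original chunk's start_line, and the ...chunk spread carries function_name/class_name into every part. A small worked trace (hypothetical chunk, not from the package):

// A 5-line chunk starting at file line 100, every line exactly 15 characters, maxChars = 40
// (each line costs line.length + 1 for the newline):
const demo: CodeChunk = {
  content: Array.from({ length: 5 }, (_, i) => `// line ${i}`.padEnd(15)).join("\n"),
  start_line: 100,
  end_line: 104,
}
splitChunkByLines(demo, 40)
// → [ { start_line: 100, end_line: 101, ... },   // lines 100-101 (16 + 16 chars; a third would exceed 40)
//     { start_line: 102, end_line: 103, ... },   // lines 102-103
//     { start_line: 104, end_line: 104, ... } ]  // remainder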

@@ -231,12 +276,13 @@ export function chunkCode(
     if (block.startLine > lastEnd + 1) {
       const gapContent = lines.slice(lastEnd + 1, block.startLine).join("\n").trim()
       if (gapContent.length >= config.min_chunk_size) {
-        chunks.push({ content: gapContent })
+        chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
       } else if (gapContent.length > 0 && chunks.length > 0) {
         // Merge small gap with previous chunk
         chunks[chunks.length - 1].content += "\n\n" + gapContent
+        chunks[chunks.length - 1].end_line = block.startLine - 1
       } else if (gapContent.length > 0) {
-        chunks.push({ content: gapContent })
+        chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
       }
     }

@@ -259,6 +305,8 @@ export function chunkCode(
          chunks.push({
            content: gap,
            class_name: block.name,
+           start_line: classLastEnd + 1,
+           end_line: method.startLine - 1,
          })
        }
      }

@@ -267,6 +315,8 @@ export function chunkCode(
          content: lines.slice(method.startLine, method.endLine + 1).join("\n"),
          function_name: method.name,
          class_name: block.name,
+         start_line: method.startLine,
+         end_line: method.endLine,
        })
        classLastEnd = method.endLine
      }

@@ -275,7 +325,7 @@ export function chunkCode(
      if (classLastEnd < block.endLine) {
        const tail = lines.slice(classLastEnd + 1, block.endLine + 1).join("\n").trim()
        if (tail) {
-         chunks.push({ content: tail, class_name: block.name })
+         chunks.push({ content: tail, class_name: block.name, start_line: classLastEnd + 1, end_line: block.endLine })
        }
      }
    } else {

@@ -312,9 +362,9 @@ export function chunkCode(
   const result: CodeChunk[] = []
   for (const chunk of chunks) {
     if (chunk.content.length > config.max_chunk_size) {
-      const parts =
+      const parts = splitChunkByLines(chunk, config.max_chunk_size)
       for (const p of parts) {
-        result.push(
+        result.push(p)
       }
     } else {
       result.push(chunk)