@comfanion/usethis_search 4.4.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api.ts +34 -17
- package/cache/manager.ts +30 -19
- package/cli.ts +8 -5
- package/file-indexer.ts +28 -11
- package/hooks/message-before.ts +5 -5
- package/hooks/tool-substitution.ts +4 -120
- package/index.ts +17 -6
- package/package.json +3 -2
- package/tools/codeindex.ts +192 -184
- package/tools/graph.ts +265 -0
- package/tools/read-interceptor.ts +7 -3
- package/tools/search.ts +268 -190
- package/tools/workspace-state.ts +1 -2
- package/tools/workspace.ts +76 -108
- package/vectorizer/analyzers/lsp-client.ts +52 -6
- package/vectorizer/chunkers/chunker-factory.ts +6 -0
- package/vectorizer/chunkers/code-chunker.ts +73 -16
- package/vectorizer/chunkers/lsp-chunker.ts +313 -191
- package/vectorizer/graph-db.ts +6 -4
- package/vectorizer/index.ts +329 -134
- package/vectorizer/usage-tracker.ts +36 -0
- package/vectorizer.yaml +2 -2
package/tools/workspace-state.ts
CHANGED
|
@@ -16,8 +16,7 @@ import { workspaceCache, type WorkspaceEntry } from "../cache/manager.ts"
|
|
|
16
16
|
* Build the full workspace state output.
|
|
17
17
|
* Contains all chunks grouped by file with full content and metadata.
|
|
18
18
|
*
|
|
19
|
-
* Called by search(),
|
|
20
|
-
* workspace_clear(), workspace_restore().
|
|
19
|
+
* Called by search(), list(), forget(), clear(), explore().
|
|
21
20
|
*
|
|
22
21
|
* Returns a <workspace_state> XML block that the agent can reference.
|
|
23
22
|
* The block is self-contained — all chunk content is inline.
|
package/tools/workspace.ts
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Workspace Management Tools (
|
|
2
|
+
* Workspace Management Tools (v3 — short names)
|
|
3
3
|
*
|
|
4
4
|
* Manual control over the workspace cache:
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* workspace_restore — restore a saved session snapshot, return state
|
|
5
|
+
* list — show full workspace state with chunk content
|
|
6
|
+
* forget — remove chunks, return updated state
|
|
7
|
+
* clear — remove all chunks, return empty state
|
|
9
8
|
*
|
|
10
|
-
*
|
|
9
|
+
* v3: Short tool names (dropped "workspace_" prefix for brevity).
|
|
10
|
+
* Each tool returns full workspace state inline (via buildWorkspaceOutput).
|
|
11
11
|
* Previous tool outputs are pruned from history by message-before hook.
|
|
12
12
|
* No injection — workspace lives only in the latest tool output.
|
|
13
13
|
*/
|
|
@@ -19,7 +19,7 @@ import { buildWorkspaceOutput } from "./workspace-state.ts"
|
|
|
19
19
|
|
|
20
20
|
// ── workspace.list ──────────────────────────────────────────────────────────
|
|
21
21
|
|
|
22
|
-
export const
|
|
22
|
+
export const list = tool({
|
|
23
23
|
description: `Show current workspace contents — all attached code chunks with full source code, line numbers, and metadata.
|
|
24
24
|
|
|
25
25
|
Use this to:
|
|
@@ -39,7 +39,7 @@ Only the LATEST workspace tool output is kept in chat — older outputs are auto
|
|
|
39
39
|
|
|
40
40
|
// ── workspace.forget ────────────────────────────────────────────────────────
|
|
41
41
|
|
|
42
|
-
export const
|
|
42
|
+
export const forget = tool({
|
|
43
43
|
description: `Remove chunks from workspace context to optimize context size and focus.
|
|
44
44
|
|
|
45
45
|
IMPORTANT: Regularly clean up workspace by removing irrelevant files or old search results.
|
|
@@ -47,85 +47,97 @@ This keeps context focused and prevents token budget overflow.
|
|
|
47
47
|
|
|
48
48
|
WHEN TO CLEAN UP:
|
|
49
49
|
- BEFORE searching a new topic — forget the previous search results first:
|
|
50
|
-
|
|
50
|
+
forget({ queries: ["previous search query"] }) → then search({ query: "new topic" })
|
|
51
51
|
- AFTER finishing a subtask — forget files you no longer need
|
|
52
|
-
- WHEN budget >60% — evict old chunks:
|
|
52
|
+
- WHEN budget >60% — evict old chunks: forget({ queries: ["5"] })
|
|
53
53
|
- AFTER editing files — workspace chunks become stale, forget and re-search
|
|
54
54
|
|
|
55
|
-
Auto-detects what to remove based on
|
|
55
|
+
Auto-detects what to remove based on each item:
|
|
56
56
|
- Chunk ID: "src/auth.ts:chunk-5"
|
|
57
|
-
- File path: "docs/architecture.md" (removes ALL chunks)
|
|
57
|
+
- File path: "docs/architecture.md" (removes ALL chunks from file)
|
|
58
58
|
- Search query: "authentication logic" (removes chunks from this search)
|
|
59
59
|
- Age: "5" (removes chunks older than 5 minutes)
|
|
60
60
|
|
|
61
|
+
Supports multiple items in one call — forget several files/queries at once.
|
|
62
|
+
|
|
61
63
|
Examples:
|
|
62
|
-
-
|
|
63
|
-
-
|
|
64
|
-
-
|
|
65
|
-
-
|
|
64
|
+
- forget({ queries: ["docs/prd.md"] })
|
|
65
|
+
- forget({ queries: ["5"] }) // older than 5 min
|
|
66
|
+
- forget({ queries: ["src/auth.ts", "src/types/User.ts"] }) // forget two files
|
|
67
|
+
- forget({ queries: ["authentication logic", "old-file.ts", "5"] }) // mix types`,
|
|
66
68
|
|
|
67
69
|
args: {
|
|
68
|
-
|
|
70
|
+
queries: tool.schema.array(tool.schema.string()).describe("Items to forget: chunk IDs, file paths, search queries, or age in minutes"),
|
|
69
71
|
},
|
|
70
72
|
|
|
71
73
|
async execute(args) {
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
const entry = workspaceCache.get(args.what)
|
|
79
|
-
if (!entry) {
|
|
80
|
-
return `Chunk "${args.what}" not found in workspace.` + buildWorkspaceOutput()
|
|
81
|
-
}
|
|
82
|
-
removed = workspaceCache.detach(args.what) ? 1 : 0
|
|
83
|
-
if (removed === 0) {
|
|
84
|
-
return `Failed to remove chunk "${args.what}".` + buildWorkspaceOutput()
|
|
85
|
-
}
|
|
86
|
-
summary = `Removed chunk "${args.what}" from workspace.`
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
// 2. Check if it's a number (age in minutes)
|
|
90
|
-
else if (args.what.match(/^(\d+)$/)) {
|
|
91
|
-
const minutes = parseInt(args.what, 10)
|
|
92
|
-
removed = workspaceCache.detachOlderThan(minutes * 60 * 1000)
|
|
93
|
-
summary = `Removed ${removed} chunk(s) older than ${minutes} minutes.`
|
|
74
|
+
const items: string[] = args.queries && args.queries.length > 0
|
|
75
|
+
? args.queries
|
|
76
|
+
: []
|
|
77
|
+
|
|
78
|
+
if (items.length === 0) {
|
|
79
|
+
return `Error: queries is required` + buildWorkspaceOutput()
|
|
94
80
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
if (
|
|
104
|
-
|
|
81
|
+
|
|
82
|
+
const summaries: string[] = []
|
|
83
|
+
let totalRemoved = 0
|
|
84
|
+
|
|
85
|
+
for (const item of items) {
|
|
86
|
+
let removed = 0
|
|
87
|
+
|
|
88
|
+
// 1. Chunk ID (contains ":chunk-" or starts with "chunk:")
|
|
89
|
+
if (item.includes(":chunk-") || item.startsWith("chunk:")) {
|
|
90
|
+
removed = workspaceCache.detach(item) ? 1 : 0
|
|
91
|
+
if (removed > 0) {
|
|
92
|
+
summaries.push(`"${item}" — removed`)
|
|
93
|
+
} else {
|
|
94
|
+
summaries.push(`"${item}" — not found`)
|
|
95
|
+
}
|
|
105
96
|
}
|
|
106
|
-
|
|
107
|
-
if (
|
|
108
|
-
|
|
97
|
+
// 2. Number (age in minutes)
|
|
98
|
+
else if (item.match(/^(\d+)$/)) {
|
|
99
|
+
const minutes = parseInt(item, 10)
|
|
100
|
+
removed = workspaceCache.detachOlderThan(minutes * 60 * 1000)
|
|
101
|
+
summaries.push(`older than ${minutes}min — ${removed} removed`)
|
|
109
102
|
}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
103
|
+
// 3. File path
|
|
104
|
+
else if (
|
|
105
|
+
item.match(/\.(md|ts|js|go|py|tsx|jsx|rs|java|kt|swift|txt|yaml|json|yml|toml)$/i) ||
|
|
106
|
+
item.match(/^(src|docs|internal|pkg|lib|app|pages|components|api)\//i) ||
|
|
107
|
+
item.includes("/")
|
|
108
|
+
) {
|
|
109
|
+
removed = workspaceCache.detachByPath(item)
|
|
110
|
+
if (removed > 0) {
|
|
111
|
+
summaries.push(`"${item}" — ${removed} chunk(s) removed`)
|
|
112
|
+
} else {
|
|
113
|
+
summaries.push(`"${item}" — not in workspace`)
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
// 4. Search query
|
|
117
|
+
else {
|
|
118
|
+
removed = workspaceCache.detachByQuery(item)
|
|
119
|
+
if (removed > 0) {
|
|
120
|
+
summaries.push(`search "${item}" — ${removed} chunk(s) removed`)
|
|
121
|
+
} else {
|
|
122
|
+
summaries.push(`search "${item}" — no matches`)
|
|
123
|
+
}
|
|
118
124
|
}
|
|
119
|
-
|
|
125
|
+
|
|
126
|
+
totalRemoved += removed
|
|
120
127
|
}
|
|
121
128
|
|
|
122
|
-
|
|
129
|
+
let output = `Removed ${totalRemoved} chunk(s):\n`
|
|
130
|
+
for (const s of summaries) {
|
|
131
|
+
output += `- ${s}\n`
|
|
132
|
+
}
|
|
133
|
+
output += buildWorkspaceOutput()
|
|
134
|
+
return output
|
|
123
135
|
},
|
|
124
136
|
})
|
|
125
137
|
|
|
126
138
|
// ── workspace.clear ─────────────────────────────────────────────────────────
|
|
127
139
|
|
|
128
|
-
export const
|
|
140
|
+
export const clear = tool({
|
|
129
141
|
description: `Remove ALL chunks from workspace context. Use when switching tasks or starting fresh.
|
|
130
142
|
|
|
131
143
|
Use when:
|
|
@@ -133,7 +145,7 @@ Use when:
|
|
|
133
145
|
- Workspace is cluttered with irrelevant context from many searches
|
|
134
146
|
- Starting a fresh investigation from scratch
|
|
135
147
|
|
|
136
|
-
Prefer
|
|
148
|
+
Prefer forget() for selective cleanup. Use clear() only for full reset.
|
|
137
149
|
Returns empty workspace state.`,
|
|
138
150
|
|
|
139
151
|
args: {},
|
|
@@ -147,48 +159,4 @@ Returns empty workspace state.`,
|
|
|
147
159
|
},
|
|
148
160
|
})
|
|
149
161
|
|
|
150
|
-
// ── workspace.restore ───────────────────────────────────────────────────────
|
|
151
|
-
|
|
152
|
-
export const workspace_restore = tool({
|
|
153
|
-
description: `Restore workspace from a previously saved session snapshot.
|
|
154
|
-
|
|
155
|
-
Use when:
|
|
156
|
-
- After compaction — restore the workspace context from before compaction
|
|
157
|
-
- Resuming work on a previous task — switch back to that context
|
|
158
|
-
- After workspace_clear() — if you need the old context back
|
|
159
|
-
|
|
160
|
-
Call without sessionId to list available snapshots with their chunk counts and token sizes.
|
|
161
|
-
Call with sessionId to restore a specific snapshot. Replaces current workspace entirely.`,
|
|
162
162
|
|
|
163
|
-
args: {
|
|
164
|
-
sessionId: tool.schema.string().optional().describe("Session ID to restore. If not provided, lists available snapshots."),
|
|
165
|
-
},
|
|
166
|
-
|
|
167
|
-
async execute(args) {
|
|
168
|
-
if (!args.sessionId) {
|
|
169
|
-
// List available snapshots (no workspace state needed — just metadata)
|
|
170
|
-
const snapshots = await workspaceCache.listSnapshots()
|
|
171
|
-
|
|
172
|
-
if (snapshots.length === 0) {
|
|
173
|
-
return `No saved workspace snapshots found.`
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
let output = `## Saved Workspace Snapshots\n\n`
|
|
177
|
-
for (const snap of snapshots) {
|
|
178
|
-
const date = new Date(snap.savedAt).toLocaleString()
|
|
179
|
-
output += `- **${snap.id}** — ${snap.chunkCount} chunks, ${snap.totalTokens.toLocaleString()} tokens — ${date}\n`
|
|
180
|
-
}
|
|
181
|
-
output += `\nUse \`workspace_restore("session-id")\` to restore.`
|
|
182
|
-
return output
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
// Restore specific snapshot
|
|
186
|
-
const restored = await workspaceCache.restore(args.sessionId)
|
|
187
|
-
|
|
188
|
-
if (!restored) {
|
|
189
|
-
return `Snapshot "${args.sessionId}" not found or empty.`
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
return `Restored workspace from "${args.sessionId}".` + buildWorkspaceOutput()
|
|
193
|
-
},
|
|
194
|
-
})
|
|
@@ -129,7 +129,7 @@ function encodeMessage(body: object): Buffer {
|
|
|
129
129
|
export class LSPClient {
|
|
130
130
|
private proc: ChildProcess | null = null
|
|
131
131
|
private requestId = 0
|
|
132
|
-
private pending = new Map<number, { resolve: (v: any) => void; reject: (e: Error) => void }>()
|
|
132
|
+
private pending = new Map<number, { resolve: (v: any) => void; reject: (e: Error) => void; timer: ReturnType<typeof setTimeout> }>()
|
|
133
133
|
private buffer = Buffer.alloc(0)
|
|
134
134
|
private initialized = false
|
|
135
135
|
private serverConfig: ServerConfig | null = null
|
|
@@ -176,14 +176,24 @@ export class LSPClient {
|
|
|
176
176
|
// Silently consume stderr — language servers are chatty
|
|
177
177
|
})
|
|
178
178
|
this.proc.on("error", (err) => {
|
|
179
|
-
//
|
|
180
|
-
for (const p of this.pending.values())
|
|
179
|
+
// Clear all timers + reject all pending
|
|
180
|
+
for (const p of this.pending.values()) {
|
|
181
|
+
clearTimeout(p.timer)
|
|
182
|
+
p.reject(err)
|
|
183
|
+
}
|
|
181
184
|
this.pending.clear()
|
|
182
185
|
})
|
|
183
186
|
this.proc.on("exit", () => {
|
|
184
|
-
for (const p of this.pending.values())
|
|
187
|
+
for (const p of this.pending.values()) {
|
|
188
|
+
clearTimeout(p.timer)
|
|
189
|
+
p.reject(new Error("LSP server exited"))
|
|
190
|
+
}
|
|
185
191
|
this.pending.clear()
|
|
186
192
|
this.initialized = false
|
|
193
|
+
// Release buffer memory on exit
|
|
194
|
+
this.buffer = Buffer.alloc(0)
|
|
195
|
+
this._pendingChunks = []
|
|
196
|
+
this._pendingLen = 0
|
|
187
197
|
})
|
|
188
198
|
|
|
189
199
|
// LSP initialize handshake
|
|
@@ -205,16 +215,28 @@ export class LSPClient {
|
|
|
205
215
|
this.initialized = true
|
|
206
216
|
}
|
|
207
217
|
|
|
208
|
-
/** Shut down gracefully. */
|
|
218
|
+
/** Shut down gracefully — clears all timers, pending requests, and buffers. */
|
|
209
219
|
async stop(): Promise<void> {
|
|
210
220
|
if (!this.proc || !this.initialized) return
|
|
211
221
|
try {
|
|
212
222
|
await this.sendRequest("shutdown", null)
|
|
213
223
|
this.sendNotification("exit", null)
|
|
214
224
|
} catch { /* best effort */ }
|
|
225
|
+
|
|
226
|
+
// Clear all pending request timers to prevent leaks
|
|
227
|
+
for (const p of this.pending.values()) {
|
|
228
|
+
clearTimeout(p.timer)
|
|
229
|
+
}
|
|
230
|
+
this.pending.clear()
|
|
231
|
+
|
|
215
232
|
this.proc.kill()
|
|
216
233
|
this.proc = null
|
|
217
234
|
this.initialized = false
|
|
235
|
+
|
|
236
|
+
// Release buffer memory
|
|
237
|
+
this.buffer = Buffer.alloc(0)
|
|
238
|
+
this._pendingChunks = []
|
|
239
|
+
this._pendingLen = 0
|
|
218
240
|
}
|
|
219
241
|
|
|
220
242
|
// ---- LSP helpers --------------------------------------------------------
|
|
@@ -293,6 +315,7 @@ export class LSPClient {
|
|
|
293
315
|
}, this.timeoutMs)
|
|
294
316
|
|
|
295
317
|
this.pending.set(id, {
|
|
318
|
+
timer,
|
|
296
319
|
resolve: (v: any) => { clearTimeout(timer); resolve(v) },
|
|
297
320
|
reject: (e: Error) => { clearTimeout(timer); reject(e) },
|
|
298
321
|
})
|
|
@@ -308,11 +331,33 @@ export class LSPClient {
|
|
|
308
331
|
}
|
|
309
332
|
|
|
310
333
|
private onData(chunk: Buffer): void {
|
|
311
|
-
|
|
334
|
+
// Accumulate incoming chunks in a list — avoids Buffer.concat on every data event
|
|
335
|
+
this._pendingChunks.push(chunk)
|
|
336
|
+
this._pendingLen += chunk.length
|
|
312
337
|
this.processBuffer()
|
|
313
338
|
}
|
|
314
339
|
|
|
340
|
+
/** Pending incoming chunks not yet merged into main buffer */
|
|
341
|
+
private _pendingChunks: Buffer[] = []
|
|
342
|
+
private _pendingLen = 0
|
|
343
|
+
|
|
344
|
+
/** Merge pending chunks into main buffer only when we need to parse */
|
|
345
|
+
private compactBuffer(): void {
|
|
346
|
+
if (this._pendingChunks.length === 0) return
|
|
347
|
+
if (this.buffer.length === 0) {
|
|
348
|
+
this.buffer = this._pendingChunks.length === 1
|
|
349
|
+
? this._pendingChunks[0]
|
|
350
|
+
: Buffer.concat(this._pendingChunks, this._pendingLen)
|
|
351
|
+
} else {
|
|
352
|
+
this.buffer = Buffer.concat([this.buffer, ...this._pendingChunks], this.buffer.length + this._pendingLen)
|
|
353
|
+
}
|
|
354
|
+
this._pendingChunks = []
|
|
355
|
+
this._pendingLen = 0
|
|
356
|
+
}
|
|
357
|
+
|
|
315
358
|
private processBuffer(): void {
|
|
359
|
+
this.compactBuffer()
|
|
360
|
+
|
|
316
361
|
while (true) {
|
|
317
362
|
// Look for Content-Length header
|
|
318
363
|
const headerEnd = this.buffer.indexOf("\r\n\r\n")
|
|
@@ -349,6 +394,7 @@ export class LSPClient {
|
|
|
349
394
|
if (msg.id != null && this.pending.has(msg.id)) {
|
|
350
395
|
const p = this.pending.get(msg.id)!
|
|
351
396
|
this.pending.delete(msg.id)
|
|
397
|
+
clearTimeout(p.timer)
|
|
352
398
|
if (msg.error) {
|
|
353
399
|
p.reject(new Error(`LSP error ${msg.error.code}: ${msg.error.message}`))
|
|
354
400
|
} else {
|
|
@@ -36,6 +36,8 @@ export interface UnifiedChunk {
|
|
|
36
36
|
heading_context?: string
|
|
37
37
|
function_name?: string
|
|
38
38
|
class_name?: string
|
|
39
|
+
start_line?: number
|
|
40
|
+
end_line?: number
|
|
39
41
|
}
|
|
40
42
|
|
|
41
43
|
// ── Fixed chunker (legacy) ──────────────────────────────────────────────────
|
|
@@ -103,6 +105,8 @@ export async function chunkContent(
|
|
|
103
105
|
content: c.content,
|
|
104
106
|
function_name: c.function_name,
|
|
105
107
|
class_name: c.class_name,
|
|
108
|
+
start_line: c.start_line,
|
|
109
|
+
end_line: c.end_line,
|
|
106
110
|
}))
|
|
107
111
|
}
|
|
108
112
|
} catch (error) {
|
|
@@ -119,6 +123,8 @@ export async function chunkContent(
|
|
|
119
123
|
content: c.content,
|
|
120
124
|
function_name: c.function_name,
|
|
121
125
|
class_name: c.class_name,
|
|
126
|
+
start_line: c.start_line,
|
|
127
|
+
end_line: c.end_line,
|
|
122
128
|
}))
|
|
123
129
|
}
|
|
124
130
|
|
|
@@ -272,21 +272,38 @@ export function chunkCode(
|
|
|
272
272
|
// If there is class-level block, skip individual method-level duplicate
|
|
273
273
|
if (block.type === "method") continue
|
|
274
274
|
|
|
275
|
-
// Gap before this block
|
|
275
|
+
// Gap before this block — check if it's JSDoc/comments that belong to the block
|
|
276
|
+
let blockStartLine = block.startLine
|
|
276
277
|
if (block.startLine > lastEnd + 1) {
|
|
277
|
-
const
|
|
278
|
-
|
|
278
|
+
const gapLines = lines.slice(lastEnd + 1, block.startLine)
|
|
279
|
+
const gapContent = gapLines.join("\n").trim()
|
|
280
|
+
|
|
281
|
+
// Check if gap has real code (not just comments/whitespace/braces)
|
|
282
|
+
const hasCode = gapLines.some(l => {
|
|
283
|
+
const t = l.trim()
|
|
284
|
+
return t.length > 0
|
|
285
|
+
&& !t.startsWith("//") && !t.startsWith("/*") && !t.startsWith("*") && !t.startsWith("*/")
|
|
286
|
+
&& !t.startsWith("#") && !t.startsWith("<!--")
|
|
287
|
+
&& !/^[{}()\[\];,]+$/.test(t)
|
|
288
|
+
})
|
|
289
|
+
|
|
290
|
+
if (hasCode && gapContent.length >= config.min_chunk_size) {
|
|
291
|
+
// Gap has real code — keep as separate chunk
|
|
279
292
|
chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
|
|
280
|
-
} else if (gapContent.length > 0 && chunks.length > 0) {
|
|
281
|
-
//
|
|
293
|
+
} else if (hasCode && gapContent.length > 0 && chunks.length > 0) {
|
|
294
|
+
// Small gap with code — merge with previous chunk
|
|
282
295
|
chunks[chunks.length - 1].content += "\n\n" + gapContent
|
|
283
296
|
chunks[chunks.length - 1].end_line = block.startLine - 1
|
|
284
|
-
} else if (gapContent.length > 0) {
|
|
297
|
+
} else if (hasCode && gapContent.length > 0) {
|
|
298
|
+
// First chunk, small gap with code
|
|
285
299
|
chunks.push({ content: gapContent, start_line: lastEnd + 1, end_line: block.startLine - 1 })
|
|
300
|
+
} else if (gapContent.length > 0) {
|
|
301
|
+
// Gap is only comments/JSDoc — merge with the block below
|
|
302
|
+
blockStartLine = lastEnd + 1
|
|
286
303
|
}
|
|
287
304
|
}
|
|
288
305
|
|
|
289
|
-
const blockContent = lines.slice(
|
|
306
|
+
const blockContent = lines.slice(blockStartLine, block.endLine + 1).join("\n")
|
|
290
307
|
|
|
291
308
|
if (blockContent.length > config.max_chunk_size && block.type === "class") {
|
|
292
309
|
// Split class into methods
|
|
@@ -295,27 +312,43 @@ export function chunkCode(
|
|
|
295
312
|
)
|
|
296
313
|
|
|
297
314
|
if (methods.length > 0) {
|
|
298
|
-
let classLastEnd =
|
|
315
|
+
let classLastEnd = blockStartLine
|
|
299
316
|
|
|
300
317
|
for (const method of methods) {
|
|
301
|
-
//
|
|
318
|
+
// Gap before method — check if it's JSDoc/comments that belong to the method
|
|
319
|
+
let methodStartLine = method.startLine
|
|
302
320
|
if (method.startLine > classLastEnd + 1) {
|
|
303
|
-
const
|
|
304
|
-
|
|
321
|
+
const gapLines = lines.slice(classLastEnd + 1, method.startLine)
|
|
322
|
+
const gapContent = gapLines.join("\n").trim()
|
|
323
|
+
|
|
324
|
+
// Check if gap is ONLY comments/whitespace — if so, merge with method
|
|
325
|
+
const hasCode = gapLines.some(l => {
|
|
326
|
+
const t = l.trim()
|
|
327
|
+
return t.length > 0
|
|
328
|
+
&& !t.startsWith("//") && !t.startsWith("/*") && !t.startsWith("*") && !t.startsWith("*/")
|
|
329
|
+
&& !t.startsWith("#") && !t.startsWith("<!--")
|
|
330
|
+
&& !/^[{}()\[\];,]+$/.test(t)
|
|
331
|
+
})
|
|
332
|
+
|
|
333
|
+
if (hasCode && gapContent.length >= config.min_chunk_size) {
|
|
334
|
+
// Gap has real code (not just comments) — keep as separate chunk
|
|
305
335
|
chunks.push({
|
|
306
|
-
content:
|
|
336
|
+
content: gapContent,
|
|
307
337
|
class_name: block.name,
|
|
308
338
|
start_line: classLastEnd + 1,
|
|
309
339
|
end_line: method.startLine - 1,
|
|
310
340
|
})
|
|
341
|
+
} else {
|
|
342
|
+
// Gap is JSDoc/comments — merge with method by extending start
|
|
343
|
+
methodStartLine = classLastEnd + 1
|
|
311
344
|
}
|
|
312
345
|
}
|
|
313
346
|
|
|
314
347
|
chunks.push({
|
|
315
|
-
content: lines.slice(
|
|
348
|
+
content: lines.slice(methodStartLine, method.endLine + 1).join("\n"),
|
|
316
349
|
function_name: method.name,
|
|
317
350
|
class_name: block.name,
|
|
318
|
-
start_line:
|
|
351
|
+
start_line: methodStartLine,
|
|
319
352
|
end_line: method.endLine,
|
|
320
353
|
})
|
|
321
354
|
classLastEnd = method.endLine
|
|
@@ -344,6 +377,8 @@ export function chunkCode(
|
|
|
344
377
|
content: blockContent,
|
|
345
378
|
function_name: block.type === "function" ? block.name : undefined,
|
|
346
379
|
class_name: block.type === "class" ? block.name : block.className,
|
|
380
|
+
start_line: blockStartLine,
|
|
381
|
+
end_line: block.endLine,
|
|
347
382
|
})
|
|
348
383
|
}
|
|
349
384
|
|
|
@@ -354,7 +389,7 @@ export function chunkCode(
|
|
|
354
389
|
if (lastEnd < lines.length - 1) {
|
|
355
390
|
const trailing = lines.slice(lastEnd + 1).join("\n").trim()
|
|
356
391
|
if (trailing.length > 0) {
|
|
357
|
-
chunks.push({ content: trailing })
|
|
392
|
+
chunks.push({ content: trailing, start_line: lastEnd + 1, end_line: lines.length - 1 })
|
|
358
393
|
}
|
|
359
394
|
}
|
|
360
395
|
|
|
@@ -371,5 +406,27 @@ export function chunkCode(
|
|
|
371
406
|
}
|
|
372
407
|
}
|
|
373
408
|
|
|
374
|
-
|
|
409
|
+
// Filter out empty chunks and trivial ones (single-line comments, separators)
|
|
410
|
+
// Keep chunks with function/class names regardless of size (they're meaningful)
|
|
411
|
+
return result.filter((c) => {
|
|
412
|
+
const trimmed = c.content.trim()
|
|
413
|
+
if (trimmed.length === 0) return false
|
|
414
|
+
// Keep any chunk with a function or class name — it's a real code block
|
|
415
|
+
if (c.function_name || c.class_name) return true
|
|
416
|
+
// Filter out tiny "gap" chunks (comment separators, blank lines, single imports)
|
|
417
|
+
// These are noise that pollute search results
|
|
418
|
+
if (trimmed.length < 50) return false
|
|
419
|
+
// Filter out chunks that are ONLY comments, braces, and whitespace (no real code)
|
|
420
|
+
const meaningfulLines = trimmed.split("\n").filter(l => {
|
|
421
|
+
const t = l.trim()
|
|
422
|
+
if (t.length === 0) return false
|
|
423
|
+
// Skip comment lines
|
|
424
|
+
if (t.startsWith("//") || t.startsWith("/*") || t.startsWith("*") || t.startsWith("#") || t.startsWith("<!--")) return false
|
|
425
|
+
// Skip lines that are only braces/punctuation (closing }, ], ), etc.)
|
|
426
|
+
if (/^[{}()\[\];,]+$/.test(t)) return false
|
|
427
|
+
return true
|
|
428
|
+
})
|
|
429
|
+
if (meaningfulLines.length === 0 && trimmed.length < 300) return false
|
|
430
|
+
return true
|
|
431
|
+
})
|
|
375
432
|
}
|