opencodekit 0.23.1 → 0.23.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/dist/index.js +354 -825
  2. package/dist/template/.opencode/AGENTS.md +15 -2
  3. package/dist/template/.opencode/command/init.md +198 -34
  4. package/dist/template/.opencode/context/fallow.md +137 -0
  5. package/dist/template/.opencode/opencode.json +12 -315
  6. package/dist/template/.opencode/plugin/codesearch.ts +730 -0
  7. package/dist/template/.opencode/plugin/memory/compile.ts +171 -186
  8. package/dist/template/.opencode/plugin/memory/index-generator.ts +118 -133
  9. package/dist/template/.opencode/plugin/memory/lint.ts +253 -275
  10. package/dist/template/.opencode/plugin/memory/tools.ts +224 -268
  11. package/dist/template/.opencode/plugin/memory/validate.ts +154 -164
  12. package/dist/template/.opencode/plugin/sdk/copilot/responses/tool/web-search-preview.ts +13 -30
  13. package/dist/template/.opencode/plugin/sdk/copilot/responses/tool/web-search-shared.ts +25 -0
  14. package/dist/template/.opencode/plugin/sdk/copilot/responses/tool/web-search.ts +17 -34
  15. package/dist/template/.opencode/plugin/session-summary.ts +0 -2
  16. package/dist/template/.opencode/plugin/srcwalk.ts +646 -667
  17. package/dist/template/.opencode/skill/code-navigation/SKILL.md +10 -10
  18. package/dist/template/.opencode/skill/code-review-and-quality/SKILL.md +1 -1
  19. package/dist/template/.opencode/skill/condition-based-waiting/example.ts +15 -2
  20. package/dist/template/.opencode/skill/debugging-and-error-recovery/SKILL.md +1 -1
  21. package/dist/template/.opencode/skill/deep-module-design/SKILL.md +1 -1
  22. package/dist/template/.opencode/skill/fallow/SKILL.md +409 -0
  23. package/dist/template/.opencode/skill/fallow/references/cli-reference.md +1905 -0
  24. package/dist/template/.opencode/skill/fallow/references/gotchas.md +644 -0
  25. package/dist/template/.opencode/skill/fallow/references/patterns.md +791 -0
  26. package/dist/template/.opencode/skill/planning-and-task-breakdown/SKILL.md +1 -1
  27. package/dist/template/.opencode/skill/srcwalk/SKILL.md +10 -13
  28. package/dist/template/.opencode/skill/ubiquitous-language/SKILL.md +1 -1
  29. package/dist/template/.opencode/tool/grepsearch.ts +92 -103
  30. package/package.json +1 -1
@@ -0,0 +1,730 @@
1
+ /**
2
+ * Csearch Plugin — Multi-Keyword Code Chunk Search with BM25 Ranking
3
+ *
4
+ * Bridges natural language queries to structural code chunk retrieval.
5
+ * The agent's LLM provides semantic query expansion; this tool provides
6
+ * broad multi-keyword retrieval with function-level chunk extraction
7
+ * and BM25 relevance ranking.
8
+ *
9
+ * Design philosophy:
10
+ * - Zero infrastructure: no embeddings, no vector DB, no network
11
+ * - Built-in `grep` handles exact-pattern search; this fills the gap for
12
+ * multi-keyword discovery with ranked results
13
+ * - Function-level chunk extraction returns complete code units, not fragments
14
+ * - BM25 scoring provides principled relevance ranking
15
+ * - AbortSignal-aware: respects OpenCode's cancellation like built-in grep
16
+ * - Glob filter: matches built-in grep's flexibility
17
+ */
18
+
19
+ import { execFileSync } from "node:child_process";
20
+ import { existsSync, readFileSync } from "node:fs";
21
+ import path from "node:path";
22
+ import { tool } from "@opencode-ai/plugin/tool";
23
+ import type { Plugin } from "@opencode-ai/plugin";
24
+
25
+ // ---------------------------------------------------------------------------
26
+ // Types
27
+ // ---------------------------------------------------------------------------
28
+
29
/**
 * One hit parsed out of ripgrep's `file:line:text` output.
 */
interface RawMatch {
  /** File path exactly as ripgrep printed it. */
  file: string;
  /** Line number reported by ripgrep for the hit. */
  line: number;
  /** Text of the matching line as stored by the parser. */
  text: string;
}
35
+
36
/**
 * A unit of source returned to the caller: either the body of an enclosing
 * function/class-like declaration, or a short window of lines around a match.
 */
interface CodeChunk {
  /** File path as reported by the search step. */
  file: string;
  /** `file` expressed relative to the project directory (used for display). */
  relPath: string;
  /** First line of the chunk (1-based). */
  startLine: number;
  /** Last line of the chunk (1-based, inclusive). */
  endLine: number;
  /** Source text of the chunk, lines joined with "\n". */
  text: string;
  /** Search keywords that produced at least one hit inside this chunk. */
  matchedKeywords: string[];
  /** Relevance score; initialised to 0 and filled in by the ranking step. */
  score: number;
}
46
+
47
+ // ---------------------------------------------------------------------------
48
+ // Helpers
49
+ // ---------------------------------------------------------------------------
50
+
51
/** Upper bound, in milliseconds, on how long a spawned command may run. */
const TIMEOUT_MS = 15_000;
/** Largest amount of child output (in bytes) that will be buffered: 8 MiB. */
const MAX_BUFFER = 8 * 1024 * 1024;

/**
 * Execute `cmd` synchronously and capture its stdout as UTF-8 text.
 *
 * @param cmd  - executable to launch
 * @param args - argument vector (no shell is involved)
 * @param cwd  - working directory; falls back to the current process cwd
 * @returns `{ stdout, code: 0 }` on success. Any failure (non-zero exit,
 *          timeout, buffer overflow, missing binary) is reported uniformly as
 *          `{ stdout: "", code: 1 }` — nothing is thrown.
 */
function run(cmd: string, args: string[], cwd?: string): { stdout: string; code: number } {
  const workingDir = cwd ?? process.cwd();
  try {
    const captured = execFileSync(cmd, args, {
      cwd: workingDir,
      encoding: "utf-8" as const,
      maxBuffer: MAX_BUFFER,
      timeout: TIMEOUT_MS,
      // stdin is closed; stdout/stderr are piped so nothing leaks to the host terminal
      stdio: ["ignore", "pipe", "pipe"],
    });
    return { stdout: captured as string, code: 0 };
  } catch {
    return { stdout: "", code: 1 };
  }
}
69
+
70
/**
 * Turn a free-form query string into the list of keywords to search for.
 *
 * For every whitespace-separated token:
 *   - the token itself is kept verbatim (handy for exact symbol names such as
 *     `validateToken`);
 *   - the token is additionally split at lower→upper camelCase boundaries, and
 *     each piece is lower-cased and stripped of leading/trailing characters
 *     outside `[a-z0-9]`. Pieces of length 1 or equal to the lower-cased whole
 *     token are dropped.
 *
 * Duplicates are removed; first-seen order is preserved. No stop-word
 * filtering is applied — the caller is expected to supply meaningful terms.
 */
function expandQuery(query: string): string[] {
  const seen = new Set<string>();

  for (const token of query.trim().split(/\s+/)) {
    if (token === "") continue;

    seen.add(token);

    const wholeLower = token.toLowerCase();
    token
      .split(/(?<=[a-z])(?=[A-Z])/)
      .map((piece) => piece.toLowerCase().replace(/^[^a-z0-9]+|[^a-z0-9]+$/g, ""))
      .filter((piece) => piece.length > 1 && piece !== wholeLower)
      .forEach((piece) => seen.add(piece));
  }

  return [...seen];
}
103
+
104
/**
 * Shape of one `rg --no-heading --line-number` output line: `<path>:<line>:<text>`.
 * The path group is lazy so the separator is the FIRST `:<digits>:` sequence,
 * which lets paths that themselves contain a colon (e.g. `C:\proj\a.ts`) parse.
 * `[\s\S]` is used for the text so a trailing `\r` does not defeat the match.
 */
const RG_MATCH_LINE = /^(.*?):(\d+):([\s\S]*)$/;

/**
 * Parse ripgrep output into structured matches.
 *
 * @param raw - stdout of ripgrep, one `path:line:text` record per line
 * @returns one entry per well-formed record; lines that do not carry a numeric
 *          line field are skipped. `text` is trimmed and capped at 200 chars.
 */
function parseMatches(raw: string): RawMatch[] {
  const results: RawMatch[] = [];
  for (const record of raw.split("\n")) {
    if (!record) continue;

    const parts = RG_MATCH_LINE.exec(record);
    if (!parts) continue;

    const lineNum = Number.parseInt(parts[2] ?? "", 10);
    if (Number.isNaN(lineNum)) continue;

    results.push({
      file: parts[1] ?? "",
      line: lineNum,
      text: (parts[3] ?? "").trim().slice(0, 200),
    });
  }
  return results;
}
127
+
128
/**
 * Run a single fixed-string, case-insensitive keyword search via ripgrep.
 *
 * File selection is left to ripgrep (its ignore-file handling and binary
 * detection); an optional glob narrows it further.
 *
 * @param keyword  - literal text to look for (`-F`, so no regex interpretation)
 * @param scopeDir - path handed to ripgrep as the search root
 * @param limit    - value for `--max-count` (ripgrep applies it per file)
 * @param signal   - if already aborted, the search is skipped and `[]` returned
 * @param glob     - optional `--glob` filter
 * @returns parsed matches; empty when ripgrep finds nothing or fails
 */
function searchKeyword(
  keyword: string,
  scopeDir: string,
  limit: number,
  signal?: AbortSignal,
  glob?: string,
): RawMatch[] {
  if (signal?.aborted) return [];

  const rgArgs: string[] = [
    "--no-heading",
    // ripgrep omits the path prefix when the search target is a single file;
    // force it so the output is always `path:line:text` for the parser.
    "--with-filename",
    "--line-number",
    "--color",
    "never",
    "-F",
    "-i",
    "--max-count",
    String(limit),
  ];
  if (glob) {
    rgArgs.push("--glob", glob);
  }
  // `--` ends option parsing so a keyword starting with "-" is not taken as a flag
  rgArgs.push("--", keyword, scopeDir);

  const result = run("rg", rgArgs);
  return parseMatches(result.stdout);
}
159
+
160
+ // ---------------------------------------------------------------------------
161
+ // Code Chunk Extraction
162
+ // ---------------------------------------------------------------------------
163
+
164
/**
 * Regexes recognising the start of a TypeScript/JavaScript declaration.
 * Each one is anchored at the beginning of the line (leading whitespace allowed).
 */
const DECLARATION_PATTERNS = [
  // [export] [default] [async] function [*] [name] followed by "(" or "<"
  /^\s*(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+\*?(?:\s*\w+\s*)?[<(]/,
  // [export] [default] [abstract] class Name
  /^\s*(?:export\s+)?(?:default\s+)?(?:abstract\s+)?class\s+\w+/,
  // [export] interface Name
  /^\s*(?:export\s+)?interface\s+\w+/,
  // [export] type Name =
  /^\s*(?:export\s+)?type\s+\w+\s*=/,
  // [export] enum Name
  /^\s*(?:export\s+)?enum\s+\w+/,
  // [export] const|let|var name [: Type] = [async] (...) [: Type] => {
  /^\s*(?:export\s+)?(?:const|let|var)\s+\w+\s*(?::\s*\w+\s*)?=\s*(?:async\s+)?\(.*\)\s*(?::\s*\w+\s*)?=>\s*\{/,
  // [export] const|let|var name = [async] function
  /^\s*(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s+)?function\b/,
  // get name( / set name(
  /^\s*(?:get|set)\s+\w+\s*\(/,
];

/** True when `line` matches any of the declaration patterns above. */
function isDeclarationLine(line: string): boolean {
  for (const pattern of DECLARATION_PATTERNS) {
    if (pattern.test(line)) return true;
  }
  return false;
}
191
+
192
/**
 * Heuristic for "this line starts a method body", e.g. `name(params) {` or
 * `name<T>(params): Type {`.
 *
 * All three conditions must hold for the trimmed line:
 *   1. it ends with `{`;
 *   2. it begins with an identifier, optional `<...>` generics, then `(`;
 *   3. that leading identifier is not one of the excluded keywords
 *      (if/for/while/switch/catch/return/throw/import/export).
 */
function isMethodLine(line: string): boolean {
  const text = line.trim();
  const opensBlock = text.endsWith("{");
  const startsLikeCall = /^\w+\s*(?:<[^>]*>)?\s*\(/.test(text);
  const startsWithKeyword = /^(if|for|while|switch|catch|return|throw|import|export)\b/.test(text);
  return opensBlock && startsLikeCall && !startsWithKeyword;
}
205
+
206
/**
 * Number of positions in `s` whose single UTF-16 unit equals `ch`.
 * (A `ch` that is not exactly one unit long therefore never matches.)
 */
function countChar(s: string, ch: string): number {
  let hits = 0;
  let pos = 0;
  while (pos < s.length) {
    if (s.charAt(pos) === ch) {
      hits += 1;
    }
    pos += 1;
  }
  return hits;
}
214
+
215
/**
 * Locate the line on which the brace-delimited block beginning at `startIdx`
 * closes.
 *
 * Braces are counted line by line after a best-effort removal of text that
 * should not count: quoted strings / template literals that open and close on
 * the same line, `//` comments, and `/* ... *\/` comments (including ones that
 * span several lines).
 *
 * @param lines    - the file split into lines
 * @param startIdx - 0-based index of the line to start scanning from
 * @returns 0-based index of the first line at which at least one brace has
 *          been seen and the running depth is back to zero; `lines.length - 1`
 *          if that never happens.
 */
function findBlockEnd(lines: string[], startIdx: number): number {
  let braceDepth = 0;
  let sawBrace = false;
  let insideComment = false;

  for (let idx = startIdx; idx < lines.length; idx++) {
    let code = lines[idx];

    if (insideComment) {
      // Still within a multi-line comment: drop everything up to its terminator.
      const closeAt = code.indexOf("*/");
      if (closeAt === -1) continue;
      insideComment = false;
      code = code.slice(closeAt + 2);
    }

    // Order matters: literals first, then line comments, then inline block comments.
    code = code
      .replace(/"(?:[^"\\]|\\.)*"/g, "")
      .replace(/'(?:[^'\\]|\\.)*'/g, "")
      .replace(/`(?:[^`\\]|\\.)*`/g, "")
      .replace(/\/\/.*$/, "")
      .replace(/\/\*.*?\*\//g, "");

    // A remaining "/*" means a block comment opens here and continues below.
    const openAt = code.indexOf("/*");
    if (openAt !== -1) {
      insideComment = true;
      code = code.slice(0, openAt);
    }

    const opened = countChar(code, "{");
    const closed = countChar(code, "}");
    sawBrace = sawBrace || opened > 0 || closed > 0;
    braceDepth += opened - closed;

    if (sawBrace && braceDepth === 0) {
      return idx;
    }
  }

  return lines.length - 1;
}
265
+
266
/**
 * For a match at `targetLine` (1-based), find the nearest declaration at or
 * above it and return that declaration's block as a chunk.
 *
 * The upward scan starts ON the target line, so a match that is itself a
 * declaration is handled by the same loop. The scan gives up as soon as it
 * meets a line above the target that begins with `}` — at that point a block
 * boundary has been crossed and the match cannot be attributed cleanly.
 *
 * @param fileLines  - the file split into lines
 * @param targetLine - 1-based line number of the match
 * @returns 1-based inclusive line range plus the joined text, or `null` when
 *          no enclosing declaration is found or `targetLine` does not refer to
 *          a line of `fileLines`.
 */
function extractEnclosingChunk(
  fileLines: string[],
  targetLine: number,
): { startLine: number; endLine: number; text: string } | null {
  const targetIdx = targetLine - 1; // convert to 0-based

  // A stale or malformed line number must not crash the caller.
  if (!Number.isInteger(targetIdx) || targetIdx < 0 || targetIdx >= fileLines.length) {
    return null;
  }

  for (let i = targetIdx; i >= 0; i--) {
    const line = fileLines[i];

    // Crossing a closing brace while walking upward means we left the block
    // the match lives in.
    if (i < targetIdx && line.trimStart().startsWith("}")) {
      break;
    }

    if (!isDeclarationLine(line) && !isMethodLine(line)) continue;

    // The opening brace may sit on the declaration line or on a later line of
    // a multi-line signature; look ahead a bounded number of lines for it.
    let braceBalance = 0;
    for (let j = i; j < fileLines.length; j++) {
      braceBalance += countChar(fileLines[j], "{") - countChar(fileLines[j], "}");
      if (braceBalance > 0) break;
      if (j - i > 20) break; // safety limit
    }

    if (braceBalance > 0 || line.includes("=>")) {
      const endIdx = findBlockEnd(fileLines, i);
      return {
        startLine: i + 1,
        endLine: endIdx + 1,
        text: fileLines.slice(i, endIdx + 1).join("\n"),
      };
    }
  }

  return null;
}
329
+
330
+ // ---------------------------------------------------------------------------
331
+ // BM25 Ranking
332
+ // ---------------------------------------------------------------------------
333
+
334
/** BM25 term-frequency saturation parameter. */
const BM25_K1 = 1.5;
/** BM25 length-normalisation parameter. */
const BM25_B = 0.75;

/**
 * BM25 relevance of one chunk against the full keyword list.
 *
 * @param chunkText          - chunk source; its length is the number of
 *                             whitespace-separated pieces
 * @param keywords           - every keyword of the search
 * @param keywordFreqInChunk - keyword → occurrences inside this chunk
 *                             (missing or 0 means the keyword contributes nothing)
 * @param keywordDocCount    - keyword → number of chunks containing it
 *                             (treated as 1 when missing)
 * @param totalChunks        - number of candidate chunks
 * @param avgChunkLen        - mean chunk length, same unit as above
 * @returns sum over keywords of `idf * tf-saturation`
 */
function bm25Score(
  chunkText: string,
  keywords: string[],
  keywordFreqInChunk: Map<string, number>,
  keywordDocCount: Map<string, number>,
  totalChunks: number,
  avgChunkLen: number,
): number {
  const docLen = chunkText.split(/\s+/).length;
  // Same for every keyword, so computed once.
  const lengthPenalty = BM25_K1 * (1 - BM25_B + BM25_B * (docLen / avgChunkLen));

  return keywords.reduce((total, kw) => {
    const tf = keywordFreqInChunk.get(kw) ?? 0;
    if (tf === 0) return total;

    const df = keywordDocCount.get(kw) ?? 1;
    const idf = Math.log((totalChunks - df + 0.5) / (df + 0.5) + 1);

    return total + idf * ((tf * (BM25_K1 + 1)) / (tf + lengthPenalty));
  }, 0);
}
372
+
373
+ // ---------------------------------------------------------------------------
374
+ // Plugin
375
+ // ---------------------------------------------------------------------------
376
+
377
/**
 * Count non-overlapping occurrences of `needle` in `haystack`.
 * Plain substring semantics, mirroring ripgrep's `-F` matching; callers
 * lower-case both sides to mirror `-i`.
 */
function countKeywordOccurrences(haystack: string, needle: string): number {
  if (needle.length === 0) return 0;
  let count = 0;
  let at = haystack.indexOf(needle);
  while (at !== -1) {
    count++;
    at = haystack.indexOf(needle, at + needle.length);
  }
  return count;
}

/**
 * Plugin exposing the `csearch` tool.
 *
 * Pipeline of one `csearch` call:
 *   1. split the query into keywords (`expandQuery`);
 *   2. run one ripgrep search per keyword, remembering which keywords hit
 *      which `file:line`;
 *   3. group hits by file;
 *   4. turn each hit into a chunk — the enclosing declaration when one is
 *      found, otherwise a small window of surrounding lines;
 *   5. score every chunk with BM25;
 *   6. sort by score and render the top results as markdown.
 *
 * The abort signal is checked between keyword searches; a cancelled search
 * still reports whatever was collected so far, flagged as partial.
 */
export const CsearchPlugin: Plugin = async () => {
  return {
    tool: {
      csearch: tool({
        description: `Search codebase by multiple keywords and return ranked function-level code chunks.

Like ripgrep, this respects .gitignore — node_modules, dist/, and other ignored directories are automatically excluded.

How it works:
- Provide specific search keywords separated by spaces
- Each keyword is searched independently via ripgrep
- Results are grouped into function/class-level code chunks
- Chunks are ranked by BM25 relevance (best match first)

Key differences from grep:
- grep: exact pattern match, returns single lines → use when you know the symbol name
- csearch: multi-keyword, returns complete function bodies → use for discovery

Best practices:
- Instead of "how is auth handled", provide: "auth token login session middleware"
- Instead of "db queries", provide: "database query insert select connection pool"
- Include synonyms: "user authentication" → "user auth login token session"

Returns chunks with file paths, line ranges, and complete source code.`,
        args: {
          query: tool.schema
            .string()
            .describe(
              "Space-separated search keywords. Be specific: 'auth token login jwt' not 'security stuff'. Include synonyms where relevant.",
            ),
          scope: tool.schema
            .string()
            .optional()
            .describe(
              "Subdirectory to search within (e.g., 'src/', 'packages/') — defaults to project root",
            ),
          glob: tool.schema
            .string()
            .optional()
            .describe("Optional glob pattern to filter file types, e.g. '*.ts' or '**/*.test.ts'."),
          max_results: tool.schema
            .number()
            .optional()
            .describe("Maximum chunks to return (default: 15, max: 30)"),
        },
        execute: async (args, context) => {
          const query = String(args.query ?? "").trim();
          if (!query) return "query is required.";

          const scopeArg = args.scope ? String(args.scope).trim() : "";
          const scopeDir = scopeArg ? path.resolve(context.directory, scopeArg) : context.directory;

          // Clamp to a whole number in [1, 30]; 0, negative or non-finite input
          // would otherwise yield an invalid --max-count and a bad slice().
          const requestedMax = Number(args.max_results ?? 15);
          const maxResults = Number.isFinite(requestedMax)
            ? Math.min(Math.max(Math.floor(requestedMax), 1), 30)
            : 15;

          const glob = args.glob ? String(args.glob).trim() : undefined;
          const signal = context.abort;

          if (!existsSync(scopeDir)) {
            return `Scope directory not found: ${scopeArg || "."}`;
          }

          // Ensure ripgrep is available
          try {
            execFileSync("rg", ["--version"], {
              encoding: "utf-8" as const,
              timeout: 2000,
              stdio: "ignore",
            });
          } catch {
            return (
              `ripgrep (rg) is required but not found — install it with:\n` +
              ` brew install rg\n\n` +
              `rg powers the built-in grep and glob tools too; it's part of OpenCode's standard toolchain.`
            );
          }

          // 1. Expand query into individual keywords
          const keywords = expandQuery(query);

          if (keywords.length === 0) {
            return `No search keywords extracted from: "${query}"`;
          }

          // 2. Search each keyword via ripgrep
          const perKeywordLimit = Math.ceil(maxResults * 3); // generous per-keyword cap
          const rawMatches: RawMatch[] = [];
          const matchKeywordMap = new Map<string, string[]>(); // "file:line" → keywords
          let abortedEarly = false;

          for (const keyword of keywords) {
            if (signal?.aborted) {
              abortedEarly = true;
              break;
            }

            const matches = searchKeyword(keyword, scopeDir, perKeywordLimit, signal, glob);

            for (const m of matches) {
              const key = `${m.file}:${m.line}`;
              const existing = matchKeywordMap.get(key);
              if (existing) {
                if (!existing.includes(keyword)) existing.push(keyword);
              } else {
                // First keyword to hit this line: record the line once.
                matchKeywordMap.set(key, [keyword]);
                rawMatches.push(m);
              }
            }
          }

          if (rawMatches.length === 0) {
            const prefix = abortedEarly ? "Search was cancelled before completing.\n" : "";
            return (
              prefix +
              `No results found for keywords: ${keywords.join(", ")}\n\n` +
              `Tips:\n` +
              ` - Use broader terms\n` +
              ` - Use grep(pattern: "...") with exact symbol names if you know them\n` +
              ` - Check if the scope is correct (current: ${scopeArg || "project root"})`
            );
          }

          // 3. Group matches by file
          const fileMatches = new Map<string, RawMatch[]>();
          for (const m of rawMatches) {
            const existing = fileMatches.get(m.file);
            if (existing) existing.push(m);
            else fileMatches.set(m.file, [m]);
          }

          // 4. Extract code chunks from each matched file
          const chunks: CodeChunk[] = [];
          const keywordDocCount = new Map<string, number>(); // keyword → chunks containing it

          for (const [filePath, matches] of fileMatches) {
            const absPath = path.resolve(context.directory, filePath);
            let fileLines: string[];
            try {
              const content = readFileSync(absPath, "utf-8");
              fileLines = content.split("\n");
            } catch {
              // Skip files we can't read
              continue;
            }

            // Sort matches by line number for stable processing
            matches.sort((a, b) => a.line - b.line);

            // Track which lines we've already assigned to a chunk
            const assignedLines = new Set<number>();

            for (const m of matches) {
              if (assignedLines.has(m.line)) continue;

              const chunk = extractEnclosingChunk(fileLines, m.line);

              if (chunk) {
                // A chunk starting on the same line of the same file was already emitted.
                const overlaps = chunks.some(
                  (c) => c.file === filePath && c.startLine === chunk.startLine,
                );
                if (overlaps) {
                  assignedLines.add(m.line);
                  continue;
                }

                const matchedKeywords = matchKeywordMap.get(`${filePath}:${m.line}`) ?? [];

                // Fold in the keywords of every other hit that falls inside this chunk
                const chunkKeywords = new Set(matchedKeywords);
                for (const other of matches) {
                  if (other.line === m.line) continue;
                  if (other.line >= chunk.startLine && other.line <= chunk.endLine) {
                    const otherKws = matchKeywordMap.get(`${filePath}:${other.line}`) ?? [];
                    for (const kw of otherKws) chunkKeywords.add(kw);
                    assignedLines.add(other.line);
                  }
                }

                const kwArr = Array.from(chunkKeywords);
                const relPath = path.relative(context.directory, filePath);

                // Count which chunks contain each keyword (for IDF)
                for (const kw of kwArr) {
                  keywordDocCount.set(kw, (keywordDocCount.get(kw) ?? 0) + 1);
                }

                chunks.push({
                  file: filePath,
                  relPath,
                  startLine: chunk.startLine,
                  endLine: chunk.endLine,
                  text: chunk.text,
                  matchedKeywords: kwArr,
                  score: 0, // computed below
                });

                assignedLines.add(m.line);
              } else {
                // No enclosing declaration — fall back to a window of lines around the hit
                const relPath = path.relative(context.directory, filePath);
                const matchedKeywords = matchKeywordMap.get(`${filePath}:${m.line}`) ?? [];
                const contextStart = Math.max(0, m.line - 4);
                const contextEnd = Math.min(fileLines.length, m.line + 3);
                const contextText = fileLines.slice(contextStart, contextEnd).join("\n");

                for (const kw of matchedKeywords) {
                  keywordDocCount.set(kw, (keywordDocCount.get(kw) ?? 0) + 1);
                }

                chunks.push({
                  file: filePath,
                  relPath,
                  startLine: contextStart + 1,
                  endLine: contextEnd,
                  text: contextText,
                  matchedKeywords,
                  score: 0,
                });

                assignedLines.add(m.line);
              }
            }
          }

          if (chunks.length === 0) {
            return "No code chunks could be extracted from the matched files.";
          }

          // 5. BM25 score all chunks
          const totalChunks = chunks.length;
          const avgChunkLen =
            chunks.reduce((sum, c) => sum + c.text.split(/\s+/).length, 0) / totalChunks;

          for (const chunk of chunks) {
            // Term frequency must use the same notion of "match" as retrieval:
            // ripgrep ran with -F -i, i.e. case-insensitive SUBSTRING search.
            // A word-boundary count would give 0 for hits inside identifiers
            // (e.g. "token" in "validateToken") and zero out the chunk's score.
            const kwFreq = new Map<string, number>();
            const lowerChunk = chunk.text.toLowerCase();
            for (const kw of chunk.matchedKeywords) {
              const occurrences = countKeywordOccurrences(lowerChunk, kw.toLowerCase());
              // ripgrep already proved at least one hit inside this chunk.
              kwFreq.set(kw, Math.max(1, occurrences));
            }

            chunk.score = bm25Score(
              chunk.text,
              keywords,
              kwFreq,
              keywordDocCount,
              totalChunks,
              avgChunkLen,
            );
          }

          // 6. Sort by BM25 score descending
          chunks.sort((a, b) => b.score - a.score);

          // 7. Return top-N chunks
          const topChunks = chunks.slice(0, maxResults);

          const output: string[] = [
            `# Csearch: ${query}`,
            `Keywords: ${keywords.join(", ")}`,
            `Found ${plural(chunks.length, "chunk")} across ${plural(fileMatches.size, "file")}`,
            `Showing top ${topChunks.length} ${abortedEarly ? "(partial — search cancelled) " : ""}`,
            "",
          ];

          // Group by file
          const fileGroups = new Map<string, CodeChunk[]>();
          for (const chunk of topChunks) {
            const existing = fileGroups.get(chunk.relPath);
            if (existing) existing.push(chunk);
            else fileGroups.set(chunk.relPath, [chunk]);
          }

          for (const [filePath, fileChunks] of fileGroups) {
            output.push(`## ${filePath} — ${plural(fileChunks.length, "chunk")}`);
            output.push("");

            for (const chunk of fileChunks) {
              const name = guessChunkName(chunk.text, chunk.startLine);
              output.push(
                `### ${name} (L${chunk.startLine}-L${chunk.endLine}) — score: ${chunk.score.toFixed(1)}, keywords: [${chunk.matchedKeywords.join(", ")}]`,
              );
              output.push("```typescript");
              output.push(chunk.text);
              output.push("```");
              output.push("");
            }
          }

          output.push(
            `> Next: drill into a chunk with srcwalk_read({ path: "${topChunks[0]?.relPath ?? "file.ts"}:${topChunks[0]?.startLine ?? 1}" })`,
          );
          output.push(`> Next: search exact symbol names with grep({ pattern: "symbolName" })`);

          return output.join("\n");
        },
      }),
    },
  };
};
680
+
681
+ // ---------------------------------------------------------------------------
682
+ // Formatting Helpers
683
+ // ---------------------------------------------------------------------------
684
+
685
/**
 * Format a count followed by its noun, pluralising the noun with simple
 * English rules unless the count is exactly 1:
 *   - ends in ch / s / sh / x / z  → append "es"
 *   - ends in consonant + "y"      → replace "y" with "ies"
 *   - anything else                → append "s"
 */
function plural(n: number, word: string): string {
  if (n === 1) return `${n} ${word}`;

  const esEndings = ["ch", "s", "sh", "x", "z"];
  let pluralForm: string;
  if (esEndings.some((ending) => word.endsWith(ending))) {
    pluralForm = `${word}es`;
  } else if (/[^aeiou]y$/.test(word)) {
    pluralForm = `${word.slice(0, -1)}ies`;
  } else {
    pluralForm = `${word}s`;
  }
  return `${n} ${pluralForm}`;
}
705
+
706
/**
 * Derive a display label for a chunk from its first line.
 *
 * Tried in order:
 *   1. identifier following `function` / `class` / `interface` / `type` / `enum`
 *   2. identifier following `const` / `let` / `var`
 *   3. accessor: `get name` / `set name`
 *   4. identifier directly followed by `(` (method style)
 * Falls back to `chunk at L<startLine>` when nothing matches.
 */
function guessChunkName(text: string, startLine: number): string {
  const header = (text.split("\n")[0] ?? "").trim();

  const declared =
    /(?:function|class|interface|type|enum)\s+(\w+)/.exec(header) ??
    /(?:export\s+)?(?:const|let|var)\s+(\w+)/.exec(header);
  if (declared) {
    return declared[1]!;
  }

  const accessor = /^\s*(get|set)\s+(\w+)/.exec(header);
  if (accessor) {
    return `${accessor[1]!} ${accessor[2]!}`;
  }

  const method = /^\s*(\w+)\s*\(/.exec(header);
  return method ? method[1]! : `chunk at L${startLine}`;
}
729
+
730
+ export default CsearchPlugin;