opencode-codebase-index 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -64,11 +64,12 @@ src/api/checkout.ts:89 (Route handler for /pay)
64
64
  |----------|------|-----|
65
65
  | Don't know the function name | `codebase_search` | Semantic search finds by meaning |
66
66
  | Exploring unfamiliar codebase | `codebase_search` | Discovers related code across files |
67
+ | Just need to find locations | `codebase_peek` | Returns metadata only, saves ~90% tokens |
67
68
  | Know exact identifier | `grep` | Faster, finds all occurrences |
68
69
  | Need ALL matches | `grep` | Semantic returns top N only |
69
70
  | Mixed discovery + precision | `/find` (hybrid) | Best of both worlds |
70
71
 
71
- **Rule of thumb**: Semantic search for discovery → grep for precision.
72
+ **Rule of thumb**: `codebase_peek` to find locations → `Read` to examine → `grep` for precision.
72
73
 
73
74
  ## 📊 Token Usage
74
75
 
@@ -187,6 +188,18 @@ The plugin exposes these tools to the OpenCode agent:
187
188
  | "code that calculates shipping costs" | "shipping" |
188
189
  | "where user permissions are checked" | "permissions" |
189
190
 
191
+ ### `codebase_peek`
192
+ **Token-efficient discovery.** Returns only metadata (file, line, name, type) without code content.
193
+ - **Use for**: Finding WHERE code is before deciding what to read. Saves ~90% tokens vs `codebase_search`.
194
+ - **Example output**:
195
+ ```
196
+ [1] function "validatePayment" at src/billing.ts:45-67 (score: 0.92)
197
+ [2] class "PaymentProcessor" at src/processor.ts:12-89 (score: 0.87)
198
+
199
+ Use Read tool to examine specific files.
200
+ ```
201
+ - **Workflow**: `codebase_peek` → find locations → `Read` specific files
202
+
190
203
  ### `index_codebase`
191
204
  Manually trigger indexing.
192
205
  - **Use for**: Forcing a re-index or checking stats.
package/dist/index.cjs CHANGED
@@ -3879,12 +3879,13 @@ var Indexer = class {
3879
3879
  keywordMs: Math.round(keywordMs * 100) / 100,
3880
3880
  fusionMs: Math.round(fusionMs * 100) / 100
3881
3881
  });
3882
+ const metadataOnly = options?.metadataOnly ?? false;
3882
3883
  return Promise.all(
3883
3884
  filtered.map(async (r) => {
3884
3885
  let content = "";
3885
3886
  let contextStartLine = r.metadata.startLine;
3886
3887
  let contextEndLine = r.metadata.endLine;
3887
- if (this.config.search.includeContext) {
3888
+ if (!metadataOnly && this.config.search.includeContext) {
3888
3889
  try {
3889
3890
  const fileContent = await import_fs4.promises.readFile(
3890
3891
  r.metadata.filePath,
@@ -4091,6 +4092,91 @@ var Indexer = class {
4091
4092
  getLogger() {
4092
4093
  return this.logger;
4093
4094
  }
4095
+ async findSimilar(code, limit, options) {
4096
+ const searchStartTime = import_perf_hooks.performance.now();
4097
+ const { store, provider, database } = await this.ensureInitialized();
4098
+ if (store.count() === 0) {
4099
+ this.logger.search("debug", "Find similar on empty index");
4100
+ return [];
4101
+ }
4102
+ const maxResults = limit ?? this.config.search.maxResults;
4103
+ const filterByBranch = options?.filterByBranch ?? true;
4104
+ this.logger.search("debug", "Starting find similar", {
4105
+ codeLength: code.length,
4106
+ maxResults,
4107
+ filterByBranch
4108
+ });
4109
+ const embeddingStartTime = import_perf_hooks.performance.now();
4110
+ const { embedding, tokensUsed } = await provider.embed(code);
4111
+ const embeddingMs = import_perf_hooks.performance.now() - embeddingStartTime;
4112
+ this.logger.recordEmbeddingApiCall(tokensUsed);
4113
+ const vectorStartTime = import_perf_hooks.performance.now();
4114
+ const semanticResults = store.search(embedding, maxResults * 2);
4115
+ const vectorMs = import_perf_hooks.performance.now() - vectorStartTime;
4116
+ let branchChunkIds = null;
4117
+ if (filterByBranch && this.currentBranch !== "default") {
4118
+ branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
4119
+ }
4120
+ const filtered = semanticResults.filter((r) => {
4121
+ if (r.score < this.config.search.minScore) return false;
4122
+ if (branchChunkIds && !branchChunkIds.has(r.id)) return false;
4123
+ if (options?.excludeFile) {
4124
+ if (r.metadata.filePath === options.excludeFile) return false;
4125
+ }
4126
+ if (options?.fileType) {
4127
+ const ext = r.metadata.filePath.split(".").pop()?.toLowerCase();
4128
+ if (ext !== options.fileType.toLowerCase().replace(/^\./, "")) return false;
4129
+ }
4130
+ if (options?.directory) {
4131
+ const normalizedDir = options.directory.replace(/^\/|\/$/g, "");
4132
+ if (!r.metadata.filePath.includes(`/${normalizedDir}/`) && !r.metadata.filePath.includes(`${normalizedDir}/`)) return false;
4133
+ }
4134
+ if (options?.chunkType) {
4135
+ if (r.metadata.chunkType !== options.chunkType) return false;
4136
+ }
4137
+ return true;
4138
+ }).slice(0, maxResults);
4139
+ const totalSearchMs = import_perf_hooks.performance.now() - searchStartTime;
4140
+ this.logger.recordSearch(totalSearchMs, {
4141
+ embeddingMs,
4142
+ vectorMs,
4143
+ keywordMs: 0,
4144
+ fusionMs: 0
4145
+ });
4146
+ this.logger.search("info", "Find similar complete", {
4147
+ codeLength: code.length,
4148
+ results: filtered.length,
4149
+ totalMs: Math.round(totalSearchMs * 100) / 100,
4150
+ embeddingMs: Math.round(embeddingMs * 100) / 100,
4151
+ vectorMs: Math.round(vectorMs * 100) / 100
4152
+ });
4153
+ return Promise.all(
4154
+ filtered.map(async (r) => {
4155
+ let content = "";
4156
+ if (this.config.search.includeContext) {
4157
+ try {
4158
+ const fileContent = await import_fs4.promises.readFile(
4159
+ r.metadata.filePath,
4160
+ "utf-8"
4161
+ );
4162
+ const lines = fileContent.split("\n");
4163
+ content = lines.slice(r.metadata.startLine - 1, r.metadata.endLine).join("\n");
4164
+ } catch {
4165
+ content = "[File not accessible]";
4166
+ }
4167
+ }
4168
+ return {
4169
+ filePath: r.metadata.filePath,
4170
+ startLine: r.metadata.startLine,
4171
+ endLine: r.metadata.endLine,
4172
+ content,
4173
+ score: r.score,
4174
+ chunkType: r.metadata.chunkType,
4175
+ name: r.metadata.name
4176
+ };
4177
+ })
4178
+ );
4179
+ }
4094
4180
  };
4095
4181
 
4096
4182
  // node_modules/chokidar/index.js
@@ -6032,7 +6118,7 @@ function getIndexer() {
6032
6118
  return sharedIndexer;
6033
6119
  }
6034
6120
  var codebase_search = (0, import_plugin.tool)({
6035
- description: "Search codebase by MEANING, not keywords. Use when you don't know exact function/class names. Returns focused results (5-10 files). For known identifiers like 'validateToken' or 'UserService', use grep instead - it's faster and finds all occurrences. Best for: 'find authentication logic', 'code that handles payments', 'error middleware'.",
6121
+ description: "Search codebase by MEANING, not keywords. Returns full code content. Use when you need to see actual implementation. For just finding WHERE code is (saves ~90% tokens), use codebase_peek instead. For known identifiers like 'validateToken', use grep - it's faster.",
6036
6122
  args: {
6037
6123
  query: z.string().describe("Natural language description of what code you're looking for. Describe behavior, not syntax."),
6038
6124
  limit: z.number().optional().default(10).describe("Maximum number of results to return"),
@@ -6064,6 +6150,38 @@ ${r.content}
6064
6150
  ${formatted.join("\n\n")}`;
6065
6151
  }
6066
6152
  });
6153
+ var codebase_peek = (0, import_plugin.tool)({
6154
+ description: "Quick lookup of code locations by meaning. Returns only metadata (file, line, name, type) WITHOUT code content. Use this first to find WHERE code is, then use Read tool to examine specific files. Saves tokens by not returning full code blocks. Best for: discovery, navigation, finding multiple related locations.",
6155
+ args: {
6156
+ query: z.string().describe("Natural language description of what code you're looking for."),
6157
+ limit: z.number().optional().default(10).describe("Maximum number of results to return"),
6158
+ fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
6159
+ directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
6160
+ chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type")
6161
+ },
6162
+ async execute(args) {
6163
+ const indexer = getIndexer();
6164
+ const results = await indexer.search(args.query, args.limit ?? 10, {
6165
+ fileType: args.fileType,
6166
+ directory: args.directory,
6167
+ chunkType: args.chunkType,
6168
+ metadataOnly: true
6169
+ });
6170
+ if (results.length === 0) {
6171
+ return "No matching code found. Try a different query or run index_codebase first.";
6172
+ }
6173
+ const formatted = results.map((r, idx) => {
6174
+ const location = `${r.filePath}:${r.startLine}-${r.endLine}`;
6175
+ const name = r.name ? `"${r.name}"` : "(anonymous)";
6176
+ return `[${idx + 1}] ${r.chunkType} ${name} at ${location} (score: ${r.score.toFixed(2)})`;
6177
+ });
6178
+ return `Found ${results.length} locations for "${args.query}":
6179
+
6180
+ ${formatted.join("\n")}
6181
+
6182
+ Use Read tool to examine specific files.`;
6183
+ }
6184
+ });
6067
6185
  var index_codebase = (0, import_plugin.tool)({
6068
6186
  description: "Index the codebase for semantic search. Creates vector embeddings of code chunks. Incremental - only re-indexes changed files (~50ms when nothing changed). Run before first codebase_search.",
6069
6187
  args: {
@@ -6175,6 +6293,39 @@ var index_logs = (0, import_plugin.tool)({
6175
6293
  }).join("\n");
6176
6294
  }
6177
6295
  });
6296
+ var find_similar = (0, import_plugin.tool)({
6297
+ description: "Find code similar to a given snippet. Use for duplicate detection, pattern discovery, or refactoring prep. Paste code and find semantically similar implementations elsewhere in the codebase.",
6298
+ args: {
6299
+ code: z.string().describe("The code snippet to find similar code for"),
6300
+ limit: z.number().optional().default(10).describe("Maximum number of results to return"),
6301
+ fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
6302
+ directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
6303
+ chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type"),
6304
+ excludeFile: z.string().optional().describe("Exclude results from this file path (useful when searching for duplicates of code from a specific file)")
6305
+ },
6306
+ async execute(args) {
6307
+ const indexer = getIndexer();
6308
+ const results = await indexer.findSimilar(args.code, args.limit ?? 10, {
6309
+ fileType: args.fileType,
6310
+ directory: args.directory,
6311
+ chunkType: args.chunkType,
6312
+ excludeFile: args.excludeFile
6313
+ });
6314
+ if (results.length === 0) {
6315
+ return "No similar code found. Try a different snippet or run index_codebase first.";
6316
+ }
6317
+ const formatted = results.map((r, idx) => {
6318
+ const header = r.name ? `[${idx + 1}] ${r.chunkType} "${r.name}" in ${r.filePath}:${r.startLine}-${r.endLine}` : `[${idx + 1}] ${r.chunkType} in ${r.filePath}:${r.startLine}-${r.endLine}`;
6319
+ return `${header} (similarity: ${(r.score * 100).toFixed(1)}%)
6320
+ \`\`\`
6321
+ ${r.content}
6322
+ \`\`\``;
6323
+ });
6324
+ return `Found ${results.length} similar code blocks:
6325
+
6326
+ ${formatted.join("\n\n")}`;
6327
+ }
6328
+ });
6178
6329
  function formatIndexStats(stats, verbose = false) {
6179
6330
  const lines = [];
6180
6331
  if (stats.indexedChunks === 0 && stats.removedChunks === 0) {
@@ -6358,11 +6509,13 @@ var plugin = async ({ directory }) => {
6358
6509
  return {
6359
6510
  tool: {
6360
6511
  codebase_search,
6512
+ codebase_peek,
6361
6513
  index_codebase,
6362
6514
  index_status,
6363
6515
  index_health_check,
6364
6516
  index_metrics,
6365
- index_logs
6517
+ index_logs,
6518
+ find_similar
6366
6519
  },
6367
6520
  async config(cfg) {
6368
6521
  cfg.command = cfg.command ?? {};