opencode-codebase-index 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3873,12 +3873,13 @@ var Indexer = class {
3873
3873
  keywordMs: Math.round(keywordMs * 100) / 100,
3874
3874
  fusionMs: Math.round(fusionMs * 100) / 100
3875
3875
  });
3876
+ const metadataOnly = options?.metadataOnly ?? false;
3876
3877
  return Promise.all(
3877
3878
  filtered.map(async (r) => {
3878
3879
  let content = "";
3879
3880
  let contextStartLine = r.metadata.startLine;
3880
3881
  let contextEndLine = r.metadata.endLine;
3881
- if (this.config.search.includeContext) {
3882
+ if (!metadataOnly && this.config.search.includeContext) {
3882
3883
  try {
3883
3884
  const fileContent = await fsPromises2.readFile(
3884
3885
  r.metadata.filePath,
@@ -4085,6 +4086,91 @@ var Indexer = class {
4085
4086
  getLogger() {
4086
4087
  return this.logger;
4087
4088
  }
4089
+ async findSimilar(code, limit, options) {
4090
+ const searchStartTime = performance2.now();
4091
+ const { store, provider, database } = await this.ensureInitialized();
4092
+ if (store.count() === 0) {
4093
+ this.logger.search("debug", "Find similar on empty index");
4094
+ return [];
4095
+ }
4096
+ const maxResults = limit ?? this.config.search.maxResults;
4097
+ const filterByBranch = options?.filterByBranch ?? true;
4098
+ this.logger.search("debug", "Starting find similar", {
4099
+ codeLength: code.length,
4100
+ maxResults,
4101
+ filterByBranch
4102
+ });
4103
+ const embeddingStartTime = performance2.now();
4104
+ const { embedding, tokensUsed } = await provider.embed(code);
4105
+ const embeddingMs = performance2.now() - embeddingStartTime;
4106
+ this.logger.recordEmbeddingApiCall(tokensUsed);
4107
+ const vectorStartTime = performance2.now();
4108
+ const semanticResults = store.search(embedding, maxResults * 2);
4109
+ const vectorMs = performance2.now() - vectorStartTime;
4110
+ let branchChunkIds = null;
4111
+ if (filterByBranch && this.currentBranch !== "default") {
4112
+ branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
4113
+ }
4114
+ const filtered = semanticResults.filter((r) => {
4115
+ if (r.score < this.config.search.minScore) return false;
4116
+ if (branchChunkIds && !branchChunkIds.has(r.id)) return false;
4117
+ if (options?.excludeFile) {
4118
+ if (r.metadata.filePath === options.excludeFile) return false;
4119
+ }
4120
+ if (options?.fileType) {
4121
+ const ext = r.metadata.filePath.split(".").pop()?.toLowerCase();
4122
+ if (ext !== options.fileType.toLowerCase().replace(/^\./, "")) return false;
4123
+ }
4124
+ if (options?.directory) {
4125
+ const normalizedDir = options.directory.replace(/^\/|\/$/g, "");
4126
+ if (!r.metadata.filePath.includes(`/${normalizedDir}/`) && !r.metadata.filePath.includes(`${normalizedDir}/`)) return false;
4127
+ }
4128
+ if (options?.chunkType) {
4129
+ if (r.metadata.chunkType !== options.chunkType) return false;
4130
+ }
4131
+ return true;
4132
+ }).slice(0, maxResults);
4133
+ const totalSearchMs = performance2.now() - searchStartTime;
4134
+ this.logger.recordSearch(totalSearchMs, {
4135
+ embeddingMs,
4136
+ vectorMs,
4137
+ keywordMs: 0,
4138
+ fusionMs: 0
4139
+ });
4140
+ this.logger.search("info", "Find similar complete", {
4141
+ codeLength: code.length,
4142
+ results: filtered.length,
4143
+ totalMs: Math.round(totalSearchMs * 100) / 100,
4144
+ embeddingMs: Math.round(embeddingMs * 100) / 100,
4145
+ vectorMs: Math.round(vectorMs * 100) / 100
4146
+ });
4147
+ return Promise.all(
4148
+ filtered.map(async (r) => {
4149
+ let content = "";
4150
+ if (this.config.search.includeContext) {
4151
+ try {
4152
+ const fileContent = await fsPromises2.readFile(
4153
+ r.metadata.filePath,
4154
+ "utf-8"
4155
+ );
4156
+ const lines = fileContent.split("\n");
4157
+ content = lines.slice(r.metadata.startLine - 1, r.metadata.endLine).join("\n");
4158
+ } catch {
4159
+ content = "[File not accessible]";
4160
+ }
4161
+ }
4162
+ return {
4163
+ filePath: r.metadata.filePath,
4164
+ startLine: r.metadata.startLine,
4165
+ endLine: r.metadata.endLine,
4166
+ content,
4167
+ score: r.score,
4168
+ chunkType: r.metadata.chunkType,
4169
+ name: r.metadata.name
4170
+ };
4171
+ })
4172
+ );
4173
+ }
4088
4174
  };
4089
4175
 
4090
4176
  // node_modules/chokidar/index.js
@@ -6026,7 +6112,7 @@ function getIndexer() {
6026
6112
  return sharedIndexer;
6027
6113
  }
6028
6114
  var codebase_search = tool({
6029
- description: "Search codebase by MEANING, not keywords. Use when you don't know exact function/class names. Returns focused results (5-10 files). For known identifiers like 'validateToken' or 'UserService', use grep instead - it's faster and finds all occurrences. Best for: 'find authentication logic', 'code that handles payments', 'error middleware'.",
6115
+ description: "Search codebase by MEANING, not keywords. Returns full code content. Use when you need to see actual implementation. For just finding WHERE code is (saves ~90% tokens), use codebase_peek instead. For known identifiers like 'validateToken', use grep - it's faster.",
6030
6116
  args: {
6031
6117
  query: z.string().describe("Natural language description of what code you're looking for. Describe behavior, not syntax."),
6032
6118
  limit: z.number().optional().default(10).describe("Maximum number of results to return"),
@@ -6058,6 +6144,38 @@ ${r.content}
6058
6144
  ${formatted.join("\n\n")}`;
6059
6145
  }
6060
6146
  });
6147
/**
 * Tool definition: semantic search that reports only where matches are
 * (file, line range, name, chunk type, score), never their code content.
 * It calls the indexer's `search` with `metadataOnly: true`.
 */
var codebase_peek = tool({
  description: "Quick lookup of code locations by meaning. Returns only metadata (file, line, name, type) WITHOUT code content. Use this first to find WHERE code is, then use Read tool to examine specific files. Saves tokens by not returning full code blocks. Best for: discovery, navigation, finding multiple related locations.",
  args: {
    query: z.string().describe("Natural language description of what code you're looking for."),
    limit: z.number().optional().default(10).describe("Maximum number of results to return"),
    fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
    directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
    chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type")
  },
  /**
   * Runs the metadata-only search and renders one line per hit.
   *
   * @param {object} args - Parsed tool arguments (see `args` above).
   * @returns {Promise<string>} Human-readable listing, or a hint when nothing matched.
   */
  async execute(args) {
    const indexer = getIndexer();
    const searchOptions = {
      fileType: args.fileType,
      directory: args.directory,
      chunkType: args.chunkType,
      metadataOnly: true
    };
    const hits = await indexer.search(args.query, args.limit ?? 10, searchOptions);
    if (hits.length === 0) {
      return "No matching code found. Try a different query or run index_codebase first.";
    }
    const listing = [];
    for (const [position, hit] of hits.entries()) {
      const label = hit.name ? `"${hit.name}"` : "(anonymous)";
      const where = `${hit.filePath}:${hit.startLine}-${hit.endLine}`;
      listing.push(`[${position + 1}] ${hit.chunkType} ${label} at ${where} (score: ${hit.score.toFixed(2)})`);
    }
    // Heading, blank line, listing, blank line, closing hint.
    return [
      `Found ${hits.length} locations for "${args.query}":`,
      "",
      listing.join("\n"),
      "",
      "Use Read tool to examine specific files."
    ].join("\n");
  }
});
6061
6179
  var index_codebase = tool({
6062
6180
  description: "Index the codebase for semantic search. Creates vector embeddings of code chunks. Incremental - only re-indexes changed files (~50ms when nothing changed). Run before first codebase_search.",
6063
6181
  args: {
@@ -6169,6 +6287,39 @@ var index_logs = tool({
6169
6287
  }).join("\n");
6170
6288
  }
6171
6289
  });
6290
/**
 * Tool definition: looks up indexed code that is semantically similar to a
 * pasted snippet by delegating to the indexer's `findSimilar`, then renders
 * each hit as a header line followed by a fenced code block.
 */
var find_similar = tool({
  description: "Find code similar to a given snippet. Use for duplicate detection, pattern discovery, or refactoring prep. Paste code and find semantically similar implementations elsewhere in the codebase.",
  args: {
    code: z.string().describe("The code snippet to find similar code for"),
    limit: z.number().optional().default(10).describe("Maximum number of results to return"),
    fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
    directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
    chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type"),
    excludeFile: z.string().optional().describe("Exclude results from this file path (useful when searching for duplicates of code from a specific file)")
  },
  /**
   * Executes the similarity lookup and formats the hits.
   *
   * @param {object} args - Parsed tool arguments (see `args` above).
   * @returns {Promise<string>} Formatted report, or a hint when nothing matched.
   */
  async execute(args) {
    const indexer = getIndexer();
    const filters = {
      fileType: args.fileType,
      directory: args.directory,
      chunkType: args.chunkType,
      excludeFile: args.excludeFile
    };
    const matches = await indexer.findSimilar(args.code, args.limit ?? 10, filters);
    if (matches.length === 0) {
      return "No similar code found. Try a different snippet or run index_codebase first.";
    }
    const sections = matches.map((match, position) => {
      // The quoted name is included only when the chunk has one.
      const label = match.name ? ` "${match.name}"` : "";
      const header = `[${position + 1}] ${match.chunkType}${label} in ${match.filePath}:${match.startLine}-${match.endLine}`;
      const similarity = (match.score * 100).toFixed(1);
      return [
        `${header} (similarity: ${similarity}%)`,
        "```",
        `${match.content}`,
        "```"
      ].join("\n");
    });
    return [`Found ${matches.length} similar code blocks:`, "", sections.join("\n\n")].join("\n");
  }
});
6172
6323
  function formatIndexStats(stats, verbose = false) {
6173
6324
  const lines = [];
6174
6325
  if (stats.indexedChunks === 0 && stats.removedChunks === 0) {
@@ -6351,11 +6502,13 @@ var plugin = async ({ directory }) => {
6351
6502
  return {
6352
6503
  tool: {
6353
6504
  codebase_search,
6505
+ codebase_peek,
6354
6506
  index_codebase,
6355
6507
  index_status,
6356
6508
  index_health_check,
6357
6509
  index_metrics,
6358
- index_logs
6510
+ index_logs,
6511
+ find_similar
6359
6512
  },
6360
6513
  async config(cfg) {
6361
6514
  cfg.command = cfg.command ?? {};