opencode-codebase-index 0.3.2 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -696,7 +696,8 @@ function getDefaultIndexingConfig() {
696
696
  retryDelayMs: 1e3,
697
697
  autoGc: true,
698
698
  gcIntervalDays: 7,
699
- gcOrphanThreshold: 100
699
+ gcOrphanThreshold: 100,
700
+ requireProjectMarker: true
700
701
  };
701
702
  }
702
703
  function getDefaultSearchConfig() {
@@ -751,7 +752,8 @@ function parseConfig(raw) {
751
752
  retryDelayMs: typeof rawIndexing.retryDelayMs === "number" ? rawIndexing.retryDelayMs : defaultIndexing.retryDelayMs,
752
753
  autoGc: typeof rawIndexing.autoGc === "boolean" ? rawIndexing.autoGc : defaultIndexing.autoGc,
753
754
  gcIntervalDays: typeof rawIndexing.gcIntervalDays === "number" ? Math.max(1, rawIndexing.gcIntervalDays) : defaultIndexing.gcIntervalDays,
754
- gcOrphanThreshold: typeof rawIndexing.gcOrphanThreshold === "number" ? Math.max(0, rawIndexing.gcOrphanThreshold) : defaultIndexing.gcOrphanThreshold
755
+ gcOrphanThreshold: typeof rawIndexing.gcOrphanThreshold === "number" ? Math.max(0, rawIndexing.gcOrphanThreshold) : defaultIndexing.gcOrphanThreshold,
756
+ requireProjectMarker: typeof rawIndexing.requireProjectMarker === "boolean" ? rawIndexing.requireProjectMarker : defaultIndexing.requireProjectMarker
755
757
  };
756
758
  const rawSearch = input.search && typeof input.search === "object" ? input.search : {};
757
759
  const search = {
@@ -2181,6 +2183,30 @@ var OllamaEmbeddingProvider = class {
2181
2183
  var import_ignore = __toESM(require_ignore(), 1);
2182
2184
  import { existsSync as existsSync2, readFileSync as readFileSync2, promises as fsPromises } from "fs";
2183
2185
  import * as path2 from "path";
/**
 * File and directory names whose presence directly inside a folder marks
 * that folder as a project root.
 */
var PROJECT_MARKERS = [
  ".git",
  "package.json",
  "Cargo.toml",
  "go.mod",
  "pyproject.toml",
  "setup.py",
  "requirements.txt",
  "Gemfile",
  "composer.json",
  "pom.xml",
  "build.gradle",
  "CMakeLists.txt",
  "Makefile",
  ".opencode"
];

/**
 * Reports whether a directory directly contains at least one project marker.
 *
 * @param {string} projectRoot - Directory to inspect.
 * @param {string[]} [markers=PROJECT_MARKERS] - Entry names to look for.
 * @returns {boolean} `true` as soon as one marker exists under `projectRoot`;
 *   `false` when none exist or when `projectRoot` is not a non-empty string.
 */
function hasProjectMarker(projectRoot, markers = PROJECT_MARKERS) {
  // path.join("", marker) yields a path relative to the process working
  // directory, which would probe the wrong folder; a non-string root makes
  // path.join throw. Neither can identify a project, so answer "no".
  if (typeof projectRoot !== "string" || projectRoot === "") {
    return false;
  }
  return markers.some((marker) => existsSync2(path2.join(projectRoot, marker)));
}
2184
2210
  function createIgnoreFilter(projectRoot) {
2185
2211
  const ig = (0, import_ignore.default)();
2186
2212
  const defaultIgnores = [
@@ -3873,12 +3899,13 @@ var Indexer = class {
3873
3899
  keywordMs: Math.round(keywordMs * 100) / 100,
3874
3900
  fusionMs: Math.round(fusionMs * 100) / 100
3875
3901
  });
3902
+ const metadataOnly = options?.metadataOnly ?? false;
3876
3903
  return Promise.all(
3877
3904
  filtered.map(async (r) => {
3878
3905
  let content = "";
3879
3906
  let contextStartLine = r.metadata.startLine;
3880
3907
  let contextEndLine = r.metadata.endLine;
3881
- if (this.config.search.includeContext) {
3908
+ if (!metadataOnly && this.config.search.includeContext) {
3882
3909
  try {
3883
3910
  const fileContent = await fsPromises2.readFile(
3884
3911
  r.metadata.filePath,
@@ -4085,6 +4112,91 @@ var Indexer = class {
4085
4112
  getLogger() {
4086
4113
  return this.logger;
4087
4114
  }
4115
+ async findSimilar(code, limit, options) {
4116
+ const searchStartTime = performance2.now();
4117
+ const { store, provider, database } = await this.ensureInitialized();
4118
+ if (store.count() === 0) {
4119
+ this.logger.search("debug", "Find similar on empty index");
4120
+ return [];
4121
+ }
4122
+ const maxResults = limit ?? this.config.search.maxResults;
4123
+ const filterByBranch = options?.filterByBranch ?? true;
4124
+ this.logger.search("debug", "Starting find similar", {
4125
+ codeLength: code.length,
4126
+ maxResults,
4127
+ filterByBranch
4128
+ });
4129
+ const embeddingStartTime = performance2.now();
4130
+ const { embedding, tokensUsed } = await provider.embed(code);
4131
+ const embeddingMs = performance2.now() - embeddingStartTime;
4132
+ this.logger.recordEmbeddingApiCall(tokensUsed);
4133
+ const vectorStartTime = performance2.now();
4134
+ const semanticResults = store.search(embedding, maxResults * 2);
4135
+ const vectorMs = performance2.now() - vectorStartTime;
4136
+ let branchChunkIds = null;
4137
+ if (filterByBranch && this.currentBranch !== "default") {
4138
+ branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
4139
+ }
4140
+ const filtered = semanticResults.filter((r) => {
4141
+ if (r.score < this.config.search.minScore) return false;
4142
+ if (branchChunkIds && !branchChunkIds.has(r.id)) return false;
4143
+ if (options?.excludeFile) {
4144
+ if (r.metadata.filePath === options.excludeFile) return false;
4145
+ }
4146
+ if (options?.fileType) {
4147
+ const ext = r.metadata.filePath.split(".").pop()?.toLowerCase();
4148
+ if (ext !== options.fileType.toLowerCase().replace(/^\./, "")) return false;
4149
+ }
4150
+ if (options?.directory) {
4151
+ const normalizedDir = options.directory.replace(/^\/|\/$/g, "");
4152
+ if (!r.metadata.filePath.includes(`/${normalizedDir}/`) && !r.metadata.filePath.includes(`${normalizedDir}/`)) return false;
4153
+ }
4154
+ if (options?.chunkType) {
4155
+ if (r.metadata.chunkType !== options.chunkType) return false;
4156
+ }
4157
+ return true;
4158
+ }).slice(0, maxResults);
4159
+ const totalSearchMs = performance2.now() - searchStartTime;
4160
+ this.logger.recordSearch(totalSearchMs, {
4161
+ embeddingMs,
4162
+ vectorMs,
4163
+ keywordMs: 0,
4164
+ fusionMs: 0
4165
+ });
4166
+ this.logger.search("info", "Find similar complete", {
4167
+ codeLength: code.length,
4168
+ results: filtered.length,
4169
+ totalMs: Math.round(totalSearchMs * 100) / 100,
4170
+ embeddingMs: Math.round(embeddingMs * 100) / 100,
4171
+ vectorMs: Math.round(vectorMs * 100) / 100
4172
+ });
4173
+ return Promise.all(
4174
+ filtered.map(async (r) => {
4175
+ let content = "";
4176
+ if (this.config.search.includeContext) {
4177
+ try {
4178
+ const fileContent = await fsPromises2.readFile(
4179
+ r.metadata.filePath,
4180
+ "utf-8"
4181
+ );
4182
+ const lines = fileContent.split("\n");
4183
+ content = lines.slice(r.metadata.startLine - 1, r.metadata.endLine).join("\n");
4184
+ } catch {
4185
+ content = "[File not accessible]";
4186
+ }
4187
+ }
4188
+ return {
4189
+ filePath: r.metadata.filePath,
4190
+ startLine: r.metadata.startLine,
4191
+ endLine: r.metadata.endLine,
4192
+ content,
4193
+ score: r.score,
4194
+ chunkType: r.metadata.chunkType,
4195
+ name: r.metadata.name
4196
+ };
4197
+ })
4198
+ );
4199
+ }
4088
4200
  };
4089
4201
 
4090
4202
  // node_modules/chokidar/index.js
@@ -6015,6 +6127,13 @@ function createWatcherWithIndexer(indexer, projectRoot, config) {
6015
6127
  // src/tools/index.ts
6016
6128
  import { tool } from "@opencode-ai/plugin";
6017
6129
  var z = tool.schema;
/** Default number of content lines shown per result before truncation. */
var MAX_CONTENT_LINES = 30;

/**
 * Limits a snippet to a maximum number of lines.
 *
 * @param {string} content - Text to shorten; lines are separated by "\n".
 * @param {number} [maxLines=MAX_CONTENT_LINES] - Line cap. Values that are not
 *   positive integers fall back to `MAX_CONTENT_LINES`.
 * @returns {string} `content` unchanged when it fits; otherwise the first
 *   `maxLines` lines followed by a "// ... (N more lines)" marker line.
 *   Non-string input yields "".
 */
function truncateContent(content, maxLines = MAX_CONTENT_LINES) {
  // Results without content (e.g. undefined) should render as empty rather
  // than throw on .split().
  if (typeof content !== "string") return "";
  const cap = Number.isInteger(maxLines) && maxLines > 0 ? maxLines : MAX_CONTENT_LINES;
  const lines = content.split("\n");
  if (lines.length <= cap) return content;
  const hiddenCount = lines.length - cap;
  return `${lines.slice(0, cap).join("\n")}\n// ... (${hiddenCount} more lines)`;
}
6018
6137
  var sharedIndexer = null;
6019
6138
  function initializeTools(projectRoot, config) {
6020
6139
  sharedIndexer = new Indexer(projectRoot, config);
@@ -6026,10 +6145,10 @@ function getIndexer() {
6026
6145
  return sharedIndexer;
6027
6146
  }
6028
6147
  var codebase_search = tool({
6029
- description: "Search codebase by MEANING, not keywords. Use when you don't know exact function/class names. Returns focused results (5-10 files). For known identifiers like 'validateToken' or 'UserService', use grep instead - it's faster and finds all occurrences. Best for: 'find authentication logic', 'code that handles payments', 'error middleware'.",
6148
+ description: "Search codebase by MEANING, not keywords. Returns full code content. Use when you need to see actual implementation. For just finding WHERE code is (saves ~90% tokens), use codebase_peek instead. For known identifiers like 'validateToken', use grep - it's faster.",
6030
6149
  args: {
6031
6150
  query: z.string().describe("Natural language description of what code you're looking for. Describe behavior, not syntax."),
6032
- limit: z.number().optional().default(10).describe("Maximum number of results to return"),
6151
+ limit: z.number().optional().default(5).describe("Maximum number of results to return"),
6033
6152
  fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
6034
6153
  directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
6035
6154
  chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type"),
@@ -6037,7 +6156,7 @@ var codebase_search = tool({
6037
6156
  },
6038
6157
  async execute(args) {
6039
6158
  const indexer = getIndexer();
6040
- const results = await indexer.search(args.query, args.limit ?? 10, {
6159
+ const results = await indexer.search(args.query, args.limit ?? 5, {
6041
6160
  fileType: args.fileType,
6042
6161
  directory: args.directory,
6043
6162
  chunkType: args.chunkType,
@@ -6050,7 +6169,7 @@ var codebase_search = tool({
6050
6169
  const header = r.name ? `[${idx + 1}] ${r.chunkType} "${r.name}" in ${r.filePath}:${r.startLine}-${r.endLine}` : `[${idx + 1}] ${r.chunkType} in ${r.filePath}:${r.startLine}-${r.endLine}`;
6051
6170
  return `${header} (score: ${r.score.toFixed(2)})
6052
6171
  \`\`\`
6053
- ${r.content}
6172
+ ${truncateContent(r.content)}
6054
6173
  \`\`\``;
6055
6174
  });
6056
6175
  return `Found ${results.length} results for "${args.query}":
@@ -6058,6 +6177,38 @@ ${r.content}
6058
6177
  ${formatted.join("\n\n")}`;
6059
6178
  }
6060
6179
  });
/**
 * Tool: semantic lookup that reports only where matching code lives
 * (chunk type, name, file and line range, score) and never the code itself.
 * It runs the shared indexer's search with `metadataOnly: true`.
 */
var codebase_peek = tool({
  description: "Quick lookup of code locations by meaning. Returns only metadata (file, line, name, type) WITHOUT code content. Use this first to find WHERE code is, then use Read tool to examine specific files. Saves tokens by not returning full code blocks. Best for: discovery, navigation, finding multiple related locations.",
  args: {
    query: z.string().describe("Natural language description of what code you're looking for."),
    limit: z.number().optional().default(10).describe("Maximum number of results to return"),
    fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
    directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
    chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type")
  },
  async execute(args) {
    const indexer = getIndexer();
    const { query, limit, fileType, directory, chunkType } = args;
    const matches = await indexer.search(query, limit ?? 10, {
      fileType,
      directory,
      chunkType,
      metadataOnly: true
    });
    if (matches.length === 0) {
      return "No matching code found. Try a different query or run index_codebase first.";
    }
    // One line per hit: "[n] <type> <name> at <file>:<start>-<end> (score: x.xx)".
    const rows = matches.map((match, position) => {
      const label = match.name ? `"${match.name}"` : "(anonymous)";
      const span = `${match.filePath}:${match.startLine}-${match.endLine}`;
      return `[${position + 1}] ${match.chunkType} ${label} at ${span} (score: ${match.score.toFixed(2)})`;
    });
    return [
      `Found ${matches.length} locations for "${query}":`,
      "",
      rows.join("\n"),
      "",
      "Use Read tool to examine specific files."
    ].join("\n");
  }
});
6061
6212
  var index_codebase = tool({
6062
6213
  description: "Index the codebase for semantic search. Creates vector embeddings of code chunks. Incremental - only re-indexes changed files (~50ms when nothing changed). Run before first codebase_search.",
6063
6214
  args: {
@@ -6169,6 +6320,39 @@ var index_logs = tool({
6169
6320
  }).join("\n");
6170
6321
  }
6171
6322
  });
/**
 * Tool: given a code snippet, lists indexed chunks that the shared indexer's
 * `findSimilar` considers close to it. Each hit is shown with a header line,
 * its similarity as a percentage, and its truncated content in a fenced block.
 */
var find_similar = tool({
  description: "Find code similar to a given snippet. Use for duplicate detection, pattern discovery, or refactoring prep. Paste code and find semantically similar implementations elsewhere in the codebase.",
  args: {
    code: z.string().describe("The code snippet to find similar code for"),
    limit: z.number().optional().default(10).describe("Maximum number of results to return"),
    fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
    directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
    chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type"),
    excludeFile: z.string().optional().describe("Exclude results from this file path (useful when searching for duplicates of code from a specific file)")
  },
  async execute(args) {
    const indexer = getIndexer();
    const { code, limit, fileType, directory, chunkType, excludeFile } = args;
    const matches = await indexer.findSimilar(code, limit ?? 10, {
      fileType,
      directory,
      chunkType,
      excludeFile
    });
    if (matches.length === 0) {
      return "No similar code found. Try a different snippet or run index_codebase first.";
    }
    const fence = "```";
    const sections = matches.map((match, position) => {
      // The quoted name is only present for named chunks.
      const namePart = match.name ? ` "${match.name}"` : "";
      const span = `${match.filePath}:${match.startLine}-${match.endLine}`;
      const header = `[${position + 1}] ${match.chunkType}${namePart} in ${span}`;
      const similarity = (match.score * 100).toFixed(1);
      return [
        `${header} (similarity: ${similarity}%)`,
        fence,
        truncateContent(match.content),
        fence
      ].join("\n");
    });
    return [
      `Found ${matches.length} similar code blocks:`,
      "",
      sections.join("\n\n")
    ].join("\n");
  }
});
6172
6356
  function formatIndexStats(stats, verbose = false) {
6173
6357
  const lines = [];
6174
6358
  if (stats.indexedChunks === 0 && stats.removedChunks === 0) {
@@ -6338,24 +6522,32 @@ var plugin = async ({ directory }) => {
6338
6522
  const config = parseConfig(rawConfig);
6339
6523
  initializeTools(projectRoot, config);
6340
6524
  const indexer = new Indexer(projectRoot, config);
6341
- if (config.indexing.autoIndex) {
6525
+ const isValidProject = !config.indexing.requireProjectMarker || hasProjectMarker(projectRoot);
6526
+ if (!isValidProject) {
6527
+ console.warn(
6528
+ `[codebase-index] Skipping file watching and auto-indexing: no project marker found in "${projectRoot}". Set "indexing.requireProjectMarker": false in config to override.`
6529
+ );
6530
+ }
6531
+ if (config.indexing.autoIndex && isValidProject) {
6342
6532
  indexer.initialize().then(() => {
6343
6533
  indexer.index().catch(() => {
6344
6534
  });
6345
6535
  }).catch(() => {
6346
6536
  });
6347
6537
  }
6348
- if (config.indexing.watchFiles) {
6538
+ if (config.indexing.watchFiles && isValidProject) {
6349
6539
  createWatcherWithIndexer(indexer, projectRoot, config);
6350
6540
  }
6351
6541
  return {
6352
6542
  tool: {
6353
6543
  codebase_search,
6544
+ codebase_peek,
6354
6545
  index_codebase,
6355
6546
  index_status,
6356
6547
  index_health_check,
6357
6548
  index_metrics,
6358
- index_logs
6549
+ index_logs,
6550
+ find_similar
6359
6551
  },
6360
6552
  async config(cfg) {
6361
6553
  cfg.command = cfg.command ?? {};