opencode-codebase-index 0.3.2 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -64,11 +64,12 @@ src/api/checkout.ts:89 (Route handler for /pay)
64
64
  |----------|------|-----|
65
65
  | Don't know the function name | `codebase_search` | Semantic search finds by meaning |
66
66
  | Exploring unfamiliar codebase | `codebase_search` | Discovers related code across files |
67
+ | Just need to find locations | `codebase_peek` | Returns metadata only, saves ~90% tokens |
67
68
  | Know exact identifier | `grep` | Faster, finds all occurrences |
68
69
  | Need ALL matches | `grep` | Semantic returns top N only |
69
70
  | Mixed discovery + precision | `/find` (hybrid) | Best of both worlds |
70
71
 
71
- **Rule of thumb**: Semantic search for discovery → grep for precision.
72
+ **Rule of thumb**: `codebase_peek` to find locations → `Read` to examine → `grep` for precision.
72
73
 
73
74
  ## 📊 Token Usage
74
75
 
@@ -187,6 +188,18 @@ The plugin exposes these tools to the OpenCode agent:
187
188
  | "code that calculates shipping costs" | "shipping" |
188
189
  | "where user permissions are checked" | "permissions" |
189
190
 
191
+ ### `codebase_peek`
192
+ **Token-efficient discovery.** Returns only metadata (file, line, name, type) without code content.
193
+ - **Use for**: Finding WHERE code is before deciding what to read. Saves ~90% tokens vs `codebase_search`.
194
+ - **Example output**:
195
+ ```
196
+ [1] function "validatePayment" at src/billing.ts:45-67 (score: 0.92)
197
+ [2] class "PaymentProcessor" at src/processor.ts:12-89 (score: 0.87)
198
+
199
+ Use Read tool to examine specific files.
200
+ ```
201
+ - **Workflow**: `codebase_peek` → find locations → `Read` specific files
202
+
190
203
  ### `index_codebase`
191
204
  Manually trigger indexing.
192
205
  - **Use for**: Forcing a re-index or checking stats.
@@ -233,7 +246,8 @@ Zero-config by default (uses `auto` mode). Customize in `.opencode/codebase-inde
233
246
  "semanticOnly": false,
234
247
  "autoGc": true,
235
248
  "gcIntervalDays": 7,
236
- "gcOrphanThreshold": 100
249
+ "gcOrphanThreshold": 100,
250
+ "requireProjectMarker": true
237
251
  },
238
252
  "search": {
239
253
  "maxResults": 20,
@@ -266,6 +280,7 @@ Zero-config by default (uses `auto` mode). Customize in `.opencode/codebase-inde
266
280
  | `autoGc` | `true` | Automatically run garbage collection to remove orphaned embeddings/chunks |
267
281
  | `gcIntervalDays` | `7` | Run GC on initialization if last GC was more than N days ago |
268
282
  | `gcOrphanThreshold` | `100` | Run GC after indexing if orphan count exceeds this threshold |
283
+ | `requireProjectMarker` | `true` | Require a project marker (`.git`, `package.json`, etc.) to enable file watching and auto-indexing. Prevents accidentally indexing large directories like home. Set to `false` to index any directory. |
269
284
  | **search** | | |
270
285
  | `maxResults` | `20` | Maximum results to return |
271
286
  | `minScore` | `0.1` | Minimum similarity score (0-1). Lower = more results |
package/dist/index.cjs CHANGED
@@ -701,7 +701,8 @@ function getDefaultIndexingConfig() {
701
701
  retryDelayMs: 1e3,
702
702
  autoGc: true,
703
703
  gcIntervalDays: 7,
704
- gcOrphanThreshold: 100
704
+ gcOrphanThreshold: 100,
705
+ requireProjectMarker: true
705
706
  };
706
707
  }
707
708
  function getDefaultSearchConfig() {
@@ -756,7 +757,8 @@ function parseConfig(raw) {
756
757
  retryDelayMs: typeof rawIndexing.retryDelayMs === "number" ? rawIndexing.retryDelayMs : defaultIndexing.retryDelayMs,
757
758
  autoGc: typeof rawIndexing.autoGc === "boolean" ? rawIndexing.autoGc : defaultIndexing.autoGc,
758
759
  gcIntervalDays: typeof rawIndexing.gcIntervalDays === "number" ? Math.max(1, rawIndexing.gcIntervalDays) : defaultIndexing.gcIntervalDays,
759
- gcOrphanThreshold: typeof rawIndexing.gcOrphanThreshold === "number" ? Math.max(0, rawIndexing.gcOrphanThreshold) : defaultIndexing.gcOrphanThreshold
760
+ gcOrphanThreshold: typeof rawIndexing.gcOrphanThreshold === "number" ? Math.max(0, rawIndexing.gcOrphanThreshold) : defaultIndexing.gcOrphanThreshold,
761
+ requireProjectMarker: typeof rawIndexing.requireProjectMarker === "boolean" ? rawIndexing.requireProjectMarker : defaultIndexing.requireProjectMarker
760
762
  };
761
763
  const rawSearch = input.search && typeof input.search === "object" ? input.search : {};
762
764
  const search = {
@@ -2186,6 +2188,30 @@ var OllamaEmbeddingProvider = class {
2186
2188
  var import_ignore = __toESM(require_ignore(), 1);
2187
2189
  var import_fs2 = require("fs");
2188
2190
  var path2 = __toESM(require("path"), 1);
2191
+ var PROJECT_MARKERS = [
2192
+ ".git",
2193
+ "package.json",
2194
+ "Cargo.toml",
2195
+ "go.mod",
2196
+ "pyproject.toml",
2197
+ "setup.py",
2198
+ "requirements.txt",
2199
+ "Gemfile",
2200
+ "composer.json",
2201
+ "pom.xml",
2202
+ "build.gradle",
2203
+ "CMakeLists.txt",
2204
+ "Makefile",
2205
+ ".opencode"
2206
+ ];
2207
+ function hasProjectMarker(projectRoot) {
2208
+ for (const marker of PROJECT_MARKERS) {
2209
+ if ((0, import_fs2.existsSync)(path2.join(projectRoot, marker))) {
2210
+ return true;
2211
+ }
2212
+ }
2213
+ return false;
2214
+ }
2189
2215
  function createIgnoreFilter(projectRoot) {
2190
2216
  const ig = (0, import_ignore.default)();
2191
2217
  const defaultIgnores = [
@@ -3879,12 +3905,13 @@ var Indexer = class {
3879
3905
  keywordMs: Math.round(keywordMs * 100) / 100,
3880
3906
  fusionMs: Math.round(fusionMs * 100) / 100
3881
3907
  });
3908
+ const metadataOnly = options?.metadataOnly ?? false;
3882
3909
  return Promise.all(
3883
3910
  filtered.map(async (r) => {
3884
3911
  let content = "";
3885
3912
  let contextStartLine = r.metadata.startLine;
3886
3913
  let contextEndLine = r.metadata.endLine;
3887
- if (this.config.search.includeContext) {
3914
+ if (!metadataOnly && this.config.search.includeContext) {
3888
3915
  try {
3889
3916
  const fileContent = await import_fs4.promises.readFile(
3890
3917
  r.metadata.filePath,
@@ -4091,6 +4118,91 @@ var Indexer = class {
4091
4118
  getLogger() {
4092
4119
  return this.logger;
4093
4120
  }
4121
+ async findSimilar(code, limit, options) {
4122
+ const searchStartTime = import_perf_hooks.performance.now();
4123
+ const { store, provider, database } = await this.ensureInitialized();
4124
+ if (store.count() === 0) {
4125
+ this.logger.search("debug", "Find similar on empty index");
4126
+ return [];
4127
+ }
4128
+ const maxResults = limit ?? this.config.search.maxResults;
4129
+ const filterByBranch = options?.filterByBranch ?? true;
4130
+ this.logger.search("debug", "Starting find similar", {
4131
+ codeLength: code.length,
4132
+ maxResults,
4133
+ filterByBranch
4134
+ });
4135
+ const embeddingStartTime = import_perf_hooks.performance.now();
4136
+ const { embedding, tokensUsed } = await provider.embed(code);
4137
+ const embeddingMs = import_perf_hooks.performance.now() - embeddingStartTime;
4138
+ this.logger.recordEmbeddingApiCall(tokensUsed);
4139
+ const vectorStartTime = import_perf_hooks.performance.now();
4140
+ const semanticResults = store.search(embedding, maxResults * 2);
4141
+ const vectorMs = import_perf_hooks.performance.now() - vectorStartTime;
4142
+ let branchChunkIds = null;
4143
+ if (filterByBranch && this.currentBranch !== "default") {
4144
+ branchChunkIds = new Set(database.getBranchChunkIds(this.currentBranch));
4145
+ }
4146
+ const filtered = semanticResults.filter((r) => {
4147
+ if (r.score < this.config.search.minScore) return false;
4148
+ if (branchChunkIds && !branchChunkIds.has(r.id)) return false;
4149
+ if (options?.excludeFile) {
4150
+ if (r.metadata.filePath === options.excludeFile) return false;
4151
+ }
4152
+ if (options?.fileType) {
4153
+ const ext = r.metadata.filePath.split(".").pop()?.toLowerCase();
4154
+ if (ext !== options.fileType.toLowerCase().replace(/^\./, "")) return false;
4155
+ }
4156
+ if (options?.directory) {
4157
+ const normalizedDir = options.directory.replace(/^\/|\/$/g, "");
4158
+ if (!r.metadata.filePath.includes(`/${normalizedDir}/`) && !r.metadata.filePath.includes(`${normalizedDir}/`)) return false;
4159
+ }
4160
+ if (options?.chunkType) {
4161
+ if (r.metadata.chunkType !== options.chunkType) return false;
4162
+ }
4163
+ return true;
4164
+ }).slice(0, maxResults);
4165
+ const totalSearchMs = import_perf_hooks.performance.now() - searchStartTime;
4166
+ this.logger.recordSearch(totalSearchMs, {
4167
+ embeddingMs,
4168
+ vectorMs,
4169
+ keywordMs: 0,
4170
+ fusionMs: 0
4171
+ });
4172
+ this.logger.search("info", "Find similar complete", {
4173
+ codeLength: code.length,
4174
+ results: filtered.length,
4175
+ totalMs: Math.round(totalSearchMs * 100) / 100,
4176
+ embeddingMs: Math.round(embeddingMs * 100) / 100,
4177
+ vectorMs: Math.round(vectorMs * 100) / 100
4178
+ });
4179
+ return Promise.all(
4180
+ filtered.map(async (r) => {
4181
+ let content = "";
4182
+ if (this.config.search.includeContext) {
4183
+ try {
4184
+ const fileContent = await import_fs4.promises.readFile(
4185
+ r.metadata.filePath,
4186
+ "utf-8"
4187
+ );
4188
+ const lines = fileContent.split("\n");
4189
+ content = lines.slice(r.metadata.startLine - 1, r.metadata.endLine).join("\n");
4190
+ } catch {
4191
+ content = "[File not accessible]";
4192
+ }
4193
+ }
4194
+ return {
4195
+ filePath: r.metadata.filePath,
4196
+ startLine: r.metadata.startLine,
4197
+ endLine: r.metadata.endLine,
4198
+ content,
4199
+ score: r.score,
4200
+ chunkType: r.metadata.chunkType,
4201
+ name: r.metadata.name
4202
+ };
4203
+ })
4204
+ );
4205
+ }
4094
4206
  };
4095
4207
 
4096
4208
  // node_modules/chokidar/index.js
@@ -6021,6 +6133,13 @@ function createWatcherWithIndexer(indexer, projectRoot, config) {
6021
6133
  // src/tools/index.ts
6022
6134
  var import_plugin = require("@opencode-ai/plugin");
6023
6135
  var z = import_plugin.tool.schema;
6136
+ var MAX_CONTENT_LINES = 30;
6137
+ function truncateContent(content) {
6138
+ const lines = content.split("\n");
6139
+ if (lines.length <= MAX_CONTENT_LINES) return content;
6140
+ return lines.slice(0, MAX_CONTENT_LINES).join("\n") + `
6141
+ // ... (${lines.length - MAX_CONTENT_LINES} more lines)`;
6142
+ }
6024
6143
  var sharedIndexer = null;
6025
6144
  function initializeTools(projectRoot, config) {
6026
6145
  sharedIndexer = new Indexer(projectRoot, config);
@@ -6032,10 +6151,10 @@ function getIndexer() {
6032
6151
  return sharedIndexer;
6033
6152
  }
6034
6153
  var codebase_search = (0, import_plugin.tool)({
6035
- description: "Search codebase by MEANING, not keywords. Use when you don't know exact function/class names. Returns focused results (5-10 files). For known identifiers like 'validateToken' or 'UserService', use grep instead - it's faster and finds all occurrences. Best for: 'find authentication logic', 'code that handles payments', 'error middleware'.",
6154
+ description: "Search codebase by MEANING, not keywords. Returns full code content. Use when you need to see actual implementation. For just finding WHERE code is (saves ~90% tokens), use codebase_peek instead. For known identifiers like 'validateToken', use grep - it's faster.",
6036
6155
  args: {
6037
6156
  query: z.string().describe("Natural language description of what code you're looking for. Describe behavior, not syntax."),
6038
- limit: z.number().optional().default(10).describe("Maximum number of results to return"),
6157
+ limit: z.number().optional().default(5).describe("Maximum number of results to return"),
6039
6158
  fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
6040
6159
  directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
6041
6160
  chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type"),
@@ -6043,7 +6162,7 @@ var codebase_search = (0, import_plugin.tool)({
6043
6162
  },
6044
6163
  async execute(args) {
6045
6164
  const indexer = getIndexer();
6046
- const results = await indexer.search(args.query, args.limit ?? 10, {
6165
+ const results = await indexer.search(args.query, args.limit ?? 5, {
6047
6166
  fileType: args.fileType,
6048
6167
  directory: args.directory,
6049
6168
  chunkType: args.chunkType,
@@ -6056,7 +6175,7 @@ var codebase_search = (0, import_plugin.tool)({
6056
6175
  const header = r.name ? `[${idx + 1}] ${r.chunkType} "${r.name}" in ${r.filePath}:${r.startLine}-${r.endLine}` : `[${idx + 1}] ${r.chunkType} in ${r.filePath}:${r.startLine}-${r.endLine}`;
6057
6176
  return `${header} (score: ${r.score.toFixed(2)})
6058
6177
  \`\`\`
6059
- ${r.content}
6178
+ ${truncateContent(r.content)}
6060
6179
  \`\`\``;
6061
6180
  });
6062
6181
  return `Found ${results.length} results for "${args.query}":
@@ -6064,6 +6183,38 @@ ${r.content}
6064
6183
  ${formatted.join("\n\n")}`;
6065
6184
  }
6066
6185
  });
6186
+ var codebase_peek = (0, import_plugin.tool)({
6187
+ description: "Quick lookup of code locations by meaning. Returns only metadata (file, line, name, type) WITHOUT code content. Use this first to find WHERE code is, then use Read tool to examine specific files. Saves tokens by not returning full code blocks. Best for: discovery, navigation, finding multiple related locations.",
6188
+ args: {
6189
+ query: z.string().describe("Natural language description of what code you're looking for."),
6190
+ limit: z.number().optional().default(10).describe("Maximum number of results to return"),
6191
+ fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
6192
+ directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
6193
+ chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type")
6194
+ },
6195
+ async execute(args) {
6196
+ const indexer = getIndexer();
6197
+ const results = await indexer.search(args.query, args.limit ?? 10, {
6198
+ fileType: args.fileType,
6199
+ directory: args.directory,
6200
+ chunkType: args.chunkType,
6201
+ metadataOnly: true
6202
+ });
6203
+ if (results.length === 0) {
6204
+ return "No matching code found. Try a different query or run index_codebase first.";
6205
+ }
6206
+ const formatted = results.map((r, idx) => {
6207
+ const location = `${r.filePath}:${r.startLine}-${r.endLine}`;
6208
+ const name = r.name ? `"${r.name}"` : "(anonymous)";
6209
+ return `[${idx + 1}] ${r.chunkType} ${name} at ${location} (score: ${r.score.toFixed(2)})`;
6210
+ });
6211
+ return `Found ${results.length} locations for "${args.query}":
6212
+
6213
+ ${formatted.join("\n")}
6214
+
6215
+ Use Read tool to examine specific files.`;
6216
+ }
6217
+ });
6067
6218
  var index_codebase = (0, import_plugin.tool)({
6068
6219
  description: "Index the codebase for semantic search. Creates vector embeddings of code chunks. Incremental - only re-indexes changed files (~50ms when nothing changed). Run before first codebase_search.",
6069
6220
  args: {
@@ -6175,6 +6326,39 @@ var index_logs = (0, import_plugin.tool)({
6175
6326
  }).join("\n");
6176
6327
  }
6177
6328
  });
6329
+ var find_similar = (0, import_plugin.tool)({
6330
+ description: "Find code similar to a given snippet. Use for duplicate detection, pattern discovery, or refactoring prep. Paste code and find semantically similar implementations elsewhere in the codebase.",
6331
+ args: {
6332
+ code: z.string().describe("The code snippet to find similar code for"),
6333
+ limit: z.number().optional().default(10).describe("Maximum number of results to return"),
6334
+ fileType: z.string().optional().describe("Filter by file extension (e.g., 'ts', 'py', 'rs')"),
6335
+ directory: z.string().optional().describe("Filter by directory path (e.g., 'src/utils', 'lib')"),
6336
+ chunkType: z.enum(["function", "class", "method", "interface", "type", "enum", "struct", "impl", "trait", "module", "other"]).optional().describe("Filter by code chunk type"),
6337
+ excludeFile: z.string().optional().describe("Exclude results from this file path (useful when searching for duplicates of code from a specific file)")
6338
+ },
6339
+ async execute(args) {
6340
+ const indexer = getIndexer();
6341
+ const results = await indexer.findSimilar(args.code, args.limit ?? 10, {
6342
+ fileType: args.fileType,
6343
+ directory: args.directory,
6344
+ chunkType: args.chunkType,
6345
+ excludeFile: args.excludeFile
6346
+ });
6347
+ if (results.length === 0) {
6348
+ return "No similar code found. Try a different snippet or run index_codebase first.";
6349
+ }
6350
+ const formatted = results.map((r, idx) => {
6351
+ const header = r.name ? `[${idx + 1}] ${r.chunkType} "${r.name}" in ${r.filePath}:${r.startLine}-${r.endLine}` : `[${idx + 1}] ${r.chunkType} in ${r.filePath}:${r.startLine}-${r.endLine}`;
6352
+ return `${header} (similarity: ${(r.score * 100).toFixed(1)}%)
6353
+ \`\`\`
6354
+ ${truncateContent(r.content)}
6355
+ \`\`\``;
6356
+ });
6357
+ return `Found ${results.length} similar code blocks:
6358
+
6359
+ ${formatted.join("\n\n")}`;
6360
+ }
6361
+ });
6178
6362
  function formatIndexStats(stats, verbose = false) {
6179
6363
  const lines = [];
6180
6364
  if (stats.indexedChunks === 0 && stats.removedChunks === 0) {
@@ -6345,24 +6529,32 @@ var plugin = async ({ directory }) => {
6345
6529
  const config = parseConfig(rawConfig);
6346
6530
  initializeTools(projectRoot, config);
6347
6531
  const indexer = new Indexer(projectRoot, config);
6348
- if (config.indexing.autoIndex) {
6532
+ const isValidProject = !config.indexing.requireProjectMarker || hasProjectMarker(projectRoot);
6533
+ if (!isValidProject) {
6534
+ console.warn(
6535
+ `[codebase-index] Skipping file watching and auto-indexing: no project marker found in "${projectRoot}". Set "indexing.requireProjectMarker": false in config to override.`
6536
+ );
6537
+ }
6538
+ if (config.indexing.autoIndex && isValidProject) {
6349
6539
  indexer.initialize().then(() => {
6350
6540
  indexer.index().catch(() => {
6351
6541
  });
6352
6542
  }).catch(() => {
6353
6543
  });
6354
6544
  }
6355
- if (config.indexing.watchFiles) {
6545
+ if (config.indexing.watchFiles && isValidProject) {
6356
6546
  createWatcherWithIndexer(indexer, projectRoot, config);
6357
6547
  }
6358
6548
  return {
6359
6549
  tool: {
6360
6550
  codebase_search,
6551
+ codebase_peek,
6361
6552
  index_codebase,
6362
6553
  index_status,
6363
6554
  index_health_check,
6364
6555
  index_metrics,
6365
- index_logs
6556
+ index_logs,
6557
+ find_similar
6366
6558
  },
6367
6559
  async config(cfg) {
6368
6560
  cfg.command = cfg.command ?? {};