codebase-analyzer-mcp 2.0.4 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -7746,6 +7746,10 @@ async function surfaceAnalysis(repoPath, options = {}) {
7746
7746
  });
7747
7747
  const fileInfos = await gatherFileInfo(repoPath, files);
7748
7748
  const repositoryMap = buildRepositoryMap(repoPath, fileInfos, options.sourceName);
7749
+ const readmeContent = await readReadmeContent(repoPath, fileInfos);
7750
+ if (readmeContent) {
7751
+ repositoryMap.readme = readmeContent.slice(0, 5000);
7752
+ }
7749
7753
  const identifiedModules = identifyModules(fileInfos);
7750
7754
  const complexity = calculateComplexity(fileInfos, identifiedModules);
7751
7755
  const estimatedAnalysisTime = estimateAnalysisTimes(fileInfos, complexity);
@@ -7809,7 +7813,6 @@ function buildRepositoryMap(repoPath, files, sourceName) {
7809
7813
  const estimatedTokens = Math.ceil(totalSize / 4);
7810
7814
  const entryPoints = findEntryPoints(files);
7811
7815
  const structure = buildDirectoryTree(files);
7812
- const readme = extractReadme(repoPath, files);
7813
7816
  return {
7814
7817
  name,
7815
7818
  languages,
@@ -7817,8 +7820,7 @@ function buildRepositoryMap(repoPath, files, sourceName) {
7817
7820
  totalSize,
7818
7821
  estimatedTokens,
7819
7822
  entryPoints,
7820
- structure,
7821
- readme
7823
+ structure
7822
7824
  };
7823
7825
  }
7824
7826
  function findEntryPoints(files) {
@@ -7894,17 +7896,6 @@ function buildDirectoryTree(files) {
7894
7896
  };
7895
7897
  return collapseTree(root);
7896
7898
  }
7897
- function extractReadme(repoPath, files) {
7898
- const readmeFile = files.find((f) => f.relativePath.toLowerCase() === "readme.md" || f.relativePath.toLowerCase() === "readme");
7899
- if (readmeFile && readmeFile.size < 50000) {
7900
- try {
7901
- return `README found at ${readmeFile.relativePath}`;
7902
- } catch {
7903
- return;
7904
- }
7905
- }
7906
- return;
7907
- }
7908
7899
  function identifyModules(files) {
7909
7900
  const modules = new Map;
7910
7901
  for (const file of files) {
@@ -7977,6 +7968,18 @@ function estimateAnalysisTimes(files, complexity) {
7977
7968
  semantic: Math.round(semanticTime)
7978
7969
  };
7979
7970
  }
7971
+ async function readReadmeContent(repoPath, files) {
7972
+ const readmeFile = files.find((f) => f.relativePath.toLowerCase() === "readme.md" || f.relativePath.toLowerCase() === "readme");
7973
+ if (readmeFile && readmeFile.size < 50000) {
7974
+ try {
7975
+ const content = await readFile(join(repoPath, readmeFile.relativePath), "utf-8");
7976
+ return content;
7977
+ } catch {
7978
+ return;
7979
+ }
7980
+ }
7981
+ return;
7982
+ }
7980
7983
  var LANGUAGE_MAP, ENTRY_POINT_PATTERNS, MODULE_TYPE_PATTERNS;
7981
7984
  var init_surface = __esm(() => {
7982
7985
  init_esm6();
@@ -8081,6 +8084,24 @@ async function structuralAnalysis(module, files) {
8081
8084
  let totalClasses = 0;
8082
8085
  for (const file of files) {
8083
8086
  const ext2 = extname2(file.path).toLowerCase();
8087
+ if (ext2 === ".md" || ext2 === ".mdx") {
8088
+ const mdSymbols = analyzeMarkdownFile(file.path, file.content);
8089
+ symbols.push(...mdSymbols);
8090
+ const lines2 = file.content.split(`
8091
+ `);
8092
+ totalLoc += lines2.filter((l) => l.trim()).length;
8093
+ continue;
8094
+ }
8095
+ if (ext2 === ".sh" || ext2 === ".bash" || ext2 === ".zsh") {
8096
+ const shAnalysis = analyzeShellFile(file.path, file.content);
8097
+ symbols.push(...shAnalysis.symbols);
8098
+ imports.push(...shAnalysis.imports);
8099
+ const lines2 = file.content.split(`
8100
+ `);
8101
+ totalLoc += lines2.filter((l) => l.trim() && !l.trim().startsWith("#")).length;
8102
+ totalFunctions += shAnalysis.symbols.filter((s) => s.type === "function").length;
8103
+ continue;
8104
+ }
8084
8105
  const config = getLanguageConfig(ext2);
8085
8106
  if (!config) {
8086
8107
  continue;
@@ -8225,6 +8246,83 @@ async function analyzeFileWithRegex(filePath, content, config) {
8225
8246
  }
8226
8247
  return { symbols, imports, exports };
8227
8248
  }
8249
+ function analyzeMarkdownFile(filePath, content) {
8250
+ const symbols = [];
8251
+ const lines = content.split(`
8252
+ `);
8253
+ if (lines[0]?.trim() === "---") {
8254
+ for (let i = 1;i < lines.length; i++) {
8255
+ if (lines[i].trim() === "---")
8256
+ break;
8257
+ const keyMatch = lines[i].match(/^(\w[\w-]*):\s/);
8258
+ if (keyMatch) {
8259
+ symbols.push({
8260
+ name: `frontmatter:${keyMatch[1]}`,
8261
+ type: "variable",
8262
+ file: filePath,
8263
+ line: i + 1,
8264
+ exported: false
8265
+ });
8266
+ }
8267
+ }
8268
+ }
8269
+ for (let i = 0;i < lines.length; i++) {
8270
+ const headingMatch = lines[i].match(/^(#{1,2})\s+(.+)/);
8271
+ if (headingMatch) {
8272
+ const level = headingMatch[1].length;
8273
+ symbols.push({
8274
+ name: headingMatch[2].trim(),
8275
+ type: level === 1 ? "class" : "function",
8276
+ file: filePath,
8277
+ line: i + 1,
8278
+ exported: false
8279
+ });
8280
+ }
8281
+ }
8282
+ return symbols;
8283
+ }
8284
+ function analyzeShellFile(filePath, content) {
8285
+ const symbols = [];
8286
+ const imports = [];
8287
+ const lines = content.split(`
8288
+ `);
8289
+ for (let i = 0;i < lines.length; i++) {
8290
+ const line = lines[i];
8291
+ const funcMatch = line.match(/^(?:function\s+)?(\w+)\s*\(\)\s*\{/) || line.match(/^function\s+(\w+)\s*\{/);
8292
+ if (funcMatch) {
8293
+ symbols.push({
8294
+ name: funcMatch[1],
8295
+ type: "function",
8296
+ file: filePath,
8297
+ line: i + 1,
8298
+ exported: false
8299
+ });
8300
+ continue;
8301
+ }
8302
+ const constMatch = line.match(/^([A-Z][A-Z0-9_]+)=/);
8303
+ if (constMatch) {
8304
+ symbols.push({
8305
+ name: constMatch[1],
8306
+ type: "constant",
8307
+ file: filePath,
8308
+ line: i + 1,
8309
+ exported: false
8310
+ });
8311
+ continue;
8312
+ }
8313
+ const sourceMatch = line.match(/^(?:source|\.) +["']?([^"'\s]+)["']?/);
8314
+ if (sourceMatch) {
8315
+ imports.push({
8316
+ from: filePath,
8317
+ to: sourceMatch[1],
8318
+ importedNames: [],
8319
+ isDefault: false,
8320
+ isType: false
8321
+ });
8322
+ }
8323
+ }
8324
+ return { symbols, imports };
8325
+ }
8228
8326
  function getLineNumber(content, index) {
8229
8327
  return content.slice(0, index).split(`
8230
8328
  `).length;
@@ -43247,6 +43345,7 @@ class AnalysisCache {
43247
43345
  return null;
43248
43346
  }
43249
43347
  if (Date.now() > entry.expiresAt) {
43348
+ entry.value.cleanup?.().catch(() => {});
43250
43349
  this.cache.delete(key);
43251
43350
  return null;
43252
43351
  }
@@ -43256,6 +43355,7 @@ class AnalysisCache {
43256
43355
  for (const entry of this.cache.values()) {
43257
43356
  if (entry.value.result.analysisId === analysisId) {
43258
43357
  if (Date.now() > entry.expiresAt) {
43358
+ entry.value.cleanup?.().catch(() => {});
43259
43359
  this.cache.delete(entry.key);
43260
43360
  return null;
43261
43361
  }
@@ -43278,6 +43378,8 @@ class AnalysisCache {
43278
43378
  }
43279
43379
  invalidate(source, commitHash) {
43280
43380
  const key = this.generateKey(source, commitHash);
43381
+ const entry = this.cache.get(key);
43382
+ entry?.value.cleanup?.().catch(() => {});
43281
43383
  return this.cache.delete(key);
43282
43384
  }
43283
43385
  clearExpired() {
@@ -43285,6 +43387,7 @@ class AnalysisCache {
43285
43387
  let cleared = 0;
43286
43388
  for (const [key, entry] of this.cache.entries()) {
43287
43389
  if (now > entry.expiresAt) {
43390
+ entry.value.cleanup?.().catch(() => {});
43288
43391
  this.cache.delete(key);
43289
43392
  cleared++;
43290
43393
  }
@@ -43292,6 +43395,9 @@ class AnalysisCache {
43292
43395
  return cleared;
43293
43396
  }
43294
43397
  clear() {
43398
+ for (const entry of this.cache.values()) {
43399
+ entry.value.cleanup?.().catch(() => {});
43400
+ }
43295
43401
  this.cache.clear();
43296
43402
  }
43297
43403
  stats() {
@@ -43318,6 +43424,8 @@ class AnalysisCache {
43318
43424
  }
43319
43425
  }
43320
43426
  if (oldestKey) {
43427
+ const entry = this.cache.get(oldestKey);
43428
+ entry?.value.cleanup?.().catch(() => {});
43321
43429
  this.cache.delete(oldestKey);
43322
43430
  }
43323
43431
  }
@@ -43326,6 +43434,9 @@ var DEFAULT_TTL_MS, MAX_CACHE_ENTRIES = 50, analysisCache;
43326
43434
  var init_cache = __esm(() => {
43327
43435
  DEFAULT_TTL_MS = 60 * 60 * 1000;
43328
43436
  analysisCache = new AnalysisCache;
43437
+ process.on("beforeExit", () => {
43438
+ analysisCache.clear();
43439
+ });
43329
43440
  });
43330
43441
 
43331
43442
  // src/core/disclosure.ts
@@ -43347,7 +43458,7 @@ function estimateTokens(obj) {
43347
43458
  function buildAnalysisResult(analysisId, source, depth, surface, structural, semantic, durationMs) {
43348
43459
  const summary = buildSummary(surface, structural, semantic);
43349
43460
  const sections = buildExpandableSections(surface, structural, semantic);
43350
- const forAgent = buildAgentDigest(surface, summary, sections);
43461
+ const forAgent = buildAgentDigest(analysisId, surface, summary, sections);
43351
43462
  const tokenCost = estimateTokens({
43352
43463
  repositoryMap: surface.repositoryMap,
43353
43464
  summary,
@@ -43404,24 +43515,34 @@ function buildExpandableSections(surface, structural, semantic) {
43404
43515
  const sections = [];
43405
43516
  for (const module of surface.identifiedModules.slice(0, 10)) {
43406
43517
  const structuralData = structural.find((s2) => s2.modulePath === module.path);
43518
+ const isDocModule = module.primaryLanguage === "Markdown" || module.primaryLanguage === "MDX";
43407
43519
  const section = {
43408
43520
  id: `module_${module.path.replace(/[^a-zA-Z0-9]/g, "_")}`,
43409
43521
  title: `Module: ${module.name}`,
43410
43522
  type: "module",
43411
- summary: `${module.type} module with ${module.fileCount} files in ${module.primaryLanguage}`,
43412
- canExpand: !!structuralData,
43523
+ summary: isDocModule ? `Documentation module with ${module.fileCount} files` : `${module.type} module with ${module.fileCount} files in ${module.primaryLanguage}`,
43524
+ canExpand: !!(structuralData || isDocModule),
43413
43525
  expansionCost: {
43414
43526
  detail: structuralData ? estimateTokens(structuralData.symbols.slice(0, 20)) : 0,
43415
43527
  full: structuralData ? estimateTokens(structuralData) : 0
43416
43528
  }
43417
43529
  };
43418
43530
  if (structuralData) {
43419
- section.detail = {
43420
- exports: structuralData.exports,
43421
- complexity: structuralData.complexity,
43422
- symbolCount: structuralData.symbols.length,
43423
- importCount: structuralData.imports.length
43424
- };
43531
+ if (isDocModule) {
43532
+ const headings = structuralData.symbols.filter((s2) => s2.type === "class" || s2.type === "function").slice(0, 20).map((s2) => ({ title: s2.name, file: s2.file, line: s2.line }));
43533
+ section.detail = {
43534
+ type: "documentation",
43535
+ headings,
43536
+ fileCount: module.fileCount
43537
+ };
43538
+ } else {
43539
+ section.detail = {
43540
+ exports: structuralData.exports,
43541
+ complexity: structuralData.complexity,
43542
+ symbolCount: structuralData.symbols.length,
43543
+ importCount: structuralData.imports.length
43544
+ };
43545
+ }
43425
43546
  }
43426
43547
  sections.push(section);
43427
43548
  }
@@ -43485,7 +43606,7 @@ function buildExpandableSections(surface, structural, semantic) {
43485
43606
  }
43486
43607
  return sections;
43487
43608
  }
43488
- function buildAgentDigest(surface, summary, sections) {
43609
+ function buildAgentDigest(analysisId, surface, summary, sections) {
43489
43610
  const { repositoryMap, complexity } = surface;
43490
43611
  const quickSummary = `${repositoryMap.name} is a ${summary.complexity} complexity ${summary.architectureType} codebase with ${repositoryMap.fileCount} files primarily in ${repositoryMap.languages[0]?.language || "mixed languages"}. ${summary.primaryPatterns.length > 0 ? `Key patterns include ${summary.primaryPatterns.slice(0, 3).join(", ")}.` : ""}`;
43491
43612
  const keyInsights = [];
@@ -43515,6 +43636,7 @@ function buildAgentDigest(surface, summary, sections) {
43515
43636
  suggestedNextSteps.push(`Focus on core modules: ${coreModules.map((m2) => m2.name).join(", ")}`);
43516
43637
  }
43517
43638
  }
43639
+ suggestedNextSteps.push(`Use read_files with analysisId "${analysisId}" to read specific files from the repository`);
43518
43640
  return {
43519
43641
  quickSummary,
43520
43642
  keyInsights,
@@ -43804,6 +43926,14 @@ async function orchestrateAnalysis(repoPath, options = {}) {
43804
43926
  logger.orchestrator("Surface-only mode, skipping deeper analysis");
43805
43927
  const result2 = buildAnalysisResult(analysisId, repoPath, depth, surface, [], null, Date.now() - startTime);
43806
43928
  result2.warnings = warnings.length > 0 ? warnings : undefined;
43929
+ analysisCache.set(repoPath, {
43930
+ result: result2,
43931
+ surface,
43932
+ structural: [],
43933
+ semantic: null,
43934
+ repoPath,
43935
+ cleanup: options.cleanup
43936
+ }, undefined, depth);
43807
43937
  return result2;
43808
43938
  }
43809
43939
  logger.progress("structural", "Phase 2: Starting structural analysis");
@@ -43890,7 +44020,9 @@ async function orchestrateAnalysis(repoPath, options = {}) {
43890
44020
  result,
43891
44021
  surface,
43892
44022
  structural,
43893
- semantic
44023
+ semantic,
44024
+ repoPath,
44025
+ cleanup: options.cleanup
43894
44026
  }, undefined, depth);
43895
44027
  state.phase = "complete";
43896
44028
  logger.orchestrator(`Analysis complete`, {
@@ -44076,10 +44208,11 @@ var package_default;
44076
44208
  var init_package = __esm(() => {
44077
44209
  package_default = {
44078
44210
  name: "codebase-analyzer-mcp",
44079
- version: "2.0.4",
44211
+ version: "2.1.0",
44080
44212
  description: "Multi-layer codebase analysis with Gemini AI. MCP server + Claude plugin with progressive disclosure.",
44081
44213
  type: "module",
44082
44214
  main: "dist/mcp/server.js",
44215
+ packageManager: "bun@1.3.8",
44083
44216
  bin: {
44084
44217
  cba: "dist/cli/index.js",
44085
44218
  "codebase-analyzer": "dist/cli/index.js"
@@ -44095,19 +44228,15 @@ var init_package = __esm(() => {
44095
44228
  "AGENTS.md"
44096
44229
  ],
44097
44230
  scripts: {
44098
- build: "bun run build:js",
44099
- "build:js": `bun build src/mcp/server.ts --outfile dist/mcp/server.js --target node && bun build src/cli/index.ts --outfile dist/cli/index.js --target node && node -e "const fs=require('fs');const c=fs.readFileSync('dist/cli/index.js','utf8');fs.writeFileSync('dist/cli/index.js','#!/usr/bin/env node\\n'+c)"`,
44231
+ build: "bun scripts/build.ts",
44100
44232
  dev: "bun --watch src/cli/index.ts",
44101
44233
  start: "bun dist/mcp/server.js",
44102
44234
  typecheck: "tsc --noEmit",
44103
44235
  test: "bun test",
44104
44236
  cli: "bun src/cli/index.ts",
44105
- cba: "bun src/cli/index.ts",
44106
- "version:sync": "bun scripts/sync-version.ts",
44107
- release: "npm version patch && bun run version:sync",
44108
- "release:minor": "npm version minor && bun run version:sync",
44109
- "release:major": "npm version major && bun run version:sync",
44110
- prepublishOnly: "bun run version:sync && bun run build:js"
44237
+ version: "bun scripts/sync-version.ts && git add .",
44238
+ postversion: "git push --follow-tags",
44239
+ prepublishOnly: "bun run build"
44111
44240
  },
44112
44241
  repository: {
44113
44242
  type: "git",
@@ -58242,6 +58371,207 @@ var init_dataflow = __esm(() => {
58242
58371
  };
58243
58372
  });
58244
58373
 
58374
+ // src/mcp/tools/query.ts
58375
+ var exports_query = {};
58376
+ __export(exports_query, {
58377
+ queryRepoSchema: () => queryRepoSchema,
58378
+ executeQueryRepo: () => executeQueryRepo
58379
+ });
58380
+ import { basename as basename4, join as join6 } from "path";
58381
+ import { readFile as readFile6 } from "fs/promises";
58382
+ function extractSourceName(source) {
58383
+ const githubMatch = source.match(/github\.com\/([^\/]+\/[^\/]+)/);
58384
+ if (githubMatch) {
58385
+ return githubMatch[1].replace(/\.git$/, "");
58386
+ }
58387
+ return basename4(source) || source;
58388
+ }
58389
+ function scoreFileRelevance(filePath, symbols, question) {
58390
+ const q = question.toLowerCase();
58391
+ const words = q.split(/\s+/).filter((w) => w.length > 2);
58392
+ let score = 0;
58393
+ const pathLower = filePath.toLowerCase();
58394
+ for (const word of words) {
58395
+ if (pathLower.includes(word))
58396
+ score += 3;
58397
+ }
58398
+ for (const sym of symbols) {
58399
+ const symLower = sym.toLowerCase();
58400
+ for (const word of words) {
58401
+ if (symLower.includes(word))
58402
+ score += 2;
58403
+ }
58404
+ }
58405
+ return score;
58406
+ }
58407
+ async function executeQueryRepo(input) {
58408
+ const { source, question } = input;
58409
+ const sourceName = extractSourceName(source);
58410
+ const { repoPath, cleanup } = await resolveSource(source);
58411
+ try {
58412
+ let cached2 = analysisCache.get(repoPath);
58413
+ let analysisId;
58414
+ if (cached2) {
58415
+ analysisId = cached2.result.analysisId;
58416
+ } else {
58417
+ const result = await orchestrateAnalysis(repoPath, {
58418
+ depth: "standard",
58419
+ sourceName,
58420
+ cleanup
58421
+ });
58422
+ analysisId = result.analysisId;
58423
+ cached2 = analysisCache.get(repoPath);
58424
+ if (!cached2) {
58425
+ throw new Error("Analysis completed but cache lookup failed");
58426
+ }
58427
+ }
58428
+ const fileSymbols = new Map;
58429
+ for (const mod of cached2.structural) {
58430
+ for (const sym of mod.symbols) {
58431
+ if (sym.file && sym.name) {
58432
+ const existing = fileSymbols.get(sym.file) || [];
58433
+ existing.push(sym.name);
58434
+ fileSymbols.set(sym.file, existing);
58435
+ }
58436
+ }
58437
+ if (mod.exports.length > 0) {
58438
+ const existing = fileSymbols.get(mod.modulePath) || [];
58439
+ existing.push(...mod.exports);
58440
+ fileSymbols.set(mod.modulePath, existing);
58441
+ }
58442
+ }
58443
+ const collectFiles = (node, prefix) => {
58444
+ const path3 = prefix ? `${prefix}/${node.name}` : node.name;
58445
+ if (node.type === "file") {
58446
+ if (!fileSymbols.has(path3)) {
58447
+ fileSymbols.set(path3, []);
58448
+ }
58449
+ } else if (node.children) {
58450
+ for (const child of node.children) {
58451
+ collectFiles(child, path3);
58452
+ }
58453
+ }
58454
+ };
58455
+ if (cached2.surface.repositoryMap.structure?.children) {
58456
+ for (const child of cached2.surface.repositoryMap.structure.children) {
58457
+ collectFiles(child, "");
58458
+ }
58459
+ }
58460
+ const scored = Array.from(fileSymbols.entries()).map(([path3, symbols]) => ({
58461
+ path: path3,
58462
+ symbols,
58463
+ score: scoreFileRelevance(path3, symbols, question)
58464
+ })).filter((f3) => f3.score > 0).sort((a, b) => b.score - a.score).slice(0, 15);
58465
+ const filesToRead = scored.length > 0 ? scored.map((f3) => f3.path) : (cached2.surface.repositoryMap.entryPoints || []).slice(0, 10);
58466
+ const fileContents = new Map;
58467
+ let totalChars = 0;
58468
+ const MAX_TOTAL_CHARS = 1e5;
58469
+ const MAX_PER_FILE = 4000;
58470
+ for (const filePath of filesToRead) {
58471
+ if (totalChars >= MAX_TOTAL_CHARS)
58472
+ break;
58473
+ const fullPath = join6(repoPath, filePath);
58474
+ try {
58475
+ const content = await readFile6(fullPath, "utf-8");
58476
+ const truncated = content.length > MAX_PER_FILE ? content.slice(0, MAX_PER_FILE) + `
58477
+ ... [truncated]` : content;
58478
+ fileContents.set(filePath, truncated);
58479
+ totalChars += truncated.length;
58480
+ } catch {}
58481
+ }
58482
+ try {
58483
+ return await queryWithGemini(question, analysisId, cached2, fileContents);
58484
+ } catch {
58485
+ return buildFallbackAnswer(question, analysisId, cached2, scored, fileContents);
58486
+ }
58487
+ } catch (error48) {
58488
+ if (cleanup) {
58489
+ await cleanup();
58490
+ }
58491
+ throw error48;
58492
+ }
58493
+ }
58494
+ async function queryWithGemini(question, analysisId, cached2, fileContents) {
58495
+ const surface = cached2.surface;
58496
+ const fileSummary = Array.from(fileContents.entries()).map(([path3, content]) => `--- ${path3} ---
58497
+ ${content}`).join(`
58498
+
58499
+ `);
58500
+ const structuralSummary = cached2.structural.map((mod) => {
58501
+ const exports = mod.exports.slice(0, 10).join(", ");
58502
+ const funcs = mod.complexity.functionCount;
58503
+ const classes = mod.complexity.classCount;
58504
+ return `- ${mod.modulePath}: ${funcs} functions, ${classes} classes. Exports: ${exports || "none"}`;
58505
+ }).join(`
58506
+ `);
58507
+ const prompt = `Answer this question about a codebase:
58508
+
58509
+ QUESTION: ${question}
58510
+
58511
+ Repository: ${surface.repositoryMap.name}
58512
+ Languages: ${surface.repositoryMap.languages.map((l) => l.language).join(", ")}
58513
+ Entry points: ${surface.repositoryMap.entryPoints.slice(0, 10).join(", ")}
58514
+ Modules: ${surface.identifiedModules.map((m2) => m2.name).join(", ")}
58515
+
58516
+ Structural overview:
58517
+ ${structuralSummary}
58518
+
58519
+ Relevant file contents:
58520
+ ${fileSummary}
58521
+
58522
+ Respond with this exact JSON structure:
58523
+ {
58524
+ "answer": "Clear, detailed answer to the question based on the code",
58525
+ "relevantFiles": [
58526
+ {"path": "relative/path.ts", "reason": "Why this file is relevant"}
58527
+ ],
58528
+ "confidence": "high" | "medium" | "low",
58529
+ "suggestedFollowUps": ["Follow-up question 1", "Follow-up question 2"]
58530
+ }
58531
+
58532
+ Guidelines:
58533
+ - Reference specific files and code when possible
58534
+ - If the code doesn't clearly answer the question, say so and set confidence to "low"
58535
+ - Suggest 2-3 follow-up questions that would help understand more
58536
+ - Keep relevantFiles to the most important 5-8 files`;
58537
+ const result = await generateJsonWithGemini(prompt, {
58538
+ maxOutputTokens: 4096
58539
+ });
58540
+ return { ...result, analysisId };
58541
+ }
58542
+ function buildFallbackAnswer(question, analysisId, cached2, scored, fileContents) {
58543
+ const surface = cached2.surface;
58544
+ const topFiles = scored.slice(0, 8);
58545
+ const relevantFiles = topFiles.map((f3) => ({
58546
+ path: f3.path,
58547
+ reason: f3.symbols.length > 0 ? `Contains relevant symbols: ${f3.symbols.slice(0, 5).join(", ")}` : `File path matches question keywords`
58548
+ }));
58549
+ const answer = topFiles.length > 0 ? `Based on keyword matching against the codebase structure, the most relevant files for "${question}" are listed below. ` + `The repository is a ${surface.repositoryMap.languages[0]?.language || "unknown"} project with ${surface.repositoryMap.fileCount} files. ` + `For a more detailed answer, ensure GEMINI_API_KEY is set. ` + `Use read_files with analysisId "${analysisId}" to examine the relevant files.` : `Could not find files matching "${question}" through keyword search. ` + `The repository contains ${surface.repositoryMap.fileCount} files primarily in ${surface.repositoryMap.languages[0]?.language || "unknown"}. ` + `Try rephrasing the question or use read_files with analysisId "${analysisId}" to explore specific files. ` + `For AI-powered answers, set GEMINI_API_KEY.`;
58550
+ return {
58551
+ answer,
58552
+ relevantFiles,
58553
+ confidence: topFiles.length > 3 ? "medium" : "low",
58554
+ analysisId,
58555
+ suggestedFollowUps: [
58556
+ `Use read_files to examine: ${topFiles.slice(0, 3).map((f3) => f3.path).join(", ")}`,
58557
+ `Use expand_section to drill into specific modules`,
58558
+ `Use trace_dataflow to follow data through the system`
58559
+ ]
58560
+ };
58561
+ }
58562
+ var queryRepoSchema;
58563
+ var init_query = __esm(() => {
58564
+ init_zod();
58565
+ init_repo_loader();
58566
+ init_orchestrator();
58567
+ init_cache();
58568
+ init_gemini();
58569
+ queryRepoSchema = {
58570
+ source: exports_external.string().describe("Local path or GitHub URL to the repository"),
58571
+ question: exports_external.string().describe("Question about the codebase (e.g. 'how is authentication handled?')")
58572
+ };
58573
+ });
58574
+
58245
58575
  // src/mcp/tools/capabilities.ts
58246
58576
  var exports_capabilities = {};
58247
58577
  __export(exports_capabilities, {
@@ -58275,19 +58605,14 @@ function getCapabilities() {
58275
58605
  parameters: ["source", "from", "to"]
58276
58606
  },
58277
58607
  {
58278
- name: "extract_feature",
58279
- description: "Analyze how a specific feature is implemented",
58280
- parameters: ["source", "feature"]
58608
+ name: "read_files",
58609
+ description: "Read specific files from a previously analyzed repository",
58610
+ parameters: ["analysisId", "paths", "maxLines"]
58281
58611
  },
58282
58612
  {
58283
58613
  name: "query_repo",
58284
- description: "Ask questions about the codebase",
58614
+ description: "Ask a question about a codebase and get an AI-powered answer with relevant files",
58285
58615
  parameters: ["source", "question"]
58286
- },
58287
- {
58288
- name: "compare_repos",
58289
- description: "Compare how repositories approach the same problem",
58290
- parameters: ["sources", "aspect"]
58291
58616
  }
58292
58617
  ],
58293
58618
  models: {
@@ -73306,13 +73631,13 @@ var init_stdio2 = __esm(() => {
73306
73631
  });
73307
73632
 
73308
73633
  // src/mcp/tools/analyze.ts
73309
- import { basename as basename4 } from "path";
73310
- function extractSourceName(source) {
73634
+ import { basename as basename5 } from "path";
73635
+ function extractSourceName2(source) {
73311
73636
  const githubMatch = source.match(/github\.com\/([^\/]+\/[^\/]+)/);
73312
73637
  if (githubMatch) {
73313
73638
  return githubMatch[1].replace(/\.git$/, "");
73314
73639
  }
73315
- return basename4(source) || source;
73640
+ return basename5(source) || source;
73316
73641
  }
73317
73642
  async function executeAnalyzeRepo(input) {
73318
73643
  const {
@@ -73323,7 +73648,7 @@ async function executeAnalyzeRepo(input) {
73323
73648
  tokenBudget,
73324
73649
  includeSemantics = false
73325
73650
  } = input;
73326
- const sourceName = extractSourceName(source);
73651
+ const sourceName = extractSourceName2(source);
73327
73652
  const { repoPath, cleanup } = await resolveSource(source);
73328
73653
  try {
73329
73654
  const result = await orchestrateAnalysis(repoPath, {
@@ -73332,13 +73657,15 @@ async function executeAnalyzeRepo(input) {
73332
73657
  exclude,
73333
73658
  tokenBudget,
73334
73659
  includeSemantics,
73335
- sourceName
73660
+ sourceName,
73661
+ cleanup
73336
73662
  });
73337
73663
  return result;
73338
- } finally {
73664
+ } catch (error48) {
73339
73665
  if (cleanup) {
73340
73666
  await cleanup();
73341
73667
  }
73668
+ throw error48;
73342
73669
  }
73343
73670
  }
73344
73671
  var analyzeRepoSchema;
@@ -73384,6 +73711,74 @@ var init_expand = __esm(() => {
73384
73711
  };
73385
73712
  });
73386
73713
 
73714
+ // src/mcp/tools/read-files.ts
73715
+ import { readFile as readFile7, stat as stat5 } from "fs/promises";
73716
+ import { join as join7, resolve, normalize as normalize2 } from "path";
73717
+ async function executeReadFiles(input) {
73718
+ const { analysisId, paths, maxLines = 500 } = input;
73719
+ const cached2 = analysisCache.getByAnalysisId(analysisId);
73720
+ if (!cached2) {
73721
+ return {
73722
+ error: `Analysis ${analysisId} not found in cache. It may have expired. Run analyze_repo again.`
73723
+ };
73724
+ }
73725
+ const repoPath = cached2.repoPath;
73726
+ if (!repoPath) {
73727
+ return {
73728
+ error: `No repository path stored for analysis ${analysisId}. This analysis predates the read_files feature.`
73729
+ };
73730
+ }
73731
+ try {
73732
+ await stat5(repoPath);
73733
+ } catch {
73734
+ return {
73735
+ error: `Repository at ${repoPath} is no longer available. Run analyze_repo again.`
73736
+ };
73737
+ }
73738
+ const resolvedRepoPath = resolve(repoPath);
73739
+ const effectiveMaxLines = Math.min(maxLines, 2000);
73740
+ const files = await Promise.all(paths.slice(0, 20).map(async (filePath) => {
73741
+ const normalized = normalize2(filePath);
73742
+ if (normalized.startsWith("..") || normalized.startsWith("/")) {
73743
+ return { path: filePath, error: "Invalid path: must be relative and within the repository" };
73744
+ }
73745
+ const fullPath = resolve(join7(resolvedRepoPath, normalized));
73746
+ if (!fullPath.startsWith(resolvedRepoPath)) {
73747
+ return { path: filePath, error: "Invalid path: traversal outside repository" };
73748
+ }
73749
+ try {
73750
+ const content = await readFile7(fullPath, "utf-8");
73751
+ const lines = content.split(`
73752
+ `);
73753
+ const truncated = lines.length > effectiveMaxLines;
73754
+ const outputContent = truncated ? lines.slice(0, effectiveMaxLines).join(`
73755
+ `) : content;
73756
+ return {
73757
+ path: filePath,
73758
+ content: outputContent,
73759
+ lineCount: lines.length,
73760
+ truncated
73761
+ };
73762
+ } catch {
73763
+ return { path: filePath, error: "File not found or not readable" };
73764
+ }
73765
+ }));
73766
+ return {
73767
+ analysisId,
73768
+ files
73769
+ };
73770
+ }
73771
+ var readFilesSchema;
73772
+ var init_read_files = __esm(() => {
73773
+ init_zod();
73774
+ init_cache();
73775
+ readFilesSchema = {
73776
+ analysisId: exports_external.string().describe("The analysisId from a previous analyze_repo result"),
73777
+ paths: exports_external.array(exports_external.string()).min(1).max(20).describe("Relative file paths from the repository (max 20)"),
73778
+ maxLines: exports_external.number().min(1).max(2000).default(500).optional().describe("Maximum lines per file (default 500, max 2000)")
73779
+ };
73780
+ });
73781
+
73387
73782
  // src/mcp/tools/index.ts
73388
73783
  var init_tools = __esm(() => {
73389
73784
  init_capabilities();
@@ -73391,6 +73786,8 @@ var init_tools = __esm(() => {
73391
73786
  init_expand();
73392
73787
  init_patterns();
73393
73788
  init_dataflow();
73789
+ init_read_files();
73790
+ init_query();
73394
73791
  });
73395
73792
 
73396
73793
  // src/mcp/server.ts
@@ -73566,6 +73963,68 @@ var init_server2 = __esm(() => {
73566
73963
  };
73567
73964
  }
73568
73965
  });
73966
+ server.tool("read_files", "Read specific files from a previously analyzed repository. Use the analysisId from analyze_repo to access files without re-cloning.", {
73967
+ analysisId: readFilesSchema.analysisId,
73968
+ paths: readFilesSchema.paths,
73969
+ maxLines: readFilesSchema.maxLines
73970
+ }, async ({ analysisId, paths, maxLines }) => {
73971
+ try {
73972
+ const result = await executeReadFiles({
73973
+ analysisId,
73974
+ paths,
73975
+ maxLines
73976
+ });
73977
+ return {
73978
+ content: [
73979
+ {
73980
+ type: "text",
73981
+ text: JSON.stringify(result, null, 2)
73982
+ }
73983
+ ]
73984
+ };
73985
+ } catch (error48) {
73986
+ const message = error48 instanceof Error ? error48.message : String(error48);
73987
+ return {
73988
+ content: [
73989
+ {
73990
+ type: "text",
73991
+ text: `Error reading files: ${message}`
73992
+ }
73993
+ ],
73994
+ isError: true
73995
+ };
73996
+ }
73997
+ });
73998
+ server.tool("query_repo", "Ask a question about a codebase and get an AI-powered answer with relevant file references. Uses cached analysis when available. Works best with GEMINI_API_KEY set, falls back to keyword matching without it.", {
73999
+ source: queryRepoSchema.source,
74000
+ question: queryRepoSchema.question
74001
+ }, async ({ source, question }) => {
74002
+ try {
74003
+ const result = await executeQueryRepo({
74004
+ source,
74005
+ question
74006
+ });
74007
+ return {
74008
+ content: [
74009
+ {
74010
+ type: "text",
74011
+ text: JSON.stringify(result, null, 2)
74012
+ }
74013
+ ]
74014
+ };
74015
+ } catch (error48) {
74016
+ const message = error48 instanceof Error ? error48.message : String(error48);
74017
+ return {
74018
+ content: [
74019
+ {
74020
+ type: "text",
74021
+ text: `Error querying repository: ${message}`
74022
+ }
74023
+ ],
74024
+ isError: true
74025
+ };
74026
+ }
74027
+ });
73569
74028
  main().catch((error48) => {
73570
74029
  console.error("Fatal error:", error48);
73571
74030
  process.exit(1);
@@ -73593,13 +74052,13 @@ init_orchestrator();
73593
74052
  init_repo_loader();
73594
74053
  init_logger();
73595
74054
  init_package();
73596
- import { basename as basename5 } from "path";
73597
- function extractSourceName2(source) {
74055
+ import { basename as basename6 } from "path";
74056
+ function extractSourceName3(source) {
73598
74057
  const githubMatch = source.match(/github\.com\/([^\/]+\/[^\/]+)/);
73599
74058
  if (githubMatch) {
73600
74059
  return githubMatch[1].replace(/\.git$/, "");
73601
74060
  }
73602
- return basename5(source) || source;
74061
+ return basename6(source) || source;
73603
74062
  }
73604
74063
  var program2 = new Command;
73605
74064
  program2.name("cba").description("Codebase Analyzer - Multi-layer repository analysis with Gemini AI").version(package_default.version);
@@ -73609,7 +74068,7 @@ program2.command("analyze").description("Perform architectural analysis of a rep
73609
74068
  logger.setVerbose(true);
73610
74069
  if (options.quiet)
73611
74070
  logger.setQuiet(true);
73612
- const sourceName = extractSourceName2(source);
74071
+ const sourceName = extractSourceName3(source);
73613
74072
  const { repoPath, cleanup } = await resolveSource(source);
73614
74073
  try {
73615
74074
  const result = await orchestrateAnalysis(repoPath, {
@@ -73673,6 +74132,23 @@ program2.command("dataflow").description("Trace data flow from an entry point").
73673
74132
  process.exit(1);
73674
74133
  }
73675
74134
  });
74135
+ program2.command("query").description("Ask a question about a codebase").argument("<source>", "Local path or GitHub URL").argument("<question>", "Question about the codebase").option("-v, --verbose", "Show detailed progress").option("-q, --quiet", "Only output the final result").action(async (source, question, options) => {
74136
+ try {
74137
+ if (options.verbose)
74138
+ logger.setVerbose(true);
74139
+ if (options.quiet)
74140
+ logger.setQuiet(true);
74141
+ const { executeQueryRepo: executeQueryRepo2 } = await Promise.resolve().then(() => (init_query(), exports_query));
74142
+ const result = await executeQueryRepo2({
74143
+ source,
74144
+ question
74145
+ });
74146
+ console.log(JSON.stringify(result, null, 2));
74147
+ } catch (error48) {
74148
+ logger.error("cli", error48 instanceof Error ? error48.message : String(error48));
74149
+ process.exit(1);
74150
+ }
74151
+ });
73676
74152
  program2.command("capabilities").description("Show available analysis capabilities").action(async () => {
73677
74153
  const { formatCapabilitiesResponse: formatCapabilitiesResponse2 } = await Promise.resolve().then(() => (init_capabilities(), exports_capabilities));
73678
74154
  console.log(JSON.stringify(formatCapabilitiesResponse2(), null, 2));
@@ -73745,7 +74221,7 @@ function formatAnalysisAsMarkdown(result) {
73745
74221
  return lines.join(`
73746
74222
  `);
73747
74223
  }
73748
- var cliCommands = ["analyze", "patterns", "dataflow", "capabilities", "help", "-h", "--help", "-V", "--version"];
74224
+ var cliCommands = ["analyze", "patterns", "dataflow", "query", "capabilities", "help", "-h", "--help", "-V", "--version"];
73749
74225
  var firstArg = process.argv[2];
73750
74226
  var isCliMode = firstArg && cliCommands.some((cmd) => firstArg === cmd || firstArg.startsWith("-"));
73751
74227
  if (isCliMode) {