codeseeker 1.8.1 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -67,8 +67,8 @@ const indexing_service_1 = require("./indexing-service");
67
67
  const coding_standards_generator_1 = require("../cli/services/analysis/coding-standards-generator");
68
68
  const language_support_service_1 = require("../cli/services/project/language-support-service");
69
69
  const query_cache_service_1 = require("./query-cache-service");
70
- const duplicate_code_detector_1 = require("../cli/services/analysis/deduplication/duplicate-code-detector");
71
- const knowledge_graph_1 = require("../cli/knowledge/graph/knowledge-graph");
70
+ // DuplicateCodeDetector no longer used - find_duplicates now uses indexed embeddings directly
71
+ // SemanticKnowledgeGraph no longer used - find_dead_code now uses indexed graph from storage manager
72
72
  // Version from package.json
73
73
  const VERSION = '2.0.0';
74
74
  /**
@@ -2022,6 +2022,18 @@ class CodeSeekerMcpServer {
2022
2022
  generateProjectId(projectPath) {
2023
2023
  return crypto.createHash('md5').update(projectPath).digest('hex');
2024
2024
  }
2025
+ /**
2026
+ * Collect a project's indexed documents from the vector store by running a single vector search with a random probe embedding.
2027
+ * Used by find_duplicates to analyze indexed embeddings. Resolves to the `document` field of each search result; 10000 is passed as the third searchByVector argument, so larger projects may be only partially covered (assuming that argument is a result limit - TODO confirm).
2028
+ */
2029
+ async getAllProjectDocuments(vectorStore, projectId) {
2030
+ // Use a random embedding (384 components, each in [-0.5, 0.5)) as the probe; it differs on every call, so the returned order/sample is not reproducible
2031
+ // NOTE(review): presumably searchByVector ranks every document of the project by similarity rather than filtering, and 384 matches the stored embedding dimension - confirm both
2032
+ const randomEmbedding = Array.from({ length: 384 }, () => Math.random() - 0.5);
2033
+ // Request a large sample of documents (up to 10000)
2034
+ const results = await vectorStore.searchByVector(randomEmbedding, projectId, 10000);
2035
+ return results.map(r => r.document);
2036
+ }
2025
2037
  /**
2026
2038
  * Generate actionable error message based on error type
2027
2039
  */
@@ -2098,62 +2110,142 @@ class CodeSeekerMcpServer {
2098
2110
  include_types: zod_1.z.array(zod_1.z.enum(['function', 'class', 'method', 'block'])).optional()
2099
2111
  .describe('Types of code to analyze. Default: all types'),
2100
2112
  },
2101
- }, async ({ project, similarity_threshold = 0.80, min_lines = 5, include_types }) => {
2113
+ }, async ({ project, similarity_threshold = 0.80, min_lines = 5, include_types: _include_types }) => {
2102
2114
  try {
2103
2115
  const storageManager = await (0, storage_1.getStorageManager)();
2104
2116
  const projectStore = storageManager.getProjectStore();
2117
+ const vectorStore = storageManager.getVectorStore();
2105
2118
  const projects = await projectStore.list();
2106
2119
  // Find the project
2107
2120
  const projectRecord = projects.find(p => p.name === project ||
2108
2121
  p.path === project ||
2109
2122
  path.basename(p.path) === project ||
2110
2123
  path.resolve(project) === p.path);
2111
- const projectPath = projectRecord?.path || path.resolve(project);
2112
- // Verify project exists
2113
- if (!fs.existsSync(projectPath)) {
2124
+ if (!projectRecord) {
2114
2125
  return {
2115
2126
  content: [{
2116
2127
  type: 'text',
2117
- text: `Project path not found: ${projectPath}`,
2128
+ text: `Project not found or not indexed: ${project}\n\n` +
2129
+ `Use index({path: "${project}"}) to index the project first.`,
2118
2130
  }],
2119
2131
  isError: true,
2120
2132
  };
2121
2133
  }
2122
- // Run duplicate detection
2123
- const detector = new duplicate_code_detector_1.DuplicateCodeDetector();
2124
- const report = await detector.analyzeProject(projectPath, {
2125
- semanticSimilarityThreshold: similarity_threshold,
2126
- minimumChunkSize: min_lines,
2127
- includeTypes: include_types || ['function', 'class', 'method', 'block'],
2134
+ // Use indexed embeddings from vector store for duplicate detection
2135
+ // This leverages the existing indexed data instead of re-analyzing from scratch
2136
+ // Get all documents for this project from the vector store
2137
+ // We'll use the vector search to find similar chunks efficiently
2138
+ const allDocs = await this.getAllProjectDocuments(vectorStore, projectRecord.id);
2139
+ if (allDocs.length === 0) {
2140
+ return {
2141
+ content: [{
2142
+ type: 'text',
2143
+ text: JSON.stringify({
2144
+ project: projectRecord.name,
2145
+ summary: {
2146
+ total_chunks_analyzed: 0,
2147
+ exact_duplicates: 0,
2148
+ semantic_duplicates: 0,
2149
+ structural_duplicates: 0,
2150
+ total_lines_affected: 0,
2151
+ potential_lines_saved: 0,
2152
+ },
2153
+ duplicate_groups: [],
2154
+ recommendations: ['No indexed chunks found. Run index() first.'],
2155
+ }, null, 2),
2156
+ }],
2157
+ };
2158
+ }
2159
+ // Filter by min_lines using the number of lines in each chunk's content (metadata line info is not consulted here)
2160
+ const filteredDocs = allDocs.filter(doc => {
2161
+ const lineCount = doc.content.split('\n').length;
2162
+ return lineCount >= min_lines;
2128
2163
  });
2164
+ // Find duplicate groups using indexed embeddings
2165
+ const duplicateGroups = [];
2166
+ const processed = new Set();
2167
+ const EXACT_THRESHOLD = 0.98;
2168
+ // For each chunk, find similar chunks using cosine similarity
2169
+ for (let i = 0; i < filteredDocs.length && duplicateGroups.length < 50; i++) {
2170
+ const doc = filteredDocs[i];
2171
+ if (processed.has(doc.id))
2172
+ continue;
2173
+ // Find similar documents using vector search
2174
+ const similarDocs = await vectorStore.searchByVector(doc.embedding, projectRecord.id, 20 // Get top 20 similar
2175
+ );
2176
+ // Filter by threshold and exclude self
2177
+ const matches = similarDocs.filter(match => match.document.id !== doc.id &&
2178
+ match.score >= similarity_threshold &&
2179
+ !processed.has(match.document.id));
2180
+ if (matches.length > 0) {
2181
+ // Determine type (exact vs semantic)
2182
+ const maxScore = Math.max(...matches.map(m => m.score));
2183
+ const type = maxScore >= EXACT_THRESHOLD ? 'exact' : 'semantic';
2184
+ // Extract line info from metadata if available
2185
+ const getLines = (d) => {
2186
+ const meta = d.metadata;
2187
+ return {
2188
+ startLine: meta?.startLine,
2189
+ endLine: meta?.endLine,
2190
+ };
2191
+ };
2192
+ duplicateGroups.push({
2193
+ type,
2194
+ similarity: maxScore,
2195
+ chunks: [
2196
+ {
2197
+ id: doc.id,
2198
+ filePath: doc.filePath,
2199
+ content: doc.content.substring(0, 200) + (doc.content.length > 200 ? '...' : ''),
2200
+ ...getLines(doc),
2201
+ },
2202
+ ...matches.map(m => ({
2203
+ id: m.document.id,
2204
+ filePath: m.document.filePath,
2205
+ content: m.document.content.substring(0, 200) + (m.document.content.length > 200 ? '...' : ''),
2206
+ ...getLines(m.document),
2207
+ })),
2208
+ ],
2209
+ });
2210
+ // Mark all as processed
2211
+ processed.add(doc.id);
2212
+ matches.forEach(m => processed.add(m.document.id));
2213
+ }
2214
+ }
2215
+ // Calculate summary
2216
+ const exactDuplicates = duplicateGroups.filter(g => g.type === 'exact').length;
2217
+ const semanticDuplicates = duplicateGroups.filter(g => g.type === 'semantic').length;
2218
+ const totalLinesAffected = duplicateGroups.reduce((sum, g) => sum + g.chunks.reduce((chunkSum, c) => chunkSum + (c.endLine && c.startLine ? c.endLine - c.startLine + 1 : c.content.split('\n').length), 0), 0);
2129
2219
  // Format results
2130
- const duplicateGroups = report.duplicateGroups.slice(0, 20).map(group => ({
2220
+ const formattedGroups = duplicateGroups.slice(0, 20).map(group => ({
2131
2221
  type: group.type,
2132
2222
  similarity: `${(group.similarity * 100).toFixed(1)}%`,
2133
- files_affected: group.estimatedSavings.filesAffected,
2134
- lines_savable: group.estimatedSavings.linesReduced,
2135
- suggestion: group.consolidationSuggestion,
2223
+ files_affected: new Set(group.chunks.map(c => c.filePath)).size,
2136
2224
  locations: group.chunks.map(c => ({
2137
- file: c.filePath,
2138
- lines: `${c.startLine}-${c.endLine}`,
2139
- name: c.functionName || c.className || 'code block',
2225
+ file: path.relative(projectRecord.path, c.filePath),
2226
+ lines: c.startLine && c.endLine ? `${c.startLine}-${c.endLine}` : 'N/A',
2227
+ preview: c.content.substring(0, 100).replace(/\n/g, ' '),
2140
2228
  })),
2141
2229
  }));
2142
2230
  return {
2143
2231
  content: [{
2144
2232
  type: 'text',
2145
2233
  text: JSON.stringify({
2146
- project: path.basename(projectPath),
2234
+ project: projectRecord.name,
2147
2235
  summary: {
2148
- total_chunks_analyzed: report.totalChunksAnalyzed,
2149
- exact_duplicates: report.summary.exactDuplicates,
2150
- semantic_duplicates: report.summary.semanticDuplicates,
2151
- structural_duplicates: report.summary.structuralDuplicates,
2152
- total_lines_affected: report.summary.totalLinesAffected,
2153
- potential_lines_saved: report.summary.potentialSavings,
2236
+ total_chunks_analyzed: filteredDocs.length,
2237
+ exact_duplicates: exactDuplicates,
2238
+ semantic_duplicates: semanticDuplicates,
2239
+ structural_duplicates: 0, // Not computed in this approach
2240
+ total_lines_affected: totalLinesAffected,
2241
+ potential_lines_saved: Math.floor(totalLinesAffected * 0.6), // Estimate
2154
2242
  },
2155
- duplicate_groups: duplicateGroups,
2156
- recommendations: report.recommendations,
2243
+ duplicate_groups: formattedGroups,
2244
+ recommendations: exactDuplicates > 0
2245
+ ? [`Found ${exactDuplicates} exact duplicate groups - prioritize consolidation`]
2246
+ : semanticDuplicates > 0
2247
+ ? [`Found ${semanticDuplicates} semantic duplicates - review for potential abstraction`]
2248
+ : ['No significant duplicates found above threshold'],
2157
2249
  }, null, 2),
2158
2250
  }],
2159
2251
  };
@@ -2188,6 +2280,7 @@ class CodeSeekerMcpServer {
2188
2280
  try {
2189
2281
  const storageManager = await (0, storage_1.getStorageManager)();
2190
2282
  const projectStore = storageManager.getProjectStore();
2283
+ const graphStore = storageManager.getGraphStore();
2191
2284
  const projects = await projectStore.list();
2192
2285
  // Find the project
2193
2286
  const projectRecord = projects.find(p => p.name === project ||
@@ -2204,58 +2297,141 @@ class CodeSeekerMcpServer {
2204
2297
  isError: true,
2205
2298
  };
2206
2299
  }
2207
- // Load the knowledge graph for this project
2208
- const knowledgeGraph = new knowledge_graph_1.SemanticKnowledgeGraph(projectRecord.path);
2209
- // Run architectural insight detection (includes dead code, god classes, circular deps, etc.)
2210
- const allInsights = await knowledgeGraph.detectArchitecturalInsights();
2211
- // Filter by requested patterns
2300
+ // Use the indexed graph data from storage manager (same as show_dependencies)
2301
+ const allNodes = await graphStore.findNodes(projectRecord.id);
2302
+ if (allNodes.length === 0) {
2303
+ return {
2304
+ content: [{
2305
+ type: 'text',
2306
+ text: JSON.stringify({
2307
+ project: projectRecord.name,
2308
+ summary: {
2309
+ total_issues: 0,
2310
+ dead_code_count: 0,
2311
+ anti_patterns_count: 0,
2312
+ coupling_issues_count: 0,
2313
+ },
2314
+ dead_code: [],
2315
+ anti_patterns: [],
2316
+ coupling_issues: [],
2317
+ note: 'No graph data found. The project may need reindexing with graph building enabled.',
2318
+ }, null, 2),
2319
+ }],
2320
+ };
2321
+ }
2322
+ // Analyze the indexed graph for dead code and anti-patterns
2212
2323
  const patterns = include_patterns || ['dead_code', 'god_class', 'circular_deps', 'feature_envy', 'coupling'];
2213
- const patternMapping = {
2214
- 'dead_code': ['Dead Code'],
2215
- 'god_class': ['God Class'],
2216
- 'circular_deps': ['Circular Dependencies', 'Circular Dependency'],
2217
- 'feature_envy': ['Feature Envy'],
2218
- 'coupling': ['High Coupling', 'Inappropriate Intimacy'],
2219
- };
2220
- const selectedPatterns = patterns.flatMap(p => patternMapping[p] || []);
2221
- const filteredInsights = allInsights.filter(insight => selectedPatterns.some(pattern => insight.pattern?.toLowerCase().includes(pattern.toLowerCase()) ||
2222
- insight.description?.toLowerCase().includes(pattern.toLowerCase())));
2223
- // Group by type
2224
- const deadCode = filteredInsights.filter(i => i.pattern === 'Dead Code');
2225
- const antiPatterns = filteredInsights.filter(i => i.type === 'anti_pattern' && i.pattern !== 'Dead Code');
2226
- const couplingIssues = filteredInsights.filter(i => i.type === 'coupling_issue');
2324
+ // Build analysis results from indexed graph
2325
+ const deadCodeItems = [];
2326
+ const antiPatternItems = [];
2327
+ const couplingItems = [];
2328
+ // Analyze each node for issues
2329
+ for (const node of allNodes) {
2330
+ // Get edges for this node
2331
+ const inEdges = await graphStore.getEdges(node.id, 'in');
2332
+ const outEdges = await graphStore.getEdges(node.id, 'out');
2333
+ // Dead code detection: nodes with no incoming references (except entry points)
2334
+ if (patterns.includes('dead_code')) {
2335
+ const isEntryPoint = node.type === 'file' ||
2336
+ node.name.toLowerCase().includes('main') ||
2337
+ node.name.toLowerCase().includes('index') ||
2338
+ node.name.toLowerCase().includes('app');
2339
+ // A class/function with no incoming calls/imports is potentially dead
2340
+ if (!isEntryPoint && (node.type === 'class' || node.type === 'function') && inEdges.length === 0) {
2341
+ deadCodeItems.push({
2342
+ type: 'Dead Code',
2343
+ name: node.name,
2344
+ file: path.relative(projectRecord.path, node.filePath),
2345
+ description: `Unused ${node.type}: ${node.name} - no incoming references found`,
2346
+ confidence: '70%',
2347
+ impact: 'medium',
2348
+ recommendation: 'Review if this code is needed. Remove if unused or add to exports if it should be public.',
2349
+ });
2350
+ }
2351
+ }
2352
+ // God class detection: classes with too many methods/dependencies
2353
+ if (patterns.includes('god_class') && node.type === 'class') {
2354
+ const containsEdges = outEdges.filter(e => e.type === 'contains');
2355
+ const dependsOnEdges = outEdges.filter(e => e.type === 'imports' || e.type === 'depends_on');
2356
+ if (containsEdges.length > 15 || dependsOnEdges.length > 10) {
2357
+ antiPatternItems.push({
2358
+ type: 'God Class',
2359
+ name: node.name,
2360
+ file: path.relative(projectRecord.path, node.filePath),
2361
+ description: `Class ${node.name} has ${containsEdges.length} members and ${dependsOnEdges.length} dependencies`,
2362
+ confidence: '80%',
2363
+ impact: 'high',
2364
+ recommendation: 'Break down into smaller, focused classes following Single Responsibility Principle',
2365
+ });
2366
+ }
2367
+ }
2368
+ // High coupling detection
2369
+ if (patterns.includes('coupling') && (node.type === 'class' || node.type === 'file')) {
2370
+ const dependencies = outEdges.filter(e => e.type === 'imports' || e.type === 'depends_on');
2371
+ if (dependencies.length > 8) {
2372
+ couplingItems.push({
2373
+ type: 'High Coupling',
2374
+ name: node.name,
2375
+ file: path.relative(projectRecord.path, node.filePath),
2376
+ description: `${node.type} ${node.name} has ${dependencies.length} dependencies`,
2377
+ confidence: '75%',
2378
+ impact: 'high',
2379
+ recommendation: 'Reduce dependencies using interfaces, dependency injection, or service locator pattern',
2380
+ });
2381
+ }
2382
+ }
2383
+ }
2384
+ // Circular dependency detection (simplified - look for bidirectional imports)
2385
+ if (patterns.includes('circular_deps')) {
2386
+ const fileNodes = allNodes.filter(n => n.type === 'file');
2387
+ const importMap = new Map();
2388
+ for (const fileNode of fileNodes) {
2389
+ const imports = await graphStore.getEdges(fileNode.id, 'out');
2390
+ const importTargets = new Set(imports.filter(e => e.type === 'imports').map(e => e.target));
2391
+ importMap.set(fileNode.id, importTargets);
2392
+ }
2393
+ // Check for circular imports (A imports B and B imports A)
2394
+ for (const [fileId, imports] of importMap.entries()) {
2395
+ for (const targetId of imports) {
2396
+ const targetImports = importMap.get(targetId);
2397
+ if (targetImports?.has(fileId)) {
2398
+ const sourceNode = allNodes.find(n => n.id === fileId);
2399
+ const targetNode = allNodes.find(n => n.id === targetId);
2400
+ if (sourceNode && targetNode) {
2401
+ antiPatternItems.push({
2402
+ type: 'Circular Dependency',
2403
+ name: `${sourceNode.name} <-> ${targetNode.name}`,
2404
+ file: path.relative(projectRecord.path, sourceNode.filePath),
2405
+ description: `Bidirectional import between ${sourceNode.name} and ${targetNode.name}`,
2406
+ confidence: '90%',
2407
+ impact: 'high',
2408
+ recommendation: 'Break the cycle using dependency inversion or extracting shared code',
2409
+ });
2410
+ }
2411
+ }
2412
+ }
2413
+ }
2414
+ }
2227
2415
  return {
2228
2416
  content: [{
2229
2417
  type: 'text',
2230
2418
  text: JSON.stringify({
2231
2419
  project: projectRecord.name,
2420
+ graph_stats: {
2421
+ total_nodes: allNodes.length,
2422
+ files: allNodes.filter(n => n.type === 'file').length,
2423
+ classes: allNodes.filter(n => n.type === 'class').length,
2424
+ functions: allNodes.filter(n => n.type === 'function').length,
2425
+ },
2232
2426
  summary: {
2233
- total_issues: filteredInsights.length,
2234
- dead_code_count: deadCode.length,
2235
- anti_patterns_count: antiPatterns.length,
2236
- coupling_issues_count: couplingIssues.length,
2427
+ total_issues: deadCodeItems.length + antiPatternItems.length + couplingItems.length,
2428
+ dead_code_count: deadCodeItems.length,
2429
+ anti_patterns_count: antiPatternItems.length,
2430
+ coupling_issues_count: couplingItems.length,
2237
2431
  },
2238
- dead_code: deadCode.slice(0, 20).map(d => ({
2239
- type: d.pattern,
2240
- description: d.description,
2241
- confidence: `${((d.confidence || 0) * 100).toFixed(0)}%`,
2242
- impact: d.impact,
2243
- recommendation: d.recommendation,
2244
- })),
2245
- anti_patterns: antiPatterns.slice(0, 10).map(a => ({
2246
- type: a.pattern,
2247
- description: a.description,
2248
- confidence: `${((a.confidence || 0) * 100).toFixed(0)}%`,
2249
- impact: a.impact,
2250
- recommendation: a.recommendation,
2251
- })),
2252
- coupling_issues: couplingIssues.slice(0, 10).map(c => ({
2253
- type: c.pattern,
2254
- description: c.description,
2255
- confidence: `${((c.confidence || 0) * 100).toFixed(0)}%`,
2256
- impact: c.impact,
2257
- recommendation: c.recommendation,
2258
- })),
2432
+ dead_code: deadCodeItems.slice(0, 20),
2433
+ anti_patterns: antiPatternItems.slice(0, 10),
2434
+ coupling_issues: couplingItems.slice(0, 10),
2259
2435
  }, null, 2),
2260
2436
  }],
2261
2437
  };