mcp-code-indexer 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff compares two publicly available versions of the package as released to their registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
mcp_code_indexer/database/database.py

@@ -210,6 +210,45 @@ class DatabaseManager:
             await db.commit()
             logger.debug(f"Updated project: {project.id}")
 
+    async def get_all_projects(self) -> List[Project]:
+        """Get all projects in the database."""
+        async with self.get_connection() as db:
+            cursor = await db.execute(
+                "SELECT id, name, remote_origin, upstream_origin, aliases, created, last_accessed FROM projects"
+            )
+            rows = await cursor.fetchall()
+
+            projects = []
+            for row in rows:
+                aliases = json.loads(row[4]) if row[4] else []
+                project = Project(
+                    id=row[0],
+                    name=row[1],
+                    remote_origin=row[2],
+                    upstream_origin=row[3],
+                    aliases=aliases,
+                    created=row[5],
+                    last_accessed=row[6]
+                )
+                projects.append(project)
+
+            return projects
+
+    async def get_branch_file_counts(self, project_id: str) -> Dict[str, int]:
+        """Get file counts per branch for a project."""
+        async with self.get_connection() as db:
+            cursor = await db.execute(
+                """
+                SELECT branch, COUNT(*) as file_count
+                FROM file_descriptions
+                WHERE project_id = ?
+                GROUP BY branch
+                """,
+                (project_id,)
+            )
+            rows = await cursor.fetchall()
+            return {row[0]: row[1] for row in rows}
+
     # File description operations
 
     async def create_file_description(self, file_desc: FileDescription) -> None:
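Both additions are read-only queries over the existing schema. As a rough illustration of what get_branch_file_counts computes, here is a minimal synchronous sketch against an in-memory SQLite database (the table is pared down to the columns the query touches, and the rows are invented):

    import sqlite3

    # Stand-in for the file_descriptions table referenced above.
    db = sqlite3.connect(":memory:")
    db.execute("CREATE TABLE file_descriptions (project_id TEXT, branch TEXT, file_path TEXT)")
    db.executemany(
        "INSERT INTO file_descriptions VALUES (?, ?, ?)",
        [("p1", "main", "a.py"), ("p1", "main", "b.py"), ("p1", "dev", "a.py")],
    )

    # Same shape as the query in get_branch_file_counts.
    rows = db.execute(
        "SELECT branch, COUNT(*) FROM file_descriptions WHERE project_id = ? GROUP BY branch",
        ("p1",),
    ).fetchall()
    print(dict(rows))  # e.g. {'dev': 1, 'main': 2}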
@@ -345,13 +384,13 @@ class DatabaseManager:
                     fd.branch,
                     fd.file_path,
                     fd.description,
-                    fts.rank
-                FROM file_descriptions_fts fts
-                JOIN file_descriptions fd ON fd.rowid = fts.rowid
-                WHERE fts MATCH ?
+                    bm25(file_descriptions_fts) as rank
+                FROM file_descriptions_fts
+                JOIN file_descriptions fd ON fd.rowid = file_descriptions_fts.rowid
+                WHERE file_descriptions_fts MATCH ?
                 AND fd.project_id = ?
                 AND fd.branch = ?
-                ORDER BY fts.rank
+                ORDER BY bm25(file_descriptions_fts)
                 LIMIT ?
                 """,
                 (query, project_id, branch, max_results)
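This hunk swaps the FTS5 rank column for an explicit bm25() call. In SQLite's FTS5, bm25(table) returns a score that is lower for better matches, so the ascending ORDER BY puts the most relevant rows first. A minimal standalone sketch of the same idiom, assuming your SQLite build ships with FTS5 (table name and rows are invented):

    import sqlite3

    db = sqlite3.connect(":memory:")
    db.execute("CREATE VIRTUAL TABLE docs_fts USING fts5(description)")
    db.executemany(
        "INSERT INTO docs_fts (description) VALUES (?)",
        [("parses config files",), ("config loader for the config subsystem",)],
    )

    # bm25() is lower (more negative) for better matches, so ascending
    # ORDER BY yields best-first results, as in the hunk above.
    rows = db.execute(
        "SELECT description, bm25(docs_fts) AS rank FROM docs_fts "
        "WHERE docs_fts MATCH ? ORDER BY bm25(docs_fts) LIMIT 5",
        ("config",),
    ).fetchall()
    for description, rank in rows:
        print(f"{rank:+.3f}  {description}")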
mcp_code_indexer/server/mcp_server.py

@@ -9,9 +9,10 @@ import asyncio
 import hashlib
 import json
 import logging
+import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Set
 
 from mcp import types
 from mcp.server import Server
@@ -276,26 +277,48 @@ class MCPCodeIndexServer:
         )]
 
     async def _get_or_create_project_id(self, arguments: Dict[str, Any]) -> str:
-        """Get or create a project ID from tool arguments."""
+        """
+        Get or create a project ID using intelligent matching.
+
+        Matches projects based on 2+ out of 4 identification factors:
+        1. Project name (normalized, case-insensitive)
+        2. Remote origin URL
+        3. Upstream origin URL
+        4. Any folder path in aliases
+
+        If only 1 factor matches, uses file similarity to determine if it's the same project.
+        """
         project_name = arguments["projectName"]
         remote_origin = arguments.get("remoteOrigin")
         upstream_origin = arguments.get("upstreamOrigin")
         folder_path = arguments["folderPath"]
         branch = arguments.get("branch", "main")
 
-        # Create project ID from stable identifiers only (name + folder path)
-        # Normalize project name to lowercase for case-insensitive matching
-        # This ensures consistent project IDs regardless of case variations
+        # Normalize project name for case-insensitive matching
         normalized_name = project_name.lower()
-        id_source = f"{normalized_name}:{folder_path}"
-        project_id = hashlib.sha256(id_source.encode()).hexdigest()[:16]
 
-        # Check if project exists, create if not
-        project = await self.db_manager.get_project(project_id)
-        if not project:
+        # Find potential project matches
+        project = await self._find_matching_project(
+            normalized_name, remote_origin, upstream_origin, folder_path
+        )
+        if project:
+            # Update project metadata and aliases
+            await self._update_existing_project(project, normalized_name, remote_origin, upstream_origin, folder_path)
+
+            # Check if upstream inheritance is needed
+            if upstream_origin and await self.db_manager.check_upstream_inheritance_needed(project):
+                try:
+                    inherited_count = await self.db_manager.inherit_from_upstream(project, branch)
+                    if inherited_count > 0:
+                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {normalized_name}")
+                except Exception as e:
+                    logger.warning(f"Failed to inherit from upstream: {e}")
+        else:
+            # Create new project with UUID
+            project_id = str(uuid.uuid4())
             project = Project(
                 id=project_id,
-                name=normalized_name,  # Store normalized name for consistency
+                name=normalized_name,
                 remote_origin=remote_origin,
                 upstream_origin=upstream_origin,
                 aliases=[folder_path],
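Two things change here: project identity moves from a deterministic sha256 of name and folder path to a random uuid4 (so identity no longer depends on where a checkout lives), and lookup moves to counting identification factors. The docstring's 2-of-4 rule reduces to a simple tally; a dependency-free sketch of just that step (the function name and arguments are illustrative, the real logic lives in _find_matching_project below):

    from typing import List, Optional

    def match_score(
        project_name: str,
        project_remote: Optional[str],
        project_upstream: Optional[str],
        project_aliases: List[str],
        name: str,
        remote: Optional[str],
        upstream: Optional[str],
        folder: str,
    ) -> int:
        """Count how many of the four identification factors agree."""
        score = 0
        score += project_name.lower() == name.lower()              # factor 1: name
        score += bool(remote) and project_remote == remote         # factor 2: remote origin
        score += bool(upstream) and project_upstream == upstream   # factor 3: upstream origin
        score += folder in project_aliases                         # factor 4: folder alias
        return score

    # Name and folder alias agree: 2 factors, a strong match under the 2-of-4 rule.
    assert match_score("WebApp", None, None, ["/home/a/webapp"],
                       "webapp", None, None, "/home/a/webapp") == 2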
@@ -303,42 +326,187 @@ class MCPCodeIndexServer:
                 last_accessed=datetime.utcnow()
             )
             await self.db_manager.create_project(project)
+            logger.info(f"Created new project: {normalized_name} ({project_id})")
 
             # Auto-inherit from upstream if needed
             if upstream_origin:
                 try:
                     inherited_count = await self.db_manager.inherit_from_upstream(project, branch)
                     if inherited_count > 0:
-                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {project_name}")
-                except Exception as e:
-                    logger.warning(f"Failed to inherit from upstream: {e}")
-        else:
-            # Update last accessed time
-            await self.db_manager.update_project_access_time(project_id)
-
-            # Update remote/upstream origins if provided and different from existing
-            should_update = False
-            if remote_origin and project.remote_origin != remote_origin:
-                project.remote_origin = remote_origin
-                should_update = True
-            if upstream_origin and project.upstream_origin != upstream_origin:
-                project.upstream_origin = upstream_origin
-                should_update = True
-
-            if should_update:
-                await self.db_manager.update_project(project)
-                logger.debug(f"Updated project metadata for {project_name}")
-
-            # Check if upstream inheritance is needed for existing project
-            if upstream_origin and await self.db_manager.check_upstream_inheritance_needed(project):
-                try:
-                    inherited_count = await self.db_manager.inherit_from_upstream(project, branch)
-                    if inherited_count > 0:
-                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {project_name}")
+                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {normalized_name}")
                 except Exception as e:
                     logger.warning(f"Failed to inherit from upstream: {e}")
 
-        return project_id
+        return project.id
+
+    async def _find_matching_project(
+        self,
+        normalized_name: str,
+        remote_origin: Optional[str],
+        upstream_origin: Optional[str],
+        folder_path: str
+    ) -> Optional[Project]:
+        """
+        Find a matching project using intelligent 2-out-of-4 matching logic.
+
+        Returns the best matching project or None if no sufficient match is found.
+        """
+        all_projects = await self.db_manager.get_all_projects()
+
+        best_match = None
+        best_score = 0
+
+        for project in all_projects:
+            score = 0
+            match_factors = []
+
+            # Factor 1: Project name match
+            if project.name.lower() == normalized_name:
+                score += 1
+                match_factors.append("name")
+
+            # Factor 2: Remote origin match
+            if remote_origin and project.remote_origin == remote_origin:
+                score += 1
+                match_factors.append("remote_origin")
+
+            # Factor 3: Upstream origin match
+            if upstream_origin and project.upstream_origin == upstream_origin:
+                score += 1
+                match_factors.append("upstream_origin")
+
+            # Factor 4: Folder path in aliases
+            project_aliases = json.loads(project.aliases) if isinstance(project.aliases, str) else project.aliases
+            if folder_path in project_aliases:
+                score += 1
+                match_factors.append("folder_path")
+
+            # If we have 2+ matches, this is a strong candidate
+            if score >= 2:
+                if score > best_score:
+                    best_score = score
+                    best_match = project
+                    logger.info(f"Strong match for project {project.name} (score: {score}, factors: {match_factors})")
+
+            # If only 1 match, check file similarity for potential matches
+            elif score == 1:
+                if await self._check_file_similarity(project, folder_path):
+                    logger.info(f"File similarity match for project {project.name} (factor: {match_factors[0]})")
+                    if score > best_score:
+                        best_score = score
+                        best_match = project
+
+        return best_match
+
+    async def _check_file_similarity(self, project: Project, folder_path: str) -> bool:
+        """
+        Check if the files in the folder are similar to files already indexed for this project.
+        Returns True if 80%+ of files match.
+        """
+        try:
+            # Get files currently in the folder
+            scanner = FileScanner(Path(folder_path))
+            if not scanner.is_valid_project_directory():
+                return False
+
+            current_files = scanner.scan_files()
+            current_basenames = {Path(f).name for f in current_files}
+
+            if not current_basenames:
+                return False
+
+            # Get files already indexed for this project
+            indexed_files = await self.db_manager.get_all_file_descriptions(project.id, "main")
+            indexed_basenames = {Path(fd.file_path).name for fd in indexed_files}
+
+            if not indexed_basenames:
+                return False
+
+            # Calculate similarity
+            intersection = current_basenames & indexed_basenames
+            similarity = len(intersection) / len(current_basenames)
+
+            logger.debug(f"File similarity for {project.name}: {similarity:.2%} ({len(intersection)}/{len(current_basenames)} files match)")
+
+            return similarity >= 0.8
+        except Exception as e:
+            logger.warning(f"Error checking file similarity: {e}")
+            return False
+
+    async def _update_existing_project(
+        self,
+        project: Project,
+        normalized_name: str,
+        remote_origin: Optional[str],
+        upstream_origin: Optional[str],
+        folder_path: str
+    ) -> None:
+        """Update an existing project with new metadata and folder alias."""
+        # Update last accessed time
+        await self.db_manager.update_project_access_time(project.id)
+
+        should_update = False
+
+        # Update name if different
+        if project.name != normalized_name:
+            project.name = normalized_name
+            should_update = True
+
+        # Update remote/upstream origins if provided and different
+        if remote_origin and project.remote_origin != remote_origin:
+            project.remote_origin = remote_origin
+            should_update = True
+
+        if upstream_origin and project.upstream_origin != upstream_origin:
+            project.upstream_origin = upstream_origin
+            should_update = True
+
+        # Add folder path to aliases if not already present
+        project_aliases = json.loads(project.aliases) if isinstance(project.aliases, str) else project.aliases
+        if folder_path not in project_aliases:
+            project_aliases.append(folder_path)
+            project.aliases = project_aliases
+            should_update = True
+            logger.info(f"Added new folder alias to project {project.name}: {folder_path}")
+
+        if should_update:
+            await self.db_manager.update_project(project)
+            logger.debug(f"Updated project metadata for {project.name}")
+
+    async def _find_best_branch(self, project_id: str, requested_branch: str) -> Optional[str]:
+        """
+        Find the best available branch for a project when the requested branch has no files.
+        Returns the branch with the most files, or None if no branches have files.
+        """
+        try:
+            # Get all branches and their file counts for this project
+            branch_counts = await self.db_manager.get_branch_file_counts(project_id)
+
+            if not branch_counts:
+                return None
+
+            # First try common branch name variations
+            common_variations = {
+                'main': ['master', 'develop', 'development', 'dev'],
+                'master': ['main', 'develop', 'development', 'dev'],
+                'develop': ['development', 'main', 'master', 'dev'],
+                'development': ['develop', 'main', 'master', 'dev'],
+                'dev': ['develop', 'development', 'main', 'master']
+            }
+
+            # Try variations of the requested branch
+            if requested_branch.lower() in common_variations:
+                for variation in common_variations[requested_branch.lower()]:
+                    if variation in branch_counts and branch_counts[variation] > 0:
+                        return variation
+
+            # Fall back to the branch with the most files
+            best_branch = max(branch_counts.items(), key=lambda x: x[1])
+            return best_branch[0] if best_branch[1] > 0 else None
+
+        except Exception as e:
+            logger.warning(f"Error finding best branch: {e}")
+            return None
 
     async def _handle_get_file_description(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """Handle get_file_description tool calls."""
@@ -390,13 +558,24 @@ class MCPCodeIndexServer:
     async def _handle_check_codebase_size(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """Handle check_codebase_size tool calls."""
         project_id = await self._get_or_create_project_id(arguments)
+        requested_branch = arguments["branch"]
 
-        # Get all file descriptions for this project/branch
+        # Get file descriptions for this project/branch
        file_descriptions = await self.db_manager.get_all_file_descriptions(
             project_id=project_id,
-            branch=arguments["branch"]
+            branch=requested_branch
         )
 
+        # If no files found for requested branch, try to find the best available branch
+        if not file_descriptions:
+            available_branch = await self._find_best_branch(project_id, requested_branch)
+            if available_branch and available_branch != requested_branch:
+                file_descriptions = await self.db_manager.get_all_file_descriptions(
+                    project_id=project_id,
+                    branch=available_branch
+                )
+                logger.info(f"No files found for branch '{requested_branch}', using '{available_branch}' instead")
+
         # Calculate total tokens
         total_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
         is_large = self.token_counter.is_large_codebase(total_tokens)
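The fallback wired in here follows _find_best_branch's precedence: try well-known sibling branch names first, then take whichever branch has the most files. A quick illustration with invented counts:

    # Hypothetical result from get_branch_file_counts.
    branch_counts = {"master": 120, "feature/x": 3}

    # Requesting "main" first walks the common variations...
    for candidate in ["master", "develop", "development", "dev"]:
        if branch_counts.get(candidate, 0) > 0:
            print(candidate)  # -> master
            break
    else:
        # ...and otherwise falls back to the branch with the most files.
        print(max(branch_counts, key=branch_counts.get))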
mcp_code_indexer-1.0.9.dist-info/METADATA → mcp_code_indexer-1.1.1.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mcp-code-indexer
-Version: 1.0.9
+Version: 1.1.1
 Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
 Author: MCP Code Indexer Contributors
 Maintainer: MCP Code Indexer Contributors
mcp_code_indexer-1.0.9.dist-info/RECORD → mcp_code_indexer-1.1.1.dist-info/RECORD

@@ -6,17 +6,17 @@ mcp_code_indexer/main.py,sha256=Rou-mAN9-12PPP8jC7dIs2_UNambJuC2F8BF--j-0m8,3715
 mcp_code_indexer/merge_handler.py,sha256=lJR8eVq2qSrF6MW9mR3Fy8UzrNAaQ7RsI2FMNXne3vQ,14692
 mcp_code_indexer/token_counter.py,sha256=WrifOkbF99nWWHlRlhCHAB2KN7qr83GOHl7apE-hJcE,8460
 mcp_code_indexer/database/__init__.py,sha256=aPq_aaRp0aSwOBIq9GkuMNjmLxA411zg2vhdrAuHm-w,38
-mcp_code_indexer/database/database.py,sha256=ROGdosQSADI7EytNkdC4RauPD9zLtHTO1mQ8SxsmPVo,18755
+mcp_code_indexer/database/database.py,sha256=eG2xY5cd-oxRZ6mgGkqqBiJJfGCPqJgzoFq6kR99WfA,20300
 mcp_code_indexer/database/models.py,sha256=3wOxHKb6j3zKPWFSwB5g1TLpI507vLNZcqsxZR4VuRs,5528
 mcp_code_indexer/middleware/__init__.py,sha256=p-mP0pMsfiU2yajCPvokCUxUEkh_lu4XJP1LyyMW2ug,220
 mcp_code_indexer/middleware/error_middleware.py,sha256=v6jaHmPxf3qerYdb85X1tHIXLxgcbybpitKVakFLQTA,10109
 mcp_code_indexer/server/__init__.py,sha256=16xMcuriUOBlawRqWNBk6niwrvtv_JD5xvI36X1Vsmk,41
-mcp_code_indexer/server/mcp_server.py,sha256=QhN34Ue6jKzwRvCIxYRYrb9v3_fEVsuIUmmMP-woRqI,38023
+mcp_code_indexer/server/mcp_server.py,sha256=LxYt6AQ2hifAZIrduyGGBz22kxfcMnCAsHPjih37X5k,45523
 mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
 mcp_code_indexer/tools/__init__.py,sha256=m01mxML2UdD7y5rih_XNhNSCMzQTz7WQ_T1TeOcYlnE,49
-mcp_code_indexer-1.0.9.dist-info/licenses/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
-mcp_code_indexer-1.0.9.dist-info/METADATA,sha256=uMXk1E3Hp0PY3yykMfBa1YyZSRPPnDW2GFYxcR9r2K8,11930
-mcp_code_indexer-1.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mcp_code_indexer-1.0.9.dist-info/entry_points.txt,sha256=8HqWOw1Is7jOP1bvIgaSwouvT9z_Boe-9hd4NzyJOhY,68
-mcp_code_indexer-1.0.9.dist-info/top_level.txt,sha256=yKYCM-gMGt-cnupGfAhnZaoEsROLB6DQ1KFUuyKx4rw,17
-mcp_code_indexer-1.0.9.dist-info/RECORD,,
+mcp_code_indexer-1.1.1.dist-info/licenses/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
+mcp_code_indexer-1.1.1.dist-info/METADATA,sha256=h8Kqpz8nH14e73F1AoBwXAy3BgnBYKh04igxTq2euKw,11930
+mcp_code_indexer-1.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mcp_code_indexer-1.1.1.dist-info/entry_points.txt,sha256=8HqWOw1Is7jOP1bvIgaSwouvT9z_Boe-9hd4NzyJOhY,68
+mcp_code_indexer-1.1.1.dist-info/top_level.txt,sha256=yKYCM-gMGt-cnupGfAhnZaoEsROLB6DQ1KFUuyKx4rw,17
+mcp_code_indexer-1.1.1.dist-info/RECORD,,