mcp-code-indexer 1.0.9-py3-none-any.whl → 1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -210,6 +210,30 @@ class DatabaseManager:
             await db.commit()
             logger.debug(f"Updated project: {project.id}")
 
+    async def get_all_projects(self) -> List[Project]:
+        """Get all projects in the database."""
+        async with self.get_connection() as db:
+            cursor = await db.execute(
+                "SELECT id, name, remote_origin, upstream_origin, aliases, created, last_accessed FROM projects"
+            )
+            rows = await cursor.fetchall()
+
+            projects = []
+            for row in rows:
+                aliases = json.loads(row[4]) if row[4] else []
+                project = Project(
+                    id=row[0],
+                    name=row[1],
+                    remote_origin=row[2],
+                    upstream_origin=row[3],
+                    aliases=aliases,
+                    created=row[5],
+                    last_accessed=row[6]
+                )
+                projects.append(project)
+
+            return projects
+
     # File description operations
 
     async def create_file_description(self, file_desc: FileDescription) -> None:
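
Note: the new get_all_projects() method is what the server-side project matching below iterates over. A minimal, hypothetical usage sketch (not part of the package; assumes an already-initialized DatabaseManager instance named db_manager):

    # Hypothetical caller: list every indexed project and its folder aliases.
    async def list_projects(db_manager) -> None:
        for project in await db_manager.get_all_projects():
            # aliases is JSON-decoded into a list by get_all_projects()
            print(project.id, project.name, project.aliases)

    # await list_projects(db_manager)  # call from an async context
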
@@ -9,9 +9,10 @@ import asyncio
 import hashlib
 import json
 import logging
+import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Set
 
 from mcp import types
 from mcp.server import Server
@@ -276,26 +277,48 @@ class MCPCodeIndexServer:
         )]
 
     async def _get_or_create_project_id(self, arguments: Dict[str, Any]) -> str:
-        """Get or create a project ID from tool arguments."""
+        """
+        Get or create a project ID using intelligent matching.
+
+        Matches projects based on 2+ out of 4 identification factors:
+        1. Project name (normalized, case-insensitive)
+        2. Remote origin URL
+        3. Upstream origin URL
+        4. Any folder path in aliases
+
+        If only 1 factor matches, uses file similarity to determine if it's the same project.
+        """
         project_name = arguments["projectName"]
         remote_origin = arguments.get("remoteOrigin")
         upstream_origin = arguments.get("upstreamOrigin")
         folder_path = arguments["folderPath"]
         branch = arguments.get("branch", "main")
 
-        # Create project ID from stable identifiers only (name + folder path)
-        # Normalize project name to lowercase for case-insensitive matching
-        # This ensures consistent project IDs regardless of case variations
+        # Normalize project name for case-insensitive matching
         normalized_name = project_name.lower()
-        id_source = f"{normalized_name}:{folder_path}"
-        project_id = hashlib.sha256(id_source.encode()).hexdigest()[:16]
 
-        # Check if project exists, create if not
-        project = await self.db_manager.get_project(project_id)
-        if not project:
+        # Find potential project matches
+        project = await self._find_matching_project(
+            normalized_name, remote_origin, upstream_origin, folder_path
+        )
+        if project:
+            # Update project metadata and aliases
+            await self._update_existing_project(project, normalized_name, remote_origin, upstream_origin, folder_path)
+
+            # Check if upstream inheritance is needed
+            if upstream_origin and await self.db_manager.check_upstream_inheritance_needed(project):
+                try:
+                    inherited_count = await self.db_manager.inherit_from_upstream(project, branch)
+                    if inherited_count > 0:
+                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {normalized_name}")
+                except Exception as e:
+                    logger.warning(f"Failed to inherit from upstream: {e}")
+        else:
+            # Create new project with UUID
+            project_id = str(uuid.uuid4())
             project = Project(
                 id=project_id,
-                name=normalized_name,  # Store normalized name for consistency
+                name=normalized_name,
                 remote_origin=remote_origin,
                 upstream_origin=upstream_origin,
                 aliases=[folder_path],
@@ -303,42 +326,152 @@ class MCPCodeIndexServer:
                 last_accessed=datetime.utcnow()
             )
             await self.db_manager.create_project(project)
+            logger.info(f"Created new project: {normalized_name} ({project_id})")
 
             # Auto-inherit from upstream if needed
             if upstream_origin:
                 try:
                     inherited_count = await self.db_manager.inherit_from_upstream(project, branch)
                     if inherited_count > 0:
-                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {project_name}")
+                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {normalized_name}")
                 except Exception as e:
                     logger.warning(f"Failed to inherit from upstream: {e}")
-        else:
-            # Update last accessed time
-            await self.db_manager.update_project_access_time(project_id)
+
+        return project.id
+
+    async def _find_matching_project(
+        self,
+        normalized_name: str,
+        remote_origin: Optional[str],
+        upstream_origin: Optional[str],
+        folder_path: str
+    ) -> Optional[Project]:
+        """
+        Find a matching project using intelligent 2-out-of-4 matching logic.
+
+        Returns the best matching project or None if no sufficient match is found.
+        """
+        all_projects = await self.db_manager.get_all_projects()
+
+        best_match = None
+        best_score = 0
+
+        for project in all_projects:
+            score = 0
+            match_factors = []
+
+            # Factor 1: Project name match
+            if project.name.lower() == normalized_name:
+                score += 1
+                match_factors.append("name")
+
+            # Factor 2: Remote origin match
+            if remote_origin and project.remote_origin == remote_origin:
+                score += 1
+                match_factors.append("remote_origin")
+
+            # Factor 3: Upstream origin match
+            if upstream_origin and project.upstream_origin == upstream_origin:
+                score += 1
+                match_factors.append("upstream_origin")
+
+            # Factor 4: Folder path in aliases
+            project_aliases = json.loads(project.aliases) if isinstance(project.aliases, str) else project.aliases
+            if folder_path in project_aliases:
+                score += 1
+                match_factors.append("folder_path")
 
-        # Update remote/upstream origins if provided and different from existing
-        should_update = False
-        if remote_origin and project.remote_origin != remote_origin:
-            project.remote_origin = remote_origin
-            should_update = True
-        if upstream_origin and project.upstream_origin != upstream_origin:
-            project.upstream_origin = upstream_origin
-            should_update = True
+            # If we have 2+ matches, this is a strong candidate
+            if score >= 2:
+                if score > best_score:
+                    best_score = score
+                    best_match = project
+                logger.info(f"Strong match for project {project.name} (score: {score}, factors: {match_factors})")
 
-        if should_update:
-            await self.db_manager.update_project(project)
-            logger.debug(f"Updated project metadata for {project_name}")
+            # If only 1 match, check file similarity for potential matches
+            elif score == 1:
+                if await self._check_file_similarity(project, folder_path):
+                    logger.info(f"File similarity match for project {project.name} (factor: {match_factors[0]})")
+                    if score > best_score:
+                        best_score = score
+                        best_match = project
+
+        return best_match
+
+    async def _check_file_similarity(self, project: Project, folder_path: str) -> bool:
+        """
+        Check if the files in the folder are similar to files already indexed for this project.
+        Returns True if 80%+ of files match.
+        """
+        try:
+            # Get files currently in the folder
+            scanner = FileScanner(Path(folder_path))
+            if not scanner.is_valid_project_directory():
+                return False
 
-        # Check if upstream inheritance is needed for existing project
-        if upstream_origin and await self.db_manager.check_upstream_inheritance_needed(project):
-            try:
-                inherited_count = await self.db_manager.inherit_from_upstream(project, branch)
-                if inherited_count > 0:
-                    logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {project_name}")
-            except Exception as e:
-                logger.warning(f"Failed to inherit from upstream: {e}")
-
-        return project_id
+            current_files = scanner.scan_files()
+            current_basenames = {Path(f).name for f in current_files}
+
+            if not current_basenames:
+                return False
+
+            # Get files already indexed for this project
+            indexed_files = await self.db_manager.get_all_file_descriptions(project.id, "main")
+            indexed_basenames = {Path(fd.file_path).name for fd in indexed_files}
+
+            if not indexed_basenames:
+                return False
+
+            # Calculate similarity
+            intersection = current_basenames & indexed_basenames
+            similarity = len(intersection) / len(current_basenames)
+
+            logger.debug(f"File similarity for {project.name}: {similarity:.2%} ({len(intersection)}/{len(current_basenames)} files match)")
+
+            return similarity >= 0.8
+        except Exception as e:
+            logger.warning(f"Error checking file similarity: {e}")
+            return False
+
+    async def _update_existing_project(
+        self,
+        project: Project,
+        normalized_name: str,
+        remote_origin: Optional[str],
+        upstream_origin: Optional[str],
+        folder_path: str
+    ) -> None:
+        """Update an existing project with new metadata and folder alias."""
+        # Update last accessed time
+        await self.db_manager.update_project_access_time(project.id)
+
+        should_update = False
+
+        # Update name if different
+        if project.name != normalized_name:
+            project.name = normalized_name
+            should_update = True
+
+        # Update remote/upstream origins if provided and different
+        if remote_origin and project.remote_origin != remote_origin:
+            project.remote_origin = remote_origin
+            should_update = True
+
+        if upstream_origin and project.upstream_origin != upstream_origin:
+            project.upstream_origin = upstream_origin
+            should_update = True
+
+        # Add folder path to aliases if not already present
+        project_aliases = json.loads(project.aliases) if isinstance(project.aliases, str) else project.aliases
+        if folder_path not in project_aliases:
+            project_aliases.append(folder_path)
+            project.aliases = project_aliases
+            should_update = True
+            logger.info(f"Added new folder alias to project {project.name}: {folder_path}")
+
+        if should_update:
+            await self.db_manager.update_project(project)
+            logger.debug(f"Updated project metadata for {project.name}")
 
     async def _handle_get_file_description(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """Handle get_file_description tool calls."""
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mcp-code-indexer
-Version: 1.0.9
+Version: 1.1.0
 Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
 Author: MCP Code Indexer Contributors
 Maintainer: MCP Code Indexer Contributors
@@ -6,17 +6,17 @@ mcp_code_indexer/main.py,sha256=Rou-mAN9-12PPP8jC7dIs2_UNambJuC2F8BF--j-0m8,3715
 mcp_code_indexer/merge_handler.py,sha256=lJR8eVq2qSrF6MW9mR3Fy8UzrNAaQ7RsI2FMNXne3vQ,14692
 mcp_code_indexer/token_counter.py,sha256=WrifOkbF99nWWHlRlhCHAB2KN7qr83GOHl7apE-hJcE,8460
 mcp_code_indexer/database/__init__.py,sha256=aPq_aaRp0aSwOBIq9GkuMNjmLxA411zg2vhdrAuHm-w,38
-mcp_code_indexer/database/database.py,sha256=ROGdosQSADI7EytNkdC4RauPD9zLtHTO1mQ8SxsmPVo,18755
+mcp_code_indexer/database/database.py,sha256=CqkGXkJLANr2qfxA2FLX1oYiNj0lTm1LyxD97dzJHSs,19650
 mcp_code_indexer/database/models.py,sha256=3wOxHKb6j3zKPWFSwB5g1TLpI507vLNZcqsxZR4VuRs,5528
 mcp_code_indexer/middleware/__init__.py,sha256=p-mP0pMsfiU2yajCPvokCUxUEkh_lu4XJP1LyyMW2ug,220
 mcp_code_indexer/middleware/error_middleware.py,sha256=v6jaHmPxf3qerYdb85X1tHIXLxgcbybpitKVakFLQTA,10109
 mcp_code_indexer/server/__init__.py,sha256=16xMcuriUOBlawRqWNBk6niwrvtv_JD5xvI36X1Vsmk,41
-mcp_code_indexer/server/mcp_server.py,sha256=QhN34Ue6jKzwRvCIxYRYrb9v3_fEVsuIUmmMP-woRqI,38023
+mcp_code_indexer/server/mcp_server.py,sha256=RNM-0tB_1Gm5P5Vz4rNTXQKcjOyIWa4s6nirZ1BNi-g,43208
 mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
 mcp_code_indexer/tools/__init__.py,sha256=m01mxML2UdD7y5rih_XNhNSCMzQTz7WQ_T1TeOcYlnE,49
-mcp_code_indexer-1.0.9.dist-info/licenses/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
-mcp_code_indexer-1.0.9.dist-info/METADATA,sha256=uMXk1E3Hp0PY3yykMfBa1YyZSRPPnDW2GFYxcR9r2K8,11930
-mcp_code_indexer-1.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mcp_code_indexer-1.0.9.dist-info/entry_points.txt,sha256=8HqWOw1Is7jOP1bvIgaSwouvT9z_Boe-9hd4NzyJOhY,68
-mcp_code_indexer-1.0.9.dist-info/top_level.txt,sha256=yKYCM-gMGt-cnupGfAhnZaoEsROLB6DQ1KFUuyKx4rw,17
-mcp_code_indexer-1.0.9.dist-info/RECORD,,
+mcp_code_indexer-1.1.0.dist-info/licenses/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
+mcp_code_indexer-1.1.0.dist-info/METADATA,sha256=n6fIk9JrhEmPG2LVpMCFI7XkP3Aey4dM1V_zKuhAkoA,11930
+mcp_code_indexer-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mcp_code_indexer-1.1.0.dist-info/entry_points.txt,sha256=8HqWOw1Is7jOP1bvIgaSwouvT9z_Boe-9hd4NzyJOhY,68
+mcp_code_indexer-1.1.0.dist-info/top_level.txt,sha256=yKYCM-gMGt-cnupGfAhnZaoEsROLB6DQ1KFUuyKx4rw,17
+mcp_code_indexer-1.1.0.dist-info/RECORD,,