mcp-code-indexer 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff compares two publicly available versions of the package as released to their registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
mcp_code_indexer/database/database.py

@@ -210,6 +210,45 @@ class DatabaseManager:
             await db.commit()
             logger.debug(f"Updated project: {project.id}")
 
+    async def get_all_projects(self) -> List[Project]:
+        """Get all projects in the database."""
+        async with self.get_connection() as db:
+            cursor = await db.execute(
+                "SELECT id, name, remote_origin, upstream_origin, aliases, created, last_accessed FROM projects"
+            )
+            rows = await cursor.fetchall()
+
+            projects = []
+            for row in rows:
+                aliases = json.loads(row[4]) if row[4] else []
+                project = Project(
+                    id=row[0],
+                    name=row[1],
+                    remote_origin=row[2],
+                    upstream_origin=row[3],
+                    aliases=aliases,
+                    created=row[5],
+                    last_accessed=row[6]
+                )
+                projects.append(project)
+
+            return projects
+
+    async def get_branch_file_counts(self, project_id: str) -> Dict[str, int]:
+        """Get file counts per branch for a project."""
+        async with self.get_connection() as db:
+            cursor = await db.execute(
+                """
+                SELECT branch, COUNT(*) as file_count
+                FROM file_descriptions
+                WHERE project_id = ?
+                GROUP BY branch
+                """,
+                (project_id,)
+            )
+            rows = await cursor.fetchall()
+            return {row[0]: row[1] for row in rows}
+
     # File description operations
 
     async def create_file_description(self, file_desc: FileDescription) -> None:
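Both additions are read-only queries over the existing schema. As a rough illustration of what get_branch_file_counts computes, here is a minimal synchronous sketch against an in-memory SQLite database (the table is pared down to the columns the query touches, and the rows are invented):

    import sqlite3

    # Stand-in for the file_descriptions table referenced above.
    db = sqlite3.connect(":memory:")
    db.execute("CREATE TABLE file_descriptions (project_id TEXT, branch TEXT, file_path TEXT)")
    db.executemany(
        "INSERT INTO file_descriptions VALUES (?, ?, ?)",
        [("p1", "main", "a.py"), ("p1", "main", "b.py"), ("p1", "dev", "a.py")],
    )

    # Same shape as the query in get_branch_file_counts.
    rows = db.execute(
        "SELECT branch, COUNT(*) FROM file_descriptions WHERE project_id = ? GROUP BY branch",
        ("p1",),
    ).fetchall()
    print(dict(rows))  # e.g. {'dev': 1, 'main': 2}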
@@ -345,13 +384,13 @@ class DatabaseManager:
                     fd.branch,
                     fd.file_path,
                     fd.description,
-                    fts.rank
-                FROM file_descriptions_fts fts
-                JOIN file_descriptions fd ON fd.rowid = fts.rowid
-                WHERE fts MATCH ?
+                    bm25(file_descriptions_fts) as rank
+                FROM file_descriptions_fts
+                JOIN file_descriptions fd ON fd.rowid = file_descriptions_fts.rowid
+                WHERE file_descriptions_fts MATCH ?
                 AND fd.project_id = ?
                 AND fd.branch = ?
-                ORDER BY fts.rank
+                ORDER BY bm25(file_descriptions_fts)
                 LIMIT ?
                 """,
                 (query, project_id, branch, max_results)
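This hunk swaps the FTS5 rank column for an explicit bm25() call. In SQLite's FTS5, bm25(table) returns a score that is lower for better matches, so the ascending ORDER BY puts the most relevant rows first. A minimal standalone sketch of the same idiom, assuming your SQLite build ships with FTS5 (table name and rows are invented):

    import sqlite3

    db = sqlite3.connect(":memory:")
    db.execute("CREATE VIRTUAL TABLE docs_fts USING fts5(description)")
    db.executemany(
        "INSERT INTO docs_fts (description) VALUES (?)",
        [("parses config files",), ("config loader for the config subsystem",)],
    )

    # bm25() is lower (more negative) for better matches, so ascending
    # ORDER BY yields best-first results, as in the hunk above.
    rows = db.execute(
        "SELECT description, bm25(docs_fts) AS rank FROM docs_fts "
        "WHERE docs_fts MATCH ? ORDER BY bm25(docs_fts) LIMIT 5",
        ("config",),
    ).fetchall()
    for description, rank in rows:
        print(f"{rank:+.3f}  {description}")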
mcp_code_indexer/server/mcp_server.py

@@ -9,9 +9,10 @@ import asyncio
 import hashlib
 import json
 import logging
+import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Set
 
 from mcp import types
 from mcp.server import Server
@@ -276,26 +277,48 @@ class MCPCodeIndexServer:
         )]
 
     async def _get_or_create_project_id(self, arguments: Dict[str, Any]) -> str:
-        """Get or create a project ID from tool arguments."""
+        """
+        Get or create a project ID using intelligent matching.
+
+        Matches projects based on 2+ out of 4 identification factors:
+        1. Project name (normalized, case-insensitive)
+        2. Remote origin URL
+        3. Upstream origin URL
+        4. Any folder path in aliases
+
+        If only 1 factor matches, uses file similarity to determine if it's the same project.
+        """
         project_name = arguments["projectName"]
         remote_origin = arguments.get("remoteOrigin")
         upstream_origin = arguments.get("upstreamOrigin")
         folder_path = arguments["folderPath"]
         branch = arguments.get("branch", "main")
 
-        # Create project ID from stable identifiers only (name + folder path)
-        # Normalize project name to lowercase for case-insensitive matching
-        # This ensures consistent project IDs regardless of case variations
+        # Normalize project name for case-insensitive matching
         normalized_name = project_name.lower()
-        id_source = f"{normalized_name}:{folder_path}"
-        project_id = hashlib.sha256(id_source.encode()).hexdigest()[:16]
 
-        # Check if project exists, create if not
-        project = await self.db_manager.get_project(project_id)
-        if not project:
+        # Find potential project matches
+        project = await self._find_matching_project(
+            normalized_name, remote_origin, upstream_origin, folder_path
+        )
+        if project:
+            # Update project metadata and aliases
+            await self._update_existing_project(project, normalized_name, remote_origin, upstream_origin, folder_path)
+
+            # Check if upstream inheritance is needed
+            if upstream_origin and await self.db_manager.check_upstream_inheritance_needed(project):
+                try:
+                    inherited_count = await self.db_manager.inherit_from_upstream(project, branch)
+                    if inherited_count > 0:
+                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {normalized_name}")
+                except Exception as e:
+                    logger.warning(f"Failed to inherit from upstream: {e}")
+        else:
+            # Create new project with UUID
+            project_id = str(uuid.uuid4())
             project = Project(
                 id=project_id,
-                name=normalized_name,  # Store normalized name for consistency
+                name=normalized_name,
                 remote_origin=remote_origin,
                 upstream_origin=upstream_origin,
                 aliases=[folder_path],
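Two things change here: project identity moves from a deterministic sha256 of name and folder path to a random uuid4 (so identity no longer depends on where a checkout lives), and lookup moves to counting identification factors. The docstring's 2-of-4 rule reduces to a simple tally; a dependency-free sketch of just that step (the function name and arguments are illustrative, the real logic lives in _find_matching_project below):

    from typing import List, Optional

    def match_score(
        project_name: str,
        project_remote: Optional[str],
        project_upstream: Optional[str],
        project_aliases: List[str],
        name: str,
        remote: Optional[str],
        upstream: Optional[str],
        folder: str,
    ) -> int:
        """Count how many of the four identification factors agree."""
        score = 0
        score += project_name.lower() == name.lower()              # factor 1: name
        score += bool(remote) and project_remote == remote         # factor 2: remote origin
        score += bool(upstream) and project_upstream == upstream   # factor 3: upstream origin
        score += folder in project_aliases                         # factor 4: folder alias
        return score

    # Name and folder alias agree: 2 factors, a strong match under the 2-of-4 rule.
    assert match_score("WebApp", None, None, ["/home/a/webapp"],
                       "webapp", None, None, "/home/a/webapp") == 2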
@@ -303,42 +326,187 @@ class MCPCodeIndexServer:
                 last_accessed=datetime.utcnow()
             )
             await self.db_manager.create_project(project)
+            logger.info(f"Created new project: {normalized_name} ({project_id})")
 
             # Auto-inherit from upstream if needed
             if upstream_origin:
                 try:
                     inherited_count = await self.db_manager.inherit_from_upstream(project, branch)
                     if inherited_count > 0:
-                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {project_name}")
-                except Exception as e:
-                    logger.warning(f"Failed to inherit from upstream: {e}")
-        else:
-            # Update last accessed time
-            await self.db_manager.update_project_access_time(project_id)
-
-            # Update remote/upstream origins if provided and different from existing
-            should_update = False
-            if remote_origin and project.remote_origin != remote_origin:
-                project.remote_origin = remote_origin
-                should_update = True
-            if upstream_origin and project.upstream_origin != upstream_origin:
-                project.upstream_origin = upstream_origin
-                should_update = True
-
-            if should_update:
-                await self.db_manager.update_project(project)
-                logger.debug(f"Updated project metadata for {project_name}")
-
-            # Check if upstream inheritance is needed for existing project
-            if upstream_origin and await self.db_manager.check_upstream_inheritance_needed(project):
-                try:
-                    inherited_count = await self.db_manager.inherit_from_upstream(project, branch)
-                    if inherited_count > 0:
-                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {project_name}")
+                        logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {normalized_name}")
                 except Exception as e:
                     logger.warning(f"Failed to inherit from upstream: {e}")
 
-        return project_id
+        return project.id
+
+    async def _find_matching_project(
+        self,
+        normalized_name: str,
+        remote_origin: Optional[str],
+        upstream_origin: Optional[str],
+        folder_path: str
+    ) -> Optional[Project]:
+        """
+        Find a matching project using intelligent 2-out-of-4 matching logic.
+
+        Returns the best matching project or None if no sufficient match is found.
+        """
+        all_projects = await self.db_manager.get_all_projects()
+
+        best_match = None
+        best_score = 0
+
+        for project in all_projects:
+            score = 0
+            match_factors = []
+
+            # Factor 1: Project name match
+            if project.name.lower() == normalized_name:
+                score += 1
+                match_factors.append("name")
+
+            # Factor 2: Remote origin match
+            if remote_origin and project.remote_origin == remote_origin:
+                score += 1
+                match_factors.append("remote_origin")
+
+            # Factor 3: Upstream origin match
+            if upstream_origin and project.upstream_origin == upstream_origin:
+                score += 1
+                match_factors.append("upstream_origin")
+
+            # Factor 4: Folder path in aliases
+            project_aliases = json.loads(project.aliases) if isinstance(project.aliases, str) else project.aliases
+            if folder_path in project_aliases:
+                score += 1
+                match_factors.append("folder_path")
+
+            # If we have 2+ matches, this is a strong candidate
+            if score >= 2:
+                if score > best_score:
+                    best_score = score
+                    best_match = project
+                    logger.info(f"Strong match for project {project.name} (score: {score}, factors: {match_factors})")
+
+            # If only 1 match, check file similarity for potential matches
+            elif score == 1:
+                if await self._check_file_similarity(project, folder_path):
+                    logger.info(f"File similarity match for project {project.name} (factor: {match_factors[0]})")
+                    if score > best_score:
+                        best_score = score
+                        best_match = project
+
+        return best_match
+
+    async def _check_file_similarity(self, project: Project, folder_path: str) -> bool:
+        """
+        Check if the files in the folder are similar to files already indexed for this project.
+        Returns True if 80%+ of files match.
+        """
+        try:
+            # Get files currently in the folder
+            scanner = FileScanner(Path(folder_path))
+            if not scanner.is_valid_project_directory():
+                return False
+
+            current_files = scanner.scan_files()
+            current_basenames = {Path(f).name for f in current_files}
+
+            if not current_basenames:
+                return False
+
+            # Get files already indexed for this project
+            indexed_files = await self.db_manager.get_all_file_descriptions(project.id, "main")
+            indexed_basenames = {Path(fd.file_path).name for fd in indexed_files}
+
+            if not indexed_basenames:
+                return False
+
+            # Calculate similarity
+            intersection = current_basenames & indexed_basenames
+            similarity = len(intersection) / len(current_basenames)
+
+            logger.debug(f"File similarity for {project.name}: {similarity:.2%} ({len(intersection)}/{len(current_basenames)} files match)")
+
+            return similarity >= 0.8
+        except Exception as e:
+            logger.warning(f"Error checking file similarity: {e}")
+            return False
+
+    async def _update_existing_project(
+        self,
+        project: Project,
+        normalized_name: str,
+        remote_origin: Optional[str],
+        upstream_origin: Optional[str],
+        folder_path: str
+    ) -> None:
+        """Update an existing project with new metadata and folder alias."""
+        # Update last accessed time
+        await self.db_manager.update_project_access_time(project.id)
+
+        should_update = False
+
+        # Update name if different
+        if project.name != normalized_name:
+            project.name = normalized_name
+            should_update = True
+
+        # Update remote/upstream origins if provided and different
+        if remote_origin and project.remote_origin != remote_origin:
+            project.remote_origin = remote_origin
+            should_update = True
+
+        if upstream_origin and project.upstream_origin != upstream_origin:
+            project.upstream_origin = upstream_origin
+            should_update = True
+
+        # Add folder path to aliases if not already present
+        project_aliases = json.loads(project.aliases) if isinstance(project.aliases, str) else project.aliases
+        if folder_path not in project_aliases:
+            project_aliases.append(folder_path)
+            project.aliases = project_aliases
+            should_update = True
+            logger.info(f"Added new folder alias to project {project.name}: {folder_path}")
+
+        if should_update:
+            await self.db_manager.update_project(project)
+            logger.debug(f"Updated project metadata for {project.name}")
+
+    async def _find_best_branch(self, project_id: str, requested_branch: str) -> Optional[str]:
+        """
+        Find the best available branch for a project when the requested branch has no files.
+        Returns the branch with the most files, or None if no branches have files.
+        """
+        try:
+            # Get all branches and their file counts for this project
+            branch_counts = await self.db_manager.get_branch_file_counts(project_id)
+
+            if not branch_counts:
+                return None
+
+            # First try common branch name variations
+            common_variations = {
+                'main': ['master', 'develop', 'development', 'dev'],
+                'master': ['main', 'develop', 'development', 'dev'],
+                'develop': ['development', 'main', 'master', 'dev'],
+                'development': ['develop', 'main', 'master', 'dev'],
+                'dev': ['develop', 'development', 'main', 'master']
+            }
+
+            # Try variations of the requested branch
+            if requested_branch.lower() in common_variations:
+                for variation in common_variations[requested_branch.lower()]:
+                    if variation in branch_counts and branch_counts[variation] > 0:
+                        return variation
+
+            # Fall back to the branch with the most files
+            best_branch = max(branch_counts.items(), key=lambda x: x[1])
+            return best_branch[0] if best_branch[1] > 0 else None
+
+        except Exception as e:
+            logger.warning(f"Error finding best branch: {e}")
+            return None
 
     async def _handle_get_file_description(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """Handle get_file_description tool calls."""
@@ -390,13 +558,24 @@ class MCPCodeIndexServer:
     async def _handle_check_codebase_size(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """Handle check_codebase_size tool calls."""
         project_id = await self._get_or_create_project_id(arguments)
+        requested_branch = arguments["branch"]
 
-        # Get all file descriptions for this project/branch
+        # Get file descriptions for this project/branch
        file_descriptions = await self.db_manager.get_all_file_descriptions(
             project_id=project_id,
-            branch=arguments["branch"]
+            branch=requested_branch
         )
 
+        # If no files found for requested branch, try to find the best available branch
+        if not file_descriptions:
+            available_branch = await self._find_best_branch(project_id, requested_branch)
+            if available_branch and available_branch != requested_branch:
+                file_descriptions = await self.db_manager.get_all_file_descriptions(
+                    project_id=project_id,
+                    branch=available_branch
+                )
+                logger.info(f"No files found for branch '{requested_branch}', using '{available_branch}' instead")
+
         # Calculate total tokens
         total_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
         is_large = self.token_counter.is_large_codebase(total_tokens)
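The fallback wired in here follows _find_best_branch's precedence: try well-known sibling branch names first, then take whichever branch has the most files. A quick illustration with invented counts:

    # Hypothetical result from get_branch_file_counts.
    branch_counts = {"master": 120, "feature/x": 3}

    # Requesting "main" first walks the common variations...
    for candidate in ["master", "develop", "development", "dev"]:
        if branch_counts.get(candidate, 0) > 0:
            print(candidate)  # -> master
            break
    else:
        # ...and otherwise falls back to the branch with the most files.
        print(max(branch_counts, key=branch_counts.get))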
mcp_code_indexer-1.0.9.dist-info/METADATA → mcp_code_indexer-1.1.1.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mcp-code-indexer
-Version: 1.0.9
+Version: 1.1.1
 Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
 Author: MCP Code Indexer Contributors
 Maintainer: MCP Code Indexer Contributors
mcp_code_indexer-1.0.9.dist-info/RECORD → mcp_code_indexer-1.1.1.dist-info/RECORD

@@ -6,17 +6,17 @@ mcp_code_indexer/main.py,sha256=Rou-mAN9-12PPP8jC7dIs2_UNambJuC2F8BF--j-0m8,3715
 mcp_code_indexer/merge_handler.py,sha256=lJR8eVq2qSrF6MW9mR3Fy8UzrNAaQ7RsI2FMNXne3vQ,14692
 mcp_code_indexer/token_counter.py,sha256=WrifOkbF99nWWHlRlhCHAB2KN7qr83GOHl7apE-hJcE,8460
 mcp_code_indexer/database/__init__.py,sha256=aPq_aaRp0aSwOBIq9GkuMNjmLxA411zg2vhdrAuHm-w,38
-mcp_code_indexer/database/database.py,sha256=ROGdosQSADI7EytNkdC4RauPD9zLtHTO1mQ8SxsmPVo,18755
+mcp_code_indexer/database/database.py,sha256=eG2xY5cd-oxRZ6mgGkqqBiJJfGCPqJgzoFq6kR99WfA,20300
 mcp_code_indexer/database/models.py,sha256=3wOxHKb6j3zKPWFSwB5g1TLpI507vLNZcqsxZR4VuRs,5528
 mcp_code_indexer/middleware/__init__.py,sha256=p-mP0pMsfiU2yajCPvokCUxUEkh_lu4XJP1LyyMW2ug,220
 mcp_code_indexer/middleware/error_middleware.py,sha256=v6jaHmPxf3qerYdb85X1tHIXLxgcbybpitKVakFLQTA,10109
 mcp_code_indexer/server/__init__.py,sha256=16xMcuriUOBlawRqWNBk6niwrvtv_JD5xvI36X1Vsmk,41
-mcp_code_indexer/server/mcp_server.py,sha256=QhN34Ue6jKzwRvCIxYRYrb9v3_fEVsuIUmmMP-woRqI,38023
+mcp_code_indexer/server/mcp_server.py,sha256=LxYt6AQ2hifAZIrduyGGBz22kxfcMnCAsHPjih37X5k,45523
 mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
 mcp_code_indexer/tools/__init__.py,sha256=m01mxML2UdD7y5rih_XNhNSCMzQTz7WQ_T1TeOcYlnE,49
-mcp_code_indexer-1.0.9.dist-info/licenses/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
-mcp_code_indexer-1.0.9.dist-info/METADATA,sha256=uMXk1E3Hp0PY3yykMfBa1YyZSRPPnDW2GFYxcR9r2K8,11930
-mcp_code_indexer-1.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mcp_code_indexer-1.0.9.dist-info/entry_points.txt,sha256=8HqWOw1Is7jOP1bvIgaSwouvT9z_Boe-9hd4NzyJOhY,68
-mcp_code_indexer-1.0.9.dist-info/top_level.txt,sha256=yKYCM-gMGt-cnupGfAhnZaoEsROLB6DQ1KFUuyKx4rw,17
-mcp_code_indexer-1.0.9.dist-info/RECORD,,
+mcp_code_indexer-1.1.1.dist-info/licenses/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
+mcp_code_indexer-1.1.1.dist-info/METADATA,sha256=h8Kqpz8nH14e73F1AoBwXAy3BgnBYKh04igxTq2euKw,11930
+mcp_code_indexer-1.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mcp_code_indexer-1.1.1.dist-info/entry_points.txt,sha256=8HqWOw1Is7jOP1bvIgaSwouvT9z_Boe-9hd4NzyJOhY,68
+mcp_code_indexer-1.1.1.dist-info/top_level.txt,sha256=yKYCM-gMGt-cnupGfAhnZaoEsROLB6DQ1KFUuyKx4rw,17
+mcp_code_indexer-1.1.1.dist-info/RECORD,,