mcp-code-indexer 3.0.3__py3-none-any.whl → 3.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_code_indexer/database/database.py CHANGED
@@ -509,14 +509,12 @@ class DatabaseManager:
          async with self.get_write_connection_with_retry("create_project") as db:
              await db.execute(
                  """
-                 INSERT INTO projects (id, name, remote_origin, upstream_origin, aliases, created, last_accessed)
-                 VALUES (?, ?, ?, ?, ?, ?, ?)
+                 INSERT INTO projects (id, name, aliases, created, last_accessed)
+                 VALUES (?, ?, ?, ?, ?)
                  """,
                  (
                      project.id,
                      project.name,
-                     project.remote_origin,
-                     project.upstream_origin,
                      json.dumps(project.aliases),
                      project.created,
                      project.last_accessed
@@ -538,44 +536,15 @@ class DatabaseManager:
              return Project(
                  id=row['id'],
                  name=row['name'],
-                 remote_origin=row['remote_origin'],
-                 upstream_origin=row['upstream_origin'],
                  aliases=json.loads(row['aliases']),
                  created=datetime.fromisoformat(row['created']),
                  last_accessed=datetime.fromisoformat(row['last_accessed'])
              )
          return None
  
-     async def find_project_by_origin(self, origin_url: str) -> Optional[Project]:
-         """Find project by remote or upstream origin URL."""
-         async with self.get_connection() as db:
-             cursor = await db.execute(
-                 """
-                 SELECT * FROM projects
-                 WHERE remote_origin = ? OR upstream_origin = ?
-                 LIMIT 1
-                 """,
-                 (origin_url, origin_url)
-             )
-             row = await cursor.fetchone()
-
-             if row:
-                 return Project(
-                     id=row['id'],
-                     name=row['name'],
-                     remote_origin=row['remote_origin'],
-                     upstream_origin=row['upstream_origin'],
-                     aliases=json.loads(row['aliases']),
-                     created=datetime.fromisoformat(row['created']),
-                     last_accessed=datetime.fromisoformat(row['last_accessed'])
-                 )
-             return None
-
      async def find_matching_project(
          self,
          project_name: str,
-         remote_origin: Optional[str] = None,
-         upstream_origin: Optional[str] = None,
          folder_path: Optional[str] = None
      ) -> Optional[Project]:
          """
@@ -583,8 +552,6 @@ class DatabaseManager:
  
          Args:
              project_name: Name of the project
-             remote_origin: Remote origin URL
-             upstream_origin: Upstream origin URL
              folder_path: Project folder path
  
          Returns:
@@ -602,48 +569,27 @@ class DatabaseManager:
  
              # Check name match (case-insensitive)
              if project.name.lower() == normalized_name:
-                 score += 1
+                 score += 2  # Name match is primary identifier
                  match_factors.append("name")
  
-             # Check remote origin match
-             if remote_origin and project.remote_origin == remote_origin:
-                 score += 1
-                 match_factors.append("remote_origin")
-
-             # Check upstream origin match
-             if upstream_origin and project.upstream_origin == upstream_origin:
-                 score += 1
-                 match_factors.append("upstream_origin")
-
              # Check folder path in aliases
              if folder_path and folder_path in project.aliases:
                  score += 1
                  match_factors.append("folder_path")
  
-             # Enhanced matching: If name matches and no remote origins are provided,
-             # consider it a strong match to prevent duplicates
-             if (score == 1 and "name" in match_factors and
-                     not remote_origin and not project.remote_origin and
-                     not upstream_origin and not project.upstream_origin):
-                 logger.info(f"Name-only match with no remotes for project {project.name} - treating as strong match")
-                 score = 2  # Boost score to strong match level
-                 match_factors.append("no_remotes_boost")
-
-             # If we have 2+ matches, this is a strong candidate
+             # If we have a name match, it's a strong candidate
              if score >= 2:
                  if score > best_score:
                      best_score = score
                      best_match = project
-                 logger.info(f"Strong match for project {project.name} (score: {score}, factors: {match_factors})")
+                 logger.info(f"Match for project {project.name} (score: {score}, factors: {match_factors})")
  
          return best_match
  
      async def get_or_create_project(
          self,
          project_name: str,
-         folder_path: str,
-         remote_origin: Optional[str] = None,
-         upstream_origin: Optional[str] = None
+         folder_path: str
      ) -> Project:
          """
          Get or create a project using intelligent matching.
@@ -651,15 +597,13 @@ class DatabaseManager:
          Args:
              project_name: Name of the project
              folder_path: Project folder path
-             remote_origin: Remote origin URL
-             upstream_origin: Upstream origin URL
  
          Returns:
              Existing or newly created project
          """
          # Try to find existing project
          project = await self.find_matching_project(
-             project_name, remote_origin, upstream_origin, folder_path
+             project_name, folder_path
          )
  
          if project:
@@ -680,8 +624,6 @@ class DatabaseManager:
          new_project = Project(
              id=str(uuid.uuid4()),
              name=project_name,
-             remote_origin=remote_origin,
-             upstream_origin=upstream_origin,
              aliases=[folder_path],
              created=datetime.utcnow(),
              last_accessed=datetime.utcnow()
@@ -706,13 +648,11 @@ class DatabaseManager:
              await db.execute(
                  """
                  UPDATE projects
-                 SET name = ?, remote_origin = ?, upstream_origin = ?, aliases = ?, last_accessed = ?
+                 SET name = ?, aliases = ?, last_accessed = ?
                  WHERE id = ?
                  """,
                  (
                      project.name,
-                     project.remote_origin,
-                     project.upstream_origin,
                      json.dumps(project.aliases),
                      project.last_accessed,
                      project.id
@@ -725,21 +665,19 @@ class DatabaseManager:
          """Get all projects in the database."""
          async with self.get_connection() as db:
              cursor = await db.execute(
-                 "SELECT id, name, remote_origin, upstream_origin, aliases, created, last_accessed FROM projects"
+                 "SELECT id, name, aliases, created, last_accessed FROM projects"
              )
              rows = await cursor.fetchall()
  
              projects = []
              for row in rows:
-                 aliases = json.loads(row[4]) if row[4] else []
+                 aliases = json.loads(row[2]) if row[2] else []
                  project = Project(
                      id=row[0],
                      name=row[1],
-                     remote_origin=row[2],
-                     upstream_origin=row[3],
                      aliases=aliases,
-                     created=row[5],
-                     last_accessed=row[6]
+                     created=row[3],
+                     last_accessed=row[4]
                  )
                  projects.append(project)
  
@@ -1031,22 +969,7 @@ class DatabaseManager:
  
          return len(inherited_descriptions)
  
-     async def check_upstream_inheritance_needed(self, project: Project) -> bool:
-         """
-         Check if a project needs upstream inheritance.
-
-         Args:
-             project: Project to check
-
-         Returns:
-             True if project has upstream but no descriptions yet
-         """
-         if not project.upstream_origin:
-             return False
-
-         # Check if project has any descriptions
-         file_count = await self.get_file_count(project.id, "main")
-         return file_count == 0
+
  
      # Project Overview operations
  
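Taken together, the database.py changes above reduce project matching to a two-factor score. A minimal standalone sketch of that rule (`ProjectRecord` and `match_score` are illustrative stand-ins, not the package's actual classes):

```python
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class ProjectRecord:
    """Illustrative stand-in for the package's Project model."""
    name: str
    aliases: List[str] = field(default_factory=list)

def match_score(project: ProjectRecord, normalized_name: str,
                folder_path: Optional[str] = None) -> int:
    """Score a candidate the way 3.1.1 does: name counts 2, alias path counts 1."""
    score = 0
    if project.name.lower() == normalized_name:
        score += 2  # name is now the primary identifier
    if folder_path and folder_path in project.aliases:
        score += 1
    return score

# A score >= 2 is a strong candidate, so a name match alone now suffices,
# while a folder-path alias alone (score 1) does not.
p = ProjectRecord("my-project", aliases=["/home/alice/my-project"])
assert match_score(p, "my-project") == 2
assert match_score(p, "other", "/home/alice/my-project") == 1
```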
mcp_code_indexer/database/models.py CHANGED
@@ -15,13 +15,11 @@ class Project(BaseModel):
      """
      Represents a tracked project/repository.
  
-     Projects are identified by a combination of git remotes and local paths,
-     allowing tracking across forks, renames, and different local copies.
+     Projects are identified by project name and folder paths,
+     allowing tracking across different local copies without git coupling.
      """
      id: str = Field(..., description="Generated unique identifier")
      name: str = Field(..., description="User-provided project name")
-     remote_origin: Optional[str] = Field(None, description="Git remote origin URL")
-     upstream_origin: Optional[str] = Field(None, description="Upstream repository URL for forks")
      aliases: List[str] = Field(default_factory=list, description="Alternative identifiers")
      created: datetime = Field(default_factory=datetime.utcnow, description="Creation timestamp")
      last_accessed: datetime = Field(default_factory=datetime.utcnow, description="Last access timestamp")
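This model change drives every other hunk in the release. A self-contained sketch of the resulting class, reusing the `Field` declarations shown above (the construction example is illustrative):

```python
import uuid
from datetime import datetime
from typing import List
from pydantic import BaseModel, Field

class Project(BaseModel):
    """Sketch of the 3.1.1 model: the two git-remote fields are gone."""
    id: str = Field(..., description="Generated unique identifier")
    name: str = Field(..., description="User-provided project name")
    aliases: List[str] = Field(default_factory=list, description="Alternative identifiers")
    created: datetime = Field(default_factory=datetime.utcnow, description="Creation timestamp")
    last_accessed: datetime = Field(default_factory=datetime.utcnow, description="Last access timestamp")

# Folder paths now live only in `aliases`, which is what the matcher consults.
project = Project(id=str(uuid.uuid4()), name="my-project", aliases=["/home/alice/my-project"])
```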
mcp_code_indexer/git_hook_handler.py CHANGED
@@ -14,7 +14,7 @@ import subprocess
  import tempfile
  from pathlib import Path
  from typing import Dict, List, Optional, Tuple, Any
- from urllib.parse import urlparse
+
  
  import aiohttp
  from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception_type
@@ -295,50 +295,18 @@ Return ONLY a JSON object:
              # Get current working directory as project root
              project_root = Path.cwd()
  
-             # Get git remote info
-             remote_result = await self._run_git_command(["remote", "get-url", "origin"])
-             remote_origin = remote_result.strip() if remote_result else None
-
-             # Try to get upstream origin
-             upstream_origin = None
-             try:
-                 upstream_result = await self._run_git_command(["remote", "get-url", "upstream"])
-                 upstream_origin = upstream_result.strip() if upstream_result else None
-             except subprocess.CalledProcessError:
-                 pass  # No upstream remote
-
-             # Extract project name from remote URL or use directory name
-             project_name = self._extract_project_name(remote_origin, project_root)
+             # Use directory name as project name
+             project_name = project_root.name
  
              return {
                  "projectName": project_name,
-                 "folderPath": str(project_root),
-                 "remoteOrigin": remote_origin,
-                 "upstreamOrigin": upstream_origin
+                 "folderPath": str(project_root)
              }
  
          except Exception as e:
              raise GitHookError(f"Failed to identify project from git: {e}")
  
-     def _extract_project_name(self, remote_origin: Optional[str], project_root: Path) -> str:
-         """Extract project name from remote URL or directory name."""
-         if remote_origin:
-             # Parse GitHub/GitLab URL
-             if remote_origin.startswith("git@"):
-                 # SSH format: git@github.com:user/repo.git
-                 parts = remote_origin.split(":")
-                 if len(parts) >= 2:
-                     repo_path = parts[-1].replace(".git", "")
-                     return repo_path.split("/")[-1]
-             else:
-                 # HTTPS format
-                 parsed = urlparse(remote_origin)
-                 if parsed.path:
-                     repo_path = parsed.path.strip("/").replace(".git", "")
-                     return repo_path.split("/")[-1]
-
-         # Fallback to directory name
-         return project_root.name
+
  
      async def _get_git_diff(self) -> str:
          """
mcp_code_indexer/logging_config.py CHANGED
@@ -198,7 +198,7 @@ def _setup_component_loggers_for_command(
          "mcp_code_indexer.token_counter",
          "mcp_code_indexer.file_scanner",
          "mcp_code_indexer.error_handler",
-         "mcp_code_indexer.merge_handler"
+
      ]
  
      for component_logger_name in component_loggers:
mcp_code_indexer/main.py CHANGED
@@ -129,8 +129,6 @@ async def handle_getprojects(args: argparse.Namespace) -> None:
          for project in projects:
              print(f"ID: {project.id}")
              print(f"Name: {project.name}")
-             print(f"Remote Origin: {project.remote_origin or 'N/A'}")
-             print(f"Upstream Origin: {project.upstream_origin or 'N/A'}")
  
              # Get branch information
              try:
@@ -293,7 +291,6 @@ async def handle_runcommand(args: argparse.Namespace) -> None:
          "get_codebase_overview": server._handle_get_condensed_overview,
          "update_codebase_overview": server._handle_update_codebase_overview,
          "get_word_frequency": server._handle_get_word_frequency,
-         "merge_branch_descriptions": server._handle_merge_branch_descriptions,
          "search_codebase_overview": server._handle_search_codebase_overview,
      }
  
@@ -706,12 +703,6 @@ def generate_project_markdown(project, branch, overview, files, logger):
      markdown_lines.append("")
  
      # Project metadata
-     if project.remote_origin:
-         markdown_lines.append(f"**Repository:** {project.remote_origin}")
-         markdown_lines.append("")
-     if project.upstream_origin:
-         markdown_lines.append(f"**Upstream:** {project.upstream_origin}")
-         markdown_lines.append("")
      markdown_lines.append(f"**Branch:** {branch}")
      markdown_lines.append("")
  
mcp_code_indexer/migrations/005_remove_git_remotes.sql ADDED
@@ -0,0 +1,41 @@
+ -- Migration 005: Remove git remote dependencies from projects table
+ -- This migration removes remote_origin and upstream_origin columns and their indexes
+ -- Project identification now relies solely on project name and folder paths
+
+ -- Ensure WAL mode is enabled for safe migrations
+ PRAGMA journal_mode=WAL;
+
+ -- Temporarily disable foreign key constraints for migration
+ PRAGMA foreign_keys=OFF;
+
+ -- Start transaction for atomic migration
+ BEGIN TRANSACTION;
+
+ -- Create new projects table without git remote columns
+ CREATE TABLE projects_new (
+     id TEXT PRIMARY KEY,
+     name TEXT NOT NULL,
+     aliases TEXT DEFAULT '[]',  -- JSON array of aliases
+     created DATETIME DEFAULT CURRENT_TIMESTAMP,
+     last_accessed DATETIME DEFAULT CURRENT_TIMESTAMP
+ );
+
+ -- Create indexes for the new table (without remote indexes)
+ CREATE INDEX idx_projects_new_name ON projects_new(name);
+
+ -- Migrate data from old table (dropping remote_origin and upstream_origin)
+ INSERT INTO projects_new (id, name, aliases, created, last_accessed)
+ SELECT id, name, aliases, created, last_accessed
+ FROM projects;
+
+ -- Drop old table
+ DROP TABLE projects;
+
+ -- Rename new table to original name
+ ALTER TABLE projects_new RENAME TO projects;
+
+ -- Re-enable foreign key constraints
+ PRAGMA foreign_keys=ON;
+
+ -- Commit the migration
+ COMMIT;
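The migration uses SQLite's rebuild-and-rename pattern because older SQLite releases cannot drop columns in place. The package applies migrations through its own runner; as a rough sketch, running and sanity-checking this file by hand could look like the following (the `tracker.db` path is hypothetical):

```python
import sqlite3
from pathlib import Path

def apply_migration(db_path: str, sql_path: str) -> None:
    """Run the migration script, then confirm the rebuilt schema."""
    conn = sqlite3.connect(db_path)
    try:
        conn.executescript(Path(sql_path).read_text())
        # PRAGMA table_info yields one row per column; index 1 is the name.
        columns = [row[1] for row in conn.execute("PRAGMA table_info(projects)")]
        assert columns == ["id", "name", "aliases", "created", "last_accessed"]
        assert "remote_origin" not in columns  # dropped by the rebuild
    finally:
        conn.close()

apply_migration("tracker.db", "mcp_code_indexer/migrations/005_remove_git_remotes.sql")
```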
mcp_code_indexer/server/mcp_server.py CHANGED
@@ -519,17 +519,13 @@ src/
          """
          Get or create a project ID using intelligent matching.
  
-         Matches projects based on 2+ out of 4 identification factors:
+         Matches projects based on identification factors:
          1. Project name (normalized, case-insensitive)
-         2. Remote origin URL
-         3. Upstream origin URL
-         4. Any folder path in aliases
+         2. Folder path in aliases
  
-         If only 1 factor matches, uses file similarity to determine if it's the same project.
+         Projects are now identified primarily by name without git coupling.
          """
          project_name = arguments["projectName"]
-         remote_origin = arguments.get("remoteOrigin")
-         upstream_origin = arguments.get("upstreamOrigin")
          folder_path = arguments["folderPath"]
  
  
@@ -538,55 +534,33 @@ src/
  
          # Find potential project matches
          project = await self._find_matching_project(
-             normalized_name, remote_origin, upstream_origin, folder_path
+             normalized_name, folder_path
          )
          if project:
              # Update project metadata and aliases
-             await self._update_existing_project(project, normalized_name, remote_origin, upstream_origin, folder_path)
-
-             # Check if upstream inheritance is needed
-             if upstream_origin and await self.db_manager.check_upstream_inheritance_needed(project):
-                 try:
-                     inherited_count = await self.db_manager.inherit_from_upstream(project)
-                     if inherited_count > 0:
-                         logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {normalized_name}")
-                 except Exception as e:
-                     logger.warning(f"Failed to inherit from upstream: {e}")
+             await self._update_existing_project(project, normalized_name, folder_path)
          else:
              # Create new project with UUID
              project_id = str(uuid.uuid4())
              project = Project(
                  id=project_id,
                  name=normalized_name,
-                 remote_origin=remote_origin,
-                 upstream_origin=upstream_origin,
                  aliases=[folder_path],
                  created=datetime.utcnow(),
                  last_accessed=datetime.utcnow()
              )
              await self.db_manager.create_project(project)
              logger.info(f"Created new project: {normalized_name} ({project_id})")
-
-             # Auto-inherit from upstream if needed
-             if upstream_origin:
-                 try:
-                     inherited_count = await self.db_manager.inherit_from_upstream(project)
-                     if inherited_count > 0:
-                         logger.info(f"Auto-inherited {inherited_count} descriptions from upstream for {normalized_name}")
-                 except Exception as e:
-                     logger.warning(f"Failed to inherit from upstream: {e}")
  
          return project.id
  
      async def _find_matching_project(
          self,
          normalized_name: str,
-         remote_origin: Optional[str],
-         upstream_origin: Optional[str],
          folder_path: str
      ) -> Optional[Project]:
          """
-         Find a matching project using intelligent 2-out-of-4 matching logic.
+         Find a matching project using name and folder path matching.
  
          Returns the best matching project or None if no sufficient match is found.
          """
@@ -599,45 +573,26 @@ src/
              score = 0
              match_factors = []
  
-             # Factor 1: Project name match
+             # Factor 1: Project name match (primary identifier)
              if project.name.lower() == normalized_name:
-                 score += 1
+                 score += 2  # Higher weight for name match
                  match_factors.append("name")
  
-             # Factor 2: Remote origin match
-             if remote_origin and project.remote_origin == remote_origin:
-                 score += 1
-                 match_factors.append("remote_origin")
-
-             # Factor 3: Upstream origin match
-             if upstream_origin and project.upstream_origin == upstream_origin:
-                 score += 1
-                 match_factors.append("upstream_origin")
-
-             # Factor 4: Folder path in aliases
+             # Factor 2: Folder path in aliases
              project_aliases = json.loads(project.aliases) if isinstance(project.aliases, str) else project.aliases
              if folder_path in project_aliases:
                  score += 1
                  match_factors.append("folder_path")
  
-             # Enhanced matching: If name matches and no remote origins are provided,
-             # consider it a strong match to prevent duplicates
-             if (score == 1 and "name" in match_factors and
-                     not remote_origin and not project.remote_origin and
-                     not upstream_origin and not project.upstream_origin):
-                 logger.info(f"Name-only match with no remotes for project {project.name} - treating as strong match to prevent duplicates")
-                 score = 2  # Boost score to strong match level
-                 match_factors.append("no_remotes_boost")
-
-             # If we have 2+ matches, this is a strong candidate
+             # If we have a name match, it's a strong candidate
              if score >= 2:
                  if score > best_score:
                      best_score = score
                      best_match = project
-                 logger.info(f"Strong match for project {project.name} (score: {score}, factors: {match_factors})")
+                 logger.info(f"Match for project {project.name} (score: {score}, factors: {match_factors})")
  
-             # If only 1 match, check file similarity for potential matches
-             elif score == 1:
+             # If only name matches, check file similarity for potential matches
+             elif score == 1 and "name" in match_factors:
                  if await self._check_file_similarity(project, folder_path):
                      logger.info(f"File similarity match for project {project.name} (factor: {match_factors[0]})")
                      if score > best_score:
@@ -685,8 +640,6 @@ src/
          self,
          project: Project,
          normalized_name: str,
-         remote_origin: Optional[str],
-         upstream_origin: Optional[str],
          folder_path: str
      ) -> None:
          """Update an existing project with new metadata and folder alias."""
@@ -699,15 +652,6 @@ src/
          if project.name != normalized_name:
              project.name = normalized_name
              should_update = True
-
-         # Update remote/upstream origins if provided and different
-         if remote_origin and project.remote_origin != remote_origin:
-             project.remote_origin = remote_origin
-             should_update = True
-
-         if upstream_origin and project.upstream_origin != upstream_origin:
-             project.upstream_origin = upstream_origin
-             should_update = True
  
          # Add folder path to aliases if not already present
          project_aliases = json.loads(project.aliases) if isinstance(project.aliases, str) else project.aliases
mcp_code_indexer-3.1.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mcp-code-indexer
- Version: 3.0.3
+ Version: 3.1.1
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
  Author: MCP Code Indexer Contributors
  Maintainer: MCP Code Indexer Contributors
@@ -59,8 +59,8 @@ Dynamic: requires-python
  
  # MCP Code Indexer 🚀
  
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?21)](https://badge.fury.io/py/mcp-code-indexer)
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?21)](https://pypi.org/project/mcp-code-indexer/)
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?22)](https://badge.fury.io/py/mcp-code-indexer)
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?22)](https://pypi.org/project/mcp-code-indexer/)
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
  
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -72,8 +72,7 @@ The MCP Code Indexer solves a critical problem for AI agents working with large
  - **Query file purposes** instantly with natural language descriptions
  - **Search across codebases** using full-text search
  - **Get intelligent recommendations** based on codebase size (overview vs search)
- - **Merge branch descriptions** with conflict resolution
- - **Inherit descriptions** from upstream repositories automatically
+ - **Generate condensed overviews** for project understanding
  
  Perfect for AI-powered code review, refactoring tools, documentation generation, and codebase analysis workflows.
  
@@ -243,7 +242,6 @@ The server provides **12 powerful MCP tools** for intelligent codebase managemen
  ### 🔍 For Advanced Users: Search & Discovery
  - **`get_all_descriptions`** - Complete hierarchical project structure
  - **`get_word_frequency`** - Technical vocabulary analysis with stop-word filtering
- - **`merge_branch_descriptions`** - Two-phase merge with conflict resolution
  - **`update_codebase_overview`** - Create comprehensive codebase documentation
  
  ### 🏥 For System Monitoring: Health & Performance
@@ -253,7 +251,7 @@ The server provides **12 powerful MCP tools** for intelligent codebase managemen
  
  ## 🔗 Git Hook Integration
  
- Keep your codebase documentation automatically synchronized with automated analysis on every commit, rebase, or merge:
+ Keep your codebase documentation automatically synchronized with automated analysis on every commit:
  
  ```bash
  # Analyze current staged changes
@@ -396,8 +394,7 @@ async def analyze_codebase(project_path):
      # Check if codebase is large
      size_info = await mcp_client.call_tool("check_codebase_size", {
          "projectName": "my-project",
-         "folderPath": project_path,
-         "branch": "main"
+         "folderPath": project_path
      })
  
      if size_info["isLarge"]:
@@ -405,15 +402,13 @@ async def analyze_codebase(project_path):
          results = await mcp_client.call_tool("search_descriptions", {
              "projectName": "my-project",
              "folderPath": project_path,
-             "branch": "main",
              "query": "authentication logic"
          })
      else:
          # Get full overview for smaller projects
          overview = await mcp_client.call_tool("get_codebase_overview", {
              "projectName": "my-project",
-             "folderPath": project_path,
-             "branch": "main"
+             "folderPath": project_path
          })
  ```
  
@@ -432,8 +427,7 @@ async def analyze_codebase(project_path):
      # Find files without descriptions
      missing = await client.call_tool('find_missing_descriptions', {
          'projectName': '${{ github.repository }}',
-         'folderPath': '.',
-         'branch': '${{ github.ref_name }}'
+         'folderPath': '.'
      })
  
      # Process with AI and update...
@@ -502,14 +496,14 @@ mcp-code-indexer --githook [OPTIONS]
  
  ### Utility Commands
  ```bash
- # List all projects and branches
+ # List all projects
  mcp-code-indexer --getprojects
  
  # Execute MCP tool directly
  mcp-code-indexer --runcommand '{"method": "tools/call", "params": {...}}'
  
  # Export descriptions for a project
- mcp-code-indexer --dumpdescriptions PROJECT_ID [BRANCH]
+ mcp-code-indexer --dumpdescriptions PROJECT_ID
  ```
  
  ## 🛡️ Security Features
mcp_code_indexer-3.1.1.dist-info/RECORD CHANGED
@@ -6,18 +6,17 @@ mcp_code_indexer/cleanup_manager.py,sha256=1x2de8Mr9dL92q4ubEebsWSF_2n8Yxk549Zoh
  mcp_code_indexer/deepask_handler.py,sha256=iAFA1pKfAnurHBprIyP1TaecPzZ5YhBs-oR8Eccxoe4,18323
  mcp_code_indexer/error_handler.py,sha256=x6dHezVeKcD2ealNLBndt-3SiPiMfh9VOUNoqQSk3rI,11660
  mcp_code_indexer/file_scanner.py,sha256=ctXeZMROgDThEtjzsANTK9TbK-fhTScMBd4iyuleBT4,11734
- mcp_code_indexer/git_hook_handler.py,sha256=OMPfQlykqR2_cE5IxGqbAI92afLOOJxsvXbAQIZrdLU,36579
- mcp_code_indexer/logging_config.py,sha256=tf_U-Zz_axDXRV9s7TfHEeUrBjT1QBWkzPuiyZMffBU,10252
- mcp_code_indexer/main.py,sha256=abCHbNFUYjkJcNYsU0EPdZQI-_Gz9cQCH7dYJ5Jp7I8,31627
- mcp_code_indexer/merge_handler.py,sha256=lJR8eVq2qSrF6MW9mR3Fy8UzrNAaQ7RsI2FMNXne3vQ,14692
+ mcp_code_indexer/git_hook_handler.py,sha256=y_JHj0zeTwID3oWhZHjZKeO1R0XoAifMEMwYGZnYk2w,34920
+ mcp_code_indexer/logging_config.py,sha256=R5R50xFYy-flgHHh5uVGKV7JIPYKohk2RYa1eEn8kYM,10212
+ mcp_code_indexer/main.py,sha256=NThiusK1ZPgxP_ZYlms4sxifcviupHa1-oH6ytbFVwQ,31122
  mcp_code_indexer/query_preprocessor.py,sha256=uHYy8FO4FTs7MFKsXoueYIafWDKOIirRgdUzwh8upb4,5773
  mcp_code_indexer/token_counter.py,sha256=WrifOkbF99nWWHlRlhCHAB2KN7qr83GOHl7apE-hJcE,8460
  mcp_code_indexer/data/stop_words_english.txt,sha256=7Zdd9ameVgA6tN_zuXROvHXD4hkWeELVywPhb7FJEkw,6343
  mcp_code_indexer/database/__init__.py,sha256=aPq_aaRp0aSwOBIq9GkuMNjmLxA411zg2vhdrAuHm-w,38
  mcp_code_indexer/database/connection_health.py,sha256=s2r9L_KipH5NlemAUDnhBQO90Dn4b_0Ht9UDs7F6QPk,24432
- mcp_code_indexer/database/database.py,sha256=4y2JqK0opz4MZk1R_zMBi5B-hRyXd_jt4cksWIiU34A,52724
+ mcp_code_indexer/database/database.py,sha256=9c_DFhitSc5gRLmU2xkeeGAuRQ-mIBPE7_Xbu-vMlF4,49222
  mcp_code_indexer/database/exceptions.py,sha256=AgpRA9Z5R-GoWYdQSPeSdYvAXDopFCQkLGN3jD7Ha4E,10215
- mcp_code_indexer/database/models.py,sha256=FbNtP9Z0bDCoe8JjsYT1HWp0uYsxgZFHR0Blt3d8TBY,7054
+ mcp_code_indexer/database/models.py,sha256=t4HJ2HJfRzMWt0kHjfLEh8p_ecqdQIdej5LyQYUqpsI,6858
  mcp_code_indexer/database/retry_executor.py,sha256=QUayjkCk8OsckVMYiJ_HBQ9NTUss-H8GQeUIUbbw4_U,13419
  mcp_code_indexer/middleware/__init__.py,sha256=p-mP0pMsfiU2yajCPvokCUxUEkh_lu4XJP1LyyMW2ug,220
  mcp_code_indexer/middleware/error_middleware.py,sha256=5agJTAkkPogfPGnja1V9JtG9RG-BiOALIJYctK3byJQ,11730
@@ -25,13 +24,14 @@ mcp_code_indexer/migrations/001_initial.sql,sha256=hIXkCP4LA_4A9HJ1CHU0a1DD-a6EN
  mcp_code_indexer/migrations/002_performance_indexes.sql,sha256=FlKbmcJyKAHTKmjxmpk8ABe6eMcQahz8RciRYcREY_E,2846
  mcp_code_indexer/migrations/003_project_overviews.sql,sha256=pPzn7UmJ_Bda9mJ1nYTN1GeuYwdQHC7Fva6PvWaucUw,891
  mcp_code_indexer/migrations/004_remove_branch_dependency.sql,sha256=whZvj2qfba1-Xq7Vg4IfpCpIrRKN21AdtG0gZbFSRi4,6466
+ mcp_code_indexer/migrations/005_remove_git_remotes.sql,sha256=vT84AaV1hyN4zq5W67hR14TgAwhW7_RNtBHrCoksxA4,1299
  mcp_code_indexer/server/__init__.py,sha256=16xMcuriUOBlawRqWNBk6niwrvtv_JD5xvI36X1Vsmk,41
- mcp_code_indexer/server/mcp_server.py,sha256=EQnwRbjF17AdPvO_HPb6d7cmpxUN51qbyMuhQXrtetU,63168
+ mcp_code_indexer/server/mcp_server.py,sha256=YEb7vNp_TALanGFlm-shAHjkuFfcE4obhVcyjrCvrJA,60194
  mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
  mcp_code_indexer/tools/__init__.py,sha256=m01mxML2UdD7y5rih_XNhNSCMzQTz7WQ_T1TeOcYlnE,49
- mcp_code_indexer-3.0.3.dist-info/licenses/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
- mcp_code_indexer-3.0.3.dist-info/METADATA,sha256=KhSmG_81Lli-5xIPfTy1uOCGYl6Ec1vqGrpaq0IE9ls,20165
- mcp_code_indexer-3.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- mcp_code_indexer-3.0.3.dist-info/entry_points.txt,sha256=8HqWOw1Is7jOP1bvIgaSwouvT9z_Boe-9hd4NzyJOhY,68
- mcp_code_indexer-3.0.3.dist-info/top_level.txt,sha256=yKYCM-gMGt-cnupGfAhnZaoEsROLB6DQ1KFUuyKx4rw,17
- mcp_code_indexer-3.0.3.dist-info/RECORD,,
+ mcp_code_indexer-3.1.1.dist-info/licenses/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
+ mcp_code_indexer-3.1.1.dist-info/METADATA,sha256=r6VEyZ8c5IH9oBQLDQy95GQzktp5i1x89lK4BchUmBg,19849
+ mcp_code_indexer-3.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ mcp_code_indexer-3.1.1.dist-info/entry_points.txt,sha256=8HqWOw1Is7jOP1bvIgaSwouvT9z_Boe-9hd4NzyJOhY,68
+ mcp_code_indexer-3.1.1.dist-info/top_level.txt,sha256=yKYCM-gMGt-cnupGfAhnZaoEsROLB6DQ1KFUuyKx4rw,17
+ mcp_code_indexer-3.1.1.dist-info/RECORD,,
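Each RECORD line is `path,sha256=<digest>,<size>`, where the digest is an unpadded URL-safe base64 SHA-256 per the wheel spec (PEP 427), so the hash changes above pinpoint exactly which files differ between 3.0.3 and 3.1.1. A sketch for recomputing an entry locally:

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    """Build a wheel RECORD line for one file."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# e.g. database.py shrank from 52724 to 49222 bytes in this release
print(record_entry("mcp_code_indexer/database/database.py"))
```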
mcp_code_indexer/merge_handler.py DELETED
@@ -1,386 +0,0 @@
- """
- Two-phase merge functionality for branch descriptions.
-
- This module implements conflict detection and resolution for merging
- file descriptions between branches with AI-assisted conflict resolution.
- """
-
- import logging
- from datetime import datetime
- from typing import Dict, List, Optional, Tuple
- from uuid import uuid4
-
- from mcp_code_indexer.database.database import DatabaseManager
- from mcp_code_indexer.database.models import FileDescription
- from mcp_code_indexer.error_handler import ValidationError, DatabaseError
- from mcp_code_indexer.logging_config import get_logger
-
- logger = get_logger(__name__)
-
-
- class MergeConflict:
-     """Represents a merge conflict between file descriptions."""
-
-     def __init__(
-         self,
-         file_path: str,
-         source_branch: str,
-         target_branch: str,
-         source_description: str,
-         target_description: str,
-         conflict_id: Optional[str] = None
-     ):
-         """
-         Initialize merge conflict.
-
-         Args:
-             file_path: Path to conflicted file
-             source_branch: Branch being merged from
-             target_branch: Branch being merged into
-             source_description: Description from source branch
-             target_description: Description from target branch
-             conflict_id: Optional conflict identifier
-         """
-         self.file_path = file_path
-         self.source_branch = source_branch
-         self.target_branch = target_branch
-         self.source_description = source_description
-         self.target_description = target_description
-         self.conflict_id = conflict_id or str(uuid4())
-         self.resolution: Optional[str] = None
-
-     def to_dict(self) -> Dict:
-         """Convert conflict to dictionary representation."""
-         return {
-             "conflictId": self.conflict_id,
-             "filePath": self.file_path,
-             "sourceBranch": self.source_branch,
-             "targetBranch": self.target_branch,
-             "sourceDescription": self.source_description,
-             "targetDescription": self.target_description,
-             "resolution": self.resolution
-         }
-
-
- class MergeSession:
-     """Manages a merge session with conflicts and resolutions."""
-
-     def __init__(self, project_id: str, source_branch: str, target_branch: str):
-         """
-         Initialize merge session.
-
-         Args:
-             project_id: Project identifier
-             source_branch: Branch being merged from
-             target_branch: Branch being merged into
-         """
-         self.session_id = str(uuid4())
-         self.project_id = project_id
-         self.source_branch = source_branch
-         self.target_branch = target_branch
-         self.conflicts: List[MergeConflict] = []
-         self.created = datetime.utcnow()
-         self.status = "pending"  # pending, resolved, aborted
-
-     def add_conflict(self, conflict: MergeConflict) -> None:
-         """Add a conflict to the session."""
-         self.conflicts.append(conflict)
-
-     def get_conflict_count(self) -> int:
-         """Get total number of conflicts."""
-         return len(self.conflicts)
-
-     def get_resolved_count(self) -> int:
-         """Get number of resolved conflicts."""
-         return len([c for c in self.conflicts if c.resolution is not None])
-
-     def is_fully_resolved(self) -> bool:
-         """Check if all conflicts are resolved."""
-         return self.get_resolved_count() == self.get_conflict_count()
-
-     def to_dict(self) -> Dict:
-         """Convert session to dictionary representation."""
-         return {
-             "sessionId": self.session_id,
-             "projectId": self.project_id,
-             "sourceBranch": self.source_branch,
-             "targetBranch": self.target_branch,
-             "totalConflicts": self.get_conflict_count(),
-             "resolvedConflicts": self.get_resolved_count(),
-             "isFullyResolved": self.is_fully_resolved(),
-             "created": self.created.isoformat(),
-             "status": self.status,
-             "conflicts": [conflict.to_dict() for conflict in self.conflicts]
-         }
-
-
- class MergeHandler:
-     """
-     Handles two-phase merge operations for file descriptions.
-
-     Phase 1: Detect conflicts between source and target branches
-     Phase 2: Apply resolutions and complete merge
-     """
-
-     def __init__(self, db_manager: DatabaseManager):
-         """
-         Initialize merge handler.
-
-         Args:
-             db_manager: Database manager instance
-         """
-         self.db_manager = db_manager
-         self._active_sessions: Dict[str, MergeSession] = {}
-
-     async def start_merge_phase1(
-         self,
-         project_id: str,
-         source_branch: str,
-         target_branch: str
-     ) -> MergeSession:
-         """
-         Phase 1: Detect merge conflicts.
-
-         Args:
-             project_id: Project identifier
-             source_branch: Branch to merge from
-             target_branch: Branch to merge into
-
-         Returns:
-             MergeSession with detected conflicts
-
-         Raises:
-             ValidationError: If branches are invalid
-             DatabaseError: If database operation fails
-         """
-         if source_branch == target_branch:
-             raise ValidationError("Source and target branches cannot be the same")
-
-         logger.info(f"Starting merge phase 1: {source_branch} -> {target_branch}")
-
-         try:
-             # Get file descriptions from both branches
-             source_descriptions = await self.db_manager.get_all_file_descriptions(
-                 project_id, source_branch
-             )
-             target_descriptions = await self.db_manager.get_all_file_descriptions(
-                 project_id, target_branch
-             )
-
-             # Create session
-             session = MergeSession(project_id, source_branch, target_branch)
-
-             # Build lookup dictionaries
-             source_lookup = {desc.file_path: desc for desc in source_descriptions}
-             target_lookup = {desc.file_path: desc for desc in target_descriptions}
-
-             # Detect conflicts
-             conflicts_found = 0
-             all_files = set(source_lookup.keys()) | set(target_lookup.keys())
-
-             for file_path in all_files:
-                 source_desc = source_lookup.get(file_path)
-                 target_desc = target_lookup.get(file_path)
-
-                 # Conflict occurs when:
-                 # 1. File exists in both branches with different descriptions
-                 # 2. File has been modified in source but also exists in target
-                 if source_desc and target_desc:
-                     if source_desc.description != target_desc.description:
-                         conflict = MergeConflict(
-                             file_path=file_path,
-                             source_branch=source_branch,
-                             target_branch=target_branch,
-                             source_description=source_desc.description,
-                             target_description=target_desc.description
-                         )
-                         session.add_conflict(conflict)
-                         conflicts_found += 1
-
-             # Store session
-             self._active_sessions[session.session_id] = session
-
-             logger.info(f"Merge phase 1 completed: {conflicts_found} conflicts found")
-
-             return session
-
-         except Exception as e:
-             logger.error(f"Error in merge phase 1: {e}")
-             raise DatabaseError(f"Failed to detect merge conflicts: {e}") from e
-
-     async def complete_merge_phase2(
-         self,
-         session_id: str,
-         conflict_resolutions: List[Dict[str, str]]
-     ) -> Dict:
-         """
-         Phase 2: Apply resolutions and complete merge.
-
-         Args:
-             session_id: Merge session identifier
-             conflict_resolutions: List of {conflictId, resolvedDescription}
-
-         Returns:
-             Merge result summary
-
-         Raises:
-             ValidationError: If session not found or resolutions invalid
-             DatabaseError: If database operation fails
-         """
-         session = self._active_sessions.get(session_id)
-         if not session:
-             raise ValidationError(f"Merge session not found: {session_id}")
-
-         logger.info(f"Starting merge phase 2 for session {session_id}")
-
-         try:
-             # Validate and apply resolutions
-             resolution_lookup = {res["conflictId"]: res["resolvedDescription"]
-                                  for res in conflict_resolutions}
-
-             resolved_count = 0
-             for conflict in session.conflicts:
-                 if conflict.conflict_id in resolution_lookup:
-                     conflict.resolution = resolution_lookup[conflict.conflict_id]
-                     resolved_count += 1
-
-             # Check if all conflicts are resolved
-             if not session.is_fully_resolved():
-                 unresolved = session.get_conflict_count() - session.get_resolved_count()
-                 raise ValidationError(
-                     f"Not all conflicts resolved: {unresolved} remaining",
-                     details={
-                         "total_conflicts": session.get_conflict_count(),
-                         "resolved_conflicts": session.get_resolved_count(),
-                         "unresolved_conflicts": unresolved
-                     }
-                 )
-
-             # Apply merge
-             merged_descriptions = []
-
-             # Get all descriptions from source branch
-             source_descriptions = await self.db_manager.get_all_file_descriptions(
-                 session.project_id, session.source_branch
-             )
-
-             # Get existing target descriptions
-             target_descriptions = await self.db_manager.get_all_file_descriptions(
-                 session.project_id, session.target_branch
-             )
-
-             target_lookup = {desc.file_path: desc for desc in target_descriptions}
-
-             # Apply resolved descriptions
-             for source_desc in source_descriptions:
-                 resolved_conflict = next(
-                     (c for c in session.conflicts if c.file_path == source_desc.file_path),
-                     None
-                 )
-
-                 if resolved_conflict:
-                     # Use resolved description
-                     new_desc = FileDescription(
-                         project_id=session.project_id,
-                         branch=session.target_branch,
-                         file_path=source_desc.file_path,
-                         description=resolved_conflict.resolution,
-                         file_hash=source_desc.file_hash,
-                         last_modified=datetime.utcnow(),
-                         version=1,
-                         source_project_id=source_desc.source_project_id
-                     )
-                 else:
-                     # No conflict, copy from source
-                     new_desc = FileDescription(
-                         project_id=session.project_id,
-                         branch=session.target_branch,
-                         file_path=source_desc.file_path,
-                         description=source_desc.description,
-                         file_hash=source_desc.file_hash,
-                         last_modified=datetime.utcnow(),
-                         version=1,
-                         source_project_id=source_desc.source_project_id
-                     )
-
-                 merged_descriptions.append(new_desc)
-
-             # Batch update target branch
-             await self.db_manager.batch_create_file_descriptions(merged_descriptions)
-
-             # Mark session as completed
-             session.status = "resolved"
-
-             result = {
-                 "success": True,
-                 "sessionId": session_id,
-                 "sourceBranch": session.source_branch,
-                 "targetBranch": session.target_branch,
-                 "totalConflicts": session.get_conflict_count(),
-                 "resolvedConflicts": session.get_resolved_count(),
-                 "mergedFiles": len(merged_descriptions),
-                 "message": f"Successfully merged {len(merged_descriptions)} files from {session.source_branch} to {session.target_branch}"
-             }
-
-             logger.info(f"Merge phase 2 completed successfully: {len(merged_descriptions)} files merged")
-
-             # Clean up session
-             del self._active_sessions[session_id]
-
-             return result
-
-         except Exception as e:
-             if session:
-                 session.status = "aborted"
-             logger.error(f"Error in merge phase 2: {e}")
-             raise DatabaseError(f"Failed to complete merge: {e}") from e
-
-     def get_session(self, session_id: str) -> Optional[MergeSession]:
-         """Get merge session by ID."""
-         return self._active_sessions.get(session_id)
-
-     def get_active_sessions(self) -> List[MergeSession]:
-         """Get all active merge sessions."""
-         return list(self._active_sessions.values())
-
-     def abort_session(self, session_id: str) -> bool:
-         """
-         Abort a merge session.
-
-         Args:
-             session_id: Session to abort
-
-         Returns:
-             True if session was aborted
-         """
-         session = self._active_sessions.get(session_id)
-         if session:
-             session.status = "aborted"
-             del self._active_sessions[session_id]
-             logger.info(f"Merge session {session_id} aborted")
-             return True
-         return False
-
-     def cleanup_old_sessions(self, max_age_hours: int = 24) -> int:
-         """
-         Clean up old merge sessions.
-
-         Args:
-             max_age_hours: Maximum age of sessions to keep
-
-         Returns:
-             Number of sessions cleaned up
-         """
-         cutoff_time = datetime.utcnow() - datetime.timedelta(hours=max_age_hours)
-         old_sessions = [
-             session_id for session_id, session in self._active_sessions.items()
-             if session.created < cutoff_time
-         ]
-
-         for session_id in old_sessions:
-             del self._active_sessions[session_id]
-
-         if old_sessions:
-             logger.info(f"Cleaned up {len(old_sessions)} old merge sessions")
-
-         return len(old_sessions)