mcp_vector_search-0.12.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/config.py +393 -0
  6. mcp_vector_search/cli/commands/demo.py +358 -0
  7. mcp_vector_search/cli/commands/index.py +744 -0
  8. mcp_vector_search/cli/commands/init.py +645 -0
  9. mcp_vector_search/cli/commands/install.py +675 -0
  10. mcp_vector_search/cli/commands/install_old.py +696 -0
  11. mcp_vector_search/cli/commands/mcp.py +1182 -0
  12. mcp_vector_search/cli/commands/reset.py +393 -0
  13. mcp_vector_search/cli/commands/search.py +773 -0
  14. mcp_vector_search/cli/commands/status.py +549 -0
  15. mcp_vector_search/cli/commands/uninstall.py +485 -0
  16. mcp_vector_search/cli/commands/visualize.py +1467 -0
  17. mcp_vector_search/cli/commands/watch.py +287 -0
  18. mcp_vector_search/cli/didyoumean.py +500 -0
  19. mcp_vector_search/cli/export.py +320 -0
  20. mcp_vector_search/cli/history.py +295 -0
  21. mcp_vector_search/cli/interactive.py +342 -0
  22. mcp_vector_search/cli/main.py +461 -0
  23. mcp_vector_search/cli/output.py +412 -0
  24. mcp_vector_search/cli/suggestions.py +375 -0
  25. mcp_vector_search/config/__init__.py +1 -0
  26. mcp_vector_search/config/constants.py +24 -0
  27. mcp_vector_search/config/defaults.py +200 -0
  28. mcp_vector_search/config/settings.py +134 -0
  29. mcp_vector_search/core/__init__.py +1 -0
  30. mcp_vector_search/core/auto_indexer.py +298 -0
  31. mcp_vector_search/core/connection_pool.py +360 -0
  32. mcp_vector_search/core/database.py +1214 -0
  33. mcp_vector_search/core/directory_index.py +318 -0
  34. mcp_vector_search/core/embeddings.py +294 -0
  35. mcp_vector_search/core/exceptions.py +89 -0
  36. mcp_vector_search/core/factory.py +318 -0
  37. mcp_vector_search/core/git_hooks.py +345 -0
  38. mcp_vector_search/core/indexer.py +1002 -0
  39. mcp_vector_search/core/models.py +294 -0
  40. mcp_vector_search/core/project.py +333 -0
  41. mcp_vector_search/core/scheduler.py +330 -0
  42. mcp_vector_search/core/search.py +952 -0
  43. mcp_vector_search/core/watcher.py +322 -0
  44. mcp_vector_search/mcp/__init__.py +5 -0
  45. mcp_vector_search/mcp/__main__.py +25 -0
  46. mcp_vector_search/mcp/server.py +733 -0
  47. mcp_vector_search/parsers/__init__.py +8 -0
  48. mcp_vector_search/parsers/base.py +296 -0
  49. mcp_vector_search/parsers/dart.py +605 -0
  50. mcp_vector_search/parsers/html.py +413 -0
  51. mcp_vector_search/parsers/javascript.py +643 -0
  52. mcp_vector_search/parsers/php.py +694 -0
  53. mcp_vector_search/parsers/python.py +502 -0
  54. mcp_vector_search/parsers/registry.py +223 -0
  55. mcp_vector_search/parsers/ruby.py +678 -0
  56. mcp_vector_search/parsers/text.py +186 -0
  57. mcp_vector_search/parsers/utils.py +265 -0
  58. mcp_vector_search/py.typed +1 -0
  59. mcp_vector_search/utils/__init__.py +40 -0
  60. mcp_vector_search/utils/gitignore.py +250 -0
  61. mcp_vector_search/utils/monorepo.py +277 -0
  62. mcp_vector_search/utils/timing.py +334 -0
  63. mcp_vector_search/utils/version.py +47 -0
  64. mcp_vector_search-0.12.6.dist-info/METADATA +754 -0
  65. mcp_vector_search-0.12.6.dist-info/RECORD +68 -0
  66. mcp_vector_search-0.12.6.dist-info/WHEEL +4 -0
  67. mcp_vector_search-0.12.6.dist-info/entry_points.txt +2 -0
  68. mcp_vector_search-0.12.6.dist-info/licenses/LICENSE +21 -0
mcp_vector_search/core/models.py
@@ -0,0 +1,294 @@
+"""Data models for MCP Vector Search."""
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+@dataclass
+class CodeChunk:
+    """Represents a chunk of code with metadata."""
+
+    content: str
+    file_path: Path
+    start_line: int
+    end_line: int
+    language: str
+    chunk_type: str = "code"  # code, function, class, comment, docstring
+    function_name: str | None = None
+    class_name: str | None = None
+    docstring: str | None = None
+    imports: list[str] = None
+
+    # Enhancement 1: Complexity scoring
+    complexity_score: float = 0.0
+
+    # Enhancement 3: Hierarchical relationships
+    chunk_id: str | None = None
+    parent_chunk_id: str | None = None
+    child_chunk_ids: list[str] = None
+    chunk_depth: int = 0
+
+    # Enhancement 4: Enhanced metadata
+    decorators: list[str] = None
+    parameters: list[dict] = None
+    return_type: str | None = None
+    type_annotations: dict[str, str] = None
+
+    # Enhancement 5: Monorepo support
+    subproject_name: str | None = None  # "ewtn-plus-foundation"
+    subproject_path: str | None = None  # Relative path from root
+
+    def __post_init__(self) -> None:
+        """Initialize default values and generate chunk ID."""
+        if self.imports is None:
+            self.imports = []
+        if self.child_chunk_ids is None:
+            self.child_chunk_ids = []
+        if self.decorators is None:
+            self.decorators = []
+        if self.parameters is None:
+            self.parameters = []
+        if self.type_annotations is None:
+            self.type_annotations = {}
+
+        # Generate chunk ID if not provided
+        if self.chunk_id is None:
+            import hashlib
+
+            # Include name and a hash of the first 100 chars of content for uniqueness
+            # This ensures deterministic IDs while handling same-location chunks
+            name = self.function_name or self.class_name or ""
+            content_hash = hashlib.sha256(self.content[:100].encode()).hexdigest()[:8]
+            id_string = f"{self.file_path}:{self.chunk_type}:{name}:{self.start_line}:{self.end_line}:{content_hash}"
+            self.chunk_id = hashlib.sha256(id_string.encode()).hexdigest()[:16]
+
+    @property
+    def id(self) -> str:
+        """Generate unique ID for this chunk."""
+        return f"{self.file_path}:{self.start_line}:{self.end_line}"
+
+    @property
+    def line_count(self) -> int:
+        """Get the number of lines in this chunk."""
+        return self.end_line - self.start_line + 1
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for storage."""
+        return {
+            "content": self.content,
+            "file_path": str(self.file_path),
+            "start_line": self.start_line,
+            "end_line": self.end_line,
+            "language": self.language,
+            "chunk_type": self.chunk_type,
+            "function_name": self.function_name,
+            "class_name": self.class_name,
+            "docstring": self.docstring,
+            "imports": self.imports,
+            "complexity_score": self.complexity_score,
+            "chunk_id": self.chunk_id,
+            "parent_chunk_id": self.parent_chunk_id,
+            "child_chunk_ids": self.child_chunk_ids,
+            "chunk_depth": self.chunk_depth,
+            "decorators": self.decorators,
+            "parameters": self.parameters,
+            "return_type": self.return_type,
+            "type_annotations": self.type_annotations,
+            "subproject_name": self.subproject_name,
+            "subproject_path": self.subproject_path,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "CodeChunk":
+        """Create from dictionary."""
+        return cls(
+            content=data["content"],
+            file_path=Path(data["file_path"]),
+            start_line=data["start_line"],
+            end_line=data["end_line"],
+            language=data["language"],
+            chunk_type=data.get("chunk_type", "code"),
+            function_name=data.get("function_name"),
+            class_name=data.get("class_name"),
+            docstring=data.get("docstring"),
+            imports=data.get("imports", []),
+            complexity_score=data.get("complexity_score", 0.0),
+            chunk_id=data.get("chunk_id"),
+            parent_chunk_id=data.get("parent_chunk_id"),
+            child_chunk_ids=data.get("child_chunk_ids", []),
+            chunk_depth=data.get("chunk_depth", 0),
+            decorators=data.get("decorators", []),
+            parameters=data.get("parameters", []),
+            return_type=data.get("return_type"),
+            type_annotations=data.get("type_annotations", {}),
+            subproject_name=data.get("subproject_name"),
+            subproject_path=data.get("subproject_path"),
+        )
+
+
+class SearchResult(BaseModel):
+    """Represents a search result with metadata."""
+
+    content: str = Field(..., description="The matched code content")
+    file_path: Path = Field(..., description="Path to the source file")
+    start_line: int = Field(..., description="Starting line number")
+    end_line: int = Field(..., description="Ending line number")
+    language: str = Field(..., description="Programming language")
+    similarity_score: float = Field(..., description="Similarity score (0.0 to 1.0)")
+    rank: int = Field(..., description="Result rank in search results")
+    chunk_type: str = Field(default="code", description="Type of code chunk")
+    function_name: str | None = Field(
+        default=None, description="Function name if applicable"
+    )
+    class_name: str | None = Field(default=None, description="Class name if applicable")
+    context_before: list[str] = Field(default=[], description="Lines before the match")
+    context_after: list[str] = Field(default=[], description="Lines after the match")
+    highlights: list[str] = Field(default=[], description="Highlighted terms")
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    @property
+    def line_count(self) -> int:
+        """Get the number of lines in this result."""
+        return self.end_line - self.start_line + 1
+
+    @property
+    def location(self) -> str:
+        """Get a human-readable location string."""
+        return f"{self.file_path}:{self.start_line}-{self.end_line}"
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        return {
+            "content": self.content,
+            "file_path": str(self.file_path),
+            "start_line": self.start_line,
+            "end_line": self.end_line,
+            "language": self.language,
+            "similarity_score": self.similarity_score,
+            "rank": self.rank,
+            "chunk_type": self.chunk_type,
+            "function_name": self.function_name,
+            "class_name": self.class_name,
+            "context_before": self.context_before,
+            "context_after": self.context_after,
+            "highlights": self.highlights,
+            "location": self.location,
+            "line_count": self.line_count,
+        }
+
+
+class IndexStats(BaseModel):
+    """Statistics about the search index."""
+
+    total_files: int = Field(..., description="Total number of indexed files")
+    total_chunks: int = Field(..., description="Total number of code chunks")
+    languages: dict[str, int] = Field(..., description="Language distribution")
+    file_types: dict[str, int] = Field(..., description="File type distribution")
+    index_size_mb: float = Field(..., description="Index size in megabytes")
+    last_updated: str = Field(..., description="Last update timestamp")
+    embedding_model: str = Field(..., description="Embedding model used")
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        return {
+            "total_files": self.total_files,
+            "total_chunks": self.total_chunks,
+            "languages": self.languages,
+            "file_types": self.file_types,
+            "index_size_mb": self.index_size_mb,
+            "last_updated": self.last_updated,
+            "embedding_model": self.embedding_model,
+        }
+
+
+@dataclass
+class Directory:
+    """Represents a directory in the project structure."""
+
+    path: Path  # Relative path from project root
+    name: str  # Directory name
+    parent_path: Path | None = None  # Parent directory path (None for root)
+    file_count: int = 0  # Number of files directly in this directory
+    subdirectory_count: int = 0  # Number of subdirectories
+    total_chunks: int = 0  # Total code chunks in this directory (recursive)
+    languages: dict[str, int] = None  # Language distribution in this directory
+    depth: int = 0  # Depth from project root (0 = root)
+    is_package: bool = False  # True if contains __init__.py or package.json
+    last_modified: float | None = (
+        None  # Most recent file modification time (unix timestamp)
+    )
+
+    def __post_init__(self) -> None:
+        """Initialize default values and generate directory ID."""
+        if self.languages is None:
+            self.languages = {}
+
+    @property
+    def id(self) -> str:
+        """Generate unique ID for this directory."""
+        import hashlib
+
+        return hashlib.sha256(str(self.path).encode()).hexdigest()[:16]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for storage."""
+        return {
+            "path": str(self.path),
+            "name": self.name,
+            "parent_path": str(self.parent_path) if self.parent_path else None,
+            "file_count": self.file_count,
+            "subdirectory_count": self.subdirectory_count,
+            "total_chunks": self.total_chunks,
+            "languages": self.languages,
+            "depth": self.depth,
+            "is_package": self.is_package,
+            "last_modified": self.last_modified,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "Directory":
+        """Create from dictionary."""
+        return cls(
+            path=Path(data["path"]),
+            name=data["name"],
+            parent_path=Path(data["parent_path"]) if data.get("parent_path") else None,
+            file_count=data.get("file_count", 0),
+            subdirectory_count=data.get("subdirectory_count", 0),
+            total_chunks=data.get("total_chunks", 0),
+            languages=data.get("languages", {}),
+            depth=data.get("depth", 0),
+            is_package=data.get("is_package", False),
+            last_modified=data.get("last_modified"),
+        )
+
+
+class ProjectInfo(BaseModel):
+    """Information about a project."""
+
+    name: str = Field(..., description="Project name")
+    root_path: Path = Field(..., description="Project root directory")
+    config_path: Path = Field(..., description="Configuration file path")
+    index_path: Path = Field(..., description="Index directory path")
+    is_initialized: bool = Field(..., description="Whether project is initialized")
+    languages: list[str] = Field(default=[], description="Detected languages")
+    file_count: int = Field(default=0, description="Number of indexable files")
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        return {
+            "name": self.name,
+            "root_path": str(self.root_path),
+            "config_path": str(self.config_path),
+            "index_path": str(self.index_path),
+            "is_initialized": self.is_initialized,
+            "languages": self.languages,
+            "file_count": self.file_count,
+        }
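
For orientation, a minimal usage sketch (not part of the package; it assumes the module above is importable as mcp_vector_search.core.models and uses a hypothetical file path). It shows how CodeChunk derives a deterministic chunk_id in __post_init__ and how to_dict()/from_dict() round-trip a chunk:

# Illustrative sketch only; exercises the CodeChunk model shown above.
from pathlib import Path

from mcp_vector_search.core.models import CodeChunk

chunk = CodeChunk(
    content="def add(a, b):\n    return a + b\n",
    file_path=Path("src/math_utils.py"),  # hypothetical file
    start_line=10,
    end_line=11,
    language="python",
    chunk_type="function",
    function_name="add",
)

# chunk_id is derived from path, chunk type, name, line range, and a hash of
# the content, so rebuilding an identical chunk yields the same 16-char ID.
print(chunk.chunk_id)
print(chunk.id)          # e.g. "src/math_utils.py:10:11"
print(chunk.line_count)  # 2

# Round-trip through the dictionary storage representation.
restored = CodeChunk.from_dict(chunk.to_dict())
assert restored.chunk_id == chunk.chunk_id
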
mcp_vector_search/core/project.py
@@ -0,0 +1,333 @@
+"""Project detection and management for MCP Vector Search."""
+
+import json
+from pathlib import Path
+
+from loguru import logger
+
+from ..config.defaults import (
+    DEFAULT_FILE_EXTENSIONS,
+    DEFAULT_IGNORE_PATTERNS,
+    get_default_config_path,
+    get_default_index_path,
+    get_language_from_extension,
+)
+from ..config.settings import ProjectConfig
+from ..utils.gitignore import create_gitignore_parser
+from .exceptions import (
+    ConfigurationError,
+    ProjectInitializationError,
+    ProjectNotFoundError,
+)
+from .models import ProjectInfo
+
+
+class ProjectManager:
+    """Manages project detection, initialization, and configuration."""
+
+    def __init__(self, project_root: Path | None = None) -> None:
+        """Initialize project manager.
+
+        Args:
+            project_root: Project root directory. If None, will auto-detect.
+        """
+        self.project_root = project_root or self._detect_project_root()
+        self._config: ProjectConfig | None = None
+
+        # Initialize gitignore parser
+        try:
+            self.gitignore_parser = create_gitignore_parser(self.project_root)
+        except Exception as e:
+            logger.debug(f"Failed to load gitignore patterns: {e}")
+            self.gitignore_parser = None
+
+    def _detect_project_root(self) -> Path:
+        """Auto-detect project root directory."""
+        current = Path.cwd()
+
+        # Look for common project indicators
+        indicators = [
+            ".git",
+            ".mcp-vector-search",
+            "pyproject.toml",
+            "package.json",
+            "Cargo.toml",
+            "go.mod",
+            "pom.xml",
+            "build.gradle",
+            ".project",
+        ]
+
+        # Walk up the directory tree
+        for path in [current] + list(current.parents):
+            for indicator in indicators:
+                if (path / indicator).exists():
+                    logger.debug(f"Detected project root: {path} (found {indicator})")
+                    return path
+
+        # Default to current directory
+        logger.debug(f"Using current directory as project root: {current}")
+        return current
+
+    def is_initialized(self) -> bool:
+        """Check if project is initialized for MCP Vector Search."""
+        config_path = get_default_config_path(self.project_root)
+        index_path = get_default_index_path(self.project_root)
+
+        return config_path.exists() and index_path.exists()
+
+    def initialize(
+        self,
+        file_extensions: list[str] | None = None,
+        embedding_model: str = "microsoft/codebert-base",
+        similarity_threshold: float = 0.5,
+        force: bool = False,
+    ) -> ProjectConfig:
+        """Initialize project for MCP Vector Search.
+
+        Args:
+            file_extensions: File extensions to index
+            embedding_model: Embedding model to use
+            similarity_threshold: Similarity threshold for search
+            force: Force re-initialization if already exists
+
+        Returns:
+            Project configuration
+
+        Raises:
+            ProjectInitializationError: If initialization fails
+        """
+        if self.is_initialized() and not force:
+            raise ProjectInitializationError(
+                f"Project already initialized at {self.project_root}. Use --force to re-initialize."
+            )
+
+        try:
+            # Create index directory
+            index_path = get_default_index_path(self.project_root)
+            index_path.mkdir(parents=True, exist_ok=True)
+
+            # Detect languages and files
+            detected_languages = self.detect_languages()
+            file_count = self.count_indexable_files(
+                file_extensions or DEFAULT_FILE_EXTENSIONS
+            )
+
+            # Create configuration
+            config = ProjectConfig(
+                project_root=self.project_root,
+                index_path=index_path,
+                file_extensions=file_extensions or DEFAULT_FILE_EXTENSIONS,
+                embedding_model=embedding_model,
+                similarity_threshold=similarity_threshold,
+                languages=detected_languages,
+            )
+
+            # Save configuration
+            self.save_config(config)
+
+            logger.info(
+                f"Initialized project at {self.project_root}",
+                languages=detected_languages,
+                file_count=file_count,
+                extensions=config.file_extensions,
+            )
+
+            self._config = config
+            return config
+
+        except Exception as e:
+            raise ProjectInitializationError(
+                f"Failed to initialize project: {e}"
+            ) from e
+
+    def load_config(self) -> ProjectConfig:
+        """Load project configuration.
+
+        Returns:
+            Project configuration
+
+        Raises:
+            ProjectNotFoundError: If project is not initialized
+            ConfigurationError: If configuration is invalid
+        """
+        if not self.is_initialized():
+            raise ProjectNotFoundError(
+                f"Project not initialized at {self.project_root}. Run 'mcp-vector-search init' first."
+            )
+
+        config_path = get_default_config_path(self.project_root)
+
+        try:
+            with open(config_path) as f:
+                config_data = json.load(f)
+
+            # Convert paths back to Path objects
+            config_data["project_root"] = Path(config_data["project_root"])
+            config_data["index_path"] = Path(config_data["index_path"])
+
+            config = ProjectConfig(**config_data)
+            self._config = config
+            return config
+
+        except Exception as e:
+            raise ConfigurationError(f"Failed to load configuration: {e}") from e
+
+    def save_config(self, config: ProjectConfig) -> None:
+        """Save project configuration.
+
+        Args:
+            config: Project configuration to save
+
+        Raises:
+            ConfigurationError: If saving fails
+        """
+        config_path = get_default_config_path(self.project_root)
+        config_path.parent.mkdir(parents=True, exist_ok=True)
+
+        try:
+            # Convert to JSON-serializable format
+            config_data = config.model_dump()
+            config_data["project_root"] = str(config.project_root)
+            config_data["index_path"] = str(config.index_path)
+
+            with open(config_path, "w") as f:
+                json.dump(config_data, f, indent=2)
+
+            logger.debug(f"Saved configuration to {config_path}")
+
+        except Exception as e:
+            raise ConfigurationError(f"Failed to save configuration: {e}") from e
+
+    @property
+    def config(self) -> ProjectConfig:
+        """Get project configuration, loading if necessary."""
+        if self._config is None:
+            self._config = self.load_config()
+        return self._config
+
+    def detect_languages(self) -> list[str]:
+        """Detect programming languages in the project.
+
+        Returns:
+            List of detected language names
+        """
+        languages: set[str] = set()
+
+        for file_path in self._iter_source_files():
+            language = get_language_from_extension(file_path.suffix)
+            if language != "text":
+                languages.add(language)
+
+        return sorted(languages)
+
+    def count_indexable_files(self, extensions: list[str]) -> int:
+        """Count files that can be indexed.
+
+        Args:
+            extensions: File extensions to count
+
+        Returns:
+            Number of indexable files
+        """
+        count = 0
+        for file_path in self._iter_source_files():
+            if file_path.suffix in extensions:
+                count += 1
+        return count
+
+    def get_project_info(self, file_count: int | None = None) -> ProjectInfo:
+        """Get comprehensive project information.
+
+        Args:
+            file_count: Optional pre-computed file count (avoids expensive filesystem scan)
+
+        Returns:
+            Project information
+        """
+        config_path = get_default_config_path(self.project_root)
+        index_path = get_default_index_path(self.project_root)
+
+        is_initialized = self.is_initialized()
+        languages = []
+        computed_file_count = 0
+
+        if is_initialized:
+            try:
+                config = self.config
+                languages = config.languages
+                # Use provided file_count if available to avoid filesystem scan
+                if file_count is not None:
+                    computed_file_count = file_count
+                else:
+                    computed_file_count = self.count_indexable_files(
+                        config.file_extensions
+                    )
+            except Exception:
+                # Ignore errors when getting detailed info
+                pass
+
+        return ProjectInfo(
+            name=self.project_root.name,
+            root_path=self.project_root,
+            config_path=config_path,
+            index_path=index_path,
+            is_initialized=is_initialized,
+            languages=languages,
+            file_count=computed_file_count,
+        )
+
+    def _iter_source_files(self) -> list[Path]:
+        """Iterate over source files in the project.
+
+        Returns:
+            List of source file paths
+        """
+        files = []
+
+        for path in self.project_root.rglob("*"):
+            if not path.is_file():
+                continue
+
+            # Skip ignored patterns
+            # PERFORMANCE: Pass is_directory=False since we already checked is_file()
+            if self._should_ignore_path(path, is_directory=False):
+                continue
+
+            files.append(path)
+
+        return files
+
+    def _should_ignore_path(self, path: Path, is_directory: bool | None = None) -> bool:
+        """Check if a path should be ignored.
+
+        Args:
+            path: Path to check
+            is_directory: Optional hint if path is a directory (avoids filesystem check)
+
+        Returns:
+            True if path should be ignored
+        """
+        # First check gitignore rules if available
+        # PERFORMANCE: Pass is_directory hint to avoid redundant stat() calls
+        if self.gitignore_parser and self.gitignore_parser.is_ignored(
+            path, is_directory=is_directory
+        ):
+            return True
+
+        # Check if any parent directory is in ignore patterns
+        for part in path.parts:
+            if part in DEFAULT_IGNORE_PATTERNS:
+                return True
+
+        # Check relative path from project root
+        try:
+            relative_path = path.relative_to(self.project_root)
+            for part in relative_path.parts:
+                if part in DEFAULT_IGNORE_PATTERNS:
+                    return True
+        except ValueError:
+            # Path is not relative to project root
+            return True
+
+        return False
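
Similarly, a hedged sketch of the initialize-or-load flow implied by the module above (the repository path is hypothetical; the configuration and index locations come from the config.defaults helpers):

# Illustrative sketch only; typical ProjectManager flow for this package.
from pathlib import Path

from mcp_vector_search.core.project import ProjectManager

manager = ProjectManager(project_root=Path("/path/to/repo"))  # hypothetical path

if not manager.is_initialized():
    # Creates the index directory and writes the project configuration.
    config = manager.initialize(similarity_threshold=0.5)
else:
    config = manager.load_config()

info = manager.get_project_info()
print(info.name, info.languages, info.file_count)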