mcp-vector-search 0.15.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (86) hide show
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/chat.py +534 -0
  6. mcp_vector_search/cli/commands/config.py +393 -0
  7. mcp_vector_search/cli/commands/demo.py +358 -0
  8. mcp_vector_search/cli/commands/index.py +762 -0
  9. mcp_vector_search/cli/commands/init.py +658 -0
  10. mcp_vector_search/cli/commands/install.py +869 -0
  11. mcp_vector_search/cli/commands/install_old.py +700 -0
  12. mcp_vector_search/cli/commands/mcp.py +1254 -0
  13. mcp_vector_search/cli/commands/reset.py +393 -0
  14. mcp_vector_search/cli/commands/search.py +796 -0
  15. mcp_vector_search/cli/commands/setup.py +1133 -0
  16. mcp_vector_search/cli/commands/status.py +584 -0
  17. mcp_vector_search/cli/commands/uninstall.py +404 -0
  18. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  19. mcp_vector_search/cli/commands/visualize/cli.py +265 -0
  20. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  21. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  22. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  23. mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
  24. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  25. mcp_vector_search/cli/commands/visualize/server.py +201 -0
  26. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  27. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  28. mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
  29. mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
  30. mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
  31. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  32. mcp_vector_search/cli/commands/watch.py +287 -0
  33. mcp_vector_search/cli/didyoumean.py +520 -0
  34. mcp_vector_search/cli/export.py +320 -0
  35. mcp_vector_search/cli/history.py +295 -0
  36. mcp_vector_search/cli/interactive.py +342 -0
  37. mcp_vector_search/cli/main.py +484 -0
  38. mcp_vector_search/cli/output.py +414 -0
  39. mcp_vector_search/cli/suggestions.py +375 -0
  40. mcp_vector_search/config/__init__.py +1 -0
  41. mcp_vector_search/config/constants.py +24 -0
  42. mcp_vector_search/config/defaults.py +200 -0
  43. mcp_vector_search/config/settings.py +146 -0
  44. mcp_vector_search/core/__init__.py +1 -0
  45. mcp_vector_search/core/auto_indexer.py +298 -0
  46. mcp_vector_search/core/config_utils.py +394 -0
  47. mcp_vector_search/core/connection_pool.py +360 -0
  48. mcp_vector_search/core/database.py +1237 -0
  49. mcp_vector_search/core/directory_index.py +318 -0
  50. mcp_vector_search/core/embeddings.py +294 -0
  51. mcp_vector_search/core/exceptions.py +89 -0
  52. mcp_vector_search/core/factory.py +318 -0
  53. mcp_vector_search/core/git_hooks.py +345 -0
  54. mcp_vector_search/core/indexer.py +1002 -0
  55. mcp_vector_search/core/llm_client.py +453 -0
  56. mcp_vector_search/core/models.py +294 -0
  57. mcp_vector_search/core/project.py +350 -0
  58. mcp_vector_search/core/scheduler.py +330 -0
  59. mcp_vector_search/core/search.py +952 -0
  60. mcp_vector_search/core/watcher.py +322 -0
  61. mcp_vector_search/mcp/__init__.py +5 -0
  62. mcp_vector_search/mcp/__main__.py +25 -0
  63. mcp_vector_search/mcp/server.py +752 -0
  64. mcp_vector_search/parsers/__init__.py +8 -0
  65. mcp_vector_search/parsers/base.py +296 -0
  66. mcp_vector_search/parsers/dart.py +605 -0
  67. mcp_vector_search/parsers/html.py +413 -0
  68. mcp_vector_search/parsers/javascript.py +643 -0
  69. mcp_vector_search/parsers/php.py +694 -0
  70. mcp_vector_search/parsers/python.py +502 -0
  71. mcp_vector_search/parsers/registry.py +223 -0
  72. mcp_vector_search/parsers/ruby.py +678 -0
  73. mcp_vector_search/parsers/text.py +186 -0
  74. mcp_vector_search/parsers/utils.py +265 -0
  75. mcp_vector_search/py.typed +1 -0
  76. mcp_vector_search/utils/__init__.py +42 -0
  77. mcp_vector_search/utils/gitignore.py +250 -0
  78. mcp_vector_search/utils/gitignore_updater.py +212 -0
  79. mcp_vector_search/utils/monorepo.py +339 -0
  80. mcp_vector_search/utils/timing.py +338 -0
  81. mcp_vector_search/utils/version.py +47 -0
  82. mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
  83. mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
  84. mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
  85. mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
  86. mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,294 @@
1
+ """Data models for MCP Vector Search."""
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
+ @dataclass
11
+ class CodeChunk:
12
+ """Represents a chunk of code with metadata."""
13
+
14
+ content: str
15
+ file_path: Path
16
+ start_line: int
17
+ end_line: int
18
+ language: str
19
+ chunk_type: str = "code" # code, function, class, comment, docstring
20
+ function_name: str | None = None
21
+ class_name: str | None = None
22
+ docstring: str | None = None
23
+ imports: list[str] = None
24
+
25
+ # Enhancement 1: Complexity scoring
26
+ complexity_score: float = 0.0
27
+
28
+ # Enhancement 3: Hierarchical relationships
29
+ chunk_id: str | None = None
30
+ parent_chunk_id: str | None = None
31
+ child_chunk_ids: list[str] = None
32
+ chunk_depth: int = 0
33
+
34
+ # Enhancement 4: Enhanced metadata
35
+ decorators: list[str] = None
36
+ parameters: list[dict] = None
37
+ return_type: str | None = None
38
+ type_annotations: dict[str, str] = None
39
+
40
+ # Enhancement 5: Monorepo support
41
+ subproject_name: str | None = None # "ewtn-plus-foundation"
42
+ subproject_path: str | None = None # Relative path from root
43
+
44
+ def __post_init__(self) -> None:
45
+ """Initialize default values and generate chunk ID."""
46
+ if self.imports is None:
47
+ self.imports = []
48
+ if self.child_chunk_ids is None:
49
+ self.child_chunk_ids = []
50
+ if self.decorators is None:
51
+ self.decorators = []
52
+ if self.parameters is None:
53
+ self.parameters = []
54
+ if self.type_annotations is None:
55
+ self.type_annotations = {}
56
+
57
+ # Generate chunk ID if not provided
58
+ if self.chunk_id is None:
59
+ import hashlib
60
+
61
+ # Include name and first 50 chars of content for uniqueness
62
+ # This ensures deterministic IDs while handling same-location chunks
63
+ name = self.function_name or self.class_name or ""
64
+ content_hash = hashlib.sha256(self.content[:100].encode()).hexdigest()[:8]
65
+ id_string = f"{self.file_path}:{self.chunk_type}:{name}:{self.start_line}:{self.end_line}:{content_hash}"
66
+ self.chunk_id = hashlib.sha256(id_string.encode()).hexdigest()[:16]
67
+
68
+ @property
69
+ def id(self) -> str:
70
+ """Generate unique ID for this chunk."""
71
+ return f"{self.file_path}:{self.start_line}:{self.end_line}"
72
+
73
+ @property
74
+ def line_count(self) -> int:
75
+ """Get the number of lines in this chunk."""
76
+ return self.end_line - self.start_line + 1
77
+
78
+ def to_dict(self) -> dict[str, Any]:
79
+ """Convert to dictionary for storage."""
80
+ return {
81
+ "content": self.content,
82
+ "file_path": str(self.file_path),
83
+ "start_line": self.start_line,
84
+ "end_line": self.end_line,
85
+ "language": self.language,
86
+ "chunk_type": self.chunk_type,
87
+ "function_name": self.function_name,
88
+ "class_name": self.class_name,
89
+ "docstring": self.docstring,
90
+ "imports": self.imports,
91
+ "complexity_score": self.complexity_score,
92
+ "chunk_id": self.chunk_id,
93
+ "parent_chunk_id": self.parent_chunk_id,
94
+ "child_chunk_ids": self.child_chunk_ids,
95
+ "chunk_depth": self.chunk_depth,
96
+ "decorators": self.decorators,
97
+ "parameters": self.parameters,
98
+ "return_type": self.return_type,
99
+ "type_annotations": self.type_annotations,
100
+ "subproject_name": self.subproject_name,
101
+ "subproject_path": self.subproject_path,
102
+ }
103
+
104
+ @classmethod
105
+ def from_dict(cls, data: dict[str, Any]) -> "CodeChunk":
106
+ """Create from dictionary."""
107
+ return cls(
108
+ content=data["content"],
109
+ file_path=Path(data["file_path"]),
110
+ start_line=data["start_line"],
111
+ end_line=data["end_line"],
112
+ language=data["language"],
113
+ chunk_type=data.get("chunk_type", "code"),
114
+ function_name=data.get("function_name"),
115
+ class_name=data.get("class_name"),
116
+ docstring=data.get("docstring"),
117
+ imports=data.get("imports", []),
118
+ complexity_score=data.get("complexity_score", 0.0),
119
+ chunk_id=data.get("chunk_id"),
120
+ parent_chunk_id=data.get("parent_chunk_id"),
121
+ child_chunk_ids=data.get("child_chunk_ids", []),
122
+ chunk_depth=data.get("chunk_depth", 0),
123
+ decorators=data.get("decorators", []),
124
+ parameters=data.get("parameters", []),
125
+ return_type=data.get("return_type"),
126
+ type_annotations=data.get("type_annotations", {}),
127
+ subproject_name=data.get("subproject_name"),
128
+ subproject_path=data.get("subproject_path"),
129
+ )
130
+
131
+
class SearchResult(BaseModel):
    """One hit returned by a search, together with its location and context."""

    content: str = Field(..., description="The matched code content")
    file_path: Path = Field(..., description="Path to the source file")
    start_line: int = Field(..., description="Starting line number")
    end_line: int = Field(..., description="Ending line number")
    language: str = Field(..., description="Programming language")
    similarity_score: float = Field(..., description="Similarity score (0.0 to 1.0)")
    rank: int = Field(..., description="Result rank in search results")
    chunk_type: str = Field(default="code", description="Type of code chunk")
    function_name: str | None = Field(
        default=None,
        description="Function name if applicable",
    )
    class_name: str | None = Field(
        default=None,
        description="Class name if applicable",
    )
    context_before: list[str] = Field(
        default=[],
        description="Lines before the match",
    )
    context_after: list[str] = Field(
        default=[],
        description="Lines after the match",
    )
    highlights: list[str] = Field(default=[], description="Highlighted terms")

    class Config:
        arbitrary_types_allowed = True

    @property
    def line_count(self) -> int:
        """Number of lines spanned by the result, counting both end lines."""
        span = self.end_line - self.start_line
        return span + 1

    @property
    def location(self) -> str:
        """Location rendered as ``<file_path>:<start_line>-<end_line>``."""
        line_range = f"{self.start_line}-{self.end_line}"
        return f"{self.file_path}:{line_range}"

    def to_dict(self) -> dict[str, Any]:
        """Build a plain dictionary for serialization.

        Returns:
            Every model field (``file_path`` as ``str``) followed by the two
            derived values ``location`` and ``line_count``.
        """
        serialized: dict[str, Any] = {
            "content": self.content,
            "file_path": str(self.file_path),
        }
        # These fields are copied through unchanged, in declaration order.
        passthrough = (
            "start_line",
            "end_line",
            "language",
            "similarity_score",
            "rank",
            "chunk_type",
            "function_name",
            "class_name",
            "context_before",
            "context_after",
            "highlights",
        )
        for attr in passthrough:
            serialized[attr] = getattr(self, attr)
        # Derived values come last.
        serialized["location"] = self.location
        serialized["line_count"] = self.line_count
        return serialized
183
+
184
+
class IndexStats(BaseModel):
    """Summary figures describing the search index."""

    total_files: int = Field(..., description="Total number of indexed files")
    total_chunks: int = Field(..., description="Total number of code chunks")
    languages: dict[str, int] = Field(..., description="Language distribution")
    file_types: dict[str, int] = Field(..., description="File type distribution")
    index_size_mb: float = Field(..., description="Index size in megabytes")
    last_updated: str = Field(..., description="Last update timestamp")
    embedding_model: str = Field(..., description="Embedding model used")

    def to_dict(self) -> dict[str, Any]:
        """Build a plain dictionary for serialization.

        Returns:
            A mapping with one entry per field, in declaration order; values
            are passed through unchanged.
        """
        field_names = (
            "total_files",
            "total_chunks",
            "languages",
            "file_types",
            "index_size_mb",
            "last_updated",
            "embedding_model",
        )
        return {field_name: getattr(self, field_name) for field_name in field_names}
207
+
208
+
209
+ @dataclass
210
+ class Directory:
211
+ """Represents a directory in the project structure."""
212
+
213
+ path: Path # Relative path from project root
214
+ name: str # Directory name
215
+ parent_path: Path | None = None # Parent directory path (None for root)
216
+ file_count: int = 0 # Number of files directly in this directory
217
+ subdirectory_count: int = 0 # Number of subdirectories
218
+ total_chunks: int = 0 # Total code chunks in this directory (recursive)
219
+ languages: dict[str, int] = None # Language distribution in this directory
220
+ depth: int = 0 # Depth from project root (0 = root)
221
+ is_package: bool = False # True if contains __init__.py or package.json
222
+ last_modified: float | None = (
223
+ None # Most recent file modification time (unix timestamp)
224
+ )
225
+
226
+ def __post_init__(self) -> None:
227
+ """Initialize default values and generate directory ID."""
228
+ if self.languages is None:
229
+ self.languages = {}
230
+
231
+ @property
232
+ def id(self) -> str:
233
+ """Generate unique ID for this directory."""
234
+ import hashlib
235
+
236
+ return hashlib.sha256(str(self.path).encode()).hexdigest()[:16]
237
+
238
+ def to_dict(self) -> dict[str, Any]:
239
+ """Convert to dictionary for storage."""
240
+ return {
241
+ "path": str(self.path),
242
+ "name": self.name,
243
+ "parent_path": str(self.parent_path) if self.parent_path else None,
244
+ "file_count": self.file_count,
245
+ "subdirectory_count": self.subdirectory_count,
246
+ "total_chunks": self.total_chunks,
247
+ "languages": self.languages,
248
+ "depth": self.depth,
249
+ "is_package": self.is_package,
250
+ "last_modified": self.last_modified,
251
+ }
252
+
253
+ @classmethod
254
+ def from_dict(cls, data: dict[str, Any]) -> "Directory":
255
+ """Create from dictionary."""
256
+ return cls(
257
+ path=Path(data["path"]),
258
+ name=data["name"],
259
+ parent_path=Path(data["parent_path"]) if data.get("parent_path") else None,
260
+ file_count=data.get("file_count", 0),
261
+ subdirectory_count=data.get("subdirectory_count", 0),
262
+ total_chunks=data.get("total_chunks", 0),
263
+ languages=data.get("languages", {}),
264
+ depth=data.get("depth", 0),
265
+ is_package=data.get("is_package", False),
266
+ last_modified=data.get("last_modified"),
267
+ )
268
+
269
+
class ProjectInfo(BaseModel):
    """Descriptive snapshot of a project: paths, state and detected content."""

    name: str = Field(..., description="Project name")
    root_path: Path = Field(..., description="Project root directory")
    config_path: Path = Field(..., description="Configuration file path")
    index_path: Path = Field(..., description="Index directory path")
    is_initialized: bool = Field(..., description="Whether project is initialized")
    languages: list[str] = Field(default=[], description="Detected languages")
    file_count: int = Field(default=0, description="Number of indexable files")

    class Config:
        arbitrary_types_allowed = True

    def to_dict(self) -> dict[str, Any]:
        """Build a plain dictionary for serialization.

        Returns:
            A mapping with one entry per field, in declaration order; the
            three path fields are rendered as ``str``.
        """
        serialized: dict[str, Any] = {"name": self.name}
        # Path objects are stringified.
        for path_attr in ("root_path", "config_path", "index_path"):
            serialized[path_attr] = str(getattr(self, path_attr))
        serialized["is_initialized"] = self.is_initialized
        serialized["languages"] = self.languages
        serialized["file_count"] = self.file_count
        return serialized
@@ -0,0 +1,350 @@
1
+ """Project detection and management for MCP Vector Search."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+ from loguru import logger
7
+
8
+ from ..config.defaults import (
9
+ DEFAULT_FILE_EXTENSIONS,
10
+ DEFAULT_IGNORE_PATTERNS,
11
+ get_default_config_path,
12
+ get_default_index_path,
13
+ get_language_from_extension,
14
+ )
15
+ from ..config.settings import ProjectConfig
16
+ from ..utils.gitignore import create_gitignore_parser
17
+ from .exceptions import (
18
+ ConfigurationError,
19
+ ProjectInitializationError,
20
+ ProjectNotFoundError,
21
+ )
22
+ from .models import ProjectInfo
23
+
24
+
class ProjectManager:
    """Manages project detection, initialization, and configuration.

    Attributes:
        project_root: Directory treated as the root of the project.
        gitignore_parser: Object returned by ``create_gitignore_parser`` for
            ``project_root``, or ``None`` when creating it raised.
    """

    # Names whose presence in a directory marks it as a project root.
    _ROOT_INDICATORS = (
        ".git",
        ".mcp-vector-search",
        "pyproject.toml",
        "package.json",
        "Cargo.toml",
        "go.mod",
        "pom.xml",
        "build.gradle",
        ".project",
    )

    def __init__(self, project_root: Path | None = None) -> None:
        """Initialize project manager.

        Args:
            project_root: Project root directory. If None, will auto-detect.
        """
        self.project_root = project_root or self._detect_project_root()
        # Populated by load_config() / initialize(); read via ``config``.
        self._config: ProjectConfig | None = None

        # Gitignore support is best-effort: a failure only disables it.
        try:
            self.gitignore_parser = create_gitignore_parser(self.project_root)
        except Exception as e:
            logger.debug(f"Failed to load gitignore patterns: {e}")
            self.gitignore_parser = None

    def _detect_project_root(self) -> Path:
        """Auto-detect project root directory.

        Starts at the current working directory and walks up through its
        parents; the first directory containing any of ``_ROOT_INDICATORS``
        wins.

        Returns:
            The detected root, or the current working directory when no
            indicator is found anywhere up the tree.
        """
        current = Path.cwd()

        for candidate in (current, *current.parents):
            for indicator in self._ROOT_INDICATORS:
                if (candidate / indicator).exists():
                    logger.debug(
                        f"Detected project root: {candidate} (found {indicator})"
                    )
                    return candidate

        # Default to current directory
        logger.debug(f"Using current directory as project root: {current}")
        return current

    def is_initialized(self) -> bool:
        """Check if project is initialized for MCP Vector Search.

        Returns:
            True when both the default config path and the default index
            path for ``project_root`` exist.
        """
        config_path = get_default_config_path(self.project_root)
        index_path = get_default_index_path(self.project_root)

        return config_path.exists() and index_path.exists()

    def initialize(
        self,
        file_extensions: list[str] | None = None,
        embedding_model: str = "microsoft/codebert-base",
        similarity_threshold: float = 0.5,
        force: bool = False,
    ) -> ProjectConfig:
        """Initialize project for MCP Vector Search.

        Creates the index directory, tries to add it to ``.gitignore``,
        detects languages, then builds, saves and caches the configuration.

        Args:
            file_extensions: File extensions to index; a falsy value selects
                ``DEFAULT_FILE_EXTENSIONS``.
            embedding_model: Embedding model to use
            similarity_threshold: Similarity threshold for search
            force: Force re-initialization if already exists

        Returns:
            Project configuration

        Raises:
            ProjectInitializationError: If the project is already initialized
                and ``force`` is false, or if any initialization step fails.
        """
        if self.is_initialized() and not force:
            raise ProjectInitializationError(
                f"Project already initialized at {self.project_root}. Use --force to re-initialize."
            )

        try:
            extensions = file_extensions or DEFAULT_FILE_EXTENSIONS

            # Create index directory
            index_path = get_default_index_path(self.project_root)
            index_path.mkdir(parents=True, exist_ok=True)

            # Ensure .mcp-vector-search/ is in .gitignore
            # This is a non-critical operation - failures are logged but don't block initialization
            try:
                from ..utils.gitignore_updater import ensure_gitignore_entry

                ensure_gitignore_entry(
                    self.project_root,
                    pattern=".mcp-vector-search/",
                    comment="MCP Vector Search index directory",
                )
            except Exception as e:
                # Log warning but continue initialization
                logger.warning(f"Could not update .gitignore: {e}")
                logger.info(
                    "Please manually add '.mcp-vector-search/' to your .gitignore file"
                )

            # Detect languages and files (the file count is only logged)
            detected_languages = self.detect_languages()
            file_count = self.count_indexable_files(extensions)

            # Create configuration
            config = ProjectConfig(
                project_root=self.project_root,
                index_path=index_path,
                file_extensions=extensions,
                embedding_model=embedding_model,
                similarity_threshold=similarity_threshold,
                languages=detected_languages,
            )

            # Save configuration
            self.save_config(config)

            logger.info(
                f"Initialized project at {self.project_root}",
                languages=detected_languages,
                file_count=file_count,
                extensions=config.file_extensions,
            )

            self._config = config
            return config

        except Exception as e:
            raise ProjectInitializationError(
                f"Failed to initialize project: {e}"
            ) from e

    def load_config(self) -> ProjectConfig:
        """Load project configuration.

        Reads the JSON file at the default config path, converts
        ``project_root`` and ``index_path`` back to ``Path`` and caches the
        resulting configuration on the instance.

        Returns:
            Project configuration

        Raises:
            ProjectNotFoundError: If project is not initialized
            ConfigurationError: If the file cannot be read, parsed or
                validated.
        """
        if not self.is_initialized():
            raise ProjectNotFoundError(
                f"Project not initialized at {self.project_root}. Run 'mcp-vector-search init' first."
            )

        config_path = get_default_config_path(self.project_root)

        try:
            # Explicit encoding keeps reads independent of the platform locale.
            with open(config_path, encoding="utf-8") as f:
                config_data = json.load(f)

            # Convert paths back to Path objects
            config_data["project_root"] = Path(config_data["project_root"])
            config_data["index_path"] = Path(config_data["index_path"])

            config = ProjectConfig(**config_data)
            self._config = config
            return config

        except Exception as e:
            raise ConfigurationError(f"Failed to load configuration: {e}") from e

    def save_config(self, config: ProjectConfig) -> None:
        """Save project configuration.

        Writes ``config`` as indented JSON to the default config path,
        creating the parent directory first. This does not update the cached
        configuration held by the instance.

        Args:
            config: Project configuration to save

        Raises:
            ConfigurationError: If dumping or writing the configuration fails.
        """
        config_path = get_default_config_path(self.project_root)
        config_path.parent.mkdir(parents=True, exist_ok=True)

        try:
            # Convert to JSON-serializable format
            config_data = config.model_dump()
            config_data["project_root"] = str(config.project_root)
            config_data["index_path"] = str(config.index_path)

            # Explicit encoding keeps writes independent of the platform locale.
            with open(config_path, "w", encoding="utf-8") as f:
                json.dump(config_data, f, indent=2)

            logger.debug(f"Saved configuration to {config_path}")

        except Exception as e:
            raise ConfigurationError(f"Failed to save configuration: {e}") from e

    @property
    def config(self) -> ProjectConfig:
        """Get project configuration, loading if necessary.

        Raises:
            ProjectNotFoundError: Propagated from ``load_config``.
            ConfigurationError: Propagated from ``load_config``.
        """
        if self._config is None:
            self._config = self.load_config()
        return self._config

    def detect_languages(self) -> list[str]:
        """Detect programming languages in the project.

        Maps the suffix of every non-ignored file through
        ``get_language_from_extension`` and drops the ``"text"`` result.

        Returns:
            Sorted list of detected language names, without duplicates.
        """
        languages: set[str] = {
            get_language_from_extension(file_path.suffix)
            for file_path in self._iter_source_files()
        }
        languages.discard("text")
        return sorted(languages)

    def count_indexable_files(self, extensions: list[str]) -> int:
        """Count files that can be indexed.

        Args:
            extensions: File extensions to count, compared for exact equality
                with ``Path.suffix`` (so including the leading dot).

        Returns:
            Number of non-ignored files whose suffix is in ``extensions``.
        """
        # Build the lookup once instead of scanning the list for every file.
        wanted = set(extensions)
        return sum(
            1 for file_path in self._iter_source_files() if file_path.suffix in wanted
        )

    def get_project_info(self, file_count: int | None = None) -> ProjectInfo:
        """Get comprehensive project information.

        Args:
            file_count: Optional pre-computed file count (avoids expensive filesystem scan)

        Returns:
            Project information. ``languages`` and ``file_count`` stay at
            their empty/zero values when the project is not initialized or
            when loading the details fails.
        """
        config_path = get_default_config_path(self.project_root)
        index_path = get_default_index_path(self.project_root)

        is_initialized = self.is_initialized()
        languages: list[str] = []
        computed_file_count = 0

        if is_initialized:
            try:
                config = self.config
                languages = config.languages
                # Use provided file_count if available to avoid filesystem scan
                if file_count is not None:
                    computed_file_count = file_count
                else:
                    computed_file_count = self.count_indexable_files(
                        config.file_extensions
                    )
            except Exception as e:
                # Detailed info is best-effort; keep the defaults but leave a
                # trace instead of discarding the error silently.
                logger.debug(f"Could not load detailed project info: {e}")

        return ProjectInfo(
            name=self.project_root.name,
            root_path=self.project_root,
            config_path=config_path,
            index_path=index_path,
            is_initialized=is_initialized,
            languages=languages,
            file_count=computed_file_count,
        )

    def _iter_source_files(self) -> list[Path]:
        """Collect the non-ignored regular files below the project root.

        Returns:
            List of file paths, in the order produced by ``rglob("*")``.
        """
        # PERFORMANCE: Pass is_directory=False since is_file() was already checked
        return [
            path
            for path in self.project_root.rglob("*")
            if path.is_file() and not self._should_ignore_path(path, is_directory=False)
        ]

    def _should_ignore_path(self, path: Path, is_directory: bool | None = None) -> bool:
        """Check if a path should be ignored.

        A path is ignored when the gitignore parser says so, when it does not
        lie under ``project_root``, or when any of its components below the
        project root is listed in ``DEFAULT_IGNORE_PATTERNS``.

        Args:
            path: Path to check
            is_directory: Optional hint if path is a directory (avoids filesystem check)

        Returns:
            True if path should be ignored
        """
        # First check gitignore rules if available
        # PERFORMANCE: Pass is_directory hint to avoid redundant stat() calls
        if self.gitignore_parser and self.gitignore_parser.is_ignored(
            path, is_directory=is_directory
        ):
            return True

        try:
            relative_path = path.relative_to(self.project_root)
        except ValueError:
            # Path is not relative to project root
            return True

        # Only components below the project root are tested. Testing the
        # components of the absolute path as well would ignore every file of
        # a project that merely lives underneath a directory whose name
        # matches one of the patterns.
        return any(part in DEFAULT_IGNORE_PATTERNS for part in relative_path.parts)