mcp-vector-search 0.15.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +10 -0
- mcp_vector_search/cli/__init__.py +1 -0
- mcp_vector_search/cli/commands/__init__.py +1 -0
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/chat.py +534 -0
- mcp_vector_search/cli/commands/config.py +393 -0
- mcp_vector_search/cli/commands/demo.py +358 -0
- mcp_vector_search/cli/commands/index.py +762 -0
- mcp_vector_search/cli/commands/init.py +658 -0
- mcp_vector_search/cli/commands/install.py +869 -0
- mcp_vector_search/cli/commands/install_old.py +700 -0
- mcp_vector_search/cli/commands/mcp.py +1254 -0
- mcp_vector_search/cli/commands/reset.py +393 -0
- mcp_vector_search/cli/commands/search.py +796 -0
- mcp_vector_search/cli/commands/setup.py +1133 -0
- mcp_vector_search/cli/commands/status.py +584 -0
- mcp_vector_search/cli/commands/uninstall.py +404 -0
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +265 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +201 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
- mcp_vector_search/cli/commands/visualize.py.original +2536 -0
- mcp_vector_search/cli/commands/watch.py +287 -0
- mcp_vector_search/cli/didyoumean.py +520 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +295 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +484 -0
- mcp_vector_search/cli/output.py +414 -0
- mcp_vector_search/cli/suggestions.py +375 -0
- mcp_vector_search/config/__init__.py +1 -0
- mcp_vector_search/config/constants.py +24 -0
- mcp_vector_search/config/defaults.py +200 -0
- mcp_vector_search/config/settings.py +146 -0
- mcp_vector_search/core/__init__.py +1 -0
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/connection_pool.py +360 -0
- mcp_vector_search/core/database.py +1237 -0
- mcp_vector_search/core/directory_index.py +318 -0
- mcp_vector_search/core/embeddings.py +294 -0
- mcp_vector_search/core/exceptions.py +89 -0
- mcp_vector_search/core/factory.py +318 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +1002 -0
- mcp_vector_search/core/llm_client.py +453 -0
- mcp_vector_search/core/models.py +294 -0
- mcp_vector_search/core/project.py +350 -0
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +952 -0
- mcp_vector_search/core/watcher.py +322 -0
- mcp_vector_search/mcp/__init__.py +5 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +752 -0
- mcp_vector_search/parsers/__init__.py +8 -0
- mcp_vector_search/parsers/base.py +296 -0
- mcp_vector_search/parsers/dart.py +605 -0
- mcp_vector_search/parsers/html.py +413 -0
- mcp_vector_search/parsers/javascript.py +643 -0
- mcp_vector_search/parsers/php.py +694 -0
- mcp_vector_search/parsers/python.py +502 -0
- mcp_vector_search/parsers/registry.py +223 -0
- mcp_vector_search/parsers/ruby.py +678 -0
- mcp_vector_search/parsers/text.py +186 -0
- mcp_vector_search/parsers/utils.py +265 -0
- mcp_vector_search/py.typed +1 -0
- mcp_vector_search/utils/__init__.py +42 -0
- mcp_vector_search/utils/gitignore.py +250 -0
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +339 -0
- mcp_vector_search/utils/timing.py +338 -0
- mcp_vector_search/utils/version.py +47 -0
- mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
- mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
- mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
- mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
- mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
"""Contextual suggestion system for better user experience."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ContextualSuggestionProvider:
|
|
10
|
+
"""Provides context-aware suggestions based on project state and user workflow."""
|
|
11
|
+
|
|
12
|
+
def __init__(self, project_root: Path | None = None):
|
|
13
|
+
"""Initialize the suggestion provider.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
project_root: Root directory of the project (defaults to current directory)
|
|
17
|
+
"""
|
|
18
|
+
self.project_root = project_root or Path.cwd()
|
|
19
|
+
self.console = Console(stderr=True)
|
|
20
|
+
|
|
21
|
+
def get_project_state(self) -> dict[str, bool]:
|
|
22
|
+
"""Analyze the current project state.
|
|
23
|
+
|
|
24
|
+
Returns:
|
|
25
|
+
Dictionary with boolean flags indicating project state
|
|
26
|
+
"""
|
|
27
|
+
state = {
|
|
28
|
+
"is_initialized": False,
|
|
29
|
+
"has_index": False,
|
|
30
|
+
"has_config": False,
|
|
31
|
+
"has_recent_changes": False,
|
|
32
|
+
"is_git_repo": False,
|
|
33
|
+
"has_mcp_config": False,
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
try:
|
|
37
|
+
# Check if project is initialized
|
|
38
|
+
config_dir = self.project_root / ".mcp-vector-search"
|
|
39
|
+
state["is_initialized"] = config_dir.exists()
|
|
40
|
+
|
|
41
|
+
if state["is_initialized"]:
|
|
42
|
+
# Check for config
|
|
43
|
+
config_file = config_dir / "config.json"
|
|
44
|
+
state["has_config"] = config_file.exists()
|
|
45
|
+
|
|
46
|
+
# Check for index
|
|
47
|
+
index_dir = config_dir / "chroma_db"
|
|
48
|
+
state["has_index"] = index_dir.exists() and any(index_dir.iterdir())
|
|
49
|
+
|
|
50
|
+
# Check if it's a git repo
|
|
51
|
+
git_dir = self.project_root / ".git"
|
|
52
|
+
state["is_git_repo"] = git_dir.exists()
|
|
53
|
+
|
|
54
|
+
# Check for MCP configuration (Claude Desktop config)
|
|
55
|
+
home = Path.home()
|
|
56
|
+
claude_config = (
|
|
57
|
+
home
|
|
58
|
+
/ "Library"
|
|
59
|
+
/ "Application Support"
|
|
60
|
+
/ "Claude"
|
|
61
|
+
/ "claude_desktop_config.json"
|
|
62
|
+
)
|
|
63
|
+
if claude_config.exists():
|
|
64
|
+
try:
|
|
65
|
+
with open(claude_config) as f:
|
|
66
|
+
config_data = json.load(f)
|
|
67
|
+
mcp_servers = config_data.get("mcpServers", {})
|
|
68
|
+
state["has_mcp_config"] = "mcp-vector-search" in mcp_servers
|
|
69
|
+
except (OSError, json.JSONDecodeError):
|
|
70
|
+
pass
|
|
71
|
+
|
|
72
|
+
# TODO: Check for recent file changes (would need file system monitoring)
|
|
73
|
+
# For now, we'll assume false
|
|
74
|
+
state["has_recent_changes"] = False
|
|
75
|
+
|
|
76
|
+
except Exception as e:
|
|
77
|
+
# If we can't determine state, provide conservative defaults
|
|
78
|
+
logger.debug(f"Failed to determine project state for suggestions: {e}")
|
|
79
|
+
pass
|
|
80
|
+
|
|
81
|
+
return state
|
|
82
|
+
|
|
83
|
+
def get_workflow_suggestions(self, failed_command: str) -> list[dict[str, str]]:
|
|
84
|
+
"""Get workflow-based suggestions for a failed command.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
failed_command: The command that failed
|
|
88
|
+
|
|
89
|
+
Returns:
|
|
90
|
+
List of suggestion dictionaries with 'command', 'reason', and 'priority'
|
|
91
|
+
"""
|
|
92
|
+
suggestions = []
|
|
93
|
+
state = self.get_project_state()
|
|
94
|
+
|
|
95
|
+
# High priority suggestions based on project state
|
|
96
|
+
if not state["is_initialized"]:
|
|
97
|
+
suggestions.append(
|
|
98
|
+
{
|
|
99
|
+
"command": "init",
|
|
100
|
+
"reason": "Project is not initialized for vector search",
|
|
101
|
+
"priority": "high",
|
|
102
|
+
"description": "Set up the project configuration and create necessary directories",
|
|
103
|
+
}
|
|
104
|
+
)
|
|
105
|
+
elif not state["has_index"]:
|
|
106
|
+
suggestions.append(
|
|
107
|
+
{
|
|
108
|
+
"command": "index",
|
|
109
|
+
"reason": "No search index found - create one to enable searching",
|
|
110
|
+
"priority": "high",
|
|
111
|
+
"description": "Build the vector index for your codebase",
|
|
112
|
+
}
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# Context-specific suggestions based on the failed command
|
|
116
|
+
if failed_command.lower() in ["search", "find", "query", "s", "f"]:
|
|
117
|
+
if not state["has_index"]:
|
|
118
|
+
suggestions.append(
|
|
119
|
+
{
|
|
120
|
+
"command": "index",
|
|
121
|
+
"reason": "Cannot search without an index",
|
|
122
|
+
"priority": "high",
|
|
123
|
+
"description": "Build the search index first",
|
|
124
|
+
}
|
|
125
|
+
)
|
|
126
|
+
else:
|
|
127
|
+
suggestions.extend(
|
|
128
|
+
[
|
|
129
|
+
{
|
|
130
|
+
"command": "search",
|
|
131
|
+
"reason": "Correct command for semantic code search",
|
|
132
|
+
"priority": "high",
|
|
133
|
+
"description": "Search your codebase semantically",
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"command": "interactive",
|
|
137
|
+
"reason": "Try interactive search for better experience",
|
|
138
|
+
"priority": "medium",
|
|
139
|
+
"description": "Start an interactive search session",
|
|
140
|
+
},
|
|
141
|
+
]
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
elif failed_command.lower() in ["index", "build", "scan", "i", "b"]:
|
|
145
|
+
suggestions.append(
|
|
146
|
+
{
|
|
147
|
+
"command": "index",
|
|
148
|
+
"reason": "Correct command for building search index",
|
|
149
|
+
"priority": "high",
|
|
150
|
+
"description": "Index your codebase for semantic search",
|
|
151
|
+
}
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
elif failed_command.lower() in ["status", "info", "stat", "st"]:
|
|
155
|
+
suggestions.append(
|
|
156
|
+
{
|
|
157
|
+
"command": "status",
|
|
158
|
+
"reason": "Show project status and statistics",
|
|
159
|
+
"priority": "high",
|
|
160
|
+
"description": "Display current project information",
|
|
161
|
+
}
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
elif failed_command.lower() in ["config", "configure", "settings", "c"]:
|
|
165
|
+
suggestions.append(
|
|
166
|
+
{
|
|
167
|
+
"command": "config",
|
|
168
|
+
"reason": "Manage project configuration",
|
|
169
|
+
"priority": "high",
|
|
170
|
+
"description": "View or modify project settings",
|
|
171
|
+
}
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
# MCP-related suggestions
|
|
175
|
+
if not state["has_mcp_config"] and failed_command.lower() in [
|
|
176
|
+
"mcp",
|
|
177
|
+
"claude",
|
|
178
|
+
"server",
|
|
179
|
+
]:
|
|
180
|
+
suggestions.append(
|
|
181
|
+
{
|
|
182
|
+
"command": "init-mcp",
|
|
183
|
+
"reason": "Set up Claude Code MCP integration",
|
|
184
|
+
"priority": "medium",
|
|
185
|
+
"description": "Configure MCP server for Claude Code integration",
|
|
186
|
+
}
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
# Remove duplicates while preserving order
|
|
190
|
+
seen = set()
|
|
191
|
+
unique_suggestions = []
|
|
192
|
+
for suggestion in suggestions:
|
|
193
|
+
key = suggestion["command"]
|
|
194
|
+
if key not in seen:
|
|
195
|
+
seen.add(key)
|
|
196
|
+
unique_suggestions.append(suggestion)
|
|
197
|
+
|
|
198
|
+
return unique_suggestions
|
|
199
|
+
|
|
200
|
+
def get_next_steps(self) -> list[dict[str, str]]:
|
|
201
|
+
"""Get suggested next steps based on current project state.
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
List of suggested next step dictionaries
|
|
205
|
+
"""
|
|
206
|
+
state = self.get_project_state()
|
|
207
|
+
next_steps = []
|
|
208
|
+
|
|
209
|
+
if not state["is_initialized"]:
|
|
210
|
+
next_steps.append(
|
|
211
|
+
{
|
|
212
|
+
"command": "init",
|
|
213
|
+
"description": "Initialize the project for semantic search",
|
|
214
|
+
"priority": "high",
|
|
215
|
+
}
|
|
216
|
+
)
|
|
217
|
+
elif not state["has_index"]:
|
|
218
|
+
next_steps.append(
|
|
219
|
+
{
|
|
220
|
+
"command": "index",
|
|
221
|
+
"description": "Build the search index for your codebase",
|
|
222
|
+
"priority": "high",
|
|
223
|
+
}
|
|
224
|
+
)
|
|
225
|
+
else:
|
|
226
|
+
# Project is ready for use
|
|
227
|
+
next_steps.extend(
|
|
228
|
+
[
|
|
229
|
+
{
|
|
230
|
+
"command": 'search "your query here"',
|
|
231
|
+
"description": "Search your codebase semantically",
|
|
232
|
+
"priority": "high",
|
|
233
|
+
},
|
|
234
|
+
{
|
|
235
|
+
"command": "status",
|
|
236
|
+
"description": "Check project statistics and index health",
|
|
237
|
+
"priority": "medium",
|
|
238
|
+
},
|
|
239
|
+
]
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
if state["has_recent_changes"]:
|
|
243
|
+
next_steps.insert(
|
|
244
|
+
0,
|
|
245
|
+
{
|
|
246
|
+
"command": "index --force",
|
|
247
|
+
"description": "Update the index with recent changes",
|
|
248
|
+
"priority": "high",
|
|
249
|
+
},
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
if not state["has_mcp_config"] and state["is_initialized"]:
|
|
253
|
+
next_steps.append(
|
|
254
|
+
{
|
|
255
|
+
"command": "init-mcp",
|
|
256
|
+
"description": "Set up Claude Code integration",
|
|
257
|
+
"priority": "low",
|
|
258
|
+
}
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
return next_steps
|
|
262
|
+
|
|
263
|
+
def show_contextual_help(self, failed_command: str | None = None) -> None:
|
|
264
|
+
"""Show contextual help and suggestions.
|
|
265
|
+
|
|
266
|
+
Args:
|
|
267
|
+
failed_command: The command that failed (if any)
|
|
268
|
+
"""
|
|
269
|
+
if failed_command:
|
|
270
|
+
self.console.print(
|
|
271
|
+
f"\n[yellow]Command '{failed_command}' not recognized.[/yellow]"
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
suggestions = self.get_workflow_suggestions(failed_command)
|
|
275
|
+
if suggestions:
|
|
276
|
+
self.console.print(
|
|
277
|
+
"\n[bold]Based on your project state, you might want to try:[/bold]"
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
for i, suggestion in enumerate(suggestions[:3], 1): # Show top 3
|
|
281
|
+
priority_color = {
|
|
282
|
+
"high": "red",
|
|
283
|
+
"medium": "yellow",
|
|
284
|
+
"low": "dim",
|
|
285
|
+
}.get(suggestion["priority"], "white")
|
|
286
|
+
|
|
287
|
+
self.console.print(
|
|
288
|
+
f" [{priority_color}]{i}.[/{priority_color}] "
|
|
289
|
+
f"[bold cyan]mcp-vector-search {suggestion['command']}[/bold cyan]"
|
|
290
|
+
)
|
|
291
|
+
self.console.print(f" {suggestion['description']}")
|
|
292
|
+
if suggestion.get("reason"):
|
|
293
|
+
self.console.print(f" [dim]({suggestion['reason']})[/dim]")
|
|
294
|
+
else:
|
|
295
|
+
# Show general next steps
|
|
296
|
+
next_steps = self.get_next_steps()
|
|
297
|
+
if next_steps:
|
|
298
|
+
self.console.print("\n[bold]Suggested next steps:[/bold]")
|
|
299
|
+
|
|
300
|
+
for i, step in enumerate(next_steps[:3], 1):
|
|
301
|
+
priority_color = {
|
|
302
|
+
"high": "green",
|
|
303
|
+
"medium": "yellow",
|
|
304
|
+
"low": "dim",
|
|
305
|
+
}.get(step["priority"], "white")
|
|
306
|
+
|
|
307
|
+
self.console.print(
|
|
308
|
+
f" [{priority_color}]{i}.[/{priority_color}] "
|
|
309
|
+
f"[bold cyan]mcp-vector-search {step['command']}[/bold cyan]"
|
|
310
|
+
)
|
|
311
|
+
self.console.print(f" {step['description']}")
|
|
312
|
+
|
|
313
|
+
def get_command_completion_suggestions(self, partial_command: str) -> list[str]:
|
|
314
|
+
"""Get command completion suggestions for a partial command.
|
|
315
|
+
|
|
316
|
+
Args:
|
|
317
|
+
partial_command: Partial command string
|
|
318
|
+
|
|
319
|
+
Returns:
|
|
320
|
+
List of possible command completions
|
|
321
|
+
"""
|
|
322
|
+
all_commands = [
|
|
323
|
+
"search",
|
|
324
|
+
"index",
|
|
325
|
+
"status",
|
|
326
|
+
"config",
|
|
327
|
+
"init",
|
|
328
|
+
"mcp",
|
|
329
|
+
"doctor",
|
|
330
|
+
"version",
|
|
331
|
+
"watch",
|
|
332
|
+
"auto-index",
|
|
333
|
+
"history",
|
|
334
|
+
"interactive",
|
|
335
|
+
"demo",
|
|
336
|
+
"install",
|
|
337
|
+
"reset",
|
|
338
|
+
"health",
|
|
339
|
+
]
|
|
340
|
+
|
|
341
|
+
# Add common aliases and shortcuts
|
|
342
|
+
all_commands.extend(["s", "i", "st", "c", "f", "find"])
|
|
343
|
+
|
|
344
|
+
partial_lower = partial_command.lower()
|
|
345
|
+
matches = [cmd for cmd in all_commands if cmd.startswith(partial_lower)]
|
|
346
|
+
|
|
347
|
+
return sorted(matches)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def get_contextual_suggestions(
    project_root: Path | None = None, failed_command: str | None = None
) -> None:
    """Display contextual suggestions for a project.

    Convenience wrapper that builds a ContextualSuggestionProvider for
    ``project_root`` and has it print its contextual help.

    Args:
        project_root: Root directory of the project
        failed_command: The command that failed
    """
    ContextualSuggestionProvider(project_root).show_contextual_help(failed_command)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def suggest_workflow_commands(project_root: Path | None = None) -> list[str]:
    """Return the command strings of the provider's suggested next steps.

    Args:
        project_root: Root directory of the project

    Returns:
        The 'command' value of each next step, in the order the provider
        returns them
    """
    return [
        entry["command"]
        for entry in ContextualSuggestionProvider(project_root).get_next_steps()
    ]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Configuration management for MCP Vector Search."""
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Project-wide constants for MCP Vector Search.
|
|
2
|
+
|
|
3
|
+
This module contains all magic numbers and configuration constants
|
|
4
|
+
used throughout the application to improve maintainability and clarity.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# --- Timeouts (seconds) ------------------------------------------------------

# Package installation commands
SUBPROCESS_INSTALL_TIMEOUT = 120
# Quick commands (version checks, etc.)
SUBPROCESS_SHORT_TIMEOUT = 10
# MCP server operations
SUBPROCESS_MCP_TIMEOUT = 30
# Server test operations
SUBPROCESS_TEST_TIMEOUT = 5
# Connection pool acquisition
CONNECTION_POOL_TIMEOUT = 30.0

# --- Chunking ----------------------------------------------------------------

# Default number of lines per code chunk
DEFAULT_CHUNK_SIZE = 50
# Number of lines per text/markdown chunk
TEXT_CHUNK_SIZE = 30
# Default number of search results to return
SEARCH_RESULT_LIMIT = 20

# --- Similarity thresholds (0.0-1.0) -----------------------------------------

# Default similarity threshold for search
DEFAULT_SIMILARITY_THRESHOLD = 0.5
# Higher threshold for more precise matches
HIGH_SIMILARITY_THRESHOLD = 0.75

# --- Caching -----------------------------------------------------------------

# Default LRU cache size for file reads
DEFAULT_CACHE_SIZE = 256
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""Default configurations for MCP Vector Search."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
# CI/CD configuration dotfiles that must never be skipped:
# GitHub workflows/actions, GitLab CI, CircleCI.
ALLOWED_DOTFILES = {".github", ".gitlab-ci", ".circleci"}

# File extensions indexed by default, supported languages first.
DEFAULT_FILE_EXTENSIONS = [
    # Fully supported: Python, JavaScript, TypeScript, React JSX/TSX, ES6 modules
    ".py",
    ".js",
    ".ts",
    ".jsx",
    ".tsx",
    ".mjs",
    # Fallback parsing: Java, C/C++ sources and headers, C#, Go, Rust, PHP,
    # Ruby, Swift, Kotlin, Scala
    ".java",
    ".cpp",
    ".c",
    ".h",
    ".hpp",
    ".cs",
    ".go",
    ".rs",
    ".php",
    ".rb",
    ".swift",
    ".kt",
    ".scala",
    # Fallback parsing: shell, Bash and Zsh scripts
    ".sh",
    ".bash",
    ".zsh",
    # JSON configuration, Markdown documentation, plain text
    ".json",
    ".md",
    ".txt",
]

# File extension -> parser language name.
LANGUAGE_MAPPINGS: dict[str, str] = {
    ".py": "python",
    ".pyw": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".java": "java",
    ".cpp": "cpp",
    ".c": "c",
    ".h": "c",
    ".hpp": "cpp",
    ".cs": "c_sharp",
    ".go": "go",
    ".rs": "rust",
    ".php": "php",
    ".rb": "ruby",
    ".swift": "swift",
    ".kt": "kotlin",
    ".scala": "scala",
    ".sh": "bash",
    ".bash": "bash",
    ".zsh": "bash",
    ".json": "json",
    ".md": "markdown",
    ".txt": "text",
}

# Embedding model per use case.
DEFAULT_EMBEDDING_MODELS = {
    # Replaced microsoft/codebert-base (original note: that model doesn't exist)
    "code": "sentence-transformers/all-MiniLM-L6-v2",
    "multilingual": "sentence-transformers/all-MiniLM-L6-v2",
    "fast": "sentence-transformers/all-MiniLM-L12-v2",
    # Replaced microsoft/unixcoder-base
    "precise": "sentence-transformers/all-mpnet-base-v2",
}

# Similarity threshold per language; "default" is the fallback entry.
DEFAULT_SIMILARITY_THRESHOLDS = {
    "python": 0.3,
    "javascript": 0.3,
    "typescript": 0.3,
    "java": 0.3,
    "cpp": 0.3,
    "c": 0.3,
    "go": 0.3,
    "rust": 0.3,
    # JSON files may have more structural similarity
    "json": 0.4,
    "markdown": 0.3,
    "text": 0.3,
    "default": 0.3,
}

# Chunk size per language (in tokens); "default" is the fallback entry.
DEFAULT_CHUNK_SIZES = {
    "python": 512,
    "javascript": 384,
    "typescript": 384,
    "java": 512,
    "cpp": 384,
    "c": 384,
    "go": 512,
    "rust": 512,
    # JSON files are often smaller and more structured
    "json": 256,
    # Markdown documentation can be chunked normally
    "markdown": 512,
    # Plain text files with paragraph-based chunking
    "text": 384,
    "default": 512,
}

# Directories to ignore during indexing.
DEFAULT_IGNORE_PATTERNS = [
    # Version-control metadata
    ".git",
    ".svn",
    ".hg",
    # Python / tooling caches (pytest, mypy, ruff)
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    # Dependencies and virtual environments
    "node_modules",
    ".venv",
    "venv",
    ".env",
    # Build output
    "build",
    "dist",
    "target",
    # Editor / IDE settings
    ".idea",
    ".vscode",
    # Packaging and OS residue
    "*.egg-info",
    ".DS_Store",
    "Thumbs.db",
    # Claude MPM directory and this tool's own index directory
    ".claude-mpm",
    ".mcp-vector-search",
]

# File patterns to ignore.
DEFAULT_IGNORE_FILES = [
    # Compiled Python
    "*.pyc", "*.pyo", "*.pyd",
    # Native binaries, objects and libraries
    "*.so", "*.dll", "*.dylib", "*.exe", "*.bin", "*.obj", "*.o", "*.a", "*.lib",
    # Java archives
    "*.jar", "*.war", "*.ear",
    # Compressed archives
    "*.zip", "*.tar", "*.gz", "*.bz2", "*.xz", "*.7z", "*.rar",
    # Disk images
    "*.iso", "*.dmg", "*.img",
    # Logs, temporary files, caches and lock files
    "*.log", "*.tmp", "*.temp", "*.cache", "*.lock",
]
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def get_default_config_path(project_root: Path) -> Path:
    """Return ``<project_root>/.mcp-vector-search/config.json``."""
    config_dir = project_root / ".mcp-vector-search"
    return config_dir / "config.json"
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def get_default_index_path(project_root: Path) -> Path:
    """Return ``<project_root>/.mcp-vector-search``, the default index directory."""
    index_dir = project_root / ".mcp-vector-search"
    return index_dir
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def get_default_cache_path(project_root: Path) -> Path:
    """Return ``<project_root>/.mcp-vector-search/cache``."""
    tool_dir = project_root / ".mcp-vector-search"
    return tool_dir / "cache"
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def get_language_from_extension(extension: str) -> str:
    """Map a file extension to its language name.

    The lookup in LANGUAGE_MAPPINGS is case-insensitive; extensions that are
    not listed there map to "text".
    """
    normalized = extension.lower()
    if normalized in LANGUAGE_MAPPINGS:
        return LANGUAGE_MAPPINGS[normalized]
    return "text"
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def get_similarity_threshold(language: str) -> float:
    """Return the similarity threshold configured for ``language``.

    The lookup is case-insensitive; languages without their own entry use
    the "default" entry of DEFAULT_SIMILARITY_THRESHOLDS.
    """
    fallback = DEFAULT_SIMILARITY_THRESHOLDS["default"]
    key = language.lower()
    return DEFAULT_SIMILARITY_THRESHOLDS.get(key, fallback)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def get_chunk_size(language: str) -> int:
    """Return the chunk size configured for ``language``.

    The lookup is case-insensitive; languages without their own entry use
    the "default" entry of DEFAULT_CHUNK_SIZES.
    """
    fallback = DEFAULT_CHUNK_SIZES["default"]
    key = language.lower()
    return DEFAULT_CHUNK_SIZES.get(key, fallback)
|