mcp-vector-search 0.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/config.py +393 -0
  6. mcp_vector_search/cli/commands/demo.py +358 -0
  7. mcp_vector_search/cli/commands/index.py +744 -0
  8. mcp_vector_search/cli/commands/init.py +645 -0
  9. mcp_vector_search/cli/commands/install.py +675 -0
  10. mcp_vector_search/cli/commands/install_old.py +696 -0
  11. mcp_vector_search/cli/commands/mcp.py +1182 -0
  12. mcp_vector_search/cli/commands/reset.py +393 -0
  13. mcp_vector_search/cli/commands/search.py +773 -0
  14. mcp_vector_search/cli/commands/status.py +549 -0
  15. mcp_vector_search/cli/commands/uninstall.py +485 -0
  16. mcp_vector_search/cli/commands/visualize.py +1467 -0
  17. mcp_vector_search/cli/commands/watch.py +287 -0
  18. mcp_vector_search/cli/didyoumean.py +500 -0
  19. mcp_vector_search/cli/export.py +320 -0
  20. mcp_vector_search/cli/history.py +295 -0
  21. mcp_vector_search/cli/interactive.py +342 -0
  22. mcp_vector_search/cli/main.py +461 -0
  23. mcp_vector_search/cli/output.py +412 -0
  24. mcp_vector_search/cli/suggestions.py +375 -0
  25. mcp_vector_search/config/__init__.py +1 -0
  26. mcp_vector_search/config/constants.py +24 -0
  27. mcp_vector_search/config/defaults.py +200 -0
  28. mcp_vector_search/config/settings.py +134 -0
  29. mcp_vector_search/core/__init__.py +1 -0
  30. mcp_vector_search/core/auto_indexer.py +298 -0
  31. mcp_vector_search/core/connection_pool.py +360 -0
  32. mcp_vector_search/core/database.py +1214 -0
  33. mcp_vector_search/core/directory_index.py +318 -0
  34. mcp_vector_search/core/embeddings.py +294 -0
  35. mcp_vector_search/core/exceptions.py +89 -0
  36. mcp_vector_search/core/factory.py +318 -0
  37. mcp_vector_search/core/git_hooks.py +345 -0
  38. mcp_vector_search/core/indexer.py +1002 -0
  39. mcp_vector_search/core/models.py +294 -0
  40. mcp_vector_search/core/project.py +333 -0
  41. mcp_vector_search/core/scheduler.py +330 -0
  42. mcp_vector_search/core/search.py +952 -0
  43. mcp_vector_search/core/watcher.py +322 -0
  44. mcp_vector_search/mcp/__init__.py +5 -0
  45. mcp_vector_search/mcp/__main__.py +25 -0
  46. mcp_vector_search/mcp/server.py +733 -0
  47. mcp_vector_search/parsers/__init__.py +8 -0
  48. mcp_vector_search/parsers/base.py +296 -0
  49. mcp_vector_search/parsers/dart.py +605 -0
  50. mcp_vector_search/parsers/html.py +413 -0
  51. mcp_vector_search/parsers/javascript.py +643 -0
  52. mcp_vector_search/parsers/php.py +694 -0
  53. mcp_vector_search/parsers/python.py +502 -0
  54. mcp_vector_search/parsers/registry.py +223 -0
  55. mcp_vector_search/parsers/ruby.py +678 -0
  56. mcp_vector_search/parsers/text.py +186 -0
  57. mcp_vector_search/parsers/utils.py +265 -0
  58. mcp_vector_search/py.typed +1 -0
  59. mcp_vector_search/utils/__init__.py +40 -0
  60. mcp_vector_search/utils/gitignore.py +250 -0
  61. mcp_vector_search/utils/monorepo.py +277 -0
  62. mcp_vector_search/utils/timing.py +334 -0
  63. mcp_vector_search/utils/version.py +47 -0
  64. mcp_vector_search-0.12.6.dist-info/METADATA +754 -0
  65. mcp_vector_search-0.12.6.dist-info/RECORD +68 -0
  66. mcp_vector_search-0.12.6.dist-info/WHEEL +4 -0
  67. mcp_vector_search-0.12.6.dist-info/entry_points.txt +2 -0
  68. mcp_vector_search-0.12.6.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,754 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp-vector-search
3
+ Version: 0.12.6
4
+ Summary: CLI-first semantic code search with MCP integration
5
+ Project-URL: Homepage, https://github.com/bobmatnyc/mcp-vector-search
6
+ Project-URL: Documentation, https://mcp-vector-search.readthedocs.io
7
+ Project-URL: Repository, https://github.com/bobmatnyc/mcp-vector-search
8
+ Project-URL: Bug Tracker, https://github.com/bobmatnyc/mcp-vector-search/issues
9
+ Author-email: Robert Matsuoka <bobmatnyc@gmail.com>
10
+ License: MIT License
11
+
12
+ Copyright (c) 2024 Robert Matsuoka
13
+
14
+ Permission is hereby granted, free of charge, to any person obtaining a copy
15
+ of this software and associated documentation files (the "Software"), to deal
16
+ in the Software without restriction, including without limitation the rights
17
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18
+ copies of the Software, and to permit persons to whom the Software is
19
+ furnished to do so, subject to the following conditions:
20
+
21
+ The above copyright notice and this permission notice shall be included in all
22
+ copies or substantial portions of the Software.
23
+
24
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
+ SOFTWARE.
31
+ License-File: LICENSE
32
+ Keywords: code-search,mcp,semantic-search,vector-database
33
+ Classifier: Development Status :: 3 - Alpha
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3.11
37
+ Classifier: Programming Language :: Python :: 3.12
38
+ Classifier: Topic :: Software Development :: Code Generators
39
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
40
+ Requires-Python: >=3.11
41
+ Requires-Dist: aiofiles>=23.0.0
42
+ Requires-Dist: authlib>=1.6.4
43
+ Requires-Dist: chromadb>=0.5.0
44
+ Requires-Dist: click-didyoumean>=0.3.0
45
+ Requires-Dist: httpx>=0.25.0
46
+ Requires-Dist: loguru>=0.7.0
47
+ Requires-Dist: mcp>=1.12.4
48
+ Requires-Dist: packaging>=23.0
49
+ Requires-Dist: pydantic-settings>=2.1.0
50
+ Requires-Dist: pydantic>=2.5.0
51
+ Requires-Dist: rich>=13.0.0
52
+ Requires-Dist: sentence-transformers>=2.2.2
53
+ Requires-Dist: tree-sitter-language-pack>=0.9.0
54
+ Requires-Dist: tree-sitter>=0.20.1
55
+ Requires-Dist: typer>=0.9.0
56
+ Requires-Dist: watchdog>=3.0.0
57
+ Description-Content-Type: text/markdown
58
+
59
+ # MCP Vector Search
60
+
61
+ 🔍 **CLI-first semantic code search with MCP integration**
62
+
63
+ [![PyPI version](https://badge.fury.io/py/mcp-vector-search.svg)](https://badge.fury.io/py/mcp-vector-search)
64
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
65
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
66
+
67
+ > ⚠️ **Alpha Release (v0.12.6)**: This is an early-stage project under active development. Expect breaking changes and rough edges. Feedback and contributions are welcome!
68
+
69
+ A modern, fast, and intelligent code search tool that understands your codebase through semantic analysis and AST parsing. Built with Python, powered by ChromaDB, and designed for developer productivity.
70
+
71
+ ## ✨ Features
72
+
73
+ ### 🚀 **Core Capabilities**
74
+ - **Semantic Search**: Find code by meaning, not just keywords
75
+ - **AST-Aware Parsing**: Understands code structure (functions, classes, methods)
76
+ - **Multi-Language Support**: 8 languages - Python, JavaScript, TypeScript, Dart/Flutter, PHP, Ruby, HTML, and Markdown/Text (with extensible architecture)
77
+ - **Real-time Indexing**: File watching with automatic index updates
78
+ - **Automatic Version Tracking**: Smart reindexing on tool upgrades
79
+ - **Local-First**: Complete privacy with on-device processing
80
+ - **Zero Configuration**: Auto-detects project structure and languages
81
+
82
+ ### 🛠️ **Developer Experience**
83
+ - **CLI-First Design**: Simple commands for immediate productivity
84
+ - **Rich Output**: Syntax highlighting, similarity scores, context
85
+ - **Fast Performance**: Sub-second search responses, efficient indexing
86
+ - **Modern Architecture**: Async-first, type-safe, modular design
87
+ - **Semi-Automatic Reindexing**: Multiple strategies without daemon processes
88
+
89
+ ### 🔧 **Technical Features**
90
+ - **Vector Database**: ChromaDB with connection pooling for a 13.6% performance boost
91
+ - **Embedding Models**: Configurable sentence transformers
92
+ - **Smart Reindexing**: Search-triggered, Git hooks, scheduled tasks, and manual options
93
+ - **Extensible Parsers**: Plugin architecture for new languages
94
+ - **Configuration Management**: Project-specific settings
95
+ - **Production Ready**: Connection pooling, auto-indexing, comprehensive error handling
96
+
97
+ ## 🚀 Quick Start
98
+
99
+ ### Installation
100
+
101
+ ```bash
102
+ # Install from PyPI
103
+ pip install mcp-vector-search
104
+
105
+ # Or with UV (recommended)
106
+ uv add mcp-vector-search
107
+
108
+ # Or install from source
109
+ git clone https://github.com/bobmatnyc/mcp-vector-search.git
110
+ cd mcp-vector-search
111
+ uv sync && uv pip install -e .
112
+ ```
113
+
114
+ ### Complete Setup (One Command)
115
+
116
+ The **hierarchical install command** (v0.13.0) provides complete project setup and MCP integration management:
117
+
118
+ ```bash
119
+ # Quick setup (recommended)
120
+ mcp-vector-search install
121
+
122
+ # This will:
123
+ # 1. Initialize your project configuration
124
+ # 2. Automatically index your codebase
125
+ # 3. Provide next-step hints for MCP integration
126
+
127
+ # Install with all MCP integrations at once
128
+ mcp-vector-search install --with-mcp
129
+
130
+ # Custom file extensions
131
+ mcp-vector-search install --extensions .py,.js,.ts,.dart
132
+
133
+ # Skip automatic indexing
134
+ mcp-vector-search install --no-auto-index
135
+ ```
136
+
137
+ ### Add MCP Integration for AI Tools
138
+
139
+ ```bash
140
+ # Add Claude Code integration (project-scoped)
141
+ mcp-vector-search install claude-code
142
+
143
+ # Add Cursor IDE integration (global)
144
+ mcp-vector-search install cursor
145
+
146
+ # Add Claude Desktop integration (global)
147
+ mcp-vector-search install claude-desktop
148
+
149
+ # See all available platforms
150
+ mcp-vector-search install list
151
+ ```
152
+
153
+ ### Remove MCP Integrations
154
+
155
+ ```bash
156
+ # Remove specific platform
157
+ mcp-vector-search uninstall claude-code
158
+
159
+ # Remove all integrations
160
+ mcp-vector-search uninstall --all
161
+
162
+ # List configured integrations
163
+ mcp-vector-search uninstall list
164
+ ```
165
+
166
+ ### Basic Usage
167
+
168
+ ```bash
169
+ # Search your code
170
+ mcp-vector-search search "authentication logic"
171
+ mcp-vector-search search "database connection setup"
172
+ mcp-vector-search search "error handling patterns"
173
+
174
+ # Index your codebase (if not done during install)
175
+ mcp-vector-search index
176
+
177
+ # Check project status
178
+ mcp-vector-search status
179
+
180
+ # Start file watching (auto-update index)
181
+ mcp-vector-search watch
182
+ ```
183
+
184
+ ### Smart CLI with "Did You Mean" Suggestions
185
+
186
+ The CLI includes intelligent command suggestions for typos:
187
+
188
+ ```bash
189
+ # Typos are automatically detected and the closest matching command is suggested
190
+ $ mcp-vector-search serach "auth"
191
+ No such command 'serach'. Did you mean 'search'?
192
+
193
+ $ mcp-vector-search indx
194
+ No such command 'indx'. Did you mean 'index'?
195
+ ```
196
+
197
+ See [docs/CLI_FEATURES.md](docs/CLI_FEATURES.md) for more details.
198
+
199
+ ## Versioning & Releasing
200
+
201
+ This project uses semantic versioning with an automated release workflow.
202
+
203
+ ### Quick Commands
204
+ - `make version-show` - Display current version
205
+ - `make release-patch` - Create patch release
206
+ - `make publish` - Publish to PyPI
207
+
208
+ See [docs/VERSIONING_WORKFLOW.md](docs/VERSIONING_WORKFLOW.md) for complete documentation.
209
+
210
+ ## 📖 Documentation
211
+
212
+ ### Commands
213
+
214
+ #### `install` - Install Project and MCP Integrations (v0.13.0)
215
+ ```bash
216
+ # Quick setup (recommended)
217
+ mcp-vector-search install
218
+
219
+ # Install with all MCP integrations
220
+ mcp-vector-search install --with-mcp
221
+
222
+ # Custom file extensions
223
+ mcp-vector-search install --extensions .py,.js,.ts
224
+
225
+ # Skip automatic indexing
226
+ mcp-vector-search install --no-auto-index
227
+
228
+ # Platform-specific MCP integration
229
+ mcp-vector-search install claude-code # Project-scoped
230
+ mcp-vector-search install claude-desktop # Global
231
+ mcp-vector-search install cursor # Global
232
+ mcp-vector-search install windsurf # Global
233
+ mcp-vector-search install vscode # Global
234
+
235
+ # List available platforms
236
+ mcp-vector-search install list
237
+ ```
238
+
239
+ #### `uninstall` - Remove MCP Integrations (v0.13.0)
240
+ ```bash
241
+ # Remove specific platform
242
+ mcp-vector-search uninstall claude-code
243
+
244
+ # Remove all integrations
245
+ mcp-vector-search uninstall --all
246
+
247
+ # List configured integrations
248
+ mcp-vector-search uninstall list
249
+
250
+ # Skip backup creation
251
+ mcp-vector-search uninstall claude-code --no-backup
252
+
253
+ # Alias (same as uninstall)
254
+ mcp-vector-search remove claude-code
255
+ ```
256
+
257
+ #### `init` - Initialize Project (Simple)
258
+ ```bash
259
+ # Basic initialization
260
+ mcp-vector-search init
261
+
262
+ # Custom configuration
263
+ mcp-vector-search init --extensions .py,.js,.ts --embedding-model sentence-transformers/all-MiniLM-L6-v2
264
+
265
+ # Force re-initialization
266
+ mcp-vector-search init --force
267
+ ```
268
+
269
+ #### `index` - Index Codebase
270
+ ```bash
271
+ # Index all files
272
+ mcp-vector-search index
273
+
274
+ # Index specific directory
275
+ mcp-vector-search index /path/to/code
276
+
277
+ # Force re-indexing
278
+ mcp-vector-search index --force
279
+
280
+ # Reindex entire project
281
+ mcp-vector-search index reindex
282
+
283
+ # Reindex entire project (explicit)
284
+ mcp-vector-search index reindex --all
285
+
286
+ # Reindex entire project without confirmation
287
+ mcp-vector-search index reindex --force
288
+
289
+ # Reindex specific file
290
+ mcp-vector-search index reindex path/to/file.py
291
+ ```
292
+
293
+ #### `search` - Semantic Search
294
+ ```bash
295
+ # Basic search
296
+ mcp-vector-search search "function that handles user authentication"
297
+
298
+ # Adjust similarity threshold
299
+ mcp-vector-search search "database queries" --threshold 0.7
300
+
301
+ # Limit results
302
+ mcp-vector-search search "error handling" --limit 10
303
+
304
+ # Find code similar to the code at a given file location
305
+ mcp-vector-search search similar "path/to/function.py:25"
306
+ ```
307
+
308
+ #### `auto-index` - Automatic Reindexing
309
+ ```bash
310
+ # Setup all auto-indexing strategies
311
+ mcp-vector-search auto-index setup --method all
312
+
313
+ # Setup specific strategies
314
+ mcp-vector-search auto-index setup --method git-hooks
315
+ mcp-vector-search auto-index setup --method scheduled --interval 60
316
+
317
+ # Check for stale files and auto-reindex
318
+ mcp-vector-search auto-index check --auto-reindex --max-files 10
319
+
320
+ # View auto-indexing status
321
+ mcp-vector-search auto-index status
322
+
323
+ # Remove auto-indexing setup
324
+ mcp-vector-search auto-index teardown --method all
325
+ ```
326
+
327
+ #### `watch` - File Watching
328
+ ```bash
329
+ # Start watching for changes
330
+ mcp-vector-search watch
331
+
332
+ # Check watch status
333
+ mcp-vector-search watch status
334
+
335
+ # Enable/disable watching
336
+ mcp-vector-search watch enable
337
+ mcp-vector-search watch disable
338
+ ```
339
+
340
+ #### `status` - Project Information
341
+ ```bash
342
+ # Basic status
343
+ mcp-vector-search status
344
+
345
+ # Detailed information
346
+ mcp-vector-search status --verbose
347
+ ```
348
+
349
+ #### `config` - Configuration Management
350
+ ```bash
351
+ # View configuration
352
+ mcp-vector-search config show
353
+
354
+ # Update settings
355
+ mcp-vector-search config set similarity_threshold 0.8
356
+ mcp-vector-search config set embedding_model microsoft/codebert-base
357
+
358
+ # Configure indexing behavior
359
+ mcp-vector-search config set skip_dotfiles true # Skip dotfiles (default)
360
+ mcp-vector-search config set respect_gitignore true # Respect .gitignore (default)
361
+
362
+ # Get specific setting
363
+ mcp-vector-search config get skip_dotfiles
364
+ mcp-vector-search config get respect_gitignore
365
+
366
+ # List available models
367
+ mcp-vector-search config models
368
+
369
+ # List all configuration keys
370
+ mcp-vector-search config list-keys
371
+ ```
372
+
373
+ ## 🚀 Performance Features
374
+
375
+ ### Connection Pooling
376
+ Automatic connection pooling provides a **13.6% performance improvement** with zero configuration:
377
+
378
+ ```python
379
+ # Automatically enabled for high-throughput scenarios
380
+ from mcp_vector_search.core.database import PooledChromaVectorDatabase
381
+
382
+ database = PooledChromaVectorDatabase(
383
+ max_connections=10, # Pool size
384
+ min_connections=2, # Warm connections
385
+ max_idle_time=300.0, # 5 minutes
386
+ )
387
+ ```
388
+
389
+ ### Semi-Automatic Reindexing
390
+ Multiple strategies to keep your index up-to-date without daemon processes:
391
+
392
+ 1. **Search-Triggered**: Automatically checks for stale files during searches
393
+ 2. **Git Hooks**: Triggers reindexing after commits, merges, checkouts
394
+ 3. **Scheduled Tasks**: System-level cron jobs or Windows tasks
395
+ 4. **Manual Checks**: On-demand via CLI commands
396
+ 5. **Periodic Checker**: In-process periodic checks for long-running apps
397
+
398
+ ```bash
399
+ # Setup all strategies
400
+ mcp-vector-search auto-index setup --method all
401
+
402
+ # Check status
403
+ mcp-vector-search auto-index status
404
+ ```
405
+
406
+ ### Configuration
407
+
408
+ Projects are configured via `.mcp-vector-search/config.json`:
409
+
410
+ ```json
411
+ {
412
+ "project_root": "/path/to/project",
413
+ "file_extensions": [".py", ".js", ".ts"],
414
+ "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
415
+ "similarity_threshold": 0.75,
416
+ "languages": ["python", "javascript", "typescript"],
417
+ "watch_files": true,
418
+ "cache_embeddings": true,
419
+ "skip_dotfiles": true,
420
+ "respect_gitignore": true
421
+ }
422
+ ```
423
+
424
+ #### Indexing Configuration Options
425
+
426
+ **`skip_dotfiles`** (default: `true`)
427
+ - Controls whether files and directories starting with "." are skipped during indexing
428
+ - **Whitelisted directories** are always indexed regardless of this setting:
429
+ - `.github/` - GitHub workflows and actions
430
+ - `.gitlab-ci/` - GitLab CI configuration
431
+ - `.circleci/` - CircleCI configuration
432
+ - When `false`: All dotfiles are indexed (subject to gitignore rules if `respect_gitignore` is `true`)
433
+
434
+ **`respect_gitignore`** (default: `true`)
435
+ - Controls whether `.gitignore` patterns are respected during indexing
436
+ - When `false`: Files in `.gitignore` are indexed (subject to `skip_dotfiles` if enabled)
437
+
438
+ #### Configuration Use Cases
439
+
440
+ **Default Behavior** (Recommended for most projects):
441
+ ```bash
442
+ # Skip dotfiles AND respect .gitignore
443
+ mcp-vector-search config set skip_dotfiles true
444
+ mcp-vector-search config set respect_gitignore true
445
+ ```
446
+
447
+ **Index Everything** (Useful for deep code analysis):
448
+ ```bash
449
+ # Index all files including dotfiles and gitignored files
450
+ mcp-vector-search config set skip_dotfiles false
451
+ mcp-vector-search config set respect_gitignore false
452
+ ```
453
+
454
+ **Index Dotfiles but Respect .gitignore**:
455
+ ```bash
456
+ # Index configuration files but skip build artifacts
457
+ mcp-vector-search config set skip_dotfiles false
458
+ mcp-vector-search config set respect_gitignore true
459
+ ```
460
+
461
+ **Skip Dotfiles but Ignore .gitignore**:
462
+ ```bash
463
+ # Useful when you want to index files in .gitignore but skip hidden config files
464
+ mcp-vector-search config set skip_dotfiles true
465
+ mcp-vector-search config set respect_gitignore false
466
+ ```
467
+
468
+ ## 🏗️ Architecture
469
+
470
+ ### Core Components
471
+
472
+ - **Parser Registry**: Extensible system for language-specific parsing
473
+ - **Semantic Indexer**: Efficient code chunking and embedding generation
474
+ - **Vector Database**: ChromaDB integration for similarity search
475
+ - **File Watcher**: Real-time monitoring and incremental updates
476
+ - **CLI Interface**: Rich, user-friendly command-line experience
477
+
478
+ ### Supported Languages
479
+
480
+ MCP Vector Search supports **8 languages and document formats** with semantic search capabilities (see the Status column for the level of support):
481
+
482
+ | Language | Extensions | Status | Features |
483
+ |------------|------------|--------|----------|
484
+ | Python | `.py`, `.pyw` | ✅ Full | Functions, classes, methods, docstrings |
485
+ | JavaScript | `.js`, `.jsx`, `.mjs` | ✅ Full | Functions, classes, JSDoc, ES6+ syntax |
486
+ | TypeScript | `.ts`, `.tsx` | ✅ Full | Interfaces, types, generics, decorators |
487
+ | Dart | `.dart` | ✅ Full | Functions, classes, widgets, async, dartdoc |
488
+ | PHP | `.php`, `.phtml` | ✅ Full | Classes, methods, traits, PHPDoc, Laravel patterns |
489
+ | Ruby | `.rb`, `.rake`, `.gemspec` | ✅ Full | Modules, classes, methods, RDoc, Rails patterns |
490
+ | HTML | `.html`, `.htm` | ✅ Full | Semantic content extraction, heading hierarchy, text chunking |
491
+ | Text/Markdown | `.txt`, `.md`, `.markdown` | ✅ Basic | Semantic chunking for documentation |
492
+
493
+ **Planned Languages:**
494
+ | Language | Status | Features |
495
+ |------------|--------|----------|
496
+ | Java | 🔄 Planned | Classes, methods, annotations |
497
+ | Go | 🔄 Planned | Functions, structs, interfaces |
498
+ | Rust | 🔄 Planned | Functions, structs, traits |
499
+
500
+ #### New Language Support
501
+
502
+ **HTML Support** (Unreleased):
503
+ - **Semantic Extraction**: Content from h1-h6, p, section, article, main, aside, nav, header, footer
504
+ - **Intelligent Chunking**: Based on heading hierarchy (h1-h6)
505
+ - **Context Preservation**: Maintains class and id attributes for searchability
506
+ - **Script/Style Filtering**: Ignores non-content elements
507
+ - **Use Cases**: Static sites, documentation, web templates, HTML fragments
508
+
509
+ **Dart/Flutter Support** (v0.4.15):
510
+ - **Widget Detection**: StatelessWidget, StatefulWidget recognition
511
+ - **State Classes**: Automatic parsing of `_WidgetNameState` patterns
512
+ - **Async Support**: Future<T> and async function handling
513
+ - **Dartdoc**: Triple-slash comment extraction
514
+ - **Tree-sitter AST**: Fast, accurate parsing with regex fallback
515
+
516
+ **PHP Support** (v0.5.0):
517
+ - **Class Detection**: Classes, interfaces, traits
518
+ - **Method Extraction**: Public, private, protected, static methods
519
+ - **Magic Methods**: __construct, __get, __set, __call, etc.
520
+ - **PHPDoc**: Full comment extraction
521
+ - **Laravel Patterns**: Controllers, Models, Eloquent support
522
+ - **Tree-sitter AST**: Fast parsing with regex fallback
523
+
524
+ **Ruby Support** (v0.5.0):
525
+ - **Module/Class Detection**: Full namespace support (::)
526
+ - **Method Extraction**: Instance and class methods
527
+ - **Special Syntax**: Method names with ?, ! support
528
+ - **Attribute Macros**: attr_accessor, attr_reader, attr_writer
529
+ - **RDoc**: Comment extraction (# and =begin...=end)
530
+ - **Rails Patterns**: ActiveRecord, Controllers support
531
+ - **Tree-sitter AST**: Fast parsing with regex fallback
532
+
533
+ ## 🤝 Contributing
534
+
535
+ We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
536
+
537
+ ### Development Setup
538
+
539
+ ```bash
540
+ # Clone the repository
541
+ git clone https://github.com/bobmatnyc/mcp-vector-search.git
542
+ cd mcp-vector-search
543
+
544
+ # Install dependencies with UV
545
+ uv sync
546
+
547
+ # Install in development mode
548
+ uv pip install -e .
549
+
550
+ # Test CLI from source (recommended during development)
551
+ ./dev-mcp version # Shows [DEV] indicator
552
+ ./dev-mcp search "test" # No reinstall needed after code changes
553
+
554
+ # Run tests
555
+ uv run pytest
556
+
557
+ # Run linting
558
+ uv run ruff check
559
+ uv run mypy src/
560
+ ```
561
+
562
+ For detailed development workflow and `dev-mcp` usage, see the [Development](#-development) section below.
563
+
564
+ ### Adding Language Support
565
+
566
+ 1. Create a new parser in `src/mcp_vector_search/parsers/`
567
+ 2. Extend the `BaseParser` class
568
+ 3. Register the parser in `parsers/registry.py`
569
+ 4. Add tests and documentation
570
+
571
+ ## 📊 Performance
572
+
573
+ - **Indexing Speed**: ~1000 files/minute (typical Python project)
574
+ - **Search Latency**: <100ms for most queries
575
+ - **Memory Usage**: ~50MB baseline + ~1MB per 1000 code chunks
576
+ - **Storage**: ~1KB per code chunk (compressed embeddings)
577
+
578
+ ## ⚠️ Known Limitations (Alpha)
579
+
580
+ - **Tree-sitter Integration**: Currently using regex fallback parsing (Tree-sitter setup needs improvement)
581
+ - **Search Relevance**: Embedding model may need tuning for code-specific queries
582
+ - **Error Handling**: Some edge cases may not be gracefully handled
583
+ - **Documentation**: API documentation is minimal
584
+ - **Testing**: Limited test coverage, needs real-world validation
585
+
586
+ ## 🙏 Feedback Needed
587
+
588
+ We're actively seeking feedback on:
589
+
590
+ - **Search Quality**: How relevant are the search results for your codebase?
591
+ - **Performance**: How does indexing and search speed feel in practice?
592
+ - **Usability**: Is the CLI interface intuitive and helpful?
593
+ - **Language Support**: Which languages would you like to see added next?
594
+ - **Features**: What functionality is missing for your workflow?
595
+
596
+ Please [open an issue](https://github.com/bobmatnyc/mcp-vector-search/issues) or start a [discussion](https://github.com/bobmatnyc/mcp-vector-search/discussions) to share your experience!
597
+
598
+ ## 🔮 Roadmap
599
+
600
+ ### v0.0.x: Alpha (Current) 🔄
601
+ - [x] Core CLI interface
602
+ - [x] Python/JS/TS parsing
603
+ - [x] ChromaDB integration
604
+ - [x] File watching
605
+ - [x] Basic search functionality
606
+ - [ ] Real-world testing and feedback
607
+ - [ ] Bug fixes and stability improvements
608
+ - [ ] Performance optimizations
609
+
610
+ ### v0.1.x: Beta 🔮
611
+ - [ ] Advanced search modes (contextual, similar code)
612
+ - [ ] Additional language support (Java, Go, Rust)
613
+ - [ ] Configuration improvements
614
+ - [ ] Comprehensive testing suite
615
+ - [ ] Documentation improvements
616
+
617
+ ### v1.0.x: Stable 🔮
618
+ - [ ] MCP server implementation
619
+ - [ ] IDE extensions (VS Code, JetBrains)
620
+ - [ ] Git integration
621
+ - [ ] Team collaboration features
622
+ - [ ] Production-ready performance
623
+
624
+ ## 🛠️ Development
625
+
626
+ ### Three-Stage Development Workflow
627
+
628
+ **Stage A: Local Development & Testing**
629
+ ```bash
630
+ # Setup development environment
631
+ uv sync && uv pip install -e .
632
+
633
+ # Run development tests
634
+ ./scripts/dev-test.sh
635
+
636
+ # Run CLI from source (recommended during development)
637
+ ./dev-mcp version # Visual [DEV] indicator
638
+ ./dev-mcp status # Any command works
639
+ ./dev-mcp search "auth" # Immediate feedback on changes
640
+
641
+ # Alternative: use uv run directly
642
+ uv run mcp-vector-search version
643
+ ```
644
+
645
+ #### Using the `dev-mcp` Development Helper
646
+
647
+ The `./dev-mcp` script provides a streamlined way to run the CLI from source code during development, eliminating the need for repeated installations.
648
+
649
+ **Key Features:**
650
+ - **Visual [DEV] Indicator**: Shows `[DEV]` prefix to distinguish from installed version
651
+ - **No Reinstall Required**: Reflects code changes immediately
652
+ - **Complete Argument Forwarding**: Works with all CLI commands and options
653
+ - **Verbose Mode**: Debug output with `--verbose` flag
654
+ - **Built-in Help**: Script usage with `--help`
655
+
656
+ **Usage Examples:**
657
+ ```bash
658
+ # Basic commands (note the [DEV] prefix in output)
659
+ ./dev-mcp version
660
+ ./dev-mcp status
661
+ ./dev-mcp index
662
+ ./dev-mcp search "authentication logic"
663
+
664
+ # With CLI options
665
+ ./dev-mcp search "error handling" --limit 10
666
+ ./dev-mcp index --force
667
+
668
+ # Script verbose mode (shows Python interpreter, paths)
669
+ ./dev-mcp --verbose search "database"
670
+
671
+ # Script help (shows dev-mcp usage, not CLI help)
672
+ ./dev-mcp --help
673
+
674
+ # CLI command help (forwards --help to the CLI)
675
+ ./dev-mcp search --help
676
+ ./dev-mcp index --help
677
+ ```
678
+
679
+ **When to Use:**
680
+ - **`./dev-mcp`** → Development workflow (runs from source code)
681
+ - **`mcp-vector-search`** → Production usage (runs installed version via pipx/pip)
682
+
683
+ **Benefits:**
684
+ - **Instant Feedback**: Changes to source code are reflected immediately
685
+ - **No Build Step**: Skip the reinstall cycle during active development
686
+ - **Clear Context**: Visual `[DEV]` indicator prevents confusion about which version is running
687
+ - **Error Handling**: Built-in checks for uv installation and project structure
688
+
689
+ **Requirements:**
690
+ - Must have `uv` installed (`pip install uv`)
691
+ - Must run from project root directory
692
+ - Requires `pyproject.toml` in current directory
693
+
694
+ **Stage B: Local Deployment Testing**
695
+ ```bash
696
+ # Build and test clean deployment
697
+ ./scripts/deploy-test.sh
698
+
699
+ # Test on other projects
700
+ cd ~/other-project
701
+ mcp-vector-search init && mcp-vector-search index
702
+ ```
703
+
704
+ **Stage C: PyPI Publication**
705
+ ```bash
706
+ # Publish to PyPI
707
+ ./scripts/publish.sh
708
+
709
+ # Verify published version
710
+ pip install mcp-vector-search --upgrade
711
+ ```
712
+
713
+ ### Quick Reference
714
+ ```bash
715
+ ./scripts/workflow.sh # Show workflow overview
716
+ ```
717
+
718
+ See [DEVELOPMENT.md](DEVELOPMENT.md) for detailed development instructions.
719
+
720
+ ## 📚 Documentation
721
+
722
+ For comprehensive documentation, see **[CLAUDE.md](CLAUDE.md)** - the main documentation index.
723
+
724
+ ### Quick Links
725
+ - **[Configuration Guide](docs/CONFIGURATION.md)** - Comprehensive configuration reference
726
+ - **[Installation & Deployment](docs/DEPLOY.md)** - Setup and deployment guide
727
+ - **[CLI Features](docs/CLI_FEATURES.md)** - Advanced CLI features and usage
728
+ - **[Project Structure](docs/STRUCTURE.md)** - Architecture and file organization
729
+ - **[Contributing Guidelines](docs/developer/CONTRIBUTING.md)** - How to contribute
730
+ - **[API Reference](docs/developer/API.md)** - Internal API documentation
731
+ - **[Testing Guide](docs/developer/TESTING.md)** - Testing strategies
732
+ - **[Code Quality](docs/developer/LINTING.md)** - Linting and formatting
733
+ - **[Versioning](docs/VERSIONING.md)** - Version management
734
+ - **[Releases](docs/RELEASES.md)** - Release process
735
+
736
+ ## 🤝 Contributing
737
+
738
+ Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
739
+
740
+ ## 📄 License
741
+
742
+ MIT License - see [LICENSE](LICENSE) file for details.
743
+
744
+ ## 🙏 Acknowledgments
745
+
746
+ - [ChromaDB](https://github.com/chroma-core/chroma) for vector database
747
+ - [Tree-sitter](https://tree-sitter.github.io/) for parsing infrastructure
748
+ - [Sentence Transformers](https://www.sbert.net/) for embeddings
749
+ - [Typer](https://typer.tiangolo.com/) for CLI framework
750
+ - [Rich](https://rich.readthedocs.io/) for beautiful terminal output
751
+
752
+ ---
753
+
754
+ **Built with ❤️ for developers who love efficient code search**