mcp-codebase-searcher 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. mcp_codebase_searcher-0.1.0/LICENSE +21 -0
  2. mcp_codebase_searcher-0.1.0/PKG-INFO +292 -0
  3. mcp_codebase_searcher-0.1.0/README.md +269 -0
  4. mcp_codebase_searcher-0.1.0/pyproject.toml +56 -0
  5. mcp_codebase_searcher-0.1.0/setup.cfg +4 -0
  6. mcp_codebase_searcher-0.1.0/src/config.py +20 -0
  7. mcp_codebase_searcher-0.1.0/src/file_scanner.py +213 -0
  8. mcp_codebase_searcher-0.1.0/src/mcp_codebase_searcher.egg-info/PKG-INFO +292 -0
  9. mcp_codebase_searcher-0.1.0/src/mcp_codebase_searcher.egg-info/SOURCES.txt +22 -0
  10. mcp_codebase_searcher-0.1.0/src/mcp_codebase_searcher.egg-info/dependency_links.txt +1 -0
  11. mcp_codebase_searcher-0.1.0/src/mcp_codebase_searcher.egg-info/entry_points.txt +2 -0
  12. mcp_codebase_searcher-0.1.0/src/mcp_codebase_searcher.egg-info/requires.txt +2 -0
  13. mcp_codebase_searcher-0.1.0/src/mcp_codebase_searcher.egg-info/top_level.txt +7 -0
  14. mcp_codebase_searcher-0.1.0/src/mcp_elaborate.py +277 -0
  15. mcp_codebase_searcher-0.1.0/src/mcp_search.py +295 -0
  16. mcp_codebase_searcher-0.1.0/src/mcp_searcher.py +217 -0
  17. mcp_codebase_searcher-0.1.0/src/output_generator.py +177 -0
  18. mcp_codebase_searcher-0.1.0/src/report_elaborator.py +203 -0
  19. mcp_codebase_searcher-0.1.0/tests/test_file_scanner.py +264 -0
  20. mcp_codebase_searcher-0.1.0/tests/test_mcp_elaborate.py +252 -0
  21. mcp_codebase_searcher-0.1.0/tests/test_mcp_search.py +340 -0
  22. mcp_codebase_searcher-0.1.0/tests/test_mcp_searcher.py +220 -0
  23. mcp_codebase_searcher-0.1.0/tests/test_output_generator.py +165 -0
  24. mcp_codebase_searcher-0.1.0/tests/test_report_elaborator.py +169 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Sakilmostak
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,292 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp_codebase_searcher
3
+ Version: 0.1.0
4
+ Summary: A Python tool to scan codebases, search for text/regex patterns, and elaborate on findings using Google Gemini.
5
+ Author-email: Sakilmostak <skmahim71@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/sakilmostak/mcp-codebase-searcher
8
+ Project-URL: Bug Tracker, https://github.com/sakilmostak/mcp-codebase-searcher/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Topic :: Software Development :: Build Tools
15
+ Classifier: Topic :: Text Processing :: Indexing
16
+ Classifier: Topic :: Utilities
17
+ Requires-Python: >=3.8
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: python-dotenv
21
+ Requires-Dist: google-generativeai
22
+ Dynamic: license-file
23
+
24
+ # MCP Codebase Searcher
25
+
26
+ MCP Codebase Searcher is a Python tool designed to scan codebases, search for text or regular expression patterns, and optionally elaborate on the findings using Google Gemini.
27
+
28
+ ## Features
29
+
30
+ * Search for exact strings or regular expression patterns.
31
+ * Case-sensitive or case-insensitive searching.
32
+ * Specify context lines to display around matches.
33
+ * Exclude specific directories and file patterns.
34
+ * Option to include/exclude hidden files and directories.
35
+ * Output results in console, JSON, or Markdown format.
36
+ * Save search results to a file.
37
+ * Elaborate on individual findings from a JSON report using Google Gemini.
38
+
39
+ ## Installation
40
+
41
+ This project uses Python 3.8+.
42
+
43
+ 1. **Clone the repository (if applicable):**
44
+ ```bash
45
+ git clone <repository_url>
46
+ cd mcp_codebase_searcher
47
+ ```
48
+
49
+ 2. **Create and activate a virtual environment:**
50
+ ```bash
51
+ python3 -m venv venv
52
+ source venv/bin/activate # On Windows use `venv\Scripts\activate`
53
+ ```
54
+
55
+ 3. **Install the package:**
56
+ Once the package is built (see Building section below), you can install it using pip:
57
+ ```bash
58
+ pip install dist/mcp_codebase_searcher-*.whl
59
+ ```
60
+ Alternatively, for development, install in editable mode from the project root:
61
+ ```bash
62
+ pip install -e .
63
+ ```
64
+
65
+ 4. **API Key (for Elaboration):**
66
+ To use the elaboration feature, you need a Google API key for Gemini. You can provide it via:
67
+ * The `--api-key` argument when using the `elaborate` command.
68
+ * A JSON configuration file specified with `--config-file` (containing `{"GOOGLE_API_KEY": "YOUR_KEY"}`).
69
+ * An environment variable `GOOGLE_API_KEY`.
70
+ * A `config.py` file in the project root (if running from source) that has a `load_api_key()` function returning the key.
71
+
72
+ The API key is sourced with the following precedence: `--api-key` argument > `--config-file` > `GOOGLE_API_KEY` environment variable > `config.py` module.
73
+
74
+ Create a `.env` file in the project root for local development if using environment variables:
75
+ ```
76
+ GOOGLE_API_KEY="YOUR_API_KEY_HERE"
77
+ ```
78
+
79
+ ## Usage
80
+
81
+ The tool provides two main commands: `search` and `elaborate`.
82
+
83
+ ### Search
84
+
85
+ ```bash
86
+ mcp-searcher search "your_query" path/to/search [--regex] [--case-sensitive] [--context LINES] [--exclude-dirs .git,node_modules] [--exclude-files *.log] [--include-hidden] [--output-format json] [--output-file results.json]
87
+ ```
88
+
89
+ **Arguments:**
90
+
91
+ * `query`: The search term or regex pattern.
92
+ * `paths`: One or more file or directory paths to search within.
93
+ * `--regex`, `-r`: Treat the `query` as a Python regular expression pattern.
94
+ * `--case-sensitive`, `-c`: Perform a case-sensitive search. By default, search is case-insensitive.
95
+ * `--context LINES`, `-C LINES`: Number of context lines to show around each match (default: 3). Set to 0 for no context.
96
+ * `--exclude-dirs PATTERNS`: Comma-separated list of directory name patterns (using `fnmatch` wildcards like `*`, `?`) to exclude (e.g., `.git,node_modules,build,*cache*`).
97
+ * `--exclude-files PATTERNS`: Comma-separated list of file name patterns (using `fnmatch` wildcards) to exclude (e.g., `*.log,*.tmp,temp_*`).
98
+ * `--include-hidden`: Include hidden files and directories (those starting with a period `.`) in the scan. By default, they are excluded unless they are explicitly provided in `paths`.
99
+ * `--output-format FORMAT`: Format for the output. Choices: `console` (default), `json`, `md` (or `markdown`).
100
+ * `--output-file FILE`: Path to save the output. If not provided, prints to the console.
101
+
102
+ **Examples:**
103
+
104
+ 1. Search for "TODO" (case-insensitive) in the `src` directory and its subdirectories, excluding `__pycache__` directories and any `.tmp` or `.log` files, and save the results as JSON:
105
+ ```bash
106
+ mcp-searcher search "TODO" src --exclude-dirs __pycache__ --exclude-files "*.tmp,*.log" --output-format json --output-file todos.json
107
+ ```
108
+
109
+ 2. Search for Python function definitions (e.g., `def my_function(`) using a regular expression in all `.py` files within the current directory (`.`) and its subdirectories:
110
+ ```bash
111
+ mcp-searcher search "^\s*def\s+\w+\s*\(.*\):" . --regex --exclude-files "!*.py" # Caution: this limits the search to .py files only if "!*.py" is honored as a negated (include) pattern; plain fnmatch-style exclusion does not treat a leading "!" as negation.
112
+ # If negated patterns are not supported by --exclude-files, use one of these approaches instead:
113
+ # find the .py files first and pass them to mcp-searcher as paths, or explicitly exclude the file types you do not want.
114
+ # The tool scans every file that is not excluded, so you normally leave other file types included unless they are binaries or similar.
115
+ # Regex-only example (searches all non-excluded files):
116
+ mcp-searcher search "^\s*def\s+\w+\s*\(.*\):" . --regex
117
+ ```
118
+ *Note: Ensure your regex is quoted correctly for your shell, especially if it contains special characters.*
119
+
120
+ 3. Perform a case-sensitive search for the exact string "ErrorLog" in all files in `/var/log`, include hidden files, and output to a Markdown file:
121
+ ```bash
122
+ mcp-searcher search "ErrorLog" /var/log --case-sensitive --include-hidden --output-format md --output-file errors_report.md
123
+ ```
124
+
125
+ ### Elaborate
126
+
127
+ ```bash
128
+ mcp-searcher elaborate --report-file path/to/report.json --finding-id INDEX [--api-key YOUR_KEY] [--config-file path/to/config.json] [--context-lines LINES]
129
+ ```
130
+
131
+ **Arguments:**
132
+
133
+ * `--report-file FILE`: (Required) Path to the JSON search report file generated by the `search` command.
134
+ * `--finding-id INDEX`: (Required) The 0-based index (ID) of the specific finding within the report file that you want to elaborate on.
135
+ * `--api-key KEY`: Your Google API key for Gemini. If provided, this takes precedence over other key sources.
136
+ * `--config-file FILE`: Path to an optional JSON configuration file containing your `GOOGLE_API_KEY` (e.g., `{"GOOGLE_API_KEY": "YOUR_KEY"}`).
137
+ * `--context-lines LINES`: Number of lines of broader context from the source file (surrounding the original snippet) to provide to the LLM for better understanding (default: 10).
138
+
139
+ **Examples:**
140
+
141
+ 1. Elaborate on the first finding (index 0) from `todos.json`, assuming the API key is set as an environment variable (`GOOGLE_API_KEY`) or in a `config.py` / `.env` file:
142
+ ```bash
143
+ mcp-searcher elaborate --report-file todos.json --finding-id 0
144
+ ```
145
+
146
+ 2. Elaborate on the third finding (index 2) from `search_results.json`, providing the API key directly and specifying 15 lines of context for the LLM:
147
+ ```bash
148
+ mcp-searcher elaborate --report-file search_results.json --finding-id 2 --api-key "AIzaSyXXXXXXXXXXXXXXXXXXX" --context-lines 15
149
+ ```
150
+
151
+ 3. Elaborate on the sixth finding (index 5) from `project_report.json`, using an API key stored in a custom configuration file named `.my_gemini_config.json` located in the user's home directory:
152
+ ```bash
153
+ mcp-searcher elaborate --report-file project_report.json --finding-id 5 --config-file ~/.my_gemini_config.json
154
+ ```
155
+
156
+ ## Output Formats
157
+
158
+ The `search` command can output results in several formats using the `--output-format` option:
159
+
160
+ * **`console` (default):** Prints results directly to the terminal in a human-readable format. Each match includes the file path, line number, and the line containing the match with the matched text highlighted (e.g., `>>>matched text<<<`). Context lines, if requested, are shown above and below the match line.
161
+
162
+ *Example Console Output (simplified):*
163
+ ```text
164
+ path/to/your/file.py:42
165
+ Context line 1 before match
166
+ >>>The line with the matched text<<<
167
+ Context line 1 after match
168
+ ---
169
+ another/file.txt:101
170
+ Just the >>>matched line<<< if no context
171
+ ---
172
+ ```
173
+
174
+ * **`json`:** Outputs results as a JSON array. Each object in the array represents a single match and contains the following fields:
175
+ * `file_path`: Absolute path to the file containing the match.
176
+ * `line_number`: The 1-based line number where the match occurred.
177
+ * `match_text`: The actual text that was matched.
178
+ * `snippet`: A string containing the line with the match and any surrounding context lines requested. The matched text within the snippet is highlighted with `>>> <<<`.
179
+ * `char_start_in_line`: The 0-based starting character offset of the match within its line.
180
+ * `char_end_in_line`: The 0-based ending character offset of the match within its line.
181
+
182
+ *Example JSON Output (for one match):*
183
+ ```json
184
+ [
185
+ {
186
+ "file_path": "/path/to/your/file.py",
187
+ "line_number": 42,
188
+ "match_text": "matched text",
189
+ "snippet": " Context line 1 before match\n >>>The line with the matched text<<<\n Context line 1 after match",
190
+ "char_start_in_line": 25,
191
+ "char_end_in_line": 37
192
+ }
193
+ // ... more matches ...
194
+ ]
195
+ ```
196
+ This format is ideal for programmatic processing and is required as input for the `elaborate` command.
197
+
198
+ * **`md` or `markdown`:** Outputs results in Markdown format. Each match is typically presented with the file path as a heading or bolded, followed by the line number and the snippet (often as a preformatted text block).
199
+
200
+ *Example Markdown Output (simplified):*
201
+ ```markdown
202
+ **path/to/your/file.py:42**
203
+ ```text
204
+ Context line 1 before match
205
+ >>>The line with the matched text<<<
206
+ Context line 1 after match
207
+ ```
208
+ ---
209
+ **another/file.txt:101**
210
+ ```text
211
+ Just the >>>matched line<<< if no context
212
+ ```
213
+ ```
214
+ This format is suitable for generating reports or for easy pasting into documents that support Markdown.
215
+
216
+ ## Building
217
+
218
+ To build the package (wheel and source distribution):
219
+
220
+ 1. Ensure you have the necessary build tools:
221
+ ```bash
222
+ pip install build
223
+ ```
224
+ 2. Run the build command from the project root:
225
+ ```bash
226
+ python -m build
227
+ ```
228
+ This will create `sdist` and `wheel` files in a `dist/` directory.
229
+
230
+ ## Running Tests
231
+
232
+ 1. Ensure test dependencies are installed (if any beyond main dependencies).
233
+ 2. Run tests using unittest discovery from the project root:
234
+ ```bash
235
+ python -m unittest discover -s tests
236
+ ```
237
+
238
+ ## Contributing
239
+
240
+ Contributions are welcome! Please open an issue or submit a pull request.
241
+
242
+ ## License
243
+
244
+ This project is licensed under the MIT License - see the LICENSE file for details.
245
+
246
+ ## Troubleshooting
247
+
248
+ Here are some common issues and how to resolve them:
249
+
250
+ * **Command not found (`mcp-searcher: command not found`):**
251
+ * Ensure you have activated the virtual environment where the package was installed: `source venv/bin/activate` (or `venv\Scripts\activate` on Windows).
252
+ * If installed in editable mode (`pip install -e .`), ensure you are in the project root or that the project root is in your `PYTHONPATH`.
253
+ * If installed via wheel, ensure the virtual environment's `bin` (or `Scripts`) directory is in your system's `PATH`.
254
+
255
+ * **ModuleNotFoundError (e.g., `No module named 'google.generativeai'`):**
256
+ * Make sure all dependencies are installed correctly within your active virtual environment. Dependencies are normally installed automatically with the package; if they are missing, try reinstalling with `pip install --force-reinstall mcp-codebase-searcher`, with `pip install --force-reinstall dist/mcp_codebase_searcher-*.whl` if you installed from a locally built wheel, or with `pip install --force-reinstall -r requirements.txt` if your source checkout has a `requirements.txt`.
257
+ * Ensure you are using the Python interpreter from your activated virtual environment.
258
+
259
+ * **API Key Errors (for `elaborate` command):**
260
+ * **"Could not initialize GenerativeModel... API key not found."**: This means the Google API key was not found through any of the supported methods (argument, config file, environment variable, `config.py`). Double-check the "API Key (for Elaboration)" step in the Installation section.
261
+ * **"Could not initialize GenerativeModel... Invalid API key."**: The key was found but is incorrect or unauthorized for the Gemini API.
262
+ * Ensure your `.env` file (if used) is in the correct location (project root if running from source) and correctly formatted (`GOOGLE_API_KEY="YOUR_KEY"`).
263
+ * Verify that the environment variable `GOOGLE_API_KEY` is set and exported in your current shell session if not using an `.env` file with `python-dotenv` support.
264
+
265
+ * **File/Directory Not Found (for `search` or `elaborate --report-file`):**
266
+ * Double-check that the paths provided to the `search` command or the `--report-file` argument are correct and accessible.
267
+ * Relative paths are resolved from the current working directory where you run the command.
268
+
269
+ * **Permission Denied Errors:**
270
+ * Ensure you have read permissions for the files/directories you are trying to search, and write permissions if using `--output-file` to a restricted location.
271
+
272
+ * **Invalid Regular Expression (for `search --regex`):**
273
+ * The tool will output an error if the regex pattern is invalid. Test your regex pattern with online tools or Python's `re` module separately.
274
+ * Remember to quote your regex pattern properly in the shell, especially if it contains special characters like `*`, `(`, `)`, `|`, etc. Single quotes (`'pattern'`) are often safer than double quotes in bash/zsh for complex patterns.
275
+
276
+ * **No Matches Found:**
277
+ * Verify your query term or regex pattern. Try a simpler, broader query first.
278
+ * Check your `--case-sensitive` flag. Search is case-insensitive by default.
279
+ * Review your exclusion patterns (`--exclude-dirs`, `--exclude-files`). You might be unintentionally excluding the files containing matches.
280
+ * Ensure the target files are not binary or are of a type the tool can read (primarily text-based).
281
+ * If searching hidden files, ensure `--include-hidden` is used.
282
+
283
+ * **Incorrect JSON in Report File (for `elaborate` command):**
284
+ * The `elaborate` command expects a JSON file in the format produced by `mcp-searcher search --output-format json`. If the file is malformed or not a valid JSON array of search results, elaboration will fail.
285
+ * Error messages like "Could not decode JSON from report file" or "Finding ID ... is out of range" point to issues with the report file or the provided ID.
286
+
287
+ * **Shell Quoting Issues for Query:**
288
+ * If your search query contains spaces or special shell characters (e.g., `!`, `*`, `$`, `&`), ensure it's properly quoted. Single quotes (`'your query'`) are generally safest to prevent shell expansion.
289
+ ```bash
290
+ mcp-searcher search 'my exact phrase with spaces!' .
291
+ mcp-searcher search 'pattern_with_$(dollar_sign_and_parens)' . --regex
292
+ ```
@@ -0,0 +1,269 @@
1
+ # MCP Codebase Searcher
2
+
3
+ MCP Codebase Searcher is a Python tool designed to scan codebases, search for text or regular expression patterns, and optionally elaborate on the findings using Google Gemini.
4
+
5
+ ## Features
6
+
7
+ * Search for exact strings or regular expression patterns.
8
+ * Case-sensitive or case-insensitive searching.
9
+ * Specify context lines to display around matches.
10
+ * Exclude specific directories and file patterns.
11
+ * Option to include/exclude hidden files and directories.
12
+ * Output results in console, JSON, or Markdown format.
13
+ * Save search results to a file.
14
+ * Elaborate on individual findings from a JSON report using Google Gemini.
15
+
16
+ ## Installation
17
+
18
+ This project uses Python 3.8+.
19
+
20
+ 1. **Clone the repository (if applicable):**
21
+ ```bash
22
+ git clone <repository_url>
23
+ cd mcp_codebase_searcher
24
+ ```
25
+
26
+ 2. **Create and activate a virtual environment:**
27
+ ```bash
28
+ python3 -m venv venv
29
+ source venv/bin/activate # On Windows use `venv\Scripts\activate`
30
+ ```
31
+
32
+ 3. **Install the package:**
33
+ Once the package is built (see Building section below), you can install it using pip:
34
+ ```bash
35
+ pip install dist/mcp_codebase_searcher-*.whl
36
+ ```
37
+ Alternatively, for development, install in editable mode from the project root:
38
+ ```bash
39
+ pip install -e .
40
+ ```
41
+
42
+ 4. **API Key (for Elaboration):**
43
+ To use the elaboration feature, you need a Google API key for Gemini. You can provide it via:
44
+ * The `--api-key` argument when using the `elaborate` command.
45
+ * A JSON configuration file specified with `--config-file` (containing `{"GOOGLE_API_KEY": "YOUR_KEY"}`).
46
+ * An environment variable `GOOGLE_API_KEY`.
47
+ * A `config.py` file in the project root (if running from source) that has a `load_api_key()` function returning the key.
48
+
49
+ The API key is sourced with the following precedence: `--api-key` argument > `--config-file` > `GOOGLE_API_KEY` environment variable > `config.py` module.
50
+
51
+ Create a `.env` file in the project root for local development if using environment variables:
52
+ ```
53
+ GOOGLE_API_KEY="YOUR_API_KEY_HERE"
54
+ ```
55
+
56
+ ## Usage
57
+
58
+ The tool provides two main commands: `search` and `elaborate`.
59
+
60
+ ### Search
61
+
62
+ ```bash
63
+ mcp-searcher search "your_query" path/to/search [--regex] [--case-sensitive] [--context LINES] [--exclude-dirs .git,node_modules] [--exclude-files *.log] [--include-hidden] [--output-format json] [--output-file results.json]
64
+ ```
65
+
66
+ **Arguments:**
67
+
68
+ * `query`: The search term or regex pattern.
69
+ * `paths`: One or more file or directory paths to search within.
70
+ * `--regex`, `-r`: Treat the `query` as a Python regular expression pattern.
71
+ * `--case-sensitive`, `-c`: Perform a case-sensitive search. By default, search is case-insensitive.
72
+ * `--context LINES`, `-C LINES`: Number of context lines to show around each match (default: 3). Set to 0 for no context.
73
+ * `--exclude-dirs PATTERNS`: Comma-separated list of directory name patterns (using `fnmatch` wildcards like `*`, `?`) to exclude (e.g., `.git,node_modules,build,*cache*`).
74
+ * `--exclude-files PATTERNS`: Comma-separated list of file name patterns (using `fnmatch` wildcards) to exclude (e.g., `*.log,*.tmp,temp_*`).
75
+ * `--include-hidden`: Include hidden files and directories (those starting with a period `.`) in the scan. By default, they are excluded unless they are explicitly provided in `paths`.
76
+ * `--output-format FORMAT`: Format for the output. Choices: `console` (default), `json`, `md` (or `markdown`).
77
+ * `--output-file FILE`: Path to save the output. If not provided, prints to the console.
78
+
79
+ **Examples:**
80
+
81
+ 1. Search for "TODO" (case-insensitive) in the `src` directory and its subdirectories, excluding `__pycache__` directories and any `.tmp` or `.log` files, and save the results as JSON:
82
+ ```bash
83
+ mcp-searcher search "TODO" src --exclude-dirs __pycache__ --exclude-files "*.tmp,*.log" --output-format json --output-file todos.json
84
+ ```
85
+
86
+ 2. Search for Python function definitions (e.g., `def my_function(`) using a regular expression in all `.py` files within the current directory (`.`) and its subdirectories:
87
+ ```bash
88
+ mcp-searcher search "^\s*def\s+\w+\s*\(.*\):" . --regex --exclude-files "!*.py" # Caution: this limits the search to .py files only if "!*.py" is honored as a negated (include) pattern; plain fnmatch-style exclusion does not treat a leading "!" as negation.
89
+ # If negated patterns are not supported by --exclude-files, use one of these approaches instead:
90
+ # find the .py files first and pass them to mcp-searcher as paths, or explicitly exclude the file types you do not want.
91
+ # The tool scans every file that is not excluded, so you normally leave other file types included unless they are binaries or similar.
92
+ # Regex-only example (searches all non-excluded files):
93
+ mcp-searcher search "^\s*def\s+\w+\s*\(.*\):" . --regex
94
+ ```
95
+ *Note: Ensure your regex is quoted correctly for your shell, especially if it contains special characters.*
96
+
97
+ 3. Perform a case-sensitive search for the exact string "ErrorLog" in all files in `/var/log`, include hidden files, and output to a Markdown file:
98
+ ```bash
99
+ mcp-searcher search "ErrorLog" /var/log --case-sensitive --include-hidden --output-format md --output-file errors_report.md
100
+ ```
101
+
102
+ ### Elaborate
103
+
104
+ ```bash
105
+ mcp-searcher elaborate --report-file path/to/report.json --finding-id INDEX [--api-key YOUR_KEY] [--config-file path/to/config.json] [--context-lines LINES]
106
+ ```
107
+
108
+ **Arguments:**
109
+
110
+ * `--report-file FILE`: (Required) Path to the JSON search report file generated by the `search` command.
111
+ * `--finding-id INDEX`: (Required) The 0-based index (ID) of the specific finding within the report file that you want to elaborate on.
112
+ * `--api-key KEY`: Your Google API key for Gemini. If provided, this takes precedence over other key sources.
113
+ * `--config-file FILE`: Path to an optional JSON configuration file containing your `GOOGLE_API_KEY` (e.g., `{"GOOGLE_API_KEY": "YOUR_KEY"}`).
114
+ * `--context-lines LINES`: Number of lines of broader context from the source file (surrounding the original snippet) to provide to the LLM for better understanding (default: 10).
115
+
116
+ **Examples:**
117
+
118
+ 1. Elaborate on the first finding (index 0) from `todos.json`, assuming the API key is set as an environment variable (`GOOGLE_API_KEY`) or in a `config.py` / `.env` file:
119
+ ```bash
120
+ mcp-searcher elaborate --report-file todos.json --finding-id 0
121
+ ```
122
+
123
+ 2. Elaborate on the third finding (index 2) from `search_results.json`, providing the API key directly and specifying 15 lines of context for the LLM:
124
+ ```bash
125
+ mcp-searcher elaborate --report-file search_results.json --finding-id 2 --api-key "AIzaSyXXXXXXXXXXXXXXXXXXX" --context-lines 15
126
+ ```
127
+
128
+ 3. Elaborate on the sixth finding (index 5) from `project_report.json`, using an API key stored in a custom configuration file named `.my_gemini_config.json` located in the user's home directory:
129
+ ```bash
130
+ mcp-searcher elaborate --report-file project_report.json --finding-id 5 --config-file ~/.my_gemini_config.json
131
+ ```
132
+
133
+ ## Output Formats
134
+
135
+ The `search` command can output results in several formats using the `--output-format` option:
136
+
137
+ * **`console` (default):** Prints results directly to the terminal in a human-readable format. Each match includes the file path, line number, and the line containing the match with the matched text highlighted (e.g., `>>>matched text<<<`). Context lines, if requested, are shown above and below the match line.
138
+
139
+ *Example Console Output (simplified):*
140
+ ```text
141
+ path/to/your/file.py:42
142
+ Context line 1 before match
143
+ >>>The line with the matched text<<<
144
+ Context line 1 after match
145
+ ---
146
+ another/file.txt:101
147
+ Just the >>>matched line<<< if no context
148
+ ---
149
+ ```
150
+
151
+ * **`json`:** Outputs results as a JSON array. Each object in the array represents a single match and contains the following fields:
152
+ * `file_path`: Absolute path to the file containing the match.
153
+ * `line_number`: The 1-based line number where the match occurred.
154
+ * `match_text`: The actual text that was matched.
155
+ * `snippet`: A string containing the line with the match and any surrounding context lines requested. The matched text within the snippet is highlighted with `>>> <<<`.
156
+ * `char_start_in_line`: The 0-based starting character offset of the match within its line.
157
+ * `char_end_in_line`: The 0-based ending character offset of the match within its line.
158
+
159
+ *Example JSON Output (for one match):*
160
+ ```json
161
+ [
162
+ {
163
+ "file_path": "/path/to/your/file.py",
164
+ "line_number": 42,
165
+ "match_text": "matched text",
166
+ "snippet": " Context line 1 before match\n >>>The line with the matched text<<<\n Context line 1 after match",
167
+ "char_start_in_line": 25,
168
+ "char_end_in_line": 37
169
+ }
170
+ // ... more matches ...
171
+ ]
172
+ ```
173
+ This format is ideal for programmatic processing and is required as input for the `elaborate` command.
174
+
175
+ * **`md` or `markdown`:** Outputs results in Markdown format. Each match is typically presented with the file path as a heading or bolded, followed by the line number and the snippet (often as a preformatted text block).
176
+
177
+ *Example Markdown Output (simplified):*
178
+ ```markdown
179
+ **path/to/your/file.py:42**
180
+ ```text
181
+ Context line 1 before match
182
+ >>>The line with the matched text<<<
183
+ Context line 1 after match
184
+ ```
185
+ ---
186
+ **another/file.txt:101**
187
+ ```text
188
+ Just the >>>matched line<<< if no context
189
+ ```
190
+ ```
191
+ This format is suitable for generating reports or for easy pasting into documents that support Markdown.
192
+
193
+ ## Building
194
+
195
+ To build the package (wheel and source distribution):
196
+
197
+ 1. Ensure you have the necessary build tools:
198
+ ```bash
199
+ pip install build
200
+ ```
201
+ 2. Run the build command from the project root:
202
+ ```bash
203
+ python -m build
204
+ ```
205
+ This will create `sdist` and `wheel` files in a `dist/` directory.
206
+
207
+ ## Running Tests
208
+
209
+ 1. Ensure test dependencies are installed (if any beyond main dependencies).
210
+ 2. Run tests using unittest discovery from the project root:
211
+ ```bash
212
+ python -m unittest discover -s tests
213
+ ```
214
+
215
+ ## Contributing
216
+
217
+ Contributions are welcome! Please open an issue or submit a pull request.
218
+
219
+ ## License
220
+
221
+ This project is licensed under the MIT License - see the LICENSE file for details.
222
+
223
+ ## Troubleshooting
224
+
225
+ Here are some common issues and how to resolve them:
226
+
227
+ * **Command not found (`mcp-searcher: command not found`):**
228
+ * Ensure you have activated the virtual environment where the package was installed: `source venv/bin/activate` (or `venv\Scripts\activate` on Windows).
229
+ * If installed in editable mode (`pip install -e .`), ensure you are in the project root or that the project root is in your `PYTHONPATH`.
230
+ * If installed via wheel, ensure the virtual environment's `bin` (or `Scripts`) directory is in your system's `PATH`.
231
+
232
+ * **ModuleNotFoundError (e.g., `No module named 'google.generativeai'`):**
233
+ * Make sure all dependencies are installed correctly within your active virtual environment. Dependencies are normally installed automatically with the package; if they are missing, try reinstalling with `pip install --force-reinstall mcp-codebase-searcher`, with `pip install --force-reinstall dist/mcp_codebase_searcher-*.whl` if you installed from a locally built wheel, or with `pip install --force-reinstall -r requirements.txt` if your source checkout has a `requirements.txt`.
234
+ * Ensure you are using the Python interpreter from your activated virtual environment.
235
+
236
+ * **API Key Errors (for `elaborate` command):**
237
+ * **"Could not initialize GenerativeModel... API key not found."**: This means the Google API key was not found through any of the supported methods (argument, config file, environment variable, `config.py`). Double-check the "API Key (for Elaboration)" step in the Installation section.
238
+ * **"Could not initialize GenerativeModel... Invalid API key."**: The key was found but is incorrect or unauthorized for the Gemini API.
239
+ * Ensure your `.env` file (if used) is in the correct location (project root if running from source) and correctly formatted (`GOOGLE_API_KEY="YOUR_KEY"`).
240
+ * Verify that the environment variable `GOOGLE_API_KEY` is set and exported in your current shell session if not using an `.env` file with `python-dotenv` support.
241
+
242
+ * **File/Directory Not Found (for `search` or `elaborate --report-file`):**
243
+ * Double-check that the paths provided to the `search` command or the `--report-file` argument are correct and accessible.
244
+ * Relative paths are resolved from the current working directory where you run the command.
245
+
246
+ * **Permission Denied Errors:**
247
+ * Ensure you have read permissions for the files/directories you are trying to search, and write permissions if using `--output-file` to a restricted location.
248
+
249
+ * **Invalid Regular Expression (for `search --regex`):**
250
+ * The tool will output an error if the regex pattern is invalid. Test your regex pattern with online tools or Python's `re` module separately.
251
+ * Remember to quote your regex pattern properly in the shell, especially if it contains special characters like `*`, `(`, `)`, `|`, etc. Single quotes (`'pattern'`) are often safer than double quotes in bash/zsh for complex patterns.
252
+
253
+ * **No Matches Found:**
254
+ * Verify your query term or regex pattern. Try a simpler, broader query first.
255
+ * Check your `--case-sensitive` flag. Search is case-insensitive by default.
256
+ * Review your exclusion patterns (`--exclude-dirs`, `--exclude-files`). You might be unintentionally excluding the files containing matches.
257
+ * Ensure the target files are not binary or are of a type the tool can read (primarily text-based).
258
+ * If searching hidden files, ensure `--include-hidden` is used.
259
+
260
+ * **Incorrect JSON in Report File (for `elaborate` command):**
261
+ * The `elaborate` command expects a JSON file in the format produced by `mcp-searcher search --output-format json`. If the file is malformed or not a valid JSON array of search results, elaboration will fail.
262
+ * Error messages like "Could not decode JSON from report file" or "Finding ID ... is out of range" point to issues with the report file or the provided ID.
263
+
264
+ * **Shell Quoting Issues for Query:**
265
+ * If your search query contains spaces or special shell characters (e.g., `!`, `*`, `$`, `&`), ensure it's properly quoted. Single quotes (`'your query'`) are generally safest to prevent shell expansion.
266
+ ```bash
267
+ mcp-searcher search 'my exact phrase with spaces!' .
268
+ mcp-searcher search 'pattern_with_$(dollar_sign_and_parens)' . --regex
269
+ ```
@@ -0,0 +1,56 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "mcp_codebase_searcher"
7
+ version = "0.1.0"
8
+ authors = [
9
+ { name="Sakilmostak", email="skmahim71@gmail.com" },
10
+ ]
11
+ description = "A Python tool to scan codebases, search for text/regex patterns, and elaborate on findings using Google Gemini."
12
+ readme = "README.md"
13
+ requires-python = ">=3.8"
14
+ license = "MIT"
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Operating System :: OS Independent",
18
+ "Development Status :: 3 - Alpha",
19
+ "Environment :: Console",
20
+ "Intended Audience :: Developers",
21
+ "Topic :: Software Development :: Build Tools",
22
+ "Topic :: Text Processing :: Indexing",
23
+ "Topic :: Utilities",
24
+ ]
25
+
26
+ # Runtime dependencies
27
+ dependencies = [
28
+ "python-dotenv",
29
+ "google-generativeai",
30
+ ]
31
+
32
+ # Note: py-modules is declared under [tool.setuptools] below, not in [project]
33
+
34
+ [project.scripts]
35
+ # Console entry point: the `mcp-searcher` command runs mcp_searcher.main()
36
+ mcp-searcher = "mcp_searcher:main"
37
+
38
+ [project.urls]
39
+ "Homepage" = "https://github.com/sakilmostak/mcp-codebase-searcher" # Placeholder
40
+ "Bug Tracker" = "https://github.com/sakilmostak/mcp-codebase-searcher/issues" # Placeholder
41
+
42
+ [tool.setuptools]
43
+ package-dir = {"" = "src"}
44
+ py-modules = [
45
+ "mcp_searcher",
46
+ "config",
47
+ "file_scanner",
48
+ "mcp_elaborate",
49
+ "mcp_search",
50
+ "output_generator",
51
+ "report_elaborator"
52
+ ]
53
+ # Using py-modules explicitly, so find is not strictly needed but can be kept for safety/explicitness
54
+ [tool.setuptools.packages.find]
55
+ where = ["src"]
56
+ exclude = ["tests", "tests.*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,20 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
def load_api_key():
    """Return the value of ``GOOGLE_API_KEY`` after loading any ``.env`` file.

    ``load_dotenv()`` is invoked first, then ``GOOGLE_API_KEY`` is read from
    the process environment. When the variable is unset or empty, a warning
    is printed and the falsy value is still returned; no exception is raised,
    so callers must check the result themselves.

    Returns:
        The key string, or a falsy value (``None`` when unset) if no key is
        available.
    """
    load_dotenv()  # Pull variables defined in a .env file into the environment
    google_key = os.environ.get("GOOGLE_API_KEY")

    # Happy path first: a non-empty key needs no further handling.
    if google_key:
        return google_key

    # Lenient by design: warn instead of raising so that callers which do not
    # need the key can keep working.
    print("Warning: GOOGLE_API_KEY not found in .env file or environment variables.")
    return google_key
13
+
14
if __name__ == "__main__":
    # Manual smoke check: report whether a key could be loaded, revealing
    # only its first five characters.
    loaded_key = load_api_key()
    if not loaded_key:
        print("API key not loaded. Please check your .env file for GOOGLE_API_KEY.")
    else:
        print(f"Successfully loaded API key (first 5 chars): {loaded_key[:5]}...")