awslabs.git-repo-research-mcp-server 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- awslabs/__init__.py +12 -0
- awslabs/git_repo_research_mcp_server/__init__.py +13 -0
- awslabs/git_repo_research_mcp_server/defaults.py +347 -0
- awslabs/git_repo_research_mcp_server/embeddings.py +66 -0
- awslabs/git_repo_research_mcp_server/github_search.py +471 -0
- awslabs/git_repo_research_mcp_server/indexer.py +860 -0
- awslabs/git_repo_research_mcp_server/models.py +291 -0
- awslabs/git_repo_research_mcp_server/repository.py +321 -0
- awslabs/git_repo_research_mcp_server/search.py +350 -0
- awslabs/git_repo_research_mcp_server/server.py +914 -0
- awslabs/git_repo_research_mcp_server/utils.py +396 -0
- awslabs_git_repo_research_mcp_server-0.0.1.dist-info/METADATA +190 -0
- awslabs_git_repo_research_mcp_server-0.0.1.dist-info/RECORD +17 -0
- awslabs_git_repo_research_mcp_server-0.0.1.dist-info/WHEEL +4 -0
- awslabs_git_repo_research_mcp_server-0.0.1.dist-info/entry_points.txt +2 -0
- awslabs_git_repo_research_mcp_server-0.0.1.dist-info/licenses/LICENSE +175 -0
- awslabs_git_repo_research_mcp_server-0.0.1.dist-info/licenses/NOTICE +2 -0
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
|
|
4
|
+
# with the License. A copy of the License is located at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
|
|
9
|
+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
|
|
10
|
+
# and limitations under the License.
|
|
11
|
+
"""Utility functions for Git Repository Research MCP Server.
|
|
12
|
+
|
|
13
|
+
This module provides utility functions for the Git Repository Research MCP Server.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
import shutil
|
|
19
|
+
from awslabs.git_repo_research_mcp_server.defaults import Constants
|
|
20
|
+
from awslabs.git_repo_research_mcp_server.models import (
|
|
21
|
+
DetailedIndexedRepositoriesResponse,
|
|
22
|
+
DetailedIndexedRepositoryInfo,
|
|
23
|
+
IndexedRepositoriesResponse,
|
|
24
|
+
IndexedRepositoryInfo,
|
|
25
|
+
IndexMetadata,
|
|
26
|
+
)
|
|
27
|
+
from datetime import datetime
|
|
28
|
+
from loguru import logger
|
|
29
|
+
from typing import Dict, List, Optional, Union
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_default_index_dir() -> str:
    """Return the default index directory, creating it if it does not exist.

    Returns:
        Absolute path to the default index directory under the user's home.
    """
    index_home = os.path.expanduser(f'~/{Constants.DEFAULT_INDEX_DIR}')
    # Idempotent: exist_ok avoids racing with a concurrent creation.
    os.makedirs(index_home, exist_ok=True)
    return index_home
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def load_metadata(metadata_path: str) -> Optional[IndexMetadata]:
    """Read and parse index metadata from a JSON file.

    Args:
        metadata_path: Path to the metadata file

    Returns:
        IndexMetadata object if the file exists and is valid, None otherwise
    """
    if not os.path.exists(metadata_path):
        return None

    try:
        with open(metadata_path, 'r') as handle:
            raw = json.load(handle)
        return IndexMetadata(**raw)
    except Exception as e:
        # Treat unreadable/invalid metadata as "no metadata" rather than raising.
        logger.error(f'Error loading metadata from {metadata_path}: {e}')
        return None
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def list_indexed_repositories(
    index_dir: Optional[str] = None, detailed: bool = False
) -> Union[IndexedRepositoriesResponse, DetailedIndexedRepositoriesResponse]:
    """List all indexed repositories.

    Scans the index directory for subdirectories containing a ``metadata.json``
    file and builds one info entry per valid index.

    Args:
        index_dir: Directory to look for indices (optional, uses default if not provided)
        detailed: Whether to return detailed information about each index

    Returns:
        IndexedRepositoriesResponse or DetailedIndexedRepositoriesResponse object
    """
    index_dir = index_dir or get_default_index_dir()

    # Missing index directory means there is nothing indexed at all.
    if not os.path.exists(index_dir):
        if detailed:
            return DetailedIndexedRepositoriesResponse(
                repositories=[],
                total_count=0,
                index_directory=index_dir,
                total_index_size_bytes=0,
            )
        return IndexedRepositoriesResponse(
            repositories=[],
            total_count=0,
            index_directory=index_dir,
        )

    repositories = []
    total_index_size = 0

    for entry in os.listdir(index_dir):
        entry_path = os.path.join(index_dir, entry)
        if not os.path.isdir(entry_path):
            continue

        # Skip entries without valid metadata (load_metadata returns None for
        # both missing and unparseable metadata.json files).
        metadata = load_metadata(os.path.join(entry_path, 'metadata.json'))
        if metadata is None:
            continue

        # Expose the extracted repository files directory when present.
        repo_files_path = os.path.join(metadata.index_path, 'repository')
        repository_directory = repo_files_path if os.path.isdir(repo_files_path) else None

        if detailed:
            repo_info = DetailedIndexedRepositoryInfo(
                repository_name=metadata.repository_name,
                repository_path=metadata.repository_path,
                index_path=metadata.index_path,
                repository_directory=repository_directory,
                created_at=metadata.created_at,
                last_accessed=metadata.last_accessed,
                file_count=metadata.file_count,
                embedding_model=metadata.embedding_model,
                chunk_count=metadata.chunk_count,
                file_types=metadata.file_types,
                total_tokens=metadata.total_tokens,
                index_size_bytes=metadata.index_size_bytes,
                last_commit_id=metadata.last_commit_id,
            )
            # Only indexes that report a size contribute to the total.
            if metadata.index_size_bytes:
                total_index_size += metadata.index_size_bytes
        else:
            repo_info = IndexedRepositoryInfo(
                repository_name=metadata.repository_name,
                repository_path=metadata.repository_path,
                index_path=metadata.index_path,
                repository_directory=repository_directory,
                created_at=metadata.created_at,
                last_accessed=metadata.last_accessed,
                file_count=metadata.file_count,
                embedding_model=metadata.embedding_model,
            )

        repositories.append(repo_info)

    if detailed:
        return DetailedIndexedRepositoriesResponse(
            repositories=repositories,
            total_count=len(repositories),
            index_directory=index_dir,
            total_index_size_bytes=total_index_size,
        )
    return IndexedRepositoriesResponse(
        repositories=repositories,
        total_count=len(repositories),
        index_directory=index_dir,
    )
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes datetime objects as ISO-8601 strings."""

    def default(self, o):
        """Serialize *o* for JSON output.

        Args:
            o: Object the base encoder could not serialize

        Returns:
            ISO format string when *o* is a datetime; otherwise delegates to
            the base class (which raises TypeError for unsupported types).
        """
        return o.isoformat() if isinstance(o, datetime) else super().default(o)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def format_size(size_bytes: int) -> str:
    """Format a byte count as a human-readable string.

    Args:
        size_bytes: Size in bytes

    Returns:
        Size rendered with B, KB, MB, or GB units (two decimals above bytes)
    """
    kib = 1024
    mib = 1024 * 1024
    gib = 1024 * 1024 * 1024

    if size_bytes < kib:
        return f'{size_bytes} B'
    if size_bytes < mib:
        return f'{size_bytes / kib:.2f} KB'
    if size_bytes < gib:
        return f'{size_bytes / mib:.2f} MB'
    return f'{size_bytes / gib:.2f} GB'
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
async def delete_indexed_repository(
    repository_name_or_path: str, index_dir: Optional[str] = None
) -> Dict[str, Union[str, List[str]]]:
    """Delete an indexed repository.

    Resolves the target index either from an absolute index path or from a
    repository name, pre-checks write permissions, then best-effort deletes
    the metadata file, the extracted repository files, and the index
    file/directory. Partial failures are reported rather than raised.

    Args:
        repository_name_or_path: Name of the repository or path to the index
        index_dir: Directory to look for indices (optional, uses default if not provided)

    Returns:
        Dictionary with status ('success', 'partial', or 'error') and message;
        on deletion attempts it also carries 'repository_name',
        'deleted_files', and/or 'errors' keys
    """
    index_dir = index_dir or get_default_index_dir()
    if not os.path.exists(index_dir):
        return {
            'status': 'error',
            'message': f'Index directory {index_dir} does not exist',
        }

    # Check if the input is a repository name or an index path
    if os.path.isabs(repository_name_or_path) and os.path.exists(repository_name_or_path):
        # It's an index path
        index_path = repository_name_or_path
        metadata_path = os.path.join(index_path, 'metadata.json')
    else:
        # It's a repository name, find the corresponding index directory
        repository_name = repository_name_or_path
        # Sanitize the repository name for use in a directory name
        safe_name = ''.join(c if c.isalnum() or c in '-_' else '_' for c in repository_name)
        index_path = os.path.join(index_dir, safe_name)
        metadata_path = os.path.join(index_path, 'metadata.json')

        if not os.path.exists(index_path) or not os.path.exists(metadata_path):
            # Try to find the repository by checking metadata in all subdirectories
            # (the sanitized name may not match the on-disk directory name).
            found = False
            for dirname in os.listdir(index_dir):
                dir_path = os.path.join(index_dir, dirname)
                if os.path.isdir(dir_path):
                    potential_metadata_path = os.path.join(dir_path, 'metadata.json')
                    if os.path.exists(potential_metadata_path):
                        metadata = load_metadata(potential_metadata_path)
                        if metadata and metadata.repository_name == repository_name:
                            index_path = dir_path
                            metadata_path = potential_metadata_path
                            found = True
                            break

            if not found:
                return {
                    'status': 'error',
                    'message': f"Repository '{repository_name}' not found in index directory",
                }

    # Check if the metadata file exists (covers the absolute-path branch,
    # where metadata.json was never verified above)
    if not os.path.exists(metadata_path):
        return {
            'status': 'error',
            'message': f'Metadata file {metadata_path} not found',
        }

    # Load the metadata to get repository information
    metadata = load_metadata(metadata_path)
    if metadata is None:
        return {
            'status': 'error',
            'message': f'Failed to load metadata from {metadata_path}',
        }

    # Canonical name from metadata, regardless of how the target was resolved
    repository_name = metadata.repository_name

    # Check permissions before attempting to delete
    files_to_check = [metadata_path]
    if os.path.exists(index_path):
        files_to_check.append(index_path)

    # For a file-style index path (e.g. name.faiss), also check the sibling
    # directory that shares its stem
    index_dir_path = os.path.splitext(index_path)[0]
    if os.path.isdir(index_dir_path):
        files_to_check.append(index_dir_path)

    permission_issues = []
    for file_path in files_to_check:
        if not os.access(file_path, os.W_OK):
            permission_issues.append(file_path)

    if permission_issues:
        permission_msg = 'Permission denied for the following files:\n'
        for path in permission_issues:
            permission_msg += f' - {path}\n'
        permission_msg += '\nTo delete these files, you may need to run the command with sudo or adjust file permissions.'

        return {
            'status': 'error',
            'message': permission_msg,
            'repository_name': repository_name,
            'permission_issues': permission_issues,
        }

    # Delete the files
    deleted_files = []
    errors = []

    # Check if the index path is a file or directory
    is_file = os.path.isfile(index_path)
    is_dir = os.path.isdir(index_path)

    # Check for repository directory
    repo_files_path = os.path.join(index_path, 'repository')
    if os.path.isdir(repo_files_path):
        # NOTE(review): this append has no effect — the permission check over
        # files_to_check already ran above, and the list is not read again.
        files_to_check.append(repo_files_path)

    # Try to delete the metadata file first
    try:
        os.remove(metadata_path)
        deleted_files.append(metadata_path)
        logger.info(f'Deleted metadata file: {metadata_path}')
    except Exception as e:
        errors.append(f'Failed to delete metadata file {metadata_path}: {str(e)}')
        logger.error(f'Error deleting metadata file {metadata_path}: {e}')

    # Try to delete the repository directory if it exists
    if os.path.isdir(repo_files_path):
        try:
            shutil.rmtree(repo_files_path)
            deleted_files.append(repo_files_path)
            logger.info(f'Deleted repository directory: {repo_files_path}')
        except Exception as e:
            errors.append(f'Failed to delete repository directory {repo_files_path}: {str(e)}')
            logger.error(f'Error deleting repository directory {repo_files_path}: {e}')

    # If the index path is a file, try to delete it
    if is_file:
        try:
            os.remove(index_path)
            deleted_files.append(index_path)
            logger.info(f'Deleted index file: {index_path}')
        except Exception as e:
            # If we can't delete the file, log the error but don't consider it a failure
            # since the index directory might contain the actual data
            logger.warning(f'Could not delete index file {index_path}: {e}')

    # Try to delete the directory if it exists
    # NOTE(review): index_dir_path is recomputed here — identical to the value
    # computed for the permission check above
    index_dir_path = os.path.splitext(index_path)[0]
    if os.path.isdir(index_dir_path):
        try:
            shutil.rmtree(index_dir_path)
            deleted_files.append(index_dir_path)
            logger.info(f'Deleted index directory: {index_dir_path}')
        except Exception as e:
            errors.append(f'Failed to delete index directory {index_dir_path}: {str(e)}')
            logger.error(f'Error deleting index directory {index_dir_path}: {e}')

    # If the index path itself is a directory, try to delete it
    if is_dir and index_path != index_dir_path:
        try:
            shutil.rmtree(index_path)
            deleted_files.append(index_path)
            logger.info(f'Deleted index directory: {index_path}')
        except Exception as e:
            # If we already deleted the directory with the same name, this is expected
            # (NOTE(review): index_path only enters deleted_files via successful
            # deletions above, so this branch looks unreachable in practice — verify)
            if index_path in deleted_files:
                logger.info(f'Index directory {index_path} was already deleted')
            else:
                errors.append(f'Failed to delete index directory {index_path}: {str(e)}')
                logger.error(f'Error deleting index directory {index_path}: {e}')

    # Return appropriate response based on results
    if not errors:
        return {
            'status': 'success',
            'message': f"Successfully deleted repository '{repository_name}'",
            'repository_name': repository_name,
            'deleted_files': deleted_files,
        }
    elif deleted_files:
        # Partial success
        return {
            'status': 'partial',
            'message': f"Partially deleted repository '{repository_name}'. Some files could not be deleted.",
            'repository_name': repository_name,
            'deleted_files': deleted_files,
            'errors': errors,
        }
    else:
        # Complete failure
        error_msg = f"Failed to delete repository '{repository_name}':\n"
        for err in errors:
            error_msg += f' - {err}\n'

        return {
            'status': 'error',
            'message': error_msg,
            'repository_name': repository_name,
            'errors': errors,
        }
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: awslabs.git-repo-research-mcp-server
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: An AWS Labs Model Context Protocol (MCP) server for researching git repositories
|
|
5
|
+
Project-URL: Homepage, https://awslabs.github.io/mcp/
|
|
6
|
+
Project-URL: Documentation, https://awslabs.github.io/mcp/servers/git-repo-research-mcp-server/
|
|
7
|
+
Project-URL: Source, https://github.com/awslabs/mcp.git
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/awslabs/mcp/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/awslabs/mcp/blob/main/src/git-repo-research-mcp-server/CHANGELOG.md
|
|
10
|
+
Author: Amazon Web Services
|
|
11
|
+
Author-email: AWSLabs MCP <203918161+awslabs-mcp@users.noreply.github.com>
|
|
12
|
+
License: Apache-2.0
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
License-File: NOTICE
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Requires-Dist: backoff>=2.2.1
|
|
25
|
+
Requires-Dist: boto3>=1.37.26
|
|
26
|
+
Requires-Dist: faiss-cpu>=1.10.0
|
|
27
|
+
Requires-Dist: gitpython>=3.1.44
|
|
28
|
+
Requires-Dist: h11>=0.16.0
|
|
29
|
+
Requires-Dist: langchain-aws>=0.2.18
|
|
30
|
+
Requires-Dist: langchain-community>=0.3.20
|
|
31
|
+
Requires-Dist: langchain>=0.3.22
|
|
32
|
+
Requires-Dist: loguru>=0.7.3
|
|
33
|
+
Requires-Dist: mcp[cli]>=1.6.0
|
|
34
|
+
Requires-Dist: pydantic>=2.10.6
|
|
35
|
+
Requires-Dist: requests>=2.32.0
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# Git Repo Research MCP Server
|
|
39
|
+
|
|
40
|
+
Model Context Protocol (MCP) server for researching Git repositories using semantic search
|
|
41
|
+
|
|
42
|
+
This MCP server enables developers to research external Git repositories and influence their code generation without having to clone repositories to local projects. It provides tools to index, search, and explore Git repositories using semantic search powered by Amazon Bedrock and FAISS.
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
- **Repository Indexing**: Create searchable FAISS indexes from local or remote Git repositories
|
|
47
|
+
- **Semantic Search**: Query repository content using natural language and retrieve relevant code snippets
|
|
48
|
+
- **Repository Summary**: Get directory structures and identify key files like READMEs
|
|
49
|
+
- **GitHub Repository Search**: Find repositories in AWS-related organizations filtered by licenses and keywords
|
|
50
|
+
- **File Access**: Access repository files and directories with support for both text and binary content
|
|
51
|
+
|
|
52
|
+
## Prerequisites
|
|
53
|
+
|
|
54
|
+
### Installation Requirements
|
|
55
|
+
|
|
56
|
+
1. Install `uv` from [Astral](https://docs.astral.sh/uv/getting-started/installation/) or the [GitHub README](https://github.com/astral-sh/uv#installation)
|
|
57
|
+
2. Install Python 3.12 or newer using `uv python install 3.12`
|
|
58
|
+
3. [uv](https://github.com/astral-sh/uv) - Fast Python package installer and resolver
|
|
59
|
+
4. AWS credentials configured with Bedrock access
|
|
60
|
+
5. Node.js (for UVX installation support)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
### AWS Requirements
|
|
64
|
+
|
|
65
|
+
1. **AWS CLI Configuration**: You must have the AWS CLI configured with credentials that have access to Amazon Bedrock
|
|
66
|
+
2. **Amazon Bedrock Access**: Ensure your AWS account has access to embedding models like Titan Embeddings
|
|
67
|
+
3. **Environment Variables**: The server uses `AWS_REGION` and `AWS_PROFILE` environment variables
|
|
68
|
+
|
|
69
|
+
### Optional Requirements
|
|
70
|
+
|
|
71
|
+
1. **GitHub Token**: Set `GITHUB_TOKEN` environment variable for higher rate limits when searching GitHub repositories
|
|
72
|
+
|
|
73
|
+
## Installation
|
|
74
|
+
|
|
75
|
+
To add this MCP server to your Amazon Q or Claude, add the following to your MCP config file:
|
|
76
|
+
|
|
77
|
+
```json
|
|
78
|
+
{
|
|
79
|
+
"mcpServers": {
|
|
80
|
+
"awslabs.git-repo-research-mcp-server": {
|
|
81
|
+
"command": "uvx",
|
|
82
|
+
"args": ["awslabs.git-repo-research-mcp-server@latest"],
|
|
83
|
+
"env": {
|
|
84
|
+
"AWS_PROFILE": "your-profile-name",
|
|
85
|
+
"AWS_REGION": "us-west-2",
|
|
86
|
+
"FASTMCP_LOG_LEVEL": "ERROR",
|
|
87
|
+
"GITHUB_TOKEN": "your-github-token"
|
|
88
|
+
},
|
|
89
|
+
"disabled": false,
|
|
90
|
+
"autoApprove": []
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Tools
|
|
97
|
+
|
|
98
|
+
### create_research_repository
|
|
99
|
+
|
|
100
|
+
Indexes a Git repository (local or remote) using FAISS and Amazon Bedrock embeddings.
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
create_research_repository(
|
|
104
|
+
repository_path: str,
|
|
105
|
+
output_path: Optional[str] = None,
|
|
106
|
+
embedding_model: str = "amazon.titan-embed-text-v2:0",
|
|
107
|
+
include_patterns: Optional[List[str]] = None,
|
|
108
|
+
exclude_patterns: Optional[List[str]] = None,
|
|
109
|
+
chunk_size: int = 1000,
|
|
110
|
+
chunk_overlap: int = 200
|
|
111
|
+
) -> Dict
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### search_research_repository
|
|
115
|
+
|
|
116
|
+
Performs semantic search within an indexed repository.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
search_research_repository(
|
|
120
|
+
index_path: str,
|
|
121
|
+
query: str,
|
|
122
|
+
limit: int = 10,
|
|
123
|
+
threshold: float = 0.0
|
|
124
|
+
) -> Dict
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### search_research_repository_suggestions
|
|
128
|
+
|
|
129
|
+
Searches for GitHub repositories based on keywords, scoped to AWS organizations.
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
search_research_repository_suggestions(
|
|
133
|
+
keywords: List[str],
|
|
134
|
+
num_results: int = 5
|
|
135
|
+
) -> Dict
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### access_file
|
|
139
|
+
|
|
140
|
+
Accesses file or directory contents within repositories or on the filesystem.
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
access_file(
|
|
144
|
+
filepath: str
|
|
145
|
+
) -> Dict | ImageContent
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### delete_research_repository
|
|
149
|
+
|
|
150
|
+
Deletes an indexed repository.
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
delete_research_repository(
|
|
154
|
+
repository_name_or_path: str,
|
|
155
|
+
index_directory: Optional[str] = None
|
|
156
|
+
) -> Dict
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Resources
|
|
160
|
+
|
|
161
|
+
### repositories://{repository_name}/summary
|
|
162
|
+
|
|
163
|
+
Get a summary of an indexed repository including structure and helpful files.
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
repositories://awslabs_mcp/summary
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### repositories://
|
|
170
|
+
|
|
171
|
+
List all indexed repositories with detailed information.
|
|
172
|
+
|
|
173
|
+
```
|
|
174
|
+
repositories://
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### repositories://{index_directory}
|
|
178
|
+
|
|
179
|
+
List all indexed repositories from a specific index directory.
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
repositories:///path/to/custom/index/directory
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Considerations
|
|
186
|
+
|
|
187
|
+
- Repository indexing requires Amazon Bedrock access and sufficient permissions
|
|
188
|
+
- Large repositories may take significant time to index
|
|
189
|
+
- Binary files (except images) are not supported for content viewing
|
|
190
|
+
- GitHub repository search is by default limited to AWS organizations: aws-samples, aws-solutions-library-samples, and awslabs (but can be configured to include other organizations)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
awslabs/__init__.py,sha256=BIBSY3C1KeFtC_77foZxP2Jju9462uxgEQUPoJTve7w,673
|
|
2
|
+
awslabs/git_repo_research_mcp_server/__init__.py,sha256=GNVcSFE3kxGv2FOKoy3H0Yh922lVN6vycp6SfgZl4w4,624
|
|
3
|
+
awslabs/git_repo_research_mcp_server/defaults.py,sha256=DRk79HueQN-HW3xFmrSV2q4jdjTtSjmTSjucmhSfAJU,7621
|
|
4
|
+
awslabs/git_repo_research_mcp_server/embeddings.py,sha256=qOW3ulokriuDdl_OilcJ4F38zkWdZy80N-GN2AgwHOc,2452
|
|
5
|
+
awslabs/git_repo_research_mcp_server/github_search.py,sha256=gLCTq3EV2ghS9FMbngoP5BnDwHcWiKTahOZbCPdglTs,16136
|
|
6
|
+
awslabs/git_repo_research_mcp_server/indexer.py,sha256=AMSY3FLnLjRtNXdTELqI43yhcjXU8iBkuZMZcWOd3m4,30317
|
|
7
|
+
awslabs/git_repo_research_mcp_server/models.py,sha256=fb55x92k5xqseFHLGo2bJta5YioRuNPxicSn1Vm_yhE,12544
|
|
8
|
+
awslabs/git_repo_research_mcp_server/repository.py,sha256=6Tw2k2V3XxLE7Hqjc_o6LBiaYEbjLiBtn4HpTJ_tuAE,10605
|
|
9
|
+
awslabs/git_repo_research_mcp_server/search.py,sha256=b2NBJGk3YLU9zVEhXlrpHdfGTUQdRNhSGIvU0TR9muI,13747
|
|
10
|
+
awslabs/git_repo_research_mcp_server/server.py,sha256=2WtoDMlQHJ1DWr4J_9jORbU_nwhhTH5ye7DUKDrRETA,34668
|
|
11
|
+
awslabs/git_repo_research_mcp_server/utils.py,sha256=SRFPvxRNPR3K41F1fQDRHdytF-86JczvXINTxjjGguI,15019
|
|
12
|
+
awslabs_git_repo_research_mcp_server-0.0.1.dist-info/METADATA,sha256=DTzIyqtMyMEv4PGt_06dQ-EHGMOqY9msKD0hbVVhbl0,6124
|
|
13
|
+
awslabs_git_repo_research_mcp_server-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
14
|
+
awslabs_git_repo_research_mcp_server-0.0.1.dist-info/entry_points.txt,sha256=jX-BkQM7l_OK8D_iq_B4ukRuChbKjA4P4Q0IMikM4Ys,106
|
|
15
|
+
awslabs_git_repo_research_mcp_server-0.0.1.dist-info/licenses/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
|
|
16
|
+
awslabs_git_repo_research_mcp_server-0.0.1.dist-info/licenses/NOTICE,sha256=fnpchtUnC9htWDzn7WbOrBYePd9UUEYXytHe98AB34w,104
|
|
17
|
+
awslabs_git_repo_research_mcp_server-0.0.1.dist-info/RECORD,,
|