awslabs.git-repo-research-mcp-server 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,914 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
4
+ # with the License. A copy of the License is located at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
9
+ # OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
10
+ # and limitations under the License.
11
+ """awslabs git-repo-research MCP Server implementation."""
12
+
13
+ import argparse
14
+ import json
15
+ import mimetypes
16
+ import os
17
+ import sys
18
+ from awslabs.git_repo_research_mcp_server.defaults import Constants
19
+ from awslabs.git_repo_research_mcp_server.github_search import (
20
+ github_repo_search_wrapper,
21
+ )
22
+ from awslabs.git_repo_research_mcp_server.indexer import (
23
+ IndexConfig,
24
+ RepositoryConfig,
25
+ get_repository_indexer,
26
+ )
27
+ from awslabs.git_repo_research_mcp_server.models import (
28
+ DeleteRepositoryResponse,
29
+ EmbeddingModel,
30
+ GitHubRepoSearchResponse,
31
+ GitHubRepoSearchResult,
32
+ )
33
+ from awslabs.git_repo_research_mcp_server.search import get_repository_searcher
34
+ from awslabs.git_repo_research_mcp_server.utils import (
35
+ DateTimeEncoder,
36
+ delete_indexed_repository,
37
+ list_indexed_repositories,
38
+ )
39
+ from datetime import datetime
40
+ from loguru import logger
41
+ from mcp.server.fastmcp import Context, FastMCP, Image
42
+ from mcp.types import ImageContent
43
+ from pydantic import Field
44
+ from typing import Dict, List, Optional, Union
45
+
46
+
47
# Logging setup: reset loguru's handlers, then log to stderr at the level named by
# the FASTMCP_LOG_LEVEL environment variable (INFO when it is unset).
logger.remove()
_LOG_LEVEL = os.getenv('FASTMCP_LOG_LEVEL', 'INFO')
logger.add(sys.stderr, level=_LOG_LEVEL)

# Instructions handed to FastMCP when the server object is created. This text is
# runtime data, not documentation: keep its wording in sync with the tools and
# resources registered below.
_SERVER_INSTRUCTIONS = """
# Git Repository Research MCP Server

This MCP server provides tools and resources for indexing and searching Git repositories using semantic search.

## Important Note on Repository Names

When working with repository names that include organization (e.g., "awslabs/mcp"), you MUST use underscores instead of slashes in URIs (e.g., "awslabs_mcp") for compatibility. This affects:
- How repositories are stored in the index directory
- How repositories are referenced in metadata.json
- How repositories should be referenced in URIs and search queries

IMPORTANT: Always use underscores in URIs (e.g., `repositories://awslabs_mcp/summary`), NOT slashes.

## Available Tools

### create_research_repository
Build a FAISS index for a Git repository.

### search_research_repository
Perform semantic search within an indexed repository.

### delete_research_repository
Delete an indexed repository.

### search_research_repository_suggestions
Search for GitHub repositories based on keywords, scoped to specific organizations.

### access_file
Access file or directory contents. This tool is recommended for accessing files with complex paths, especially those containing slashes in repository names (e.g., "awslabs/mcp/repository/README.md").

## Available Resources

### repositories://{repository_name}/summary
Get a summary of an indexed repository including directory structure and helpful files (READMEs, etc.). This is particularly useful for understanding the structure of the repository and quickly finding important documentation. The repository_name can be a simple name or in the format "org_repo".

### repositories://
List all indexed repositories with detailed information including file counts, chunk counts, file types, etc.

### repositories://{index_directory}
List all indexed repositories from a specific index directory.

## Usage Examples

### Summarizing or describing purpose/objective/goals of the specific repository (e.g. 'What does this repo do?' or 'What are the main features?').
```
# Access the repository summary resource
repositories://awslabs_mcp/summary

# Or for a simple repository name
repositories://my-repo-name/summary
```

Then after identifying the main files of interest (e.g. README.md, diagrams, etc.), you can further investigate using other tools.

### Indexing a Repository
```
create_research_repository(repository_path="https://github.com/username/repo.git")
```

### Describing the Structure of a Repository (Directory Tree Format)
```
# Access the repository summary resource (with organization name)
repositories://awslabs_mcp/summary

# Or without organization name
repositories://my-repo-name/summary
```

### Searching a Repository
```
search_research_repository(index_path="repo_name", query="How does the authentication system work?")
```

### Listing Indexed Repositories
```
# Default listing
repositories://

# Listing from a specific directory
repositories:///path/to/custom/index/directory
```

### Accessing Files
```
# Using the tool
access_file(filepath="awslabs/mcp/repository/README.md")
access_file(filepath="/Users/username/.git_repo_research/repo_name/repository/src/file.py")
```

### Deleting a Repository
```
delete_research_repository(repository_name_or_path="repo_name")
```

### Searching for GitHub Repositories
```
search_research_repository_suggestions(
keywords=["serverless", "lambda"],
num_results=10
)
```
Results are automatically filtered to AWS organizations (aws-samples, aws-solutions-library-samples, awslabs) and specific licenses (Apache License 2.0, MIT, MIT No Attribution), and sorted by stars (descending) and then by updated date.
"""

# Package names passed to FastMCP as the server's declared dependencies.
_SERVER_DEPENDENCIES = [
    'boto3',
    'faiss-cpu',
    'gitpython',
    'loguru',
    'numpy',
    'pydantic',
]

# The MCP server object; every tool and resource below registers itself on it.
mcp = FastMCP(
    'Git Repository Research MCP Server',
    instructions=_SERVER_INSTRUCTIONS,
    dependencies=_SERVER_DEPENDENCIES,
)
167
+
168
+
169
@mcp.tool(name='create_research_repository')
async def mcp_index_repository(
    ctx: Context,
    repository_path: str = Field(
        description='Path to local repository or URL to remote repository'
    ),
    output_path: Optional[str] = Field(
        default=None,
        description='Where to store the index (optional, uses default if not provided)',
    ),
    embedding_model: str = Field(
        default=EmbeddingModel.AMAZON_TITAN_EMBED_TEXT_V2,
        description='Which AWS embedding model to use',
    ),
    include_patterns: Optional[List[str]] = Field(
        default=Constants.DEFAULT_INCLUDE_PATTERNS,
        description='Glob patterns for files to include (optional). Defaults to common source code and documentation files.',
    ),
    exclude_patterns: Optional[List[str]] = Field(
        default=Constants.DEFAULT_EXCLUDE_PATTERNS,
        description='Glob patterns for files to exclude (optional). Defaults to common binary files, build artifacts, and VCS directories.',
    ),
    chunk_size: int = Field(
        default=1000,
        description='Maximum size of each chunk in characters',
    ),
    chunk_overlap: int = Field(
        default=200,
        description='Overlap between chunks in characters',
    ),
) -> Dict:
    """Build a FAISS index for a Git repository.

    This tool indexes a Git repository (local or remote) using FAISS and Amazon Bedrock embeddings.
    The index can then be used for semantic search within the repository.

    Args:
        ctx: MCP context object used for progress tracking and error reporting
        repository_path: Path to local repository or URL to remote repository
        output_path: Where to store the index (optional, uses default if not provided)
        embedding_model: Which AWS embedding model to use
        include_patterns: Glob patterns for files to include (optional)
        exclude_patterns: Glob patterns for files to exclude (optional)
        chunk_size: Maximum size of each chunk in characters
        chunk_overlap: Overlap between chunks in characters

    Returns:
        Information about the created index
    """
    logger.info(f'Indexing repository: {repository_path}')

    # An explicit output path has every slash flattened to an underscore so that it
    # follows the same naming scheme the other tools use for repository names.
    if output_path and '/' in output_path:
        output_path = output_path.replace('/', '_')
        logger.info(f'Normalized output path: {output_path}')

    try:
        # Embedding settings; AWS region and profile are taken from the environment.
        embedding_settings = IndexConfig(
            embedding_model=embedding_model,
            aws_region=os.environ.get('AWS_REGION'),
            aws_profile=os.environ.get('AWS_PROFILE'),
        )

        # What to index and how to chunk it.
        source_settings = RepositoryConfig(
            repository_path=repository_path,
            output_path=output_path,
            include_patterns=include_patterns,
            exclude_patterns=exclude_patterns,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

        repo_indexer = get_repository_indexer(config=embedding_settings)

        # ctx is forwarded so the indexer can report progress to the client.
        result = await repo_indexer.index_repository(config=source_settings, ctx=ctx)

        # On success, advertise the 'repository' subdirectory of the index when it exists.
        if result.status == 'success':
            repo_files_dir = os.path.join(result.index_path, 'repository')
            if os.path.exists(repo_files_dir) and os.path.isdir(repo_files_dir):
                result.repository_directory = repo_files_dir

        return result.model_dump()
    except Exception as e:
        # Report to both the server log and the MCP client, then propagate unchanged.
        logger.error(f'Error indexing repository: {e}')
        await ctx.error(f'Error indexing repository: {str(e)}')
        raise
264
+
265
+
266
@mcp.resource(
    uri='repositories://{repository_name}/summary',
    name='Repository Summary',
    mime_type='application/json',
)
async def repository_summary(repository_name: str) -> str:
    """Get a summary of an indexed repository including structure and helpful files.

    This resource provides a summary of the repository including:
    - Directory tree structure of all files
    - List of helpful files (READMEs, documentation, etc.)

    Args:
        repository_name: Name of the repository

    Returns:
        Repository summary if repository is found, error message otherwise
    """
    logger.info(f'Listing files for repository: {repository_name}')

    # Lookups use the underscore form of the name; the response echoes the name as given.
    index_key = repository_name.replace('/', '_')
    logger.info(f'Normalized repository name: {index_key}')

    def readmes_in_mapping(node, prefix=''):
        """Yield '<repository_name>/<path>' for each README* leaf of a nested dict tree."""
        for entry, child in node.items():
            entry_path = f'{prefix}/{entry}' if prefix else entry
            if isinstance(child, dict):
                # A dict value is a directory: descend into it.
                yield from readmes_in_mapping(child, entry_path)
            elif entry.lower().startswith('readme'):
                yield f'{repository_name}/{entry_path}'

    def readmes_in_rendering(rendered):
        """Collect README paths from a text tree drawn with box-drawing characters."""
        import re

        entry_pattern = re.compile(r'[─└├]─+\s+(.+)$')
        dir_stack = []
        found = []

        for row in rendered.split('\n'):
            if not row.strip():
                continue

            # Leading spaces and vertical bars encode nesting, four columns per level.
            indent = len(row) - len(row.lstrip(' │'))
            depth = indent // 4

            # Drop directories that are deeper than the current row.
            dir_stack = dir_stack[: depth + 1]

            hit = entry_pattern.search(row)
            if not hit:
                continue
            label = hit.group(1)

            if label.endswith('/'):
                # Directory entry: record it at this depth.
                label = label.rstrip('/')
                if len(dir_stack) <= depth:
                    dir_stack.append(label)
                else:
                    dir_stack[depth] = label
            elif re.match(r'README.*', label, re.IGNORECASE):
                joined = '/'.join(dir_stack + [label]) if dir_stack else label
                found.append(f'{repository_name}/{joined}')

        return found

    try:
        # AWS region and profile for the searcher come from the environment.
        searcher = get_repository_searcher(
            aws_region=os.environ.get('AWS_REGION'),
            aws_profile=os.environ.get('AWS_PROFILE'),
        )

        tree = searcher.list_repository_files(
            repository_name=index_key,
        )
        if tree is None:
            return json.dumps(
                {
                    'status': 'error',
                    'message': f'Repository not found or no files available: {repository_name}',
                }
            )

        index_path = searcher.repository_indexer._get_index_path(index_key)
        repo_files_path = os.path.join(index_path, 'repository')

        # The tree may arrive as a nested dict or as a pre-rendered string.
        helpful_files = []
        if tree and isinstance(tree, dict):
            helpful_files = list(readmes_in_mapping(tree))
        elif tree and isinstance(tree, str):
            logger.info('Tree is a string, attempting to parse directory structure')
            readme_files = readmes_in_rendering(tree)
            if readme_files:
                helpful_files = readme_files
                logger.info(
                    f'Found {len(helpful_files)} README files with full paths in string tree'
                )
            else:
                logger.warning('No README files found in string tree')

        # Only report the repository directory when it is really there.
        has_repo_dir = os.path.exists(repo_files_path) and os.path.isdir(repo_files_path)
        summary = {
            'status': 'success',
            'tree': tree,
            'repository_name': repository_name,
            'repository_directory': repo_files_path if has_repo_dir else None,
            'helpful_files': helpful_files,
        }
        return json.dumps(summary, cls=DateTimeEncoder)
    except Exception as e:
        # Resources report failures as a JSON payload instead of raising.
        logger.error(f'Error listing repository files: {e}')
        return json.dumps(
            {'status': 'error', 'message': f'Error listing repository files: {str(e)}'},
            cls=DateTimeEncoder,
        )
416
+
417
+
418
@mcp.resource(uri='repositories://', name='Indexed Repositories', mime_type='application/json')
async def list_repositories() -> str:
    """List all indexed repositories with detailed information.

    This resource returns a list of all repositories that have been indexed and are available for searching.
    It provides detailed information about each index including file counts, chunk counts, file types, etc.

    Returns:
        List of indexed repositories with detailed information
    """
    logger.info('Listing indexed repositories')

    try:
        # index_dir=None selects the default index location; details are always requested.
        listing = list_indexed_repositories(index_dir=None, detailed=True)

        # Attach the 'repository' subdirectory of each index when it exists on disk.
        for entry in listing.repositories:
            candidate = os.path.join(entry.index_path, 'repository')
            if os.path.exists(candidate) and os.path.isdir(candidate):
                entry.repository_directory = candidate

        # DateTimeEncoder handles the datetime values in the dumped model.
        payload = listing.model_dump()
        return json.dumps(payload, cls=DateTimeEncoder)
    except Exception as e:
        # Failures are returned as a JSON error payload rather than raised.
        logger.error(f'Error listing indexed repositories: {e}')
        failure = {
            'status': 'error',
            'message': f'Error listing indexed repositories: {str(e)}',
        }
        return json.dumps(failure)
453
+
454
+
455
async def access_file_or_directory(filepath: str) -> Union[str, List[str], Image]:
    """Access file or directory contents.

    Depending on what ``filepath`` refers to, this returns:
    - the file content as a string, for a UTF-8 text file
    - a JSON string describing the directory (``status``, ``type``, ``path``, ``files``)
    - an ``Image`` holding the raw bytes, when the guessed MIME type is ``image/*``
    - a JSON string with ``status: error`` and a ``message`` for any failure

    Repository files are addressed as ``repository_name/repository/path/to/file``,
    for example ``awslabs_mcp/repository/README.md``. For repositories with an
    organization, both spellings are resolved:
    - ``awslabs_mcp/repository/README.md`` (with underscore)
    - ``awslabs/mcp/repository/README.md`` (with slash)

    The slash form is only rewritten when the path is relative and does not exist
    literally on disk, so real local paths keep working unchanged. Any other path
    is used as given.

    Args:
        filepath: Path to the file or directory to access

    Returns:
        File content, a JSON status string (directory listing or error), or image data.
        Errors are reported in the returned JSON; this function does not raise.
    """
    logger.info(f'Accessing file or directory: {filepath}')

    try:
        parts = filepath.split('/')

        # Decide whether the path addresses an indexed repository and, if so, which one.
        # The previous check ("'/' in parts[0]") could never be true after split('/'),
        # so the "org/repo/repository/..." form was never recognised.
        repo_name = None
        relative_parts: List[str] = []
        if len(parts) >= 2 and parts[1] == 'repository':
            # "<repo>/repository/..."
            repo_name = parts[0]
            relative_parts = parts[2:]
        elif (
            len(parts) >= 3
            and parts[0]
            and parts[1]
            and parts[2] == 'repository'
            and not os.path.isabs(filepath)
            and not os.path.exists(filepath)
        ):
            # "<org>/<repo>/repository/...": indices use "<org>_<repo>" as the name.
            repo_name = f'{parts[0]}_{parts[1]}'
            relative_parts = parts[3:]
            normalized = '/'.join([repo_name, 'repository', *relative_parts])
            logger.info(f'Normalized filepath: {normalized}')

        if repo_name is not None:
            try:
                # AWS region and profile for the searcher come from the environment.
                searcher = get_repository_searcher(
                    aws_region=os.environ.get('AWS_REGION'),
                    aws_profile=os.environ.get('AWS_PROFILE'),
                )

                # Repository files live in the 'repository' subdirectory of the index.
                index_path = searcher.repository_indexer._get_index_path(repo_name)
                repo_path = os.path.join(index_path, 'repository')

                if relative_parts:
                    file_path = os.path.join(repo_path, *relative_parts)
                else:
                    file_path = repo_path

                logger.info(f'Accessing repository file: {file_path}')
                filepath = file_path
            except Exception as e:
                logger.error(f'Error resolving repository path: {e}')
                return json.dumps(
                    {
                        'status': 'error',
                        'message': f'Error resolving repository path: {str(e)}',
                    }
                )

        if not os.path.exists(filepath):
            return json.dumps(
                {
                    'status': 'error',
                    'message': f'File or directory not found: {filepath}',
                }
            )

        # Directories are answered with a listing of their entries.
        if os.path.isdir(filepath):
            return json.dumps(
                {
                    'status': 'success',
                    'type': 'directory',
                    'path': filepath,
                    'files': os.listdir(filepath),
                }
            )

        # Files: the MIME type guessed from the name decides between image and text.
        mime_type, _ = mimetypes.guess_type(filepath)

        if mime_type and mime_type.startswith('image/'):
            try:
                with open(filepath, 'rb') as f:
                    image_data = f.read()

                # "image/png" -> "png"
                image_format = mime_type.split('/')[1]
                return Image(data=image_data, format=image_format)
            except Exception as e:
                logger.error(f'Error processing image file: {e}')
                return json.dumps(
                    {
                        'status': 'error',
                        'message': f'Error processing image file: {str(e)}',
                    }
                )

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                return f.read()
        except UnicodeDecodeError:
            # Not decodable as UTF-8 text and not recognised as an image.
            return json.dumps(
                {
                    'status': 'error',
                    'message': f'File appears to be binary and not an image: {filepath}',
                }
            )

    except Exception as e:
        logger.error(f'Error accessing file or directory: {e}')
        return json.dumps(
            {
                'status': 'error',
                'message': f'Error accessing file or directory: {str(e)}',
            }
        )
596
+
597
+
598
@mcp.tool(name='search_research_repository')
async def mcp_search_repository(
    ctx: Context,
    index_path: str = Field(description='Name of the repository or path to the index to search'),
    query: str = Field(description='The search query to use for semantic search'),
    limit: int = Field(default=10, description='Maximum number of results to return'),
    threshold: float = Field(
        default=0.0, description='Minimum similarity score threshold (0.0 to 1.0)'
    ),
) -> Dict:
    """Perform semantic search within an indexed repository.

    This tool searches an indexed repository using semantic search with Amazon Bedrock embeddings.
    It returns results ranked by relevance to the query.

    Args:
        ctx: MCP context object used for error reporting
        index_path: Name of the repository or path to the index to search
        query: The search query to use for semantic search
        limit: Maximum number of results to return
        threshold: Minimum similarity score threshold (0.0 to 1.0)

    Returns:
        Search results ranked by relevance to the query
    """
    logger.info(f'Searching repository: {index_path} for query: {query}')

    # Slashes become underscores; the rewrite is only logged when it changed something.
    normalized_index_path = str(index_path).replace('/', '_')
    if normalized_index_path != index_path:
        logger.info(f'Normalized index path: {normalized_index_path}')

    try:
        started = datetime.now()

        # AWS region and profile for the searcher come from the environment.
        searcher = get_repository_searcher(
            aws_region=os.environ.get('AWS_REGION'),
            aws_profile=os.environ.get('AWS_PROFILE'),
        )
        found = searcher.search(
            index_path=normalized_index_path,
            query=query,
            limit=limit,
            threshold=threshold,
        )

        # The timing covers searcher creation and the search itself, in milliseconds.
        elapsed = datetime.now() - started

        payload = found.model_dump()
        payload['execution_time_ms'] = elapsed.total_seconds() * 1000
        return payload
    except Exception as e:
        # Report to both the server log and the MCP client, then propagate unchanged.
        logger.error(f'Error searching repository: {e}')
        await ctx.error(f'Error searching repository: {str(e)}')
        raise
665
+
666
+
667
@mcp.tool(name='search_research_repository_suggestions')
async def mcp_search_github_repos(
    ctx: Context,
    keywords: List[str] = Field(description='List of keywords to search for GitHub repositories'),
    num_results: int = Field(default=5, description='Number of results to return'),
) -> Dict:
    """Search for GitHub repositories based on keywords, scoped to specific organizations.

    This tool searches for GitHub repositories using the GitHub REST/GraphQL APIs, scoped to specific GitHub
    organizations (aws-samples, aws-solutions-library-samples, and awslabs).

    Results are filtered to only include repositories with specific licenses (Apache License 2.0,
    MIT, and MIT No Attribution) and are sorted by stars (descending) and then by updated date.

    For higher rate limits, you can set the GITHUB_TOKEN environment variable with a GitHub
    personal access token. Without a token, the API is limited to 60 requests per hour, and requests are
    made with the REST API. With a token, this increases to 5,000 requests per hour, and requests are made
    with the GraphQL API.

    Args:
        ctx: MCP context object used for error reporting
        keywords: List of keywords to search for
        num_results: Number of results to return

    Returns:
        List of GitHub repositories matching the search criteria
    """
    logger.info(f'Searching for GitHub repositories with keywords: {keywords}')

    try:
        started = datetime.now()

        # The token is only inspected here to log which mode applies; it is not
        # passed to the search wrapper by this function.
        github_token = os.environ.get('GITHUB_TOKEN')
        api_mode = (
            'Using authenticated GitHub API (higher rate limits)'
            if github_token
            else 'Using unauthenticated GitHub API (lower rate limits)'
        )
        logger.info(api_mode)

        # Search scope is fixed: callers cannot change organizations or licenses.
        organizations = ['aws-samples', 'aws-solutions-library-samples', 'awslabs']
        license_filter = ['Apache License 2.0', 'MIT', 'MIT No Attribution']

        raw_hits = github_repo_search_wrapper(
            keywords=keywords,
            organizations=organizations,
            num_results=num_results,
            license_filter=license_filter,
        )

        # Timing covers the search call only, not the model conversion below.
        execution_time_ms = (datetime.now() - started).total_seconds() * 1000

        # url, title and organization are required keys; the rest are optional.
        repo_results = [
            GitHubRepoSearchResult(
                url=hit['url'],
                title=hit['title'],
                description=hit.get('description'),
                organization=hit['organization'],
                stars=hit.get('stars'),
                updated_at=hit.get('updated_at'),
                language=hit.get('language'),
                topics=hit.get('topics'),
                license=hit.get('license'),
                forks=hit.get('forks'),
                open_issues=hit.get('open_issues'),
                homepage=hit.get('homepage'),
            )
            for hit in raw_hits
        ]

        if isinstance(keywords, list):
            query_text = ' '.join(keywords)
        else:
            query_text = keywords

        summary = GitHubRepoSearchResponse(
            status='success',
            query=query_text,
            organizations=organizations,
            results=repo_results,
            total_results=len(repo_results),
            execution_time_ms=execution_time_ms,
        )
        return summary.model_dump()
    except Exception as e:
        # Report to both the server log and the MCP client, then propagate unchanged.
        logger.error(f'Error searching for GitHub repositories: {e}')
        await ctx.error(f'Error searching for GitHub repositories: {str(e)}')
        raise
761
+
762
+
763
@mcp.tool(name='access_file')
async def mcp_access_file(
    ctx: Context,
    filepath: str = Field(description='Path to the file or directory to access'),
) -> Dict | ImageContent:
    """Access file or directory contents.

    This tool provides access to file or directory contents:
    - If the filepath references a text file, returns the content as a string
    - If the filepath references a directory, returns an array of files in the directory
    - If the filepath references a binary image (jpg, png), returns the image data

    For repository files, use the format: repository_name/repository/path/to/file
    Example: awslabs_mcp/repository/README.md

    For repositories with organization names, both formats are supported:
    - awslabs_mcp/repository/README.md (with underscore)
    - awslabs/mcp/repository/README.md (with slash)

    Args:
        ctx: MCP context object used for error reporting
        filepath: Path to the file or directory to access

    Returns:
        File content, directory listing, or image data
    """
    logger.info(f'Tool: Accessing file or directory: {filepath}')

    try:
        # All path resolution and reading is delegated to access_file_or_directory.
        result = await access_file_or_directory(filepath)

        if isinstance(result, Image):
            return result.to_image_content()

        if isinstance(result, str):
            # The helper returns either raw file text or a JSON status envelope
            # (error, or directory listing). A leading '{' alone does not tell them
            # apart: JSON, JS or template files start with it too. Only unwrap text
            # that parses to one of the helper's own envelope shapes; everything else
            # is file content. A file whose content has exactly an envelope's shape
            # is still indistinguishable here.
            if result.startswith('{'):
                try:
                    parsed = json.loads(result)
                except ValueError:
                    parsed = None
                if isinstance(parsed, dict):
                    status = parsed.get('status')
                    is_error = status == 'error' and 'message' in parsed
                    is_listing = status == 'success' and parsed.get('type') == 'directory'
                    if is_error or is_listing:
                        return parsed
            return {'status': 'success', 'type': 'text', 'content': result}

        # Neither text nor image: report instead of guessing.
        return {
            'status': 'error',
            'message': f'Unknown result type: {type(result)}',
        }
    except Exception as e:
        # Notify the client, then raise a plain Exception with the wrapped message;
        # the original error is chained as the cause so the traceback is kept.
        logger.error(f'Error in mcp_access_file: {e}')
        await ctx.error(f'Error accessing file or directory: {str(e)}')
        raise Exception(f'Error accessing file: {str(e)}') from e
817
+
818
+
819
@mcp.tool(name='delete_research_repository')
async def mcp_delete_repository(
    ctx: Context,
    repository_name_or_path: str = Field(
        description='Name of the repository or path to the index to delete'
    ),
    index_directory: Optional[str] = Field(
        default=None,
        description='Directory to look for indices (optional, uses default if not provided)',
    ),
) -> Dict:
    """Delete an indexed repository.

    This tool deletes an indexed repository and its associated files.
    It can be identified by repository name or the full path to the index.

    Args:
        ctx: MCP context object used for error reporting
        repository_name_or_path: Name of the repository or path to the index to delete
        index_directory: Directory to look for indices (optional, uses default if not provided)

    Returns:
        Status of the delete operation
    """
    logger.info(f'Deleting repository: {repository_name_or_path}')

    # Slashes become underscores before the name is handed to the delete helper.
    normalized_repo_name = str(repository_name_or_path).replace('/', '_')
    logger.info(f'Normalized repository name: {normalized_repo_name}')

    await ctx.info(f'Deleting repository: {normalized_repo_name}')

    # Pass either None or a plain string on to the helper.
    index_dir = str(index_directory) if index_directory is not None else None

    try:
        started = datetime.now()

        outcome = await delete_indexed_repository(
            repository_name_or_path=normalized_repo_name,
            index_dir=index_dir,
        )

        elapsed_ms = (datetime.now() - started).total_seconds() * 1000

        # status and message are required in the helper's result; the rest is optional.
        response_data = {
            'status': outcome['status'],
            'message': outcome['message'],
            'repository_name': outcome.get('repository_name'),
            'execution_time_ms': elapsed_ms,
        }
        for optional_key in ('deleted_files', 'errors', 'permission_issues'):
            if optional_key in outcome:
                response_data[optional_key] = outcome[optional_key]

        return DeleteRepositoryResponse(**response_data).model_dump()
    except Exception as e:
        # Report to both the server log and the MCP client, then propagate unchanged.
        logger.error(f'Error deleting repository: {e}')
        await ctx.error(f'Error deleting repository: {str(e)}')
        raise
893
+
894
+
895
def main():
    """Parse the command line and start the MCP server.

    Options:
        --sse: serve over the SSE transport instead of the default transport
        --port: port for the server (default 8888); only applied together with --sse
    """
    cli = argparse.ArgumentParser(
        description='An AWS Labs Model Context Protocol (MCP) server for researching git repositories'
    )
    cli.add_argument('--sse', action='store_true', help='Use SSE transport')
    cli.add_argument('--port', type=int, default=8888, help='Port to run the server on')
    options = cli.parse_args()

    # Without --sse the server runs on its default transport and --port is ignored.
    if not options.sse:
        mcp.run()
        return

    mcp.settings.port = options.port
    mcp.run(transport='sse')


if __name__ == '__main__':
    main()