nia-mcp-server 1.0.18__py3-none-any.whl → 1.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nia-mcp-server might be problematic. Click here for more details.

nia_mcp_server/server.py CHANGED
@@ -471,8 +471,8 @@ async def search_documentation(
471
471
  text=f"❌ Error searching documentation: {str(e)}"
472
472
  )]
473
473
 
474
- @mcp.tool()
475
- async def list_repositories() -> List[TextContent]:
474
+ # @mcp.tool()
475
+ # async def list_repositories() -> List[TextContent]:
476
476
  """
477
477
  List all indexed repositories.
478
478
 
@@ -555,8 +555,8 @@ async def list_repositories() -> List[TextContent]:
555
555
  text=f"❌ Error listing repositories: {error_msg}"
556
556
  )]
557
557
 
558
- @mcp.tool()
559
- async def check_repository_status(repository: str) -> List[TextContent]:
558
+ # @mcp.tool()
559
+ # async def check_repository_status(repository: str) -> List[TextContent]:
560
560
  """
561
561
  Check the indexing status of a repository.
562
562
 
@@ -704,8 +704,8 @@ async def index_documentation(
704
704
  text=f"❌ Error indexing documentation: {str(e)}"
705
705
  )]
706
706
 
707
- @mcp.tool()
708
- async def list_documentation() -> List[TextContent]:
707
+ # @mcp.tool()
708
+ # async def list_documentation() -> List[TextContent]:
709
709
  """
710
710
  List all indexed documentation sources.
711
711
 
@@ -763,8 +763,8 @@ async def list_documentation() -> List[TextContent]:
763
763
  text=f"❌ Error listing documentation: {str(e)}"
764
764
  )]
765
765
 
766
- @mcp.tool()
767
- async def check_documentation_status(source_id: str) -> List[TextContent]:
766
+ # @mcp.tool()
767
+ # async def check_documentation_status(source_id: str) -> List[TextContent]:
768
768
  """
769
769
  Check the indexing status of a documentation source.
770
770
 
@@ -829,8 +829,384 @@ async def check_documentation_status(source_id: str) -> List[TextContent]:
829
829
  text=f"❌ Error checking documentation status: {str(e)}"
830
830
  )]
831
831
 
832
+ # Combined Resource Management Tools
833
+
834
+ @mcp.tool()
835
+ async def rename_resource(
836
+ resource_type: str,
837
+ identifier: str,
838
+ new_name: str
839
+ ) -> List[TextContent]:
840
+ """
841
+ Rename a resource (repository or documentation) for better organization.
842
+
843
+ Args:
844
+ resource_type: Type of resource - "repository" or "documentation"
845
+ identifier:
846
+ - For repository: Repository in owner/repo format (e.g., "facebook/react")
847
+ - For documentation: Documentation source ID
848
+ new_name: New display name for the resource (1-100 characters)
849
+
850
+ Returns:
851
+ Confirmation of rename operation
852
+
853
+ Examples:
854
+ - rename_resource("repository", "facebook/react", "React Framework")
855
+ - rename_resource("documentation", "doc-id-123", "Python Official Docs")
856
+ """
857
+ try:
858
+ # Validate resource type
859
+ if resource_type not in ["repository", "documentation"]:
860
+ return [TextContent(
861
+ type="text",
862
+ text=f"❌ Invalid resource_type: '{resource_type}'. Must be 'repository' or 'documentation'."
863
+ )]
864
+
865
+ # Validate name length
866
+ if not new_name or len(new_name) > 100:
867
+ return [TextContent(
868
+ type="text",
869
+ text="❌ Display name must be between 1 and 100 characters."
870
+ )]
871
+
872
+ client = await ensure_api_client()
873
+
874
+ if resource_type == "repository":
875
+ result = await client.rename_repository(identifier, new_name)
876
+ resource_desc = f"repository '{identifier}'"
877
+ else: # documentation
878
+ result = await client.rename_data_source(identifier, new_name)
879
+ resource_desc = f"documentation source"
880
+
881
+ if result.get("success"):
882
+ return [TextContent(
883
+ type="text",
884
+ text=f"✅ Successfully renamed {resource_desc} to '{new_name}'"
885
+ )]
886
+ else:
887
+ return [TextContent(
888
+ type="text",
889
+ text=f"❌ Failed to rename {resource_type}: {result.get('message', 'Unknown error')}"
890
+ )]
891
+
892
+ except APIError as e:
893
+ logger.error(f"API Error renaming {resource_type}: {e}")
894
+ error_msg = f"❌ {str(e)}"
895
+ if e.status_code == 403 and "lifetime limit" in str(e).lower():
896
+ error_msg += "\n\n💡 Tip: You've reached the free tier limit. Upgrade to Pro for unlimited access."
897
+ return [TextContent(type="text", text=error_msg)]
898
+ except Exception as e:
899
+ logger.error(f"Error renaming {resource_type}: {e}")
900
+ return [TextContent(
901
+ type="text",
902
+ text=f"❌ Error renaming {resource_type}: {str(e)}"
903
+ )]
904
+
905
+ @mcp.tool()
906
+ async def delete_resource(
907
+ resource_type: str,
908
+ identifier: str
909
+ ) -> List[TextContent]:
910
+ """
911
+ Delete an indexed resource (repository or documentation).
912
+
913
+ Args:
914
+ resource_type: Type of resource - "repository" or "documentation"
915
+ identifier:
916
+ - For repository: Repository in owner/repo format (e.g., "facebook/react")
917
+ - For documentation: Documentation source ID
918
+
919
+ Returns:
920
+ Confirmation of deletion
921
+
922
+ Examples:
923
+ - delete_resource("repository", "facebook/react")
924
+ - delete_resource("documentation", "doc-id-123")
925
+ """
926
+ try:
927
+ # Validate resource type
928
+ if resource_type not in ["repository", "documentation"]:
929
+ return [TextContent(
930
+ type="text",
931
+ text=f"❌ Invalid resource_type: '{resource_type}'. Must be 'repository' or 'documentation'."
932
+ )]
933
+
934
+ client = await ensure_api_client()
935
+
936
+ if resource_type == "repository":
937
+ success = await client.delete_repository(identifier)
938
+ resource_desc = f"repository: {identifier}"
939
+ else: # documentation
940
+ success = await client.delete_data_source(identifier)
941
+ resource_desc = f"documentation source: {identifier}"
942
+
943
+ if success:
944
+ return [TextContent(
945
+ type="text",
946
+ text=f"✅ Successfully deleted {resource_desc}"
947
+ )]
948
+ else:
949
+ return [TextContent(
950
+ type="text",
951
+ text=f"❌ Failed to delete {resource_desc}"
952
+ )]
953
+
954
+ except APIError as e:
955
+ logger.error(f"API Error deleting {resource_type}: {e}")
956
+ error_msg = f"❌ {str(e)}"
957
+ if e.status_code == 403 and "lifetime limit" in str(e).lower():
958
+ error_msg += "\n\n💡 Tip: You've reached the free tier limit of 3 indexing operations. Upgrade to Pro for unlimited access."
959
+ return [TextContent(type="text", text=error_msg)]
960
+ except Exception as e:
961
+ logger.error(f"Error deleting {resource_type}: {e}")
962
+ return [TextContent(
963
+ type="text",
964
+ text=f"❌ Error deleting {resource_type}: {str(e)}"
965
+ )]
966
+
832
967
  @mcp.tool()
833
- async def delete_documentation(source_id: str) -> List[TextContent]:
968
+ async def check_resource_status(
969
+ resource_type: str,
970
+ identifier: str
971
+ ) -> List[TextContent]:
972
+ """
973
+ Check the indexing status of a resource (repository or documentation).
974
+
975
+ Args:
976
+ resource_type: Type of resource - "repository" or "documentation"
977
+ identifier:
978
+ - For repository: Repository in owner/repo format (e.g., "facebook/react")
979
+ - For documentation: Documentation source ID
980
+
981
+ Returns:
982
+ Current status of the resource
983
+
984
+ Examples:
985
+ - check_resource_status("repository", "facebook/react")
986
+ - check_resource_status("documentation", "doc-id-123")
987
+ """
988
+ try:
989
+ # Validate resource type
990
+ if resource_type not in ["repository", "documentation"]:
991
+ return [TextContent(
992
+ type="text",
993
+ text=f"❌ Invalid resource_type: '{resource_type}'. Must be 'repository' or 'documentation'."
994
+ )]
995
+
996
+ client = await ensure_api_client()
997
+
998
+ if resource_type == "repository":
999
+ status = await client.get_repository_status(identifier)
1000
+ if not status:
1001
+ return [TextContent(
1002
+ type="text",
1003
+ text=f"❌ Repository '{identifier}' not found."
1004
+ )]
1005
+ title = f"Repository Status: {identifier}"
1006
+ status_key = "status"
1007
+ else: # documentation
1008
+ status = await client.get_data_source_status(identifier)
1009
+ if not status:
1010
+ return [TextContent(
1011
+ type="text",
1012
+ text=f"❌ Documentation source '{identifier}' not found."
1013
+ )]
1014
+ title = f"Documentation Status: {status.get('url', 'Unknown URL')}"
1015
+ status_key = "status"
1016
+
1017
+ # Format status with appropriate icon
1018
+ status_text = status.get(status_key, "unknown")
1019
+ status_icon = {
1020
+ "completed": "✅",
1021
+ "indexing": "⏳",
1022
+ "processing": "⏳",
1023
+ "failed": "❌",
1024
+ "pending": "🔄",
1025
+ "error": "❌"
1026
+ }.get(status_text, "❓")
1027
+
1028
+ lines = [
1029
+ f"# {title}\n",
1030
+ f"{status_icon} **Status:** {status_text}"
1031
+ ]
1032
+
1033
+ # Add resource-specific fields
1034
+ if resource_type == "repository":
1035
+ lines.append(f"**Branch:** {status.get('branch', 'main')}")
1036
+ if status.get("progress"):
1037
+ progress = status["progress"]
1038
+ if isinstance(progress, dict):
1039
+ lines.append(f"**Progress:** {progress.get('percentage', 0)}%")
1040
+ if progress.get("stage"):
1041
+ lines.append(f"**Stage:** {progress['stage']}")
1042
+ else: # documentation
1043
+ lines.append(f"**Source ID:** {identifier}")
1044
+ if status.get("page_count", 0) > 0:
1045
+ lines.append(f"**Pages Indexed:** {status['page_count']}")
1046
+ if status.get("details"):
1047
+ details = status["details"]
1048
+ if details.get("progress"):
1049
+ lines.append(f"**Progress:** {details['progress']}%")
1050
+ if details.get("stage"):
1051
+ lines.append(f"**Stage:** {details['stage']}")
1052
+
1053
+ # Common fields
1054
+ if status.get("indexed_at"):
1055
+ lines.append(f"**Indexed:** {status['indexed_at']}")
1056
+ elif status.get("created_at"):
1057
+ lines.append(f"**Created:** {status['created_at']}")
1058
+
1059
+ if status.get("error"):
1060
+ lines.append(f"**Error:** {status['error']}")
1061
+
1062
+ return [TextContent(type="text", text="\n".join(lines))]
1063
+
1064
+ except APIError as e:
1065
+ logger.error(f"API Error checking {resource_type} status: {e}")
1066
+ error_msg = f"❌ {str(e)}"
1067
+ if e.status_code == 403 and "lifetime limit" in str(e).lower():
1068
+ error_msg += "\n\n💡 Tip: You've reached the free tier limit of 3 indexing operations. Upgrade to Pro for unlimited access."
1069
+ return [TextContent(type="text", text=error_msg)]
1070
+ except Exception as e:
1071
+ logger.error(f"Error checking {resource_type} status: {e}")
1072
+ return [TextContent(
1073
+ type="text",
1074
+ text=f"❌ Error checking {resource_type} status: {str(e)}"
1075
+ )]
1076
+
1077
+ @mcp.tool()
1078
+ async def list_resources(
1079
+ resource_type: Optional[str] = None
1080
+ ) -> List[TextContent]:
1081
+ """
1082
+ List indexed resources (repositories and/or documentation).
1083
+
1084
+ Args:
1085
+ resource_type: Optional filter - "repository", "documentation", or None for all
1086
+
1087
+ Returns:
1088
+ List of indexed resources with their status
1089
+
1090
+ Examples:
1091
+ - list_resources() - List all resources
1092
+ - list_resources("repository") - List only repositories
1093
+ - list_resources("documentation") - List only documentation
1094
+ """
1095
+ try:
1096
+ # Validate resource type if provided
1097
+ if resource_type and resource_type not in ["repository", "documentation"]:
1098
+ return [TextContent(
1099
+ type="text",
1100
+ text=f"❌ Invalid resource_type: '{resource_type}'. Must be 'repository', 'documentation', or None for all."
1101
+ )]
1102
+
1103
+ client = await ensure_api_client()
1104
+ lines = []
1105
+
1106
+ # Determine what to list
1107
+ list_repos = resource_type in [None, "repository"]
1108
+ list_docs = resource_type in [None, "documentation"]
1109
+
1110
+ if list_repos:
1111
+ repositories = await client.list_repositories()
1112
+
1113
+ if repositories:
1114
+ lines.append("# Indexed Repositories\n")
1115
+ for repo in repositories:
1116
+ status_icon = "✅" if repo.get("status") == "completed" else "⏳"
1117
+
1118
+ # Show display name if available, otherwise show repository
1119
+ display_name = repo.get("display_name")
1120
+ repo_name = repo['repository']
1121
+
1122
+ if display_name:
1123
+ lines.append(f"\n## {status_icon} {display_name}")
1124
+ lines.append(f"- **Repository:** {repo_name}")
1125
+ else:
1126
+ lines.append(f"\n## {status_icon} {repo_name}")
1127
+
1128
+ lines.append(f"- **Branch:** {repo.get('branch', 'main')}")
1129
+ lines.append(f"- **Status:** {repo.get('status', 'unknown')}")
1130
+ if repo.get("indexed_at"):
1131
+ lines.append(f"- **Indexed:** {repo['indexed_at']}")
1132
+ if repo.get("error"):
1133
+ lines.append(f"- **Error:** {repo['error']}")
1134
+
1135
+ # Add usage hint for completed repositories
1136
+ if repo.get("status") == "completed":
1137
+ lines.append(f"- **Usage:** `search_codebase(query, [\"{repo_name}\"])`")
1138
+ elif resource_type == "repository":
1139
+ lines.append("No indexed repositories found.\n\n")
1140
+ lines.append("Get started by indexing a repository:\n")
1141
+ lines.append("Use `index_repository` with a GitHub URL.")
1142
+
1143
+ if list_docs:
1144
+ sources = await client.list_data_sources()
1145
+
1146
+ if sources:
1147
+ if lines: # Add separator if we already have repositories
1148
+ lines.append("\n---\n")
1149
+ lines.append("# Indexed Documentation\n")
1150
+
1151
+ for source in sources:
1152
+ status_icon = "✅" if source.get("status") == "completed" else "⏳"
1153
+
1154
+ # Show display name if available, otherwise show URL
1155
+ display_name = source.get("display_name")
1156
+ url = source.get('url', 'Unknown URL')
1157
+
1158
+ if display_name:
1159
+ lines.append(f"\n## {status_icon} {display_name}")
1160
+ lines.append(f"- **URL:** {url}")
1161
+ else:
1162
+ lines.append(f"\n## {status_icon} {url}")
1163
+
1164
+ lines.append(f"- **ID:** {source['id']}")
1165
+ lines.append(f"- **Status:** {source.get('status', 'unknown')}")
1166
+ lines.append(f"- **Type:** {source.get('source_type', 'web')}")
1167
+ if source.get("page_count", 0) > 0:
1168
+ lines.append(f"- **Pages:** {source['page_count']}")
1169
+ if source.get("created_at"):
1170
+ lines.append(f"- **Created:** {source['created_at']}")
1171
+ elif resource_type == "documentation":
1172
+ lines.append("No indexed documentation found.\n\n")
1173
+ lines.append("Get started by indexing documentation:\n")
1174
+ lines.append("Use `index_documentation` with a URL.")
1175
+
1176
+ if not lines:
1177
+ lines.append("No indexed resources found.\n\n")
1178
+ lines.append("Get started by indexing:\n")
1179
+ lines.append("- Use `index_repository` for GitHub repositories\n")
1180
+ lines.append("- Use `index_documentation` for documentation sites")
1181
+
1182
+ return [TextContent(type="text", text="\n".join(lines))]
1183
+
1184
+ except APIError as e:
1185
+ logger.error(f"API Error listing resources: {e}")
1186
+ error_msg = f"❌ {str(e)}"
1187
+ if e.status_code == 403 or "free tier limit" in str(e).lower():
1188
+ if e.detail and "3 free indexing operations" in e.detail:
1189
+ error_msg = f"❌ {e.detail}\n\n💡 Tip: Upgrade to Pro at https://trynia.ai/billing for unlimited indexing."
1190
+ else:
1191
+ error_msg += "\n\n💡 Tip: You've reached the free tier limit. Upgrade to Pro for unlimited access."
1192
+ return [TextContent(type="text", text=error_msg)]
1193
+ except Exception as e:
1194
+ logger.error(f"Unexpected error listing resources: {e}")
1195
+ error_msg = str(e)
1196
+ if "indexing operations" in error_msg.lower() or "lifetime limit" in error_msg.lower():
1197
+ return [TextContent(
1198
+ type="text",
1199
+ text=f"❌ {error_msg}\n\n💡 Tip: Upgrade to Pro at https://trynia.ai/billing for unlimited indexing."
1200
+ )]
1201
+ return [TextContent(
1202
+ type="text",
1203
+ text=f"❌ Error listing resources: {error_msg}"
1204
+ )]
1205
+
1206
+ # Old individual tools (to be commented out after testing)
1207
+
1208
+ # @mcp.tool()
1209
+ # async def delete_documentation(source_id: str) -> List[TextContent]:
834
1210
  """
835
1211
  Delete an indexed documentation source.
836
1212
 
@@ -868,8 +1244,8 @@ async def delete_documentation(source_id: str) -> List[TextContent]:
868
1244
  text=f"❌ Error deleting documentation: {str(e)}"
869
1245
  )]
870
1246
 
871
- @mcp.tool()
872
- async def delete_repository(repository: str) -> List[TextContent]:
1247
+ # @mcp.tool()
1248
+ # async def delete_repository(repository: str) -> List[TextContent]:
873
1249
  """
874
1250
  Delete an indexed repository.
875
1251
 
@@ -907,8 +1283,8 @@ async def delete_repository(repository: str) -> List[TextContent]:
907
1283
  text=f"❌ Error deleting repository: {str(e)}"
908
1284
  )]
909
1285
 
910
- @mcp.tool()
911
- async def rename_repository(repository: str, new_name: str) -> List[TextContent]:
1286
+ # @mcp.tool()
1287
+ # async def rename_repository(repository: str, new_name: str) -> List[TextContent]:
912
1288
  """
913
1289
  Rename an indexed repository for better organization.
914
1290
 
@@ -954,8 +1330,8 @@ async def rename_repository(repository: str, new_name: str) -> List[TextContent]
954
1330
  text=f"❌ Error renaming repository: {str(e)}"
955
1331
  )]
956
1332
 
957
- @mcp.tool()
958
- async def rename_documentation(source_id: str, new_name: str) -> List[TextContent]:
1333
+ # @mcp.tool()
1334
+ # async def rename_documentation(source_id: str, new_name: str) -> List[TextContent]:
959
1335
  """
960
1336
  Rename a documentation source for better organization.
961
1337
 
@@ -1633,406 +2009,814 @@ async def read_source_content(
1633
2009
  text=f"❌ Error reading source content: {str(e)}"
1634
2010
  )]
1635
2011
 
2012
+ # @mcp.tool()
2013
+ # async def index_local_filesystem(
2014
+ # directory_path: str,
2015
+ # inclusion_patterns: Optional[List[str]] = None,
2016
+ # exclusion_patterns: Optional[List[str]] = None,
2017
+ # max_file_size_mb: int = 50
2018
+ # ) -> List[TextContent]:
2019
+ # """
2020
+ # Index a local filesystem directory for intelligent search.
2021
+ #
2022
+ # Args:
2023
+ # directory_path: Absolute path to the directory to index
2024
+ # inclusion_patterns: Optional list of patterns to include (e.g., ["ext:.py", "dir:src"])
2025
+ # exclusion_patterns: Optional list of patterns to exclude (e.g., ["dir:node_modules", "ext:.log"])
2026
+ # max_file_size_mb: Maximum file size in MB to process (default: 50)
2027
+ #
2028
+ # Returns:
2029
+ # Status of the indexing operation
2030
+ #
2031
+ # Important:
2032
+ # - Path must be absolute (e.g., /Users/username/projects/myproject)
2033
+ # - When indexing starts, use check_local_filesystem_status tool to monitor progress
2034
+ # """
2035
+ # try:
2036
+ # # Validate absolute path
2037
+ # if not os.path.isabs(directory_path):
2038
+ # return [TextContent(
2039
+ # type="text",
2040
+ # text=f"❌ Error: directory_path must be an absolute path. Got: {directory_path}\n\n"
2041
+ # f"Example: /Users/username/projects/myproject"
2042
+ # )]
2043
+ #
2044
+ # client = await ensure_api_client()
2045
+ #
2046
+ # # Start indexing
2047
+ # logger.info(f"Starting to index local directory: {directory_path}")
2048
+ # result = await client.index_local_filesystem(
2049
+ # directory_path=directory_path,
2050
+ # inclusion_patterns=inclusion_patterns or [],
2051
+ # exclusion_patterns=exclusion_patterns or [],
2052
+ # max_file_size_mb=max_file_size_mb
2053
+ # )
2054
+ #
2055
+ # if result.get("success"):
2056
+ # source_id = result["data"]["source_id"]
2057
+ # status_url = result["data"]["status_url"]
2058
+ #
2059
+ # return [TextContent(
2060
+ # type="text",
2061
+ # text=(
2062
+ # f"✅ Successfully started indexing local directory!\n\n"
2063
+ # f"📁 **Directory:** `{directory_path}`\n"
2064
+ # f"🆔 **Source ID:** `{source_id}`\n"
2065
+ # f"📊 **Status:** Processing\n\n"
2066
+ # f"**What happens next:**\n"
2067
+ # f"• NIA is scanning and indexing your files in the background\n"
2068
+ # f"• This process typically takes a few minutes depending on directory size\n"
2069
+ # f"• Use `check_local_filesystem_status` with source ID `{source_id}` to monitor progress\n"
2070
+ # f"• Once indexed, use `search_codebase` or `search_documentation` to search your files\n\n"
2071
+ # f"📌 **Tip:** You can check the status at any time or visit [app.trynia.ai](https://app.trynia.ai) to monitor progress."
2072
+ # )
2073
+ # )]
2074
+ # else:
2075
+ # return [TextContent(
2076
+ # type="text",
2077
+ # text=f"❌ Failed to start indexing: {result.get('detail', 'Unknown error')}"
2078
+ # )]
2079
+ #
2080
+ # except APIError as e:
2081
+ # logger.error(f"API error indexing local filesystem: {e}")
2082
+ # return [TextContent(
2083
+ # type="text",
2084
+ # text=f"❌ API Error: {str(e)}\n\nStatus Code: {e.status_code}\nDetails: {e.detail}"
2085
+ # )]
2086
+ # except Exception as e:
2087
+ # logger.error(f"Unexpected error indexing local filesystem: {e}")
2088
+ # return [TextContent(
2089
+ # type="text",
2090
+ # text=f"❌ Error: An unexpected error occurred while indexing the directory: {str(e)}"
2091
+ # )]
2092
+
2093
+ # @mcp.tool()
2094
+ # async def scan_local_filesystem(
2095
+ # directory_path: str,
2096
+ # inclusion_patterns: Optional[List[str]] = None,
2097
+ # exclusion_patterns: Optional[List[str]] = None,
2098
+ # max_file_size_mb: int = 50
2099
+ # ) -> List[TextContent]:
2100
+ # """
2101
+ # Scan a local filesystem directory to preview what files would be indexed.
2102
+ #
2103
+ # This tool helps you understand what files will be processed before actually indexing.
2104
+ #
2105
+ # Args:
2106
+ # directory_path: Absolute path to the directory to scan
2107
+ # inclusion_patterns: Optional list of patterns to include (e.g., ["ext:.py", "dir:src"])
2108
+ # exclusion_patterns: Optional list of patterns to exclude (e.g., ["dir:node_modules", "ext:.log"])
2109
+ # max_file_size_mb: Maximum file size in MB to process (default: 50)
2110
+ #
2111
+ # Returns:
2112
+ # Summary of files that would be indexed including count, size, and file types
2113
+ # """
2114
+ # try:
2115
+ # # Validate absolute path
2116
+ # if not os.path.isabs(directory_path):
2117
+ # return [TextContent(
2118
+ # type="text",
2119
+ # text=f"❌ Error: directory_path must be an absolute path. Got: {directory_path}\n\n"
2120
+ # f"Example: /Users/username/projects/myproject"
2121
+ # )]
2122
+ #
2123
+ # client = await ensure_api_client()
2124
+ #
2125
+ # logger.info(f"Scanning local directory: {directory_path}")
2126
+ # result = await client.scan_local_filesystem(
2127
+ # directory_path=directory_path,
2128
+ # inclusion_patterns=inclusion_patterns or [],
2129
+ # exclusion_patterns=exclusion_patterns or [],
2130
+ # max_file_size_mb=max_file_size_mb
2131
+ # )
2132
+ #
2133
+ # # Format the scan results
2134
+ # total_files = result.get("total_files", 0)
2135
+ # total_size_mb = result.get("total_size_mb", 0)
2136
+ # file_types = result.get("file_types", {})
2137
+ # files = result.get("files", [])
2138
+ # truncated = result.get("truncated", False)
2139
+ #
2140
+ # response = f"📊 **Local Directory Scan Results**\n\n"
2141
+ # response += f"📁 **Directory:** `{directory_path}`\n"
2142
+ # response += f"📄 **Total Files:** {total_files:,}\n"
2143
+ # response += f"💾 **Total Size:** {total_size_mb:.2f} MB\n\n"
2144
+ #
2145
+ # if file_types:
2146
+ # response += "**File Types:**\n"
2147
+ # # Sort by count descending
2148
+ # sorted_types = sorted(file_types.items(), key=lambda x: x[1], reverse=True)
2149
+ # for ext, count in sorted_types[:10]: # Show top 10
2150
+ # response += f"• `{ext}`: {count:,} files\n"
2151
+ # if len(sorted_types) > 10:
2152
+ # response += f"• ... and {len(sorted_types) - 10} more types\n"
2153
+ # response += "\n"
2154
+ #
2155
+ # if files:
2156
+ # response += f"**Largest Files (showing {min(len(files), 10)}):**\n"
2157
+ # for i, file_info in enumerate(files[:10]):
2158
+ # size_mb = file_info["size"] / (1024 * 1024)
2159
+ # response += f"{i+1}. `{file_info['path']}` ({size_mb:.2f} MB)\n"
2160
+ #
2161
+ # if truncated:
2162
+ # response += f"\n*Note: Showing first 100 files out of {total_files:,} total*\n"
2163
+ #
2164
+ # if inclusion_patterns:
2165
+ # response += f"\n**Inclusion Patterns:** {', '.join(f'`{p}`' for p in inclusion_patterns)}\n"
2166
+ # if exclusion_patterns:
2167
+ # response += f"**Exclusion Patterns:** {', '.join(f'`{p}`' for p in exclusion_patterns)}\n"
2168
+ #
2169
+ # response += "\n💡 **Next Step:** Use `index_local_filesystem` to index these files."
2170
+ #
2171
+ # return [TextContent(type="text", text=response)]
2172
+ #
2173
+ # except APIError as e:
2174
+ # logger.error(f"API error scanning local filesystem: {e}")
2175
+ # return [TextContent(
2176
+ # type="text",
2177
+ # text=f"❌ API Error: {str(e)}\n\nStatus Code: {e.status_code}\nDetails: {e.detail}"
2178
+ # )]
2179
+ # except Exception as e:
2180
+ # logger.error(f"Unexpected error scanning local filesystem: {e}")
2181
+ # return [TextContent(
2182
+ # type="text",
2183
+ # text=f"❌ Error: An unexpected error occurred while scanning: {str(e)}"
2184
+ # )]
2185
+
2186
+ # @mcp.tool()
2187
+ # async def check_local_filesystem_status(source_id: str) -> List[TextContent]:
2188
+ # """
2189
+ # Check the indexing status of a local filesystem source.
2190
+ #
2191
+ # Args:
2192
+ # source_id: The source ID returned when indexing was started
2193
+ #
2194
+ # Returns:
2195
+ # Current status of the local filesystem indexing
2196
+ # """
2197
+ # try:
2198
+ # client = await ensure_api_client()
2199
+ # status = await client.check_local_filesystem_status(source_id)
2200
+ #
2201
+ # # Format status response
2202
+ # status_text = status.get("status", "unknown")
2203
+ # progress = status.get("progress", 0)
2204
+ # message = status.get("message", "")
2205
+ # error = status.get("error")
2206
+ # directory_path = status.get("directory_path", "Unknown")
2207
+ # page_count = status.get("page_count", 0) # Number of files
2208
+ # chunk_count = status.get("chunk_count", 0)
2209
+ #
2210
+ # # Status emoji
2211
+ # status_emoji = {
2212
+ # "pending": "⏳",
2213
+ # "processing": "🔄",
2214
+ # "completed": "✅",
2215
+ # "failed": "❌",
2216
+ # "error": "❌"
2217
+ # }.get(status_text, "❓")
2218
+ #
2219
+ # response = f"{status_emoji} **Local Filesystem Status**\n\n"
2220
+ # response += f"🆔 **Source ID:** `{source_id}`\n"
2221
+ # response += f"📁 **Directory:** `{directory_path}`\n"
2222
+ # response += f"📊 **Status:** {status_text.capitalize()}\n"
2223
+ #
2224
+ # if progress > 0:
2225
+ # response += f"📈 **Progress:** {progress}%\n"
2226
+ #
2227
+ # if message:
2228
+ # response += f"💬 **Message:** {message}\n"
2229
+ #
2230
+ # if status_text == "completed":
2231
+ # response += f"\n✨ **Indexing Complete!**\n"
2232
+ # response += f"• **Files Indexed:** {page_count:,}\n"
2233
+ # response += f"• **Chunks Created:** {chunk_count:,}\n"
2234
+ # response += f"\nYou can now search this directory using `search_codebase` or the unified search!"
2235
+ # elif status_text in ["failed", "error"]:
2236
+ # response += f"\n❌ **Indexing Failed**\n"
2237
+ # if error:
2238
+ # response += f"**Error:** {error}\n"
2239
+ # response += "\nPlease check your directory path and try again."
2240
+ # elif status_text == "processing":
2241
+ # response += f"\n🔄 Indexing is in progress...\n"
2242
+ # response += "Check back in a few moments or monitor at [app.trynia.ai](https://app.trynia.ai)"
2243
+ #
2244
+ # return [TextContent(type="text", text=response)]
2245
+ #
2246
+ # except APIError as e:
2247
+ # logger.error(f"API error checking local filesystem status: {e}")
2248
+ # if e.status_code == 404:
2249
+ # return [TextContent(
2250
+ # type="text",
2251
+ # text=f"❌ Source ID `{source_id}` not found. Please check the ID and try again."
2252
+ # )]
2253
+ # return [TextContent(
2254
+ # type="text",
2255
+ # text=f"❌ API Error: {str(e)}\n\nStatus Code: {e.status_code}\nDetails: {e.detail}"
2256
+ # )]
2257
+ # except Exception as e:
2258
+ # logger.error(f"Unexpected error checking local filesystem status: {e}")
2259
+ # return [TextContent(
2260
+ # type="text",
2261
+ # text=f"❌ Error: An unexpected error occurred: {str(e)}"
2262
+ # )]
2263
+
2264
+ # @mcp.tool()
2265
+ # async def search_local_filesystem(
2266
+ # source_id: str,
2267
+ # query: str,
2268
+ # include_sources: bool = True
2269
+ # ) -> List[TextContent]:
2270
+ # """
2271
+ # Search an indexed local filesystem directory using its source ID.
2272
+ #
2273
+ # To search local files:
2274
+ # 1. First index a directory using `index_local_filesystem` - this will return a source_id
2275
+ # 2. Use that source_id with this tool to search the indexed content
2276
+ #
2277
+ # Args:
2278
+ # source_id: The source ID returned when the directory was indexed (required)
2279
+ # query: Your search query in natural language (required)
2280
+ # include_sources: Whether to include source code snippets in results (default: True)
2281
+ #
2282
+ # Returns:
2283
+ # Search results with relevant file snippets and explanations
2284
+ #
2285
+ # Example:
2286
+ # # After indexing returns source_id "abc123-def456"
2287
+ # search_local_filesystem(
2288
+ # source_id="abc123-def456",
2289
+ # query="configuration settings"
2290
+ # )
2291
+ #
2292
+ # Note: To find your source IDs, use `list_documentation` and look for
2293
+ # sources with source_type="local_filesystem"
2294
+ # """
2295
+ # try:
2296
+ # # Validate inputs
2297
+ # if not source_id:
2298
+ # return [TextContent(
2299
+ # type="text",
2300
+ # text="❌ Error: 'source_id' parameter is required. Use the ID returned from index_local_filesystem."
2301
+ # )]
2302
+ #
2303
+ # if not query:
2304
+ # return [TextContent(
2305
+ # type="text",
2306
+ # text="❌ Error: 'query' parameter is required"
2307
+ # )]
2308
+ #
2309
+ # client = await ensure_api_client()
2310
+ #
2311
+ # # Check if the source exists and is ready
2312
+ # logger.info(f"Checking status of source {source_id}")
2313
+ # try:
2314
+ # status = await client.get_data_source_status(source_id)
2315
+ # if not status:
2316
+ # return [TextContent(
2317
+ # type="text",
2318
+ # text=f"❌ Source ID '{source_id}' not found. Please check the ID and try again."
2319
+ # )]
2320
+ #
2321
+ # source_status = status.get("status", "unknown")
2322
+ # if source_status == "processing":
2323
+ # progress = status.get("progress", 0)
2324
+ # return [TextContent(
2325
+ # type="text",
2326
+ # text=f"⏳ This source is still being indexed ({progress}% complete).\n\n"
2327
+ # f"Use `check_local_filesystem_status(\"{source_id}\")` to check progress."
2328
+ # )]
2329
+ # elif source_status == "failed":
2330
+ # error = status.get("error", "Unknown error")
2331
+ # return [TextContent(
2332
+ # type="text",
2333
+ # text=f"❌ This source failed to index.\n\nError: {error}"
2334
+ # )]
2335
+ # elif source_status != "completed":
2336
+ # return [TextContent(
2337
+ # type="text",
2338
+ # text=f"❌ Source is not ready for search. Status: {source_status}"
2339
+ # )]
2340
+ # except Exception as e:
2341
+ # logger.warning(f"Could not check source status: {e}")
2342
+ # # Continue anyway in case it's just a status check issue
2343
+ #
2344
+ # # Perform the search
2345
+ # logger.info(f"Searching local filesystem source {source_id} with query: {query}")
2346
+ #
2347
+ # # Use the unified query endpoint with data_sources parameter
2348
+ # result = client.query_unified(
2349
+ # messages=[{"role": "user", "content": query}],
2350
+ # data_sources=[source_id],
2351
+ # include_sources=include_sources,
2352
+ # stream=False
2353
+ # )
2354
+ #
2355
+ # # Parse the response
2356
+ # response_text = ""
2357
+ # async for chunk in result:
2358
+ # data = json.loads(chunk)
2359
+ # if "content" in data:
2360
+ # response_text = data["content"]
2361
+ # sources = data.get("sources", [])
2362
+ # break
2363
+ #
2364
+ # # Format the response nicely for local filesystem results
2365
+ # if response_text:
2366
+ # # Extract the local filesystem results section if present
2367
+ # if "**Local filesystem results" in response_text:
2368
+ # # Keep the original response
2369
+ # formatted_response = response_text
2370
+ # else:
2371
+ # # Create our own formatted response
2372
+ # formatted_response = f"🔍 **Search Results for Local Directory**\n"
2373
+ # formatted_response += f"🔎 Query: \"{query}\"\n\n"
2374
+ # formatted_response += response_text
2375
+ #
2376
+ # # Add sources if available and requested
2377
+ # if include_sources and sources:
2378
+ # formatted_response += "\n\n**📄 Source Details:**\n"
2379
+ # for i, source in enumerate(sources[:5], 1):
2380
+ # metadata = source.get("metadata", {})
2381
+ # file_path = metadata.get("file_path", "Unknown file")
2382
+ # formatted_response += f"\n{i}. `{file_path}`\n"
2383
+ #
2384
+ # # Add snippet of content
2385
+ # content = source.get("content", "")
2386
+ # if content:
2387
+ # # Truncate to reasonable length
2388
+ # lines = content.split('\n')[:10]
2389
+ # snippet = '\n'.join(lines)
2390
+ # if len(lines) > 10:
2391
+ # snippet += "\n..."
2392
+ # formatted_response += f"```\n{snippet}\n```\n"
2393
+ #
2394
+ # return [TextContent(type="text", text=formatted_response)]
2395
+ # else:
2396
+ # return [TextContent(
2397
+ # type="text",
2398
+ # text=f"No results found for query: \"{query}\" in the indexed directory."
2399
+ # )]
2400
+ #
2401
+ # except APIError as e:
2402
+ # logger.error(f"API error searching local filesystem: {e}")
2403
+ # return [TextContent(
2404
+ # type="text",
2405
+ # text=f"❌ API Error: {str(e)}\n\nStatus Code: {e.status_code}\nDetails: {e.detail}"
2406
+ # )]
2407
+ # except Exception as e:
2408
+ # logger.error(f"Unexpected error searching local filesystem: {e}")
2409
+ # return [TextContent(
2410
+ # type="text",
2411
+ # text=f"❌ Error: An unexpected error occurred: {str(e)}"
2412
+ # )]
2413
+
2414
+ # ===============================================================================
2415
+ # CHROMA PACKAGE SEARCH INTEGRATION
2416
+ # ===============================================================================
2417
+ #
2418
+ # Provides access to Chroma's Package Search MCP tools for searching actual
2419
+ # source code from 3,000+ packages across multiple package registries.
2420
+ # This integration enables AI assistants to search ground-truth code instead
2421
+ # of relying on training data or hallucinations.
2422
+ #
2423
+ # Available Registries:
2424
+ # - py_pi: Python Package Index (PyPI) packages
2425
+ # - npm: Node.js packages from NPM registry
2426
+ # - crates_io: Rust packages from crates.io
2427
+ # - golang_proxy: Go modules from Go proxy
2428
+ #
2429
+ # Authentication:
2430
+ # - Requires CHROMA_API_KEY environment variable
2431
+ # - Uses x-chroma-token header for API authentication
2432
+ #
2433
+ # Tools:
2434
+ # 1. nia_package_search_grep: Regex-based code search
2435
+ # 2. nia_package_search_hybrid: Semantic/AI-powered search
2436
+ # 3. nia_package_search_read_file: Direct file content retrieval
2437
+ #
2438
+ # ===============================================================================
2439
+
1636
2440
  @mcp.tool()
1637
- async def index_local_filesystem(
1638
- directory_path: str,
1639
- inclusion_patterns: Optional[List[str]] = None,
1640
- exclusion_patterns: Optional[List[str]] = None,
1641
- max_file_size_mb: int = 50
2441
+ async def nia_package_search_grep(
2442
+ registry: str,
2443
+ package_name: str,
2444
+ pattern: str,
2445
+ version: Optional[str] = None,
2446
+ language: Optional[str] = None,
2447
+ filename_sha256: Optional[str] = None,
2448
+ a: Optional[int] = None,
2449
+ b: Optional[int] = None,
2450
+ c: Optional[int] = None,
2451
+ head_limit: Optional[int] = None,
2452
+ output_mode: str = "content"
1642
2453
  ) -> List[TextContent]:
1643
2454
  """
1644
- Index a local filesystem directory for intelligent search.
1645
-
1646
- Args:
1647
- directory_path: Absolute path to the directory to index
1648
- inclusion_patterns: Optional list of patterns to include (e.g., ["ext:.py", "dir:src"])
1649
- exclusion_patterns: Optional list of patterns to exclude (e.g., ["dir:node_modules", "ext:.log"])
1650
- max_file_size_mb: Maximum file size in MB to process (default: 50)
1651
-
1652
- Returns:
1653
- Status of the indexing operation
1654
-
1655
- Important:
1656
- - Path must be absolute (e.g., /Users/username/projects/myproject)
1657
- - When indexing starts, use check_local_filesystem_status tool to monitor progress
2455
+ Executes a grep over the source code of a public package. This tool is useful for deterministically
2456
+ finding code in a package using regex. Use this tool before implementing solutions that use external
2457
+ packages. The regex pattern should be restrictive enough to only match code you're looking for, to limit
2458
+ overfetching.
2459
+
2460
+ Required Args: "registry", "package_name", "pattern" Optional Args: "version", "language",
2461
+ "filename_sha256", "a", "b", "c", "head_limit", "output_mode"
2462
+
2463
+ Best for: Deterministic code search, finding specific code patterns, or exploring code structure.
2464
+
2465
+ Parameters:
2466
+ a: The number of lines after a grep match to include
2467
+ b: The number of lines before a grep match to include
2468
+ c: The number of lines before and after a grep match to include
2469
+ filename_sha256: The sha256 hash of the file to filter for
2470
+ head_limit: Limits number of results returned. If the number of results returned is less than the
2471
+ head limit, all results have been returned.
2472
+ language: The languages to filter for. If not provided, all languages will be searched. Valid
2473
+ options: "Rust", "Go", "Python", "JavaScript", "JSX", "TypeScript", "TSX", "HTML", "Markdown",
2474
+ "YAML", "Bash", "SQL", "JSON", "Text", "Dockerfile", "HCL", "Protobuf", "Make", "Toml", "Jupyter Notebook"
2475
+ output_mode: Controls the shape of the grep output. Accepted values:
2476
+ "content" (default): return content snippets with line ranges
2477
+ "files_with_matches": return unique files (path and sha256) that match
2478
+ "count": return files with the count of matches per file
2479
+ package_name: The name of the requested package. Pass the name as it appears in the package
2480
+ manager. For Go packages, use the GitHub organization and repository name in the format
2481
+ {org}/{repo}, if unsure check the GitHub URL for the package and use {org}/{repo} from that URL.
2482
+ pattern: The regex pattern for exact text matching in the codebase. Must be a valid regex.
2483
+ Example: "func\\s+\\(get_repository\\|getRepository\\)\\s*\\(.*?\\)\\s\\{"
2484
+ registry: The name of the registry containing the requested package. Must be one of:
2485
+ "crates_io", "golang_proxy", "npm", or "py_pi".
2486
+ version: Optionally, the specific version of the package whose source code to search.
2487
+ If provided, must be in semver format: {major}.{minor}.{patch}. Otherwise, the latest indexed
2488
+ version of the package available will be used.
1658
2489
  """
1659
2490
  try:
1660
- # Validate absolute path
1661
- if not os.path.isabs(directory_path):
1662
- return [TextContent(
1663
- type="text",
1664
- text=f"❌ Error: directory_path must be an absolute path. Got: {directory_path}\n\n"
1665
- f"Example: /Users/username/projects/myproject"
1666
- )]
1667
-
2491
+ # Use API client for backend routing
1668
2492
  client = await ensure_api_client()
1669
-
1670
- # Start indexing
1671
- logger.info(f"Starting to index local directory: {directory_path}")
1672
- result = await client.index_local_filesystem(
1673
- directory_path=directory_path,
1674
- inclusion_patterns=inclusion_patterns or [],
1675
- exclusion_patterns=exclusion_patterns or [],
1676
- max_file_size_mb=max_file_size_mb
2493
+ logger.info(f"Searching package {package_name} from {registry} with pattern: {pattern}")
2494
+
2495
+ # Execute grep search through backend
2496
+ result = await client.package_search_grep(
2497
+ registry=registry,
2498
+ package_name=package_name,
2499
+ pattern=pattern,
2500
+ version=version,
2501
+ language=language,
2502
+ filename_sha256=filename_sha256,
2503
+ a=a,
2504
+ b=b,
2505
+ c=c,
2506
+ head_limit=head_limit,
2507
+ output_mode=output_mode
1677
2508
  )
1678
-
1679
- if result.get("success"):
1680
- source_id = result["data"]["source_id"]
1681
- status_url = result["data"]["status_url"]
1682
-
2509
+
2510
+ # Handle raw Chroma JSON response
2511
+ if not result or not isinstance(result, dict):
1683
2512
  return [TextContent(
1684
2513
  type="text",
1685
- text=(
1686
- f"✅ Successfully started indexing local directory!\n\n"
1687
- f"📁 **Directory:** `{directory_path}`\n"
1688
- f"🆔 **Source ID:** `{source_id}`\n"
1689
- f"📊 **Status:** Processing\n\n"
1690
- f"**What happens next:**\n"
1691
- f"• NIA is scanning and indexing your files in the background\n"
1692
- f"• This process typically takes a few minutes depending on directory size\n"
1693
- f"• Use `check_local_filesystem_status` with source ID `{source_id}` to monitor progress\n"
1694
- f"• Once indexed, use `search_codebase` or `search_documentation` to search your files\n\n"
1695
- f"📌 **Tip:** You can check the status at any time or visit [app.trynia.ai](https://app.trynia.ai) to monitor progress."
1696
- )
2514
+ text=f"No response from Chroma for pattern '{pattern}' in {package_name} ({registry})"
1697
2515
  )]
1698
- else:
2516
+
2517
+ # Extract results and version from raw Chroma response
2518
+ results = result.get("results", [])
2519
+ version_used = result.get("version_used")
2520
+
2521
+ if not results:
1699
2522
  return [TextContent(
1700
2523
  type="text",
1701
- text=f"❌ Failed to start indexing: {result.get('detail', 'Unknown error')}"
2524
+ text=f"No matches found for pattern '{pattern}' in {package_name} ({registry})"
1702
2525
  )]
1703
-
1704
- except APIError as e:
1705
- logger.error(f"API error indexing local filesystem: {e}")
1706
- return [TextContent(
1707
- type="text",
1708
- text=f"❌ API Error: {str(e)}\n\nStatus Code: {e.status_code}\nDetails: {e.detail}"
1709
- )]
2526
+
2527
+ response_lines = [
2528
+ f"# 🔍 Package Search Results: {package_name} ({registry})",
2529
+ f"**Pattern:** `{pattern}`",
2530
+ ""
2531
+ ]
2532
+
2533
+ if version_used:
2534
+ response_lines.append(f"**Version:** {version_used}")
2535
+ elif version:
2536
+ response_lines.append(f"**Version:** {version}")
2537
+
2538
+ response_lines.append(f"**Found {len(results)} matches**\n")
2539
+
2540
+ # Handle grep result format: {output_mode: "content", result: {content, file_path, start_line, etc}}
2541
+ for i, item in enumerate(results, 1):
2542
+ response_lines.append(f"## Match {i}")
2543
+
2544
+ # Extract data from Chroma grep format
2545
+ if "result" in item:
2546
+ result_data = item["result"]
2547
+ if result_data.get("file_path"):
2548
+ response_lines.append(f"**File:** `{result_data['file_path']}`")
2549
+
2550
+ # Show SHA256 for read_file tool usage
2551
+ if result_data.get("filename_sha256"):
2552
+ response_lines.append(f"**SHA256:** `{result_data['filename_sha256']}`")
2553
+
2554
+ if result_data.get("start_line") and result_data.get("end_line"):
2555
+ response_lines.append(f"**Lines:** {result_data['start_line']}-{result_data['end_line']}")
2556
+ if result_data.get("language"):
2557
+ response_lines.append(f"**Language:** {result_data['language']}")
2558
+
2559
+ response_lines.append("```")
2560
+ response_lines.append(result_data.get("content", ""))
2561
+ response_lines.append("```\n")
2562
+ else:
2563
+ # Fallback for other formats
2564
+ response_lines.append("```")
2565
+ response_lines.append(str(item))
2566
+ response_lines.append("```\n")
2567
+
2568
+ # Add truncation message if present
2569
+ if result.get("truncation_message"):
2570
+ response_lines.append(f"⚠️ **Note:** {result['truncation_message']}")
2571
+
2572
+ # Add usage hint for read_file workflow (grep tool)
2573
+ response_lines.append("\n💡 **To read full file content:**")
2574
+ response_lines.append("Copy a SHA256 above and use: `nia_package_search_read_file(registry=..., package_name=..., filename_sha256=\"...\", start_line=1, end_line=100)`")
2575
+
2576
+ return [TextContent(type="text", text="\n".join(response_lines))]
2577
+
1710
2578
  except Exception as e:
1711
- logger.error(f"Unexpected error indexing local filesystem: {e}")
2579
+ logger.error(f"Error in package search grep: {e}")
1712
2580
  return [TextContent(
1713
2581
  type="text",
1714
- text=f"❌ Error: An unexpected error occurred while indexing the directory: {str(e)}"
2582
+ text=f"❌ Error searching package: {str(e)}\n\n"
2583
+ f"Make sure:\n"
2584
+ f"- The registry is one of: crates_io, golang_proxy, npm, py_pi\n"
2585
+ f"- The package name is correct\n"
2586
+ f"- The pattern is a valid regex"
1715
2587
  )]
1716
2588
 
1717
2589
  @mcp.tool()
1718
- async def scan_local_filesystem(
1719
- directory_path: str,
1720
- inclusion_patterns: Optional[List[str]] = None,
1721
- exclusion_patterns: Optional[List[str]] = None,
1722
- max_file_size_mb: int = 50
2590
+ async def nia_package_search_hybrid(
2591
+ registry: str,
2592
+ package_name: str,
2593
+ semantic_queries: List[str],
2594
+ version: Optional[str] = None,
2595
+ filename_sha256: Optional[str] = None,
2596
+ pattern: Optional[str] = None,
2597
+ language: Optional[str] = None
1723
2598
  ) -> List[TextContent]:
1724
2599
  """
1725
- Scan a local filesystem directory to preview what files would be indexed.
1726
-
1727
- This tool helps you understand what files will be processed before actually indexing.
1728
-
1729
- Args:
1730
- directory_path: Absolute path to the directory to scan
1731
- inclusion_patterns: Optional list of patterns to include (e.g., ["ext:.py", "dir:src"])
1732
- exclusion_patterns: Optional list of patterns to exclude (e.g., ["dir:node_modules", "ext:.log"])
1733
- max_file_size_mb: Maximum file size in MB to process (default: 50)
1734
-
1735
- Returns:
1736
- Summary of files that would be indexed including count, size, and file types
2600
+ Searches package source code using semantic understanding AND optionally regex patterns. This
2601
+ allows for hybrid search, allowing for prefiltering with regex, and semantic ranking.
2602
+
2603
+ Required Args: "registry", "package_name", "semantic_queries"
2604
+
2605
+ Optional Args: "version", "filename_sha256", "pattern", "language"
2606
+
2607
+ Best for: Understanding how packages implement specific features, finding usage patterns, or
2608
+ exploring code structure.
2609
+
2610
+ Parameters:
2611
+ filename_sha256: The sha256 hash of the file to filter for
2612
+ language: The languages to filter for. If not provided, all languages will be searched. Valid
2613
+ options: "Rust", "Go", "Python", "JavaScript", "JSX", "TypeScript", "TSX", "HTML", "Markdown",
2614
+ "YAML", "Bash", "SQL", "JSON", "Text", "Dockerfile", "HCL", "Protobuf", "Make", "Toml", "Jupyter Notebook"
2615
+ package_name: The name of the requested package. Pass the name as it appears in the package
2616
+ manager. For Go packages, use the GitHub organization and repository name in the format
2617
+ {org}/{repo}, if unsure check the GitHub URL for the package and use {org}/{repo} from that URL.
2618
+ pattern: The regex pattern for exact text matching in the codebase. Must be a valid regex.
2619
+ Example: "func\\s+\\(get_repository\\|getRepository\\)\\s*\\(.*?\\)\\s\\{"
2620
+ registry: The name of the registry containing the requested package. Must be one of:
2621
+ "crates_io", "golang_proxy", "npm", or "py_pi".
2622
+ semantic_queries: Array of 1-5 plain English questions about the codebase. Example: ["how is
2623
+ argmax implemented in numpy?", "what testing patterns does axum use?"]
2624
+ version: Optionally, the specific version of the package whose source code to search.
2625
+ If provided, must be in semver format: {major}.{minor}.{patch}. Otherwise, the latest indexed
2626
+ version of the package available will be used.
1737
2627
  """
1738
2628
  try:
1739
- # Validate absolute path
1740
- if not os.path.isabs(directory_path):
2629
+ # Use API client for backend routing
2630
+ client = await ensure_api_client()
2631
+ logger.info(f"Hybrid search in {package_name} from {registry} with queries: {semantic_queries}")
2632
+
2633
+ # Execute hybrid search through backend
2634
+ result = await client.package_search_hybrid(
2635
+ registry=registry,
2636
+ package_name=package_name,
2637
+ semantic_queries=semantic_queries,
2638
+ version=version,
2639
+ filename_sha256=filename_sha256,
2640
+ pattern=pattern,
2641
+ language=language
2642
+ )
2643
+
2644
+ # Handle raw Chroma JSON response
2645
+ if not result or not isinstance(result, dict):
2646
+ queries_str = "\n".join(f"- {q}" for q in semantic_queries)
1741
2647
  return [TextContent(
1742
2648
  type="text",
1743
- text=f" Error: directory_path must be an absolute path. Got: {directory_path}\n\n"
1744
- f"Example: /Users/username/projects/myproject"
2649
+ text=f"No response from Chroma for queries:\n{queries_str}\n\nin {package_name} ({registry})"
1745
2650
  )]
1746
-
1747
- client = await ensure_api_client()
1748
-
1749
- logger.info(f"Scanning local directory: {directory_path}")
1750
- result = await client.scan_local_filesystem(
1751
- directory_path=directory_path,
1752
- inclusion_patterns=inclusion_patterns or [],
1753
- exclusion_patterns=exclusion_patterns or [],
1754
- max_file_size_mb=max_file_size_mb
1755
- )
1756
-
1757
- # Format the scan results
1758
- total_files = result.get("total_files", 0)
1759
- total_size_mb = result.get("total_size_mb", 0)
1760
- file_types = result.get("file_types", {})
1761
- files = result.get("files", [])
1762
- truncated = result.get("truncated", False)
1763
-
1764
- response = f"📊 **Local Directory Scan Results**\n\n"
1765
- response += f"📁 **Directory:** `{directory_path}`\n"
1766
- response += f"📄 **Total Files:** {total_files:,}\n"
1767
- response += f"💾 **Total Size:** {total_size_mb:.2f} MB\n\n"
1768
-
1769
- if file_types:
1770
- response += "**File Types:**\n"
1771
- # Sort by count descending
1772
- sorted_types = sorted(file_types.items(), key=lambda x: x[1], reverse=True)
1773
- for ext, count in sorted_types[:10]: # Show top 10
1774
- response += f"• `{ext}`: {count:,} files\n"
1775
- if len(sorted_types) > 10:
1776
- response += f"• ... and {len(sorted_types) - 10} more types\n"
1777
- response += "\n"
1778
-
1779
- if files:
1780
- response += f"**Largest Files (showing {min(len(files), 10)}):**\n"
1781
- for i, file_info in enumerate(files[:10]):
1782
- size_mb = file_info["size"] / (1024 * 1024)
1783
- response += f"{i+1}. `{file_info['path']}` ({size_mb:.2f} MB)\n"
1784
-
1785
- if truncated:
1786
- response += f"\n*Note: Showing first 100 files out of {total_files:,} total*\n"
1787
-
1788
- if inclusion_patterns:
1789
- response += f"\n**Inclusion Patterns:** {', '.join(f'`{p}`' for p in inclusion_patterns)}\n"
1790
- if exclusion_patterns:
1791
- response += f"**Exclusion Patterns:** {', '.join(f'`{p}`' for p in exclusion_patterns)}\n"
1792
-
1793
- response += "\n💡 **Next Step:** Use `index_local_filesystem` to index these files."
1794
-
1795
- return [TextContent(type="text", text=response)]
1796
-
1797
- except APIError as e:
1798
- logger.error(f"API error scanning local filesystem: {e}")
1799
- return [TextContent(
1800
- type="text",
1801
- text=f"❌ API Error: {str(e)}\n\nStatus Code: {e.status_code}\nDetails: {e.detail}"
1802
- )]
1803
- except Exception as e:
1804
- logger.error(f"Unexpected error scanning local filesystem: {e}")
1805
- return [TextContent(
1806
- type="text",
1807
- text=f"❌ Error: An unexpected error occurred while scanning: {str(e)}"
1808
- )]
1809
2651
 
1810
- @mcp.tool()
1811
- async def check_local_filesystem_status(source_id: str) -> List[TextContent]:
1812
- """
1813
- Check the indexing status of a local filesystem source.
1814
-
1815
- Args:
1816
- source_id: The source ID returned when indexing was started
1817
-
1818
- Returns:
1819
- Current status of the local filesystem indexing
1820
- """
1821
- try:
1822
- client = await ensure_api_client()
1823
- status = await client.check_local_filesystem_status(source_id)
1824
-
1825
- # Format status response
1826
- status_text = status.get("status", "unknown")
1827
- progress = status.get("progress", 0)
1828
- message = status.get("message", "")
1829
- error = status.get("error")
1830
- directory_path = status.get("directory_path", "Unknown")
1831
- page_count = status.get("page_count", 0) # Number of files
1832
- chunk_count = status.get("chunk_count", 0)
1833
-
1834
- # Status emoji
1835
- status_emoji = {
1836
- "pending": "⏳",
1837
- "processing": "🔄",
1838
- "completed": "✅",
1839
- "failed": "❌",
1840
- "error": "❌"
1841
- }.get(status_text, "❓")
1842
-
1843
- response = f"{status_emoji} **Local Filesystem Status**\n\n"
1844
- response += f"🆔 **Source ID:** `{source_id}`\n"
1845
- response += f"📁 **Directory:** `{directory_path}`\n"
1846
- response += f"📊 **Status:** {status_text.capitalize()}\n"
1847
-
1848
- if progress > 0:
1849
- response += f"📈 **Progress:** {progress}%\n"
1850
-
1851
- if message:
1852
- response += f"💬 **Message:** {message}\n"
1853
-
1854
- if status_text == "completed":
1855
- response += f"\n✨ **Indexing Complete!**\n"
1856
- response += f"• **Files Indexed:** {page_count:,}\n"
1857
- response += f"• **Chunks Created:** {chunk_count:,}\n"
1858
- response += f"\nYou can now search this directory using `search_codebase` or the unified search!"
1859
- elif status_text in ["failed", "error"]:
1860
- response += f"\n❌ **Indexing Failed**\n"
1861
- if error:
1862
- response += f"**Error:** {error}\n"
1863
- response += "\nPlease check your directory path and try again."
1864
- elif status_text == "processing":
1865
- response += f"\n🔄 Indexing is in progress...\n"
1866
- response += "Check back in a few moments or monitor at [app.trynia.ai](https://app.trynia.ai)"
1867
-
1868
- return [TextContent(type="text", text=response)]
1869
-
1870
- except APIError as e:
1871
- logger.error(f"API error checking local filesystem status: {e}")
1872
- if e.status_code == 404:
2652
+ # Extract results and version from raw Chroma response
2653
+ results = result.get("results", [])
2654
+ version_used = result.get("version_used")
2655
+
2656
+ if not results:
2657
+ queries_str = "\n".join(f"- {q}" for q in semantic_queries)
1873
2658
  return [TextContent(
1874
2659
  type="text",
1875
- text=f" Source ID `{source_id}` not found. Please check the ID and try again."
2660
+ text=f"No relevant code found for queries:\n{queries_str}\n\nin {package_name} ({registry})"
1876
2661
  )]
1877
- return [TextContent(
1878
- type="text",
1879
- text=f"❌ API Error: {str(e)}\n\nStatus Code: {e.status_code}\nDetails: {e.detail}"
1880
- )]
2662
+
2663
+ response_lines = [
2664
+ f"# 🔎 Package Semantic Search: {package_name} ({registry})",
2665
+ "**Queries:**"
2666
+ ]
2667
+
2668
+ for query in semantic_queries:
2669
+ response_lines.append(f"- {query}")
2670
+
2671
+ response_lines.append("")
2672
+
2673
+ if version_used:
2674
+ response_lines.append(f"**Version:** {version_used}")
2675
+ elif version:
2676
+ response_lines.append(f"**Version:** {version}")
2677
+ if pattern:
2678
+ response_lines.append(f"**Pattern Filter:** `{pattern}`")
2679
+
2680
+ response_lines.append(f"\n**Found {len(results)} relevant code sections**\n")
2681
+
2682
+ # Handle hybrid result format: {id: "...", document: "content", metadata: {...}}
2683
+ for i, item in enumerate(results, 1):
2684
+ response_lines.append(f"## Result {i}")
2685
+
2686
+ # Extract metadata if available
2687
+ metadata = item.get("metadata", {})
2688
+ if metadata.get("filename"):
2689
+ response_lines.append(f"**File:** `{metadata['filename']}`")
2690
+
2691
+ # Show SHA256 for read_file tool usage (from metadata)
2692
+ if metadata.get("filename_sha256"):
2693
+ response_lines.append(f"**SHA256:** `{metadata['filename_sha256']}`")
2694
+
2695
+ if metadata.get("start_line") and metadata.get("end_line"):
2696
+ response_lines.append(f"**Lines:** {metadata['start_line']}-{metadata['end_line']}")
2697
+ if metadata.get("language"):
2698
+ response_lines.append(f"**Language:** {metadata['language']}")
2699
+
2700
+ # Get document content
2701
+ content = item.get("document", "")
2702
+ if content:
2703
+ response_lines.append("```")
2704
+ response_lines.append(content)
2705
+ response_lines.append("```\n")
2706
+
2707
+ # Add truncation message if present
2708
+ if result.get("truncation_message"):
2709
+ response_lines.append(f"⚠️ **Note:** {result['truncation_message']}")
2710
+
2711
+ # Add usage hint for read_file workflow (hybrid tool)
2712
+ response_lines.append("\n💡 **To read full file content:**")
2713
+ response_lines.append("Copy a SHA256 above and use: `nia_package_search_read_file(registry=..., package_name=..., filename_sha256=\"...\", start_line=1, end_line=100)`")
2714
+
2715
+ return [TextContent(type="text", text="\n".join(response_lines))]
2716
+
1881
2717
  except Exception as e:
1882
- logger.error(f"Unexpected error checking local filesystem status: {e}")
2718
+ logger.error(f"Error in package search hybrid: {e}")
1883
2719
  return [TextContent(
1884
2720
  type="text",
1885
- text=f"❌ Error: An unexpected error occurred: {str(e)}"
2721
+ text=f"❌ Error in hybrid search: {str(e)}\n\n"
2722
+ f"Make sure:\n"
2723
+ f"- The registry is one of: crates_io, golang_proxy, npm, py_pi\n"
2724
+ f"- The package name is correct\n"
2725
+ f"- Semantic queries are provided (1-5 queries)"
1886
2726
  )]
1887
2727
 
1888
2728
  @mcp.tool()
1889
- async def search_local_filesystem(
1890
- source_id: str,
1891
- query: str,
1892
- include_sources: bool = True
2729
+ async def nia_package_search_read_file(
2730
+ registry: str,
2731
+ package_name: str,
2732
+ filename_sha256: str,
2733
+ start_line: int,
2734
+ end_line: int,
2735
+ version: Optional[str] = None
1893
2736
  ) -> List[TextContent]:
1894
2737
  """
1895
- Search an indexed local filesystem directory using its source ID.
1896
-
1897
- To search local files:
1898
- 1. First index a directory using `index_local_filesystem` - this will return a source_id
1899
- 2. Use that source_id with this tool to search the indexed content
1900
-
1901
- Args:
1902
- source_id: The source ID returned when the directory was indexed (required)
1903
- query: Your search query in natural language (required)
1904
- include_sources: Whether to include source code snippets in results (default: True)
1905
-
1906
- Returns:
1907
- Search results with relevant file snippets and explanations
1908
-
1909
- Example:
1910
- # After indexing returns source_id "abc123-def456"
1911
- search_local_filesystem(
1912
- source_id="abc123-def456",
1913
- query="configuration settings"
1914
- )
1915
-
1916
- Note: To find your source IDs, use `list_documentation` and look for
1917
- sources with source_type="local_filesystem"
2738
+ Reads exact lines from a source file of a public package. Useful for fetching specific code regions by
2739
+ line range.
2740
+
2741
+ Required Args: "registry", "package_name", "filename_sha256", "start_line", "end_line" Optional Args:
2742
+ "version"
2743
+
2744
+ Best for: Inspecting exact code snippets when you already know the file and line numbers. Max 200
2745
+ lines.
2746
+
2747
+ Parameters:
2748
+ end_line: 1-based inclusive end line to read
2749
+ filename_sha256: The sha256 hash of the file to filter for
2750
+ package_name: The name of the requested package. Pass the name as it appears in the package
2751
+ manager. For Go packages, use the GitHub organization and repository name in the format
2752
+ {org}/{repo}, if unsure check the GitHub URL for the package and use {org}/{repo} from that URL.
2753
+ registry: The name of the registry containing the requested package. Must be one of:
2754
+ "crates_io", "golang_proxy", "npm", or "py_pi".
2755
+ start_line: 1-based inclusive start line to read
2756
+ version: Optionally, the specific version of the package whose source code to search.
2757
+ If provided, must be in semver format: {major}.{minor}.{patch}. Otherwise, the latest indexed
2758
+ version of the package available will be used.
1918
2759
  """
1919
2760
  try:
1920
- # Validate inputs
1921
- if not source_id:
2761
+ # Validate line range
2762
+ if end_line - start_line + 1 > 200:
1922
2763
  return [TextContent(
1923
2764
  type="text",
1924
- text="❌ Error: 'source_id' parameter is required. Use the ID returned from index_local_filesystem."
2765
+ text="❌ Error: Maximum 200 lines can be read at once. Please reduce the line range."
1925
2766
  )]
1926
-
1927
- if not query:
2767
+
2768
+ if start_line < 1 or end_line < start_line:
1928
2769
  return [TextContent(
1929
2770
  type="text",
1930
- text="❌ Error: 'query' parameter is required"
2771
+ text="❌ Error: Invalid line range. Start line must be >= 1 and end line must be >= start line."
1931
2772
  )]
1932
-
2773
+
2774
+ # Use API client for backend routing
1933
2775
  client = await ensure_api_client()
1934
-
1935
- # Check if the source exists and is ready
1936
- logger.info(f"Checking status of source {source_id}")
1937
- try:
1938
- status = await client.get_data_source_status(source_id)
1939
- if not status:
1940
- return [TextContent(
1941
- type="text",
1942
- text=f"❌ Source ID '{source_id}' not found. Please check the ID and try again."
1943
- )]
1944
-
1945
- source_status = status.get("status", "unknown")
1946
- if source_status == "processing":
1947
- progress = status.get("progress", 0)
1948
- return [TextContent(
1949
- type="text",
1950
- text=f"⏳ This source is still being indexed ({progress}% complete).\n\n"
1951
- f"Use `check_local_filesystem_status(\"{source_id}\")` to check progress."
1952
- )]
1953
- elif source_status == "failed":
1954
- error = status.get("error", "Unknown error")
1955
- return [TextContent(
1956
- type="text",
1957
- text=f"❌ This source failed to index.\n\nError: {error}"
1958
- )]
1959
- elif source_status != "completed":
1960
- return [TextContent(
1961
- type="text",
1962
- text=f"❌ Source is not ready for search. Status: {source_status}"
1963
- )]
1964
- except Exception as e:
1965
- logger.warning(f"Could not check source status: {e}")
1966
- # Continue anyway in case it's just a status check issue
1967
-
1968
- # Perform the search
1969
- logger.info(f"Searching local filesystem source {source_id} with query: {query}")
1970
-
1971
- # Use the unified query endpoint with data_sources parameter
1972
- result = client.query_unified(
1973
- messages=[{"role": "user", "content": query}],
1974
- data_sources=[source_id],
1975
- include_sources=include_sources,
1976
- stream=False
2776
+ logger.info(f"Reading file from {package_name} ({registry}): sha256={filename_sha256}, lines {start_line}-{end_line}")
2777
+
2778
+ # Read file content through backend
2779
+ result = await client.package_search_read_file(
2780
+ registry=registry,
2781
+ package_name=package_name,
2782
+ filename_sha256=filename_sha256,
2783
+ start_line=start_line,
2784
+ end_line=end_line,
2785
+ version=version
1977
2786
  )
1978
-
1979
- # Parse the response
1980
- response_text = ""
1981
- async for chunk in result:
1982
- data = json.loads(chunk)
1983
- if "content" in data:
1984
- response_text = data["content"]
1985
- sources = data.get("sources", [])
1986
- break
1987
-
1988
- # Format the response nicely for local filesystem results
1989
- if response_text:
1990
- # Extract the local filesystem results section if present
1991
- if "**Local filesystem results" in response_text:
1992
- # Keep the original response
1993
- formatted_response = response_text
1994
- else:
1995
- # Create our own formatted response
1996
- formatted_response = f"🔍 **Search Results for Local Directory**\n"
1997
- formatted_response += f"🔎 Query: \"{query}\"\n\n"
1998
- formatted_response += response_text
1999
-
2000
- # Add sources if available and requested
2001
- if include_sources and sources:
2002
- formatted_response += "\n\n**📄 Source Details:**\n"
2003
- for i, source in enumerate(sources[:5], 1):
2004
- metadata = source.get("metadata", {})
2005
- file_path = metadata.get("file_path", "Unknown file")
2006
- formatted_response += f"\n{i}. `{file_path}`\n"
2007
-
2008
- # Add snippet of content
2009
- content = source.get("content", "")
2010
- if content:
2011
- # Truncate to reasonable length
2012
- lines = content.split('\n')[:10]
2013
- snippet = '\n'.join(lines)
2014
- if len(lines) > 10:
2015
- snippet += "\n..."
2016
- formatted_response += f"```\n{snippet}\n```\n"
2017
-
2018
- return [TextContent(type="text", text=formatted_response)]
2787
+
2788
+ # Handle raw Chroma response (read_file typically returns content directly)
2789
+ response_lines = [
2790
+ f"# 📄 Package File Content: {package_name} ({registry})",
2791
+ f"**File SHA256:** `{filename_sha256}`",
2792
+ f"**Lines:** {start_line}-{end_line}"
2793
+ ]
2794
+
2795
+ if version:
2796
+ response_lines.append(f"**Version:** {version}")
2797
+
2798
+ response_lines.append("\n```")
2799
+ # For read_file, Chroma typically returns the content directly as a string
2800
+ if isinstance(result, str):
2801
+ response_lines.append(result)
2802
+ elif isinstance(result, dict) and result.get("content"):
2803
+ response_lines.append(result["content"])
2019
2804
  else:
2020
- return [TextContent(
2021
- type="text",
2022
- text=f"No results found for query: \"{query}\" in the indexed directory."
2023
- )]
2024
-
2025
- except APIError as e:
2026
- logger.error(f"API error searching local filesystem: {e}")
2027
- return [TextContent(
2028
- type="text",
2029
- text=f"❌ API Error: {str(e)}\n\nStatus Code: {e.status_code}\nDetails: {e.detail}"
2030
- )]
2805
+ response_lines.append(str(result))
2806
+ response_lines.append("```")
2807
+
2808
+ return [TextContent(type="text", text="\n".join(response_lines))]
2809
+
2031
2810
  except Exception as e:
2032
- logger.error(f"Unexpected error searching local filesystem: {e}")
2811
+ logger.error(f"Error reading package file: {e}")
2033
2812
  return [TextContent(
2034
2813
  type="text",
2035
- text=f"❌ Error: An unexpected error occurred: {str(e)}"
2814
+ text=f"❌ Error reading file: {str(e)}\n\n"
2815
+ f"Make sure:\n"
2816
+ f"- The registry is one of: crates_io, golang_proxy, npm, py_pi\n"
2817
+ f"- The package name is correct\n"
2818
+ f"- The filename_sha256 is valid\n"
2819
+ f"- The line range is valid (1-based, max 200 lines)"
2036
2820
  )]
2037
2821
 
2038
2822
  @mcp.tool()