regen-koi-mcp 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -13,10 +13,12 @@ import { TOOLS } from './tools.js';
13
13
  // Use enhanced SPARQL client with focused retrieval
14
14
  import { SPARQLClient } from './sparql-client-enhanced.js';
15
15
  import HybridSearchClient from './hybrid-client.js';
16
+ import { QueryRouter } from './query_router.js';
17
+ import { UnifiedSearch } from './unified_search.js';
18
+ import { executeGraphTool } from './graph_tool.js';
16
19
  // Load environment variables
17
20
  dotenv.config();
18
21
  // Configuration
19
- // Prefer the public HTTPS endpoint unless overridden
20
22
  const KOI_API_ENDPOINT = process.env.KOI_API_ENDPOINT || 'https://regen.gaiaai.xyz/api/koi';
21
23
  const KOI_API_KEY = process.env.KOI_API_KEY || '';
22
24
  const SERVER_NAME = process.env.MCP_SERVER_NAME || 'regen-koi';
@@ -35,9 +37,41 @@ class KOIServer {
35
37
  server;
36
38
  sparqlClient;
37
39
  hybridClient;
40
+ queryRouter = null;
41
+ unifiedSearch = null;
38
42
  constructor() {
39
43
  this.sparqlClient = new SPARQLClient();
40
44
  this.hybridClient = new HybridSearchClient();
45
+ // Initialize QueryRouter and UnifiedSearch if database config is available
46
+ try {
47
+ if (process.env.GRAPH_DB_HOST && process.env.GRAPH_DB_NAME) {
48
+ this.queryRouter = new QueryRouter({
49
+ host: process.env.GRAPH_DB_HOST,
50
+ port: parseInt(process.env.GRAPH_DB_PORT || '5432'),
51
+ database: process.env.GRAPH_DB_NAME,
52
+ user: process.env.GRAPH_DB_USER,
53
+ password: process.env.GRAPH_DB_PASSWORD,
54
+ entitySimilarityThreshold: parseFloat(process.env.ENTITY_SIMILARITY_THRESHOLD || '0.15'),
55
+ });
56
+ this.unifiedSearch = new UnifiedSearch({
57
+ host: process.env.GRAPH_DB_HOST,
58
+ port: parseInt(process.env.GRAPH_DB_PORT || '5432'),
59
+ database: process.env.GRAPH_DB_NAME,
60
+ user: process.env.GRAPH_DB_USER,
61
+ password: process.env.GRAPH_DB_PASSWORD,
62
+ graphName: process.env.GRAPH_NAME || 'regen_graph',
63
+ embeddingDimension: parseInt(process.env.EMBEDDING_DIM || '1536'),
64
+ rrfConstant: parseInt(process.env.RRF_K || '60'),
65
+ });
66
+ console.error(`[${SERVER_NAME}] Initialized QueryRouter and UnifiedSearch`);
67
+ }
68
+ else {
69
+ console.error(`[${SERVER_NAME}] Graph database configuration not found - hybrid_search and query_code_graph tools will be unavailable`);
70
+ }
71
+ }
72
+ catch (error) {
73
+ console.error(`[${SERVER_NAME}] Failed to initialize graph components:`, error);
74
+ }
41
75
  this.server = new Server({
42
76
  name: SERVER_NAME,
43
77
  version: SERVER_VERSION,
@@ -65,12 +99,22 @@ class KOIServer {
65
99
  try {
66
100
  console.error(`[${SERVER_NAME}] Executing tool: ${name}`);
67
101
  switch (name) {
102
+ case 'query_code_graph':
103
+ return await executeGraphTool(args);
104
+ case 'hybrid_search':
105
+ return await this.handleHybridSearch(args);
68
106
  case 'search_knowledge':
69
107
  return await this.searchKnowledge(args);
70
108
  case 'get_stats':
71
109
  return await this.getStats(args);
72
110
  case 'generate_weekly_digest':
73
111
  return await this.generateWeeklyDigest(args);
112
+ case 'search_github_docs':
113
+ return await this.searchGithubDocs(args);
114
+ case 'get_repo_overview':
115
+ return await this.getRepoOverview(args);
116
+ case 'get_tech_stack':
117
+ return await this.getTechStack(args);
74
118
  default:
75
119
  throw new Error(`Unknown tool: ${name}`);
76
120
  }
@@ -253,7 +297,7 @@ class KOIServer {
253
297
  }
254
298
  }
255
299
  async searchKnowledge(args) {
256
- const { query, limit = 5, published_from, published_to, include_undated = false, useHybrid = true } = args || {};
300
+ const { query, limit = 5, published_from, published_to, include_undated = false, useHybrid = false } = args || {};
257
301
  const vectorFilters = {};
258
302
  // Respect explicit date filter
259
303
  if (published_from || published_to) {
@@ -300,10 +344,10 @@ class KOIServer {
300
344
  }
301
345
  // Fallback to original vector search
302
346
  try {
303
- const body = { query: query, limit };
347
+ const body = { question: query, limit };
304
348
  if (Object.keys(vectorFilters).length > 0)
305
349
  body.filters = vectorFilters;
306
- const response = await apiClient.post('/query', body);
350
+ const response = await apiClient.post('/search', body);
307
351
  const data = response.data;
308
352
  const results = data.results || [];
309
353
  const formattedResults = this.formatSearchResults(results, query);
@@ -847,6 +891,501 @@ class KOIServer {
847
891
  throw new Error(`Failed to generate weekly digest: ${error instanceof Error ? error.message : 'Unknown error'}`);
848
892
  }
849
893
  }
894
+ /**
895
+ * Extract repository name from GitHub RID
896
+ * RID format: regen.github:github_{repo}_github_sensor_{id}_{repo}_{filepath}#chunk{n}
897
+ * or: regen.github:github_{repo}_{repo}_{filepath}#chunk{n}
898
+ */
899
+ extractRepoFromRid(rid) {
900
+ // Try pattern with sensor ID first
901
+ let match = rid.match(/regen\.github:github_([^_]+)_github_sensor/);
902
+ if (match)
903
+ return match[1];
904
+ // Try pattern without sensor ID
905
+ match = rid.match(/regen\.github:github_([^_]+)_([^_]+)/);
906
+ return match ? match[1] : '';
907
+ }
908
+ /**
909
+ * Extract filepath from GitHub RID for deduplication
910
+ * Returns unique file identifier
911
+ */
912
+ extractFilepathFromRid(rid) {
913
+ // Pattern 1: with sensor ID: _github_sensor_{id}_{repo}_{filepath}#chunk{n}
914
+ let match = rid.match(/_github_sensor_[^_]+_[^_]+_(.+?)(?:#chunk\d+)?$/);
915
+ if (match)
916
+ return match[1];
917
+ // Pattern 2: without sensor ID: github_{repo}_{repo}_{filepath}#chunk{n}
918
+ match = rid.match(/github_[^_]+_[^_]+_(.+?)(?:#chunk\d+)?$/);
919
+ return match ? match[1] : rid;
920
+ }
921
+ /**
922
+ * Format GitHub documentation search results as markdown
923
+ */
924
+ formatGithubDocsResults(memories, query) {
925
+ if (memories.length === 0) {
926
+ return `No results found for "${query}" in GitHub documentation.\n\n**Suggestions:**\n- Try broader search terms\n- Check repository name spelling\n- Use \`search_knowledge\` for non-GitHub content`;
927
+ }
928
+ let output = `## GitHub Documentation Search Results\n\n`;
929
+ output += `**Query:** "${query}"\n`;
930
+ output += `**Results:** ${memories.length} documents found\n\n`;
931
+ memories.forEach((memory, index) => {
932
+ // Extract info from RID
933
+ const repo = this.extractRepoFromRid(memory.rid);
934
+ const filepath = this.extractFilepathFromRid(memory.rid);
935
+ // Get relevance score
936
+ const score = memory.similarity
937
+ ? `(relevance: ${(memory.similarity * 100).toFixed(0)}%)`
938
+ : '';
939
+ // Truncate content
940
+ const content = memory.content?.substring(0, 300) || '';
941
+ output += `### ${index + 1}. ${filepath} ${score}\n`;
942
+ output += `**Repository:** ${repo}\n`;
943
+ output += `**RID:** ${memory.rid}\n\n`;
944
+ output += `${content}${content.length >= 300 ? '...' : ''}\n\n`;
945
+ output += `---\n\n`;
946
+ });
947
+ return output;
948
+ }
949
+ /**
950
+ * Search GitHub documentation
951
+ * Implements client-side filtering based on Phase 0 findings
952
+ */
953
+ async searchGithubDocs(args) {
954
+ const startTime = Date.now();
955
+ const { query, repository, limit = 10 } = args;
956
+ console.error(`[${SERVER_NAME}] Tool=search_github_docs Event=start Query="${query.substring(0, 50)}" Repository=${repository || 'all'}`);
957
+ try {
958
+ // Call Bun Hybrid RAG API (uses different field names than Python fallback)
959
+ // Request extra results to account for filtering/deduplication
960
+ const response = await apiClient.post('/query', {
961
+ question: query, // Bun API uses "question" parameter
962
+ limit: Math.min(limit * 3, 50) // Request 3x to account for client-side filtering
963
+ });
964
+ const data = response.data;
965
+ const allMemories = data?.results || []; // Bun API returns "results" array
966
+ const duration = Date.now() - startTime;
967
+ console.error(`[${SERVER_NAME}] Tool=search_github_docs Event=api_response RawResults=${allMemories.length} Duration=${duration}ms`);
968
+ // CLIENT-SIDE FILTERING (handles Phase 0 issues)
969
+ const filteredMemories = allMemories
970
+ // Filter 1: Only GitHub results (fixes 10-20% leakage from Phase 0)
971
+ .filter((m) => m.rid?.startsWith('regen.github:'))
972
+ // Filter 2: Repository filter if specified
973
+ .filter((m) => {
974
+ if (!repository)
975
+ return true;
976
+ const repo = this.extractRepoFromRid(m.rid);
977
+ return repo === repository;
978
+ })
979
+ // Filter 3: Deduplicate by filepath (handles duplicate sensor IDs from Phase 0)
980
+ .filter((m, index, arr) => {
981
+ const filepath = this.extractFilepathFromRid(m.rid);
982
+ return arr.findIndex((x) => this.extractFilepathFromRid(x.rid) === filepath) === index;
983
+ })
984
+ // Take only requested amount
985
+ .slice(0, limit);
986
+ console.error(`[${SERVER_NAME}] Tool=search_github_docs Event=filtered FilteredResults=${filteredMemories.length}`);
987
+ // Format and return results
988
+ const formattedOutput = this.formatGithubDocsResults(filteredMemories, query);
989
+ console.error(`[${SERVER_NAME}] Tool=search_github_docs Event=success FinalResults=${filteredMemories.length} TotalDuration=${Date.now() - startTime}ms`);
990
+ return {
991
+ content: [{
992
+ type: 'text',
993
+ text: formattedOutput
994
+ }]
995
+ };
996
+ }
997
+ catch (error) {
998
+ console.error(`[${SERVER_NAME}] Tool=search_github_docs Event=error`, error);
999
+ // Handle specific error types
1000
+ if (error.code === 'ECONNREFUSED') {
1001
+ return {
1002
+ content: [{
1003
+ type: 'text',
1004
+ text: 'KOI API is currently unavailable. Please try again later or check your network connection.'
1005
+ }]
1006
+ };
1007
+ }
1008
+ if (error.code === 'ETIMEDOUT' || error.message?.includes('timeout')) {
1009
+ return {
1010
+ content: [{
1011
+ type: 'text',
1012
+ text: 'Request timed out. The server may be busy. Please try again with a smaller limit.'
1013
+ }]
1014
+ };
1015
+ }
1016
+ // Generic error
1017
+ return {
1018
+ content: [{
1019
+ type: 'text',
1020
+ text: `Error searching GitHub documentation: ${error instanceof Error ? error.message : 'Unknown error occurred'}`
1021
+ }]
1022
+ };
1023
+ }
1024
+ }
1025
+ /**
1026
+ * Get repository overview
1027
+ * Provides structured overview of a specific Regen Network repository
1028
+ */
1029
+ async getRepoOverview(args) {
1030
+ const startTime = Date.now();
1031
+ const { repository } = args;
1032
+ console.error(`[${SERVER_NAME}] Tool=get_repo_overview Event=start Repository=${repository}`);
1033
+ try {
1034
+ // Search for README and key documentation files
1035
+ const queries = [
1036
+ `${repository} README documentation overview`,
1037
+ `${repository} CONTRIBUTING guidelines`,
1038
+ `${repository} architecture structure`
1039
+ ];
1040
+ // Execute searches in parallel
1041
+ const responses = await Promise.all(queries.map(query => apiClient.post('/query', {
1042
+ question: query,
1043
+ limit: 20
1044
+ })));
1045
+ // Combine and filter all memories
1046
+ const allMemories = [];
1047
+ responses.forEach(response => {
1048
+ const data = response.data;
1049
+ const memories = data?.results || []; // Bun API returns "results"
1050
+ allMemories.push(...memories);
1051
+ });
1052
+ console.error(`[${SERVER_NAME}] Tool=get_repo_overview Event=api_response RawResults=${allMemories.length}`);
1053
+ // CLIENT-SIDE FILTERING
1054
+ const filteredMemories = allMemories
1055
+ // Filter 1: Only GitHub results
1056
+ .filter((m) => m.rid?.startsWith('regen.github:'))
1057
+ // Filter 2: Only specified repository
1058
+ .filter((m) => {
1059
+ const repo = this.extractRepoFromRid(m.rid);
1060
+ return repo === repository;
1061
+ })
1062
+ // Filter 3: Deduplicate by filepath
1063
+ .filter((m, index, arr) => {
1064
+ const filepath = this.extractFilepathFromRid(m.rid);
1065
+ return arr.findIndex((x) => this.extractFilepathFromRid(x.rid) === filepath) === index;
1066
+ });
1067
+ console.error(`[${SERVER_NAME}] Tool=get_repo_overview Event=filtered FilteredResults=${filteredMemories.length}`);
1068
+ // Format output
1069
+ const formattedOutput = this.formatRepoOverview(repository, filteredMemories);
1070
+ console.error(`[${SERVER_NAME}] Tool=get_repo_overview Event=success FinalResults=${filteredMemories.length} TotalDuration=${Date.now() - startTime}ms`);
1071
+ return {
1072
+ content: [{
1073
+ type: 'text',
1074
+ text: formattedOutput
1075
+ }]
1076
+ };
1077
+ }
1078
+ catch (error) {
1079
+ console.error(`[${SERVER_NAME}] Tool=get_repo_overview Event=error`, error);
1080
+ return {
1081
+ content: [{
1082
+ type: 'text',
1083
+ text: `Error getting repository overview: ${error instanceof Error ? error.message : 'Unknown error occurred'}`
1084
+ }]
1085
+ };
1086
+ }
1087
+ }
1088
+ /**
1089
+ * Format repository overview as markdown
1090
+ */
1091
+ formatRepoOverview(repository, memories) {
1092
+ let output = `# ${repository} - Repository Overview\n\n`;
1093
+ if (memories.length === 0) {
1094
+ output += `No documentation found for ${repository}.\n\n`;
1095
+ output += `**Note:** The GitHub sensor primarily indexes documentation and config files. `;
1096
+ output += `Try using \`search_github_docs\` with specific queries.\n`;
1097
+ return output;
1098
+ }
1099
+ // Categorize files
1100
+ const readmeFiles = [];
1101
+ const contributingFiles = [];
1102
+ const docFiles = [];
1103
+ const configFiles = [];
1104
+ memories.forEach(memory => {
1105
+ const filepath = this.extractFilepathFromRid(memory.rid).toLowerCase();
1106
+ if (filepath.includes('readme')) {
1107
+ readmeFiles.push(memory);
1108
+ }
1109
+ else if (filepath.includes('contributing') || filepath.includes('code_of_conduct')) {
1110
+ contributingFiles.push(memory);
1111
+ }
1112
+ else if (filepath.includes('doc') || filepath.includes('.md')) {
1113
+ docFiles.push(memory);
1114
+ }
1115
+ else {
1116
+ configFiles.push(memory);
1117
+ }
1118
+ });
1119
+ // Repository description section
1120
+ output += `## Repository Description\n\n`;
1121
+ // Use README content if available
1122
+ if (readmeFiles.length > 0) {
1123
+ const readme = readmeFiles[0];
1124
+ const content = readme.content?.substring(0, 400) || '';
1125
+ output += `${content}${content.length >= 400 ? '...' : ''}\n\n`;
1126
+ }
1127
+ else {
1128
+ output += `*No README found. Documentation may be limited for this repository.*\n\n`;
1129
+ }
1130
+ // Key files section
1131
+ output += `## Key Files Found\n\n`;
1132
+ output += `**Total Documentation Files:** ${memories.length}\n\n`;
1133
+ if (readmeFiles.length > 0) {
1134
+ output += `### README Files (${readmeFiles.length})\n`;
1135
+ readmeFiles.slice(0, 5).forEach(file => {
1136
+ const filepath = this.extractFilepathFromRid(file.rid);
1137
+ output += `- ${filepath}\n`;
1138
+ });
1139
+ output += `\n`;
1140
+ }
1141
+ if (contributingFiles.length > 0) {
1142
+ output += `### Contributing Guidelines (${contributingFiles.length})\n`;
1143
+ contributingFiles.slice(0, 3).forEach(file => {
1144
+ const filepath = this.extractFilepathFromRid(file.rid);
1145
+ output += `- ${filepath}\n`;
1146
+ });
1147
+ output += `\n`;
1148
+ }
1149
+ if (docFiles.length > 0) {
1150
+ output += `### Documentation Files (${docFiles.length})\n`;
1151
+ docFiles.slice(0, 10).forEach(file => {
1152
+ const filepath = this.extractFilepathFromRid(file.rid);
1153
+ output += `- ${filepath}\n`;
1154
+ });
1155
+ if (docFiles.length > 10) {
1156
+ output += `- ... and ${docFiles.length - 10} more\n`;
1157
+ }
1158
+ output += `\n`;
1159
+ }
1160
+ if (configFiles.length > 0) {
1161
+ output += `### Configuration Files (${configFiles.length})\n`;
1162
+ configFiles.slice(0, 5).forEach(file => {
1163
+ const filepath = this.extractFilepathFromRid(file.rid);
1164
+ output += `- ${filepath}\n`;
1165
+ });
1166
+ if (configFiles.length > 5) {
1167
+ output += `- ... and ${configFiles.length - 5} more\n`;
1168
+ }
1169
+ output += `\n`;
1170
+ }
1171
+ // Links section
1172
+ output += `## Links\n\n`;
1173
+ output += `- **GitHub:** https://github.com/regen-network/${repository}\n`;
1174
+ output += `- **Issues:** https://github.com/regen-network/${repository}/issues\n`;
1175
+ output += `- **Pull Requests:** https://github.com/regen-network/${repository}/pulls\n\n`;
1176
+ output += `---\n\n`;
1177
+ output += `*Use \`search_github_docs\` with repository="${repository}" to explore specific topics.*\n`;
1178
+ return output;
1179
+ }
1180
+ /**
1181
+ * Get tech stack information
1182
+ * Provides technical stack information for Regen Network repositories
1183
+ */
1184
+ async getTechStack(args) {
1185
+ const startTime = Date.now();
1186
+ const { repository } = args;
1187
+ console.error(`[${SERVER_NAME}] Tool=get_tech_stack Event=start Repository=${repository || 'all'}`);
1188
+ try {
1189
+ // Search for tech stack indicators
1190
+ const queries = [
1191
+ 'package.json dependencies frameworks',
1192
+ 'go.mod go dependencies modules',
1193
+ 'Dockerfile CI CD configuration',
1194
+ 'Makefile build tools',
1195
+ 'Cargo.toml pyproject.toml'
1196
+ ];
1197
+ // Execute searches in parallel
1198
+ const responses = await Promise.all(queries.map(query => apiClient.post('/query', {
1199
+ question: query, // Bun API uses "question" parameter
1200
+ limit: 15
1201
+ })));
1202
+ // Combine all memories
1203
+ const allMemories = [];
1204
+ responses.forEach(response => {
1205
+ const data = response.data;
1206
+ const memories = data?.results || []; // Bun API returns "results"
1207
+ allMemories.push(...memories);
1208
+ });
1209
+ console.error(`[${SERVER_NAME}] Tool=get_tech_stack Event=api_response RawResults=${allMemories.length}`);
1210
+ // CLIENT-SIDE FILTERING
1211
+ let filteredMemories = allMemories
1212
+ // Filter 1: Only GitHub results
1213
+ .filter((m) => m.rid?.startsWith('regen.github:'))
1214
+ // Filter 2: Repository filter if specified
1215
+ .filter((m) => {
1216
+ if (!repository)
1217
+ return true;
1218
+ const repo = this.extractRepoFromRid(m.rid);
1219
+ return repo === repository;
1220
+ })
1221
+ // Filter 3: Deduplicate by filepath
1222
+ .filter((m, index, arr) => {
1223
+ const filepath = this.extractFilepathFromRid(m.rid);
1224
+ return arr.findIndex((x) => this.extractFilepathFromRid(x.rid) === filepath) === index;
1225
+ });
1226
+ console.error(`[${SERVER_NAME}] Tool=get_tech_stack Event=filtered FilteredResults=${filteredMemories.length}`);
1227
+ // Format output
1228
+ const formattedOutput = this.formatTechStack(filteredMemories, repository);
1229
+ console.error(`[${SERVER_NAME}] Tool=get_tech_stack Event=success FinalResults=${filteredMemories.length} TotalDuration=${Date.now() - startTime}ms`);
1230
+ return {
1231
+ content: [{
1232
+ type: 'text',
1233
+ text: formattedOutput
1234
+ }]
1235
+ };
1236
+ }
1237
+ catch (error) {
1238
+ console.error(`[${SERVER_NAME}] Tool=get_tech_stack Event=error`, error);
1239
+ return {
1240
+ content: [{
1241
+ type: 'text',
1242
+ text: `Error getting tech stack: ${error instanceof Error ? error.message : 'Unknown error occurred'}`
1243
+ }]
1244
+ };
1245
+ }
1246
+ }
1247
+ /**
1248
+ * Format tech stack information as markdown
1249
+ */
1250
+ formatTechStack(memories, repository) {
1251
+ const repoFilter = repository ? ` for ${repository}` : '';
1252
+ let output = `# Technical Stack${repoFilter}\n\n`;
1253
+ if (memories.length === 0) {
1254
+ output += `No tech stack information found${repoFilter}.\n\n`;
1255
+ output += `**Note:** The GitHub sensor primarily indexes documentation and config files. `;
1256
+ output += `Some tech stack files may not be available.\n`;
1257
+ return output;
1258
+ }
1259
+ // Categorize by file type and repository
1260
+ const repoData = {};
1261
+ memories.forEach(memory => {
1262
+ const repo = this.extractRepoFromRid(memory.rid);
1263
+ const filepath = this.extractFilepathFromRid(memory.rid).toLowerCase();
1264
+ if (!repoData[repo]) {
1265
+ repoData[repo] = {
1266
+ packageJson: [],
1267
+ goMod: [],
1268
+ dockerfiles: [],
1269
+ makefiles: [],
1270
+ cargo: [],
1271
+ ci: [],
1272
+ other: []
1273
+ };
1274
+ }
1275
+ if (filepath.includes('package.json')) {
1276
+ repoData[repo].packageJson.push(memory);
1277
+ }
1278
+ else if (filepath.includes('go.mod') || filepath.includes('go.sum')) {
1279
+ repoData[repo].goMod.push(memory);
1280
+ }
1281
+ else if (filepath.includes('dockerfile')) {
1282
+ repoData[repo].dockerfiles.push(memory);
1283
+ }
1284
+ else if (filepath.includes('makefile')) {
1285
+ repoData[repo].makefiles.push(memory);
1286
+ }
1287
+ else if (filepath.includes('cargo.toml') || filepath.includes('pyproject.toml')) {
1288
+ repoData[repo].cargo.push(memory);
1289
+ }
1290
+ else if (filepath.includes('.yml') || filepath.includes('.yaml') || filepath.includes('ci')) {
1291
+ repoData[repo].ci.push(memory);
1292
+ }
1293
+ else {
1294
+ repoData[repo].other.push(memory);
1295
+ }
1296
+ });
1297
+ // Output by repository
1298
+ const repos = Object.keys(repoData).sort();
1299
+ repos.forEach(repo => {
1300
+ const data = repoData[repo];
1301
+ output += `## ${repo}\n\n`;
1302
+ // Determine primary language/stack
1303
+ const languages = [];
1304
+ if (data.packageJson.length > 0)
1305
+ languages.push('JavaScript/TypeScript (Node.js)');
1306
+ if (data.goMod.length > 0)
1307
+ languages.push('Go');
1308
+ if (data.cargo.length > 0)
1309
+ languages.push('Rust/Python');
1310
+ if (languages.length > 0) {
1311
+ output += `**Primary Languages:** ${languages.join(', ')}\n\n`;
1312
+ }
1313
+ // Package dependencies
1314
+ if (data.packageJson.length > 0) {
1315
+ output += `### JavaScript/TypeScript Dependencies\n`;
1316
+ data.packageJson.forEach(file => {
1317
+ const filepath = this.extractFilepathFromRid(file.rid);
1318
+ output += `- **${filepath}**\n`;
1319
+ // Try to extract dependency info from content
1320
+ const content = file.content || '';
1321
+ const depsMatch = content.match(/"dependencies":\s*{([^}]+)}/);
1322
+ if (depsMatch) {
1323
+ const deps = depsMatch[1].split(',').slice(0, 5);
1324
+ deps.forEach((dep) => {
1325
+ const cleaned = dep.trim().replace(/"/g, '');
1326
+ if (cleaned)
1327
+ output += ` - ${cleaned}\n`;
1328
+ });
1329
+ if (depsMatch[1].split(',').length > 5) {
1330
+ output += ` - ... and more\n`;
1331
+ }
1332
+ }
1333
+ });
1334
+ output += `\n`;
1335
+ }
1336
+ // Go modules
1337
+ if (data.goMod.length > 0) {
1338
+ output += `### Go Modules\n`;
1339
+ data.goMod.forEach(file => {
1340
+ const filepath = this.extractFilepathFromRid(file.rid);
1341
+ output += `- **${filepath}**\n`;
1342
+ // Try to extract module info from content
1343
+ const content = file.content || '';
1344
+ const lines = content.split('\n').filter((l) => l.trim().startsWith('require')).slice(0, 5);
1345
+ lines.forEach((line) => {
1346
+ const cleaned = line.trim();
1347
+ if (cleaned)
1348
+ output += ` - ${cleaned}\n`;
1349
+ });
1350
+ });
1351
+ output += `\n`;
1352
+ }
1353
+ // Build tools
1354
+ if (data.makefiles.length > 0 || data.dockerfiles.length > 0) {
1355
+ output += `### Build Tools & Infrastructure\n`;
1356
+ if (data.makefiles.length > 0) {
1357
+ output += `**Makefiles:**\n`;
1358
+ data.makefiles.slice(0, 3).forEach(file => {
1359
+ const filepath = this.extractFilepathFromRid(file.rid);
1360
+ output += `- ${filepath}\n`;
1361
+ });
1362
+ }
1363
+ if (data.dockerfiles.length > 0) {
1364
+ output += `**Docker:**\n`;
1365
+ data.dockerfiles.slice(0, 3).forEach(file => {
1366
+ const filepath = this.extractFilepathFromRid(file.rid);
1367
+ output += `- ${filepath}\n`;
1368
+ });
1369
+ }
1370
+ output += `\n`;
1371
+ }
1372
+ // CI/CD
1373
+ if (data.ci.length > 0) {
1374
+ output += `### CI/CD Configuration\n`;
1375
+ data.ci.slice(0, 5).forEach(file => {
1376
+ const filepath = this.extractFilepathFromRid(file.rid);
1377
+ output += `- ${filepath}\n`;
1378
+ });
1379
+ if (data.ci.length > 5) {
1380
+ output += `- ... and ${data.ci.length - 5} more\n`;
1381
+ }
1382
+ output += `\n`;
1383
+ }
1384
+ output += `---\n\n`;
1385
+ });
1386
+ output += `*Use \`search_github_docs\` to explore specific dependency or configuration files.*\n`;
1387
+ return output;
1388
+ }
850
1389
  formatSearchResults(results, query) {
851
1390
  if (!results || results.length === 0) {
852
1391
  return `No results found for query: "${query}"`;
@@ -873,6 +1412,158 @@ class KOIServer {
873
1412
  }
874
1413
  return formatted;
875
1414
  }
1415
+ /**
1416
+ * Handle hybrid search - intelligent routing based on query classification
1417
+ */
1418
+ async handleHybridSearch(args) {
1419
+ const { query, limit = 10 } = args;
1420
+ // Check if hybrid search is available
1421
+ if (!this.queryRouter || !this.unifiedSearch) {
1422
+ console.error(`[${SERVER_NAME}] Hybrid search not available - falling back to vector search`);
1423
+ return await this.searchKnowledge({ query, limit });
1424
+ }
1425
+ try {
1426
+ const startTime = Date.now();
1427
+ // Step 1: Classify query
1428
+ const classification = await this.queryRouter.classifyQuery(query);
1429
+ console.error(`[${SERVER_NAME}] Query classified as: ${classification.intent} (route: ${classification.recommended_route})`);
1430
+ let results = [];
1431
+ let searchMetadata = {};
1432
+ // Step 2: Execute appropriate search based on classification
1433
+ if (classification.recommended_route === 'graph' && classification.detected_entities.length > 0) {
1434
+ // Graph-only search for entity queries
1435
+ const entityNames = classification.detected_entities.map(e => e.name);
1436
+ const graphResults = await this.unifiedSearch.graphSearch(entityNames, limit);
1437
+ results = graphResults.map(hit => ({
1438
+ id: hit.id,
1439
+ title: hit.title,
1440
+ content: hit.content || '',
1441
+ source: 'graph',
1442
+ entity_type: hit.entity_type,
1443
+ file_path: hit.file_path,
1444
+ line_number: hit.line_number,
1445
+ score: hit.final_score,
1446
+ }));
1447
+ searchMetadata = {
1448
+ route: 'graph',
1449
+ entities_detected: entityNames,
1450
+ };
1451
+ }
1452
+ else if (classification.recommended_route === 'vector') {
1453
+ // Vector-only search for conceptual queries - use KOI API
1454
+ const response = await apiClient.post('/query', {
1455
+ question: query,
1456
+ limit: limit
1457
+ });
1458
+ const data = response.data;
1459
+ results = (data.results || []).map((r) => ({
1460
+ id: r.rid || r.id,
1461
+ title: r.title || 'Document',
1462
+ content: r.content || '',
1463
+ source: 'vector',
1464
+ score: r.score || 0,
1465
+ metadata: r.metadata,
1466
+ }));
1467
+ searchMetadata = {
1468
+ route: 'vector',
1469
+ };
1470
+ }
1471
+ else {
1472
+ // Unified/hybrid search - combine graph and vector
1473
+ // For now, use vector search as we don't have embedding service integrated
1474
+ console.error(`[${SERVER_NAME}] Unified search requested but falling back to vector search (embedding service not integrated)`);
1475
+ const response = await apiClient.post('/query', {
1476
+ question: query,
1477
+ limit: limit
1478
+ });
1479
+ const data = response.data;
1480
+ results = (data.results || []).map((r) => ({
1481
+ id: r.rid || r.id,
1482
+ title: r.title || 'Document',
1483
+ content: r.content || '',
1484
+ source: 'vector',
1485
+ score: r.score || 0,
1486
+ metadata: r.metadata,
1487
+ }));
1488
+ searchMetadata = {
1489
+ route: 'hybrid_fallback_to_vector',
1490
+ entities_detected: classification.detected_entities.map(e => e.name),
1491
+ note: 'Embedding service not available - using vector search only',
1492
+ };
1493
+ }
1494
+ const duration = Date.now() - startTime;
1495
+ // Step 3: Format results
1496
+ const markdown = this.formatHybridResults(results, classification, searchMetadata);
1497
+ // MCP only supports type: 'text' - embed JSON as code block
1498
+ const jsonData = JSON.stringify({
1499
+ hits: results,
1500
+ classification,
1501
+ metadata: {
1502
+ query,
1503
+ route: classification.recommended_route,
1504
+ duration_ms: duration,
1505
+ total_results: results.length,
1506
+ ...searchMetadata,
1507
+ },
1508
+ }, null, 2);
1509
+ return {
1510
+ content: [
1511
+ {
1512
+ type: 'text',
1513
+ text: markdown + '\n\n---\n\n<details>\n<summary>Raw JSON (for eval harness)</summary>\n\n```json\n' + jsonData + '\n```\n</details>',
1514
+ },
1515
+ ],
1516
+ };
1517
+ }
1518
+ catch (error) {
1519
+ console.error(`[${SERVER_NAME}] Hybrid search error:`, error);
1520
+ // Fallback to basic search
1521
+ return await this.searchKnowledge({ query, limit });
1522
+ }
1523
+ }
1524
+ /**
1525
+ * Format hybrid search results as markdown
1526
+ */
1527
+ formatHybridResults(results, classification, metadata) {
1528
+ let output = `## Hybrid Search Results\n\n`;
1529
+ output += `**Query Route:** ${metadata.route} (intent: ${classification.intent})\n`;
1530
+ if (classification.detected_entities.length > 0) {
1531
+ output += `**Detected Entities:** ${classification.detected_entities.map((e) => e.name).join(', ')}\n`;
1532
+ }
1533
+ output += `**Confidence:** ${(classification.confidence * 100).toFixed(1)}%\n`;
1534
+ output += `**Results:** ${results.length}\n\n`;
1535
+ if (classification.reasoning) {
1536
+ output += `*${classification.reasoning}*\n\n`;
1537
+ }
1538
+ if (metadata.note) {
1539
+ output += `> **Note:** ${metadata.note}\n\n`;
1540
+ }
1541
+ output += `---\n\n`;
1542
+ results.forEach((hit, i) => {
1543
+ output += `### ${i + 1}. ${hit.title || hit.id}\n`;
1544
+ if (hit.entity_type) {
1545
+ output += `**Type:** ${hit.entity_type} | `;
1546
+ }
1547
+ output += `**Source:** ${hit.source}`;
1548
+ if (hit.score !== undefined) {
1549
+ output += ` | **Score:** ${hit.score.toFixed(3)}`;
1550
+ }
1551
+ output += `\n\n`;
1552
+ if (hit.file_path) {
1553
+ output += `📁 \`${hit.file_path}\``;
1554
+ if (hit.line_number) {
1555
+ output += `:${hit.line_number}`;
1556
+ }
1557
+ output += `\n\n`;
1558
+ }
1559
+ if (hit.content) {
1560
+ const preview = hit.content.substring(0, 300);
1561
+ output += `${preview}${hit.content.length > 300 ? '...' : ''}\n\n`;
1562
+ }
1563
+ output += `---\n\n`;
1564
+ });
1565
+ return output;
1566
+ }
876
1567
  async run() {
877
1568
  const transport = new StdioServerTransport();
878
1569
  await this.server.connect(transport);