mcp-log-query-server 3.6.0 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/index.js +910 -910
  2. package/logger.js +27 -17
  3. package/package.json +1 -1
package/index.js CHANGED
@@ -1,910 +1,910 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * Log Query MCP Server
5
- *
6
- * 提供以下工具:
7
- * - query_log: 查询服务日志(支持测试环境 SSH + 生产环境 Loki)
8
- * - search_log: 搜索日志关键词(生产环境自动提取 traceId)
9
- * - list_services: 列出可用服务
10
- * - test_connection: 测试 SSH 连接
11
- * - list_pods: 列出 pods 及状态
12
- * - describe_pod: 获取 pod 详情
13
- * - get_pod_logs: 获取 pod 日志
14
- * - get_events: 获取 namespace 事件
15
- * - trace_log: 根据 traceId 跨服务查询日志(生产环境一次查询所有服务)
16
- * - detect_context: 根据工作目录自动检测 namespace 和服务
17
- * - list_loki_environments: 列出可用的 Loki 生产环境
18
- * - list_loki_services: 列出 Loki 环境下的服务
19
- */
20
-
21
- import { Server } from '@modelcontextprotocol/sdk/server/index.js';
22
- import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
23
- import {
24
- CallToolRequestSchema,
25
- ListToolsRequestSchema,
26
- } from '@modelcontextprotocol/sdk/types.js';
27
-
28
- import { queryLog, testConnection, executeKubectl } from './ssh-client.js';
29
- import { findService, getAllServices, DEFAULTS, DEFAULT_NAMESPACE, SERVICES, NAMESPACES, detectContextFromPath, isLokiEnv, resolveLokiEnvName, LOKI_ENVIRONMENTS } from './config.js';
30
- import { log, getLogFilePath } from './logger.js';
31
- import {
32
- queryLoki, queryLokiAutoRange, parseTimeStr,
33
- extractTraceIds, parseServiceFromFilename, groupLogsByService,
34
- buildServiceLogQL, buildProjectLogQL, getLokiServiceDirName,
35
- listLokiEnvironments as getLokiEnvList, listLokiServices as getLokiSvcList
36
- } from './loki-client.js';
37
-
38
// Timeout configuration (all values in milliseconds).
// Last-resort limit for one MCP request; enforced through withTimeout.
const REQUEST_TIMEOUT = 60 * 1000;
// Watchdog threshold: crossing it only writes a warning, the process is never exited.
const WATCHDOG_WARN_TIMEOUT = 120 * 1000;
41
-
42
/**
 * Race a promise against a timer.
 *
 * The timer is cleared as soon as the race settles, so a request that finishes
 * early does not leave a pending timer behind for the rest of the timeout.
 *
 * @param {Promise<*>} promise - Work to wait for.
 * @param {number} ms - Maximum wait in milliseconds.
 * @param {string} label - Name placed at the front of the timeout error message.
 * @returns {Promise<*>} Settles like `promise`, or rejects with an Error once `ms` elapses first.
 */
function withTimeout(promise, ms, label) {
  let timer;
  const expiry = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(`${label} 超时(${ms}ms)`)), ms);
  });
  return Promise.race([promise, expiry]).finally(() => clearTimeout(timer));
}
50
-
51
/**
 * Serialize tool arguments for logging without ever throwing.
 *
 * @param {*} obj - Value to serialize.
 * @param {number} [maxLen=200] - Longest text kept before it is cut and '...' is appended.
 * @returns {string} JSON text (possibly truncated), the String() form for values
 *   JSON cannot represent (undefined, functions, symbols), or '<unserializable>'
 *   when serialization throws (for example on circular references).
 */
function safeStringify(obj, maxLen = 200) {
  try {
    const json = JSON.stringify(obj);
    // JSON.stringify yields undefined instead of a string for undefined,
    // functions and symbols; fall back to String() so a call without
    // arguments is logged as "undefined" rather than as a failure.
    const text = typeof json === 'string' ? json : String(obj);
    return text.length > maxLen ? text.slice(0, maxLen) + '...' : text;
  } catch {
    return '<unserializable>';
  }
}
60
-
61
/**
 * Merge several AbortSignals into one that aborts as soon as any source aborts.
 *
 * Uses the native AbortSignal.any when the runtime provides it and a manual
 * listener-based fallback otherwise. The fallback forwards the abort reason of
 * the source that fired and detaches its listeners from every source once the
 * merged signal has aborted.
 *
 * @param {Array<AbortSignal|null|undefined>} signals - Candidate signals; falsy entries are ignored.
 * @returns {AbortSignal|undefined} undefined when no signal is given, the signal
 *   itself when exactly one is given, otherwise a merged signal.
 */
function anySignal(signals) {
  const sources = signals.filter(Boolean);
  if (sources.length === 0) return undefined;
  if (sources.length === 1) return sources[0];
  if (typeof AbortSignal.any === 'function') return AbortSignal.any(sources);

  // Fallback for runtimes without AbortSignal.any.
  const ctrl = new AbortController();

  // Check for an already-aborted source before registering any listener, so
  // nothing is left attached to the other sources in that case.
  const settled = sources.find((s) => s.aborted);
  if (settled) {
    ctrl.abort(settled.reason);
    return ctrl.signal;
  }

  const onAbort = (event) => {
    for (const s of sources) s.removeEventListener('abort', onAbort);
    ctrl.abort(event.target.reason);
  };
  for (const s of sources) {
    s.addEventListener('abort', onAbort, { once: true });
  }
  return ctrl.signal;
}
77
-
78
// Process-level safety net: log the failure and keep the process running.
// Exiting would drop the stdio link and leave the whole MCP server unusable
// until the IDE restarts; one failed request must not take the service down.
for (const eventName of ['unhandledRejection', 'uncaughtException']) {
  process.on(eventName, (err) => {
    log(`[${eventName}] ${err?.stack || err}`);
  });
}
82
-
83
// MCP server instance: identity first, then the advertised capabilities
// (only `tools`).
const server = new Server(
  { name: 'mcp-log-query', version: '3.6.0' },
  { capabilities: { tools: {} } },
);
95
-
96
// Tool catalogue: answers the ListTools request with every tool's name,
// description and JSON input schema.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // Small builders for schema fragments. Each call returns a fresh object, so
  // no two tools share a property instance.
  const str = (description) => ({ type: 'string', description });
  const withDefault = (type, description, value) => ({ type, description, default: value });
  const objectSchema = (properties, required) =>
    (required ? { type: 'object', properties, required } : { type: 'object', properties });

  // Fragments repeated across several tools.
  const serviceNamespace = () => str('K8s namespace,如 saas-itest、whood-itest。不指定则使用服务默认配置');
  const k8sNamespace = () => withDefault('string', 'K8s namespace,默认 saas-itest', 'saas-itest');
  const envSelector = () => str('环境标识,不指定则查询测试环境(走 SSH)。可选值:cms/prod/生产(CMS生产环境)、城阳/cy/chengyang、临颖/ly/linying、漯河/lh/luohe、德阳/dy/deyang、旌阳/jy/jingyang(私有化环境)');
  const lokiFrom = () => str('(Loki) 查询起始时间,如 "2026-02-05 10:00:00"。指定后禁用自动递进');
  const lokiTo = () => str('(Loki) 查询结束时间,如 "2026-02-06 12:00:00"。不指定则为当前时间');

  const tools = [
    {
      name: 'query_log',
      description: '查询服务容器的日志文件。返回最近的日志内容。支持通过 env 参数查询生产环境日志(Loki)。',
      inputSchema: objectSchema(
        {
          service: str('服务名称,如 clife-senior-health、clife-senior-archive,或别名如 health、archive'),
          namespace: serviceNamespace(),
          lines: withDefault('number', '返回的日志行数,默认 100', 100),
          env: envSelector(),
          from: lokiFrom(),
          to: lokiTo(),
        },
        ['service'],
      ),
    },
    {
      name: 'search_log',
      description: '在服务日志中搜索关键词。支持正则表达式。生产环境会自动提取 traceId 列表。',
      inputSchema: objectSchema(
        {
          service: str('服务名称或别名'),
          namespace: serviceNamespace(),
          keyword: str('搜索关键词,支持正则表达式'),
          context_lines: withDefault('number', '显示匹配行前后的上下文行数,默认 5', 5),
          case_sensitive: withDefault('boolean', '是否区分大小写,默认 false', false),
          env: envSelector(),
          from: lokiFrom(),
          to: lokiTo(),
        },
        ['service', 'keyword'],
      ),
    },
    {
      name: 'list_services',
      description: '列出所有可查询日志的服务',
      inputSchema: objectSchema({}),
    },
    {
      name: 'test_connection',
      description: '测试到堡垒机的 SSH 连接是否正常',
      inputSchema: objectSchema({}),
    },
    // ========== K8s tools ==========
    {
      name: 'list_pods',
      description: '列出指定 namespace 的所有 pods 及其状态,用于快速定位问题 pod',
      inputSchema: objectSchema({
        namespace: k8sNamespace(),
        label: str('标签选择器,如 app=clife-senior-health'),
      }),
    },
    {
      name: 'describe_pod',
      description: '获取 pod 详细信息,包括事件、状态、退出码等,用于排查 pod 崩溃原因',
      inputSchema: objectSchema(
        {
          pod: str('Pod 名称或名称模式(支持部分匹配)'),
          namespace: k8sNamespace(),
        },
        ['pod'],
      ),
    },
    {
      name: 'get_pod_logs',
      description: '获取 pod 日志,支持查看崩溃前的日志(--previous)',
      inputSchema: objectSchema(
        {
          pod: str('Pod 名称或名称模式'),
          namespace: k8sNamespace(),
          previous: withDefault('boolean', '是否查看上一个容器的日志(崩溃前日志),默认 false', false),
          tail: withDefault('number', '返回的日志行数,默认 100', 100),
        },
        ['pod'],
      ),
    },
    {
      name: 'get_events',
      description: '获取 namespace 级别的 K8s 事件,用于排查集群问题',
      inputSchema: objectSchema({
        namespace: k8sNamespace(),
        pod: str('过滤指定 pod 的事件(可选)'),
      }),
    },
    {
      name: 'trace_log',
      description: '根据 traceId 跨服务查询日志,用于追踪完整调用链。生产环境使用 Loki API 一次查询所有服务。',
      inputSchema: objectSchema(
        {
          traceId: str('链路追踪 ID'),
          namespace: serviceNamespace(),
          services: {
            type: 'array',
            items: { type: 'string' },
            description: '要搜索的服务列表,不指定则搜索所有服务',
          },
          context_lines: withDefault('number', '显示匹配行前后的上下文行数,默认 3', 3),
          env: envSelector(),
          from: lokiFrom(),
          to: lokiTo(),
        },
        ['traceId'],
      ),
    },
    // ========== Context detection tool ==========
    {
      name: 'detect_context',
      description: '根据当前工作目录自动检测对应的 namespace 和服务名。AI 可以先调用此工具获取上下文,再调用 query_log 等工具时传入正确的 namespace。',
      inputSchema: objectSchema(
        {
          workspace_path: str('当前工作目录路径,如 D:\\shulian\\whood\\clife-senior-mall 或 /home/user/shulian/saas/clife-senior-health'),
        },
        ['workspace_path'],
      ),
    },
    // ========== Loki production tools ==========
    {
      name: 'list_loki_environments',
      description: '列出所有可用的 Loki 生产环境',
      inputSchema: objectSchema({}),
    },
    {
      name: 'list_loki_services',
      description: '列出指定 Loki 环境下的所有可用服务',
      inputSchema: objectSchema({
        env: withDefault('string', '环境标识。可选值:cms/prod/生产(CMS生产环境)、城阳/cy/chengyang、临颖/ly/linying、漯河/lh/luohe、德阳/dy/deyang、旌阳/jy/jingyang(私有化环境)', 'cms'),
        project: withDefault('string', '项目名,默认 senior', 'senior'),
      }),
    },
  ];

  return { tools };
});
366
// Tool invocation entry point.
//
// Races the real work (handleToolCall) against two things: the fallback
// REQUEST_TIMEOUT and the caller's cancel signal. When either wins, the
// downstream work is aborted through a handler-owned AbortController so SSH /
// Loki calls do not keep running after their result has been abandoned.
server.setRequestHandler(CallToolRequestSchema, async (request, extra) => {
  const { name, arguments: args } = request.params;
  const startTime = Date.now();
  // Signal supplied by the SDK; it becomes aborted when the client cancels the request.
  const signal = extra && extra.signal;
  log(`[Tool] → ${name} start args=${safeStringify(args)}`);

  // Already cancelled on arrival: throw so the SDK, which checks
  // signal.aborted, sends no response.
  if (signal && signal.aborted) {
    log(`[Tool] ⊗ ${name} 收到请求时已 aborted,立即返回`);
    throw new Error('Request cancelled before handler');
  }

  // Watchdog: only records requests that run for a long time; never exits the process.
  const watchdog = setTimeout(() => {
    log(`[Watchdog] ${name} 仍在运行超过 ${WATCHDOG_WARN_TIMEOUT}ms(仅记录,不退出进程)`);
  }, WATCHDOG_WARN_TIMEOUT);
  watchdog.unref();

  // Downstream work observes the client signal AND a local "abandon" signal
  // that is fired when this handler stops waiting (timeout or cancel).
  const abandon = new AbortController();
  const workSignal = anySignal([signal, abandon.signal]);

  // Cancel race: rejects as soon as the client signal aborts so the handler
  // stops waiting for downstream. Never settles when there is no signal.
  let onAbort = null;
  const cancelPromise = new Promise((_, reject) => {
    if (!signal) return;
    onAbort = () => {
      log(`[Tool] ⊗ ${name} 收到 cancel signal (${Date.now() - startTime}ms)`);
      reject(new Error('CANCELLED'));
    };
    signal.addEventListener('abort', onAbort, { once: true });
  });

  try {
    const result = await Promise.race([
      withTimeout(handleToolCall(name, args, workSignal), REQUEST_TIMEOUT, name),
      cancelPromise,
    ]);
    log(`[Tool] ✓ ${name} done ${Date.now() - startTime}ms`);
    return result;
  } catch (error) {
    // handleToolCall converts its own failures into result objects, so landing
    // here means the wait ended by timeout or cancellation: stop the work.
    abandon.abort();
    // Cancellation: rethrow so the SDK (which checks signal.aborted) sends no response.
    if (signal && signal.aborted) {
      log(`[Tool] ⊗ ${name} CANCELLED ${Date.now() - startTime}ms`);
      throw error;
    }
    log(`[Tool] ✗ ${name} FAIL ${Date.now() - startTime}ms: ${error.message}`);
    return {
      content: [{ type: 'text', text: `## 执行错误\n\n❌ ${error.message}` }],
      isError: true
    };
  } finally {
    clearTimeout(watchdog);
    // Detach the cancel listener so a finished request leaves nothing on the signal.
    if (onAbort) signal.removeEventListener('abort', onAbort);
  }
});
418
-
419
/**
 * Quote a value as a single POSIX shell word.
 * Tokens made only of characters the shell never interprets are returned
 * unchanged, so ordinary namespaces / pod names yield the same command text
 * as before; anything else is wrapped in single quotes.
 */
function shellQuote(value) {
  const text = String(value);
  if (/^[A-Za-z0-9_@%+=:,./-]+$/.test(text)) return text;
  return `'${text.replace(/'/g, "'\\''")}'`;
}

/**
 * Escape a value for interpolation INSIDE a double-quoted shell string
 * (backslash, double quote, dollar and backtick are the characters that stay
 * special there).
 */
function escapeForDoubleQuotes(value) {
  return String(value).replace(/[\\"$`]/g, '\\$&');
}

/** Coerce a numeric tool argument to an integer >= min; anything else yields the fallback. */
function toBoundedInt(value, fallback, min) {
  const n = typeof value === 'string' && value.trim() !== '' ? Number(value) : value;
  return Number.isInteger(n) && n >= min ? n : fallback;
}

/** Return input[key], throwing when a required tool argument is missing or empty. */
function requireArg(input, key) {
  const value = input[key];
  if (value === undefined || value === null || value === '') {
    throw new Error(`缺少必填参数: ${key}`);
  }
  return value;
}

/**
 * Actual tool dispatch logic.
 *
 * Every failure raised while handling a tool is caught here and returned as an
 * `isError` result, so this function resolves rather than rejects.
 *
 * Values taken from `args` are escaped or validated before being placed into
 * shell command text (kubectl / grep / tail).
 * NOTE(review): the escaping targets the quoting level visible in this file
 * (double quotes for grep patterns, bare words for kubectl); confirm that
 * ssh-client does not add another shell layer around these commands.
 *
 * @param {string} name - Tool name.
 * @param {object} [args] - Tool arguments; may be omitted for tools without required parameters.
 * @param {AbortSignal} [signal] - Cancel signal forwarded to the Loki / SSH / kubectl calls.
 * @returns {Promise<object>} MCP tool result ({ content, isError? }).
 */
async function handleToolCall(name, args, signal) {
  // Tools without required parameters may arrive with no `arguments` at all.
  const input = args ?? {};
  try {
    switch (name) {
      case 'query_log': {
        const lines = toBoundedInt(input.lines, DEFAULTS.lines, 1);

        // Production environments are served by Loki.
        if (isLokiEnv(input.env)) {
          const envKey = resolveLokiEnvName(input.env);
          const envConfig = LOKI_ENVIRONMENTS[envKey];
          const project = envConfig.defaultProject || 'senior';
          const serviceDirName = getLokiServiceDirName(input.service);
          const maxLines = lines;

          const expr = buildServiceLogQL(project, serviceDirName, '', envKey);
          console.error(`[MCP] Loki 查询日志: env=${envKey}, service=${input.service}, expr=${expr}`);

          // Time-range options.
          const timeOpts = { maxLines };
          if (input.from) timeOpts.from = parseTimeStr(input.from);
          if (input.to) timeOpts.to = parseTimeStr(input.to);
          timeOpts.signal = signal;

          const lokiResult = await queryLokiAutoRange(envKey, expr, timeOpts);

          if (lokiResult.logs.length === 0) {
            const errorHint = lokiResult.error ? `\n\n⚠️ **${lokiResult.error}**` : '';
            return { content: [{ type: 'text', text: `## ${input.service} 日志 (${envKey} 生产环境)\n\n⚠️ 已自动搜索 5分钟 → 30分钟 → 1小时 → 3小时 → 24小时 范围,均未找到日志。${errorHint}\n\n请确认:\n1. 服务名是否正确\n2. 如需查询更早的日志,请使用 \`from\`/\`to\` 参数指定具体时间范围` }] };
          }

          let text = `## ${input.service} 日志 (${envKey} 生产环境, ${lokiResult.timeRange.label}内, ${lokiResult.logs.length} 行)\n\n`;
          text += `\`\`\`\n${lokiResult.logs.join('\n')}\n\`\`\``;
          if (lokiResult.traceIds.length > 0) {
            text += `\n\n🔑 **提取到的 traceId** (${lokiResult.traceIds.length} 个):\n`;
            lokiResult.traceIds.slice(0, 20).forEach((id, i) => { text += ` ${i + 1}. \`${id}\`\n`; });
            if (lokiResult.traceIds.length > 20) text += ` ... 还有 ${lokiResult.traceIds.length - 20} 个\n`;
          }
          return { content: [{ type: 'text', text }] };
        }

        // Test environments go through SSH.
        const service = findService(input.service, input.namespace);
        if (!service) {
          return { content: [{ type: 'text', text: `错误: 未找到服务 "${input.service}"。使用 list_services 查看可用服务。` }] };
        }

        // `lines` is a validated integer, so it is safe inside the command text.
        const command = `tail -${lines}`;

        console.error(`[MCP] 查询日志: ${service.name} (namespace: ${service.namespace}), 命令: ${command}`);
        const result = await queryLog(service, command, { signal });

        return {
          content: [{
            type: 'text',
            text: `## ${service.name} 日志 (namespace: ${service.namespace}, 最近 ${lines} 行)\n\n\`\`\`\n${result}\n\`\`\``
          }]
        };
      }

      case 'search_log': {
        const keyword = requireArg(input, 'keyword');

        // Production environments are served by Loki.
        if (isLokiEnv(input.env)) {
          const envKey = resolveLokiEnvName(input.env);
          const envConfig = LOKI_ENVIRONMENTS[envKey];
          const project = envConfig.defaultProject || 'senior';
          const serviceDirName = getLokiServiceDirName(input.service);

          const expr = buildServiceLogQL(project, serviceDirName, keyword, envKey);
          console.error(`[MCP] Loki 搜索日志: env=${envKey}, service=${input.service}, keyword=${keyword}`);

          // Time-range options.
          const timeOpts = { maxLines: 200 };
          if (input.from) timeOpts.from = parseTimeStr(input.from);
          if (input.to) timeOpts.to = parseTimeStr(input.to);
          timeOpts.signal = signal;

          const lokiResult = await queryLokiAutoRange(envKey, expr, timeOpts);

          if (lokiResult.logs.length === 0) {
            const errorHint = lokiResult.error ? `\n\n⚠️ **${lokiResult.error}**` : '';
            return { content: [{ type: 'text', text: `## ${input.service} 日志搜索结果 (${envKey} 生产环境)\n\n**关键词**: ${keyword}\n\n⚠️ 已自动搜索 5分钟 → 30分钟 → 1小时 → 3小时 → 24小时 范围,均未找到匹配内容。${errorHint}\n\n请确认:\n1. 关键词是否正确\n2. 服务名是否正确\n3. 如需查询更早的日志,请使用 \`from\`/\`to\` 参数指定具体时间范围` }] };
          }

          let text = `## ${input.service} 日志搜索结果 (${envKey} 生产环境, ${lokiResult.timeRange.label}内)\n\n`;
          text += `**关键词**: ${keyword}\n**匹配行数**: ${lokiResult.logs.length}\n**时间范围**: ${lokiResult.timeRange.label}\n\n`;
          text += `\`\`\`\n${lokiResult.logs.join('\n')}\n\`\`\``;

          // Surface the extracted traceIds so the caller can follow up with trace_log.
          if (lokiResult.traceIds.length > 0) {
            text += `\n\n🔑 **提取到的 traceId** (${lokiResult.traceIds.length} 个):\n`;
            lokiResult.traceIds.slice(0, 20).forEach((id, i) => { text += ` ${i + 1}. \`${id}\`\n`; });
            if (lokiResult.traceIds.length > 20) text += ` ... 还有 ${lokiResult.traceIds.length - 20} 个\n`;
            text += `\n💡 **提示**: 可以使用 \`trace_log(traceId: "xxx", env: "${input.env}")\` 查看完整调用链`;
          }
          return { content: [{ type: 'text', text }] };
        }

        // Test environments go through SSH.
        const service = findService(input.service, input.namespace);
        if (!service) {
          return { content: [{ type: 'text', text: `错误: 未找到服务 "${input.service}"。使用 list_services 查看可用服务。` }] };
        }

        // 0 is a legitimate context size, so only invalid values fall back to 5.
        const contextLines = toBoundedInt(input.context_lines, 5, 0);
        const caseSensitive = input.case_sensitive || false;

        const grepFlags = caseSensitive ? '' : '-i';
        // The keyword stays a regular expression for grep but can no longer
        // terminate the quoted string or trigger shell expansion.
        const command = `grep ${grepFlags} -C ${contextLines} "${escapeForDoubleQuotes(keyword)}"`;

        console.error(`[MCP] 搜索日志: ${service.name} (namespace: ${service.namespace}), 关键词: ${keyword}`);
        const result = await queryLog(service, command, { signal });

        return {
          content: [{
            type: 'text',
            text: `## ${service.name} 日志搜索结果 (namespace: ${service.namespace})\n\n**关键词**: ${keyword}\n\n\`\`\`\n${result || '未找到匹配内容'}\n\`\`\``
          }]
        };
      }

      case 'list_services': {
        const services = getAllServices();
        const list = services.map(s =>
          `- **${s.name}**: ${s.description}\n 别名: ${s.aliases.join(', ')}`
        ).join('\n');

        return {
          content: [{
            type: 'text',
            text: `## 可用服务列表\n\n${list}`
          }]
        };
      }

      case 'test_connection': {
        console.error('[MCP] 测试 SSH 连接');
        const result = await testConnection();

        return {
          content: [{
            type: 'text',
            text: `## SSH 连接测试\n\n✅ ${result.message}`
          }]
        };
      }

      // ========== K8s tools ==========
      case 'list_pods': {
        const namespace = input.namespace || DEFAULT_NAMESPACE;
        let cmd = `kubectl get pods -n ${shellQuote(namespace)} -o wide`;
        if (input.label) {
          cmd += ` -l ${shellQuote(input.label)}`;
        }

        console.error(`[MCP] 列出 pods: namespace=${namespace}`);
        const result = await executeKubectl(cmd, { signal });

        return {
          content: [{
            type: 'text',
            text: `## Pods 列表 (namespace: ${namespace})\n\n\`\`\`\n${result}\n\`\`\``
          }]
        };
      }

      case 'describe_pod': {
        const namespace = input.namespace || DEFAULT_NAMESPACE;
        const podPattern = requireArg(input, 'pod');
        const ns = shellQuote(namespace);

        console.error(`[MCP] 查找 pod: ${podPattern}`);

        // Resolve the first pod whose name matches the pattern, then describe it.
        const describeCmd = `kubectl describe $(kubectl get pod -n ${ns} -o name | grep -e ${shellQuote(podPattern)} | head -1) -n ${ns}`;
        const result = await executeKubectl(describeCmd, { signal });

        return {
          content: [{
            type: 'text',
            text: `## Pod 详情: ${podPattern}\n\n\`\`\`\n${result}\n\`\`\``
          }]
        };
      }

      case 'get_pod_logs': {
        const namespace = input.namespace || DEFAULT_NAMESPACE;
        const podPattern = requireArg(input, 'pod');
        const previous = input.previous || false;
        const tail = toBoundedInt(input.tail, 100, 1);
        const ns = shellQuote(namespace);

        let cmd = `kubectl logs $(kubectl get pod -n ${ns} -o name | grep -e ${shellQuote(podPattern)} | head -1) -n ${ns} --tail=${tail}`;
        if (previous) {
          cmd += ' --previous';
        }

        console.error(`[MCP] 获取 pod 日志: ${podPattern}, previous=${previous}`);
        const result = await executeKubectl(cmd, { signal });

        const logType = previous ? '崩溃前日志' : '当前日志';
        return {
          content: [{
            type: 'text',
            text: `## Pod 日志: ${podPattern} (${logType})\n\n\`\`\`\n${result}\n\`\`\``
          }]
        };
      }

      case 'get_events': {
        const namespace = input.namespace || DEFAULT_NAMESPACE;
        const ns = shellQuote(namespace);
        let cmd = `kubectl get events -n ${ns} --sort-by='.lastTimestamp'`;

        if (input.pod) {
          const selector = shellQuote(`involvedObject.name=${input.pod}`);
          cmd = `kubectl get events -n ${ns} --field-selector ${selector} --sort-by='.lastTimestamp'`;
        }

        console.error(`[MCP] 获取事件: namespace=${namespace}`);
        const result = await executeKubectl(cmd, { signal });

        return {
          content: [{
            type: 'text',
            text: `## K8s 事件 (namespace: ${namespace})\n\n\`\`\`\n${result}\n\`\`\``
          }]
        };
      }

      case 'trace_log': {
        const traceId = requireArg(input, 'traceId');
        const contextLines = toBoundedInt(input.context_lines, 3, 0);
        const requestedServices = Array.isArray(input.services) ? input.services : [];

        // Production environments are served by Loki.
        if (isLokiEnv(input.env)) {
          const envKey = resolveLokiEnvName(input.env);
          const envConfig = LOKI_ENVIRONMENTS[envKey];
          const project = envConfig.defaultProject || 'senior';

          // Time-range options.
          const timeOpts = {};
          if (input.from) timeOpts.from = parseTimeStr(input.from);
          if (input.to) timeOpts.to = parseTimeStr(input.to);

          // With an explicit service list query each service; otherwise run a
          // single project-wide query that covers all services.
          let lokiResult;

          if (requestedServices.length > 0) {
            const allLogs = [];
            const allLabels = [];
            let firstError;
            for (const svc of requestedServices) {
              const dirName = getLokiServiceDirName(svc);
              const expr = buildServiceLogQL(project, dirName, traceId, envKey);
              console.error(`[MCP] Loki trace: env=${envKey}, service=${svc}, traceId=${traceId}`);
              const r = await queryLokiAutoRange(envKey, expr, { ...timeOpts, maxLines: 500, signal });
              allLogs.push(...r.logs);
              allLabels.push(...r.labels);
              // Keep the first reported error so the "nothing found" message can show it.
              if (r.error && !firstError) firstError = r.error;
            }
            lokiResult = { logs: allLogs, labels: allLabels, traceIds: extractTraceIds(allLogs), timeRange: { label: '自动递进' }, error: firstError };
          } else {
            const expr = buildProjectLogQL(project, traceId, envKey);
            console.error(`[MCP] Loki trace (全项目): env=${envKey}, project=${project}, traceId=${traceId}`);
            lokiResult = await queryLokiAutoRange(envKey, expr, { ...timeOpts, maxLines: 1000, signal });
          }

          if (lokiResult.logs.length === 0) {
            const errorHint = lokiResult.error ? `\n\n⚠️ **${lokiResult.error}**` : '';
            return { content: [{ type: 'text', text: `## TraceId 追踪结果 (${envKey} 生产环境)\n\n**traceId**: \`${traceId}\`\n\n❌ 已自动搜索 5分钟 → 30分钟 → 1小时 → 3小时 → 24小时 范围,均未找到匹配日志。${errorHint}\n\n请确认:\n1. traceId 是否正确\n2. 如需查询更早的日志,请使用 \`from\`/\`to\` 参数指定具体时间范围` }] };
          }

          // Present the matches grouped by service.
          const groups = groupLogsByService(lokiResult);
          const serviceNames = Object.keys(groups).sort();

          let text = `## TraceId 追踪结果 (${envKey} 生产环境, ${lokiResult.timeRange.label}内)\n\n`;
          text += `**traceId**: \`${traceId}\`\n`;
          text += `**匹配服务数**: ${serviceNames.length}\n`;
          text += `**总日志行数**: ${lokiResult.logs.length}\n\n`;

          for (const svcName of serviceNames) {
            const group = groups[svcName];
            text += `### ${svcName}\n`;
            text += `\`\`\`\n${group.logs.join('\n')}\n\`\`\`\n\n`;
          }

          return { content: [{ type: 'text', text }] };
        }

        // Test environments go through SSH, one service at a time.
        const targetNamespace = input.namespace || null;
        let servicesToSearch = requestedServices;

        if (servicesToSearch.length === 0) {
          servicesToSearch = Object.keys(SERVICES);
        } else {
          servicesToSearch = servicesToSearch.map(s => {
            const service = findService(s, targetNamespace);
            return service ? service.name : s;
          }).filter(Boolean);
        }

        console.error(`[MCP] 追踪日志: traceId=${traceId}, namespace=${targetNamespace || 'default'}, 服务数=${servicesToSearch.length}`);

        const TRACE_TOTAL_TIMEOUT = 50000; // overall budget: 50s
        const TRACE_PER_SERVICE = 10000;   // per-service timeout: 10s
        const traceStart = Date.now();
        const results = [];
        let searched = 0;
        let skipped = 0;
        const command = `grep -i -C ${contextLines} "${escapeForDoubleQuotes(traceId)}"`;

        for (const serviceName of servicesToSearch) {
          // Stop once the request was cancelled: remaining lookups would only fail one by one.
          if (signal && signal.aborted) {
            skipped = servicesToSearch.length - searched;
            break;
          }

          // Overall time budget check.
          if (Date.now() - traceStart > TRACE_TOTAL_TIMEOUT) {
            skipped = servicesToSearch.length - searched;
            console.error(`[MCP] trace_log 总耗时超过 ${TRACE_TOTAL_TIMEOUT}ms,跳过剩余 ${skipped} 个服务`);
            break;
          }

          const service = findService(serviceName, targetNamespace);
          if (!service) { searched++; continue; }

          try {
            const result = await queryLog(service, command, { timeout: TRACE_PER_SERVICE, signal });

            if (result && result.trim() && !result.includes('未找到')) {
              results.push({ service: serviceName, namespace: service.namespace, logs: result });
            }
          } catch (err) {
            // Move on quickly past services that fail or time out.
            console.error(`[MCP] ${serviceName} 跳过: ${String(err && err.message).substring(0, 80)}`);
          }
          searched++;
        }

        const elapsed = Date.now() - traceStart;
        const timeNote = skipped > 0 ? `\n**注意**: 已搜索 ${searched}/${servicesToSearch.length} 个服务(耗时 ${elapsed}ms,跳过 ${skipped} 个)` : '';

        if (results.length === 0) {
          return { content: [{ type: 'text', text: `## TraceId 追踪结果\n\n**traceId**: ${traceId}\n**namespace**: ${targetNamespace || '默认'}\n\n❌ 未在已搜索的 ${searched} 个服务中找到匹配的日志${timeNote}` }] };
        }

        const output = results.map(r => `### ${r.service} (${r.namespace})\n\`\`\`\n${r.logs}\n\`\`\``).join('\n\n');

        return {
          content: [{
            type: 'text',
            text: `## TraceId 追踪结果\n\n**traceId**: ${traceId}\n**namespace**: ${targetNamespace || '默认'}\n**匹配服务数**: ${results.length}${timeNote}\n\n${output}`
          }]
        };
      }

      case 'detect_context': {
        const workspacePath = input.workspace_path;
        const result = detectContextFromPath(workspacePath);

        if (!result.success) {
          return {
            content: [{
              type: 'text',
              text: `## 上下文检测失败\n\n**错误**: ${result.error}\n**默认 namespace**: ${result.namespace}`
            }]
          };
        }

        // Assemble the report.
        let responseText = `## 上下文检测结果\n\n`;
        responseText += `**工作目录**: ${result.originalPath}\n`;
        responseText += `**检测到的 namespace**: ${result.namespace}\n`;
        responseText += `**namespace 来源**: ${result.namespaceSource}\n`;

        if (result.serviceName) {
          responseText += `**检测到的服务**: ${result.serviceName}\n`;
          if (result.service) {
            responseText += `**服务描述**: ${result.service.description}\n`;
            responseText += `**服务别名**: ${result.service.aliases.join(', ')}\n`;
          }
        } else {
          responseText += `**检测到的服务**: 未能从路径中识别服务名\n`;
        }

        responseText += `\n### 建议\n`;
        responseText += `在调用 query_log、search_log 等工具时,请使用:\n`;
        responseText += `- **namespace**: \`${result.namespace}\`\n`;
        if (result.serviceName) {
          responseText += `- **service**: \`${result.serviceName}\`\n`;
        }

        console.error(`[MCP] 上下文检测: path=${workspacePath}, namespace=${result.namespace}, service=${result.serviceName}`);

        return {
          content: [{
            type: 'text',
            text: responseText
          }]
        };
      }

      // ========== Loki production tools ==========
      case 'list_loki_environments': {
        const envs = getLokiEnvList();
        if (envs.length === 0) {
          return { content: [{ type: 'text', text: '## Loki 环境列表\n\n⚠️ 未配置任何 Loki 环境' }] };
        }

        const list = envs.map(e =>
          `- **${e.name}**: ${e.description}\n Grafana: ${e.grafanaUrl}\n 默认项目: ${e.project}`
        ).join('\n');

        return { content: [{ type: 'text', text: `## Loki 生产环境列表\n\n${list}` }] };
      }

      case 'list_loki_services': {
        const envKey = resolveLokiEnvName(input.env || 'cms');
        const project = input.project || 'senior';

        if (!envKey || !LOKI_ENVIRONMENTS[envKey]) {
          return { content: [{ type: 'text', text: `错误: 未知环境 "${input.env}"。使用 list_loki_environments 查看可用环境。` }] };
        }

        console.error(`[MCP] 列出 Loki 服务: env=${envKey}, project=${project}`);
        const services = await getLokiSvcList(envKey, project);

        if (services.length === 0) {
          return { content: [{ type: 'text', text: `## Loki 服务列表 (${envKey})\n\n⚠️ 未找到任何服务` }] };
        }

        const list = services.map((s, i) => ` ${i + 1}. ${s}`).join('\n');
        return { content: [{ type: 'text', text: `## Loki 服务列表 (${envKey}, project=${project})\n\n共 ${services.length} 个服务:\n${list}` }] };
      }

      default:
        return {
          content: [{
            type: 'text',
            text: `错误: 未知工具 "${name}"`
          }]
        };
    }
  } catch (error) {
    console.error(`[MCP] 工具内部错误: ${error.message}`);
    return {
      content: [{
        type: 'text',
        text: `## 执行错误\n\n❌ ${error.message}`
      }],
      isError: true
    };
  }
}
876
-
877
// Set once shutdown has begun. SIGINT/SIGTERM and the stdin 'end'/'close'
// events can all request a shutdown for the same exit, and only the first
// request may act.
let shutdownStarted = false;

/**
 * Graceful shutdown: close the MCP server, then force the process to exit
 * shortly afterwards. Safe to call repeatedly; calls after the first are no-ops.
 */
function gracefulShutdown() {
  if (shutdownStarted) return;
  shutdownStarted = true;

  console.error('[MCP] 优雅关闭...');
  // A failing close must not block the exit, but it is recorded rather than dropped.
  server.close().catch((err) => log(`[MCP] server.close failed: ${err && err.stack || err}`));
  // Give close() a moment to finish, then exit regardless.
  setTimeout(() => process.exit(0), 500).unref();
}

process.on('SIGINT', gracefulShutdown);
process.on('SIGTERM', gracefulShutdown);
887
-
888
/**
 * Start the server: hook stdin so a disconnecting host triggers a graceful
 * shutdown, connect the MCP server over stdio, then log the startup banner and
 * the local log file path (when one is reported).
 */
async function main() {
  // The host going away shows up as stdin 'end' and/or 'close'.
  for (const eventName of ['end', 'close']) {
    process.stdin.on(eventName, () => {
      log(`[MCP] stdin ${eventName}, initiating graceful shutdown`);
      gracefulShutdown();
    });
  }

  await server.connect(new StdioServerTransport());

  const logFile = getLogFilePath();
  log(`[MCP] Log Query Server v3.6.0 已启动 (SSH排队超时 + Loki体超时 + 文件日志 + cancel signal 透传)`);
  if (logFile) {
    log(`[MCP] 本地日志文件: ${logFile}`);
  }
}
906
-
907
// Entry point: a startup failure is logged with its stack (when present) and
// reported through exit code 1.
main().catch((startupError) => {
  const detail = startupError?.stack || startupError;
  log(`[MCP] 启动失败: ${detail}`);
  process.exit(1);
});
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Log Query MCP Server
5
+ *
6
+ * 提供以下工具:
7
+ * - query_log: 查询服务日志(支持测试环境 SSH + 生产环境 Loki)
8
+ * - search_log: 搜索日志关键词(生产环境自动提取 traceId)
9
+ * - list_services: 列出可用服务
10
+ * - test_connection: 测试 SSH 连接
11
+ * - list_pods: 列出 pods 及状态
12
+ * - describe_pod: 获取 pod 详情
13
+ * - get_pod_logs: 获取 pod 日志
14
+ * - get_events: 获取 namespace 事件
15
+ * - trace_log: 根据 traceId 跨服务查询日志(生产环境一次查询所有服务)
16
+ * - detect_context: 根据工作目录自动检测 namespace 和服务
17
+ * - list_loki_environments: 列出可用的 Loki 生产环境
18
+ * - list_loki_services: 列出 Loki 环境下的服务
19
+ */
20
+
21
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
22
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
23
+ import {
24
+ CallToolRequestSchema,
25
+ ListToolsRequestSchema,
26
+ } from '@modelcontextprotocol/sdk/types.js';
27
+
28
+ import { queryLog, testConnection, executeKubectl } from './ssh-client.js';
29
+ import { findService, getAllServices, DEFAULTS, DEFAULT_NAMESPACE, SERVICES, NAMESPACES, detectContextFromPath, isLokiEnv, resolveLokiEnvName, LOKI_ENVIRONMENTS } from './config.js';
30
+ import { log, getLogFilePath } from './logger.js';
31
+ import {
32
+ queryLoki, queryLokiAutoRange, parseTimeStr,
33
+ extractTraceIds, parseServiceFromFilename, groupLogsByService,
34
+ buildServiceLogQL, buildProjectLogQL, getLokiServiceDirName,
35
+ listLokiEnvironments as getLokiEnvList, listLokiServices as getLokiSvcList
36
+ } from './loki-client.js';
37
+
38
// Timeout settings, in milliseconds.
// Hard ceiling for one MCP request: 60s, enforced through withTimeout.
const REQUEST_TIMEOUT = 60 * 1000;
// Watchdog threshold: 120s. Crossing it only writes a warning to the log (no process.exit).
const WATCHDOG_WARN_TIMEOUT = 120 * 1000;
41
+
42
/**
 * Race `promise` against a timer.
 *
 * @param {Promise<*>} promise - Work to wait for.
 * @param {number} ms - Time budget in milliseconds.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<*>} Settles like `promise`, or rejects with an Error if
 *   `ms` elapses first. The underlying work is NOT cancelled by the timeout.
 */
function withTimeout(promise, ms, label) {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(`${label} 超时(${ms}ms)`)), ms);
  });
  // Always release the timer once the race is decided; otherwise every call
  // leaves a live timer behind for the full `ms`.
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}
50
+
51
/**
 * Serialize tool arguments for logging without ever throwing.
 *
 * @param {*} obj - Value to serialize.
 * @param {number} [maxLen=200] - Longer output is cut and suffixed with '...'.
 * @returns {string} JSON text (possibly truncated); 'undefined' for values
 *   JSON cannot represent (undefined, functions, symbols); '<unserializable>'
 *   when JSON.stringify throws (e.g. circular references).
 */
function safeStringify(obj, maxLen = 200) {
  let text;
  try {
    text = JSON.stringify(obj);
  } catch {
    return '<unserializable>';
  }
  // JSON.stringify returns undefined (not a string) for undefined/functions/symbols.
  if (typeof text !== 'string') return String(text);
  return text.length > maxLen ? `${text.slice(0, maxLen)}...` : text;
}
60
+
61
/**
 * Combine several AbortSignals into one that aborts as soon as any input aborts.
 * Uses the native AbortSignal.any when the runtime provides it; otherwise
 * falls back to manual listeners.
 *
 * @param {Array<AbortSignal|null|undefined>} signals - Falsy entries are ignored.
 * @returns {AbortSignal|undefined} undefined when no usable signal was given,
 *   the signal itself when exactly one was given, otherwise a combined signal.
 */
function anySignal(signals) {
  const valid = signals.filter(Boolean);
  if (valid.length === 0) return undefined;
  if (valid.length === 1) return valid[0];
  if (typeof AbortSignal.any === 'function') return AbortSignal.any(valid);

  // Manual fallback. Forward the abort reason so callers see the same
  // `signal.reason` they would get from the native implementation.
  const ctrl = new AbortController();
  const alreadyAborted = valid.find((s) => s.aborted);
  if (alreadyAborted) {
    ctrl.abort(alreadyAborted.reason);
    return ctrl.signal;
  }
  const onAbort = (event) => {
    // First abort wins: detach from the remaining sources.
    for (const s of valid) s.removeEventListener('abort', onAbort);
    ctrl.abort(event.target.reason);
  };
  for (const s of valid) s.addEventListener('abort', onAbort, { once: true });
  return ctrl.signal;
}
77
+
78
// Process-level safety net: log the failure and keep the process running.
// Original author's rationale: exiting would drop the stdio link and leave the
// whole MCP server unavailable until the IDE restarts, so a single failing
// request must not take the service down.
for (const eventName of ['unhandledRejection', 'uncaughtException']) {
  process.on(eventName, (err) => {
    const detail = (err && err.stack) || err;
    log(`[${eventName}] ${detail}`);
  });
}
82
+
83
// Create the MCP server: identity first, then the advertised capabilities
// (tools only, with no extra tool options).
const serverInfo = { name: 'mcp-log-query', version: '3.6.1' };
const serverOptions = { capabilities: { tools: {} } };
const server = new Server(serverInfo, serverOptions);
95
+
96
// Tool catalogue: answers a tools/list request with the name, description and
// JSON input schema of every tool this server exposes.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // Builds one property schema; `default` is emitted only when a fallback is given.
  const prop = (type, description, fallback) =>
    (fallback === undefined ? { type, description } : { type, description, default: fallback });

  // Builds one tool entry; `required` is emitted only when provided.
  const tool = (name, description, properties, required) => {
    const inputSchema = { type: 'object', properties };
    if (required) inputSchema.required = required;
    return { name, description, inputSchema };
  };

  // Property schemas shared by several tools. Each factory returns a fresh object.
  const ENV_CHOICES = '可选值:cms/prod/生产(CMS生产环境)、城阳/cy/chengyang、临颖/ly/linying、漯河/lh/luohe、德阳/dy/deyang、旌阳/jy/jingyang(私有化环境)';
  const serviceNamespace = () => prop('string', 'K8s namespace,如 saas-itest、whood-itest。不指定则使用服务默认配置');
  const k8sNamespace = () => prop('string', 'K8s namespace,默认 saas-itest', 'saas-itest');
  const envSelector = () => prop('string', `环境标识,不指定则查询测试环境(走 SSH)。${ENV_CHOICES}`);
  const lokiFrom = () => prop('string', '(Loki) 查询起始时间,如 "2026-02-05 10:00:00"。指定后禁用自动递进');
  const lokiTo = () => prop('string', '(Loki) 查询结束时间,如 "2026-02-06 12:00:00"。不指定则为当前时间');

  return {
    tools: [
      tool(
        'query_log',
        '查询服务容器的日志文件。返回最近的日志内容。支持通过 env 参数查询生产环境日志(Loki)。',
        {
          service: prop('string', '服务名称,如 clife-senior-health、clife-senior-archive,或别名如 health、archive'),
          namespace: serviceNamespace(),
          lines: prop('number', '返回的日志行数,默认 100', 100),
          env: envSelector(),
          from: lokiFrom(),
          to: lokiTo(),
        },
        ['service']
      ),
      tool(
        'search_log',
        '在服务日志中搜索关键词。支持正则表达式。生产环境会自动提取 traceId 列表。',
        {
          service: prop('string', '服务名称或别名'),
          namespace: serviceNamespace(),
          keyword: prop('string', '搜索关键词,支持正则表达式'),
          context_lines: prop('number', '显示匹配行前后的上下文行数,默认 5', 5),
          case_sensitive: prop('boolean', '是否区分大小写,默认 false', false),
          env: envSelector(),
          from: lokiFrom(),
          to: lokiTo(),
        },
        ['service', 'keyword']
      ),
      tool('list_services', '列出所有可查询日志的服务', {}),
      tool('test_connection', '测试到堡垒机的 SSH 连接是否正常', {}),

      // ---------- Kubernetes tools ----------
      tool(
        'list_pods',
        '列出指定 namespace 的所有 pods 及其状态,用于快速定位问题 pod',
        {
          namespace: k8sNamespace(),
          label: prop('string', '标签选择器,如 app=clife-senior-health'),
        }
      ),
      tool(
        'describe_pod',
        '获取 pod 详细信息,包括事件、状态、退出码等,用于排查 pod 崩溃原因',
        {
          pod: prop('string', 'Pod 名称或名称模式(支持部分匹配)'),
          namespace: k8sNamespace(),
        },
        ['pod']
      ),
      tool(
        'get_pod_logs',
        '获取 pod 日志,支持查看崩溃前的日志(--previous)',
        {
          pod: prop('string', 'Pod 名称或名称模式'),
          namespace: k8sNamespace(),
          previous: prop('boolean', '是否查看上一个容器的日志(崩溃前日志),默认 false', false),
          tail: prop('number', '返回的日志行数,默认 100', 100),
        },
        ['pod']
      ),
      tool(
        'get_events',
        '获取 namespace 级别的 K8s 事件,用于排查集群问题',
        {
          namespace: k8sNamespace(),
          pod: prop('string', '过滤指定 pod 的事件(可选)'),
        }
      ),
      tool(
        'trace_log',
        '根据 traceId 跨服务查询日志,用于追踪完整调用链。生产环境使用 Loki API 一次查询所有服务。',
        {
          traceId: prop('string', '链路追踪 ID'),
          namespace: serviceNamespace(),
          services: {
            type: 'array',
            items: { type: 'string' },
            description: '要搜索的服务列表,不指定则搜索所有服务',
          },
          context_lines: prop('number', '显示匹配行前后的上下文行数,默认 3', 3),
          env: envSelector(),
          from: lokiFrom(),
          to: lokiTo(),
        },
        ['traceId']
      ),

      // ---------- Context detection ----------
      tool(
        'detect_context',
        '根据当前工作目录自动检测对应的 namespace 和服务名。AI 可以先调用此工具获取上下文,再调用 query_log 等工具时传入正确的 namespace。',
        {
          workspace_path: prop('string', '当前工作目录路径,如 D:\\shulian\\whood\\clife-senior-mall 或 /home/user/shulian/saas/clife-senior-health'),
        },
        ['workspace_path']
      ),

      // ---------- Loki (production) tools ----------
      tool('list_loki_environments', '列出所有可用的 Loki 生产环境', {}),
      tool(
        'list_loki_services',
        '列出指定 Loki 环境下的所有可用服务',
        {
          env: prop('string', `环境标识。${ENV_CHOICES}`, 'cms'),
          project: prop('string', '项目名,默认 senior', 'senior'),
        }
      ),
    ],
  };
});
366
// Tool invocation entry point: wraps handleToolCall with logging, a hard
// timeout, client-cancel handling and a log-only watchdog.
server.setRequestHandler(CallToolRequestSchema, async (request, extra) => {
  const { name } = request.params;
  // `arguments` is optional in a tools/call request; normalise to an object so
  // tools without required parameters do not fail on property access.
  const args = request.params.arguments || {};
  const startTime = Date.now();
  const elapsed = () => Date.now() - startTime;
  // Signal supplied by the SDK. Per the original author's note it becomes
  // aborted when the client sends notifications/cancelled.
  const signal = extra && extra.signal;
  log(`[Tool] → ${name} start args=${safeStringify(args)}`);

  // Already cancelled on arrival: throw right away. (Original note: the SDK
  // checks signal.aborted and then sends no response.)
  if (signal && signal.aborted) {
    log(`[Tool] ⊗ ${name} 收到请求时已 aborted,立即返回`);
    throw new Error('Request cancelled before handler');
  }

  // Watchdog: only logs requests that are still running; never exits the process.
  const watchdog = setTimeout(() => {
    log(`[Watchdog] ${name} 仍在运行超过 ${WATCHDOG_WARN_TIMEOUT}ms(仅记录,不退出进程)`);
  }, WATCHDOG_WARN_TIMEOUT);
  watchdog.unref();

  // Downstream work (SSH / Loki / kubectl) should stop both when the client
  // cancels and when our own timeout gives up, so hand it a combined signal.
  const timeoutCtrl = new AbortController();
  const downstreamSignal = anySignal([signal, timeoutCtrl.signal]);

  // Cancel race: rejects as soon as the client signal aborts, so this handler
  // does not keep waiting on downstream calls. Never settles without a signal.
  let onAbort;
  const cancelPromise = new Promise((_, reject) => {
    if (!signal) return;
    onAbort = () => {
      log(`[Tool] ⊗ ${name} 收到 cancel signal (${elapsed()}ms)`);
      reject(new Error('CANCELLED'));
    };
    signal.addEventListener('abort', onAbort, { once: true });
  });

  try {
    const result = await Promise.race([
      withTimeout(handleToolCall(name, args, downstreamSignal), REQUEST_TIMEOUT, name),
      cancelPromise,
    ]);
    log(`[Tool] ✓ ${name} done ${elapsed()}ms`);
    return result;
  } catch (error) {
    // handleToolCall turns its own failures into error results, so reaching
    // this point means timeout or cancel: tell downstream work to stop.
    timeoutCtrl.abort();
    // Cancelled: rethrow so the SDK sees the failure (original note: it
    // checks signal.aborted and does not send a response).
    if (signal && signal.aborted) {
      log(`[Tool] ⊗ ${name} CANCELLED ${elapsed()}ms`);
      throw error;
    }
    log(`[Tool] ✗ ${name} FAIL ${elapsed()}ms: ${error.message}`);
    return {
      content: [{ type: 'text', text: `## 执行错误\n\n❌ ${error.message}` }],
      isError: true
    };
  } finally {
    clearTimeout(watchdog);
    if (signal && onAbort) signal.removeEventListener('abort', onAbort);
  }
});
418
+
419
/** Quote `value` as a single POSIX-shell word using single quotes. */
function shellQuote(value) {
  return `'${String(value).replace(/'/g, "'\\''")}'`;
}

/** Escape the characters that remain special inside a double-quoted POSIX-shell string. */
function escapeForDoubleQuotes(value) {
  return String(value).replace(/[\\"$`]/g, '\\$&');
}

/** Coerce `value` to a positive integer, or return `fallback` when it is not one. */
function toPositiveInt(value, fallback) {
  const n = Number(value);
  return Number.isInteger(n) && n > 0 ? n : fallback;
}

/** Wrap text in the MCP tool-result envelope. */
function textResult(text) {
  return { content: [{ type: 'text', text }] };
}

/** Wrap `body` in a Markdown code fence. */
function codeBlock(body) {
  const FENCE = '```';
  return `${FENCE}\n${body}\n${FENCE}`;
}

/** Optional from/to options for a Loki query, parsed from the tool arguments. */
function lokiTimeRange(input) {
  const range = {};
  if (input.from) range.from = parseTimeStr(input.from);
  if (input.to) range.to = parseTimeStr(input.to);
  return range;
}

/** Markdown suffix that surfaces an error reported by a Loki query, if any. */
function lokiErrorHint(lokiResult) {
  return lokiResult.error ? `\n\n⚠️ **${lokiResult.error}**` : '';
}

/** Markdown list of extracted traceIds (first 20, then a remainder count). */
function formatTraceIds(traceIds) {
  let out = `\n\n🔑 **提取到的 traceId** (${traceIds.length} 个):\n`;
  traceIds.slice(0, 20).forEach((id, i) => { out += ` ${i + 1}. \`${id}\`\n`; });
  if (traceIds.length > 20) out += ` ... 还有 ${traceIds.length - 20} 个\n`;
  return out;
}

/**
 * Execute one tool call and build its MCP result.
 *
 * Tool arguments are untrusted (they come from the MCP client), so values
 * placed into shell commands are quoted/escaped and numeric options are
 * coerced to positive integers. Quoting follows the style each channel already
 * used: double quotes for queryLog commands, single quotes for executeKubectl
 * commands. NOTE(review): this assumes each command is evaluated by exactly
 * one shell layer on the remote side — confirm against ssh-client.js.
 *
 * Never rejects: any thrown error is logged and returned as an `isError` result.
 *
 * @param {string} name - Tool name.
 * @param {object} [args] - Tool arguments; treated as {} when omitted.
 * @param {AbortSignal} [signal] - Cancellation signal, forwarded to the Loki / SSH / kubectl calls.
 * @returns {Promise<object>} MCP tool result ({ content, isError? }).
 */
async function handleToolCall(name, args, signal) {
  const input = args || {};
  try {
    switch (name) {
      case 'query_log': {
        const lines = toPositiveInt(input.lines, DEFAULTS.lines);

        // Production environments are served through Loki.
        if (isLokiEnv(input.env)) {
          const envKey = resolveLokiEnvName(input.env);
          const envConfig = LOKI_ENVIRONMENTS[envKey];
          const project = envConfig.defaultProject || 'senior';
          const serviceDirName = getLokiServiceDirName(input.service);

          const expr = buildServiceLogQL(project, serviceDirName, '', envKey);
          log(`[MCP] Loki 查询日志: env=${envKey}, service=${input.service}, expr=${expr}`);

          const lokiResult = await queryLokiAutoRange(envKey, expr, { maxLines: lines, ...lokiTimeRange(input), signal });

          if (lokiResult.logs.length === 0) {
            return textResult(`## ${input.service} 日志 (${envKey} 生产环境)\n\n⚠️ 已自动搜索 5分钟 → 30分钟 → 1小时 → 3小时 → 24小时 范围,均未找到日志。${lokiErrorHint(lokiResult)}\n\n请确认:\n1. 服务名是否正确\n2. 如需查询更早的日志,请使用 \`from\`/\`to\` 参数指定具体时间范围`);
          }

          let text = `## ${input.service} 日志 (${envKey} 生产环境, ${lokiResult.timeRange.label}内, ${lokiResult.logs.length} 行)\n\n`;
          text += codeBlock(lokiResult.logs.join('\n'));
          if (lokiResult.traceIds.length > 0) text += formatTraceIds(lokiResult.traceIds);
          return textResult(text);
        }

        // Test environments: read the log file over SSH.
        const service = findService(input.service, input.namespace);
        if (!service) {
          return textResult(`错误: 未找到服务 "${input.service}"。使用 list_services 查看可用服务。`);
        }

        const command = `tail -${lines}`;
        log(`[MCP] 查询日志: ${service.name} (namespace: ${service.namespace}), 命令: ${command}`);
        const result = await queryLog(service, command, { signal });

        return textResult(`## ${service.name} 日志 (namespace: ${service.namespace}, 最近 ${lines} 行)\n\n${codeBlock(result)}`);
      }

      case 'search_log': {
        const keyword = input.keyword;

        // Production environments are served through Loki.
        if (isLokiEnv(input.env)) {
          const envKey = resolveLokiEnvName(input.env);
          const envConfig = LOKI_ENVIRONMENTS[envKey];
          const project = envConfig.defaultProject || 'senior';
          const serviceDirName = getLokiServiceDirName(input.service);

          const expr = buildServiceLogQL(project, serviceDirName, keyword, envKey);
          log(`[MCP] Loki 搜索日志: env=${envKey}, service=${input.service}, keyword=${keyword}`);

          const lokiResult = await queryLokiAutoRange(envKey, expr, { maxLines: 200, ...lokiTimeRange(input), signal });

          if (lokiResult.logs.length === 0) {
            return textResult(`## ${input.service} 日志搜索结果 (${envKey} 生产环境)\n\n**关键词**: ${keyword}\n\n⚠️ 已自动搜索 5分钟 → 30分钟 → 1小时 → 3小时 → 24小时 范围,均未找到匹配内容。${lokiErrorHint(lokiResult)}\n\n请确认:\n1. 关键词是否正确\n2. 服务名是否正确\n3. 如需查询更早的日志,请使用 \`from\`/\`to\` 参数指定具体时间范围`);
          }

          let text = `## ${input.service} 日志搜索结果 (${envKey} 生产环境, ${lokiResult.timeRange.label}内)\n\n`;
          text += `**关键词**: ${keyword}\n**匹配行数**: ${lokiResult.logs.length}\n**时间范围**: ${lokiResult.timeRange.label}\n\n`;
          text += codeBlock(lokiResult.logs.join('\n'));

          // Surface extracted traceIds so the caller can follow up with trace_log.
          if (lokiResult.traceIds.length > 0) {
            text += formatTraceIds(lokiResult.traceIds);
            text += `\n💡 **提示**: 可以使用 \`trace_log(traceId: "xxx", env: "${input.env}")\` 查看完整调用链`;
          }
          return textResult(text);
        }

        // Test environments: grep the log file over SSH.
        const service = findService(input.service, input.namespace);
        if (!service) {
          return textResult(`错误: 未找到服务 "${input.service}"。使用 list_services 查看可用服务。`);
        }

        const contextLines = toPositiveInt(input.context_lines, 5);
        const grepFlags = input.case_sensitive ? '' : '-i';
        // -e marks the pattern explicitly (keywords may start with '-'); escaping
        // keeps shell metacharacters in the keyword from being interpreted.
        const command = `grep ${grepFlags} -C ${contextLines} -e "${escapeForDoubleQuotes(keyword)}"`;

        log(`[MCP] 搜索日志: ${service.name} (namespace: ${service.namespace}), 关键词: ${keyword}`);
        const result = await queryLog(service, command, { signal });

        return textResult(`## ${service.name} 日志搜索结果 (namespace: ${service.namespace})\n\n**关键词**: ${keyword}\n\n${codeBlock(result || '未找到匹配内容')}`);
      }

      case 'list_services': {
        const list = getAllServices()
          .map((s) => `- **${s.name}**: ${s.description}\n 别名: ${s.aliases.join(', ')}`)
          .join('\n');
        return textResult(`## 可用服务列表\n\n${list}`);
      }

      case 'test_connection': {
        log('[MCP] 测试 SSH 连接');
        const result = await testConnection();
        return textResult(`## SSH 连接测试\n\n✅ ${result.message}`);
      }

      // ---------- Kubernetes tools ----------
      case 'list_pods': {
        const namespace = input.namespace || DEFAULT_NAMESPACE;
        let cmd = `kubectl get pods -n ${shellQuote(namespace)} -o wide`;
        if (input.label) {
          cmd += ` -l ${shellQuote(input.label)}`;
        }

        log(`[MCP] 列出 pods: namespace=${namespace}`);
        const result = await executeKubectl(cmd, { signal });

        return textResult(`## Pods 列表 (namespace: ${namespace})\n\n${codeBlock(result)}`);
      }

      case 'describe_pod': {
        const namespace = input.namespace || DEFAULT_NAMESPACE;
        const podPattern = input.pod;
        const ns = shellQuote(namespace);

        log(`[MCP] 查找 pod: ${podPattern}`);
        // Resolve the first pod whose name matches the pattern, then describe it.
        const describeCmd = `kubectl describe $(kubectl get pod -n ${ns} -o name | grep -e ${shellQuote(podPattern)} | head -1) -n ${ns}`;
        const result = await executeKubectl(describeCmd, { signal });

        return textResult(`## Pod 详情: ${podPattern}\n\n${codeBlock(result)}`);
      }

      case 'get_pod_logs': {
        const namespace = input.namespace || DEFAULT_NAMESPACE;
        const podPattern = input.pod;
        const previous = input.previous || false;
        const tail = toPositiveInt(input.tail, 100);
        const ns = shellQuote(namespace);

        let cmd = `kubectl logs $(kubectl get pod -n ${ns} -o name | grep -e ${shellQuote(podPattern)} | head -1) -n ${ns} --tail=${tail}`;
        if (previous) {
          cmd += ' --previous';
        }

        log(`[MCP] 获取 pod 日志: ${podPattern}, previous=${previous}`);
        const result = await executeKubectl(cmd, { signal });

        const logType = previous ? '崩溃前日志' : '当前日志';
        return textResult(`## Pod 日志: ${podPattern} (${logType})\n\n${codeBlock(result)}`);
      }

      case 'get_events': {
        const namespace = input.namespace || DEFAULT_NAMESPACE;
        const ns = shellQuote(namespace);
        let cmd = `kubectl get events -n ${ns} --sort-by='.lastTimestamp'`;

        if (input.pod) {
          cmd = `kubectl get events -n ${ns} --field-selector ${shellQuote(`involvedObject.name=${input.pod}`)} --sort-by='.lastTimestamp'`;
        }

        log(`[MCP] 获取事件: namespace=${namespace}`);
        const result = await executeKubectl(cmd, { signal });

        return textResult(`## K8s 事件 (namespace: ${namespace})\n\n${codeBlock(result)}`);
      }

      case 'trace_log': {
        const traceId = input.traceId;
        const contextLines = toPositiveInt(input.context_lines, 3);
        // Anything other than an array is ignored (treated as "all services").
        const requestedServices = Array.isArray(input.services) ? input.services : [];

        // Production environments: Loki, one query can cover every service.
        if (isLokiEnv(input.env)) {
          const envKey = resolveLokiEnvName(input.env);
          const envConfig = LOKI_ENVIRONMENTS[envKey];
          const project = envConfig.defaultProject || 'senior';
          const timeRange = lokiTimeRange(input);

          let lokiResult;
          if (requestedServices.length > 0) {
            // Explicit service list: query them one by one and merge.
            const allLogs = [];
            const allLabels = [];
            for (const svc of requestedServices) {
              const dirName = getLokiServiceDirName(svc);
              const expr = buildServiceLogQL(project, dirName, traceId, envKey);
              log(`[MCP] Loki trace: env=${envKey}, service=${svc}, traceId=${traceId}`);
              const r = await queryLokiAutoRange(envKey, expr, { ...timeRange, maxLines: 500, signal });
              allLogs.push(...r.logs);
              allLabels.push(...r.labels);
            }
            lokiResult = { logs: allLogs, labels: allLabels, traceIds: extractTraceIds(allLogs), timeRange: { label: '自动递进' } };
          } else {
            // No service list: a single project-wide query.
            const expr = buildProjectLogQL(project, traceId, envKey);
            log(`[MCP] Loki trace (全项目): env=${envKey}, project=${project}, traceId=${traceId}`);
            lokiResult = await queryLokiAutoRange(envKey, expr, { ...timeRange, maxLines: 1000, signal });
          }

          if (lokiResult.logs.length === 0) {
            return textResult(`## TraceId 追踪结果 (${envKey} 生产环境)\n\n**traceId**: \`${traceId}\`\n\n❌ 已自动搜索 5分钟 → 30分钟 → 1小时 → 3小时 → 24小时 范围,均未找到匹配日志。${lokiErrorHint(lokiResult)}\n\n请确认:\n1. traceId 是否正确\n2. 如需查询更早的日志,请使用 \`from\`/\`to\` 参数指定具体时间范围`);
          }

          // Present the matches grouped by service.
          const groups = groupLogsByService(lokiResult);
          const serviceNames = Object.keys(groups).sort();

          let text = `## TraceId 追踪结果 (${envKey} 生产环境, ${lokiResult.timeRange.label}内)\n\n`;
          text += `**traceId**: \`${traceId}\`\n`;
          text += `**匹配服务数**: ${serviceNames.length}\n`;
          text += `**总日志行数**: ${lokiResult.logs.length}\n\n`;

          for (const svcName of serviceNames) {
            text += `### ${svcName}\n`;
            text += `${codeBlock(groups[svcName].logs.join('\n'))}\n\n`;
          }

          return textResult(text);
        }

        // Test environments: grep each service's log over SSH.
        const targetNamespace = input.namespace || null;
        let servicesToSearch;
        if (requestedServices.length === 0) {
          servicesToSearch = Object.keys(SERVICES);
        } else {
          // Resolve aliases to canonical names; unknown names are kept as given.
          servicesToSearch = requestedServices.map((s) => {
            const service = findService(s, targetNamespace);
            return service ? service.name : s;
          }).filter(Boolean);
        }

        log(`[MCP] 追踪日志: traceId=${traceId}, namespace=${targetNamespace || 'default'}, 服务数=${servicesToSearch.length}`);

        const TRACE_TOTAL_TIMEOUT = 50000; // overall budget: 50s
        const TRACE_PER_SERVICE = 10000; // per-service timeout: 10s
        const traceStart = Date.now();
        const command = `grep -i -C ${contextLines} -e "${escapeForDoubleQuotes(traceId)}"`;
        const results = [];
        let searched = 0;
        let skipped = 0;

        for (const serviceName of servicesToSearch) {
          // Stop early once the request was cancelled: nobody is waiting for more.
          if (signal && signal.aborted) {
            skipped = servicesToSearch.length - searched;
            break;
          }
          // Stop early once the overall budget is spent.
          if (Date.now() - traceStart > TRACE_TOTAL_TIMEOUT) {
            skipped = servicesToSearch.length - searched;
            log(`[MCP] trace_log 总耗时超过 ${TRACE_TOTAL_TIMEOUT}ms,跳过剩余 ${skipped} 个服务`);
            break;
          }

          const service = findService(serviceName, targetNamespace);
          if (!service) { searched++; continue; }

          try {
            const result = await queryLog(service, command, { timeout: TRACE_PER_SERVICE, signal });

            if (result && result.trim() && !result.includes('未找到')) {
              results.push({ service: serviceName, namespace: service.namespace, logs: result });
            }
          } catch (err) {
            // A failing or slow service is skipped so the others still get searched.
            const reason = String((err && err.message) || err);
            log(`[MCP] ${serviceName} 跳过: ${reason.substring(0, 80)}`);
          }
          searched++;
        }

        const elapsed = Date.now() - traceStart;
        const timeNote = skipped > 0 ? `\n**注意**: 已搜索 ${searched}/${servicesToSearch.length} 个服务(耗时 ${elapsed}ms,跳过 ${skipped} 个)` : '';

        if (results.length === 0) {
          return textResult(`## TraceId 追踪结果\n\n**traceId**: ${traceId}\n**namespace**: ${targetNamespace || '默认'}\n\n❌ 未在已搜索的 ${searched} 个服务中找到匹配的日志${timeNote}`);
        }

        const output = results.map((r) => `### ${r.service} (${r.namespace})\n${codeBlock(r.logs)}`).join('\n\n');

        return textResult(`## TraceId 追踪结果\n\n**traceId**: ${traceId}\n**namespace**: ${targetNamespace || '默认'}\n**匹配服务数**: ${results.length}${timeNote}\n\n${output}`);
      }

      case 'detect_context': {
        const workspacePath = input.workspace_path;
        const result = detectContextFromPath(workspacePath);

        if (!result.success) {
          return textResult(`## 上下文检测失败\n\n**错误**: ${result.error}\n**默认 namespace**: ${result.namespace}`);
        }

        let responseText = `## 上下文检测结果\n\n`;
        responseText += `**工作目录**: ${result.originalPath}\n`;
        responseText += `**检测到的 namespace**: ${result.namespace}\n`;
        responseText += `**namespace 来源**: ${result.namespaceSource}\n`;

        if (result.serviceName) {
          responseText += `**检测到的服务**: ${result.serviceName}\n`;
          if (result.service) {
            responseText += `**服务描述**: ${result.service.description}\n`;
            responseText += `**服务别名**: ${result.service.aliases.join(', ')}\n`;
          }
        } else {
          responseText += `**检测到的服务**: 未能从路径中识别服务名\n`;
        }

        responseText += `\n### 建议\n`;
        responseText += `在调用 query_log、search_log 等工具时,请使用:\n`;
        responseText += `- **namespace**: \`${result.namespace}\`\n`;
        if (result.serviceName) {
          responseText += `- **service**: \`${result.serviceName}\`\n`;
        }

        log(`[MCP] 上下文检测: path=${workspacePath}, namespace=${result.namespace}, service=${result.serviceName}`);

        return textResult(responseText);
      }

      // ---------- Loki (production) tools ----------
      case 'list_loki_environments': {
        const envs = getLokiEnvList();
        if (envs.length === 0) {
          return textResult('## Loki 环境列表\n\n⚠️ 未配置任何 Loki 环境');
        }

        const list = envs
          .map((e) => `- **${e.name}**: ${e.description}\n Grafana: ${e.grafanaUrl}\n 默认项目: ${e.project}`)
          .join('\n');

        return textResult(`## Loki 生产环境列表\n\n${list}`);
      }

      case 'list_loki_services': {
        const envKey = resolveLokiEnvName(input.env || 'cms');
        const project = input.project || 'senior';

        if (!envKey || !LOKI_ENVIRONMENTS[envKey]) {
          return textResult(`错误: 未知环境 "${input.env}"。使用 list_loki_environments 查看可用环境。`);
        }

        log(`[MCP] 列出 Loki 服务: env=${envKey}, project=${project}`);
        const services = await getLokiSvcList(envKey, project);

        if (services.length === 0) {
          return textResult(`## Loki 服务列表 (${envKey})\n\n⚠️ 未找到任何服务`);
        }

        const list = services.map((s, i) => ` ${i + 1}. ${s}`).join('\n');
        return textResult(`## Loki 服务列表 (${envKey}, project=${project})\n\n共 ${services.length} 个服务:\n${list}`);
      }

      default:
        return textResult(`错误: 未知工具 "${name}"`);
    }
  } catch (error) {
    log(`[MCP] 工具内部错误: ${error.message}`);
    return {
      content: [{
        type: 'text',
        text: `## 执行错误\n\n❌ ${error.message}`
      }],
      isError: true
    };
  }
}
876
+
877
// Set once shutdown has begun. gracefulShutdown is wired to SIGINT, SIGTERM
// and both stdin 'end' and 'close', so it is routinely invoked more than once.
let shuttingDown = false;

/**
 * Graceful shutdown (original note: mirrors the auggie MCP startup code).
 * Closes the MCP server and force-exits the process 500ms later.
 * Idempotent: only the first call has any effect.
 */
function gracefulShutdown() {
  if (shuttingDown) return;
  shuttingDown = true;
  log('[MCP] 优雅关闭...');
  // Best effort: a failing close must not block the exit, but record it.
  server.close().catch((err) => log(`[MCP] server.close failed: ${(err && err.message) || err}`));
  // Give close() a moment to finish, then exit regardless. unref() keeps this
  // timer from holding the process open by itself.
  setTimeout(() => process.exit(0), 500).unref();
}
884
+
885
// Shut down gracefully on the usual termination signals.
for (const sig of ['SIGINT', 'SIGTERM']) {
  process.on(sig, gracefulShutdown);
}
887
+
888
/**
 * Start the server: hook stdin so a disconnecting host triggers shutdown,
 * connect the MCP server over stdio, then log the startup banner and the
 * local log file path when one is reported.
 */
async function main() {
  // Original note: mirrors auggie. stdin end/close is treated as the host
  // process going away, so shut down gracefully.
  for (const evt of ['end', 'close']) {
    process.stdin.on(evt, () => {
      log(`[MCP] stdin ${evt}, initiating graceful shutdown`);
      gracefulShutdown();
    });
  }

  await server.connect(new StdioServerTransport());

  const logFile = getLogFilePath();
  log(`[MCP] Log Query Server v3.6.1 已启动 (仅文件日志,避免 stderr backpressure 阻塞 event loop)`);
  if (logFile) {
    log(`[MCP] 本地日志文件: ${logFile}`);
  }
}
906
+
907
// Entry point: a startup failure is logged (stack when available), then the
// process exits with status 1.
main().catch((startupError) => {
  const detail = (startupError && startupError.stack) || startupError;
  log(`[MCP] 启动失败: ${detail}`);
  process.exit(1);
});