mcp-log-query-server 3.2.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/index.js +74 -8
  2. package/loki-client.js +24 -5
  3. package/package.json +1 -1
package/index.js CHANGED
@@ -34,6 +34,23 @@ import {
34
34
  listLokiEnvironments as getLokiEnvList, listLokiServices as getLokiSvcList
35
35
  } from './loki-client.js';
36
36
 
37
+ // 超时配置
38
+ const REQUEST_TIMEOUT = 60000; // MCP 请求兜底超时 60s
39
+ const WATCHDOG_TIMEOUT = 120000; // 进程看门狗 120s,卡死则强制退出
40
+
41
/**
 * Race a promise against a timeout.
 *
 * @param {Promise<*>} promise - The operation to wait for.
 * @param {number} ms - Timeout in milliseconds.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<*>} Settles like `promise`, or rejects with an Error
 *   once `ms` milliseconds elapse first.
 */
function withTimeout(promise, ms, label) {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(`${label} 超时(${ms}ms)`)), ms);
  });
  // Always cancel the timer once the race settles; otherwise every fast call
  // leaves a live timer behind for the full `ms`, which piles up under load
  // and keeps the event loop alive.
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}
49
+
50
+ // 进程级安全网
51
+ process.on('unhandledRejection', (err) => console.error('[unhandledRejection]', err));
52
+ process.on('uncaughtException', (err) => { console.error('[uncaughtException]', err); process.exit(1); });
53
+
37
54
  // 创建 MCP Server
38
55
  const server = new Server(
39
56
  {
@@ -320,7 +337,38 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
320
337
  // 处理工具调用
321
338
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
322
339
  const { name, arguments: args } = request.params;
340
+ const startTime = Date.now();
341
+
342
+ // 进程级看门狗:如果整个请求卡死超时,强制重启进程
343
+ const watchdog = setTimeout(() => {
344
+ console.error(`[Watchdog] ${name} 卡死超过 ${WATCHDOG_TIMEOUT}ms,强制退出进程`);
345
+ process.exit(1);
346
+ }, WATCHDOG_TIMEOUT);
347
+ watchdog.unref();
348
+
349
+ try {
350
+ const result = await withTimeout(
351
+ handleToolCall(name, args),
352
+ REQUEST_TIMEOUT,
353
+ name
354
+ );
355
+ clearTimeout(watchdog);
356
+ console.error(`[MCP] ${name} ${Date.now() - startTime}ms`);
357
+ return result;
358
+ } catch (error) {
359
+ clearTimeout(watchdog);
360
+ console.error(`[Error] ${name} ${Date.now() - startTime}ms: ${error.message}`);
361
+ return {
362
+ content: [{ type: 'text', text: `## 执行错误\n\n❌ ${error.message}` }],
363
+ isError: true
364
+ };
365
+ }
366
+ });
323
367
 
368
+ /**
369
+ * 实际的工具调用处理逻辑
370
+ */
371
+ async function handleToolCall(name, args) {
324
372
  try {
325
373
  switch (name) {
326
374
  case 'query_log': {
@@ -622,25 +670,43 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
622
670
 
623
671
  console.error(`[MCP] 追踪日志: traceId=${traceId}, namespace=${targetNamespace || 'default'}, 服务数=${servicesToSearch.length}`);
624
672
 
673
+ const TRACE_TOTAL_TIMEOUT = 50000; // 总耗时上限 50s
674
+ const TRACE_PER_SERVICE = 10000; // 单服务超时 10s
675
+ const traceStart = Date.now();
625
676
  const results = [];
677
+ let searched = 0;
678
+ let skipped = 0;
679
+
626
680
  for (const serviceName of servicesToSearch) {
681
+ // 总耗时检查
682
+ if (Date.now() - traceStart > TRACE_TOTAL_TIMEOUT) {
683
+ skipped = servicesToSearch.length - searched;
684
+ console.error(`[MCP] trace_log 总耗时超过 ${TRACE_TOTAL_TIMEOUT}ms,跳过剩余 ${skipped} 个服务`);
685
+ break;
686
+ }
687
+
627
688
  const service = findService(serviceName, targetNamespace);
628
- if (!service) continue;
689
+ if (!service) { searched++; continue; }
629
690
 
630
691
  try {
631
692
  const command = `grep -i -C ${contextLines} "${traceId}"`;
632
- const result = await queryLog(service, command);
693
+ const result = await queryLog(service, command, { timeout: TRACE_PER_SERVICE });
633
694
 
634
695
  if (result && result.trim() && !result.includes('未找到')) {
635
696
  results.push({ service: serviceName, namespace: service.namespace, logs: result });
636
697
  }
637
698
  } catch (err) {
638
- console.error(`[MCP] 搜索 ${serviceName} 失败: ${err.message}`);
699
+ // 快速跳过失败/超时的服务
700
+ console.error(`[MCP] ${serviceName} 跳过: ${err.message.substring(0, 80)}`);
639
701
  }
702
+ searched++;
640
703
  }
641
704
 
705
+ const elapsed = Date.now() - traceStart;
706
+ const timeNote = skipped > 0 ? `\n**注意**: 已搜索 ${searched}/${servicesToSearch.length} 个服务(耗时 ${elapsed}ms,跳过 ${skipped} 个)` : '';
707
+
642
708
  if (results.length === 0) {
643
- return { content: [{ type: 'text', text: `## TraceId 追踪结果\n\n**traceId**: ${traceId}\n**namespace**: ${targetNamespace || '默认'}\n\n❌ 未在任何服务中找到匹配的日志` }] };
709
+ return { content: [{ type: 'text', text: `## TraceId 追踪结果\n\n**traceId**: ${traceId}\n**namespace**: ${targetNamespace || '默认'}\n\n❌ 未在已搜索的 ${searched} 个服务中找到匹配的日志${timeNote}` }] };
644
710
  }
645
711
 
646
712
  const output = results.map(r => `### ${r.service} (${r.namespace})\n\`\`\`\n${r.logs}\n\`\`\``).join('\n\n');
@@ -648,7 +714,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
648
714
  return {
649
715
  content: [{
650
716
  type: 'text',
651
- text: `## TraceId 追踪结果\n\n**traceId**: ${traceId}\n**namespace**: ${targetNamespace || '默认'}\n**匹配服务数**: ${results.length}\n\n${output}`
717
+ text: `## TraceId 追踪结果\n\n**traceId**: ${traceId}\n**namespace**: ${targetNamespace || '默认'}\n**匹配服务数**: ${results.length}${timeNote}\n\n${output}`
652
718
  }]
653
719
  };
654
720
  }
@@ -741,7 +807,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
741
807
  };
742
808
  }
743
809
  } catch (error) {
744
- console.error(`[MCP] 错误: ${error.message}`);
810
+ console.error(`[MCP] 工具内部错误: ${error.message}`);
745
811
  return {
746
812
  content: [{
747
813
  type: 'text',
@@ -750,13 +816,13 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
750
816
  isError: true
751
817
  };
752
818
  }
753
- });
819
+ }
754
820
 
755
821
  // 启动服务器
756
822
  async function main() {
757
823
  const transport = new StdioServerTransport();
758
824
  await server.connect(transport);
759
- console.error('[MCP] Log Query Server v3.1.0 已启动 (支持 Loki 生产环境日志)');
825
+ console.error('[MCP] Log Query Server v3.2.0 已启动 (支持超时保护 + 进程看门狗)');
760
826
  }
761
827
 
762
828
  main().catch((error) => {
package/loki-client.js CHANGED
@@ -11,6 +11,9 @@
11
11
 
12
12
  import { LOKI_ENVIRONMENTS, LOKI_DEFAULTS } from './config.js';
13
13
 
14
+ // Loki 查询超时(毫秒)
15
+ const LOKI_FETCH_TIMEOUT = 30000;
16
+
14
17
  // 时间范围自动递进策略(毫秒)
15
18
  const AUTO_RANGE_STEPS = [
16
19
  { range: 5 * 60 * 1000, label: '5 分钟' },
@@ -63,11 +66,27 @@ export async function queryLoki(envName, expr, options = {}) {
63
66
 
64
67
  console.error(`[Loki] 查询: env=${envName}, expr=${expr}`);
65
68
 
66
- const resp = await fetch(url, {
67
- method: 'POST',
68
- headers: buildHeaders(env),
69
- body: JSON.stringify(body)
70
- });
69
+ // 带超时的 fetch
70
+ const controller = new AbortController();
71
+ const timer = setTimeout(() => controller.abort(), LOKI_FETCH_TIMEOUT);
72
+
73
+ let resp;
74
+ try {
75
+ resp = await fetch(url, {
76
+ method: 'POST',
77
+ headers: buildHeaders(env),
78
+ body: JSON.stringify(body),
79
+ signal: controller.signal
80
+ });
81
+ } catch (e) {
82
+ clearTimeout(timer);
83
+ if (e.name === 'AbortError') {
84
+ throw new Error(`Loki 查询超时(${LOKI_FETCH_TIMEOUT}ms)`);
85
+ }
86
+ throw e;
87
+ } finally {
88
+ clearTimeout(timer);
89
+ }
71
90
 
72
91
  if (!resp.ok) {
73
92
  const text = await resp.text();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mcp-log-query-server",
3
- "version": "3.2.0",
3
+ "version": "3.4.0",
4
4
  "description": "MCP Server for querying server logs via SSH jump host and Grafana Loki API",
5
5
  "main": "index.js",
6
6
  "type": "module",