mcp-log-query-server 3.5.1 → 3.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -27,6 +27,7 @@ import {
27
27
 
28
28
  import { queryLog, testConnection, executeKubectl } from './ssh-client.js';
29
29
  import { findService, getAllServices, DEFAULTS, DEFAULT_NAMESPACE, SERVICES, NAMESPACES, detectContextFromPath, isLokiEnv, resolveLokiEnvName, LOKI_ENVIRONMENTS } from './config.js';
30
+ import { log, getLogFilePath } from './logger.js';
30
31
  import {
31
32
  queryLoki, queryLokiAutoRange, parseTimeStr,
32
33
  extractTraceIds, parseServiceFromFilename, groupLogsByService,
@@ -47,16 +48,26 @@ function withTimeout(promise, ms, label) {
47
48
  ]);
48
49
  }
49
50
 
51
/**
 * Serialize tool arguments for a log line without ever throwing.
 *
 * JSON.stringify returns `undefined` (not a string) for `undefined`,
 * functions and symbols; those values fall back to String(obj) so that a tool
 * call without arguments is logged as "undefined" rather than being reported
 * as unserializable.
 *
 * @param {*} obj - value to serialize
 * @param {number} [maxLen=200] - maximum number of characters kept before '...' is appended
 * @returns {string} the (possibly truncated) text, or '<unserializable>' when
 *   serialization throws (circular references, BigInt, throwing toJSON, ...)
 */
function safeStringify(obj, maxLen = 200) {
  let text;
  try {
    text = JSON.stringify(obj);
    if (typeof text !== 'string') {
      text = String(obj);
    }
  } catch {
    return '<unserializable>';
  }
  return text.length > maxLen ? text.slice(0, maxLen) + '...' : text;
}
60
+
50
61
  // 进程级安全网:只记录日志,不退出进程
51
62
  // 退出会导致 stdio 断开,整个 MCP 不可用直到 IDE 重启;单次请求错误不应拖死服务
52
- process.on('unhandledRejection', (err) => console.error('[unhandledRejection]', err));
53
- process.on('uncaughtException', (err) => console.error('[uncaughtException]', err));
63
+ process.on('unhandledRejection', (err) => log(`[unhandledRejection] ${err && err.stack || err}`));
64
+ process.on('uncaughtException', (err) => log(`[uncaughtException] ${err && err.stack || err}`));
54
65
 
55
66
  // 创建 MCP Server
56
67
  const server = new Server(
57
68
  {
58
69
  name: 'mcp-log-query',
59
- version: '3.5.1',
70
+ version: '3.5.3',
60
71
  },
61
72
  {
62
73
  capabilities: {
@@ -339,11 +350,12 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
339
350
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
340
351
  const { name, arguments: args } = request.params;
341
352
  const startTime = Date.now();
353
+ log(`[Tool] → ${name} start args=${safeStringify(args)}`);
342
354
 
343
355
  // 看门狗:仅记录长时间未完成的请求,不再退出进程
344
356
  // withTimeout(REQUEST_TIMEOUT) 已经保证单次请求超时会抛错
345
357
  const watchdog = setTimeout(() => {
346
- console.error(`[Watchdog] ${name} 仍在运行超过 ${WATCHDOG_WARN_TIMEOUT}ms(仅记录,不退出进程)`);
358
+ log(`[Watchdog] ${name} 仍在运行超过 ${WATCHDOG_WARN_TIMEOUT}ms(仅记录,不退出进程)`);
347
359
  }, WATCHDOG_WARN_TIMEOUT);
348
360
  watchdog.unref();
349
361
 
@@ -354,11 +366,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
354
366
  name
355
367
  );
356
368
  clearTimeout(watchdog);
357
- console.error(`[MCP] ${name} ${Date.now() - startTime}ms`);
369
+ log(`[Tool] ${name} done ${Date.now() - startTime}ms`);
358
370
  return result;
359
371
  } catch (error) {
360
372
  clearTimeout(watchdog);
361
- console.error(`[Error] ${name} ${Date.now() - startTime}ms: ${error.message}`);
373
+ log(`[Tool] ${name} FAIL ${Date.now() - startTime}ms: ${error.message}`);
362
374
  return {
363
375
  content: [{ type: 'text', text: `## 执行错误\n\n❌ ${error.message}` }],
364
376
  isError: true
@@ -834,20 +846,22 @@ process.on('SIGTERM', gracefulShutdown);
834
846
  async function main() {
835
847
  // 对齐 auggie: 监听 stdin end/close,宿主进程断开时优雅关闭
836
848
  process.stdin.on('end', () => {
837
- console.error('[MCP] stdin end, initiating graceful shutdown');
849
+ log('[MCP] stdin end, initiating graceful shutdown');
838
850
  gracefulShutdown();
839
851
  });
840
852
  process.stdin.on('close', () => {
841
- console.error('[MCP] stdin close, initiating graceful shutdown');
853
+ log('[MCP] stdin close, initiating graceful shutdown');
842
854
  gracefulShutdown();
843
855
  });
844
856
 
845
857
  const transport = new StdioServerTransport();
846
858
  await server.connect(transport);
847
- console.error('[MCP] Log Query Server v3.5.1 已启动 (超时保护 + SSH 并发限制 + 排队超时 + 进程不自杀 + stdin 优雅关闭)');
859
+ const logPath = getLogFilePath();
860
+ log(`[MCP] Log Query Server v3.5.3 已启动 (SSH排队超时 + Loki体超时 + 文件日志)`);
861
+ if (logPath) log(`[MCP] 本地日志文件: ${logPath}`);
848
862
  }
849
863
 
850
864
  main().catch((error) => {
851
- console.error('[MCP] 启动失败:', error);
865
+ log(`[MCP] 启动失败: ${error && error.stack || error}`);
852
866
  process.exit(1);
853
867
  });
package/logger.js ADDED
@@ -0,0 +1,91 @@
1
+ /**
2
+ * 轻量日志模块:console.error + 本地文件追加
3
+ *
4
+ * 为什么需要文件日志?
5
+ * Windsurf 不会把 MCP server 的 stderr 落盘到 %APPDATA%\Windsurf\logs,
6
+ * 只在 Output 面板实时显示。一旦进程重启、面板关闭,信息就丢了。
7
+ *
8
+ * 用法:
9
+ * import { log } from './logger.js';
10
+ * log('[SSH-Sem] acquire ...');
11
+ *
12
+ * 诊断卡住时:
13
+ * Get-Content $env:TEMP\mcp-log-query.log -Tail 20 -Wait (Windows)
14
+ * tail -F /tmp/mcp-log-query.log (Linux/Mac)
15
+ *
16
+ * 环境变量:
17
+ * - MCP_LOG_FILE: 自定义日志文件路径(默认 <tmpdir>/mcp-log-query.log)
18
+ * - MCP_LOG_MAX_BYTES: 单文件最大字节数(默认 10MB,超过则轮转到 .1)
19
+ * - MCP_LOG_DISABLE: 设为 '1' 则禁用文件日志(只走 stderr)
20
+ */
21
+
22
+ import fs from 'node:fs';
23
+ import os from 'node:os';
24
+ import path from 'node:path';
25
+
26
// Default rotation threshold: 10 MiB.
const DEFAULT_MAX_BYTES = 10 * 1024 * 1024;

/**
 * Parse the MCP_LOG_MAX_BYTES setting.
 *
 * A non-numeric, zero or negative value would make `size < MAX_BYTES` false
 * for every file, so the log would be rotated on each write; such values fall
 * back to the default instead.
 *
 * @param {string|undefined} raw - raw environment value
 * @param {number} [fallback=DEFAULT_MAX_BYTES] - value used when `raw` is unusable
 * @returns {number} a positive, finite byte count
 */
function parseMaxBytes(raw, fallback = DEFAULT_MAX_BYTES) {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

// Log file location: MCP_LOG_FILE, or <tmpdir>/mcp-log-query.log when unset/empty.
const LOG_FILE = process.env.MCP_LOG_FILE || path.join(os.tmpdir(), 'mcp-log-query.log');
// Size at which the log file is rotated to "<file>.1".
const MAX_BYTES = parseMaxBytes(process.env.MCP_LOG_MAX_BYTES);
// MCP_LOG_DISABLE=1 turns file logging off (stderr only).
const DISABLED = process.env.MCP_LOG_DISABLE === '1';
29
+
30
+ // 懒初始化:首次写入时再检查目录
31
+ let initialized = false;
32
+
33
/**
 * One-time file-log setup: creates the log directory and appends a start-up
 * banner. The `initialized` flag is set before any I/O, so a failed attempt
 * is not retried on later calls.
 */
function ensureInit() {
  const alreadyInitialized = initialized;
  initialized = true;
  if (alreadyInitialized || DISABLED) {
    return;
  }

  try {
    const logDir = path.dirname(LOG_FILE);
    fs.mkdirSync(logDir, { recursive: true });
    // The banner makes it easy to tell in the file when a new process started.
    const banner = `\n========== [${new Date().toISOString()}] MCP log-query 进程启动 pid=${process.pid} ==========\n`;
    fs.appendFileSync(LOG_FILE, banner);
  } catch (initErr) {
    console.error('[logger] 初始化文件日志失败,仅使用 stderr:', initErr.message);
  }
}
48
+
49
/**
 * Rotate the log file to "<file>.1" once it has reached MAX_BYTES.
 * Every step is best-effort: stat, remove and rename failures are ignored.
 */
function rotateIfNeeded() {
  let currentSize;
  try {
    currentSize = fs.statSync(LOG_FILE).size;
  } catch {
    // Missing file (or any other stat error): nothing to rotate.
    return;
  }

  if (currentSize < MAX_BYTES) {
    return;
  }

  const backupPath = `${LOG_FILE}.1`;
  try {
    fs.rmSync(backupPath, { force: true });
  } catch {
    // best effort
  }
  try {
    fs.renameSync(LOG_FILE, backupPath);
  } catch {
    // best effort
  }
}
60
+
61
/**
 * Emit one log message: always to stderr, and to the local log file unless
 * file logging is disabled.
 *
 * @param {string} msg - message text; the copy written to the file is prefixed
 *   with an ISO timestamp, the stderr copy is written as given
 */
export function log(msg) {
  // stderr is always written so the message stays visible in real time.
  console.error(msg);

  if (DISABLED) {
    return;
  }

  ensureInit();

  const entry = `[${new Date().toISOString()}] ${msg}\n`;
  try {
    rotateIfNeeded();
    fs.appendFileSync(LOG_FILE, entry);
  } catch (writeErr) {
    // A failing file write must not affect the caller; report it on stderr once.
    if (log._warnedFileFail) {
      return;
    }
    log._warnedFileFail = true;
    console.error('[logger] 写日志文件失败(后续不再提示):', writeErr.message);
  }
}
85
+
86
/**
 * Report where file logging goes.
 *
 * @returns {string|null} the log file path, or null when file logging is disabled
 */
export function getLogFilePath() {
  if (DISABLED) {
    return null;
  }
  return LOG_FILE;
}
package/loki-client.js CHANGED
@@ -1,483 +1,488 @@
1
- /**
2
- * Grafana Loki API 客户端
3
- *
4
- * 通过 Grafana 代理接口查询 Loki 日志,支持:
5
- * - 日志查询(LogQL)
6
- * - 标签/标签值获取
7
- * - traceId 自动提取
8
- * - 服务名自动识别(从 filename 标签解析)
9
- * - 时间范围自动递进(1h → 24h → 72h → 7d)
10
- */
11
-
12
- import { LOKI_ENVIRONMENTS, LOKI_DEFAULTS } from './config.js';
13
-
14
- // Loki 查询超时(毫秒)
15
- const LOKI_FETCH_TIMEOUT = 30000;
16
-
17
- // 时间范围自动递进策略(毫秒)
18
- const AUTO_RANGE_STEPS = [
19
- { range: 5 * 60 * 1000, label: '5 分钟' },
20
- { range: 30 * 60 * 1000, label: '30 分钟' },
21
- { range: 1 * 60 * 60 * 1000, label: '1 小时' },
22
- { range: 3 * 60 * 60 * 1000, label: '3 小时' },
23
- { range: 24 * 60 * 60 * 1000, label: '24 小时' },
24
- ];
25
-
26
- // ============================================================
27
- // 核心查询
28
- // ============================================================
29
-
30
- /**
31
- * 执行 Loki 日志查询
32
- * @param {string} envName - 环境名称,如 'cms'
33
- * @param {string} expr - LogQL 表达式
34
- * @param {Object} options - 查询选项
35
- * @param {number} options.from - 起始时间(毫秒时间戳),默认 1 小时前
36
- * @param {number} options.to - 结束时间(毫秒时间戳),默认当前
37
- * @param {number} options.maxLines - 最大返回行数,默认 100
38
- * @param {string} options.direction - 排序方向 'backward'|'forward',默认 'backward'
39
- * @returns {Object} { logs: string[], labels: Object[], traceIds: string[], stats: Object }
40
- */
41
- export async function queryLoki(envName, expr, options = {}) {
42
- const env = getLokiEnv(envName);
43
- const now = Date.now();
44
- const from = options.from || (now - LOKI_DEFAULTS.defaultTimeRange);
45
- const to = options.to || now;
46
- const maxLines = options.maxLines || LOKI_DEFAULTS.maxLines;
47
- const direction = options.direction || 'backward';
48
-
49
- const url = `${env.grafanaUrl}/api/ds/query?ds_type=loki`;
50
- const body = {
51
- queries: [{
52
- refId: 'A',
53
- expr,
54
- queryType: 'range',
55
- datasource: { type: 'loki', uid: env.datasourceUid },
56
- editorMode: 'builder',
57
- direction,
58
- maxLines,
59
- datasourceId: env.datasourceId,
60
- intervalMs: 1000,
61
- maxDataPoints: 1000
62
- }],
63
- from: String(from),
64
- to: String(to)
65
- };
66
-
67
- console.error(`[Loki] 查询: env=${envName}, expr=${expr}`);
68
-
69
- // 带超时的 fetch
70
- const controller = new AbortController();
71
- const timer = setTimeout(() => controller.abort(), LOKI_FETCH_TIMEOUT);
72
-
73
- let resp;
74
- try {
75
- resp = await fetch(url, {
76
- method: 'POST',
77
- headers: buildHeaders(env),
78
- body: JSON.stringify(body),
79
- signal: controller.signal
80
- });
81
- } catch (e) {
82
- clearTimeout(timer);
83
- if (e.name === 'AbortError') {
84
- throw new Error(`Loki 查询超时(${LOKI_FETCH_TIMEOUT}ms)`);
85
- }
86
- throw e;
87
- } finally {
88
- clearTimeout(timer);
89
- }
90
-
91
- if (!resp.ok) {
92
- const text = await resp.text();
93
- throw new Error(`Loki 查询失败 (${resp.status}): ${text}`);
94
- }
95
-
96
- const data = await resp.json();
97
- return parseLokiResponse(data);
98
- }
99
-
100
- /**
101
- * 带时间范围自动递进的 Loki 查询
102
- *
103
- * 策略:1h → 24h → 72h → 7d,找到结果立即返回
104
- * 如果用户指定了 from/to,则直接使用指定范围,不递进
105
- *
106
- * @param {string} envName - 环境名称
107
- * @param {string} expr - LogQL 表达式
108
- * @param {Object} options - 查询选项
109
- * @param {number} options.from - 起始时间戳(毫秒),指定后不递进
110
- * @param {number} options.to - 结束时间戳(毫秒),指定后不递进
111
- * @param {number} options.maxLines - 最大返回行数
112
- * @param {string} options.direction - 排序方向
113
- * @returns {Object} { logs, labels, traceIds, stats, timeRange: { label, from, to } }
114
- */
115
- export async function queryLokiAutoRange(envName, expr, options = {}) {
116
- // 如果用户明确指定了 from/to,直接查询不递进
117
- if (options.from && options.to) {
118
- console.error(`[Loki] 使用指定时间范围查询: ${new Date(options.from).toLocaleString()} ~ ${new Date(options.to).toLocaleString()}`);
119
- try {
120
- const result = await queryLoki(envName, expr, options);
121
- result.timeRange = { label: '自定义', from: options.from, to: options.to };
122
- return result;
123
- } catch (e) {
124
- const isTimeout = e.message.includes('timeout') || e.message.includes('504') || e.message.includes('Timeout');
125
- console.error(`[Loki] 指定时间范围查询${isTimeout ? '超时' : '失败'}: ${e.message.substring(0, 200)}`);
126
- return {
127
- logs: [], labels: [], traceIds: [], stats: null,
128
- timeRange: { label: '自定义', from: options.from, to: options.to },
129
- notFound: true,
130
- error: isTimeout
131
- ? '查询超时(数据量过大),请缩小时间范围或指定具体服务'
132
- : `查询失败: ${e.message.substring(0, 200)}`
133
- };
134
- }
135
- }
136
-
137
- // 自动递进:从小范围到大范围
138
- const now = Date.now();
139
- for (const step of AUTO_RANGE_STEPS) {
140
- const from = now - step.range;
141
- const to = now;
142
-
143
- console.error(`[Loki] 自动递进: 尝试 ${step.label} 范围...`);
144
-
145
- try {
146
- const result = await queryLoki(envName, expr, { ...options, from, to });
147
-
148
- if (result.logs.length > 0) {
149
- console.error(`[Loki] ✅ 在 ${step.label} 范围内找到 ${result.logs.length} 行日志`);
150
- result.timeRange = { label: step.label, from, to };
151
- return result;
152
- }
153
-
154
- console.error(`[Loki] ⏭️ ${step.label} 范围内无结果,扩大范围...`);
155
- } catch (e) {
156
- // 查询超时或失败,停止递进,返回优雅降级结果
157
- const isTimeout = e.message.includes('timeout') || e.message.includes('504') || e.message.includes('Timeout');
158
- console.error(`[Loki] ⚠️ ${step.label} 范围查询${isTimeout ? '超时' : '失败'}: ${e.message.substring(0, 200)}`);
159
- return {
160
- logs: [], labels: [], traceIds: [], stats: null,
161
- timeRange: { label: step.label, from, to },
162
- notFound: true,
163
- error: isTimeout
164
- ? `查询在递进到 ${step.label} 范围时超时(数据量过大),请缩小时间范围或指定具体服务查询`
165
- : `查询在递进到 ${step.label} 范围时失败: ${e.message.substring(0, 200)}`
166
- };
167
- }
168
- }
169
-
170
- // 所有范围都没找到
171
- console.error(`[Loki] ❌ 所有时间范围均未找到结果`);
172
- return {
173
- logs: [],
174
- labels: [],
175
- traceIds: [],
176
- stats: null,
177
- timeRange: { label: '未找到', from: null, to: null },
178
- notFound: true
179
- };
180
- }
181
-
182
- /**
183
- * 解析用户传入的时间字符串为毫秒时间戳
184
- * 支持格式: "2026-02-06 12:00:00", "2026-02-06", ISO 8601 等
185
- * @param {string} timeStr - 时间字符串
186
- * @returns {number|null} 毫秒时间戳,解析失败返回 null
187
- */
188
- export function parseTimeStr(timeStr) {
189
- if (!timeStr) return null;
190
- // 如果是纯数字,当作时间戳
191
- if (/^\d{10,13}$/.test(timeStr)) {
192
- const ts = parseInt(timeStr);
193
- return ts < 1e12 ? ts * 1000 : ts; // 秒 → 毫秒
194
- }
195
- const d = new Date(timeStr);
196
- return isNaN(d.getTime()) ? null : d.getTime();
197
- }
198
-
199
- // ============================================================
200
- // 标签查询
201
- // ============================================================
202
-
203
- /** 获取 Loki 标签列表 */
204
- export async function getLokiLabels(envName) {
205
- const env = getLokiEnv(envName);
206
- const now = Date.now();
207
- const start = (now - LOKI_DEFAULTS.defaultTimeRange) * 1_000_000;
208
- const end = now * 1_000_000;
209
- const url = `${env.grafanaUrl}/api/datasources/uid/${env.datasourceUid}/resources/labels?start=${start}&end=${end}`;
210
- const resp = await fetch(url, { headers: buildHeaders(env) });
211
- if (!resp.ok) throw new Error(`获取标签失败 (${resp.status})`);
212
- const data = await resp.json();
213
- return data.data || [];
214
- }
215
-
216
- /** 获取 Loki 标签值 */
217
- export async function getLokiLabelValues(envName, label, query = '') {
218
- const env = getLokiEnv(envName);
219
- const now = Date.now();
220
- const start = (now - LOKI_DEFAULTS.defaultTimeRange) * 1_000_000;
221
- const end = now * 1_000_000;
222
- let url = `${env.grafanaUrl}/api/datasources/uid/${env.datasourceUid}/resources/label/${label}/values?start=${start}&end=${end}`;
223
- if (query) url += `&query=${encodeURIComponent(query)}`;
224
- const resp = await fetch(url, { headers: buildHeaders(env) });
225
- if (!resp.ok) throw new Error(`获取标签值失败 (${resp.status})`);
226
- const data = await resp.json();
227
- return data.data || [];
228
- }
229
-
230
- // ============================================================
231
- // 响应解析
232
- // ============================================================
233
-
234
- /** 解析 Grafana Loki 查询响应 */
235
- export function parseLokiResponse(data) {
236
- const result = { logs: [], labels: [], traceIds: [], stats: null };
237
- const frames = data?.results?.A?.frames;
238
- if (!frames || frames.length === 0) return result;
239
-
240
- for (const frame of frames) {
241
- const values = frame?.data?.values;
242
- if (!values || values.length < 3) continue;
243
- // values[0]: 标签数组, values[1]: 时间戳数组, values[2]: 日志行数组
244
- const labelsArr = values[0] || [];
245
- const linesArr = values[2] || [];
246
- for (let i = 0; i < linesArr.length; i++) {
247
- result.logs.push(linesArr[i]);
248
- result.labels.push(labelsArr[i] || {});
249
- }
250
- }
251
-
252
- result.traceIds = extractTraceIds(result.logs);
253
-
254
- const stats = frames[0]?.schema?.meta?.stats;
255
- if (stats) {
256
- result.stats = {};
257
- for (const s of stats) { result.stats[s.displayName] = s.value; }
258
- }
259
- return result;
260
- }
261
-
262
- // ============================================================
263
- // traceId 提取
264
- // ============================================================
265
-
266
- /**
267
- * 从日志行中提取 traceId(32位十六进制,在方括号中)
268
- * 日志格式: [clife-senior] 时间 级别 [服务] [pod] [线程] [OT-spanId] [traceId] 类名 - 内容
269
- */
270
- export function extractTraceIds(lines) {
271
- const traceIdSet = new Set();
272
-
273
- const regex = /\[([a-f0-9]{32})\]/gi;
274
- for (const line of lines) {
275
- let match;
276
- while ((match = regex.exec(line)) !== null) {
277
- traceIdSet.add(match[1].toLowerCase());
278
- }
279
- regex.lastIndex = 0;
280
- }
281
- return [...traceIdSet];
282
- }
283
-
284
- // ============================================================
285
- // 服务名解析
286
- // ============================================================
287
-
288
- /**
289
- * 从 Loki filename 标签中解析服务名
290
- * filename 格式: /data/services/logs/senior/clife-senior-health-app/normal_logs/normal.log
291
- * 解析结果: clife-senior-health
292
- */
293
- export function parseServiceFromFilename(filename) {
294
- if (!filename) return null;
295
- // 匹配 /{service-name}-app//{service-name}-service/ 模式
296
- const match = filename.match(/\/(clife-senior-[a-zA-Z0-9-]+?)(?:-app|-service)\//);
297
- if (match) return match[1];
298
- // 兜底:匹配非 clife-senior 前缀的服务(如 device-manage-service)
299
- const match2 = filename.match(/\/([a-zA-Z0-9-]+?)(?:-app|-service)\//);
300
- if (match2) return match2[1];
301
- return null;
302
- }
303
-
304
- /**
305
- * 将查询结果按服务分组
306
- * @param {Object} lokiResult - parseLokiResponse 的返回值
307
- * @returns {Object} { serviceName: { logs: string[], traceIds: string[] } }
308
- */
309
- export function groupLogsByService(lokiResult) {
310
- const groups = {};
311
-
312
- for (let i = 0; i < lokiResult.logs.length; i++) {
313
- const label = lokiResult.labels[i] || {};
314
- const serviceName = parseServiceFromFilename(label.filename) || 'unknown';
315
- const logLine = lokiResult.logs[i];
316
-
317
- if (!groups[serviceName]) {
318
- groups[serviceName] = { logs: [], traceIds: new Set() };
319
- }
320
- groups[serviceName].logs.push(logLine);
321
-
322
- // 从该行提取 traceId
323
- const ids = extractTraceIds([logLine]);
324
- ids.forEach(id => groups[serviceName].traceIds.add(id));
325
- }
326
-
327
- // Set → Array
328
- for (const key of Object.keys(groups)) {
329
- groups[key].traceIds = [...groups[key].traceIds];
330
- }
331
-
332
- return groups;
333
- }
334
-
335
- // ============================================================
336
- // LogQL 构建辅助
337
- // ============================================================
338
-
339
- /**
340
- * 构建按服务查询的 LogQL 表达式
341
- * 根据环境是否有 project 标签,自动选择不同的 filename 路径格式:
342
- * - 有 project 标签(CMS): /data/services/logs/senior/clife-senior-health-app/normal_logs/normal.log
343
- * - 无 project 标签(私有化): /data/services/logs/clife-senior-health-app/normal_logs/normal.log
344
- *
345
- * @param {string} project - 项目名,如 'senior'
346
- * @param {string} servicePodPattern - 服务目录名,如 'clife-senior-health-app'
347
- * @param {string} keyword - 搜索关键词(可选)
348
- * @param {string} envName - 环境名称,如 'cms'、'chengyang'
349
- */
350
- export function buildServiceLogQL(project, servicePodPattern, keyword = '', envName = '') {
351
- const env = envName ? LOKI_ENVIRONMENTS[envName] : null;
352
- const hasProject = env ? env.hasProjectLabel !== false : true;
353
-
354
- // CMS: /data/services/logs/senior/xxx-app/... 私有化: /data/services/logs/xxx-app/...
355
- const filename = hasProject
356
- ? `/data/services/logs/${project}/${servicePodPattern}/normal_logs/normal.log`
357
- : `/data/services/logs/${servicePodPattern}/normal_logs/normal.log`;
358
-
359
- let expr = `{filename="${filename}"}`;
360
- if (keyword) {
361
- expr += ` |= \`${keyword}\``;
362
- }
363
- return expr;
364
- }
365
-
366
- /**
367
- * 构建按项目查询的 LogQL 表达式(搜索整个项目所有服务)
368
- * 根据环境是否有 project 标签,自动选择不同的查询方式:
369
- * - 有 project 标签(CMS): {project="senior"} |= `keyword`
370
- * - 无 project 标签(私有化): {filename=~"/data/services/logs/clife-senior-.*normal.log"} |= `keyword`
371
- *
372
- * @param {string} project - 项目名,如 'senior'
373
- * @param {string} keyword - 搜索关键词
374
- * @param {string} envName - 环境名称,如 'cms'、'chengyang'
375
- */
376
- export function buildProjectLogQL(project, keyword, envName = '') {
377
- const env = envName ? LOKI_ENVIRONMENTS[envName] : null;
378
- const hasProject = env ? env.hasProjectLabel !== false : true;
379
-
380
- if (hasProject) {
381
- // CMS: 直接用 project 标签,高效精确
382
- return `{project="${project}"} |= \`${keyword}\``;
383
- } else {
384
- // 私有化: 用 filename 正则匹配所有 clife-{project}-* 服务的 normal.log
385
- return `{filename=~"/data/services/logs/clife-${project}-.*normal.log"} |= \`${keyword}\``;
386
- }
387
- }
388
-
389
- // ============================================================
390
- // 内部辅助函数
391
- // ============================================================
392
-
393
- /** 获取 Loki 环境配置 */
394
- function getLokiEnv(envName) {
395
- const env = LOKI_ENVIRONMENTS[envName];
396
- if (!env) {
397
- const available = Object.keys(LOKI_ENVIRONMENTS).join(', ');
398
- throw new Error(`未知的 Loki 环境 "${envName}",可用环境: ${available}`);
399
- }
400
- return env;
401
- }
402
-
403
- /** 构建请求头 */
404
- function buildHeaders(env) {
405
- const headers = {
406
- 'Content-Type': 'application/json',
407
- 'Accept': 'application/json',
408
- 'x-grafana-org-id': String(env.orgId || 1),
409
- 'x-plugin-id': 'loki',
410
- 'x-datasource-uid': env.datasourceUid
411
- };
412
-
413
- // 如果配置了认证信息,添加 Basic Auth
414
- if (env.username && env.password) {
415
- const auth = Buffer.from(`${env.username}:${env.password}`).toString('base64');
416
- headers['Authorization'] = `Basic ${auth}`;
417
- }
418
-
419
- return headers;
420
- }
421
-
422
- // ============================================================
423
- // 公共辅助函数
424
- // ============================================================
425
-
426
- /** 获取所有可用的 Loki 环境列表 */
427
- export function listLokiEnvironments() {
428
- return Object.entries(LOKI_ENVIRONMENTS).map(([key, env]) => ({
429
- name: key,
430
- description: env.description,
431
- grafanaUrl: env.grafanaUrl,
432
- project: env.defaultProject
433
- }));
434
- }
435
-
436
- /**
437
- * 获取指定环境下的服务列表(从 Loki filename 标签动态获取)
438
- * 根据环境是否有 project 标签,使用不同的查询方式:
439
- * - 有 project 标签(CMS): 用 {project="senior"} 过滤
440
- * - 无 project 标签(私有化): 获取全部 filename 后按 clife-{project}- 前缀过滤
441
- *
442
- * @param {string} envName - 环境名称
443
- * @param {string} project - 项目名,如 'senior'
444
- * @returns {string[]} 服务名列表
445
- */
446
- export async function listLokiServices(envName, project = 'senior') {
447
- const env = LOKI_ENVIRONMENTS[envName];
448
- const hasProject = env ? env.hasProjectLabel !== false : true;
449
-
450
- let filenames;
451
- if (hasProject) {
452
- // CMS: 直接用 project 标签过滤
453
- filenames = await getLokiLabelValues(envName, 'filename', `{project="${project}"}`);
454
- } else {
455
- // 私有化: 获取全部 filename,然后按 clife-{project}- 前缀过滤
456
- filenames = await getLokiLabelValues(envName, 'filename');
457
- filenames = filenames.filter(f => f.includes(`/clife-${project}-`));
458
- }
459
-
460
- const serviceSet = new Set();
461
- for (const f of filenames) {
462
- if (!f.includes('/normal_logs/normal.log')) continue;
463
- const svc = parseServiceFromFilename(f);
464
- if (svc) serviceSet.add(svc);
465
- }
466
-
467
- return [...serviceSet].sort();
468
- }
469
-
470
- /**
471
- * 根据服务简称获取 Loki 中的服务目录名
472
- * 例如: 'health' → 'clife-senior-health-app'
473
- * @param {string} serviceName - 服务简称,如 'health', 'core', 'gateway'
474
- * @returns {string} 服务目录名
475
- */
476
- export function getLokiServiceDirName(serviceName) {
477
- // 如果已经是完整名称,直接返回
478
- if (serviceName.startsWith('clife-senior-')) {
479
- return serviceName.endsWith('-app') ? serviceName : `${serviceName}-app`;
480
- }
481
- // 简称转完整名称
482
- return `clife-senior-${serviceName}-app`;
483
- }
1
+ /**
2
+ * Grafana Loki API 客户端
3
+ *
4
+ * 通过 Grafana 代理接口查询 Loki 日志,支持:
5
+ * - 日志查询(LogQL)
6
+ * - 标签/标签值获取
7
+ * - traceId 自动提取
8
+ * - 服务名自动识别(从 filename 标签解析)
9
+ * - 时间范围自动递进(5 分钟 → 30 分钟 → 1 小时 → 3 小时 → 24 小时)
10
+ */
11
+
12
+ import { LOKI_ENVIRONMENTS, LOKI_DEFAULTS } from './config.js';
13
+ import { log } from './logger.js';
14
+
15
+ // Loki 查询超时(毫秒)
16
+ const LOKI_FETCH_TIMEOUT = 30000;
17
+
18
+ // 时间范围自动递进策略(毫秒)
19
+ const AUTO_RANGE_STEPS = [
20
+ { range: 5 * 60 * 1000, label: '5 分钟' },
21
+ { range: 30 * 60 * 1000, label: '30 分钟' },
22
+ { range: 1 * 60 * 60 * 1000, label: '1 小时' },
23
+ { range: 3 * 60 * 60 * 1000, label: '3 小时' },
24
+ { range: 24 * 60 * 60 * 1000, label: '24 小时' },
25
+ ];
26
+
27
+ // ============================================================
28
+ // 核心查询
29
+ // ============================================================
30
+
31
/**
 * Run a single Loki range query through the Grafana `/api/ds/query` endpoint.
 *
 * One AbortController guards both the request and the reading of the response
 * body, so a stalled body is aborted after LOKI_FETCH_TIMEOUT as well.
 *
 * @param {string} envName - environment name, e.g. 'cms'
 * @param {string} expr - LogQL expression
 * @param {Object} options - query options
 * @param {number} options.from - start time (ms timestamp); falls back to now - LOKI_DEFAULTS.defaultTimeRange
 * @param {number} options.to - end time (ms timestamp); falls back to now
 * @param {number} options.maxLines - maximum number of lines; falls back to LOKI_DEFAULTS.maxLines
 * @param {string} options.direction - 'backward' | 'forward'; falls back to 'backward'
 * @returns {Object} the value produced by parseLokiResponse for the response JSON
 * @throws {Error} on a non-OK HTTP status, on timeout, or on any fetch/parse failure
 */
export async function queryLoki(envName, expr, options = {}) {
  const env = getLokiEnv(envName);
  const issuedAt = Date.now();
  const rangeStart = options.from || (issuedAt - LOKI_DEFAULTS.defaultTimeRange);
  const rangeEnd = options.to || issuedAt;
  const lineLimit = options.maxLines || LOKI_DEFAULTS.maxLines;
  const sortDirection = options.direction || 'backward';

  const endpoint = `${env.grafanaUrl}/api/ds/query?ds_type=loki`;
  const rangeQuery = {
    refId: 'A',
    expr,
    queryType: 'range',
    datasource: { type: 'loki', uid: env.datasourceUid },
    editorMode: 'builder',
    direction: sortDirection,
    maxLines: lineLimit,
    datasourceId: env.datasourceId,
    intervalMs: 1000,
    maxDataPoints: 1000
  };
  const payload = {
    queries: [rangeQuery],
    from: String(rangeStart),
    to: String(rangeEnd)
  };

  log(`[Loki] 查询: env=${envName}, expr=${expr}`);

  const abortCtl = new AbortController();
  const onDeadline = () => {
    log(`[Loki] ⏱ 超时 ${LOKI_FETCH_TIMEOUT}ms,主动 abort: env=${envName}, expr=${expr.substring(0, 80)}`);
    abortCtl.abort();
  };
  const deadline = setTimeout(onDeadline, LOKI_FETCH_TIMEOUT);

  try {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: buildHeaders(env),
      body: JSON.stringify(payload),
      signal: abortCtl.signal
    });

    if (!response.ok) {
      // Reading the error body is still covered by the abort signal.
      const errorText = await response.text();
      throw new Error(`Loki 查询失败 (${response.status}): ${errorText}`);
    }

    // The timer is only cleared in `finally`, so a stalled body is aborted too.
    const json = await response.json();
    return parseLokiResponse(json);
  } catch (err) {
    throw err.name === 'AbortError'
      ? new Error(`Loki 查询超时(${LOKI_FETCH_TIMEOUT}ms)`)
      : err;
  } finally {
    clearTimeout(deadline);
  }
}
104
+
105
/**
 * Tell whether an error raised by a Loki query represents a timeout.
 *
 * Recognizes the client-side abort message thrown by queryLoki
 * ("Loki 查询超时(...)") in addition to "timeout"/"Timeout" text and a 504
 * status appearing in the message.
 *
 * @param {*} err - the caught value
 * @returns {boolean} true when the message looks like a timeout
 */
function isLokiTimeoutError(err) {
  return /timeout|超时|504/i.test(describeLokiError(err, Infinity));
}

/**
 * Extract a bounded, printable message from a caught value.
 *
 * @param {*} err - the caught value (an Error or anything else)
 * @param {number} [maxLen=200] - maximum number of characters kept
 * @returns {string} the message text, truncated to `maxLen`
 */
function describeLokiError(err, maxLen = 200) {
  const message = String(err?.message ?? err ?? '');
  return Number.isFinite(maxLen) ? message.substring(0, maxLen) : message;
}

/**
 * Build the "nothing found" result returned when a query fails or is empty.
 *
 * @param {{label: string, from: ?number, to: ?number}} timeRange - range that was attempted
 * @param {string} [error] - user-facing error text; the key is omitted when not given
 * @returns {Object} { logs, labels, traceIds, stats, timeRange, notFound[, error] }
 */
function buildEmptyLokiResult(timeRange, error) {
  const empty = {
    logs: [], labels: [], traceIds: [], stats: null,
    timeRange,
    notFound: true
  };
  if (error !== undefined) {
    empty.error = error;
  }
  return empty;
}

/**
 * Loki query with automatic widening of the time range.
 *
 * Strategy: walk AUTO_RANGE_STEPS from the smallest window to the largest and
 * return as soon as a window yields log lines. When the caller supplies both
 * `from` and `to`, that range is queried once and no widening happens.
 * Query errors never propagate: they are logged and turned into an empty
 * result carrying `notFound: true` and an `error` message.
 *
 * @param {string} envName - environment name
 * @param {string} expr - LogQL expression
 * @param {Object} options - query options
 * @param {number} options.from - start timestamp (ms); with `to`, disables widening
 * @param {number} options.to - end timestamp (ms); with `from`, disables widening
 * @param {number} options.maxLines - maximum number of lines
 * @param {string} options.direction - sort direction
 * @returns {Object} { logs, labels, traceIds, stats, timeRange: { label, from, to } },
 *   plus `notFound` (and possibly `error`) when nothing was found
 */
export async function queryLokiAutoRange(envName, expr, options = {}) {
  // Explicit from/to: query exactly that range, no widening.
  if (options.from && options.to) {
    const timeRange = { label: '自定义', from: options.from, to: options.to };
    log(`[Loki] 使用指定时间范围查询: ${new Date(options.from).toLocaleString()} ~ ${new Date(options.to).toLocaleString()}`);
    try {
      const result = await queryLoki(envName, expr, options);
      result.timeRange = timeRange;
      return result;
    } catch (e) {
      const timedOut = isLokiTimeoutError(e);
      const detail = describeLokiError(e);
      log(`[Loki] ❌ 指定时间范围查询${timedOut ? '超时' : '失败'}: ${detail}`);
      return buildEmptyLokiResult(
        timeRange,
        timedOut
          ? '查询超时(数据量过大),请缩小时间范围或指定具体服务'
          : `查询失败: ${detail}`
      );
    }
  }

  // Automatic widening: smallest window first.
  const now = Date.now();
  for (const step of AUTO_RANGE_STEPS) {
    const from = now - step.range;
    const to = now;

    log(`[Loki] 自动递进: 尝试 ${step.label} 范围...`);

    try {
      const result = await queryLoki(envName, expr, { ...options, from, to });

      if (result.logs.length > 0) {
        log(`[Loki] ${step.label} 范围内找到 ${result.logs.length} 行日志`);
        result.timeRange = { label: step.label, from, to };
        return result;
      }

      log(`[Loki] ⏭️ ${step.label} 范围内无结果,扩大范围...`);
    } catch (e) {
      // A timeout or failure stops the widening; a larger window would only be slower.
      const timedOut = isLokiTimeoutError(e);
      const detail = describeLokiError(e);
      log(`[Loki] ⚠️ ${step.label} 范围查询${timedOut ? '超时' : '失败'}: ${detail}`);
      return buildEmptyLokiResult(
        { label: step.label, from, to },
        timedOut
          ? `查询在递进到 ${step.label} 范围时超时(数据量过大),请缩小时间范围或指定具体服务查询`
          : `查询在递进到 ${step.label} 范围时失败: ${detail}`
      );
    }
  }

  // No window produced any log line.
  log(`[Loki] ❌ 所有时间范围均未找到结果`);
  return buildEmptyLokiResult({ label: '未找到', from: null, to: null });
}
186
+
187
/**
 * Convert a user-supplied time string into a millisecond timestamp.
 *
 * A string of 10 to 13 digits is treated as an epoch value: values below 1e12
 * are taken as seconds and scaled to milliseconds, larger ones are used as-is.
 * Anything else is handed to the Date constructor
 * (e.g. "2026-02-06 12:00:00", "2026-02-06", ISO 8601).
 *
 * @param {string} timeStr - time string
 * @returns {number|null} millisecond timestamp, or null when empty or unparseable
 */
export function parseTimeStr(timeStr) {
  if (!timeStr) {
    return null;
  }

  const looksLikeEpoch = /^\d{10,13}$/.test(timeStr);
  if (looksLikeEpoch) {
    const epoch = Number.parseInt(timeStr, 10);
    // seconds → milliseconds
    return epoch >= 1e12 ? epoch : epoch * 1000;
  }

  const millis = new Date(timeStr).getTime();
  return Number.isNaN(millis) ? null : millis;
}
203
+
204
+ // ============================================================
205
+ // 标签查询
206
+ // ============================================================
207
+
208
/**
 * Nanosecond start/end bounds for label lookups: from
 * now - LOKI_DEFAULTS.defaultTimeRange up to now.
 *
 * @returns {{start: number, end: number}} bounds in nanoseconds
 */
function lokiLabelRangeNs() {
  const now = Date.now();
  return {
    start: (now - LOKI_DEFAULTS.defaultTimeRange) * 1_000_000,
    end: now * 1_000_000
  };
}

/**
 * GET a Grafana datasource resource and return its `data` array.
 *
 * Uses the same LOKI_FETCH_TIMEOUT guard as queryLoki: one AbortController
 * covers both the request and the reading of the response body, so a stalled
 * label lookup cannot hang indefinitely.
 *
 * @param {Object} env - Loki environment configuration
 * @param {string} url - fully built resource URL
 * @param {string} failureLabel - prefix used in thrown error messages
 * @returns {Promise<Array>} the `data` field of the response, or [] when absent
 * @throws {Error} on a non-OK status, on timeout, or on any fetch/parse failure
 */
async function fetchLokiResourceList(env, url, failureLabel) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), LOKI_FETCH_TIMEOUT);
  try {
    const resp = await fetch(url, { headers: buildHeaders(env), signal: controller.signal });
    if (!resp.ok) throw new Error(`${failureLabel} (${resp.status})`);
    const data = await resp.json();
    return data?.data || [];
  } catch (e) {
    if (e?.name === 'AbortError') {
      throw new Error(`${failureLabel}: 请求超时(${LOKI_FETCH_TIMEOUT}ms)`);
    }
    throw e;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Fetch the list of Loki label names for an environment.
 *
 * @param {string} envName - environment name
 * @returns {Promise<Array>} label names, or [] when the response has no data
 */
export async function getLokiLabels(envName) {
  const env = getLokiEnv(envName);
  const { start, end } = lokiLabelRangeNs();
  const url = `${env.grafanaUrl}/api/datasources/uid/${env.datasourceUid}/resources/labels?start=${start}&end=${end}`;
  return fetchLokiResourceList(env, url, '获取标签失败');
}

/**
 * Fetch the values of one Loki label, optionally restricted by a stream selector.
 *
 * @param {string} envName - environment name
 * @param {string} label - label name; URL-encoded before being placed in the path
 * @param {string} [query=''] - optional selector passed as the `query` parameter
 * @returns {Promise<Array>} label values, or [] when the response has no data
 */
export async function getLokiLabelValues(envName, label, query = '') {
  const env = getLokiEnv(envName);
  const { start, end } = lokiLabelRangeNs();
  let url = `${env.grafanaUrl}/api/datasources/uid/${env.datasourceUid}/resources/label/${encodeURIComponent(label)}/values?start=${start}&end=${end}`;
  if (query) url += `&query=${encodeURIComponent(query)}`;
  return fetchLokiResourceList(env, url, '获取标签值失败');
}
234
+
235
+ // ============================================================
236
+ // 响应解析
237
+ // ============================================================
238
+
239
/**
 * Flatten a Grafana Loki query response into plain arrays.
 *
 * Reads `data.results.A.frames`. For every frame that has at least three
 * value columns, column 0 is used as the per-line label objects and column 2
 * as the log lines (column 1, timestamps per the original author's note, is
 * not read). Trace ids are then extracted from all collected lines, and the
 * first frame's `schema.meta.stats` entries are turned into a
 * `displayName -> value` map.
 *
 * @param {Object} data - Parsed JSON body of the query response.
 * @returns {{logs: Array, labels: Array, traceIds: Array, stats: (Object|null)}}
 */
export function parseLokiResponse(data) {
  const parsed = { logs: [], labels: [], traceIds: [], stats: null };

  const frames = data?.results?.A?.frames;
  if (!frames || frames.length === 0) {
    return parsed;
  }

  for (const frame of frames) {
    const columns = frame?.data?.values;
    if (!columns || columns.length < 3) {
      continue;
    }
    const frameLabels = columns[0] || [];
    const frameLines = columns[2] || [];
    for (const [idx, line] of frameLines.entries()) {
      parsed.logs.push(line);
      // A line without a label entry gets an empty label object.
      parsed.labels.push(frameLabels[idx] || {});
    }
  }

  parsed.traceIds = extractTraceIds(parsed.logs);

  // Only the first frame's statistics are reported.
  const statList = frames[0]?.schema?.meta?.stats;
  if (statList) {
    parsed.stats = {};
    for (const { displayName, value } of statList) {
      parsed.stats[displayName] = value;
    }
  }

  return parsed;
}
266
+
267
+ // ============================================================
268
+ // traceId 提取
269
+ // ============================================================
270
+
271
/**
 * Collect the distinct trace ids found in a list of log lines.
 *
 * A trace id is any run of exactly 32 hexadecimal characters enclosed in
 * square brackets (matched case-insensitively). Ids are lower-cased and
 * returned in order of first appearance.
 *
 * @param {Iterable<string>} lines - Log lines to scan.
 * @returns {string[]} Unique lower-cased trace ids.
 */
export function extractTraceIds(lines) {
  const pattern = /\[([a-f0-9]{32})\]/gi;
  const seen = new Set();

  for (const line of lines) {
    // matchAll works on a private copy of the regex, so no lastIndex bookkeeping is needed.
    for (const [, id] of String(line).matchAll(pattern)) {
      seen.add(id.toLowerCase());
    }
  }

  return Array.from(seen);
}
288
+
289
+ // ============================================================
290
+ // 服务名解析
291
+ // ============================================================
292
+
293
/**
 * Derive a service name from a Loki `filename` label value.
 *
 * Example:
 *   /data/services/logs/senior/clife-senior-health-app/normal_logs/normal.log
 *   -> "clife-senior-health"
 *
 * Patterns are tried in order and the first hit wins:
 *   1. a path segment of the form `clife-senior-<name>-app` or `clife-senior-<name>-service`
 *   2. any path segment ending in `-app` or `-service` (e.g. `device-manage-service`)
 * In both cases the `-app` / `-service` suffix is stripped from the result.
 *
 * @param {string} filename - Value of the `filename` label.
 * @returns {string|null} The service name, or null when the input is empty or nothing matches.
 */
export function parseServiceFromFilename(filename) {
  if (!filename) return null;

  const patterns = [
    /\/(clife-senior-[a-zA-Z0-9-]+?)(?:-app|-service)\//,
    /\/([a-zA-Z0-9-]+?)(?:-app|-service)\//,
  ];

  for (const pattern of patterns) {
    const hit = filename.match(pattern);
    if (hit) return hit[1];
  }
  return null;
}
308
+
309
/**
 * Group the log lines of a parsed Loki result by service.
 *
 * The service of each line is derived from the `filename` of the label object
 * at the same index; lines whose service cannot be determined are collected
 * under "unknown". Trace ids are extracted per line and de-duplicated within
 * each service.
 *
 * Grouping is done in a Map so that a service name which happens to equal an
 * inherited Object member (e.g. "constructor", "toString") is handled like
 * any other name instead of colliding with the prototype.
 *
 * @param {Object} lokiResult - Return value of parseLokiResponse (uses `logs` and `labels`).
 * @returns {Object} `{ serviceName: { logs: string[], traceIds: string[] } }`
 */
export function groupLogsByService(lokiResult) {
  const buckets = new Map();

  for (let i = 0; i < lokiResult.logs.length; i++) {
    const label = lokiResult.labels[i] || {};
    const serviceName = parseServiceFromFilename(label.filename) || 'unknown';
    const logLine = lokiResult.logs[i];

    let bucket = buckets.get(serviceName);
    if (!bucket) {
      bucket = { logs: [], traceIds: new Set() };
      buckets.set(serviceName, bucket);
    }
    bucket.logs.push(logLine);

    // Trace ids found on this line belong to this service.
    for (const id of extractTraceIds([logLine])) {
      bucket.traceIds.add(id);
    }
  }

  // Map -> plain object, Set -> Array.
  return Object.fromEntries(
    [...buckets].map(([name, { logs, traceIds }]) => [name, { logs, traceIds: [...traceIds] }])
  );
}
339
+
340
+ // ============================================================
341
+ // LogQL 构建辅助
342
+ // ============================================================
343
+
344
/**
 * Build the LogQL expression that selects one service's normal.log,
 * optionally filtered by a keyword.
 *
 * The filename path depends on whether the environment uses a project label
 * (`hasProjectLabel` in its LOKI_ENVIRONMENTS entry; treated as true when the
 * environment is not given, not found, or the flag is anything but `false`):
 *   - with project label (CMS):
 *       /data/services/logs/<project>/<servicePodPattern>/normal_logs/normal.log
 *   - without project label (private deployments):
 *       /data/services/logs/<servicePodPattern>/normal_logs/normal.log
 *
 * The keyword is normally emitted as a backtick-quoted raw string. A keyword
 * that itself contains a backtick cannot be written that way, so it is
 * emitted as a double-quoted, escaped string instead.
 *
 * @param {string} project - Project name, e.g. 'senior'.
 * @param {string} servicePodPattern - Service directory name, e.g. 'clife-senior-health-app'.
 * @param {string} [keyword=''] - Optional text to search for.
 * @param {string} [envName=''] - Environment name, e.g. 'cms' or 'chengyang'.
 * @returns {string} The LogQL expression.
 */
export function buildServiceLogQL(project, servicePodPattern, keyword = '', envName = '') {
  const env = envName ? LOKI_ENVIRONMENTS[envName] : null;
  const hasProject = env ? env.hasProjectLabel !== false : true;

  const filename = hasProject
    ? `/data/services/logs/${project}/${servicePodPattern}/normal_logs/normal.log`
    : `/data/services/logs/${servicePodPattern}/normal_logs/normal.log`;

  let expr = `{filename="${filename}"}`;
  if (keyword) {
    const text = String(keyword);
    // Raw (backtick) strings cannot contain a backtick; fall back to an escaped double-quoted literal.
    const literal = text.includes('`') ? JSON.stringify(text) : `\`${text}\``;
    expr += ` |= ${literal}`;
  }
  return expr;
}
370
+
371
/**
 * Build the LogQL expression that searches every service of a project for a keyword.
 *
 * The stream selector depends on whether the environment uses a project label
 * (`hasProjectLabel` in its LOKI_ENVIRONMENTS entry; treated as true when the
 * environment is not given, not found, or the flag is anything but `false`):
 *   - with project label (CMS):      {project="<project>"} |= `keyword`
 *   - without (private deployments): {filename=~"/data/services/logs/clife-<project>-.*normal.log"} |= `keyword`
 *
 * The keyword is normally emitted as a backtick-quoted raw string. A keyword
 * that itself contains a backtick cannot be written that way, so it is
 * emitted as a double-quoted, escaped string instead.
 *
 * @param {string} project - Project name, e.g. 'senior'.
 * @param {string} keyword - Text to search for.
 * @param {string} [envName=''] - Environment name, e.g. 'cms' or 'chengyang'.
 * @returns {string} The LogQL expression.
 */
export function buildProjectLogQL(project, keyword, envName = '') {
  const env = envName ? LOKI_ENVIRONMENTS[envName] : null;
  const hasProject = env ? env.hasProjectLabel !== false : true;

  const text = String(keyword);
  // Raw (backtick) strings cannot contain a backtick; fall back to an escaped double-quoted literal.
  const literal = text.includes('`') ? JSON.stringify(text) : `\`${text}\``;

  if (hasProject) {
    // Select by the project label directly.
    return `{project="${project}"} |= ${literal}`;
  }
  // No project label: match the normal.log of every clife-<project>-* service by filename regex.
  return `{filename=~"/data/services/logs/clife-${project}-.*normal.log"} |= ${literal}`;
}
393
+
394
+ // ============================================================
395
+ // 内部辅助函数
396
+ // ============================================================
397
+
398
/**
 * Look up the configuration of a Loki environment.
 *
 * @param {string} envName - Key into LOKI_ENVIRONMENTS.
 * @returns {Object} The environment's configuration entry.
 * @throws {Error} When no entry exists; the message lists the available environment names.
 */
function getLokiEnv(envName) {
  const config = LOKI_ENVIRONMENTS[envName];
  if (config) {
    return config;
  }
  const known = Object.keys(LOKI_ENVIRONMENTS).join(', ');
  throw new Error(`未知的 Loki 环境 "${envName}",可用环境: ${known}`);
}
407
+
408
/**
 * Build the HTTP headers for a request to an environment's Grafana Loki datasource.
 *
 * Always sets the JSON content/accept types, the Grafana org id (falling back
 * to 1 when `env.orgId` is falsy), the plugin id and the datasource uid.
 * A Basic `Authorization` header is added only when both `env.username` and
 * `env.password` are set.
 *
 * @param {Object} env - Environment configuration entry.
 * @returns {Object} Header name -> value map.
 */
function buildHeaders(env) {
  const { username, password } = env;

  // Credentials are optional; without both parts no Authorization header is sent.
  const authHeader = username && password
    ? { Authorization: `Basic ${Buffer.from(`${username}:${password}`).toString('base64')}` }
    : {};

  return {
    'Content-Type': 'application/json',
    'Accept': 'application/json',
    'x-grafana-org-id': String(env.orgId || 1),
    'x-plugin-id': 'loki',
    'x-datasource-uid': env.datasourceUid,
    ...authHeader,
  };
}
426
+
427
+ // ============================================================
428
+ // 公共辅助函数
429
+ // ============================================================
430
+
431
/**
 * List every configured Loki environment.
 *
 * @returns {Array<{name: string, description: *, grafanaUrl: *, project: *}>}
 *   One summary per LOKI_ENVIRONMENTS entry; `project` carries the entry's `defaultProject`.
 */
export function listLokiEnvironments() {
  const summaries = [];
  for (const name of Object.keys(LOKI_ENVIRONMENTS)) {
    const { description, grafanaUrl, defaultProject } = LOKI_ENVIRONMENTS[name];
    summaries.push({ name, description, grafanaUrl, project: defaultProject });
  }
  return summaries;
}
440
+
441
/**
 * List the services of an environment, discovered from Loki `filename` label values.
 *
 * How the filenames are narrowed depends on whether the environment uses a
 * project label (`hasProjectLabel`; treated as true when the environment is
 * not found or the flag is anything but `false`):
 *   - with project label (CMS): Loki is asked for filenames matching {project="<project>"}
 *   - without (private deployments): all filenames are fetched and those
 *     containing "/clife-<project>-" are kept
 * Only paths containing "/normal_logs/normal.log" contribute a service name.
 *
 * @param {string} envName - Environment name.
 * @param {string} [project='senior'] - Project name.
 * @returns {Promise<string[]>} Sorted, de-duplicated service names.
 */
export async function listLokiServices(envName, project = 'senior') {
  const env = LOKI_ENVIRONMENTS[envName];
  const usesProjectLabel = env ? env.hasProjectLabel !== false : true;

  const filenames = usesProjectLabel
    ? await getLokiLabelValues(envName, 'filename', `{project="${project}"}`)
    : (await getLokiLabelValues(envName, 'filename')).filter((path) => path.includes(`/clife-${project}-`));

  const services = filenames
    .filter((path) => path.includes('/normal_logs/normal.log'))
    .map((path) => parseServiceFromFilename(path))
    .filter(Boolean); // drop paths no service name could be derived from

  return [...new Set(services)].sort();
}
474
+
475
/**
 * Turn a service name into its Loki log directory name.
 *
 * Examples:
 *   'health'                  -> 'clife-senior-health-app'
 *   'clife-senior-health'     -> 'clife-senior-health-app'
 *   'clife-senior-health-app' -> 'clife-senior-health-app'
 *   'health-app'              -> 'clife-senior-health-app'
 *
 * The 'clife-senior-' prefix and the '-app' suffix are each added only when
 * missing, so a short name that already ends in '-app' no longer receives a
 * doubled suffix.
 *
 * @param {string} serviceName - Short name ('health', 'core', 'gateway') or full name.
 * @returns {string} The service directory name.
 */
export function getLokiServiceDirName(serviceName) {
  const qualified = serviceName.startsWith('clife-senior-')
    ? serviceName
    : `clife-senior-${serviceName}`;
  return qualified.endsWith('-app') ? qualified : `${qualified}-app`;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mcp-log-query-server",
3
- "version": "3.5.1",
3
+ "version": "3.5.3",
4
4
  "description": "MCP Server for querying server logs via SSH jump host and Grafana Loki API",
5
5
  "main": "index.js",
6
6
  "type": "module",
@@ -12,6 +12,7 @@
12
12
  "config.js",
13
13
  "ssh-client.js",
14
14
  "loki-client.js",
15
+ "logger.js",
15
16
  "server-sse.js",
16
17
  "README.md"
17
18
  ],
package/ssh-client.js CHANGED
@@ -15,6 +15,7 @@
15
15
 
16
16
  import { Client } from 'ssh2';
17
17
  import { JUMP_HOST, K8S_SERVER, DEFAULTS } from './config.js';
18
+ import { log } from './logger.js';
18
19
 
19
20
  // ============================================================
20
21
  // 并发信号量:防止同时打开过多堡垒机会话被踢
@@ -39,13 +40,13 @@ const _sshSem = {
39
40
  function sshAcquire(timeoutMs = SSH_ACQUIRE_TIMEOUT) {
40
41
  if (_sshSem.active < _sshSem.max) {
41
42
  _sshSem.active++;
42
- console.error(`[SSH-Sem] acquire 直接通过 (active=${_sshSem.active}/${_sshSem.max}, queue=${_sshSem.queue.length})`);
43
+ log(`[SSH-Sem] acquire 直接通过 (active=${_sshSem.active}/${_sshSem.max}, queue=${_sshSem.queue.length})`);
43
44
  return Promise.resolve();
44
45
  }
45
46
  if (_sshSem.queue.length >= _sshSem.queueMax) {
46
47
  return Promise.reject(new Error(`SSH 并发队列已满(>${_sshSem.queueMax}),请稍后重试`));
47
48
  }
48
- console.error(`[SSH-Sem] acquire 进入排队 (active=${_sshSem.active}/${_sshSem.max}, queue=${_sshSem.queue.length + 1}, timeout=${timeoutMs}ms)`);
49
+ log(`[SSH-Sem] acquire 进入排队 (active=${_sshSem.active}/${_sshSem.max}, queue=${_sshSem.queue.length + 1}, timeout=${timeoutMs}ms)`);
49
50
  return new Promise((resolve, reject) => {
50
51
  let settled = false;
51
52
  const enterQueue = () => {
@@ -53,7 +54,7 @@ function sshAcquire(timeoutMs = SSH_ACQUIRE_TIMEOUT) {
53
54
  settled = true;
54
55
  clearTimeout(timer);
55
56
  _sshSem.active++;
56
- console.error(`[SSH-Sem] acquire 出队获得槽位 (active=${_sshSem.active}/${_sshSem.max}, queue=${_sshSem.queue.length})`);
57
+ log(`[SSH-Sem] acquire 出队获得槽位 (active=${_sshSem.active}/${_sshSem.max}, queue=${_sshSem.queue.length})`);
57
58
  resolve();
58
59
  };
59
60
  _sshSem.queue.push(enterQueue);
@@ -63,7 +64,7 @@ function sshAcquire(timeoutMs = SSH_ACQUIRE_TIMEOUT) {
63
64
  // 从队列里移除自己的 entry
64
65
  const idx = _sshSem.queue.indexOf(enterQueue);
65
66
  if (idx >= 0) _sshSem.queue.splice(idx, 1);
66
- console.error(`[SSH-Sem] acquire 排队超时 (${timeoutMs}ms, active=${_sshSem.active}/${_sshSem.max}, queue=${_sshSem.queue.length})`);
67
+ log(`[SSH-Sem] acquire 排队超时 (${timeoutMs}ms, active=${_sshSem.active}/${_sshSem.max}, queue=${_sshSem.queue.length})`);
67
68
  reject(new Error(`SSH 排队等待超时 (${timeoutMs}ms):前面请求卡住,或并发过高。可调整 SSH_MAX_CONCURRENT / SSH_ACQUIRE_TIMEOUT`));
68
69
  }, timeoutMs);
69
70
  });
@@ -74,7 +75,7 @@ function sshRelease() {
74
75
  // 超时的 entry 已在 timer 里从 queue 移除,这里 shift 到的都是活的
75
76
  const next = _sshSem.queue.shift();
76
77
  if (next) next();
77
- console.error(`[SSH-Sem] release (active=${_sshSem.active}/${_sshSem.max}, queue=${_sshSem.queue.length})`);
78
+ log(`[SSH-Sem] release (active=${_sshSem.active}/${_sshSem.max}, queue=${_sshSem.queue.length})`);
78
79
  }
79
80
 
80
81
  /**