4runr-os 2.10.49 → 2.10.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,298 +1,298 @@
1
- /**
2
- * Advanced monitoring routes for Portal Monitoring (Phase 0)
3
- * Provides structured logs, dependency details, and extended metrics
4
- *
5
- * SECURITY: All monitoring endpoints require authentication in production.
6
- * For local development, consider network binding (127.0.0.1 only).
7
- */
8
-
9
- import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
10
- import { getQueue } from '../queue/index.js';
11
- import { getRedisClient } from '../db/redis.js';
12
- import { getPrismaClient } from '../db/prisma.js';
13
- import { performHealthChecks } from '../health/index.js';
14
- import { createLogger } from '@4runr/shared';
15
- import {
16
- getDependencyPoolsPayload,
17
- getMonitoringHttpPayload,
18
- getMonitoringRunsPayload,
19
- getMonitoringSsePayload,
20
- } from '../metrics/monitoring-detail.js';
21
-
22
// Module-level logger created with the name 'Gateway:Monitoring'; used by the monitoring routes in this file.
- const logger = createLogger('Gateway:Monitoring');
23
-
24
/**
 * Shape of a single entry held in the in-memory monitoring log buffer.
 */
interface LogEntry {
  timestamp: string;
  level: 'error' | 'warn' | 'info' | 'debug';
  message: string;
  context?: string;
  data?: Record<string, unknown>;
}

// Upper bound on the number of entries retained in `logBuffer`.
const LOG_BUFFER_SIZE = 1000;

// Bounded in-memory log store: oldest entry at index 0, newest at the end.
const logBuffer: LogEntry[] = [];

/**
 * Append an entry to the monitoring log buffer, evicting the oldest entry
 * once the buffer holds more than LOG_BUFFER_SIZE items.
 *
 * @param entry Log entry to record.
 */
export function addMonitoringLog(entry: LogEntry): void {
  // Array.prototype.push returns the new length, so no second length read is needed.
  const sizeAfterPush = logBuffer.push(entry);
  if (sizeAfterPush <= LOG_BUFFER_SIZE) {
    return;
  }
  // Over capacity: drop the oldest entry.
  logBuffer.shift();
}
45
-
46
/**
 * Register the Portal Monitoring (Phase 0) routes on a Fastify instance.
 *
 * Routes registered (all GET):
 *  - /api/monitoring/logs
 *  - /api/monitoring/dependencies/detail
 *  - /api/monitoring/metrics/http
 *  - /api/monitoring/metrics/runs
 *  - /api/monitoring/metrics/sse
 *  - /api/monitoring/metrics/queue
 *
 * @param fastify Fastify instance the routes are attached to.
 * @param options Optional configuration for route security.
 *   - `requireAuth`: `false` or omitted registers the routes without any
 *     preHandler; a function is installed as the first preHandler of every route.
 *   - `readRateLimit`: optional handler appended after the auth handler. It is
 *     only attached when an auth handler is present.
 * @throws TypeError when `requireAuth` is truthy but not a function (for
 *   example `true`), because there is no handler that could enforce auth.
 */
export function registerMonitoringRoutes(
  fastify: FastifyInstance,
  options?: {
    requireAuth?: boolean | any; // boolean false or auth handler function
    readRateLimit?: any;
  }
): void {
  const { requireAuth, readRateLimit } = options || {};

  // Fail fast on values such as `true`: they would otherwise be placed in the
  // preHandler array as-is, which is not a valid hook.
  if (requireAuth && typeof requireAuth !== 'function') {
    throw new TypeError(
      'registerMonitoringRoutes: requireAuth must be false or an auth handler function'
    );
  }

  // requireAuth can be false, a handler function, or undefined.
  const authHandler = requireAuth || null;
  const preHandler = authHandler
    ? readRateLimit
      ? [authHandler, readRateLimit]
      : [authHandler]
    : [];

  // Build a fresh options object per route; omit `preHandler` entirely when empty.
  const routeOptions = () => (preHandler.length > 0 ? { preHandler } : {});

  // Number of log entries returned when `limit` is missing or invalid.
  const DEFAULT_LOG_LIMIT = 100;

  /**
   * GET /api/monitoring/logs
   * Returns the last N buffered Gateway logs for TUI display.
   * Query: `limit` (default 100, capped at LOG_BUFFER_SIZE), `level` (optional filter).
   * SECURITY: Requires authentication (contains operational data)
   */
  fastify.get(
    '/api/monitoring/logs',
    routeOptions(),
    async (request: FastifyRequest, _reply: FastifyReply) => {
      const query = request.query as { limit?: string; level?: string };

      // Missing, non-numeric or negative limits fall back to the default.
      // Without this, NaN and 0 made slice() return the whole buffer, and a
      // negative value dropped the oldest entries instead of limiting.
      const parsedLimit = Number.parseInt(query.limit || '', 10);
      const requestedLimit =
        Number.isNaN(parsedLimit) || parsedLimit < 0 ? DEFAULT_LOG_LIMIT : parsedLimit;
      const limit = Math.min(requestedLimit, LOG_BUFFER_SIZE);

      const levelFilter = query.level as LogEntry['level'] | undefined;

      // Filter before limiting so `limit` means "last N entries of this level".
      const candidates = levelFilter
        ? logBuffer.filter((log) => log.level === levelFilter)
        : logBuffer;

      // slice(-0) is slice(0) and would return everything, so handle 0 explicitly.
      const logs = limit > 0 ? candidates.slice(-limit) : [];

      return {
        logs,
        count: logs.length,
        bufferSize: logBuffer.length,
        timestamp: new Date().toISOString(),
      };
    }
  );

  /**
   * GET /api/monitoring/dependencies/detail
   * Extended dependency info (connection pools, queue stats, etc.)
   * SECURITY: Requires authentication (exposes infrastructure details)
   */
  fastify.get(
    '/api/monitoring/dependencies/detail',
    routeOptions(),
    async (_request: FastifyRequest, _reply: FastifyReply) => {
      const health = await performHealthChecks();
      const details: {
        database?: {
          status: string;
          latency?: number;
          poolAvailable?: boolean;
          pool?: { active: number; idle: number };
          telemetry?: { source: string; bestEffort: boolean; note: string };
        };
        redis?: {
          status: string;
          latency?: number;
          connectionCountAvailable?: boolean;
          connections?: { active: number };
          telemetry?: { source: string; bestEffort: boolean; note: string };
        };
        queue?: {
          status: string;
          jobs?: {
            waiting: number;
            active: number;
            completed: number;
            failed: number;
            delayed: number;
          };
        };
      } = {};

      const pools = await getDependencyPoolsPayload();

      // Database details (latency is only included when the health check reported one)
      if (health.checks.database) {
        const dbLatency = health.checks.database.responseTime;
        details.database = {
          status: health.checks.database.status,
          ...(dbLatency !== undefined && { latency: dbLatency }),
          poolAvailable: true,
          pool: {
            active: pools.database.active,
            idle: pools.database.idle,
          },
          telemetry: pools.telemetry,
        };
      }

      // Redis details
      if (health.checks.redis) {
        const redisLatency = health.checks.redis.responseTime;
        details.redis = {
          status: health.checks.redis.status,
          ...(redisLatency !== undefined && { latency: redisLatency }),
          connectionCountAvailable: true,
          connections: {
            active: pools.redis.active,
          },
          telemetry: pools.telemetry,
        };
      }

      // Queue details (real data from BullMQ)
      if (health.checks.queue) {
        details.queue = {
          status: health.checks.queue.status,
        };

        try {
          const queue = getQueue();
          if (queue) {
            // BullMQ v4+ getJobCounts: pass state names as separate args
            // Returns object with keys matching exact state names passed
            const counts = await queue.getJobCounts(
              'waiting',
              'active',
              'completed',
              'failed',
              'delayed'
            ) as Record<string, number>;

            details.queue.jobs = {
              waiting: counts['waiting'] ?? 0,
              active: counts['active'] ?? 0,
              completed: counts['completed'] ?? 0,
              failed: counts['failed'] ?? 0,
              delayed: counts['delayed'] ?? 0,
            };
          }
        } catch (err) {
          // Best effort: the queue status is still reported, only `jobs` is omitted.
          logger.error('Failed to fetch queue job counts', {
            error: err instanceof Error ? err.message : String(err),
          });
        }
      }

      return {
        details,
        telemetry: pools.telemetry,
        timestamp: new Date().toISOString(),
      };
    }
  );

  /**
   * GET /api/monitoring/metrics/http
   * Detailed HTTP metrics with route breakdown
   * SECURITY: Requires authentication
   */
  fastify.get(
    '/api/monitoring/metrics/http',
    routeOptions(),
    async (_request: FastifyRequest, _reply: FastifyReply) => {
      return await getMonitoringHttpPayload();
    }
  );

  /**
   * GET /api/monitoring/metrics/runs
   * Detailed run metrics
   * SECURITY: Requires authentication
   */
  fastify.get(
    '/api/monitoring/metrics/runs',
    routeOptions(),
    async (_request: FastifyRequest, _reply: FastifyReply) => {
      return await getMonitoringRunsPayload();
    }
  );

  /**
   * GET /api/monitoring/metrics/sse
   * SSE connection / message counters (Phase 3)
   */
  fastify.get(
    '/api/monitoring/metrics/sse',
    routeOptions(),
    async (_request: FastifyRequest, _reply: FastifyReply) => {
      return await getMonitoringSsePayload();
    }
  );

  /**
   * GET /api/monitoring/metrics/queue
   * Detailed queue metrics with job states
   * SECURITY: Requires authentication
   */
  fastify.get(
    '/api/monitoring/metrics/queue',
    routeOptions(),
    async (_request: FastifyRequest, _reply: FastifyReply) => {
      try {
        const queue = getQueue();
        if (!queue) {
          return {
            available: false,
            message: 'Queue not initialized (Redis required)',
            timestamp: new Date().toISOString(),
          };
        }

        // BullMQ v4+ getJobCounts: returns exact keys matching state names passed
        const counts = await queue.getJobCounts(
          'waiting',
          'active',
          'completed',
          'failed',
          'delayed',
          'paused'
        ) as Record<string, number>;

        return {
          available: true,
          jobs: {
            waiting: counts['waiting'] ?? 0,
            active: counts['active'] ?? 0,
            completed: counts['completed'] ?? 0,
            failed: counts['failed'] ?? 0,
            delayed: counts['delayed'] ?? 0,
            paused: counts['paused'] ?? 0,
          },
          queueName: (queue as any).name || 'run-execution',
          timestamp: new Date().toISOString(),
        };
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        logger.error('Failed to get queue metrics', {
          error: reason,
        });
        return {
          available: false,
          error: reason,
          timestamp: new Date().toISOString(),
        };
      }
    }
  );

  logger.info('Monitoring routes registered (Phase 0)', {
    authRequired: preHandler.length > 0,
    endpoints: [
      '/api/monitoring/logs',
      '/api/monitoring/dependencies/detail',
      '/api/monitoring/metrics/http',
      '/api/monitoring/metrics/runs',
      '/api/monitoring/metrics/sse',
      '/api/monitoring/metrics/queue',
    ],
  });
}
1
+ /**
2
+ * Advanced monitoring routes for Portal Monitoring (Phase 0)
3
+ * Provides structured logs, dependency details, and extended metrics
4
+ *
5
+ * SECURITY: All monitoring endpoints require authentication in production.
6
+ * For local development, consider network binding (127.0.0.1 only).
7
+ */
8
+
9
+ import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
10
+ import { getQueue } from '../queue/index.js';
11
+ import { getRedisClient } from '../db/redis.js';
12
+ import { getPrismaClient } from '../db/prisma.js';
13
+ import { performHealthChecks } from '../health/index.js';
14
+ import { createLogger } from '@4runr/shared';
15
+ import {
16
+ getDependencyPoolsPayload,
17
+ getMonitoringHttpPayload,
18
+ getMonitoringRunsPayload,
19
+ getMonitoringSsePayload,
20
+ } from '../metrics/monitoring-detail.js';
21
+
22
// Module-level logger created with the name 'Gateway:Monitoring'; used by the monitoring routes in this file.
+ const logger = createLogger('Gateway:Monitoring');
23
+
24
/**
 * Shape of a single entry held in the in-memory monitoring log buffer.
 */
interface LogEntry {
  timestamp: string;
  level: 'error' | 'warn' | 'info' | 'debug';
  message: string;
  context?: string;
  data?: Record<string, unknown>;
}

// Upper bound on the number of entries retained in `logBuffer`.
const LOG_BUFFER_SIZE = 1000;

// Bounded in-memory log store: oldest entry at index 0, newest at the end.
const logBuffer: LogEntry[] = [];

/**
 * Append an entry to the monitoring log buffer, evicting the oldest entry
 * once the buffer holds more than LOG_BUFFER_SIZE items.
 *
 * @param entry Log entry to record.
 */
export function addMonitoringLog(entry: LogEntry): void {
  // Array.prototype.push returns the new length, so no second length read is needed.
  const sizeAfterPush = logBuffer.push(entry);
  if (sizeAfterPush <= LOG_BUFFER_SIZE) {
    return;
  }
  // Over capacity: drop the oldest entry.
  logBuffer.shift();
}
45
+
46
/**
 * Register the Portal Monitoring (Phase 0) routes on a Fastify instance.
 *
 * Routes registered (all GET):
 *  - /api/monitoring/logs
 *  - /api/monitoring/dependencies/detail
 *  - /api/monitoring/metrics/http
 *  - /api/monitoring/metrics/runs
 *  - /api/monitoring/metrics/sse
 *  - /api/monitoring/metrics/queue
 *
 * @param fastify Fastify instance the routes are attached to.
 * @param options Optional configuration for route security.
 *   - `requireAuth`: `false` or omitted registers the routes without any
 *     preHandler; a function is installed as the first preHandler of every route.
 *   - `readRateLimit`: optional handler appended after the auth handler. It is
 *     only attached when an auth handler is present.
 * @throws TypeError when `requireAuth` is truthy but not a function (for
 *   example `true`), because there is no handler that could enforce auth.
 */
export function registerMonitoringRoutes(
  fastify: FastifyInstance,
  options?: {
    requireAuth?: boolean | any; // boolean false or auth handler function
    readRateLimit?: any;
  }
): void {
  const { requireAuth, readRateLimit } = options || {};

  // Fail fast on values such as `true`: they would otherwise be placed in the
  // preHandler array as-is, which is not a valid hook.
  if (requireAuth && typeof requireAuth !== 'function') {
    throw new TypeError(
      'registerMonitoringRoutes: requireAuth must be false or an auth handler function'
    );
  }

  // requireAuth can be false, a handler function, or undefined.
  const authHandler = requireAuth || null;
  const preHandler = authHandler
    ? readRateLimit
      ? [authHandler, readRateLimit]
      : [authHandler]
    : [];

  // Build a fresh options object per route; omit `preHandler` entirely when empty.
  const routeOptions = () => (preHandler.length > 0 ? { preHandler } : {});

  // Number of log entries returned when `limit` is missing or invalid.
  const DEFAULT_LOG_LIMIT = 100;

  /**
   * GET /api/monitoring/logs
   * Returns the last N buffered Gateway logs for TUI display.
   * Query: `limit` (default 100, capped at LOG_BUFFER_SIZE), `level` (optional filter).
   * SECURITY: Requires authentication (contains operational data)
   */
  fastify.get(
    '/api/monitoring/logs',
    routeOptions(),
    async (request: FastifyRequest, _reply: FastifyReply) => {
      const query = request.query as { limit?: string; level?: string };

      // Missing, non-numeric or negative limits fall back to the default.
      // Without this, NaN and 0 made slice() return the whole buffer, and a
      // negative value dropped the oldest entries instead of limiting.
      const parsedLimit = Number.parseInt(query.limit || '', 10);
      const requestedLimit =
        Number.isNaN(parsedLimit) || parsedLimit < 0 ? DEFAULT_LOG_LIMIT : parsedLimit;
      const limit = Math.min(requestedLimit, LOG_BUFFER_SIZE);

      const levelFilter = query.level as LogEntry['level'] | undefined;

      // Filter before limiting so `limit` means "last N entries of this level".
      const candidates = levelFilter
        ? logBuffer.filter((log) => log.level === levelFilter)
        : logBuffer;

      // slice(-0) is slice(0) and would return everything, so handle 0 explicitly.
      const logs = limit > 0 ? candidates.slice(-limit) : [];

      return {
        logs,
        count: logs.length,
        bufferSize: logBuffer.length,
        timestamp: new Date().toISOString(),
      };
    }
  );

  /**
   * GET /api/monitoring/dependencies/detail
   * Extended dependency info (connection pools, queue stats, etc.)
   * SECURITY: Requires authentication (exposes infrastructure details)
   */
  fastify.get(
    '/api/monitoring/dependencies/detail',
    routeOptions(),
    async (_request: FastifyRequest, _reply: FastifyReply) => {
      const health = await performHealthChecks();
      const details: {
        database?: {
          status: string;
          latency?: number;
          poolAvailable?: boolean;
          pool?: { active: number; idle: number };
          telemetry?: { source: string; bestEffort: boolean; note: string };
        };
        redis?: {
          status: string;
          latency?: number;
          connectionCountAvailable?: boolean;
          connections?: { active: number };
          telemetry?: { source: string; bestEffort: boolean; note: string };
        };
        queue?: {
          status: string;
          jobs?: {
            waiting: number;
            active: number;
            completed: number;
            failed: number;
            delayed: number;
          };
        };
      } = {};

      const pools = await getDependencyPoolsPayload();

      // Database details (latency is only included when the health check reported one)
      if (health.checks.database) {
        const dbLatency = health.checks.database.responseTime;
        details.database = {
          status: health.checks.database.status,
          ...(dbLatency !== undefined && { latency: dbLatency }),
          poolAvailable: true,
          pool: {
            active: pools.database.active,
            idle: pools.database.idle,
          },
          telemetry: pools.telemetry,
        };
      }

      // Redis details
      if (health.checks.redis) {
        const redisLatency = health.checks.redis.responseTime;
        details.redis = {
          status: health.checks.redis.status,
          ...(redisLatency !== undefined && { latency: redisLatency }),
          connectionCountAvailable: true,
          connections: {
            active: pools.redis.active,
          },
          telemetry: pools.telemetry,
        };
      }

      // Queue details (real data from BullMQ)
      if (health.checks.queue) {
        details.queue = {
          status: health.checks.queue.status,
        };

        try {
          const queue = getQueue();
          if (queue) {
            // BullMQ v4+ getJobCounts: pass state names as separate args
            // Returns object with keys matching exact state names passed
            const counts = await queue.getJobCounts(
              'waiting',
              'active',
              'completed',
              'failed',
              'delayed'
            ) as Record<string, number>;

            details.queue.jobs = {
              waiting: counts['waiting'] ?? 0,
              active: counts['active'] ?? 0,
              completed: counts['completed'] ?? 0,
              failed: counts['failed'] ?? 0,
              delayed: counts['delayed'] ?? 0,
            };
          }
        } catch (err) {
          // Best effort: the queue status is still reported, only `jobs` is omitted.
          logger.error('Failed to fetch queue job counts', {
            error: err instanceof Error ? err.message : String(err),
          });
        }
      }

      return {
        details,
        telemetry: pools.telemetry,
        timestamp: new Date().toISOString(),
      };
    }
  );

  /**
   * GET /api/monitoring/metrics/http
   * Detailed HTTP metrics with route breakdown
   * SECURITY: Requires authentication
   */
  fastify.get(
    '/api/monitoring/metrics/http',
    routeOptions(),
    async (_request: FastifyRequest, _reply: FastifyReply) => {
      return await getMonitoringHttpPayload();
    }
  );

  /**
   * GET /api/monitoring/metrics/runs
   * Detailed run metrics
   * SECURITY: Requires authentication
   */
  fastify.get(
    '/api/monitoring/metrics/runs',
    routeOptions(),
    async (_request: FastifyRequest, _reply: FastifyReply) => {
      return await getMonitoringRunsPayload();
    }
  );

  /**
   * GET /api/monitoring/metrics/sse
   * SSE connection / message counters (Phase 3)
   */
  fastify.get(
    '/api/monitoring/metrics/sse',
    routeOptions(),
    async (_request: FastifyRequest, _reply: FastifyReply) => {
      return await getMonitoringSsePayload();
    }
  );

  /**
   * GET /api/monitoring/metrics/queue
   * Detailed queue metrics with job states
   * SECURITY: Requires authentication
   */
  fastify.get(
    '/api/monitoring/metrics/queue',
    routeOptions(),
    async (_request: FastifyRequest, _reply: FastifyReply) => {
      try {
        const queue = getQueue();
        if (!queue) {
          return {
            available: false,
            message: 'Queue not initialized (Redis required)',
            timestamp: new Date().toISOString(),
          };
        }

        // BullMQ v4+ getJobCounts: returns exact keys matching state names passed
        const counts = await queue.getJobCounts(
          'waiting',
          'active',
          'completed',
          'failed',
          'delayed',
          'paused'
        ) as Record<string, number>;

        return {
          available: true,
          jobs: {
            waiting: counts['waiting'] ?? 0,
            active: counts['active'] ?? 0,
            completed: counts['completed'] ?? 0,
            failed: counts['failed'] ?? 0,
            delayed: counts['delayed'] ?? 0,
            paused: counts['paused'] ?? 0,
          },
          queueName: (queue as any).name || 'run-execution',
          timestamp: new Date().toISOString(),
        };
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        logger.error('Failed to get queue metrics', {
          error: reason,
        });
        return {
          available: false,
          error: reason,
          timestamp: new Date().toISOString(),
        };
      }
    }
  );

  logger.info('Monitoring routes registered (Phase 0)', {
    authRequired: preHandler.length > 0,
    endpoints: [
      '/api/monitoring/logs',
      '/api/monitoring/dependencies/detail',
      '/api/monitoring/metrics/http',
      '/api/monitoring/metrics/runs',
      '/api/monitoring/metrics/sse',
      '/api/monitoring/metrics/queue',
    ],
  });
}