@thejrsoft/subway-protocol 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/ACK_MESSAGES_IMPLEMENTATION_SUMMARY.md +128 -0
  2. package/ACK_MESSAGE_DESIGN.md +457 -0
  3. package/CHANGELOG.md +58 -0
  4. package/COMMAND_VALIDATION_RULES.md +178 -0
  5. package/DOCUMENTATION_REORGANIZATION_SUMMARY.md +81 -0
  6. package/DOCUMENTATION_STRUCTURE.md +106 -0
  7. package/GATEWAY_MIGRATION_GUIDE.md +130 -0
  8. package/GATEWAY_PROTOCOL_COMPARISON.md +216 -0
  9. package/INTEGRATION_GUIDE.md +190 -0
  10. package/OPTIONAL_FIELDS_WITHOUT_DEFAULTS.md +97 -0
  11. package/PROTOCOL_UTILS_USAGE.md +278 -0
  12. package/README.md +237 -0
  13. package/TYPE_FIXES_SUMMARY.md +210 -0
  14. package/UPDATE_ENUM_VALUES.md +139 -0
  15. package/dist/asyncapi-sync.d.ts +47 -0
  16. package/dist/asyncapi-sync.d.ts.map +1 -0
  17. package/dist/asyncapi-sync.js +85 -0
  18. package/dist/asyncapi-sync.js.map +1 -0
  19. package/dist/command-factory.d.ts +62 -0
  20. package/dist/command-factory.d.ts.map +1 -0
  21. package/dist/command-factory.js +137 -0
  22. package/dist/command-factory.js.map +1 -0
  23. package/dist/command-types.d.ts +27 -0
  24. package/dist/command-types.d.ts.map +1 -0
  25. package/dist/command-types.js +31 -0
  26. package/dist/command-types.js.map +1 -0
  27. package/dist/index.d.ts +403 -0
  28. package/dist/index.d.ts.map +1 -0
  29. package/dist/index.js +413 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/message-validator.d.ts +102 -0
  32. package/dist/message-validator.d.ts.map +1 -0
  33. package/dist/message-validator.js +640 -0
  34. package/dist/message-validator.js.map +1 -0
  35. package/dist/protocol-utils.d.ts +108 -0
  36. package/dist/protocol-utils.d.ts.map +1 -0
  37. package/dist/protocol-utils.js +293 -0
  38. package/dist/protocol-utils.js.map +1 -0
  39. package/docs/01-protocol/README.md +45 -0
  40. package/docs/01-protocol/design-rationale.md +198 -0
  41. package/docs/01-protocol/message-types.md +669 -0
  42. package/docs/01-protocol/specification.md +1466 -0
  43. package/docs/02-commands/README.md +56 -0
  44. package/docs/02-commands/batch-command.md +435 -0
  45. package/docs/02-commands/complex-command.md +537 -0
  46. package/docs/02-commands/simple-command.md +332 -0
  47. package/docs/02-commands/typed-commands.md +362 -0
  48. package/docs/03-architecture/README.md +66 -0
  49. package/docs/03-architecture/device-protocol.md +430 -0
  50. package/docs/03-architecture/edge-proxy.md +727 -0
  51. package/docs/03-architecture/routing-flow.md +893 -0
  52. package/docs/04-integration/README.md +144 -0
  53. package/docs/04-integration/backend-guide.md +551 -0
  54. package/docs/04-integration/edge-guide.md +684 -0
  55. package/docs/04-integration/gateway-guide.md +180 -0
  56. package/docs/04-integration/migration-guide.md +226 -0
  57. package/docs/05-examples/README.md +141 -0
  58. package/docs/05-examples/progress-update-examples.md +757 -0
  59. package/docs/06-reference/README.md +67 -0
  60. package/docs/06-reference/api.md +572 -0
  61. package/docs/06-reference/faq.md +302 -0
  62. package/docs/06-reference/glossary.md +232 -0
  63. package/examples/backend-upgrade.ts +279 -0
  64. package/examples/edge-multi-device.ts +513 -0
  65. package/examples/gateway-upgrade.ts +150 -0
  66. package/examples/protocol-implementation.ts +715 -0
  67. package/package.json +48 -0
  68. package/scripts/validate-asyncapi.ts +78 -0
  69. package/src/__tests__/protocol.test.ts +297 -0
  70. package/src/asyncapi-sync.ts +84 -0
  71. package/src/command-factory.ts +183 -0
  72. package/src/command-types.ts +72 -0
  73. package/src/edge-proxy.ts +494 -0
  74. package/src/gateway-extensions.ts +278 -0
  75. package/src/index.ts +792 -0
  76. package/src/message-validator.ts +726 -0
  77. package/src/protocol-utils.ts +355 -0
  78. package/tsconfig.json +24 -0
@@ -0,0 +1,893 @@
1
+ # 消息路由流程
2
+
3
+ 本文档详细说明 JRSoft Subway 系统中的消息路由机制和新架构设计。
4
+
5
+ ## 系统架构概览
6
+
7
+ ### 核心组件
8
+
9
+ ```
10
+ ┌─────────────┐ ┌─────────────┐ ┌──────────┐ ┌──────────┐
11
+ │ Backend │────▶│ Gateway │────▶│ Edge │────▶│ Device │
12
+ │ (FastAPI) │ │(API + WS) │ │ (Proxy) │ │(Client) │
13
+ │ │◀────│ │◀────│ │◀────│ │
14
+ └─────────────┘ └─────────────┘ └──────────┘ └──────────┘
15
+ 18082 18081 Dynamic Dynamic
16
+ ↑ ↑
17
+ │ │
18
+ Business Protocol API
19
+ Management & WebSocket
20
+ ```
21
+
22
+ ### 职责分离
23
+
24
+ #### Backend (业务管理层)
25
+ - **端口**: 18082
26
+ - **职责**:
27
+ - 节目内容管理和编排
28
+ - 任务调度和时间管理
29
+ - 节目发布和分发控制
30
+ - 设备状态监控和统计
31
+ - 提供管理 API 给前端应用
32
+ - **技术栈**: Python FastAPI, PostgreSQL, Redis
33
+
34
+
35
+ #### Gateway (API服务 + 协议路由层)
36
+ - **端口**: 18081
37
+ - **职责**:
38
+ - 提供 HTTP API 接收命令请求
39
+ - WebSocket 服务器,管理连接
40
+ - 消息路由和转发
41
+ - 连接状态管理
42
+ - 协议版本控制
43
+ - 命令执行状态追踪
44
+ - 异步回调处理
45
+ - **技术栈**: Node.js, Express, WebSocket Server
46
+
47
+ #### Edge (设备代理层)
48
+ - **端口**: 动态分配
49
+ - **职责**:
50
+ - 连接 Gateway 和本地设备
51
+ - 协议转换和适配
52
+ - 本地设备管理
53
+ - 离线缓存和恢复
54
+
55
+ #### Device (设备执行层)
56
+ - **端口**: 动态分配
57
+ - **职责**:
58
+ - 执行具体的硬件操作
59
+ - 状态上报
60
+ - 节目存储和播放
61
+
62
+ ## 业务流程示例
63
+
64
+ ### Command 流程(直接命令执行)
65
+
66
+ 适用于实时控制命令,如开关灯、调整亮度等即时操作。
67
+
68
+ ```
69
+ EUDI Gateway Edge Device
70
+ | | | |
71
+ | 1. HTTP POST | | |
72
+ | /api/commands | | |
73
+ |------------------>| | |
74
+ | | | |
75
+ | | 2. 转换为WS消息 | |
76
+ | | 记录callback | |
77
+ | | | |
78
+ | | 3. 路由到Edge | |
79
+ | |------------------>| |
80
+ | | | |
81
+ | | | 4. 转发到设备 |
82
+ | | |----------------->|
83
+ | | | |
84
+ | | | | 5. 执行命令
85
+ | | | |
86
+ | | | 6. 返回结果 |
87
+ | | |<-----------------|
88
+ | | | |
89
+ | | 7. 接收响应 | |
90
+ | |<------------------| |
91
+ | | | |
92
+ | 8. Callback通知 | | |
93
+ |<------------------| | |
94
+ | | | |
95
+ ```
96
+
97
+ ### Program 流程(节目发布)
98
+
99
+ 适用于节目发布、批量更新等需要调度的任务。
100
+
101
+ ```
102
+ EUDI Backend Gateway Edge Device
103
+ | | | | |
104
+ | 1. 创建任务 | | | |
105
+ |-------------->| | | |
106
+ | | | | |
107
+ | | 2. 持久化任务 | | |
108
+ | | 保存callback | | |
109
+ | | | | |
110
+ | | 3. 任务调度 | | |
111
+ | | (满足条件后) | | |
112
+ | | | | |
113
+ | | 4. WebSocket发送 | | |
114
+ | | program命令 | | |
115
+ | |------------------>| | |
116
+ | | ↓ | | |
117
+ | | 记录callback | | |
118
+ | | | | |
119
+ | | | 5. 路由转发 | |
120
+ | | |------------->| |
121
+ | | | | |
122
+ | | | | 6. 转发命令 |
123
+ | | | |------------->|
124
+ | | | | |
125
+ | | | | | 7. 执行
126
+ | | | | | 下载节目
127
+ | | | | | 安装部署
128
+ | | | | |
129
+ | | | | 8. 进度更新 |
130
+ | | | |<-------------|
131
+ | | | | |
132
+ | | | 9. 转发进度 | |
133
+ | | |<-------------| |
134
+ | | | | |
135
+ | | 10. WS通知 | 11. Callback | |
136
+ | |<------------------|───────────┐ | |
137
+ | | | ↓ | |
138
+ | | 12. 更新任务状态 | EUDI回调| |
139
+ | | | | |
140
+ ```
141
+
142
+ ## 消息路由详解
143
+
144
+ ### 客户端标识符格式
145
+
146
+ #### 1. Backend Client ID
147
+ - 格式:`backend-{instance}`
148
+ - 示例:`backend-001`, `backend-primary`
149
+ - 用途:标识 Backend 实例
150
+
151
+ #### 2. Edge ID
152
+ - 格式:`edge-{location}`
153
+ - 示例:`edge-001`, `edge-tunnel-exit-1`
154
+ - 用途:标识 Edge 节点位置
155
+
156
+ #### 3. Device ID
157
+ - 格式:`{type}-{number}`
158
+ - 示例:`td-01`, `screen-05`, `pillar-100`
159
+ - 用途:标识具体设备
160
+
161
+ #### 4. Target Client ID
162
+ - 格式:直接使用设备ID
163
+ - 示例:`td-01`
164
+ - 用途:Gateway根据路由表自动找到对应Edge
165
+
166
+ ### 命令路由流程
167
+
168
+ #### Simple 命令路由
169
+
170
+ ```
171
+ Backend Gateway Edge Device
172
+ | | | |
173
+ | 1. HTTP POST | | |
174
+ | /api/commands | | |
175
+ | targetClientId: | | |
176
+ | "td-01" | | |
177
+ |----------------->| | |
178
+ | | | |
179
+ | | 2. 查找路由表 | |
180
+ | | td-01 -> edge-001 | |
181
+ | | | |
182
+ | | 3. 路由到edge | |
183
+ | |------------------>| |
184
+ | | | |
185
+ | | | 4. 转发到device |
186
+ | | |----------------->|
187
+ | | | |
188
+ | | | | 5. 执行命令
189
+ | | | |
190
+ | | | 6. RESPONSE |
191
+ | | |<-----------------|
192
+ | | | |
193
+ | | 7. 路由响应 | |
194
+ | |<------------------| |
195
+ | | | |
196
+ | 8. RESPONSE | | |
197
+ |<-----------------| | |
198
+ ```
199
+
200
+ ### Gateway 路由逻辑
201
+
202
+ ```typescript
203
+ class GatewayRouter {
204
+ private connections = new Map<string, WebSocket>();
205
+ private routeMap = new Map<string, string>(); // deviceId -> edgeId
206
+
207
+ route(message: CommandMessage): void {
208
+ const { targetClientId } = message;
209
+
210
+ // 1. targetClientId 直接就是设备ID
211
+ const deviceId = targetClientId;
212
+
213
+ // 2. 从路由表查找对应的 Edge
214
+ const edgeId = this.routeMap.get(deviceId);
215
+ if (!edgeId) {
216
+ throw new Error(`No route found for device ${deviceId}`);
217
+ }
218
+
219
+ // 3. 查找 Edge 连接
220
+ const edgeConnection = this.connections.get(edgeId);
221
+ if (!edgeConnection) {
222
+ throw new Error(`Edge ${edgeId} not connected`);
223
+ }
224
+
225
+ // 4. 转发到 Edge
226
+ edgeConnection.send(JSON.stringify(message));
227
+
228
+ // 5. 记录路由日志
229
+ this.logRoute(message.requestRef, 'gateway', edgeId, 'command');
230
+ }
231
+
232
+ // 设备注册时更新路由表
233
+ registerDevice(deviceId: string, edgeId: string): void {
234
+ this.routeMap.set(deviceId, edgeId);
235
+ console.log(`Route registered: ${deviceId} -> ${edgeId}`);
236
+ }
237
+
238
+ routeResponse(response: CommandResponseMessage): void {
239
+ const { requestRef } = response;
240
+
241
+ // 查找原始请求的来源
242
+ const sourceConnection = this.findSourceConnection(requestRef);
243
+ if (sourceConnection) {
244
+ sourceConnection.send(JSON.stringify(response));
245
+ this.logRoute(requestRef, 'gateway', 'source', 'response');
246
+ }
247
+ }
248
+ }
249
+ ```
250
+
251
+ ### Edge 转发逻辑
252
+
253
+ ```typescript
254
+ class EdgeProxy {
255
+ private gatewayConnection: WebSocket;
256
+ private deviceConnections = new Map<string, WebSocket>();
257
+
258
+ handleCommand(message: CommandMessage): void {
259
+ const { targetClientId } = message;
260
+ // targetClientId 已经是设备ID
261
+ const deviceId = targetClientId;
262
+
263
+ // 1. 查找设备连接
264
+ const deviceConnection = this.deviceConnections.get(deviceId);
265
+ if (!deviceConnection) {
266
+ this.sendError('DEVICE_NOT_FOUND', `Device ${deviceId} not connected`);
267
+ return;
268
+ }
269
+
270
+ // 2. 直接转发到设备
271
+ deviceConnection.send(JSON.stringify(message));
272
+
273
+ // 3. 记录转发日志
274
+ this.logForward(message.requestRef, deviceId);
275
+ }
276
+
277
+ handleDeviceResponse(response: CommandResponseMessage): void {
278
+ // 转发设备响应到 Gateway
279
+ this.gatewayConnection.send(JSON.stringify(response));
280
+ }
281
+
282
+ // 设备连接时注册到 Gateway
283
+ onDeviceConnect(deviceId: string): void {
284
+ const registerMessage = {
285
+ type: 'REGISTER',
286
+ clientId: deviceId,
287
+ clientType: 'DEVICE',
288
+ edgeInfo: {
289
+ edgeId: this.edgeId,
290
+ edgeVersion: this.version
291
+ },
292
+ timestamp: new Date().toISOString(),
293
+ version: '1.0'
294
+ };
295
+
296
+ this.gatewayConnection.send(JSON.stringify(registerMessage));
297
+ }
298
+ }
299
+ ```
300
+
301
+ ## Gateway API 到 WebSocket 的转换
302
+
303
+ ### API 请求处理流程
304
+
305
+ ```typescript
306
+ class GatewayAPIHandler {
307
+ // 处理来自 Backend 的 API 请求
308
+ async handleCommandRequest(req: Request, res: Response) {
309
+ const { target_client_id, command, callback_url, priority, timeout } = req.body;
310
+
311
+ // 1. 生成唯一的 requestRef
312
+ const requestRef = `cmd-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
313
+
314
+ // 2. 构建 WebSocket 命令消息
315
+ const wsMessage = {
316
+ type: 'COMMAND',
317
+ requestRef,
318
+ targetClientId: target_client_id,
319
+ command,
320
+ priority: priority || 'NORMAL',
321
+ timeout: timeout || 30000,
322
+ callback: callback_url,
323
+ timestamp: new Date().toISOString(),
324
+ version: '1.0'
325
+ };
326
+
327
+ // 3. 记录请求信息,用于回调
328
+ this.pendingRequests.set(requestRef, {
329
+ callback: callback_url,
330
+ startTime: Date.now(),
331
+ timeout
332
+ });
333
+
334
+ // 4. 通过 WebSocket 路由系统发送命令
335
+ try {
336
+ await this.router.route(wsMessage);
337
+
338
+ // 5. 返回请求受理响应
339
+ res.json({
340
+ success: true,
341
+ requestRef,
342
+ message: 'Command accepted for processing'
343
+ });
344
+ } catch (error) {
345
+ res.status(500).json({
346
+ success: false,
347
+ error: error.message
348
+ });
349
+ }
350
+ }
351
+
352
+ // 统一处理所有响应的回调(Command 和 Program)
353
+ async handleResponse(response: CommandResponseMessage | ProgressUpdateMessage) {
354
+ // 1. 查找请求信息
355
+ const requestInfo = this.pendingRequests.get(response.requestRef);
356
+
357
+ // 2. 如果是通过 WebSocket 发送的 Program 命令
358
+ if (!requestInfo && response.callback) {
359
+ // Program 命令携带 callback 在消息中
360
+ await this.executeCallback(response.callback, response);
361
+
362
+ // 同时通过 WebSocket 通知 Backend
363
+ if (response.sourceClientId && response.sourceClientId.startsWith('backend')) {
364
+ this.notifyBackend(response.sourceClientId, response);
365
+ }
366
+ return;
367
+ }
368
+
369
+ // 3. 处理通过 API 发送的 Command
370
+ if (requestInfo && requestInfo.callback) {
371
+ await this.executeCallback(requestInfo.callback, response);
372
+
373
+ // 如果是最终响应,清理记录
374
+ if (response.type === 'COMMAND_RESPONSE') {
375
+ this.pendingRequests.delete(response.requestRef);
376
+ }
377
+ }
378
+ }
379
+
380
+ // 通知 Backend(用于 Program 命令)
381
+ private notifyBackend(backendClientId: string, response: any) {
382
+ const backendConnection = this.connections.get(backendClientId);
383
+ if (backendConnection) {
384
+ backendConnection.send(JSON.stringify(response));
385
+ }
386
+ }
387
+ }
388
+ ```
389
+
390
+ ## Callback 机制
391
+
392
+ Gateway 统一负责所有类型命令的 callback 处理:
393
+
394
+ ### Command 类型(通过 API 发送)
395
+ 1. EUDI 调用 Gateway API 时提供 callback URL
396
+ 2. Gateway 保存 callback 信息在内存中
397
+ 3. 收到响应后,Gateway 直接调用 callback URL
398
+ 4. 适用于实时性要求高的命令
399
+
400
+ ### Program 类型(通过 WebSocket 发送)
401
+ 1. Backend 通过 WebSocket 发送命令时,在消息中包含 callback URL
402
+ 2. Gateway 从消息中提取 callback 信息
403
+ 3. 收到响应后,Gateway:
404
+ - 调用 callback URL 通知 EUDI
405
+ - 通过 WebSocket 将响应发回 Backend
406
+ 4. Backend 可以更新任务状态,但不负责 callback
407
+
408
+ ### Callback 执行策略
409
+
410
+ ```typescript
411
+ class CallbackExecutor {
412
+ async executeCallback(callback: string, response: any) {
413
+ const maxRetries = 3;
414
+ let lastError;
415
+
416
+ for (let i = 0; i < maxRetries; i++) {
417
+ try {
418
+ const result = await fetch(callback, {
419
+ method: 'POST',
420
+ headers: {
421
+ 'Content-Type': 'application/json',
422
+ 'X-Callback-Attempt': `${i + 1}/${maxRetries}`
423
+ },
424
+ body: JSON.stringify({
425
+ requestRef: response.requestRef,
426
+ status: response.status,
427
+ result: response.result,
428
+ timestamp: response.timestamp,
429
+ executionTime: response.executionTime
430
+ })
431
+ });
432
+
433
+ if (result.ok) {
434
+ console.log(`Callback success: ${callback}`);
435
+ return;
436
+ }
437
+
438
+ lastError = new Error(`HTTP ${result.status}`);
439
+ } catch (error) {
440
+ lastError = error;
441
+ }
442
+
443
+ // 指数退避
444
+ if (i < maxRetries - 1) {
445
+ await sleep(Math.pow(2, i) * 1000);
446
+ }
447
+ }
448
+
449
+ console.error(`Callback failed after ${maxRetries} attempts:`, lastError);
450
+ }
451
+ }
452
+ ```
453
+
454
+ ## Backend 与 Gateway 交互
455
+
456
+ ### 1. HTTP API 接口
457
+
458
+ ```python
459
+ # Backend 调用 Gateway API
460
+ class GatewayClient:
461
+ def __init__(self, base_url: str = "http://gateway:18081"):
462
+ self.base_url = base_url
463
+ self.client = httpx.AsyncClient()
464
+
465
+ async def send_command(
466
+ self,
467
+ target_client_id: str,
468
+ command: dict,
469
+ callback_url: str,
470
+ priority: str = "NORMAL",
471
+ timeout: int = 30000
472
+ ) -> dict:
473
+ """发送命令到设备"""
474
+ payload = {
475
+ "target_client_id": target_client_id,
476
+ "command": command,
477
+ "callback_url": callback_url,
478
+ "priority": priority,
479
+ "timeout": timeout
480
+ }
481
+
482
+ response = await self.client.post(
483
+ f"{self.base_url}/api/commands",
484
+ json=payload
485
+ )
486
+
487
+ return response.json()
488
+
489
+ async def get_command_status(self, request_ref: str) -> dict:
490
+ """查询命令执行状态"""
491
+ response = await self.client.get(
492
+ f"{self.base_url}/api/commands/{request_ref}"
493
+ )
494
+
495
+ return response.json()
496
+
497
+ async def cancel_command(self, request_ref: str) -> dict:
498
+ """取消命令执行"""
499
+ response = await self.client.delete(
500
+ f"{self.base_url}/api/commands/{request_ref}"
501
+ )
502
+
503
+ return response.json()
504
+ ```
505
+
506
+ ### 2. 回调处理
507
+
508
+ ```python
509
+ # Backend 接收 Gateway 的回调
510
+ @app.post("/callback/command/{request_ref}")
511
+ async def handle_command_callback(request_ref: str, callback_data: dict):
512
+ """处理命令执行回调"""
513
+
514
+ # 1. 验证回调数据
515
+ if not validate_callback(callback_data):
516
+ raise HTTPException(status_code=400, detail="Invalid callback data")
517
+
518
+ # 2. 更新业务状态
519
+ await update_publication_status(request_ref, callback_data)
520
+
521
+ # 3. 触发后续业务逻辑
522
+ if callback_data["status"] == "COMPLETED":
523
+ await handle_command_success(request_ref, callback_data)
524
+ elif callback_data["status"] == "FAILED":
525
+ await handle_command_failure(request_ref, callback_data)
526
+
527
+ # 4. 发送前端通知
528
+ await notify_frontend(request_ref, callback_data)
529
+
530
+ return {"status": "received"}
531
+
532
+ @app.post("/callback/progress/{request_ref}")
533
+ async def handle_progress_callback(request_ref: str, progress_data: dict):
534
+ """处理进度更新回调"""
535
+
536
+ # 更新进度状态
537
+ await update_command_progress(request_ref, progress_data)
538
+
539
+ # 实时推送到前端
540
+ await websocket_manager.broadcast({
541
+ "type": "progress_update",
542
+ "request_ref": request_ref,
543
+ "progress": progress_data
544
+ })
545
+
546
+ return {"status": "received"}
547
+ ```
548
+
549
+ ## 错误处理和容错机制
550
+
551
+ ### 1. 连接故障处理
552
+
553
+ ```typescript
554
+ class ConnectionManager {
555
+ private reconnectDelay = 5000;
556
+ private maxReconnectAttempts = 5;
557
+
558
+ async handleConnectionLost(clientId: string): Promise<void> {
559
+ console.log(`Connection lost: ${clientId}`);
560
+
561
+ // 1. 标记连接状态
562
+ this.markConnectionLost(clientId);
563
+
564
+ // 2. 通知相关服务
565
+ await this.notifyConnectionLost(clientId);
566
+
567
+ // 3. 启动重连流程
568
+ this.scheduleReconnect(clientId);
569
+
570
+ // 4. 处理待处理的消息
571
+ this.handlePendingMessages(clientId);
572
+ }
573
+
574
+ private async scheduleReconnect(clientId: string): Promise<void> {
575
+ let attempts = 0;
576
+
577
+ while (attempts < this.maxReconnectAttempts) {
578
+ await this.sleep(this.reconnectDelay * Math.pow(2, attempts));
579
+
580
+ try {
581
+ await this.reconnect(clientId);
582
+ console.log(`Reconnected: ${clientId}`);
583
+ return;
584
+ } catch (error) {
585
+ attempts++;
586
+ console.log(`Reconnect failed (${attempts}/${this.maxReconnectAttempts}): ${error.message}`);
587
+ }
588
+ }
589
+
590
+ // 重连失败,标记为永久离线
591
+ this.markPermanentlyOffline(clientId);
592
+ }
593
+ }
594
+ ```
595
+
596
+ ### 2. 消息超时处理
597
+
598
+ ```typescript
599
+ class TimeoutManager {
600
+ private pendingMessages = new Map<string, TimeoutInfo>();
601
+
602
+ startTimeout(requestRef: string, timeoutMs: number): void {
603
+ const timeoutId = setTimeout(() => {
604
+ this.handleTimeout(requestRef);
605
+ }, timeoutMs);
606
+
607
+ this.pendingMessages.set(requestRef, {
608
+ timeoutId,
609
+ startTime: Date.now(),
610
+ timeoutMs
611
+ });
612
+ }
613
+
614
+ clearTimeout(requestRef: string): void {
615
+ const info = this.pendingMessages.get(requestRef);
616
+ if (info) {
617
+ clearTimeout(info.timeoutId);
618
+ this.pendingMessages.delete(requestRef);
619
+ }
620
+ }
621
+
622
+ private handleTimeout(requestRef: string): void {
623
+ const info = this.pendingMessages.get(requestRef);
624
+ if (!info) return;
625
+
626
+ // 发送超时错误响应
627
+ const timeoutResponse = {
628
+ type: "ERROR",
629
+ code: "TIMEOUT",
630
+ message: `Command timeout after ${info.timeoutMs}ms`,
631
+ requestRef,
632
+ timestamp: new Date().toISOString()
633
+ };
634
+
635
+ this.routeResponse(timeoutResponse);
636
+ this.pendingMessages.delete(requestRef);
637
+ }
638
+ }
639
+ ```
640
+
641
+ ### 3. 错误响应类型
642
+
643
+ ```json
644
+ {
645
+ "type": "ERROR",
646
+ "code": "TARGET_NOT_FOUND",
647
+ "message": "目标客户端不存在",
648
+ "details": {
649
+ "targetClientId": "device-99",
650
+ "reason": "No route found for device device-99"
651
+ },
652
+ "requestRef": "cmd-123",
653
+ "timestamp": "2024-01-20T10:00:00Z"
654
+ }
655
+ ```
656
+
657
+ ```json
658
+ {
659
+ "type": "ERROR",
660
+ "code": "GATEWAY_UNAVAILABLE",
661
+ "message": "网关服务不可用",
662
+ "details": {
663
+ "service": "gateway",
664
+ "lastSeen": "2024-01-20T09:55:00Z",
665
+ "retryAfter": 30
666
+ },
667
+ "requestRef": "cmd-124",
668
+ "timestamp": "2024-01-20T10:00:00Z"
669
+ }
670
+ ```
671
+
672
+ ## 监控和可观测性
673
+
674
+ ### 1. 消息追踪
675
+
676
+ ```typescript
677
+ interface MessageTrace {
678
+ requestRef: string;
679
+ startTime: number;
680
+ path: Array<{
681
+ node: string;
682
+ timestamp: number;
683
+ action: 'send' | 'receive' | 'forward';
684
+ }>;
685
+ status: 'pending' | 'completed' | 'failed' | 'timeout';
686
+ duration?: number;
687
+ }
688
+
689
+ class MessageTracer {
690
+ private traces = new Map<string, MessageTrace>();
691
+
692
+ startTrace(requestRef: string, from: string): void {
693
+ this.traces.set(requestRef, {
694
+ requestRef,
695
+ startTime: Date.now(),
696
+ path: [{
697
+ node: from,
698
+ timestamp: Date.now(),
699
+ action: 'send'
700
+ }],
701
+ status: 'pending'
702
+ });
703
+ }
704
+
705
+ addHop(requestRef: string, node: string, action: 'send' | 'receive' | 'forward'): void {
706
+ const trace = this.traces.get(requestRef);
707
+ if (trace) {
708
+ trace.path.push({
709
+ node,
710
+ timestamp: Date.now(),
711
+ action
712
+ });
713
+ }
714
+ }
715
+
716
+ completeTrace(requestRef: string, status: 'completed' | 'failed' | 'timeout'): void {
717
+ const trace = this.traces.get(requestRef);
718
+ if (trace) {
719
+ trace.status = status;
720
+ trace.duration = Date.now() - trace.startTime;
721
+
722
+ // 发送到监控系统
723
+ this.reportTrace(trace);
724
+
725
+ // 清理
726
+ this.traces.delete(requestRef);
727
+ }
728
+ }
729
+ }
730
+ ```
731
+
732
+ ### 2. 性能指标
733
+
734
+ ```typescript
735
+ class PerformanceMonitor {
736
+ recordRouteLatency(from: string, to: string, duration: number): void {
737
+ metrics.histogram('route_latency_ms', duration, {
738
+ from,
739
+ to
740
+ });
741
+ }
742
+
743
+ recordCommandSuccess(commandType: string, deviceType: string): void {
744
+ metrics.counter('commands_success_total', 1, {
745
+ command_type: commandType,
746
+ device_type: deviceType
747
+ });
748
+ }
749
+
750
+ recordCommandFailure(commandType: string, errorCode: string): void {
751
+ metrics.counter('commands_failure_total', 1, {
752
+ command_type: commandType,
753
+ error_code: errorCode
754
+ });
755
+ }
756
+
757
+ recordConnectionCount(nodeType: string, count: number): void {
758
+ metrics.gauge('active_connections', count, {
759
+ node_type: nodeType
760
+ });
761
+ }
762
+ }
763
+ ```
764
+
765
+ ## 最佳实践
766
+
767
+ ### 1. 消息设计原则
768
+
769
+ ```typescript
770
+ // ✅ 良好的消息设计
771
+ interface CommandMessage {
772
+ type: 'COMMAND';
773
+ requestRef: string; // 唯一请求ID
774
+ targetClientId: string; // 明确的目标
775
+ command: {
776
+ commandType: string; // 命令类型
777
+ // ... 其他字段
778
+ };
779
+ priority: 'LOW' | 'NORMAL' | 'HIGH';
780
+ timeout: number;
781
+ callback?: string; // 回调URL
782
+ timestamp: string;
783
+ version: string;
784
+ }
785
+
786
+ // ❌ 避免的设计
787
+ interface BadMessage {
788
+ cmd: string; // 不明确的字段名
789
+ target: string; // 缺少结构化
790
+ data: any; // 太宽泛的类型
791
+ }
792
+ ```
793
+
794
+ ### 2. 错误处理策略
795
+
796
+ ```typescript
797
+ class ErrorHandler {
798
+ handleRoutingError(error: Error, message: CommandMessage): void {
799
+ // 1. 记录错误
800
+ console.error(`Routing error for ${message.requestRef}:`, error);
801
+
802
+ // 2. 分类错误
803
+ const errorCode = this.classifyError(error);
804
+
805
+ // 3. 发送错误响应
806
+ const errorResponse = {
807
+ type: 'ERROR',
808
+ code: errorCode,
809
+ message: error.message,
810
+ requestRef: message.requestRef,
811
+ timestamp: new Date().toISOString()
812
+ };
813
+
814
+ // 4. 路由错误响应
815
+ this.routeErrorResponse(errorResponse);
816
+
817
+ // 5. 更新监控指标
818
+ metrics.counter('routing_errors_total', 1, {
819
+ error_code: errorCode
820
+ });
821
+ }
822
+ }
823
+ ```
824
+
825
+ ### 3. 连接管理
826
+
827
+ ```typescript
828
+ class ConnectionManager {
829
+ private connections = new Map<string, Connection>();
830
+ private healthChecks = new Map<string, NodeJS.Timeout>();
831
+
832
+ addConnection(clientId: string, ws: WebSocket): void {
833
+ const connection = new Connection(clientId, ws);
834
+ this.connections.set(clientId, connection);
835
+
836
+ // 启动健康检查
837
+ this.startHealthCheck(clientId);
838
+
839
+ // 记录连接指标
840
+ metrics.gauge('active_connections', this.connections.size);
841
+ }
842
+
843
+ private startHealthCheck(clientId: string): void {
844
+ const interval = setInterval(() => {
845
+ const connection = this.connections.get(clientId);
846
+ if (connection && !connection.isHealthy()) {
847
+ this.handleUnhealthyConnection(clientId);
848
+ }
849
+ }, 30000); // 30秒检查一次
850
+
851
+ this.healthChecks.set(clientId, interval);
852
+ }
853
+ }
854
+ ```
855
+
856
+ ## 总结
857
+
858
+ 新架构的消息路由系统具有以下特点:
859
+
860
+ ### 1. 清晰的职责分离
861
+ - **Backend**: 专注业务逻辑、任务调度和数据管理
862
+ - **Gateway**: 提供API接口、协议路由、连接管理和统一callback处理
863
+ - **Edge**: 专注设备代理和适配
864
+ - **Device**: 专注硬件操作执行
865
+
866
+ ### 2. 双通道命令支持
867
+ - **Command 通道**: EUDI → Gateway API → WebSocket,用于实时控制
868
+ - **Program 通道**: EUDI → Backend → WebSocket → Gateway,用于调度任务
869
+
870
+ ### 3. 统一的 Callback 机制
871
+ - Gateway 负责所有类型命令的 callback 执行
872
+ - 支持重试和指数退避策略
873
+ - Program 命令同时通知 Backend 和 EUDI
874
+
875
+ ### 4. 可靠的消息传递
876
+ - 层次化的路由机制
877
+ - 完整的错误处理和重试
878
+ - 消息追踪和监控
879
+ - 连接状态管理
880
+
881
+ ### 5. 良好的可扩展性
882
+ - 服务独立部署和扩展
883
+ - 松耦合的服务交互
884
+ - 标准化的接口协议
885
+ - 灵活的负载均衡
886
+
887
+ ### 6. 强大的可观测性
888
+ - 完整的消息链路追踪
889
+ - 丰富的性能指标
890
+ - 实时的状态监控
891
+ - 详细的错误报告
892
+
893
+ 这种设计确保了系统的高可用性、高可扩展性和易维护性,为地铁显示系统提供了稳定可靠的通信基础。