@gadmin2n/schematics 0.0.87 → 0.0.89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/dist/lib/application/files/gadmin2-game-angle-demo/.dockerignore +16 -2
  2. package/dist/lib/application/files/gadmin2-game-angle-demo/Dockerfile.codegen +40 -0
  3. package/dist/lib/application/files/gadmin2-game-angle-demo/Dockerfile.server +76 -0
  4. package/dist/lib/application/files/gadmin2-game-angle-demo/Dockerfile.web +53 -0
  5. package/dist/lib/application/files/gadmin2-game-angle-demo/Jenkinsfile +219 -33
  6. package/dist/lib/application/files/gadmin2-game-angle-demo/compose-ctl.sh +250 -0
  7. package/dist/lib/application/files/gadmin2-game-angle-demo/config/prisma/workflow.prisma +4 -1
  8. package/dist/lib/application/files/gadmin2-game-angle-demo/dev/postgres/init.sql +12 -0
  9. package/dist/lib/application/files/gadmin2-game-angle-demo/docker-compose.md +170 -0
  10. package/dist/lib/application/files/gadmin2-game-angle-demo/docker-compose.yml +254 -0
  11. package/dist/lib/application/files/gadmin2-game-angle-demo/server/package.json +8 -7
  12. package/dist/lib/application/files/gadmin2-game-angle-demo/server/scripts/lib/page-helpers.ts +1 -1
  13. package/dist/lib/application/files/gadmin2-game-angle-demo/server/scripts/prismaModels.ts +1 -1
  14. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/agenda.seed.ts +39 -0
  15. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/audit.seed.ts +40 -0
  16. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/bootstrap.ts +56 -0
  17. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/canvas.seed.ts +39 -0
  18. package/dist/lib/application/files/gadmin2-game-angle-demo/server/{scripts/sync-data-mngt-pages.ts → seed/data-mngt.seed.ts} +36 -20
  19. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/game.seed.ts +44 -0
  20. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/index.ts +30 -6
  21. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/permission.seed.ts +130 -0
  22. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/workflow-event-trigger.ts +60 -0
  23. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/workflow-node-types.ts +11 -25
  24. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/workflow.seed.ts +108 -0
  25. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/main.ts +1 -0
  26. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/agendaJob/agendaJob.controller.spec.ts +31 -2
  27. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/audit/audit.controller.spec.ts +31 -2
  28. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/audit/audit.service.spec.ts +41 -57
  29. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/game/game.controller.spec.ts +31 -2
  30. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/game/game.service.spec.ts +309 -1
  31. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/page/page.controller.spec.ts +31 -2
  32. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/page/page.service.spec.ts +315 -1
  33. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/pageResource/pageResource.controller.spec.ts +31 -2
  34. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/pageResource/pageResource.service.spec.ts +312 -2
  35. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/resource/resource.controller.spec.ts +31 -2
  36. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/resource/resource.service.spec.ts +317 -1
  37. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/role/role.controller.spec.ts +31 -2
  38. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/role/role.service.spec.ts +309 -1
  39. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/rolePages/rolePages.controller.spec.ts +31 -2
  40. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/rolePages/rolePages.service.spec.ts +299 -1
  41. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/roleResource/roleResource.controller.spec.ts +31 -2
  42. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/roleResource/roleResource.service.spec.ts +307 -1
  43. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/user/user.controller.spec.ts +31 -2
  44. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/user/user.service.spec.ts +309 -1
  45. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflow/dsl-validate.util.spec.ts +205 -0
  46. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflow/dsl-validate.util.ts +116 -0
  47. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflow/temporal.service.spec.ts +158 -0
  48. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflow/temporal.service.ts +110 -1
  49. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflow/webhook-signature.util.spec.ts +79 -0
  50. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflow/webhook-signature.util.ts +54 -0
  51. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflow/workflow.controller.ts +34 -0
  52. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflow/workflow.service.spec.ts +457 -0
  53. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflow/workflow.service.ts +241 -4
  54. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflowEventOutbox/workflowEventOutbox.controller.spec.ts +34 -2
  55. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflowEventOutbox/workflowEventOutbox.service.spec.ts +24 -30
  56. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflowNodeInstance/workflowNodeInstance.controller.spec.ts +34 -2
  57. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflowNodeInstance/workflowNodeInstance.service.spec.ts +36 -36
  58. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflowNodeType/workflowNodeType.controller.spec.ts +34 -2
  59. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/workflowNodeType/workflowNodeType.service.spec.ts +48 -24
  60. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/README.md +312 -3
  61. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/TODO.md +152 -0
  62. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/worker/.dockerignore +12 -0
  63. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/worker/Dockerfile +79 -0
  64. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/worker/GRACEFUL-DEPLOYMENT.md +270 -0
  65. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/worker/src/activities/index.ts +1 -1
  66. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/worker/src/activities/reporting.ts +23 -0
  67. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/worker/src/index.ts +70 -5
  68. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/worker/src/outbox-poller.ts +246 -90
  69. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/worker/src/tests/cron-trigger-workflow.test.ts +20 -0
  70. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/worker/src/workflows/dsl-workflow.ts +96 -8
  71. package/dist/lib/application/files/gadmin2-game-angle-demo/web/nginx.conf +74 -0
  72. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/components/agentPanel/ElementInspector.tsx +18 -0
  73. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/components/agentPanel/promptGenerator.ts +1 -1
  74. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/helpers/form.tsx +1 -1
  75. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/locales/en/common.json +3 -3
  76. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/locales/zh_CN/common.json +3 -3
  77. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/plugins/devShellPlugin.ts +4 -1
  78. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/canvas/CanvasEditPage.tsx +9 -0
  79. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/canvas/CanvasListPage.tsx +156 -139
  80. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/canvas/CanvasPage.tsx +14 -2
  81. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/canvas/CanvasToolbar.tsx +62 -0
  82. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/canvas/PublishModal.tsx +4 -6
  83. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/canvas/canvasApi.ts +18 -27
  84. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/canvas/canvasDefaults.ts +32 -11
  85. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/canvas/demos.ts +48 -61
  86. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/canvas-page/index.tsx +3 -6
  87. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/workflow/components/DslView.tsx +16 -16
  88. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/workflow/editor.tsx +28 -35
  89. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/workflow/instance-detail.tsx +34 -3
  90. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/workflow/show.tsx +1 -1
  91. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/routes/workflow/types.ts +1 -1
  92. package/dist/lib/application/files/gadmin2-game-angle-demo/web/src/styles/antd.css +6 -0
  93. package/package.json +1 -1
  94. package/dist/lib/application/files/gadmin2-game-angle-demo/Dockerfile +0 -63
  95. package/dist/lib/application/files/gadmin2-game-angle-demo/server/scripts/sync-resources.ts +0 -100
  96. package/dist/lib/application/files/gadmin2-game-angle-demo/server/seed/permissions.ts +0 -302
  97. package/dist/lib/application/files/gadmin2-game-angle-demo/server/src/modules/canvas/canvas.controller.spec.ts +0 -20
  98. package/dist/lib/application/files/gadmin2-game-angle-demo/temporal/sql/create-event-trigger.sql +0 -87
  99. /package/dist/lib/application/files/gadmin2-game-angle-demo/{GRACEFUL-DEPLOYMENT.md → server/GRACEFUL-DEPLOYMENT.md} +0 -0
@@ -0,0 +1,270 @@
1
+ # Temporal Worker — Graceful Deployment 方案
2
+
3
+ 本文档描述 Temporal Worker 在 K8s 环境下零停机滚动升级的方案,与 `server/GRACEFUL-DEPLOYMENT.md` 形成姊妹文档。Worker 与 server 的区别在于:
4
+
5
+ - **不直接接收用户流量**(不挂 Service / Ingress),所以 Endpoints 摘除一项不适用;
6
+ - **持有的工作单元更长**:单个 Activity / Workflow Task 可能跑数十秒到几分钟;
7
+ - **是 Temporal task queue 的消费者**:被 SIGKILL 后,Temporal Server 会等到 StartToCloseTimeout / 心跳超时才把任务返还队列重新调度,造成延迟与重复执行风险。
8
+
9
+ ---
10
+
11
+ ## 1. 整体时序
12
+
13
+ ```
14
+ K8s 发起 Pod 删除(Deployment rolling update)
15
+
16
+ └─► 执行 lifecycle.preStop hook
17
+
18
+ └─► sleep 5s(此时进程尚未收到 SIGTERM,/health/ready 仍返回 200;仅为 outbox poller 当前批次留出缓冲)
19
+
20
+ └─► 发送 SIGTERM 给容器主进程(node, PID 1)
21
+
22
+ ├─► process.on('SIGTERM') handler 触发
23
+ │ ├─► worker.shutdown() // 停止接取新 task
24
+ │ ├─► outboxPoller.stop() // 不再调度下一次 poll
25
+ │ │ ├─► 等待当前 in-flight poll 完成
26
+ │ │ ├─► pg.end()
27
+ │ │ └─► temporalConn.close()
28
+ │ └─► healthServer.close() // 停止响应 K8s probe
29
+
30
+ └─► worker.run() 等待所有 in-flight Activity/Workflow Task 完成后 resolve
31
+ └─► connection.close() → 进程退出
32
+
33
+ ═══════════════════════════════════════════════════════════════
34
+ terminationGracePeriodSeconds(300s)超时后 → 强制 SIGKILL
35
+ ```
36
+
37
+ **关键差异:** Worker 不需要 "等 iptables 同步" 那种延迟,preStop sleep 的作用有限,需注意两点:
38
+ 1. preStop 执行期间进程尚未收到 SIGTERM,`isShuttingDown` 仍为 false,`/health/ready` 继续返回 200;要等 SIGTERM handler 触发之后才会变为 503,K8s probe 才能察觉副本即将下线(如有上层调度策略可参考);
39
+ 2. 给 outbox poller 当前批次(已 `UPDATE ... SET processed=TRUE` 但还在 `temporalClient.workflow.start` 中的事件)多留几秒缓冲。
40
+
41
+ ---
42
+
43
+ ## 2. 代码改动
44
+
45
+ ### 2.1 `src/index.ts` — 主流程
46
+
47
+ ```ts
48
+ process.on('SIGTERM', () => void shutdown('SIGTERM'));
49
+ process.on('SIGINT', () => void shutdown('SIGINT'));
50
+
51
+ await worker.run(); // 阻塞直到 worker.shutdown() 被调用 + tasks drain
52
+ await connection.close(); // worker.run() resolve 之后再关连接
53
+ ```
54
+
55
+ `shutdown()` 内部:
56
+ 1. `worker.shutdown()` — 同步调用,使 worker 不再接取新 task;
57
+ 2. `outboxPoller.stop()` — 取消下一次 setTimeout,等当前 poll 完成,关 PG / Temporal client;
58
+ 3. `healthServer.close()` — 停止响应 probe。
59
+
60
+ ### 2.2 `src/outbox-poller.ts` — 暴露 stop()
61
+
62
+ `startOutboxPoller()` 现在返回:
63
+
64
+ ```ts
65
+ interface OutboxPollerHandle {
66
+ stop(): Promise<void>;
67
+ }
68
+ ```
69
+
70
+ `stop()` 行为:
71
+ - 设置 `stopped = true`,下一次循环不再被调度;
72
+ - 若处于 setTimeout 等待中:`clearTimeout` 立即取消;
73
+ - 若处于 in-flight poll 中:等当前 poll 完成(含本轮所有事件处理);
74
+ - 关闭 PG client 与 Temporal Connection。
75
+
76
+ ### 2.3 `src/index.ts` — Health 端点
77
+
78
+ | 端点 | 用途 | 行为 |
79
+ |------|------|------|
80
+ | `GET /health/live` | K8s livenessProbe | 进程存活始终 200 |
81
+ | `GET /health/ready` | K8s readinessProbe | `isReady && !isShuttingDown` 才 200,否则 503 |
82
+
83
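+ 监听端口由 `WORKER_HEALTH_PORT` 环境变量控制,代码默认值为 `8081`。本文的 Dockerfile / K8s / curl 示例统一使用 `8080`,若镜像或 Deployment 未设置该变量,需显式设置 `WORKER_HEALTH_PORT=8080`,否则 probe 会因端口不一致而失败。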
84
+
85
+ ### 2.4 `Dockerfile`
86
+
87
+ ```dockerfile
88
+ STOPSIGNAL SIGTERM # 与 K8s 默认信号一致
89
+ EXPOSE 8080 # health 端口
90
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
91
+ CMD wget -qO- http://127.0.0.1:8080/health/live || exit 1
92
+ CMD ["node", "dist/index.js"] # exec form:node 作为 PID 1 直接收 SIGTERM
93
+ ```
94
+
95
+ ---
96
+
97
+ ## 3. K8s Deployment 配置
98
+
99
+ ```yaml
100
+ apiVersion: apps/v1
101
+ kind: Deployment
102
+ metadata:
103
+ name: gadmin-workflow-worker
104
+ spec:
105
+ # ─── 滚动升级策略 ───────────────────────────────────────────
106
+ strategy:
107
+ type: RollingUpdate
108
+ rollingUpdate:
109
+ maxSurge: 1
110
+ maxUnavailable: 1 # Worker 不直接接流量,可以容忍短暂少 1 个副本
111
+
112
+ template:
113
+ spec:
114
+ # ─── 优雅关闭宽限期 ─────────────────────────────────────
115
+ terminationGracePeriodSeconds: 300
116
+ # 给 worker 足够时间完成:
117
+ # - 当前正在执行的 Activity / Workflow Task(最长可能数分钟)
118
+ # - outbox poller 当前批次的事件分发
119
+ # 超过 300s 未退出将被 SIGKILL —— Temporal 端要等心跳超时才能把任务派给其他 worker
120
+
121
+ containers:
122
+ - name: worker
123
+ image: <registry>/<image>:<tag>
124
+ ports:
125
+ - name: health
126
+ containerPort: 8080
127
+
128
+ # ─── preStop Hook ───────────────────────────────────
129
+ lifecycle:
130
+ preStop:
131
+ exec:
132
+ command: ["sh", "-c", "sleep 5"]
133
+ # 注意:preStop 期间 /health/ready 仍返回 200(收到 SIGTERM 之后才变为 503);这里主要是给 outbox poller 当前批次缓冲
134
+
135
+ # ─── Readiness Probe ────────────────────────────────
136
+ # Worker 不挂 Service,readinessProbe 主要用来反映"是否健康"给运维面板
137
+ # 也作为部署阶段判断新 Pod 是否启动成功的依据
138
+ readinessProbe:
139
+ httpGet:
140
+ path: /health/ready
141
+ port: 8080
142
+ initialDelaySeconds: 5
143
+ periodSeconds: 5
144
+ failureThreshold: 3
145
+ successThreshold: 1
146
+
147
+ # ─── Liveness Probe ─────────────────────────────────
148
+ # 检测进程僵死(如事件循环被阻塞),失败后 K8s 重启容器
149
+ livenessProbe:
150
+ httpGet:
151
+ path: /health/live
152
+ port: 8080
153
+ initialDelaySeconds: 15
154
+ periodSeconds: 10
155
+ failureThreshold: 3
156
+
157
+ # ─── 资源与环境变量(按需) ─────────────────────────
158
+ env:
159
+ - name: TEMPORAL_ADDRESS
160
+ value: "<temporal-frontend>:7233"
161
+ - name: TEMPORAL_NAMESPACE
162
+ value: "default"
163
+ - name: TEMPORAL_TASK_QUEUE
164
+ value: "workflow-execution"
165
+ - name: DATABASE_URL
166
+ valueFrom:
167
+ secretKeyRef:
168
+ name: gadmin-db
169
+ key: url
170
+ ```
171
+
172
+ ---
173
+
174
+ ## 4. 配置说明
175
+
176
+ ### 4.1 为什么 terminationGracePeriodSeconds = 300?
177
+
178
+ | 因素 | 说明 |
179
+ |------|------|
180
+ | preStop sleep | 5s |
181
+ | 当前 in-flight Activity / Workflow Task | 最长按业务实际定义,常见 30s–几分钟 |
182
+ | 当前 in-flight outbox poll 批次 | 通常 < 5s(仅启动 Temporal workflow,不等执行) |
183
+ | **总计余量** | 300s 覆盖大多数场景 |
184
+
185
+ 如果你的 Activity 可能执行超过 295s(300 − 5s preStop),有两条路:
186
+ 1. **加大 `terminationGracePeriodSeconds`**(例如长跑批量任务);
187
+ 2. **让 Activity 支持心跳取消**:`Context.current().heartbeat()` + `Context.current().cancellationSignal`,shutdown 时 worker 会向 Activity 发送 cancellation,business code 可以选择中断并提前返回,让 Temporal 把任务重派给其他 worker。
188
+
189
+ ### 4.2 maxUnavailable 与 server 的差异
190
+
191
+ server 用 `maxUnavailable: 0`,因为它直面用户流量、Pod 数变少会立刻反映为请求失败。Worker 不接流量,副本短暂少一个不影响 Temporal 的 task 派发(Temporal 自己有 polling 模型 + 流控),所以这里放宽到 `1` 减少升级耗时。如对延迟敏感(例如就只跑 1 副本),可以收紧到 `0`。
192
+
193
+ ### 4.3 Outbox poller 的语义边界
194
+
195
+ ⚠️ **当前实现有一个独立于 graceful 的语义问题**:`UPDATE ... SET processed = TRUE` 与 `temporalClient.workflow.start(...)` 不在同一事务里。如果在两者之间被 SIGKILL(或 stop() 等到 grace 超时),事件已被标记为 processed 但 workflow 实例没建立,下一次 poll 不会再处理它。
196
+
197
+ graceful 关闭把这个窗口最小化(一定会等当前 poll 内的 `for (const row of rows)` 循环走完),但**没有彻底消除**。彻底解决需要把 `UPDATE` 与 `INSERT t_workflow_instance + workflow.start` 改造为:先 INSERT instance + 启 workflow,成功后再 mark outbox processed —— 这是另一个独立的修复。
198
+
199
+ ---
200
+
201
+ ## 5. 验证方法
202
+
203
+ ### 5.1 本地验证 Shutdown
204
+
205
+ ```bash
206
+ cd temporal/worker && yarn build && yarn start &
207
+ WORKER_PID=$!
208
+
209
+ # 等服务启动(health server 默认监听 8081;下面的 curl 使用 8080,需在启动前 export WORKER_HEALTH_PORT=8080,或把 curl 端口改为 8081)
210
+ sleep 3
211
+
212
+ # 验证 health
213
+ curl -s http://localhost:8080/health/live # {"status":"ok"}
214
+ curl -s http://localhost:8080/health/ready # {"status":"ok"}
215
+
216
+ # 发送 SIGTERM
217
+ kill -TERM $WORKER_PID
218
+
219
+ # 期望日志依次出现:
220
+ # [Worker] SIGTERM received, beginning graceful shutdown...
221
+ # [Worker] worker.shutdown() called, draining in-flight tasks
222
+ # [OutboxPoller] Stopped, resources released
223
+ # [Worker] Health server closed
224
+ # [Worker] Temporal connection closed. Bye.
225
+ # 进程以 exit code 0 退出
226
+ ```
227
+
228
+ ### 5.2 K8s 环境验证
229
+
230
+ ```bash
231
+ # 观察滚动升级
232
+ kubectl rollout status deployment/gadmin-workflow-worker -w
233
+
234
+ # 看 Pod 事件确认 preStop 执行
235
+ kubectl describe pod <pod-name> | grep -A5 "Events"
236
+
237
+ # 升级期间持续触发事件,验证不丢任务
238
+ # (根据业务,向能触发 outbox 的表持续写入,再去 t_workflow_instance 比对)
239
+ ```
240
+
241
+ ---
242
+
243
+ ## 6. 信号传递链路
244
+
245
+ ```
246
+ K8s kubelet
247
+
248
+ └─► containerd: 发送 STOPSIGNAL (SIGTERM)
249
+
250
+ └─► Node.js process(PID 1,因 CMD 是 exec form)
251
+
252
+ ├─► process.on('SIGTERM') handler
253
+ │ ├─► worker.shutdown() → worker.run() 进入 draining 状态
254
+ │ ├─► outboxPoller.stop() → 关 PG / Temporal client
255
+ │ └─► healthServer.close()
256
+
257
+ └─► await worker.run() resolve → await connection.close() → 进程退出
258
+ ```
259
+
260
+ ---
261
+
262
+ ## 7. 故障场景与应对
263
+
264
+ | 场景 | 现象 | 应对 |
265
+ |------|------|------|
266
+ | Activity 执行时间超过 grace period | 被 SIGKILL;Temporal 等心跳超时后重派 | 让 Activity 支持心跳 + cancellation;或加大 grace period |
267
+ | outbox poll 中途被 SIGKILL | 已 mark processed 但未启 workflow 的事件丢失 | 见 §4.3,需独立修复事务边界 |
268
+ | readinessProbe 失败 | 仅影响监控/调度面板;不影响 task 派发 | 检查日志、PG / Temporal 连接 |
269
+ | livenessProbe 失败 | K8s 重启容器(同样走 SIGTERM → graceful path) | 排查事件循环阻塞(同步重 IO?deadlock?) |
270
+ | 调用 `worker.shutdown()` 后 `worker.run()` 迟迟不 resolve | 一定有 Activity 长时间不返回 | 最终 SIGKILL,Temporal 端等超时;治本看 §4.1 第 2 项 |
@@ -3,4 +3,4 @@ export { dbQuery } from './db-query';
3
3
  export { dbExecute } from './db-execute';
4
4
  export { sendNotification } from './send-notification';
5
5
  export { codeExecute } from './code-execute';
6
- export { reportNodeStatus, reportWorkflowStatus } from './reporting';
6
+ export { reportNodeStatus, reportWorkflowStatus, createCronInstance } from './reporting';
@@ -30,6 +30,29 @@ export async function reportNodeStatus(input: ReportNodeStatusInput): Promise<vo
30
30
  }
31
31
  }
32
32
 
33
+ /**
34
+ * Create a workflow instance for a cron-triggered run.
35
+ * Used by cronTriggerWorkflow before delegating to dslWorkflow.
36
+ */
37
+ export async function createCronInstance(input: {
38
+ workflowId: number;
39
+ versionId: number;
40
+ scheduleId: string;
41
+ }): Promise<{ instanceId: number }> {
42
+ const { rows } = await pool.query(
43
+ `INSERT INTO t_workflow_instance
44
+ (workflow_id, version_id, status, context, creator, created_at, updated_at)
45
+ VALUES ($1, $2, 'PENDING', $3, 'cron', NOW(), NOW())
46
+ RETURNING id`,
47
+ [
48
+ input.workflowId,
49
+ input.versionId,
50
+ JSON.stringify({ scheduleId: input.scheduleId, scheduledAt: new Date().toISOString() }),
51
+ ],
52
+ );
53
+ return { instanceId: Number(rows[0].id) };
54
+ }
55
+
33
56
  /**
34
57
  * Report overall workflow execution status.
35
58
  */
@@ -1,8 +1,13 @@
1
+ import * as http from 'http';
1
2
  import { NativeConnection, Worker } from '@temporalio/worker';
2
3
  import * as activities from './activities';
3
4
  import { config } from './config';
4
5
  import { startOutboxPoller } from './outbox-poller';
5
6
 
7
+ // 顶层状态:health server 与 shutdown handler 共享
8
+ let isReady = false;
9
+ let isShuttingDown = false;
10
+
6
11
  async function run() {
7
12
  console.log(`[Worker] Connecting to Temporal at ${config.temporal.address}...`);
8
13
 
@@ -21,13 +26,73 @@ async function run() {
21
26
  console.log(`[Worker] Started. Polling task queue: ${config.temporal.taskQueue}`);
22
27
  console.log(`[Worker] Namespace: ${config.temporal.namespace}`);
23
28
 
24
- // Start outbox poller for event_trigger workflows
25
- startOutboxPoller().catch((err) => {
26
- console.error('[OutboxPoller] Failed to start:', err.message);
27
- });
29
+ // event_trigger workflow 的 outbox poller
30
+ const outboxPoller = await startOutboxPoller();
31
+
32
+ // K8s probe 用的 HTTP health server
33
+ const healthServer = startHealthServer();
34
+
35
+ // 优雅关闭协调器
36
+ const shutdown = async (signal: string) => {
37
+ if (isShuttingDown) return;
38
+ isShuttingDown = true;
39
+ console.log(`[Worker] ${signal} received, beginning graceful shutdown...`);
40
+
41
+ // 1. worker.shutdown() 是同步调用,触发 worker.run() 在当前 activity/workflow 完成后 resolve
42
+ worker.shutdown();
43
+ console.log('[Worker] worker.shutdown() called, draining in-flight tasks');
44
+
45
+ // 2. 停止 outbox poller,释放其 PG / Temporal client 连接
46
+ try {
47
+ await outboxPoller.stop();
48
+ } catch (err) {
49
+ console.error('[Worker] OutboxPoller stop error:', err);
50
+ }
51
+
52
+ // 3. 关闭 health server(停止 K8s probe 请求)
53
+ await new Promise<void>((resolve) => healthServer.close(() => resolve()));
54
+ console.log('[Worker] Health server closed');
55
+ };
28
56
 
29
- // Start polling for workflow tasks
57
+ process.on('SIGTERM', () => void shutdown('SIGTERM'));
58
+ process.on('SIGINT', () => void shutdown('SIGINT'));
59
+
60
+ isReady = true;
61
+
62
+ // worker.run() 会一直阻塞到 worker.shutdown() 被调用并且 in-flight 任务全部 drain
30
63
  await worker.run();
64
+
65
+ // worker.run() resolve 之后再关闭 NativeConnection(worker 仍在用它的时候不能关)
66
+ await connection.close();
67
+ console.log('[Worker] Temporal connection closed. Bye.');
68
+ }
69
+
70
+ /**
71
+ * 最简 HTTP health 端点,用于 K8s liveness/readiness probe。
72
+ * - /health/live — 进程存活就返回 200
73
+ * - /health/ready — 处于运行中且未进入 shutdown 才返回 200,否则 503
74
+ */
75
+ function startHealthServer(): http.Server {
76
+ const port = Number(process.env.WORKER_HEALTH_PORT) || 8081;
77
+ const server = http.createServer((req, res) => {
78
+ if (req.url === '/health/live') {
79
+ res.writeHead(200, { 'Content-Type': 'application/json' });
80
+ res.end(JSON.stringify({ status: 'ok' }));
81
+ return;
82
+ }
83
+ if (req.url === '/health/ready') {
84
+ const ok = isReady && !isShuttingDown;
85
+ res.writeHead(ok ? 200 : 503, { 'Content-Type': 'application/json' });
86
+ res.end(JSON.stringify({ status: ok ? 'ok' : 'shutting_down' }));
87
+ return;
88
+ }
89
+ res.writeHead(404);
90
+ res.end();
91
+ });
92
+ server.listen(port, () => {
93
+ console.log(`[Worker] Health server listening on :${port}`);
94
+ });
95
+ return server;
31
96
  }
32
97
 
33
98
  run().catch((err) => {