@umacloud/knowledge 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. package/00-governance/governance-capabilities.md +557 -0
  2. package/00-governance/knowledge-map.md +39 -0
  3. package/00-governance/maintenance-policy.md +76 -0
  4. package/00-governance/review-checklist.md +81 -0
  5. package/README.md +13 -0
  6. package/ai/01-standards/agent-development-complete.md +691 -0
  7. package/ai/01-standards/llm-application-complete.md +488 -0
  8. package/ai/01-standards/mlops-complete.md +798 -0
  9. package/ai/01-standards/prompt-engineering-complete.md +646 -0
  10. package/ai/01-standards/rag-architecture-complete.md +649 -0
  11. package/ai/02-playbooks/llm-evaluation-playbook.md +847 -0
  12. package/ai/03-checklists/ai-project-checklist.md +215 -0
  13. package/ai/04-antipatterns/ai-antipatterns.md +661 -0
  14. package/ai/05-cases/case-rag-production.md +147 -0
  15. package/ai/06-glossary/ai-glossary.md +162 -0
  16. package/ai/agent-evaluation-benchmark.md +53 -0
  17. package/ai/ai-agent-memory-context-management.md +41 -0
  18. package/ai/ai-cost-capacity-optimization-playbook.md +42 -0
  19. package/ai/ai-data-security-and-compliance-playbook.md +37 -0
  20. package/ai/ai-domain-index-and-checklist.md +40 -0
  21. package/ai/ai-governance-maturity-model.md +50 -0
  22. package/ai/ai-model-selection-and-routing-strategy.md +47 -0
  23. package/ai/ai-observability-and-oncall-runbook.md +52 -0
  24. package/ai/ai-rag-engineering-playbook.md +42 -0
  25. package/ai/ai-red-team-and-safety-evaluation.md +42 -0
  26. package/ai/ai-release-readiness-and-rollback-gate.md +42 -0
  27. package/ai/llm-agent-engineering-deep-dive.md +57 -0
  28. package/ai/prompt-and-tool-guardrails.md +52 -0
  29. package/api/01-standards/enterprise-api-standards.md +198 -0
  30. package/api/01-standards/rest-api-design-guide.md +63 -0
  31. package/api/02-playbooks/api-pagination-playbook.md +93 -0
  32. package/api/02-playbooks/graphql-production-playbook.md +176 -0
  33. package/api/03-checklists/api-review-checklist.md +55 -0
  34. package/api/04-antipatterns/api-antipatterns.md +112 -0
  35. package/architecture/01-standards/api-gateway-patterns.md +496 -0
  36. package/architecture/01-standards/cloud-native-patterns.md +644 -0
  37. package/architecture/01-standards/distributed-systems-patterns.md +591 -0
  38. package/architecture/01-standards/event-driven-architecture.md +595 -0
  39. package/architecture/01-standards/microservices-patterns-complete.md +968 -0
  40. package/architecture/01-standards/microservices-patterns.md +495 -0
  41. package/architecture/01-standards/system-design-interview.md +664 -0
  42. package/architecture/02-playbooks/microservices-patterns-playbook.md +137 -0
  43. package/architecture/02-playbooks/migration-playbook.md +780 -0
  44. package/architecture/02-playbooks/system-design-playbook.md +779 -0
  45. package/architecture/03-checklists/architecture-decision-checklist.md +297 -0
  46. package/architecture/04-antipatterns/architecture-antipatterns.md +417 -0
  47. package/architecture/05-cases/case-netflix-microservices.md +413 -0
  48. package/architecture/06-glossary/architecture-glossary.md +164 -0
  49. package/architecture/adr-template-and-examples.md +38 -0
  50. package/architecture/api-gateway-deep-dive.md +1291 -0
  51. package/architecture/configuration-management.md +1162 -0
  52. package/architecture/distributed-transactions.md +1220 -0
  53. package/architecture/microservices-complete.md +735 -0
  54. package/architecture/resilience-and-disaster-patterns.md +37 -0
  55. package/architecture/service-governance.md +1198 -0
  56. package/architecture/system-architecture-deep-dive.md +37 -0
  57. package/backend/01-standards/analytics-and-growth.md +65 -0
  58. package/backend/01-standards/api-and-error-conventions.md +120 -0
  59. package/backend/01-standards/application-layering-and-packaging.md +160 -0
  60. package/backend/01-standards/auth-implementation.md +104 -0
  61. package/backend/01-standards/backend-framework-idioms.md +74 -0
  62. package/backend/01-standards/background-jobs-and-async.md +66 -0
  63. package/backend/01-standards/caching-strategies-complete.md +390 -0
  64. package/backend/01-standards/config-and-observability.md +77 -0
  65. package/backend/01-standards/data-modeling-and-persistence.md +94 -0
  66. package/backend/01-standards/django-complete.md +1765 -0
  67. package/backend/01-standards/email-and-notifications.md +64 -0
  68. package/backend/01-standards/fastapi-complete.md +925 -0
  69. package/backend/01-standards/file-upload-and-storage.md +66 -0
  70. package/backend/01-standards/graphql-api-complete.md +416 -0
  71. package/backend/01-standards/llm-application-standard.md +78 -0
  72. package/backend/01-standards/message-queue-patterns.md +379 -0
  73. package/backend/01-standards/microservices-and-distributed.md +78 -0
  74. package/backend/01-standards/nestjs-complete.md +2167 -0
  75. package/backend/01-standards/payment-integration.md +80 -0
  76. package/backend/01-standards/rate-limiting-complete.md +451 -0
  77. package/backend/01-standards/realtime-and-websocket.md +65 -0
  78. package/backend/01-standards/search-and-filtering.md +64 -0
  79. package/backend/01-standards/spring-boot-complete.md +445 -0
  80. package/backend/02-playbooks/api-design-playbook.md +718 -0
  81. package/backend/02-playbooks/email-send-playbook.md +130 -0
  82. package/backend/02-playbooks/file-upload-s3-playbook.md +153 -0
  83. package/backend/02-playbooks/typescript-enterprise-playbook.md +133 -0
  84. package/backend/02-playbooks/websocket-realtime-playbook.md +154 -0
  85. package/backend/03-checklists/api-launch-checklist.md +189 -0
  86. package/backend/04-antipatterns/backend-antipatterns.md +1051 -0
  87. package/blockchain/01-standards/blockchain-basics.md +557 -0
  88. package/blockchain/01-standards/smart-contract-development.md +1315 -0
  89. package/cicd/01-standards/deployment-and-delivery-standard.md +96 -0
  90. package/cicd/01-standards/github-actions-complete.md +473 -0
  91. package/cicd/01-standards/release-and-store-submission.md +75 -0
  92. package/cicd/02-playbooks/cicd-pipeline-playbook.md +144 -0
  93. package/cicd/02-playbooks/release-management-playbook.md +605 -0
  94. package/cicd/03-checklists/pipeline-security-checklist.md +168 -0
  95. package/cicd/04-antipatterns/cicd-antipatterns.md +589 -0
  96. package/cicd/05-cases/case-deployment-automation.md +221 -0
  97. package/cicd/05-cases/case-gitops-transformation.md +212 -0
  98. package/cicd/06-glossary/cicd-glossary.md +114 -0
  99. package/cicd/cicd-blueprint-deep-dive.md +38 -0
  100. package/cicd/release-readiness-gate.md +37 -0
  101. package/cloud-native/01-standards/container-security.md +741 -0
  102. package/cloud-native/01-standards/kubernetes-complete.md +812 -0
  103. package/cloud-native/02-playbooks/api-gateway-playbook.md +155 -0
  104. package/cloud-native/02-playbooks/gitops-with-argocd.md +760 -0
  105. package/cloud-native/02-playbooks/k8s-troubleshooting-playbook.md +1942 -0
  106. package/cloud-native/02-playbooks/message-queue-playbook.md +129 -0
  107. package/cloud-native/02-playbooks/multicloud-governance.md +726 -0
  108. package/cloud-native/02-playbooks/serverless-patterns.md +788 -0
  109. package/cloud-native/02-playbooks/service-mesh-playbook.md +612 -0
  110. package/cloud-native/02-playbooks/terraform-iac-playbook.md +143 -0
  111. package/cloud-native/03-checklists/container-security-checklist.md +431 -0
  112. package/cloud-native/03-checklists/k8s-production-readiness-checklist.md +460 -0
  113. package/cloud-native/04-antipatterns/container-antipatterns.md +660 -0
  114. package/cloud-native/04-antipatterns/k8s-antipatterns.md +743 -0
  115. package/cloud-native/05-cases/case-k8s-migration.md +478 -0
  116. package/cloud-native/05-cases/case-k8s-scaling.md +642 -0
  117. package/cloud-native/05-cases/case-k8s-security-incident.md +397 -0
  118. package/cloud-native/06-glossary/cloud-native-glossary.md +337 -0
  119. package/cross-platform/01-standards/cross-platform-frameworks.md +83 -0
  120. package/cross-platform/01-standards/platform-selection-and-architecture.md +77 -0
  121. package/data/01-standards/elasticsearch-complete.md +2098 -0
  122. package/data/01-standards/postgresql-complete.md +1613 -0
  123. package/data/01-standards/redis-complete.md +1527 -0
  124. package/data/02-playbooks/database-optimization-playbook.md +403 -0
  125. package/data/02-playbooks/elasticsearch-production-playbook.md +132 -0
  126. package/data/03-checklists/database-launch-checklist.md +187 -0
  127. package/data/04-antipatterns/database-antipatterns.md +873 -0
  128. package/data/05-cases/case-database-migration.md +310 -0
  129. package/data/06-glossary/database-glossary.md +440 -0
  130. package/data/data-governance-and-modeling-deep-dive.md +39 -0
  131. package/data-engineering/01-standards/airflow-complete.md +523 -0
  132. package/data-engineering/01-standards/kafka-complete.md +1521 -0
  133. package/data-engineering/02-playbooks/spark-etl-playbook.md +496 -0
  134. package/data-engineering/03-checklists/pipeline-launch-checklist.md +194 -0
  135. package/data-engineering/04-antipatterns/data-pipeline-antipatterns.md +684 -0
  136. package/data-engineering/05-cases/case-real-time-pipeline.md +355 -0
  137. package/data-engineering/06-glossary/data-engineering-glossary.md +429 -0
  138. package/database/01-standards/database-schema-standards.md +147 -0
  139. package/database/02-playbooks/postgresql-optimization-quick.md +52 -0
  140. package/database/02-playbooks/postgresql-performance-optimization.md +58 -0
  141. package/database/02-playbooks/postgresql-production-playbook.md +146 -0
  142. package/database/02-playbooks/redis-caching-playbook.md +117 -0
  143. package/database/03-checklists/database-review-checklist.md +50 -0
  144. package/database/04-antipatterns/database-antipatterns.md +112 -0
  145. package/design/01-standards/ui-design-system-complete.md +423 -0
  146. package/design/02-playbooks/design-handoff-playbook.md +254 -0
  147. package/design/02-playbooks/design-review-playbook.md +388 -0
  148. package/design/03-checklists/design-review-checklist.md +246 -0
  149. package/design/04-antipatterns/design-antipatterns.md +378 -0
  150. package/design/05-cases/case-design-system-adoption.md +328 -0
  151. package/design/06-glossary/design-glossary.md +329 -0
  152. package/design/ui-full-lifecycle-cross-platform-playbook.md +571 -0
  153. package/design/ux-system-deep-dive.md +38 -0
  154. package/design-systems/00-craft-rules.md +71 -0
  155. package/design-systems/aesthetic-families.md +43 -0
  156. package/design-systems/anti-ai-slop.md +162 -0
  157. package/design-systems/bold-geometric.md +120 -0
  158. package/design-systems/brutalist-bold.md +103 -0
  159. package/design-systems/editorial-clean.md +109 -0
  160. package/design-systems/glass-aurora.md +108 -0
  161. package/design-systems/modern-minimal.md +145 -0
  162. package/design-systems/premium-luxury.md +106 -0
  163. package/design-systems/product-type-design-map.md +48 -0
  164. package/design-systems/soft-warm.md +123 -0
  165. package/design-systems/tech-utility.md +113 -0
  166. package/desktop/01-standards/desktop-app-standard.md +72 -0
  167. package/desktop/01-standards/desktop-design.md +71 -0
  168. package/development/00-governance/document-template.md +41 -0
  169. package/development/01-standards/api-versioning-strategies.md +432 -0
  170. package/development/01-standards/authentication-patterns-complete.md +479 -0
  171. package/development/01-standards/css-architecture-complete.md +550 -0
  172. package/development/01-standards/database-migration-strategies.md +484 -0
  173. package/development/01-standards/elasticsearch-complete.md +347 -0
  174. package/development/01-standards/git-complete.md +371 -0
  175. package/development/01-standards/golang-complete.md +1565 -0
  176. package/development/01-standards/graphql-complete.md +298 -0
  177. package/development/01-standards/javascript-bundlers-complete.md +469 -0
  178. package/development/01-standards/javascript-typescript-complete.md +528 -0
  179. package/development/01-standards/jest-complete.md +275 -0
  180. package/development/01-standards/linux-complete.md +234 -0
  181. package/development/01-standards/logging-observability-complete.md +526 -0
  182. package/development/01-standards/microservices-communication.md +502 -0
  183. package/development/01-standards/mongodb-complete.md +406 -0
  184. package/development/01-standards/oauth2-complete.md +285 -0
  185. package/development/01-standards/performance-optimization-complete.md +289 -0
  186. package/development/01-standards/playwright-complete.md +247 -0
  187. package/development/01-standards/postgresql-complete.md +456 -0
  188. package/development/01-standards/pytest-complete.md +340 -0
  189. package/development/01-standards/python-async-programming.md +902 -0
  190. package/development/01-standards/python-complete.md +956 -0
  191. package/development/01-standards/python-decorators-complete.md +799 -0
  192. package/development/01-standards/python-design-patterns.md +2854 -0
  193. package/development/01-standards/python-packaging-distribution.md +420 -0
  194. package/development/01-standards/python-testing-strategies.md +607 -0
  195. package/development/01-standards/python-web-frameworks-comparison.md +471 -0
  196. package/development/01-standards/redis-complete.md +317 -0
  197. package/development/01-standards/rest-api-complete.md +316 -0
  198. package/development/01-standards/rust-complete.md +578 -0
  199. package/development/01-standards/typescript-advanced-types.md +1513 -0
  200. package/development/01-standards/web-security-complete.md +292 -0
  201. package/development/02-playbooks/api-design-playbook.md +810 -0
  202. package/development/02-playbooks/database-migration-playbook.md +580 -0
  203. package/development/02-playbooks/debugging-playbook.md +692 -0
  204. package/development/02-playbooks/feature-delivery-playbook.md +430 -0
  205. package/development/02-playbooks/incident-hotfix-playbook.md +387 -0
  206. package/development/02-playbooks/performance-optimization-playbook.md +531 -0
  207. package/development/02-playbooks/performance-tuning-playbook.md +652 -0
  208. package/development/02-playbooks/refactor-playbook.md +403 -0
  209. package/development/02-playbooks/release-playbook.md +469 -0
  210. package/development/03-checklists/architecture-review-checklist.md +168 -0
  211. package/development/03-checklists/data-migration-checklist.md +157 -0
  212. package/development/03-checklists/oncall-handover-checklist.md +173 -0
  213. package/development/03-checklists/pr-checklist.md +158 -0
  214. package/development/03-checklists/production-readiness-checklist.md +190 -0
  215. package/development/03-checklists/release-readiness-checklist.md +154 -0
  216. package/development/03-checklists/security-review-checklist.md +182 -0
  217. package/development/04-antipatterns/api-antipatterns.md +657 -0
  218. package/development/04-antipatterns/architecture-antipatterns.md +686 -0
  219. package/development/04-antipatterns/backend-antipatterns.md +648 -0
  220. package/development/04-antipatterns/cicd-antipatterns.md +540 -0
  221. package/development/04-antipatterns/code-smell-antipatterns.md +571 -0
  222. package/development/04-antipatterns/data-antipatterns.md +658 -0
  223. package/development/04-antipatterns/database-antipatterns.md +578 -0
  224. package/development/04-antipatterns/frontend-antipatterns.md +635 -0
  225. package/development/04-antipatterns/reliability-antipatterns.md +700 -0
  226. package/development/04-antipatterns/security-antipatterns.md +747 -0
  227. package/development/05-cases/case-api-version-migration.md +428 -0
  228. package/development/05-cases/case-authorization-hardening.md +383 -0
  229. package/development/05-cases/case-bluegreen-rollback.md +466 -0
  230. package/development/05-cases/case-cache-snowball-protection.md +485 -0
  231. package/development/05-cases/case-ci-cd-pipeline.md +544 -0
  232. package/development/05-cases/case-database-scaling.md +500 -0
  233. package/development/05-cases/case-db-hotspot-optimization.md +487 -0
  234. package/development/05-cases/case-incident-mttr-reduction.md +563 -0
  235. package/development/05-cases/case-microservice-migration.md +375 -0
  236. package/development/05-cases/case-performance-optimization.md +406 -0
  237. package/development/05-cases/case-security-incident-response.md +345 -0
  238. package/development/06-glossary/full-stack-glossary.md +166 -0
  239. package/development/09-maturity/quarterly-audit-template.md +35 -0
  240. package/development/11-ui-excellence/ui-aesthetic-system.md +41 -0
  241. package/development/11-ui-excellence/ui-engineering-excellence.md +435 -0
  242. package/development/12-scenarios/development-scenarios-guide.md +565 -0
  243. package/development/13-implementation-assets/implementation-toolkit.md +282 -0
  244. package/development/13-implementation-assets/knowledge-gates-execution.md +43 -0
  245. package/development/14-full-lifecycle/software-lifecycle-gates.md +511 -0
  246. package/development/15-lifecycle-templates/project-templates-collection.md +791 -0
  247. package/development/api-contract-and-versioning-guide.md +36 -0
  248. package/development/api-governance-complete.md +43 -0
  249. package/development/backend-engineering-complete.md +43 -0
  250. package/development/code-review-quality-complete.md +43 -0
  251. package/development/concurrency-reliability-complete.md +43 -0
  252. package/development/database-engineering-complete.md +43 -0
  253. package/development/engineering-effectiveness-complete.md +43 -0
  254. package/development/engineering-standards-deep-dive.md +38 -0
  255. package/development/frontend-engineering-complete.md +43 -0
  256. package/development/performance-capacity-complete.md +43 -0
  257. package/development/refactor-migration-complete.md +42 -0
  258. package/development/refactoring-and-techdebt-playbook.md +37 -0
  259. package/development/security-in-development-complete.md +43 -0
  260. package/devops/01-standards/cicd-pipeline-complete.md +262 -0
  261. package/devops/01-standards/docker-complete.md +1490 -0
  262. package/devops/01-standards/github-actions-complete.md +337 -0
  263. package/devops/01-standards/kubernetes-complete.md +638 -0
  264. package/devops/01-standards/terraform-complete.md +2117 -0
  265. package/devops/02-playbooks/docker-compose-playbook.md +233 -0
  266. package/devops/02-playbooks/docker-k8s-production-playbook.md +186 -0
  267. package/devops/02-playbooks/docker-production-playbook.md +952 -0
  268. package/edge-iot/01-standards/edge-iot-complete.md +473 -0
  269. package/experts/architect/api-design.md +178 -0
  270. package/experts/architect/methodology.md +124 -0
  271. package/experts/architect/security.md +75 -0
  272. package/experts/backend-lead/methodology.md +216 -0
  273. package/experts/devops/methodology.md +160 -0
  274. package/experts/frontend-lead/methodology.md +178 -0
  275. package/experts/product-manager/industry/ecommerce.md +43 -0
  276. package/experts/product-manager/industry/saas.md +40 -0
  277. package/experts/product-manager/methodology.md +97 -0
  278. package/experts/qa-lead/methodology.md +123 -0
  279. package/experts/qa-lead/test-strategy.md +128 -0
  280. package/experts/uiux-designer/methodology.md +125 -0
  281. package/frontend/01-standards/accessibility-complete.md +532 -0
  282. package/frontend/01-standards/accessibility-standard.md +74 -0
  283. package/frontend/01-standards/admin-dashboard-and-crud.md +72 -0
  284. package/frontend/01-standards/design-tokens-complete.md +444 -0
  285. package/frontend/01-standards/forms-and-validation.md +77 -0
  286. package/frontend/01-standards/frontend-architecture-and-layering.md +119 -0
  287. package/frontend/01-standards/i18n-and-localization.md +65 -0
  288. package/frontend/01-standards/nextjs-complete.md +451 -0
  289. package/frontend/01-standards/react-complete.md +713 -0
  290. package/frontend/01-standards/react-hooks-complete-guide.md +1100 -0
  291. package/frontend/01-standards/react-hooks-complete.md +1171 -0
  292. package/frontend/01-standards/seo-and-web-vitals.md +77 -0
  293. package/frontend/01-standards/state-management-complete.md +444 -0
  294. package/frontend/01-standards/vue-complete.md +499 -0
  295. package/frontend/01-standards/vue3-complete.md +2002 -0
  296. package/frontend/01-standards/web-framework-best-practices.md +64 -0
  297. package/frontend/01-standards/web-performance-complete.md +495 -0
  298. package/frontend/02-playbooks/accessibility-a11y-playbook.md +161 -0
  299. package/frontend/02-playbooks/frontend-performance-playbook.md +707 -0
  300. package/frontend/02-playbooks/i18n-internationalization-playbook.md +120 -0
  301. package/frontend/02-playbooks/performance-optimization-playbook.md +163 -0
  302. package/frontend/02-playbooks/react-nextjs-production-playbook.md +167 -0
  303. package/frontend/02-playbooks/react-state-management-playbook.md +173 -0
  304. package/frontend/03-checklists/component-quality-checklist.md +166 -0
  305. package/frontend/03-checklists/frontend-launch-checklist.md +299 -0
  306. package/frontend/04-antipatterns/frontend-antipatterns.md +886 -0
  307. package/frontend/05-cases/case-performance-optimization.md +274 -0
  308. package/harmony/01-standards/harmonyos-arkts-standard.md +75 -0
  309. package/harmony/01-standards/harmonyos-design.md +65 -0
  310. package/high-quality-engineering-playbook.md +54 -0
  311. package/incident/01-standards/incident-response-complete.md +303 -0
  312. package/incident/02-playbooks/chaos-engineering-playbook.md +883 -0
  313. package/incident/02-playbooks/postmortem-playbook.md +398 -0
  314. package/incident/03-checklists/incident-readiness-checklist.md +181 -0
  315. package/incident/04-antipatterns/incident-antipatterns.md +490 -0
  316. package/incident/05-cases/case-cascade-failure.md +176 -0
  317. package/incident/06-glossary/incident-glossary.md +114 -0
  318. package/incident/postmortem-and-response-deep-dive.md +39 -0
  319. package/industries/ecommerce/ecommerce-complete.md +631 -0
  320. package/industries/education/education-complete.md +555 -0
  321. package/industries/fintech/fintech-complete.md +501 -0
  322. package/industries/gaming/gaming-complete.md +587 -0
  323. package/industries/healthcare/healthcare-complete.md +452 -0
  324. package/low-code/01-standards/low-code-complete.md +944 -0
  325. package/miniprogram/01-standards/ai-common-mistakes.md +61 -0
  326. package/miniprogram/01-standards/miniprogram-custom-navbar-capsule.md +77 -0
  327. package/miniprogram/01-standards/miniprogram-design.md +61 -0
  328. package/miniprogram/01-standards/miniprogram-standard.md +81 -0
  329. package/mobile/01-standards/android-material-design.md +70 -0
  330. package/mobile/01-standards/flutter-complete.md +384 -0
  331. package/mobile/01-standards/ios-design-hig.md +78 -0
  332. package/mobile/01-standards/mobile-app-standard.md +85 -0
  333. package/mobile/01-standards/react-native-complete.md +352 -0
  334. package/mobile/02-playbooks/mobile-cross-platform-playbook.md +175 -0
  335. package/mobile/02-playbooks/mobile-performance.md +473 -0
  336. package/mobile/03-checklists/mobile-release-checklist.md +234 -0
  337. package/mobile/04-antipatterns/mobile-antipatterns.md +798 -0
  338. package/mobile/05-cases/case-app-performance.md +500 -0
  339. package/mobile/05-cases/case-app-startup-optimization.md +218 -0
  340. package/mobile/06-glossary/mobile-glossary.md +484 -0
  341. package/observability/01-standards/observability-standards.md +103 -0
  342. package/observability/02-playbooks/prometheus-grafana-playbook.md +135 -0
  343. package/observability/02-playbooks/structured-logging-playbook.md +73 -0
  344. package/observability/03-checklists/observability-checklist.md +54 -0
  345. package/observability/04-antipatterns/observability-antipatterns.md +106 -0
  346. package/operations/01-standards/prometheus-monitoring-complete.md +1578 -0
  347. package/operations/02-playbooks/capacity-planning-playbook.md +620 -0
  348. package/operations/03-checklists/production-launch-checklist.md +365 -0
  349. package/operations/04-antipatterns/operations-antipatterns.md +664 -0
  350. package/operations/05-cases/case-sre-practices.md +581 -0
  351. package/operations/06-glossary/operations-glossary.md +120 -0
  352. package/operations/aiops-anomaly-detection.md +758 -0
  353. package/operations/capacity-planning.md +1061 -0
  354. package/operations/chaos-engineering.md +659 -0
  355. package/operations/incident-command-system.md +38 -0
  356. package/operations/observability-complete.md +442 -0
  357. package/operations/slo-sli-playbook.md +517 -0
  358. package/operations/sre-operations-deep-dive.md +39 -0
  359. package/package.json +8 -0
  360. package/performance/01-standards/performance-and-scalability.md +80 -0
  361. package/performance/01-standards/performance-standards.md +156 -0
  362. package/performance/02-playbooks/query-optimization-playbook.md +103 -0
  363. package/performance/03-checklists/performance-checklist.md +56 -0
  364. package/performance/04-antipatterns/performance-antipatterns.md +146 -0
  365. package/product/01-standards/product-management-complete.md +285 -0
  366. package/product/02-playbooks/feature-launch-playbook.md +207 -0
  367. package/product/02-playbooks/user-research-playbook.md +532 -0
  368. package/product/03-checklists/feature-launch-checklist.md +275 -0
  369. package/product/04-antipatterns/product-antipatterns.md +355 -0
  370. package/product/05-cases/case-mvp-to-scale.md +384 -0
  371. package/product/06-glossary/product-glossary.md +462 -0
  372. package/product/feature-prioritization-framework.md +40 -0
  373. package/product/kpi-and-metric-tree.md +37 -0
  374. package/product/product-discovery-and-prd-deep-dive.md +41 -0
  375. package/quantum/01-standards/quantum-complete.md +1186 -0
  376. package/security/01-standards/api-security-complete.md +511 -0
  377. package/security/01-standards/container-runtime-security.md +574 -0
  378. package/security/01-standards/data-protection-gdpr.md +543 -0
  379. package/security/01-standards/owasp-top10-complete.md +1890 -0
  380. package/security/01-standards/secure-coding-baseline.md +90 -0
  381. package/security/01-standards/supply-chain-security.md +441 -0
  382. package/security/01-standards/web-security-checklist.md +108 -0
  383. package/security/01-standards/zero-trust-architecture.md +521 -0
  384. package/security/02-playbooks/auth-sso-playbook.md +166 -0
  385. package/security/02-playbooks/incident-response-security-playbook.md +588 -0
  386. package/security/02-playbooks/owasp-api-security-playbook.md +129 -0
  387. package/security/02-playbooks/payment-integration-playbook.md +119 -0
  388. package/security/02-playbooks/penetration-testing-playbook.md +517 -0
  389. package/security/03-checklists/security-audit-checklist.md +356 -0
  390. package/security/04-antipatterns/security-coding-antipatterns.md +580 -0
  391. package/security/05-cases/case-log4shell-incident.md +537 -0
  392. package/security/05-cases/case-major-breaches.md +468 -0
  393. package/security/06-glossary/security-glossary.md +212 -0
  394. package/security/compliance-automation.md +993 -0
  395. package/security/container-security.md +680 -0
  396. package/security/devsecops-complete.md +426 -0
  397. package/security/sast-dast-sca.md +775 -0
  398. package/security/secrets-management.md +594 -0
  399. package/security/security-architecture-deep-dive.md +37 -0
  400. package/security/threat-modeling-stride-playbook.md +40 -0
  401. package/seed-templates/auth-system.md +59 -0
  402. package/seed-templates/blog-content.md +94 -0
  403. package/seed-templates/dashboard.md +89 -0
  404. package/seed-templates/docs-site.md +73 -0
  405. package/seed-templates/e-commerce.md +50 -0
  406. package/seed-templates/saas-landing.md +92 -0
  407. package/seed-templates/settings-page.md +51 -0
  408. package/testing/01-standards/test-strategy-and-layering.md +83 -0
  409. package/testing/01-standards/testing-strategy-complete.md +422 -0
  410. package/testing/01-standards/unit-testing-best-practices.md +118 -0
  411. package/testing/02-playbooks/e2e-testing-playbook.md +988 -0
  412. package/testing/02-playbooks/testing-strategy-playbook.md +126 -0
  413. package/testing/03-checklists/test-strategy-checklist.md +208 -0
  414. package/testing/04-antipatterns/testing-antipatterns.md +718 -0
  415. package/testing/05-cases/case-testing-transformation.md +300 -0
  416. package/testing/06-glossary/testing-glossary.md +110 -0
  417. package/testing/risk-based-test-matrix.md +36 -0
  418. package/testing/testing-strategy-deep-dive.md +37 -0
@@ -0,0 +1,780 @@
1
+ ---
2
+ title: 系统迁移作战手册
3
+ version: 1.0.0
4
+ last_updated: 2026-03-28
5
+ owner: architecture-team
6
+ tags: [migration, blue-green, canary, strangler-fig, database-migration, zero-downtime]
7
+ status: production
8
+ domain: architecture
9
+ difficulty: intermediate
10
+ quality_score: 70
11
+ ---
12
+
13
+ # 开发:Excellent(11964948@qq.com)
14
+ # 功能:系统迁移全流程作战手册
15
+ # 作用:指导团队完成系统迁移的评估、规划、执行、验证与切换
16
+ # 创建时间:2026-03-28
17
+ # 最后修改:2026-03-28
18
+
19
+ ## 目标
20
+
21
+ 建立系统迁移标准化流程,确保:
22
+ - 迁移过程零数据丢失(RPO = 0)
23
+ - 业务中断时间 < 5 分钟(零停机迁移场景下业务中断时间为 0)
24
+ - 每个迁移步骤可回滚
25
+ - 迁移后系统功能、性能、数据完整性全量验证通过
26
+ - 迁移过程全程可观测、可审计
27
+
28
+ ## 适用场景
29
+
30
+ - 单体到微服务拆分(Strangler Fig 模式)
31
+ - 数据库迁移(MySQL → PostgreSQL / 单机 → 分布式)
32
+ - 云迁移(IDC → 云 / 云A → 云B)
33
+ - 框架升级(Spring Boot 2 → 3 / Django 3 → 5)
34
+ - 基础设施升级(Kubernetes 版本升级 / 操作系统升级)
35
+ - 第三方服务替换(支付渠道切换 / 短信服务商更换)
36
+
37
+ ## 前置条件
38
+
39
+ ### 必要条件
40
+
41
+ - [ ] 迁移目标与成功标准已明确定义
42
+ - [ ] 迁移范围已确定(系统/数据/接口/配置)
43
+ - [ ] 当前系统已有完整监控(基线数据可对比)
44
+ - [ ] 最新全量备份已完成并验证可恢复
45
+ - [ ] 回滚方案已制定并演练
46
+ - [ ] 迁移时间窗口已与业务方确认
47
+ - [ ] 团队已进行迁移方案培训
48
+
49
+ ### 风险评估矩阵
50
+
51
+ | 风险 | 概率 | 影响 | 缓解措施 |
52
+ |------|------|------|---------|
53
+ | 数据不一致 | 中 | 高 | 双写 + 数据校验脚本 |
54
+ | 性能退化 | 中 | 中 | 灰度发布 + 实时监控 |
55
+ | 依赖方不兼容 | 低 | 高 | 提前联调 + 适配层 |
56
+ | 迁移时间超预期 | 中 | 中 | 分批迁移 + 回滚预案 |
57
+ | 数据迁移失败 | 低 | 高 | 增量同步 + 断点续传 |
58
+
59
+ ---
60
+
61
+ ## 一、评估阶段
62
+
63
+ ### 1.1 现状梳理
64
+
65
+ ```yaml
66
+ 系统画像:
67
+ 服务清单:
68
+ - 列出所有服务/模块及其职责
69
+ - 标注每个服务的技术栈版本
70
+ - 记录服务间依赖关系图
71
+
72
+ 数据资产:
73
+ - 数据库列表(类型/版本/大小/表数量)
74
+ - 数据增长趋势(日/月增量)
75
+ - 数据保留策略
76
+ - 敏感数据分类(PII/金融/医疗)
77
+
78
+ 接口清单:
79
+ - 内部 API(服务间调用)
80
+ - 外部 API(第三方集成)
81
+ - 消息队列 Topic
82
+ - 定时任务
83
+
84
+ 基础设施:
85
+ - 服务器清单(规格/数量/利用率)
86
+ - 网络拓扑
87
+ - 存储配置
88
+ - 证书/密钥清单
89
+ ```
90
+
91
+ ### 1.2 迁移复杂度评估
92
+
93
+ ```yaml
94
+ 评分模型(每项 1-5 分,总分 = Σ 分值 × 权重,取值范围 17-85):
95
+ 数据量: weight=3
96
+ 1: < 10 GB
97
+ 3: 10-500 GB
98
+ 5: > 500 GB
99
+
100
+ 服务依赖数: weight=3
101
+ 1: 0-2 个依赖
102
+ 3: 3-5 个依赖
103
+ 5: > 5 个依赖
104
+
105
+ 停机容忍度: weight=5
106
+ 1: 可接受 4+ 小时停机
107
+ 3: 可接受 30 分钟停机
108
+ 5: 零停机要求
109
+
110
+ 数据一致性要求: weight=4
111
+ 1: 最终一致即可
112
+ 3: 短暂不一致可接受(< 5分钟)
113
+ 5: 严格强一致
114
+
115
+ 团队经验: weight=2
116
+ 1: 团队有丰富迁移经验
117
+ 3: 部分成员有经验
118
+ 5: 团队无迁移经验
119
+
120
+ 总分解读:
121
+ < 30: 低复杂度 → 简单停机迁移即可
122
+ 30-50: 中复杂度 → 蓝绿部署/灰度发布
123
+ > 50: 高复杂度 → Strangler Fig + 灰度 + 双写
124
+ ```
125
+
126
+ ### 1.3 迁移策略选择
127
+
128
+ ```yaml
129
+ 蓝绿部署(Blue-Green):
130
+ 适用: 整体切换/新旧环境可完全并行
131
+ 优势: 切换快速(秒级)/回滚简单
132
+ 劣势: 需要双倍资源/数据库迁移复杂
133
+ 典型场景: 应用版本升级/云环境切换
134
+
135
+ 金丝雀发布(Canary Release):
136
+ 适用: 渐进式迁移/风险控制优先
137
+ 优势: 风险可控/逐步放量
138
+ 劣势: 需要流量分配能力/双版本共存期较长
139
+ 典型场景: 核心服务升级/新架构验证
140
+
141
+ Strangler Fig(绞杀者模式):
142
+ 适用: 单体到微服务/长期渐进式迁移
143
+ 优势: 低风险/按功能模块迁移
144
+ 劣势: 过渡期维护成本高/需要请求路由层
145
+ 典型场景: 遗留系统现代化
146
+
147
+ 大爆炸迁移(Big Bang):
148
+ 适用: 系统简单/可接受停机/无增量方案
149
+ 优势: 一次到位/无需维护双系统
150
+ 劣势: 风险集中/回滚困难
151
+ 典型场景: 小型内部系统/非核心服务
152
+ ```
153
+
154
+ ---
155
+
156
+ ## 二、规划阶段
157
+
158
+ ### 2.1 迁移计划制定
159
+
160
+ ```yaml
161
+ 里程碑规划:
162
+ M1 - 环境准备(第 1-2 周):
163
+ - 目标环境搭建
164
+ - 网络打通与安全组配置
165
+ - 监控与告警部署
166
+ - 自动化脚本编写
167
+
168
+ M2 - 数据迁移(第 3-4 周):
169
+ - 全量数据同步
170
+ - 增量同步机制建立
171
+ - 数据校验工具开发
172
+ - 数据校验通过
173
+
174
+ M3 - 应用迁移(第 5-6 周):
175
+ - 应用部署到新环境
176
+ - 配置调整与适配
177
+ - 功能回归测试
178
+ - 性能基准测试
179
+
180
+ M4 - 灰度切换(第 7 周):
181
+ - 1% 流量切换 + 24h 观察
182
+ - 10% 流量切换 + 24h 观察
183
+ - 50% 流量切换 + 48h 观察
184
+ - 100% 流量切换
185
+
186
+ M5 - 善后清理(第 8 周):
187
+ - 旧环境保留 7 天(回滚窗口)
188
+ - 旧环境下线
189
+ - 文档更新
190
+ - 复盘会议
191
+ ```
192
+
193
+ ### 2.2 数据迁移方案
194
+
195
+ ```yaml
196
+ 全量迁移:
197
+ # PostgreSQL → PostgreSQL(跨版本/跨实例)
198
+ 方案A - pg_dump/pg_restore:
199
+ 导出: pg_dump -Fd -j 8 -h old-host -d production -f full-backup.dir
199
+ 导入: pg_restore -j 8 -h new-host -d production full-backup.dir
201
+ 适用: 数据量 < 100GB
202
+ 耗时: ~1GB/分钟(取决于网络和磁盘)
203
+
204
+ 方案B - 逻辑复制:
205
+ # 老库配置(wal_level 为启动参数,修改后必须重启实例才能生效)
206
+ ALTER SYSTEM SET wal_level = 'logical';
207
+ # 创建 Publication
208
+ CREATE PUBLICATION migration_pub FOR ALL TABLES;
209
+ # 新库订阅
210
+ CREATE SUBSCRIPTION migration_sub
211
+ CONNECTION 'host=old-host dbname=production'
212
+ PUBLICATION migration_pub;
213
+ 适用: 零停机迁移,数据量不限
214
+ 注意: DDL 与序列(sequence)值不会自动同步;DDL 需手动在新库执行,切换前需用 setval 将新库序列对齐到老库当前值
215
+
216
+ 方案C - 异构迁移(MySQL → PostgreSQL):
217
+ 工具: pgloader
218
+ 命令: pgloader mysql://user:pass@old-host/db postgresql://user:pass@new-host/db
219
+ 注意: 数据类型映射需提前验证
220
+
221
+ 增量同步:
222
+ 方案A - CDC(Change Data Capture):
223
+ 工具: Debezium
224
+ 流程: 源DB → Debezium → Kafka → 目标DB
225
+ 配置示例:
226
+ connector.class: io.debezium.connector.postgresql.PostgresConnector
227
+ database.hostname: old-host
228
+ database.port: 5432
229
+ database.dbname: production
230
+ slot.name: debezium_migration
231
+ plugin.name: pgoutput
232
+
233
+ 方案B - 双写:
234
+ 流程: 应用同时写老库和新库
235
+ 实现: 在 Repository 层添加双写逻辑
236
+ 风险: 事务一致性需额外处理
237
+ 适用: CDC 不可用时的降级方案
238
+ ```
239
+
240
+ ### 2.3 数据校验方案
241
+
242
+ ```bash
243
+ # 行数校验
244
+ echo "--- 行数对比 ---"
245
+ for table in users orders products payments; do
246
+ old_count=$(psql -h old-host -d production -t -c "SELECT count(*) FROM ${table}")
247
+ new_count=$(psql -h new-host -d production -t -c "SELECT count(*) FROM ${table}")
248
+ echo "${table}: old=${old_count} new=${new_count} match=$([ $old_count -eq $new_count ] && echo YES || echo NO)"
249
+ done
250
+
251
+ # 校验和对比(采样)
252
+ echo "--- 校验和对比 ---"
253
+ for table in users orders products; do
254
+ old_md5=$(psql -h old-host -d production -t -c "SELECT md5(string_agg(t::text, '')) FROM (SELECT * FROM ${table} ORDER BY id LIMIT 10000) t")
255
+ new_md5=$(psql -h new-host -d production -t -c "SELECT md5(string_agg(t::text, '')) FROM (SELECT * FROM ${table} ORDER BY id LIMIT 10000) t")
256
+ echo "${table}: $([ "$old_md5" = "$new_md5" ] && echo MATCH || echo MISMATCH)"
257
+ done
258
+
259
+ # 业务关键指标对比
260
+ echo "--- 业务指标对比 ---"
261
+ for query in \
262
+ "SELECT sum(total_amount) FROM orders WHERE created_at > now() - interval '24h'" \
263
+ "SELECT count(DISTINCT user_id) FROM orders WHERE created_at > now() - interval '24h'" \
264
+ "SELECT count(*) FROM users WHERE status = 'active'"; do
265
+ old_val=$(psql -h old-host -d production -t -c "$query")
266
+ new_val=$(psql -h new-host -d production -t -c "$query")
267
+ echo "old=${old_val} new=${new_val}"
268
+ done
269
+ ```
270
+
271
+ ---
272
+
273
+ ## 三、执行阶段
274
+
275
+ ### 3.1 蓝绿部署执行
276
+
277
+ ```yaml
278
+ 前提:
279
+ - Green 环境已完全就绪(应用 + 数据 + 配置)
280
+ - 数据同步延迟 < 1 秒
281
+ - Green 环境通过全量回归测试
282
+ - 监控大盘已同时覆盖 Blue 和 Green
283
+
284
+ 切换步骤:
285
+ 1. 确认 Green 环境健康:
286
+ - 所有 Pod/实例 Running
287
+ - 健康检查全部通过
288
+ - 数据同步追平
289
+
290
+ 2. 停止增量同步(如使用逻辑复制):
291
+ # 记录 LSN 位点
292
+ psql -h old-host -c "SELECT pg_current_wal_lsn()"
293
+ # 确认新库追平
294
+ psql -h new-host -c "SELECT * FROM pg_stat_subscription"
295
+
296
+ 3. DNS/负载均衡切换:
297
+ # AWS Route53 加权路由
298
+ aws route53 change-resource-record-sets --hosted-zone-id Z123 \
299
+ --change-batch '{
300
+ "Changes": [{
301
+ "Action": "UPSERT",
302
+ "ResourceRecordSet": {
303
+ "Name": "api.target.com",
304
+ "Type": "CNAME",
305
+ "SetIdentifier": "green",
306
+ "Weight": 100,
307
+ "TTL": 60,
308
+ "ResourceRecords": [{"Value": "green-lb.example.com"}]
309
+ }
310
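+ }, {"Action": "UPSERT", "ResourceRecordSet": {"Name": "api.target.com", "Type": "CNAME", "SetIdentifier": "blue", "Weight": 0, "TTL": 60, "ResourceRecords": [{"Value": "blue-lb.example.com"}]}}]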
311
+ }'
312
+
313
+ # 或 Nginx 上游切换
314
+ # upstream backend { server green-host:8080; }
315
+ # nginx -s reload
316
+
317
+ 4. 验证切换成功:
318
+ - 实时监控错误率(< 0.1%)
319
+ - 检查请求是否到达 Green 环境
320
+ - 核心业务流程端到端验证
321
+
322
+ 5. 保留 Blue 环境 7 天作为回滚后备
323
+ ```
324
+
325
+ ### 3.2 金丝雀发布执行
326
+
327
+ ```yaml
328
+ # Kubernetes + Istio 金丝雀示例
329
+ 流量分配步骤:
330
+
331
+ 阶段 1 - 1% 流量(观察 24h):
332
+ apiVersion: networking.istio.io/v1beta1
333
+ kind: VirtualService
334
+ metadata:
335
+ name: api-service
336
+ spec:
337
+ hosts:
338
+ - api-service
339
+ http:
340
+ - route:
341
+ - destination:
342
+ host: api-service
343
+ subset: stable
344
+ weight: 99
345
+ - destination:
346
+ host: api-service
347
+ subset: canary
348
+ weight: 1
349
+
350
+ 阶段 2 - 10% 流量(观察 24h):
351
+ # weight: stable=90, canary=10
352
+
353
+ 阶段 3 - 50% 流量(观察 48h):
354
+ # weight: stable=50, canary=50
355
+
356
+ 阶段 4 - 100% 流量:
357
+ # weight: stable=0, canary=100
358
+ # 确认稳定后,将 canary 标记为 stable
359
+
360
+ 观察指标(每个阶段):
361
+ - 错误率对比(canary vs stable)
362
+ - P99 延迟对比
363
+ - 资源利用率
364
+ - 业务指标(转化率/成功率)
365
+ - 用户反馈/客诉
366
+
367
+ 晋级条件:
368
+ - 错误率 canary <= stable × 1.1
369
+ - P99 延迟 canary <= stable × 1.2
370
+ - 无 P0/P1 告警
371
+ - 业务指标无显著下降
372
+ ```
373
+
374
+ ### 3.3 Strangler Fig 执行
375
+
376
+ ```yaml
377
+ 核心原理:
378
+ 新系统逐步接管老系统的功能,老系统逐步被"绞杀"直到完全退役。
379
+ 通过请求路由层(API Gateway / Proxy)控制流量分配。
380
+
381
+ 执行步骤:
382
+
383
+ 阶段 1 - 部署路由层:
384
+ # Nginx 作为路由层示例
385
+ upstream old_system { server old-host:8080; }
386
+ upstream new_user_service { server new-user:8080; }
387
+ upstream new_product_service { server new-product:8080; }
388
+
389
+ server {
390
+ listen 80;
391
+
392
+ # 已迁移的模块 → 新系统
393
+ location /api/v1/users {
394
+ proxy_pass http://new_user_service;
395
+ }
396
+
397
+ # 未迁移的模块 → 老系统
398
+ location / {
399
+ proxy_pass http://old_system;
400
+ }
401
+ }
402
+
403
+ 阶段 2 - 逐模块迁移:
404
+ 迁移顺序(按风险/依赖排序):
405
+ 1. 用户模块(依赖少,独立性高)
406
+ 2. 商品模块(被订单依赖,需先迁移)
407
+ 3. 搜索模块(可独立运行)
408
+ 4. 订单模块(核心,最后迁移)
409
+ 5. 支付模块(核心,最后迁移)
410
+
411
+ 每个模块迁移流程:
412
+ a. 新服务开发并通过测试
413
+ b. 数据迁移/同步
414
+ c. 灰度切换流量(1% → 10% → 50% → 100%)
415
+ d. 老模块代码标记为 deprecated
416
+ e. 数据同步反向验证
417
+ f. 确认稳定后移除老模块路由
418
+
419
+ 阶段 3 - 老系统退役:
420
+ - 所有模块迁移完成
421
+ - 老系统保持只读运行 14 天
422
+ - 确认无遗漏流量
423
+ - 下线老系统
424
+ - 清理老系统基础设施
425
+ ```
426
+
427
+ ### 3.4 数据库迁移执行
428
+
429
+ ```bash
430
+ # 零停机数据库迁移示例(PostgreSQL 版本升级 14 → 16)
431
+
432
+ # 步骤 1: 新实例搭建
433
+ # 使用 RDS/CloudSQL 创建新版本实例
434
+ aws rds create-db-instance \
435
+ --db-instance-identifier prod-pg16 \
436
+ --engine postgres \
437
+ --engine-version 16.2 \
438
+ --db-instance-class db.r6g.xlarge \
439
+ --allocated-storage 500
440
+
441
+ # 步骤 2: 建立逻辑复制
442
+ # 老库
443
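+ # wal_level 需重启实例才生效(pg_reload_conf 无法应用该参数);RDS 请在参数组中设置 rds.logical_replication=1 并重启
443
+ psql -h old-host -d production -c "ALTER SYSTEM SET wal_level = 'logical';"
444
+ # 重启老库后确认已生效: psql -h old-host -d production -c "SHOW wal_level;"
445
+ psql -h old-host -d production -c "CREATE PUBLICATION full_migration FOR ALL TABLES;"
446
+ # 以上语句需分别执行: 多语句的 psql -c 会作为单个事务运行,而 ALTER SYSTEM 不能在事务块中执行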
448
+
449
+ # 新库(先创建相同的表结构)
450
+ pg_dump -h old-host -d production --schema-only | psql -h new-host -d production
451
+
452
+ psql -h new-host -d production -c "
453
+ CREATE SUBSCRIPTION full_migration
454
+ CONNECTION 'host=old-host port=5432 dbname=production user=repl_user password=xxx'
455
+ PUBLICATION full_migration;
456
+ "
457
+
458
+ # 步骤 3: 监控复制状态
459
+ watch -n 5 'psql -h new-host -d production -c "
460
+ SELECT subname, received_lsn, latest_end_lsn, latest_end_time
461
+ FROM pg_stat_subscription;
462
+ "'
463
+
464
+ # 步骤 4: 验证数据一致性
465
+ # 执行上文的数据校验脚本
466
+
467
+ # 步骤 5: 切换(维护窗口内)
468
+ # a. 应用停止写入(或切为只读模式)
469
+ # b. 等待复制追平(lag = 0),并用 setval 将新库序列对齐到老库当前值(逻辑复制不同步序列)
470
+ psql -h new-host -c "SELECT * FROM pg_stat_subscription" # 确认无延迟
471
+ # c. 应用连接串切换到新库
472
+ # d. 验证应用正常
473
+ # e. 删除逻辑复制
474
+ psql -h new-host -c "DROP SUBSCRIPTION full_migration"
475
+ psql -h old-host -c "DROP PUBLICATION full_migration"
476
+ ```
477
+
478
+ ---
479
+
480
+ ## 四、验证阶段
481
+
482
+ ### 4.1 功能验证
483
+
484
+ ```yaml
485
+ 验证矩阵:
486
+ 冒烟测试(切换后 5 分钟内):
487
+ - 核心 API 健康检查通过
488
+ - 登录/注册流程正常
489
+ - 核心查询返回正确数据
490
+ - 写入操作正常(创建订单/更新信息)
491
+
492
+ 回归测试(切换后 1 小时内):
493
+ - 自动化测试套件全量执行
494
+ - 覆盖所有核心业务用例
495
+ - 第三方集成接口联调验证
496
+ - 定时任务正常触发
497
+
498
+ 端到端验证(切换后 4 小时内):
499
+ - 完整业务流程走通(下单→支付→发货→签收)
500
+ - 边界条件测试(大数据量/并发/异常输入)
501
+ - 多端验证(Web/App/小程序/API)
502
+ ```
503
+
504
+ ### 4.2 性能验证
505
+
506
+ ```bash
507
+ # 基准对比测试
508
+ # 使用迁移前相同的压测脚本和参数
509
+
510
+ # k6 压测
511
+ k6 run --env TARGET=https://new-api.target.com load-test.js
512
+
513
+ # 对比关键指标
514
+ echo "=== 性能对比 ==="
515
+ echo "指标 | 迁移前 | 迁移后 | 差异"
516
+ echo "P50 延迟 | 45ms | ?ms | "
517
+ echo "P99 延迟 | 230ms | ?ms | "
518
+ echo "吞吐量(RPS) | 5200 | ? | "
519
+ echo "错误率 | 0.02% | ? | "
520
+ echo "CPU 利用率 | 65% | ? | "
521
+ echo "内存利用率 | 72% | ? | "
522
+
523
+ # 验收标准
524
+ # - P99 延迟不超过迁移前的 120%
525
+ # - 吞吐量不低于迁移前的 90%
526
+ # - 错误率不超过迁移前的 110%
527
+ ```
528
+
529
+ ### 4.3 数据完整性验证
530
+
531
+ ```bash
532
+ # 最终一致性校验(切换后执行)
533
+
534
+ # 1. 全量行数概览(n_live_tup 为统计估算值,仅用于快速比对;精确校验请对关键表执行 count(*))
535
+ echo "=== 全量行数校验 ==="
536
+ psql -h new-host -d production -c "
537
+ SELECT relname AS tablename, n_live_tup
538
+ FROM pg_stat_user_tables
539
+ ORDER BY n_live_tup DESC;
540
+ "
541
+
542
+ # 2. 关键业务数据校验
543
+ echo "=== 业务数据校验 ==="
544
+ psql -h new-host -d production -c "
545
+ -- 用户总数
546
+ SELECT 'users' as entity, count(*) as total FROM users
547
+ UNION ALL
548
+ -- 活跃订单数
549
+ SELECT 'active_orders', count(*) FROM orders WHERE status NOT IN ('CANCELLED', 'REFUNDED')
550
+ UNION ALL
551
+ -- 商品总数
552
+ SELECT 'products', count(*) FROM products WHERE deleted_at IS NULL
553
+ UNION ALL
554
+ -- 今日交易额
555
+ SELECT 'today_revenue', COALESCE(sum(total_amount), 0) FROM orders WHERE created_at >= CURRENT_DATE;
556
+ "
557
+
558
+ # 3. 外键完整性校验
559
+ echo "=== 外键完整性 ==="
560
+ psql -h new-host -d production -c "
561
+ -- 孤儿订单(user_id 不存在)
562
+ SELECT count(*) as orphan_orders FROM orders o
563
+ WHERE NOT EXISTS (SELECT 1 FROM users u WHERE u.id = o.user_id);
564
+
565
+ -- 孤儿支付记录
566
+ SELECT count(*) as orphan_payments FROM payments p
567
+ WHERE NOT EXISTS (SELECT 1 FROM orders o WHERE o.id = p.order_id);
568
+ "
569
+ ```
570
+
571
+ ---
572
+
573
+ ## 五、切换与收尾
574
+
575
+ ### 5.1 切换日 Runbook
576
+
577
+ ```yaml
578
+ 切换日流程(以蓝绿部署为例):
579
+
580
+ T-60min:
581
+ - [ ] 团队全员就位(开发/运维/DBA/产品/客服)
582
+ - [ ] 监控大盘打开(新旧环境并排)
583
+ - [ ] 回滚脚本就绪并测试过
584
+ - [ ] 通知业务方即将切换
585
+
586
+ T-30min:
587
+ - [ ] 最终数据校验通过
588
+ - [ ] Green 环境健康检查全部通过
589
+ - [ ] 数据同步延迟 < 1 秒
590
+
591
+ T-0(切换):
592
+ - [ ] 执行流量切换
593
+ - [ ] 确认流量到达 Green 环境
594
+ - [ ] 冒烟测试通过
595
+
596
+ T+5min:
597
+ - [ ] 核心 API 错误率 < 0.1%
598
+ - [ ] P99 延迟在正常范围
599
+ - [ ] 无 P0/P1 告警
600
+
601
+ T+30min:
602
+ - [ ] 自动化回归测试通过
603
+ - [ ] 业务核心指标正常
604
+ - [ ] 通知业务方切换完成
605
+
606
+ T+24h:
607
+ - [ ] 持续监控无异常
608
+ - [ ] 用户反馈/客诉正常
609
+
610
+ T+7d:
611
+ - [ ] 旧环境下线确认
612
+ - [ ] 清理临时资源(同步任务/中间件/临时配置)
613
+ - [ ] 更新架构文档/运维手册
614
+ - [ ] 迁移复盘会议
615
+ ```
616
+
617
+ ### 5.2 善后清理
618
+
619
+ ```bash
620
+ # 1. 停止数据同步
621
+ psql -h new-host -c "DROP SUBSCRIPTION IF EXISTS full_migration"
622
+ psql -h old-host -c "DROP PUBLICATION IF EXISTS full_migration"
623
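+ psql -h old-host -d production -c "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = 'full_migration' AND NOT active" # DROP SUBSCRIPTION 通常已自动删除该槽,此处仅清理残留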
624
+
625
+ # 2. 清理旧环境 DNS
626
+ aws route53 change-resource-record-sets --hosted-zone-id Z123 \
627
+ --change-batch '{
628
+ "Changes": [{
629
+ "Action": "DELETE",
630
+ "ResourceRecordSet": {
631
+ "Name": "old-api.target.com",
632
+ "Type": "CNAME",
633
+ "TTL": 300,
634
+ "ResourceRecords": [{"Value": "old-lb.example.com"}]
635
+ }
636
+ }]
637
+ }'
638
+
639
+ # 3. 旧环境资源回收(确认 7 天无回滚需求后)
640
+ # Kubernetes
641
+ kubectl delete namespace old-production
642
+ # 或 AWS
643
+ aws rds delete-db-instance --db-instance-identifier prod-pg14-old --final-db-snapshot-identifier prod-pg14-old-final
644
+ aws ec2 terminate-instances --instance-ids i-old1 i-old2 i-old3
645
+
646
+ # 4. 更新配置管理
647
+ # - 移除旧环境的监控告警
648
+ # - 更新 CI/CD 管道
649
+ # - 更新内部 Wiki/文档
650
+ ```
651
+
652
+ ---
653
+
654
+ ## 六、回滚
655
+
656
+ ### 6.1 回滚决策标准
657
+
658
+ ```yaml
659
+ 立即回滚(任一触发):
660
+ - 核心 API 错误率 > 5% 持续 5 分钟
661
+ - P99 延迟 > 迁移前 3 倍持续 10 分钟
662
+ - 数据不一致被确认(丢数据/脏数据)
663
+ - P0 告警且 15 分钟内无法修复
664
+
665
+ 考虑回滚:
666
+ - 核心 API 错误率 1%-5% 持续 15 分钟
667
+ - P99 延迟 > 迁移前 2 倍持续 30 分钟
668
+ - 非核心功能异常影响用户体验
669
+ - 第三方集成方报告异常
670
+
671
+ 不回滚(现场修复):
672
+ - 错误率 < 1% 且可快速定位
673
+ - 性能略有下降但在 SLA 内
674
+ - 非核心功能的已知兼容问题
675
+ ```
676
+
677
+ ### 6.2 回滚步骤
678
+
679
+ ```bash
680
+ # 蓝绿部署回滚(DNS 方式受 TTL=60s 及客户端缓存影响,约分钟级;负载均衡/Nginx 上游切换可达秒级)
681
+ # 将流量切回 Blue 环境
682
+ aws route53 change-resource-record-sets --hosted-zone-id Z123 \
683
+ --change-batch '{
684
+ "Changes": [{
685
+ "Action": "UPSERT",
686
+ "ResourceRecordSet": {
687
+ "Name": "api.target.com",
688
+ "Type": "CNAME",
689
+ "SetIdentifier": "blue",
690
+ "Weight": 100,
691
+ "TTL": 60,
692
+ "ResourceRecords": [{"Value": "blue-lb.example.com"}]
693
+ }
694
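+ }, {"Action": "UPSERT", "ResourceRecordSet": {"Name": "api.target.com", "Type": "CNAME", "SetIdentifier": "green", "Weight": 0, "TTL": 60, "ResourceRecords": [{"Value": "green-lb.example.com"}]}}]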
695
+ }'
696
+
697
+ # 金丝雀回滚
698
+ kubectl apply -f - <<EOF
699
+ apiVersion: networking.istio.io/v1beta1
700
+ kind: VirtualService
701
+ metadata:
702
+ name: api-service
703
+ spec:
704
+ hosts:
705
+ - api-service
706
+ http:
707
+ - route:
708
+ - destination:
709
+ host: api-service
710
+ subset: stable
711
+ weight: 100
712
+ - destination:
713
+ host: api-service
714
+ subset: canary
715
+ weight: 0
716
+ EOF
717
+
718
+ # 数据库回滚(如已切换写入到新库)
719
+ # 1. 应用切回老库连接串
720
+ # 2. 将新库中的增量数据同步回老库
721
+ # 3. 如数据量少,可手动 SQL 补齐
722
+ # 4. 如数据量大,需建立反向逻辑复制
723
+
724
+ # Strangler Fig 回滚(模块级别)
725
+ # 将路由规则中该模块指回老系统
726
+ sed -i 's|proxy_pass http://new_user_service|proxy_pass http://old_system|' /etc/nginx/conf.d/migration.conf
727
+ nginx -s reload
728
+ ```
729
+
730
+ ### 6.3 回滚后处理
731
+
732
+ ```yaml
733
+ 回滚后必做:
734
+ 1. 确认回滚成功:
735
+ - 流量已回到旧环境
736
+ - 核心指标恢复正常
737
+ - 数据一致性确认
738
+
739
+ 2. 根因分析:
740
+ - 收集迁移期间的日志与监控数据
741
+ - 定位失败原因
742
+ - 评估修复工作量
743
+
744
+ 3. 制定修复计划:
745
+ - 修复根因
746
+ - 更新迁移方案
747
+ - 安排第二次迁移窗口
748
+
749
+ 4. 复盘与改进:
750
+ - 回滚原因归档
751
+ - 更新迁移检查清单
752
+ - 团队知识分享
753
+ ```
754
+
755
+ ---
756
+
757
+ ## Agent Checklist
758
+
759
+ 供自动化 Agent 在执行系统迁移流程时逐项核查:
760
+
761
+ - [ ] 迁移目标与成功标准已明确定义
762
+ - [ ] 现状梳理完成(服务/数据/接口/基础设施)
763
+ - [ ] 迁移复杂度已评估
764
+ - [ ] 迁移策略已选择(蓝绿/金丝雀/Strangler Fig/大爆炸)
765
+ - [ ] 迁移计划已制定(里程碑/时间线/责任人)
766
+ - [ ] 数据迁移方案已确定(全量+增量)
767
+ - [ ] 数据校验方案已准备(行数/校验和/业务指标)
768
+ - [ ] 目标环境已搭建并通过验证
769
+ - [ ] 全量数据迁移已完成
770
+ - [ ] 增量同步已建立且延迟可接受
771
+ - [ ] 功能回归测试已通过
772
+ - [ ] 性能基准测试已通过
773
+ - [ ] 回滚方案已演练
774
+ - [ ] 切换日 Runbook 已制定
775
+ - [ ] 流量切换已按计划执行
776
+ - [ ] 切换后冒烟测试通过
777
+ - [ ] 切换后数据完整性验证通过
778
+ - [ ] 切换后 24h 持续监控无异常
779
+ - [ ] 旧环境已在回滚窗口后安全下线
780
+ - [ ] 迁移复盘已完成并归档