@umacloud/knowledge 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. package/00-governance/governance-capabilities.md +557 -0
  2. package/00-governance/knowledge-map.md +39 -0
  3. package/00-governance/maintenance-policy.md +76 -0
  4. package/00-governance/review-checklist.md +81 -0
  5. package/README.md +13 -0
  6. package/ai/01-standards/agent-development-complete.md +691 -0
  7. package/ai/01-standards/llm-application-complete.md +488 -0
  8. package/ai/01-standards/mlops-complete.md +798 -0
  9. package/ai/01-standards/prompt-engineering-complete.md +646 -0
  10. package/ai/01-standards/rag-architecture-complete.md +649 -0
  11. package/ai/02-playbooks/llm-evaluation-playbook.md +847 -0
  12. package/ai/03-checklists/ai-project-checklist.md +215 -0
  13. package/ai/04-antipatterns/ai-antipatterns.md +661 -0
  14. package/ai/05-cases/case-rag-production.md +147 -0
  15. package/ai/06-glossary/ai-glossary.md +162 -0
  16. package/ai/agent-evaluation-benchmark.md +53 -0
  17. package/ai/ai-agent-memory-context-management.md +41 -0
  18. package/ai/ai-cost-capacity-optimization-playbook.md +42 -0
  19. package/ai/ai-data-security-and-compliance-playbook.md +37 -0
  20. package/ai/ai-domain-index-and-checklist.md +40 -0
  21. package/ai/ai-governance-maturity-model.md +50 -0
  22. package/ai/ai-model-selection-and-routing-strategy.md +47 -0
  23. package/ai/ai-observability-and-oncall-runbook.md +52 -0
  24. package/ai/ai-rag-engineering-playbook.md +42 -0
  25. package/ai/ai-red-team-and-safety-evaluation.md +42 -0
  26. package/ai/ai-release-readiness-and-rollback-gate.md +42 -0
  27. package/ai/llm-agent-engineering-deep-dive.md +57 -0
  28. package/ai/prompt-and-tool-guardrails.md +52 -0
  29. package/api/01-standards/enterprise-api-standards.md +198 -0
  30. package/api/01-standards/rest-api-design-guide.md +63 -0
  31. package/api/02-playbooks/api-pagination-playbook.md +93 -0
  32. package/api/02-playbooks/graphql-production-playbook.md +176 -0
  33. package/api/03-checklists/api-review-checklist.md +55 -0
  34. package/api/04-antipatterns/api-antipatterns.md +112 -0
  35. package/architecture/01-standards/api-gateway-patterns.md +496 -0
  36. package/architecture/01-standards/cloud-native-patterns.md +644 -0
  37. package/architecture/01-standards/distributed-systems-patterns.md +591 -0
  38. package/architecture/01-standards/event-driven-architecture.md +595 -0
  39. package/architecture/01-standards/microservices-patterns-complete.md +968 -0
  40. package/architecture/01-standards/microservices-patterns.md +495 -0
  41. package/architecture/01-standards/system-design-interview.md +664 -0
  42. package/architecture/02-playbooks/microservices-patterns-playbook.md +137 -0
  43. package/architecture/02-playbooks/migration-playbook.md +780 -0
  44. package/architecture/02-playbooks/system-design-playbook.md +779 -0
  45. package/architecture/03-checklists/architecture-decision-checklist.md +297 -0
  46. package/architecture/04-antipatterns/architecture-antipatterns.md +417 -0
  47. package/architecture/05-cases/case-netflix-microservices.md +413 -0
  48. package/architecture/06-glossary/architecture-glossary.md +164 -0
  49. package/architecture/adr-template-and-examples.md +38 -0
  50. package/architecture/api-gateway-deep-dive.md +1291 -0
  51. package/architecture/configuration-management.md +1162 -0
  52. package/architecture/distributed-transactions.md +1220 -0
  53. package/architecture/microservices-complete.md +735 -0
  54. package/architecture/resilience-and-disaster-patterns.md +37 -0
  55. package/architecture/service-governance.md +1198 -0
  56. package/architecture/system-architecture-deep-dive.md +37 -0
  57. package/backend/01-standards/analytics-and-growth.md +65 -0
  58. package/backend/01-standards/api-and-error-conventions.md +120 -0
  59. package/backend/01-standards/application-layering-and-packaging.md +160 -0
  60. package/backend/01-standards/auth-implementation.md +104 -0
  61. package/backend/01-standards/backend-framework-idioms.md +74 -0
  62. package/backend/01-standards/background-jobs-and-async.md +66 -0
  63. package/backend/01-standards/caching-strategies-complete.md +390 -0
  64. package/backend/01-standards/config-and-observability.md +77 -0
  65. package/backend/01-standards/data-modeling-and-persistence.md +94 -0
  66. package/backend/01-standards/django-complete.md +1765 -0
  67. package/backend/01-standards/email-and-notifications.md +64 -0
  68. package/backend/01-standards/fastapi-complete.md +925 -0
  69. package/backend/01-standards/file-upload-and-storage.md +66 -0
  70. package/backend/01-standards/graphql-api-complete.md +416 -0
  71. package/backend/01-standards/llm-application-standard.md +78 -0
  72. package/backend/01-standards/message-queue-patterns.md +379 -0
  73. package/backend/01-standards/microservices-and-distributed.md +78 -0
  74. package/backend/01-standards/nestjs-complete.md +2167 -0
  75. package/backend/01-standards/payment-integration.md +80 -0
  76. package/backend/01-standards/rate-limiting-complete.md +451 -0
  77. package/backend/01-standards/realtime-and-websocket.md +65 -0
  78. package/backend/01-standards/search-and-filtering.md +64 -0
  79. package/backend/01-standards/spring-boot-complete.md +445 -0
  80. package/backend/02-playbooks/api-design-playbook.md +718 -0
  81. package/backend/02-playbooks/email-send-playbook.md +130 -0
  82. package/backend/02-playbooks/file-upload-s3-playbook.md +153 -0
  83. package/backend/02-playbooks/typescript-enterprise-playbook.md +133 -0
  84. package/backend/02-playbooks/websocket-realtime-playbook.md +154 -0
  85. package/backend/03-checklists/api-launch-checklist.md +189 -0
  86. package/backend/04-antipatterns/backend-antipatterns.md +1051 -0
  87. package/blockchain/01-standards/blockchain-basics.md +557 -0
  88. package/blockchain/01-standards/smart-contract-development.md +1315 -0
  89. package/cicd/01-standards/deployment-and-delivery-standard.md +96 -0
  90. package/cicd/01-standards/github-actions-complete.md +473 -0
  91. package/cicd/01-standards/release-and-store-submission.md +75 -0
  92. package/cicd/02-playbooks/cicd-pipeline-playbook.md +144 -0
  93. package/cicd/02-playbooks/release-management-playbook.md +605 -0
  94. package/cicd/03-checklists/pipeline-security-checklist.md +168 -0
  95. package/cicd/04-antipatterns/cicd-antipatterns.md +589 -0
  96. package/cicd/05-cases/case-deployment-automation.md +221 -0
  97. package/cicd/05-cases/case-gitops-transformation.md +212 -0
  98. package/cicd/06-glossary/cicd-glossary.md +114 -0
  99. package/cicd/cicd-blueprint-deep-dive.md +38 -0
  100. package/cicd/release-readiness-gate.md +37 -0
  101. package/cloud-native/01-standards/container-security.md +741 -0
  102. package/cloud-native/01-standards/kubernetes-complete.md +812 -0
  103. package/cloud-native/02-playbooks/api-gateway-playbook.md +155 -0
  104. package/cloud-native/02-playbooks/gitops-with-argocd.md +760 -0
  105. package/cloud-native/02-playbooks/k8s-troubleshooting-playbook.md +1942 -0
  106. package/cloud-native/02-playbooks/message-queue-playbook.md +129 -0
  107. package/cloud-native/02-playbooks/multicloud-governance.md +726 -0
  108. package/cloud-native/02-playbooks/serverless-patterns.md +788 -0
  109. package/cloud-native/02-playbooks/service-mesh-playbook.md +612 -0
  110. package/cloud-native/02-playbooks/terraform-iac-playbook.md +143 -0
  111. package/cloud-native/03-checklists/container-security-checklist.md +431 -0
  112. package/cloud-native/03-checklists/k8s-production-readiness-checklist.md +460 -0
  113. package/cloud-native/04-antipatterns/container-antipatterns.md +660 -0
  114. package/cloud-native/04-antipatterns/k8s-antipatterns.md +743 -0
  115. package/cloud-native/05-cases/case-k8s-migration.md +478 -0
  116. package/cloud-native/05-cases/case-k8s-scaling.md +642 -0
  117. package/cloud-native/05-cases/case-k8s-security-incident.md +397 -0
  118. package/cloud-native/06-glossary/cloud-native-glossary.md +337 -0
  119. package/cross-platform/01-standards/cross-platform-frameworks.md +83 -0
  120. package/cross-platform/01-standards/platform-selection-and-architecture.md +77 -0
  121. package/data/01-standards/elasticsearch-complete.md +2098 -0
  122. package/data/01-standards/postgresql-complete.md +1613 -0
  123. package/data/01-standards/redis-complete.md +1527 -0
  124. package/data/02-playbooks/database-optimization-playbook.md +403 -0
  125. package/data/02-playbooks/elasticsearch-production-playbook.md +132 -0
  126. package/data/03-checklists/database-launch-checklist.md +187 -0
  127. package/data/04-antipatterns/database-antipatterns.md +873 -0
  128. package/data/05-cases/case-database-migration.md +310 -0
  129. package/data/06-glossary/database-glossary.md +440 -0
  130. package/data/data-governance-and-modeling-deep-dive.md +39 -0
  131. package/data-engineering/01-standards/airflow-complete.md +523 -0
  132. package/data-engineering/01-standards/kafka-complete.md +1521 -0
  133. package/data-engineering/02-playbooks/spark-etl-playbook.md +496 -0
  134. package/data-engineering/03-checklists/pipeline-launch-checklist.md +194 -0
  135. package/data-engineering/04-antipatterns/data-pipeline-antipatterns.md +684 -0
  136. package/data-engineering/05-cases/case-real-time-pipeline.md +355 -0
  137. package/data-engineering/06-glossary/data-engineering-glossary.md +429 -0
  138. package/database/01-standards/database-schema-standards.md +147 -0
  139. package/database/02-playbooks/postgresql-optimization-quick.md +52 -0
  140. package/database/02-playbooks/postgresql-performance-optimization.md +58 -0
  141. package/database/02-playbooks/postgresql-production-playbook.md +146 -0
  142. package/database/02-playbooks/redis-caching-playbook.md +117 -0
  143. package/database/03-checklists/database-review-checklist.md +50 -0
  144. package/database/04-antipatterns/database-antipatterns.md +112 -0
  145. package/design/01-standards/ui-design-system-complete.md +423 -0
  146. package/design/02-playbooks/design-handoff-playbook.md +254 -0
  147. package/design/02-playbooks/design-review-playbook.md +388 -0
  148. package/design/03-checklists/design-review-checklist.md +246 -0
  149. package/design/04-antipatterns/design-antipatterns.md +378 -0
  150. package/design/05-cases/case-design-system-adoption.md +328 -0
  151. package/design/06-glossary/design-glossary.md +329 -0
  152. package/design/ui-full-lifecycle-cross-platform-playbook.md +571 -0
  153. package/design/ux-system-deep-dive.md +38 -0
  154. package/design-systems/00-craft-rules.md +71 -0
  155. package/design-systems/aesthetic-families.md +43 -0
  156. package/design-systems/anti-ai-slop.md +162 -0
  157. package/design-systems/bold-geometric.md +120 -0
  158. package/design-systems/brutalist-bold.md +103 -0
  159. package/design-systems/editorial-clean.md +109 -0
  160. package/design-systems/glass-aurora.md +108 -0
  161. package/design-systems/modern-minimal.md +145 -0
  162. package/design-systems/premium-luxury.md +106 -0
  163. package/design-systems/product-type-design-map.md +48 -0
  164. package/design-systems/soft-warm.md +123 -0
  165. package/design-systems/tech-utility.md +113 -0
  166. package/desktop/01-standards/desktop-app-standard.md +72 -0
  167. package/desktop/01-standards/desktop-design.md +71 -0
  168. package/development/00-governance/document-template.md +41 -0
  169. package/development/01-standards/api-versioning-strategies.md +432 -0
  170. package/development/01-standards/authentication-patterns-complete.md +479 -0
  171. package/development/01-standards/css-architecture-complete.md +550 -0
  172. package/development/01-standards/database-migration-strategies.md +484 -0
  173. package/development/01-standards/elasticsearch-complete.md +347 -0
  174. package/development/01-standards/git-complete.md +371 -0
  175. package/development/01-standards/golang-complete.md +1565 -0
  176. package/development/01-standards/graphql-complete.md +298 -0
  177. package/development/01-standards/javascript-bundlers-complete.md +469 -0
  178. package/development/01-standards/javascript-typescript-complete.md +528 -0
  179. package/development/01-standards/jest-complete.md +275 -0
  180. package/development/01-standards/linux-complete.md +234 -0
  181. package/development/01-standards/logging-observability-complete.md +526 -0
  182. package/development/01-standards/microservices-communication.md +502 -0
  183. package/development/01-standards/mongodb-complete.md +406 -0
  184. package/development/01-standards/oauth2-complete.md +285 -0
  185. package/development/01-standards/performance-optimization-complete.md +289 -0
  186. package/development/01-standards/playwright-complete.md +247 -0
  187. package/development/01-standards/postgresql-complete.md +456 -0
  188. package/development/01-standards/pytest-complete.md +340 -0
  189. package/development/01-standards/python-async-programming.md +902 -0
  190. package/development/01-standards/python-complete.md +956 -0
  191. package/development/01-standards/python-decorators-complete.md +799 -0
  192. package/development/01-standards/python-design-patterns.md +2854 -0
  193. package/development/01-standards/python-packaging-distribution.md +420 -0
  194. package/development/01-standards/python-testing-strategies.md +607 -0
  195. package/development/01-standards/python-web-frameworks-comparison.md +471 -0
  196. package/development/01-standards/redis-complete.md +317 -0
  197. package/development/01-standards/rest-api-complete.md +316 -0
  198. package/development/01-standards/rust-complete.md +578 -0
  199. package/development/01-standards/typescript-advanced-types.md +1513 -0
  200. package/development/01-standards/web-security-complete.md +292 -0
  201. package/development/02-playbooks/api-design-playbook.md +810 -0
  202. package/development/02-playbooks/database-migration-playbook.md +580 -0
  203. package/development/02-playbooks/debugging-playbook.md +692 -0
  204. package/development/02-playbooks/feature-delivery-playbook.md +430 -0
  205. package/development/02-playbooks/incident-hotfix-playbook.md +387 -0
  206. package/development/02-playbooks/performance-optimization-playbook.md +531 -0
  207. package/development/02-playbooks/performance-tuning-playbook.md +652 -0
  208. package/development/02-playbooks/refactor-playbook.md +403 -0
  209. package/development/02-playbooks/release-playbook.md +469 -0
  210. package/development/03-checklists/architecture-review-checklist.md +168 -0
  211. package/development/03-checklists/data-migration-checklist.md +157 -0
  212. package/development/03-checklists/oncall-handover-checklist.md +173 -0
  213. package/development/03-checklists/pr-checklist.md +158 -0
  214. package/development/03-checklists/production-readiness-checklist.md +190 -0
  215. package/development/03-checklists/release-readiness-checklist.md +154 -0
  216. package/development/03-checklists/security-review-checklist.md +182 -0
  217. package/development/04-antipatterns/api-antipatterns.md +657 -0
  218. package/development/04-antipatterns/architecture-antipatterns.md +686 -0
  219. package/development/04-antipatterns/backend-antipatterns.md +648 -0
  220. package/development/04-antipatterns/cicd-antipatterns.md +540 -0
  221. package/development/04-antipatterns/code-smell-antipatterns.md +571 -0
  222. package/development/04-antipatterns/data-antipatterns.md +658 -0
  223. package/development/04-antipatterns/database-antipatterns.md +578 -0
  224. package/development/04-antipatterns/frontend-antipatterns.md +635 -0
  225. package/development/04-antipatterns/reliability-antipatterns.md +700 -0
  226. package/development/04-antipatterns/security-antipatterns.md +747 -0
  227. package/development/05-cases/case-api-version-migration.md +428 -0
  228. package/development/05-cases/case-authorization-hardening.md +383 -0
  229. package/development/05-cases/case-bluegreen-rollback.md +466 -0
  230. package/development/05-cases/case-cache-snowball-protection.md +485 -0
  231. package/development/05-cases/case-ci-cd-pipeline.md +544 -0
  232. package/development/05-cases/case-database-scaling.md +500 -0
  233. package/development/05-cases/case-db-hotspot-optimization.md +487 -0
  234. package/development/05-cases/case-incident-mttr-reduction.md +563 -0
  235. package/development/05-cases/case-microservice-migration.md +375 -0
  236. package/development/05-cases/case-performance-optimization.md +406 -0
  237. package/development/05-cases/case-security-incident-response.md +345 -0
  238. package/development/06-glossary/full-stack-glossary.md +166 -0
  239. package/development/09-maturity/quarterly-audit-template.md +35 -0
  240. package/development/11-ui-excellence/ui-aesthetic-system.md +41 -0
  241. package/development/11-ui-excellence/ui-engineering-excellence.md +435 -0
  242. package/development/12-scenarios/development-scenarios-guide.md +565 -0
  243. package/development/13-implementation-assets/implementation-toolkit.md +282 -0
  244. package/development/13-implementation-assets/knowledge-gates-execution.md +43 -0
  245. package/development/14-full-lifecycle/software-lifecycle-gates.md +511 -0
  246. package/development/15-lifecycle-templates/project-templates-collection.md +791 -0
  247. package/development/api-contract-and-versioning-guide.md +36 -0
  248. package/development/api-governance-complete.md +43 -0
  249. package/development/backend-engineering-complete.md +43 -0
  250. package/development/code-review-quality-complete.md +43 -0
  251. package/development/concurrency-reliability-complete.md +43 -0
  252. package/development/database-engineering-complete.md +43 -0
  253. package/development/engineering-effectiveness-complete.md +43 -0
  254. package/development/engineering-standards-deep-dive.md +38 -0
  255. package/development/frontend-engineering-complete.md +43 -0
  256. package/development/performance-capacity-complete.md +43 -0
  257. package/development/refactor-migration-complete.md +42 -0
  258. package/development/refactoring-and-techdebt-playbook.md +37 -0
  259. package/development/security-in-development-complete.md +43 -0
  260. package/devops/01-standards/cicd-pipeline-complete.md +262 -0
  261. package/devops/01-standards/docker-complete.md +1490 -0
  262. package/devops/01-standards/github-actions-complete.md +337 -0
  263. package/devops/01-standards/kubernetes-complete.md +638 -0
  264. package/devops/01-standards/terraform-complete.md +2117 -0
  265. package/devops/02-playbooks/docker-compose-playbook.md +233 -0
  266. package/devops/02-playbooks/docker-k8s-production-playbook.md +186 -0
  267. package/devops/02-playbooks/docker-production-playbook.md +952 -0
  268. package/edge-iot/01-standards/edge-iot-complete.md +473 -0
  269. package/experts/architect/api-design.md +178 -0
  270. package/experts/architect/methodology.md +124 -0
  271. package/experts/architect/security.md +75 -0
  272. package/experts/backend-lead/methodology.md +216 -0
  273. package/experts/devops/methodology.md +160 -0
  274. package/experts/frontend-lead/methodology.md +178 -0
  275. package/experts/product-manager/industry/ecommerce.md +43 -0
  276. package/experts/product-manager/industry/saas.md +40 -0
  277. package/experts/product-manager/methodology.md +97 -0
  278. package/experts/qa-lead/methodology.md +123 -0
  279. package/experts/qa-lead/test-strategy.md +128 -0
  280. package/experts/uiux-designer/methodology.md +125 -0
  281. package/frontend/01-standards/accessibility-complete.md +532 -0
  282. package/frontend/01-standards/accessibility-standard.md +74 -0
  283. package/frontend/01-standards/admin-dashboard-and-crud.md +72 -0
  284. package/frontend/01-standards/design-tokens-complete.md +444 -0
  285. package/frontend/01-standards/forms-and-validation.md +77 -0
  286. package/frontend/01-standards/frontend-architecture-and-layering.md +119 -0
  287. package/frontend/01-standards/i18n-and-localization.md +65 -0
  288. package/frontend/01-standards/nextjs-complete.md +451 -0
  289. package/frontend/01-standards/react-complete.md +713 -0
  290. package/frontend/01-standards/react-hooks-complete-guide.md +1100 -0
  291. package/frontend/01-standards/react-hooks-complete.md +1171 -0
  292. package/frontend/01-standards/seo-and-web-vitals.md +77 -0
  293. package/frontend/01-standards/state-management-complete.md +444 -0
  294. package/frontend/01-standards/vue-complete.md +499 -0
  295. package/frontend/01-standards/vue3-complete.md +2002 -0
  296. package/frontend/01-standards/web-framework-best-practices.md +64 -0
  297. package/frontend/01-standards/web-performance-complete.md +495 -0
  298. package/frontend/02-playbooks/accessibility-a11y-playbook.md +161 -0
  299. package/frontend/02-playbooks/frontend-performance-playbook.md +707 -0
  300. package/frontend/02-playbooks/i18n-internationalization-playbook.md +120 -0
  301. package/frontend/02-playbooks/performance-optimization-playbook.md +163 -0
  302. package/frontend/02-playbooks/react-nextjs-production-playbook.md +167 -0
  303. package/frontend/02-playbooks/react-state-management-playbook.md +173 -0
  304. package/frontend/03-checklists/component-quality-checklist.md +166 -0
  305. package/frontend/03-checklists/frontend-launch-checklist.md +299 -0
  306. package/frontend/04-antipatterns/frontend-antipatterns.md +886 -0
  307. package/frontend/05-cases/case-performance-optimization.md +274 -0
  308. package/harmony/01-standards/harmonyos-arkts-standard.md +75 -0
  309. package/harmony/01-standards/harmonyos-design.md +65 -0
  310. package/high-quality-engineering-playbook.md +54 -0
  311. package/incident/01-standards/incident-response-complete.md +303 -0
  312. package/incident/02-playbooks/chaos-engineering-playbook.md +883 -0
  313. package/incident/02-playbooks/postmortem-playbook.md +398 -0
  314. package/incident/03-checklists/incident-readiness-checklist.md +181 -0
  315. package/incident/04-antipatterns/incident-antipatterns.md +490 -0
  316. package/incident/05-cases/case-cascade-failure.md +176 -0
  317. package/incident/06-glossary/incident-glossary.md +114 -0
  318. package/incident/postmortem-and-response-deep-dive.md +39 -0
  319. package/industries/ecommerce/ecommerce-complete.md +631 -0
  320. package/industries/education/education-complete.md +555 -0
  321. package/industries/fintech/fintech-complete.md +501 -0
  322. package/industries/gaming/gaming-complete.md +587 -0
  323. package/industries/healthcare/healthcare-complete.md +452 -0
  324. package/low-code/01-standards/low-code-complete.md +944 -0
  325. package/miniprogram/01-standards/ai-common-mistakes.md +61 -0
  326. package/miniprogram/01-standards/miniprogram-custom-navbar-capsule.md +77 -0
  327. package/miniprogram/01-standards/miniprogram-design.md +61 -0
  328. package/miniprogram/01-standards/miniprogram-standard.md +81 -0
  329. package/mobile/01-standards/android-material-design.md +70 -0
  330. package/mobile/01-standards/flutter-complete.md +384 -0
  331. package/mobile/01-standards/ios-design-hig.md +78 -0
  332. package/mobile/01-standards/mobile-app-standard.md +85 -0
  333. package/mobile/01-standards/react-native-complete.md +352 -0
  334. package/mobile/02-playbooks/mobile-cross-platform-playbook.md +175 -0
  335. package/mobile/02-playbooks/mobile-performance.md +473 -0
  336. package/mobile/03-checklists/mobile-release-checklist.md +234 -0
  337. package/mobile/04-antipatterns/mobile-antipatterns.md +798 -0
  338. package/mobile/05-cases/case-app-performance.md +500 -0
  339. package/mobile/05-cases/case-app-startup-optimization.md +218 -0
  340. package/mobile/06-glossary/mobile-glossary.md +484 -0
  341. package/observability/01-standards/observability-standards.md +103 -0
  342. package/observability/02-playbooks/prometheus-grafana-playbook.md +135 -0
  343. package/observability/02-playbooks/structured-logging-playbook.md +73 -0
  344. package/observability/03-checklists/observability-checklist.md +54 -0
  345. package/observability/04-antipatterns/observability-antipatterns.md +106 -0
  346. package/operations/01-standards/prometheus-monitoring-complete.md +1578 -0
  347. package/operations/02-playbooks/capacity-planning-playbook.md +620 -0
  348. package/operations/03-checklists/production-launch-checklist.md +365 -0
  349. package/operations/04-antipatterns/operations-antipatterns.md +664 -0
  350. package/operations/05-cases/case-sre-practices.md +581 -0
  351. package/operations/06-glossary/operations-glossary.md +120 -0
  352. package/operations/aiops-anomaly-detection.md +758 -0
  353. package/operations/capacity-planning.md +1061 -0
  354. package/operations/chaos-engineering.md +659 -0
  355. package/operations/incident-command-system.md +38 -0
  356. package/operations/observability-complete.md +442 -0
  357. package/operations/slo-sli-playbook.md +517 -0
  358. package/operations/sre-operations-deep-dive.md +39 -0
  359. package/package.json +8 -0
  360. package/performance/01-standards/performance-and-scalability.md +80 -0
  361. package/performance/01-standards/performance-standards.md +156 -0
  362. package/performance/02-playbooks/query-optimization-playbook.md +103 -0
  363. package/performance/03-checklists/performance-checklist.md +56 -0
  364. package/performance/04-antipatterns/performance-antipatterns.md +146 -0
  365. package/product/01-standards/product-management-complete.md +285 -0
  366. package/product/02-playbooks/feature-launch-playbook.md +207 -0
  367. package/product/02-playbooks/user-research-playbook.md +532 -0
  368. package/product/03-checklists/feature-launch-checklist.md +275 -0
  369. package/product/04-antipatterns/product-antipatterns.md +355 -0
  370. package/product/05-cases/case-mvp-to-scale.md +384 -0
  371. package/product/06-glossary/product-glossary.md +462 -0
  372. package/product/feature-prioritization-framework.md +40 -0
  373. package/product/kpi-and-metric-tree.md +37 -0
  374. package/product/product-discovery-and-prd-deep-dive.md +41 -0
  375. package/quantum/01-standards/quantum-complete.md +1186 -0
  376. package/security/01-standards/api-security-complete.md +511 -0
  377. package/security/01-standards/container-runtime-security.md +574 -0
  378. package/security/01-standards/data-protection-gdpr.md +543 -0
  379. package/security/01-standards/owasp-top10-complete.md +1890 -0
  380. package/security/01-standards/secure-coding-baseline.md +90 -0
  381. package/security/01-standards/supply-chain-security.md +441 -0
  382. package/security/01-standards/web-security-checklist.md +108 -0
  383. package/security/01-standards/zero-trust-architecture.md +521 -0
  384. package/security/02-playbooks/auth-sso-playbook.md +166 -0
  385. package/security/02-playbooks/incident-response-security-playbook.md +588 -0
  386. package/security/02-playbooks/owasp-api-security-playbook.md +129 -0
  387. package/security/02-playbooks/payment-integration-playbook.md +119 -0
  388. package/security/02-playbooks/penetration-testing-playbook.md +517 -0
  389. package/security/03-checklists/security-audit-checklist.md +356 -0
  390. package/security/04-antipatterns/security-coding-antipatterns.md +580 -0
  391. package/security/05-cases/case-log4shell-incident.md +537 -0
  392. package/security/05-cases/case-major-breaches.md +468 -0
  393. package/security/06-glossary/security-glossary.md +212 -0
  394. package/security/compliance-automation.md +993 -0
  395. package/security/container-security.md +680 -0
  396. package/security/devsecops-complete.md +426 -0
  397. package/security/sast-dast-sca.md +775 -0
  398. package/security/secrets-management.md +594 -0
  399. package/security/security-architecture-deep-dive.md +37 -0
  400. package/security/threat-modeling-stride-playbook.md +40 -0
  401. package/seed-templates/auth-system.md +59 -0
  402. package/seed-templates/blog-content.md +94 -0
  403. package/seed-templates/dashboard.md +89 -0
  404. package/seed-templates/docs-site.md +73 -0
  405. package/seed-templates/e-commerce.md +50 -0
  406. package/seed-templates/saas-landing.md +92 -0
  407. package/seed-templates/settings-page.md +51 -0
  408. package/testing/01-standards/test-strategy-and-layering.md +83 -0
  409. package/testing/01-standards/testing-strategy-complete.md +422 -0
  410. package/testing/01-standards/unit-testing-best-practices.md +118 -0
  411. package/testing/02-playbooks/e2e-testing-playbook.md +988 -0
  412. package/testing/02-playbooks/testing-strategy-playbook.md +126 -0
  413. package/testing/03-checklists/test-strategy-checklist.md +208 -0
  414. package/testing/04-antipatterns/testing-antipatterns.md +718 -0
  415. package/testing/05-cases/case-testing-transformation.md +300 -0
  416. package/testing/06-glossary/testing-glossary.md +110 -0
  417. package/testing/risk-based-test-matrix.md +36 -0
  418. package/testing/testing-strategy-deep-dive.md +37 -0
@@ -0,0 +1,658 @@
1
+ ---
2
+ id: data-antipatterns
3
+ title: 数据工程反模式指南
4
+ domain: development
5
+ category: 04-antipatterns
6
+ difficulty: intermediate
7
+ tags: [antipatterns, audit, backups, data, development, fields, governance, inconsistency]
8
+ quality_score: 70
9
+ last_updated: 2026-06-15
10
+ ---
11
+ # 数据工程反模式指南
12
+
13
+ > 适用范围:数据管道 / ETL / 数据仓库 / 数据湖
14
+ > 约束级别:SHALL(必须在数据架构评审阶段拦截)
15
+
16
+ ---
17
+
18
+ ## 1. 无索引治理(Missing Index Governance)
19
+
20
+ ### 描述
21
+ 数据库索引无系统性管理:该建的索引不建(慢查询长期存在),不该建的索引不删(冗余索引拖慢写入),索引策略不随查询模式变化而更新。数据量从千级增长到亿级时,问题从慢查询升级为服务不可用。
22
+
23
+ ### 错误示例
24
+ ```sql
25
+ -- 表有 2 亿行,以下查询每天执行 10 万次,无索引
26
+ SELECT * FROM events
27
+ WHERE user_id = 12345
28
+ AND event_type = 'purchase'
29
+ AND created_at > '2024-01-01';
30
+ -- Seq Scan,执行时间 15 秒
31
+
32
+ -- 另一种常见情形(独立于上面的缺索引场景):表上堆积了大量冗余索引
33
+ CREATE INDEX idx_events_user ON events(user_id);
34
+ CREATE INDEX idx_events_user_type ON events(user_id, event_type);
35
+ CREATE INDEX idx_events_user_type_date ON events(user_id, event_type, created_at);
36
+ CREATE INDEX idx_events_user_date ON events(user_id, created_at);
37
+ CREATE INDEX idx_events_type ON events(event_type);
38
+ -- 5 个索引中有 4 个可以删除(2 个是复合索引的前缀,1 个已无查询使用,1 个选择性过低),写入时多维护 4 个索引
39
+ ```
40
+
41
+ ### 正确示例
42
+ ```sql
43
+ -- 根据实际查询模式,保留最优索引组合
44
+ CREATE INDEX idx_events_user_type_date ON events(user_id, event_type, created_at DESC);
45
+ -- 一个复合索引覆盖所有查询模式:
46
+ -- WHERE user_id = ?
47
+ -- WHERE user_id = ? AND event_type = ?
48
+ -- WHERE user_id = ? AND event_type = ? AND created_at > ?
49
+
50
+ -- 删除冗余索引
51
+ DROP INDEX idx_events_user; -- 被 idx_events_user_type_date 包含
52
+ DROP INDEX idx_events_user_type; -- 被 idx_events_user_type_date 包含
53
+ DROP INDEX idx_events_user_date; -- 查询模式已不使用
54
+ DROP INDEX idx_events_type; -- 低选择性,全表扫描更快
55
+ ```
56
+
57
+ ```python
58
+ # 索引巡检自动化
59
+ class IndexGovernance:
60
+ def audit_unused_indexes(self, days: int = 30) -> list[dict]:
61
+ """找出过去 N 天未使用的索引"""
62
+ return self._db.execute("""
63
+ SELECT schemaname, relname, indexrelname, idx_scan, pg_size_pretty(pg_relation_size(indexrelid))
64
+ FROM pg_stat_user_indexes
65
+ WHERE idx_scan = 0
66
+ AND indexrelname NOT LIKE '%_pkey'
67
+ ORDER BY pg_relation_size(indexrelid) DESC
68
+ """).fetchall()
69
+
70
+ def audit_missing_indexes(self) -> list[dict]:
71
+ """找出可能缺失索引的表(顺序扫描次数高)"""
72
+ return self._db.execute("""
73
+ SELECT relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch,
74
+ ROUND(seq_scan::numeric / NULLIF(seq_scan + idx_scan, 0) * 100, 2) AS seq_pct
75
+ FROM pg_stat_user_tables
76
+ WHERE seq_scan > 1000
77
+ AND seq_scan > idx_scan
78
+ ORDER BY seq_tup_read DESC
79
+ LIMIT 20
80
+ """).fetchall()
81
+
82
+ def audit_duplicate_indexes(self) -> list[dict]:
83
+ """找出重复或包含关系的索引"""
84
+ return self._db.execute("""
85
+ SELECT a.indexrelid::regclass AS index_a,
86
+ b.indexrelid::regclass AS index_b,
87
+ pg_size_pretty(pg_relation_size(a.indexrelid)) AS size_a
88
+ FROM pg_index a
89
+ JOIN pg_index b ON a.indrelid = b.indrelid
90
+ AND a.indexrelid != b.indexrelid
91
+ AND a.indkey::text LIKE b.indkey::text || '%'
92
+ WHERE a.indisvalid AND b.indisvalid
93
+ """).fetchall()
94
+ ```
95
+
96
+ ### 检测方法
97
+ - `pg_stat_user_indexes` 中 `idx_scan = 0` 的索引(自统计信息上次重置以来未被使用;需结合统计周期判断,并排除主键 / 唯一约束索引)。
98
+ - `pg_stat_user_tables` 中 `seq_scan >> idx_scan` 的表(缺索引)。
99
+ - 慢查询日志中 Top 20 SQL 的执行计划。
100
+ - 定期运行索引巡检脚本。
101
+
102
+ ### 修复步骤
103
+ 1. 收集过去 30 天的慢查询日志和索引使用统计。
104
+ 2. 为高频慢查询创建合适的索引。
105
+ 3. 删除未使用和冗余的索引。
106
+ 4. 建立月度索引巡检制度。
107
+ 5. 在 CI 中对 Schema 变更触发索引影响评估。
108
+
109
+ ### Agent Checklist
110
+ - [ ] 高频查询有索引覆盖
111
+ - [ ] 无使用次数为 0 的冗余索引
112
+ - [ ] 无包含关系的重复索引
113
+ - [ ] 有月度索引巡检机制
114
+ - [ ] Schema 变更有索引影响评估
115
+
116
+ ---
117
+
118
+ ## 2. 缓存与数据库一致性冲突(Cache Inconsistency)
119
+
120
+ ### 描述
121
+ 缓存更新策略与数据库写入策略不一致,导致用户读到过期数据。常见问题:先更新缓存再更新数据库(数据库失败但缓存已更新)、先删缓存再更新数据库(并发读回填旧值)、缓存无过期时间(数据永久不一致)。
122
+
123
+ ### 错误示例
124
+ ```python
125
+ # 先更新缓存再更新数据库 -- 数据库失败时缓存已是新值
126
+ def update_user_name(user_id, new_name):
127
+ cache.set(f"user:{user_id}", {"name": new_name}) # 缓存已更新
128
+ db.execute("UPDATE users SET name = %s WHERE id = %s", (new_name, user_id))
129
+ # 如果数据库更新失败,缓存中是新值,数据库中是旧值
130
+
131
+ # 先删缓存再更新数据库 -- 并发读回填旧值
132
+ def update_product_price(product_id, new_price):
133
+ cache.delete(f"product:{product_id}") # T1: 删缓存
134
+ # T2: 另一个请求读到缓存 miss,从数据库读到旧值,回填缓存
135
+ db.execute("UPDATE products SET price = %s WHERE id = %s", (new_price, product_id))
136
+ # 结果:缓存中是旧价格
137
+
138
+ # 缓存无 TTL -- 永久不一致
139
+ def get_config(key):
140
+ cached = cache.get(f"config:{key}")
141
+ if cached:
142
+ return cached
143
+ value = db.execute("SELECT value FROM config WHERE key = %s", (key,)).fetchone()
144
+ cache.set(f"config:{key}", value) # 无 TTL,永不过期
145
+ return value
146
+ ```
147
+
148
+ ### 正确示例
149
+ ```python
150
+ # 方案 1: Cache-Aside + 延迟双删
151
+ class UserService:
152
+ def update_name(self, user_id: int, new_name: str) -> None:
153
+ # 1. 先更新数据库
154
+ self._db.execute(
155
+ "UPDATE users SET name = %s WHERE id = %s", (new_name, user_id)
156
+ )
157
+ # 2. 删除缓存
158
+ self._cache.delete(f"user:{user_id}")
159
+ # 3. 延迟双删(防止并发读回填旧值)
160
+ asyncio.get_event_loop().call_later(
161
+ 1.0, # 1 秒后再删一次
162
+ self._cache.delete, f"user:{user_id}"
163
+ )
164
+
165
+ def get_user(self, user_id: int) -> User:
166
+ # Cache-Aside 模式
167
+ cached = self._cache.get(f"user:{user_id}")
168
+ if cached:
169
+ return User.model_validate_json(cached)
170
+
171
+ user = self._db.get_user(user_id)
172
+ if user:
173
+ self._cache.setex(
174
+ f"user:{user_id}",
175
+ 300, # 5 分钟 TTL
176
+ user.model_dump_json(),
177
+ )
178
+ return user
179
+
180
+ # 方案 2: Write-Through(强一致性场景)
181
+ class InventoryService:
182
+ def update_stock(self, product_id: int, new_stock: int) -> None:
183
+ with self._db.transaction() as tx:
184
+ tx.execute(
185
+ "UPDATE products SET stock = %s WHERE id = %s", (new_stock, product_id)
186
+ )
187
+ # 在数据库事务提交前同步写缓存(Redis Pipeline);注意 Redis 写入不受数据库事务保护,事务回滚时需补偿删除缓存
188
+ self._cache.setex(
189
+ f"stock:{product_id}", 60, str(new_stock)
190
+ )
191
+
192
+ # 方案 3: 事件驱动缓存更新
193
+ class CacheInvalidator:
194
+ """订阅数据库变更事件,异步刷新缓存"""
195
+ async def on_user_updated(self, event: UserUpdatedEvent):
196
+ await self._cache.delete(f"user:{event.user_id}")
197
+ # 预热:重新加载热点数据
198
+ if await self._is_hot_key(f"user:{event.user_id}"):
199
+ user = await self._db.get_user(event.user_id)
200
+ await self._cache.setex(f"user:{event.user_id}", 300, user.json())
201
+ ```
202
+
203
+ ### 检测方法
204
+ - 缓存 `SET` 操作在数据库 `UPDATE` 之前。
205
+ - 缓存无 TTL(`SET` 不带过期时间)。
206
+ - 存在 "先删缓存再更新数据库" 模式且无延迟双删。
207
+ - 数据对账(缓存 vs 数据库)发现不一致。
208
+
209
+ ### 修复步骤
210
+ 1. 确定一致性需求:最终一致性(Cache-Aside + TTL)vs 强一致性(Write-Through)。
211
+ 2. 统一缓存更新模式:先更新数据库,再删除缓存。
212
+ 3. 所有缓存设置 TTL(兜底保护)。
213
+ 4. 对高并发场景添加延迟双删。
214
+ 5. 建立缓存与数据库的定期对账机制。
215
+
216
+ ### Agent Checklist
217
+ - [ ] 缓存更新在数据库更新之后(不是之前)
218
+ - [ ] 所有缓存有 TTL
219
+ - [ ] 高并发场景有延迟双删
220
+ - [ ] 有缓存数据对账机制
221
+ - [ ] 缓存一致性策略有文档
222
+
223
+ ---
224
+
225
+ ## 3. 关键表缺审计字段(Missing Audit Fields)
226
+
227
+ ### 描述
228
+ 业务表缺少 `created_at`、`updated_at`、`created_by`、`updated_by` 等审计字段,出问题时无法追溯数据变更的时间和操作者。在合规场景下(金融、医疗),缺少审计字段可能违反法规。
229
+
230
+ ### 错误示例
231
+ ```sql
232
+ CREATE TABLE orders (
233
+ id SERIAL PRIMARY KEY,
234
+ user_id INTEGER NOT NULL,
235
+ total DECIMAL(10, 2) NOT NULL,
236
+ status VARCHAR(20) NOT NULL
237
+ -- 无 created_at: 不知道订单何时创建
238
+ -- 无 updated_at: 不知道最后一次修改是什么时候
239
+ -- 无 created_by: 不知道谁创建的(系统还是人工)
240
+ -- 无 version: 不知道修改了几次
241
+ );
242
+ ```
243
+
244
+ ### 正确示例
245
+ ```sql
246
+ CREATE TABLE orders (
247
+ id SERIAL PRIMARY KEY,
248
+ user_id INTEGER NOT NULL,
249
+ total DECIMAL(10, 2) NOT NULL,
250
+ status VARCHAR(20) NOT NULL,
251
+ -- 审计字段
252
+ created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
253
+ updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
254
+ created_by INTEGER, -- 创建者 ID
255
+ updated_by INTEGER, -- 最后修改者 ID
256
+ version INTEGER NOT NULL DEFAULT 1, -- 乐观锁版本号
257
+ deleted_at TIMESTAMP WITH TIME ZONE -- 软删除
258
+ );
259
+
260
+ -- 自动更新 updated_at
261
+ CREATE OR REPLACE FUNCTION update_updated_at()
262
+ RETURNS TRIGGER AS $$
263
+ BEGIN
264
+ NEW.updated_at = NOW();
265
+ NEW.version = OLD.version + 1;
266
+ RETURN NEW;
267
+ END;
268
+ $$ LANGUAGE plpgsql;
269
+
270
+ CREATE TRIGGER trigger_orders_updated_at
271
+ BEFORE UPDATE ON orders
272
+ FOR EACH ROW EXECUTE FUNCTION update_updated_at();
273
+
274
+ -- 变更审计日志表
275
+ CREATE TABLE audit_log (
276
+ id BIGSERIAL PRIMARY KEY,
277
+ table_name VARCHAR(100) NOT NULL,
278
+ record_id INTEGER NOT NULL,
279
+ action VARCHAR(10) NOT NULL, -- INSERT / UPDATE / DELETE
280
+ old_values JSONB,
281
+ new_values JSONB,
282
+ changed_by INTEGER,
283
+ changed_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
284
+ );
285
+ ```
286
+
287
+ ```python
288
+ # ORM 自动填充审计字段
289
+ class AuditMixin:
290
+ created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
291
+ updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
292
+ created_by = Column(Integer, nullable=True)
293
+ updated_by = Column(Integer, nullable=True)
294
+ version = Column(Integer, nullable=False, default=1)
295
+
296
+ class Order(Base, AuditMixin):
297
+ __tablename__ = "orders"
298
+ id = Column(Integer, primary_key=True)
299
+ user_id = Column(Integer, nullable=False)
300
+ total = Column(Numeric(10, 2), nullable=False)
301
+ status = Column(String(20), nullable=False)
302
+ ```
303
+
304
+ ### 检测方法
305
+ - 表结构无 `created_at` / `updated_at` 列。
306
+ - 无变更审计日志表。
307
+ - 数据变更后无法追溯操作者和时间。
308
+ - Schema 审查工具检查审计字段覆盖率。
309
+
310
+ ### 修复步骤
311
+ 1. 为所有业务表添加 `created_at`、`updated_at`、`created_by`、`updated_by`、`version` 字段。
312
+ 2. 创建数据库触发器自动更新 `updated_at` 和 `version`。
313
+ 3. 创建审计日志表记录敏感表的变更历史。
314
+ 4. ORM 层使用 Mixin 统一管理审计字段。
315
+ 5. 定期审查审计字段覆盖率。
316
+
317
+ ### Agent Checklist
318
+ - [ ] 所有业务表有 `created_at` + `updated_at`
319
+ - [ ] 敏感表有 `created_by` + `updated_by`
320
+ - [ ] 有数据库触发器自动更新审计字段
321
+ - [ ] 有变更审计日志表
322
+ - [ ] ORM 使用 Mixin 统一管理审计字段
323
+
324
+ ---
325
+
326
+ ## 4. 备份未演练(Untested Backups)
327
+
328
+ ### 描述
329
+ 数据库有定时备份,但从未验证过备份能否成功恢复。备份文件可能已损坏、不完整、格式不兼容,直到真正需要恢复时才发现无法使用。
330
+
331
+ ### 错误示例
332
+ ```bash
333
+ # 每天凌晨备份(看起来在跑)
334
+ 0 3 * * * pg_dump mydb > /backups/mydb_$(date +%Y%m%d).sql
335
+
336
+ # 问题:
337
+ # 1. 从未测试过恢复
338
+ # 2. 备份文件存在同一台机器(机器坏了备份也没了)
339
+ # 3. 无备份大小监控(文件可能是空的)
340
+ # 4. 无保留策略(磁盘早晚满)
341
+ # 5. 不知道恢复需要多长时间
342
+ ```
343
+
344
+ ### 正确示例
345
+ ```python
346
+ import subprocess
347
+ from datetime import datetime
348
+
349
+ class BackupManager:
350
+ def create_backup(self) -> BackupResult:
351
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
352
+ local_path = f"/backups/mydb_{timestamp}.sql.gz"
353
+
354
+ # 1. 创建备份(压缩)
355
+ result = subprocess.run(
356
+ ["pg_dump", "-Fc", "-Z", "9", "-f", local_path, self._db_name],
357
+ capture_output=True, text=True, timeout=3600,
358
+ )
359
+ if result.returncode != 0:
360
+ raise BackupError(f"pg_dump failed: {result.stderr}")
361
+
362
+ # 2. 验证备份文件大小
363
+ file_size = os.path.getsize(local_path)
364
+ if file_size < self._min_expected_size:
365
+ raise BackupError(f"Backup too small: {file_size} bytes")
366
+
367
+ # 3. 上传到远程存储(异地备份)
368
+ s3_key = f"backups/mydb/{timestamp}.sql.gz"
369
+ self._s3.upload_file(local_path, self._bucket, s3_key)
370
+
371
+ # 4. 记录备份元数据
372
+ return BackupResult(
373
+ timestamp=timestamp,
374
+ size=file_size,
375
+ local_path=local_path,
376
+ remote_path=f"s3://{self._bucket}/{s3_key}",
377
+ checksum=self._compute_checksum(local_path),
378
+ )
379
+
380
+ def verify_backup(self, backup: BackupResult) -> bool:
381
+ """在测试环境恢复备份,验证数据完整性"""
382
+ # 1. 下载备份
383
+ local_path = self._s3.download(backup.remote_path)
384
+
385
+ # 2. 恢复到测试数据库
386
+ subprocess.run(
387
+ ["pg_restore", "-d", self._test_db, "-c", local_path],
388
+ check=True, timeout=7200,
389
+ )
390
+
391
+ # 3. 验证关键表的行数
392
+ for table, expected_min in self._verification_tables.items():
393
+ count = self._test_db.execute(f"SELECT COUNT(*) FROM {table}").scalar()
394
+ if count < expected_min:
395
+ raise VerificationError(f"{table} has {count} rows, expected >= {expected_min}")
396
+
397
+ # 4. 验证最新数据的时间戳
398
+ latest = self._test_db.execute(
399
+ "SELECT MAX(updated_at) FROM orders"
400
+ ).scalar()
401
+ if (datetime.now() - latest).hours > 24:
402
+ raise VerificationError("Backup data is more than 24 hours old")
403
+
404
+ return True
405
+
406
+ def cleanup_old_backups(self, retention_days: int = 30):
407
+ """清理超过保留期的备份"""
408
+ cutoff = datetime.now() - timedelta(days=retention_days)
409
+ old_backups = self._list_backups_before(cutoff)
410
+ for backup in old_backups:
411
+ self._s3.delete(backup.remote_path)
412
+ os.remove(backup.local_path)
413
+ ```
414
+
415
+ ```yaml
416
+ # 备份策略
417
+ backup_policy:
418
+ schedule: "0 3 * * *" # 每天凌晨 3 点
419
+ type: "full" # 全量备份
420
+ retention: 30 # 保留 30 天
421
+ storage:
422
+ primary: "s3://backup-bucket/prod/"
423
+ secondary: "gs://backup-bucket-dr/prod/" # 异地容灾
424
+ verification:
425
+ schedule: "0 6 * * 1" # 每周一早上 6 点验证
426
+ target_db: "backup-test-db"
427
+ alerts:
428
+ backup_failed: critical
429
+ backup_too_small: warning
430
+ verification_failed: critical
431
+ no_backup_24h: critical
432
+ ```
433
+
434
+ ### 检测方法
435
+ - 无备份恢复演练记录。
436
+ - 备份文件存储在同一台机器或同一可用区。
437
+ - 无备份大小监控告警。
438
+ - 不知道恢复一次需要多长时间(RTO 未知)。
439
+ - 无备份保留和清理策略。
440
+
441
+ ### 修复步骤
442
+ 1. 备份上传到远程存储(S3 / GCS),至少保留异地一份。
443
+ 2. 每周自动在测试环境执行恢复验证。
444
+ 3. 监控备份文件大小,异常时告警。
445
+ 4. 测量每次恢复演练的实际耗时,并与 RTO(恢复时间目标)对比,确保在可接受范围内。
446
+ 5. 设置备份保留策略,自动清理过期备份。
447
+ 6. 每季度手动演练一次完整的灾难恢复流程。
448
+
449
+ ### Agent Checklist
450
+ - [ ] 有自动化备份(每日或更频繁)
451
+ - [ ] 备份存储在异地(不同区域 / 不同云)
452
+ - [ ] 有每周自动恢复验证
453
+ - [ ] 备份大小有监控告警
454
+ - [ ] RTO 已测量且在可接受范围内
455
+ - [ ] 有备份保留和清理策略
456
+
457
+ ---
458
+
459
+ ## 5. 数据管道无幂等(Non-Idempotent Data Pipeline)
460
+
461
+ ### 描述
462
+ ETL / 数据管道在重试或重复运行时产生重复数据。管道失败后重跑导致数据翻倍,或者部分成功的状态无法安全重试。
463
+
464
+ ### 错误示例
465
+ ```python
466
+ # 非幂等的 ETL -- 重跑产生重复数据
467
+ def sync_orders_to_warehouse():
468
+ orders = source_db.execute("SELECT * FROM orders WHERE date = CURRENT_DATE")
469
+ for order in orders:
470
+ warehouse_db.execute(
471
+ "INSERT INTO fact_orders (order_id, amount, date) VALUES (%s, %s, %s)",
472
+ (order["id"], order["amount"], order["date"])
473
+ )
474
+ # 如果中间失败重跑,已插入的数据会重复
475
+ ```
476
+
477
+ ### 正确示例
478
+ ```python
479
+ # 幂等的 ETL -- 使用 UPSERT
480
+ def sync_orders_to_warehouse(date: str):
481
+ orders = source_db.execute(
482
+ "SELECT * FROM orders WHERE date = %s", (date,)
483
+ )
484
+
485
+ with warehouse_db.transaction() as tx:
486
+ for order in orders:
487
+ tx.execute("""
488
+ INSERT INTO fact_orders (order_id, amount, date, synced_at)
489
+ VALUES (%s, %s, %s, NOW())
490
+ ON CONFLICT (order_id)
491
+ DO UPDATE SET amount = EXCLUDED.amount, synced_at = NOW()
492
+ """, (order["id"], order["amount"], order["date"]))
493
+
494
+ # 记录同步水位线
495
+ tx.execute("""
496
+ INSERT INTO sync_watermarks (pipeline, last_sync_date, synced_at)
497
+ VALUES ('orders', %s, NOW())
498
+ ON CONFLICT (pipeline)
499
+ DO UPDATE SET last_sync_date = EXCLUDED.last_sync_date, synced_at = NOW()
500
+ """, (date,))
501
+
502
+ # 分区替换模式(大批量场景)
503
+ def sync_daily_events(date: str):
504
+ """整个分区替换,天然幂等"""
505
+ # 1. 写入临时表
506
+ temp_table = f"tmp_events_{date.replace('-', '')}"
507
+ warehouse_db.execute(f"CREATE TABLE IF NOT EXISTS {temp_table} (LIKE fact_events INCLUDING ALL)")
508
+ warehouse_db.execute(f"TRUNCATE {temp_table}")
509
+
510
+ # 2. 批量导入到临时表
511
+ events = source_db.execute("SELECT * FROM events WHERE date = %s", (date,))
512
+ warehouse_db.bulk_insert(temp_table, events)
513
+
514
+ # 3. 原子替换分区
515
+ warehouse_db.execute(f"""
516
+ ALTER TABLE fact_events DETACH PARTITION fact_events_{date.replace('-', '')};
517
+ ALTER TABLE fact_events ATTACH PARTITION {temp_table}
518
+ FOR VALUES FROM ('{date}') TO ('{date}'::date + INTERVAL '1 day');
519
+ """)
520
+ ```
521
+
522
+ ### 检测方法
523
+ - ETL 使用 `INSERT` 而非 `INSERT ... ON CONFLICT` / `MERGE`。
524
+ - 无同步水位线记录(不知道同步到哪里了)。
525
+ - 重跑管道后数据量翻倍。
526
+ - 无去重逻辑或唯一约束。
527
+
528
+ ### 修复步骤
529
+ 1. 所有 ETL 写入使用 UPSERT(`INSERT ... ON CONFLICT` / `MERGE`)。
530
+ 2. 目标表建立业务唯一约束(防止重复)。
531
+ 3. 记录同步水位线,重跑时从水位线处开始。
532
+ 4. 大批量场景使用分区替换模式。
533
+ 5. ETL 测试包含重复运行场景验证。
534
+
535
+ ### Agent Checklist
536
+ - [ ] ETL 写入使用 UPSERT / MERGE
537
+ - [ ] 目标表有业务唯一约束
538
+ - [ ] 有同步水位线记录
539
+ - [ ] 重复运行不产生重复数据
540
+ - [ ] 有重复运行的测试用例
541
+
542
+ ---
543
+
544
+ ## 6. 数据质量无监控(Missing Data Quality Monitoring)
545
+
546
+ ### 描述
547
+ 数据管道只关注是否执行成功,不检查数据本身的质量。数据可能为空、重复、格式异常、超出合理范围,但管道显示 "成功"。下游消费者发现数据问题时,问题已经扩散。
548
+
549
+ ### 错误示例
550
+ ```python
551
+ # 管道 "成功" 但数据有问题
552
+ def sync_user_data():
553
+ data = fetch_from_api("/users")
554
+ db.bulk_insert("users", data)
555
+ logger.info(f"Synced {len(data)} users") # "成功"
556
+ # 问题:data 可能是空列表、字段可能缺失、email 可能无效
557
+ ```
558
+
559
+ ### 正确示例
560
+ ```python
561
+ from great_expectations import DataContext
562
+
563
+ class DataQualityChecker:
564
+ def check_user_data(self, data: list[dict]) -> QualityReport:
565
+ checks = [
566
+ # 完整性检查
567
+ Check("row_count", lambda d: len(d) > 0, "Data is not empty"),
568
+ Check("row_count_range", lambda d: 100 < len(d) < 1000000, "Row count in expected range"),
569
+
570
+ # 唯一性检查
571
+ Check("unique_ids", lambda d: len(set(r["id"] for r in d)) == len(d), "IDs are unique"),
572
+
573
+ # 格式检查
574
+ Check("valid_emails", lambda d: all(
575
+ re.match(r"^[^@]+@[^@]+\.[^@]+$", r.get("email", "")) for r in d
576
+ ), "All emails are valid format"),
577
+
578
+ # 范围检查
579
+ Check("age_range", lambda d: all(
580
+ 0 < r.get("age", 0) < 150 for r in d
581
+ ), "All ages in valid range"),
582
+
583
+ # 时效性检查
584
+ Check("freshness", lambda d: any(
585
+ parse_date(r.get("updated_at", "1970-01-01")) > datetime.now() - timedelta(hours=24)
586
+ for r in d
587
+ ), "Data is fresh (updated within 24h)"),
588
+
589
+ # NULL 检查
590
+ Check("no_null_names", lambda d: all(
591
+ r.get("name") is not None and r.get("name").strip() != "" for r in d
592
+ ), "No null or empty names"),
593
+ ]
594
+
595
+ results = [check.run(data) for check in checks]
596
+ report = QualityReport(checks=results)
597
+
598
+ if not report.all_passed:
599
+ logger.error("Data quality check failed", failures=report.failures)
600
+ alert_service.send(
601
+ severity="critical" if report.critical_failures else "warning",
602
+ message=f"Data quality issues: {report.summary}",
603
+ )
604
+
605
+ return report
606
+
607
+ # 在管道中集成质量检查
608
+ def sync_user_data():
609
+ data = fetch_from_api("/users")
610
+
611
+ # 入库前检查
612
+ report = quality_checker.check_user_data(data)
613
+ if report.critical_failures:
614
+ raise DataQualityError(f"Critical quality issues: {report.failures}")
615
+
616
+ db.bulk_insert("users", data)
617
+
618
+ # 入库后检查
619
+ db_count = db.execute("SELECT COUNT(*) FROM users WHERE synced_at = NOW()::date").scalar()
620
+ if abs(db_count - len(data)) > 0:
621
+ raise DataQualityError(f"Row count mismatch: API={len(data)}, DB={db_count}")
622
+
623
+ logger.info("User data synced", row_count=len(data), quality_score=report.score)
624
+ ```
625
+
626
+ ### 检测方法
627
+ - 数据管道无质量检查步骤。
628
+ - 下游消费者频繁报告数据异常。
629
+ - 无数据完整性 / 唯一性 / 时效性监控。
630
+ - 管道 "成功" 但数据为空或严重异常。
631
+
632
+ ### 修复步骤
633
+ 1. 为每个数据管道定义质量检查规则(完整性、唯一性、格式、范围、时效性)。
634
+ 2. 入库前执行质量检查,关键规则失败时阻断写入。
635
+ 3. 入库后验证行数一致性。
636
+ 4. 使用 Great Expectations / dbt test / Soda 等工具自动化。
637
+ 5. 质量检查结果纳入监控仪表板。
638
+
639
+ ### Agent Checklist
640
+ - [ ] 数据管道有入库前质量检查
641
+ - [ ] 关键质量规则失败时阻断写入
642
+ - [ ] 有行数一致性验证
643
+ - [ ] 有数据时效性监控
644
+ - [ ] 质量检查结果有仪表板
645
+
646
+ ---
647
+
648
+ ## 全局 Agent Checklist
649
+
650
+ | 检查项 | 阈值 | 工具 |
651
+ |--------|------|------|
652
+ | 高频查询有索引 | 100% | `EXPLAIN ANALYZE` |
653
+ | 冗余索引 | 0 个 | `pg_stat_user_indexes` |
654
+ | 缓存有 TTL | 100% | Code Review |
655
+ | 业务表有审计字段 | 100% | Schema 审查 |
656
+ | 备份恢复验证 | 每周 1 次 | 备份系统 |
657
+ | ETL 幂等 | 100% | 重复运行测试 |
658
+ | 数据质量检查 | 每个管道 | 管道配置审查 |