@umacloud/knowledge 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. package/00-governance/governance-capabilities.md +557 -0
  2. package/00-governance/knowledge-map.md +39 -0
  3. package/00-governance/maintenance-policy.md +76 -0
  4. package/00-governance/review-checklist.md +81 -0
  5. package/README.md +13 -0
  6. package/ai/01-standards/agent-development-complete.md +691 -0
  7. package/ai/01-standards/llm-application-complete.md +488 -0
  8. package/ai/01-standards/mlops-complete.md +798 -0
  9. package/ai/01-standards/prompt-engineering-complete.md +646 -0
  10. package/ai/01-standards/rag-architecture-complete.md +649 -0
  11. package/ai/02-playbooks/llm-evaluation-playbook.md +847 -0
  12. package/ai/03-checklists/ai-project-checklist.md +215 -0
  13. package/ai/04-antipatterns/ai-antipatterns.md +661 -0
  14. package/ai/05-cases/case-rag-production.md +147 -0
  15. package/ai/06-glossary/ai-glossary.md +162 -0
  16. package/ai/agent-evaluation-benchmark.md +53 -0
  17. package/ai/ai-agent-memory-context-management.md +41 -0
  18. package/ai/ai-cost-capacity-optimization-playbook.md +42 -0
  19. package/ai/ai-data-security-and-compliance-playbook.md +37 -0
  20. package/ai/ai-domain-index-and-checklist.md +40 -0
  21. package/ai/ai-governance-maturity-model.md +50 -0
  22. package/ai/ai-model-selection-and-routing-strategy.md +47 -0
  23. package/ai/ai-observability-and-oncall-runbook.md +52 -0
  24. package/ai/ai-rag-engineering-playbook.md +42 -0
  25. package/ai/ai-red-team-and-safety-evaluation.md +42 -0
  26. package/ai/ai-release-readiness-and-rollback-gate.md +42 -0
  27. package/ai/llm-agent-engineering-deep-dive.md +57 -0
  28. package/ai/prompt-and-tool-guardrails.md +52 -0
  29. package/api/01-standards/enterprise-api-standards.md +198 -0
  30. package/api/01-standards/rest-api-design-guide.md +63 -0
  31. package/api/02-playbooks/api-pagination-playbook.md +93 -0
  32. package/api/02-playbooks/graphql-production-playbook.md +176 -0
  33. package/api/03-checklists/api-review-checklist.md +55 -0
  34. package/api/04-antipatterns/api-antipatterns.md +112 -0
  35. package/architecture/01-standards/api-gateway-patterns.md +496 -0
  36. package/architecture/01-standards/cloud-native-patterns.md +644 -0
  37. package/architecture/01-standards/distributed-systems-patterns.md +591 -0
  38. package/architecture/01-standards/event-driven-architecture.md +595 -0
  39. package/architecture/01-standards/microservices-patterns-complete.md +968 -0
  40. package/architecture/01-standards/microservices-patterns.md +495 -0
  41. package/architecture/01-standards/system-design-interview.md +664 -0
  42. package/architecture/02-playbooks/microservices-patterns-playbook.md +137 -0
  43. package/architecture/02-playbooks/migration-playbook.md +780 -0
  44. package/architecture/02-playbooks/system-design-playbook.md +779 -0
  45. package/architecture/03-checklists/architecture-decision-checklist.md +297 -0
  46. package/architecture/04-antipatterns/architecture-antipatterns.md +417 -0
  47. package/architecture/05-cases/case-netflix-microservices.md +413 -0
  48. package/architecture/06-glossary/architecture-glossary.md +164 -0
  49. package/architecture/adr-template-and-examples.md +38 -0
  50. package/architecture/api-gateway-deep-dive.md +1291 -0
  51. package/architecture/configuration-management.md +1162 -0
  52. package/architecture/distributed-transactions.md +1220 -0
  53. package/architecture/microservices-complete.md +735 -0
  54. package/architecture/resilience-and-disaster-patterns.md +37 -0
  55. package/architecture/service-governance.md +1198 -0
  56. package/architecture/system-architecture-deep-dive.md +37 -0
  57. package/backend/01-standards/analytics-and-growth.md +65 -0
  58. package/backend/01-standards/api-and-error-conventions.md +120 -0
  59. package/backend/01-standards/application-layering-and-packaging.md +160 -0
  60. package/backend/01-standards/auth-implementation.md +104 -0
  61. package/backend/01-standards/backend-framework-idioms.md +74 -0
  62. package/backend/01-standards/background-jobs-and-async.md +66 -0
  63. package/backend/01-standards/caching-strategies-complete.md +390 -0
  64. package/backend/01-standards/config-and-observability.md +77 -0
  65. package/backend/01-standards/data-modeling-and-persistence.md +94 -0
  66. package/backend/01-standards/django-complete.md +1765 -0
  67. package/backend/01-standards/email-and-notifications.md +64 -0
  68. package/backend/01-standards/fastapi-complete.md +925 -0
  69. package/backend/01-standards/file-upload-and-storage.md +66 -0
  70. package/backend/01-standards/graphql-api-complete.md +416 -0
  71. package/backend/01-standards/llm-application-standard.md +78 -0
  72. package/backend/01-standards/message-queue-patterns.md +379 -0
  73. package/backend/01-standards/microservices-and-distributed.md +78 -0
  74. package/backend/01-standards/nestjs-complete.md +2167 -0
  75. package/backend/01-standards/payment-integration.md +80 -0
  76. package/backend/01-standards/rate-limiting-complete.md +451 -0
  77. package/backend/01-standards/realtime-and-websocket.md +65 -0
  78. package/backend/01-standards/search-and-filtering.md +64 -0
  79. package/backend/01-standards/spring-boot-complete.md +445 -0
  80. package/backend/02-playbooks/api-design-playbook.md +718 -0
  81. package/backend/02-playbooks/email-send-playbook.md +130 -0
  82. package/backend/02-playbooks/file-upload-s3-playbook.md +153 -0
  83. package/backend/02-playbooks/typescript-enterprise-playbook.md +133 -0
  84. package/backend/02-playbooks/websocket-realtime-playbook.md +154 -0
  85. package/backend/03-checklists/api-launch-checklist.md +189 -0
  86. package/backend/04-antipatterns/backend-antipatterns.md +1051 -0
  87. package/blockchain/01-standards/blockchain-basics.md +557 -0
  88. package/blockchain/01-standards/smart-contract-development.md +1315 -0
  89. package/cicd/01-standards/deployment-and-delivery-standard.md +96 -0
  90. package/cicd/01-standards/github-actions-complete.md +473 -0
  91. package/cicd/01-standards/release-and-store-submission.md +75 -0
  92. package/cicd/02-playbooks/cicd-pipeline-playbook.md +144 -0
  93. package/cicd/02-playbooks/release-management-playbook.md +605 -0
  94. package/cicd/03-checklists/pipeline-security-checklist.md +168 -0
  95. package/cicd/04-antipatterns/cicd-antipatterns.md +589 -0
  96. package/cicd/05-cases/case-deployment-automation.md +221 -0
  97. package/cicd/05-cases/case-gitops-transformation.md +212 -0
  98. package/cicd/06-glossary/cicd-glossary.md +114 -0
  99. package/cicd/cicd-blueprint-deep-dive.md +38 -0
  100. package/cicd/release-readiness-gate.md +37 -0
  101. package/cloud-native/01-standards/container-security.md +741 -0
  102. package/cloud-native/01-standards/kubernetes-complete.md +812 -0
  103. package/cloud-native/02-playbooks/api-gateway-playbook.md +155 -0
  104. package/cloud-native/02-playbooks/gitops-with-argocd.md +760 -0
  105. package/cloud-native/02-playbooks/k8s-troubleshooting-playbook.md +1942 -0
  106. package/cloud-native/02-playbooks/message-queue-playbook.md +129 -0
  107. package/cloud-native/02-playbooks/multicloud-governance.md +726 -0
  108. package/cloud-native/02-playbooks/serverless-patterns.md +788 -0
  109. package/cloud-native/02-playbooks/service-mesh-playbook.md +612 -0
  110. package/cloud-native/02-playbooks/terraform-iac-playbook.md +143 -0
  111. package/cloud-native/03-checklists/container-security-checklist.md +431 -0
  112. package/cloud-native/03-checklists/k8s-production-readiness-checklist.md +460 -0
  113. package/cloud-native/04-antipatterns/container-antipatterns.md +660 -0
  114. package/cloud-native/04-antipatterns/k8s-antipatterns.md +743 -0
  115. package/cloud-native/05-cases/case-k8s-migration.md +478 -0
  116. package/cloud-native/05-cases/case-k8s-scaling.md +642 -0
  117. package/cloud-native/05-cases/case-k8s-security-incident.md +397 -0
  118. package/cloud-native/06-glossary/cloud-native-glossary.md +337 -0
  119. package/cross-platform/01-standards/cross-platform-frameworks.md +83 -0
  120. package/cross-platform/01-standards/platform-selection-and-architecture.md +77 -0
  121. package/data/01-standards/elasticsearch-complete.md +2098 -0
  122. package/data/01-standards/postgresql-complete.md +1613 -0
  123. package/data/01-standards/redis-complete.md +1527 -0
  124. package/data/02-playbooks/database-optimization-playbook.md +403 -0
  125. package/data/02-playbooks/elasticsearch-production-playbook.md +132 -0
  126. package/data/03-checklists/database-launch-checklist.md +187 -0
  127. package/data/04-antipatterns/database-antipatterns.md +873 -0
  128. package/data/05-cases/case-database-migration.md +310 -0
  129. package/data/06-glossary/database-glossary.md +440 -0
  130. package/data/data-governance-and-modeling-deep-dive.md +39 -0
  131. package/data-engineering/01-standards/airflow-complete.md +523 -0
  132. package/data-engineering/01-standards/kafka-complete.md +1521 -0
  133. package/data-engineering/02-playbooks/spark-etl-playbook.md +496 -0
  134. package/data-engineering/03-checklists/pipeline-launch-checklist.md +194 -0
  135. package/data-engineering/04-antipatterns/data-pipeline-antipatterns.md +684 -0
  136. package/data-engineering/05-cases/case-real-time-pipeline.md +355 -0
  137. package/data-engineering/06-glossary/data-engineering-glossary.md +429 -0
  138. package/database/01-standards/database-schema-standards.md +147 -0
  139. package/database/02-playbooks/postgresql-optimization-quick.md +52 -0
  140. package/database/02-playbooks/postgresql-performance-optimization.md +58 -0
  141. package/database/02-playbooks/postgresql-production-playbook.md +146 -0
  142. package/database/02-playbooks/redis-caching-playbook.md +117 -0
  143. package/database/03-checklists/database-review-checklist.md +50 -0
  144. package/database/04-antipatterns/database-antipatterns.md +112 -0
  145. package/design/01-standards/ui-design-system-complete.md +423 -0
  146. package/design/02-playbooks/design-handoff-playbook.md +254 -0
  147. package/design/02-playbooks/design-review-playbook.md +388 -0
  148. package/design/03-checklists/design-review-checklist.md +246 -0
  149. package/design/04-antipatterns/design-antipatterns.md +378 -0
  150. package/design/05-cases/case-design-system-adoption.md +328 -0
  151. package/design/06-glossary/design-glossary.md +329 -0
  152. package/design/ui-full-lifecycle-cross-platform-playbook.md +571 -0
  153. package/design/ux-system-deep-dive.md +38 -0
  154. package/design-systems/00-craft-rules.md +71 -0
  155. package/design-systems/aesthetic-families.md +43 -0
  156. package/design-systems/anti-ai-slop.md +162 -0
  157. package/design-systems/bold-geometric.md +120 -0
  158. package/design-systems/brutalist-bold.md +103 -0
  159. package/design-systems/editorial-clean.md +109 -0
  160. package/design-systems/glass-aurora.md +108 -0
  161. package/design-systems/modern-minimal.md +145 -0
  162. package/design-systems/premium-luxury.md +106 -0
  163. package/design-systems/product-type-design-map.md +48 -0
  164. package/design-systems/soft-warm.md +123 -0
  165. package/design-systems/tech-utility.md +113 -0
  166. package/desktop/01-standards/desktop-app-standard.md +72 -0
  167. package/desktop/01-standards/desktop-design.md +71 -0
  168. package/development/00-governance/document-template.md +41 -0
  169. package/development/01-standards/api-versioning-strategies.md +432 -0
  170. package/development/01-standards/authentication-patterns-complete.md +479 -0
  171. package/development/01-standards/css-architecture-complete.md +550 -0
  172. package/development/01-standards/database-migration-strategies.md +484 -0
  173. package/development/01-standards/elasticsearch-complete.md +347 -0
  174. package/development/01-standards/git-complete.md +371 -0
  175. package/development/01-standards/golang-complete.md +1565 -0
  176. package/development/01-standards/graphql-complete.md +298 -0
  177. package/development/01-standards/javascript-bundlers-complete.md +469 -0
  178. package/development/01-standards/javascript-typescript-complete.md +528 -0
  179. package/development/01-standards/jest-complete.md +275 -0
  180. package/development/01-standards/linux-complete.md +234 -0
  181. package/development/01-standards/logging-observability-complete.md +526 -0
  182. package/development/01-standards/microservices-communication.md +502 -0
  183. package/development/01-standards/mongodb-complete.md +406 -0
  184. package/development/01-standards/oauth2-complete.md +285 -0
  185. package/development/01-standards/performance-optimization-complete.md +289 -0
  186. package/development/01-standards/playwright-complete.md +247 -0
  187. package/development/01-standards/postgresql-complete.md +456 -0
  188. package/development/01-standards/pytest-complete.md +340 -0
  189. package/development/01-standards/python-async-programming.md +902 -0
  190. package/development/01-standards/python-complete.md +956 -0
  191. package/development/01-standards/python-decorators-complete.md +799 -0
  192. package/development/01-standards/python-design-patterns.md +2854 -0
  193. package/development/01-standards/python-packaging-distribution.md +420 -0
  194. package/development/01-standards/python-testing-strategies.md +607 -0
  195. package/development/01-standards/python-web-frameworks-comparison.md +471 -0
  196. package/development/01-standards/redis-complete.md +317 -0
  197. package/development/01-standards/rest-api-complete.md +316 -0
  198. package/development/01-standards/rust-complete.md +578 -0
  199. package/development/01-standards/typescript-advanced-types.md +1513 -0
  200. package/development/01-standards/web-security-complete.md +292 -0
  201. package/development/02-playbooks/api-design-playbook.md +810 -0
  202. package/development/02-playbooks/database-migration-playbook.md +580 -0
  203. package/development/02-playbooks/debugging-playbook.md +692 -0
  204. package/development/02-playbooks/feature-delivery-playbook.md +430 -0
  205. package/development/02-playbooks/incident-hotfix-playbook.md +387 -0
  206. package/development/02-playbooks/performance-optimization-playbook.md +531 -0
  207. package/development/02-playbooks/performance-tuning-playbook.md +652 -0
  208. package/development/02-playbooks/refactor-playbook.md +403 -0
  209. package/development/02-playbooks/release-playbook.md +469 -0
  210. package/development/03-checklists/architecture-review-checklist.md +168 -0
  211. package/development/03-checklists/data-migration-checklist.md +157 -0
  212. package/development/03-checklists/oncall-handover-checklist.md +173 -0
  213. package/development/03-checklists/pr-checklist.md +158 -0
  214. package/development/03-checklists/production-readiness-checklist.md +190 -0
  215. package/development/03-checklists/release-readiness-checklist.md +154 -0
  216. package/development/03-checklists/security-review-checklist.md +182 -0
  217. package/development/04-antipatterns/api-antipatterns.md +657 -0
  218. package/development/04-antipatterns/architecture-antipatterns.md +686 -0
  219. package/development/04-antipatterns/backend-antipatterns.md +648 -0
  220. package/development/04-antipatterns/cicd-antipatterns.md +540 -0
  221. package/development/04-antipatterns/code-smell-antipatterns.md +571 -0
  222. package/development/04-antipatterns/data-antipatterns.md +658 -0
  223. package/development/04-antipatterns/database-antipatterns.md +578 -0
  224. package/development/04-antipatterns/frontend-antipatterns.md +635 -0
  225. package/development/04-antipatterns/reliability-antipatterns.md +700 -0
  226. package/development/04-antipatterns/security-antipatterns.md +747 -0
  227. package/development/05-cases/case-api-version-migration.md +428 -0
  228. package/development/05-cases/case-authorization-hardening.md +383 -0
  229. package/development/05-cases/case-bluegreen-rollback.md +466 -0
  230. package/development/05-cases/case-cache-snowball-protection.md +485 -0
  231. package/development/05-cases/case-ci-cd-pipeline.md +544 -0
  232. package/development/05-cases/case-database-scaling.md +500 -0
  233. package/development/05-cases/case-db-hotspot-optimization.md +487 -0
  234. package/development/05-cases/case-incident-mttr-reduction.md +563 -0
  235. package/development/05-cases/case-microservice-migration.md +375 -0
  236. package/development/05-cases/case-performance-optimization.md +406 -0
  237. package/development/05-cases/case-security-incident-response.md +345 -0
  238. package/development/06-glossary/full-stack-glossary.md +166 -0
  239. package/development/09-maturity/quarterly-audit-template.md +35 -0
  240. package/development/11-ui-excellence/ui-aesthetic-system.md +41 -0
  241. package/development/11-ui-excellence/ui-engineering-excellence.md +435 -0
  242. package/development/12-scenarios/development-scenarios-guide.md +565 -0
  243. package/development/13-implementation-assets/implementation-toolkit.md +282 -0
  244. package/development/13-implementation-assets/knowledge-gates-execution.md +43 -0
  245. package/development/14-full-lifecycle/software-lifecycle-gates.md +511 -0
  246. package/development/15-lifecycle-templates/project-templates-collection.md +791 -0
  247. package/development/api-contract-and-versioning-guide.md +36 -0
  248. package/development/api-governance-complete.md +43 -0
  249. package/development/backend-engineering-complete.md +43 -0
  250. package/development/code-review-quality-complete.md +43 -0
  251. package/development/concurrency-reliability-complete.md +43 -0
  252. package/development/database-engineering-complete.md +43 -0
  253. package/development/engineering-effectiveness-complete.md +43 -0
  254. package/development/engineering-standards-deep-dive.md +38 -0
  255. package/development/frontend-engineering-complete.md +43 -0
  256. package/development/performance-capacity-complete.md +43 -0
  257. package/development/refactor-migration-complete.md +42 -0
  258. package/development/refactoring-and-techdebt-playbook.md +37 -0
  259. package/development/security-in-development-complete.md +43 -0
  260. package/devops/01-standards/cicd-pipeline-complete.md +262 -0
  261. package/devops/01-standards/docker-complete.md +1490 -0
  262. package/devops/01-standards/github-actions-complete.md +337 -0
  263. package/devops/01-standards/kubernetes-complete.md +638 -0
  264. package/devops/01-standards/terraform-complete.md +2117 -0
  265. package/devops/02-playbooks/docker-compose-playbook.md +233 -0
  266. package/devops/02-playbooks/docker-k8s-production-playbook.md +186 -0
  267. package/devops/02-playbooks/docker-production-playbook.md +952 -0
  268. package/edge-iot/01-standards/edge-iot-complete.md +473 -0
  269. package/experts/architect/api-design.md +178 -0
  270. package/experts/architect/methodology.md +124 -0
  271. package/experts/architect/security.md +75 -0
  272. package/experts/backend-lead/methodology.md +216 -0
  273. package/experts/devops/methodology.md +160 -0
  274. package/experts/frontend-lead/methodology.md +178 -0
  275. package/experts/product-manager/industry/ecommerce.md +43 -0
  276. package/experts/product-manager/industry/saas.md +40 -0
  277. package/experts/product-manager/methodology.md +97 -0
  278. package/experts/qa-lead/methodology.md +123 -0
  279. package/experts/qa-lead/test-strategy.md +128 -0
  280. package/experts/uiux-designer/methodology.md +125 -0
  281. package/frontend/01-standards/accessibility-complete.md +532 -0
  282. package/frontend/01-standards/accessibility-standard.md +74 -0
  283. package/frontend/01-standards/admin-dashboard-and-crud.md +72 -0
  284. package/frontend/01-standards/design-tokens-complete.md +444 -0
  285. package/frontend/01-standards/forms-and-validation.md +77 -0
  286. package/frontend/01-standards/frontend-architecture-and-layering.md +119 -0
  287. package/frontend/01-standards/i18n-and-localization.md +65 -0
  288. package/frontend/01-standards/nextjs-complete.md +451 -0
  289. package/frontend/01-standards/react-complete.md +713 -0
  290. package/frontend/01-standards/react-hooks-complete-guide.md +1100 -0
  291. package/frontend/01-standards/react-hooks-complete.md +1171 -0
  292. package/frontend/01-standards/seo-and-web-vitals.md +77 -0
  293. package/frontend/01-standards/state-management-complete.md +444 -0
  294. package/frontend/01-standards/vue-complete.md +499 -0
  295. package/frontend/01-standards/vue3-complete.md +2002 -0
  296. package/frontend/01-standards/web-framework-best-practices.md +64 -0
  297. package/frontend/01-standards/web-performance-complete.md +495 -0
  298. package/frontend/02-playbooks/accessibility-a11y-playbook.md +161 -0
  299. package/frontend/02-playbooks/frontend-performance-playbook.md +707 -0
  300. package/frontend/02-playbooks/i18n-internationalization-playbook.md +120 -0
  301. package/frontend/02-playbooks/performance-optimization-playbook.md +163 -0
  302. package/frontend/02-playbooks/react-nextjs-production-playbook.md +167 -0
  303. package/frontend/02-playbooks/react-state-management-playbook.md +173 -0
  304. package/frontend/03-checklists/component-quality-checklist.md +166 -0
  305. package/frontend/03-checklists/frontend-launch-checklist.md +299 -0
  306. package/frontend/04-antipatterns/frontend-antipatterns.md +886 -0
  307. package/frontend/05-cases/case-performance-optimization.md +274 -0
  308. package/harmony/01-standards/harmonyos-arkts-standard.md +75 -0
  309. package/harmony/01-standards/harmonyos-design.md +65 -0
  310. package/high-quality-engineering-playbook.md +54 -0
  311. package/incident/01-standards/incident-response-complete.md +303 -0
  312. package/incident/02-playbooks/chaos-engineering-playbook.md +883 -0
  313. package/incident/02-playbooks/postmortem-playbook.md +398 -0
  314. package/incident/03-checklists/incident-readiness-checklist.md +181 -0
  315. package/incident/04-antipatterns/incident-antipatterns.md +490 -0
  316. package/incident/05-cases/case-cascade-failure.md +176 -0
  317. package/incident/06-glossary/incident-glossary.md +114 -0
  318. package/incident/postmortem-and-response-deep-dive.md +39 -0
  319. package/industries/ecommerce/ecommerce-complete.md +631 -0
  320. package/industries/education/education-complete.md +555 -0
  321. package/industries/fintech/fintech-complete.md +501 -0
  322. package/industries/gaming/gaming-complete.md +587 -0
  323. package/industries/healthcare/healthcare-complete.md +452 -0
  324. package/low-code/01-standards/low-code-complete.md +944 -0
  325. package/miniprogram/01-standards/ai-common-mistakes.md +61 -0
  326. package/miniprogram/01-standards/miniprogram-custom-navbar-capsule.md +77 -0
  327. package/miniprogram/01-standards/miniprogram-design.md +61 -0
  328. package/miniprogram/01-standards/miniprogram-standard.md +81 -0
  329. package/mobile/01-standards/android-material-design.md +70 -0
  330. package/mobile/01-standards/flutter-complete.md +384 -0
  331. package/mobile/01-standards/ios-design-hig.md +78 -0
  332. package/mobile/01-standards/mobile-app-standard.md +85 -0
  333. package/mobile/01-standards/react-native-complete.md +352 -0
  334. package/mobile/02-playbooks/mobile-cross-platform-playbook.md +175 -0
  335. package/mobile/02-playbooks/mobile-performance.md +473 -0
  336. package/mobile/03-checklists/mobile-release-checklist.md +234 -0
  337. package/mobile/04-antipatterns/mobile-antipatterns.md +798 -0
  338. package/mobile/05-cases/case-app-performance.md +500 -0
  339. package/mobile/05-cases/case-app-startup-optimization.md +218 -0
  340. package/mobile/06-glossary/mobile-glossary.md +484 -0
  341. package/observability/01-standards/observability-standards.md +103 -0
  342. package/observability/02-playbooks/prometheus-grafana-playbook.md +135 -0
  343. package/observability/02-playbooks/structured-logging-playbook.md +73 -0
  344. package/observability/03-checklists/observability-checklist.md +54 -0
  345. package/observability/04-antipatterns/observability-antipatterns.md +106 -0
  346. package/operations/01-standards/prometheus-monitoring-complete.md +1578 -0
  347. package/operations/02-playbooks/capacity-planning-playbook.md +620 -0
  348. package/operations/03-checklists/production-launch-checklist.md +365 -0
  349. package/operations/04-antipatterns/operations-antipatterns.md +664 -0
  350. package/operations/05-cases/case-sre-practices.md +581 -0
  351. package/operations/06-glossary/operations-glossary.md +120 -0
  352. package/operations/aiops-anomaly-detection.md +758 -0
  353. package/operations/capacity-planning.md +1061 -0
  354. package/operations/chaos-engineering.md +659 -0
  355. package/operations/incident-command-system.md +38 -0
  356. package/operations/observability-complete.md +442 -0
  357. package/operations/slo-sli-playbook.md +517 -0
  358. package/operations/sre-operations-deep-dive.md +39 -0
  359. package/package.json +8 -0
  360. package/performance/01-standards/performance-and-scalability.md +80 -0
  361. package/performance/01-standards/performance-standards.md +156 -0
  362. package/performance/02-playbooks/query-optimization-playbook.md +103 -0
  363. package/performance/03-checklists/performance-checklist.md +56 -0
  364. package/performance/04-antipatterns/performance-antipatterns.md +146 -0
  365. package/product/01-standards/product-management-complete.md +285 -0
  366. package/product/02-playbooks/feature-launch-playbook.md +207 -0
  367. package/product/02-playbooks/user-research-playbook.md +532 -0
  368. package/product/03-checklists/feature-launch-checklist.md +275 -0
  369. package/product/04-antipatterns/product-antipatterns.md +355 -0
  370. package/product/05-cases/case-mvp-to-scale.md +384 -0
  371. package/product/06-glossary/product-glossary.md +462 -0
  372. package/product/feature-prioritization-framework.md +40 -0
  373. package/product/kpi-and-metric-tree.md +37 -0
  374. package/product/product-discovery-and-prd-deep-dive.md +41 -0
  375. package/quantum/01-standards/quantum-complete.md +1186 -0
  376. package/security/01-standards/api-security-complete.md +511 -0
  377. package/security/01-standards/container-runtime-security.md +574 -0
  378. package/security/01-standards/data-protection-gdpr.md +543 -0
  379. package/security/01-standards/owasp-top10-complete.md +1890 -0
  380. package/security/01-standards/secure-coding-baseline.md +90 -0
  381. package/security/01-standards/supply-chain-security.md +441 -0
  382. package/security/01-standards/web-security-checklist.md +108 -0
  383. package/security/01-standards/zero-trust-architecture.md +521 -0
  384. package/security/02-playbooks/auth-sso-playbook.md +166 -0
  385. package/security/02-playbooks/incident-response-security-playbook.md +588 -0
  386. package/security/02-playbooks/owasp-api-security-playbook.md +129 -0
  387. package/security/02-playbooks/payment-integration-playbook.md +119 -0
  388. package/security/02-playbooks/penetration-testing-playbook.md +517 -0
  389. package/security/03-checklists/security-audit-checklist.md +356 -0
  390. package/security/04-antipatterns/security-coding-antipatterns.md +580 -0
  391. package/security/05-cases/case-log4shell-incident.md +537 -0
  392. package/security/05-cases/case-major-breaches.md +468 -0
  393. package/security/06-glossary/security-glossary.md +212 -0
  394. package/security/compliance-automation.md +993 -0
  395. package/security/container-security.md +680 -0
  396. package/security/devsecops-complete.md +426 -0
  397. package/security/sast-dast-sca.md +775 -0
  398. package/security/secrets-management.md +594 -0
  399. package/security/security-architecture-deep-dive.md +37 -0
  400. package/security/threat-modeling-stride-playbook.md +40 -0
  401. package/seed-templates/auth-system.md +59 -0
  402. package/seed-templates/blog-content.md +94 -0
  403. package/seed-templates/dashboard.md +89 -0
  404. package/seed-templates/docs-site.md +73 -0
  405. package/seed-templates/e-commerce.md +50 -0
  406. package/seed-templates/saas-landing.md +92 -0
  407. package/seed-templates/settings-page.md +51 -0
  408. package/testing/01-standards/test-strategy-and-layering.md +83 -0
  409. package/testing/01-standards/testing-strategy-complete.md +422 -0
  410. package/testing/01-standards/unit-testing-best-practices.md +118 -0
  411. package/testing/02-playbooks/e2e-testing-playbook.md +988 -0
  412. package/testing/02-playbooks/testing-strategy-playbook.md +126 -0
  413. package/testing/03-checklists/test-strategy-checklist.md +208 -0
  414. package/testing/04-antipatterns/testing-antipatterns.md +718 -0
  415. package/testing/05-cases/case-testing-transformation.md +300 -0
  416. package/testing/06-glossary/testing-glossary.md +110 -0
  417. package/testing/risk-based-test-matrix.md +36 -0
  418. package/testing/testing-strategy-deep-dive.md +37 -0
@@ -0,0 +1,798 @@
1
+ ---
2
+ id: mlops-complete
3
+ title: MLOps 完整指南
4
+ domain: ai
5
+ category: 01-standards
6
+ difficulty: intermediate
7
+ tags: [agent, ai, checklist, complete, mlops, 实验跟踪, 数据漂移检测, 概述]
8
+ quality_score: 70
9
+ last_updated: 2026-06-15
10
+ ---
11
+ # MLOps 完整指南
12
+
13
+ ## 概述
14
+
15
+ MLOps (Machine Learning Operations) 是将机器学习模型从实验到生产的工程化实践体系。本指南覆盖实验跟踪、模型注册、部署策略、生产监控、数据漂移检测和 A/B 测试,适用于传统 ML 和 LLM 应用的全生命周期管理。
16
+
17
+ ### MLOps 成熟度模型
18
+
19
+ ```
20
+ MLOps 成熟度等级:
21
+ ├── Level 0 — 手工作坊
22
+ │ 手动训练、手动部署、无版本管理
23
+ ├── Level 1 — 管道自动化
24
+ │ 自动训练管道、实验跟踪、模型注册
25
+ ├── Level 2 — CI/CD for ML
26
+ │ 自动测试、自动部署、特征存储
27
+ ├── Level 3 — 全自动化
28
+ │ 自动重训练、漂移检测、自动回滚
29
+ └── Level 4 — 自适应
30
+ 自动特征工程、模型选择、超参优化
31
+ ```
32
+
33
+ ---
34
+
35
+ ## 1. 实验跟踪
36
+
37
+ ### 1.1 实验跟踪架构
38
+
39
+ ```
40
+ 实验跟踪要素:
41
+ ├── 代码版本 — Git commit hash
42
+ ├── 数据版本 — 数据集 hash 或 DVC 版本
43
+ ├── 超参数 — 所有训练参数
44
+ ├── 指标 — 训练和验证指标曲线
45
+ ├── 模型产物 — 权重文件、配置
46
+ ├── 环境 — Python 版本、依赖包版本、GPU 型号
47
+ └── 元数据 — 实验者、时间、备注
48
+ ```
49
+
50
+ ### 1.2 MLflow 实验跟踪
51
+
52
+ ```python
53
+ import mlflow
54
+ from mlflow.tracking import MlflowClient
55
+
56
+ class ExperimentTracker:
57
+ """MLflow 实验跟踪封装。"""
58
+
59
+ def __init__(self, experiment_name: str,
60
+ tracking_uri: str = "http://mlflow:5000"):
61
+ mlflow.set_tracking_uri(tracking_uri)
62
+ mlflow.set_experiment(experiment_name)
63
+ self.client = MlflowClient(tracking_uri)
64
+
65
+ def start_run(self, run_name: str, params: dict,
66
+ tags: dict | None = None) -> str:
67
+ """开始一次实验记录。"""
68
+ run = mlflow.start_run(run_name=run_name, tags=tags or {})
69
+ mlflow.log_params(params)
70
+ # 记录代码版本
71
+ import subprocess
72
+ git_hash = subprocess.check_output(
73
+ ["git", "rev-parse", "HEAD"]
74
+ ).decode().strip()
75
+ mlflow.set_tag("git_commit", git_hash)
76
+ return run.info.run_id
77
+
78
+ def log_metrics(self, metrics: dict, step: int | None = None):
79
+ """记录指标。"""
80
+ for key, value in metrics.items():
81
+ mlflow.log_metric(key, value, step=step)
82
+
83
+ def log_model(self, model, model_name: str, input_example=None):
84
+ """记录模型产物。"""
85
+ mlflow.sklearn.log_model(
86
+ model, model_name,
87
+ input_example=input_example,
88
+ registered_model_name=model_name,
89
+ )
90
+
91
+ def end_run(self):
92
+ mlflow.end_run()
93
+
94
+ def compare_runs(self, metric: str, top_k: int = 5) -> list[dict]:
95
+ """比较实验结果,返回 Top-K 最佳运行。"""
96
+ experiment = mlflow.get_experiment_by_name(
97
+ mlflow.get_experiment(
98
+ mlflow.active_run().info.experiment_id
99
+ ).name
100
+ )
101
+ runs = self.client.search_runs(
102
+ experiment_ids=[experiment.experiment_id],
103
+ order_by=[f"metrics.{metric} DESC"],
104
+ max_results=top_k,
105
+ )
106
+ return [
107
+ {
108
+ "run_id": r.info.run_id,
109
+ "run_name": r.info.run_name,
110
+ "params": r.data.params,
111
+ "metrics": r.data.metrics,
112
+ }
113
+ for r in runs
114
+ ]
115
+ ```
116
+
117
+ ### 1.3 LLM 实验跟踪 (特殊需求)
118
+
119
+ ```python
120
class LLMExperimentTracker:
    """Experiment tracking for LLM applications (prompt experiments)."""

    def __init__(self, storage):
        # storage must provide insert(table_name, record); nothing else is used.
        self.storage = storage

    def log_prompt_experiment(self, experiment: dict) -> str:
        """Persist one prompt experiment and return its generated id.

        Required keys in ``experiment``: ``prompt_version``, ``template``,
        ``model``, ``temperature``, ``test_cases``, ``results``. The metric
        keys and ``evaluator`` are optional; missing metrics are stored as
        ``None`` and ``evaluator`` defaults to ``"auto"``.

        Raises:
            KeyError: If a required key is missing.
        """
        # Imported locally so the snippet does not rely on imports that
        # happen elsewhere.
        import uuid
        from datetime import datetime, timezone

        record = {
            "experiment_id": str(uuid.uuid4()),
            # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated
            # and returns a naive value).
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "prompt_version": experiment["prompt_version"],
            "prompt_template": experiment["template"],
            "model": experiment["model"],
            "temperature": experiment["temperature"],
            "test_cases": experiment["test_cases"],
            "results": experiment["results"],
            "metrics": {
                "accuracy": experiment.get("accuracy"),
                "latency_p50_ms": experiment.get("latency_p50"),
                "latency_p95_ms": experiment.get("latency_p95"),
                "avg_input_tokens": experiment.get("avg_input_tokens"),
                "avg_output_tokens": experiment.get("avg_output_tokens"),
                "cost_per_request": experiment.get("cost_per_request"),
            },
            "evaluator": experiment.get("evaluator", "auto"),
        }
        self.storage.insert("llm_experiments", record)
        return record["experiment_id"]
149
+ ```
150
+
151
+ ---
152
+
153
+ ## 2. 模型注册
154
+
155
+ ### 2.1 模型注册中心
156
+
157
+ ```python
158
+ class ModelRegistry:
159
+ """模型注册中心: 版本管理、阶段转换、元数据管理。"""
160
+
161
+ STAGES = ["development", "staging", "production", "archived"]
162
+
163
+ def __init__(self, mlflow_client: MlflowClient):
164
+ self.client = mlflow_client
165
+
166
+ def register(self, model_name: str, run_id: str,
167
+ description: str = "") -> str:
168
+ """注册新模型版本。"""
169
+ result = self.client.create_model_version(
170
+ name=model_name,
171
+ source=f"runs:/{run_id}/model",
172
+ run_id=run_id,
173
+ description=description,
174
+ )
175
+ return result.version
176
+
177
+ def promote(self, model_name: str, version: str,
178
+ target_stage: str, approval: dict | None = None):
179
+ """推进模型到下一阶段。"""
180
+ if target_stage not in self.STAGES:
181
+ raise ValueError(f"无效阶段: {target_stage}")
182
+
183
+ # 生产阶段需要审批
184
+ if target_stage == "production":
185
+ if not approval or not approval.get("approved_by"):
186
+ raise PermissionError("推进到生产阶段需要审批")
187
+
188
+ self.client.transition_model_version_stage(
189
+ name=model_name,
190
+ version=version,
191
+ stage=target_stage,
192
+ )
193
+
194
+ def get_production_model(self, model_name: str) -> dict | None:
195
+ """获取当前生产版本。"""
196
+ versions = self.client.get_latest_versions(
197
+ model_name, stages=["production"]
198
+ )
199
+ if versions:
200
+ v = versions[0]
201
+ return {
202
+ "version": v.version,
203
+ "run_id": v.run_id,
204
+ "source": v.source,
205
+ "created_at": v.creation_timestamp,
206
+ }
207
+ return None
208
+ ```
209
+
210
+ ### 2.2 模型卡片 (Model Card)
211
+
212
+ ```yaml
213
# model_card.yaml — must accompany every registered model
name: "fraud-detection-v3"
version: "3.2.1"
description: "基于 XGBoost 的交易欺诈检测模型"

model_details:
  type: "binary_classification"
  framework: "xgboost"
  training_data: "transactions_2024_q1_q3"
  features: 47
  # Plain digits: `2_800_000` is an integer only under YAML 1.1 and is read
  # as a string by YAML 1.2 parsers.
  training_samples: 2800000

performance:
  metrics:
    - name: "AUC-ROC"
      value: 0.9834
      dataset: "test_2024_q4"
    - name: "Precision@0.95_Recall"
      value: 0.89
      dataset: "test_2024_q4"
    - name: "F1"
      value: 0.92
      dataset: "test_2024_q4"

fairness:
  evaluated_groups: ["gender", "age_bucket", "region"]
  max_disparity: 0.03

limitations:
  - "对新型欺诈模式 (训练数据中未出现) 检出率可能较低"
  - "高峰时段延迟可能超过 SLA (50ms)"

ethical_considerations:
  - "模型决策影响用户交易,需要人工复核通道"
  - "年龄和地区不应成为主要判别因子"
249
+
250
+ ---
251
+
252
+ ## 3. 部署策略
253
+
254
+ ### 3.1 部署模式对比
255
+
256
+ | 模式 | 延迟 | 吞吐量 | 适用场景 |
257
+ |------|------|--------|---------|
258
+ | REST API | 10-100ms | 中 | 在线推理,低并发 |
259
+ | gRPC | 5-50ms | 高 | 在线推理,高并发 |
260
+ | Batch | 分钟级 | 极高 | 离线批量处理 |
261
+ | Streaming | 首 Token < 200ms | 中 | LLM 生成式输出 |
262
+ | Edge | 1-10ms | 低 | 端侧推理,离线场景 |
263
+
264
+ ### 3.2 模型服务部署
265
+
266
+ ```python
267
+ # FastAPI 模型服务
268
+ from fastapi import FastAPI, HTTPException
269
+ from pydantic import BaseModel
270
+ import time
271
+
272
+ app = FastAPI(title="Model Serving API")
273
+
274
class PredictionRequest(BaseModel):
    """Request body for a single prediction."""

    # Feature mapping handed to the loaded model's predict() by ModelServer.
    features: dict
    # Explicit model version to use; when omitted ModelServer chooses one.
    # NOTE(review): pydantic v2 reserves the `model_` prefix and may warn
    # about this field name — confirm against the pydantic version in use.
    model_version: str | None = None
277
+
278
class PredictionResponse(BaseModel):
    """Response body returned for a single prediction."""

    # Model output, converted to float by ModelServer.predict.
    prediction: float
    # Confidence reported by the model for this prediction.
    confidence: float
    # Version of the model that actually served the request.
    model_version: str
    # Time spent inside model.predict(), in milliseconds.
    latency_ms: float
283
+
284
+ class ModelServer:
285
+ """模型服务器: 支持多版本、金丝雀和回滚。"""
286
+
287
+ def __init__(self, registry: ModelRegistry):
288
+ self.registry = registry
289
+ self.loaded_models: dict[str, object] = {}
290
+ self.active_version: str | None = None
291
+ self.canary_version: str | None = None
292
+ self.canary_ratio: float = 0.0
293
+
294
+ def load_model(self, model_name: str, version: str):
295
+ """加载模型到内存。"""
296
+ import mlflow.pyfunc
297
+ model = mlflow.pyfunc.load_model(
298
+ model_uri=f"models:/{model_name}/{version}"
299
+ )
300
+ self.loaded_models[version] = model
301
+
302
+ def predict(self, request: PredictionRequest) -> PredictionResponse:
303
+ version = self._select_version(request.model_version)
304
+ model = self.loaded_models.get(version)
305
+ if not model:
306
+ raise HTTPException(404, f"模型版本 {version} 未加载")
307
+
308
+ start = time.monotonic()
309
+ result = model.predict(request.features)
310
+ latency = (time.monotonic() - start) * 1000
311
+
312
+ return PredictionResponse(
313
+ prediction=float(result["prediction"]),
314
+ confidence=float(result["confidence"]),
315
+ model_version=version,
316
+ latency_ms=round(latency, 2),
317
+ )
318
+
319
+ def _select_version(self, requested: str | None) -> str:
320
+ """选择模型版本: 支持金丝雀流量分配。"""
321
+ if requested:
322
+ return requested
323
+ if self.canary_version and random.random() < self.canary_ratio:
324
+ return self.canary_version
325
+ return self.active_version
326
+ ```
327
+
328
+ ### 3.3 蓝绿部署与金丝雀
329
+
330
+ ```yaml
331
# Flagger Canary resource — progressive (canary) rollout of the
# model-serving Deployment
apiVersion: flagger.app/v1beta1
kind: Canary
metadata:
  name: model-serving
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: model-serving
  progressDeadlineSeconds: 600
  service:
    port: 8080
  analysis:
    interval: 1m    # how often the analysis runs
    threshold: 5    # failed checks tolerated before rollback
    maxWeight: 50   # maximum share of traffic (%) sent to the canary
    stepWeight: 10  # traffic increment (%) per successful interval
    metrics:
      # Builtin Flagger check: HTTP request success rate in percent.
      - name: request-success-rate
        thresholdRange:
          min: 99
        interval: 1m
      # Not a builtin check, so it must reference a MetricTemplate.
      # A MetricTemplate named `prediction-latency-p99` has to exist in the
      # same namespace (add `namespace:` under templateRef otherwise).
      - name: prediction-latency-p99
        templateRef:
          name: prediction-latency-p99
        thresholdRange:
          max: 100
        interval: 1m
358
+ ```
359
+
360
+ ---
361
+
362
+ ## 4. 生产监控
363
+
364
+ ### 4.1 监控指标体系
365
+
366
+ ```
367
+ ML 监控指标:
368
+ ├── 模型性能指标
369
+ │ ├── 准确率/精确率/召回率 (实时 vs 离线)
370
+ │ ├── 预测分布 (与训练集对比)
371
+ │ └── 置信度分布
372
+ ├── 系统性能指标
373
+ │ ├── 推理延迟 (P50/P95/P99)
374
+ │ ├── 吞吐量 (QPS)
375
+ │ ├── 错误率
376
+ │ └── GPU/CPU/内存使用率
377
+ ├── 数据质量指标
378
+ │ ├── 特征缺失率
379
+ │ ├── 异常值比例
380
+ │ └── 数据新鲜度
381
+ └── 业务指标
382
+ ├── 转化率/点击率变化
383
+ ├── 用户反馈
384
+ └── 人工干预率
385
+ ```
386
+
387
+ ### 4.2 监控实现
388
+
389
+ ```python
390
+ from prometheus_client import Counter, Histogram, Gauge, Summary
391
+ import numpy as np
392
+
393
+ # Prometheus 指标定义
394
# Number of predictions served, split by model, version and predicted class.
PREDICTION_COUNTER = Counter(
    "model_predictions_total",
    "模型预测总数",
    ["model_name", "model_version", "result_class"],
)
# Inference latency in seconds; buckets span 10 ms to 1 s.
PREDICTION_LATENCY = Histogram(
    "model_prediction_latency_seconds",
    "模型预测延迟",
    ["model_name"],
    buckets=[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0],
)
# Confidence distribution. A Histogram is used because the Python client's
# Summary only exposes _count and _sum (no quantiles), which cannot show a
# distribution. _count and _sum are still exported, plus _bucket.
PREDICTION_CONFIDENCE = Histogram(
    "model_prediction_confidence",
    "预测置信度分布",
    ["model_name"],
    buckets=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 1.0],
)
# Latest drift score per model and feature.
DATA_DRIFT_SCORE = Gauge(
    "model_data_drift_score",
    "数据漂移得分",
    ["model_name", "feature_name"],
)
415
+
416
class ModelMonitor:
    """Production monitoring for one model: counters, latency and drift.

    Every prediction updates the Prometheus metrics and is buffered; once
    ``drift_window`` predictions have accumulated, a PSI drift score is
    computed per numeric reference feature and the buffer is cleared.
    """

    def __init__(self, model_name: str, reference_data,
                 model_version: str = "current",
                 drift_window: int = 1000):
        """
        Args:
            model_name: Value of the ``model_name`` metric label.
            reference_data: Table-like object exposing ``.columns`` and
                per-column ``.dtype`` / ``.values`` (presumably a pandas
                DataFrame — confirm with callers).
            model_version: Value of the ``model_version`` label on the
                prediction counter.
            drift_window: Number of buffered predictions that triggers a
                drift check.
        """
        self.model_name = model_name
        self.model_version = model_version
        self.drift_window = drift_window
        self.reference_stats = self._compute_stats(reference_data)
        self.prediction_buffer: list[dict] = []

    def record_prediction(self, features: dict, prediction: float,
                          confidence: float, latency_s: float):
        """Record one prediction and update the monitoring metrics."""
        result_class = "positive" if prediction > 0.5 else "negative"
        PREDICTION_COUNTER.labels(
            self.model_name, self.model_version, result_class
        ).inc()
        PREDICTION_LATENCY.labels(self.model_name).observe(latency_s)
        PREDICTION_CONFIDENCE.labels(self.model_name).observe(confidence)

        self.prediction_buffer.append({
            "features": features,
            "prediction": prediction,
            "confidence": confidence,
        })

        # Check drift once per full window. Clear the buffer even if the
        # check fails, so a bad batch neither grows without bound nor
        # re-raises on every later prediction.
        if len(self.prediction_buffer) >= self.drift_window:
            try:
                self._check_drift()
            finally:
                self.prediction_buffer = []

    def _check_drift(self):
        """Compute PSI per reference feature over the buffer and export it."""
        for feature_name in self.reference_stats:
            current_values = [
                p["features"].get(feature_name)
                for p in self.prediction_buffer
                if p["features"].get(feature_name) is not None
            ]
            if not current_values:
                continue
            psi = self._calculate_psi(
                self.reference_stats[feature_name],
                np.array(current_values, dtype=float),
            )
            DATA_DRIFT_SCORE.labels(self.model_name, feature_name).set(psi)

    @staticmethod
    def _calculate_psi(reference: np.ndarray, current: np.ndarray,
                       bins: int = 10) -> float:
        """Compute the Population Stability Index of ``current`` vs ``reference``.

        Bin edges are the reference percentiles. The outermost edges are
        opened to +/- infinity so that current values outside the reference
        range are counted in the end bins instead of being dropped. NaN
        values are ignored.

        Returns:
            The PSI as a float, or NaN when either sample is empty.
        """
        reference = np.asarray(reference, dtype=float)
        current = np.asarray(current, dtype=float)
        reference = reference[~np.isnan(reference)]
        current = current[~np.isnan(current)]
        if reference.size == 0 or current.size == 0:
            return float("nan")

        edges = np.percentile(reference, np.linspace(0, 100, bins + 1))
        edges[0], edges[-1] = -np.inf, np.inf
        # Heavily tied reference data yields repeated percentiles; collapse
        # them so no zero-width bins remain.
        edges = np.unique(edges)

        ref_share = np.histogram(reference, bins=edges)[0] / reference.size
        cur_share = np.histogram(current, bins=edges)[0] / current.size

        # Avoid log(0) and division by zero for empty bins.
        ref_share = np.clip(ref_share, 1e-6, None)
        cur_share = np.clip(cur_share, 1e-6, None)

        psi = np.sum((cur_share - ref_share) * np.log(cur_share / ref_share))
        return float(psi)

    @staticmethod
    def _compute_stats(data) -> dict:
        """Collect the non-NaN values of every numeric column of ``data``.

        Any NumPy numeric dtype qualifies (float32, int32, ...), not only
        float64/int64. Non-NumPy (extension) dtypes are skipped.
        """
        stats = {}
        for col in data.columns:
            dtype = data[col].dtype
            if isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.number):
                values = np.asarray(data[col].values, dtype=float)
                stats[col] = values[~np.isnan(values)]
        return stats
485
+ ```
486
+
487
+ ---
488
+
489
+ ## 5. 数据漂移检测
490
+
491
+ ### 5.1 漂移类型与检测方法
492
+
493
+ | 漂移类型 | 描述 | 检测方法 | 告警阈值 |
494
+ |----------|------|---------|----------|
495
+ | 数据漂移 (Data Drift) | 输入特征分布变化 | PSI, KS Test | PSI > 0.2 |
496
+ | 概念漂移 (Concept Drift) | 特征与标签关系变化 | 性能指标下降 | AUC 下降 > 5% |
497
+ | 预测漂移 (Prediction Drift) | 预测分布变化 | PSI on predictions | PSI > 0.15 |
498
+ | 标签漂移 (Label Drift) | 标签分布变化 | Chi-squared test | p-value < 0.05 |
499
+
500
+ ### 5.2 自动漂移检测管道
501
+
502
+ ```python
503
+ from scipy import stats
504
+
505
class DriftDetector:
    """Batch drift detection of a current dataset against a reference one."""

    def __init__(self, reference_data, alert_callback=None):
        """
        Args:
            reference_data: Reference table; indexed by column name and
                iterated through ``.columns`` (presumably a pandas
                DataFrame — confirm with callers).
            alert_callback: Optional callable invoked with the full report
                when any feature drifted.
        """
        self.reference = reference_data
        self.alert = alert_callback

    @staticmethod
    def _is_numeric(series) -> bool:
        """True for any NumPy numeric dtype (not only float64/int64)."""
        dtype = series.dtype
        return isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.number)

    def detect(self, current_data) -> dict:
        """Run drift detection over every reference feature.

        Returns:
            A report with per-feature results under ``features``, an
            ``overall_drift`` flag and human-readable ``alerts``. A feature
            missing from ``current_data`` is reported as drifted.
        """
        report = {"features": {}, "overall_drift": False, "alerts": []}

        for feature in self.reference.columns:
            if feature not in current_data.columns:
                # Schema change: surface it instead of failing with KeyError.
                result = {"method": "missing-column",
                          "score": float("nan"), "drifted": True}
            elif self._is_numeric(self.reference[feature]):
                result = self._test_numeric(feature, current_data[feature])
            else:
                result = self._test_categorical(feature, current_data[feature])

            report["features"][feature] = result
            if result["drifted"]:
                report["overall_drift"] = True
                report["alerts"].append(
                    f"特征 {feature} 漂移: {result['method']}={result['score']:.4f}"
                )

        if report["overall_drift"] and self.alert:
            self.alert(report)
        return report

    def _test_numeric(self, feature: str, current) -> dict:
        """KS two-sample test plus PSI for one numeric feature.

        Drift is flagged when PSI > 0.2 or the KS p-value < 0.01.
        """
        ref = self.reference[feature].dropna()
        cur = current.dropna()

        if len(ref) == 0 or len(cur) == 0:
            # Nothing to compare; neither test is defined on empty samples.
            nan = float("nan")
            return {"method": "KS+PSI", "ks_statistic": nan, "ks_pvalue": nan,
                    "psi": nan, "score": nan, "drifted": False}

        ks_stat, ks_pvalue = stats.ks_2samp(ref, cur)
        psi = ModelMonitor._calculate_psi(ref.values, cur.values)

        return {
            "method": "KS+PSI",
            "ks_statistic": float(ks_stat),
            "ks_pvalue": float(ks_pvalue),
            "psi": float(psi),
            "score": float(psi),
            # Plain bool keeps the report JSON-serializable.
            "drifted": bool(psi > 0.2 or ks_pvalue < 0.01),
        }

    def _test_categorical(self, feature: str, current) -> dict:
        """Chi-squared test of homogeneity for one categorical feature.

        The test runs on raw category counts of both samples (a 2 x k
        contingency table). Running it on normalized proportions would make
        the statistic independent of sample size, so it could never reject.
        Categories present in only one sample are handled naturally.
        """
        ref_counts = self.reference[feature].value_counts()
        cur_counts = current.value_counts()

        # Align categories across both samples.
        categories = list(set(ref_counts.index) | set(cur_counts.index))
        table = np.array([
            [ref_counts.get(c, 0) for c in categories],
            [cur_counts.get(c, 0) for c in categories],
        ], dtype=float)

        if table[0].sum() == 0 or table[1].sum() == 0:
            nan = float("nan")
            return {"method": "chi-squared", "chi2": nan, "pvalue": nan,
                    "score": nan, "drifted": False}

        outcome = stats.chi2_contingency(table)
        chi2, pvalue = outcome[0], outcome[1]
        return {
            "method": "chi-squared",
            "chi2": float(chi2),
            "pvalue": float(pvalue),
            "score": float(chi2),
            "drifted": bool(pvalue < 0.05),
        }
568
+ ```
569
+
570
+ ### 5.3 自动重训练触发
571
+
572
+ ```python
573
class RetrainingTrigger:
    """Triggers retraining after drift has been seen on consecutive checks."""

    def __init__(self, drift_detector: "DriftDetector",
                 training_pipeline, config: dict):
        """
        Args:
            drift_detector: Object whose ``detect(data)`` returns a report
                containing ``overall_drift``.
            training_pipeline: Object exposing
                ``trigger(reason=..., drift_report=..., data_window=...)``.
            config: Optional keys ``drift_patience`` (default 3) and
                ``training_window`` (default ``"90d"``).
        """
        self.detector = drift_detector
        self.pipeline = training_pipeline
        self.config = config
        self.consecutive_drift_count = 0

    def evaluate_and_trigger(self, current_data) -> dict:
        """Run drift detection and trigger retraining when patience is exhausted.

        Returns:
            A dict with ``drift_detected``, ``consecutive_count`` (the streak
            length including this check, before any reset), ``action``
            (``"none"`` or ``"retrain"``) and the drift ``report``.
        """
        drift_report = self.detector.detect(current_data)
        drifted = drift_report["overall_drift"]

        if drifted:
            self.consecutive_drift_count += 1
        else:
            self.consecutive_drift_count = 0

        # Capture the streak before it is reset, so the caller can see how
        # many drifting checks led to the retrain.
        streak = self.consecutive_drift_count

        action = "none"
        # Require drift on this very check: otherwise a patience of 0 would
        # retrain on every evaluation, drift or not.
        if drifted and streak >= self.config.get("drift_patience", 3):
            action = "retrain"
            self.pipeline.trigger(
                reason="data_drift",
                drift_report=drift_report,
                data_window=self.config.get("training_window", "90d"),
            )
            self.consecutive_drift_count = 0

        return {
            "drift_detected": drifted,
            "consecutive_count": streak,
            "action": action,
            "report": drift_report,
        }
608
+ ```
609
+
610
+ ---
611
+
612
+ ## 6. A/B 测试
613
+
614
+ ### 6.1 A/B 测试框架
615
+
616
+ ```python
617
class ModelABTest:
    """A/B test of two models with deterministic user bucketing."""

    def __init__(self, control_model: str, treatment_model: str,
                 traffic_split: float = 0.1, salt: str = ""):
        """
        Args:
            control_model: Identifier of the control model.
            treatment_model: Identifier of the treatment model.
            traffic_split: Share of users (0..1) assigned to treatment.
            salt: Optional per-experiment salt. With the default empty salt,
                bucketing depends on the user id alone, so the same users
                end up in treatment in every experiment.
        """
        self.control = control_model
        self.treatment = treatment_model
        self.split = traffic_split
        self.salt = salt
        self.results = {"control": [], "treatment": []}

    def assign_group(self, user_id: str) -> str:
        """Deterministically map ``user_id`` to ``"treatment"`` or ``"control"``.

        The user id (prefixed with the salt, if any) is hashed into one of
        100 buckets; buckets below ``traffic_split * 100`` go to treatment.
        """
        # Local import: the snippet's import list does not include hashlib.
        import hashlib

        key = f"{self.salt}:{user_id}" if self.salt else user_id
        # MD5 is used only for uniform bucketing, not for security.
        bucket = int(hashlib.md5(key.encode()).hexdigest(), 16) % 100
        return "treatment" if bucket < self.split * 100 else "control"

    def record_outcome(self, group: str, prediction: float,
                       actual: float, latency_ms: float):
        """Store one observed outcome for ``group``.

        A prediction counts as correct when it falls on the same side of 0.5
        as the actual value.
        """
        self.results[group].append({
            "prediction": prediction,
            "actual": actual,
            "latency_ms": latency_ms,
            "correct": (prediction > 0.5) == (actual > 0.5),
        })

    def analyze(self) -> dict:
        """Compare accuracy and latency and test accuracy for significance.

        Returns:
            ``{"status": "insufficient_data", ...}`` while either group has
            fewer than 100 outcomes; otherwise the full comparison.
            ``accuracy_lift`` is the relative lift in percent, or ``None``
            when the control accuracy is 0 (relative lift undefined).
        """
        control = self.results["control"]
        treatment = self.results["treatment"]

        if len(control) < 100 or len(treatment) < 100:
            return {"status": "insufficient_data",
                    "message": "样本量不足,继续收集数据"}

        c_acc = float(np.mean([r["correct"] for r in control]))
        t_acc = float(np.mean([r["correct"] for r in treatment]))

        c_latency = float(np.mean([r["latency_ms"] for r in control]))
        t_latency = float(np.mean([r["latency_ms"] for r in treatment]))

        # Two-sample proportion z-test on the accuracy.
        z_stat, p_value = self._proportion_z_test(
            sum(r["correct"] for r in control), len(control),
            sum(r["correct"] for r in treatment), len(treatment),
        )

        significant = p_value < 0.05
        winner = "treatment" if t_acc > c_acc and significant else "control"
        lift = None if c_acc == 0 else round((t_acc - c_acc) / c_acc * 100, 2)

        return {
            "status": "complete" if significant else "not_significant",
            "control_accuracy": round(c_acc, 4),
            "treatment_accuracy": round(t_acc, 4),
            "accuracy_lift": lift,
            "control_latency_ms": round(c_latency, 1),
            "treatment_latency_ms": round(t_latency, 1),
            "p_value": round(p_value, 4),
            "significant": significant,
            "recommendation": f"推荐 {winner}",
            "sample_sizes": {
                "control": len(control),
                "treatment": len(treatment),
            },
        }

    @staticmethod
    def _proportion_z_test(x1: int, n1: int,
                           x2: int, n2: int) -> tuple[float, float]:
        """Two-sided pooled z-test for the difference of two proportions.

        Returns:
            ``(z, p_value)``; ``z`` is 0 (and ``p_value`` 1) when the pooled
            standard error is 0.
        """
        p1 = x1 / n1
        p2 = x2 / n2
        p_pool = (x1 + x2) / (n1 + n2)
        se = np.sqrt(p_pool * (1 - p_pool) * (1/n1 + 1/n2))
        z = (p2 - p1) / se if se > 0 else 0
        p_value = 2 * (1 - stats.norm.cdf(abs(z)))
        return float(z), float(p_value)
692
+ ```
693
+
694
+ ### 6.2 LLM A/B 测试 (特殊考虑)
695
+
696
+ ```python
697
class LLMABTest:
    """A/B test for LLM applications: experience metrics beyond accuracy."""

    METRICS = [
        "task_completion_rate",  # share of sessions whose task was completed
        "user_satisfaction",     # user rating (thumbs up/down)
        "avg_turns",             # average number of dialogue turns
        "avg_latency_ms",        # average latency
        "cost_per_session",      # cost per session
        "hallucination_rate",    # hallucination rate
    ]

    def analyze_llm(self, control_sessions: list[dict],
                    treatment_sessions: list[dict]) -> dict:
        """Compare control and treatment sessions on every metric in METRICS.

        Sessions that do not carry a metric (missing or ``None``) are left
        out of that metric's comparison; counting them as 0 would bias the
        mean. Welch's t-test (unequal variances) is used because A/B groups
        usually differ a lot in size.

        Returns:
            ``metric -> {"control_mean", "treatment_mean", "p_value",
            "significant"}``. Means are ``None`` for a group without values;
            ``p_value`` is ``None`` when either group has fewer than two
            values or the test is undefined.
        """
        results = {}
        for metric in self.METRICS:
            c_values = [s[metric] for s in control_sessions
                        if s.get(metric) is not None]
            t_values = [s[metric] for s in treatment_sessions
                        if s.get(metric) is not None]

            c_mean = round(float(np.mean(c_values)), 4) if c_values else None
            t_mean = round(float(np.mean(t_values)), 4) if t_values else None

            p_value = None
            if len(c_values) >= 2 and len(t_values) >= 2:
                _, raw_p = stats.ttest_ind(c_values, t_values, equal_var=False)
                # NaN appears when both groups have zero variance.
                if not np.isnan(raw_p):
                    p_value = round(float(raw_p), 4)

            results[metric] = {
                "control_mean": c_mean,
                "treatment_mean": t_mean,
                "p_value": p_value,
                "significant": p_value is not None and p_value < 0.05,
            }
        return results
725
+ ```
726
+
727
+ ---
728
+
729
+ ## 7. CI/CD for ML
730
+
731
+ ### 7.1 ML 管道定义
732
+
733
+ ```yaml
734
# .github/workflows/ml-pipeline.yml
name: ML Pipeline
on:
  push:
    paths: ["models/**", "features/**", "training/**"]

jobs:
  data-validation:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Validate data schema
        run: python scripts/validate_data.py
      - name: Check data quality
        run: python scripts/check_data_quality.py

  training:
    needs: data-validation
    runs-on: [self-hosted, gpu]
    steps:
      # Every job starts from an empty workspace, so check out the repository
      # before running scripts from it.
      - uses: actions/checkout@v4
      - name: Train model
        run: python training/train.py --config configs/prod.yaml
      - name: Evaluate model
        # evaluate.py is expected to append EVAL_PASSED=true to $GITHUB_ENV
        # on success; the next step reads it.
        run: python training/evaluate.py
      - name: Register model
        if: ${{ env.EVAL_PASSED == 'true' }}
        run: python scripts/register_model.py

  model-testing:
    needs: training
    # `runs-on` is required for every job.
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Unit tests
        run: pytest tests/model/
      - name: Integration tests
        run: pytest tests/integration/
      - name: Performance benchmark
        run: python scripts/benchmark.py --threshold-latency-p99 100

  deploy-staging:
    needs: model-testing
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Deploy to staging
        run: ./scripts/deploy.sh staging
      - name: Smoke test
        run: python scripts/smoke_test.py --env staging
      - name: Shadow traffic test
        run: python scripts/shadow_test.py --duration 30m
781
+ ```
782
+
783
+ ---
784
+
785
+ ## Agent Checklist
786
+
787
+ - [ ] 每次训练记录代码版本、数据版本、超参数和指标
788
+ - [ ] 模型注册中心运行,生产推进需要审批
789
+ - [ ] 每个模型附带 Model Card (性能、公平性、局限性)
790
+ - [ ] 部署支持金丝雀发布和自动回滚
791
+ - [ ] 推理延迟 P95/P99 有监控和告警
792
+ - [ ] 数据漂移检测管道运行,PSI > 0.2 触发告警
793
+ - [ ] 概念漂移有性能指标监控,AUC 下降 > 5% 触发告警
794
+ - [ ] 自动重训练管道就绪,连续漂移可触发
795
+ - [ ] A/B 测试框架支持确定性分组和统计显著性检验
796
+ - [ ] ML CI/CD 包含数据校验、训练、评估、测试和部署
797
+ - [ ] LLM 特有指标 (幻觉率、成本、满意度) 纳入监控
798
+ - [ ] 模型退役有归档流程和文档记录