code-yangzz 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/README.md +102 -0
  2. package/agents/meta-artisan.md +164 -0
  3. package/agents/meta-conductor.md +482 -0
  4. package/agents/meta-genesis.md +165 -0
  5. package/agents/meta-librarian.md +213 -0
  6. package/agents/meta-prism.md +268 -0
  7. package/agents/meta-scout.md +173 -0
  8. package/agents/meta-sentinel.md +161 -0
  9. package/agents/meta-warden.md +304 -0
  10. package/bin/install.js +390 -0
  11. package/bin/lib/utils.js +72 -0
  12. package/bin/lib/watermark.js +176 -0
  13. package/config/CLAUDE.md +363 -0
  14. package/config/settings.json +120 -0
  15. package/hooks/block-dangerous-bash.mjs +36 -0
  16. package/hooks/post-console-log-warn.mjs +27 -0
  17. package/hooks/post-format.mjs +24 -0
  18. package/hooks/post-typecheck.mjs +27 -0
  19. package/hooks/pre-git-push-confirm.mjs +19 -0
  20. package/hooks/stop-completion-guard.mjs +159 -0
  21. package/hooks/stop-console-log-audit.mjs +44 -0
  22. package/hooks/subagent-context.mjs +27 -0
  23. package/hooks/user-prompt-submit.js +233 -0
  24. package/package.json +36 -0
  25. package/prompt-optimizer/prompt-optimizer-meta.md +159 -0
  26. package/skills/agent-teams/SKILL.md +215 -0
  27. package/skills/domains/ai/SKILL.md +34 -0
  28. package/skills/domains/ai/agent-dev.md +242 -0
  29. package/skills/domains/ai/llm-security.md +288 -0
  30. package/skills/domains/ai/prompt-and-eval.md +279 -0
  31. package/skills/domains/ai/rag-system.md +542 -0
  32. package/skills/domains/architecture/SKILL.md +42 -0
  33. package/skills/domains/architecture/api-design.md +225 -0
  34. package/skills/domains/architecture/caching.md +298 -0
  35. package/skills/domains/architecture/cloud-native.md +285 -0
  36. package/skills/domains/architecture/message-queue.md +328 -0
  37. package/skills/domains/architecture/security-arch.md +297 -0
  38. package/skills/domains/data-engineering/SKILL.md +207 -0
  39. package/skills/domains/development/SKILL.md +46 -0
  40. package/skills/domains/development/cpp.md +246 -0
  41. package/skills/domains/development/go.md +323 -0
  42. package/skills/domains/development/java.md +277 -0
  43. package/skills/domains/development/python.md +288 -0
  44. package/skills/domains/development/rust.md +313 -0
  45. package/skills/domains/development/shell.md +313 -0
  46. package/skills/domains/development/typescript.md +277 -0
  47. package/skills/domains/devops/SKILL.md +39 -0
  48. package/skills/domains/devops/cost-optimization.md +271 -0
  49. package/skills/domains/devops/database.md +217 -0
  50. package/skills/domains/devops/devsecops.md +198 -0
  51. package/skills/domains/devops/git-workflow.md +181 -0
  52. package/skills/domains/devops/observability.md +279 -0
  53. package/skills/domains/devops/performance.md +335 -0
  54. package/skills/domains/devops/testing.md +283 -0
  55. package/skills/domains/frontend-design/SKILL.md +38 -0
  56. package/skills/domains/frontend-design/agents/openai.yaml +4 -0
  57. package/skills/domains/frontend-design/claymorphism/SKILL.md +119 -0
  58. package/skills/domains/frontend-design/claymorphism/references/tokens.css +52 -0
  59. package/skills/domains/frontend-design/component-patterns.md +202 -0
  60. package/skills/domains/frontend-design/engineering.md +287 -0
  61. package/skills/domains/frontend-design/glassmorphism/SKILL.md +140 -0
  62. package/skills/domains/frontend-design/glassmorphism/references/tokens.css +32 -0
  63. package/skills/domains/frontend-design/liquid-glass/SKILL.md +137 -0
  64. package/skills/domains/frontend-design/liquid-glass/references/tokens.css +81 -0
  65. package/skills/domains/frontend-design/neubrutalism/SKILL.md +143 -0
  66. package/skills/domains/frontend-design/neubrutalism/references/tokens.css +44 -0
  67. package/skills/domains/frontend-design/state-management.md +680 -0
  68. package/skills/domains/frontend-design/ui-aesthetics.md +110 -0
  69. package/skills/domains/frontend-design/ux-principles.md +156 -0
  70. package/skills/domains/infrastructure/SKILL.md +200 -0
  71. package/skills/domains/mobile/SKILL.md +224 -0
  72. package/skills/domains/orchestration/SKILL.md +29 -0
  73. package/skills/domains/orchestration/multi-agent.md +263 -0
  74. package/skills/domains/security/SKILL.md +72 -0
  75. package/skills/domains/security/blue-team.md +436 -0
  76. package/skills/domains/security/code-audit.md +265 -0
  77. package/skills/domains/security/pentest.md +226 -0
  78. package/skills/domains/security/red-team.md +374 -0
  79. package/skills/domains/security/threat-intel.md +372 -0
  80. package/skills/domains/security/vuln-research.md +369 -0
  81. package/skills/meta-theory/SKILL.md +878 -0
  82. package/skills/meta-theory/references/create-agent.md +248 -0
  83. package/skills/meta-theory/references/dev-governance.md +1108 -0
  84. package/skills/meta-theory/references/intent-amplification.md +190 -0
  85. package/skills/meta-theory/references/meta-theory.md +346 -0
  86. package/skills/meta-theory/references/rhythm-orchestration.md +257 -0
  87. package/skills/meta-theory/references/ten-step-governance.md +285 -0
  88. package/skills/orchestration/multi-agent/SKILL.md +493 -0
  89. package/skills/run_skill.js +128 -0
  90. package/skills/tools/gen-docs/SKILL.md +115 -0
  91. package/skills/tools/gen-docs/agents/openai.yaml +4 -0
  92. package/skills/tools/gen-docs/scripts/doc_generator.js +435 -0
  93. package/skills/tools/lib/shared.js +98 -0
  94. package/skills/tools/override-refusal/SKILL.md +55 -0
  95. package/skills/tools/override-refusal/agents/openai.yaml +4 -0
  96. package/skills/tools/override-refusal/scripts/refusal_rewriter.js +229 -0
  97. package/skills/tools/verify-change/SKILL.md +139 -0
  98. package/skills/tools/verify-change/agents/openai.yaml +4 -0
  99. package/skills/tools/verify-change/scripts/change_analyzer.js +289 -0
  100. package/skills/tools/verify-module/SKILL.md +126 -0
  101. package/skills/tools/verify-module/agents/openai.yaml +4 -0
  102. package/skills/tools/verify-module/scripts/module_scanner.js +171 -0
  103. package/skills/tools/verify-quality/SKILL.md +159 -0
  104. package/skills/tools/verify-quality/agents/openai.yaml +4 -0
  105. package/skills/tools/verify-quality/scripts/quality_checker.js +337 -0
  106. package/skills/tools/verify-security/SKILL.md +142 -0
  107. package/skills/tools/verify-security/agents/openai.yaml +4 -0
  108. package/skills/tools/verify-security/scripts/security_scanner.js +283 -0
@@ -0,0 +1,279 @@
1
+ ---
2
+ name: observability
3
+ description: 可观测性技能文档。日志、指标、追踪三大支柱,告警设计,SLI/SLO/SLA。当用户提到可观测性、日志、监控、指标、追踪、告警、SLO时路由到此。
4
+ ---
5
+
6
+ # 🔧 炼器技能文档 · 可观测性
7
+
8
+ ## 三大支柱
9
+
10
+ ```
11
+ ┌─────────────────────────────────────────┐
12
+ │ 可观测性 (Observability) │
13
+ ├─────────────┬─────────────┬─────────────┤
14
+ │ 📋 日志 │ 📊 指标 │ 🔗 追踪 │
15
+ │ Logs │ Metrics │ Traces │
16
+ │ 离散事件 │ 聚合数值 │ 请求链路 │
17
+ │ What │ How much │ Where │
18
+ └─────────────┴─────────────┴─────────────┘
19
+ ```
20
+
21
+ | 支柱 | 特征 | 适用场景 | 代表工具 |
22
+ |------|------|----------|----------|
23
+ | 日志 | 离散、非结构化/结构化事件 | 调试、审计、错误追踪 | ELK, Loki, CloudWatch |
24
+ | 指标 | 聚合数值、时间序列 | 告警、趋势、容量规划 | Prometheus, Datadog, CloudWatch |
25
+ | 追踪 | 分布式请求链路 | 延迟分析、依赖映射 | Jaeger, Zipkin, X-Ray |
26
+
27
+ ---
28
+
29
+ ## 日志 (Logs)
30
+
31
+ ### 结构化日志
32
+
33
+ ```json
34
+ {
35
+ "timestamp": "2024-01-15T10:30:00.123Z",
36
+ "level": "ERROR",
37
+ "service": "order-service",
38
+ "trace_id": "abc123",
39
+ "span_id": "def456",
40
+ "message": "Payment failed",
41
+ "error": "InsufficientFunds",
42
+ "user_id": "u-789",
43
+ "order_id": "o-012",
44
+ "amount": 99.99,
45
+ "duration_ms": 234
46
+ }
47
+ ```
48
+
49
+ ### 日志级别规范
50
+
51
+ | 级别 | 用途 | 生产环境 |
52
+ |------|------|----------|
53
+ | TRACE | 极细粒度调试 | ❌ 关闭 |
54
+ | DEBUG | 开发调试信息 | ❌ 关闭 |
55
+ | INFO | 业务关键事件 | ✅ 开启 |
56
+ | WARN | 潜在问题,可自愈 | ✅ 开启 |
57
+ | ERROR | 错误,需关注 | ✅ 开启 + 告警 |
58
+ | FATAL | 致命错误,服务不可用 | ✅ 开启 + 紧急告警 |
59
+
60
+ ### 日志聚合架构
61
+
62
+ ```
63
+ 应用 → Filebeat/Fluentd → Kafka(缓冲) → Logstash → Elasticsearch → Kibana
64
+ → S3(归档)
65
+ ```
66
+
67
+ ### 日志最佳实践
68
+
69
+ - ✅ 结构化 JSON 格式
70
+ - ✅ 包含 trace_id 关联追踪
71
+ - ✅ 敏感数据脱敏
72
+ - ✅ 合理的保留策略(热/温/冷)
73
+ - ❌ 不记录密码/Token
74
+ - ❌ 不在循环中打日志
75
+ - ❌ 不用字符串拼接(用参数化)
76
+
77
+ ---
78
+
79
+ ## 指标 (Metrics)
80
+
81
+ ### Prometheus 指标类型
82
+
83
+ | 类型 | 用途 | 示例 |
84
+ |------|------|------|
85
+ | Counter | 只增不减的计数器 | 请求总数、错误总数 |
86
+ | Gauge | 可增可减的瞬时值 | 当前连接数、队列长度 |
87
+ | Histogram | 分布统计(桶) | 请求延迟分布 |
88
+ | Summary | 分布统计(分位数) | 请求延迟 P99 |
89
+
90
+ ### 关键 PromQL
91
+
92
+ ```promql
93
+ # 请求速率
94
+ rate(http_requests_total[5m])
95
+
96
+ # 错误率
97
+ sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m]))
98
+
99
+ # P99 延迟
100
+ histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))
101
+
102
+ # CPU 使用率
103
+ 1 - avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance)
104
+
105
+ # 内存使用率
106
+ (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes
107
+ ```
108
+
109
+ ### Grafana Dashboard 设计
110
+
111
+ ```yaml
112
+ 四大黄金信号 Dashboard:
113
+ Row 1 - 流量:
114
+ - QPS (rate)
115
+ - 按 endpoint 分组
116
+ Row 2 - 错误:
117
+ - 错误率 (%)
118
+ - 按错误类型分组
119
+ Row 3 - 延迟:
120
+ - P50/P95/P99
121
+ - 延迟热力图
122
+ Row 4 - 饱和度:
123
+ - CPU/Memory/Disk
124
+ - 连接池使用率
125
+ ```
126
+
127
+ ---
128
+
129
+ ## 追踪 (Traces)
130
+
131
+ ### OpenTelemetry 集成
132
+
133
+ ```python
134
+ # Python 示例
135
+ from opentelemetry import trace
136
+ from opentelemetry.sdk.trace import TracerProvider
137
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
138
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
139
+
140
+ provider = TracerProvider()
141
+ processor = BatchSpanProcessor(OTLPSpanExporter(endpoint="http://collector:4317"))
142
+ provider.add_span_processor(processor)
143
+ trace.set_tracer_provider(provider)
144
+
145
+ tracer = trace.get_tracer(__name__)
146
+
147
+ @tracer.start_as_current_span("process_order")
148
+ def process_order(order_id: str):
149
+ span = trace.get_current_span()
150
+ span.set_attribute("order.id", order_id)
151
+ # 业务逻辑...
152
+ ```
153
+
154
+ ### 追踪架构
155
+
156
+ ```
157
+ Service-A → Service-B → Service-C
158
+ │ │ │
159
+ └── Span ────┴── Span ────┴── Span
160
+
161
+ Trace (trace_id 贯穿全链路)
162
+ ```
163
+
164
+ ### Context Propagation
165
+
166
+ ```
167
+ HTTP Header: traceparent: 00-{trace_id}-{span_id}-{flags}
168
+ gRPC Metadata: 启用 OpenTelemetry gRPC 插桩(客户端/服务端拦截器)后自动传播
169
+ Message Queue: 消息头注入 trace context
170
+ ```
171
+
172
+ ---
173
+
174
+ ## 告警设计
175
+
176
+ ### 告警分级
177
+
178
+ | 级别 | 响应时间 | 通知方式 | 示例 |
179
+ |------|----------|----------|------|
180
+ | P0 Critical | 立即 | 电话 + PagerDuty | 服务完全不可用 |
181
+ | P1 High | 15 min | Slack + 短信 | 错误率 > 5% |
182
+ | P2 Medium | 1 hour | Slack | 延迟 P99 > 阈值 |
183
+ | P3 Low | 次日 | 邮件/工单 | 磁盘使用 > 70% |
184
+
185
+ ### 告警规则示例
186
+
187
+ ```yaml
188
+ # Prometheus 告警规则文件(触发后由 Alertmanager 负责分组与通知路由)
189
+ groups:
190
+ - name: service-alerts
191
+ rules:
192
+ - alert: HighErrorRate
193
+ expr: sum by (instance) (rate(http_requests_total{status=~"5.."}[5m])) / sum by (instance) (rate(http_requests_total[5m])) > 0.05
194
+ for: 5m
195
+ labels:
196
+ severity: critical
197
+ annotations:
198
+ summary: "High error rate on {{ $labels.instance }}"
199
+
200
+ - alert: HighLatency
201
+ expr: histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 1
202
+ for: 5m
203
+ labels:
204
+ severity: warning
205
+ ```
206
+
207
+ ### 告警最佳实践
208
+
209
+ - ✅ 基于 SLO 告警,而非资源指标
210
+ - ✅ 设置合理的 `for` 持续时间,避免抖动
211
+ - ✅ 告警必须可操作(收到告警知道该做什么)
212
+ - ✅ 定期审查告警,清理无效告警
213
+ - ❌ 不对每个指标都告警(告警疲劳)
214
+ - ❌ 不设过低阈值(噪音)
215
+
216
+ ---
217
+
218
+ ## SLI / SLO / SLA
219
+
220
+ ### 定义
221
+
222
+ | 概念 | 含义 | 示例 |
223
+ |------|------|------|
224
+ | SLI (指标) | 服务质量的量化度量 | 请求成功率、P99 延迟 |
225
+ | SLO (目标) | SLI 的目标值 | 可用性 99.9%、P99 < 200ms |
226
+ | SLA (协议) | 对外承诺 + 违约后果 | 99.9% 可用,否则赔偿 |
227
+
228
+ ### Error Budget
229
+
230
+ ```
231
+ SLO = 99.9% 可用性
232
+ Error Budget = 1 - 0.999 = 0.1%
233
+ 每月 Error Budget = 30天 × 24小时 × 60分钟 × 0.001 = 43.2 分钟
234
+
235
+ 已消耗: 15 分钟
236
+ 剩余: 28.2 分钟
237
+ ```
238
+
239
+ ### SLO Dashboard
240
+
241
+ ```yaml
242
+ SLO Dashboard:
243
+ - 当前 SLI 值 vs SLO 目标
244
+ - Error Budget 剩余百分比
245
+ - Error Budget 消耗速率
246
+ - 30天滚动窗口趋势
247
+ - Burn Rate 告警状态
248
+ ```
249
+
250
+ ---
251
+
252
+ ## 可观测性清单
253
+
254
+ ```yaml
255
+ 日志:
256
+ - [ ] 结构化 JSON 格式
257
+ - [ ] trace_id 关联
258
+ - [ ] 敏感数据脱敏
259
+ - [ ] 保留策略配置
260
+
261
+ 指标:
262
+ - [ ] 四大黄金信号覆盖
263
+ - [ ] 自定义业务指标
264
+ - [ ] Dashboard 就绪
265
+ - [ ] 告警规则配置
266
+
267
+ 追踪:
268
+ - [ ] OpenTelemetry 集成
269
+ - [ ] 跨服务 Context Propagation
270
+ - [ ] 采样策略配置
271
+ - [ ] 关键路径标注
272
+
273
+ 告警:
274
+ - [ ] 基于 SLO 的告警
275
+ - [ ] 分级通知渠道
276
+ - [ ] Runbook 关联
277
+ - [ ] 定期审查机制
278
+ ```
279
+
@@ -0,0 +1,335 @@
1
+ ---
2
+ name: performance
3
+ description: 性能优化技能文档。性能分析方法论、Profiling、火焰图、基准测试、瓶颈优化。当用户提到性能、延迟、吞吐、Profiling、火焰图、基准测试时路由到此。
4
+ ---
5
+
6
+ # 🔧 炼器技能文档 · 性能优化
7
+
8
+ ## 性能分析方法论
9
+
10
+ ### USE 方法 (Utilization, Saturation, Errors)
11
+
12
+ 对每个资源检查三个维度:
13
+
14
+ | 维度 | 含义 | 工具 |
15
+ |------|------|------|
16
+ | Utilization | 资源繁忙时间占比 | `top`, `vmstat`, `iostat` |
17
+ | Saturation | 排队等待的工作量 | `vmstat`(r列), `iostat`(avgqu-sz,新版 sysstat 中为 aqu-sz) |
18
+ | Errors | 错误事件计数 | `dmesg`, 应用日志 |
19
+
20
+ ```bash
21
+ # CPU USE
22
+ mpstat -P ALL 1 # Utilization per core
23
+ vmstat 1 # Saturation (r > CPU count)
24
+ dmesg | grep -i error # Errors
25
+
26
+ # Memory USE
27
+ free -m # Utilization
28
+ vmstat 1 | awk '{print $7,$8}' # Saturation (si/so > 0 = swapping)
29
+
30
+ # Disk USE
31
+ iostat -xz 1 # Utilization (%util), Saturation (avgqu-sz)
32
+
33
+ # Network USE
34
+ sar -n DEV 1 # Utilization
35
+ netstat -s | grep -i error # Errors
36
+ ```
37
+
38
+ ### RED 方法 (Rate, Errors, Duration)
39
+
40
+ 面向服务的性能指标:
41
+
42
+ | 维度 | 含义 | 示例 |
43
+ |------|------|------|
44
+ | Rate | 每秒请求数 | QPS/RPS |
45
+ | Errors | 每秒错误数 | 5xx/s |
46
+ | Duration | 请求延迟分布 | P50/P95/P99 |
47
+
48
+ ```promql
49
+ # Prometheus PromQL 示例
50
+ rate(http_requests_total[5m]) # Rate
51
+ rate(http_requests_total{status=~"5.."}[5m]) # Errors
52
+ histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) # P99
53
+ ```
54
+
55
+ ---
56
+
57
+ ## Profiling 工具
58
+
59
+ ### CPU Profiling
60
+
61
+ | 语言 | 工具 | 命令 |
62
+ |------|------|------|
63
+ | Python | cProfile / py-spy | `py-spy record -o profile.svg -- python app.py` |
64
+ | Go | pprof | `go tool pprof http://localhost:6060/debug/pprof/profile` |
65
+ | Java | async-profiler | `./profiler.sh -d 30 -f flame.html <pid>` |
66
+ | Node.js | clinic.js | `clinic flame -- node app.js` |
67
+ | Rust | cargo-flamegraph | `cargo flamegraph` |
68
+ | 系统级 | perf | `perf record -g -p <pid> -- sleep 30` |
69
+
70
+ ### Memory Profiling
71
+
72
+ ```bash
73
+ # Python
74
+ python -m memory_profiler script.py
75
+ # 或使用 tracemalloc
76
+ python -c "import tracemalloc; tracemalloc.start(); ..."
77
+
78
+ # Go
79
+ go tool pprof http://localhost:6060/debug/pprof/heap
80
+
81
+ # Java
82
+ jmap -dump:format=b,file=heap.hprof <pid>
83
+ jhat heap.hprof # 仅 JDK 8 及以下可用(JDK 9 起已移除 jhat);新版本请用 MAT/VisualVM 分析
84
+
85
+ # 系统级
86
+ valgrind --tool=massif ./program
87
+ ```
88
+
89
+ ### I/O Profiling
90
+
91
+ ```bash
92
+ # 磁盘 I/O
93
+ iostat -xz 1
94
+ iotop -oP
95
+ strace -e trace=read,write -p <pid>
96
+
97
+ # 网络 I/O
98
+ ss -tnp # 连接状态
99
+ tcpdump -i eth0 -w cap.pcap # 抓包
100
+ ```
101
+
102
+ ---
103
+
104
+ ## 火焰图
105
+
106
+ ### 生成流程
107
+
108
+ ```bash
109
+ # 1. 采集数据
110
+ perf record -F 99 -g -p <pid> -- sleep 30
111
+
112
+ # 2. 生成火焰图
113
+ perf script | stackcollapse-perf.pl | flamegraph.pl > flame.svg
114
+
115
+ # 3. 解读
116
+ # X轴:函数在采样中出现的比例(越宽=越耗时)
117
+ # Y轴:调用栈深度
118
+ # 颜色:随机,无特殊含义
119
+ ```
120
+
121
+ ### 解读要点
122
+
123
+ | 特征 | 含义 | 行动 |
124
+ |------|------|------|
125
+ | 宽平顶 | 该函数自身耗时大 | 优化该函数逻辑 |
126
+ | 宽塔形 | 调用链深但每层都耗时 | 减少调用层级 |
127
+ | 多个窄尖峰 | 多处小开销累积 | 关注热路径 |
128
+
129
+ ---
130
+
131
+ ## 基准测试
132
+
133
+ ### HTTP 基准测试
134
+
135
+ ```bash
136
+ # wrk (推荐)
137
+ wrk -t12 -c400 -d30s http://localhost:8080/api
138
+
139
+ # ab (Apache Bench)
140
+ ab -n 10000 -c 100 http://localhost:8080/api
141
+
142
+ # hey
143
+ hey -n 10000 -c 100 http://localhost:8080/api
144
+
145
+ # k6 (脚本化)
146
+ k6 run --vus 100 --duration 30s script.js
147
+ ```
148
+
149
+ ### 代码级基准测试
150
+
151
+ ```python
152
+ # Python - pytest-benchmark
153
+ def test_sort_benchmark(benchmark):
154
+ data = list(range(1000, 0, -1))
155
+ benchmark(sorted, data)
156
+
157
+ # Go
158
+ func BenchmarkSort(b *testing.B) {
159
+ for i := 0; i < b.N; i++ {
160
+ sort.Ints(generateData())
161
+ }
162
+ }
163
+
164
+ # Rust
165
+ #[bench]
166
+ fn bench_sort(b: &mut Bencher) {
167
+ b.iter(|| sort_data(test::black_box(generate_data())));
168
+ }
169
+ ```
170
+
171
+ ### 基准测试原则
172
+
173
+ 1. **隔离环境** — 独占机器,关闭无关进程
174
+ 2. **预热** — 丢弃前 N 次结果
175
+ 3. **统计显著** — 多次运行取中位数
176
+ 4. **对比基线** — 优化前后对比,而非绝对值
177
+
178
+ ---
179
+
180
+ ## 常见瓶颈优化
181
+
182
+ ### CPU 密集型
183
+
184
+ | 问题 | 优化 |
185
+ |------|------|
186
+ | 热循环 | 算法优化、减少分支 |
187
+ | 序列化/反序列化 | 换用高效格式(protobuf/msgpack) |
188
+ | 正则表达式 | 预编译、简化模式 |
189
+ | 加密运算 | 硬件加速(AES-NI) |
190
+
191
+ ### I/O 密集型
192
+
193
+ | 问题 | 优化 |
194
+ |------|------|
195
+ | 同步阻塞 I/O | 异步 I/O (asyncio/epoll) |
196
+ | 频繁小文件读写 | 批量合并、缓冲区 |
197
+ | 网络往返 | 连接池、批量请求、Pipeline |
198
+ | DNS 解析 | 本地缓存 |
199
+
200
+ ### 内存相关
201
+
202
+ | 问题 | 优化 |
203
+ |------|------|
204
+ | 内存泄漏 | Profiling 定位 + 修复引用 |
205
+ | GC 压力 | 减少分配、对象池 |
206
+ | 缓存未命中 | 数据局部性、紧凑布局 |
207
+ | 大对象 | 流式处理、分片 |
208
+
209
+ ---
210
+
211
+ ## 数据库性能
212
+
213
+ ### 查询优化
214
+
215
+ ```sql
216
+ -- 1. EXPLAIN 分析
217
+ EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 123;
218
+
219
+ -- 2. 索引优化
220
+ CREATE INDEX idx_orders_user_id ON orders(user_id);
221
+ CREATE INDEX idx_orders_composite ON orders(user_id, created_at DESC);
222
+
223
+ -- 3. 避免 N+1
224
+ -- 差:循环查询
225
+ -- 好:JOIN 或 IN 批量查询
226
+ SELECT o.*, u.name FROM orders o JOIN users u ON o.user_id = u.id;
227
+
228
+ -- 4. 分页优化
229
+ -- 差:OFFSET 大数值
230
+ SELECT * FROM orders ORDER BY id LIMIT 20 OFFSET 100000;
231
+ -- 好:游标分页(键集分页),以上一页最后一条记录的 id 作为起点
232
+ SELECT * FROM orders WHERE id > 100000 ORDER BY id LIMIT 20;
233
+ ```
234
+
235
+ ### 连接池配置
236
+
237
+ ```yaml
238
+ # HikariCP (Java)
239
+ maximumPoolSize: 10 # CPU核数 * 2 + 磁盘数
240
+ minimumIdle: 5
241
+ connectionTimeout: 30000
242
+ idleTimeout: 600000
243
+
244
+ # 通用公式
245
+ pool_size = (core_count * 2) + effective_spindle_count
246
+ ```
247
+
248
+ ---
249
+
250
+ ## 性能优化清单
251
+
252
+ ```yaml
253
+ 应用层:
254
+ - [ ] 热路径 Profiling 完成
255
+ - [ ] 算法复杂度 ≤ O(n log n)
256
+ - [ ] 无 N+1 查询
257
+ - [ ] 连接池配置合理
258
+ - [ ] 异步 I/O 用于 I/O 密集操作
259
+
260
+ 数据库:
261
+ - [ ] 查询延迟 P95 < 100ms(超过即视为慢查询)
262
+ - [ ] 索引覆盖高频查询
263
+ - [ ] 无全表扫描
264
+ - [ ] 连接池大小合理
265
+
266
+ 基础设施:
267
+ - [ ] CPU 利用率 < 70% (P95)
268
+ - [ ] 内存利用率 < 80%
269
+ - [ ] 磁盘 I/O 无饱和
270
+ - [ ] 网络无丢包
271
+ ```
272
+
273
+ ---
274
+
275
+ ## 性能测试(源自 performance-testing)
276
+
277
+ ### 测试类型
278
+
279
+ | 类型 | 用户数 | 持续时间 | 目标 |
280
+ |------|--------|----------|------|
281
+ | 负载测试 | 预期峰值 | 30min-2h | 验证性能指标 |
282
+ | 压力测试 | 超出峰值 | 1-3h | 找到崩溃点 |
283
+ | 浸泡测试 | 正常负载 | 8-72h | 检测内存泄漏 |
284
+ | 峰值测试 | 瞬间激增 | 短时间 | 测试弹性 |
285
+
286
+ ### k6 核心模式
287
+
288
+ ```javascript
289
+ // 阶梯式负载
290
+ export const options = {
291
+ stages: [
292
+ { duration: '2m', target: 100 },
293
+ { duration: '5m', target: 100 },
294
+ { duration: '2m', target: 0 },
295
+ ],
296
+ thresholds: {
297
+ http_req_duration: ['p(95)<500'],
298
+ http_req_failed: ['rate<0.01'],
299
+ },
300
+ };
301
+ ```
302
+
303
+ ### 性能基准阈值
304
+
305
+ | 场景 | P95响应时间 | 错误率 | 吞吐量 |
306
+ |------|-------------|--------|--------|
307
+ | API查询 | <200ms | <0.1% | >1000 RPS |
308
+ | API写入 | <500ms | <0.5% | >500 RPS |
309
+ | 页面加载 | <2s | <1% | >100 RPS |
310
+
311
+ ### 工具选型
312
+
313
+ | 工具 | 语言 | 适用场景 |
314
+ |------|------|----------|
315
+ | k6 | JavaScript | 现代化、DevOps集成、云原生 |
316
+ | JMeter | Java/GUI | 功能全面、插件丰富 |
317
+ | Gatling | Scala | 高性能、大规模测试 |
318
+ | Locust | Python | Python生态、分布式 |
319
+
320
+ ### 渐进式测试流程
321
+
322
+ ```
323
+ 1. 基准测试 → 单用户建立基准
324
+ 2. 负载测试 → 预期负载验证性能
325
+ 3. 压力测试 → 超出负载找极限
326
+ 4. 浸泡测试 → 长时间检测泄漏
327
+ ```
328
+
329
+ ### 测试环境要求
330
+
331
+ - 独立环境,配置与生产一致
332
+ - 数据分布模拟真实:70%轻度 / 20%中度 / 10%重度用户
333
+ - 数据隔离:`user_${__VU}_${__ITER}`
334
+ - CI集成:k6 GitHub Action + 阈值门禁
335
+