agy-superpowers 5.1.4 → 5.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/package.json +1 -1
  2. package/template/agent/rules/debug-confirmation-policy.md +34 -0
  3. package/template/agent/rules/language-matching.md +32 -0
  4. package/template/agent/skills/rust-developer/SKILL.md +281 -0
  5. package/template/agent/skills/rust-developer/references/rust-rules/_sections.md +231 -0
  6. package/template/agent/skills/rust-developer/references/rust-rules/anti-clone-excessive.md +124 -0
  7. package/template/agent/skills/rust-developer/references/rust-rules/anti-collect-intermediate.md +131 -0
  8. package/template/agent/skills/rust-developer/references/rust-rules/anti-empty-catch.md +132 -0
  9. package/template/agent/skills/rust-developer/references/rust-rules/anti-expect-lazy.md +95 -0
  10. package/template/agent/skills/rust-developer/references/rust-rules/anti-format-hot-path.md +141 -0
  11. package/template/agent/skills/rust-developer/references/rust-rules/anti-index-over-iter.md +125 -0
  12. package/template/agent/skills/rust-developer/references/rust-rules/anti-lock-across-await.md +127 -0
  13. package/template/agent/skills/rust-developer/references/rust-rules/anti-over-abstraction.md +120 -0
  14. package/template/agent/skills/rust-developer/references/rust-rules/anti-panic-expected.md +131 -0
  15. package/template/agent/skills/rust-developer/references/rust-rules/anti-premature-optimize.md +156 -0
  16. package/template/agent/skills/rust-developer/references/rust-rules/anti-string-for-str.md +122 -0
  17. package/template/agent/skills/rust-developer/references/rust-rules/anti-stringly-typed.md +167 -0
  18. package/template/agent/skills/rust-developer/references/rust-rules/anti-type-erasure.md +134 -0
  19. package/template/agent/skills/rust-developer/references/rust-rules/anti-unwrap-abuse.md +143 -0
  20. package/template/agent/skills/rust-developer/references/rust-rules/anti-vec-for-slice.md +121 -0
  21. package/template/agent/skills/rust-developer/references/rust-rules/api-builder-must-use.md +143 -0
  22. package/template/agent/skills/rust-developer/references/rust-rules/api-builder-pattern.md +187 -0
  23. package/template/agent/skills/rust-developer/references/rust-rules/api-common-traits.md +165 -0
  24. package/template/agent/skills/rust-developer/references/rust-rules/api-default-impl.md +177 -0
  25. package/template/agent/skills/rust-developer/references/rust-rules/api-extension-trait.md +163 -0
  26. package/template/agent/skills/rust-developer/references/rust-rules/api-from-not-into.md +146 -0
  27. package/template/agent/skills/rust-developer/references/rust-rules/api-impl-asref.md +142 -0
  28. package/template/agent/skills/rust-developer/references/rust-rules/api-impl-into.md +160 -0
  29. package/template/agent/skills/rust-developer/references/rust-rules/api-must-use.md +125 -0
  30. package/template/agent/skills/rust-developer/references/rust-rules/api-newtype-safety.md +162 -0
  31. package/template/agent/skills/rust-developer/references/rust-rules/api-non-exhaustive.md +177 -0
  32. package/template/agent/skills/rust-developer/references/rust-rules/api-parse-dont-validate.md +184 -0
  33. package/template/agent/skills/rust-developer/references/rust-rules/api-sealed-trait.md +168 -0
  34. package/template/agent/skills/rust-developer/references/rust-rules/api-serde-optional.md +182 -0
  35. package/template/agent/skills/rust-developer/references/rust-rules/api-typestate.md +199 -0
  36. package/template/agent/skills/rust-developer/references/rust-rules/async-bounded-channel.md +175 -0
  37. package/template/agent/skills/rust-developer/references/rust-rules/async-broadcast-pubsub.md +185 -0
  38. package/template/agent/skills/rust-developer/references/rust-rules/async-cancellation-token.md +203 -0
  39. package/template/agent/skills/rust-developer/references/rust-rules/async-clone-before-await.md +171 -0
  40. package/template/agent/skills/rust-developer/references/rust-rules/async-join-parallel.md +158 -0
  41. package/template/agent/skills/rust-developer/references/rust-rules/async-joinset-structured.md +195 -0
  42. package/template/agent/skills/rust-developer/references/rust-rules/async-mpsc-queue.md +171 -0
  43. package/template/agent/skills/rust-developer/references/rust-rules/async-no-lock-await.md +156 -0
  44. package/template/agent/skills/rust-developer/references/rust-rules/async-oneshot-response.md +191 -0
  45. package/template/agent/skills/rust-developer/references/rust-rules/async-select-racing.md +198 -0
  46. package/template/agent/skills/rust-developer/references/rust-rules/async-spawn-blocking.md +154 -0
  47. package/template/agent/skills/rust-developer/references/rust-rules/async-tokio-fs.md +167 -0
  48. package/template/agent/skills/rust-developer/references/rust-rules/async-tokio-runtime.md +169 -0
  49. package/template/agent/skills/rust-developer/references/rust-rules/async-try-join.md +172 -0
  50. package/template/agent/skills/rust-developer/references/rust-rules/async-watch-latest.md +189 -0
  51. package/template/agent/skills/rust-developer/references/rust-rules/doc-all-public.md +113 -0
  52. package/template/agent/skills/rust-developer/references/rust-rules/doc-cargo-metadata.md +147 -0
  53. package/template/agent/skills/rust-developer/references/rust-rules/doc-errors-section.md +122 -0
  54. package/template/agent/skills/rust-developer/references/rust-rules/doc-examples-section.md +161 -0
  55. package/template/agent/skills/rust-developer/references/rust-rules/doc-hidden-setup.md +149 -0
  56. package/template/agent/skills/rust-developer/references/rust-rules/doc-intra-links.md +138 -0
  57. package/template/agent/skills/rust-developer/references/rust-rules/doc-link-types.md +169 -0
  58. package/template/agent/skills/rust-developer/references/rust-rules/doc-module-inner.md +116 -0
  59. package/template/agent/skills/rust-developer/references/rust-rules/doc-panics-section.md +128 -0
  60. package/template/agent/skills/rust-developer/references/rust-rules/doc-question-mark.md +136 -0
  61. package/template/agent/skills/rust-developer/references/rust-rules/doc-safety-section.md +131 -0
  62. package/template/agent/skills/rust-developer/references/rust-rules/err-anyhow-app.md +179 -0
  63. package/template/agent/skills/rust-developer/references/rust-rules/err-context-chain.md +144 -0
  64. package/template/agent/skills/rust-developer/references/rust-rules/err-custom-type.md +152 -0
  65. package/template/agent/skills/rust-developer/references/rust-rules/err-doc-errors.md +145 -0
  66. package/template/agent/skills/rust-developer/references/rust-rules/err-expect-bugs-only.md +133 -0
  67. package/template/agent/skills/rust-developer/references/rust-rules/err-from-impl.md +152 -0
  68. package/template/agent/skills/rust-developer/references/rust-rules/err-lowercase-msg.md +124 -0
  69. package/template/agent/skills/rust-developer/references/rust-rules/err-no-unwrap-prod.md +115 -0
  70. package/template/agent/skills/rust-developer/references/rust-rules/err-question-mark.md +151 -0
  71. package/template/agent/skills/rust-developer/references/rust-rules/err-result-over-panic.md +130 -0
  72. package/template/agent/skills/rust-developer/references/rust-rules/err-source-chain.md +155 -0
  73. package/template/agent/skills/rust-developer/references/rust-rules/err-thiserror-lib.md +171 -0
  74. package/template/agent/skills/rust-developer/references/rust-rules/lint-cargo-metadata.md +138 -0
  75. package/template/agent/skills/rust-developer/references/rust-rules/lint-deny-correctness.md +107 -0
  76. package/template/agent/skills/rust-developer/references/rust-rules/lint-missing-docs.md +154 -0
  77. package/template/agent/skills/rust-developer/references/rust-rules/lint-pedantic-selective.md +118 -0
  78. package/template/agent/skills/rust-developer/references/rust-rules/lint-rustfmt-check.md +157 -0
  79. package/template/agent/skills/rust-developer/references/rust-rules/lint-unsafe-doc.md +133 -0
  80. package/template/agent/skills/rust-developer/references/rust-rules/lint-warn-complexity.md +131 -0
  81. package/template/agent/skills/rust-developer/references/rust-rules/lint-warn-perf.md +136 -0
  82. package/template/agent/skills/rust-developer/references/rust-rules/lint-warn-style.md +135 -0
  83. package/template/agent/skills/rust-developer/references/rust-rules/lint-warn-suspicious.md +122 -0
  84. package/template/agent/skills/rust-developer/references/rust-rules/lint-workspace-lints.md +172 -0
  85. package/template/agent/skills/rust-developer/references/rust-rules/mem-arena-allocator.md +168 -0
  86. package/template/agent/skills/rust-developer/references/rust-rules/mem-arrayvec.md +142 -0
  87. package/template/agent/skills/rust-developer/references/rust-rules/mem-assert-type-size.md +168 -0
  88. package/template/agent/skills/rust-developer/references/rust-rules/mem-avoid-format.md +147 -0
  89. package/template/agent/skills/rust-developer/references/rust-rules/mem-box-large-variant.md +158 -0
  90. package/template/agent/skills/rust-developer/references/rust-rules/mem-boxed-slice.md +139 -0
  91. package/template/agent/skills/rust-developer/references/rust-rules/mem-clone-from.md +147 -0
  92. package/template/agent/skills/rust-developer/references/rust-rules/mem-compact-string.md +149 -0
  93. package/template/agent/skills/rust-developer/references/rust-rules/mem-reuse-collections.md +174 -0
  94. package/template/agent/skills/rust-developer/references/rust-rules/mem-smaller-integers.md +159 -0
  95. package/template/agent/skills/rust-developer/references/rust-rules/mem-smallvec.md +138 -0
  96. package/template/agent/skills/rust-developer/references/rust-rules/mem-thinvec.md +142 -0
  97. package/template/agent/skills/rust-developer/references/rust-rules/mem-with-capacity.md +156 -0
  98. package/template/agent/skills/rust-developer/references/rust-rules/mem-write-over-format.md +172 -0
  99. package/template/agent/skills/rust-developer/references/rust-rules/mem-zero-copy.md +164 -0
  100. package/template/agent/skills/rust-developer/references/rust-rules/name-acronym-word.md +99 -0
  101. package/template/agent/skills/rust-developer/references/rust-rules/name-as-free.md +104 -0
  102. package/template/agent/skills/rust-developer/references/rust-rules/name-consts-screaming.md +94 -0
  103. package/template/agent/skills/rust-developer/references/rust-rules/name-crate-no-rs.md +78 -0
  104. package/template/agent/skills/rust-developer/references/rust-rules/name-funcs-snake.md +76 -0
  105. package/template/agent/skills/rust-developer/references/rust-rules/name-into-ownership.md +123 -0
  106. package/template/agent/skills/rust-developer/references/rust-rules/name-is-has-bool.md +127 -0
  107. package/template/agent/skills/rust-developer/references/rust-rules/name-iter-convention.md +129 -0
  108. package/template/agent/skills/rust-developer/references/rust-rules/name-iter-method.md +131 -0
  109. package/template/agent/skills/rust-developer/references/rust-rules/name-iter-type-match.md +142 -0
  110. package/template/agent/skills/rust-developer/references/rust-rules/name-lifetime-short.md +86 -0
  111. package/template/agent/skills/rust-developer/references/rust-rules/name-no-get-prefix.md +154 -0
  112. package/template/agent/skills/rust-developer/references/rust-rules/name-to-expensive.md +118 -0
  113. package/template/agent/skills/rust-developer/references/rust-rules/name-type-param-single.md +92 -0
  114. package/template/agent/skills/rust-developer/references/rust-rules/name-types-camel.md +65 -0
  115. package/template/agent/skills/rust-developer/references/rust-rules/name-variants-camel.md +101 -0
  116. package/template/agent/skills/rust-developer/references/rust-rules/opt-bounds-check.md +161 -0
  117. package/template/agent/skills/rust-developer/references/rust-rules/opt-cache-friendly.md +187 -0
  118. package/template/agent/skills/rust-developer/references/rust-rules/opt-codegen-units.md +142 -0
  119. package/template/agent/skills/rust-developer/references/rust-rules/opt-cold-unlikely.md +152 -0
  120. package/template/agent/skills/rust-developer/references/rust-rules/opt-inline-always-rare.md +141 -0
  121. package/template/agent/skills/rust-developer/references/rust-rules/opt-inline-never-cold.md +181 -0
  122. package/template/agent/skills/rust-developer/references/rust-rules/opt-inline-small.md +160 -0
  123. package/template/agent/skills/rust-developer/references/rust-rules/opt-likely-hint.md +171 -0
  124. package/template/agent/skills/rust-developer/references/rust-rules/opt-lto-release.md +130 -0
  125. package/template/agent/skills/rust-developer/references/rust-rules/opt-pgo-profile.md +167 -0
  126. package/template/agent/skills/rust-developer/references/rust-rules/opt-simd-portable.md +144 -0
  127. package/template/agent/skills/rust-developer/references/rust-rules/opt-target-cpu.md +154 -0
  128. package/template/agent/skills/rust-developer/references/rust-rules/own-arc-shared.md +141 -0
  129. package/template/agent/skills/rust-developer/references/rust-rules/own-borrow-over-clone.md +95 -0
  130. package/template/agent/skills/rust-developer/references/rust-rules/own-clone-explicit.md +135 -0
  131. package/template/agent/skills/rust-developer/references/rust-rules/own-copy-small.md +124 -0
  132. package/template/agent/skills/rust-developer/references/rust-rules/own-cow-conditional.md +135 -0
  133. package/template/agent/skills/rust-developer/references/rust-rules/own-lifetime-elision.md +134 -0
  134. package/template/agent/skills/rust-developer/references/rust-rules/own-move-large.md +134 -0
  135. package/template/agent/skills/rust-developer/references/rust-rules/own-mutex-interior.md +105 -0
  136. package/template/agent/skills/rust-developer/references/rust-rules/own-rc-single-thread.md +65 -0
  137. package/template/agent/skills/rust-developer/references/rust-rules/own-refcell-interior.md +97 -0
  138. package/template/agent/skills/rust-developer/references/rust-rules/own-rwlock-readers.md +122 -0
  139. package/template/agent/skills/rust-developer/references/rust-rules/own-slice-over-vec.md +119 -0
  140. package/template/agent/skills/rust-developer/references/rust-rules/perf-black-box-bench.md +153 -0
  141. package/template/agent/skills/rust-developer/references/rust-rules/perf-chain-avoid.md +136 -0
  142. package/template/agent/skills/rust-developer/references/rust-rules/perf-collect-into.md +133 -0
  143. package/template/agent/skills/rust-developer/references/rust-rules/perf-collect-once.md +120 -0
  144. package/template/agent/skills/rust-developer/references/rust-rules/perf-drain-reuse.md +137 -0
  145. package/template/agent/skills/rust-developer/references/rust-rules/perf-entry-api.md +134 -0
  146. package/template/agent/skills/rust-developer/references/rust-rules/perf-extend-batch.md +150 -0
  147. package/template/agent/skills/rust-developer/references/rust-rules/perf-iter-lazy.md +123 -0
  148. package/template/agent/skills/rust-developer/references/rust-rules/perf-iter-over-index.md +113 -0
  149. package/template/agent/skills/rust-developer/references/rust-rules/perf-profile-first.md +175 -0
  150. package/template/agent/skills/rust-developer/references/rust-rules/perf-release-profile.md +149 -0
  151. package/template/agent/skills/rust-developer/references/rust-rules/proj-bin-dir.md +142 -0
  152. package/template/agent/skills/rust-developer/references/rust-rules/proj-flat-small.md +133 -0
  153. package/template/agent/skills/rust-developer/references/rust-rules/proj-lib-main-split.md +148 -0
  154. package/template/agent/skills/rust-developer/references/rust-rules/proj-mod-by-feature.md +130 -0
  155. package/template/agent/skills/rust-developer/references/rust-rules/proj-mod-rs-dir.md +120 -0
  156. package/template/agent/skills/rust-developer/references/rust-rules/proj-prelude-module.md +155 -0
  157. package/template/agent/skills/rust-developer/references/rust-rules/proj-pub-crate-internal.md +139 -0
  158. package/template/agent/skills/rust-developer/references/rust-rules/proj-pub-super-parent.md +135 -0
  159. package/template/agent/skills/rust-developer/references/rust-rules/proj-pub-use-reexport.md +162 -0
  160. package/template/agent/skills/rust-developer/references/rust-rules/proj-workspace-deps.md +186 -0
  161. package/template/agent/skills/rust-developer/references/rust-rules/proj-workspace-large.md +162 -0
  162. package/template/agent/skills/rust-developer/references/rust-rules/test-arrange-act-assert.md +160 -0
  163. package/template/agent/skills/rust-developer/references/rust-rules/test-cfg-test-module.md +151 -0
  164. package/template/agent/skills/rust-developer/references/rust-rules/test-criterion-bench.md +171 -0
  165. package/template/agent/skills/rust-developer/references/rust-rules/test-descriptive-names.md +142 -0
  166. package/template/agent/skills/rust-developer/references/rust-rules/test-doctest-examples.md +168 -0
  167. package/template/agent/skills/rust-developer/references/rust-rules/test-fixture-raii.md +151 -0
  168. package/template/agent/skills/rust-developer/references/rust-rules/test-integration-dir.md +144 -0
  169. package/template/agent/skills/rust-developer/references/rust-rules/test-mock-traits.md +189 -0
  170. package/template/agent/skills/rust-developer/references/rust-rules/test-mockall-mocking.md +226 -0
  171. package/template/agent/skills/rust-developer/references/rust-rules/test-proptest-properties.md +161 -0
  172. package/template/agent/skills/rust-developer/references/rust-rules/test-should-panic.md +130 -0
  173. package/template/agent/skills/rust-developer/references/rust-rules/test-tokio-async.md +154 -0
  174. package/template/agent/skills/rust-developer/references/rust-rules/test-use-super.md +127 -0
  175. package/template/agent/skills/rust-developer/references/rust-rules/type-enum-states.md +154 -0
  176. package/template/agent/skills/rust-developer/references/rust-rules/type-generic-bounds.md +142 -0
  177. package/template/agent/skills/rust-developer/references/rust-rules/type-never-diverge.md +146 -0
  178. package/template/agent/skills/rust-developer/references/rust-rules/type-newtype-ids.md +160 -0
  179. package/template/agent/skills/rust-developer/references/rust-rules/type-newtype-validated.md +159 -0
  180. package/template/agent/skills/rust-developer/references/rust-rules/type-no-stringly.md +144 -0
  181. package/template/agent/skills/rust-developer/references/rust-rules/type-option-nullable.md +137 -0
  182. package/template/agent/skills/rust-developer/references/rust-rules/type-phantom-marker.md +188 -0
  183. package/template/agent/skills/rust-developer/references/rust-rules/type-repr-transparent.md +143 -0
  184. package/template/agent/skills/rust-developer/references/rust-rules/type-result-fallible.md +131 -0
  185. package/template/agent/skills/systematic-debugging/SKILL.md +17 -0
@@ -0,0 +1,167 @@
1
+ # opt-pgo-profile
2
+
3
+ > Use Profile-Guided Optimization (PGO) for maximum performance
4
+
5
+ ## Why It Matters
6
+
7
+ PGO uses real runtime behavior to guide compiler optimization decisions. By profiling actual workloads, the compiler learns which code paths are hot, optimizing them aggressively while deprioritizing cold paths. This can yield 10-30% performance improvements beyond standard optimizations.
8
+
9
+ ## The PGO Process
10
+
11
+ 1. **Instrument**: Build with profiling instrumentation
12
+ 2. **Profile**: Run representative workloads
13
+ 3. **Optimize**: Rebuild using collected profile data
14
+
15
+ ## Step-by-Step
16
+
17
+ ```bash
18
+ # Step 1: Build instrumented binary
19
+ RUSTFLAGS="-Cprofile-generate=/tmp/pgo-data" \
20
+ cargo build --release
21
+
22
+ # Step 2: Run representative workloads
23
+ ./target/release/my_app < test_data_1.txt
24
+ ./target/release/my_app < test_data_2.txt
25
+ ./target/release/my_app < typical_workload.txt
26
+
27
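+ # Step 3: Merge profile data (llvm-profdata must match rustc's LLVM version; `rustup component add llvm-tools-preview` provides a compatible one)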
28
+ llvm-profdata merge -o /tmp/pgo-data/merged.profdata /tmp/pgo-data
29
+
30
+ # Step 4: Build optimized binary using profile
31
+ RUSTFLAGS="-Cprofile-use=/tmp/pgo-data/merged.profdata" \
32
+ cargo build --release
33
+ ```
34
+
35
+ ## Cargo Configuration
36
+
37
+ ```toml
38
+ # Cargo.toml
39
+ [profile.release]
40
+ lto = "fat"
41
+ codegen-units = 1
42
+ opt-level = 3
43
+
44
+ # PGO flags set via RUSTFLAGS environment variable
45
+ ```
46
+
47
+ ## Build Script
48
+
49
+ ```bash
50
+ #!/bin/bash
51
+ set -e
52
+
53
+ PGO_DIR=/tmp/pgo-$(date +%s)
54
+
55
+ # Clean
56
+ cargo clean
57
+
58
+ # Instrumented build
59
+ echo "Building instrumented binary..."
60
+ RUSTFLAGS="-Cprofile-generate=$PGO_DIR" cargo build --release
61
+
62
+ # Run workloads
63
+ echo "Collecting profile data..."
64
+ ./target/release/my_app --benchmark-mode
65
+ ./target/release/my_app < test_fixtures/typical.txt
66
+ ./target/release/my_app < test_fixtures/stress.txt
67
+
68
+ # Merge profiles
69
+ echo "Merging profile data..."
70
+ llvm-profdata merge -o $PGO_DIR/merged.profdata $PGO_DIR
71
+
72
+ # Optimized build
73
+ echo "Building optimized binary..."
74
+ RUSTFLAGS="-Cprofile-use=$PGO_DIR/merged.profdata" cargo build --release
75
+
76
+ echo "Done! Optimized binary at target/release/my_app"
77
+ ```
78
+
79
+ ## Representative Workloads
80
+
81
+ ```rust
82
+ // Create benchmarks that match real usage patterns
83
+
84
+ // Good: actual data samples
85
+ fn profile_workload() {
86
+ for file in real_customer_data_samples() {
87
+ process_file(&file);
88
+ }
89
+ }
90
+
91
+ // Good: synthetic but realistic
92
+ fn profile_synthetic() {
93
+ for _ in 0..10000 {
94
+ let data = generate_realistic_data();
95
+ process(&data);
96
+ }
97
+ }
98
+
99
+ // Bad: artificial microbenchmarks
100
+ fn profile_bad() {
101
+ for _ in 0..1000000 {
102
+ small_operation(); // Doesn't reflect real hot paths
103
+ }
104
+ }
105
+ ```
106
+
107
+ ## BOLT Post-Link Optimization
108
+
109
+ For even more gains, combine PGO with BOLT:
110
+
111
+ ```bash
112
+ # After PGO build, apply BOLT
113
+ llvm-bolt target/release/my_app \
114
+ -o target/release/my_app.bolt \
115
+ -data=perf.data \
116
+ -reorder-blocks=ext-tsp \
117
+ -reorder-functions=hfsort
118
+
119
+ # BOLT can add another 5-15% on top of PGO
120
+ ```
121
+
122
+ ## CI/CD Integration
123
+
124
+ ```yaml
125
+ # GitHub Actions example
126
+ jobs:
127
+ pgo-build:
128
+ runs-on: ubuntu-latest
129
+ steps:
130
+ - uses: actions/checkout@v4
131
+
132
+ - name: Install LLVM tools
133
+ run: sudo apt-get update && sudo apt-get install -y llvm
134
+
135
+ - name: Instrumented build
136
+ run: RUSTFLAGS="-Cprofile-generate=/tmp/pgo" cargo build --release
137
+
138
+ - name: Run profiling workloads
139
+ run: ./scripts/run_profiling_workloads.sh
140
+
141
+ - name: Merge profiles
142
+ run: llvm-profdata merge -o /tmp/pgo/merged.profdata /tmp/pgo
143
+
144
+ - name: Optimized build
145
+ run: RUSTFLAGS="-Cprofile-use=/tmp/pgo/merged.profdata" cargo build --release
146
+
147
+ - name: Upload artifact
148
+ uses: actions/upload-artifact@v4
149
+ with:
150
+ name: optimized-binary
151
+ path: target/release/my_app
152
+ ```
153
+
154
+ ## When to Use PGO
155
+
156
+ | Use PGO | Skip PGO |
157
+ |---------|----------|
158
+ | Production deployments | Development builds |
159
+ | Performance-critical apps | Libraries (users can PGO) |
160
+ | Stable workload patterns | Highly variable workloads |
161
+ | Sufficient profiling data | Quick iteration cycles |
162
+
163
+ ## See Also
164
+
165
+ - [opt-lto-release](./opt-lto-release.md) - LTO works well with PGO
166
+ - [opt-codegen-units](./opt-codegen-units.md) - Single codegen unit for PGO
167
+ - [perf-profile-first](./perf-profile-first.md) - Profiling basics
@@ -0,0 +1,144 @@
1
+ # opt-simd-portable
2
+
3
+ > Use portable SIMD for vectorized operations across architectures
4
+
5
+ ## Why It Matters
6
+
7
+ SIMD (Single Instruction, Multiple Data) processes multiple values per instruction, giving 4x, 8x, or greater speedups for suitable algorithms. Rust's portable SIMD (`std::simd`, nightly only) and crates like `wide` (stable) provide cross-platform vectorization without architecture-specific intrinsics. On stable Rust, rely on LLVM auto-vectorization, use the `wide` crate, or fall back to `std::arch` intrinsics when you need platform-specific control.
8
+
9
+ ## Autovectorization (Stable)
10
+
11
+ ```rust
12
+ // LLVM often vectorizes simple patterns automatically
13
+ fn sum(data: &[f32]) -> f32 {
14
+ data.iter().sum() // Integer sums vectorize; float sums usually stay scalar because LLVM must preserve FP addition order
15
+ }
16
+
17
+ fn add_arrays(a: &[f32], b: &[f32], out: &mut [f32]) {
18
+ for ((x, y), o) in a.iter().zip(b).zip(out.iter_mut()) {
19
+ *o = x + y; // Often vectorizes
20
+ }
21
+ }
22
+
23
+ // Help autovectorization:
24
+ // 1. Use iterators over indexing
25
+ // 2. Avoid early exits in loops
26
+ // 3. Use chunks_exact for fixed-size chunks (lets LLVM hoist bounds checks; handle the tail via remainder())
27
+ ```
28
+
29
+ ## Portable SIMD (Nightly)
30
+
31
+ ```rust
32
+ #![feature(portable_simd)]
33
+ use std::simd::*;
34
+
35
+ fn sum_simd(data: &[f32]) -> f32 {
36
+ let (prefix, middle, suffix) = data.as_simd::<8>();
37
+
38
+ // Handle unaligned prefix
39
+ let mut sum = prefix.iter().sum::<f32>();
40
+
41
+ // SIMD loop - 8 floats at a time
42
+ let mut simd_sum = f32x8::splat(0.0);
43
+ for chunk in middle {
44
+ simd_sum += *chunk;
45
+ }
46
+ sum += simd_sum.reduce_sum();
47
+
48
+ // Handle unaligned suffix
49
+ sum += suffix.iter().sum::<f32>();
50
+
51
+ sum
52
+ }
53
+
54
+ fn dot_product(a: &[f32], b: &[f32]) -> f32 {
55
+ assert_eq!(a.len(), b.len());
56
+
57
+ let (a_pre, a_mid, a_suf) = a.as_simd::<8>();
58
+ let (b_pre, b_mid, b_suf) = b.as_simd::<8>();
59
+
60
+ let scalar: f32 = a_pre.iter().zip(b_pre).map(|(x, y)| x * y).sum();
61
+
62
+ let mut simd_sum = f32x8::splat(0.0);
63
+ for (av, bv) in a_mid.iter().zip(b_mid) {
64
+ simd_sum += *av * *bv;
65
+ }
66
+
67
+ let suffix: f32 = a_suf.iter().zip(b_suf).map(|(x, y)| x * y).sum();
68
+
69
+ scalar + simd_sum.reduce_sum() + suffix
70
+ }
71
+ ```
72
+
73
+ ## wide Crate (Stable)
74
+
75
+ ```rust
76
+ use wide::*;
77
+
78
+ fn process_simd(data: &mut [f32]) {
79
+ // Process 8 floats at a time
80
+ for chunk in data.chunks_exact_mut(8) {
81
+ let v = f32x8::from(chunk);
82
+ let result = v * f32x8::splat(2.0) + f32x8::splat(1.0);
83
+ chunk.copy_from_slice(&result.to_array());
84
+ }
85
+ }
86
+
87
+ fn blend_images(a: &[u8], b: &[u8], alpha: f32, out: &mut [u8]) {
88
+ let alpha_v = f32x8::splat(alpha);
89
+ let one_minus = f32x8::splat(1.0 - alpha);
90
+
91
+ for ((a_chunk, b_chunk), out_chunk) in
92
+ a.chunks_exact(8).zip(b.chunks_exact(8)).zip(out.chunks_exact_mut(8))
93
+ {
94
+ let av = f32x8::from([
95
+ a_chunk[0] as f32, a_chunk[1] as f32, /* ... */
96
+ ]);
97
+ let bv = f32x8::from([
98
+ b_chunk[0] as f32, b_chunk[1] as f32, /* ... */
99
+ ]);
100
+
101
+ let result = av * one_minus + bv * alpha_v;
102
+ // Convert back to u8...
103
+ }
104
+ }
105
+ ```
106
+
107
+ ## Platform-Specific (When Needed)
108
+
109
+ ```rust
110
+ #[cfg(target_arch = "x86_64")]
111
+ use std::arch::x86_64::*;
112
+
113
+ #[cfg(target_arch = "x86_64")]
114
+ #[target_feature(enable = "avx2")]
115
+ unsafe fn sum_avx2(data: &[f32]) -> f32 {
116
+ let mut sum = _mm256_setzero_ps();
117
+
118
+ for chunk in data.chunks_exact(8) {
119
+ let v = _mm256_loadu_ps(chunk.as_ptr());
120
+ sum = _mm256_add_ps(sum, v);
121
+ }
122
+
123
+ // Horizontal sum
124
+ let high = _mm256_extractf128_ps(sum, 1);
125
+ let low = _mm256_castps256_ps128(sum);
126
+ let sum128 = _mm_add_ps(high, low);
127
+ // ... continue reduction
128
+ }
129
+ ```
130
+
131
+ ## Choosing an Approach
132
+
133
+ | Approach | Stability | Portability | Control |
134
+ |----------|-----------|-------------|---------|
135
+ | Autovectorization | Stable | Excellent | Low |
136
+ | `wide` crate | Stable | Good | Medium |
137
+ | Portable SIMD | Nightly | Excellent | High |
138
+ | Intrinsics | Stable | None | Maximum |
139
+
140
+ ## See Also
141
+
142
+ - [opt-target-cpu](./opt-target-cpu.md) - Enable SIMD features
143
+ - [opt-bounds-check](./opt-bounds-check.md) - Unchecked access for SIMD
144
+ - [perf-profile-first](./perf-profile-first.md) - Identify vectorization opportunities
@@ -0,0 +1,154 @@
1
+ # opt-target-cpu
2
+
3
+ > Use `target-cpu=native` for maximum performance on known deployment targets
4
+
5
+ ## Why It Matters
6
+
7
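+ By default, Rust compiles for the generic x86-64 baseline (the original 2003-era feature set, i.e. SSE2 only). Modern CPUs have SIMD extensions (AVX2, AVX-512), improved instructions, and micro-architectural optimizations that go unused. `target-cpu=native` enables all features of the CPU you are building on, potentially unlocking significant speedups. The resulting binary may crash with an illegal-instruction fault on CPUs that lack those features, so use it only when the build machine and the deployment hardware match.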
8
+
9
+ ## Bad
10
+
11
+ ```toml
12
+ # Cargo.toml - compiles for generic x86-64
13
+ [profile.release]
14
+ # No target-cpu specified
15
+ # Binary works everywhere but uses only SSE2
16
+ ```
17
+
18
+ ## Good
19
+
20
+ ```toml
21
+ # .cargo/config.toml - for known deployment target
22
+ [build]
23
+ rustflags = ["-C", "target-cpu=native"]
24
+
25
+ # Or specific CPU for cross-compilation
26
+ # rustflags = ["-C", "target-cpu=skylake"]
27
+ ```
28
+
29
+ ## Via Environment
30
+
31
+ ```bash
32
+ # Build with native optimizations
33
+ RUSTFLAGS="-C target-cpu=native" cargo build --release
34
+
35
+ # Check what features are enabled
36
+ rustc --print cfg -C target-cpu=native | grep target_feature
37
+ ```
38
+
39
+ ## Common Target CPUs
40
+
41
+ ```bash
42
+ # x86-64 targets
43
+ target-cpu=native # Current machine
44
+ target-cpu=x86-64 # Baseline (SSE2)
45
+ target-cpu=x86-64-v2 # SSE4.2, POPCNT
46
+ target-cpu=x86-64-v3 # AVX2, BMI2
47
+ target-cpu=x86-64-v4 # AVX-512
48
+
49
+ # Intel specific
50
+ target-cpu=skylake # 6th gen Core
51
+ target-cpu=alderlake # 12th gen Core
52
+
53
+ # AMD specific
54
+ target-cpu=znver3 # Zen 3
55
+ target-cpu=znver4 # Zen 4
56
+
57
+ # ARM
58
+ target-cpu=apple-m1 # Apple Silicon
59
+ target-cpu=neoverse-n1 # AWS Graviton2
60
+ ```
61
+
62
+ ## Feature Detection at Runtime
63
+
64
+ ```rust
65
+ // For portable binaries that use native features when available
66
+ #[cfg(target_arch = "x86_64")]
67
+ fn process_fast(data: &[u8]) -> u64 {
68
+ if is_x86_feature_detected!("avx2") {
69
+ unsafe { process_avx2(data) }
70
+ } else if is_x86_feature_detected!("sse4.2") {
71
+ unsafe { process_sse42(data) }
72
+ } else {
73
+ process_generic(data)
74
+ }
75
+ }
76
+
77
+ #[target_feature(enable = "avx2")]
78
+ unsafe fn process_avx2(data: &[u8]) -> u64 {
79
+ // AVX2 optimized implementation
80
+ }
81
+ ```
82
+
83
+ ## Multi-Architecture Builds
84
+
85
+ ```bash
86
+ # Build multiple binaries
87
+ RUSTFLAGS="-C target-cpu=x86-64" cargo build --release
88
+ mv target/release/app target/release/app-generic
89
+
90
+ RUSTFLAGS="-C target-cpu=x86-64-v3" cargo build --release
91
+ mv target/release/app target/release/app-avx2
92
+
93
+ # Select at runtime
94
+ if supports_avx2; then
95
+ ./app-avx2
96
+ else
97
+ ./app-generic
98
+ fi
99
+ ```
100
+
101
+ ## Cargo Configuration
102
+
103
+ ```toml
104
+ # .cargo/config.toml
105
+
106
+ # Native builds for development
107
+ [target.x86_64-unknown-linux-gnu]
108
+ rustflags = ["-C", "target-cpu=native"]
109
+
110
+ # AWS deployment (Graviton2)
111
+ [target.aarch64-unknown-linux-gnu]
112
+ rustflags = ["-C", "target-cpu=neoverse-n1"]
113
+
114
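+ # Intel server deployment - Cargo has no `[target.<triple>.deployment]` table; keep this in a separate config file and pass it with `cargo build --config deploy.toml`
115
+ # [target.x86_64-unknown-linux-gnu]
116
+ # rustflags = ["-C", "target-cpu=skylake-avx512"]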
117
+ ```
118
+
119
+ ## What Changes
120
+
121
+ ```rust
122
+ // With AVX2 enabled:
123
+ // - 256-bit SIMD operations
124
+ // - Better autovectorization
125
+ // - FMA (fused multiply-add)
126
+ // - BMI (bit manipulation)
127
+
128
+ // Example: sum of squares
129
+ fn sum_squares(data: &[f64]) -> f64 {
130
+ data.iter().map(|x| x * x).sum()
131
+ }
132
+ // Generic: scalar loop
133
+ // AVX2: processes 4 f64s per iteration
134
+ ```
135
+
136
+ ## Checking Enabled Features
137
+
138
+ ```bash
139
+ # What's enabled for native?
140
+ rustc --print cfg -C target-cpu=native | grep feature
141
+
142
+ # Compare generic vs native
143
+ rustc --print cfg -C target-cpu=x86-64 | grep feature
144
+ rustc --print cfg -C target-cpu=native | grep feature
145
+
146
+ # View generated assembly
147
+ cargo asm --rust --release my_crate::hot_function
148
+ ```
149
+
150
+ ## See Also
151
+
152
+ - [opt-lto-release](./opt-lto-release.md) - Combine with LTO
153
+ - [opt-simd-portable](./opt-simd-portable.md) - Portable SIMD
154
+ - [opt-codegen-units](./opt-codegen-units.md) - Single codegen unit
@@ -0,0 +1,141 @@
1
+ # own-arc-shared
2
+
3
+ > Use `Arc<T>` for thread-safe shared ownership
4
+
5
+ ## Why It Matters
6
+
7
+ `Arc` (Atomic Reference Counted) provides shared ownership across threads. Unlike `Rc`, its reference count is updated atomically, making it safe for concurrent access. Use it when multiple threads need to read the same data.
8
+
9
+ ## Bad
10
+
11
+ ```rust
12
+ use std::rc::Rc;
13
+ use std::thread;
14
+
15
+ let data = Rc::new(vec![1, 2, 3]);
16
+ let data_clone = Rc::clone(&data);
17
+
18
+ // ERROR: Rc cannot be sent between threads safely
19
+ thread::spawn(move || {
20
+ println!("{:?}", data_clone);
21
+ });
22
+ ```
23
+
24
+ ## Good
25
+
26
+ ```rust
27
+ use std::sync::Arc;
28
+ use std::thread;
29
+
30
+ let data = Arc::new(vec![1, 2, 3]);
31
+ let data_clone = Arc::clone(&data);
32
+
33
+ thread::spawn(move || {
34
+ println!("{:?}", data_clone); // Safe!
35
+ });
36
+
37
+ println!("{:?}", data); // Original still accessible
38
+ ```
39
+
40
+ ## Arc with Mutex for Mutable Shared State
41
+
42
+ ```rust
43
+ use std::sync::{Arc, Mutex};
44
+ use std::thread;
45
+
46
+ let counter = Arc::new(Mutex::new(0));
47
+ let mut handles = vec![];
48
+
49
+ for _ in 0..10 {
50
+ let counter = Arc::clone(&counter);
51
+ let handle = thread::spawn(move || {
52
+ let mut num = counter.lock().unwrap();
53
+ *num += 1;
54
+ });
55
+ handles.push(handle);
56
+ }
57
+
58
+ for handle in handles {
59
+ handle.join().unwrap();
60
+ }
61
+
62
+ println!("Result: {}", *counter.lock().unwrap());
63
+ ```
64
+
65
+ ## Arc vs Rc Decision Tree
66
+
67
+ ```
68
+ Need shared ownership?
69
+ ├── No → Use owned value or references
70
+ └── Yes → Will it cross thread boundaries?
71
+ ├── No → Use Rc<T> (cheaper, no atomic ops)
72
+ └── Yes → Use Arc<T>
73
+ └── Need mutation?
74
+ ├── No → Arc<T> is enough
75
+ └── Yes → Arc<Mutex<T>> or Arc<RwLock<T>>
76
+ ```
77
+
78
+ ## Common Patterns
79
+
80
+ ```rust
81
+ use std::sync::Arc;
82
+
83
+ // Shared configuration (read-only)
84
+ struct AppConfig {
85
+ database_url: String,
86
+ max_connections: u32,
87
+ }
88
+
89
+ fn setup_workers(config: Arc<AppConfig>) {
90
+ for i in 0..4 {
91
+ let config = Arc::clone(&config);
92
+ std::thread::spawn(move || {
93
+ println!("Worker {} using db: {}", i, config.database_url);
94
+ });
95
+ }
96
+ }
97
+
98
+ // Shared cache with interior mutability
99
+ use std::sync::RwLock;
100
+ use std::collections::HashMap;
101
+
102
+ type Cache = Arc<RwLock<HashMap<String, String>>>;
103
+
104
+ fn get_cached(cache: &Cache, key: &str) -> Option<String> {
105
+ cache.read().unwrap().get(key).cloned()
106
+ }
107
+
108
+ fn set_cached(cache: &Cache, key: String, value: String) {
109
+ cache.write().unwrap().insert(key, value);
110
+ }
111
+ ```
112
+
113
+ ## Performance Considerations
114
+
115
+ ```rust
116
+ // Arc::clone is cheap - just increments atomic counter
117
+ let a = Arc::new(large_data);
118
+ let b = Arc::clone(&a); // Fast! No data copied
119
+
120
+ // But atomic operations have overhead vs Rc
121
+ // Use Rc in single-threaded contexts for better performance
122
+
123
+ // Avoid cloning Arc in hot loops if possible
124
+ // Bad:
125
+ for item in items {
126
+ let arc = Arc::clone(&shared); // Atomic op each iteration
127
+ process(arc, item);
128
+ }
129
+
130
+ // Better: clone once outside the loop (or simply borrow the existing `shared` handle) and pass a reference
131
+ let arc = Arc::clone(&shared);
132
+ for item in items {
133
+ process(&arc, item); // Pass reference
134
+ }
135
+ ```
136
+
137
+ ## See Also
138
+
139
+ - [own-rc-single-thread](own-rc-single-thread.md) - Use Rc for single-threaded sharing
140
+ - [own-mutex-interior](own-mutex-interior.md) - Use Mutex for interior mutability
141
+ - [async-clone-before-await](async-clone-before-await.md) - Clone Arc before await points
@@ -0,0 +1,95 @@
1
+ # own-borrow-over-clone
2
+
3
+ > Prefer `&T` borrowing over `.clone()`
4
+
5
+ ## Why It Matters
6
+
7
+ Cloning allocates new memory and copies data, while borrowing is free. Unnecessary clones can significantly impact performance, especially in hot paths or with large data structures.
8
+
9
+ ## Bad
10
+
11
+ ```rust
12
+ fn process(data: &String) {
13
+ let local = data.clone(); // Unnecessary allocation!
14
+ println!("{}", local);
15
+ }
16
+
17
+ fn count_words(text: &String) -> usize {
18
+ let owned = text.clone(); // Why clone just to read?
19
+ owned.split_whitespace().count()
20
+ }
21
+
22
+ // Clone in a loop - multiplied cost
23
+ fn process_all(items: &[String]) {
24
+ for item in items {
25
+ let copy = item.clone(); // N allocations!
26
+ handle(&copy);
27
+ }
28
+ }
29
+ ```
30
+
31
+ ## Good
32
+
33
+ ```rust
34
+ fn process(data: &str) { // Accept &str, more flexible
35
+ println!("{}", data); // No allocation needed
36
+ }
37
+
38
+ fn count_words(text: &str) -> usize {
39
+ text.split_whitespace().count() // Just borrow
40
+ }
41
+
42
+ // Borrow in a loop - zero allocations
43
+ fn process_all(items: &[String]) {
44
+ for item in items {
45
+ handle(item); // Pass reference
46
+ }
47
+ }
48
+ ```
49
+
50
+ ## When Clone Is Acceptable
51
+
52
+ ```rust
53
+ // 1. Need owned data for storage
54
+ struct Cache {
55
+ data: HashMap<String, String>,
56
+ }
57
+
58
+ impl Cache {
59
+ fn insert(&mut self, key: &str, value: &str) {
60
+ // Clone needed - we're storing owned data
61
+ self.data.insert(key.to_string(), value.to_string());
62
+ }
63
+ }
64
+
65
+ // 2. Need to send across threads
66
+ fn spawn_worker(data: &Config) {
67
+ let owned = data.clone(); // Clone needed for 'static
68
+ std::thread::spawn(move || {
69
+ use_config(owned);
70
+ });
71
+ }
72
+
73
+ // 3. Copy types (no heap allocation)
74
+ let x: i32 = 42;
75
+ let y = x; // Copy, not clone - this is fine
76
+ ```
77
+
78
+ ## Evidence
79
+
80
+ Abridged from ripgrep's codebase (simplified excerpt, not compilable as shown) - uses `Cow` to avoid clones:
81
+ ```rust
82
+ // https://github.com/BurntSushi/ripgrep/blob/master/crates/globset/src/pathutil.rs
83
+ pub(crate) fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
84
+ match *path {
85
+ Cow::Borrowed(path) => Cow::Borrowed(&path[last_slash..]),
86
+ Cow::Owned(ref path) => Cow::Owned(path.clone()),
87
+ }
88
+ }
89
+ ```
90
+
91
+ ## See Also
92
+
93
+ - [own-slice-over-vec](own-slice-over-vec.md) - Accept slices instead of references to collections
94
+ - [own-cow-conditional](own-cow-conditional.md) - Use Cow for conditional ownership
95
+ - [mem-clone-from](mem-clone-from.md) - Reuse allocations when cloning