EvoScientist 0.0.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. EvoScientist/EvoScientist.py +157 -0
  2. EvoScientist/__init__.py +24 -0
  3. EvoScientist/__main__.py +4 -0
  4. EvoScientist/backends.py +392 -0
  5. EvoScientist/cli.py +1553 -0
  6. EvoScientist/middleware.py +35 -0
  7. EvoScientist/prompts.py +277 -0
  8. EvoScientist/skills/accelerate/SKILL.md +332 -0
  9. EvoScientist/skills/accelerate/references/custom-plugins.md +453 -0
  10. EvoScientist/skills/accelerate/references/megatron-integration.md +489 -0
  11. EvoScientist/skills/accelerate/references/performance.md +525 -0
  12. EvoScientist/skills/bitsandbytes/SKILL.md +411 -0
  13. EvoScientist/skills/bitsandbytes/references/memory-optimization.md +521 -0
  14. EvoScientist/skills/bitsandbytes/references/qlora-training.md +521 -0
  15. EvoScientist/skills/bitsandbytes/references/quantization-formats.md +447 -0
  16. EvoScientist/skills/find-skills/SKILL.md +133 -0
  17. EvoScientist/skills/find-skills/scripts/install_skill.py +211 -0
  18. EvoScientist/skills/flash-attention/SKILL.md +367 -0
  19. EvoScientist/skills/flash-attention/references/benchmarks.md +215 -0
  20. EvoScientist/skills/flash-attention/references/transformers-integration.md +293 -0
  21. EvoScientist/skills/llama-cpp/SKILL.md +258 -0
  22. EvoScientist/skills/llama-cpp/references/optimization.md +89 -0
  23. EvoScientist/skills/llama-cpp/references/quantization.md +213 -0
  24. EvoScientist/skills/llama-cpp/references/server.md +125 -0
  25. EvoScientist/skills/lm-evaluation-harness/SKILL.md +490 -0
  26. EvoScientist/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  27. EvoScientist/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  28. EvoScientist/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  29. EvoScientist/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  30. EvoScientist/skills/ml-paper-writing/SKILL.md +937 -0
  31. EvoScientist/skills/ml-paper-writing/references/checklists.md +361 -0
  32. EvoScientist/skills/ml-paper-writing/references/citation-workflow.md +562 -0
  33. EvoScientist/skills/ml-paper-writing/references/reviewer-guidelines.md +367 -0
  34. EvoScientist/skills/ml-paper-writing/references/sources.md +159 -0
  35. EvoScientist/skills/ml-paper-writing/references/writing-guide.md +476 -0
  36. EvoScientist/skills/ml-paper-writing/templates/README.md +251 -0
  37. EvoScientist/skills/ml-paper-writing/templates/aaai2026/README.md +534 -0
  38. EvoScientist/skills/ml-paper-writing/templates/aaai2026/aaai2026-unified-supp.tex +144 -0
  39. EvoScientist/skills/ml-paper-writing/templates/aaai2026/aaai2026-unified-template.tex +952 -0
  40. EvoScientist/skills/ml-paper-writing/templates/aaai2026/aaai2026.bib +111 -0
  41. EvoScientist/skills/ml-paper-writing/templates/aaai2026/aaai2026.bst +1493 -0
  42. EvoScientist/skills/ml-paper-writing/templates/aaai2026/aaai2026.sty +315 -0
  43. EvoScientist/skills/ml-paper-writing/templates/acl/README.md +50 -0
  44. EvoScientist/skills/ml-paper-writing/templates/acl/acl.sty +312 -0
  45. EvoScientist/skills/ml-paper-writing/templates/acl/acl_latex.tex +377 -0
  46. EvoScientist/skills/ml-paper-writing/templates/acl/acl_lualatex.tex +101 -0
  47. EvoScientist/skills/ml-paper-writing/templates/acl/acl_natbib.bst +1940 -0
  48. EvoScientist/skills/ml-paper-writing/templates/acl/anthology.bib.txt +26 -0
  49. EvoScientist/skills/ml-paper-writing/templates/acl/custom.bib +70 -0
  50. EvoScientist/skills/ml-paper-writing/templates/acl/formatting.md +326 -0
  51. EvoScientist/skills/ml-paper-writing/templates/colm2025/README.md +3 -0
  52. EvoScientist/skills/ml-paper-writing/templates/colm2025/colm2025_conference.bib +11 -0
  53. EvoScientist/skills/ml-paper-writing/templates/colm2025/colm2025_conference.bst +1440 -0
  54. EvoScientist/skills/ml-paper-writing/templates/colm2025/colm2025_conference.pdf +0 -0
  55. EvoScientist/skills/ml-paper-writing/templates/colm2025/colm2025_conference.sty +218 -0
  56. EvoScientist/skills/ml-paper-writing/templates/colm2025/colm2025_conference.tex +305 -0
  57. EvoScientist/skills/ml-paper-writing/templates/colm2025/fancyhdr.sty +485 -0
  58. EvoScientist/skills/ml-paper-writing/templates/colm2025/math_commands.tex +508 -0
  59. EvoScientist/skills/ml-paper-writing/templates/colm2025/natbib.sty +1246 -0
  60. EvoScientist/skills/ml-paper-writing/templates/iclr2026/fancyhdr.sty +485 -0
  61. EvoScientist/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.bib +24 -0
  62. EvoScientist/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.bst +1440 -0
  63. EvoScientist/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.pdf +0 -0
  64. EvoScientist/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.sty +246 -0
  65. EvoScientist/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.tex +414 -0
  66. EvoScientist/skills/ml-paper-writing/templates/iclr2026/math_commands.tex +508 -0
  67. EvoScientist/skills/ml-paper-writing/templates/iclr2026/natbib.sty +1246 -0
  68. EvoScientist/skills/ml-paper-writing/templates/icml2026/algorithm.sty +79 -0
  69. EvoScientist/skills/ml-paper-writing/templates/icml2026/algorithmic.sty +201 -0
  70. EvoScientist/skills/ml-paper-writing/templates/icml2026/example_paper.bib +75 -0
  71. EvoScientist/skills/ml-paper-writing/templates/icml2026/example_paper.pdf +0 -0
  72. EvoScientist/skills/ml-paper-writing/templates/icml2026/example_paper.tex +662 -0
  73. EvoScientist/skills/ml-paper-writing/templates/icml2026/fancyhdr.sty +864 -0
  74. EvoScientist/skills/ml-paper-writing/templates/icml2026/icml2026.bst +1443 -0
  75. EvoScientist/skills/ml-paper-writing/templates/icml2026/icml2026.sty +767 -0
  76. EvoScientist/skills/ml-paper-writing/templates/icml2026/icml_numpapers.pdf +0 -0
  77. EvoScientist/skills/ml-paper-writing/templates/neurips2025/Makefile +36 -0
  78. EvoScientist/skills/ml-paper-writing/templates/neurips2025/extra_pkgs.tex +53 -0
  79. EvoScientist/skills/ml-paper-writing/templates/neurips2025/main.tex +38 -0
  80. EvoScientist/skills/ml-paper-writing/templates/neurips2025/neurips.sty +382 -0
  81. EvoScientist/skills/peft/SKILL.md +431 -0
  82. EvoScientist/skills/peft/references/advanced-usage.md +514 -0
  83. EvoScientist/skills/peft/references/troubleshooting.md +480 -0
  84. EvoScientist/skills/ray-data/SKILL.md +326 -0
  85. EvoScientist/skills/ray-data/references/integration.md +82 -0
  86. EvoScientist/skills/ray-data/references/transformations.md +83 -0
  87. EvoScientist/skills/skill-creator/LICENSE.txt +202 -0
  88. EvoScientist/skills/skill-creator/SKILL.md +356 -0
  89. EvoScientist/skills/skill-creator/references/output-patterns.md +82 -0
  90. EvoScientist/skills/skill-creator/references/workflows.md +28 -0
  91. EvoScientist/skills/skill-creator/scripts/init_skill.py +303 -0
  92. EvoScientist/skills/skill-creator/scripts/package_skill.py +110 -0
  93. EvoScientist/skills/skill-creator/scripts/quick_validate.py +95 -0
  94. EvoScientist/stream/__init__.py +53 -0
  95. EvoScientist/stream/emitter.py +94 -0
  96. EvoScientist/stream/formatter.py +168 -0
  97. EvoScientist/stream/tracker.py +115 -0
  98. EvoScientist/stream/utils.py +255 -0
  99. EvoScientist/subagent.yaml +147 -0
  100. EvoScientist/tools.py +135 -0
  101. EvoScientist/utils.py +207 -0
  102. evoscientist-0.0.1.dev1.dist-info/METADATA +222 -0
  103. evoscientist-0.0.1.dev1.dist-info/RECORD +107 -0
  104. evoscientist-0.0.1.dev1.dist-info/WHEEL +5 -0
  105. evoscientist-0.0.1.dev1.dist-info/entry_points.txt +2 -0
  106. evoscientist-0.0.1.dev1.dist-info/licenses/LICENSE +21 -0
  107. evoscientist-0.0.1.dev1.dist-info/top_level.txt +1 -0
EvoScientist/skills/bitsandbytes/references/quantization-formats.md
@@ -0,0 +1,447 @@
+ # Quantization Formats
+
+ A complete guide to the INT8, NF4, and FP4 quantization formats, double quantization, and custom configurations in bitsandbytes.
+
+ ## Overview
+
+ bitsandbytes supports multiple quantization formats:
+ - **INT8**: 8-bit integer quantization (LLM.int8())
+ - **NF4**: 4-bit NormalFloat (for normally distributed weights)
+ - **FP4**: 4-bit floating point (for uniformly distributed weights)
+ - **Double Quantization**: quantizes the quantization constants themselves
+
+ ## INT8 Quantization
+
+ ### LLM.int8() Algorithm
+
+ LLM.int8() uses mixed 8-bit/16-bit matrix multiplication:
+ - Most feature dimensions (>99.9%) are computed in INT8
+ - Outlier features (magnitude above a threshold) are computed in FP16
+ - The two partial results are combined into the final output
+
+ **Memory**: 50% reduction (2 bytes → 1 byte per parameter)
+ **Accuracy**: <0.5% degradation
+
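+ To make the decomposition concrete, here is a toy sketch of the idea (an illustration only, not the bitsandbytes kernel; the threshold value and the simple absmax scaling are assumptions for the example):
+
+ ```python
+ import torch
+
+ def mixed_int8_matmul(x, w, threshold=6.0):
+     """Toy LLM.int8()-style decomposition (illustrative, float-simulated)."""
+     # Columns of x whose max magnitude exceeds the threshold are "outliers".
+     outlier_cols = x.abs().amax(dim=0) > threshold
+     # High-precision path: outlier columns are left unquantized.
+     y_outlier = x[:, outlier_cols] @ w[outlier_cols, :]
+     # INT8 path: absmax-quantize the remaining columns.
+     x_r, w_r = x[:, ~outlier_cols], w[~outlier_cols, :]
+     sx = x_r.abs().amax(dim=1, keepdim=True) / 127.0  # per-row scale
+     sw = w_r.abs().amax(dim=0, keepdim=True) / 127.0  # per-column scale
+     x_q = (x_r / sx).round().clamp(-127, 127)
+     w_q = (w_r / sw).round().clamp(-127, 127)
+     y_int8 = (x_q @ w_q) * sx * sw  # dequantize the product
+     return y_int8 + y_outlier
+
+ x, w = torch.randn(4, 16), torch.randn(16, 8)
+ x[:, 3] *= 10  # inject an outlier feature dimension
+ print(mixed_int8_matmul(x, w).shape)  # torch.Size([4, 8])
+ ```
+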
+ ### Configuration
+
+ ```python
+ from transformers import BitsAndBytesConfig
+
+ config = BitsAndBytesConfig(
+     load_in_8bit=True,
+     llm_int8_threshold=6.0,            # Outlier threshold
+     llm_int8_has_fp16_weight=False,    # Use INT8 storage
+     llm_int8_skip_modules=["lm_head"]  # Skip certain layers
+ )
+ ```
+
+ ### Parameters Explained
+
+ **`llm_int8_threshold`** (default: 6.0):
+ - Activations with magnitude > threshold are kept in FP16
+ - Lower = more FP16 (slower but more accurate)
+ - Higher = more INT8 (faster but less accurate)
+
+ ```python
+ # Conservative (more accurate)
+ llm_int8_threshold=5.0
+
+ # Aggressive (faster)
+ llm_int8_threshold=8.0
+ ```
+
+ **`llm_int8_has_fp16_weight`** (default: False):
+ - `False`: Store weights in INT8 (50% memory savings)
+ - `True`: Store in FP16, quantize only during computation (no memory savings)
+
+ **`llm_int8_skip_modules`**:
+ ```python
+ # Skip specific layers (keep in FP16)
+ llm_int8_skip_modules=["lm_head", "embed_tokens"]
+ ```
+
+ ### Example
+
+ ```python
+ from transformers import AutoModelForCausalLM
+
+ model = AutoModelForCausalLM.from_pretrained(
+     "meta-llama/Llama-2-13b-hf",
+     quantization_config=config,
+     device_map="auto"
+ )
+
+ # Memory: 26GB (FP16) → 13GB (INT8)
+ ```
+
+ ### When to Use INT8
+
+ ✅ **Use INT8 when**:
+ - You need high accuracy (<0.5% loss)
+ - The model fits in memory with a 50% reduction
+ - You have a Turing or newer GPU (INT8 tensor cores)
+
+ ❌ **Don't use it when**:
+ - You need maximum memory savings (use 4-bit)
+ - Inference speed is critical (use GPTQ/AWQ)
+
+ ## 4-Bit Quantization
+
+ ### NormalFloat4 (NF4)
+
+ Optimized for normally distributed weights (which most neural network weights are).
+
+ **How it works**:
+ - Quantization bins are chosen to minimize error for a normal distribution
+ - Asymmetric quantization bins
+ - Better for transformer weights
+
+ **Configuration**:
+ ```python
+ import torch
+
+ config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_quant_type="nf4"  # NormalFloat4
+ )
+ ```
+
+ **Memory**: 75% reduction (2 bytes → 0.5 bytes per parameter)
+
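+ To illustrate the idea behind NF4 (a simplification; the actual 16-value NF4 codebook is fixed inside bitsandbytes and derived slightly differently in the QLoRA paper), one can build 4-bit levels from quantiles of a standard normal distribution and snap each absmax-normalized block to the nearest level:
+
+ ```python
+ import torch
+
+ # 16 levels from evenly spaced quantiles of N(0, 1), rescaled to [-1, 1]
+ probs = torch.linspace(0.02, 0.98, 16)
+ levels = torch.distributions.Normal(0.0, 1.0).icdf(probs)
+ levels = levels / levels.abs().max()
+
+ def quantize_block(block):
+     scale = block.abs().max()          # absmax scaling factor for the block
+     normed = block / scale             # now in [-1, 1], roughly normal
+     idx = (normed[:, None] - levels[None, :]).abs().argmin(dim=1)
+     return idx.to(torch.uint8), scale  # 4-bit codes + one scale per block
+
+ def dequantize_block(idx, scale):
+     return levels[idx.long()] * scale
+
+ block = torch.randn(64)                # one 64-element block
+ idx, scale = quantize_block(block)
+ err = (dequantize_block(idx, scale) - block).abs().mean()
+ print(f"mean abs error: {err:.4f}")
+ ```
+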
+ ### FloatingPoint4 (FP4)
+
+ Standard 4-bit floating point, better suited to uniform distributions.
+
+ **How it works**:
+ - Symmetric quantization bins
+ - Better for weights with a broader dynamic range
+ - Less common for transformers
+
+ **Configuration**:
+ ```python
+ config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_quant_type="fp4"  # FloatingPoint4
+ )
+ ```
+
+ ### NF4 vs FP4 Comparison
+
+ | Aspect | NF4 | FP4 |
+ |--------|-----|-----|
+ | Distribution | Normal | Uniform |
+ | Typical use | **Transformers** | CNNs, unusual architectures |
+ | Accuracy | **Better for LLMs** | Worse for LLMs |
+ | Speed | Same | Same |
+ | Recommendation | ✅ Default | Use only if NF4 fails |
+
+ **Rule of thumb**: Always use NF4 for transformers.
+
+ ### Example Comparison
+
+ ```python
+ # NF4 (recommended)
+ nf4_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4"
+ )
+
+ # FP4 (alternative)
+ fp4_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="fp4"
+ )
+
+ # Load and compare
+ model_nf4 = AutoModelForCausalLM.from_pretrained(
+     "meta-llama/Llama-2-7b-hf",
+     quantization_config=nf4_config
+ )
+
+ model_fp4 = AutoModelForCausalLM.from_pretrained(
+     "meta-llama/Llama-2-7b-hf",
+     quantization_config=fp4_config
+ )
+
+ # Typical results on MMLU:
+ # NF4:  45.2%
+ # FP4:  43.8%
+ # FP16: 45.9%
+ ```
+
+ ## Compute Dtype
+
+ The `bnb_4bit_compute_dtype` setting controls the precision used for the actual computation (weights are dequantized to this dtype for each matmul).
+
+ ### Options
+
+ **torch.bfloat16** (recommended):
+ ```python
+ bnb_4bit_compute_dtype=torch.bfloat16
+ ```
+ - Good balance of speed and accuracy
+ - Recommended for A100/H100
+ - Prevents numerical instability
+
+ **torch.float16**:
+ ```python
+ bnb_4bit_compute_dtype=torch.float16
+ ```
+ - Slightly faster than BF16
+ - Risk of overflow/underflow
+ - Use only if BF16 is unavailable
+
+ **torch.float32**:
+ ```python
+ bnb_4bit_compute_dtype=torch.float32
+ ```
+ - Most accurate
+ - Slowest (no tensor core acceleration)
+ - Debugging only
+
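+ The overflow risk is easy to demonstrate (a quick sanity check in plain PyTorch):
+
+ ```python
+ import torch
+
+ # FP16 has a narrow exponent range: values above ~65504 overflow to inf.
+ print(torch.tensor(70000.0, dtype=torch.float16))   # inf
+ # BF16 keeps FP32's exponent range at the cost of mantissa precision.
+ print(torch.tensor(70000.0, dtype=torch.bfloat16))  # 69888 (rounded)
+ ```
+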
+ ### Performance Comparison
+
+ | Dtype | Speed | Accuracy | Memory per value |
+ |-------|-------|----------|------------------|
+ | FP32 | 1× (baseline) | 100% | 4 bytes |
+ | FP16 | 3-4× | 99.5% | 2 bytes |
+ | BF16 | 3-4× | **99.8%** | 2 bytes |
+
+ **Recommendation**: Always use `torch.bfloat16` if supported.
+
+ ## Double Quantization
+
+ Double quantization quantizes the quantization constants themselves for additional memory savings.
+
+ ### How It Works
+
+ Standard 4-bit quantization stores:
+ - 4-bit quantized weights
+ - FP32 scaling factors (4 bytes per block)
+
+ Double quantization stores:
+ - 4-bit quantized weights
+ - **INT8 quantized scaling factors** (1 byte per block)
+
+ **Additional savings**: ~2-3% memory reduction
+
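+ The savings can be estimated from the block size. A back-of-the-envelope sketch (assuming 64-element blocks and a QLoRA-style second level of INT8 scales grouped 256 per shared FP32 constant; real end-to-end numbers vary):
+
+ ```python
+ # Per-parameter storage overhead of the scaling factors, in bits
+ block = 64
+ single = 32 / block                       # one FP32 scale per block
+ double = 8 / block + 32 / (block * 256)   # INT8 scale + shared FP32 constant
+ print(f"single quant: {single:.3f} bits/param")           # 0.500
+ print(f"double quant: {double:.3f} bits/param")           # ~0.127
+ print(f"saved:        {single - double:.3f} bits/param")  # ~0.373
+ ```
+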
+ ### Configuration
+
+ ```python
+ config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True  # Enable double quantization
+ )
+ ```
+
+ ### Example
+
+ ```python
+ # Without double quant
+ model_single = AutoModelForCausalLM.from_pretrained(
+     "meta-llama/Llama-2-70b-hf",
+     quantization_config=BitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_use_double_quant=False
+     )
+ )
+ # Memory: ~36GB
+
+ # With double quant
+ model_double = AutoModelForCausalLM.from_pretrained(
+     "meta-llama/Llama-2-70b-hf",
+     quantization_config=BitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_use_double_quant=True
+     )
+ )
+ # Memory: ~35GB (saves ~1GB)
+ ```
+
+ **Accuracy impact**: Negligible (<0.1%)
+
+ **Recommendation**: Always enable for maximum memory savings.
+
+ ## Quantization Storage
+
+ The `bnb_4bit_quant_storage` setting controls the storage dtype for quantized weights (important for FSDP).
+
+ ### Configuration
+
+ ```python
+ config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_storage=torch.bfloat16  # Storage dtype
+ )
+ ```
+
+ ### When to Use
+
+ **Default (uint8)**:
+ - Single-GPU training/inference
+ - No special requirements
+
+ **torch.bfloat16** (for FSDP):
+ ```python
+ bnb_4bit_quant_storage=torch.bfloat16
+ ```
+ - **Required for FSDP+QLoRA**
+ - Ensures 4-bit layers are wrapped like regular layers
+ - Enables proper model sharding
+
+ ### Example: FSDP Configuration
+
+ ```python
+ # CRITICAL: Set quant_storage for FSDP
+ fsdp_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_storage=torch.bfloat16  # Must match torch_dtype!
+ )
+
+ model = AutoModelForCausalLM.from_pretrained(
+     "meta-llama/Llama-2-70b-hf",
+     quantization_config=fsdp_config,
+     torch_dtype=torch.bfloat16  # Must match quant_storage!
+ )
+ ```
+
+ ## Recommended Configurations
+
+ ### Production Inference (Best Accuracy)
+
+ ```python
+ BitsAndBytesConfig(
+     load_in_8bit=True,
+     llm_int8_threshold=6.0
+ )
+ ```
+
+ **Use case**: Maximum accuracy with 50% memory savings
+
+ ### Production Inference (Maximum Memory Savings)
+
+ ```python
+ BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True
+ )
+ ```
+
+ **Use case**: 75% memory reduction with <1% accuracy loss
+
+ ### QLoRA Training (Single GPU)
+
+ ```python
+ BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True
+ )
+ ```
+
+ **Use case**: Fine-tune models up to ~33B on a single 24 GB RTX 3090 (a 70B model's 4-bit weights alone need ~35 GB)
+
+ ### FSDP + QLoRA (Multi-GPU)
+
+ ```python
+ BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_storage=torch.bfloat16  # CRITICAL!
+ )
+ ```
+
+ **Use case**: Fine-tune 405B on 8×H100
+
+ ## Advanced: Block-wise Quantization
+
+ bitsandbytes uses block-wise quantization:
+ - Weights are divided into blocks (typically 64 or 128 elements)
+ - Each block has its own scaling factor
+ - Better accuracy than per-tensor quantization
+
+ **Block size** (automatically determined):
+ ```python
+ # Typical block sizes
+ # 4-bit: 64 elements per block
+ # 8-bit: 64 elements per block
+ ```
+
+ **Cannot be configured** (internal implementation detail).
+
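+ To see why per-block scales help, compare absmax INT8 quantization error with one scale for the whole tensor versus one scale per 64-element block (a toy illustration, not the bitsandbytes kernels):
+
+ ```python
+ import torch
+
+ def absmax_int8(x):
+     scale = x.abs().max() / 127.0
+     return (x / scale).round().clamp(-127, 127) * scale  # quantize + dequantize
+
+ w = torch.randn(4096)
+ w[0] = 50.0  # one outlier ruins a single shared scale
+
+ per_tensor_err = (absmax_int8(w) - w).abs().mean()
+ per_block_err = torch.cat([absmax_int8(b) for b in w.split(64)]).sub(w).abs().mean()
+ print(f"per-tensor: {per_tensor_err:.4f}, per-block: {per_block_err:.4f}")
+ # The outlier inflates only its own block's error, not the whole tensor's.
+ ```
+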
+ ## Quantization Quality Metrics
+
+ ### Perplexity (Lower is Better)
+
+ | Model | FP16 | INT8 | NF4 | NF4+DQ |
+ |-------|------|------|-----|--------|
+ | Llama 2 7B | 5.12 | 5.14 | 5.18 | 5.19 |
+ | Llama 2 13B | 4.88 | 4.90 | 4.93 | 4.94 |
+ | Llama 2 70B | 3.32 | 3.33 | 3.35 | 3.36 |
+
+ **Conclusion**: <1% degradation for all quantization methods
+
+ ### MMLU Accuracy (Higher is Better)
+
+ | Model | FP16 | INT8 | NF4 | FP4 |
+ |-------|------|------|-----|-----|
+ | Llama 2 7B | 45.9% | 45.7% | 45.2% | 43.8% |
+ | Llama 2 13B | 54.8% | 54.6% | 54.1% | 52.9% |
+ | Llama 2 70B | 68.9% | 68.7% | 68.4% | 67.2% |
+
+ **Conclusion**: NF4 is significantly better than FP4 for transformers
+
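+ To measure perplexity on your own data, a minimal loop looks like this (a sketch; `eval.txt` is a placeholder for any held-out text, and dataset choice, context length, and striding all affect the absolute values):
+
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+ name = "meta-llama/Llama-2-7b-hf"
+ tok = AutoTokenizer.from_pretrained(name)
+ model = AutoModelForCausalLM.from_pretrained(
+     name,
+     quantization_config=BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4"),
+     device_map="auto"
+ )
+
+ ids = tok(open("eval.txt").read(), return_tensors="pt").input_ids[:, :2048]
+ ids = ids.to(model.device)
+ with torch.no_grad():
+     loss = model(ids, labels=ids).loss  # mean next-token cross-entropy
+ print(f"perplexity: {loss.exp().item():.2f}")
+ ```
+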
+ ## Troubleshooting
+
+ ### "Quantization failed" Error
+
+ Try a different quant type:
+ ```python
+ # If NF4 fails
+ bnb_4bit_quant_type="fp4"
+ ```
+
+ ### Numerical Instability
+
+ Use BF16 compute:
+ ```python
+ bnb_4bit_compute_dtype=torch.bfloat16
+ ```
+
+ ### Poor Quality with 4-bit
+
+ 1. Try 8-bit instead:
+ ```python
+ load_in_8bit=True
+ ```
+
+ 2. Enable double quantization:
+ ```python
+ bnb_4bit_use_double_quant=True
+ ```
+
+ 3. Use the BF16 compute dtype
+
+ ### FSDP Errors
+
+ Ensure `bnb_4bit_quant_storage` matches `torch_dtype`:
+ ```python
+ bnb_4bit_quant_storage=torch.bfloat16
+ torch_dtype=torch.bfloat16  # Must match!
+ ```
+
+ ## References
+
+ - LLM.int8() paper: "LLM.int8(): 8-bit Matrix Multiplication for Transformers at Scale" (2022)
+ - QLoRA paper: "QLoRA: Efficient Finetuning of Quantized LLMs" (2023)
+ - bitsandbytes GitHub: https://github.com/bitsandbytes-foundation/bitsandbytes
+ - HuggingFace quantization docs: https://huggingface.co/docs/transformers/quantization/bitsandbytes
EvoScientist/skills/find-skills/SKILL.md
@@ -0,0 +1,133 @@
+ ---
+ name: find-skills
+ description: Helps users discover and install agent skills when they ask questions like "how do I do X", "find a skill for X", or express interest in extending capabilities. Uses a non-interactive installer script suitable for automated agents.
+ ---
+
+ # Find Skills
+
+ This skill helps you discover and install skills from the open agent skills ecosystem.
+
+ ## When to Use This Skill
+
+ Use this skill when the user:
+
+ - Asks "how do I do X" where X might be a common task with an existing skill
+ - Says "find a skill for X" or "is there a skill for X"
+ - Wants to search for tools, templates, or workflows
+ - Expresses interest in extending agent capabilities
+ - Mentions they wish they had help with a specific domain (design, testing, deployment, etc.)
+
+ ## Step 1: Search for Skills
+
+ Use `npx -y skills find` with a relevant keyword to search the ecosystem:
+
+ ```bash
+ npx -y skills find [query]
+ ```
+
+ Examples:
+ - User asks "help me with React performance" → `npx -y skills find react performance`
+ - User asks "is there a skill for PR reviews?" → `npx -y skills find pr review`
+ - User asks "I need to create a changelog" → `npx -y skills find changelog`
+
+ The search results will show installable skills like:
+
+ ```
+ vercel-labs/agent-skills@vercel-react-best-practices
+ └ https://skills.sh/vercel-labs/agent-skills/vercel-react-best-practices
+ ```
+
+ Browse all available skills at: https://skills.sh/
+
+ ## Step 2: Present Options
+
+ When you find relevant skills, present them to the user with:
+ 1. The skill name and what it does
+ 2. A link to learn more on skills.sh
+
+ Ask the user which skill(s) they want to install. All skills are installed to `./skills/` in the current working directory.
+
+ ## Step 3: Install with the Script
+
+ **IMPORTANT: Do NOT use `npx -y skills add` for installation**; it requires interactive prompts.
+
+ Use the bundled installer script instead:
+
+ ```bash
+ python /skills/find-skills/scripts/install_skill.py --url <github_url>
+ ```
+
+ ### Install Commands
+
+ **From a GitHub URL** (most common; copy the URL from the search results):
+ ```bash
+ python /skills/find-skills/scripts/install_skill.py \
+   --url https://github.com/owner/repo/tree/main/skill-name
+ ```
+
+ **From skills.sh shorthand** (owner/repo@skill):
+ ```bash
+ python /skills/find-skills/scripts/install_skill.py \
+   --url vercel-labs/agent-skills@vercel-react-best-practices
+ ```
+
+ **From repo + path** (install specific skills from a multi-skill repo):
+ ```bash
+ # Single skill
+ python /skills/find-skills/scripts/install_skill.py \
+   --repo owner/repo --path skill-name
+
+ # Multiple skills from the same repo
+ python /skills/find-skills/scripts/install_skill.py \
+   --repo owner/repo --path skill-a --path skill-b
+ ```
+
+ **With a specific git branch or tag**:
+ ```bash
+ python /skills/find-skills/scripts/install_skill.py \
+   --repo owner/repo --path skill-name --ref v2.0
+ ```
+
+ ### Installer Options
+
+ | Option | Description |
+ |--------|-------------|
+ | `--url` | GitHub URL or owner/repo@skill shorthand |
+ | `--repo` | GitHub repo (owner/repo format) |
+ | `--path` | Path to skill inside repo (repeatable) |
+ | `--ref` | Git branch or tag |
+ | `--dest` | Custom destination directory (default: `./skills`) |
+
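+ For example, to install one skill from a tagged release into a custom directory (hypothetical `owner/repo`, `skill-name`, and tag; only the flags documented above are used):
+
+ ```bash
+ python /skills/find-skills/scripts/install_skill.py \
+   --repo owner/repo --path skill-name --ref v2.0 --dest ./my-skills
+ ```
+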
+ ## Step 4: Confirm Installation
+
+ After installation, verify by listing the skills directory:
+
+ ```bash
+ ls /skills/  # all skills (system + user merged)
+ ```
+
+ Then read the installed skill's SKILL.md to confirm it loaded correctly:
+
+ ```bash
+ read_file /skills/<skill-name>/SKILL.md
+ ```
+
+ ## Common Skill Categories
+
+ | Category | Example Queries |
+ |----------|----------------|
+ | Web Development | react, nextjs, typescript, css, tailwind |
+ | Testing | testing, jest, playwright, e2e |
+ | DevOps | deploy, docker, kubernetes, ci-cd |
+ | Documentation | docs, readme, changelog, api-docs |
+ | Code Quality | review, lint, refactor, best-practices |
+ | Design | ui, ux, design-system, accessibility |
+ | Productivity | workflow, automation, git |
+
+ ## When No Skills Are Found
+
+ If no relevant skills exist:
+
+ 1. Acknowledge that no existing skill was found
+ 2. Offer to help with the task directly using your general capabilities
+ 3. Mention the user could create their own skill with `npx -y skills init`