EvoScientist 0.0.1.dev4__py3-none-any.whl → 0.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. EvoScientist/EvoScientist.py +26 -62
  2. EvoScientist/__init__.py +0 -19
  3. EvoScientist/backends.py +0 -26
  4. EvoScientist/cli.py +1111 -498
  5. EvoScientist/middleware.py +8 -61
  6. EvoScientist/stream/__init__.py +0 -25
  7. EvoScientist/stream/utils.py +16 -23
  8. EvoScientist/tools.py +2 -75
  9. evoscientist-0.1.0rc1.dist-info/METADATA +199 -0
  10. evoscientist-0.1.0rc1.dist-info/RECORD +21 -0
  11. evoscientist-0.1.0rc1.dist-info/entry_points.txt +2 -0
  12. EvoScientist/config.py +0 -274
  13. EvoScientist/llm/__init__.py +0 -21
  14. EvoScientist/llm/models.py +0 -99
  15. EvoScientist/memory.py +0 -715
  16. EvoScientist/onboard.py +0 -725
  17. EvoScientist/paths.py +0 -44
  18. EvoScientist/skills/accelerate/SKILL.md +0 -332
  19. EvoScientist/skills/accelerate/references/custom-plugins.md +0 -453
  20. EvoScientist/skills/accelerate/references/megatron-integration.md +0 -489
  21. EvoScientist/skills/accelerate/references/performance.md +0 -525
  22. EvoScientist/skills/bitsandbytes/SKILL.md +0 -411
  23. EvoScientist/skills/bitsandbytes/references/memory-optimization.md +0 -521
  24. EvoScientist/skills/bitsandbytes/references/qlora-training.md +0 -521
  25. EvoScientist/skills/bitsandbytes/references/quantization-formats.md +0 -447
  26. EvoScientist/skills/find-skills/SKILL.md +0 -133
  27. EvoScientist/skills/find-skills/scripts/install_skill.py +0 -211
  28. EvoScientist/skills/flash-attention/SKILL.md +0 -367
  29. EvoScientist/skills/flash-attention/references/benchmarks.md +0 -215
  30. EvoScientist/skills/flash-attention/references/transformers-integration.md +0 -293
  31. EvoScientist/skills/llama-cpp/SKILL.md +0 -258
  32. EvoScientist/skills/llama-cpp/references/optimization.md +0 -89
  33. EvoScientist/skills/llama-cpp/references/quantization.md +0 -213
  34. EvoScientist/skills/llama-cpp/references/server.md +0 -125
  35. EvoScientist/skills/lm-evaluation-harness/SKILL.md +0 -490
  36. EvoScientist/skills/lm-evaluation-harness/references/api-evaluation.md +0 -490
  37. EvoScientist/skills/lm-evaluation-harness/references/benchmark-guide.md +0 -488
  38. EvoScientist/skills/lm-evaluation-harness/references/custom-tasks.md +0 -602
  39. EvoScientist/skills/lm-evaluation-harness/references/distributed-eval.md +0 -519
  40. EvoScientist/skills/ml-paper-writing/SKILL.md +0 -937
  41. EvoScientist/skills/ml-paper-writing/references/checklists.md +0 -361
  42. EvoScientist/skills/ml-paper-writing/references/citation-workflow.md +0 -562
  43. EvoScientist/skills/ml-paper-writing/references/reviewer-guidelines.md +0 -367
  44. EvoScientist/skills/ml-paper-writing/references/sources.md +0 -159
  45. EvoScientist/skills/ml-paper-writing/references/writing-guide.md +0 -476
  46. EvoScientist/skills/ml-paper-writing/templates/README.md +0 -251
  47. EvoScientist/skills/ml-paper-writing/templates/aaai2026/README.md +0 -534
  48. EvoScientist/skills/ml-paper-writing/templates/aaai2026/aaai2026-unified-supp.tex +0 -144
  49. EvoScientist/skills/ml-paper-writing/templates/aaai2026/aaai2026-unified-template.tex +0 -952
  50. EvoScientist/skills/ml-paper-writing/templates/aaai2026/aaai2026.bib +0 -111
  51. EvoScientist/skills/ml-paper-writing/templates/aaai2026/aaai2026.bst +0 -1493
  52. EvoScientist/skills/ml-paper-writing/templates/aaai2026/aaai2026.sty +0 -315
  53. EvoScientist/skills/ml-paper-writing/templates/acl/README.md +0 -50
  54. EvoScientist/skills/ml-paper-writing/templates/acl/acl.sty +0 -312
  55. EvoScientist/skills/ml-paper-writing/templates/acl/acl_latex.tex +0 -377
  56. EvoScientist/skills/ml-paper-writing/templates/acl/acl_lualatex.tex +0 -101
  57. EvoScientist/skills/ml-paper-writing/templates/acl/acl_natbib.bst +0 -1940
  58. EvoScientist/skills/ml-paper-writing/templates/acl/anthology.bib.txt +0 -26
  59. EvoScientist/skills/ml-paper-writing/templates/acl/custom.bib +0 -70
  60. EvoScientist/skills/ml-paper-writing/templates/acl/formatting.md +0 -326
  61. EvoScientist/skills/ml-paper-writing/templates/colm2025/README.md +0 -3
  62. EvoScientist/skills/ml-paper-writing/templates/colm2025/colm2025_conference.bib +0 -11
  63. EvoScientist/skills/ml-paper-writing/templates/colm2025/colm2025_conference.bst +0 -1440
  64. EvoScientist/skills/ml-paper-writing/templates/colm2025/colm2025_conference.pdf +0 -0
  65. EvoScientist/skills/ml-paper-writing/templates/colm2025/colm2025_conference.sty +0 -218
  66. EvoScientist/skills/ml-paper-writing/templates/colm2025/colm2025_conference.tex +0 -305
  67. EvoScientist/skills/ml-paper-writing/templates/colm2025/fancyhdr.sty +0 -485
  68. EvoScientist/skills/ml-paper-writing/templates/colm2025/math_commands.tex +0 -508
  69. EvoScientist/skills/ml-paper-writing/templates/colm2025/natbib.sty +0 -1246
  70. EvoScientist/skills/ml-paper-writing/templates/iclr2026/fancyhdr.sty +0 -485
  71. EvoScientist/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.bib +0 -24
  72. EvoScientist/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.bst +0 -1440
  73. EvoScientist/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.pdf +0 -0
  74. EvoScientist/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.sty +0 -246
  75. EvoScientist/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.tex +0 -414
  76. EvoScientist/skills/ml-paper-writing/templates/iclr2026/math_commands.tex +0 -508
  77. EvoScientist/skills/ml-paper-writing/templates/iclr2026/natbib.sty +0 -1246
  78. EvoScientist/skills/ml-paper-writing/templates/icml2026/algorithm.sty +0 -79
  79. EvoScientist/skills/ml-paper-writing/templates/icml2026/algorithmic.sty +0 -201
  80. EvoScientist/skills/ml-paper-writing/templates/icml2026/example_paper.bib +0 -75
  81. EvoScientist/skills/ml-paper-writing/templates/icml2026/example_paper.pdf +0 -0
  82. EvoScientist/skills/ml-paper-writing/templates/icml2026/example_paper.tex +0 -662
  83. EvoScientist/skills/ml-paper-writing/templates/icml2026/fancyhdr.sty +0 -864
  84. EvoScientist/skills/ml-paper-writing/templates/icml2026/icml2026.bst +0 -1443
  85. EvoScientist/skills/ml-paper-writing/templates/icml2026/icml2026.sty +0 -767
  86. EvoScientist/skills/ml-paper-writing/templates/icml2026/icml_numpapers.pdf +0 -0
  87. EvoScientist/skills/ml-paper-writing/templates/neurips2025/Makefile +0 -36
  88. EvoScientist/skills/ml-paper-writing/templates/neurips2025/extra_pkgs.tex +0 -53
  89. EvoScientist/skills/ml-paper-writing/templates/neurips2025/main.tex +0 -38
  90. EvoScientist/skills/ml-paper-writing/templates/neurips2025/neurips.sty +0 -382
  91. EvoScientist/skills/peft/SKILL.md +0 -431
  92. EvoScientist/skills/peft/references/advanced-usage.md +0 -514
  93. EvoScientist/skills/peft/references/troubleshooting.md +0 -480
  94. EvoScientist/skills/ray-data/SKILL.md +0 -326
  95. EvoScientist/skills/ray-data/references/integration.md +0 -82
  96. EvoScientist/skills/ray-data/references/transformations.md +0 -83
  97. EvoScientist/skills/skill-creator/LICENSE.txt +0 -202
  98. EvoScientist/skills/skill-creator/SKILL.md +0 -356
  99. EvoScientist/skills/skill-creator/references/output-patterns.md +0 -82
  100. EvoScientist/skills/skill-creator/references/workflows.md +0 -28
  101. EvoScientist/skills/skill-creator/scripts/init_skill.py +0 -303
  102. EvoScientist/skills/skill-creator/scripts/package_skill.py +0 -110
  103. EvoScientist/skills/skill-creator/scripts/quick_validate.py +0 -95
  104. EvoScientist/skills_manager.py +0 -391
  105. EvoScientist/stream/display.py +0 -604
  106. EvoScientist/stream/events.py +0 -415
  107. EvoScientist/stream/state.py +0 -343
  108. evoscientist-0.0.1.dev4.dist-info/METADATA +0 -367
  109. evoscientist-0.0.1.dev4.dist-info/RECORD +0 -117
  110. evoscientist-0.0.1.dev4.dist-info/entry_points.txt +0 -5
  111. {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc1.dist-info}/WHEEL +0 -0
  112. {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc1.dist-info}/licenses/LICENSE +0 -0
  113. {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,514 +0,0 @@
- # PEFT Advanced Usage Guide
-
- ## Advanced LoRA Variants
-
- ### DoRA (Weight-Decomposed Low-Rank Adaptation)
-
- DoRA decomposes each weight matrix into magnitude and direction components, often achieving better results than standard LoRA:
-
- ```python
- from peft import LoraConfig, get_peft_model
-
- dora_config = LoraConfig(
-     r=16,
-     lora_alpha=32,
-     target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
-     use_dora=True,  # Enable DoRA
-     task_type="CAUSAL_LM"
- )
-
- model = get_peft_model(model, dora_config)
- ```
-
- **When to use DoRA**:
- - Consistently outperforms LoRA on instruction-following tasks
- - Slightly higher memory (~10%) due to the extra magnitude vectors
- - Best for quality-critical fine-tuning; a quick way to verify it is active is shown below
-
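- After wrapping the model, it can be worth confirming that DoRA is actually active. A minimal check, assuming the PEFT-internal parameter name `lora_magnitude_vector` (an implementation detail that may differ across PEFT releases):
-
- ```python
- # Count DoRA magnitude parameters alongside the usual LoRA A/B factors
- dora_params = sum(p.numel() for n, p in model.named_parameters()
-                   if "lora_magnitude_vector" in n)
- ab_params = sum(p.numel() for n, p in model.named_parameters()
-                 if "lora_A" in n or "lora_B" in n)
- print(f"DoRA magnitude params: {dora_params}, LoRA A/B params: {ab_params}")
- ```
-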
- ### AdaLoRA (Adaptive Rank)
-
- Automatically adjusts rank per layer based on importance:
-
- ```python
- from peft import AdaLoraConfig
-
- adalora_config = AdaLoraConfig(
-     init_r=64,            # Initial rank
-     target_r=16,          # Target average rank
-     tinit=200,            # Warmup steps
-     tfinal=1000,          # Final pruning step
-     deltaT=10,            # Rank update frequency
-     beta1=0.85,
-     beta2=0.85,
-     orth_reg_weight=0.5,  # Orthogonality regularization
-     target_modules=["q_proj", "v_proj"],
-     task_type="CAUSAL_LM"
- )
- ```
-
- **Benefits**:
- - Allocates more rank to important layers
- - Can reduce total parameters while maintaining quality
- - Good for exploring the optimal rank distribution; see the training-loop sketch below
-
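- Unlike plain LoRA, AdaLoRA re-allocates its rank budget during training, so the training loop has to report the current step. A minimal sketch following the pattern in PEFT's `AdaLoraModel` docstring (`update_and_allocate` is the relevant hook; details may vary by PEFT version):
-
- ```python
- from peft import get_peft_model
-
- model = get_peft_model(model, adalora_config)
-
- for step, batch in enumerate(dataloader):
-     loss = model(**batch).loss
-     loss.backward()
-     optimizer.step()
-     # Let AdaLoRA prune/grow ranks based on accumulated importance scores
-     model.base_model.update_and_allocate(step)
-     optimizer.zero_grad()
- ```
-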
- ### LoRA+ (Asymmetric Learning Rates)
-
- LoRA+ uses different learning rates for the A and B matrices:
-
- ```python
- from peft import LoraConfig
-
- # LoRA+ uses a higher LR for the B matrix
- lora_plus_config = LoraConfig(
-     r=16,
-     lora_alpha=32,
-     target_modules="all-linear",
-     use_rslora=True,  # Rank-stabilized LoRA (a related but distinct technique)
- )
-
- # Manual implementation of LoRA+
- from torch.optim import AdamW
-
- # Group parameters by matrix type
- lora_A_params = [p for n, p in model.named_parameters() if "lora_A" in n]
- lora_B_params = [p for n, p in model.named_parameters() if "lora_B" in n]
-
- optimizer = AdamW([
-     {"params": lora_A_params, "lr": 1e-4},
-     {"params": lora_B_params, "lr": 1e-3},  # 10x higher for B
- ])
- ```
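-
- To train with these grouped learning rates under the HF `Trainer`, the optimizer can be passed in directly (a usage sketch; passing `None` for the scheduler falls back to the Trainer's default):
-
- ```python
- from transformers import Trainer
-
- trainer = Trainer(
-     model=model,
-     args=training_args,
-     train_dataset=dataset,
-     optimizers=(optimizer, None),  # custom LoRA+ optimizer, default LR scheduler
- )
- ```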
-
- ### rsLoRA (Rank-Stabilized LoRA)
-
- Scales LoRA outputs to stabilize training across different ranks:
-
- ```python
- lora_config = LoraConfig(
-     r=64,
-     lora_alpha=64,
-     use_rslora=True,  # Enables rank-stabilized scaling
-     target_modules="all-linear"
- )
- ```
-
- **When to use**:
- - When experimenting with different ranks
- - Helps maintain consistent behavior across rank values
- - Recommended for r > 32; the scaling difference is illustrated below
-
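- The only difference from standard LoRA is the scaling factor applied to the adapter update: LoRA scales by `alpha / r`, while rsLoRA scales by `alpha / sqrt(r)`, so the update no longer shrinks as the rank grows. A quick numeric illustration:
-
- ```python
- import math
-
- r, alpha = 64, 64
- print(alpha / r)             # standard LoRA scaling: 1.0
- print(alpha / math.sqrt(r))  # rsLoRA scaling: 8.0
- ```
-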
- ## LoftQ (LoRA-Fine-Tuning-aware Quantization)
-
- Initializes LoRA weights to compensate for quantization error:
-
- ```python
- from peft import LoftQConfig, LoraConfig, get_peft_model
- from transformers import AutoModelForCausalLM
-
- # LoftQ configuration
- loftq_config = LoftQConfig(
-     loftq_bits=4,  # Quantization bits
-     loftq_iter=5,  # Alternating optimization iterations
- )
-
- # LoRA config with LoftQ initialization
- lora_config = LoraConfig(
-     r=16,
-     lora_alpha=32,
-     target_modules="all-linear",
-     init_lora_weights="loftq",
-     loftq_config=loftq_config,
-     task_type="CAUSAL_LM"
- )
-
- # Load the base model in full precision: LoftQ needs the original weights
- # to compute the quantization error, so do not pass a BitsAndBytesConfig here
- model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
-
- model = get_peft_model(model, lora_config)
- ```
-
- **Benefits over standard QLoRA**:
- - Better initial quality after quantization
- - Faster convergence
- - ~1-2% better final accuracy on benchmarks
-
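- For a model that is already loaded in 4-bit with bitsandbytes, PEFT also provides a helper that swaps LoftQ-initialized weights into an existing LoRA model. A hedged sketch (check your PEFT version for `replace_lora_weights_loftq` availability and its exact signature):
-
- ```python
- from peft.utils.loftq_utils import replace_lora_weights_loftq
-
- # peft_model: a LoRA-wrapped model whose base is bnb-4bit quantized
- replace_lora_weights_loftq(peft_model)
- ```
-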
- ## Custom Module Targeting
-
- ### Target specific layers
-
- ```python
- # Target only first and last transformer layers
-
- # Option 1: target modules by their full dotted path
- lora_config = LoraConfig(
-     r=16,
-     lora_alpha=32,
-     target_modules=["model.layers.0.self_attn.q_proj",
-                     "model.layers.0.self_attn.v_proj",
-                     "model.layers.31.self_attn.q_proj",
-                     "model.layers.31.self_attn.v_proj"],
- )
-
- # Option 2 (alternative approach): target by suffix, restricted to given layers
- lora_config = LoraConfig(
-     r=16,
-     lora_alpha=32,
-     target_modules=["q_proj", "v_proj"],
-     layers_to_transform=[0, 31],
- )
- ```
-
- ### Layer pattern matching
-
- ```python
- # Target layers 0-10 only
- lora_config = LoraConfig(
-     r=16,
-     lora_alpha=32,
-     target_modules="all-linear",
-     layers_to_transform=list(range(11)),  # Layers 0-10
-     layers_pattern="layers"  # Name of the module list holding the layers
- )
- ```
-
- ### Exclude specific layers
-
- ```python
- lora_config = LoraConfig(
-     r=16,
-     target_modules="all-linear",
-     # lm_head is trained fully and left out of the LoRA adaptation
-     modules_to_save=["lm_head"],
- )
- ```
-
- ## Embedding and LM Head Training
-
- ### Train embeddings with LoRA
-
- ```python
- from peft import LoraConfig
-
- # Include embeddings among the adapted modules
- lora_config = LoraConfig(
-     r=16,
-     lora_alpha=32,
-     target_modules=["q_proj", "v_proj", "embed_tokens"],  # Include embeddings
-     modules_to_save=["lm_head"],  # Train lm_head fully
- )
- ```
-
- ### Extending vocabulary with LoRA
-
- ```python
- from transformers import AutoModelForCausalLM, AutoTokenizer
- from peft import get_peft_model, LoraConfig
-
- # Add new tokens
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")
- new_tokens = ["<custom_token_1>", "<custom_token_2>"]
- tokenizer.add_tokens(new_tokens)
-
- # Resize model embeddings to match the new vocabulary
- model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
- model.resize_token_embeddings(len(tokenizer))
-
- # Configure LoRA to train the new embeddings
- lora_config = LoraConfig(
-     r=16,
-     target_modules="all-linear",
-     modules_to_save=["embed_tokens", "lm_head"],  # Train these fully
- )
-
- model = get_peft_model(model, lora_config)
- ```
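-
- When the vocabulary has been extended, the resized embeddings need to be saved together with the adapter. A short sketch, assuming a reasonably recent PEFT (`save_embedding_layers` defaults to "auto", which tries to detect resized embeddings; `True` forces saving them):
-
- ```python
- model.save_pretrained("./vocab-extended-adapter", save_embedding_layers=True)
- ```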
-
- ## Multi-Adapter Patterns
-
- ### Adapter composition
-
- ```python
- from peft import AutoPeftModelForCausalLM
-
- # Load model with multiple adapters
- model = AutoPeftModelForCausalLM.from_pretrained("./base-adapter")
- model.load_adapter("./style-adapter", adapter_name="style")
- model.load_adapter("./task-adapter", adapter_name="task")
-
- # Combine adapters (weighted sum)
- model.add_weighted_adapter(
-     adapters=["style", "task"],
-     weights=[0.7, 0.3],
-     adapter_name="combined",
-     combination_type="linear"  # or "cat", "svd"
- )
-
- model.set_adapter("combined")
- ```
-
- ### Adapter stacking
-
- ```python
- # Stack adapters by concatenating their low-rank factors
- # (effective rank = sum of the individual ranks)
- model.add_weighted_adapter(
-     adapters=["base", "domain", "task"],
-     weights=[1.0, 1.0, 1.0],
-     adapter_name="stacked",
-     combination_type="cat"  # Concatenate adapter outputs
- )
- ```
-
- ### Dynamic adapter switching
-
- ```python
- import torch
- from peft import AutoPeftModelForCausalLM
- from transformers import AutoTokenizer
-
- class MultiAdapterModel:
-     def __init__(self, default_adapter_path, adapter_paths):
-         # adapter_paths: dict mapping adapter names to adapter directories
-         self.model = AutoPeftModelForCausalLM.from_pretrained(default_adapter_path)
-         self.tokenizer = AutoTokenizer.from_pretrained(default_adapter_path)
-         for name, path in adapter_paths.items():
-             self.model.load_adapter(path, adapter_name=name)
-
-     def tokenize(self, prompt):
-         return self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
-
-     def generate(self, prompt, adapter_name="default"):
-         self.model.set_adapter(adapter_name)
-         return self.model.generate(**self.tokenize(prompt))
-
-     def generate_ensemble(self, prompt, adapters, weights):
-         """Weighted average of per-adapter logits."""
-         outputs = []
-         for adapter, weight in zip(adapters, weights):
-             self.model.set_adapter(adapter)
-             logits = self.model(**self.tokenize(prompt)).logits
-             outputs.append(weight * logits)
-         return torch.stack(outputs).sum(dim=0)
- ```
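-
- Example wiring for the class above (paths are illustrative placeholders):
-
- ```python
- adapters = MultiAdapterModel(
-     "./base-adapter",
-     {"style": "./style-adapter", "task": "./task-adapter"},
- )
- output_ids = adapters.generate("Summarize this abstract:", adapter_name="style")
- ```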
-
- ## Memory Optimization
-
- ### Gradient checkpointing with LoRA
-
- ```python
- from peft import prepare_model_for_kbit_training
-
- # Enable gradient checkpointing
- model = prepare_model_for_kbit_training(
-     model,
-     use_gradient_checkpointing=True,
-     gradient_checkpointing_kwargs={"use_reentrant": False}
- )
- ```
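-
- For a full-precision (non-quantized) LoRA run, the equivalent setup can be done directly on the transformers model; a small sketch (`enable_input_require_grads` keeps a gradient path through the frozen embeddings when checkpointing is on):
-
- ```python
- model.gradient_checkpointing_enable(
-     gradient_checkpointing_kwargs={"use_reentrant": False}
- )
- model.enable_input_require_grads()  # needed because the base model is frozen
- ```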
-
- ### CPU offloading for training
-
- ```python
- from accelerate import Accelerator
- from accelerate.utils import DeepSpeedPlugin
-
- # Offload optimizer states to CPU via DeepSpeed ZeRO stage 2
- ds_plugin = DeepSpeedPlugin(zero_stage=2, offload_optimizer_device="cpu")
-
- accelerator = Accelerator(
-     mixed_precision="bf16",
-     gradient_accumulation_steps=8,
-     deepspeed_plugin=ds_plugin,
- )
-
- model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
- ```
-
- ### Memory-efficient attention with LoRA
-
- ```python
- import torch
- from transformers import AutoModelForCausalLM
- from peft import get_peft_model
-
- # Combine Flash Attention 2 with LoRA
- model = AutoModelForCausalLM.from_pretrained(
-     "meta-llama/Llama-3.1-8B",
-     attn_implementation="flash_attention_2",
-     torch_dtype=torch.bfloat16
- )
-
- # Apply LoRA
- model = get_peft_model(model, lora_config)
- ```
-
- ## Inference Optimization
-
- ### Merge for deployment
-
- ```python
- from transformers import AutoModelForCausalLM, BitsAndBytesConfig
-
- # Merge adapter weights into the base model and save the result
- merged_model = model.merge_and_unload()
- merged_model.save_pretrained("./merged-model")
-
- # Quantize the merged model for inference
- bnb_config = BitsAndBytesConfig(load_in_4bit=True)
- quantized_model = AutoModelForCausalLM.from_pretrained(
-     "./merged-model",
-     quantization_config=bnb_config
- )
- ```
-
- ### Export to different formats
-
- ```python
- # Export to GGUF (llama.cpp)
- # First merge, then convert
- merged_model.save_pretrained("./merged-model")
-
- # Use the llama.cpp converter script:
- # python convert-hf-to-gguf.py ./merged-model --outfile model.gguf
-
- # Export to ONNX
- from optimum.onnxruntime import ORTModelForCausalLM
-
- ort_model = ORTModelForCausalLM.from_pretrained(
-     "./merged-model",
-     export=True
- )
- ort_model.save_pretrained("./onnx-model")
- ```
-
- ### Batch adapter inference
-
- ```python
- from vllm import LLM
- from vllm.lora.request import LoRARequest
-
- # Initialize with LoRA support
- llm = LLM(
-     model="meta-llama/Llama-3.1-8B",
-     enable_lora=True,
-     max_lora_rank=64,
-     max_loras=4  # Max concurrent adapters
- )
-
- # Batch with different adapters
- requests = [
-     ("prompt1", LoRARequest("adapter1", 1, "./adapter1")),
-     ("prompt2", LoRARequest("adapter2", 2, "./adapter2")),
-     ("prompt3", LoRARequest("adapter1", 1, "./adapter1")),
- ]
-
- outputs = llm.generate(
-     [r[0] for r in requests],
-     lora_request=[r[1] for r in requests]
- )
- ```
-
- ## Training Recipes
-
- ### Instruction tuning recipe
-
- ```python
- from transformers import TrainingArguments
-
- lora_config = LoraConfig(
-     r=16,
-     lora_alpha=32,
-     lora_dropout=0.05,
-     target_modules="all-linear",
-     bias="none",
-     task_type="CAUSAL_LM"
- )
-
- training_args = TrainingArguments(
-     output_dir="./output",
-     num_train_epochs=3,
-     per_device_train_batch_size=4,
-     gradient_accumulation_steps=4,
-     learning_rate=2e-4,
-     lr_scheduler_type="cosine",
-     warmup_ratio=0.03,
-     bf16=True,
-     logging_steps=10,
-     save_strategy="steps",
-     save_steps=100,
-     eval_strategy="steps",
-     eval_steps=100,
- )
- ```
-
- ### Code generation recipe
-
- ```python
- from trl import SFTConfig
-
- lora_config = LoraConfig(
-     r=32,  # Higher rank for code
-     lora_alpha=64,
-     lora_dropout=0.1,
-     target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
-     bias="none",
-     task_type="CAUSAL_LM"
- )
-
- # max_seq_length lives in TRL's SFTConfig, not in TrainingArguments
- training_args = SFTConfig(
-     learning_rate=1e-4,   # Lower LR for code
-     num_train_epochs=2,
-     max_seq_length=2048,  # Longer sequences
- )
- ```
-
- ### Conversational/Chat recipe
-
- ```python
- from trl import SFTTrainer
-
- lora_config = LoraConfig(
-     r=16,
-     lora_alpha=16,  # alpha = r for chat
-     lora_dropout=0.05,
-     target_modules="all-linear"
- )
-
- # Use the tokenizer's chat template
- def format_chat(example):
-     messages = [
-         {"role": "user", "content": example["instruction"]},
-         {"role": "assistant", "content": example["response"]},
-     ]
-     return {"text": tokenizer.apply_chat_template(messages, tokenize=False)}
-
- trainer = SFTTrainer(
-     model=model,
-     peft_config=lora_config,
-     train_dataset=dataset.map(format_chat),
-     max_seq_length=1024,  # newer TRL versions take this via SFTConfig instead
- )
- ```
-
- ## Debugging and Validation
-
- ### Verify adapter application
-
- ```python
- # Check which modules have LoRA
- for name, module in model.named_modules():
-     if hasattr(module, "lora_A"):
-         print(f"LoRA applied to: {name}")
-
- # Print detailed config
- print(model.peft_config)
-
- # Check adapter state
- print(f"Active adapters: {model.active_adapters}")
- print(f"Trainable: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
- ```
-
- ### Compare with base model
-
- ```python
- # Generate with adapter
- model.set_adapter("default")
- adapter_output = model.generate(**inputs)
-
- # Generate without adapter
- with model.disable_adapter():
-     base_output = model.generate(**inputs)
-
- print(f"Adapter: {tokenizer.decode(adapter_output[0])}")
- print(f"Base: {tokenizer.decode(base_output[0])}")
- ```
-
- ### Monitor training metrics
-
- ```python
- from transformers import TrainerCallback
-
- class LoRACallback(TrainerCallback):
-     def on_log(self, args, state, control, logs=None, **kwargs):
-         if logs and "loss" in logs:
-             # Log adapter-specific metrics
-             model = kwargs["model"]
-             lora_params = sum(p.numel() for n, p in model.named_parameters()
-                               if "lora" in n and p.requires_grad)
-             print(f"Step {state.global_step}: loss={logs['loss']:.4f}, lora_params={lora_params}")
- ```
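-
- Hooking the callback into a run (a usage sketch; `Trainer` is the standard transformers trainer, but the same works with TRL's `SFTTrainer`):
-
- ```python
- from transformers import Trainer
-
- trainer = Trainer(
-     model=model,
-     args=training_args,
-     train_dataset=dataset,
-     callbacks=[LoRACallback()],  # log LoRA stats alongside the loss
- )
- ```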