EvoScientist 0.0.1.dev4__py3-none-any.whl → 0.1.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- EvoScientist/EvoScientist.py +25 -61
- EvoScientist/__init__.py +0 -19
- EvoScientist/backends.py +0 -26
- EvoScientist/cli.py +1365 -480
- EvoScientist/middleware.py +7 -56
- EvoScientist/skills/clip/SKILL.md +253 -0
- EvoScientist/skills/clip/references/applications.md +207 -0
- EvoScientist/skills/langgraph-docs/SKILL.md +36 -0
- EvoScientist/skills/tensorboard/SKILL.md +629 -0
- EvoScientist/skills/tensorboard/references/integrations.md +638 -0
- EvoScientist/skills/tensorboard/references/profiling.md +545 -0
- EvoScientist/skills/tensorboard/references/visualization.md +620 -0
- EvoScientist/skills/vllm/SKILL.md +364 -0
- EvoScientist/skills/vllm/references/optimization.md +226 -0
- EvoScientist/skills/vllm/references/quantization.md +284 -0
- EvoScientist/skills/vllm/references/server-deployment.md +255 -0
- EvoScientist/skills/vllm/references/troubleshooting.md +447 -0
- EvoScientist/stream/__init__.py +0 -25
- EvoScientist/stream/utils.py +16 -23
- EvoScientist/tools.py +2 -75
- {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc2.dist-info}/METADATA +8 -153
- {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc2.dist-info}/RECORD +26 -24
- evoscientist-0.1.0rc2.dist-info/entry_points.txt +2 -0
- EvoScientist/config.py +0 -274
- EvoScientist/llm/__init__.py +0 -21
- EvoScientist/llm/models.py +0 -99
- EvoScientist/memory.py +0 -715
- EvoScientist/onboard.py +0 -725
- EvoScientist/paths.py +0 -44
- EvoScientist/skills_manager.py +0 -391
- EvoScientist/stream/display.py +0 -604
- EvoScientist/stream/events.py +0 -415
- EvoScientist/stream/state.py +0 -343
- evoscientist-0.0.1.dev4.dist-info/entry_points.txt +0 -5
- {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc2.dist-info}/WHEEL +0 -0
- {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc2.dist-info}/licenses/LICENSE +0 -0
- {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,638 @@
# Framework Integration Guide

Complete guide to integrating TensorBoard with popular ML frameworks.

## Table of Contents
- PyTorch
- TensorFlow/Keras
- PyTorch Lightning
- HuggingFace Transformers
- Fast.ai
- JAX
- scikit-learn

## PyTorch

### Basic Integration

```python
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

# Create writer
writer = SummaryWriter('runs/pytorch_experiment')

# Model and optimizer (ResNet50 and train_loader are assumed to be defined elsewhere)
model = ResNet50()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Log model graph
dummy_input = torch.randn(1, 3, 224, 224)
writer.add_graph(model, dummy_input)

# Training loop
for epoch in range(100):
    model.train()
    train_loss = 0.0

    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        # Log batch metrics
        if batch_idx % 100 == 0:
            global_step = epoch * len(train_loader) + batch_idx
            writer.add_scalar('Loss/train_batch', loss.item(), global_step)

    # Epoch metrics
    train_loss /= len(train_loader)
    writer.add_scalar('Loss/train_epoch', train_loss, epoch)

    # Log histograms
    for name, param in model.named_parameters():
        writer.add_histogram(name, param, epoch)

writer.close()
```

### torchvision Integration

```python
from torchvision.utils import make_grid

# Log an image batch
for batch_idx, (images, labels) in enumerate(train_loader):
    if batch_idx == 0:  # First batch only
        img_grid = make_grid(images[:64], nrow=8)
        writer.add_image('Training_batch', img_grid, epoch)
    break
```
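
If the dataloader applies `Normalize`, grids logged this way look washed out. A minimal unnormalization sketch, assuming the standard ImageNet statistics were used (the `unnormalize` helper is illustrative, not part of torchvision):

```python
import torch
from torchvision.utils import make_grid

# Hypothetical helper: undo Normalize(mean, std) before logging,
# assuming ImageNet statistics were applied in the dataloader.
def unnormalize(images,
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)):
    mean = torch.tensor(mean).view(1, 3, 1, 1)
    std = torch.tensor(std).view(1, 3, 1, 1)
    return images * std + mean

img_grid = make_grid(unnormalize(images[:64]), nrow=8)
writer.add_image('Training_batch_unnormalized', img_grid, epoch)
```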

### Distributed Training

```python
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

# Setup
dist.init_process_group(backend='nccl')
rank = dist.get_rank()

# Only log from rank 0
if rank == 0:
    writer = SummaryWriter('runs/distributed_experiment')

model = DDP(model, device_ids=[rank])

for epoch in range(100):
    train_loss = train_epoch()

    # Log only from rank 0
    if rank == 0:
        writer.add_scalar('Loss/train', train_loss, epoch)
```
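
`train_epoch()` above is assumed. One sketch of what it might look like, averaging the epoch loss across ranks so rank 0 logs a global value rather than its local shard's (assumes one GPU per process and a recent PyTorch, where `dist.ReduceOp.AVG` is available on the NCCL backend):

```python
# Hypothetical train_epoch: one pass over train_loader, loss averaged across ranks.
def train_epoch():
    total = torch.zeros(1, device=f'cuda:{rank}')
    for data, target in train_loader:
        data, target = data.to(rank), target.to(rank)
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()
        total += loss.detach()
    total /= len(train_loader)
    dist.all_reduce(total, op=dist.ReduceOp.AVG)  # average over all ranks
    return total.item()
```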

## TensorFlow/Keras

### Keras Callback

```python
import tensorflow as tf

# TensorBoard callback
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir='logs/keras_experiment',
    histogram_freq=1,        # Log histograms every epoch
    write_graph=True,        # Visualize model graph
    write_images=True,       # Visualize layer weights as images
    update_freq='epoch',     # Log metrics per epoch (or 'batch', or an integer)
    profile_batch='10,20',   # Profile batches 10-20
    embeddings_freq=1        # Log embeddings every epoch
)

# Compile model
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train with callback
history = model.fit(
    x_train, y_train,
    epochs=10,
    validation_data=(x_val, y_val),
    callbacks=[tensorboard_callback]
)
```

### Custom Training Loop

```python
import tensorflow as tf

# Create summary writers
train_summary_writer = tf.summary.create_file_writer('logs/train')
val_summary_writer = tf.summary.create_file_writer('logs/val')

# Training loop
global_step = 0  # running counter; len() is undefined for many tf.data pipelines
for epoch in range(100):
    # Training
    for x_batch, y_batch in train_dataset:
        with tf.GradientTape() as tape:
            predictions = model(x_batch, training=True)
            loss = loss_fn(y_batch, predictions)

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        global_step += 1

        # Log training metrics
        with train_summary_writer.as_default():
            tf.summary.scalar('loss', loss, step=global_step)

    # Validation: accumulate over batches so each epoch logs a single value
    val_losses, val_accs = [], []
    for x_batch, y_batch in val_dataset:
        predictions = model(x_batch, training=False)
        val_losses.append(loss_fn(y_batch, predictions))
        val_accs.append(accuracy_fn(y_batch, predictions))

    # Log validation metrics
    with val_summary_writer.as_default():
        tf.summary.scalar('loss', tf.reduce_mean(val_losses), step=epoch)
        tf.summary.scalar('accuracy', tf.reduce_mean(val_accs), step=epoch)

    # Log histograms
    with train_summary_writer.as_default():
        for layer in model.layers:
            for weight in layer.weights:
                tf.summary.histogram(weight.name, weight, step=epoch)
```

### tf.data Integration

```python
# Log dataset samples
file_writer = tf.summary.create_file_writer('logs/data_samples')
for images, labels in train_dataset.take(1):
    with file_writer.as_default():
        tf.summary.image('Training samples', images, step=0, max_outputs=25)
```
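
Arbitrary matplotlib figures (confusion matrices, attention maps) can be logged the same way by rendering them to PNG first. A minimal sketch, reusing the `file_writer` created above:

```python
import io
import matplotlib.pyplot as plt
import tensorflow as tf

def plot_to_image(figure):
    """Render a matplotlib figure to a PNG image tensor for tf.summary.image."""
    buf = io.BytesIO()
    figure.savefig(buf, format='png')
    plt.close(figure)
    buf.seek(0)
    image = tf.image.decode_png(buf.getvalue(), channels=4)
    return tf.expand_dims(image, 0)  # add a batch dimension

fig = plt.figure()
plt.imshow([[1, 0], [0, 1]])  # stand-in for a real confusion matrix
with file_writer.as_default():
    tf.summary.image('Confusion matrix', plot_to_image(fig), step=0)
```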

## PyTorch Lightning

### Built-in Logger

```python
import torch
import torch.nn.functional as F
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

# Create logger
logger = TensorBoardLogger('logs', name='lightning_experiment')

# Lightning module
class LitModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = ResNet50()  # assumed to be defined elsewhere

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)

        # Log metrics
        self.log('train_loss', loss, on_step=True, on_epoch=True)

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)
        acc = (y_hat.argmax(dim=1) == y).float().mean()

        # Log metrics
        self.log('val_loss', loss, on_epoch=True)
        self.log('val_acc', acc, on_epoch=True)

        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.001)

# Trainer
trainer = pl.Trainer(
    max_epochs=100,
    logger=logger,
    log_every_n_steps=50
)

# Train
model = LitModel()
trainer.fit(model, train_loader, val_loader)
```

### Custom Logging

```python
class LitModel(pl.LightningModule):
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = F.cross_entropy(y_hat, y)

        # Log scalar
        self.log('train_loss', loss)

        # Log images (every 100 batches)
        if batch_idx % 100 == 0:
            from torchvision.utils import make_grid
            img_grid = make_grid(x[:8])
            self.logger.experiment.add_image('train_images', img_grid, self.global_step)

        # Log histogram
        self.logger.experiment.add_histogram('predictions', y_hat, self.global_step)

        return loss
```

## HuggingFace Transformers

### TrainingArguments Integration

```python
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    logging_dir='./logs',         # TensorBoard log directory
    logging_steps=100,            # Log every 100 steps
    evaluation_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    report_to='tensorboard'       # Enable TensorBoard
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer
)

# Train (automatically logs to TensorBoard)
trainer.train()
```

### Custom Metrics

```python
import numpy as np
from sklearn.metrics import f1_score
from transformers import Trainer

def compute_metrics(eval_pred):
    """Custom metrics for evaluation."""
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    accuracy = (predictions == labels).mean()
    f1 = f1_score(labels, predictions, average='weighted')

    return {
        'accuracy': accuracy,
        'f1': f1
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics  # Custom metrics are logged to TensorBoard
)
```

### Manual Logging

```python
from transformers import TrainerCallback
from torch.utils.tensorboard import SummaryWriter

class TensorBoardCallback(TrainerCallback):
    """Custom TensorBoard logging."""

    def __init__(self, log_dir='logs'):
        self.writer = SummaryWriter(log_dir)

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Called whenever the Trainer logs."""
        if logs:
            for key, value in logs.items():
                self.writer.add_scalar(key, value, state.global_step)

    def on_train_end(self, args, state, control, **kwargs):
        """Close the writer."""
        self.writer.close()

# Use the callback
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    callbacks=[TensorBoardCallback()]
)
```

## Fast.ai

### Learner Integration

```python
from fastai.vision.all import *
from fastai.callback.tensorboard import TensorBoardCallback

# Create data loaders
dls = ImageDataLoaders.from_folder(path, train='train', valid='valid')

# Create learner
learn = cnn_learner(dls, resnet50, metrics=accuracy)

# Train with TensorBoard logging
learn.fit_one_cycle(
    10,
    cbs=TensorBoardCallback('logs/fastai', trace_model=True)
)

# View logs:
# tensorboard --logdir=logs/fastai
```

### Custom Callbacks

```python
from fastai.callback.core import Callback
from torch.utils.tensorboard import SummaryWriter

class CustomTensorBoardCallback(Callback):
    """Custom TensorBoard callback."""

    def __init__(self, log_dir='logs'):
        self.writer = SummaryWriter(log_dir)

    def after_batch(self):
        """Log after each batch."""
        if self.train_iter % 100 == 0:
            self.writer.add_scalar('Loss/train', self.loss, self.train_iter)

    def after_epoch(self):
        """Log after each epoch."""
        self.writer.add_scalar('Loss/train_epoch', self.recorder.train_loss, self.epoch)
        self.writer.add_scalar('Loss/val_epoch', self.recorder.valid_loss, self.epoch)

        # Log metrics
        for i, metric in enumerate(self.recorder.metrics):
            metric_name = self.recorder.metric_names[i + 1]
            self.writer.add_scalar(f'Metrics/{metric_name}', metric, self.epoch)

# Use the callback
learn.fit_one_cycle(10, cbs=[CustomTensorBoardCallback()])
```

## JAX

### Basic Integration

```python
import jax
import jax.numpy as jnp
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('logs/jax_experiment')

# Training loop
global_step = 0
for epoch in range(100):
    for batch in train_batches:
        # JAX training step
        state, loss = train_step(state, batch)
        global_step += 1

        # Log to TensorBoard (float() converts the JAX array to a Python scalar)
        writer.add_scalar('Loss/train', float(loss), global_step)

    # Validation
    val_loss = evaluate(state, val_batches)
    writer.add_scalar('Loss/val', float(val_loss), epoch)

writer.close()
```
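
`train_step`, `state`, and `evaluate` are assumed helpers here. Assuming `state` is a `flax.training.train_state.TrainState` (as in the Flax example below), a minimal `train_step` sketch with a placeholder loss might look like:

```python
import jax
import jax.numpy as jnp

@jax.jit
def train_step(state, batch):
    """One gradient step; `state` is assumed to be a flax TrainState."""
    x, y = batch

    def loss_fn(params):
        logits = state.apply_fn({'params': params}, x)
        # Placeholder objective: mean squared error
        return jnp.mean((logits - y) ** 2)

    loss, grads = jax.value_and_grad(loss_fn)(state.params)
    state = state.apply_gradients(grads=grads)
    return state, loss
```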

### Flax Integration

```python
import numpy as np
from flax import traverse_util
from flax.training import train_state
import optax
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('logs/flax_experiment')

# Create train state
state = train_state.TrainState.create(
    apply_fn=model.apply,
    params=params,
    tx=optax.adam(0.001)
)

# Training loop
for epoch in range(100):
    for batch in train_loader:
        state, loss = train_step(state, batch)

    # Log metrics (last batch's loss) once per epoch
    writer.add_scalar('Loss/train', loss.item(), epoch)

    # Log parameters (flatten the nested parameter tree first)
    for path, param in traverse_util.flatten_dict(state.params).items():
        writer.add_histogram(f"Params/{'/'.join(path)}", np.asarray(param), epoch)

writer.close()
```

## scikit-learn

### Manual Logging

```python
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('logs/sklearn_experiment')

# Hyperparameter search
for n_estimators in [10, 50, 100, 200]:
    for max_depth in [3, 5, 10, None]:
        # Train model
        model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=42
        )

        # Cross-validation
        scores = cross_val_score(model, X_train, y_train, cv=5)

        # Log results
        run_name = f'n{n_estimators}_d{max_depth}'
        writer.add_scalar(f'{run_name}/cv_mean', scores.mean(), 0)
        writer.add_scalar(f'{run_name}/cv_std', scores.std(), 0)

        # Log hyperparameters
        writer.add_hparams(
            {'n_estimators': n_estimators, 'max_depth': max_depth or -1},
            {'cv_accuracy': scores.mean()}
        )

writer.close()
```

### GridSearchCV Logging

```python
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('logs/gridsearch')

# Grid search
param_grid = {
    'n_estimators': [10, 50, 100],
    'max_depth': [3, 5, 10]
}

grid_search = GridSearchCV(
    RandomForestClassifier(),
    param_grid,
    cv=5,
    return_train_score=True
)

grid_search.fit(X_train, y_train)

# Log all results
for i, params in enumerate(grid_search.cv_results_['params']):
    mean_train_score = grid_search.cv_results_['mean_train_score'][i]
    mean_test_score = grid_search.cv_results_['mean_test_score'][i]

    param_str = '_'.join(f'{k}{v}' for k, v in params.items())

    writer.add_scalar(f'{param_str}/train', mean_train_score, 0)
    writer.add_scalar(f'{param_str}/test', mean_test_score, 0)

# Log best params
writer.add_text('Best_params', str(grid_search.best_params_), 0)
writer.add_scalar('Best_score', grid_search.best_score_, 0)

writer.close()
```
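
The same results can be mirrored into the HParams dashboard with `add_hparams`, which gives a sortable parameter/metric table. A short sketch reusing `grid_search` from above (run before `writer.close()`):

```python
# Sketch: one HParams entry per grid-search candidate.
for i, params in enumerate(grid_search.cv_results_['params']):
    writer.add_hparams(
        {k: (-1 if v is None else v) for k, v in params.items()},
        {'mean_test_score': grid_search.cv_results_['mean_test_score'][i]}
    )
```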

## Best Practices

### 1. Consistent Naming Conventions

```python
# ✅ Good: hierarchical names work the same across frameworks
writer.add_scalar('Loss/train', train_loss, step)
writer.add_scalar('Loss/val', val_loss, step)
writer.add_scalar('Metrics/accuracy', accuracy, step)

# The same scheme works in PyTorch, TensorFlow, and Lightning
```

### 2. Use Framework-Specific Features

```python
# PyTorch: use SummaryWriter
from torch.utils.tensorboard import SummaryWriter

# TensorFlow: use tf.summary
import tensorflow as tf
tf.summary.scalar('loss', loss, step=step)

# Lightning: use self.log()
self.log('train_loss', loss)

# Transformers: use report_to='tensorboard'
training_args = TrainingArguments(report_to='tensorboard')
```

### 3. Centralize Logging Logic

```python
from torch.utils.tensorboard import SummaryWriter

class MetricLogger:
    """Universal metric logger."""

    def __init__(self, log_dir='logs'):
        self.writer = SummaryWriter(log_dir)

    def log_scalar(self, name, value, step):
        self.writer.add_scalar(name, value, step)

    def log_image(self, name, image, step):
        self.writer.add_image(name, image, step)

    def log_histogram(self, name, values, step):
        self.writer.add_histogram(name, values, step)

    def close(self):
        self.writer.close()

# Use across frameworks
logger = MetricLogger('logs/universal')
logger.log_scalar('Loss/train', train_loss, epoch)
```

### 4. Framework Detection

```python
def get_tensorboard_writer(framework='auto', log_dir='logs'):
    """Get a TensorBoard writer for whichever framework is installed."""
    if framework == 'auto':
        # Auto-detect the framework
        try:
            import torch
            framework = 'pytorch'
        except ImportError:
            try:
                import tensorflow as tf
                framework = 'tensorflow'
            except ImportError:
                raise ValueError('No supported framework found')

    if framework == 'pytorch':
        from torch.utils.tensorboard import SummaryWriter
        return SummaryWriter(log_dir)

    elif framework == 'tensorflow':
        import tensorflow as tf
        return tf.summary.create_file_writer(log_dir)

# Use it (note: the two writer types expose different APIs)
writer = get_tensorboard_writer(log_dir='logs/auto')
```

## Resources

- **PyTorch**: https://pytorch.org/docs/stable/tensorboard.html
- **TensorFlow**: https://www.tensorflow.org/tensorboard
- **Lightning**: https://pytorch-lightning.readthedocs.io/en/stable/extensions/logging.html
- **Transformers**: https://huggingface.co/docs/transformers/main_classes/trainer
- **Fast.ai**: https://docs.fast.ai/callback.tensorboard.html