evenet-lite 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evenet_lite-0.1.0/MANIFEST.in +1 -0
- evenet_lite-0.1.0/PKG-INFO +313 -0
- evenet_lite-0.1.0/README.md +284 -0
- evenet_lite-0.1.0/evenet_lite/__init__.py +25 -0
- evenet_lite-0.1.0/evenet_lite/callbacks.py +667 -0
- evenet_lite-0.1.0/evenet_lite/checkpoint.py +27 -0
- evenet_lite-0.1.0/evenet_lite/classifier.py +475 -0
- evenet_lite-0.1.0/evenet_lite/config/default_network_config.yaml +126 -0
- evenet_lite-0.1.0/evenet_lite/data.py +143 -0
- evenet_lite-0.1.0/evenet_lite/hf_utils.py +31 -0
- evenet_lite-0.1.0/evenet_lite/metrics.py +547 -0
- evenet_lite-0.1.0/evenet_lite/model.py +360 -0
- evenet_lite-0.1.0/evenet_lite/optim.py +283 -0
- evenet_lite-0.1.0/evenet_lite/runner.py +158 -0
- evenet_lite-0.1.0/evenet_lite/trainer.py +1409 -0
- evenet_lite-0.1.0/evenet_lite/transform_binning.py +306 -0
- evenet_lite-0.1.0/evenet_lite.egg-info/PKG-INFO +313 -0
- evenet_lite-0.1.0/evenet_lite.egg-info/SOURCES.txt +21 -0
- evenet_lite-0.1.0/evenet_lite.egg-info/dependency_links.txt +1 -0
- evenet_lite-0.1.0/evenet_lite.egg-info/requires.txt +13 -0
- evenet_lite-0.1.0/evenet_lite.egg-info/top_level.txt +1 -0
- evenet_lite-0.1.0/pyproject.toml +45 -0
- evenet_lite-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
recursive-include evenet_lite/config *.yaml
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: evenet-lite
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Lightweight training and inference package built on top of evenet-core.
|
|
5
|
+
Author: EveNet contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: machine-learning,physics,pytorch,evenet
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: evenet-core>=0.1.0
|
|
18
|
+
Requires-Dist: huggingface-hub>=0.24
|
|
19
|
+
Requires-Dist: matplotlib>=3.7
|
|
20
|
+
Requires-Dist: numpy>=1.24
|
|
21
|
+
Requires-Dist: pyyaml>=6.0
|
|
22
|
+
Requires-Dist: scipy>=1.10
|
|
23
|
+
Requires-Dist: torch>=2.1
|
|
24
|
+
Requires-Dist: wandb>=0.16
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
27
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
28
|
+
Requires-Dist: twine>=5.1; extra == "dev"
|
|
29
|
+
|
|
30
|
+
# EveNet-Lite
|
|
31
|
+
|
|
32
|
+
EveNet-Lite is a minimal, PyTorch-first training helper that keeps the EveNet model stack but trims away heavy trainers
|
|
33
|
+
or YAML-driven configuration. It exposes a small sklearn-like API (`fit/predict/evaluate`), a runner that wires up
|
|
34
|
+
distributed training automatically, and convenience tools for checkpointing, sampling, normalization, and pretrained
|
|
35
|
+
weight loading.
|
|
36
|
+
|
|
37
|
+
The repository is self-contained; add the repo root to your `PYTHONPATH` or install in editable mode:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install -e .
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Quick start: pipeline runner
|
|
44
|
+
|
|
45
|
+
If you already have tensors prepared for objects/globals/mask, the runner wraps everything needed for a full
|
|
46
|
+
train/validate/evaluate cycle and detects DDP from standard `torchrun` environment variables:
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
import torch
|
|
50
|
+
from evenet_lite import run_evenet_lite_training
|
|
51
|
+
|
|
52
|
+
classifier = run_evenet_lite_training(
|
|
53
|
+
train_features={"x": X_train, "globals": G_train, "mask": M_train},
|
|
54
|
+
train_labels=y_train,
|
|
55
|
+
train_weights=w_train,
|
|
56
|
+
val_features={"x": X_val, "globals": G_val, "mask": M_val},
|
|
57
|
+
val_labels=y_val,
|
|
58
|
+
val_weights=w_val,
|
|
59
|
+
eval_features={"x": X_test, "globals": G_test, "mask": M_test},
|
|
60
|
+
eval_labels=y_test,
|
|
61
|
+
eval_weights=w_test,
|
|
62
|
+
class_labels=["background", "signal"],
|
|
63
|
+
sampler="weighted",
|
|
64
|
+
epochs=5,
|
|
65
|
+
batch_size=512,
|
|
66
|
+
checkpoint_path="./checkpoints",
|
|
67
|
+
save_top_k=2,
|
|
68
|
+
debug=True,
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# The returned classifier is already fitted and carries the trained normalizer.
|
|
72
|
+
probs = classifier.predict({"x": X_infer, "globals": G_infer, "mask": M_infer})
|
|
73
|
+
metrics = classifier.evaluate({"x": X_eval, "globals": G_eval, "mask": M_eval}, y_eval)
|
|
74
|
+
classifier.save_checkpoint("./checkpoints/final.pt")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Parameterized training (m_X, m_Y)
|
|
78
|
+
|
|
79
|
+
You can train with per-event parameters (e.g., ``m_X`` and ``m_Y``) and randomize background
|
|
80
|
+
values every step using ``ParameterRandomizationCallback``. The pattern works with the
|
|
81
|
+
runner or your own loop; the only requirements are:
|
|
82
|
+
|
|
83
|
+
1. Provide a ``params`` tensor with shape ``(N, num_params)`` alongside the usual ``x``/``globals``/``mask``
|
|
84
|
+
features. Concatenate it into ``globals`` before calling the runner so the model sees the expanded
|
|
85
|
+
global dimension (and update ``global_input_dim`` accordingly).
|
|
86
|
+
2. Attach ``ParameterRandomizationCallback`` when fitting to resample background parameters each batch while
|
|
87
|
+
leaving signal parameters intact. Optionally set ``min_values``/``max_values`` (one value or a list per
|
|
88
|
+
parameter); otherwise the callback infers bounds from the training set. Or set ``pool_from_signal`` to sample
|
|
89
|
+
directly from the signal pool (uniformly sampled from all discrete parameter combinations in the signal events).
|
|
90
|
+
3. Keep validation randomization on (default) to match training, or set ``apply_to_validation=False`` if you
|
|
91
|
+
prefer fixed parameters there. Evaluation is untouched.
|
|
92
|
+
|
|
93
|
+
Minimal usage sketch with ``run_evenet_lite_training``:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from evenet_lite import run_evenet_lite_training
|
|
97
|
+
from evenet_lite.callbacks import ParameterRandomizationCallback
|
|
98
|
+
|
|
99
|
+
# globals_with_params = torch.cat([globals, params], dim=1)
|
|
100
|
+
callbacks = [ParameterRandomizationCallback(min_values=[300, 500], max_values=[800, 1200])]
|
|
101
|
+
|
|
102
|
+
classifier = run_evenet_lite_training(
|
|
103
|
+
train_features={"x": X_train, "globals": globals_with_params, "mask": M_train, "params": params_train},
|
|
104
|
+
train_labels=y_train,
|
|
105
|
+
val_features={"x": X_val, "globals": globals_val_with_params, "mask": M_val, "params": params_val},
|
|
106
|
+
val_labels=y_val,
|
|
107
|
+
callbacks=callbacks,
|
|
108
|
+
global_input_dim=globals_with_params.shape[1],
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### What the runner handles
|
|
113
|
+
|
|
114
|
+
- Detects distributed environments (`WORLD_SIZE`, `LOCAL_RANK`) and pins the appropriate CUDA device when available.
|
|
115
|
+
- Builds an `EvenetLiteClassifier` with optional pretrained weights and logging level.
|
|
116
|
+
- Injects normalization automatically unless a custom `NormalizationCallback` is supplied.
|
|
117
|
+
- Forwards checkpointing, early stopping, sampler, and evaluation options to the classifier.
|
|
118
|
+
- Returns the fitted classifier so you can immediately call `predict`, `evaluate`, or `save_checkpoint`.
|
|
119
|
+
|
|
120
|
+
## Custom workflow (manual steps)
|
|
121
|
+
|
|
122
|
+
Prefer to assemble the pieces yourself? You can directly instantiate the classifier, call `fit`, and run
|
|
123
|
+
evaluation/prediction without the runner.
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
import torch
|
|
127
|
+
from evenet_lite import EvenetLiteClassifier
|
|
128
|
+
|
|
129
|
+
# Build the classifier (uses default EveNetLite backbone if none is provided)
|
|
130
|
+
clf = EvenetLiteClassifier(
|
|
131
|
+
class_labels=["background", "signal"],
|
|
132
|
+
device="auto", # cpu, cuda, or auto-detect
|
|
133
|
+
lr=1e-3,
|
|
134
|
+
weight_decay=0.01,
|
|
135
|
+
grad_clip=1.0,
|
|
136
|
+
pretrained=True, # soft-load pretrained weights (Hugging Face hub is the default source)
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
# Fit
|
|
140
|
+
clf.fit(
|
|
141
|
+
train_data=({"x": X_train, "globals": G_train, "mask": M_train}, y_train, w_train),
|
|
142
|
+
val_data=({"x": X_val, "globals": G_val, "mask": M_val}, y_val, w_val),
|
|
143
|
+
feature_names={"x": obj_feature_names, "globals": global_feature_names},
|
|
144
|
+
epochs=10,
|
|
145
|
+
batch_size=256,
|
|
146
|
+
sampler="weighted",
|
|
147
|
+
checkpoint_path="./checkpoints",
|
|
148
|
+
save_top_k=1,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
# Evaluate / predict
|
|
152
|
+
val_metrics = clf.evaluate({"x": X_val, "globals": G_val, "mask": M_val}, y_val, w_val)
|
|
153
|
+
probs = clf.predict({"x": X_test, "globals": G_test, "mask": M_test})
|
|
154
|
+
|
|
155
|
+
# Checkpointing
|
|
156
|
+
clf.save_checkpoint("./checkpoints/latest.pt")
|
|
157
|
+
# Later
|
|
158
|
+
restored = EvenetLiteClassifier(class_labels=["background", "signal"])
|
|
159
|
+
restored.load_checkpoint("./checkpoints/latest.pt",
|
|
160
|
+
feature_names={"x": obj_feature_names, "globals": global_feature_names})
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Data expectations
|
|
164
|
+
|
|
165
|
+
Input tensors follow an xgboost-like contract and are provided directly to the classifier or runner:
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
features = {
|
|
169
|
+
"x": torch.Tensor[N, M, F], # per-object features
|
|
170
|
+
"globals": torch.Tensor[N, G], # event-level features
|
|
171
|
+
"mask": torch.Tensor[N, M], # padding mask
|
|
172
|
+
}
|
|
173
|
+
labels = torch.Tensor[N] # class indices
|
|
174
|
+
weights = torch.Tensor[N] | None # optional per-example weights
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
Feature names passed to `fit` (`feature_names={"x": [...], "globals": [...]}`) should align with the keys above so the
|
|
178
|
+
normalizer can match statistics to columns.
|
|
179
|
+
|
|
180
|
+
## Argument reference
|
|
181
|
+
|
|
182
|
+
The tables below summarize the most-used entrypoints and their arguments. Defaults match the inline values in code.
|
|
183
|
+
|
|
184
|
+
### `EvenetLiteClassifier` constructor
|
|
185
|
+
|
|
186
|
+
| Argument | Default | Description |
|
|
187
|
+
|-------------------------------------------------------------------------------------------|------------------------|------------------------------------------------------------------------------------------------|
|
|
188
|
+
| `class_labels` | **required** | Ordered class names wired into metrics and loss. |
|
|
189
|
+
| `device` | `"auto"` | Chooses CUDA when available; otherwise CPU. |
|
|
190
|
+
| `lr` | `[1e-3, 3e-4, 1e-4]` | Learning rates assigned per optimizer group. |
|
|
191
|
+
| `weight_decay` | `[0.01, 0.01, 0.01]` | Weight decay values aligned with the learning-rate groups. |
|
|
192
|
+
| `model` | `None` | Custom EveNet model; defaults to `EveNetLite` built from `config/default_network_config.yaml`. |
|
|
193
|
+
| `optimizer_fn` / `scheduler_fn` | `None` | Factories for custom optimizer or scheduler. |
|
|
194
|
+
| `grad_clip` | `None` | Max gradient norm when set. |
|
|
195
|
+
| `module_lists` | `[["Classification"], ["ObjectEncoder"], ["PET", "GlobalEmbedding"]]` | Modules assigned to each optimizer group (groups processed in order). |
|
|
196
|
+
| `warmup_epochs` / `warmup_ratio` / `warmup_start_factor` | `1` / `0.1` / `0.1` | Linear warmup configuration. |
|
|
197
|
+
| `min_lr` | `0.0` | Scheduler floor learning rate. |
|
|
198
|
+
| `global_input_dim` / `sequential_input_dim` | `10` / `7` | Input feature dimensions for the default backbone. |
|
|
199
|
+
| `use_wandb` / `wandb` | `False` / `None` | Enable Weights & Biases with optional init kwargs. |
|
|
200
|
+
| `log_level` | `logging.INFO` | Root logging level when constructing the classifier. |
|
|
201
|
+
| `pretrained` | `False` | When `True`, soft-loads weights (default HF repo/filename). |
|
|
202
|
+
| `pretrained_source` | `"hf"` | `"hf"` for Hugging Face hub or `"local"` for a provided path. |
|
|
203
|
+
| `pretrained_path` / `pretrained_repo_id` / `pretrained_filename` / `pretrained_cache_dir` | varies | Location details for pretrained checkpoints. |
|
|
204
|
+
| `num_workers` | `0` | Number of worker processes passed to the PyTorch `DataLoader`. |
|
|
205
|
+
| `loss_gamma` | `0.0` | Focal-loss gamma (``0`` reduces to standard cross-entropy). |
|
|
206
|
+
|
|
207
|
+
### `EvenetLiteClassifier.fit`
|
|
208
|
+
|
|
209
|
+
| Argument | Default | Description |
|
|
210
|
+
|---------------------------------------------------------------------|--------------------------------|--------------------------------------------------------------------------|
|
|
211
|
+
| `train_data` | **required** | Tuple `(features, labels, weights)` for training. |
|
|
212
|
+
| `val_data` | `None` | Optional validation tuple with same structure as training. |
|
|
213
|
+
| `feature_names` | Defaults to classifier presets | Mapping of feature group to column names for normalization. |
|
|
214
|
+
| `normalization_rules` | Defaults to classifier presets | Per-feature normalization strategy (`log_normalize`, `normalize`, etc.). |
|
|
215
|
+
| `normalization_stats` | `None` | Optional precomputed means/stds per feature group; missing values default to mean 0/std 1. |
|
|
216
|
+
| `callbacks` | `None` | Additional callbacks (normalization is auto-inserted if absent). |
|
|
217
|
+
| `epochs` | `10` | Number of training epochs. |
|
|
218
|
+
| `batch_size` | `256` | Mini-batch size. |
|
|
219
|
+
| `sampler` | `None` | Sampler name (`"weighted"` enables distributed-safe weighted sampler). |
|
|
220
|
+
| `epoch_size` | `None` | Number of samples per epoch when using a sampler. |
|
|
221
|
+
| `checkpoint_path` / `resume_from` | `None` | Directory or filename for checkpoints and optional resume path. |
|
|
222
|
+
| `checkpoint_every` | `1` | Frequency (epochs) for periodic checkpoints when `save_top_k == 0`. |
|
|
223
|
+
| `save_top_k` | `0` | Keep best-k checkpoints ranked by `monitor_metric`. |
|
|
224
|
+
| `monitor_metric` / `minimize_metric` | `"val_loss"` / `True` | Metric and direction for checkpoint ranking. |
|
|
225
|
+
| `early_stop_metric` / `early_stop_minimize` / `early_stop_patience` | `"val_loss"` / `True` / `0` | Early stopping configuration (disabled when patience is 0). |
|
|
226
|
+
| `eval_data` | `None` | Optional test tuple evaluated after training. |
|
|
227
|
+
| `eval_output_path` | `None` | Path to save evaluation outputs when provided. |
|
|
228
|
+
| `eval_batch_size` | `None` | Batch size for evaluation (falls back to training batch size). |
|
|
229
|
+
| `sic_min_bkg_events` | `100` | Minimum background events for SIC metric calculation. |
|
|
230
|
+
| `debug` | `False` | Enables verbose `DebugCallback` logging and diagnostics. |
|
|
231
|
+
|
|
232
|
+
### `EvenetLiteClassifier.predict` / `evaluate`
|
|
233
|
+
|
|
234
|
+
- `predict(features, batch_size=256)`: returns class probabilities using the stored normalizer; requires that `fit` or
|
|
235
|
+
`load_checkpoint` has been called.
|
|
236
|
+
- `evaluate(features, labels, weights=None, batch_size=256)`: computes loss/accuracy (and physics metrics when
|
|
237
|
+
available) on the provided dataset.
|
|
238
|
+
|
|
239
|
+
### `run_evenet_lite_training`
|
|
240
|
+
|
|
241
|
+
| Argument | Default | Description |
|
|
242
|
+
|---------------------------------------------------------------------|--------------------------------------|--------------------------------------------------------------------|
|
|
243
|
+
| `train_features` / `train_labels` / `train_weights` | **required** / **required** / `None` | Training tensors and optional weights. |
|
|
244
|
+
| `class_labels` | **required** | Ordered class names passed to the classifier. |
|
|
245
|
+
| `val_features` / `val_labels` / `val_weights` | `None` | Optional validation tensors and weights. |
|
|
246
|
+
| `feature_names` | `None` | Feature column names forwarded to the classifier. |
|
|
247
|
+
| `normalization_rules` | `None` | Per-feature normalization overrides. |
|
|
248
|
+
| `normalization_stats` | `None` | Optional precomputed means/stds per feature group; missing values default to mean 0/std 1. |
|
|
249
|
+
| `callbacks` | `None` | Extra callbacks (normalization auto-added if missing). |
|
|
250
|
+
| `sampler` / `epoch_size` | `None` | Sampling strategy and epoch size when sampling. |
|
|
251
|
+
| `epochs` / `batch_size` | `10` / `256` | Training loop configuration. |
|
|
252
|
+
| `checkpoint_path` / `resume_from` | `None` | Checkpoint directory/base filename and optional resume path. |
|
|
253
|
+
| `checkpoint_every` | `1` | Epoch frequency for periodic checkpoints when not using top-k. |
|
|
254
|
+
| `save_top_k` | `0` | Number of best checkpoints to retain. |
|
|
255
|
+
| `monitor_metric` / `minimize_metric` | `"val_loss"` / `True` | Metric and direction for best-checkpoint tracking. |
|
|
256
|
+
| `early_stop_metric` / `early_stop_minimize` / `early_stop_patience` | `"val_loss"` / `True` / `0` | Early stopping configuration. |
|
|
257
|
+
| `eval_features` / `eval_labels` / `eval_weights` | `None` | Optional evaluation payload run after training. |
|
|
258
|
+
| `eval_output_path` | `None` | File path to persist evaluation results. |
|
|
259
|
+
| `eval_batch_size` | `None` | Batch size for evaluation (defaults to training batch size). |
|
|
260
|
+
| `sic_min_bkg_events` | `100` | Minimum background events for SIC metric computation. |
|
|
261
|
+
| `debug` | `False` | Enables verbose debugging callback and sampler diagnostics. |
|
|
262
|
+
| `loss_gamma` | `0.0` | Focal-loss gamma (``0`` reduces to standard cross-entropy). |
|
|
263
|
+
| `log_level` | `logging.INFO` | Logging level set before runner diagnostics. |
|
|
264
|
+
| `**classifier_kwargs` | — | Additional arguments forwarded directly to `EvenetLiteClassifier`. |
|
|
265
|
+
|
|
266
|
+
## Distributed training
|
|
267
|
+
|
|
268
|
+
The trainer boots into DDP automatically when `WORLD_SIZE > 1` (e.g., via
|
|
269
|
+
`torchrun --nproc_per_node <num_gpus> script.py`). Rank 0 handles logging and checkpointing; sampler/loader seeds are
|
|
270
|
+
synchronized per epoch. Without distributed environment variables, execution falls back to a single process on GPU or CPU
|
|
271
|
+
depending on availability.
|
|
272
|
+
|
|
273
|
+
## Normalization & callbacks
|
|
274
|
+
|
|
275
|
+
- A `NormalizationCallback` is injected automatically during `fit` when one is not provided. You can supply custom
|
|
276
|
+
normalization rules or replace the callback entirely.
|
|
277
|
+
- Pass `normalization_stats` to `EvenetLiteClassifier.fit` or `run_evenet_lite_training` to reuse precomputed
|
|
278
|
+
mean/std pairs without refitting. Provide a mapping per feature group, e.g.:
|
|
279
|
+
|
|
280
|
+
```python
|
|
281
|
+
normalization_stats = {
|
|
282
|
+
"x": {"mean": [0.1, -0.2, 0.0], "std": [1.0, 0.9, 1.1]},
|
|
283
|
+
"globals": {"mean": [0.0, 0.0], "std": [1.0, 1.0]},
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
classifier.fit(
|
|
287
|
+
train_data=(train_features, y_train, w_train),
|
|
288
|
+
val_data=(val_features, y_val, w_val),
|
|
289
|
+
feature_names=feature_names,
|
|
290
|
+
normalization_stats=normalization_stats,
|
|
291
|
+
)
|
|
292
|
+
```
|
|
293
|
+
Any missing groups or columns default to mean `0` and std `1`, and the applied plan is logged as a table on rank 0.
|
|
294
|
+
- Implement custom callbacks by subclassing `Callback` and overriding hooks such as `on_train_start`, `on_epoch_end`, or
|
|
295
|
+
`on_train_end`, then pass instances via the `callbacks` argument of `fit` or the runner.
|
|
296
|
+
|
|
297
|
+
## Checkpointing and pretrained weights
|
|
298
|
+
|
|
299
|
+
- Call `save_checkpoint(path)` on a fitted classifier to persist model, optimizer/scheduler states, and the learned
|
|
300
|
+
normalizer. Use `load_checkpoint(path, feature_names=...)` to restore weights for further training or inference.
|
|
301
|
+
- Enable `pretrained=True` (with optional `pretrained_source`, `pretrained_path`, or Hugging Face repo/filename
|
|
302
|
+
overrides) to soft-load compatible parameters while leaving shape-mismatched layers initialized.
|
|
303
|
+
|
|
304
|
+
## Module guide
|
|
305
|
+
|
|
306
|
+
- `evenet_lite.classifier.EvenetLiteClassifier`: high-level `fit/predict/evaluate` API and pretrained loader.
|
|
307
|
+
- `evenet_lite.runner.run_evenet_lite_training`: convenience pipeline that wires up DDP detection and training.
|
|
308
|
+
- `evenet_lite.trainer.Trainer`: core training loop with DDP, callbacks, metrics, early stopping, and checkpointing.
|
|
309
|
+
- `evenet_lite.data`: dataset wrapper and distributed weighted sampler utilities.
|
|
310
|
+
- `evenet_lite.callbacks`: callback base class, default normalizer, and debug helpers.
|
|
311
|
+
- `evenet_lite.metrics`: accuracy, loss, and physics-driven metrics helpers.
|
|
312
|
+
- `evenet_lite.checkpoint`: rank-safe checkpoint save/load helpers.
|
|
313
|
+
- `evenet_lite.model`: EveNet backbone assembly used by the default classifier.
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
# EveNet-Lite
|
|
2
|
+
|
|
3
|
+
EveNet-Lite is a minimal, PyTorch-first training helper that keeps the EveNet model stack but trims away heavy trainers
|
|
4
|
+
or YAML-driven configuration. It exposes a small sklearn-like API (`fit/predict/evaluate`), a runner that wires up
|
|
5
|
+
distributed training automatically, and convenience tools for checkpointing, sampling, normalization, and pretrained
|
|
6
|
+
weight loading.
|
|
7
|
+
|
|
8
|
+
The repository is self-contained; add the repo root to your `PYTHONPATH` or install in editable mode:
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install -e .
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Quick start: pipeline runner
|
|
15
|
+
|
|
16
|
+
If you already have tensors prepared for objects/globals/mask, the runner wraps everything needed for a full
|
|
17
|
+
train/validate/evaluate cycle and detects DDP from standard `torchrun` environment variables:
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
import torch
|
|
21
|
+
from evenet_lite import run_evenet_lite_training
|
|
22
|
+
|
|
23
|
+
classifier = run_evenet_lite_training(
|
|
24
|
+
train_features={"x": X_train, "globals": G_train, "mask": M_train},
|
|
25
|
+
train_labels=y_train,
|
|
26
|
+
train_weights=w_train,
|
|
27
|
+
val_features={"x": X_val, "globals": G_val, "mask": M_val},
|
|
28
|
+
val_labels=y_val,
|
|
29
|
+
val_weights=w_val,
|
|
30
|
+
eval_features={"x": X_test, "globals": G_test, "mask": M_test},
|
|
31
|
+
eval_labels=y_test,
|
|
32
|
+
eval_weights=w_test,
|
|
33
|
+
class_labels=["background", "signal"],
|
|
34
|
+
sampler="weighted",
|
|
35
|
+
epochs=5,
|
|
36
|
+
batch_size=512,
|
|
37
|
+
checkpoint_path="./checkpoints",
|
|
38
|
+
save_top_k=2,
|
|
39
|
+
debug=True,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
# The returned classifier is already fitted and carries the trained normalizer.
|
|
43
|
+
probs = classifier.predict({"x": X_infer, "globals": G_infer, "mask": M_infer})
|
|
44
|
+
metrics = classifier.evaluate({"x": X_eval, "globals": G_eval, "mask": M_eval}, y_eval)
|
|
45
|
+
classifier.save_checkpoint("./checkpoints/final.pt")
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Parameterized training (m_X, m_Y)
|
|
49
|
+
|
|
50
|
+
You can train with per-event parameters (e.g., ``m_X`` and ``m_Y``) and randomize background
|
|
51
|
+
values every step using ``ParameterRandomizationCallback``. The pattern works with the
|
|
52
|
+
runner or your own loop; the only requirements are:
|
|
53
|
+
|
|
54
|
+
1. Provide a ``params`` tensor with shape ``(N, num_params)`` alongside the usual ``x``/``globals``/``mask``
|
|
55
|
+
features. Concatenate it into ``globals`` before calling the runner so the model sees the expanded
|
|
56
|
+
global dimension (and update ``global_input_dim`` accordingly).
|
|
57
|
+
2. Attach ``ParameterRandomizationCallback`` when fitting to resample background parameters each batch while
|
|
58
|
+
leaving signal parameters intact. Optionally set ``min_values``/``max_values`` (one value or a list per
|
|
59
|
+
parameter); otherwise the callback infers bounds from the training set. Or set ``pool_from_signal`` to sample
|
|
60
|
+
directly from the signal pool (uniformly sampled from all discrete parameter combinations in the signal events).
|
|
61
|
+
3. Keep validation randomization on (default) to match training, or set ``apply_to_validation=False`` if you
|
|
62
|
+
prefer fixed parameters there. Evaluation is untouched.
|
|
63
|
+
|
|
64
|
+
Minimal usage sketch with ``run_evenet_lite_training``:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from evenet_lite import run_evenet_lite_training
|
|
68
|
+
from evenet_lite.callbacks import ParameterRandomizationCallback
|
|
69
|
+
|
|
70
|
+
# globals_with_params = torch.cat([globals, params], dim=1)
|
|
71
|
+
callbacks = [ParameterRandomizationCallback(min_values=[300, 500], max_values=[800, 1200])]
|
|
72
|
+
|
|
73
|
+
classifier = run_evenet_lite_training(
|
|
74
|
+
train_features={"x": X_train, "globals": globals_with_params, "mask": M_train, "params": params_train},
|
|
75
|
+
train_labels=y_train,
|
|
76
|
+
val_features={"x": X_val, "globals": globals_val_with_params, "mask": M_val, "params": params_val},
|
|
77
|
+
val_labels=y_val,
|
|
78
|
+
callbacks=callbacks,
|
|
79
|
+
global_input_dim=globals_with_params.shape[1],
|
|
80
|
+
)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### What the runner handles
|
|
84
|
+
|
|
85
|
+
- Detects distributed environments (`WORLD_SIZE`, `LOCAL_RANK`) and pins the appropriate CUDA device when available.
|
|
86
|
+
- Builds an `EvenetLiteClassifier` with optional pretrained weights and logging level.
|
|
87
|
+
- Injects normalization automatically unless a custom `NormalizationCallback` is supplied.
|
|
88
|
+
- Forwards checkpointing, early stopping, sampler, and evaluation options to the classifier.
|
|
89
|
+
- Returns the fitted classifier so you can immediately call `predict`, `evaluate`, or `save_checkpoint`.
|
|
90
|
+
|
|
91
|
+
## Custom workflow (manual steps)
|
|
92
|
+
|
|
93
|
+
Prefer to assemble the pieces yourself? You can directly instantiate the classifier, call `fit`, and run
|
|
94
|
+
evaluation/prediction without the runner.
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
import torch
|
|
98
|
+
from evenet_lite import EvenetLiteClassifier
|
|
99
|
+
|
|
100
|
+
# Build the classifier (uses default EveNetLite backbone if none is provided)
|
|
101
|
+
clf = EvenetLiteClassifier(
|
|
102
|
+
class_labels=["background", "signal"],
|
|
103
|
+
device="auto", # cpu, cuda, or auto-detect
|
|
104
|
+
lr=1e-3,
|
|
105
|
+
weight_decay=0.01,
|
|
106
|
+
grad_clip=1.0,
|
|
107
|
+
pretrained=True, # soft-load pretrained weights from the default Hugging Face repo
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# Fit
|
|
111
|
+
clf.fit(
|
|
112
|
+
train_data=({"x": X_train, "globals": G_train, "mask": M_train}, y_train, w_train),
|
|
113
|
+
val_data=({"x": X_val, "globals": G_val, "mask": M_val}, y_val, w_val),
|
|
114
|
+
feature_names={"x": obj_feature_names, "globals": global_feature_names},
|
|
115
|
+
epochs=10,
|
|
116
|
+
batch_size=256,
|
|
117
|
+
sampler="weighted",
|
|
118
|
+
checkpoint_path="./checkpoints",
|
|
119
|
+
save_top_k=1,
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
# Evaluate / predict
|
|
123
|
+
val_metrics = clf.evaluate({"x": X_val, "globals": G_val, "mask": M_val}, y_val, w_val)
|
|
124
|
+
probs = clf.predict({"x": X_test, "globals": G_test, "mask": M_test})
|
|
125
|
+
|
|
126
|
+
# Checkpointing
|
|
127
|
+
clf.save_checkpoint("./checkpoints/latest.pt")
|
|
128
|
+
# Later
|
|
129
|
+
restored = EvenetLiteClassifier(class_labels=["background", "signal"])
|
|
130
|
+
restored.load_checkpoint("./checkpoints/latest.pt",
|
|
131
|
+
feature_names={"x": obj_feature_names, "globals": global_feature_names})
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Data expectations
|
|
135
|
+
|
|
136
|
+
Input tensors follow an XGBoost-like contract and are provided directly to the classifier or runner:
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
features = {
|
|
140
|
+
"x": torch.Tensor[N, M, F], # per-object features
|
|
141
|
+
"globals": torch.Tensor[N, G], # event-level features
|
|
142
|
+
"mask": torch.Tensor[N, M], # padding mask
|
|
143
|
+
}
|
|
144
|
+
labels = torch.Tensor[N] # class indices
|
|
145
|
+
weights = torch.Tensor[N] | None # optional per-example weights
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Feature names passed to `fit` (`feature_names={"x": [...], "globals": [...]}`) should align with the keys above so the
|
|
149
|
+
normalizer can match statistics to columns.
|
|
150
|
+
|
|
151
|
+
## Argument reference
|
|
152
|
+
|
|
153
|
+
The tables below summarize the most-used entrypoints and their arguments. Defaults match the inline values in code.
|
|
154
|
+
|
|
155
|
+
### `EvenetLiteClassifier` constructor
|
|
156
|
+
|
|
157
|
+
| Argument | Default | Description |
|
|
158
|
+
|-------------------------------------------------------------------------------------------|------------------------|------------------------------------------------------------------------------------------------|
|
|
159
|
+
| `class_labels` | **required** | Ordered class names wired into metrics and loss. |
|
|
160
|
+
| `device` | `"auto"` | Chooses CUDA when available; otherwise CPU. |
|
|
161
|
+
| `lr` | `[1e-3, 3e-4, 1e-4]` | Learning rates assigned per optimizer group. |
|
|
162
|
+
| `weight_decay` | `[0.01, 0.01, 0.01]` | Weight decay values aligned with the learning-rate groups. |
|
|
163
|
+
| `model` | `None` | Custom EveNet model; defaults to `EveNetLite` built from `config/default_network_config.yaml`. |
|
|
164
|
+
| `optimizer_fn` / `scheduler_fn` | `None` | Factories for custom optimizer or scheduler. |
|
|
165
|
+
| `grad_clip` | `None` | Max gradient norm when set. |
|
|
166
|
+
| `module_lists` | `[["Classification"], ["ObjectEncoder"], ["PET", "GlobalEmbedding"]]` | Modules assigned to each optimizer group (groups processed in order). |
|
|
167
|
+
| `warmup_epochs` / `warmup_ratio` / `warmup_start_factor` | `1` / `0.1` / `0.1` | Linear warmup configuration. |
|
|
168
|
+
| `min_lr` | `0.0` | Scheduler floor learning rate. |
|
|
169
|
+
| `global_input_dim` / `sequential_input_dim` | `10` / `7` | Input feature dimensions for the default backbone. |
|
|
170
|
+
| `use_wandb` / `wandb` | `False` / `None` | Enable Weights & Biases with optional init kwargs. |
|
|
171
|
+
| `log_level` | `logging.INFO` | Root logging level when constructing the classifier. |
|
|
172
|
+
| `pretrained` | `False` | When `True`, soft-loads weights (default HF repo/filename). |
|
|
173
|
+
| `pretrained_source` | `"hf"` | `"hf"` for Hugging Face hub or `"local"` for a provided path. |
|
|
174
|
+
| `pretrained_path` / `pretrained_repo_id` / `pretrained_filename` / `pretrained_cache_dir` | varies | Location details for pretrained checkpoints. |
|
|
175
|
+
| `num_workers` | `0` | Number of worker processes passed to the PyTorch `DataLoader`. |
|
|
176
|
+
| `loss_gamma` | `0.0` | Focal-loss gamma (``0`` reduces to standard cross-entropy). |
|
|
177
|
+
|
|
178
|
+
### `EvenetLiteClassifier.fit`
|
|
179
|
+
|
|
180
|
+
| Argument | Default | Description |
|
|
181
|
+
|---------------------------------------------------------------------|--------------------------------|--------------------------------------------------------------------------|
|
|
182
|
+
| `train_data` | **required** | Tuple `(features, labels, weights)` for training. |
|
|
183
|
+
| `val_data` | `None` | Optional validation tuple with same structure as training. |
|
|
184
|
+
| `feature_names` | Defaults to classifier presets | Mapping of feature group to column names for normalization. |
|
|
185
|
+
| `normalization_rules` | Defaults to classifier presets | Per-feature normalization strategy (`log_normalize`, `normalize`, etc.). |
|
|
186
|
+
| `normalization_stats` | `None` | Optional precomputed means/stds per feature group; missing values default to mean 0/std 1. |
|
|
187
|
+
| `callbacks` | `None` | Additional callbacks (normalization is auto-inserted if absent). |
|
|
188
|
+
| `epochs` | `10` | Number of training epochs. |
|
|
189
|
+
| `batch_size` | `256` | Mini-batch size. |
|
|
190
|
+
| `sampler` | `None` | Sampler name (`"weighted"` enables the distributed-safe weighted sampler). |
|
|
191
|
+
| `epoch_size` | `None` | Number of samples per epoch when using a sampler. |
|
|
192
|
+
| `checkpoint_path` / `resume_from` | `None` | Directory or filename for checkpoints and optional resume path. |
|
|
193
|
+
| `checkpoint_every` | `1` | Frequency (epochs) for periodic checkpoints when `save_top_k == 0`. |
|
|
194
|
+
| `save_top_k` | `0` | Keep best-k checkpoints ranked by `monitor_metric`. |
|
|
195
|
+
| `monitor_metric` / `minimize_metric` | `"val_loss"` / `True` | Metric and direction for checkpoint ranking. |
|
|
196
|
+
| `early_stop_metric` / `early_stop_minimize` / `early_stop_patience` | `"val_loss"` / `True` / `0` | Early stopping configuration (disabled when patience is 0). |
|
|
197
|
+
| `eval_data` | `None` | Optional test tuple evaluated after training. |
|
|
198
|
+
| `eval_output_path` | `None` | Path to save evaluation outputs when provided. |
|
|
199
|
+
| `eval_batch_size` | `None` | Batch size for evaluation (falls back to training batch size). |
|
|
200
|
+
| `sic_min_bkg_events` | `100` | Minimum background events for SIC metric calculation. |
|
|
201
|
+
| `debug` | `False` | Enables verbose `DebugCallback` logging and diagnostics. |
|
|
202
|
+
|
|
203
|
+
### `EvenetLiteClassifier.predict` / `evaluate`
|
|
204
|
+
|
|
205
|
+
- `predict(features, batch_size=256)`: returns class probabilities using the stored normalizer; requires that `fit` or
|
|
206
|
+
`load_checkpoint` has been called.
|
|
207
|
+
- `evaluate(features, labels, weights=None, batch_size=256)`: computes loss/accuracy (and physics metrics when
|
|
208
|
+
available) on the provided dataset.
|
|
209
|
+
|
|
210
|
+
### `run_evenet_lite_training`
|
|
211
|
+
|
|
212
|
+
| Argument | Default | Description |
|
|
213
|
+
|---------------------------------------------------------------------|--------------------------------------|--------------------------------------------------------------------|
|
|
214
|
+
| `train_features` / `train_labels` / `train_weights` | **required** / **required** / `None` | Training tensors and optional weights. |
|
|
215
|
+
| `class_labels` | **required** | Ordered class names passed to the classifier. |
|
|
216
|
+
| `val_features` / `val_labels` / `val_weights` | `None` | Optional validation tensors and weights. |
|
|
217
|
+
| `feature_names` | `None` | Feature column names forwarded to the classifier. |
|
|
218
|
+
| `normalization_rules` | `None` | Per-feature normalization overrides. |
|
|
219
|
+
| `normalization_stats` | `None` | Optional precomputed means/stds per feature group; missing values default to mean 0/std 1. |
|
|
220
|
+
| `callbacks` | `None` | Extra callbacks (normalization auto-added if missing). |
|
|
221
|
+
| `sampler` / `epoch_size` | `None` | Sampling strategy and epoch size when sampling. |
|
|
222
|
+
| `epochs` / `batch_size` | `10` / `256` | Training loop configuration. |
|
|
223
|
+
| `checkpoint_path` / `resume_from` | `None` | Checkpoint directory/base filename and optional resume path. |
|
|
224
|
+
| `checkpoint_every` | `1` | Epoch frequency for periodic checkpoints when not using top-k. |
|
|
225
|
+
| `save_top_k` | `0` | Number of best checkpoints to retain. |
|
|
226
|
+
| `monitor_metric` / `minimize_metric` | `"val_loss"` / `True` | Metric and direction for best-checkpoint tracking. |
|
|
227
|
+
| `early_stop_metric` / `early_stop_minimize` / `early_stop_patience` | `"val_loss"` / `True` / `0` | Early stopping configuration. |
|
|
228
|
+
| `eval_features` / `eval_labels` / `eval_weights` | `None` | Optional evaluation payload run after training. |
|
|
229
|
+
| `eval_output_path` | `None` | File path to persist evaluation results. |
|
|
230
|
+
| `eval_batch_size` | `None` | Batch size for evaluation (defaults to training batch size). |
|
|
231
|
+
| `sic_min_bkg_events` | `100` | Minimum background events for SIC metric computation. |
|
|
232
|
+
| `debug` | `False` | Enables verbose debugging callback and sampler diagnostics. |
|
|
233
|
+
| `loss_gamma` | `0.0` | Focal-loss gamma (``0`` reduces to standard cross-entropy). |
|
|
234
|
+
| `log_level` | `logging.INFO` | Logging level set before runner diagnostics. |
|
|
235
|
+
| `**classifier_kwargs` | — | Additional arguments forwarded directly to `EvenetLiteClassifier`. |
|
|
236
|
+
|
|
237
|
+
## Distributed training
|
|
238
|
+
|
|
239
|
+
The trainer boots into DDP automatically when `WORLD_SIZE > 1` (e.g., via
|
|
240
|
+
`torchrun --nproc_per_node <num_gpus> script.py`). Rank 0 handles logging and checkpointing; sampler/loader seeds are
|
|
241
|
+
synchronized per epoch. Without distributed environment variables, execution falls back to a single process on GPU or CPU
|
|
242
|
+
depending on availability.
|
|
243
|
+
|
|
244
|
+
## Normalization & callbacks
|
|
245
|
+
|
|
246
|
+
- A `NormalizationCallback` is injected automatically during `fit` when one is not provided. You can supply custom
|
|
247
|
+
normalization rules or replace the callback entirely.
|
|
248
|
+
- Pass `normalization_stats` to `EvenetLiteClassifier.fit` or `run_evenet_lite_training` to reuse precomputed
|
|
249
|
+
mean/std pairs without refitting. Provide a mapping per feature group, e.g.:
|
|
250
|
+
|
|
251
|
+
```python
|
|
252
|
+
normalization_stats = {
|
|
253
|
+
"x": {"mean": [0.1, -0.2, 0.0], "std": [1.0, 0.9, 1.1]},
|
|
254
|
+
"globals": {"mean": [0.0, 0.0], "std": [1.0, 1.0]},
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
classifier.fit(
|
|
258
|
+
train_data=(train_features, y_train, w_train),
|
|
259
|
+
val_data=(val_features, y_val, w_val),
|
|
260
|
+
feature_names=feature_names,
|
|
261
|
+
normalization_stats=normalization_stats,
|
|
262
|
+
)
|
|
263
|
+
```
|
|
264
|
+
Any missing groups or columns default to mean `0` and std `1`, and the applied plan is logged as a table on rank 0.
|
|
265
|
+
- Implement custom callbacks by subclassing `Callback` and overriding hooks such as `on_train_start`, `on_epoch_end`, or
|
|
266
|
+
`on_train_end`, then pass instances via the `callbacks` argument of `fit` or the runner.
|
|
267
|
+
|
|
268
|
+
## Checkpointing and pretrained weights
|
|
269
|
+
|
|
270
|
+
- Call `save_checkpoint(path)` on a fitted classifier to persist model, optimizer/scheduler states, and the learned
|
|
271
|
+
normalizer. Use `load_checkpoint(path, feature_names=...)` to restore weights for further training or inference.
|
|
272
|
+
- Enable `pretrained=True` (with optional `pretrained_source`, `pretrained_path`, or Hugging Face repo/filename
|
|
273
|
+
overrides) to soft-load compatible parameters while leaving shape-mismatched layers initialized.
|
|
274
|
+
|
|
275
|
+
## Module guide
|
|
276
|
+
|
|
277
|
+
- `evenet_lite.classifier.EvenetLiteClassifier`: high-level `fit/predict/evaluate` API and pretrained loader.
|
|
278
|
+
- `evenet_lite.runner.run_evenet_lite_training`: convenience pipeline that wires up DDP detection and training.
|
|
279
|
+
- `evenet_lite.trainer.Trainer`: core training loop with DDP, callbacks, metrics, early stopping, and checkpointing.
|
|
280
|
+
- `evenet_lite.data`: dataset wrapper and distributed weighted sampler utilities.
|
|
281
|
+
- `evenet_lite.callbacks`: callback base class, default normalizer, and debug helpers.
|
|
282
|
+
- `evenet_lite.metrics`: accuracy, loss, and physics-driven metrics helpers.
|
|
283
|
+
- `evenet_lite.checkpoint`: rank-safe checkpoint save/load helpers.
|
|
284
|
+
- `evenet_lite.model`: EveNet backbone assembly used by the default classifier.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from .classifier import EvenetLiteClassifier
|
|
2
|
+
from .callbacks import Callback, DebugCallback, EvenetLiteNormalizer, NormalizationCallback
|
|
3
|
+
from .checkpoint import load_checkpoint, save_checkpoint
|
|
4
|
+
from .data import DistributedWeightedSampler, EvenetTensorDataset
|
|
5
|
+
from .hf_utils import load_pretrained_weights
|
|
6
|
+
from .model import EveNetLite
|
|
7
|
+
from .runner import run_evenet_lite_training
|
|
8
|
+
from .trainer import Trainer, TrainerConfig
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"EvenetLiteClassifier",
|
|
12
|
+
"EvenetLiteNormalizer",
|
|
13
|
+
"NormalizationCallback",
|
|
14
|
+
"DebugCallback",
|
|
15
|
+
"Callback",
|
|
16
|
+
"EveNetLite",
|
|
17
|
+
"Trainer",
|
|
18
|
+
"TrainerConfig",
|
|
19
|
+
"EvenetTensorDataset",
|
|
20
|
+
"DistributedWeightedSampler",
|
|
21
|
+
"run_evenet_lite_training",
|
|
22
|
+
"save_checkpoint",
|
|
23
|
+
"load_checkpoint",
|
|
24
|
+
"load_pretrained_weights",
|
|
25
|
+
]
|