fitstream 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fitstream/__init__.py CHANGED
@@ -0,0 +1,13 @@
+ from .batching import iter_batches as iter_batches
+ from .events import Event as Event
+ from .fit import (
+     augment as augment,
+     early_stop as early_stop,
+     epoch_stream as epoch_stream,
+     pipe as pipe,
+     take as take,
+     tap as tap,
+     tick as tick,
+ )
+ from .sinks import collect as collect, collect_jsonl as collect_jsonl, collect_pd as collect_pd
+ from .augmentations import validation_loss as validation_loss
fitstream/augmentations.py ADDED
@@ -0,0 +1,57 @@
+ from __future__ import annotations
+
+ from collections.abc import Callable, Sequence
+ from typing import Any
+
+ import torch
+ from torch import nn
+
+
+ def validation_loss(
+     val_data: Sequence[torch.Tensor],
+     loss_fn: Callable[..., torch.Tensor],
+     *,
+     key: str = "val_loss",
+     last_label: bool = True,
+ ) -> Callable[[dict[str, Any]], dict[str, float]]:
+     """Create an augmentation that computes validation loss.
+
+     Args:
+         val_data: Tuple of tensors with batch dimension first. When ``last_label=True``,
+             the last tensor is treated as the label tensor and all preceding tensors are
+             passed to the model.
+         loss_fn: Loss function called as ``loss_fn(pred, labels)`` when ``last_label=True``,
+             otherwise as ``loss_fn(pred)``.
+         key: Name of the key to store the computed loss under.
+         last_label: Whether the last tensor in ``val_data`` is the label tensor.
+
+     Notes:
+         Assumes the model and validation tensors are already on the same device.
+         Validation is computed on the full validation set (no batching).
+     """
+     if not val_data:
+         raise ValueError("val_data must contain at least one tensor.")
+     if last_label and len(val_data) < 2:
+         raise ValueError("last_label=True requires at least two tensors (inputs and labels).")
+
+     if last_label:
+         *inputs, labels = val_data
+     else:
+         inputs = list(val_data)
+         labels = None
+
+     def compute(event: dict[str, Any]) -> dict[str, float]:
+         model = event["model"]
+         if not isinstance(model, nn.Module):
+             raise TypeError("validation_loss expects an event containing a 'model' key.")
+
+         was_training = model.training
+         model.eval()
+         with torch.no_grad():
+             preds = model(*inputs)
+             loss = loss_fn(preds, labels) if last_label else loss_fn(preds)
+         if was_training:
+             model.train()
+         return {key: loss.detach().cpu().item()}
+
+     return compute
fitstream/batching.py ADDED
@@ -0,0 +1,37 @@
+ from typing import Iterable, Sequence
+
+ import torch
+
+
+ def iter_batches(
+     *tensors, batch_size: int = 1, shuffle: bool = True, generator: torch.Generator | None = None
+ ) -> Iterable[Sequence[torch.Tensor]]:
+     """Yields batches from tensors, optionally shuffled.
+
+     Args:
+         *tensors: One or more tensors that share the same first dimension (sample axis). Each
+             yielded batch contains slices from each tensor aligned on that axis.
+         batch_size: Number of samples per batch. The final batch may be smaller if the sample
+             count is not divisible by the batch size.
+         shuffle: Whether to shuffle samples before batching. Shuffling uses the device of the
+             first tensor.
+         generator: Optional torch.Generator for deterministic shuffling.
+
+     Yields:
+         Tuples of tensors, one per input tensor, representing a batch.
+
+     Notes:
+         This function assumes all tensors have the same number of samples along dimension 0
+         and live on the same device. It does not perform explicit validation.
+     """
+     if not tensors:
+         return
+     if not shuffle:
+         tensor_batches = [tensor.split(batch_size) for tensor in tensors]
+         yield from zip(*tensor_batches)
+     else:
+         device = tensors[0].device
+         n_samples = tensors[0].shape[0]
+         idx = torch.randperm(n_samples, device=device, generator=generator)
+         for idx_chunk in idx.split(batch_size):
+             yield tuple(x[idx_chunk] for x in tensors)
fitstream/events.py ADDED
@@ -0,0 +1,19 @@
+ from typing import TypedDict
+
+ from torch import nn
+
+
+ class Event(TypedDict):
+     """Per-epoch event emitted by fit/stream utilities.
+
+     Keys:
+         model: Live model reference updated each epoch.
+         step: 1-based epoch index.
+         train_loss: Mean training loss for the epoch.
+         train_time_sec: Wall-clock seconds spent in the epoch.
+     """
+
+     model: nn.Module
+     step: int
+     train_loss: float
+     train_time_sec: float
fitstream/fit.py ADDED
@@ -0,0 +1,208 @@
+ from __future__ import annotations
+
+ from collections.abc import Callable, Iterable, Iterator, Sequence
+ from typing import Any
+ import time
+
+ import torch
+ from torch import nn
+
+ from .batching import iter_batches
+ from .events import Event
+
+ Transform = Callable[[Iterable[dict[str, Any]]], Iterable[dict[str, Any]]]
+
+
+ def augment(
+     fn: Callable[[dict[str, Any]], dict[str, Any] | None],
+ ) -> Transform:
+     """Create a transform that merges extra keys into each event.
+
+     Args:
+         fn: Function called for each event. The returned mapping is shallow-merged
+             into the event. Returning ``None`` adds nothing.
+
+     Returns:
+         A transform that accepts an event stream and yields augmented events.
+     """
+
+     def transform(events: Iterable[dict[str, Any]]) -> Iterable[dict[str, Any]]:
+         for event in events:
+             extra = fn(event) or {}
+             if not isinstance(extra, dict):
+                 raise TypeError("augment function must return a dict or None.")
+             yield event | extra
+
+     return transform
+
+
+ def pipe(stream: Iterable[dict[str, Any]], *stages: Transform) -> Iterable[dict[str, Any]]:
+     """Compose stream transforms left-to-right.
+
+     Args:
+         stream: Input event stream.
+         stages: Transform functions applied in order.
+
+     Returns:
+         The transformed event stream.
+     """
+     for stage in stages:
+         if not callable(stage):
+             raise TypeError("pipe stages must be callable.")
+         stream = stage(stream)
+     return stream
+
+
+ def take(n: int) -> Transform:
+     """Limit an event stream to the first ``n`` events.
+
+     Can be used directly on a stream or as a pipe stage:
+
+     - ``take(10)(events)``
+     - ``pipe(events, take(10))``
+     """
+     if n < 0:
+         raise ValueError("n must be >= 0.")
+
+     def stage(events: Iterable[dict[str, Any]]) -> Iterable[dict[str, Any]]:
+         count = 0
+         for event in events:
+             if count >= n:
+                 break
+             yield event
+             count += 1
+
+     return stage
+
+
+ def tap(
+     fn: Callable[[dict[str, Any]], Any],
+ ) -> Transform:
+     """Create a stage that performs side effects and yields events unchanged."""
+     if not callable(fn):
+         raise TypeError("tap requires a callable.")
+
+     def stage(events: Iterable[dict[str, Any]]) -> Iterable[dict[str, Any]]:
+         for event in events:
+             fn(event)
+             yield event
+
+     return stage
+
+
+ def tick(
+     fn: Callable[[], Any],
+ ) -> Transform:
+     """Create a stage that runs a no-arg callback per event and yields events unchanged."""
+     if not callable(fn):
+         raise TypeError("tick requires a callable.")
+
+     def stage(events: Iterable[dict[str, Any]]) -> Iterable[dict[str, Any]]:
+         for event in events:
+             fn()
+             yield event
+
+     return stage
+
+
+ def early_stop(
+     key: str,
+     patience: int,
+ ) -> Transform:
+     """Yield events until the metric stops improving for ``patience`` steps.
+
+     Use as a pipe stage:
+
+     - ``pipe(events, early_stop(key="val_loss", patience=10))``
+     """
+     if patience < 1:
+         raise ValueError("patience must be >= 1.")
+
+     def apply(stream: Iterable[dict[str, Any]]) -> Iterable[dict[str, Any]]:
+         best = float("inf")
+         bad = 0
+         for event in stream:
+             value = float(event[key])
+             if value < best:
+                 best = value
+                 bad = 0
+             else:
+                 bad += 1
+             yield event
+             if bad >= patience:
+                 break
+
+     return apply
+
+
+ def epoch_stream(
+     train_data: Sequence[torch.Tensor],
+     model: nn.Module,
+     optimizer: torch.optim.Optimizer,
+     loss_fn: Callable[..., torch.Tensor],
+     *,
+     batch_size: int = 1,
+     shuffle: bool = True,
+     last_label: bool = True,
+     generator: torch.Generator | None = None,
+ ) -> Iterator[Event]:
+     """Yield per-epoch training events from in-memory tensors.
+
+     Args:
+         train_data: Tuple of tensors with batch dimension first. When ``last_label=True``,
+             the last tensor is treated as the label tensor and all preceding tensors are
+             passed to the model.
+         model: PyTorch model to train.
+         optimizer: Optimizer instance constructed with the model parameters.
+         loss_fn: Loss function. Called as ``loss_fn(pred, labels)`` when ``last_label=True``,
+             otherwise as ``loss_fn(pred)``.
+         batch_size: Number of samples per batch.
+         shuffle: Whether to shuffle samples before batching.
+         last_label: Whether the last tensor in ``train_data`` is the label tensor.
+         generator: Optional torch.Generator forwarded to ``iter_batches`` for reproducible
+             shuffling.
+
+     Notes:
+         This function assumes the model and all tensors are already on the same device.
+         It does not copy tensors or take snapshots of model weights.
+     """
+     if batch_size <= 0:
+         raise ValueError("batch_size must be a positive integer.")
+     if not train_data:
+         raise ValueError("train_data must contain at least one tensor.")
+     if last_label and len(train_data) < 2:
+         raise ValueError("last_label=True requires at least two tensors (inputs and labels).")
+
+     step = 0
+     while True:
+         model.train()
+         epoch_start = time.perf_counter()
+         total_loss = torch.zeros((), device=train_data[0].device)
+         total_samples = 0
+
+         for batch in iter_batches(*train_data, batch_size=batch_size, shuffle=shuffle, generator=generator):
+             if last_label:
+                 *inputs, labels = batch
+             else:
+                 inputs = list(batch)
+                 labels = None
+
+             preds = model(*inputs)
+             loss = loss_fn(preds, labels) if last_label else loss_fn(preds)
+
+             optimizer.zero_grad()
+             loss.backward()
+             optimizer.step()
+
+             batch_samples = inputs[0].shape[0]
+             total_loss += loss.detach() * batch_samples
+             total_samples += int(batch_samples)
+
+         step += 1
+         epoch_loss = (total_loss / total_samples).item()
+         yield Event(
+             model=model,
+             step=step,
+             train_loss=epoch_loss,
+             train_time_sec=time.perf_counter() - epoch_start,
+         )
fitstream/sinks.py ADDED
@@ -0,0 +1,66 @@
+ from __future__ import annotations
+
+ from collections.abc import Iterable, Sequence
+ from pathlib import Path
+ from typing import Any, TextIO
+ import json
+
+
+ def _filter_event(
+     event: dict[str, Any],
+     *,
+     include: Sequence[str] | None,
+     exclude: Sequence[str] | None,
+ ) -> dict[str, Any]:
+     if include is not None and exclude is not None:
+         raise ValueError("Provide only one of include or exclude.")
+     if include is None:
+         excluded = {"model"}
+         if exclude is not None:
+             excluded.update(exclude)
+         return {k: v for k, v in event.items() if k not in excluded}
+     included = set(include)
+     included.discard("model")
+     return {k: event[k] for k in included if k in event}
+
+
+ def collect(
+     events: Iterable[dict[str, Any]],
+     *,
+     include: Sequence[str] | None = None,
+     exclude: Sequence[str] | None = None,
+ ) -> list[dict[str, Any]]:
+     """Collect an event stream into a list of dicts."""
+     return [_filter_event(event, include=include, exclude=exclude) for event in events]
+
+
+ def collect_jsonl(
+     events: Iterable[dict[str, Any]],
+     dest: str | Path | TextIO,
+     *,
+     include: Sequence[str] | None = None,
+     exclude: Sequence[str] | None = None,
+ ) -> None:
+     """Write events to JSONL (one JSON object per line)."""
+     if isinstance(dest, (str, Path)):
+         with Path(dest).open("w") as handle:
+             collect_jsonl(events, handle, include=include, exclude=exclude)
+         return
+     for event in events:
+         record = _filter_event(event, include=include, exclude=exclude)
+         dest.write(json.dumps(record) + "\n")
+
+
+ def collect_pd(
+     events: Iterable[dict[str, Any]],
+     *,
+     include: Sequence[str] | None = None,
+     exclude: Sequence[str] | None = None,
+ ):
+     """Collect events into a pandas DataFrame."""
+     try:
+         import pandas as pd
+     except Exception as exc:  # pragma: no cover - depends on optional dependency
+         raise ImportError("pandas is required for collect_pd.") from exc
+     rows = [_filter_event(event, include=include, exclude=exclude) for event in events]
+     return pd.DataFrame(rows)
fitstream-0.1.1.dist-info/METADATA ADDED
@@ -0,0 +1,388 @@
+ Metadata-Version: 2.3
+ Name: fitstream
+ Version: 0.1.1
+ Summary: A library to train PyTorch models as a stream of events
+ Keywords: machine-learning,deep-learning,neural-networks,pytorch,torch,training,metrics,pipeline,dataloader
+ Author: Alex Shtoff
+ Author-email: Alex Shtoff <alex.shtf@gmail.com>
+ License: Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: Apache Software License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Software Development :: Libraries
+ Requires-Dist: torch>=2.6.0
+ Requires-Python: >=3.12
+ Project-URL: Documentation, https://fitstream.readthedocs.io/
+ Project-URL: Homepage, https://github.com/alexshtf/fitstream
+ Project-URL: Repository, https://github.com/alexshtf/fitstream
+ Project-URL: Issues, https://github.com/alexshtf/fitstream/issues
+ Description-Content-Type: text/markdown
+
+ # FitStream
+ A tiny library that makes PyTorch experimentation easy for small models and in-memory datasets.
+
+ # Getting started
+ Using `uv`:
+ ```shell
+ uv add fitstream
+ ```
+
+ Using `pip`:
+ ```shell
+ pip install fitstream
+ ```
+
+ Training a model:
+ ```python
+ from torch.optim import Adam
+
+ from fitstream import epoch_stream, take  # epoch_stream is the main entry point
+
+ X, y = get_data()
+ model = get_model()
+ loss = get_loss()
+ optimizer = Adam(model.parameters())
+
+ # an infinite stream of training epochs (limit it with `take` or `early_stop`)
+ events = epoch_stream((X, y), model, optimizer, loss, batch_size=32, shuffle=True)
+ for event in take(10)(events):
+     print(f"step={event['step']}, loss={event['train_loss']}")
+ # step=1, loss=...
+ # step=2, loss=...
+ # ...
+ ```
+
+ # Basics
+ The core idea of the library is "training loop as a stream of events". An `epoch_stream` is just an iterable over
+ dictionaries containing the step, the model, and the training loss. Everything else transforms or enriches
+ these events. FitStream provides a small `pipe(...)` helper to compose transformations left-to-right.
+
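+ For intuition, `pipe` simply applies each stage in order; a minimal sketch with two hypothetical stages
+ `stage_a` and `stage_b`:
+ ```python
+ from fitstream import pipe
+
+ # pipe(events, stage_a, stage_b) is equivalent to stage_b(stage_a(events))
+ processed = pipe(events, stage_a, stage_b)
+ ```
+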
+ ## Augmentation
+ The `augment` function turns an "augmenter" (a function that looks at an event and returns extra keys) into a stream
+ transform stage. We typically compose stages with `pipe(...)`.
+
+ Here is an example: we add the norm of the model parameters to each event:
+ ```python
+ from torch import nn, linalg
+ from fitstream import epoch_stream, augment, pipe
+
+ def model_param_norm(ev: dict) -> dict:
+     model_params = nn.utils.parameters_to_vector(ev['model'].parameters())
+     return {'model_param_norm': linalg.norm(model_params)}
+
+
+ events = pipe(
+     epoch_stream(...),
+     augment(model_param_norm),
+ )
+ for event in events:
+     print(f"step={event['step']}",
+           f"model_param_norm={event['model_param_norm']}"
+     )
+ ```
+
+ We also have some built-in augmentation functions. Here is an example of adding validation loss to each event:
+ ```python
+ from torch import nn
+ from fitstream import epoch_stream, augment, pipe, validation_loss
+
+ validation_set = get_validation_set()
+ events = pipe(
+     epoch_stream(...),
+     augment(validation_loss(validation_set, nn.CrossEntropyLoss())),
+ )
+ for event in events:
+     print(f"step={event['step']}, val_loss={event['val_loss']}")
+ ```
+
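+ `validation_loss` also accepts a `key=` argument to rename the emitted metric, which helps when augmenting with
+ more than one evaluation set. A sketch, assuming a hypothetical `get_test_set()` analogous to `get_validation_set()`:
+ ```python
+ test_set = get_test_set()  # hypothetical helper, analogous to get_validation_set()
+ events = pipe(
+     epoch_stream(...),
+     augment(validation_loss(validation_set, nn.CrossEntropyLoss())),             # adds 'val_loss'
+     augment(validation_loss(test_set, nn.CrossEntropyLoss(), key="test_loss")),  # adds 'test_loss'
+ )
+ ```
+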
+ We can, of course, augment the stream more than once:
+ ```python
+ events = pipe(
+     epoch_stream(...),
+     augment(validation_loss(...)),
+     augment(model_param_norm),
+ )
+ for event in events:
+     print(f"step={event['step']}",
+           f"val_loss={event['val_loss']}",
+           f"model_param_norm={event['model_param_norm']}"
+     )
+ ```
+
+ ## Selecting events
+ Since the training loop is a standard Python iterable, you can use any Python selection logic. FitStream includes a
+ small helper, `take(...)`, to limit the number of epochs:
+ ```python
+ from fitstream import epoch_stream, take
+
+ for event in take(100)(epoch_stream(...)):
+     print(event)
+ # {'step': 1, ...}
+ # {'step': 2, ...}
+ # ...
+ # {'step': 100, ...}
+ ```
+
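+ Because the stream is a plain Python iterator, standard-library tools work just as well; for example,
+ `itertools.islice` does the same job as `take`:
+ ```python
+ from itertools import islice
+
+ from fitstream import epoch_stream
+
+ for event in islice(epoch_stream(...), 100):
+     print(event)
+ ```
+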
+ `fitstream` has some of its own selection primitives, such as early stopping:
+ ```python
+ from fitstream import augment, early_stop, epoch_stream, pipe, take, validation_loss
+
+ events = pipe(
+     epoch_stream(...),
+     augment(validation_loss(...)),
+     take(500),  # safety cap
+     early_stop(key="val_loss", patience=10),
+ )
+ for event in events:
+     print(event)
+ ```
+
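+ Note that `early_stop` still yields the non-improving events it sees; it simply stops the stream after `patience`
+ consecutive steps without improvement. A toy illustration on plain dicts:
+ ```python
+ from fitstream import early_stop
+
+ toy = [{"val_loss": v} for v in [3.0, 2.0, 2.5, 2.4, 2.3, 1.0]]
+ list(early_stop(key="val_loss", patience=3)(toy))
+ # yields the first five events, then stops: 2.5, 2.4, 2.3 are three non-improving steps
+ ```
+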
+ ## Side effects
+ Sometimes you want to log metrics (or write to an external system) without changing the stream. Use `tap(fn)`:
+ ```python
+ from fitstream import epoch_stream, pipe, tap, take
+
+ events = pipe(
+     epoch_stream(...),
+     tap(lambda ev: print(ev["step"], ev["train_loss"])),
+     take(10),
+ )
+ list(events)
+ ```
+
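+ The related `tick(fn)` stage runs a no-argument callback once per event, which pairs naturally with progress bars.
+ A sketch assuming `tqdm` is installed (it is not a fitstream dependency):
+ ```python
+ from tqdm import tqdm
+ from fitstream import epoch_stream, pipe, take, tick
+
+ bar = tqdm(total=10)
+ events = pipe(
+     epoch_stream(...),
+     take(10),
+     tick(bar.update),  # advance the bar once per epoch
+ )
+ list(events)
+ bar.close()
+ ```
+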
+ ## Sinks
+ Iterating over events and handling them yourself can be tedious, so we provide some utilities for processing the
+ event stream.
+
+ It is typically useful to collect all events into a list while excluding the `model` and keeping just the metrics.
+ The `collect` sink does exactly that:
+ ```python
+ from fitstream import collect, epoch_stream, take
+
+ # collect 100 epochs to a list
+ history = collect(take(100)(epoch_stream(...)))
+ ```
+
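+ By default `collect` drops the `model` key and keeps everything else; the `include=` and `exclude=` keyword
+ arguments (only one may be given) narrow this further:
+ ```python
+ # keep only the step and training loss of each event
+ history = collect(take(100)(epoch_stream(...)), include=["step", "train_loss"])
+ ```
+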
+ We can also store them in a `jsonl` file:
+ ```python
+ from fitstream import collect_jsonl, epoch_stream, take
+
+ # collect 100 epochs to JSONL
+ collect_jsonl(take(100)(epoch_stream(...)), 'runs/my_experiment.jsonl')
+ ```
+
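+ If `pandas` is installed (it is an optional dependency), `collect_pd` gathers the same rows into a DataFrame:
+ ```python
+ from fitstream import collect_pd, epoch_stream, take
+
+ df = collect_pd(take(100)(epoch_stream(...)))  # one row per epoch, 'model' excluded
+ ```
+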
+ # Documentation
+ Full documentation is available at [https://fitstream.readthedocs.io/](https://fitstream.readthedocs.io/).
+
+ # Development
+ - After cloning this repo, run `make setup` to create a virtual environment and install all dependencies.
+ - Build via `uv build`.
+ - Run tests via `make test`.
+ - Build documentation via `make doc`.
+ - Lint via `make lint`.
fitstream-0.1.1.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+ fitstream/__init__.py,sha256=i4FTMn5gOoRnZhXxcGuRCIfGbIk_CQ3NMjvleebF6U8,423
+ fitstream/augmentations.py,sha256=KeslNnDEOmmeXkc4d70hguWhl_jdFcEzvrPnmzuM00E,1991
+ fitstream/batching.py,sha256=ECzf9A3YaZ2_AySZuSYl57fvqHH_DMZKwUIi6R7mGLs,1500
+ fitstream/events.py,sha256=YpmHViJR8WHLK4vpBEPiG6avNM8y9tTm7Sx0DzVsiMM,444
+ fitstream/fit.py,sha256=utXohEn5Yy-KSt_LEHx-vtsBIt8ehj_UHnPKRRKmoJE,6264
+ fitstream/sinks.py,sha256=ZTsyyGam56pDMRVU6nSh-6lpWEX2amDYr3bYQwyxs9o,2117
+ fitstream-0.1.1.dist-info/WHEEL,sha256=5DEXXimM34_d4Gx1AuF9ysMr1_maoEtGKjaILM3s4w4,80
+ fitstream-0.1.1.dist-info/METADATA,sha256=ws_-6hbakxiYnOhGl2Mcf8vgVCoy6T89CBm7WY7uKqY,19049
+ fitstream-0.1.1.dist-info/RECORD,,
fitstream-0.1.1.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: uv 0.9.28
+ Generator: uv 0.9.29
  Root-Is-Purelib: true
  Tag: py3-none-any
fitstream-0.1.0.dist-info/METADATA DELETED
@@ -1,5 +0,0 @@
- Metadata-Version: 2.3
- Name: fitstream
- Version: 0.1.0
- Summary: Add your description here
- Requires-Python: >=3.12
fitstream-0.1.0.dist-info/RECORD DELETED
@@ -1,4 +0,0 @@
- fitstream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- fitstream-0.1.0.dist-info/WHEEL,sha256=fAguSjoiATBe7TNBkJwOjyL1Tt4wwiaQGtNtjRPNMQA,80
- fitstream-0.1.0.dist-info/METADATA,sha256=-IzjLUWYXirg121U3bxUSf1FWiqMJPfwLLYGW4cKTEM,112
- fitstream-0.1.0.dist-info/RECORD,,