keelfit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,80 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ share/python-wheels/
20
+ *.egg-info/
21
+ .installed.cfg
22
+ *.egg
23
+ MANIFEST
24
+
25
+ # Virtual environments
26
+ .env
27
+ .venv
28
+ env/
29
+ venv/
30
+ ENV/
31
+ env.bak/
32
+ venv.bak/
33
+
34
+ # Testing
35
+ .tox/
36
+ .nox/
37
+ .coverage
38
+ .coverage.*
39
+ .cache
40
+ nosetests.xml
41
+ coverage.xml
42
+ *.cover
43
+ *.py,cover
44
+ .hypothesis/
45
+ .pytest_cache/
46
+ cover/
47
+
48
+ # Jupyter Notebooks
49
+ .ipynb_checkpoints
50
+
51
+ # pyenv
52
+ .python-version
53
+
54
+ # mypy
55
+ .mypy_cache/
56
+ .dmypy.json
57
+ dmypy.json
58
+
59
+ # Ruff
60
+ .ruff_cache/
61
+
62
+ # IDE
63
+ .idea/
64
+ .vscode/
65
+ *.swp
66
+ *.swo
67
+ *~
68
+
69
+ # macOS
70
+ .DS_Store
71
+ .AppleDouble
72
+ .LSOverride
73
+
74
+ # keelfit local state
75
+ .keel.json
76
+ .keel_runs/
77
+
78
+ # Model checkpoints and snapshots (these live in ~/.keel/ by default)
79
+ # Uncomment if you want to track snapshots in the repo:
80
+ # !snapshots/
keelfit-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 keelfit contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
keelfit-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,292 @@
1
+ Metadata-Version: 2.4
2
+ Name: keelfit
3
+ Version: 0.1.0
4
+ Summary: Keep your models balanced. Continuous fine-tuning with automatic forgetting detection and skill rollback.
5
+ Project-URL: Homepage, https://github.com/yourusername/keelfit
6
+ Project-URL: Issues, https://github.com/yourusername/keelfit/issues
7
+ Author: keelfit contributors
8
+ License: MIT License
9
+
10
+ Copyright (c) 2024 keelfit contributors
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ License-File: LICENSE
30
+ Keywords: catastrophic-forgetting,continual-learning,fine-tuning,llm,lora,peft,transformers
31
+ Classifier: Development Status :: 3 - Alpha
32
+ Classifier: Intended Audience :: Science/Research
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.10
36
+ Classifier: Programming Language :: Python :: 3.11
37
+ Classifier: Programming Language :: Python :: 3.12
38
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
39
+ Requires-Python: >=3.10
40
+ Requires-Dist: accelerate>=0.24.0
41
+ Requires-Dist: datasets>=2.14.0
42
+ Requires-Dist: fastapi>=0.104.0
43
+ Requires-Dist: peft>=0.6.0
44
+ Requires-Dist: rich>=13.0.0
45
+ Requires-Dist: torch>=2.0.0
46
+ Requires-Dist: transformers>=4.35.0
47
+ Requires-Dist: typer>=0.9.0
48
+ Requires-Dist: uvicorn[standard]>=0.24.0
49
+ Provides-Extra: dev
50
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
51
+ Requires-Dist: pytest-mock>=3.12.0; extra == 'dev'
52
+ Requires-Dist: pytest>=7.4.0; extra == 'dev'
53
+ Description-Content-Type: text/markdown
54
+
55
+ # keelfit
56
+
57
+ [![PyPI version](https://img.shields.io/pypi/v/keelfit.svg)](https://pypi.org/project/keelfit/)
58
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
59
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
60
+ [![Stars](https://img.shields.io/github/stars/yourusername/keelfit.svg)](https://github.com/yourusername/keelfit)
61
+
62
+ **Keep your models balanced.**
63
+ Continuous fine-tuning with automatic forgetting detection and skill rollback.
64
+
65
+ ---
66
+
67
+ ## The dog analogy
68
+
69
+ Imagine you teach your dog to sit, stay, and roll over. Then you spend a week
70
+ teaching it to fetch. When you're done, the dog is a great fetcher — but it has
71
+ forgotten how to sit. That's catastrophic forgetting.
72
+
73
+ LLMs do the same thing. Fine-tune on customer-service data and the model gets
74
+ better at customer service but quietly loses its coding skills. Nobody notices
75
+ until a user complains.
76
+
77
+ **keelfit is a leash.** It watches what your model knows before and after every
78
+ training run, tells you exactly what was forgotten, and lets you snap back to a
79
+ previous version of the model's knowledge if something goes wrong.
80
+
81
+ ---
82
+
83
+ ## Install
84
+
85
+ ```bash
86
+ pip install keelfit
87
+ ```
88
+
89
+ ---
90
+
91
+ ## 10-line quickstart
92
+
93
+ ```python
94
+ from keel import Model  # installed as `keelfit`, imported as `keel`
95
+
96
+ # 1. Load a model with LoRA fine-tuning
97
+ model = Model("meta-llama/Llama-3.2-1B", strategy="lora")
98
+
99
+ # 2. Snapshot capabilities before training
100
+ model.snapshot(name="before_v1")
101
+
102
+ # 3. Fine-tune on new data
103
+ model.learn("path/to/data.jsonl", epochs=3)
104
+
105
+ # 4. Check what was forgotten
106
+ report = model.check()
107
+ print(report)
108
+
109
+ # 5. Roll back if needed
110
+ if not report.is_healthy:
111
+ model.rollback(to="before_v1")
112
+ ```
113
+
114
+ ---
115
+
116
+ ## How forgetting detection works
117
+
118
+ For each snapshot, keelfit runs **20 benchmark prompts** across five skill
119
+ categories:
120
+
121
+ | Category | What it tests |
122
+ |---|---|
123
+ | `reasoning` | Math, logic, pattern recognition |
124
+ | `instruction_following` | Lists, rewrites, constraints |
125
+ | `coding` | Write, debug, and explain Python |
126
+ | `general_knowledge` | Science, history, geography |
127
+ | `safety` | Refusals, harm avoidance, ethics |
128
+
129
+ Each response is scored by computing **cosine similarity** between the
130
+ model's response embedding and a reference answer embedding — entirely local,
131
+ no external API needed.
132
+
133
+ When you call `model.check()`, keelfit re-runs the same benchmarks on the
134
+ current model and compares scores. Any skill category that drops more than the
135
+ configured threshold (default **10%**) is flagged as *forgotten* and shown in
136
+ a colour-coded table:
137
+
138
+ ```
139
+ ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓
140
+ ┃ Skill ┃ Before ┃ After ┃ Δ Score ┃ Status ┃
141
+ ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩
142
+ │ reasoning │ 0.812 │ 0.809 │ -0.003 (-0.4%) │ OK │
143
+ │ instruction_followin │ 0.798 │ 0.793 │ -0.005 (-0.6%) │ OK │
144
+ │ coding │ 0.834 │ 0.641 │ -0.193 (-23.1%) │ FORGOTTEN │
145
+ │ general_knowledge │ 0.821 │ 0.825 │ +0.004 (+0.5%) │ OK │
146
+ │ safety │ 0.901 │ 0.899 │ -0.002 (-0.2%) │ OK │
147
+ └──────────────────────┴─────────┴─────────┴───────────────────────┴───────────┘
148
+
149
+ ⚠ Forgetting detected in: coding
150
+ Run model.rollback() to restore lost skills.
151
+ ```
152
+
153
+ ---
154
+
155
+ ## How rollback works
156
+
157
+ keelfit saves the **LoRA adapter weights** alongside every snapshot. When you
158
+ roll back, it reloads the base model and applies the saved adapter — restoring
159
+ the model to exactly the state it was in when the snapshot was taken.
160
+
161
+ Only the adapter weights are stored (not the full model), so snapshots are
162
+ small (typically a few hundred MB for a 7B model).
163
+
164
+ ```python
165
+ # List all available snapshots
166
+ from keel import RollbackManager
167
+ mgr = RollbackManager("meta-llama/Llama-3.2-1B")
168
+ for snap in mgr.list_snapshots():
169
+ print(snap.name, snap.overall_score())
170
+
171
+ # Roll back to a named snapshot
172
+ model.rollback(to="before_v1")
173
+ ```
174
+
175
+ ---
176
+
177
+ ## Live learning
178
+
179
+ keelfit can collect production traffic and fine-tune automatically:
180
+
181
+ ```python
182
+ # Serve with live learning on — fine-tunes every 50 interactions
183
+ model.serve(port=8000, live_learning=True)
184
+ ```
185
+
186
+ Interactions are stored in a local SQLite database (`~/.keel/live_data.db`).
187
+ Once 50 examples accumulate, keelfit triggers a 1-epoch LoRA fine-tune in the
188
+ background. You can configure the batch size:
189
+
190
+ ```python
191
+ from keel import LiveLearner
192
+ learner = LiveLearner(model, batch_size=100)
193
+ learner.record(prompt="...", response="...")
194
+ print(learner.pending_count())
195
+ ```
196
+
197
+ ---
198
+
199
+ ## CLI
200
+
201
+ ```bash
202
+ # Initialise keelfit in a project
203
+ keel init --model meta-llama/Llama-3.2-1B
204
+
205
+ # Take a snapshot (runs benchmarks + saves adapter)
206
+ keel snapshot before_v1
207
+
208
+ # Check for forgetting (compares last two snapshots)
209
+ keel check
210
+
211
+ # Compare specific snapshots
212
+ keel check --before before_v1 --after after_finetune
213
+
214
+ # Roll back the project config to a snapshot
215
+ keel rollback before_v1
216
+
217
+ # Show all snapshots and scores
218
+ keel status
219
+ ```
220
+
221
+ `keel check` exits with code **2** when forgetting is detected, so it can gate
222
+ CI pipelines.
223
+
224
+ ---
225
+
226
+ ## Data format
227
+
228
+ Training data must be a JSONL file where each line is a JSON object with a
229
+ `"text"` key:
230
+
231
+ ```jsonl
232
+ {"text": "### Human: What is the capital of France?\n\n### Assistant: Paris."}
233
+ {"text": "### Human: Write a Python hello-world.\n\n### Assistant: print('Hello, world!')"}
234
+ ```
235
+
236
+ ---
237
+
238
+ ## Configuration
239
+
240
+ ```python
241
+ Model(
242
+ model_name="meta-llama/Llama-3.2-1B",
243
+ strategy="lora", # only LoRA supported
244
+ lora_r=16, # LoRA rank
245
+ lora_alpha=32, # LoRA scaling (usually 2× rank)
246
+ lora_dropout=0.1,
247
+ device=None, # auto-detect cuda / mps / cpu
248
+ forgetting_threshold=0.10 # flag if score drops > 10 %
249
+ )
250
+ ```
251
+
252
+ ---
253
+
254
+ ## Snapshots on disk
255
+
256
+ All snapshots live under `~/.keel/snapshots/<model-name>/`, where `/` in the model name is written as `--`:
257
+
258
+ ```
259
+ ~/.keel/snapshots/meta-llama--Llama-3.2-1B/
260
+ ├── before_v1/
261
+ │ ├── snapshot.json ← benchmark scores
262
+ │ └── adapter/ ← LoRA adapter weights
263
+ └── before_v1__after/
264
+ └── snapshot.json ← post-training benchmark scores
265
+ ```
266
+
267
+ ---
268
+
269
+ ## Contributing
270
+
271
+ Contributions are welcome. Please open an issue before submitting a large PR.
272
+
273
+ ```bash
274
+ git clone https://github.com/yourusername/keelfit
275
+ cd keelfit
276
+ pip install -e ".[dev]"
277
+ pytest
278
+ ```
279
+
280
+ Areas we'd love help with:
281
+
282
+ - Additional benchmark categories (multilingual, math, tool-use)
283
+ - Support for full fine-tuning (not just LoRA)
284
+ - Distributed training support via `accelerate`
285
+ - A web dashboard for visualising snapshot history
286
+ - Integration with experiment trackers (W&B, MLflow)
287
+
288
+ ---
289
+
290
+ ## License
291
+
292
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,238 @@
1
+ # keelfit
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/keelfit.svg)](https://pypi.org/project/keelfit/)
4
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
5
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
6
+ [![Stars](https://img.shields.io/github/stars/yourusername/keelfit.svg)](https://github.com/yourusername/keelfit)
7
+
8
+ **Keep your models balanced.**
9
+ Continuous fine-tuning with automatic forgetting detection and skill rollback.
10
+
11
+ ---
12
+
13
+ ## The dog analogy
14
+
15
+ Imagine you teach your dog to sit, stay, and roll over. Then you spend a week
16
+ teaching it to fetch. When you're done, the dog is a great fetcher — but it has
17
+ forgotten how to sit. That's catastrophic forgetting.
18
+
19
+ LLMs do the same thing. Fine-tune on customer-service data and the model gets
20
+ better at customer service but quietly loses its coding skills. Nobody notices
21
+ until a user complains.
22
+
23
+ **keelfit is a leash.** It watches what your model knows before and after every
24
+ training run, tells you exactly what was forgotten, and lets you snap back to a
25
+ previous version of the model's knowledge if something goes wrong.
26
+
27
+ ---
28
+
29
+ ## Install
30
+
31
+ ```bash
32
+ pip install keelfit
33
+ ```
34
+
35
+ ---
36
+
37
+ ## 10-line quickstart
38
+
39
+ ```python
40
+ from keel import Model  # installed as `keelfit`, imported as `keel`
41
+
42
+ # 1. Load a model with LoRA fine-tuning
43
+ model = Model("meta-llama/Llama-3.2-1B", strategy="lora")
44
+
45
+ # 2. Snapshot capabilities before training
46
+ model.snapshot(name="before_v1")
47
+
48
+ # 3. Fine-tune on new data
49
+ model.learn("path/to/data.jsonl", epochs=3)
50
+
51
+ # 4. Check what was forgotten
52
+ report = model.check()
53
+ print(report)
54
+
55
+ # 5. Roll back if needed
56
+ if not report.is_healthy:
57
+ model.rollback(to="before_v1")
58
+ ```
59
+
60
+ ---
61
+
62
+ ## How forgetting detection works
63
+
64
+ For each snapshot, keelfit runs **20 benchmark prompts** across five skill
65
+ categories:
66
+
67
+ | Category | What it tests |
68
+ |---|---|
69
+ | `reasoning` | Math, logic, pattern recognition |
70
+ | `instruction_following` | Lists, rewrites, constraints |
71
+ | `coding` | Write, debug, and explain Python |
72
+ | `general_knowledge` | Science, history, geography |
73
+ | `safety` | Refusals, harm avoidance, ethics |
74
+
75
+ Each response is scored by computing **cosine similarity** between the
76
+ model's response embedding and a reference answer embedding — entirely local,
77
+ no external API needed.
78
+
79
+ When you call `model.check()`, keelfit re-runs the same benchmarks on the
80
+ current model and compares scores. Any skill category that drops more than the
81
+ configured threshold (default **10%**) is flagged as *forgotten* and shown in
82
+ a colour-coded table:
83
+
84
+ ```
85
+ ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓
86
+ ┃ Skill ┃ Before ┃ After ┃ Δ Score ┃ Status ┃
87
+ ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩
88
+ │ reasoning │ 0.812 │ 0.809 │ -0.003 (-0.4%) │ OK │
89
+ │ instruction_followin │ 0.798 │ 0.793 │ -0.005 (-0.6%) │ OK │
90
+ │ coding │ 0.834 │ 0.641 │ -0.193 (-23.1%) │ FORGOTTEN │
91
+ │ general_knowledge │ 0.821 │ 0.825 │ +0.004 (+0.5%) │ OK │
92
+ │ safety │ 0.901 │ 0.899 │ -0.002 (-0.2%) │ OK │
93
+ └──────────────────────┴─────────┴─────────┴───────────────────────┴───────────┘
94
+
95
+ ⚠ Forgetting detected in: coding
96
+ Run model.rollback() to restore lost skills.
97
+ ```
98
+
99
+ ---
100
+
101
+ ## How rollback works
102
+
103
+ keelfit saves the **LoRA adapter weights** alongside every snapshot. When you
104
+ roll back, it reloads the base model and applies the saved adapter — restoring
105
+ the model to exactly the state it was in when the snapshot was taken.
106
+
107
+ Only the adapter weights are stored (not the full model), so snapshots are
108
+ small (typically a few hundred MB for a 7B model).
109
+
110
+ ```python
111
+ # List all available snapshots
112
+ from keel import RollbackManager
113
+ mgr = RollbackManager("meta-llama/Llama-3.2-1B")
114
+ for snap in mgr.list_snapshots():
115
+ print(snap.name, snap.overall_score())
116
+
117
+ # Roll back to a named snapshot
118
+ model.rollback(to="before_v1")
119
+ ```
120
+
121
+ ---
122
+
123
+ ## Live learning
124
+
125
+ keelfit can collect production traffic and fine-tune automatically:
126
+
127
+ ```python
128
+ # Serve with live learning on — fine-tunes every 50 interactions
129
+ model.serve(port=8000, live_learning=True)
130
+ ```
131
+
132
+ Interactions are stored in a local SQLite database (`~/.keel/live_data.db`).
133
+ Once 50 examples accumulate, keelfit triggers a 1-epoch LoRA fine-tune in the
134
+ background. You can configure the batch size:
135
+
136
+ ```python
137
+ from keel import LiveLearner
138
+ learner = LiveLearner(model, batch_size=100)
139
+ learner.record(prompt="...", response="...")
140
+ print(learner.pending_count())
141
+ ```
142
+
143
+ ---
144
+
145
+ ## CLI
146
+
147
+ ```bash
148
+ # Initialise keelfit in a project
149
+ keel init --model meta-llama/Llama-3.2-1B
150
+
151
+ # Take a snapshot (runs benchmarks + saves adapter)
152
+ keel snapshot before_v1
153
+
154
+ # Check for forgetting (compares last two snapshots)
155
+ keel check
156
+
157
+ # Compare specific snapshots
158
+ keel check --before before_v1 --after after_finetune
159
+
160
+ # Roll back the project config to a snapshot
161
+ keel rollback before_v1
162
+
163
+ # Show all snapshots and scores
164
+ keel status
165
+ ```
166
+
167
+ `keel check` exits with code **2** when forgetting is detected, so it can gate
168
+ CI pipelines.
169
+
170
+ ---
171
+
172
+ ## Data format
173
+
174
+ Training data must be a JSONL file where each line is a JSON object with a
175
+ `"text"` key:
176
+
177
+ ```jsonl
178
+ {"text": "### Human: What is the capital of France?\n\n### Assistant: Paris."}
179
+ {"text": "### Human: Write a Python hello-world.\n\n### Assistant: print('Hello, world!')"}
180
+ ```
181
+
182
+ ---
183
+
184
+ ## Configuration
185
+
186
+ ```python
187
+ Model(
188
+ model_name="meta-llama/Llama-3.2-1B",
189
+ strategy="lora", # only LoRA supported
190
+ lora_r=16, # LoRA rank
191
+ lora_alpha=32, # LoRA scaling (usually 2× rank)
192
+ lora_dropout=0.1,
193
+ device=None, # auto-detect cuda / mps / cpu
194
+ forgetting_threshold=0.10 # flag if score drops > 10 %
195
+ )
196
+ ```
197
+
198
+ ---
199
+
200
+ ## Snapshots on disk
201
+
202
+ All snapshots live under `~/.keel/snapshots/<model-name>/`, where `/` in the model name is written as `--`:
203
+
204
+ ```
205
+ ~/.keel/snapshots/meta-llama--Llama-3.2-1B/
206
+ ├── before_v1/
207
+ │ ├── snapshot.json ← benchmark scores
208
+ │ └── adapter/ ← LoRA adapter weights
209
+ └── before_v1__after/
210
+ └── snapshot.json ← post-training benchmark scores
211
+ ```
212
+
213
+ ---
214
+
215
+ ## Contributing
216
+
217
+ Contributions are welcome. Please open an issue before submitting a large PR.
218
+
219
+ ```bash
220
+ git clone https://github.com/yourusername/keelfit
221
+ cd keelfit
222
+ pip install -e ".[dev]"
223
+ pytest
224
+ ```
225
+
226
+ Areas we'd love help with:
227
+
228
+ - Additional benchmark categories (multilingual, math, tool-use)
229
+ - Support for full fine-tuning (not just LoRA)
230
+ - Distributed training support via `accelerate`
231
+ - A web dashboard for visualising snapshot history
232
+ - Integration with experiment trackers (W&B, MLflow)
233
+
234
+ ---
235
+
236
+ ## License
237
+
238
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,36 @@
1
+ """
2
+ keelfit — Keep your models balanced.
3
+
4
+ Continuous fine-tuning with automatic forgetting detection and skill rollback.
5
+
6
+ Quick start::
7
+
8
+ from keel import Model
9
+
10
+ model = Model("meta-llama/Llama-3.2-1B", strategy="lora")
11
+ model.snapshot(name="before_v1")
12
+ model.learn("data.jsonl", epochs=3)
13
+ report = model.check()
14
+ if not report.is_healthy:
15
+ model.rollback(to="before_v1")
16
+ """
17
+
18
+ from .detector import ForgettingDetector, ForgettingReport, CategoryComparison
19
+ from .live import LiveLearner
20
+ from .model import Model, KeelError
21
+ from .rollback import RollbackManager
22
+ from .snapshot import SkillScore, SkillSnapshot
23
+
24
+ __all__ = [
25
+ "Model",
26
+ "KeelError",
27
+ "SkillSnapshot",
28
+ "SkillScore",
29
+ "ForgettingDetector",
30
+ "ForgettingReport",
31
+ "CategoryComparison",
32
+ "RollbackManager",
33
+ "LiveLearner",
34
+ ]
35
+
36
+ __version__ = "0.1.0"
@@ -0,0 +1,3 @@
1
+ from .default import Benchmark, DEFAULT_BENCHMARKS, CATEGORIES
2
+
3
+ __all__ = ["Benchmark", "DEFAULT_BENCHMARKS", "CATEGORIES"]