mlcompass 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlcompass-0.1.0/LICENSE +21 -0
- mlcompass-0.1.0/PKG-INFO +259 -0
- mlcompass-0.1.0/README.md +214 -0
- mlcompass-0.1.0/pyproject.toml +93 -0
- mlcompass-0.1.0/setup.cfg +4 -0
- mlcompass-0.1.0/src/mlcompass/__init__.py +3 -0
- mlcompass-0.1.0/src/mlcompass/agents/__init__.py +5 -0
- mlcompass-0.1.0/src/mlcompass/agents/advise.py +174 -0
- mlcompass-0.1.0/src/mlcompass/cli.py +293 -0
- mlcompass-0.1.0/src/mlcompass/context.py +222 -0
- mlcompass-0.1.0/src/mlcompass/tools/__init__.py +6 -0
- mlcompass-0.1.0/src/mlcompass/tools/dataset.py +448 -0
- mlcompass-0.1.0/src/mlcompass/ui/__init__.py +7 -0
- mlcompass-0.1.0/src/mlcompass/ui/advise.py +205 -0
- mlcompass-0.1.0/src/mlcompass.egg-info/PKG-INFO +259 -0
- mlcompass-0.1.0/src/mlcompass.egg-info/SOURCES.txt +23 -0
- mlcompass-0.1.0/src/mlcompass.egg-info/dependency_links.txt +1 -0
- mlcompass-0.1.0/src/mlcompass.egg-info/entry_points.txt +2 -0
- mlcompass-0.1.0/src/mlcompass.egg-info/requires.txt +19 -0
- mlcompass-0.1.0/src/mlcompass.egg-info/top_level.txt +1 -0
- mlcompass-0.1.0/tests/test_advisor.py +185 -0
- mlcompass-0.1.0/tests/test_cli_advise.py +303 -0
- mlcompass-0.1.0/tests/test_cli_init.py +70 -0
- mlcompass-0.1.0/tests/test_context.py +133 -0
- mlcompass-0.1.0/tests/test_dataset.py +262 -0
mlcompass-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Hakan Sabunis
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
mlcompass-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mlcompass
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An LLM agent that sits next to you through your whole ML pipeline
|
|
5
|
+
Author-email: Hakan Sabunis <hakansabunis@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/hakansabunis/mlcompass
|
|
8
|
+
Project-URL: Repository, https://github.com/hakansabunis/mlcompass
|
|
9
|
+
Project-URL: Issues, https://github.com/hakansabunis/mlcompass/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/hakansabunis/mlcompass/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: machine-learning,llm,agent,training,monitoring,claude,pytorch,tensorboard,wandb,mlops
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Classifier: Topic :: System :: Monitoring
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: agentlite-py>=0.2.0
|
|
29
|
+
Requires-Dist: watchdog>=4.0.0
|
|
30
|
+
Requires-Dist: rich>=13.0.0
|
|
31
|
+
Requires-Dist: click>=8.0.0
|
|
32
|
+
Requires-Dist: pyyaml>=6.0
|
|
33
|
+
Requires-Dist: pandas>=2.0.0
|
|
34
|
+
Provides-Extra: tensorboard
|
|
35
|
+
Requires-Dist: tbparse>=0.0.7; extra == "tensorboard"
|
|
36
|
+
Provides-Extra: wandb
|
|
37
|
+
Requires-Dist: wandb>=0.16.0; extra == "wandb"
|
|
38
|
+
Provides-Extra: dev
|
|
39
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
40
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
41
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
42
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
43
|
+
Requires-Dist: tbparse>=0.0.7; extra == "dev"
|
|
44
|
+
Dynamic: license-file
|
|
45
|
+
|
|
46
|
+
# mlcompass
|
|
47
|
+
|
|
48
|
+
> An LLM agent that sits next to you through your whole ML pipeline —
|
|
49
|
+
> from data, through training, all the way to deployment.
|
|
50
|
+
|
|
51
|
+
[PyPI](https://pypi.org/project/mlcompass/)
|
|
52
|
+
[Python 3.10+](https://www.python.org)
|
|
53
|
+
[License: MIT](LICENSE)
|
|
54
|
+
|
|
55
|
+
🚧 **Alpha (v0.1.0)** — under active development. APIs may change in future releases.
|
|
56
|
+
|
|
57
|
+
## What it does
|
|
58
|
+
|
|
59
|
+
mlcompass is a single CLI that follows your ML project from start
|
|
60
|
+
to finish, keeping context across every step.
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
data.csv train.py results.csv production
|
|
64
|
+
│ │ │ │
|
|
65
|
+
▼ ▼ ▼ ▼
|
|
66
|
+
advise ────► audit + watch ────► evaluate ────► deploy
|
|
67
|
+
compare
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Each command writes to and reads from a shared project context
|
|
71
|
+
(`.mlcompass/`), so by the time you reach `deploy`, the tool already
|
|
72
|
+
knows your dataset, your model choice, your training history, and
|
|
73
|
+
your evaluation results.
|
|
74
|
+
|
|
75
|
+
## Seven commands, one tool
|
|
76
|
+
|
|
77
|
+
| Command | When you run it | What you get |
|
|
78
|
+
| ---------- | ---------------------------------------- | ----------------------------------------------------- |
|
|
79
|
+
| `init` | Starting a new project | A `.mlcompass/` folder that tracks decisions |
|
|
80
|
+
| `advise` | You have a CSV, what now? | Models to try, features to derive, pitfalls to avoid |
|
|
81
|
+
| `audit` | Before you press train | Static analysis of training script (seed, val, etc.) |
|
|
82
|
+
| `watch` | While training runs | Live plateau / overfit / NaN detection |
|
|
83
|
+
| `compare` | After several runs | Hypothesis-driven diff between two runs |
|
|
84
|
+
| `evaluate` | Training done | Threshold tuning, confusion matrix, hard examples |
|
|
85
|
+
| `deploy` | Going to production | Latency estimate, dependency check, ONNX advice |
|
|
86
|
+
|
|
87
|
+
## Quick example — `advise` mode
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
mlcompass init churn-project
|
|
91
|
+
mlcompass advise data/customers.csv --target churn
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Output:
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
📊 Dataset analysis (data/customers.csv)
|
|
98
|
+
• 10,000 rows × 23 columns
|
|
99
|
+
• Target: churn (binary, 12% positive)
|
|
100
|
+
• 4 categorical, 18 numerical, 1 datetime
|
|
101
|
+
• 3 columns with >50% missing values (consider dropping)
|
|
102
|
+
|
|
103
|
+
💡 Recommended models
|
|
104
|
+
1. XGBoost / LightGBM → tabular binary baseline
|
|
105
|
+
expected AUC: 0.82 – 0.87
|
|
106
|
+
2. Logistic Regression → interpretable baseline
|
|
107
|
+
expected AUC: 0.76 – 0.80
|
|
108
|
+
3. FT-Transformer → if GPU budget allows
|
|
109
|
+
expected AUC: 0.83 – 0.86
|
|
110
|
+
|
|
111
|
+
🔧 Suggested feature engineering
|
|
112
|
+
• signup_date → derive days_since_signup, month, dayofweek
|
|
113
|
+
• income (3 outliers >3σ) → winsorize at 99th percentile
|
|
114
|
+
• country (47 categories) → target encoding or top-N
|
|
115
|
+
|
|
116
|
+
⚠️ Class imbalance (12% positive)
|
|
117
|
+
• Don't optimize accuracy — use AUC, F1, or recall@k
|
|
118
|
+
• Consider class_weight='balanced' or focal loss
|
|
119
|
+
|
|
120
|
+
Generate a baseline notebook? [y/N]
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Quick example — `watch` mode (Phase 2)
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
mlcompass watch train.py
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
After 8 epochs:
|
|
130
|
+
|
|
131
|
+
```
|
|
132
|
+
⚠️ Epoch 8 — overfitting detected
|
|
133
|
+
Train loss: 0.118 | Val loss: 0.387 (gap 0.27, normal <0.1)
|
|
134
|
+
|
|
135
|
+
Likely cause: regularization is too weak for the model capacity.
|
|
136
|
+
|
|
137
|
+
Suggested fix: increase dropout 0.1 → 0.3
|
|
138
|
+
Apply and restart training? [y/N]
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Why mlcompass
|
|
142
|
+
|
|
143
|
+
The ML ecosystem already has great tools — but each owns one slice
|
|
144
|
+
of the pipeline, and none of them advise:
|
|
145
|
+
|
|
146
|
+
| | pandas-profiling | W&B / TensorBoard | Cursor / Devin | **mlcompass** |
|
|
147
|
+
| ------------------------------- | :--------------: | :---------------: | :------------: | :------------: |
|
|
148
|
+
| Analyzes raw data | ✅ | ❌ | ❌ | ✅ |
|
|
149
|
+
| Recommends models + features | ❌ | ❌ | partial | ✅ |
|
|
150
|
+
| Audits training scripts | ❌ | ❌ | reactive | ✅ |
|
|
151
|
+
| Watches training in real time | ❌ | dashboard | ❌ | ✅ |
|
|
152
|
+
| Diagnoses problems proactively | ❌ | ❌ | reactive | ✅ |
|
|
153
|
+
| Post-training evaluation advice | ❌ | basic | ❌ | ✅ |
|
|
154
|
+
| Deployment readiness check | ❌ | ❌ | ❌ | ✅ |
|
|
155
|
+
| Persistent project memory | ❌ | per-run | ❌ | ✅ |
|
|
156
|
+
| Permission-gated actions | ❌ | ❌ | partial | first-class |
|
|
157
|
+
|
|
158
|
+
mlcompass is the **advisor that sits next to all of these tools** —
|
|
159
|
+
not a replacement for any.
|
|
160
|
+
|
|
161
|
+
## Install
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
pip install mlcompass
|
|
165
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Usage
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
# Start a project
|
|
172
|
+
mlcompass init my-project
|
|
173
|
+
|
|
174
|
+
# Pre-training
|
|
175
|
+
mlcompass advise data.csv --target label
|
|
176
|
+
|
|
177
|
+
# Training-time (Phase 2)
|
|
178
|
+
mlcompass audit train.py
|
|
179
|
+
mlcompass watch train.py
|
|
180
|
+
mlcompass compare run-3 run-7
|
|
181
|
+
|
|
182
|
+
# Post-training (Phase 3)
|
|
183
|
+
mlcompass evaluate results.csv
|
|
184
|
+
|
|
185
|
+
# Deployment (Phase 4)
|
|
186
|
+
mlcompass deploy --target sagemaker
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## How it works
|
|
190
|
+
|
|
191
|
+
Built on [agentlite](https://github.com/hakansabunis/agentlite) — a
|
|
192
|
+
small Claude agent library — mlcompass uses one orchestrator agent
|
|
193
|
+
per command, plus focused sub-agents for sub-tasks:
|
|
194
|
+
|
|
195
|
+
```
|
|
196
|
+
cli.py
|
|
197
|
+
│
|
|
198
|
+
┌─────┴─────┐
|
|
199
|
+
▼ ▼
|
|
200
|
+
advise watch ... deploy
|
|
201
|
+
agent agent
|
|
202
|
+
│ │
|
|
203
|
+
▼ ▼
|
|
204
|
+
ModelAdvisor MetricsWatcher (Haiku, polls)
|
|
205
|
+
(Opus) Diagnostician (Opus, called on anomaly)
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Every action that would modify your code, config, or run a training
|
|
209
|
+
process **asks permission first** — agentlite's permission system is
|
|
210
|
+
first-class, not an afterthought.
|
|
211
|
+
|
|
212
|
+
See [ARCHITECTURE.md](ARCHITECTURE.md) for the full design.
|
|
213
|
+
|
|
214
|
+
## Project context
|
|
215
|
+
|
|
216
|
+
Each mlcompass project keeps a small folder, similar in spirit to
|
|
217
|
+
`.git/`:
|
|
218
|
+
|
|
219
|
+
```
|
|
220
|
+
.mlcompass/
|
|
221
|
+
├── project.yaml # metadata
|
|
222
|
+
├── context.json # decisions, recommendations, active state
|
|
223
|
+
├── datasets/ # registered datasets
|
|
224
|
+
└── runs/ # training run history
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
This is what makes mlcompass more than a chat tool: by the time you
|
|
228
|
+
run `deploy`, every earlier decision is still in memory.
|
|
229
|
+
|
|
230
|
+
## Roadmap
|
|
231
|
+
|
|
232
|
+
| Phase | Commands | Status |
|
|
233
|
+
| --------------- | ------------------------------------- | :-----------: |
|
|
234
|
+
| **Phase 1 (v0.1)** | `init`, `advise` | 🚧 In progress |
|
|
235
|
+
| **Phase 2 (v0.2)** | `audit`, `watch`, `compare` | 📅 Planned |
|
|
236
|
+
| **Phase 3 (v0.3)** | `evaluate` | 📅 Planned |
|
|
237
|
+
| **Phase 4 (v0.4)** | `deploy` | 📅 Planned |
|
|
238
|
+
|
|
239
|
+
See [CHANGELOG.md](CHANGELOG.md) for detailed plans and
|
|
240
|
+
[ARCHITECTURE.md](ARCHITECTURE.md) for the design.
|
|
241
|
+
|
|
242
|
+
## Non-goals
|
|
243
|
+
|
|
244
|
+
To stay focused, mlcompass will **not** try to be:
|
|
245
|
+
|
|
246
|
+
- **AutoML** (use AutoGluon, AutoSklearn)
|
|
247
|
+
- **Experiment tracker** (use MLflow, W&B)
|
|
248
|
+
- **Code assistant** (use Cursor, Copilot, aider)
|
|
249
|
+
- **Monitoring dashboard** (use Grafana, Streamlit)
|
|
250
|
+
|
|
251
|
+
mlcompass **advises**; you decide.
|
|
252
|
+
|
|
253
|
+
## Contributing
|
|
254
|
+
|
|
255
|
+
Pre-alpha — issues and discussions welcome, PRs after v0.1.
|
|
256
|
+
|
|
257
|
+
## License
|
|
258
|
+
|
|
259
|
+
MIT © 2026 Hakan Sabunis
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# mlcompass
|
|
2
|
+
|
|
3
|
+
> An LLM agent that sits next to you through your whole ML pipeline —
|
|
4
|
+
> from data, through training, all the way to deployment.
|
|
5
|
+
|
|
6
|
+
[PyPI](https://pypi.org/project/mlcompass/)
|
|
7
|
+
[Python 3.10+](https://www.python.org)
|
|
8
|
+
[License: MIT](LICENSE)
|
|
9
|
+
|
|
10
|
+
🚧 **Alpha (v0.1.0)** — under active development. APIs may change in future releases.
|
|
11
|
+
|
|
12
|
+
## What it does
|
|
13
|
+
|
|
14
|
+
mlcompass is a single CLI that follows your ML project from start
|
|
15
|
+
to finish, keeping context across every step.
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
data.csv train.py results.csv production
|
|
19
|
+
│ │ │ │
|
|
20
|
+
▼ ▼ ▼ ▼
|
|
21
|
+
advise ────► audit + watch ────► evaluate ────► deploy
|
|
22
|
+
compare
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Each command writes to and reads from a shared project context
|
|
26
|
+
(`.mlcompass/`), so by the time you reach `deploy`, the tool already
|
|
27
|
+
knows your dataset, your model choice, your training history, and
|
|
28
|
+
your evaluation results.
|
|
29
|
+
|
|
30
|
+
## Seven commands, one tool
|
|
31
|
+
|
|
32
|
+
| Command | When you run it | What you get |
|
|
33
|
+
| ---------- | ---------------------------------------- | ----------------------------------------------------- |
|
|
34
|
+
| `init` | Starting a new project | A `.mlcompass/` folder that tracks decisions |
|
|
35
|
+
| `advise` | You have a CSV, what now? | Models to try, features to derive, pitfalls to avoid |
|
|
36
|
+
| `audit` | Before you press train | Static analysis of training script (seed, val, etc.) |
|
|
37
|
+
| `watch` | While training runs | Live plateau / overfit / NaN detection |
|
|
38
|
+
| `compare` | After several runs | Hypothesis-driven diff between two runs |
|
|
39
|
+
| `evaluate` | Training done | Threshold tuning, confusion matrix, hard examples |
|
|
40
|
+
| `deploy` | Going to production | Latency estimate, dependency check, ONNX advice |
|
|
41
|
+
|
|
42
|
+
## Quick example — `advise` mode
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
mlcompass init churn-project
|
|
46
|
+
mlcompass advise data/customers.csv --target churn
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Output:
|
|
50
|
+
|
|
51
|
+
```
|
|
52
|
+
📊 Dataset analysis (data/customers.csv)
|
|
53
|
+
• 10,000 rows × 23 columns
|
|
54
|
+
• Target: churn (binary, 12% positive)
|
|
55
|
+
• 4 categorical, 18 numerical, 1 datetime
|
|
56
|
+
• 3 columns with >50% missing values (consider dropping)
|
|
57
|
+
|
|
58
|
+
💡 Recommended models
|
|
59
|
+
1. XGBoost / LightGBM → tabular binary baseline
|
|
60
|
+
expected AUC: 0.82 – 0.87
|
|
61
|
+
2. Logistic Regression → interpretable baseline
|
|
62
|
+
expected AUC: 0.76 – 0.80
|
|
63
|
+
3. FT-Transformer → if GPU budget allows
|
|
64
|
+
expected AUC: 0.83 – 0.86
|
|
65
|
+
|
|
66
|
+
🔧 Suggested feature engineering
|
|
67
|
+
• signup_date → derive days_since_signup, month, dayofweek
|
|
68
|
+
• income (3 outliers >3σ) → winsorize at 99th percentile
|
|
69
|
+
• country (47 categories) → target encoding or top-N
|
|
70
|
+
|
|
71
|
+
⚠️ Class imbalance (12% positive)
|
|
72
|
+
• Don't optimize accuracy — use AUC, F1, or recall@k
|
|
73
|
+
• Consider class_weight='balanced' or focal loss
|
|
74
|
+
|
|
75
|
+
Generate a baseline notebook? [y/N]
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Quick example — `watch` mode (Phase 2)
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
mlcompass watch train.py
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
After 8 epochs:
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
⚠️ Epoch 8 — overfitting detected
|
|
88
|
+
Train loss: 0.118 | Val loss: 0.387 (gap 0.27, normal <0.1)
|
|
89
|
+
|
|
90
|
+
Likely cause: regularization is too weak for the model capacity.
|
|
91
|
+
|
|
92
|
+
Suggested fix: increase dropout 0.1 → 0.3
|
|
93
|
+
Apply and restart training? [y/N]
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Why mlcompass
|
|
97
|
+
|
|
98
|
+
The ML ecosystem already has great tools — but each owns one slice
|
|
99
|
+
of the pipeline, and none of them advise:
|
|
100
|
+
|
|
101
|
+
| | pandas-profiling | W&B / TensorBoard | Cursor / Devin | **mlcompass** |
|
|
102
|
+
| ------------------------------- | :--------------: | :---------------: | :------------: | :------------: |
|
|
103
|
+
| Analyzes raw data | ✅ | ❌ | ❌ | ✅ |
|
|
104
|
+
| Recommends models + features | ❌ | ❌ | partial | ✅ |
|
|
105
|
+
| Audits training scripts | ❌ | ❌ | reactive | ✅ |
|
|
106
|
+
| Watches training in real time | ❌ | dashboard | ❌ | ✅ |
|
|
107
|
+
| Diagnoses problems proactively | ❌ | ❌ | reactive | ✅ |
|
|
108
|
+
| Post-training evaluation advice | ❌ | basic | ❌ | ✅ |
|
|
109
|
+
| Deployment readiness check | ❌ | ❌ | ❌ | ✅ |
|
|
110
|
+
| Persistent project memory | ❌ | per-run | ❌ | ✅ |
|
|
111
|
+
| Permission-gated actions | ❌ | ❌ | partial | first-class |
|
|
112
|
+
|
|
113
|
+
mlcompass is the **advisor that sits next to all of these tools** —
|
|
114
|
+
not a replacement for any.
|
|
115
|
+
|
|
116
|
+
## Install
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
pip install mlcompass
|
|
120
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Usage
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
# Start a project
|
|
127
|
+
mlcompass init my-project
|
|
128
|
+
|
|
129
|
+
# Pre-training
|
|
130
|
+
mlcompass advise data.csv --target label
|
|
131
|
+
|
|
132
|
+
# Training-time (Phase 2)
|
|
133
|
+
mlcompass audit train.py
|
|
134
|
+
mlcompass watch train.py
|
|
135
|
+
mlcompass compare run-3 run-7
|
|
136
|
+
|
|
137
|
+
# Post-training (Phase 3)
|
|
138
|
+
mlcompass evaluate results.csv
|
|
139
|
+
|
|
140
|
+
# Deployment (Phase 4)
|
|
141
|
+
mlcompass deploy --target sagemaker
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## How it works
|
|
145
|
+
|
|
146
|
+
Built on [agentlite](https://github.com/hakansabunis/agentlite) — a
|
|
147
|
+
small Claude agent library — mlcompass uses one orchestrator agent
|
|
148
|
+
per command, plus focused sub-agents for sub-tasks:
|
|
149
|
+
|
|
150
|
+
```
|
|
151
|
+
cli.py
|
|
152
|
+
│
|
|
153
|
+
┌─────┴─────┐
|
|
154
|
+
▼ ▼
|
|
155
|
+
advise watch ... deploy
|
|
156
|
+
agent agent
|
|
157
|
+
│ │
|
|
158
|
+
▼ ▼
|
|
159
|
+
ModelAdvisor MetricsWatcher (Haiku, polls)
|
|
160
|
+
(Opus) Diagnostician (Opus, called on anomaly)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Every action that would modify your code, config, or run a training
|
|
164
|
+
process **asks permission first** — agentlite's permission system is
|
|
165
|
+
first-class, not an afterthought.
|
|
166
|
+
|
|
167
|
+
See [ARCHITECTURE.md](ARCHITECTURE.md) for the full design.
|
|
168
|
+
|
|
169
|
+
## Project context
|
|
170
|
+
|
|
171
|
+
Each mlcompass project keeps a small folder, similar in spirit to
|
|
172
|
+
`.git/`:
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
.mlcompass/
|
|
176
|
+
├── project.yaml # metadata
|
|
177
|
+
├── context.json # decisions, recommendations, active state
|
|
178
|
+
├── datasets/ # registered datasets
|
|
179
|
+
└── runs/ # training run history
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
This is what makes mlcompass more than a chat tool: by the time you
|
|
183
|
+
run `deploy`, every earlier decision is still in memory.
|
|
184
|
+
|
|
185
|
+
## Roadmap
|
|
186
|
+
|
|
187
|
+
| Phase | Commands | Status |
|
|
188
|
+
| --------------- | ------------------------------------- | :-----------: |
|
|
189
|
+
| **Phase 1 (v0.1)** | `init`, `advise` | 🚧 In progress |
|
|
190
|
+
| **Phase 2 (v0.2)** | `audit`, `watch`, `compare` | 📅 Planned |
|
|
191
|
+
| **Phase 3 (v0.3)** | `evaluate` | 📅 Planned |
|
|
192
|
+
| **Phase 4 (v0.4)** | `deploy` | 📅 Planned |
|
|
193
|
+
|
|
194
|
+
See [CHANGELOG.md](CHANGELOG.md) for detailed plans and
|
|
195
|
+
[ARCHITECTURE.md](ARCHITECTURE.md) for the design.
|
|
196
|
+
|
|
197
|
+
## Non-goals
|
|
198
|
+
|
|
199
|
+
To stay focused, mlcompass will **not** try to be:
|
|
200
|
+
|
|
201
|
+
- **AutoML** (use AutoGluon, AutoSklearn)
|
|
202
|
+
- **Experiment tracker** (use MLflow, W&B)
|
|
203
|
+
- **Code assistant** (use Cursor, Copilot, aider)
|
|
204
|
+
- **Monitoring dashboard** (use Grafana, Streamlit)
|
|
205
|
+
|
|
206
|
+
mlcompass **advises**; you decide.
|
|
207
|
+
|
|
208
|
+
## Contributing
|
|
209
|
+
|
|
210
|
+
Pre-alpha — issues and discussions welcome, PRs after v0.1.
|
|
211
|
+
|
|
212
|
+
## License
|
|
213
|
+
|
|
214
|
+
MIT © 2026 Hakan Sabunis
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mlcompass"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "An LLM agent that sits next to you through your whole ML pipeline"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Hakan Sabunis", email = "hakansabunis@gmail.com" },
|
|
10
|
+
]
|
|
11
|
+
keywords = [
|
|
12
|
+
"machine-learning",
|
|
13
|
+
"llm",
|
|
14
|
+
"agent",
|
|
15
|
+
"training",
|
|
16
|
+
"monitoring",
|
|
17
|
+
"claude",
|
|
18
|
+
"pytorch",
|
|
19
|
+
"tensorboard",
|
|
20
|
+
"wandb",
|
|
21
|
+
"mlops",
|
|
22
|
+
]
|
|
23
|
+
classifiers = [
|
|
24
|
+
"Development Status :: 3 - Alpha",
|
|
25
|
+
"Intended Audience :: Developers",
|
|
26
|
+
"Intended Audience :: Science/Research",
|
|
27
|
+
"License :: OSI Approved :: MIT License",
|
|
28
|
+
"Operating System :: OS Independent",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Programming Language :: Python :: 3.10",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
"Programming Language :: Python :: 3.12",
|
|
33
|
+
"Programming Language :: Python :: 3.13",
|
|
34
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
35
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
36
|
+
"Topic :: System :: Monitoring",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
dependencies = [
|
|
40
|
+
"agentlite-py>=0.2.0",
|
|
41
|
+
"watchdog>=4.0.0",
|
|
42
|
+
"rich>=13.0.0",
|
|
43
|
+
"click>=8.0.0",
|
|
44
|
+
"pyyaml>=6.0",
|
|
45
|
+
"pandas>=2.0.0",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.optional-dependencies]
|
|
49
|
+
tensorboard = ["tbparse>=0.0.7"]
|
|
50
|
+
wandb = ["wandb>=0.16.0"]
|
|
51
|
+
dev = [
|
|
52
|
+
"pytest>=7.0.0",
|
|
53
|
+
"pytest-cov>=4.0.0",
|
|
54
|
+
"ruff>=0.1.0",
|
|
55
|
+
"mypy>=1.0.0",
|
|
56
|
+
"tbparse>=0.0.7",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
[project.scripts]
|
|
60
|
+
mlcompass = "mlcompass.cli:main"
|
|
61
|
+
|
|
62
|
+
[project.urls]
|
|
63
|
+
Homepage = "https://github.com/hakansabunis/mlcompass"
|
|
64
|
+
Repository = "https://github.com/hakansabunis/mlcompass"
|
|
65
|
+
Issues = "https://github.com/hakansabunis/mlcompass/issues"
|
|
66
|
+
Changelog = "https://github.com/hakansabunis/mlcompass/blob/main/CHANGELOG.md"
|
|
67
|
+
|
|
68
|
+
[build-system]
|
|
69
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
70
|
+
build-backend = "setuptools.build_meta"
|
|
71
|
+
|
|
72
|
+
[tool.setuptools.packages.find]
|
|
73
|
+
where = ["src"]
|
|
74
|
+
|
|
75
|
+
[tool.setuptools.package-dir]
|
|
76
|
+
"" = "src"
|
|
77
|
+
|
|
78
|
+
[tool.ruff]
|
|
79
|
+
target-version = "py310"
|
|
80
|
+
line-length = 100
|
|
81
|
+
|
|
82
|
+
[tool.ruff.lint]
|
|
83
|
+
select = ["E", "F", "I", "W", "UP", "B", "SIM"]
|
|
84
|
+
|
|
85
|
+
[tool.mypy]
|
|
86
|
+
python_version = "3.10"
|
|
87
|
+
strict = true
|
|
88
|
+
warn_return_any = true
|
|
89
|
+
warn_unused_configs = true
|
|
90
|
+
|
|
91
|
+
[tool.pytest.ini_options]
|
|
92
|
+
testpaths = ["tests"]
|
|
93
|
+
python_files = ["test_*.py"]
|