scikit-lab 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scikit_lab-0.0.1/PKG-INFO +150 -0
- scikit_lab-0.0.1/README.md +135 -0
- scikit_lab-0.0.1/pyproject.toml +66 -0
- scikit_lab-0.0.1/src/sklab/__init__.py +1 -0
- scikit_lab-0.0.1/src/sklab/_lazy.py +37 -0
- scikit_lab-0.0.1/src/sklab/_logging/__init__.py +0 -0
- scikit_lab-0.0.1/src/sklab/_logging/mlflow.py +51 -0
- scikit_lab-0.0.1/src/sklab/_logging/noop.py +34 -0
- scikit_lab-0.0.1/src/sklab/_logging/wandb.py +59 -0
- scikit_lab-0.0.1/src/sklab/_results.py +77 -0
- scikit_lab-0.0.1/src/sklab/_search/__init__.py +1 -0
- scikit_lab-0.0.1/src/sklab/_search/optuna.py +208 -0
- scikit_lab-0.0.1/src/sklab/_search/sklearn.py +125 -0
- scikit_lab-0.0.1/src/sklab/adapters/__init__.py +1 -0
- scikit_lab-0.0.1/src/sklab/adapters/logging.py +40 -0
- scikit_lab-0.0.1/src/sklab/adapters/search.py +33 -0
- scikit_lab-0.0.1/src/sklab/experiment.py +322 -0
- scikit_lab-0.0.1/src/sklab/logging.py +9 -0
- scikit_lab-0.0.1/src/sklab/search.py +18 -0
- scikit_lab-0.0.1/src/sklab/type_aliases.py +91 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: scikit-lab
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A zero-boilerplate experiment runner for sklearn pipelines
|
|
5
|
+
Requires-Dist: scikit-learn>=1.7.2
|
|
6
|
+
Requires-Dist: mlflow>=3.8.1 ; extra == 'mlflow'
|
|
7
|
+
Requires-Dist: cmaes>=0.11.1 ; extra == 'optuna'
|
|
8
|
+
Requires-Dist: optuna>=4.6.0 ; extra == 'optuna'
|
|
9
|
+
Requires-Dist: wandb>=0.23.1 ; extra == 'wandb'
|
|
10
|
+
Requires-Python: >=3.11
|
|
11
|
+
Provides-Extra: mlflow
|
|
12
|
+
Provides-Extra: optuna
|
|
13
|
+
Provides-Extra: wandb
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+

|
|
17
|
+
[](https://github.com/astral-sh/ruff)
|
|
18
|
+
[](https://github.com/astral-sh/ty)
|
|
19
|
+
|
|
20
|
+
# 🧪 sklab
|
|
21
|
+
|
|
22
|
+
A zero-boilerplate experiment runner for sklearn pipelines. One thing, done well: **run experiments**.
|
|
23
|
+
|
|
24
|
+
**The promise:** Give me a pipeline, I'll give you answers.
|
|
25
|
+
|
|
26
|
+
## What It Does
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from sklearn.pipeline import Pipeline
|
|
30
|
+
from sklearn.preprocessing import StandardScaler
|
|
31
|
+
from sklearn.linear_model import LogisticRegression
|
|
32
|
+
|
|
33
|
+
from sklab import Experiment
|
|
34
|
+
from sklab.search import GridSearchConfig
|
|
35
|
+
|
|
36
|
+
pipeline = Pipeline([
|
|
37
|
+
("scale", StandardScaler()),
|
|
38
|
+
("model", LogisticRegression()),
|
|
39
|
+
])
|
|
40
|
+
|
|
41
|
+
experiment = Experiment(
|
|
42
|
+
pipeline=pipeline,
|
|
43
|
+
scorers={"accuracy": "accuracy", "f1": "f1_macro"},
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
experiment.fit(X_train, y_train)
|
|
47
|
+
|
|
48
|
+
result = experiment.evaluate(X_test, y_test)
|
|
49
|
+
|
|
50
|
+
result = experiment.cross_validate(X, y, cv=5)
|
|
51
|
+
|
|
52
|
+
result = experiment.search(GridSearchConfig(param_grid={...}), X, y, cv=5)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## 💪 Why
|
|
56
|
+
|
|
57
|
+
sklab wants to help data scientists avoid:
|
|
58
|
+
- Writing the same logging code for every experiment
|
|
59
|
+
- Forgetting to save predictions, then needing them later
|
|
60
|
+
- Copy-pasting matplotlib code for confusion matrices and ROC curves
|
|
61
|
+
- Getting a single number from `cross_val_score` with no insight into fold variance
|
|
62
|
+
|
|
63
|
+
Sklab removes this friction. Results include predictions, probabilities, and diagnostics automatically. Inject a logger once, everything gets tracked. No sprinkling `mlflow.log_*` through your code.
|
|
64
|
+
|
|
65
|
+
## Install
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
uv add scikit-lab
|
|
69
|
+
|
|
70
|
+
# With optional integrations
|
|
71
|
+
uv add "scikit-lab[optuna]" # Optuna search
|
|
72
|
+
uv add "scikit-lab[mlflow]" # MLflow logging
|
|
73
|
+
uv add "scikit-lab[wandb]" # W&B logging
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## 🤝 Contributing
|
|
77
|
+
|
|
78
|
+
### Philosophy
|
|
79
|
+
|
|
80
|
+
Documentation, code and abstraction strive to adhere to the following principles:
|
|
81
|
+
|
|
82
|
+
- **Be useful** — Every feature solves a real pain point. If it doesn't help you iterate faster, it doesn't belong.
|
|
83
|
+
- **Provide value** — Every line of code must earn its place. We ship what helps, not what's clever.
|
|
84
|
+
- **Abstractions, not obstructions** — We remove tedium, not control. You can always drop down to raw sklearn.
|
|
85
|
+
- **Docs are code** — Every code example runs. If the docs lie, the build fails.
|
|
86
|
+
- **No bloat** — No distributed training, no deployment, no MLOps platform. Just experiments, done well.
|
|
87
|
+
- **Elegance stems from familiarity** — The API feels like sklearn because sklearn got it right, and that's what everybody uses. Don't make people learn new abstractions.
|
|
88
|
+
- **A library, not a framework** — Libraries use familiar concepts; frameworks invent new ones. Study what works in sklearn, HuggingFace, PyTorch - then adopt, don't reinvent. Every new abstraction must earn its place. Design slim wrappers users can see through.
|
|
89
|
+
|
|
90
|
+
### Coding guidelines
|
|
91
|
+
|
|
92
|
+
1. Disclose usage of AI Agents. You are free to use them to contribute. We strive to keep this codebase as agent-friendly as possible. However, you **must** own every line of code the agent writes. This means, as a starter, that you must be able to explain and justify the choice. No slop.
|
|
93
|
+
2. Start your feature request in the [discussions](https://github.com/baggiponte/sklab/discussions) tab. Once the core details are ironed out, we'll move it to the issue tracker.
|
|
94
|
+
3. Agents are encouraged to explore the [plans/](plans/) folder to get a sense of the big picture of the ongoing/relevant developments and to create a new plan if needed.
|
|
95
|
+
4. Code is now free to write. The value we bring is in the ideas, taste and judgment to assert the adherence to the principles above. Let's discuss thoroughly those ideas - including the final API - and code will follow naturally. In other words, treat code as an implementation detail, not as the end goal - this is, and always has been, the ideas we bring.
|
|
96
|
+
5. Keep changes small and reviewable
|
|
97
|
+
|
|
98
|
+
### Setup
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
uv sync
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Commands
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
just format # Ruff format
|
|
108
|
+
just test # Run tests with optuna extra
|
|
109
|
+
just lint # Ruff check + type check
|
|
110
|
+
just docs # Serve docs locally
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Testing
|
|
114
|
+
|
|
115
|
+
- **Docs are code**: every code fence in `docs/` is executed by pytest. If the docs lie, the build fails.
|
|
116
|
+
- Integration tests over mocks
|
|
117
|
+
- Fast and deterministic: small datasets, fixed seeds
|
|
118
|
+
|
|
119
|
+
### Writing Documentation
|
|
120
|
+
|
|
121
|
+
Documentation is a product. We ship it like one.
|
|
122
|
+
|
|
123
|
+
> **Docs teach. Code shows. Neither assumes.**
|
|
124
|
+
|
|
125
|
+
**The three principles:**
|
|
126
|
+
|
|
127
|
+
1. **Problem first, solution second.** Every explanation starts with *why* before *how*.
|
|
128
|
+
2. **Explain at point of use.** Don't front-load theory. Introduce concepts when the reader needs them.
|
|
129
|
+
3. **Link for depth, explain for correctness.** Provide enough context to use the feature correctly; link to authoritative sources for deeper dives.
|
|
130
|
+
|
|
131
|
+
**Quick rules:**
|
|
132
|
+
|
|
133
|
+
| Rule | Example |
|
|
134
|
+
|------|---------|
|
|
135
|
+
| Never assume | Don't say "avoid leakage"—explain what leakage is |
|
|
136
|
+
| Start with the problem | "Most models have hyperparameters that need tuning..." not "Grid search evaluates..." |
|
|
137
|
+
| Show "what happened" | Explain what code did after each block |
|
|
138
|
+
| Provide decision tables | When to use X vs Y in table format |
|
|
139
|
+
| Include "why it matters" | Connect concepts to practical consequences |
|
|
140
|
+
| End with next steps | Link to related tutorials |
|
|
141
|
+
| Cite sources | Link to papers for algorithms |
|
|
142
|
+
|
|
143
|
+
**Code examples must:**
|
|
144
|
+
|
|
145
|
+
- Run without modification (tested by pytest)
|
|
146
|
+
- Show all imports
|
|
147
|
+
- Use sklearn's built-in datasets
|
|
148
|
+
- Set random seeds for reproducibility
|
|
149
|
+
|
|
150
|
+
See [docs/developer/writing-docs.md](docs/developer/writing-docs.md) for the full style guide.
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+

|
|
2
|
+
[](https://github.com/astral-sh/ruff)
|
|
3
|
+
[](https://github.com/astral-sh/ty)
|
|
4
|
+
|
|
5
|
+
# 🧪 sklab
|
|
6
|
+
|
|
7
|
+
A zero-boilerplate experiment runner for sklearn pipelines. One thing, done well: **run experiments**.
|
|
8
|
+
|
|
9
|
+
**The promise:** Give me a pipeline, I'll give you answers.
|
|
10
|
+
|
|
11
|
+
## What It Does
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from sklearn.pipeline import Pipeline
|
|
15
|
+
from sklearn.preprocessing import StandardScaler
|
|
16
|
+
from sklearn.linear_model import LogisticRegression
|
|
17
|
+
|
|
18
|
+
from sklab import Experiment
|
|
19
|
+
from sklab.search import GridSearchConfig
|
|
20
|
+
|
|
21
|
+
pipeline = Pipeline([
|
|
22
|
+
("scale", StandardScaler()),
|
|
23
|
+
("model", LogisticRegression()),
|
|
24
|
+
])
|
|
25
|
+
|
|
26
|
+
experiment = Experiment(
|
|
27
|
+
pipeline=pipeline,
|
|
28
|
+
scorers={"accuracy": "accuracy", "f1": "f1_macro"},
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
experiment.fit(X_train, y_train)
|
|
32
|
+
|
|
33
|
+
result = experiment.evaluate(X_test, y_test)
|
|
34
|
+
|
|
35
|
+
result = experiment.cross_validate(X, y, cv=5)
|
|
36
|
+
|
|
37
|
+
result = experiment.search(GridSearchConfig(param_grid={...}), X, y, cv=5)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## 💪 Why
|
|
41
|
+
|
|
42
|
+
sklab wants to help data scientists avoid:
|
|
43
|
+
- Writing the same logging code for every experiment
|
|
44
|
+
- Forgetting to save predictions, then needing them later
|
|
45
|
+
- Copy-pasting matplotlib code for confusion matrices and ROC curves
|
|
46
|
+
- Getting a single number from `cross_val_score` with no insight into fold variance
|
|
47
|
+
|
|
48
|
+
Sklab removes this friction. Results include predictions, probabilities, and diagnostics automatically. Inject a logger once, everything gets tracked. No sprinkling `mlflow.log_*` through your code.
|
|
49
|
+
|
|
50
|
+
## Install
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
uv add scikit-lab
|
|
54
|
+
|
|
55
|
+
# With optional integrations
|
|
56
|
+
uv add "scikit-lab[optuna]" # Optuna search
|
|
57
|
+
uv add "scikit-lab[mlflow]" # MLflow logging
|
|
58
|
+
uv add "scikit-lab[wandb]" # W&B logging
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 🤝 Contributing
|
|
62
|
+
|
|
63
|
+
### Philosophy
|
|
64
|
+
|
|
65
|
+
Documentation, code and abstraction strive to adhere to the following principles:
|
|
66
|
+
|
|
67
|
+
- **Be useful** โ Every feature solves a real pain point. If it doesn't help you iterate faster, it doesn't belong.
|
|
68
|
+
- **Provide value** โ Every line of code must earn its place. We ship what helps, not what's clever.
|
|
69
|
+
- **Abstractions, not obstructions** โ We remove tedium, not control. You can always drop down to raw sklearn.
|
|
70
|
+
- **Docs are code** โ Every code example runs. If the docs lie, the build fails.
|
|
71
|
+
- **No bloat** โ No distributed training, no deployment, no MLOps platform. Just experiments, done well.
|
|
72
|
+
- **Elegance stems from familiarity** โ The API feels like sklearn because sklearn got it right, and that's what everybody uses. Don't make people learn new abstractions.
|
|
73
|
+
- **A library, not a framework** โ Libraries use familiar concepts; frameworks invent new ones. Study what works in sklearn, HuggingFace, PyTorch - then adopt, don't reinvent. Every new abstraction must earn its place. Design slim wrappers users can see through.
|
|
74
|
+
|
|
75
|
+
### Coding guidelines
|
|
76
|
+
|
|
77
|
+
1. Disclose usage of AI Agents. You are free to use them to contribute. We strive to keep this codebase as agent-friendly as possible. However, you **must** own every line of code the agent writes. This means, as a starter, that you must be able to explain and justify the choice. No slop.
|
|
78
|
+
2. Start your feature request in the [discussions](https://github.com/baggiponte/sklab/discussions) tab. Once the core details are ironed out, we'll move it to the issue tracker.
|
|
79
|
+
3. Agents are encouraged to explore the [plans/](plans/) folder to get a sense of the big picture of the ongoing/relevant developments and to create a new plan if needed.
|
|
80
|
+
4. Code is now free to write. The value we bring is in the ideas, taste and judgment to assert the adherence to the principles above. Let's discuss thoroughly those ideas - including the final API - and code will follow naturally. In other words, treat code as an implementation detail, not as the end goal - this is, and always has been, the ideas we bring.
|
|
81
|
+
5. Keep changes small and reviewable
|
|
82
|
+
|
|
83
|
+
### Setup
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
uv sync
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Commands
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
just format # Ruff format
|
|
93
|
+
just test # Run tests with optuna extra
|
|
94
|
+
just lint # Ruff check + type check
|
|
95
|
+
just docs # Serve docs locally
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Testing
|
|
99
|
+
|
|
100
|
+
- **Docs are code**: every code fence in `docs/` is executed by pytest. If the docs lie, the build fails.
|
|
101
|
+
- Integration tests over mocks
|
|
102
|
+
- Fast and deterministic: small datasets, fixed seeds
|
|
103
|
+
|
|
104
|
+
### Writing Documentation
|
|
105
|
+
|
|
106
|
+
Documentation is a product. We ship it like one.
|
|
107
|
+
|
|
108
|
+
> **Docs teach. Code shows. Neither assumes.**
|
|
109
|
+
|
|
110
|
+
**The three principles:**
|
|
111
|
+
|
|
112
|
+
1. **Problem first, solution second.** Every explanation starts with *why* before *how*.
|
|
113
|
+
2. **Explain at point of use.** Don't front-load theory. Introduce concepts when the reader needs them.
|
|
114
|
+
3. **Link for depth, explain for correctness.** Provide enough context to use the feature correctly; link to authoritative sources for deeper dives.
|
|
115
|
+
|
|
116
|
+
**Quick rules:**
|
|
117
|
+
|
|
118
|
+
| Rule | Example |
|
|
119
|
+
|------|---------|
|
|
120
|
+
| Never assume | Don't say "avoid leakage"—explain what leakage is |
|
|
121
|
+
| Start with the problem | "Most models have hyperparameters that need tuning..." not "Grid search evaluates..." |
|
|
122
|
+
| Show "what happened" | Explain what code did after each block |
|
|
123
|
+
| Provide decision tables | When to use X vs Y in table format |
|
|
124
|
+
| Include "why it matters" | Connect concepts to practical consequences |
|
|
125
|
+
| End with next steps | Link to related tutorials |
|
|
126
|
+
| Cite sources | Link to papers for algorithms |
|
|
127
|
+
|
|
128
|
+
**Code examples must:**
|
|
129
|
+
|
|
130
|
+
- Run without modification (tested by pytest)
|
|
131
|
+
- Show all imports
|
|
132
|
+
- Use sklearn's built-in datasets
|
|
133
|
+
- Set random seeds for reproducibility
|
|
134
|
+
|
|
135
|
+
See [docs/developer/writing-docs.md](docs/developer/writing-docs.md) for the full style guide.
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "scikit-lab"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "A zero-boilerplate experiment runner for sklearn pipelines"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"scikit-learn>=1.7.2",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
[project.optional-dependencies]
|
|
12
|
+
optuna = [
|
|
13
|
+
"cmaes>=0.11.1",
|
|
14
|
+
"optuna>=4.6.0",
|
|
15
|
+
]
|
|
16
|
+
wandb = [
|
|
17
|
+
"wandb>=0.23.1",
|
|
18
|
+
]
|
|
19
|
+
mlflow = [
|
|
20
|
+
"mlflow>=3.8.1",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[dependency-groups]
|
|
24
|
+
dev = [
|
|
25
|
+
"just>=0.8.165",
|
|
26
|
+
"prek>=0.2.27",
|
|
27
|
+
"ruff>=0.14.11",
|
|
28
|
+
]
|
|
29
|
+
test = [
|
|
30
|
+
"scikit-lab[optuna,wandb,mlflow]",
|
|
31
|
+
"pandas>=2.3.3",
|
|
32
|
+
"polars>=1.37.0",
|
|
33
|
+
"pytest-markdown-docs>=0.9.0",
|
|
34
|
+
"pytest>=9.0.2",
|
|
35
|
+
]
|
|
36
|
+
docs = [
|
|
37
|
+
"mkdocstrings-python",
|
|
38
|
+
"zensical",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[tool.uv]
|
|
42
|
+
default-groups = ["dev", "docs", "test"]
|
|
43
|
+
|
|
44
|
+
[tool.uv.build-backend]
|
|
45
|
+
module-name = "sklab"
|
|
46
|
+
|
|
47
|
+
[tool.pytest.ini_options]
|
|
48
|
+
addopts = ["--import-mode=importlib", "--markdown-docs"]
|
|
49
|
+
testpaths = ["tests", "docs"]
|
|
50
|
+
filterwarnings = [
|
|
51
|
+
"ignore:The `Scope.user` setter is deprecated:DeprecationWarning:wandb\\..*",
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
[tool.ruff]
|
|
55
|
+
line-length = 88
|
|
56
|
+
|
|
57
|
+
[tool.ruff.lint]
|
|
58
|
+
extend-select = ["I", "UP", "TID"]
|
|
59
|
+
fixable = ["I", "UP", "TID", "F401"]
|
|
60
|
+
|
|
61
|
+
[tool.ruff.lint.flake8-tidy-imports]
|
|
62
|
+
ban-relative-imports = "all"
|
|
63
|
+
|
|
64
|
+
[build-system]
|
|
65
|
+
requires = ["uv_build>=0.9.22,<0.10.0"]
|
|
66
|
+
build-backend = "uv_build"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""sklab: a library for machine learning experimentation."""
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Lazy module loading for optional dependencies."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from importlib import import_module
|
|
6
|
+
from types import ModuleType
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LazyModule:
    """Proxy that defers importing a module until it is first touched.

    Example:
        mlflow = LazyModule("mlflow", install_hint="Install mlflow to use MLflowLogger.")

        # Later, anywhere in the code — the real import runs on first access:
        mlflow.log_params(...)
    """

    def __init__(self, name: str, *, install_hint: str) -> None:
        self._name = name
        self._install_hint = install_hint
        self._module: ModuleType | None = None

    def __getattr__(self, attr: str) -> Any:
        # __getattr__ only fires for names not found on the instance, so
        # _name/_install_hint/_module lookups never recurse through here.
        module = self._module
        if module is None:
            try:
                module = import_module(self._name)
            except ModuleNotFoundError as exc:
                raise ModuleNotFoundError(
                    f"{self._name} is not installed. {self._install_hint}"
                ) from exc
            self._module = module
        return getattr(module, attr)

    def __repr__(self) -> str:
        state = "loaded" if self._module else "not loaded"
        return f"<LazyModule {self._name!r} ({state})>"
|
|
File without changes
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""MLflow logger."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from sklab._lazy import LazyModule
|
|
10
|
+
|
|
11
|
+
mlflow = LazyModule("mlflow", install_hint="Install mlflow to use MLflowLogger.")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class MLflowLogger:
    """Logger that tracks experiments with MLflow.

    MLflow exposes module-level functions that operate on the active run,
    so no run handle needs to be stored on the instance.

    Attributes:
        experiment_name: MLflow experiment to log under. When None, the
            current/default MLflow experiment is used.
    """

    experiment_name: str | None = None

    @contextmanager
    def start_run(self, name=None, config=None, tags=None, nested=False):
        """Open an MLflow run; ``config`` is logged as params, ``tags`` as tags.

        Args:
            name: Optional run name.
            config: Optional mapping of parameters logged at run start.
            tags: Optional mapping of tags set at run start.
            nested: Whether to start the run nested under the active run.

        Yields:
            This logger, bound to the active MLflow run.
        """
        if self.experiment_name:
            mlflow.set_experiment(self.experiment_name)
        with mlflow.start_run(run_name=name, nested=nested):
            if config:
                self.log_params(config)
            if tags:
                self.set_tags(tags)
            yield self

    def log_params(self, params) -> None:
        """Log a mapping of hyperparameters to the active run."""
        mlflow.log_params(dict(params))

    def log_metrics(self, metrics, step: int | None = None) -> None:
        """Log a mapping of metric values, optionally at a given step."""
        mlflow.log_metrics(dict(metrics), step=step)

    def set_tags(self, tags) -> None:
        """Set tags on the active run."""
        mlflow.set_tags(dict(tags))

    def log_artifact(self, path: str, name: str | None = None) -> None:
        """Log a local file as a run artifact.

        Args:
            path: Local path of the file to upload.
            name: Optional artifact directory to store the file under.
        """
        if name is None:
            mlflow.log_artifact(path)
        else:
            # BUG FIX: mlflow.log_artifact takes no ``name`` keyword — the
            # destination directory is the ``artifact_path`` parameter.
            mlflow.log_artifact(path, artifact_path=name)

    def log_model(self, model: Any, name: str | None = None) -> None:
        """Log a fitted sklearn model under ``name`` (defaults to "model")."""
        mlflow.sklearn.log_model(model, name=name or "model")
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""No-op logger that drops all logging calls."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class NoOpLogger:
    """A logger whose every operation is a silent no-op.

    Acts as the default when no external tracking backend is configured,
    so calling code never has to branch on "is logging enabled".
    """

    @contextmanager
    def start_run(self, name=None, config=None, tags=None, nested=False):
        # Nothing to set up or tear down; just hand the logger back.
        yield self

    def log_params(self, params) -> None:
        return None

    def log_metrics(self, metrics, step: int | None = None) -> None:
        return None

    def set_tags(self, tags) -> None:
        return None

    def log_artifact(self, path: str, name: str | None = None) -> None:
        return None

    def log_model(self, model: Any, name: str | None = None) -> None:
        return None
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Weights & Biases logger."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from sklab._lazy import LazyModule
|
|
10
|
+
|
|
11
|
+
wandb = LazyModule(
|
|
12
|
+
"wandb", install_hint="Install scikit-lab with the 'wandb' extra."
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class WandbLogger:
    """Logger that tracks experiments with Weights & Biases.

    W&B requires calling methods on the run object, so a reference to the
    active run is kept in ``self._run`` for the duration of a run context.

    Attributes:
        project: W&B project to log runs under.
        entity: W&B entity (user or team) that owns the project.
    """

    project: str | None = None
    entity: str | None = None
    _run: Any = field(default=None, init=False, repr=False)

    @contextmanager
    def start_run(self, name=None, config=None, tags=None, nested=False):
        """Open a W&B run and bind it to this logger.

        Args:
            name: Optional run name.
            config: Optional mapping logged as the run config.
            tags: Optional mapping; its *values* become the run's tags.
            nested: Forwarded to ``wandb.init(reinit=...)`` so nested runs
                can re-initialize inside an active run.

        Yields:
            This logger, bound to the active W&B run.
        """
        with wandb.init(
            project=self.project,
            entity=self.entity,
            name=name,
            config=config or {},
            tags=list(tags.values()) if tags else None,
            reinit=nested,
        ) as run:
            self._run = run
            try:
                yield self
            finally:
                # BUG FIX: clear the run reference even when the body raises,
                # so a failed run cannot leak into later logging calls.
                self._run = None

    def log_params(self, params) -> None:
        """Merge ``params`` into the active run's config."""
        self._run.config.update(dict(params), allow_val_change=True)

    def log_metrics(self, metrics, step: int | None = None) -> None:
        """Log a mapping of metric values, optionally at a given step."""
        self._run.log(dict(metrics), step=step)

    def set_tags(self, tags) -> None:
        """Add the values of ``tags`` to the run's tag set (sorted, deduped)."""
        existing = set(self._run.tags or [])
        self._run.tags = sorted(existing | set(tags.values()))

    def log_artifact(self, path: str, name: str | None = None) -> None:
        """Upload a local file as a W&B artifact of type "file"."""
        artifact = wandb.Artifact(name or "artifact", type="file")
        artifact.add_file(path)
        self._run.log_artifact(artifact)

    def log_model(self, model: Any, name: str | None = None) -> None:
        """Log a model artifact; only filesystem paths (str) are supported.

        Non-string models are silently ignored — W&B artifacts are
        file-based, so in-memory estimators must be serialized first.
        """
        if isinstance(model, str):
            self.log_artifact(model, name=name or "model")
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Result dataclasses returned by Experiment methods."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any, Generic, TypeVar
|
|
8
|
+
|
|
9
|
+
RawT = TypeVar("RawT")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(slots=True)
|
|
13
|
+
class FitResult:
|
|
14
|
+
"""Result of a single fit run.
|
|
15
|
+
|
|
16
|
+
Attributes:
|
|
17
|
+
estimator: The fitted pipeline/estimator.
|
|
18
|
+
metrics: Empty dict (fit doesn't compute metrics).
|
|
19
|
+
params: Merged parameters used for fitting.
|
|
20
|
+
raw: The fitted estimator (same as estimator, for API consistency).
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
estimator: Any
|
|
24
|
+
metrics: Mapping[str, float]
|
|
25
|
+
params: Mapping[str, Any]
|
|
26
|
+
raw: Any
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(slots=True)
|
|
30
|
+
class EvalResult:
|
|
31
|
+
"""Result of evaluating a fitted estimator on a dataset.
|
|
32
|
+
|
|
33
|
+
Attributes:
|
|
34
|
+
metrics: Computed metric scores.
|
|
35
|
+
raw: The metrics dict (same as metrics, for API consistency).
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
metrics: Mapping[str, float]
|
|
39
|
+
raw: Mapping[str, float]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(slots=True)
|
|
43
|
+
class CVResult:
|
|
44
|
+
"""Result of a cross-validation run.
|
|
45
|
+
|
|
46
|
+
Attributes:
|
|
47
|
+
metrics: Aggregated metrics (mean/std across folds).
|
|
48
|
+
fold_metrics: Per-fold metric values.
|
|
49
|
+
estimator: Final refitted estimator (if refit=True), else None.
|
|
50
|
+
raw: Full sklearn cross_validate() dict, including fit_time,
|
|
51
|
+
score_time, and test scores for each fold.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
metrics: Mapping[str, float]
|
|
55
|
+
fold_metrics: Mapping[str, list[float]]
|
|
56
|
+
estimator: Any | None
|
|
57
|
+
raw: Mapping[str, Any]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass(slots=True)
class SearchResult(Generic[RawT]):
    """Outcome of a hyperparameter search.

    Attributes:
        best_params: The winning hyperparameter combination.
        best_score: Cross-validation score achieved by the winner.
        estimator: Best estimator refitted on the full data when refit was
            requested; otherwise None.
        raw: Backend-specific search object — the Optuna Study (with full
            trial history) for OptunaConfig, or the fitted sklearn searcher
            (GridSearchCV/RandomizedSearchCV, exposing ``cv_results_`` and
            friends) for the sklearn backends.
    """

    best_params: Mapping[str, Any]
    best_score: float | None
    estimator: Any | None
    raw: RawT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Contains the implementations of the configurations of hyperparameter search classes."""
|