modern-fm 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modern_fm-0.2.1/LICENSE +21 -0
- modern_fm-0.2.1/PKG-INFO +117 -0
- modern_fm-0.2.1/README.md +86 -0
- modern_fm-0.2.1/pyproject.toml +55 -0
- modern_fm-0.2.1/python/modern_fm/__init__.py +17 -0
- modern_fm-0.2.1/python/modern_fm/_backend.py +259 -0
- modern_fm-0.2.1/python/modern_fm/_base.py +87 -0
- modern_fm-0.2.1/python/modern_fm/_early_stop.py +61 -0
- modern_fm-0.2.1/python/modern_fm/_reference.py +116 -0
- modern_fm-0.2.1/python/modern_fm/_reference_train.py +827 -0
- modern_fm-0.2.1/python/modern_fm/_typing.py +9 -0
- modern_fm-0.2.1/python/modern_fm/ffm.py +302 -0
- modern_fm-0.2.1/python/modern_fm/fm.py +555 -0
- modern_fm-0.2.1/python/modern_fm/losses.py +68 -0
- modern_fm-0.2.1/python/modern_fm/preprocessing.py +95 -0
- modern_fm-0.2.1/rust/Cargo.lock +315 -0
- modern_fm-0.2.1/rust/Cargo.toml +22 -0
- modern_fm-0.2.1/rust/src/data.rs +83 -0
- modern_fm-0.2.1/rust/src/ffm.rs +497 -0
- modern_fm-0.2.1/rust/src/fm.rs +503 -0
- modern_fm-0.2.1/rust/src/lib.rs +508 -0
- modern_fm-0.2.1/rust/src/optimizer.rs +274 -0
modern_fm-0.2.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Masaya Kawamata
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
modern_fm-0.2.1/PKG-INFO
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: modern-fm
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Classifier: Development Status :: 4 - Beta
|
|
5
|
+
Classifier: Intended Audience :: Science/Research
|
|
6
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
|
+
Classifier: Programming Language :: Rust
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Requires-Dist: numpy>=1.24
|
|
16
|
+
Requires-Dist: scipy>=1.10
|
|
17
|
+
Requires-Dist: pytest>=7 ; extra == 'dev'
|
|
18
|
+
Requires-Dist: ruff>=0.4 ; extra == 'dev'
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Summary: Fast, sklearn-compatible Factorization Machines and Field-aware Factorization Machines
|
|
22
|
+
Keywords: factorization-machines,ffm,ctr,recommender,tabular
|
|
23
|
+
Author: Masaya Kawamata
|
|
24
|
+
License: MIT
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
27
|
+
Project-URL: Homepage, https://github.com/Matapanino/modern_fm
|
|
28
|
+
Project-URL: Issues, https://github.com/Matapanino/modern_fm/issues
|
|
29
|
+
Project-URL: Repository, https://github.com/Matapanino/modern_fm
|
|
30
|
+
|
|
31
|
+
# modern_fm
|
|
32
|
+
|
|
33
|
+
Fast, sklearn-compatible Factorization Machines (FM) and Field-aware
|
|
34
|
+
Factorization Machines (FFM) for Python.
|
|
35
|
+
|
|
36
|
+
**Status: v0.2 (Beta).** A Rust CPU backend (parity-tested against pure-NumPy
|
|
37
|
+
reference implementations) drives sklearn-style estimators — `FMClassifier`
|
|
38
|
+
(binary + multiclass softmax), `FMRegressor`, and `FFMClassifier`
|
|
39
|
+
(binary + multiclass softmax) — with the SGD / AdaGrad / Adam /
|
|
40
|
+
**FTRL-Proximal** optimizers, **mini-batch**
|
|
41
|
+
gradient averaging (`batch_size`), **multi-core training** via `rayon`
|
|
42
|
+
(`n_jobs`), plus `sample_weight`/`class_weight`, `label_smoothing`, early
|
|
43
|
+
stopping, a `CategoricalEncoder`, and `save_model`/`load_model`. FTRL's L1
|
|
44
|
+
(`l1_linear`/`l1_factors`) yields exact-zero weights. See `docs/roadmap.md` for
|
|
45
|
+
remaining niche gaps (FTRL + early stopping, multiclass + early stopping for FFM).
|
|
46
|
+
|
|
47
|
+
## Installation
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install modern-fm # once published; prebuilt wheels, no Rust needed
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Until the first PyPI release, install from source (requires a Rust toolchain;
|
|
54
|
+
see Development below).
|
|
55
|
+
|
|
56
|
+
## Usage
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from modern_fm import FMClassifier, FFMClassifier
|
|
60
|
+
|
|
61
|
+
model = FMClassifier(
|
|
62
|
+
n_factors=16,
|
|
63
|
+
optimizer="adagrad",
|
|
64
|
+
learning_rate=0.05,
|
|
65
|
+
max_iter=100,
|
|
66
|
+
batch_size=256, # mini-batch gradient averaging (1 = per-row SGD)
|
|
67
|
+
n_jobs=-1, # train batches across all CPU cores
|
|
68
|
+
l2_linear=1e-5,
|
|
69
|
+
l2_factors=1e-5,
|
|
70
|
+
random_state=42,
|
|
71
|
+
)
|
|
72
|
+
model.fit(X_train, y_train)
|
|
73
|
+
proba = model.predict_proba(X_test)
|
|
74
|
+
|
|
75
|
+
# FTRL-Proximal with L1 for sparse linear weights (classic CTR setup)
|
|
76
|
+
sparse = FMClassifier(optimizer="ftrl", l1_linear=1.0, batch_size=256, random_state=42)
|
|
77
|
+
sparse.fit(X_train, y_train)
|
|
78
|
+
|
|
79
|
+
ffm = FFMClassifier(n_factors=8, n_jobs=-1, random_state=42)
|
|
80
|
+
ffm.fit(X_train, y_train, field_ids=field_ids)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
`FMRegressor`, multiclass `FMClassifier` (just pass a target with >2 classes),
|
|
84
|
+
early stopping (`early_stopping=True` or `eval_set=(X_val, y_val)`), and the
|
|
85
|
+
`CategoricalEncoder` are demonstrated in `examples/basic_usage.py`.
|
|
86
|
+
`benchmarks/bench_synthetic.py` reports fit time and predict throughput against
|
|
87
|
+
the NumPy reference floor.
|
|
88
|
+
|
|
89
|
+
## Development
|
|
90
|
+
|
|
91
|
+
Requires Python >= 3.10 and a recent Rust toolchain (1.74+; `rustup update`).
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
python3 -m venv .venv
|
|
95
|
+
.venv/bin/pip install -e ".[dev]" # builds the Rust extension via maturin
|
|
96
|
+
.venv/bin/pytest -q
|
|
97
|
+
.venv/bin/ruff check .
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
`pip install -e .` compiles `rust/` and installs the extension as
|
|
101
|
+
`modern_fm._rust` (maturin mixed layout, config in `pyproject.toml`).
|
|
102
|
+
After editing Rust code, re-run `pip install -e .` to rebuild. Rust-only
|
|
103
|
+
checks:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
cd rust
|
|
107
|
+
PYO3_PYTHON=$PWD/../.venv/bin/python3 cargo test
|
|
108
|
+
PYO3_PYTHON=$PWD/../.venv/bin/python3 cargo clippy
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Without the extension built, the package still works: `modern_fm._backend`
|
|
112
|
+
falls back to the pure-NumPy reference implementations, and the parity tests
|
|
113
|
+
in `tests/test_rust_parity.py` are skipped.
|
|
114
|
+
|
|
115
|
+
Design documents live in `docs/` — start with `docs/requirements.md` and
|
|
116
|
+
`docs/math_spec.md`. The roadmap is in `docs/roadmap.md`.
|
|
117
|
+
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# modern_fm
|
|
2
|
+
|
|
3
|
+
Fast, sklearn-compatible Factorization Machines (FM) and Field-aware
|
|
4
|
+
Factorization Machines (FFM) for Python.
|
|
5
|
+
|
|
6
|
+
**Status: v0.2 (Beta).** A Rust CPU backend (parity-tested against pure-NumPy
|
|
7
|
+
reference implementations) drives sklearn-style estimators — `FMClassifier`
|
|
8
|
+
(binary + multiclass softmax), `FMRegressor`, and `FFMClassifier`
|
|
9
|
+
(binary + multiclass softmax) — with the SGD / AdaGrad / Adam /
|
|
10
|
+
**FTRL-Proximal** optimizers, **mini-batch**
|
|
11
|
+
gradient averaging (`batch_size`), **multi-core training** via `rayon`
|
|
12
|
+
(`n_jobs`), plus `sample_weight`/`class_weight`, `label_smoothing`, early
|
|
13
|
+
stopping, a `CategoricalEncoder`, and `save_model`/`load_model`. FTRL's L1
|
|
14
|
+
(`l1_linear`/`l1_factors`) yields exact-zero weights. See `docs/roadmap.md` for
|
|
15
|
+
remaining niche gaps (FTRL + early stopping, multiclass + early stopping for FFM).
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install modern-fm # once published; prebuilt wheels, no Rust needed
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Until the first PyPI release, install from source (requires a Rust toolchain;
|
|
24
|
+
see Development below).
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from modern_fm import FMClassifier, FFMClassifier
|
|
30
|
+
|
|
31
|
+
model = FMClassifier(
|
|
32
|
+
n_factors=16,
|
|
33
|
+
optimizer="adagrad",
|
|
34
|
+
learning_rate=0.05,
|
|
35
|
+
max_iter=100,
|
|
36
|
+
batch_size=256, # mini-batch gradient averaging (1 = per-row SGD)
|
|
37
|
+
n_jobs=-1, # train batches across all CPU cores
|
|
38
|
+
l2_linear=1e-5,
|
|
39
|
+
l2_factors=1e-5,
|
|
40
|
+
random_state=42,
|
|
41
|
+
)
|
|
42
|
+
model.fit(X_train, y_train)
|
|
43
|
+
proba = model.predict_proba(X_test)
|
|
44
|
+
|
|
45
|
+
# FTRL-Proximal with L1 for sparse linear weights (classic CTR setup)
|
|
46
|
+
sparse = FMClassifier(optimizer="ftrl", l1_linear=1.0, batch_size=256, random_state=42)
|
|
47
|
+
sparse.fit(X_train, y_train)
|
|
48
|
+
|
|
49
|
+
ffm = FFMClassifier(n_factors=8, n_jobs=-1, random_state=42)
|
|
50
|
+
ffm.fit(X_train, y_train, field_ids=field_ids)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
`FMRegressor`, multiclass `FMClassifier` (just pass a target with >2 classes),
|
|
54
|
+
early stopping (`early_stopping=True` or `eval_set=(X_val, y_val)`), and the
|
|
55
|
+
`CategoricalEncoder` are demonstrated in `examples/basic_usage.py`.
|
|
56
|
+
`benchmarks/bench_synthetic.py` reports fit time and predict throughput against
|
|
57
|
+
the NumPy reference floor.
|
|
58
|
+
|
|
59
|
+
## Development
|
|
60
|
+
|
|
61
|
+
Requires Python >= 3.10 and a recent Rust toolchain (1.74+; `rustup update`).
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
python3 -m venv .venv
|
|
65
|
+
.venv/bin/pip install -e ".[dev]" # builds the Rust extension via maturin
|
|
66
|
+
.venv/bin/pytest -q
|
|
67
|
+
.venv/bin/ruff check .
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
`pip install -e .` compiles `rust/` and installs the extension as
|
|
71
|
+
`modern_fm._rust` (maturin mixed layout, config in `pyproject.toml`).
|
|
72
|
+
After editing Rust code, re-run `pip install -e .` to rebuild. Rust-only
|
|
73
|
+
checks:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
cd rust
|
|
77
|
+
PYO3_PYTHON=$PWD/../.venv/bin/python3 cargo test
|
|
78
|
+
PYO3_PYTHON=$PWD/../.venv/bin/python3 cargo clippy
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Without the extension built, the package still works: `modern_fm._backend`
|
|
82
|
+
falls back to the pure-NumPy reference implementations, and the parity tests
|
|
83
|
+
in `tests/test_rust_parity.py` are skipped.
|
|
84
|
+
|
|
85
|
+
Design documents live in `docs/` — start with `docs/requirements.md` and
|
|
86
|
+
`docs/math_spec.md`. The roadmap is in `docs/roadmap.md`.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["maturin>=1.7,<2"]
|
|
3
|
+
build-backend = "maturin"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "modern-fm"
|
|
7
|
+
version = "0.2.1"
|
|
8
|
+
description = "Fast, sklearn-compatible Factorization Machines and Field-aware Factorization Machines"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Masaya Kawamata" }]
|
|
13
|
+
keywords = ["factorization-machines", "ffm", "ctr", "recommender", "tabular"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Programming Language :: Rust",
|
|
24
|
+
"Operating System :: OS Independent",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
26
|
+
]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"numpy>=1.24",
|
|
29
|
+
"scipy>=1.10",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
dev = ["pytest>=7", "ruff>=0.4"]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/Matapanino/modern_fm"
|
|
37
|
+
Repository = "https://github.com/Matapanino/modern_fm"
|
|
38
|
+
Issues = "https://github.com/Matapanino/modern_fm/issues"
|
|
39
|
+
|
|
40
|
+
[tool.maturin]
|
|
41
|
+
manifest-path = "rust/Cargo.toml"
|
|
42
|
+
python-source = "python"
|
|
43
|
+
module-name = "modern_fm._rust"
|
|
44
|
+
features = ["pyo3/extension-module"]
|
|
45
|
+
include = [{ path = "LICENSE", format = "sdist" }]
|
|
46
|
+
|
|
47
|
+
[tool.pytest.ini_options]
|
|
48
|
+
testpaths = ["tests"]
|
|
49
|
+
|
|
50
|
+
[tool.ruff]
|
|
51
|
+
line-length = 100
|
|
52
|
+
target-version = "py310"
|
|
53
|
+
|
|
54
|
+
[tool.ruff.lint]
|
|
55
|
+
select = ["E", "F", "W", "I", "NPY", "UP"]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""modern_fm: fast, sklearn-compatible FM / FFM for Python."""
|
|
2
|
+
|
|
3
|
+
from ._base import NotFittedError
|
|
4
|
+
from .ffm import FFMClassifier
|
|
5
|
+
from .fm import FMClassifier, FMRegressor
|
|
6
|
+
from .preprocessing import CategoricalEncoder
|
|
7
|
+
|
|
8
|
+
__version__ = "0.2.1"
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"FMClassifier",
|
|
12
|
+
"FMRegressor",
|
|
13
|
+
"FFMClassifier",
|
|
14
|
+
"CategoricalEncoder",
|
|
15
|
+
"NotFittedError",
|
|
16
|
+
"__version__",
|
|
17
|
+
]
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""Backend dispatch: Rust extension when built, NumPy reference otherwise.
|
|
2
|
+
|
|
3
|
+
Private module. The NumPy implementations in `_reference` remain the ground
|
|
4
|
+
truth; the Rust extension (`modern_fm._rust`, built via maturin) is an
|
|
5
|
+
optimized drop-in whose parity is enforced by tests/test_rust_parity.py.
|
|
6
|
+
|
|
7
|
+
Both prediction and training are dispatched here (FM/FFM predict, FM binary and
|
|
8
|
+
multiclass-softmax training, FFM training); training parity with the reference
|
|
9
|
+
trainers is enforced by tests/test_rust_train_parity.py.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import scipy.sparse as sp
|
|
16
|
+
|
|
17
|
+
from . import _reference, _reference_train
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
from . import _rust
|
|
21
|
+
except ImportError: # extension not built — pure-Python install
|
|
22
|
+
_rust = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def has_rust():
    """Report whether the compiled Rust extension was imported.

    Returns True when the module-level `_rust` handle is set, and False when
    the guarded import fell back to None.
    """
    rust_missing = _rust is None
    return not rust_missing
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _prep_dense(X):
|
|
30
|
+
return np.ascontiguousarray(X, dtype=np.float64)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _prep_vec(a, dtype=np.float64):
|
|
34
|
+
return np.ascontiguousarray(a, dtype=dtype)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _prep_csr(X):
|
|
38
|
+
X = X.tocsr().astype(np.float64)
|
|
39
|
+
X.sum_duplicates()
|
|
40
|
+
return (
|
|
41
|
+
np.ascontiguousarray(X.indptr, dtype=np.int64),
|
|
42
|
+
np.ascontiguousarray(X.indices, dtype=np.int64),
|
|
43
|
+
np.ascontiguousarray(X.data, dtype=np.float64),
|
|
44
|
+
X.shape[1],
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def fm_predict_fast(X, w0, w, V):
    """Compute FM predictions for `X` (math_spec.md).

    Delegates to the NumPy reference when the Rust extension is unavailable.
    Otherwise `w` and `V` are coerced to contiguous float64 arrays and the
    CSR or dense Rust kernel is called, depending on whether `X` is a SciPy
    sparse matrix.
    """
    if _rust is None:
        return _reference.fm_predict_fast(X, w0, w, V)

    linear = _prep_vec(w)
    factors = _prep_dense(V)
    if not sp.issparse(X):
        return _rust.fm_predict_fast_dense(_prep_dense(X), float(w0), linear, factors)

    indptr, indices, data, n_features = _prep_csr(X)
    return _rust.fm_predict_fast_csr(
        indptr, indices, data, n_features, float(w0), linear, factors
    )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def ffm_predict(X, field_ids, w0, w, V):
    """Compute FFM predictions for `X` (math_spec.md).

    Delegates to the NumPy reference when the Rust extension is unavailable.
    Otherwise `field_ids` is coerced to contiguous int64, `w` and `V` to
    contiguous float64, and the CSR or dense Rust kernel is called depending
    on whether `X` is a SciPy sparse matrix.
    """
    if _rust is None:
        return _reference.ffm_predict(X, field_ids, w0, w, V)

    fields = _prep_vec(field_ids, dtype=np.int64)
    linear = _prep_vec(w)
    factors = _prep_dense(V)
    if not sp.issparse(X):
        return _rust.ffm_predict_dense(_prep_dense(X), fields, float(w0), linear, factors)

    indptr, indices, data, n_features = _prep_csr(X)
    return _rust.ffm_predict_csr(
        indptr, indices, data, n_features, fields, float(w0), linear, factors
    )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _prep_fit(X, y, params, row_orders):
    """Coerce the shared inputs of the Rust fit entry points.

    `params` is unpacked as `(w0, w, V)`. `w` and `V` are copied into fresh
    C-ordered float64 arrays (the Rust kernel mutates them in place, so the
    caller's `params` stay untouched). A 1-D `row_orders` is promoted to a
    single-row 2-D array, and dense `X` is converted to CSR.

    Returns `(csr_parts, y, w0, w, V, row_orders)` where `csr_parts` is the
    `(indptr, indices, data, n_features)` tuple from `_prep_csr`.
    """
    w0, w_init, V_init = params
    w_copy = np.array(w_init, dtype=np.float64, order="C", copy=True)
    V_copy = np.array(V_init, dtype=np.float64, order="C", copy=True)
    targets = _prep_vec(y)

    orders = np.ascontiguousarray(row_orders, dtype=np.int64)
    if orders.ndim == 1:
        # A single epoch ordering: add the leading epoch axis.
        orders = orders[np.newaxis, :]

    if sp.issparse(X):
        csr_input = X
    else:
        csr_input = sp.csr_matrix(np.asarray(X, dtype=np.float64))
    return _prep_csr(csr_input), targets, float(w0), w_copy, V_copy, orders
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _acc_arrays(state, w, V):
|
|
94
|
+
"""AdaGrad accumulators (acc_w0, acc_w, acc_v) from `state` or fresh zeros.
|
|
95
|
+
|
|
96
|
+
`state` (a mutable list, for the epoch-driven early-stopping path) persists
|
|
97
|
+
the accumulators across calls; None means a single all-epochs run.
|
|
98
|
+
"""
|
|
99
|
+
if state is None:
|
|
100
|
+
return 0.0, np.zeros(len(w)), np.zeros_like(V)
|
|
101
|
+
acc_w0, acc_w, acc_v = state
|
|
102
|
+
return float(acc_w0), _prep_vec(acc_w), _prep_dense(acc_v)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def fm_fit(
    X, y, params, *, loss, optimizer, learning_rate, l2_linear, l2_factors, row_orders,
    l1_linear=0.0, l1_factors=0.0, beta_1=0.9, beta_2=0.999, epsilon=1e-8, ftrl_beta=1.0,
    batch_size=1, n_jobs=1, sample_weight=None, state=None, adam_state=None,
):
    """Train an FM (docs/optimization_spec.md) and return new `(w0, w, V)`.

    `params` holds the initial `(w0, w, V)` and is not modified; the result is
    a float64 `(w0, w, V)`.

    * `sample_weight` scales each row's gradient (None means all ones).
    * `batch_size` averages the gradient over each batch (1 is per-row).
    * `n_jobs` (>= 1) splits each batch across that many rayon threads;
      `n_jobs=1` is the serial path that matches the reference.
    * `beta_1`/`beta_2`/`epsilon` matter only for `optimizer == "adam"`.
    * `state` carries the AdaGrad accumulators in and out for epoch-by-epoch
      training and is updated in place when given.
    * `adam_state` does the same for the Adam moments. The Rust kernel keeps
      Adam state internal, so passing `adam_state` (Adam + early stopping)
      routes the call to the NumPy reference.

    The reference path is also used when the Rust extension is missing; it is
    always serial, so `n_jobs` is not forwarded to it.
    """
    use_reference = _rust is None or adam_state is not None
    if use_reference:
        return _reference_train.fm_fit_reference(
            X,
            y,
            params,
            loss=loss,
            optimizer=optimizer,
            learning_rate=learning_rate,
            l2_linear=l2_linear,
            l2_factors=l2_factors,
            l1_linear=l1_linear,
            l1_factors=l1_factors,
            row_orders=row_orders,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            ftrl_beta=ftrl_beta,
            batch_size=batch_size,
            sample_weight=sample_weight,
            state=state,
            adam_state=adam_state,
        )

    csr_parts, y, w0, w, V, row_orders = _prep_fit(X, y, params, row_orders)
    indptr, indices, data, n_features = csr_parts
    if sample_weight is None:
        sw = np.ones(len(y))
    else:
        sw = _prep_vec(sample_weight)
    acc_w0, acc_w, acc_v = _acc_arrays(state, w, V)

    # w, V, acc_w and acc_v are updated in place by the kernel; the scalars
    # come back as return values. Argument order is positional and fixed.
    w0, acc_w0 = _rust.fm_fit_csr(
        indptr, indices, data, n_features, y, sw, w0, acc_w0, w, V, acc_w, acc_v,
        loss, optimizer, learning_rate, l2_linear, l2_factors, beta_1, beta_2, epsilon,
        row_orders, batch_size, n_jobs, l1_linear, l1_factors, ftrl_beta,
    )

    if state is not None:
        state[0] = acc_w0
        state[1] = acc_w
        state[2] = acc_v
    return w0, w, V
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def fm_fit_multiclass(
    X, y, params, *, optimizer, learning_rate, l2_linear, l2_factors, row_orders,
    label_smoothing=0.0, l1_linear=0.0, l1_factors=0.0, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
    ftrl_beta=1.0, batch_size=1, sample_weight=None, state=None, adam_state=None,
):
    """Train a multiclass (softmax) FM (optimization_spec.md).

    `params` is the initial `(w0 (C,), w (C, n), V (C, n, k))` and is left
    unchanged; `y` holds integer class indices in [0, C). Returns new float64
    `(w0, w, V)`.

    `batch_size` averages the gradient over each batch (1 is per-row), and
    `beta_1`/`beta_2`/`epsilon` matter only for `optimizer == "adam"`.

    The Rust multiclass kernel keeps optimizer state internal, so early
    stopping (`state` or `adam_state` given) runs on the NumPy reference,
    which round-trips that state across epochs. The reference is also the
    fallback when the Rust extension is missing.
    """
    needs_reference = _rust is None or state is not None or adam_state is not None
    if needs_reference:
        return _reference_train.fm_fit_multiclass_reference(
            X,
            y,
            params,
            optimizer=optimizer,
            learning_rate=learning_rate,
            l2_linear=l2_linear,
            l2_factors=l2_factors,
            l1_linear=l1_linear,
            l1_factors=l1_factors,
            row_orders=row_orders,
            label_smoothing=label_smoothing,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            ftrl_beta=ftrl_beta,
            batch_size=batch_size,
            sample_weight=sample_weight,
            state=state,
            adam_state=adam_state,
        )

    init_w0, init_w, init_V = params
    # Fresh copies: the kernel mutates all three arrays in place.
    w0 = np.array(init_w0, dtype=np.float64, order="C", copy=True)  # (C,)
    w = np.array(init_w, dtype=np.float64, order="C", copy=True)  # (C, n)
    V = np.array(init_V, dtype=np.float64, order="C", copy=True)  # (C, n, k)
    y = _prep_vec(y, dtype=np.int64)

    row_orders = np.ascontiguousarray(row_orders, dtype=np.int64)
    if row_orders.ndim == 1:
        row_orders = row_orders[np.newaxis, :]

    if sp.issparse(X):
        csr_input = X
    else:
        csr_input = sp.csr_matrix(np.asarray(X, dtype=np.float64))
    indptr, indices, data, n_features = _prep_csr(csr_input)

    if sample_weight is None:
        sw = np.ones(len(y))
    else:
        sw = _prep_vec(sample_weight)

    _rust.fm_fit_multiclass_csr(
        indptr, indices, data, n_features, y, sw, w0, w, V,
        optimizer, learning_rate, l2_linear, l2_factors, label_smoothing,
        beta_1, beta_2, epsilon, row_orders, batch_size, l1_linear, l1_factors, ftrl_beta,
    )
    return w0, w, V
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def ffm_fit(
    X, y, field_ids, params, *, optimizer, learning_rate, l2_linear, l2_factors, row_orders,
    l1_linear=0.0, l1_factors=0.0, beta_1=0.9, beta_2=0.999, epsilon=1e-8, ftrl_beta=1.0,
    batch_size=1, n_jobs=1, sample_weight=None, state=None, adam_state=None,
):
    """Train an FFM with logistic loss; the arguments mirror `fm_fit`.

    `batch_size` averages the gradient over each batch (1 is per-row).
    `n_jobs` (>= 1) splits each batch across that many rayon threads, with
    `n_jobs=1` matching the serial reference. `state`, when given, carries the
    AdaGrad accumulators and is updated in place.

    As in `fm_fit`, Adam + early stopping (`adam_state` given) runs on the
    NumPy reference, which is also the fallback when the Rust extension is
    missing; `n_jobs` is not forwarded to the reference.
    """
    use_reference = _rust is None or adam_state is not None
    if use_reference:
        return _reference_train.ffm_fit_reference(
            X,
            y,
            field_ids,
            params,
            optimizer=optimizer,
            learning_rate=learning_rate,
            l2_linear=l2_linear,
            l2_factors=l2_factors,
            l1_linear=l1_linear,
            l1_factors=l1_factors,
            row_orders=row_orders,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            ftrl_beta=ftrl_beta,
            batch_size=batch_size,
            sample_weight=sample_weight,
            state=state,
            adam_state=adam_state,
        )

    field_ids = _prep_vec(field_ids, dtype=np.int64)
    csr_parts, y, w0, w, V, row_orders = _prep_fit(X, y, params, row_orders)
    indptr, indices, data, n_features = csr_parts
    if sample_weight is None:
        sw = np.ones(len(y))
    else:
        sw = _prep_vec(sample_weight)
    acc_w0, acc_w, acc_v = _acc_arrays(state, w, V)

    # w, V, acc_w and acc_v are updated in place by the kernel; the scalars
    # come back as return values. Argument order is positional and fixed.
    w0, acc_w0 = _rust.ffm_fit_csr(
        indptr, indices, data, n_features, y, sw, field_ids, w0, acc_w0, w, V, acc_w, acc_v,
        optimizer, learning_rate, l2_linear, l2_factors, beta_1, beta_2, epsilon, row_orders,
        batch_size, n_jobs, l1_linear, l1_factors, ftrl_beta,
    )

    if state is not None:
        state[0] = acc_w0
        state[1] = acc_w
        state[2] = acc_v
    return w0, w, V
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def ffm_fit_multiclass(
    X, y, field_ids, params, *, optimizer, learning_rate, l2_linear, l2_factors, row_orders,
    label_smoothing=0.0, l1_linear=0.0, l1_factors=0.0, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
    ftrl_beta=1.0, batch_size=1, sample_weight=None,
):
    """Train a multiclass (softmax) FFM: one FFM per class, coupled by softmax.

    `params` is `(w0 (C,), w (C, n), V (C, n, n_fields, k))` and `y` holds
    class indices in [0, C). Returns new float64 `(w0, w, V)`.

    Training is serial (there is no `n_jobs`), like the multiclass FM. The
    Rust kernel is used when the extension is available, the NumPy reference
    otherwise.
    """
    if _rust is None:
        return _reference_train.ffm_fit_multiclass_reference(
            X,
            y,
            field_ids,
            params,
            optimizer=optimizer,
            learning_rate=learning_rate,
            l2_linear=l2_linear,
            l2_factors=l2_factors,
            l1_linear=l1_linear,
            l1_factors=l1_factors,
            row_orders=row_orders,
            label_smoothing=label_smoothing,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            ftrl_beta=ftrl_beta,
            batch_size=batch_size,
            sample_weight=sample_weight,
        )

    init_w0, init_w, init_V = params
    # Fresh copies: the kernel mutates all three arrays in place.
    w0 = np.array(init_w0, dtype=np.float64, order="C", copy=True)  # (C,)
    w = np.array(init_w, dtype=np.float64, order="C", copy=True)  # (C, n)
    V = np.array(init_V, dtype=np.float64, order="C", copy=True)  # (C, n, n_fields, k)
    y = _prep_vec(y, dtype=np.int64)
    field_ids = _prep_vec(field_ids, dtype=np.int64)

    row_orders = np.ascontiguousarray(row_orders, dtype=np.int64)
    if row_orders.ndim == 1:
        row_orders = row_orders[np.newaxis, :]

    if sp.issparse(X):
        csr_input = X
    else:
        csr_input = sp.csr_matrix(np.asarray(X, dtype=np.float64))
    indptr, indices, data, n_features = _prep_csr(csr_input)

    if sample_weight is None:
        sw = np.ones(len(y))
    else:
        sw = _prep_vec(sample_weight)

    _rust.ffm_fit_multiclass_csr(
        indptr, indices, data, n_features, y, sw, field_ids, w0, w, V,
        optimizer, learning_rate, l2_linear, l2_factors, label_smoothing,
        beta_1, beta_2, epsilon, row_orders, batch_size, l1_linear, l1_factors, ftrl_beta,
    )
    return w0, w, V
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Minimal sklearn-compatible parameter handling.
|
|
2
|
+
|
|
3
|
+
Avoids a hard scikit-learn dependency in v0.1 while keeping the contract:
|
|
4
|
+
__init__ stores constructor arguments verbatim; get_params/set_params
|
|
5
|
+
round-trip them. Phase 3 may swap this for sklearn's BaseEstimator.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import inspect
|
|
11
|
+
import pickle
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NotFittedError(ValueError, AttributeError):
    """Signal that a predict-like method was called before `fit`.

    Derives from both ValueError and AttributeError, mirroring sklearn's
    exception of the same name, so generic sklearn-style error handling that
    catches either type keeps working.
    """

    pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def check_is_fitted(estimator, attribute="w0_"):
    """Raise NotFittedError unless `estimator` has the attribute `attribute`.

    Returns None when the attribute (default ``"w0_"``) is present.
    """
    if hasattr(estimator, attribute):
        return
    estimator_name = type(estimator).__name__
    raise NotFittedError(
        f"This {estimator_name} instance is not fitted yet; "
        "call 'fit' before using this method."
    )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ParamsMixin:
    """sklearn-style `get_params` / `set_params` / `__repr__` for estimators.

    Parameter names are read from the signature of the class's `__init__`;
    each one is expected to be stored on the instance under the same name.
    """

    @classmethod
    def _param_names(cls):
        """Return the named constructor parameters of `cls`, in signature order.

        `self` and variadic parameters (`*args`, `**kwargs`) are skipped: they
        are not stored as attributes, so reporting them would make
        `get_params` fail with AttributeError. This also covers classes that
        define no `__init__` and therefore inherit `object.__init__`.
        """
        variadic = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
        signature = inspect.signature(cls.__init__)
        return [
            name
            for name, param in signature.parameters.items()
            if name != "self" and param.kind not in variadic
        ]

    def get_params(self, deep=True):
        """Return a dict mapping each constructor parameter to its current value.

        `deep` is accepted for sklearn API compatibility and is not used.
        """
        return {name: getattr(self, name) for name in self._param_names()}

    def set_params(self, **params):
        """Set constructor parameters by keyword and return `self`.

        Raises ValueError on the first key that is not a constructor
        parameter; keys processed before it have already been applied.
        """
        valid = set(self._param_names())
        for key, value in params.items():
            if key not in valid:
                raise ValueError(
                    f"Invalid parameter {key!r} for estimator {type(self).__name__}. "
                    f"Valid parameters are: {sorted(valid)}."
                )
            setattr(self, key, value)
        return self

    def __repr__(self):
        """Render as `ClassName(param=value, ...)` using `get_params`."""
        args = ", ".join(f"{k}={v!r}" for k, v in self.get_params().items())
        return f"{type(self).__name__}({args})"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ModelIOMixin:
    """save_model / load_model for fitted estimators.

    The file is a pickle of `{format_version, class, params, attrs}`, where
    `params` are the constructor parameters and `attrs` are the learned
    attributes whose names end in an underscore. The scheme is generic over
    the estimator (binary, regression, multiclass). Estimators also pickle
    natively (plain attributes), so `pickle.dumps(model)` is an alternative.
    """

    _IO_VERSION = 1

    def save_model(self, path):
        """Write the fitted estimator to `path`.

        Raises NotFittedError (via `check_is_fitted`) if the estimator has not
        been fitted.
        """
        check_is_fitted(self)
        state = {
            "format_version": self._IO_VERSION,
            "class": type(self).__name__,
            "params": self.get_params(),
            "attrs": {k: v for k, v in vars(self).items() if k.endswith("_")},
        }
        with open(path, "wb") as f:
            pickle.dump(state, f)

    @classmethod
    def load_model(cls, path):
        """Rebuild an estimator of type `cls` from a file written by `save_model`.

        Raises ValueError if the file does not hold a saved-model dict, was
        saved from a different estimator class, or carries an unsupported
        `format_version`.
        """
        # SECURITY: pickle.load can execute arbitrary code while unpickling.
        # Only load model files that come from a trusted source.
        with open(path, "rb") as f:
            state = pickle.load(f)
        if not isinstance(state, dict):
            raise ValueError(
                f"{path!r} does not hold a saved model (got {type(state).__name__!r})"
            )
        if state.get("class") != cls.__name__:
            raise ValueError(f"{path!r} holds a {state.get('class')!r}, not a {cls.__name__!r}")
        # A missing version is treated as the current one so that files
        # without the key keep loading as before.
        version = state.get("format_version", cls._IO_VERSION)
        if version != cls._IO_VERSION:
            raise ValueError(
                f"{path!r} uses model format version {version!r}; "
                f"this release reads version {cls._IO_VERSION!r}"
            )
        model = cls(**state["params"])
        for key, value in state["attrs"].items():
            setattr(model, key, value)
        return model
|