modern-fm 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Masaya Kawamata
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: modern-fm
3
+ Version: 0.2.1
4
+ Classifier: Development Status :: 4 - Beta
5
+ Classifier: Intended Audience :: Science/Research
6
+ Classifier: License :: OSI Approved :: MIT License
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3.10
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Classifier: Programming Language :: Rust
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Requires-Dist: numpy>=1.24
16
+ Requires-Dist: scipy>=1.10
17
+ Requires-Dist: pytest>=7 ; extra == 'dev'
18
+ Requires-Dist: ruff>=0.4 ; extra == 'dev'
19
+ Provides-Extra: dev
20
+ License-File: LICENSE
21
+ Summary: Fast, sklearn-compatible Factorization Machines and Field-aware Factorization Machines
22
+ Keywords: factorization-machines,ffm,ctr,recommender,tabular
23
+ Author: Masaya Kawamata
24
+ License: MIT
25
+ Requires-Python: >=3.10
26
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
27
+ Project-URL: Homepage, https://github.com/Matapanino/modern_fm
28
+ Project-URL: Issues, https://github.com/Matapanino/modern_fm/issues
29
+ Project-URL: Repository, https://github.com/Matapanino/modern_fm
30
+
31
+ # modern_fm
32
+
33
+ Fast, sklearn-compatible Factorization Machines (FM) and Field-aware
34
+ Factorization Machines (FFM) for Python.
35
+
36
+ **Status: v0.2 (Beta).** A Rust CPU backend (parity-tested against pure-NumPy
37
+ reference implementations) drives sklearn-style estimators — `FMClassifier`
38
+ (binary + multiclass softmax), `FMRegressor`, and `FFMClassifier`
39
+ (binary + multiclass softmax) — with the SGD / AdaGrad / Adam /
40
+ **FTRL-Proximal** optimizers, **mini-batch**
41
+ gradient averaging (`batch_size`), **multi-core training** via `rayon`
42
+ (`n_jobs`), plus `sample_weight`/`class_weight`, `label_smoothing`, early
43
+ stopping, a `CategoricalEncoder`, and `save_model`/`load_model`. FTRL's L1
44
+ (`l1_linear`/`l1_factors`) yields exact-zero weights. See `docs/roadmap.md` for
45
+ remaining niche gaps (FTRL + early stopping, multiclass + early stopping for FFM).
46
+
47
+ ## Installation
48
+
49
+ ```bash
50
+ pip install modern-fm # once published; prebuilt wheels, no Rust needed
51
+ ```
52
+
53
+ Until the first PyPI release, install from source (requires a Rust toolchain;
54
+ see Development below).
55
+
56
+ ## Usage
57
+
58
+ ```python
59
+ from modern_fm import FMClassifier, FFMClassifier
60
+
61
+ model = FMClassifier(
62
+ n_factors=16,
63
+ optimizer="adagrad",
64
+ learning_rate=0.05,
65
+ max_iter=100,
66
+ batch_size=256, # mini-batch gradient averaging (1 = per-row SGD)
67
+ n_jobs=-1, # train batches across all CPU cores
68
+ l2_linear=1e-5,
69
+ l2_factors=1e-5,
70
+ random_state=42,
71
+ )
72
+ model.fit(X_train, y_train)
73
+ proba = model.predict_proba(X_test)
74
+
75
+ # FTRL-Proximal with L1 for sparse linear weights (classic CTR setup)
76
+ sparse = FMClassifier(optimizer="ftrl", l1_linear=1.0, batch_size=256, random_state=42)
77
+ sparse.fit(X_train, y_train)
78
+
79
+ ffm = FFMClassifier(n_factors=8, n_jobs=-1, random_state=42)
80
+ ffm.fit(X_train, y_train, field_ids=field_ids)
81
+ ```
82
+
83
+ `FMRegressor`, multiclass `FMClassifier` (just pass a target with >2 classes),
84
+ early stopping (`early_stopping=True` or `eval_set=(X_val, y_val)`), and the
85
+ `CategoricalEncoder` are demonstrated in `examples/basic_usage.py`.
86
+ `benchmarks/bench_synthetic.py` reports fit time and predict throughput against
87
+ the NumPy reference floor.
88
+
89
+ ## Development
90
+
91
+ Requires Python >= 3.10 and a recent Rust toolchain (1.74+; `rustup update`).
92
+
93
+ ```bash
94
+ python3 -m venv .venv
95
+ .venv/bin/pip install -e ".[dev]" # builds the Rust extension via maturin
96
+ .venv/bin/pytest -q
97
+ .venv/bin/ruff check .
98
+ ```
99
+
100
+ `pip install -e .` compiles `rust/` and installs the extension as
101
+ `modern_fm._rust` (maturin mixed layout, config in `pyproject.toml`).
102
+ After editing Rust code, re-run `pip install -e .` to rebuild. Rust-only
103
+ checks:
104
+
105
+ ```bash
106
+ cd rust
107
+ PYO3_PYTHON=$PWD/../.venv/bin/python3 cargo test
108
+ PYO3_PYTHON=$PWD/../.venv/bin/python3 cargo clippy
109
+ ```
110
+
111
+ Without the extension built, the package still works: `modern_fm._backend`
112
+ falls back to the pure-NumPy reference implementations, and the parity tests
113
+ in `tests/test_rust_parity.py` are skipped.
114
+
115
+ Design documents live in `docs/` — start with `docs/requirements.md` and
116
+ `docs/math_spec.md`. The roadmap is in `docs/roadmap.md`.
117
+
@@ -0,0 +1,86 @@
1
+ # modern_fm
2
+
3
+ Fast, sklearn-compatible Factorization Machines (FM) and Field-aware
4
+ Factorization Machines (FFM) for Python.
5
+
6
+ **Status: v0.2 (Beta).** A Rust CPU backend (parity-tested against pure-NumPy
7
+ reference implementations) drives sklearn-style estimators — `FMClassifier`
8
+ (binary + multiclass softmax), `FMRegressor`, and `FFMClassifier`
9
+ (binary + multiclass softmax) — with the SGD / AdaGrad / Adam /
10
+ **FTRL-Proximal** optimizers, **mini-batch**
11
+ gradient averaging (`batch_size`), **multi-core training** via `rayon`
12
+ (`n_jobs`), plus `sample_weight`/`class_weight`, `label_smoothing`, early
13
+ stopping, a `CategoricalEncoder`, and `save_model`/`load_model`. FTRL's L1
14
+ (`l1_linear`/`l1_factors`) yields exact-zero weights. See `docs/roadmap.md` for
15
+ remaining niche gaps (FTRL + early stopping, multiclass + early stopping for FFM).
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install modern-fm # once published; prebuilt wheels, no Rust needed
21
+ ```
22
+
23
+ Until the first PyPI release, install from source (requires a Rust toolchain;
24
+ see Development below).
25
+
26
+ ## Usage
27
+
28
+ ```python
29
+ from modern_fm import FMClassifier, FFMClassifier
30
+
31
+ model = FMClassifier(
32
+ n_factors=16,
33
+ optimizer="adagrad",
34
+ learning_rate=0.05,
35
+ max_iter=100,
36
+ batch_size=256, # mini-batch gradient averaging (1 = per-row SGD)
37
+ n_jobs=-1, # train batches across all CPU cores
38
+ l2_linear=1e-5,
39
+ l2_factors=1e-5,
40
+ random_state=42,
41
+ )
42
+ model.fit(X_train, y_train)
43
+ proba = model.predict_proba(X_test)
44
+
45
+ # FTRL-Proximal with L1 for sparse linear weights (classic CTR setup)
46
+ sparse = FMClassifier(optimizer="ftrl", l1_linear=1.0, batch_size=256, random_state=42)
47
+ sparse.fit(X_train, y_train)
48
+
49
+ ffm = FFMClassifier(n_factors=8, n_jobs=-1, random_state=42)
50
+ ffm.fit(X_train, y_train, field_ids=field_ids)
51
+ ```
52
+
53
+ `FMRegressor`, multiclass `FMClassifier` (just pass a target with >2 classes),
54
+ early stopping (`early_stopping=True` or `eval_set=(X_val, y_val)`), and the
55
+ `CategoricalEncoder` are demonstrated in `examples/basic_usage.py`.
56
+ `benchmarks/bench_synthetic.py` reports fit time and predict throughput against
57
+ the NumPy reference floor.
58
+
59
+ ## Development
60
+
61
+ Requires Python >= 3.10 and a recent Rust toolchain (1.74+; `rustup update`).
62
+
63
+ ```bash
64
+ python3 -m venv .venv
65
+ .venv/bin/pip install -e ".[dev]" # builds the Rust extension via maturin
66
+ .venv/bin/pytest -q
67
+ .venv/bin/ruff check .
68
+ ```
69
+
70
+ `pip install -e .` compiles `rust/` and installs the extension as
71
+ `modern_fm._rust` (maturin mixed layout, config in `pyproject.toml`).
72
+ After editing Rust code, re-run `pip install -e .` to rebuild. Rust-only
73
+ checks:
74
+
75
+ ```bash
76
+ cd rust
77
+ PYO3_PYTHON=$PWD/../.venv/bin/python3 cargo test
78
+ PYO3_PYTHON=$PWD/../.venv/bin/python3 cargo clippy
79
+ ```
80
+
81
+ Without the extension built, the package still works: `modern_fm._backend`
82
+ falls back to the pure-NumPy reference implementations, and the parity tests
83
+ in `tests/test_rust_parity.py` are skipped.
84
+
85
+ Design documents live in `docs/` — start with `docs/requirements.md` and
86
+ `docs/math_spec.md`. The roadmap is in `docs/roadmap.md`.
@@ -0,0 +1,55 @@
1
+ [build-system]
2
+ requires = ["maturin>=1.7,<2"]
3
+ build-backend = "maturin"
4
+
5
+ [project]
6
+ name = "modern-fm"
7
+ version = "0.2.1"
8
+ description = "Fast, sklearn-compatible Factorization Machines and Field-aware Factorization Machines"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Masaya Kawamata" }]
13
+ keywords = ["factorization-machines", "ffm", "ctr", "recommender", "tabular"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Programming Language :: Rust",
24
+ "Operating System :: OS Independent",
25
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
26
+ ]
27
+ dependencies = [
28
+ "numpy>=1.24",
29
+ "scipy>=1.10",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ dev = ["pytest>=7", "ruff>=0.4"]
34
+
35
+ [project.urls]
36
+ Homepage = "https://github.com/Matapanino/modern_fm"
37
+ Repository = "https://github.com/Matapanino/modern_fm"
38
+ Issues = "https://github.com/Matapanino/modern_fm/issues"
39
+
40
+ [tool.maturin]
41
+ manifest-path = "rust/Cargo.toml"
42
+ python-source = "python"
43
+ module-name = "modern_fm._rust"
44
+ features = ["pyo3/extension-module"]
45
+ include = [{ path = "LICENSE", format = "sdist" }]
46
+
47
+ [tool.pytest.ini_options]
48
+ testpaths = ["tests"]
49
+
50
+ [tool.ruff]
51
+ line-length = 100
52
+ target-version = "py310"
53
+
54
+ [tool.ruff.lint]
55
+ select = ["E", "F", "W", "I", "NPY", "UP"]
@@ -0,0 +1,17 @@
1
+ """modern_fm: fast, sklearn-compatible FM / FFM for Python."""
2
+
3
+ from ._base import NotFittedError
4
+ from .ffm import FFMClassifier
5
+ from .fm import FMClassifier, FMRegressor
6
+ from .preprocessing import CategoricalEncoder
7
+
8
+ __version__ = "0.2.1"
9
+
10
+ __all__ = [
11
+ "FMClassifier",
12
+ "FMRegressor",
13
+ "FFMClassifier",
14
+ "CategoricalEncoder",
15
+ "NotFittedError",
16
+ "__version__",
17
+ ]
@@ -0,0 +1,259 @@
1
+ """Backend dispatch: Rust extension when built, NumPy reference otherwise.
2
+
3
+ Private module. The NumPy implementations in `_reference` remain the ground
4
+ truth; the Rust extension (`modern_fm._rust`, built via maturin) is an
5
+ optimized drop-in whose parity is enforced by tests/test_rust_parity.py.
6
+
7
+ Both prediction and training are dispatched here (FM/FFM predict, FM binary and
8
+ multiclass-softmax training, FFM training); training parity with the reference
9
+ trainers is enforced by tests/test_rust_train_parity.py.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import numpy as np
15
+ import scipy.sparse as sp
16
+
17
+ from . import _reference, _reference_train
18
+
19
+ try:
20
+ from . import _rust
21
+ except ImportError: # extension not built — pure-Python install
22
+ _rust = None
23
+
24
+
25
def has_rust():
    """Report whether the `_rust` extension module was imported successfully."""
    if _rust is None:
        return False
    return True
27
+
28
+
29
+ def _prep_dense(X):
30
+ return np.ascontiguousarray(X, dtype=np.float64)
31
+
32
+
33
+ def _prep_vec(a, dtype=np.float64):
34
+ return np.ascontiguousarray(a, dtype=dtype)
35
+
36
+
37
+ def _prep_csr(X):
38
+ X = X.tocsr().astype(np.float64)
39
+ X.sum_duplicates()
40
+ return (
41
+ np.ascontiguousarray(X.indptr, dtype=np.int64),
42
+ np.ascontiguousarray(X.indices, dtype=np.int64),
43
+ np.ascontiguousarray(X.data, dtype=np.float64),
44
+ X.shape[1],
45
+ )
46
+
47
+
48
def fm_predict_fast(X, w0, w, V):
    """FM prediction (math_spec.md), Rust-accelerated when available.

    Without the extension this delegates to `_reference.fm_predict_fast`.
    Otherwise `w` and `V` are coerced to contiguous float64 arrays and the
    CSR or dense Rust kernel is chosen according to whether `X` is sparse.
    """
    if _rust is None:
        return _reference.fm_predict_fast(X, w0, w, V)

    linear = _prep_vec(w)
    factors = _prep_dense(V)
    if not sp.issparse(X):
        return _rust.fm_predict_fast_dense(_prep_dense(X), float(w0), linear, factors)

    indptr, indices, data, n_features = _prep_csr(X)
    return _rust.fm_predict_fast_csr(
        indptr, indices, data, n_features, float(w0), linear, factors
    )
58
+
59
+
60
def ffm_predict(X, field_ids, w0, w, V):
    """FFM prediction (math_spec.md), Rust-accelerated when available.

    Without the extension this delegates to `_reference.ffm_predict`.
    Otherwise `field_ids` is coerced to contiguous int64, `w` and `V` to
    contiguous float64, and the CSR or dense Rust kernel is chosen according
    to whether `X` is sparse.
    """
    if _rust is None:
        return _reference.ffm_predict(X, field_ids, w0, w, V)

    fields = _prep_vec(field_ids, dtype=np.int64)
    linear = _prep_vec(w)
    factors = _prep_dense(V)
    if not sp.issparse(X):
        return _rust.ffm_predict_dense(_prep_dense(X), fields, float(w0), linear, factors)

    indptr, indices, data, n_features = _prep_csr(X)
    return _rust.ffm_predict_csr(
        indptr, indices, data, n_features, fields, float(w0), linear, factors
    )
73
+
74
+
75
def _prep_fit(X, y, params, row_orders):
    """Shared input coercion for the Rust fit entry points.

    A dense `X` is wrapped in a CSR matrix first (exact zeros are skipped
    either way, matching the reference). `w` and `V` from `params` are copied
    into fresh C-ordered float64 arrays that the Rust kernel mutates in place,
    so the caller's `params` are left untouched. A 1-D `row_orders` is promoted
    to a single-row 2-D array.

    Returns `(csr_parts, y, w0, w, V, row_orders)` where `csr_parts` is the
    tuple produced by `_prep_csr`.
    """
    bias, linear_init, factors_init = params
    linear = np.array(linear_init, dtype=np.float64, order="C", copy=True)
    factors = np.array(factors_init, dtype=np.float64, order="C", copy=True)
    targets = _prep_vec(y)

    orders = np.ascontiguousarray(row_orders, dtype=np.int64)
    if orders.ndim == 1:
        orders = orders[np.newaxis, :]

    if sp.issparse(X):
        sparse_X = X
    else:
        sparse_X = sp.csr_matrix(np.asarray(X, dtype=np.float64))
    return _prep_csr(sparse_X), targets, float(bias), linear, factors, orders
91
+
92
+
93
+ def _acc_arrays(state, w, V):
94
+ """AdaGrad accumulators (acc_w0, acc_w, acc_v) from `state` or fresh zeros.
95
+
96
+ `state` (a mutable list, for the epoch-driven early-stopping path) persists
97
+ the accumulators across calls; None means a single all-epochs run.
98
+ """
99
+ if state is None:
100
+ return 0.0, np.zeros(len(w)), np.zeros_like(V)
101
+ acc_w0, acc_w, acc_v = state
102
+ return float(acc_w0), _prep_vec(acc_w), _prep_dense(acc_v)
103
+
104
+
105
def fm_fit(
    X, y, params, *, loss, optimizer, learning_rate, l2_linear, l2_factors, row_orders,
    l1_linear=0.0, l1_factors=0.0, beta_1=0.9, beta_2=0.999, epsilon=1e-8, ftrl_beta=1.0,
    batch_size=1, n_jobs=1, sample_weight=None, state=None, adam_state=None,
):
    """Train an FM (docs/optimization_spec.md).

    `params` holds the initial `(w0, w, V)` and is not modified; a new float64
    `(w0, w, V)` is returned. `sample_weight` scales each row's gradient
    (None -> all ones). `batch_size` averages each batch's gradient
    (batch_size=1 is per-row). `n_jobs` (>= 1) splits each batch across that
    many rayon threads; n_jobs=1 is the serial path matching the reference.
    `beta_1`/`beta_2`/`epsilon` apply only when optimizer == "adam".

    `state` carries the AdaGrad accumulators in and out for epoch-by-epoch
    training and is updated in place on the Rust path. `adam_state` does the
    same for Adam moments; because the Rust kernel keeps Adam state internal,
    Adam + early stopping (adam_state given) runs on the NumPy reference path.
    The reference fallback is always serial (it is the n_jobs=1 ground truth),
    so `n_jobs` is not forwarded to it.
    """
    if adam_state is not None or _rust is None:
        reference_kwargs = {
            "loss": loss,
            "optimizer": optimizer,
            "learning_rate": learning_rate,
            "l2_linear": l2_linear,
            "l2_factors": l2_factors,
            "l1_linear": l1_linear,
            "l1_factors": l1_factors,
            "row_orders": row_orders,
            "beta_1": beta_1,
            "beta_2": beta_2,
            "epsilon": epsilon,
            "ftrl_beta": ftrl_beta,
            "batch_size": batch_size,
            "sample_weight": sample_weight,
            "state": state,
            "adam_state": adam_state,
        }
        return _reference_train.fm_fit_reference(X, y, params, **reference_kwargs)

    csr_parts, targets, bias, linear, factors, orders = _prep_fit(X, y, params, row_orders)
    indptr, indices, data, n_features = csr_parts

    if sample_weight is None:
        weights = np.ones(len(targets))
    else:
        weights = _prep_vec(sample_weight)

    acc_w0, acc_w, acc_v = _acc_arrays(state, linear, factors)

    # The kernel updates linear/factors/acc_w/acc_v in place and hands back the scalars.
    bias, acc_w0 = _rust.fm_fit_csr(
        indptr, indices, data, n_features,
        targets, weights,
        bias, acc_w0, linear, factors, acc_w, acc_v,
        loss, optimizer, learning_rate, l2_linear, l2_factors,
        beta_1, beta_2, epsilon,
        orders, batch_size, n_jobs,
        l1_linear, l1_factors, ftrl_beta,
    )

    if state is not None:
        state[0] = acc_w0
        state[1] = acc_w
        state[2] = acc_v
    return bias, linear, factors
144
+
145
+
146
def fm_fit_multiclass(
    X, y, params, *, optimizer, learning_rate, l2_linear, l2_factors, row_orders,
    label_smoothing=0.0, l1_linear=0.0, l1_factors=0.0, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
    ftrl_beta=1.0, batch_size=1, sample_weight=None, state=None, adam_state=None,
):
    """Train a multiclass (softmax) FM (optimization_spec.md).

    `params` = (w0 (C,), w (C, n), V (C, n, k)) initial values, left unchanged;
    `y` holds integer class indices in [0, C). Returns new float64 (w0, w, V).
    `batch_size` averages each batch's gradient (batch_size=1 is per-row).
    `beta_1`/`beta_2`/`epsilon` apply only when optimizer == "adam".

    The Rust multiclass kernel keeps optimizer state internal, so whenever
    early stopping supplies `state` or `adam_state` the call is routed to the
    NumPy reference trainer, which round-trips that state across epochs. The
    reference trainer is also used when the extension is not built.
    """
    needs_reference = _rust is None or state is not None or adam_state is not None
    if needs_reference:
        reference_kwargs = {
            "optimizer": optimizer,
            "learning_rate": learning_rate,
            "l2_linear": l2_linear,
            "l2_factors": l2_factors,
            "l1_linear": l1_linear,
            "l1_factors": l1_factors,
            "row_orders": row_orders,
            "label_smoothing": label_smoothing,
            "beta_1": beta_1,
            "beta_2": beta_2,
            "epsilon": epsilon,
            "ftrl_beta": ftrl_beta,
            "batch_size": batch_size,
            "sample_weight": sample_weight,
            "state": state,
            "adam_state": adam_state,
        }
        return _reference_train.fm_fit_multiclass_reference(X, y, params, **reference_kwargs)

    bias_init, linear_init, factors_init = params
    # Fresh copies: the Rust kernel mutates all three arrays in place.
    bias = np.array(bias_init, dtype=np.float64, order="C", copy=True)  # (C,)
    linear = np.array(linear_init, dtype=np.float64, order="C", copy=True)  # (C, n)
    factors = np.array(factors_init, dtype=np.float64, order="C", copy=True)  # (C, n, k)
    labels = _prep_vec(y, dtype=np.int64)

    orders = np.ascontiguousarray(row_orders, dtype=np.int64)
    if orders.ndim == 1:
        orders = orders[np.newaxis, :]

    if sp.issparse(X):
        sparse_X = X
    else:
        sparse_X = sp.csr_matrix(np.asarray(X, dtype=np.float64))
    indptr, indices, data, n_features = _prep_csr(sparse_X)

    if sample_weight is None:
        weights = np.ones(len(labels))
    else:
        weights = _prep_vec(sample_weight)

    _rust.fm_fit_multiclass_csr(
        indptr, indices, data, n_features,
        labels, weights,
        bias, linear, factors,
        optimizer, learning_rate, l2_linear, l2_factors, label_smoothing,
        beta_1, beta_2, epsilon,
        orders, batch_size,
        l1_linear, l1_factors, ftrl_beta,
    )
    return bias, linear, factors
187
+
188
+
189
def ffm_fit(
    X, y, field_ids, params, *, optimizer, learning_rate, l2_linear, l2_factors, row_orders,
    l1_linear=0.0, l1_factors=0.0, beta_1=0.9, beta_2=0.999, epsilon=1e-8, ftrl_beta=1.0,
    batch_size=1, n_jobs=1, sample_weight=None, state=None, adam_state=None,
):
    """Train an FFM (logistic loss); see fm_fit.

    `batch_size` averages each batch's gradient (batch_size=1 is per-row);
    `n_jobs` (>= 1) splits each batch across that many rayon threads (n_jobs=1
    matches the serial reference). Adam + early stopping (adam_state given)
    runs on the NumPy reference path, like fm_fit; the reference path is also
    used when the extension is not built, and `n_jobs` is not forwarded to it.
    When `state` is given on the Rust path, its three accumulator slots are
    updated in place. Returns the trained `(w0, w, V)`.
    """
    if adam_state is not None or _rust is None:
        reference_kwargs = {
            "optimizer": optimizer,
            "learning_rate": learning_rate,
            "l2_linear": l2_linear,
            "l2_factors": l2_factors,
            "l1_linear": l1_linear,
            "l1_factors": l1_factors,
            "row_orders": row_orders,
            "beta_1": beta_1,
            "beta_2": beta_2,
            "epsilon": epsilon,
            "ftrl_beta": ftrl_beta,
            "batch_size": batch_size,
            "sample_weight": sample_weight,
            "state": state,
            "adam_state": adam_state,
        }
        return _reference_train.ffm_fit_reference(X, y, field_ids, params, **reference_kwargs)

    fields = _prep_vec(field_ids, dtype=np.int64)
    csr_parts, targets, bias, linear, factors, orders = _prep_fit(X, y, params, row_orders)
    indptr, indices, data, n_features = csr_parts

    if sample_weight is None:
        weights = np.ones(len(targets))
    else:
        weights = _prep_vec(sample_weight)

    acc_w0, acc_w, acc_v = _acc_arrays(state, linear, factors)

    # The kernel updates linear/factors/acc_w/acc_v in place and hands back the scalars.
    bias, acc_w0 = _rust.ffm_fit_csr(
        indptr, indices, data, n_features,
        targets, weights, fields,
        bias, acc_w0, linear, factors, acc_w, acc_v,
        optimizer, learning_rate, l2_linear, l2_factors,
        beta_1, beta_2, epsilon,
        orders, batch_size, n_jobs,
        l1_linear, l1_factors, ftrl_beta,
    )

    if state is not None:
        state[0] = acc_w0
        state[1] = acc_w
        state[2] = acc_v
    return bias, linear, factors
221
+
222
+
223
def ffm_fit_multiclass(
    X, y, field_ids, params, *, optimizer, learning_rate, l2_linear, l2_factors, row_orders,
    label_smoothing=0.0, l1_linear=0.0, l1_factors=0.0, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
    ftrl_beta=1.0, batch_size=1, sample_weight=None,
):
    """Train a multiclass (softmax) FFM (one FFM per class, coupled by softmax).

    `params` = (w0 (C,), w (C, n), V (C, n, n_fields, k)); `y` holds class
    indices in [0, C). Serial (no n_jobs), like FM multiclass. Uses the Rust
    kernel when the extension is available and the NumPy reference trainer
    otherwise. On the Rust path the inputs in `params` are copied first, and
    the trained copies are returned as `(w0, w, V)`.
    """
    if _rust is None:
        reference_kwargs = {
            "optimizer": optimizer,
            "learning_rate": learning_rate,
            "l2_linear": l2_linear,
            "l2_factors": l2_factors,
            "l1_linear": l1_linear,
            "l1_factors": l1_factors,
            "row_orders": row_orders,
            "label_smoothing": label_smoothing,
            "beta_1": beta_1,
            "beta_2": beta_2,
            "epsilon": epsilon,
            "ftrl_beta": ftrl_beta,
            "batch_size": batch_size,
            "sample_weight": sample_weight,
        }
        return _reference_train.ffm_fit_multiclass_reference(
            X, y, field_ids, params, **reference_kwargs
        )

    bias_init, linear_init, factors_init = params
    # Fresh copies: the Rust kernel mutates all three arrays in place.
    bias = np.array(bias_init, dtype=np.float64, order="C", copy=True)  # (C,)
    linear = np.array(linear_init, dtype=np.float64, order="C", copy=True)  # (C, n)
    factors = np.array(factors_init, dtype=np.float64, order="C", copy=True)  # (C, n, n_fields, k)
    labels = _prep_vec(y, dtype=np.int64)
    fields = _prep_vec(field_ids, dtype=np.int64)

    orders = np.ascontiguousarray(row_orders, dtype=np.int64)
    if orders.ndim == 1:
        orders = orders[np.newaxis, :]

    if sp.issparse(X):
        sparse_X = X
    else:
        sparse_X = sp.csr_matrix(np.asarray(X, dtype=np.float64))
    indptr, indices, data, n_features = _prep_csr(sparse_X)

    if sample_weight is None:
        weights = np.ones(len(labels))
    else:
        weights = _prep_vec(sample_weight)

    _rust.ffm_fit_multiclass_csr(
        indptr, indices, data, n_features,
        labels, weights, fields,
        bias, linear, factors,
        optimizer, learning_rate, l2_linear, l2_factors, label_smoothing,
        beta_1, beta_2, epsilon,
        orders, batch_size,
        l1_linear, l1_factors, ftrl_beta,
    )
    return bias, linear, factors
@@ -0,0 +1,87 @@
1
+ """Minimal sklearn-compatible parameter handling.
2
+
3
+ Avoids a hard scikit-learn dependency in v0.1 while keeping the contract:
4
+ __init__ stores constructor arguments verbatim; get_params/set_params
5
+ round-trip them. Phase 3 may swap this for sklearn's BaseEstimator.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import inspect
11
+ import pickle
12
+
13
+
14
class NotFittedError(ValueError, AttributeError):
    """Signal that a predict-like method was called before `fit`.

    Subclasses both ValueError and AttributeError, mirroring sklearn's
    exception of the same name, so generic sklearn-style error handling that
    catches either base class keeps working.
    """
20
+
21
+
22
def check_is_fitted(estimator, attribute="w0_"):
    """Raise NotFittedError unless `estimator` has the attribute `attribute`.

    The default attribute checked is `w0_`. Returns None when the attribute
    is present.
    """
    if hasattr(estimator, attribute):
        return
    estimator_name = type(estimator).__name__
    raise NotFittedError(
        f"This {estimator_name} instance is not fitted yet; "
        "call 'fit' before using this method."
    )
28
+
29
+
30
class ParamsMixin:
    """get_params / set_params / __repr__ driven by the `__init__` signature.

    Parameter names are every `__init__` parameter except `self`; each is
    expected to be stored on the instance under the same attribute name.
    """

    @classmethod
    def _param_names(cls):
        """List the constructor parameter names in declaration order."""
        init_parameters = inspect.signature(cls.__init__).parameters
        return [param for param in init_parameters if param != "self"]

    def get_params(self, deep=True):
        """Return a dict mapping each constructor parameter to its current value.

        `deep` is accepted for sklearn API compatibility and is not used.
        """
        current = {}
        for name in self._param_names():
            current[name] = getattr(self, name)
        return current

    def set_params(self, **params):
        """Set constructor parameters by keyword and return `self`.

        Raises ValueError at the first key that is not a constructor
        parameter; keys processed before it have already been applied.
        """
        allowed = set(self._param_names())
        for key, value in params.items():
            if key in allowed:
                setattr(self, key, value)
                continue
            raise ValueError(
                f"Invalid parameter {key!r} for estimator {type(self).__name__}. "
                f"Valid parameters are: {sorted(allowed)}."
            )
        return self

    def __repr__(self):
        """Render as `ClassName(param=value, ...)` using the current parameters."""
        rendered = [f"{name}={value!r}" for name, value in self.get_params().items()]
        return f"{type(self).__name__}({', '.join(rendered)})"
53
+
54
+
55
class ModelIOMixin:
    """save_model / load_model for fitted estimators.

    Stores `{format_version, class, params, attrs}` via pickle, where `attrs`
    are the learned trailing-underscore attributes; this is generic over the
    estimator (binary, regression, multiclass) and round-trips constructor
    params too. The estimators also pickle natively (plain attributes), so
    `pickle.dumps(model)` works as an alternative.

    The host class is expected to provide `get_params()` (see `save_model`).
    """

    # Version tag written by save_model and validated by load_model.
    _IO_VERSION = 1

    def save_model(self, path):
        """Pickle the fitted estimator's params and learned attributes to `path`.

        Raises NotFittedError (via `check_is_fitted`) when the estimator has
        not been fitted.
        """
        check_is_fitted(self)
        state = {
            "format_version": self._IO_VERSION,
            "class": type(self).__name__,
            "params": self.get_params(),
            # Learned state is, by convention, every instance attribute ending in "_".
            "attrs": {k: getattr(self, k) for k in vars(self) if k.endswith("_")},
        }
        with open(path, "wb") as f:
            pickle.dump(state, f)

    @classmethod
    def load_model(cls, path):
        """Rebuild an estimator of type `cls` from a file written by `save_model`.

        Raises ValueError when the file does not hold a dict payload, was saved
        from a differently named class, or carries a `format_version` other
        than `_IO_VERSION` (a payload without the key is accepted).

        SECURITY: the file is read with `pickle.load`, which can execute
        arbitrary code while unpickling. Only load files from trusted sources.
        """
        with open(path, "rb") as f:
            state = pickle.load(f)
        # Reject foreign pickles explicitly instead of failing on `state.get`.
        if not isinstance(state, dict):
            raise ValueError(
                f"{path!r} does not hold a saved model (got {type(state).__name__!r})"
            )
        if state.get("class") != cls.__name__:
            raise ValueError(f"{path!r} holds a {state.get('class')!r}, not a {cls.__name__!r}")
        version = state.get("format_version", cls._IO_VERSION)
        if version != cls._IO_VERSION:
            raise ValueError(
                f"{path!r} uses model format version {version!r}; "
                f"this build supports version {cls._IO_VERSION!r}"
            )
        model = cls(**state["params"])
        for key, value in state["attrs"].items():
            setattr(model, key, value)
        return model