coreLearn 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coreLearn/__init__.py +17 -0
- coreLearn/base.py +25 -0
- coreLearn/distances.py +125 -0
- coreLearn/evaluator.py +143 -0
- coreLearn/knn.py +210 -0
- coreLearn/linear_regression.py +205 -0
- coreLearn/tests/__init__.py +0 -0
- coreLearn/tests/test_distances.py +187 -0
- coreLearn/tests/test_evaluator.py +104 -0
- coreLearn/tests/test_knn.py +101 -0
- coreLearn/tests/test_linear_regression.py +154 -0
- corelearn-0.1.0.dist-info/METADATA +482 -0
- corelearn-0.1.0.dist-info/RECORD +15 -0
- corelearn-0.1.0.dist-info/WHEEL +5 -0
- corelearn-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from .base import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# ---------------------------------------------------------------------------
|
|
9
|
+
# Strategy interface
|
|
10
|
+
# ---------------------------------------------------------------------------
|
|
11
|
+
|
|
12
|
+
class OptimizationStrategy(ABC):
    """
    Interface every weight-fitting algorithm must implement.

    Concrete subclasses override ``fit``; because ``fit`` is abstract, this
    class itself cannot be instantiated.
    """

    @abstractmethod
    def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
        """
        Compute and return the weight vector for design matrix X and targets y.

        Parameters
        ----------
        X : design matrix passed in by the caller
        y : target values

        Returns
        -------
        The fitted weights as a NumPy array.
        """
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Concrete strategies
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
class NormalEquationStrategy(OptimizationStrategy):
    """
    Direct least-squares solver: w = argmin ||Xw - y||^2

    The solution is delegated to np.linalg.lstsq rather than evaluating the
    textbook (X^T X)^-1 X^T y expression. Forming and inverting X^T X
    squares the condition number of X, which triggers numerical warnings on
    ill-conditioned data; lstsq sidesteps that.
    """

    def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Return the least-squares weights for design matrix X and targets y."""
        # lstsq returns (solution, residuals, rank, singular_values);
        # only the solution is needed here.
        solution = np.linalg.lstsq(X, y, rcond=None)
        return solution[0]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class GradientDescentStrategy(OptimizationStrategy):
    """
    Iterative least-squares solver using full-batch gradient descent.

    Update applied on every epoch: w <- w - lr * (1/n) * X^T (Xw - y)

    Divergence handling:
    - NumPy floating-point warnings are silenced for the whole loop via
      np.errstate(all='ignore'), so overflow/invalid warnings from the
      matrix products do not fire before the checks below run.
    - The loop exits early as soon as the residual or the gradient
      contains a non-finite value; the weights accumulated so far are
      returned unchanged.

    Tip: normalize X and y with StandardScaler before training on
    large or unscaled datasets (see example.py section 2b).

    Parameters
    ----------
    learning_rate : step size multiplied into the gradient
    epochs : maximum number of update steps
    """

    def __init__(self, learning_rate: float = 0.01, epochs: int = 1000) -> None:
        self.learning_rate: float = learning_rate
        self.epochs: int = epochs

    def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Run up to ``epochs`` gradient steps from zero weights and return them."""
        weights: np.ndarray = np.zeros(X.shape[1])
        sample_count: int = len(y)

        with np.errstate(all='ignore'):
            for _epoch in range(self.epochs):
                error: np.ndarray = X @ weights - y
                if not np.all(np.isfinite(error)):
                    break  # predictions blew up; stop updating
                gradient: np.ndarray = X.T @ error / sample_count
                if not np.all(np.isfinite(gradient)):
                    break  # gradient blew up; stop updating
                weights -= self.learning_rate * gradient

        return weights
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# LinearRegression model
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
class LinearRegression(BaseModel):
    """
    Linear Regression model: y = X * w + b

    The optimization algorithm is selected by name and encapsulated in an
    OptimizationStrategy object — callers never interact with the strategy
    directly (Strategy Pattern).

    Parameters
    ----------
    strategy : 'normal' (default) or 'gradient_descent'
    learning_rate : learning rate — gradient_descent only
    epochs : number of iterations — gradient_descent only

    Raises
    ------
    ValueError
        If ``strategy`` is not one of the two supported names.
    """

    def __init__(
        self,
        strategy: str = "normal",
        learning_rate: float = 0.01,
        epochs: int = 1000,
    ) -> None:
        if strategy == "normal":
            self._strategy: OptimizationStrategy = NormalEquationStrategy()
        elif strategy == "gradient_descent":
            self._strategy = GradientDescentStrategy(
                learning_rate=learning_rate, epochs=epochs
            )
        else:
            raise ValueError(
                f"Unknown strategy '{strategy}'. Use 'normal' or 'gradient_descent'."
            )
        # Bias-augmented weights [b, w_1, ..., w_d]; stays None until fit() runs.
        self._weights: np.ndarray | None = None
        # Number of feature columns seen by fit(); used to validate predict input.
        self._n_features: int = 0

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _as_feature_matrix(X) -> np.ndarray:
        """Coerce X to a 2-D float array, promoting 1-D input to one column."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)  # (n,) -> (n, 1) single-feature convenience
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got shape {X.shape}."
            )
        return X

    def _fitted_weights(self, action: str) -> np.ndarray:
        """Return the learned weights, or raise RuntimeError if fit() has not run."""
        if self._weights is None:
            raise RuntimeError(f"Call fit() before {action}.")
        return self._weights

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------

    def fit(self, X: np.ndarray, y: np.ndarray) -> "LinearRegression":
        """
        Fit the model to training data X and target vector y.

        A leading column of ones is prepended to X so the strategy learns
        the intercept as the first weight.

        Returns
        -------
        self, to allow call chaining.

        Raises
        ------
        ValueError
            If X is not 1-D/2-D, if X or y is empty, or if X and y have a
            different number of samples.
        """
        X = self._as_feature_matrix(X)
        y = np.asarray(y, dtype=float)

        if len(X) == 0:
            raise ValueError("Training data X must not be empty.")
        if len(y) == 0:
            raise ValueError("Target vector y must not be empty.")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples: {len(X)} != {len(y)}."
            )

        self._n_features = X.shape[1]
        X_b: np.ndarray = np.c_[np.ones(len(X)), X]
        self._weights = self._strategy.fit(X_b, y)
        return self

    # ------------------------------------------------------------------
    # Prediction
    # ------------------------------------------------------------------

    def predict(self, X: np.ndarray) -> list[float]:
        """
        Return predictions as a plain Python list.

        Returns list (not np.ndarray) to stay consistent with
        BaseModel.predict() — satisfying the Liskov Substitution Principle.
        Use predict_array() when NumPy output is needed.

        Raises
        ------
        RuntimeError
            If the model has not been fitted.
        ValueError
            If X is not 1-D/2-D or its feature count differs from training.
        """
        # Single validation/compute path; an empty result converts to [].
        return self.predict_array(X).tolist()

    def predict_array(self, X: np.ndarray) -> np.ndarray:
        """
        Return predictions as a NumPy array.

        An empty X yields an empty array without checking the feature count.

        Raises
        ------
        RuntimeError
            If the model has not been fitted.
        ValueError
            If X is not 1-D/2-D or its feature count differs from training.
        """
        weights = self._fitted_weights("predict()")
        X = self._as_feature_matrix(X)
        if len(X) == 0:
            return np.array([])
        if X.shape[1] != self._n_features:
            raise ValueError(
                f"Feature count mismatch: model was trained with {self._n_features} "
                f"features, but X has {X.shape[1]}."
            )
        X_b: np.ndarray = np.c_[np.ones(len(X)), X]
        # Silence overflow/invalid warnings from the product, e.g. when the
        # stored weights are non-finite.
        with np.errstate(all='ignore'):
            return X_b @ weights

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def coef_(self) -> np.ndarray:
        """Learned feature coefficients (excludes the bias term).

        Raises RuntimeError if the model has not been fitted.
        """
        return self._fitted_weights("accessing coef_")[1:]

    @property
    def intercept_(self) -> float:
        """Learned bias (intercept) term.

        Raises RuntimeError if the model has not been fitted.
        """
        return float(self._fitted_weights("accessing intercept_")[0])
|
|
File without changes
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""Unit tests for DistanceMetric subclasses and DistanceMetricFactory."""
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
from coreLearn import DistanceMetric, DistanceMetricFactory
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# ---------------------------------------------------------------------------
|
|
10
|
+
# EuclideanDistance
|
|
11
|
+
# ---------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
def test_euclidean_basic():
    """3-4-5 triangle: sqrt(3² + 4²) = 5."""
    distance = DistanceMetricFactory.create("euclidean").compute([0, 0], [3, 4])
    assert distance == pytest.approx(5.0)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_euclidean_same_point():
    """The distance between identical points must be zero."""
    distance = DistanceMetricFactory.create("euclidean").compute([1, 2, 3], [1, 2, 3])
    assert distance == pytest.approx(0.0)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_euclidean_1d():
    """One-dimensional case: |7 - 3| = 4."""
    distance = DistanceMetricFactory.create("euclidean").compute([3], [7])
    assert distance == pytest.approx(4.0)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_euclidean_symmetry():
    """Distance is symmetric: d(a, b) == d(b, a)."""
    euclidean = DistanceMetricFactory.create("euclidean")
    forward = euclidean.compute([1, 2], [5, 6])
    backward = euclidean.compute([5, 6], [1, 2])
    assert forward == pytest.approx(backward)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_euclidean_callable():
    """A DistanceMetric object must be callable like a function (__call__)."""
    euclidean = DistanceMetricFactory.create("euclidean")
    distance = euclidean([0, 0], [3, 4])
    assert distance == pytest.approx(5.0)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# ManhattanDistance
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
def test_manhattan_basic():
    """|3 - 0| + |4 - 0| = 7."""
    distance = DistanceMetricFactory.create("manhattan").compute([0, 0], [3, 4])
    assert distance == pytest.approx(7.0)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_manhattan_same_point():
    """The distance between identical points must be zero."""
    distance = DistanceMetricFactory.create("manhattan").compute([2, 5], [2, 5])
    assert distance == pytest.approx(0.0)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_manhattan_1d():
    """One-dimensional case: |1 - 9| = 8."""
    distance = DistanceMetricFactory.create("manhattan").compute([1], [9])
    assert distance == pytest.approx(8.0)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_manhattan_symmetry():
    """Distance is symmetric: d(a, b) == d(b, a)."""
    manhattan = DistanceMetricFactory.create("manhattan")
    forward = manhattan.compute([0, 1], [4, 6])
    backward = manhattan.compute([4, 6], [0, 1])
    assert forward == pytest.approx(backward)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_manhattan_callable():
    """A DistanceMetric object must be callable like a function (__call__)."""
    manhattan = DistanceMetricFactory.create("manhattan")
    distance = manhattan([0, 0], [3, 4])
    assert distance == pytest.approx(7.0)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# ---------------------------------------------------------------------------
|
|
78
|
+
# Euclidean vs Manhattan difference
|
|
79
|
+
# ---------------------------------------------------------------------------
|
|
80
|
+
|
|
81
|
+
def test_euclidean_less_or_equal_manhattan():
    """L2 <= L1 always holds (norm relationship)."""
    point_a = [1, 3, 5]
    point_b = [4, 7, 2]
    l2 = DistanceMetricFactory.create("euclidean").compute(point_a, point_b)
    l1 = DistanceMetricFactory.create("manhattan").compute(point_a, point_b)
    # Small slack absorbs floating-point rounding.
    assert l2 <= l1 + 1e-9
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
# DistanceMetricFactory — create()
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
def test_factory_create_euclidean_type():
    """The factory must return the correct type for 'euclidean'."""
    from coreLearn.distances import EuclideanDistance

    created = DistanceMetricFactory.create("euclidean")
    assert isinstance(created, EuclideanDistance)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_factory_create_manhattan_type():
    """The factory must return the correct type for 'manhattan'."""
    from coreLearn.distances import ManhattanDistance

    created = DistanceMetricFactory.create("manhattan")
    assert isinstance(created, ManhattanDistance)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_factory_create_unknown_raises():
    """An unknown metric name must raise ValueError mentioning 'Unknown'."""
    unregistered_name = "chebyshev"
    with pytest.raises(ValueError, match="Unknown"):
        DistanceMetricFactory.create(unregistered_name)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_factory_create_returns_new_instance():
    """Each create() call must return an independent object."""
    first = DistanceMetricFactory.create("euclidean")
    second = DistanceMetricFactory.create("euclidean")
    assert first is not second
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
# DistanceMetricFactory — available()
|
|
122
|
+
# ---------------------------------------------------------------------------
|
|
123
|
+
|
|
124
|
+
def test_factory_available_contains_defaults():
    """The default metrics must appear in available()."""
    registered = DistanceMetricFactory.available()
    assert "euclidean" in registered
    assert "manhattan" in registered
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def test_factory_available_returns_list():
    """available() must return a list."""
    registered = DistanceMetricFactory.available()
    assert isinstance(registered, list)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ---------------------------------------------------------------------------
|
|
137
|
+
# DistanceMetricFactory — register()
|
|
138
|
+
# ---------------------------------------------------------------------------
|
|
139
|
+
|
|
140
|
+
def test_factory_register_and_use():
    """A new metric can be registered and then created and used."""

    class ChebyshevDistance(DistanceMetric):
        def compute(self, a, b):
            largest_gap = max(abs(x - y) for x, y in zip(a, b))
            return float(largest_gap)

    # NOTE(review): the registration is not undone afterwards in this test.
    DistanceMetricFactory.register("chebyshev_tmp", ChebyshevDistance)
    chebyshev = DistanceMetricFactory.create("chebyshev_tmp")
    # [1, 2, 3] vs [4, 6, 5] -> max(3, 4, 2) = 4
    distance = chebyshev.compute([1, 2, 3], [4, 6, 5])
    assert distance == pytest.approx(4.0)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_factory_register_appears_in_available():
    """A registered metric must show up in the available() list."""

    class DummyDistance(DistanceMetric):
        def compute(self, a, b):
            return 0.0

    DistanceMetricFactory.register("dummy_tmp", DummyDistance)
    registered = DistanceMetricFactory.available()
    assert "dummy_tmp" in registered
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def test_factory_register_invalid_class_raises():
    """Registering a class not derived from DistanceMetric must raise TypeError."""

    class NotAMetric:
        pass

    with pytest.raises(TypeError):
        DistanceMetricFactory.register("bad", NotAMetric)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def test_factory_register_non_class_raises():
    """Registering an object that is not a class must raise TypeError."""
    plain_function = lambda a, b: 0.0
    with pytest.raises(TypeError):
        DistanceMetricFactory.register("bad_fn", plain_function)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# ---------------------------------------------------------------------------
|
|
181
|
+
# DistanceMetric — the abstract class cannot be instantiated directly
|
|
182
|
+
# ---------------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
def test_distance_metric_is_abstract():
    """DistanceMetric is abstract and cannot be instantiated directly."""
    abstract_cls = DistanceMetric
    with pytest.raises(TypeError):
        abstract_cls()  # type: ignore[abstract]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Unit tests for evaluator metric functions and Evaluator class."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from coreLearn import (
|
|
5
|
+
Evaluator,
|
|
6
|
+
accuracy, mae, mse, rmse,
|
|
7
|
+
precision, recall, f1_score,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# ---------------------------------------------------------------------------
|
|
12
|
+
# Regression metrics
|
|
13
|
+
# ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
def test_mae():
    """Every element differs by 0.5, so mae() is 0.5."""
    score = mae([1.0, 2.0, 3.0], [1.5, 2.5, 3.5])
    assert score == pytest.approx(0.5)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_mse():
    """Every element differs by 1.0, so mse() is 1.0."""
    score = mse([1.0, 2.0, 3.0], [2.0, 3.0, 4.0])
    assert score == pytest.approx(1.0)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_rmse():
    """Every element differs by 1.0, so rmse() is 1.0."""
    score = rmse([1.0, 2.0, 3.0], [2.0, 3.0, 4.0])
    assert score == pytest.approx(1.0)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_mae_perfect():
    """Identical sequences give a mae() of zero."""
    values = [1.0, 2.0, 3.0]
    score = mae(values, list(values))
    assert score == pytest.approx(0.0)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
# Classification metrics
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
def test_accuracy():
    """Four of the five labels agree, so accuracy() is 0.8."""
    score = accuracy([0, 1, 1, 0, 1], [0, 1, 0, 0, 1])
    assert score == pytest.approx(0.8)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_accuracy_perfect():
    """Identical label sequences give an accuracy() of 1.0."""
    score = accuracy([1, 0, 1], [1, 0, 1])
    assert score == pytest.approx(1.0)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_precision_perfect():
    """Identical label sequences give a precision() of 1.0."""
    score = precision([0, 0, 1, 1], [0, 0, 1, 1])
    assert score == pytest.approx(1.0)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_recall_binary():
    """recall() on a two-class example with one disagreement is 0.75.

    NOTE(review): 0.75 equals the mean of the per-class recalls (1.0 and
    0.5), so recall() presumably macro-averages over classes — confirm
    against the evaluator implementation.
    """
    score = recall([1, 1, 0], [1, 0, 0])
    assert score == pytest.approx(0.75)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_f1_perfect():
    """Identical label sequences give an f1_score() of 1.0."""
    score = f1_score([0, 1, 0, 1], [0, 1, 0, 1])
    assert score == pytest.approx(1.0)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# Input validation
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
def test_empty_raises():
    """accuracy() must raise ValueError when both inputs are empty."""
    no_labels = []
    with pytest.raises(ValueError):
        accuracy(no_labels, [])
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_length_mismatch_raises():
    """mae() must raise ValueError when the inputs differ in length."""
    two_values = [1.0, 2.0]
    one_value = [1.0]
    with pytest.raises(ValueError):
        mae(two_values, one_value)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# Evaluator class
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
def test_evaluate_regression_keys():
    """evaluate_regression() reports exactly the mae, mse and rmse keys."""
    report = Evaluator.evaluate_regression([1.0, 2.0], [1.0, 2.0])
    expected_keys = {"mae", "mse", "rmse"}
    assert set(report.keys()) == expected_keys
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_evaluate_regression_perfect():
    """Identical sequences give zero for every built-in regression metric."""
    report = Evaluator.evaluate_regression([1.0, 2.0, 3.0], [1.0, 2.0, 3.0])
    for metric_name in ("mae", "mse", "rmse"):
        assert report[metric_name] == pytest.approx(0.0)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_evaluate_classification_keys():
    """evaluate_classification() reports exactly the four built-in keys."""
    report = Evaluator.evaluate_classification([0, 1, 0, 1], [0, 1, 0, 1])
    expected_keys = {"accuracy", "precision", "recall", "f1"}
    assert set(report.keys()) == expected_keys
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def test_evaluate_classification_perfect():
    """Identical label sequences give accuracy and f1 of 1.0."""
    report = Evaluator.evaluate_classification([0, 1, 0, 1], [0, 1, 0, 1])
    for metric_name in ("accuracy", "f1"):
        assert report[metric_name] == pytest.approx(1.0)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_register_regression():
    """A metric registered with kind='regression' appears in the regression report."""

    def max_abs_error(t, p):
        return max(abs(a - b) for a, b in zip(t, p))

    # NOTE(review): the registration is not undone afterwards in this test;
    # if it persists on Evaluator, the exact-key assertion in
    # test_evaluate_regression_keys depends on running before this test — confirm.
    Evaluator.register("max_err", max_abs_error, kind="regression")
    report = Evaluator.evaluate_regression([1.0, 2.0, 3.0], [1.5, 2.0, 3.5])
    assert report["max_err"] == pytest.approx(0.5)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_register_unknown_kind_raises():
    """register() must raise ValueError for an unrecognized kind."""
    constant_metric = lambda t, p: 0.0
    with pytest.raises(ValueError):
        Evaluator.register("bad", constant_metric, kind="unknown")
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Unit tests for KNNClassifier."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from coreLearn import KNNClassifier
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_basic_classification():
    """With k=3, a query inside each cluster gets that cluster's label."""
    low_cluster = [[1, 1], [2, 2], [3, 3]]
    high_cluster = [[7, 7], [8, 8], [9, 9]]
    classifier = KNNClassifier(k=3)
    classifier.fit(low_cluster + high_cluster, [0, 0, 0, 1, 1, 1])
    assert classifier.predict([[2, 2]]) == [0]
    assert classifier.predict([[8, 8]]) == [1]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_manhattan_distance():
    """The classifier works with distance='manhattan' and string labels."""
    group_a = [[0, 0], [1, 0], [0, 1]]
    group_b = [[5, 5], [6, 5], [5, 6]]
    classifier = KNNClassifier(k=3, distance="manhattan")
    classifier.fit(group_a + group_b, ["A", "A", "A", "B", "B", "B"])
    predicted = classifier.predict([[0.5, 0.5], [5.5, 5.5]])
    assert predicted == ["A", "B"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_parallel_predict():
    """predict() returns the expected labels when constructed with n_jobs=2."""
    sample_ids = range(20)
    features = [[i] for i in sample_ids]
    labels = [1 if i >= 10 else 0 for i in sample_ids]
    classifier = KNNClassifier(k=3, n_jobs=2)
    classifier.fit(features, labels)
    predicted = classifier.predict([[4], [15]])
    assert predicted == [0, 1]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_predict_before_fit_raises():
    """Calling predict() on an unfitted classifier must raise RuntimeError."""
    unfitted = KNNClassifier()
    with pytest.raises(RuntimeError):
        unfitted.predict([[1, 2]])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_unknown_distance_raises():
    """Constructing with distance='cosine' must raise ValueError."""
    unsupported = "cosine"
    with pytest.raises(ValueError):
        KNNClassifier(distance=unsupported)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_fit_predict_template_method():
    """fit_predict() trains and predicts in a single call."""
    classifier = KNNClassifier(k=1)
    train_features = [[0], [10]]
    train_labels = [0, 1]
    queries = [[1], [9]]
    predicted = classifier.fit_predict(train_features, train_labels, queries)
    assert predicted == [0, 1]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
# Parameter validation — fit()
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
def test_k_greater_than_n_samples_raises():
    """k cannot exceed the number of training samples."""
    classifier = KNNClassifier(k=10)
    three_samples = [[1], [2], [3]]
    with pytest.raises(ValueError, match="k"):
        classifier.fit(three_samples, [0, 1, 0])
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_k_zero_raises():
    """k cannot be zero."""
    invalid_k = 0
    with pytest.raises(ValueError):
        KNNClassifier(k=invalid_k)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_k_negative_raises():
    """k cannot be negative."""
    invalid_k = -1
    with pytest.raises(ValueError):
        KNNClassifier(k=invalid_k)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_n_jobs_zero_raises():
    """n_jobs cannot be zero."""
    invalid_jobs = 0
    with pytest.raises(ValueError):
        KNNClassifier(n_jobs=invalid_jobs)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_n_jobs_negative_raises():
    """n_jobs=-2 must be rejected with ValueError."""
    invalid_jobs = -2
    with pytest.raises(ValueError):
        KNNClassifier(n_jobs=invalid_jobs)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
# Parameter validation — predict()
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
|
|
89
|
+
def test_feature_mismatch_raises():
    """The feature count at prediction time must match the training data."""
    classifier = KNNClassifier(k=1)
    classifier.fit([[1, 2], [3, 4]], [0, 1])
    one_feature_query = [[1]]  # 1 feature; the model was trained with 2
    with pytest.raises(ValueError, match="[Ff]eature"):
        classifier.predict(one_feature_query)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_predict_empty_returns_empty():
    """predict() returns an empty list when given an empty X."""
    classifier = KNNClassifier(k=1)
    classifier.fit([[1], [2]], [0, 1])
    predicted = classifier.predict([])
    assert predicted == []
|