coreLearn 0.1.2__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. corelearn-0.1.3/PKG-INFO +11 -0
  2. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn/__init__.py +1 -1
  3. corelearn-0.1.3/coreLearn/tests/__init__.py +0 -0
  4. corelearn-0.1.3/coreLearn.egg-info/PKG-INFO +11 -0
  5. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn.egg-info/SOURCES.txt +1 -1
  6. {corelearn-0.1.2 → corelearn-0.1.3}/pyproject.toml +1 -1
  7. corelearn-0.1.2/PKG-INFO +0 -471
  8. corelearn-0.1.2/README.md +0 -459
  9. corelearn-0.1.2/coreLearn.egg-info/PKG-INFO +0 -471
  10. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn/base.py +0 -0
  11. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn/distances.py +0 -0
  12. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn/evaluator.py +0 -0
  13. /corelearn-0.1.2/coreLearn/tests/__init__.py → /corelearn-0.1.3/coreLearn/examples/ewfwe.py +0 -0
  14. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn/knn.py +0 -0
  15. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn/linear_regression.py +0 -0
  16. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn/tests/test_distances.py +0 -0
  17. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn/tests/test_evaluator.py +0 -0
  18. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn/tests/test_knn.py +0 -0
  19. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn/tests/test_linear_regression.py +0 -0
  20. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn.egg-info/dependency_links.txt +0 -0
  21. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn.egg-info/requires.txt +0 -0
  22. {corelearn-0.1.2 → corelearn-0.1.3}/coreLearn.egg-info/top_level.txt +0 -0
  23. {corelearn-0.1.2 → corelearn-0.1.3}/setup.cfg +0 -0
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: coreLearn
3
+ Version: 0.1.3
4
+ Summary: Basic ML algorithms library built from scratch (KNN + Linear Regression)
5
+ Requires-Python: >=3.9
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: numpy>=1.21
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=7.0; extra == "dev"
10
+ Requires-Dist: scikit-learn; extra == "dev"
11
+ Requires-Dist: jupyter; extra == "dev"
@@ -5,7 +5,7 @@ from .linear_regression import LinearRegression
5
5
  from .evaluator import Evaluator, accuracy, mae, mse, rmse, precision, recall, f1_score
6
6
  from .distances import DistanceMetric, DistanceMetricFactory
7
7
 
8
- __version__ = "0.1.0"
8
+ __version__ = "0.1.3"
9
9
 
10
10
  __all__ = [
11
11
  "KNNClassifier",
File without changes
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: coreLearn
3
+ Version: 0.1.3
4
+ Summary: Basic ML algorithms library built from scratch (KNN + Linear Regression)
5
+ Requires-Python: >=3.9
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: numpy>=1.21
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=7.0; extra == "dev"
10
+ Requires-Dist: scikit-learn; extra == "dev"
11
+ Requires-Dist: jupyter; extra == "dev"
@@ -1,4 +1,3 @@
1
- README.md
2
1
  pyproject.toml
3
2
  coreLearn/__init__.py
4
3
  coreLearn/base.py
@@ -11,6 +10,7 @@ coreLearn.egg-info/SOURCES.txt
11
10
  coreLearn.egg-info/dependency_links.txt
12
11
  coreLearn.egg-info/requires.txt
13
12
  coreLearn.egg-info/top_level.txt
13
+ coreLearn/examples/ewfwe.py
14
14
  coreLearn/tests/__init__.py
15
15
  coreLearn/tests/test_distances.py
16
16
  coreLearn/tests/test_evaluator.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "coreLearn"
7
- version = "0.1.2"
7
+ version = "0.1.3"
8
8
  description = "Basic ML algorithms library built from scratch (KNN + Linear Regression)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
corelearn-0.1.2/PKG-INFO DELETED
@@ -1,471 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: coreLearn
3
- Version: 0.1.2
4
- Summary: Basic ML algorithms library built from scratch (KNN + Linear Regression)
5
- Requires-Python: >=3.9
6
- Description-Content-Type: text/markdown
7
- Requires-Dist: numpy>=1.21
8
- Provides-Extra: dev
9
- Requires-Dist: pytest>=7.0; extra == "dev"
10
- Requires-Dist: scikit-learn; extra == "dev"
11
- Requires-Dist: jupyter; extra == "dev"
12
-
13
- # CoreLearn
14
-
15
- A lightweight Python machine learning library built from scratch using only **NumPy**.
16
- Implements KNN classification and Linear Regression.
17
-
18
- ---
19
-
20
- ## Installation
21
-
22
- ```bash
23
- # Install the package from PyPI using pip:
24
- pip install coreLearn
25
-
26
- ```
27
-
28
- After installation, import from anywhere:
29
-
30
- ```python
31
- from coreLearn import KNNClassifier, LinearRegression, Evaluator
32
- ```
33
-
34
- ---
35
-
36
- ## Quick Start
37
-
38
- ```python
39
- from coreLearn import KNNClassifier, LinearRegression, Evaluator, accuracy, mae
40
-
41
- # --- KNN Classification ---
42
- knn = KNNClassifier(k=5, distance="euclidean", n_jobs=2)
43
- knn.fit(X_train, y_train)
44
- predictions = knn.predict(X_test)
45
- print(accuracy(y_test, predictions))
46
-
47
- # --- Linear Regression ---
48
- lr = LinearRegression(strategy="normal")
49
- lr.fit(X_train, y_train)
50
- predictions = lr.predict(X_test)
51
- print(mae(y_test, predictions))
52
-
53
- # --- Evaluator ---
54
- print(Evaluator.evaluate_regression(y_test, predictions))
55
- # {'mae': ..., 'mse': ..., 'rmse': ...}
56
-
57
- print(Evaluator.evaluate_classification(y_test, knn_preds))
58
- # {'accuracy': ..., 'precision': ..., 'recall': ..., 'f1': ...}
59
- ```
60
-
61
- ---
62
-
63
- ## Package Structure
64
-
65
- ```
66
- coreLearn/
67
- ├── __init__.py ← Public API
68
- ├── base.py ← Abstract base class — Template Method Pattern
69
- ├── distances.py ← Distance metrics — Factory Pattern
70
- ├── knn.py ← KNN Classifier — Recursion + Concurrency + OOP
71
- ├── linear_regression.py ← Linear Regression — Strategy Pattern + OOP
72
- ├── evaluator.py ← Metric engine — Functional Programming
73
- └── tests/
74
- ├── test_knn.py
75
- ├── test_linear_regression.py
76
- ├── test_distances.py
77
- └── test_evaluator.py
78
- ```
79
-
80
- ---
81
-
82
- ## Running Tests
83
-
84
- ```bash
85
- cd coreLearn/
86
- pytest coreLearn/tests/ -v
87
- ```
88
-
89
- ---
90
-
91
- ## Learning Outcomes
92
-
93
- ### 1 — Object-Oriented Programming (OOP)
94
-
95
- **File:** `base.py`, `knn.py`, `linear_regression.py`, `distances.py`
96
-
97
- #### Abstract Base Class & Inheritance
98
-
99
- `BaseModel` is an abstract class that defines the contract every model must follow.
100
- `KNNClassifier` and `LinearRegression` both inherit from it:
101
-
102
- ```python
103
- # base.py
104
- class BaseModel(ABC):
105
- @abstractmethod
106
- def fit(self, X, y) -> "BaseModel": ...
107
-
108
- @abstractmethod
109
- def predict(self, X) -> list: ...
110
-
111
- # knn.py
112
- class KNNClassifier(BaseModel): # ← inheritance
113
- def fit(self, X, y): ...
114
- def predict(self, X): ...
115
-
116
- # linear_regression.py
117
- class LinearRegression(BaseModel): # ← inheritance
118
- def fit(self, X, y): ...
119
- def predict(self, X): ...
120
- ```
121
-
122
- #### Polymorphism
123
-
124
- Both models share the same interface — they can be used interchangeably:
125
-
126
- ```python
127
- for model in [KNNClassifier(k=3), LinearRegression()]:
128
- model.fit(X_train, y_train) # same call, different behaviour
129
- model.predict(X_test) # same call, different behaviour
130
- ```
131
-
132
- #### Encapsulation
133
-
134
- Internal state is hidden with `_` prefixes. Users interact only through the public API:
135
-
136
- ```python
137
- # knn.py
138
- self._metric = DistanceMetricFactory.create(distance) # private
139
- self._tree = None # private
140
-
141
- # linear_regression.py — controlled read access via properties
142
- @property
143
- def coef_(self) -> np.ndarray:
144
- return self._weights[1:]
145
-
146
- @property
147
- def intercept_(self) -> float:
148
- return float(self._weights[0])
149
- ```
150
-
151
- `OptimizationStrategy`, `NormalEquationStrategy`, and `GradientDescentStrategy` inside
152
- `linear_regression.py` form an additional hierarchy demonstrating inheritance within the library.
153
-
154
- ---
155
-
156
- ### 2 — Functional Programming
157
-
158
- **File:** `evaluator.py`
159
-
160
- #### Functions as First-Class Objects
161
-
162
- Metric functions are stored in dictionaries as values and called dynamically:
163
-
164
- ```python
165
- # evaluator.py
166
- _regression_metrics: dict[str, callable] = {
167
- "mae": mae,
168
- "mse": mse,
169
- "rmse": rmse,
170
- }
171
-
172
- @classmethod
173
- def evaluate_regression(cls, y_true, y_pred) -> dict:
174
- # applies every registered function — no if/elif chain
175
- return {name: fn(y_true, y_pred) for name, fn in cls._regression_metrics.items()}
176
- ```
177
-
178
- #### Higher-Order Function — `register()`
179
-
180
- `Evaluator.register()` accepts any callable and plugs it in at runtime.
181
- This is the classic higher-order function pattern: a function (or method) that takes another function as an argument.
182
-
183
- ```python
184
- # Add a custom metric without modifying the Evaluator class
185
- Evaluator.register(
186
- "max_error",
187
- lambda y_true, y_pred: max(abs(a - b) for a, b in zip(y_true, y_pred)),
188
- kind="regression",
189
- )
190
- result = Evaluator.evaluate_regression(y_test, y_pred)
191
- print(result["max_error"]) # available immediately
192
- ```
193
-
194
- #### Pure Functions
195
-
196
- `mae`, `mse`, `rmse`, `accuracy`, `precision`, `recall`, `f1_score` are all pure functions:
197
- - No side effects
198
- - No mutation of inputs
199
- - Same inputs always produce the same output
200
-
201
- ```python
202
- from coreLearn import mae, accuracy
203
- mae([1.0, 2.0, 3.0], [1.5, 2.5, 3.5]) # → 0.5 (always)
204
- accuracy([0, 1, 1], [0, 1, 0]) # → ≈ 0.667 (always)
205
- ```
206
-
207
- ---
208
-
209
- ### 3 — Concurrency
210
-
211
- **File:** `knn.py` — `KNNClassifier.predict()`
212
-
213
- `KNNClassifier` uses `ProcessPoolExecutor` to classify test samples in parallel across
214
- multiple CPU processes. Unlike threads, each worker runs in its own process with its
215
- own GIL — enabling true CPU-bound parallelism.
216
-
217
- ```python
218
- # knn.py
219
- def predict(self, X) -> list:
220
- ...
221
- if self.n_jobs == 1:
222
- # sequential — no overhead for small datasets
223
- return [self._predict_one(x) for x in samples]
224
-
225
- # parallel — distribute samples across n_jobs worker processes
226
- args = [(self._tree, x, self.k, self._metric) for x in samples]
227
- with ProcessPoolExecutor(max_workers=self.n_jobs) as executor:
228
- return list(executor.map(_predict_worker, args))
229
- ```
230
-
231
- ```python
232
- # n_jobs=1 → sequential (default, safe for notebooks)
233
- knn = KNNClassifier(k=5, n_jobs=1)
234
-
235
- # n_jobs=4 → 4 parallel worker processes
236
- knn = KNNClassifier(k=5, n_jobs=4)
237
- knn.fit(X_train, y_train)
238
- preds = knn.predict(X_test)
239
- ```
240
-
241
- > **Note:** `ProcessPoolExecutor` requires the `if __name__ == "__main__":` guard on
242
- > Windows/macOS when used in scripts. The `n_jobs=1` default is safe everywhere.
243
-
244
- ---
245
-
246
- ### 4 — Recursion
247
-
248
- **File:** `knn.py` — `KDTree`
249
-
250
- The KD-Tree data structure is built and searched using **recursion**.
251
- Both `_build` and `_search` call themselves with a strictly smaller subproblem each time.
252
-
253
- #### `_build` — Recursive Tree Construction
254
-
255
- **Base case:** empty data → return `None`.
256
- **Recursive case:** split on the median, call `_build` on each half with `depth + 1`.
257
-
258
- ```python
259
- # knn.py
260
- def _build(self, data: list, depth: int):
261
- if not data: # ← base case
262
- return None
263
- axis = depth % len(data[0][0])
264
- data.sort(key=lambda item: item[0][axis])
265
- mid = len(data) // 2
266
- return KDNode(
267
- point = data[mid][0],
268
- label = data[mid][1],
269
- left = self._build(data[:mid], depth + 1), # ← recursion
270
- right = self._build(data[mid + 1:], depth + 1), # ← recursion
271
- )
272
- ```
273
-
274
- #### `_search` — Recursive Nearest-Neighbour Search
275
-
276
- **Base case:** node is `None` → return.
277
- **Recursive case:** visit the near branch, then prune and optionally visit the far branch.
278
-
279
- ```python
280
- # knn.py
281
- def _search(self, node, target, k, metric, depth, best):
282
- if node is None: # ← base case
283
- return
284
- dist = metric(target, node.point)
285
- # update best list ...
286
- self._search(near, target, k, metric, depth + 1, best) # ← recursion
287
- if len(best) < k or abs(diff) < best[-1][0]:
288
- self._search(far, target, k, metric, depth + 1, best) # ← recursion (pruned)
289
- ```
290
-
291
- **Pruning:** the `abs(diff) < best[-1][0]` condition skips the far branch when it cannot
292
- contain a closer neighbour — achieving O(log n) average search complexity.
293
-
294
- ---
295
-
296
- ### 5 — SOLID Principles
297
-
298
- **Files:** all modules
299
-
300
- #### S — Single Responsibility
301
-
302
- Every class has exactly one reason to change:
303
-
304
- | Class | Sole Responsibility |
305
- |-------|-------------------|
306
- | `BaseModel` | Define the common model contract |
307
- | `KDTree` | Spatial nearest-neighbour search |
308
- | `KNNClassifier` | KNN classification logic |
309
- | `LinearRegression` | Linear regression logic |
310
- | `NormalEquationStrategy` | Closed-form weight computation |
311
- | `GradientDescentStrategy` | Iterative gradient-based weight computation |
312
- | `DistanceMetricFactory` | Instantiate distance metric objects by name |
313
- | `Evaluator` | Compute and manage evaluation metrics |
314
-
315
- #### O — Open/Closed
316
-
317
- Classes are open for extension, closed for modification.
318
- New metrics and distance functions can be added **without editing any existing class**:
319
-
320
- ```python
321
- # Add a new metric — Evaluator source code untouched
322
- Evaluator.register("r2", lambda t, p: ..., kind="regression")
323
-
324
- # Add a new distance — KNNClassifier source code untouched
325
- DistanceMetricFactory.register("chebyshev", ChebyshevDistance)
326
- knn = KNNClassifier(k=3, distance="chebyshev")
327
- ```
328
-
329
- #### L — Liskov Substitution
330
-
331
- Any `BaseModel` subclass can replace `BaseModel` without breaking callers:
332
-
333
- ```python
334
- def train_and_score(model: BaseModel, X_train, y_train, X_test, y_test):
335
- preds = model.fit_predict(X_train, y_train, X_test)
336
- return accuracy(y_test, preds)
337
-
338
- train_and_score(KNNClassifier(k=3), ...) # works
339
- train_and_score(LinearRegression(), ...) # works
340
- ```
341
-
342
- #### I — Interface Segregation
343
-
344
- `DistanceMetric` exposes only what is needed — a single `compute()` method.
345
- Implementors are not forced to implement anything they do not use:
346
-
347
- ```python
348
- # distances.py
349
- class DistanceMetric(ABC):
350
- @abstractmethod
351
- def compute(self, a: list, b: list) -> float: ...
352
- # nothing else required
353
- ```
354
-
355
- #### D — Dependency Inversion
356
-
357
- `LinearRegression` depends on the **abstraction** `OptimizationStrategy`,
358
- not on any concrete strategy class:
359
-
360
- ```python
361
- # linear_regression.py
362
- self._weights = self._strategy.fit(X_b, y)
363
- # ↑ OptimizationStrategy interface — concrete class unknown here
364
- ```
365
-
366
- ---
367
-
368
- ### 6 — Architectural & Design Patterns
369
-
370
- - **Core layer** (`base.py`, `distances.py`): abstractions and shared contracts
371
- - **Algorithm layer** (`knn.py`, `linear_regression.py`): concrete ML algorithms
372
- - **Evaluation layer** (`evaluator.py`): metric computation
373
- - **Public API** (`__init__.py`): single entry point, re-exports everything
374
-
375
- #### Pattern 1 — Template Method (`base.py`)
376
-
377
- `fit_predict` defines the fixed skeleton (fit → predict).
378
- Subclasses fill in each step without altering the sequence:
379
-
380
- ```python
381
- # base.py
382
- def fit_predict(self, X_train, y_train, X_test) -> list:
383
- self.fit(X_train, y_train) # ← step 1: implemented by subclass
384
- return self.predict(X_test) # ← step 2: implemented by subclass
385
- ```
386
-
387
- Every model gets `fit_predict` for free through inheritance.
388
-
389
- #### Pattern 2 — Strategy (`linear_regression.py`)
390
-
391
- The optimisation algorithm is swapped at construction time.
392
- `LinearRegression.fit()` never knows which concrete strategy it is using:
393
-
394
- ```python
395
- lr_ne = LinearRegression(strategy="normal") # uses NormalEquationStrategy
396
- lr_gd = LinearRegression(strategy="gradient_descent") # uses GradientDescentStrategy
397
-
398
- # Both models have the same interface — caller code is identical
399
- lr_ne.fit(X_train, y_train)
400
- lr_gd.fit(X_train, y_train)
401
- ```
402
-
403
- To add a third optimiser (e.g. Adam), only a new `OptimizationStrategy` subclass is needed.
404
-
405
- #### Pattern 3 — Factory (`distances.py`)
406
-
407
- `DistanceMetricFactory` centralises object creation.
408
- `KNNClassifier` never imports `EuclideanDistance` or `ManhattanDistance` directly:
409
-
410
- ```python
411
- # distances.py
412
- class DistanceMetricFactory:
413
- _registry = {"euclidean": EuclideanDistance, "manhattan": ManhattanDistance}
414
-
415
- @classmethod
416
- def create(cls, name: str) -> DistanceMetric:
417
- return cls._registry[name]() # create and return
418
-
419
- @classmethod
420
- def register(cls, name: str, metric_class: type) -> None:
421
- cls._registry[name] = metric_class # extend without modifying
422
-
423
- # knn.py — only depends on the factory, not the concrete classes
424
- self._metric = DistanceMetricFactory.create(distance)
425
- ```
426
-
427
- ---
428
-
429
- ## API Reference
430
-
431
- ### `KNNClassifier`
432
-
433
- | Parameter | Type | Default | Description |
434
- |-----------|------|---------|-------------|
435
- | `k` | `int` | `5` | Number of neighbours |
436
- | `distance` | `str` | `"euclidean"` | `"euclidean"` or `"manhattan"` (or any registered name) |
437
- | `n_jobs` | `int` | `1` | Worker processes for prediction (`1` = sequential) |
438
-
439
- ### `LinearRegression`
440
-
441
- | Parameter | Type | Default | Description |
442
- |-----------|------|---------|-------------|
443
- | `strategy` | `str` | `"normal"` | `"normal"` (closed-form) or `"gradient_descent"` |
444
- | `learning_rate` | `float` | `0.01` | Learning rate — gradient descent only |
445
- | `epochs` | `int` | `1000` | Iterations — gradient descent only |
446
-
447
- ### `Evaluator`
448
-
449
- | Method | Description |
450
- |--------|-------------|
451
- | `evaluate_regression(y_true, y_pred)` | Returns `{"mae", "mse", "rmse"}` |
452
- | `evaluate_classification(y_true, y_pred)` | Returns `{"accuracy", "precision", "recall", "f1"}` |
453
- | `register(name, fn, kind)` | Add a custom metric at runtime |
454
-
455
- ### Standalone metric functions
456
-
457
- ```python
458
- from coreLearn import accuracy, mae, mse, rmse, precision, recall, f1_score
459
- ```
460
-
461
- ---
462
-
463
- ## Dependencies
464
-
465
- | Package | Purpose |
466
- |---------|---------|
467
- | `numpy` | Matrix operations, vectorised arithmetic |
468
- | `pytest` | Unit testing |
469
- | `scikit-learn` | Datasets and preprocessing in examples only |
470
- | `pandas` | Data loading in examples only |
471
- | `matplotlib` | Visualisation in examples only |