patentml 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patentml-0.1.0/LICENSE +28 -0
- patentml-0.1.0/PKG-INFO +103 -0
- patentml-0.1.0/README.md +75 -0
- patentml-0.1.0/patentml/__init__.py +169 -0
- patentml-0.1.0/patentml/actor_critic.py +304 -0
- patentml-0.1.0/patentml/adaboost.py +168 -0
- patentml-0.1.0/patentml/bayesian_network.py +300 -0
- patentml-0.1.0/patentml/bayesian_optimiser.py +264 -0
- patentml-0.1.0/patentml/collaborative_filter.py +254 -0
- patentml-0.1.0/patentml/dbscan.py +230 -0
- patentml-0.1.0/patentml/decision_tree.py +289 -0
- patentml-0.1.0/patentml/deep_networks.py +426 -0
- patentml-0.1.0/patentml/em_clustering.py +198 -0
- patentml-0.1.0/patentml/ensemble.py +299 -0
- patentml-0.1.0/patentml/feature_engineering.py +334 -0
- patentml-0.1.0/patentml/gaussian_process.py +256 -0
- patentml-0.1.0/patentml/genetic_algorithm.py +271 -0
- patentml-0.1.0/patentml/genetic_programming.py +289 -0
- patentml-0.1.0/patentml/gradient_boosting.py +227 -0
- patentml-0.1.0/patentml/grammar_gp.py +546 -0
- patentml-0.1.0/patentml/hidden_markov.py +312 -0
- patentml-0.1.0/patentml/isolation_forest.py +223 -0
- patentml-0.1.0/patentml/kalman_filter.py +258 -0
- patentml-0.1.0/patentml/knn.py +255 -0
- patentml-0.1.0/patentml/linear_gp.py +405 -0
- patentml-0.1.0/patentml/mean_shift.py +198 -0
- patentml-0.1.0/patentml/mini_neural_net.py +146 -0
- patentml-0.1.0/patentml/multi_armed_bandit.py +255 -0
- patentml-0.1.0/patentml/neuroevolution.py +260 -0
- patentml-0.1.0/patentml/online_classifier.py +141 -0
- patentml-0.1.0/patentml/optimisers.py +208 -0
- patentml-0.1.0/patentml/particle_swarm.py +319 -0
- patentml-0.1.0/patentml/q_learning.py +229 -0
- patentml-0.1.0/patentml/scalable_kmeans.py +161 -0
- patentml-0.1.0/patentml/simulated_annealing.py +205 -0
- patentml-0.1.0/patentml/spectral_clustering.py +234 -0
- patentml-0.1.0/patentml/svd_reducer.py +248 -0
- patentml-0.1.0/patentml/svm_smo.py +181 -0
- patentml-0.1.0/patentml/text_classifier.py +202 -0
- patentml-0.1.0/patentml/vector_quantisation.py +254 -0
- patentml-0.1.0/patentml/word_embeddings.py +325 -0
- patentml-0.1.0/patentml.egg-info/PKG-INFO +103 -0
- patentml-0.1.0/patentml.egg-info/SOURCES.txt +46 -0
- patentml-0.1.0/patentml.egg-info/dependency_links.txt +1 -0
- patentml-0.1.0/patentml.egg-info/top_level.txt +1 -0
- patentml-0.1.0/pyproject.toml +41 -0
- patentml-0.1.0/setup.cfg +4 -0
- patentml-0.1.0/tests/test_smoke.py +136 -0
patentml-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Martin Carr
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
The algorithms implemented in this library are derived from United States
|
|
26
|
+
patents that have expired and entered the public domain. The implementations
|
|
27
|
+
themselves are original code, copyright as above. Source patents are
|
|
28
|
+
documented per-module and in the package docstring.
|
patentml-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: patentml
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: 37 machine learning algorithms reconstructed from expired US patents. Zero dependencies, pure Python stdlib.
|
|
5
|
+
Author-email: Martin Carr <martincarrsy23@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://getoptimal8.com
|
|
8
|
+
Keywords: machine-learning,zero-dependency,stdlib,patents,genetic-algorithm,genetic-programming,neural-network,reinforcement-learning,clustering,kalman-filter,gaussian-process,embedded,micropython
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Intended Audience :: Education
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
# patentml
|
|
30
|
+
|
|
31
|
+
**Machine learning from expired patents. Zero dependencies. Pure Python stdlib.**
|
|
32
|
+
|
|
33
|
+
37 modules, 131 classes and functions — every algorithm reconstructed from a United States patent that has expired into the public domain. The patents that built modern ML were filed by IBM, Bell Labs, Microsoft Research, Lockheed, AT&T and Lucent between 1995 and 2006. They have all expired. This library is what they describe, as clean modern Python, with no imports beyond the standard library.
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
pip install patentml
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
No numpy. No scipy. No compiled extensions. If it runs Python 3.8+, it runs `patentml` — locked-down corporate machines, serverless functions, air-gapped environments, Pyodide in the browser, and (with light trimming) MicroPython boards.
|
|
40
|
+
|
|
41
|
+
## Quick start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from patentml import RandomForest, ScalableKMeans, ThompsonSampling, KalmanFilter
|
|
45
|
+
|
|
46
|
+
# Classification — US6816847 (Microsoft, 1999)
|
|
47
|
+
forest = RandomForest(n_trees=25)
|
|
48
|
+
forest.fit(X_train, y_train)
|
|
49
|
+
labels = [forest.predict(x) for x in X_test]
|
|
50
|
+
|
|
51
|
+
# Clustering — US6012058 (Microsoft, 1998)
|
|
52
|
+
km = ScalableKMeans(k=3)
|
|
53
|
+
km.fit(points)
|
|
54
|
+
|
|
55
|
+
# Bandits — US6981040 (Utopy, 2000) [919 forward citations]
|
|
56
|
+
bandit = ThompsonSampling(n_arms=4)
|
|
57
|
+
arm = bandit.select()
|
|
58
|
+
bandit.update(arm, reward=1.0)
|
|
59
|
+
|
|
60
|
+
# State estimation — US6795794 (Univ. Illinois, 2002)
|
|
61
|
+
kf = KalmanFilter(dim_state=2, dim_obs=1)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## What's inside
|
|
65
|
+
|
|
66
|
+
| Family | Modules |
|
|
67
|
+
|---|---|
|
|
68
|
+
| Evolutionary & global optimisation | genetic algorithm, genetic programming, grammar GP / grammatical evolution, linear GP, particle swarm, differential evolution, CMA-ES, simulated annealing, ant colony, Bayesian optimiser / EDA, neuroevolution |
|
|
69
|
+
| Neural networks | mini neural net (mini-batch backprop), Conv1D, SimpleRNN, GRU cell, SGD/RMSProp/Adam/AdamW optimisers |
|
|
70
|
+
| Classifiers | decision tree, random forest, AdaBoost, SVM (SMO), online Bayes, naive Bayes, KNN (+ BallTree), gradient boosting |
|
|
71
|
+
| Ensembles | voting, stacking, bagging, weighted |
|
|
72
|
+
| Clustering | scalable & hierarchical k-means, DBSCAN, OPTICS, EM / Gaussian mixture, spectral, mean shift |
|
|
73
|
+
| Reinforcement learning | Q-learning, SARSA, function-approximation Q, actor-critic A2C, PPO-lite, ε-greedy / UCB1 / Thompson / EXP3 / LinUCB bandits |
|
|
74
|
+
| Probabilistic | Bayesian network, hidden Markov model, Gaussian process regression & classification, kernel density estimation |
|
|
75
|
+
| Anomaly detection | isolation forest, one-class SVM |
|
|
76
|
+
| NLP | TF-IDF + naive Bayes text pipeline, word2vec SGNS, PMI embeddings |
|
|
77
|
+
| Recommenders | memory-based & Bayesian collaborative filtering |
|
|
78
|
+
| Dimensionality & features | PCA, randomised SVD, vector quantisation (LBG, product quantiser), scalers, mutual-information ranking, forward selection |
|
|
79
|
+
| State estimation | Kalman filter, extended Kalman filter |
|
|
80
|
+
|
|
81
|
+
## Provenance
|
|
82
|
+
|
|
83
|
+
Every module documents its source patent: number, assignee, filing year, and forward-citation count. Highlights:
|
|
84
|
+
|
|
85
|
+
| Patent | Assignee | Algorithm | Citations |
|
|
86
|
+
|---|---|---|---|
|
|
87
|
+
| US5613012 | SmartTouch (1995) | Voting ensemble | 1,182 |
|
|
88
|
+
| US6981040 | Utopy (2000) | Bandit selection | 919 |
|
|
89
|
+
| US6161130 | Microsoft (1998) | Online classifier | 896 |
|
|
90
|
+
| US6556983 | Microsoft (2000) | Word embeddings (PMI + SGNS) | 645 |
|
|
91
|
+
| US6192360 | Microsoft (1998) | TF-IDF + naive Bayes | 364 |
|
|
92
|
+
| US6317707 | AT&T (1998) | Mean shift + KDE | 269 |
|
|
93
|
+
| US6931384 | Microsoft (2001) | Gaussian process regression | 258 |
|
|
94
|
+
|
|
95
|
+
The full list of ~40 source patents is in the package docstring: `python -c "import patentml; print(patentml.__doc__)"`.
|
|
96
|
+
|
|
97
|
+
All source patents are expired. The implementations are original code, MIT licensed.
|
|
98
|
+
|
|
99
|
+
## Why
|
|
100
|
+
|
|
101
|
+
Modern ML stacks are heavy, opaque, and supply-chain risky. Sometimes you need *one* algorithm — a Kalman filter on a microcontroller, a bandit in a serverless function, k-means in a browser — without 200 MB of compiled wheels. And sometimes you want code you can actually read: every module here is a single self-contained file you can audit in one sitting.
|
|
102
|
+
|
|
103
|
+
These algorithms earned their citations the hard way. They still work.
|
patentml-0.1.0/README.md
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# patentml
|
|
2
|
+
|
|
3
|
+
**Machine learning from expired patents. Zero dependencies. Pure Python stdlib.**
|
|
4
|
+
|
|
5
|
+
37 modules, 131 classes and functions — every algorithm reconstructed from a United States patent that has expired into the public domain. The patents that built modern ML were filed by IBM, Bell Labs, Microsoft Research, Lockheed, AT&T and Lucent between 1995 and 2006. They have all expired. This library is what they describe, as clean modern Python, with no imports beyond the standard library.
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
pip install patentml
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
No numpy. No scipy. No compiled extensions. If it runs Python 3.8+, it runs `patentml` — locked-down corporate machines, serverless functions, air-gapped environments, Pyodide in the browser, and (with light trimming) MicroPython boards.
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from patentml import RandomForest, ScalableKMeans, ThompsonSampling, KalmanFilter
|
|
17
|
+
|
|
18
|
+
# Classification — US6816847 (Microsoft, 1999)
|
|
19
|
+
forest = RandomForest(n_trees=25)
|
|
20
|
+
forest.fit(X_train, y_train)
|
|
21
|
+
labels = [forest.predict(x) for x in X_test]
|
|
22
|
+
|
|
23
|
+
# Clustering — US6012058 (Microsoft, 1998)
|
|
24
|
+
km = ScalableKMeans(k=3)
|
|
25
|
+
km.fit(points)
|
|
26
|
+
|
|
27
|
+
# Bandits — US6981040 (Utopy, 2000) [919 forward citations]
|
|
28
|
+
bandit = ThompsonSampling(n_arms=4)
|
|
29
|
+
arm = bandit.select()
|
|
30
|
+
bandit.update(arm, reward=1.0)
|
|
31
|
+
|
|
32
|
+
# State estimation — US6795794 (Univ. Illinois, 2002)
|
|
33
|
+
kf = KalmanFilter(dim_state=2, dim_obs=1)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## What's inside
|
|
37
|
+
|
|
38
|
+
| Family | Modules |
|
|
39
|
+
|---|---|
|
|
40
|
+
| Evolutionary & global optimisation | genetic algorithm, genetic programming, grammar GP / grammatical evolution, linear GP, particle swarm, differential evolution, CMA-ES, simulated annealing, ant colony, Bayesian optimiser / EDA, neuroevolution |
|
|
41
|
+
| Neural networks | mini neural net (mini-batch backprop), Conv1D, SimpleRNN, GRU cell, SGD/RMSProp/Adam/AdamW optimisers |
|
|
42
|
+
| Classifiers | decision tree, random forest, AdaBoost, SVM (SMO), online Bayes, naive Bayes, KNN (+ BallTree), gradient boosting |
|
|
43
|
+
| Ensembles | voting, stacking, bagging, weighted |
|
|
44
|
+
| Clustering | scalable & hierarchical k-means, DBSCAN, OPTICS, EM / Gaussian mixture, spectral, mean shift |
|
|
45
|
+
| Reinforcement learning | Q-learning, SARSA, function-approximation Q, actor-critic A2C, PPO-lite, ε-greedy / UCB1 / Thompson / EXP3 / LinUCB bandits |
|
|
46
|
+
| Probabilistic | Bayesian network, hidden Markov model, Gaussian process regression & classification, kernel density estimation |
|
|
47
|
+
| Anomaly detection | isolation forest, one-class SVM |
|
|
48
|
+
| NLP | TF-IDF + naive Bayes text pipeline, word2vec SGNS, PMI embeddings |
|
|
49
|
+
| Recommenders | memory-based & Bayesian collaborative filtering |
|
|
50
|
+
| Dimensionality & features | PCA, randomised SVD, vector quantisation (LBG, product quantiser), scalers, mutual-information ranking, forward selection |
|
|
51
|
+
| State estimation | Kalman filter, extended Kalman filter |
|
|
52
|
+
|
|
53
|
+
## Provenance
|
|
54
|
+
|
|
55
|
+
Every module documents its source patent: number, assignee, filing year, and forward-citation count. Highlights:
|
|
56
|
+
|
|
57
|
+
| Patent | Assignee | Algorithm | Citations |
|
|
58
|
+
|---|---|---|---|
|
|
59
|
+
| US5613012 | SmartTouch (1995) | Voting ensemble | 1,182 |
|
|
60
|
+
| US6981040 | Utopy (2000) | Bandit selection | 919 |
|
|
61
|
+
| US6161130 | Microsoft (1998) | Online classifier | 896 |
|
|
62
|
+
| US6556983 | Microsoft (2000) | Word embeddings (PMI + SGNS) | 645 |
|
|
63
|
+
| US6192360 | Microsoft (1998) | TF-IDF + naive Bayes | 364 |
|
|
64
|
+
| US6317707 | AT&T (1998) | Mean shift + KDE | 269 |
|
|
65
|
+
| US6931384 | Microsoft (2001) | Gaussian process regression | 258 |
|
|
66
|
+
|
|
67
|
+
The full list of ~40 source patents is in the package docstring: `python -c "import patentml; print(patentml.__doc__)"`.
|
|
68
|
+
|
|
69
|
+
All source patents are expired. The implementations are original code, MIT licensed.
|
|
70
|
+
|
|
71
|
+
## Why
|
|
72
|
+
|
|
73
|
+
Modern ML stacks are heavy, opaque, and supply-chain risky. Sometimes you need *one* algorithm — a Kalman filter on a microcontroller, a bandit in a serverless function, k-means in a browser — without 200 MB of compiled wheels. And sometimes you want code you can actually read: every module here is a single self-contained file you can audit in one sitting.
|
|
74
|
+
|
|
75
|
+
These algorithms earned their citations the hard way. They still work.
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""
|
|
2
|
+
patentml — Machine Learning from Expired Patents
|
|
3
|
+
All algorithms derived from expired US patents. Zero dependencies. Pure Python stdlib.
|
|
4
|
+
|
|
5
|
+
EXPIRED PATENT SOURCES (selected key ones):
|
|
6
|
+
US5970487 Mitsubishi (1997) GA hardware machine
|
|
7
|
+
US6912587 AT&T (2001) Constraint-weighted GA fitness
|
|
8
|
+
US6477444 Fuji Xerox (2000) Genetic programming tree evolution
|
|
9
|
+
US6212427 Kennedy (1999) Particle swarm optimisation
|
|
10
|
+
US6484115 Storn (1999) Differential evolution
|
|
11
|
+
US7047169 Univ. IL (2002) EDA / Bayesian optimisation
|
|
12
|
+
US7219040 GE (2002) Simulated annealing + constraints
|
|
13
|
+
US5835901 Lockheed (1997) Neuroevolution [142 cites]
|
|
14
|
+
US7162461 SAS (2005) Hybrid NN activation search
|
|
15
|
+
US6128606 AT&T (1997) Modular NN + backprop [53 cites]
|
|
16
|
+
US7747070 Microsoft (2005) CNN on GPU [91 cites]
|
|
17
|
+
US6963862 Texas A&M (2001) Recurrent network training
|
|
18
|
+
US7447669 Nanyang (2004) Ant Colony Optimisation
|
|
19
|
+
US6161130 Microsoft (1998) Online classifier [896 cites]
|
|
20
|
+
US6327581 Microsoft (1998) SVM-SMO [173 cites]
|
|
21
|
+
US7421415 Siemens (2005) AdaBoost + feature selection
|
|
22
|
+
US6816847 Microsoft (1999) Decision tree + ensembles [72 cites]
|
|
23
|
+
US5613012 SmartTouch (1995) Voting ensemble [1182 cites]
|
|
24
|
+
US6012058 Microsoft (1998) Scalable K-means
|
|
25
|
+
US6985172 SW Research(2002) Q-learning / TD + Actor-Critic A2C / PPO-lite [145 cites]
|
|
26
|
+
US6981040 Utopy (2000) Bandit selection [919 cites]
|
|
27
|
+
US7076102 Philips (2002) HMM event learning [116 cites]
|
|
28
|
+
US6529891 Microsoft (1998) PCA / Bayesian model selection [124 cites]
|
|
29
|
+
US6807536 Microsoft (2001) Randomised SVD / low-rank approximation [170 cites]
|
|
30
|
+
US6263337 Microsoft (1998) Scalable EM / Gaussian mixture clustering [142 cites]
|
|
31
|
+
US6192360 Microsoft (1998) Text classifier TF-IDF + Naive Bayes [364 cites]
|
|
32
|
+
US6496816 Microsoft (1998) Collaborative filtering + Bayesian mixture [128 cites]
|
|
33
|
+
US6374251 Microsoft (1998) KNN + BallTree scalable search [187 cites]
|
|
34
|
+
US6049797 Lucent (1998) DBSCAN + OPTICS density clustering [154 cites]
|
|
35
|
+
US7031530 Lockheed (2001) Gradient Boosting Machine (GBM) [188 cites]
|
|
36
|
+
US6795794 U.Illinois (2002) Kalman Filter + Extended KF [127 cites]
|
|
37
|
+
US6931384 Microsoft (2001) Gaussian Process regression + classification [258 cites]
|
|
38
|
+
US6216066 GE (1998) Spectral clustering via Laplacian eigenmap [148 cites]
|
|
39
|
+
US6317707 AT&T (1998) Mean Shift + Kernel Density Estimation [269 cites]
|
|
40
|
+
US7225343 Columbia U (2003) Isolation Forest + One-Class SVM [159 cites]
|
|
41
|
+
US6490698 Microsoft (1999) Bayesian Network + Naive Bayes [102 cites]
|
|
42
|
+
US6421467 Texas Tech (1999) Vector Quantisation LBG + Product Quantiser [101 cites]
|
|
43
|
+
US6556983 Microsoft (2000) Word Embeddings PMI + Word2Vec SGNS [645 cites]
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
__version__ = "0.1.0"
|
|
47
|
+
|
|
48
|
+
from .genetic_algorithm import GeneticAlgorithm, Chromosome, make_population
|
|
49
|
+
from .genetic_programming import GeneticProgramming, rand_tree, Var, Const, BinOp, UnaryOp
|
|
50
|
+
from .grammar_gp import (GrammarGP, GrammaticalEvolution, GNode,
|
|
51
|
+
rand_grammar_tree, make_math_grammar, MATH_GRAMMAR,
|
|
52
|
+
subtree_crossover, subtree_mutation, point_mutation)
|
|
53
|
+
from .linear_gp import (LinearGP, Instruction, Program,
|
|
54
|
+
rand_program, effective_program, effective_instructions,
|
|
55
|
+
linear_crossover, micro_mutation, macro_mutation,
|
|
56
|
+
to_expr_string, execute, OPERATIONS, FAST_OPS)
|
|
57
|
+
from .particle_swarm import ParticleSwarmOptimiser, DifferentialEvolution, CMAES
|
|
58
|
+
from .online_classifier import OnlineBayesClassifier
|
|
59
|
+
from .scalable_kmeans import ScalableKMeans, HierarchicalKMeans
|
|
60
|
+
from .mini_neural_net import MiniNeuralNet
|
|
61
|
+
from .deep_networks import Conv1D, SimpleRNN, GRUCell, AntColonyOptimiser
|
|
62
|
+
from .svm_smo import SVM, linear_kernel, rbf_kernel, poly_kernel
|
|
63
|
+
from .adaboost import AdaBoost, DecisionStump
|
|
64
|
+
from .q_learning import QLearner, SARSALearner, FunctionApproxQLearner
|
|
65
|
+
from .multi_armed_bandit import EpsilonGreedy, UCB1, ThompsonSampling, EXP3, LinUCB
|
|
66
|
+
from .bayesian_optimiser import GaussianEDA, BayesianOptimiser
|
|
67
|
+
from .simulated_annealing import SimulatedAnnealing, MultiObjectiveSA
|
|
68
|
+
from .decision_tree import DecisionTree, RandomForest
|
|
69
|
+
from .optimisers import SGD, RMSProp, Adam, AdamW, LRScheduler, numerical_gradient
|
|
70
|
+
from .hidden_markov import HiddenMarkovModel
|
|
71
|
+
from .neuroevolution import EvoNet, WeightEvolution, HybridNNEvolver, ACTIVATIONS
|
|
72
|
+
from .ensemble import VotingEnsemble, StackingEnsemble, BaggingEnsemble, WeightedEnsemble
|
|
73
|
+
from .knn import KNNClassifier, KNNRegressor, FastKNN, BallTree
|
|
74
|
+
from .dbscan import DBSCAN, OPTICS
|
|
75
|
+
from .gradient_boosting import GBMRegressor, GBMClassifier
|
|
76
|
+
from .kalman_filter import KalmanFilter, ExtendedKalmanFilter
|
|
77
|
+
from .gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier, rbf_kernel as gp_rbf, matern32_kernel, periodic_kernel
|
|
78
|
+
from .spectral_clustering import SpectralClustering, rbf_affinity, knn_affinity
|
|
79
|
+
from .mean_shift import MeanShift, KernelDensityEstimator, estimate_bandwidth
|
|
80
|
+
from .isolation_forest import IsolationForest, OneClassSVM
|
|
81
|
+
from .bayesian_network import BayesianNetwork, NaiveBayes, CPT
|
|
82
|
+
from .vector_quantisation import LBG, AdaptiveVQ, ProductQuantiser
|
|
83
|
+
from .actor_critic import A2C, PPOLite, LinearActor, LinearCritic
|
|
84
|
+
from .word_embeddings import Word2Vec, PMIEmbeddings, build_cooccurrence, tokenise
|
|
85
|
+
from .svd_reducer import randomised_svd, pca as randomised_pca, transform as svd_transform
|
|
86
|
+
from .em_clustering import EMClustering, GaussianComponent
|
|
87
|
+
from .text_classifier import TextPipeline, TFIDFVectoriser, NaiveBayesClassifier
|
|
88
|
+
from .collaborative_filter import MemoryCF, BayesianCF
|
|
89
|
+
from .feature_engineering import (
|
|
90
|
+
PCA, StandardScaler, MinMaxScaler,
|
|
91
|
+
variance_threshold, correlation_filter,
|
|
92
|
+
mi_feature_ranking, forward_feature_selection,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
__all__ = [
|
|
96
|
+
# Evolutionary / Global Optimisation
|
|
97
|
+
"GeneticAlgorithm", "Chromosome", "make_population",
|
|
98
|
+
"GeneticProgramming", "rand_tree", "Var", "Const", "BinOp", "UnaryOp",
|
|
99
|
+
# Grammar-Guided GP / Grammatical Evolution
|
|
100
|
+
"GrammarGP", "GrammaticalEvolution", "GNode",
|
|
101
|
+
"rand_grammar_tree", "make_math_grammar", "MATH_GRAMMAR",
|
|
102
|
+
"subtree_crossover", "subtree_mutation", "point_mutation",
|
|
103
|
+
# Linear GP
|
|
104
|
+
"LinearGP", "Instruction", "Program",
|
|
105
|
+
"rand_program", "effective_program", "effective_instructions",
|
|
106
|
+
"linear_crossover", "micro_mutation", "macro_mutation",
|
|
107
|
+
"to_expr_string", "execute", "OPERATIONS", "FAST_OPS",
|
|
108
|
+
"ParticleSwarmOptimiser", "DifferentialEvolution", "CMAES",
|
|
109
|
+
"GaussianEDA", "BayesianOptimiser",
|
|
110
|
+
"SimulatedAnnealing", "MultiObjectiveSA",
|
|
111
|
+
"AntColonyOptimiser",
|
|
112
|
+
# Neural Networks
|
|
113
|
+
"MiniNeuralNet",
|
|
114
|
+
"Conv1D", "SimpleRNN", "GRUCell",
|
|
115
|
+
"EvoNet", "WeightEvolution", "HybridNNEvolver", "ACTIVATIONS",
|
|
116
|
+
# Gradient Optimisers
|
|
117
|
+
"SGD", "RMSProp", "Adam", "AdamW", "LRScheduler", "numerical_gradient",
|
|
118
|
+
# Classifiers
|
|
119
|
+
"OnlineBayesClassifier",
|
|
120
|
+
"SVM", "linear_kernel", "rbf_kernel", "poly_kernel",
|
|
121
|
+
"AdaBoost", "DecisionStump",
|
|
122
|
+
"DecisionTree", "RandomForest",
|
|
123
|
+
# Ensembles
|
|
124
|
+
"VotingEnsemble", "StackingEnsemble", "BaggingEnsemble", "WeightedEnsemble",
|
|
125
|
+
# Clustering
|
|
126
|
+
"ScalableKMeans", "HierarchicalKMeans",
|
|
127
|
+
# Reinforcement Learning / Bandits
|
|
128
|
+
"QLearner", "SARSALearner", "FunctionApproxQLearner",
|
|
129
|
+
"EpsilonGreedy", "UCB1", "ThompsonSampling", "EXP3", "LinUCB",
|
|
130
|
+
# Sequential Models
|
|
131
|
+
"HiddenMarkovModel",
|
|
132
|
+
# Nearest Neighbours
|
|
133
|
+
"KNNClassifier", "KNNRegressor", "FastKNN", "BallTree",
|
|
134
|
+
# Density Clustering
|
|
135
|
+
"DBSCAN", "OPTICS",
|
|
136
|
+
# Gradient Boosting
|
|
137
|
+
"GBMRegressor", "GBMClassifier",
|
|
138
|
+
# Sequential / State Estimation
|
|
139
|
+
"KalmanFilter", "ExtendedKalmanFilter",
|
|
140
|
+
# Gaussian Processes
|
|
141
|
+
"GaussianProcessRegressor", "GaussianProcessClassifier",
|
|
142
|
+
"gp_rbf", "matern32_kernel", "periodic_kernel",
|
|
143
|
+
# Graph / Spectral Clustering
|
|
144
|
+
"SpectralClustering", "rbf_affinity", "knn_affinity",
|
|
145
|
+
# Density Estimation / Mode Finding
|
|
146
|
+
"MeanShift", "KernelDensityEstimator", "estimate_bandwidth",
|
|
147
|
+
# Anomaly Detection
|
|
148
|
+
"IsolationForest", "OneClassSVM",
|
|
149
|
+
# Probabilistic Graphical Models
|
|
150
|
+
"BayesianNetwork", "NaiveBayes", "CPT",
|
|
151
|
+
# Vector Quantisation
|
|
152
|
+
"LBG", "AdaptiveVQ", "ProductQuantiser",
|
|
153
|
+
# Policy Gradient RL
|
|
154
|
+
"A2C", "PPOLite", "LinearActor", "LinearCritic",
|
|
155
|
+
# Word Embeddings / NLP
|
|
156
|
+
"Word2Vec", "PMIEmbeddings", "build_cooccurrence", "tokenise",
|
|
157
|
+
# Dimensionality Reduction
|
|
158
|
+
"randomised_svd", "randomised_pca", "svd_transform",
|
|
159
|
+
# Clustering (additional)
|
|
160
|
+
"EMClustering", "GaussianComponent",
|
|
161
|
+
# Text / NLP
|
|
162
|
+
"TextPipeline", "TFIDFVectoriser", "NaiveBayesClassifier",
|
|
163
|
+
# Recommender Systems
|
|
164
|
+
"MemoryCF", "BayesianCF",
|
|
165
|
+
# Feature Engineering
|
|
166
|
+
"PCA", "StandardScaler", "MinMaxScaler",
|
|
167
|
+
"variance_threshold", "correlation_filter",
|
|
168
|
+
"mi_feature_ranking", "forward_feature_selection",
|
|
169
|
+
]
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Actor-Critic, Advantage Actor-Critic (A2C) and PPO-lite reinforcement learning.
|
|
3
|
+
|
|
4
|
+
Patent basis: US6985172 (Southwest Research Institute, filed 2002, expired 2022)
|
|
5
|
+
"Q-learning with linear reward shaping" — 145 forward citations.
|
|
6
|
+
Covers concurrent value-function and policy optimisation where a critic
|
|
7
|
+
baseline reduces variance in policy gradient estimates.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import math
|
|
11
|
+
import random
|
|
12
|
+
from typing import List, Tuple, Optional, Callable, Dict
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _softmax(logits: List[float]) -> List[float]:
    """Return the softmax distribution over ``logits``.

    The largest logit is subtracted from every entry before exponentiating,
    so ``math.exp`` is never called with a positive argument.  An empty
    ``logits`` raises ``ValueError`` (from ``max``).
    """
    peak = max(logits)
    shifted = [value - peak for value in logits]
    weights = list(map(math.exp, shifted))
    norm = sum(weights)
    return [weight / norm for weight in weights]
|
|
20
|
+
|
|
21
|
+
def _log_softmax(logits: List[float]) -> List[float]:
    """Return the natural log of the softmax distribution over ``logits``.

    Uses the log-sum-exp identity
    ``log_softmax(x) = x - max(x) - log(sum(exp(x - max(x))))`` rather than
    taking ``log`` of the softmax probabilities.  Taking the log afterwards
    loses every entry whose probability underflows to zero (it would have to
    be clamped), whereas this form stays exact for any finite logits.

    An empty ``logits`` raises ``ValueError`` (from ``max``).
    """
    peak = max(logits)
    # The entry equal to ``peak`` contributes exp(0) == 1, so the sum is
    # at least 1 and the logarithm is always defined.
    log_norm = math.log(sum(math.exp(x - peak) for x in logits))
    return [x - peak - log_norm for x in logits]
|
|
24
|
+
|
|
25
|
+
def _relu(x: float) -> float:
    """Rectified linear unit: ``x`` when it is strictly positive, else ``0.0``."""
    if x > 0.0:
        return x
    return 0.0
|
|
27
|
+
|
|
28
|
+
def _drelu(x: float) -> float:
    """Derivative of ReLU: ``1.0`` for strictly positive ``x``, else ``0.0``."""
    if x > 0:
        return 1.0
    return 0.0
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ── Simple linear actor/critic ─────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
class LinearActor:
    """
    Softmax policy with one linear score per action.

    For a state vector ``s`` the policy is pi(a|s) = softmax(W @ s + b),
    where ``W`` holds one weight row per action and ``b`` one bias per action.
    """

    def __init__(self, n_states: int, n_actions: int, seed: int = 42):
        self.n_states = n_states
        self.n_actions = n_actions
        init_rng = random.Random(seed)
        init_std = 0.1
        # Weights are drawn row by row (one row per action) from a private
        # generator, so the same seed always produces the same matrix.
        weight_rows = []
        for _ in range(n_actions):
            weight_rows.append([init_rng.gauss(0, init_std) for _ in range(n_states)])
        self.W = weight_rows
        self.b = [0.0] * n_actions

    def logits(self, state: List[float]) -> List[float]:
        """Return the unnormalised score ``W[a] . state + b[a]`` of each action."""
        scores = []
        for action in range(self.n_actions):
            row = self.W[action]
            dot = sum(row[i] * state[i] for i in range(self.n_states))
            scores.append(dot + self.b[action])
        return scores

    def probs(self, state: List[float]) -> List[float]:
        """Return the action probabilities pi(.|state)."""
        scores = self.logits(state)
        return _softmax(scores)

    def select_action(self, state: List[float], rng: random.Random) -> int:
        """Sample an action index from pi(.|state) using ``rng``."""
        distribution = self.probs(state)
        threshold = rng.random()
        running = 0.0
        # Inverse-CDF sampling: pick the first action whose cumulative
        # probability reaches the uniform draw.
        for index, mass in enumerate(distribution):
            running += mass
            if threshold <= running:
                return index
        # Rounding can leave the cumulative sum just below the draw;
        # fall back to the last action.
        return len(distribution) - 1

    def update(self, state: List[float], action: int, advantage: float, lr: float) -> None:
        """Policy gradient update: theta += lr * advantage * grad log pi(a|s)."""
        distribution = self.probs(state)
        for a in range(self.n_actions):
            # d log pi(action|s) / d logit_a for a softmax policy.
            if a == action:
                score_grad = 1.0 - distribution[a]
            else:
                score_grad = -distribution[a]
            step = lr * advantage * score_grad
            row = self.W[a]
            for s in range(self.n_states):
                row[s] += step * state[s]
            self.b[a] += step
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class LinearCritic:
    """
    State-value estimator that is linear in the state features.

    V(s) = w @ s + b, fitted with TD(0) so that V(s) tracks r + gamma * V(s').
    """

    def __init__(self, n_states: int, seed: int = 42):
        self.n_states = n_states
        init_rng = random.Random(seed)
        self.w = [init_rng.gauss(0, 0.1) for _ in range(n_states)]
        self.b = 0.0

    def value(self, state: List[float]) -> float:
        """Return the current estimate V(state)."""
        weighted = (self.w[i] * state[i] for i in range(self.n_states))
        return sum(weighted) + self.b

    def update(self, state: List[float], target: float, lr: float) -> float:
        """Move V(state) toward ``target`` by one gradient step; return the TD error.

        The returned error is ``target - V(state)`` measured before the
        weights are changed.
        """
        error = target - self.value(state)
        step = lr * error
        for i in range(self.n_states):
            self.w[i] += step * state[i]
        self.b += step
        return error
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# ── Advantage Actor-Critic (A2C) ───────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
class A2C:
|
|
102
|
+
"""
|
|
103
|
+
Advantage Actor-Critic (A2C).
|
|
104
|
+
|
|
105
|
+
Critic estimates V(s). Advantage A(s,a) = r + gamma*V(s') - V(s).
|
|
106
|
+
Actor is updated with policy gradient weighted by advantage.
|
|
107
|
+
Advantage reduces variance compared to pure REINFORCE.
|
|
108
|
+
|
|
109
|
+
Can work with any environment exposing (state, reward, done, info) = step(action).
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
def __init__(
|
|
113
|
+
self,
|
|
114
|
+
n_states: int,
|
|
115
|
+
n_actions: int,
|
|
116
|
+
lr_actor: float = 0.01,
|
|
117
|
+
lr_critic: float = 0.05,
|
|
118
|
+
gamma: float = 0.99,
|
|
119
|
+
entropy_coef: float = 0.01,
|
|
120
|
+
seed: int = 42,
|
|
121
|
+
):
|
|
122
|
+
self.actor = LinearActor(n_states, n_actions, seed=seed)
|
|
123
|
+
self.critic = LinearCritic(n_states, seed=seed + 1)
|
|
124
|
+
self.lr_actor = lr_actor
|
|
125
|
+
self.lr_critic = lr_critic
|
|
126
|
+
self.gamma = gamma
|
|
127
|
+
self.entropy = entropy_coef
|
|
128
|
+
self._rng = random.Random(seed)
|
|
129
|
+
self.episode_rewards: List[float] = []
|
|
130
|
+
|
|
131
|
+
def select_action(self, state: List[float]) -> int:
|
|
132
|
+
return self.actor.select_action(state, self._rng)
|
|
133
|
+
|
|
134
|
+
def update(
|
|
135
|
+
self,
|
|
136
|
+
state: List[float],
|
|
137
|
+
action: int,
|
|
138
|
+
reward: float,
|
|
139
|
+
next_state: List[float],
|
|
140
|
+
done: bool,
|
|
141
|
+
) -> float:
|
|
142
|
+
"""Single-step TD update. Returns advantage."""
|
|
143
|
+
v_next = 0.0 if done else self.critic.value(next_state)
|
|
144
|
+
td_target = reward + self.gamma * v_next
|
|
145
|
+
advantage = self.critic.update(state, td_target, self.lr_critic)
|
|
146
|
+
# Entropy regularisation: add small gradient toward uniform policy
|
|
147
|
+
probs = self.actor.probs(state)
|
|
148
|
+
for a in range(self.actor.n_actions):
|
|
149
|
+
entropy_grad = -math.log(max(probs[a], 1e-9)) - 1
|
|
150
|
+
self.actor.b[a] += self.lr_actor * self.entropy * entropy_grad
|
|
151
|
+
# Policy gradient
|
|
152
|
+
self.actor.update(state, action, advantage, self.lr_actor)
|
|
153
|
+
return advantage
|
|
154
|
+
|
|
155
|
+
def train(
    self,
    env_fn: Callable,
    n_episodes: int = 500,
    max_steps: int = 200,
) -> List[float]:
    """
    Train for n_episodes and return the total reward of each episode.

    env_fn() is called once per episode and must return an object with:
        .reset()      -> state (list of floats)
        .step(action) -> (next_state, reward, done)
                         or a longer tuple such as
                         (next_state, reward, done, info)

    Only the first three items of the step result are used; any trailing
    items (e.g. an ``info`` dict) are ignored. An episode ends when ``done``
    is true or after ``max_steps`` steps, whichever comes first.

    ``self.episode_rewards`` is reset at the start of every call and is the
    same list object that is returned.
    """
    self.episode_rewards = []
    for _episode in range(n_episodes):
        env = env_fn()
        state = env.reset()
        total = 0.0
        for _step in range(max_steps):
            action = self.select_action(state)
            # Accept both 3-tuples and longer results (e.g. with a trailing
            # info dict) so 4-tuple environments do not raise ValueError.
            next_state, reward, done, *_extras = env.step(action)
            self.update(state, action, reward, next_state, done)
            total += reward
            state = next_state
            if done:
                break
        self.episode_rewards.append(total)
    return self.episode_rewards
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ── PPO-lite (clip-based policy optimisation) ──────────────────────────────────
|
|
185
|
+
|
|
186
|
+
class PPOLite:
    """
    Proximal Policy Optimisation (lite version) — clipped surrogate objective.

    Collects a batch of transitions, computes advantages, then does k_epochs
    of gradient updates with clipping to prevent too-large policy updates.

    Typical use: call ``select_action`` to get an action and its
    log-probability, pass the resulting transition to ``store``, and call
    ``update`` once enough transitions have been collected. ``update``
    empties the buffer when it finishes.
    """

    def __init__(
        self,
        n_states: int,
        n_actions: int,
        lr: float = 0.003,
        gamma: float = 0.99,
        clip_eps: float = 0.2,
        k_epochs: int = 4,
        seed: int = 42,
    ):
        """Create the actor/critic pair and an empty transition buffer.

        Args:
            n_states: Forwarded to LinearActor and LinearCritic.
            n_actions: Forwarded to LinearActor.
            lr: Step size used for both the actor and the critic.
            gamma: Discount factor for the Monte-Carlo returns.
            clip_eps: Half-width of the allowed probability-ratio interval
                ``[1 - clip_eps, 1 + clip_eps]``.
            k_epochs: Number of passes over the buffer per ``update`` call.
            seed: Seeds the actor (``seed``), the critic (``seed + 1``) and
                the RNG used for action sampling.
        """
        self.actor = LinearActor(n_states, n_actions, seed=seed)
        self.critic = LinearCritic(n_states, seed=seed + 1)
        self.lr = lr
        self.gamma = gamma
        self.clip_eps = clip_eps
        self.k_epochs = k_epochs
        self._rng = random.Random(seed)
        # Transition buffer: parallel lists, one entry per stored step.
        self._states: List[List[float]] = []
        self._actions: List[int] = []
        self._rewards: List[float] = []
        self._dones: List[bool] = []
        self._old_logprobs: List[float] = []

    def select_action(self, state: List[float]) -> Tuple[int, float]:
        """Sample an action and return it with its log-probability.

        The log-probability is taken from ``self.actor.probs(state)`` and is
        floored at ``log(1e-300)`` so it is always finite.
        """
        probs = self.actor.probs(state)
        action = self.actor.select_action(state, self._rng)
        log_prob = math.log(max(probs[action], 1e-300))
        return action, log_prob

    def store(self, state, action, reward, done, log_prob):
        """Append one transition to the buffer.

        ``done`` marks the last step of an episode; ``update`` uses it to
        stop returns from accumulating across episode boundaries.
        ``log_prob`` should be the value returned by ``select_action`` for
        this step.
        """
        self._states.append(state)
        self._actions.append(action)
        self._rewards.append(reward)
        self._dones.append(done)
        self._old_logprobs.append(log_prob)

    def _discounted_returns(self) -> List[float]:
        """Return the discounted reward-to-go for every buffered step."""
        n = len(self._rewards)
        returns = [0.0] * n
        running = 0.0
        # Walk backwards so each step can reuse the return of its successor.
        for i in reversed(range(n)):
            if self._dones[i]:
                # Episode boundary: nothing after a terminal step counts.
                running = 0.0
            running = self._rewards[i] + self.gamma * running
            returns[i] = running
        return returns

    def update(self) -> None:
        """Compute returns, advantages, then run k_epochs of PPO clip update.

        Does nothing if the buffer is empty. Otherwise, for each buffered
        step in each epoch, the critic is moved toward the Monte-Carlo
        return and the actor is updated with a clip-limited advantage. The
        buffer is cleared afterwards.
        """
        if len(self._rewards) == 0:
            return
        returns = self._discounted_returns()

        # Clip bounds do not change during the update.
        upper = 1 + self.clip_eps
        lower = 1 - self.clip_eps

        for _ in range(self.k_epochs):
            for state, action, ret, old_logprob in zip(
                self._states, self._actions, returns, self._old_logprobs
            ):
                # Advantage from the critic's value *before* this step's
                # critic update, squashed into (-1, 1) via adv / (|adv| + 1).
                adv = ret - self.critic.value(state)
                adv /= (abs(adv) + 1.0)

                # Critic update toward the Monte-Carlo return.
                self.critic.update(state, ret, self.lr)

                # Probability ratio between the current and the behaviour
                # policy for the action that was actually taken.
                new_probs = self.actor.probs(state)
                new_logprob = math.log(max(new_probs[action], 1e-300))
                ratio = math.exp(new_logprob - old_logprob)

                # Limit the ratio only on the side where the clipped
                # surrogate min(ratio*adv, clipped*adv) would clip it.
                if adv >= 0:
                    effective_ratio = min(ratio, upper)
                else:
                    effective_ratio = max(ratio, lower)

                # Weight is ~adv inside the clip range and is scaled down by
                # bound/ratio once the ratio has moved past the bound.
                self.actor.update(
                    state, action, adv * effective_ratio / (ratio + 1e-9), self.lr
                )

        # Clear buffer
        self._states.clear()
        self._actions.clear()
        self._rewards.clear()
        self._dones.clear()
        self._old_logprobs.clear()
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
if __name__ == "__main__":
    # Smoke test: train both agents on a tiny 1-D grid walk and print the
    # mean reward over the last 50 episodes.
    class GridWalk:
        """1D grid: state=[pos], actions=[left,right], goal at pos=5."""

        def __init__(self):
            self.pos = 2

        def reset(self):
            """Put the walker back at position 2 and return the scaled state."""
            self.pos = 2
            return [self.pos / 10.0]

        def step(self, action):
            """Move right on action 1, left otherwise; clamp to [0, 9]."""
            self.pos += 1 if action == 1 else -1
            self.pos = max(0, min(9, self.pos))
            done = self.pos == 5
            # Small per-step penalty, +1 on reaching the goal.
            reward = 1.0 if done else -0.01
            return [self.pos / 10.0], reward, done

    a2c = A2C(n_states=1, n_actions=2, lr_actor=0.05, lr_critic=0.1, gamma=0.99, seed=0)
    rewards = a2c.train(GridWalk, n_episodes=300, max_steps=50)
    last50 = sum(rewards[-50:]) / 50
    print(f"A2C GridWalk: last-50 mean reward = {last50:.3f} (expect > 0.8)")

    ppo = PPOLite(n_states=1, n_actions=2, lr=0.05, gamma=0.99, seed=1)
    ep_rewards = []
    max_steps = 30
    for ep in range(200):
        env = GridWalk()
        state = env.reset()
        total = 0.0
        for step in range(max_steps):
            action, lp = ppo.select_action(state)
            ns, r, done = env.step(action)
            # Several episodes share one buffer before update() runs. Flag
            # the final step of a truncated episode as terminal as well, so
            # its discounted return does not absorb the next episode's
            # rewards.
            episode_over = done or step == max_steps - 1
            ppo.store(state, action, r, episode_over, lp)
            total += r
            state = ns
            if done:
                break
        ep_rewards.append(total)
        # Update the policy on every batch of 10 episodes.
        if (ep + 1) % 10 == 0:
            ppo.update()
    last50_ppo = sum(ep_rewards[-50:]) / 50
    print(f"PPO-lite GridWalk: last-50 mean reward = {last50_ppo:.3f} (expect > 0.8)")
|