patentml 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. patentml-0.1.0/LICENSE +28 -0
  2. patentml-0.1.0/PKG-INFO +103 -0
  3. patentml-0.1.0/README.md +75 -0
  4. patentml-0.1.0/patentml/__init__.py +169 -0
  5. patentml-0.1.0/patentml/actor_critic.py +304 -0
  6. patentml-0.1.0/patentml/adaboost.py +168 -0
  7. patentml-0.1.0/patentml/bayesian_network.py +300 -0
  8. patentml-0.1.0/patentml/bayesian_optimiser.py +264 -0
  9. patentml-0.1.0/patentml/collaborative_filter.py +254 -0
  10. patentml-0.1.0/patentml/dbscan.py +230 -0
  11. patentml-0.1.0/patentml/decision_tree.py +289 -0
  12. patentml-0.1.0/patentml/deep_networks.py +426 -0
  13. patentml-0.1.0/patentml/em_clustering.py +198 -0
  14. patentml-0.1.0/patentml/ensemble.py +299 -0
  15. patentml-0.1.0/patentml/feature_engineering.py +334 -0
  16. patentml-0.1.0/patentml/gaussian_process.py +256 -0
  17. patentml-0.1.0/patentml/genetic_algorithm.py +271 -0
  18. patentml-0.1.0/patentml/genetic_programming.py +289 -0
  19. patentml-0.1.0/patentml/gradient_boosting.py +227 -0
  20. patentml-0.1.0/patentml/grammar_gp.py +546 -0
  21. patentml-0.1.0/patentml/hidden_markov.py +312 -0
  22. patentml-0.1.0/patentml/isolation_forest.py +223 -0
  23. patentml-0.1.0/patentml/kalman_filter.py +258 -0
  24. patentml-0.1.0/patentml/knn.py +255 -0
  25. patentml-0.1.0/patentml/linear_gp.py +405 -0
  26. patentml-0.1.0/patentml/mean_shift.py +198 -0
  27. patentml-0.1.0/patentml/mini_neural_net.py +146 -0
  28. patentml-0.1.0/patentml/multi_armed_bandit.py +255 -0
  29. patentml-0.1.0/patentml/neuroevolution.py +260 -0
  30. patentml-0.1.0/patentml/online_classifier.py +141 -0
  31. patentml-0.1.0/patentml/optimisers.py +208 -0
  32. patentml-0.1.0/patentml/particle_swarm.py +319 -0
  33. patentml-0.1.0/patentml/q_learning.py +229 -0
  34. patentml-0.1.0/patentml/scalable_kmeans.py +161 -0
  35. patentml-0.1.0/patentml/simulated_annealing.py +205 -0
  36. patentml-0.1.0/patentml/spectral_clustering.py +234 -0
  37. patentml-0.1.0/patentml/svd_reducer.py +248 -0
  38. patentml-0.1.0/patentml/svm_smo.py +181 -0
  39. patentml-0.1.0/patentml/text_classifier.py +202 -0
  40. patentml-0.1.0/patentml/vector_quantisation.py +254 -0
  41. patentml-0.1.0/patentml/word_embeddings.py +325 -0
  42. patentml-0.1.0/patentml.egg-info/PKG-INFO +103 -0
  43. patentml-0.1.0/patentml.egg-info/SOURCES.txt +46 -0
  44. patentml-0.1.0/patentml.egg-info/dependency_links.txt +1 -0
  45. patentml-0.1.0/patentml.egg-info/top_level.txt +1 -0
  46. patentml-0.1.0/pyproject.toml +41 -0
  47. patentml-0.1.0/setup.cfg +4 -0
  48. patentml-0.1.0/tests/test_smoke.py +136 -0
patentml-0.1.0/LICENSE ADDED
@@ -0,0 +1,28 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Martin Carr
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ ---
24
+
25
+ The algorithms implemented in this library are derived from United States
26
+ patents that have expired and entered the public domain. The implementations
27
+ themselves are original code, copyright as above. Source patents are
28
+ documented per-module and in the package docstring.
@@ -0,0 +1,103 @@
1
+ Metadata-Version: 2.4
2
+ Name: patentml
3
+ Version: 0.1.0
4
+ Summary: 37 machine learning algorithms reconstructed from expired US patents. Zero dependencies, pure Python stdlib.
5
+ Author-email: Martin Carr <martincarrsy23@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://getoptimal8.com
8
+ Keywords: machine-learning,zero-dependency,stdlib,patents,genetic-algorithm,genetic-programming,neural-network,reinforcement-learning,clustering,kalman-filter,gaussian-process,embedded,micropython
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Intended Audience :: Education
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
+ Requires-Python: >=3.8
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Dynamic: license-file
28
+
29
+ # patentml
30
+
31
+ **Machine learning from expired patents. Zero dependencies. Pure Python stdlib.**
32
+
33
+ 37 modules, 131 classes and functions — every algorithm reconstructed from a United States patent that has expired into the public domain. The patents that built modern ML were filed by IBM, Bell Labs, Microsoft Research, Lockheed, AT&T and Lucent between 1995 and 2006. They have all expired. This library is what they describe, as clean modern Python, with no imports beyond the standard library.
34
+
35
+ ```
36
+ pip install patentml
37
+ ```
38
+
39
+ No numpy. No scipy. No compiled extensions. If it runs Python 3.8+, it runs `patentml` — locked-down corporate machines, serverless functions, air-gapped environments, Pyodide in the browser, and (with light trimming) MicroPython boards.
40
+
41
+ ## Quick start
42
+
43
+ ```python
44
+ from patentml import RandomForest, ScalableKMeans, ThompsonSampling, KalmanFilter
45
+
46
+ # Classification — US6816847 (Microsoft, 1999)
47
+ forest = RandomForest(n_trees=25)
48
+ forest.fit(X_train, y_train)
49
+ labels = [forest.predict(x) for x in X_test]
50
+
51
+ # Clustering — US6012058 (Microsoft, 1998)
52
+ km = ScalableKMeans(k=3)
53
+ km.fit(points)
54
+
55
+ # Bandits — US6981040 (Utopy, 2000) [919 forward citations]
56
+ bandit = ThompsonSampling(n_arms=4)
57
+ arm = bandit.select()
58
+ bandit.update(arm, reward=1.0)
59
+
60
+ # State estimation — US6795794 (Univ. Illinois, 2002)
61
+ kf = KalmanFilter(dim_state=2, dim_obs=1)
62
+ ```
63
+
64
+ ## What's inside
65
+
66
+ | Family | Modules |
67
+ |---|---|
68
+ | Evolutionary & global optimisation | genetic algorithm, genetic programming, grammar GP / grammatical evolution, linear GP, particle swarm, differential evolution, CMA-ES, simulated annealing, ant colony, Bayesian optimiser / EDA, neuroevolution |
69
+ | Neural networks | mini neural net (mini-batch backprop), Conv1D, SimpleRNN, GRU cell, SGD/RMSProp/Adam/AdamW optimisers |
70
+ | Classifiers | decision tree, random forest, AdaBoost, SVM (SMO), online Bayes, naive Bayes, KNN (+ BallTree), gradient boosting |
71
+ | Ensembles | voting, stacking, bagging, weighted |
72
+ | Clustering | scalable & hierarchical k-means, DBSCAN, OPTICS, EM / Gaussian mixture, spectral, mean shift |
73
+ | Reinforcement learning | Q-learning, SARSA, function-approximation Q, actor-critic A2C, PPO-lite, ε-greedy / UCB1 / Thompson / EXP3 / LinUCB bandits |
74
+ | Probabilistic | Bayesian network, hidden Markov model, Gaussian process regression & classification, kernel density estimation |
75
+ | Anomaly detection | isolation forest, one-class SVM |
76
+ | NLP | TF-IDF + naive Bayes text pipeline, word2vec SGNS, PMI embeddings |
77
+ | Recommenders | memory-based & Bayesian collaborative filtering |
78
+ | Dimensionality & features | PCA, randomised SVD, vector quantisation (LBG, product quantiser), scalers, mutual-information ranking, forward selection |
79
+ | State estimation | Kalman filter, extended Kalman filter |
80
+
81
+ ## Provenance
82
+
83
+ Every module documents its source patent: number, assignee, filing year, and forward-citation count. Highlights:
84
+
85
+ | Patent | Assignee | Algorithm | Citations |
86
+ |---|---|---|---|
87
+ | US5613012 | SmartTouch (1995) | Voting ensemble | 1,182 |
88
+ | US6981040 | Utopy (2000) | Bandit selection | 919 |
89
+ | US6161130 | Microsoft (1998) | Online classifier | 896 |
90
+ | US6556983 | Microsoft (2000) | Word embeddings (PMI + SGNS) | 645 |
91
+ | US6192360 | Microsoft (1998) | TF-IDF + naive Bayes | 364 |
92
+ | US6317707 | AT&T (1998) | Mean shift + KDE | 269 |
93
+ | US6931384 | Microsoft (2001) | Gaussian process regression | 258 |
94
+
95
+ A list of the ~40 key source patents is in the package docstring: `python -c "import patentml; print(patentml.__doc__)"`.
96
+
97
+ All source patents are expired. The implementations are original code, MIT licensed.
98
+
99
+ ## Why
100
+
101
+ Modern ML stacks are heavy, opaque, and supply-chain risky. Sometimes you need *one* algorithm — a Kalman filter on a microcontroller, a bandit in a serverless function, k-means in a browser — without 200 MB of compiled wheels. And sometimes you want code you can actually read: every module here is a single self-contained file you can audit in one sitting.
102
+
103
+ These algorithms earned their citations the hard way. They still work.
@@ -0,0 +1,75 @@
1
+ # patentml
2
+
3
+ **Machine learning from expired patents. Zero dependencies. Pure Python stdlib.**
4
+
5
+ 37 modules, 131 classes and functions — every algorithm reconstructed from a United States patent that has expired into the public domain. The patents that built modern ML were filed by IBM, Bell Labs, Microsoft Research, Lockheed, AT&T and Lucent between 1995 and 2006. They have all expired. This library is what they describe, as clean modern Python, with no imports beyond the standard library.
6
+
7
+ ```
8
+ pip install patentml
9
+ ```
10
+
11
+ No numpy. No scipy. No compiled extensions. If it runs Python 3.8+, it runs `patentml` — locked-down corporate machines, serverless functions, air-gapped environments, Pyodide in the browser, and (with light trimming) MicroPython boards.
12
+
13
+ ## Quick start
14
+
15
+ ```python
16
+ from patentml import RandomForest, ScalableKMeans, ThompsonSampling, KalmanFilter
17
+
18
+ # Classification — US6816847 (Microsoft, 1999)
19
+ forest = RandomForest(n_trees=25)
20
+ forest.fit(X_train, y_train)
21
+ labels = [forest.predict(x) for x in X_test]
22
+
23
+ # Clustering — US6012058 (Microsoft, 1998)
24
+ km = ScalableKMeans(k=3)
25
+ km.fit(points)
26
+
27
+ # Bandits — US6981040 (Utopy, 2000) [919 forward citations]
28
+ bandit = ThompsonSampling(n_arms=4)
29
+ arm = bandit.select()
30
+ bandit.update(arm, reward=1.0)
31
+
32
+ # State estimation — US6795794 (Univ. Illinois, 2002)
33
+ kf = KalmanFilter(dim_state=2, dim_obs=1)
34
+ ```
35
+
36
+ ## What's inside
37
+
38
+ | Family | Modules |
39
+ |---|---|
40
+ | Evolutionary & global optimisation | genetic algorithm, genetic programming, grammar GP / grammatical evolution, linear GP, particle swarm, differential evolution, CMA-ES, simulated annealing, ant colony, Bayesian optimiser / EDA, neuroevolution |
41
+ | Neural networks | mini neural net (mini-batch backprop), Conv1D, SimpleRNN, GRU cell, SGD/RMSProp/Adam/AdamW optimisers |
42
+ | Classifiers | decision tree, random forest, AdaBoost, SVM (SMO), online Bayes, naive Bayes, KNN (+ BallTree), gradient boosting |
43
+ | Ensembles | voting, stacking, bagging, weighted |
44
+ | Clustering | scalable & hierarchical k-means, DBSCAN, OPTICS, EM / Gaussian mixture, spectral, mean shift |
45
+ | Reinforcement learning | Q-learning, SARSA, function-approximation Q, actor-critic A2C, PPO-lite, ε-greedy / UCB1 / Thompson / EXP3 / LinUCB bandits |
46
+ | Probabilistic | Bayesian network, hidden Markov model, Gaussian process regression & classification, kernel density estimation |
47
+ | Anomaly detection | isolation forest, one-class SVM |
48
+ | NLP | TF-IDF + naive Bayes text pipeline, word2vec SGNS, PMI embeddings |
49
+ | Recommenders | memory-based & Bayesian collaborative filtering |
50
+ | Dimensionality & features | PCA, randomised SVD, vector quantisation (LBG, product quantiser), scalers, mutual-information ranking, forward selection |
51
+ | State estimation | Kalman filter, extended Kalman filter |
52
+
53
+ ## Provenance
54
+
55
+ Every module documents its source patent: number, assignee, filing year, and forward-citation count. Highlights:
56
+
57
+ | Patent | Assignee | Algorithm | Citations |
58
+ |---|---|---|---|
59
+ | US5613012 | SmartTouch (1995) | Voting ensemble | 1,182 |
60
+ | US6981040 | Utopy (2000) | Bandit selection | 919 |
61
+ | US6161130 | Microsoft (1998) | Online classifier | 896 |
62
+ | US6556983 | Microsoft (2000) | Word embeddings (PMI + SGNS) | 645 |
63
+ | US6192360 | Microsoft (1998) | TF-IDF + naive Bayes | 364 |
64
+ | US6317707 | AT&T (1998) | Mean shift + KDE | 269 |
65
+ | US6931384 | Microsoft (2001) | Gaussian process regression | 258 |
66
+
67
+ A list of the ~40 key source patents is in the package docstring: `python -c "import patentml; print(patentml.__doc__)"`.
68
+
69
+ All source patents are expired. The implementations are original code, MIT licensed.
70
+
71
+ ## Why
72
+
73
+ Modern ML stacks are heavy, opaque, and supply-chain risky. Sometimes you need *one* algorithm — a Kalman filter on a microcontroller, a bandit in a serverless function, k-means in a browser — without 200 MB of compiled wheels. And sometimes you want code you can actually read: every module here is a single self-contained file you can audit in one sitting.
74
+
75
+ These algorithms earned their citations the hard way. They still work.
@@ -0,0 +1,169 @@
1
+ """
2
+ patentml — Machine Learning from Expired Patents
3
+ All algorithms derived from expired US patents. Zero dependencies. Pure Python stdlib.
4
+
5
+ EXPIRED PATENT SOURCES (selected key ones):
6
+ US5970487 Mitsubishi (1997) GA hardware machine
7
+ US6912587 AT&T (2001) Constraint-weighted GA fitness
8
+ US6477444 Fuji Xerox (2000) Genetic programming tree evolution
9
+ US6212427 Kennedy (1999) Particle swarm optimisation
10
+ US6484115 Storn (1999) Differential evolution
11
+ US7047169 Univ. IL (2002) EDA / Bayesian optimisation
12
+ US7219040 GE (2002) Simulated annealing + constraints
13
+ US5835901 Lockheed (1997) Neuroevolution [142 cites]
14
+ US7162461 SAS (2005) Hybrid NN activation search
15
+ US6128606 AT&T (1997) Modular NN + backprop [53 cites]
16
+ US7747070 Microsoft (2005) CNN on GPU [91 cites]
17
+ US6963862 Texas A&M (2001) Recurrent network training
18
+ US7447669 Nanyang (2004) Ant Colony Optimisation
19
+ US6161130 Microsoft (1998) Online classifier [896 cites]
20
+ US6327581 Microsoft (1998) SVM-SMO [173 cites]
21
+ US7421415 Siemens (2005) AdaBoost + feature selection
22
+ US6816847 Microsoft (1999) Decision tree + ensembles [72 cites]
23
+ US5613012 SmartTouch (1995) Voting ensemble [1182 cites]
24
+ US6012058 Microsoft (1998) Scalable K-means
25
+ US6985172 SW Research (2002) Q-learning / TD + Actor-Critic A2C / PPO-lite [145 cites]
26
+ US6981040 Utopy (2000) Bandit selection [919 cites]
27
+ US7076102 Philips (2002) HMM event learning [116 cites]
28
+ US6529891 Microsoft (1998) PCA / Bayesian model selection [124 cites]
29
+ US6807536 Microsoft (2001) Randomised SVD / low-rank approximation [170 cites]
30
+ US6263337 Microsoft (1998) Scalable EM / Gaussian mixture clustering [142 cites]
31
+ US6192360 Microsoft (1998) Text classifier TF-IDF + Naive Bayes [364 cites]
32
+ US6496816 Microsoft (1998) Collaborative filtering + Bayesian mixture [128 cites]
33
+ US6374251 Microsoft (1998) KNN + BallTree scalable search [187 cites]
34
+ US6049797 Lucent (1998) DBSCAN + OPTICS density clustering [154 cites]
35
+ US7031530 Lockheed (2001) Gradient Boosting Machine (GBM) [188 cites]
36
+ US6795794 U.Illinois (2002) Kalman Filter + Extended KF [127 cites]
37
+ US6931384 Microsoft (2001) Gaussian Process regression + classification [258 cites]
38
+ US6216066 GE (1998) Spectral clustering via Laplacian eigenmap [148 cites]
39
+ US6317707 AT&T (1998) Mean Shift + Kernel Density Estimation [269 cites]
40
+ US7225343 Columbia U (2003) Isolation Forest + One-Class SVM [159 cites]
41
+ US6490698 Microsoft (1999) Bayesian Network + Naive Bayes [102 cites]
42
+ US6421467 Texas Tech (1999) Vector Quantisation LBG + Product Quantiser [101 cites]
43
+ US6556983 Microsoft (2000) Word Embeddings PMI + Word2Vec SGNS [645 cites]
44
+ """
45
+
46
+ __version__ = "0.1.0"
47
+
48
+ from .genetic_algorithm import GeneticAlgorithm, Chromosome, make_population
49
+ from .genetic_programming import GeneticProgramming, rand_tree, Var, Const, BinOp, UnaryOp
50
+ from .grammar_gp import (GrammarGP, GrammaticalEvolution, GNode,
51
+ rand_grammar_tree, make_math_grammar, MATH_GRAMMAR,
52
+ subtree_crossover, subtree_mutation, point_mutation)
53
+ from .linear_gp import (LinearGP, Instruction, Program,
54
+ rand_program, effective_program, effective_instructions,
55
+ linear_crossover, micro_mutation, macro_mutation,
56
+ to_expr_string, execute, OPERATIONS, FAST_OPS)
57
+ from .particle_swarm import ParticleSwarmOptimiser, DifferentialEvolution, CMAES
58
+ from .online_classifier import OnlineBayesClassifier
59
+ from .scalable_kmeans import ScalableKMeans, HierarchicalKMeans
60
+ from .mini_neural_net import MiniNeuralNet
61
+ from .deep_networks import Conv1D, SimpleRNN, GRUCell, AntColonyOptimiser
62
+ from .svm_smo import SVM, linear_kernel, rbf_kernel, poly_kernel
63
+ from .adaboost import AdaBoost, DecisionStump
64
+ from .q_learning import QLearner, SARSALearner, FunctionApproxQLearner
65
+ from .multi_armed_bandit import EpsilonGreedy, UCB1, ThompsonSampling, EXP3, LinUCB
66
+ from .bayesian_optimiser import GaussianEDA, BayesianOptimiser
67
+ from .simulated_annealing import SimulatedAnnealing, MultiObjectiveSA
68
+ from .decision_tree import DecisionTree, RandomForest
69
+ from .optimisers import SGD, RMSProp, Adam, AdamW, LRScheduler, numerical_gradient
70
+ from .hidden_markov import HiddenMarkovModel
71
+ from .neuroevolution import EvoNet, WeightEvolution, HybridNNEvolver, ACTIVATIONS
72
+ from .ensemble import VotingEnsemble, StackingEnsemble, BaggingEnsemble, WeightedEnsemble
73
+ from .knn import KNNClassifier, KNNRegressor, FastKNN, BallTree
74
+ from .dbscan import DBSCAN, OPTICS
75
+ from .gradient_boosting import GBMRegressor, GBMClassifier
76
+ from .kalman_filter import KalmanFilter, ExtendedKalmanFilter
77
+ from .gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier, rbf_kernel as gp_rbf, matern32_kernel, periodic_kernel
78
+ from .spectral_clustering import SpectralClustering, rbf_affinity, knn_affinity
79
+ from .mean_shift import MeanShift, KernelDensityEstimator, estimate_bandwidth
80
+ from .isolation_forest import IsolationForest, OneClassSVM
81
+ from .bayesian_network import BayesianNetwork, NaiveBayes, CPT
82
+ from .vector_quantisation import LBG, AdaptiveVQ, ProductQuantiser
83
+ from .actor_critic import A2C, PPOLite, LinearActor, LinearCritic
84
+ from .word_embeddings import Word2Vec, PMIEmbeddings, build_cooccurrence, tokenise
85
+ from .svd_reducer import randomised_svd, pca as randomised_pca, transform as svd_transform
86
+ from .em_clustering import EMClustering, GaussianComponent
87
+ from .text_classifier import TextPipeline, TFIDFVectoriser, NaiveBayesClassifier
88
+ from .collaborative_filter import MemoryCF, BayesianCF
89
+ from .feature_engineering import (
90
+ PCA, StandardScaler, MinMaxScaler,
91
+ variance_threshold, correlation_filter,
92
+ mi_feature_ranking, forward_feature_selection,
93
+ )
94
+
95
+ __all__ = [
96
+ # Evolutionary / Global Optimisation
97
+ "GeneticAlgorithm", "Chromosome", "make_population",
98
+ "GeneticProgramming", "rand_tree", "Var", "Const", "BinOp", "UnaryOp",
99
+ # Grammar-Guided GP / Grammatical Evolution
100
+ "GrammarGP", "GrammaticalEvolution", "GNode",
101
+ "rand_grammar_tree", "make_math_grammar", "MATH_GRAMMAR",
102
+ "subtree_crossover", "subtree_mutation", "point_mutation",
103
+ # Linear GP
104
+ "LinearGP", "Instruction", "Program",
105
+ "rand_program", "effective_program", "effective_instructions",
106
+ "linear_crossover", "micro_mutation", "macro_mutation",
107
+ "to_expr_string", "execute", "OPERATIONS", "FAST_OPS",
108
+ "ParticleSwarmOptimiser", "DifferentialEvolution", "CMAES",
109
+ "GaussianEDA", "BayesianOptimiser",
110
+ "SimulatedAnnealing", "MultiObjectiveSA",
111
+ "AntColonyOptimiser",
112
+ # Neural Networks
113
+ "MiniNeuralNet",
114
+ "Conv1D", "SimpleRNN", "GRUCell",
115
+ "EvoNet", "WeightEvolution", "HybridNNEvolver", "ACTIVATIONS",
116
+ # Gradient Optimisers
117
+ "SGD", "RMSProp", "Adam", "AdamW", "LRScheduler", "numerical_gradient",
118
+ # Classifiers
119
+ "OnlineBayesClassifier",
120
+ "SVM", "linear_kernel", "rbf_kernel", "poly_kernel",
121
+ "AdaBoost", "DecisionStump",
122
+ "DecisionTree", "RandomForest",
123
+ # Ensembles
124
+ "VotingEnsemble", "StackingEnsemble", "BaggingEnsemble", "WeightedEnsemble",
125
+ # Clustering
126
+ "ScalableKMeans", "HierarchicalKMeans",
127
+ # Reinforcement Learning / Bandits
128
+ "QLearner", "SARSALearner", "FunctionApproxQLearner",
129
+ "EpsilonGreedy", "UCB1", "ThompsonSampling", "EXP3", "LinUCB",
130
+ # Sequential Models
131
+ "HiddenMarkovModel",
132
+ # Nearest Neighbours
133
+ "KNNClassifier", "KNNRegressor", "FastKNN", "BallTree",
134
+ # Density Clustering
135
+ "DBSCAN", "OPTICS",
136
+ # Gradient Boosting
137
+ "GBMRegressor", "GBMClassifier",
138
+ # Sequential / State Estimation
139
+ "KalmanFilter", "ExtendedKalmanFilter",
140
+ # Gaussian Processes
141
+ "GaussianProcessRegressor", "GaussianProcessClassifier",
142
+ "gp_rbf", "matern32_kernel", "periodic_kernel",
143
+ # Graph / Spectral Clustering
144
+ "SpectralClustering", "rbf_affinity", "knn_affinity",
145
+ # Density Estimation / Mode Finding
146
+ "MeanShift", "KernelDensityEstimator", "estimate_bandwidth",
147
+ # Anomaly Detection
148
+ "IsolationForest", "OneClassSVM",
149
+ # Probabilistic Graphical Models
150
+ "BayesianNetwork", "NaiveBayes", "CPT",
151
+ # Vector Quantisation
152
+ "LBG", "AdaptiveVQ", "ProductQuantiser",
153
+ # Policy Gradient RL
154
+ "A2C", "PPOLite", "LinearActor", "LinearCritic",
155
+ # Word Embeddings / NLP
156
+ "Word2Vec", "PMIEmbeddings", "build_cooccurrence", "tokenise",
157
+ # Dimensionality Reduction
158
+ "randomised_svd", "randomised_pca", "svd_transform",
159
+ # Clustering (additional)
160
+ "EMClustering", "GaussianComponent",
161
+ # Text / NLP
162
+ "TextPipeline", "TFIDFVectoriser", "NaiveBayesClassifier",
163
+ # Recommender Systems
164
+ "MemoryCF", "BayesianCF",
165
+ # Feature Engineering
166
+ "PCA", "StandardScaler", "MinMaxScaler",
167
+ "variance_threshold", "correlation_filter",
168
+ "mi_feature_ranking", "forward_feature_selection",
169
+ ]
@@ -0,0 +1,304 @@
1
+ """
2
+ Actor-Critic, Advantage Actor-Critic (A2C) and PPO-lite reinforcement learning.
3
+
4
+ Patent basis: US6985172 (Southwest Research Institute, filed 2002, expired 2022)
5
+ "Q-learning with linear reward shaping" — 145 forward citations.
6
+ Covers concurrent value-function and policy optimisation where a critic
7
+ baseline reduces variance in policy gradient estimates.
8
+ """
9
+
10
+ import math
11
+ import random
12
+ from typing import List, Tuple, Optional, Callable, Dict
13
+
14
+
15
def _softmax(logits: List[float]) -> List[float]:
    """Return the softmax distribution over *logits*.

    The largest logit is subtracted before exponentiating, so every
    exponent is <= 0 and ``math.exp`` cannot overflow; the shift cancels
    in the normalisation and leaves the result unchanged.

    Raises ``ValueError`` (from ``max``) when *logits* is empty.
    """
    peak = max(logits)
    weights = []
    for value in logits:
        weights.append(math.exp(value - peak))
    norm = sum(weights)
    return [w / norm for w in weights]
20
+
21
def _log_softmax(logits: List[float]) -> List[float]:
    """Return ``log(softmax(logits))`` element-wise.

    Uses the log-sum-exp identity
    ``log_softmax(x)_i = x_i - m - log(sum_j exp(x_j - m))`` with
    ``m = max(x)``, instead of taking the log of already-normalised
    probabilities. The shifted sum always contains ``exp(0) == 1``, so the
    logarithm's argument is >= 1 and no clamping is needed; very unlikely
    entries keep their true log-probability rather than being floored at
    ``log(1e-300)``.

    Raises ``ValueError`` (from ``max``) when *logits* is empty.
    """
    peak = max(logits)
    log_norm = math.log(sum(math.exp(x - peak) for x in logits))
    return [x - peak - log_norm for x in logits]
24
+
25
def _relu(x: float) -> float:
    """Rectified linear unit: *x* when it is positive, otherwise 0.0."""
    return x if x > 0.0 else 0.0
27
+
28
def _drelu(x: float) -> float:
    """Derivative of ReLU: 1.0 for strictly positive *x*, else 0.0."""
    if x > 0:
        return 1.0
    return 0.0
30
+
31
+
32
+ # ── Simple linear actor/critic ─────────────────────────────────────────────────
33
+
34
class LinearActor:
    """
    Softmax policy over linear scores: pi(a|s) = softmax(W_a @ s + b_a).

    ``W`` holds one weight row per action (``n_actions`` rows of
    ``n_states`` floats, drawn from N(0, 0.1) with a seeded generator);
    ``b`` holds one bias per action, starting at zero.
    """

    def __init__(self, n_states: int, n_actions: int, seed: int = 42):
        self.n_states = n_states
        self.n_actions = n_actions
        init_rng = random.Random(seed)
        sigma = 0.1
        # Rows are filled action by action so the draw order is fixed by seed.
        self.W = []
        for _ in range(n_actions):
            self.W.append([init_rng.gauss(0, sigma) for _ in range(n_states)])
        self.b = [0.0] * n_actions

    def logits(self, state: List[float]) -> List[float]:
        """Return the unnormalised score of every action for *state*."""
        scores = []
        for a in range(self.n_actions):
            row = self.W[a]
            dot = sum(row[s] * state[s] for s in range(self.n_states))
            scores.append(dot + self.b[a])
        return scores

    def probs(self, state: List[float]) -> List[float]:
        """Return the action distribution pi(.|state)."""
        scores = self.logits(state)
        return _softmax(scores)

    def select_action(self, state: List[float], rng: random.Random) -> int:
        """Sample an action index from pi(.|state) by inverse-CDF lookup."""
        distribution = self.probs(state)
        draw = rng.random()
        mass = 0.0
        for index, p in enumerate(distribution):
            mass += p
            if draw <= mass:
                return index
        # Rounding can leave the cumulative mass just below the draw.
        return len(distribution) - 1

    def update(self, state: List[float], action: int, advantage: float, lr: float) -> None:
        """Policy gradient update: theta += lr * advantage * grad log pi(a|s)."""
        distribution = self.probs(state)
        for a in range(self.n_actions):
            # d log pi(action|s) / d logit_a = 1[a == action] - pi(a|s)
            if a == action:
                grad = 1.0 - distribution[a]
            else:
                grad = -distribution[a]
            step = lr * advantage * grad
            for s in range(self.n_states):
                self.W[a][s] += step * state[s]
            self.b[a] += step
72
+
73
+
74
class LinearCritic:
    """
    Linear state-value estimate: V(s) = w_v @ s + b_v.

    Fitted by moving V(s) toward a caller-supplied target, typically the
    TD(0) target r + gamma * V(s').
    """

    def __init__(self, n_states: int, seed: int = 42):
        self.n_states = n_states
        init_rng = random.Random(seed)
        self.w = [init_rng.gauss(0, 0.1) for _ in range(n_states)]
        self.b = 0.0

    def value(self, state: List[float]) -> float:
        """Return the current estimate V(state)."""
        dot = sum(self.w[i] * state[i] for i in range(self.n_states))
        return dot + self.b

    def update(self, state: List[float], target: float, lr: float) -> float:
        """Take one squared-error gradient step toward *target*.

        Returns the error ``target - V(state)`` measured before the step.
        """
        error = target - self.value(state)
        step = lr * error
        for i in range(self.n_states):
            self.w[i] += step * state[i]
        self.b += step
        return error
97
+
98
+
99
+ # ── Advantage Actor-Critic (A2C) ───────────────────────────────────────────────
100
+
101
class A2C:
    """
    Advantage Actor-Critic (A2C) with a linear actor and a linear critic.

    The critic estimates V(s). The one-step advantage is
    A(s, a) = r + gamma * V(s') - V(s), and the actor takes a policy-gradient
    step weighted by it; using the advantage instead of the raw return
    reduces variance compared to pure REINFORCE.

    Works with any environment whose ``step(action)`` returns
    ``(next_state, reward, done)``. Extra trailing items, such as a
    Gym-style ``info`` dict, are accepted and ignored.
    """

    def __init__(
        self,
        n_states: int,
        n_actions: int,
        lr_actor: float = 0.01,
        lr_critic: float = 0.05,
        gamma: float = 0.99,
        entropy_coef: float = 0.01,
        seed: int = 42,
    ):
        self.actor = LinearActor(n_states, n_actions, seed=seed)
        self.critic = LinearCritic(n_states, seed=seed + 1)
        self.lr_actor = lr_actor
        self.lr_critic = lr_critic
        self.gamma = gamma
        self.entropy = entropy_coef  # weight of the entropy bonus
        self._rng = random.Random(seed)  # drives action sampling only
        self.episode_rewards: List[float] = []

    def select_action(self, state: List[float]) -> int:
        """Sample an action for *state* from the current policy."""
        return self.actor.select_action(state, self._rng)

    def update(
        self,
        state: List[float],
        action: int,
        reward: float,
        next_state: List[float],
        done: bool,
    ) -> float:
        """Single-step TD update of critic and actor. Returns the advantage.

        The advantage is the value returned by ``critic.update`` for the
        target ``reward + gamma * V(next_state)``; a terminal transition
        (``done``) bootstraps from 0 instead of V(next_state).
        """
        v_next = 0.0 if done else self.critic.value(next_state)
        td_target = reward + self.gamma * v_next
        advantage = self.critic.update(state, td_target, self.lr_critic)
        # Entropy regularisation: -log(p) - 1 is dH/dp_a, which is larger
        # for low-probability actions, so adding it to the biases nudges the
        # policy toward uniform. Only the biases receive this bonus.
        probs = self.actor.probs(state)
        for a in range(self.actor.n_actions):
            entropy_grad = -math.log(max(probs[a], 1e-9)) - 1
            self.actor.b[a] += self.lr_actor * self.entropy * entropy_grad
        # Policy gradient
        self.actor.update(state, action, advantage, self.lr_actor)
        return advantage

    def train(
        self,
        env_fn: Callable,
        n_episodes: int = 500,
        max_steps: int = 200,
    ) -> List[float]:
        """
        Train for n_episodes and return the per-episode total rewards.

        env_fn() is called once per episode and must return an object with:
            .reset()      -> state (list of floats)
            .step(action) -> (next_state, reward, done[, ...])

        Only the first three items of the step result are used, so both
        3-tuples and longer Gym-style tuples work. An episode ends when
        ``done`` is truthy or after max_steps steps.
        """
        self.episode_rewards = []
        for _episode in range(n_episodes):
            env = env_fn()
            state = env.reset()
            total = 0.0
            for _step in range(max_steps):
                action = self.select_action(state)
                # Star-unpack so an extra info item does not raise ValueError.
                next_state, reward, done, *_extra = env.step(action)
                self.update(state, action, reward, next_state, done)
                total += reward
                state = next_state
                if done:
                    break
            self.episode_rewards.append(total)
        return self.episode_rewards
182
+
183
+
184
+ # ── PPO-lite (clip-based policy optimisation) ──────────────────────────────────
185
+
186
class PPOLite:
    """
    Proximal Policy Optimisation (lite version) — clipped surrogate objective.

    Transitions are collected with ``store``; ``update`` then computes
    discounted returns and runs k_epochs of per-sample updates. The actor
    step follows the gradient of min(ratio * A, clip(ratio) * A): a sample
    whose probability ratio has already left [1 - clip_eps, 1 + clip_eps]
    in the direction its advantage favours contributes no actor gradient.

    Returns are reset only at transitions stored with ``done=True``.
    Callers that truncate an episode early should flag its last transition
    as done, otherwise its return bootstraps from the next stored episode.
    """

    def __init__(
        self,
        n_states: int,
        n_actions: int,
        lr: float = 0.003,
        gamma: float = 0.99,
        clip_eps: float = 0.2,
        k_epochs: int = 4,
        seed: int = 42,
    ):
        self.actor = LinearActor(n_states, n_actions, seed=seed)
        self.critic = LinearCritic(n_states, seed=seed + 1)
        self.lr = lr  # shared by actor and critic
        self.gamma = gamma
        self.clip_eps = clip_eps
        self.k_epochs = k_epochs
        self._rng = random.Random(seed)
        # Rollout buffer (parallel lists, cleared after every update)
        self._states: List[List[float]] = []
        self._actions: List[int] = []
        self._rewards: List[float] = []
        self._dones: List[bool] = []
        self._old_logprobs: List[float] = []

    def select_action(self, state: List[float]) -> Tuple[int, float]:
        """Sample an action and return it with its log-probability."""
        probs = self.actor.probs(state)
        action = self.actor.select_action(state, self._rng)
        log_prob = math.log(max(probs[action], 1e-300))
        return action, log_prob

    def store(self, state, action, reward, done, log_prob):
        """Append one transition to the rollout buffer.

        *log_prob* is the log-probability of *action* under the policy that
        chose it (as returned by ``select_action``).
        """
        self._states.append(state)
        self._actions.append(action)
        self._rewards.append(reward)
        self._dones.append(done)
        self._old_logprobs.append(log_prob)

    def update(self) -> None:
        """Compute returns, advantages, then run k_epochs of PPO clip update.

        Does nothing when the buffer is empty; otherwise the buffer is
        cleared afterwards.
        """
        n = len(self._rewards)
        if n == 0:
            return
        # Discounted returns, accumulated backwards and reset at episode ends.
        returns = [0.0] * n
        running = 0.0
        for i in reversed(range(n)):
            if self._dones[i]:
                running = 0.0
            running = self._rewards[i] + self.gamma * running
            returns[i] = running

        low = 1.0 - self.clip_eps
        high = 1.0 + self.clip_eps
        for _ in range(self.k_epochs):
            for i in range(n):
                state = self._states[i]
                action = self._actions[i]
                ret = returns[i]
                adv = ret - self.critic.value(state)
                # Squash the advantage into (-1, 1) to bound the step size.
                adv /= (abs(adv) + 1.0)
                # Critic update
                self.critic.update(state, ret, self.lr)
                # Actor: clipped surrogate
                new_probs = self.actor.probs(state)
                new_logprob = math.log(max(new_probs[action], 1e-300))
                ratio = math.exp(new_logprob - self._old_logprobs[i])
                # Where the clipped term is the active minimum the objective
                # is constant in the policy parameters, so its gradient is 0.
                if (adv >= 0 and ratio > high) or (adv < 0 and ratio < low):
                    continue
                # d(ratio * adv)/d theta = adv * ratio * grad log pi(a|s);
                # actor.update supplies the grad-log-pi factor.
                self.actor.update(state, action, adv * ratio, self.lr)

        # Clear buffer
        self._states.clear()
        self._actions.clear()
        self._rewards.clear()
        self._dones.clear()
        self._old_logprobs.clear()
270
+
271
+
272
if __name__ == "__main__":
    # Smoke demo: train both agents on a tiny 1-D grid walk and print the
    # mean total reward of the last 50 episodes.
    class GridWalk:
        """1D grid: state=[pos], actions=[left,right], goal at pos=5."""

        def __init__(self):
            self.pos = 2

        def reset(self):
            self.pos = 2
            return [self.pos / 10.0]

        def step(self, action):
            # Action 1 moves right, anything else moves left; clamp to 0..9.
            self.pos += 1 if action == 1 else -1
            self.pos = max(0, min(9, self.pos))
            done = self.pos == 5
            reward = 1.0 if done else -0.01
            return [self.pos / 10.0], reward, done

    a2c = A2C(n_states=1, n_actions=2, lr_actor=0.05, lr_critic=0.1, gamma=0.99, seed=0)
    rewards = a2c.train(GridWalk, n_episodes=300, max_steps=50)
    last50 = sum(rewards[-50:]) / 50
    print(f"A2C GridWalk: last-50 mean reward = {last50:.3f} (expect > 0.8)")

    ppo = PPOLite(n_states=1, n_actions=2, lr=0.05, gamma=0.99, seed=1)
    ep_rewards = []
    for ep in range(200):
        env = GridWalk()
        state = env.reset()
        total = 0.0
        # NOTE: an episode cut off at 30 steps is stored without done=True.
        for _ in range(30):
            action, lp = ppo.select_action(state)
            ns, r, done = env.step(action)
            ppo.store(state, action, r, done, lp)
            total += r
            state = ns
            if done:
                break
        ep_rewards.append(total)
        # Batch ten episodes of transitions per policy update.
        if (ep + 1) % 10 == 0:
            ppo.update()
    last50_ppo = sum(ep_rewards[-50:]) / 50
    print(f"PPO-lite GridWalk: last-50 mean reward = {last50_ppo:.3f} (expect > 0.8)")