patentml 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patentml/__init__.py +169 -0
- patentml/actor_critic.py +304 -0
- patentml/adaboost.py +168 -0
- patentml/bayesian_network.py +300 -0
- patentml/bayesian_optimiser.py +264 -0
- patentml/collaborative_filter.py +254 -0
- patentml/dbscan.py +230 -0
- patentml/decision_tree.py +289 -0
- patentml/deep_networks.py +426 -0
- patentml/em_clustering.py +198 -0
- patentml/ensemble.py +299 -0
- patentml/feature_engineering.py +334 -0
- patentml/gaussian_process.py +256 -0
- patentml/genetic_algorithm.py +271 -0
- patentml/genetic_programming.py +289 -0
- patentml/gradient_boosting.py +227 -0
- patentml/grammar_gp.py +546 -0
- patentml/hidden_markov.py +312 -0
- patentml/isolation_forest.py +223 -0
- patentml/kalman_filter.py +258 -0
- patentml/knn.py +255 -0
- patentml/linear_gp.py +405 -0
- patentml/mean_shift.py +198 -0
- patentml/mini_neural_net.py +146 -0
- patentml/multi_armed_bandit.py +255 -0
- patentml/neuroevolution.py +260 -0
- patentml/online_classifier.py +141 -0
- patentml/optimisers.py +208 -0
- patentml/particle_swarm.py +319 -0
- patentml/q_learning.py +229 -0
- patentml/scalable_kmeans.py +161 -0
- patentml/simulated_annealing.py +205 -0
- patentml/spectral_clustering.py +234 -0
- patentml/svd_reducer.py +248 -0
- patentml/svm_smo.py +181 -0
- patentml/text_classifier.py +202 -0
- patentml/vector_quantisation.py +254 -0
- patentml/word_embeddings.py +325 -0
- patentml-0.1.0.dist-info/METADATA +103 -0
- patentml-0.1.0.dist-info/RECORD +43 -0
- patentml-0.1.0.dist-info/WHEEL +5 -0
- patentml-0.1.0.dist-info/licenses/LICENSE +28 -0
- patentml-0.1.0.dist-info/top_level.txt +1 -0
patentml/__init__.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""
|
|
2
|
+
patentml — Machine Learning from Expired Patents
|
|
3
|
+
All algorithms derived from expired US patents. Zero dependencies. Pure Python stdlib.
|
|
4
|
+
|
|
5
|
+
EXPIRED PATENT SOURCES (selected key ones):
|
|
6
|
+
US5970487 Mitsubishi (1997) GA hardware machine
|
|
7
|
+
US6912587 AT&T (2001) Constraint-weighted GA fitness
|
|
8
|
+
US6477444 Fuji Xerox (2000) Genetic programming tree evolution
|
|
9
|
+
US6212427 Kennedy (1999) Particle swarm optimisation
|
|
10
|
+
US6484115 Storn (1999) Differential evolution
|
|
11
|
+
US7047169 Univ. IL (2002) EDA / Bayesian optimisation
|
|
12
|
+
US7219040 GE (2002) Simulated annealing + constraints
|
|
13
|
+
US5835901 Lockheed (1997) Neuroevolution [142 cites]
|
|
14
|
+
US7162461 SAS (2005) Hybrid NN activation search
|
|
15
|
+
US6128606 AT&T (1997) Modular NN + backprop [53 cites]
|
|
16
|
+
US7747070 Microsoft (2005) CNN on GPU [91 cites]
|
|
17
|
+
US6963862 Texas A&M (2001) Recurrent network training
|
|
18
|
+
US7447669 Nanyang (2004) Ant Colony Optimisation
|
|
19
|
+
US6161130 Microsoft (1998) Online classifier [896 cites]
|
|
20
|
+
US6327581 Microsoft (1998) SVM-SMO [173 cites]
|
|
21
|
+
US7421415 Siemens (2005) AdaBoost + feature selection
|
|
22
|
+
US6816847 Microsoft (1999) Decision tree + ensembles [72 cites]
|
|
23
|
+
US5613012 SmartTouch (1995) Voting ensemble [1182 cites]
|
|
24
|
+
US6012058 Microsoft (1998) Scalable K-means
|
|
25
|
+
US6985172 SW Research(2002) Q-learning / TD + Actor-Critic A2C / PPO-lite [145 cites]
|
|
26
|
+
US6981040 Utopy (2000) Bandit selection [919 cites]
|
|
27
|
+
US7076102 Philips (2002) HMM event learning [116 cites]
|
|
28
|
+
US6529891 Microsoft (1998) PCA / Bayesian model selection [124 cites]
|
|
29
|
+
US6807536 Microsoft (2001) Randomised SVD / low-rank approximation [170 cites]
|
|
30
|
+
US6263337 Microsoft (1998) Scalable EM / Gaussian mixture clustering [142 cites]
|
|
31
|
+
US6192360 Microsoft (1998) Text classifier TF-IDF + Naive Bayes [364 cites]
|
|
32
|
+
US6496816 Microsoft (1998) Collaborative filtering + Bayesian mixture [128 cites]
|
|
33
|
+
US6374251 Microsoft (1998) KNN + BallTree scalable search [187 cites]
|
|
34
|
+
US6049797 Lucent (1998) DBSCAN + OPTICS density clustering [154 cites]
|
|
35
|
+
US7031530 Lockheed (2001) Gradient Boosting Machine (GBM) [188 cites]
|
|
36
|
+
US6795794 U.Illinois (2002) Kalman Filter + Extended KF [127 cites]
|
|
37
|
+
US6931384 Microsoft (2001) Gaussian Process regression + classification [258 cites]
|
|
38
|
+
US6216066 GE (1998) Spectral clustering via Laplacian eigenmap [148 cites]
|
|
39
|
+
US6317707 AT&T (1998) Mean Shift + Kernel Density Estimation [269 cites]
|
|
40
|
+
US7225343 Columbia U (2003) Isolation Forest + One-Class SVM [159 cites]
|
|
41
|
+
US6490698 Microsoft (1999) Bayesian Network + Naive Bayes [102 cites]
|
|
42
|
+
US6421467 Texas Tech (1999) Vector Quantisation LBG + Product Quantiser [101 cites]
|
|
43
|
+
US6556983 Microsoft (2000) Word Embeddings PMI + Word2Vec SGNS [645 cites]
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
__version__ = "0.1.0"
|
|
47
|
+
|
|
48
|
+
from .genetic_algorithm import GeneticAlgorithm, Chromosome, make_population
|
|
49
|
+
from .genetic_programming import GeneticProgramming, rand_tree, Var, Const, BinOp, UnaryOp
|
|
50
|
+
from .grammar_gp import (GrammarGP, GrammaticalEvolution, GNode,
|
|
51
|
+
rand_grammar_tree, make_math_grammar, MATH_GRAMMAR,
|
|
52
|
+
subtree_crossover, subtree_mutation, point_mutation)
|
|
53
|
+
from .linear_gp import (LinearGP, Instruction, Program,
|
|
54
|
+
rand_program, effective_program, effective_instructions,
|
|
55
|
+
linear_crossover, micro_mutation, macro_mutation,
|
|
56
|
+
to_expr_string, execute, OPERATIONS, FAST_OPS)
|
|
57
|
+
from .particle_swarm import ParticleSwarmOptimiser, DifferentialEvolution, CMAES
|
|
58
|
+
from .online_classifier import OnlineBayesClassifier
|
|
59
|
+
from .scalable_kmeans import ScalableKMeans, HierarchicalKMeans
|
|
60
|
+
from .mini_neural_net import MiniNeuralNet
|
|
61
|
+
from .deep_networks import Conv1D, SimpleRNN, GRUCell, AntColonyOptimiser
|
|
62
|
+
from .svm_smo import SVM, linear_kernel, rbf_kernel, poly_kernel
|
|
63
|
+
from .adaboost import AdaBoost, DecisionStump
|
|
64
|
+
from .q_learning import QLearner, SARSALearner, FunctionApproxQLearner
|
|
65
|
+
from .multi_armed_bandit import EpsilonGreedy, UCB1, ThompsonSampling, EXP3, LinUCB
|
|
66
|
+
from .bayesian_optimiser import GaussianEDA, BayesianOptimiser
|
|
67
|
+
from .simulated_annealing import SimulatedAnnealing, MultiObjectiveSA
|
|
68
|
+
from .decision_tree import DecisionTree, RandomForest
|
|
69
|
+
from .optimisers import SGD, RMSProp, Adam, AdamW, LRScheduler, numerical_gradient
|
|
70
|
+
from .hidden_markov import HiddenMarkovModel
|
|
71
|
+
from .neuroevolution import EvoNet, WeightEvolution, HybridNNEvolver, ACTIVATIONS
|
|
72
|
+
from .ensemble import VotingEnsemble, StackingEnsemble, BaggingEnsemble, WeightedEnsemble
|
|
73
|
+
from .knn import KNNClassifier, KNNRegressor, FastKNN, BallTree
|
|
74
|
+
from .dbscan import DBSCAN, OPTICS
|
|
75
|
+
from .gradient_boosting import GBMRegressor, GBMClassifier
|
|
76
|
+
from .kalman_filter import KalmanFilter, ExtendedKalmanFilter
|
|
77
|
+
from .gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier, rbf_kernel as gp_rbf, matern32_kernel, periodic_kernel
|
|
78
|
+
from .spectral_clustering import SpectralClustering, rbf_affinity, knn_affinity
|
|
79
|
+
from .mean_shift import MeanShift, KernelDensityEstimator, estimate_bandwidth
|
|
80
|
+
from .isolation_forest import IsolationForest, OneClassSVM
|
|
81
|
+
from .bayesian_network import BayesianNetwork, NaiveBayes, CPT
|
|
82
|
+
from .vector_quantisation import LBG, AdaptiveVQ, ProductQuantiser
|
|
83
|
+
from .actor_critic import A2C, PPOLite, LinearActor, LinearCritic
|
|
84
|
+
from .word_embeddings import Word2Vec, PMIEmbeddings, build_cooccurrence, tokenise
|
|
85
|
+
from .svd_reducer import randomised_svd, pca as randomised_pca, transform as svd_transform
|
|
86
|
+
from .em_clustering import EMClustering, GaussianComponent
|
|
87
|
+
from .text_classifier import TextPipeline, TFIDFVectoriser, NaiveBayesClassifier
|
|
88
|
+
from .collaborative_filter import MemoryCF, BayesianCF
|
|
89
|
+
from .feature_engineering import (
|
|
90
|
+
PCA, StandardScaler, MinMaxScaler,
|
|
91
|
+
variance_threshold, correlation_filter,
|
|
92
|
+
mi_feature_ranking, forward_feature_selection,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
__all__ = [
    # Names re-exported by ``from patentml import *``.  Each entry is bound
    # by one of the ``from .<module> import ...`` statements above; the
    # aliased imports appear here under their alias (``gp_rbf``,
    # ``randomised_pca``, ``svd_transform``).
    # Evolutionary / Global Optimisation
    "GeneticAlgorithm", "Chromosome", "make_population",
    "GeneticProgramming", "rand_tree", "Var", "Const", "BinOp", "UnaryOp",
    # Grammar-Guided GP / Grammatical Evolution
    "GrammarGP", "GrammaticalEvolution", "GNode",
    "rand_grammar_tree", "make_math_grammar", "MATH_GRAMMAR",
    "subtree_crossover", "subtree_mutation", "point_mutation",
    # Linear GP
    "LinearGP", "Instruction", "Program",
    "rand_program", "effective_program", "effective_instructions",
    "linear_crossover", "micro_mutation", "macro_mutation",
    "to_expr_string", "execute", "OPERATIONS", "FAST_OPS",
    # Swarm, annealing and other global optimisers
    "ParticleSwarmOptimiser", "DifferentialEvolution", "CMAES",
    "GaussianEDA", "BayesianOptimiser",
    "SimulatedAnnealing", "MultiObjectiveSA",
    "AntColonyOptimiser",
    # Neural Networks
    "MiniNeuralNet",
    "Conv1D", "SimpleRNN", "GRUCell",
    "EvoNet", "WeightEvolution", "HybridNNEvolver", "ACTIVATIONS",
    # Gradient Optimisers
    "SGD", "RMSProp", "Adam", "AdamW", "LRScheduler", "numerical_gradient",
    # Classifiers
    "OnlineBayesClassifier",
    # NOTE: "rbf_kernel" is the svm_smo kernel; the gaussian_process one is
    # exported separately as "gp_rbf" (see Gaussian Processes below).
    "SVM", "linear_kernel", "rbf_kernel", "poly_kernel",
    "AdaBoost", "DecisionStump",
    "DecisionTree", "RandomForest",
    # Ensembles
    "VotingEnsemble", "StackingEnsemble", "BaggingEnsemble", "WeightedEnsemble",
    # Clustering
    "ScalableKMeans", "HierarchicalKMeans",
    # Reinforcement Learning / Bandits
    "QLearner", "SARSALearner", "FunctionApproxQLearner",
    "EpsilonGreedy", "UCB1", "ThompsonSampling", "EXP3", "LinUCB",
    # Sequential Models
    "HiddenMarkovModel",
    # Nearest Neighbours
    "KNNClassifier", "KNNRegressor", "FastKNN", "BallTree",
    # Density Clustering
    "DBSCAN", "OPTICS",
    # Gradient Boosting
    "GBMRegressor", "GBMClassifier",
    # Sequential / State Estimation
    "KalmanFilter", "ExtendedKalmanFilter",
    # Gaussian Processes
    "GaussianProcessRegressor", "GaussianProcessClassifier",
    "gp_rbf", "matern32_kernel", "periodic_kernel",
    # Graph / Spectral Clustering
    "SpectralClustering", "rbf_affinity", "knn_affinity",
    # Density Estimation / Mode Finding
    "MeanShift", "KernelDensityEstimator", "estimate_bandwidth",
    # Anomaly Detection
    "IsolationForest", "OneClassSVM",
    # Probabilistic Graphical Models
    "BayesianNetwork", "NaiveBayes", "CPT",
    # Vector Quantisation
    "LBG", "AdaptiveVQ", "ProductQuantiser",
    # Policy Gradient RL
    "A2C", "PPOLite", "LinearActor", "LinearCritic",
    # Word Embeddings / NLP
    "Word2Vec", "PMIEmbeddings", "build_cooccurrence", "tokenise",
    # Dimensionality Reduction
    "randomised_svd", "randomised_pca", "svd_transform",
    # Clustering (additional)
    "EMClustering", "GaussianComponent",
    # Text / NLP
    "TextPipeline", "TFIDFVectoriser", "NaiveBayesClassifier",
    # Recommender Systems
    "MemoryCF", "BayesianCF",
    # Feature Engineering
    "PCA", "StandardScaler", "MinMaxScaler",
    "variance_threshold", "correlation_filter",
    "mi_feature_ranking", "forward_feature_selection",
]
|
patentml/actor_critic.py
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Actor-Critic and Advantage Actor-Critic (A2C) reinforcement learning.
|
|
3
|
+
|
|
4
|
+
Patent basis: US6985172 (Southwest Research Institute, filed 2002, expired 2022)
|
|
5
|
+
"Q-learning with linear reward shaping" — 145 forward citations.
|
|
6
|
+
Covers concurrent value-function and policy optimisation where a critic
|
|
7
|
+
baseline reduces variance in policy gradient estimates.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import math
|
|
11
|
+
import random
|
|
12
|
+
from typing import List, Tuple, Optional, Callable, Dict
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _softmax(logits: List[float]) -> List[float]:
|
|
16
|
+
m = max(logits)
|
|
17
|
+
exps = [math.exp(x - m) for x in logits]
|
|
18
|
+
total = sum(exps)
|
|
19
|
+
return [e / total for e in exps]
|
|
20
|
+
|
|
21
|
+
def _log_softmax(logits: List[float]) -> List[float]:
|
|
22
|
+
probs = _softmax(logits)
|
|
23
|
+
return [math.log(max(p, 1e-300)) for p in probs]
|
|
24
|
+
|
|
25
|
+
def _relu(x: float) -> float:
|
|
26
|
+
return max(0.0, x)
|
|
27
|
+
|
|
28
|
+
def _drelu(x: float) -> float:
|
|
29
|
+
return 1.0 if x > 0 else 0.0
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ── Simple linear actor/critic ─────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
class LinearActor:
    """
    Softmax policy over linear scores: pi(a|s) = softmax(W_a @ s + b_a).

    ``W`` stores one weight row per action and ``b`` one bias per action.
    """

    def __init__(self, n_states: int, n_actions: int, seed: int = 42):
        self.n_states = n_states
        self.n_actions = n_actions
        init_rng = random.Random(seed)
        sigma = 0.1
        # Rows are filled action by action so the seeded stream is consumed
        # in a fixed order.
        self.W = []
        for _ in range(n_actions):
            self.W.append([init_rng.gauss(0, sigma) for _ in range(n_states)])
        self.b = [0.0 for _ in range(n_actions)]

    def logits(self, state: List[float]) -> List[float]:
        """Return the unnormalised score of every action for *state*."""
        scores = []
        for a in range(self.n_actions):
            row = self.W[a]
            dot = sum(row[s] * state[s] for s in range(self.n_states))
            scores.append(dot + self.b[a])
        return scores

    def probs(self, state: List[float]) -> List[float]:
        """Return the action probabilities for *state*."""
        scores = self.logits(state)
        return _softmax(scores)

    def select_action(self, state: List[float], rng: random.Random) -> int:
        """Sample an action index from the policy using *rng*."""
        dist = self.probs(state)
        draw = rng.random()
        running = 0.0
        for idx, mass in enumerate(dist):
            running += mass
            if draw <= running:
                return idx
        # Rounding can leave the cumulative mass just below the draw.
        return len(dist) - 1

    def update(self, state: List[float], action: int, advantage: float, lr: float) -> None:
        """Policy gradient update: theta += lr * advantage * grad log pi(a|s)."""
        dist = self.probs(state)
        for a in range(self.n_actions):
            # d log pi(action|s) / d logit_a = 1[a == action] - pi(a|s)
            if a == action:
                grad = 1.0 - dist[a]
            else:
                grad = -dist[a]
            step = lr * advantage * grad
            for s in range(self.n_states):
                self.W[a][s] += step * state[s]
            self.b[a] += step
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class LinearCritic:
    """
    Linear state-value estimate: V(s) = w_v @ s + b_v.

    Fitted by moving V(s) toward a caller-supplied target such as the
    TD(0) target r + gamma * V(s').
    """

    def __init__(self, n_states: int, seed: int = 42):
        self.n_states = n_states
        init_rng = random.Random(seed)
        self.w = [init_rng.gauss(0, 0.1) for _ in range(n_states)]
        self.b = 0.0

    def value(self, state: List[float]) -> float:
        """Return the current estimate V(state)."""
        dot = sum(self.w[i] * state[i] for i in range(self.n_states))
        return dot + self.b

    def update(self, state: List[float], target: float, lr: float) -> float:
        """Take one gradient step on the squared error and return the error.

        The returned value is ``target - V(state)`` measured before the
        weights are changed.
        """
        error = target - self.value(state)
        step = lr * error
        for i in range(self.n_states):
            self.w[i] += step * state[i]
        self.b += step
        return error
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# ── Advantage Actor-Critic (A2C) ───────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
class A2C:
    """
    Advantage Actor-Critic (A2C).

    Critic estimates V(s). Advantage A(s,a) = r + gamma*V(s') - V(s).
    Actor is updated with policy gradient weighted by advantage.
    Advantage reduces variance compared to pure REINFORCE.

    Works with any environment whose ``step(action)`` returns
    ``(state, reward, done)``, optionally followed by further items such as
    an ``info`` dict; anything after ``done`` is ignored.
    """

    def __init__(
        self,
        n_states: int,
        n_actions: int,
        lr_actor: float = 0.01,
        lr_critic: float = 0.05,
        gamma: float = 0.99,
        entropy_coef: float = 0.01,
        seed: int = 42,
    ):
        # Actor and critic get different seeds so their initial weights differ.
        self.actor = LinearActor(n_states, n_actions, seed=seed)
        self.critic = LinearCritic(n_states, seed=seed + 1)
        self.lr_actor = lr_actor
        self.lr_critic = lr_critic
        self.gamma = gamma
        self.entropy = entropy_coef
        self._rng = random.Random(seed)
        # Total reward per episode from the most recent train() call.
        self.episode_rewards: List[float] = []

    def select_action(self, state: List[float]) -> int:
        """Sample an action for *state* from the current policy."""
        return self.actor.select_action(state, self._rng)

    def update(
        self,
        state: List[float],
        action: int,
        reward: float,
        next_state: List[float],
        done: bool,
    ) -> float:
        """Single-step TD update of critic and actor.

        Returns:
            The advantage (TD error) measured before the critic step.
        """
        # Terminal transitions do not bootstrap from the next state.
        v_next = 0.0 if done else self.critic.value(next_state)
        td_target = reward + self.gamma * v_next
        advantage = self.critic.update(state, td_target, self.lr_critic)
        # Entropy bonus: shift each action bias by (-log p - 1), which is
        # larger for less likely actions and so pushes toward uniform.
        probs = self.actor.probs(state)
        bonus_scale = self.lr_actor * self.entropy
        for a in range(self.actor.n_actions):
            entropy_grad = -math.log(max(probs[a], 1e-9)) - 1
            self.actor.b[a] += bonus_scale * entropy_grad
        # Policy gradient
        self.actor.update(state, action, advantage, self.lr_actor)
        return advantage

    def train(
        self,
        env_fn: Callable,
        n_episodes: int = 500,
        max_steps: int = 200,
    ) -> List[float]:
        """
        Train for n_episodes and return the total reward of each episode.

        env_fn() must return an object with:
            .reset()      -> state (list of floats)
            .step(action) -> (next_state, reward, done[, ...])

        Extra trailing items from ``step`` (e.g. an info dict) are accepted
        and discarded.  An episode ends when ``done`` is true or after
        ``max_steps`` steps.
        """
        self.episode_rewards = []
        for _ in range(n_episodes):
            env = env_fn()
            state = env.reset()
            total = 0.0
            for _ in range(max_steps):
                action = self.select_action(state)
                # Star-unpack so both 3-tuple and 4-tuple step() results work.
                next_state, reward, done, *_extra = env.step(action)
                self.update(state, action, reward, next_state, done)
                total += reward
                state = next_state
                if done:
                    break
            self.episode_rewards.append(total)
        return self.episode_rewards
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ── PPO-lite (clip-based policy optimisation) ──────────────────────────────────
|
|
185
|
+
|
|
186
|
+
class PPOLite:
    """
    Proximal Policy Optimisation (lite version) — clipped surrogate objective.

    Transitions are collected with ``select_action`` + ``store``; ``update``
    then computes discounted returns and runs ``k_epochs`` passes of
    per-sample critic and actor steps.  The actor step follows the gradient
    of ``min(ratio * adv, clip(ratio, 1-eps, 1+eps) * adv)``, which is zero
    once the probability ratio has left the trust interval in the direction
    the advantage favours.
    """

    def __init__(
        self,
        n_states: int,
        n_actions: int,
        lr: float = 0.003,
        gamma: float = 0.99,
        clip_eps: float = 0.2,
        k_epochs: int = 4,
        seed: int = 42,
    ):
        self.actor = LinearActor(n_states, n_actions, seed=seed)
        self.critic = LinearCritic(n_states, seed=seed + 1)
        self.lr = lr
        self.gamma = gamma
        self.clip_eps = clip_eps
        self.k_epochs = k_epochs
        self._rng = random.Random(seed)
        # Rollout buffer: filled by store(), emptied at the end of update().
        self._states: List[List[float]] = []
        self._actions: List[int] = []
        self._rewards: List[float] = []
        self._dones: List[bool] = []
        self._old_logprobs: List[float] = []

    def select_action(self, state: List[float]) -> Tuple[int, float]:
        """Sample an action and return it with its log-probability."""
        probs = self.actor.probs(state)
        action = self.actor.select_action(state, self._rng)
        log_prob = math.log(max(probs[action], 1e-300))
        return action, log_prob

    def store(self, state, action, reward, done, log_prob):
        """Append one transition to the rollout buffer."""
        self._states.append(state)
        self._actions.append(action)
        self._rewards.append(reward)
        self._dones.append(done)
        self._old_logprobs.append(log_prob)

    def update(self) -> None:
        """Compute returns, then run k_epochs of clipped PPO updates.

        Does nothing when the buffer is empty.  The buffer is cleared
        afterwards.
        """
        n = len(self._rewards)
        if n == 0:
            return

        # Discounted returns, accumulated backwards; the running sum is
        # reset at every transition flagged ``done``.
        returns = [0.0] * n
        running = 0.0
        for i in reversed(range(n)):
            if self._dones[i]:
                running = 0.0
            running = self._rewards[i] + self.gamma * running
            returns[i] = running

        lower = 1.0 - self.clip_eps
        upper = 1.0 + self.clip_eps
        for _ in range(self.k_epochs):
            for i in range(n):
                state = self._states[i]
                action = self._actions[i]
                ret = returns[i]
                adv = ret - self.critic.value(state)
                # Squash the advantage into (-1, 1) to bound the step size.
                adv /= (abs(adv) + 1.0)
                # Critic update
                self.critic.update(state, ret, self.lr)
                # Actor: clipped surrogate
                new_probs = self.actor.probs(state)
                new_logprob = math.log(max(new_probs[action], 1e-300))
                ratio = math.exp(new_logprob - self._old_logprobs[i])
                # min(ratio*adv, clip(ratio)*adv) selects the constant
                # clipped term exactly in these two cases, where its
                # gradient w.r.t. the policy is zero.
                clipped = (adv >= 0 and ratio > upper) or (adv < 0 and ratio < lower)
                if clipped:
                    continue
                # d(ratio * adv)/d(theta) = adv * ratio * grad log pi(a|s);
                # actor.update supplies the grad-log-pi factor.
                self.actor.update(state, action, adv * ratio, self.lr)

        # Clear buffer
        self._states.clear()
        self._actions.clear()
        self._rewards.clear()
        self._dones.clear()
        self._old_logprobs.clear()
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
if __name__ == "__main__":
    # Demo environment: a walk along cells 0..9 that starts at cell 2 and
    # ends on reaching cell 5.  Every non-terminal step costs 0.01.
    class GridWalk:
        """1D grid: state=[pos], actions=[left,right], goal at pos=5."""

        def __init__(self):
            self.pos = 2

        def reset(self):
            self.pos = 2
            return [self.pos / 10.0]

        def step(self, action):
            self.pos += 1 if action == 1 else -1
            self.pos = max(0, min(9, self.pos))
            done = self.pos == 5
            reward = 1.0 if done else -0.01
            return [self.pos / 10.0], reward, done

    a2c = A2C(n_states=1, n_actions=2, lr_actor=0.05, lr_critic=0.1, gamma=0.99, seed=0)
    rewards = a2c.train(GridWalk, n_episodes=300, max_steps=50)
    last50 = sum(rewards[-50:]) / 50
    print(f"A2C GridWalk: last-50 mean reward = {last50:.3f} (expect > 0.8)")

    ppo = PPOLite(n_states=1, n_actions=2, lr=0.05, gamma=0.99, seed=1)
    ep_rewards = []
    for ep in range(200):
        env = GridWalk()
        state = env.reset()
        total = 0.0
        for _ in range(30):
            action, lp = ppo.select_action(state)
            ns, r, done = env.step(action)
            ppo.store(state, action, r, done, lp)
            total += r
            state = ns
            if done:
                break
        ep_rewards.append(total)
        # Learn from the buffered transitions once every 10 episodes.
        if (ep + 1) % 10 == 0:
            ppo.update()
    last50_ppo = sum(ep_rewards[-50:]) / 50
    print(f"PPO-lite GridWalk: last-50 mean reward = {last50_ppo:.3f} (expect > 0.8)")
|
patentml/adaboost.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AdaBoost — Adaptive Boosting with Feature Selection
|
|
3
|
+
Public Domain Implementation
|
|
4
|
+
|
|
5
|
+
Derived from expired patents:
|
|
6
|
+
US7421415 (Siemens Corporate Research, filed 2005)
|
|
7
|
+
"Methods and systems for 3D object detection using learning"
|
|
8
|
+
"using a learning procedure for feature selection based on boosting"
|
|
9
|
+
Expired 2025. Public domain.
|
|
10
|
+
|
|
11
|
+
Also draws from:
|
|
12
|
+
US6816847 (Microsoft, filed 1999, 72 cites) — ensemble of classifiers
|
|
13
|
+
"training a classifier comprises training one of a bayesian classifier,
|
|
14
|
+
a support vector machine, a neural net classifier, and a decision tree"
|
|
15
|
+
|
|
16
|
+
AdaBoost: Freund & Schapire 1997 — theoretical basis, Siemens patent
|
|
17
|
+
covers the application to feature selection in high-dimensional spaces.
|
|
18
|
+
"""
|
|
19
|
+
import math
|
|
20
|
+
import random
|
|
21
|
+
from typing import Callable, List, Optional, Tuple
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DecisionStump:
    """
    Weak learner: a threshold test on a single feature.

    Predicts ``polarity`` when the chosen feature is <= ``threshold`` and
    ``-polarity`` otherwise.
    US7421415: "a weak learning algorithm applied at each boosting round"
    """

    def __init__(self):
        self.feature_idx: int = 0
        self.threshold: float = 0.0
        self.polarity: int = 1  # 1 or -1
        self.alpha: float = 0.0

    def fit(self, X: List[List[float]], y: List[int],
            weights: List[float]) -> float:
        """Pick the feature, threshold and polarity with least weighted error.

        Candidate thresholds are the midpoints between consecutive distinct
        values of a feature (or the single value when the feature is
        constant).  Returns the weighted error of the selected stump.
        """
        lowest = float("inf")

        for fi in range(len(X[0])):
            distinct = sorted({row[fi] for row in X})
            cuts = [(lo + hi) / 2 for lo, hi in zip(distinct, distinct[1:])]
            if not cuts:
                cuts = [distinct[0]]

            for cut in cuts:
                for sign in (1, -1):
                    # Total weight of the samples this candidate gets wrong.
                    miss = sum(
                        w for w, row, label in zip(weights, X, y)
                        if (sign if row[fi] <= cut else -sign) != label
                    )
                    # Strict '<' keeps the first candidate found on ties.
                    if miss < lowest:
                        lowest = miss
                        self.feature_idx = fi
                        self.threshold = cut
                        self.polarity = sign

        return lowest

    def predict(self, X: List[List[float]]) -> List[int]:
        """Return the +/-1 label of every row in *X*."""
        col, cut, sign = self.feature_idx, self.threshold, self.polarity
        labels = []
        for row in X:
            labels.append(sign if row[col] <= cut else -sign)
        return labels
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class AdaBoost:
    """
    Adaptive Boosting with feature selection.

    US7421415: "for each boosting round t:
    - apply weak learning algorithm to find feature with minimum error
    - compute alpha_t = 0.5 * ln((1-e_t)/e_t)
    - update weights: w_{t+1} = w_t * exp(-alpha_t * y_i * h_t(x_i))
    - normalise weights"

    The ensemble is: H(x) = sign(sum_t alpha_t * h_t(x))

    Labels are expected to be +1 / -1.
    """

    def __init__(
        self,
        n_estimators: int = 50,
        random_seed: Optional[int] = None,
    ):
        self.T = n_estimators
        self.stumps: List[DecisionStump] = []
        # Created here so top_features() works before fit() has been called.
        self.feature_importance: List[float] = []
        # 'is not None' rather than truthiness, so that a seed of 0 is used.
        if random_seed is not None:
            random.seed(random_seed)

    def fit(self, X: List[List[float]], y: List[int]) -> "AdaBoost":
        """
        US7421415 boosting loop:
        "for each boosting round, train weak learner on weighted data"

        Trains ``n_estimators`` stumps, replacing any previously fitted
        ones, and stores the normalised per-feature importance (sum of
        |alpha| of the stumps using each feature).  Returns ``self``.
        """
        n = len(X)
        weights = [1.0 / n] * n

        self.stumps = []
        self.feature_importance = [0.0] * len(X[0])

        for _ in range(self.T):
            stump = DecisionStump()
            error = stump.fit(X, y, weights)

            # Clip error to avoid log(0) and division by zero below.
            error = max(1e-10, min(1 - 1e-10, error))

            # Alpha: confidence of this stump
            stump.alpha = 0.5 * math.log((1 - error) / error)

            # Track feature importance (US7421415 feature selection)
            self.feature_importance[stump.feature_idx] += abs(stump.alpha)

            # Update weights: higher weight for misclassified examples
            preds = stump.predict(X)
            new_weights = [
                w * math.exp(-stump.alpha * yi * pi)
                for w, yi, pi in zip(weights, y, preds)
            ]
            total = sum(new_weights)
            weights = [w / total for w in new_weights]

            self.stumps.append(stump)

        # Normalise feature importance; 'or 1.0' guards an all-zero total.
        total_imp = sum(self.feature_importance) or 1.0
        self.feature_importance = [f / total_imp for f in self.feature_importance]

        return self

    def predict_score(self, X: List[List[float]]) -> List[float]:
        """Return raw ensemble scores (positive = class +1)."""
        scores = [0.0] * len(X)
        for stump in self.stumps:
            for i, p in enumerate(stump.predict(X)):
                scores[i] += stump.alpha * p
        return scores

    def predict(self, X: List[List[float]]) -> List[int]:
        """Return +1 / -1 labels; a score of exactly 0 maps to +1."""
        return [1 if s >= 0 else -1 for s in self.predict_score(X)]

    def top_features(self, n: int = 5) -> List[Tuple[int, float]]:
        """Return up to *n* (feature_index, importance) pairs, largest first.

        Returns an empty list when the model has not been fitted.
        """
        ranked = sorted(enumerate(self.feature_importance),
                        key=lambda x: -x[1])
        return ranked[:n]
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
if __name__ == "__main__":
    random.seed(42)

    # 5x5 grid over the two informative features; features 2 and 3 are
    # uniform noise.  The label depends only on x[0] + x[1].
    grid = [0.0, 0.5, 1.0, 1.5, 2.0]
    X = []
    for x1 in grid:
        for x2 in grid:
            X.append([x1, x2, random.random(), random.random()])
    y = [1 if row[0] + row[1] > 2.0 else -1 for row in X]

    model = AdaBoost(n_estimators=20, random_seed=42)
    model.fit(X, y)
    guesses = model.predict(X)
    hits = sum(1 for guess, label in zip(guesses, y) if guess == label)
    accuracy = hits / len(y)

    print("AdaBoost with feature selection (US7421415, Siemens, expired 2025):")
    print(f" Accuracy: {accuracy:.1%}")
    print(f" Feature importance: {[round(f, 3) for f in model.feature_importance]}")
    print(f" Top features: {model.top_features(2)}")
    print(f" (Features 0+1 should dominate; 2+3 are noise)")
|