patentml 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. patentml/__init__.py +169 -0
  2. patentml/actor_critic.py +304 -0
  3. patentml/adaboost.py +168 -0
  4. patentml/bayesian_network.py +300 -0
  5. patentml/bayesian_optimiser.py +264 -0
  6. patentml/collaborative_filter.py +254 -0
  7. patentml/dbscan.py +230 -0
  8. patentml/decision_tree.py +289 -0
  9. patentml/deep_networks.py +426 -0
  10. patentml/em_clustering.py +198 -0
  11. patentml/ensemble.py +299 -0
  12. patentml/feature_engineering.py +334 -0
  13. patentml/gaussian_process.py +256 -0
  14. patentml/genetic_algorithm.py +271 -0
  15. patentml/genetic_programming.py +289 -0
  16. patentml/gradient_boosting.py +227 -0
  17. patentml/grammar_gp.py +546 -0
  18. patentml/hidden_markov.py +312 -0
  19. patentml/isolation_forest.py +223 -0
  20. patentml/kalman_filter.py +258 -0
  21. patentml/knn.py +255 -0
  22. patentml/linear_gp.py +405 -0
  23. patentml/mean_shift.py +198 -0
  24. patentml/mini_neural_net.py +146 -0
  25. patentml/multi_armed_bandit.py +255 -0
  26. patentml/neuroevolution.py +260 -0
  27. patentml/online_classifier.py +141 -0
  28. patentml/optimisers.py +208 -0
  29. patentml/particle_swarm.py +319 -0
  30. patentml/q_learning.py +229 -0
  31. patentml/scalable_kmeans.py +161 -0
  32. patentml/simulated_annealing.py +205 -0
  33. patentml/spectral_clustering.py +234 -0
  34. patentml/svd_reducer.py +248 -0
  35. patentml/svm_smo.py +181 -0
  36. patentml/text_classifier.py +202 -0
  37. patentml/vector_quantisation.py +254 -0
  38. patentml/word_embeddings.py +325 -0
  39. patentml-0.1.0.dist-info/METADATA +103 -0
  40. patentml-0.1.0.dist-info/RECORD +43 -0
  41. patentml-0.1.0.dist-info/WHEEL +5 -0
  42. patentml-0.1.0.dist-info/licenses/LICENSE +28 -0
  43. patentml-0.1.0.dist-info/top_level.txt +1 -0
patentml/__init__.py ADDED
@@ -0,0 +1,169 @@
1
+ """
2
+ patentml — Machine Learning from Expired Patents
3
+ All algorithms derived from expired US patents. Zero dependencies. Pure Python stdlib.
4
+
5
+ EXPIRED PATENT SOURCES (selected key ones):
6
+ US5970487 Mitsubishi (1997) GA hardware machine
7
+ US6912587 AT&T (2001) Constraint-weighted GA fitness
8
+ US6477444 Fuji Xerox (2000) Genetic programming tree evolution
9
+ US6212427 Kennedy (1999) Particle swarm optimisation
10
+ US6484115 Storn (1999) Differential evolution
11
+ US7047169 Univ. IL (2002) EDA / Bayesian optimisation
12
+ US7219040 GE (2002) Simulated annealing + constraints
13
+ US5835901 Lockheed (1997) Neuroevolution [142 cites]
14
+ US7162461 SAS (2005) Hybrid NN activation search
15
+ US6128606 AT&T (1997) Modular NN + backprop [53 cites]
16
+ US7747070 Microsoft (2005) CNN on GPU [91 cites]
17
+ US6963862 Texas A&M (2001) Recurrent network training
18
+ US7447669 Nanyang (2004) Ant Colony Optimisation
19
+ US6161130 Microsoft (1998) Online classifier [896 cites]
20
+ US6327581 Microsoft (1998) SVM-SMO [173 cites]
21
+ US7421415 Siemens (2005) AdaBoost + feature selection
22
+ US6816847 Microsoft (1999) Decision tree + ensembles [72 cites]
23
+ US5613012 SmartTouch (1995) Voting ensemble [1182 cites]
24
+ US6012058 Microsoft (1998) Scalable K-means
25
+ US6985172 SW Research(2002) Q-learning / TD + Actor-Critic A2C / PPO-lite [145 cites]
26
+ US6981040 Utopy (2000) Bandit selection [919 cites]
27
+ US7076102 Philips (2002) HMM event learning [116 cites]
28
+ US6529891 Microsoft (1998) PCA / Bayesian model selection [124 cites]
29
+ US6807536 Microsoft (2001) Randomised SVD / low-rank approximation [170 cites]
30
+ US6263337 Microsoft (1998) Scalable EM / Gaussian mixture clustering [142 cites]
31
+ US6192360 Microsoft (1998) Text classifier TF-IDF + Naive Bayes [364 cites]
32
+ US6496816 Microsoft (1998) Collaborative filtering + Bayesian mixture [128 cites]
33
+ US6374251 Microsoft (1998) KNN + BallTree scalable search [187 cites]
34
+ US6049797 Lucent (1998) DBSCAN + OPTICS density clustering [154 cites]
35
+ US7031530 Lockheed (2001) Gradient Boosting Machine (GBM) [188 cites]
36
+ US6795794 U.Illinois (2002) Kalman Filter + Extended KF [127 cites]
37
+ US6931384 Microsoft (2001) Gaussian Process regression + classification [258 cites]
38
+ US6216066 GE (1998) Spectral clustering via Laplacian eigenmap [148 cites]
39
+ US6317707 AT&T (1998) Mean Shift + Kernel Density Estimation [269 cites]
40
+ US7225343 Columbia U (2003) Isolation Forest + One-Class SVM [159 cites]
41
+ US6490698 Microsoft (1999) Bayesian Network + Naive Bayes [102 cites]
42
+ US6421467 Texas Tech (1999) Vector Quantisation LBG + Product Quantiser [101 cites]
43
+ US6556983 Microsoft (2000) Word Embeddings PMI + Word2Vec SGNS [645 cites]
44
+ """
45
+
46
+ __version__ = "0.1.0"
47
+
48
+ from .genetic_algorithm import GeneticAlgorithm, Chromosome, make_population
49
+ from .genetic_programming import GeneticProgramming, rand_tree, Var, Const, BinOp, UnaryOp
50
+ from .grammar_gp import (GrammarGP, GrammaticalEvolution, GNode,
51
+ rand_grammar_tree, make_math_grammar, MATH_GRAMMAR,
52
+ subtree_crossover, subtree_mutation, point_mutation)
53
+ from .linear_gp import (LinearGP, Instruction, Program,
54
+ rand_program, effective_program, effective_instructions,
55
+ linear_crossover, micro_mutation, macro_mutation,
56
+ to_expr_string, execute, OPERATIONS, FAST_OPS)
57
+ from .particle_swarm import ParticleSwarmOptimiser, DifferentialEvolution, CMAES
58
+ from .online_classifier import OnlineBayesClassifier
59
+ from .scalable_kmeans import ScalableKMeans, HierarchicalKMeans
60
+ from .mini_neural_net import MiniNeuralNet
61
+ from .deep_networks import Conv1D, SimpleRNN, GRUCell, AntColonyOptimiser
62
+ from .svm_smo import SVM, linear_kernel, rbf_kernel, poly_kernel
63
+ from .adaboost import AdaBoost, DecisionStump
64
+ from .q_learning import QLearner, SARSALearner, FunctionApproxQLearner
65
+ from .multi_armed_bandit import EpsilonGreedy, UCB1, ThompsonSampling, EXP3, LinUCB
66
+ from .bayesian_optimiser import GaussianEDA, BayesianOptimiser
67
+ from .simulated_annealing import SimulatedAnnealing, MultiObjectiveSA
68
+ from .decision_tree import DecisionTree, RandomForest
69
+ from .optimisers import SGD, RMSProp, Adam, AdamW, LRScheduler, numerical_gradient
70
+ from .hidden_markov import HiddenMarkovModel
71
+ from .neuroevolution import EvoNet, WeightEvolution, HybridNNEvolver, ACTIVATIONS
72
+ from .ensemble import VotingEnsemble, StackingEnsemble, BaggingEnsemble, WeightedEnsemble
73
+ from .knn import KNNClassifier, KNNRegressor, FastKNN, BallTree
74
+ from .dbscan import DBSCAN, OPTICS
75
+ from .gradient_boosting import GBMRegressor, GBMClassifier
76
+ from .kalman_filter import KalmanFilter, ExtendedKalmanFilter
77
+ from .gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier, rbf_kernel as gp_rbf, matern32_kernel, periodic_kernel
78
+ from .spectral_clustering import SpectralClustering, rbf_affinity, knn_affinity
79
+ from .mean_shift import MeanShift, KernelDensityEstimator, estimate_bandwidth
80
+ from .isolation_forest import IsolationForest, OneClassSVM
81
+ from .bayesian_network import BayesianNetwork, NaiveBayes, CPT
82
+ from .vector_quantisation import LBG, AdaptiveVQ, ProductQuantiser
83
+ from .actor_critic import A2C, PPOLite, LinearActor, LinearCritic
84
+ from .word_embeddings import Word2Vec, PMIEmbeddings, build_cooccurrence, tokenise
85
+ from .svd_reducer import randomised_svd, pca as randomised_pca, transform as svd_transform
86
+ from .em_clustering import EMClustering, GaussianComponent
87
+ from .text_classifier import TextPipeline, TFIDFVectoriser, NaiveBayesClassifier
88
+ from .collaborative_filter import MemoryCF, BayesianCF
89
+ from .feature_engineering import (
90
+ PCA, StandardScaler, MinMaxScaler,
91
+ variance_threshold, correlation_filter,
92
+ mi_feature_ranking, forward_feature_selection,
93
+ )
94
+
95
+ __all__ = [
96
+ # Evolutionary / Global Optimisation
97
+ "GeneticAlgorithm", "Chromosome", "make_population",
98
+ "GeneticProgramming", "rand_tree", "Var", "Const", "BinOp", "UnaryOp",
99
+ # Grammar-Guided GP / Grammatical Evolution
100
+ "GrammarGP", "GrammaticalEvolution", "GNode",
101
+ "rand_grammar_tree", "make_math_grammar", "MATH_GRAMMAR",
102
+ "subtree_crossover", "subtree_mutation", "point_mutation",
103
+ # Linear GP
104
+ "LinearGP", "Instruction", "Program",
105
+ "rand_program", "effective_program", "effective_instructions",
106
+ "linear_crossover", "micro_mutation", "macro_mutation",
107
+ "to_expr_string", "execute", "OPERATIONS", "FAST_OPS",
108
+ "ParticleSwarmOptimiser", "DifferentialEvolution", "CMAES",
109
+ "GaussianEDA", "BayesianOptimiser",
110
+ "SimulatedAnnealing", "MultiObjectiveSA",
111
+ "AntColonyOptimiser",
112
+ # Neural Networks
113
+ "MiniNeuralNet",
114
+ "Conv1D", "SimpleRNN", "GRUCell",
115
+ "EvoNet", "WeightEvolution", "HybridNNEvolver", "ACTIVATIONS",
116
+ # Gradient Optimisers
117
+ "SGD", "RMSProp", "Adam", "AdamW", "LRScheduler", "numerical_gradient",
118
+ # Classifiers
119
+ "OnlineBayesClassifier",
120
+ "SVM", "linear_kernel", "rbf_kernel", "poly_kernel",
121
+ "AdaBoost", "DecisionStump",
122
+ "DecisionTree", "RandomForest",
123
+ # Ensembles
124
+ "VotingEnsemble", "StackingEnsemble", "BaggingEnsemble", "WeightedEnsemble",
125
+ # Clustering
126
+ "ScalableKMeans", "HierarchicalKMeans",
127
+ # Reinforcement Learning / Bandits
128
+ "QLearner", "SARSALearner", "FunctionApproxQLearner",
129
+ "EpsilonGreedy", "UCB1", "ThompsonSampling", "EXP3", "LinUCB",
130
+ # Sequential Models
131
+ "HiddenMarkovModel",
132
+ # Nearest Neighbours
133
+ "KNNClassifier", "KNNRegressor", "FastKNN", "BallTree",
134
+ # Density Clustering
135
+ "DBSCAN", "OPTICS",
136
+ # Gradient Boosting
137
+ "GBMRegressor", "GBMClassifier",
138
+ # Sequential / State Estimation
139
+ "KalmanFilter", "ExtendedKalmanFilter",
140
+ # Gaussian Processes
141
+ "GaussianProcessRegressor", "GaussianProcessClassifier",
142
+ "gp_rbf", "matern32_kernel", "periodic_kernel",
143
+ # Graph / Spectral Clustering
144
+ "SpectralClustering", "rbf_affinity", "knn_affinity",
145
+ # Density Estimation / Mode Finding
146
+ "MeanShift", "KernelDensityEstimator", "estimate_bandwidth",
147
+ # Anomaly Detection
148
+ "IsolationForest", "OneClassSVM",
149
+ # Probabilistic Graphical Models
150
+ "BayesianNetwork", "NaiveBayes", "CPT",
151
+ # Vector Quantisation
152
+ "LBG", "AdaptiveVQ", "ProductQuantiser",
153
+ # Policy Gradient RL
154
+ "A2C", "PPOLite", "LinearActor", "LinearCritic",
155
+ # Word Embeddings / NLP
156
+ "Word2Vec", "PMIEmbeddings", "build_cooccurrence", "tokenise",
157
+ # Dimensionality Reduction
158
+ "randomised_svd", "randomised_pca", "svd_transform",
159
+ # Clustering (additional)
160
+ "EMClustering", "GaussianComponent",
161
+ # Text / NLP
162
+ "TextPipeline", "TFIDFVectoriser", "NaiveBayesClassifier",
163
+ # Recommender Systems
164
+ "MemoryCF", "BayesianCF",
165
+ # Feature Engineering
166
+ "PCA", "StandardScaler", "MinMaxScaler",
167
+ "variance_threshold", "correlation_filter",
168
+ "mi_feature_ranking", "forward_feature_selection",
169
+ ]
patentml/actor_critic.py ADDED
@@ -0,0 +1,304 @@
1
+ """
2
+ Actor-Critic and Advantage Actor-Critic (A2C) reinforcement learning.
3
+
4
+ Patent basis: US6985172 (Southwest Research Institute, filed 2002, expired 2022)
5
+ "Q-learning with linear reward shaping" — 145 forward citations.
6
+ Covers concurrent value-function and policy optimisation where a critic
7
+ baseline reduces variance in policy gradient estimates.
8
+ """
9
+
10
+ import math
11
+ import random
12
+ from typing import List, Tuple, Optional, Callable, Dict
13
+
14
+
15
def _softmax(logits: List[float]) -> List[float]:
    """Return the softmax distribution over ``logits``.

    The largest logit is subtracted before exponentiating, so the biggest
    exponent evaluated is exp(0) and ``math.exp`` cannot overflow.
    An empty ``logits`` raises ``ValueError`` (from ``max``).
    """
    peak = max(logits)
    weights = []
    for value in logits:
        weights.append(math.exp(value - peak))
    normaliser = sum(weights)
    return [w / normaliser for w in weights]
20
+
21
def _log_softmax(logits: List[float]) -> List[float]:
    """Return ``log(softmax(logits))`` computed with the log-sum-exp trick.

    Working directly in log space keeps full precision for very unlikely
    entries: taking ``log`` of an already-normalised probability would turn
    every probability that underflows to 0.0 into the same clamped value.

    Args:
        logits: Unnormalised scores, one per class. Must be non-empty
            (``max`` raises ``ValueError`` otherwise).

    Returns:
        One log-probability per input logit.
    """
    peak = max(logits)
    # log(sum(exp(x))) == peak + log(sum(exp(x - peak))); the shifted sum is
    # always >= 1 because the peak itself contributes exp(0).
    log_norm = peak + math.log(sum(math.exp(x - peak) for x in logits))
    return [x - log_norm for x in logits]
24
+
25
def _relu(x: float) -> float:
    """Rectified linear unit: ``x`` when it is strictly positive, else 0.0."""
    return x if x > 0.0 else 0.0
27
+
28
def _drelu(x: float) -> float:
    """Derivative of ReLU: 1.0 for strictly positive ``x``, 0.0 otherwise."""
    if x > 0:
        return 1.0
    return 0.0
30
+
31
+
32
+ # ── Simple linear actor/critic ─────────────────────────────────────────────────
33
+
34
class LinearActor:
    """
    Softmax policy with one linear score per action.

    pi(a|s) = softmax(W @ s + b), where ``W`` holds one weight row per action
    (``n_actions`` rows of ``n_states`` weights) and ``b`` one bias per action.
    """

    def __init__(self, n_states: int, n_actions: int, seed: int = 42):
        self.n_states = n_states
        self.n_actions = n_actions
        init_rng = random.Random(seed)
        init_std = 0.1
        # Rows are drawn action by action, so a given seed always yields the
        # same weight matrix.
        self.W = []
        for _ in range(n_actions):
            self.W.append([init_rng.gauss(0, init_std) for _ in range(n_states)])
        self.b = [0.0] * n_actions

    def logits(self, state: List[float]) -> List[float]:
        """Return the unnormalised score ``W[a] . state + b[a]`` of every action."""
        scores = []
        for a in range(self.n_actions):
            row = self.W[a]
            dot = sum(row[j] * state[j] for j in range(self.n_states))
            scores.append(dot + self.b[a])
        return scores

    def probs(self, state: List[float]) -> List[float]:
        """Return the action distribution pi(.|state)."""
        scores = self.logits(state)
        return _softmax(scores)

    def select_action(self, state: List[float], rng: random.Random) -> int:
        """Sample an action index from pi(.|state) by inverse-CDF sampling on ``rng``."""
        distribution = self.probs(state)
        draw = rng.random()
        running = 0.0
        for index, p in enumerate(distribution):
            running += p
            if draw <= running:
                return index
        # Rounding can leave the cumulative sum marginally below the draw;
        # fall back to the last action in that case.
        return len(distribution) - 1

    def update(self, state: List[float], action: int, advantage: float, lr: float) -> None:
        """Policy gradient update: theta += lr * advantage * grad log pi(a|s)."""
        pi = self.probs(state)
        for a in range(self.n_actions):
            # d log pi(action|s) / d logit_a = 1[a == action] - pi_a
            if a == action:
                dlog = 1.0 - pi[a]
            else:
                dlog = -pi[a]
            step = lr * advantage * dlog
            for j in range(self.n_states):
                self.W[a][j] += step * state[j]
            self.b[a] += step
72
+
73
+
74
class LinearCritic:
    """
    Linear state-value estimate: V(s) = w . s + b.

    ``update`` moves the estimate toward a caller-supplied target, e.g. the
    TD(0) target r + gamma * V(s').
    """

    def __init__(self, n_states: int, seed: int = 42):
        self.n_states = n_states
        init_rng = random.Random(seed)
        self.w = []
        for _ in range(n_states):
            self.w.append(init_rng.gauss(0, 0.1))
        self.b = 0.0

    def value(self, state: List[float]) -> float:
        """Return V(state)."""
        weighted = sum(self.w[i] * state[i] for i in range(self.n_states))
        return weighted + self.b

    def update(self, state: List[float], target: float, lr: float) -> float:
        """Take one squared-error gradient step toward ``target``.

        Returns the error ``target - V(state)`` measured before the step.
        """
        error = target - self.value(state)
        step = lr * error
        for i in range(self.n_states):
            self.w[i] += step * state[i]
        self.b += step
        return error
97
+
98
+
99
+ # ── Advantage Actor-Critic (A2C) ───────────────────────────────────────────────
100
+
101
class A2C:
    """
    Advantage Actor-Critic (A2C) with a linear actor and a linear critic.

    The critic estimates V(s). The one-step advantage is
    A(s, a) = r + gamma * V(s') - V(s), and the actor takes a policy-gradient
    step weighted by it. Using the advantage instead of the raw return
    reduces variance compared to pure REINFORCE.

    ``train`` works with any environment object exposing ``reset() -> state``
    and ``step(action) -> (next_state, reward, done, ...)``; items after the
    third (for example an ``info`` dict) are ignored.
    """

    def __init__(
        self,
        n_states: int,
        n_actions: int,
        lr_actor: float = 0.01,
        lr_critic: float = 0.05,
        gamma: float = 0.99,
        entropy_coef: float = 0.01,
        seed: int = 42,
    ):
        self.actor = LinearActor(n_states, n_actions, seed=seed)
        # The critic gets a different seed so its initial weights are not a
        # copy of the actor's first weight row.
        self.critic = LinearCritic(n_states, seed=seed + 1)
        self.lr_actor = lr_actor
        self.lr_critic = lr_critic
        self.gamma = gamma
        self.entropy = entropy_coef
        self._rng = random.Random(seed)
        self.episode_rewards: List[float] = []

    def select_action(self, state: List[float]) -> int:
        """Sample an action from the current policy using the agent's own RNG."""
        return self.actor.select_action(state, self._rng)

    def update(
        self,
        state: List[float],
        action: int,
        reward: float,
        next_state: List[float],
        done: bool,
    ) -> float:
        """Single-step TD update of critic and actor.

        Args:
            state: State in which ``action`` was taken.
            action: Index of the action taken.
            reward: Reward received for the transition.
            next_state: State reached; ignored when ``done`` is true.
            done: Whether the transition ended the episode.

        Returns:
            The advantage (TD error) measured before the critic was updated.
        """
        # A terminal transition has no future value to bootstrap from.
        v_next = 0.0 if done else self.critic.value(next_state)
        td_target = reward + self.gamma * v_next
        advantage = self.critic.update(state, td_target, self.lr_critic)
        # Entropy regularisation: every bias is nudged by (-log p_a - 1), so
        # low-probability actions get the larger increment and the policy is
        # pulled toward uniform. Only the biases are touched, not W.
        # NOTE(review): this is dH/dp_a, not the exact entropy gradient with
        # respect to the logits; kept as the original heuristic.
        probs = self.actor.probs(state)
        for a in range(self.actor.n_actions):
            entropy_grad = -math.log(max(probs[a], 1e-9)) - 1
            self.actor.b[a] += self.lr_actor * self.entropy * entropy_grad
        # Policy gradient
        self.actor.update(state, action, advantage, self.lr_actor)
        return advantage

    def train(
        self,
        env_fn: Callable,
        n_episodes: int = 500,
        max_steps: int = 200,
    ) -> List[float]:
        """
        Train for n_episodes and return the total reward of each episode.

        env_fn() must return an object with:
            .reset() -> state (list of floats)
            .step(action) -> (next_state, reward, done[, ...])

        A fresh environment is created per episode. Each episode stops at
        ``done`` or after ``max_steps`` steps. ``episode_rewards`` is reset at
        the start of every call.
        """
        self.episode_rewards = []
        for _ in range(n_episodes):
            env = env_fn()
            state = env.reset()
            total = 0.0
            for _ in range(max_steps):
                action = self.select_action(state)
                # Star-unpacking keeps plain 3-tuples working while also
                # accepting results that carry extra trailing items.
                next_state, reward, done, *_extra = env.step(action)
                self.update(state, action, reward, next_state, done)
                total += reward
                state = next_state
                if done:
                    break
            self.episode_rewards.append(total)
        return self.episode_rewards
182
+
183
+
184
+ # ── PPO-lite (clip-based policy optimisation) ──────────────────────────────────
185
+
186
class PPOLite:
    """
    Proximal Policy Optimisation (lite version) — clipped surrogate objective.

    Transitions are collected with ``select_action`` and ``store``. ``update``
    then computes discounted returns and advantages, and runs ``k_epochs``
    passes of per-sample gradient steps. A sample stops contributing to the
    actor once its probability ratio has left the ``1 +/- clip_eps`` trust
    region in the direction the advantage favours.
    """

    def __init__(
        self,
        n_states: int,
        n_actions: int,
        lr: float = 0.003,
        gamma: float = 0.99,
        clip_eps: float = 0.2,
        k_epochs: int = 4,
        seed: int = 42,
    ):
        self.actor = LinearActor(n_states, n_actions, seed=seed)
        self.critic = LinearCritic(n_states, seed=seed + 1)
        self.lr = lr
        self.gamma = gamma
        self.clip_eps = clip_eps
        self.k_epochs = k_epochs
        self._rng = random.Random(seed)
        # Rollout buffer: parallel lists with one entry per stored transition.
        # It is emptied at the end of every update().
        self._states: List[List[float]] = []
        self._actions: List[int] = []
        self._rewards: List[float] = []
        self._dones: List[bool] = []
        self._old_logprobs: List[float] = []

    def select_action(self, state: List[float]) -> Tuple[int, float]:
        """Sample an action and return it with its log-probability.

        The log-probability is what ``store`` expects as ``log_prob``.
        """
        probs = self.actor.probs(state)
        action = self.actor.select_action(state, self._rng)
        log_prob = math.log(max(probs[action], 1e-300))
        return action, log_prob

    def store(self, state, action, reward, done, log_prob):
        """Append one transition to the rollout buffer.

        ``done`` marks an episode boundary: ``update`` restarts the discounted
        return at every transition stored with ``done`` true.
        """
        self._states.append(state)
        self._actions.append(action)
        self._rewards.append(reward)
        self._dones.append(done)
        self._old_logprobs.append(log_prob)

    def update(self) -> None:
        """Compute returns, advantages, then run k_epochs of PPO clip update.

        Does nothing when the buffer is empty. The buffer is cleared afterwards.
        """
        n = len(self._rewards)
        if n == 0:
            return
        # Discounted returns, accumulated backwards and restarted at episode ends.
        returns = [0.0] * n
        running = 0.0
        for i in reversed(range(n)):
            if self._dones[i]:
                running = 0.0
            running = self._rewards[i] + self.gamma * running
            returns[i] = running

        for _ in range(self.k_epochs):
            for i in range(n):
                state = self._states[i]
                action = self._actions[i]
                ret = returns[i]
                adv = ret - self.critic.value(state)
                # Squash the advantage into (-1, 1) to bound the step size.
                adv /= (abs(adv) + 1.0)
                # Critic update
                self.critic.update(state, ret, self.lr)
                # Actor: clipped surrogate min(ratio*adv, clip(ratio)*adv)
                new_probs = self.actor.probs(state)
                new_logprob = math.log(max(new_probs[action], 1e-300))
                ratio = math.exp(new_logprob - self._old_logprobs[i])
                if adv >= 0:
                    clipped = ratio > 1 + self.clip_eps
                else:
                    clipped = ratio < 1 - self.clip_eps
                if clipped:
                    # The clipped term is constant in the policy parameters,
                    # so this sample contributes no gradient.
                    continue
                # d(ratio * adv)/d(theta) = ratio * adv * grad log pi(a|s)
                self.actor.update(state, action, ratio * adv, self.lr)

        # Clear buffer
        self._states.clear()
        self._actions.clear()
        self._rewards.clear()
        self._dones.clear()
        self._old_logprobs.clear()
270
+
271
+
272
if __name__ == "__main__":
    # Smoke test on a tiny 1-D grid world: start at cell 2, reach cell 5.
    class GridWalk:
        """1D grid: state=[pos], actions=[left,right], goal at pos=5."""

        def __init__(self):
            self.pos = 2

        def reset(self):
            self.pos = 2
            return [self.pos / 10.0]

        def step(self, action):
            self.pos += 1 if action == 1 else -1
            self.pos = max(0, min(9, self.pos))
            done = self.pos == 5
            reward = 1.0 if done else -0.01
            return [self.pos / 10.0], reward, done

    a2c = A2C(n_states=1, n_actions=2, lr_actor=0.05, lr_critic=0.1, gamma=0.99, seed=0)
    rewards = a2c.train(GridWalk, n_episodes=300, max_steps=50)
    last50 = sum(rewards[-50:]) / 50
    print(f"A2C GridWalk: last-50 mean reward = {last50:.3f} (expect > 0.8)")

    ppo = PPOLite(n_states=1, n_actions=2, lr=0.05, gamma=0.99, seed=1)
    ep_rewards = []
    horizon = 30
    for ep in range(200):
        env = GridWalk()
        state = env.reset()
        total = 0.0
        for t in range(horizon):
            action, lp = ppo.select_action(state)
            ns, r, done = env.step(action)
            # Episodes are stored back to back in one buffer. Flag the final
            # step of a time-limited episode as a boundary too, otherwise the
            # next episode's rewards would be discounted into this one's returns.
            episode_end = done or t == horizon - 1
            ppo.store(state, action, r, episode_end, lp)
            total += r
            state = ns
            if done:
                break
        ep_rewards.append(total)
        # Learn from batches of ten episodes.
        if (ep + 1) % 10 == 0:
            ppo.update()
    last50_ppo = sum(ep_rewards[-50:]) / 50
    print(f"PPO-lite GridWalk: last-50 mean reward = {last50_ppo:.3f} (expect > 0.8)")
patentml/adaboost.py ADDED
@@ -0,0 +1,168 @@
1
+ """
2
+ AdaBoost — Adaptive Boosting with Feature Selection
3
+ Public Domain Implementation
4
+
5
+ Derived from expired patents:
6
+ US7421415 (Siemens Corporate Research, filed 2005)
7
+ "Methods and systems for 3D object detection using learning"
8
+ "using a learning procedure for feature selection based on boosting"
9
+ Expired 2025. Public domain.
10
+
11
+ Also draws from:
12
+ US6816847 (Microsoft, filed 1999, 72 cites) — ensemble of classifiers
13
+ "training a classifier comprises training one of a bayesian classifier,
14
+ a support vector machine, a neural net classifier, and a decision tree"
15
+
16
+ AdaBoost: Freund & Schapire 1997 — theoretical basis, Siemens patent
17
+ covers the application to feature selection in high-dimensional spaces.
18
+ """
19
+ import math
20
+ import random
21
+ from typing import Callable, List, Optional, Tuple
22
+
23
+
24
class DecisionStump:
    """
    Weak learner: a threshold test on a single feature.

    Predicts ``polarity`` for rows whose selected feature is <= ``threshold``
    and ``-polarity`` otherwise; labels are therefore expected to be +1 / -1.
    US7421415: "a weak learning algorithm applied at each boosting round"
    """

    def __init__(self):
        self.feature_idx: int = 0
        self.threshold: float = 0.0
        self.polarity: int = 1  # 1 or -1
        self.alpha: float = 0.0  # ensemble weight, assigned by the booster

    def fit(self, X: List[List[float]], y: List[int],
            weights: List[float]) -> float:
        """Exhaustively pick the feature, threshold and polarity with the
        lowest weighted error and return that error.

        Candidate thresholds are the midpoints between consecutive distinct
        values of a feature (or the single value when the feature is constant).
        On ties the first candidate encountered wins.
        """
        lowest = float("inf")
        for column in range(len(X[0])):
            distinct = sorted({row[column] for row in X})
            cuts = [(lo + hi) / 2 for lo, hi in zip(distinct, distinct[1:])]
            if not cuts:
                cuts = [distinct[0]]

            for cut in cuts:
                for sign in (1, -1):
                    guesses = []
                    for row in X:
                        guesses.append(sign if row[column] <= cut else -sign)
                    # Weighted error = total weight of the misclassified rows.
                    weighted_miss = sum(
                        w for w, guess, label in zip(weights, guesses, y)
                        if guess != label
                    )
                    if weighted_miss < lowest:
                        lowest = weighted_miss
                        self.feature_idx = column
                        self.threshold = cut
                        self.polarity = sign

        return lowest

    def predict(self, X: List[List[float]]) -> List[int]:
        """Return the +/-polarity label for every row of ``X``."""
        idx, cut, sign = self.feature_idx, self.threshold, self.polarity
        labels = []
        for row in X:
            labels.append(sign if row[idx] <= cut else -sign)
        return labels
65
+
66
+
67
class AdaBoost:
    """
    Adaptive Boosting with feature selection.

    US7421415: "for each boosting round t:
    - apply weak learning algorithm to find feature with minimum error
    - compute alpha_t = 0.5 * ln((1-e_t)/e_t)
    - update weights: w_{t+1} = w_t * exp(-alpha_t * y_i * h_t(x_i))
    - normalise weights"

    The ensemble is: H(x) = sign(sum_t alpha_t * h_t(x))

    Labels must be +1 / -1. After ``fit``, ``feature_importance`` holds the
    normalised sum of |alpha| per feature.
    """

    def __init__(
        self,
        n_estimators: int = 50,
        random_seed: Optional[int] = None,
    ):
        """
        Args:
            n_estimators: Number of boosting rounds (stumps) to train.
            random_seed: When given, seeds the global ``random`` module.
                Nothing in this class draws random numbers itself, so this
                only affects other users of that module.
        """
        self.T = n_estimators
        self.stumps: List[DecisionStump] = []
        # Defined up front so top_features() is usable before fit().
        self.feature_importance: List[float] = []
        # Compare against None: 0 is a legitimate seed and must not be skipped.
        if random_seed is not None:
            random.seed(random_seed)

    def fit(self, X: List[List[float]], y: List[int]) -> "AdaBoost":
        """
        US7421415 boosting loop:
        "for each boosting round, train weak learner on weighted data"

        Trains ``n_estimators`` stumps on progressively re-weighted samples
        and returns ``self``. Any previously fitted stumps are discarded.
        """
        n = len(X)
        weights = [1.0 / n] * n

        self.stumps = []
        self.feature_importance = [0.0] * len(X[0])

        for _ in range(self.T):
            stump = DecisionStump()
            error = stump.fit(X, y, weights)

            # Clip error to avoid log(0) and division by zero
            error = max(1e-10, min(1 - 1e-10, error))

            # Alpha: confidence of this stump
            stump.alpha = 0.5 * math.log((1 - error) / error)

            # Track feature importance (US7421415 feature selection)
            self.feature_importance[stump.feature_idx] += abs(stump.alpha)

            # Update weights: y_i * h(x_i) is -1 for a miss, so misclassified
            # examples are scaled up and correct ones scaled down.
            preds = stump.predict(X)
            new_weights = [
                w * math.exp(-stump.alpha * yi * pi)
                for w, yi, pi in zip(weights, y, preds)
            ]
            total = sum(new_weights)
            weights = [w / total for w in new_weights]

            self.stumps.append(stump)

        # Normalise feature importance (guarding against an all-zero total)
        total_imp = sum(self.feature_importance) or 1.0
        self.feature_importance = [f / total_imp for f in self.feature_importance]

        return self

    def predict_score(self, X: List[List[float]]) -> List[float]:
        """Return raw ensemble scores (positive = class +1)."""
        scores = [0.0] * len(X)
        for stump in self.stumps:
            for i, p in enumerate(stump.predict(X)):
                scores[i] += stump.alpha * p
        return scores

    def predict(self, X: List[List[float]]) -> List[int]:
        """Return +1 / -1 labels; a score of exactly 0 is assigned to +1."""
        return [1 if s >= 0 else -1 for s in self.predict_score(X)]

    def top_features(self, n: int = 5) -> List[Tuple[int, float]]:
        """Return up to ``n`` (feature_index, importance) pairs, most important
        first. Empty until ``fit`` has been called."""
        ranked = sorted(enumerate(self.feature_importance),
                        key=lambda x: -x[1])
        return ranked[:n]
148
+
149
+
150
if __name__ == "__main__":
    random.seed(42)

    # 5 x 5 grid over two informative features, plus two random noise columns.
    # The label depends only on x0 + x1, so the classes are linearly separable.
    grid = [0.0, 0.5, 1.0, 1.5, 2.0]
    X = []
    for x1 in grid:
        for x2 in grid:
            X.append([x1, x2, random.random(), random.random()])
    y = []
    for row in X:
        y.append(1 if row[0] + row[1] > 2.0 else -1)

    ab = AdaBoost(n_estimators=20, random_seed=42)
    ab.fit(X, y)
    preds = ab.predict(X)
    hits = 0
    for guess, label in zip(preds, y):
        if guess == label:
            hits += 1
    acc = hits / len(y)
    rounded_importance = [round(f, 3) for f in ab.feature_importance]

    print("AdaBoost with feature selection (US7421415, Siemens, expired 2025):")
    print(f" Accuracy: {acc:.1%}")
    print(f" Feature importance: {rounded_importance}")
    print(f" Top features: {ab.top_features(2)}")
    print(" (Features 0+1 should dominate; 2+3 are noise)")