evolutionary-policy-optimization 0.2.3__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/.gitignore +2 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/PKG-INFO +5 -2
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/README.md +1 -1
- evolutionary_policy_optimization-0.2.5/evolutionary_policy_optimization/experimental.py +198 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/pyproject.toml +6 -1
- evolutionary_policy_optimization-0.2.5/train_crossover_weight_space.py +146 -0
- evolutionary_policy_optimization-0.2.3/evolutionary_policy_optimization/experimental.py +0 -80
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/.github/workflows/lint.yml +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/evolutionary_policy_optimization/distributed.py +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/evolutionary_policy_optimization/env_wrappers.py +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/evolutionary_policy_optimization/epo.py +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/requirements.txt +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/tests/test_epo.py +0 -0
- {evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/train_gym.py +0 -0
{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.2.3
+Version: 0.2.5
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -47,6 +47,9 @@ Provides-Extra: examples
 Requires-Dist: numpy; extra == 'examples'
 Requires-Dist: pufferlib>=2.0.6; extra == 'examples'
 Requires-Dist: tqdm; extra == 'examples'
+Provides-Extra: experimental
+Requires-Dist: tensordict; extra == 'experimental'
+Requires-Dist: torchvision; extra == 'experimental'
 Provides-Extra: test
 Requires-Dist: pytest; extra == 'test'
 Requires-Dist: ruff>=0.4.2; extra == 'test'
@@ -56,7 +59,7 @@ Description-Content-Type: text/markdown
 
 ## Evolutionary Policy Optimization
 
-Pytorch implementation of [Evolutionary Policy Optimization](https://web3.arxiv.org/abs/2503.19037), from Wang et al. of the Robotics Institute at Carnegie Mellon University
+Pytorch implementation of [Evolutionary Policy Optimization](https://web3.arxiv.org/abs/2503.19037), from [Wang](https://www.jianrenw.com/) et al. of the Robotics Institute at Carnegie Mellon University
 
 This paper stands out, as I have witnessed the positive effects first hand in an [exploratory project](https://github.com/lucidrains/firefly-torch) (mixing evolution with gradient based methods). Perhaps the Alexnet moment for genetic algorithms has not come to pass yet.
 
{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/README.md
RENAMED
@@ -2,7 +2,7 @@
 
 ## Evolutionary Policy Optimization
 
-Pytorch implementation of [Evolutionary Policy Optimization](https://web3.arxiv.org/abs/2503.19037), from Wang et al. of the Robotics Institute at Carnegie Mellon University
+Pytorch implementation of [Evolutionary Policy Optimization](https://web3.arxiv.org/abs/2503.19037), from [Wang](https://www.jianrenw.com/) et al. of the Robotics Institute at Carnegie Mellon University
 
 This paper stands out, as I have witnessed the positive effects first hand in an [exploratory project](https://github.com/lucidrains/firefly-torch) (mixing evolution with gradient based methods). Perhaps the Alexnet moment for genetic algorithms has not come to pass yet.
 
evolutionary_policy_optimization-0.2.5/evolutionary_policy_optimization/experimental.py
ADDED
@@ -0,0 +1,198 @@
+from random import uniform
+
+import torch
+import torch.nn.functional as F
+from torch.func import vmap, functional_call
+from torch.nn import Module, ParameterList
+
+from einops import rearrange, reduce, repeat
+
+def exists(v):
+    return v is not None
+
+def l2norm(t, dim = -1):
+    return F.normalize(t, dim = dim)
+
+def crossover_weights(w1, w2):
+    assert w1.shape == w2.shape
+
+    no_batch = w1.ndim == 2
+
+    if no_batch:
+        w1, w2 = tuple(rearrange(t, '... -> 1 ...') for t in (w1, w2))
+
+    assert w1.ndim == 3
+
+    i, j = w1.shape[-2:]
+    transpose = i < j
+
+    if transpose:
+        w1, w2 = tuple(rearrange(t, 'b i j -> b j i') for t in (w1, w2))
+
+    rank = min(w2.shape[1:])
+    assert rank >= 2
+
+    batch = w1.shape[0]
+
+    u1, s1, v1 = torch.svd(w1)
+    u2, s2, v2 = torch.svd(w2)
+
+    batch_randperm = torch.randn((batch, rank), device = w1.device).argsort(dim = -1)
+    mask = batch_randperm < (rank // 2)
+
+    u = torch.where(mask[:, None, :], u1, u2)
+    s = torch.where(mask, s1, s2)
+    v = torch.where(mask[:, :, None], v1, v2)
+
+    out = u @ torch.diag_embed(s) @ v.mT
+
+    if transpose:
+        out = rearrange(out, 'b j i -> b i j')
+
+    if no_batch:
+        out = rearrange(out, '1 ... -> ...')
+
+    return out
+
+def mutate_weight(
+    w,
+    mutation_strength = 1.
+):
+
+    i, j = w.shape[-2:]
+    transpose = i < j
+
+    if transpose:
+        w = w.transpose(-1, -2)
+
+    rank = min(w.shape[1:])
+    assert rank >= 2
+
+    u, s, v = torch.svd(w)
+
+    u = u + torch.randn_like(u) * mutation_strength
+    v = v + torch.randn_like(v) * mutation_strength
+
+    u = l2norm(u, dim = -2)
+    v = l2norm(v, dim = -1)
+
+    out = u @ torch.diag_embed(s) @ v.mT
+
+    if transpose:
+        out = out.transpose(-1, -2)
+
+    return out
+
+# wrapper that manages network to population
+# able to receive fitness and employ selection + crossover
+
+class PopulationWrapper(Module):
+    def __init__(
+        self,
+        net: Module,
+        pop_size,
+        num_selected,
+        tournament_size,
+        learning_rate = 1e-3,
+        init_std_dev = 1e-1
+    ):
+        super().__init__()
+        assert num_selected < pop_size
+        assert tournament_size < num_selected
+
+        self.num_selected = num_selected
+        self.tournament_size = tournament_size
+        self.num_offsprings = pop_size - num_selected
+
+        self.net = net
+
+        params = dict(net.named_parameters())
+        device = next(iter(params.values())).device
+
+        pop_params = {name: (torch.randn((pop_size, *param.shape), device = device) * init_std_dev).requires_grad_() for name, param in params.items()}
+
+        self.param_names = pop_params.keys()
+        self.param_values = ParameterList(list(pop_params.values()))
+
+        def _forward(params, data):
+            return functional_call(net, params, data)
+
+        self.forward_pop_nets = vmap(_forward, in_dims = (0, None))
+
+    @property
+    def pop_params(self):
+        return dict(zip(self.param_names, self.param_values))
+
+    def parameters(self):
+        return self.pop_params.values()
+
+    def genetic_algorithm_step_(
+        self,
+        fitnesses
+    ):
+        fitnesses = reduce(fitnesses, 'p b -> p', 'mean') # average across samples
+
+        num_selected = self.num_selected
+
+        # selection
+
+        sel_fitnesses, sel_indices = fitnesses.topk(num_selected, dim = -1)
+
+        # tournaments
+
+        tourn_ids = torch.randn((self.num_offsprings, num_selected), device = fitnesses.device).argsort(dim = -1)[..., :self.tournament_size]
+        tourn_scores = sel_fitnesses[tourn_ids]
+
+        winner_ids = tourn_ids.gather(-1, tourn_scores.topk(2, dim = -1).indices)
+        winner_ids = rearrange(winner_ids, 'offsprings couple -> couple offsprings')
+        parent_ids = sel_indices[winner_ids]
+
+        # crossover
+
+        for param in self.param_values:
+            parents = param[sel_indices]
+            parent1, parent2 = param[parent_ids]
+
+            children = parent1.lerp_(parent2, uniform(0.25, 0.75))
+
+            pop = torch.cat((parents, children))
+
+            param.data.copy_(pop)
+
+    def forward(
+        self,
+        data,
+        labels = None,
+        return_logits_with_loss = False
+    ):
+        out = self.forward_pop_nets(dict(self.pop_params), data)
+
+        if not exists(labels):
+            return out
+
+        logits = out
+        pop_size = logits.shape[0]
+
+        losses = F.cross_entropy(
+            rearrange(logits, 'p b ... l -> (p b) l ...'),
+            repeat(labels, 'b ... -> (p b) ...', p = pop_size),
+            reduction = 'none'
+        )
+
+        losses = rearrange(losses, '(p b) ... -> p b ...', p = pop_size)
+
+        if not return_logits_with_loss:
+            return losses
+
+        return losses, logits
+
+# test
+
+if __name__ == '__main__':
+    w1 = torch.randn(2, 32, 16)
+    w2 = torch.randn(2, 32, 16)
+
+    child = crossover_weights(w1, w2)
+    mutated_w1 = mutate_weight(w1)
+
+    assert child.shape == w2.shape
{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/pyproject.toml
RENAMED
@@ -1,6 +1,6 @@
 [project]
 name = "evolutionary-policy-optimization"
-version = "0.2.3"
+version = "0.2.5"
 description = "EPO - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -46,6 +46,11 @@ examples = [
     "tqdm",
 ]
 
+experimental = [
+    "tensordict",
+    "torchvision"
+]
+
 test = [
     "pytest",
     "ruff>=0.4.2",
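The new optional dependency group mirrors the metadata change in PKG-INFO above. Assuming standard pip extras syntax, it should be installable with `pip install evolutionary-policy-optimization[experimental]`; torchvision is used by the new MNIST training script below, while tensordict is not referenced by the files in this diff.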
evolutionary_policy_optimization-0.2.5/train_crossover_weight_space.py
ADDED
@@ -0,0 +1,146 @@
+from random import uniform
+
+import torch
+from torch import nn, tensor, randn
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
+from torch.optim import Adam
+
+import torchvision
+import torchvision.transforms as T
+
+from einops.layers.torch import Rearrange
+from einops import repeat, rearrange
+
+from evolutionary_policy_optimization.experimental import PopulationWrapper
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+def divisible_by(num, den):
+    return (num % den) == 0
+
+# data
+
+class MnistDataset(Dataset):
+    def __init__(self, train):
+        self.mnist = torchvision.datasets.MNIST('./data/mnist', train = train, download = True)
+
+    def __len__(self):
+        return len(self.mnist)
+
+    def __getitem__(self, idx):
+        pil, labels = self.mnist[idx]
+        digit_tensor = T.PILToTensor()(pil)
+        return (digit_tensor / 255.).float().to(device), tensor(labels, device = device)
+
+batch = 32
+
+train_dataset = MnistDataset(train = True)
+dl = DataLoader(train_dataset, batch_size = batch, shuffle = True, drop_last = True)
+
+eval_dataset = MnistDataset(train = False)
+eval_dl = DataLoader(eval_dataset, batch_size = batch, shuffle = True, drop_last = True)
+
+def cycle(dl):
+    while True:
+        for batch in dl:
+            yield batch
+
+# network
+
+net = nn.Sequential(
+    Rearrange('... c h w -> ... (c h w)'),
+    nn.Linear(784, 64, bias = False),
+    nn.ReLU(),
+    nn.Linear(64, 10, bias = False),
+).to(device)
+
+# regular gradient descent
+
+optim = Adam(net.parameters(), lr = 1e-3)
+
+iter_train_dl = cycle(dl)
+iter_eval_dl = cycle(eval_dl)
+
+for i in range(1000):
+
+    data, labels = next(iter_train_dl)
+
+    logits = net(data)
+
+    loss = F.cross_entropy(logits, labels)
+    loss.backward()
+
+    print(f'{i}: {loss.item():.3f}')
+
+    optim.step()
+    optim.zero_grad()
+
+    if divisible_by(i + 1, 100):
+        with torch.no_grad():
+            eval_data, labels = next(iter_eval_dl)
+            logits = net(eval_data)
+            eval_loss = F.cross_entropy(logits, labels)
+
+            total = labels.shape[0]
+            correct = (logits.argmax(dim = -1) == labels).long().sum().item()
+
+            print(f'{i}: eval loss: {eval_loss.item():.3f}')
+            print(f'{i}: accuracy: {correct} / {total}')
+
+# periodic crossover from genetic algorithm on population of networks
+# pop stands for population
+
+pop_size = 100
+learning_rate = 3e-4
+
+pop_net = PopulationWrapper(
+    net,
+    pop_size = pop_size,
+    num_selected = 25,
+    tournament_size = 5,
+    learning_rate = 1e-3
+)
+
+optim = Adam(pop_net.parameters(), lr = learning_rate)
+
+for i in range(1000):
+    pop_net.train()
+
+    data, labels = next(iter_train_dl)
+
+    losses = pop_net(data, labels)
+
+    losses.sum(dim = 0).mean().backward()
+
+    print(f'{i}: loss: {losses.mean().item():.3f}')
+
+    optim.step()
+    optim.zero_grad()
+
+    # evaluate
+
+    if divisible_by(i + 1, 100):
+
+        with torch.no_grad():
+
+            pop_net.eval()
+
+            eval_data, labels = next(iter_eval_dl)
+            eval_loss, logits = pop_net(eval_data, labels, return_logits_with_loss = True)
+
+            total = labels.shape[0] * pop_size
+            correct = (logits.argmax(dim = -1) == labels).long().sum().item()
+
+            print(f'{i}: eval loss: {eval_loss.mean().item():.3f}')
+            print(f'{i}: accuracy: {correct} / {total}')
+
+        # genetic algorithm on population
+
+        fitnesses = 1. / eval_loss
+
+        pop_net.genetic_algorithm_step_(fitnesses)
+
+        # new optim
+
+        optim = Adam(pop_net.parameters(), lr = learning_rate)
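One design note on the loop above: the Adam optimizer is re-created after every genetic_algorithm_step_, presumably because selection and crossover overwrite the population parameters in place, so the old optimizer's per-parameter moment estimates would no longer correspond to the individuals they were accumulated for. Fitness is simply the inverse of the evaluation loss, so lower-loss members of the population rank as fitter and are more likely to survive selection and parent the next offspring.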
evolutionary_policy_optimization-0.2.3/evolutionary_policy_optimization/experimental.py
DELETED
@@ -1,80 +0,0 @@
-import torch
-import torch.nn.functional as F
-from einops import rearrange
-
-def l2norm(t, dim = -1):
-    return F.normalize(t, dim = dim)
-
-def crossover_weights(w1, w2, transpose = False):
-    assert w2.shape == w2.shape
-
-    no_batch = w1.ndim == 2
-
-    if no_batch:
-        w1, w2 = tuple(rearrange(t, '... -> 1 ...') for t in (w1, w2))
-
-    assert w1.ndim == 3
-
-    if transpose:
-        w1, w2 = tuple(rearrange(t, 'b i j -> b j i') for t in (w1, w2))
-
-    rank = min(w2.shape[1:])
-    assert rank >= 2
-
-    batch = w1.shape[0]
-
-    u1, s1, v1 = torch.svd(w1)
-    u2, s2, v2 = torch.svd(w2)
-
-    batch_randperm = torch.randn((batch, rank), device = w1.device).argsort(dim = -1)
-    mask = batch_randperm < (rank // 2)
-
-    u = torch.where(mask[:, None, :], u1, u2)
-    s = torch.where(mask, s1, s2)
-    v = torch.where(mask[:, :, None], v1, v2)
-
-    out = u @ torch.diag_embed(s) @ v.mT
-
-    if transpose:
-        out = rearrange(out, 'b j i -> b i j')
-
-    if no_batch:
-        out = rearrange(out, '1 ... -> ...')
-
-    return out
-
-def mutate_weight(
-    w,
-    transpose = False,
-    mutation_strength = 1.
-):
-
-    if transpose:
-        w = w.transpose(-1, -2)
-
-    rank = min(w2.shape[1:])
-    assert rank >= 2
-
-    u, s, v = torch.svd(w)
-
-    u = u + torch.randn_like(u) * mutation_strength
-    v = v + torch.randn_like(v) * mutation_strength
-
-    u = l2norm(u, dim = -2)
-    v = l2norm(v, dim = -1)
-
-    out = u @ torch.diag_embed(s) @ v.mT
-
-    if transpose:
-        out = out.transpose(-1, -2)
-
-    return out
-
-if __name__ == '__main__':
-    w1 = torch.randn(32, 16)
-    w2 = torch.randn(32, 16)
-
-    child = crossover_weights(w1, w2)
-    mutated_w1 = mutate_weight(w1)
-
-    assert child.shape == w2.shape
{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/.github/workflows/lint.yml
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/.github/workflows/python-publish.yml
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/.github/workflows/test.yml
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/LICENSE
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/evolutionary_policy_optimization/__init__.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/evolutionary_policy_optimization/distributed.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/evolutionary_policy_optimization/env_wrappers.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/evolutionary_policy_optimization/epo.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/evolutionary_policy_optimization/mock_env.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/requirements.txt
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/tests/test_epo.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.2.3 → evolutionary_policy_optimization-0.2.5}/train_gym.py
RENAMED
File without changes