evolutionary-policy-optimization 0.2.10-py3-none-any.whl → 0.2.12-py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
--- evolutionary_policy_optimization/distributed.py
+++ evolutionary_policy_optimization/distributed.py
@@ -61,6 +61,8 @@ def has_only_one_value(t):
     return (t == t[0]).all()
 
 def all_gather_variable_dim(t, dim = 0, sizes = None):
+    device = t.device
+
     if not exists(sizes):
         sizes = gather_sizes(t, dim = dim)
 
@@ -77,7 +79,7 @@ def all_gather_variable_dim(t, dim = 0, sizes = None):
     gathered_tensors = torch.cat(gathered_tensors, dim = dim)
     seq = torch.arange(max_size, device = device)
 
-    mask = einx.less('j i -> (i j)', seq, sizes)
+    mask = einx.less('j, i -> (i j)', seq, sizes)
     seq = torch.arange(mask.shape[-1], device = device)
     indices = seq[mask]
 
--- evolutionary_policy_optimization/epo.py
+++ evolutionary_policy_optimization/epo.py
@@ -644,11 +644,11 @@ class Critic(Module):
         loss_fn = partial(self.loss_fn, reduction = 'none')
 
         if use_improved:
-            clipped_target = target.clamp(-eps_clip, eps_clip)
-
             old_values_lo = old_values - eps_clip
             old_values_hi = old_values + eps_clip
 
+            clipped_target = target.clamp(old_values_lo, old_values_hi)
+
             def is_between(lo, hi):
                 return (lo < value) & (value < hi)
 
--- evolutionary_policy_optimization-0.2.10.dist-info/METADATA
+++ evolutionary_policy_optimization-0.2.12.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.2.10
+Version: 0.2.12
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -87,10 +87,10 @@ latent_pool = LatentGenePool(
     dim_latent = 32,
 )
 
-state = torch.randn(1, 512)
+state = torch.randn(1, 32)
 
-actor = Actor(512, dim_hiddens = (256, 128), num_actions = 4, dim_latent = 32)
-critic = Critic(512, dim_hiddens = (256, 128, 64), dim_latent = 32)
+actor = Actor(dim_state = 32, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
+critic = Critic(dim_state = 32, dim = 256, mlp_depth = 3, dim_latent = 32)
 
 latent = latent_pool(latent_id = 2)
 
@@ -289,4 +289,15 @@ That's it
 }
 ```
 
+```bibtex
+@article{Jiang2022GeneralIR,
+    title = {General intelligence requires rethinking exploration},
+    author = {Minqi Jiang and Tim Rocktaschel and Edward Grefenstette},
+    journal = {Royal Society Open Science},
+    year = {2022},
+    volume = {10},
+    url = {https://api.semanticscholar.org/CorpusID:253523156}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
--- evolutionary_policy_optimization-0.2.10.dist-info/RECORD
+++ evolutionary_policy_optimization-0.2.12.dist-info/RECORD
@@ -1,10 +1,10 @@
 evolutionary_policy_optimization/__init__.py,sha256=NyiYDYU7DlpmOTM7xiBQET3r1WwX0ebrgMCBLSQrW3c,288
-evolutionary_policy_optimization/distributed.py,sha256=MxyxqxANAuOm8GYb0Yu09EHd_aVLhK2uwgrfuVWciPU,2342
+evolutionary_policy_optimization/distributed.py,sha256=clN8Bvhb6JIQy2F8FTF312B0RN3iYoPkKjZreBtAwks,2366
 evolutionary_policy_optimization/env_wrappers.py,sha256=bDL06o9_b1iW6k3fw2xifnOnYlzs643tdW6Yv2gsIdw,803
-evolutionary_policy_optimization/epo.py,sha256=adUHDtgrXnSOoPLnbPy6xxGl6QLYxbN1mB_sl2KPwgI,53135
+evolutionary_policy_optimization/epo.py,sha256=9zOGucA3jWODOdtk9lBfUAaScOnbVfTLqYV8_x1BhKk,53144
 evolutionary_policy_optimization/experimental.py,sha256=QZG0__wwFqHN_LJK7e-mHxlIL1mwjlvG6o6bcOpeAKg,6166
 evolutionary_policy_optimization/mock_env.py,sha256=TLyyRm6tOD0Kdn9QqJJQriaSnsR-YmNQHo4OohmZFG4,1410
-evolutionary_policy_optimization-0.2.10.dist-info/METADATA,sha256=dLg2z2BFihNUd0dirhDcabw9yNj0V_2v42Tk_Pg-EwY,9601
-evolutionary_policy_optimization-0.2.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.2.10.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.2.10.dist-info/RECORD,,
+evolutionary_policy_optimization-0.2.12.dist-info/METADATA,sha256=tZIKqUx-akzbJZWd0h0hM9F8FWbq6SjpJ7Vqstk4dNo,9972
+evolutionary_policy_optimization-0.2.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.2.12.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.2.12.dist-info/RECORD,,
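For reference when reading these rows: each RECORD entry is `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64 SHA-256 of the file with the `=` padding stripped (per the wheel spec, PEP 427). A small sketch to recompute an entry locally:

```python
import base64, hashlib

def record_entry(path):
    # rebuild a wheel RECORD line: urlsafe base64 of the sha256 digest,
    # '=' padding stripped, followed by the file size in bytes
    data = open(path, 'rb').read()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b'=')
    return f"{path},sha256={digest.decode()},{len(data)}"

print(record_entry('evolutionary_policy_optimization/distributed.py'))
```

The size deltas line up with the diffs above: `distributed.py` grows by 24 bytes (2342 → 2366), consistent with the added `device` line, its trailing blank line, and the one-character einx pattern fix, and `epo.py` grows by 9 bytes (53135 → 53144), matching the longer clamp arguments.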