evolutionary_policy_optimization-0.1.9-py3-none-any.whl → evolutionary_policy_optimization-0.1.12-py3-none-any.whl

--- evolutionary_policy_optimization/epo.py (0.1.9)
+++ evolutionary_policy_optimization/epo.py (0.1.12)
@@ -146,6 +146,24 @@ def temp_batch_dim(fn):
 
     return inner
 
+# plasticity related
+
+def shrink_and_perturb_(
+    module,
+    shrink_factor = 0.5,
+    perturb_factor = 0.01
+):
+    # Shrink & Perturb
+    # Ash et al. https://arxiv.org/abs/1910.08475
+
+    assert 0. <= shrink_factor <= 1.
+
+    for p in module.parameters():
+        noise = torch.randn_like(p.data)
+        p.data.mul_(1. - shrink_factor).add_(noise * perturb_factor)
+
+    return module
+
 # fitness related
 
 def get_fitness_scores(
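
The new `shrink_and_perturb_` helper implements the shrink-and-perturb trick from Ash & Adams: shrink every parameter toward zero and add a small amount of Gaussian noise, which helps a warm-started network regain plasticity when training continues on new data. Below is a minimal usage sketch; the import path follows the file layout shown in the RECORD further down, while the toy network and the call schedule are illustrative assumptions, not from the package:

```python
import torch
from torch import nn

# assumes the helper added in this release, located in epo.py per the RECORD
from evolutionary_policy_optimization.epo import shrink_and_perturb_

policy = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))

for round_index in range(10):
    # ... train `policy` on the latest round of rollouts here ...

    # in-place per parameter tensor: p <- (1. - shrink_factor) * p + perturb_factor * noise,
    # shrinking weights toward zero and perturbing with Gaussian noise
    shrink_and_perturb_(policy, shrink_factor = 0.5, perturb_factor = 0.01)
```

Note the helper folds the shrink and the perturb into a single in-place update, so it can be dropped between training rounds without touching the optimizer.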
@@ -431,6 +449,8 @@ class Critic(Module):
 
         value = self.maybe_bins_to_value(logits)
 
+        loss_fn = partial(self.loss_fn, reduction = 'none')
+
         if use_improved:
             clipped_target = target.clamp(-eps_clip, eps_clip)
 
@@ -439,8 +459,8 @@ class Critic(Module):
 
             is_between = lambda lo, hi: (lo < value) & (value < hi)
 
-            clipped_loss = self.loss_fn(logits, clipped_target, reduction = 'none')
-            loss = self.loss_fn(logits, target, reduction = 'none')
+            clipped_loss = loss_fn(logits, clipped_target)
+            loss = loss_fn(logits, target)
 
             value_loss = torch.where(
                 is_between(target, old_values_lo) | is_between(old_values_hi, target),
@@ -448,10 +468,10 @@ class Critic(Module):
                 torch.min(loss, clipped_loss)
             )
         else:
-            clipped_value = old_values + (value - old_values).clamp(1. - eps_clip, 1. + eps_clip)
+            clipped_value = old_values + (value - old_values).clamp(-eps_clip, eps_clip)
 
-            loss = self.loss_fn(logits, target, reduction = 'none')
-            clipped_loss = self.loss_fn(clipped_value, target, reduction = 'none')
+            loss = loss_fn(logits, target)
+            clipped_loss = loss_fn(clipped_value, target)
 
             value_loss = torch.max(loss, clipped_loss)
 
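
Two things change in the critic's value loss. First, `reduction = 'none'` is hoisted into a single `partial`, so every call site stays per-element. Second, and more substantively, the non-improved branch previously clamped the value delta to `[1 - eps_clip, 1 + eps_clip]`, a range that belongs to PPO's multiplicative ratio clipping; for an additive difference the correct range is `[-eps_clip, eps_clip]`, which recovers the standard PPO clipped value objective. A self-contained sketch of that corrected branch, using plain MSE on scalar values (illustrative only: in the package `loss_fn` operates on the critic's logits, which `maybe_bins_to_value` suggests may be a binned value head, and `eps_clip = 0.2` here is an assumed default, not the package's):

```python
from functools import partial

import torch
import torch.nn.functional as F

def clipped_value_loss(value, old_values, target, eps_clip = 0.2):
    # per-element losses, as with reduction = 'none' in the diff
    loss_fn = partial(F.mse_loss, reduction = 'none')

    # standard PPO value clipping: the new value prediction may move
    # at most eps_clip away from the value recorded at rollout time
    clipped_value = old_values + (value - old_values).clamp(-eps_clip, eps_clip)

    loss = loss_fn(value, target)
    clipped_loss = loss_fn(clipped_value, target)

    # pessimistic elementwise max keeps the critic update conservative
    return torch.max(loss, clipped_loss).mean()

value      = torch.tensor([1.5, -0.2, 0.9])
old_values = torch.tensor([1.0,  0.0, 1.0])
target     = torch.tensor([2.0, -1.0, 0.8])

print(clipped_value_loss(value, old_values, target))
```

Under the old range, a delta of 0.5 would have been "clamped" to 0.8 and pushed the clipped value further from `old_values` rather than holding it nearby, which is what the fix corrects.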
--- evolutionary_policy_optimization-0.1.9.dist-info/METADATA
+++ evolutionary_policy_optimization-0.1.12.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.9
+Version: 0.1.12
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -215,4 +215,15 @@ agent.load('./agent.pt')
 }
 ```
 
+```bibtex
+@article{Ash2019OnTD,
+    title = {On the Difficulty of Warm-Starting Neural Network Training},
+    author = {Jordan T. Ash and Ryan P. Adams},
+    journal = {ArXiv},
+    year = {2019},
+    volume = {abs/1910.08475},
+    url = {https://api.semanticscholar.org/CorpusID:204788802}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
--- evolutionary_policy_optimization-0.1.9.dist-info/RECORD
+++ evolutionary_policy_optimization-0.1.12.dist-info/RECORD
@@ -1,10 +1,10 @@
 evolutionary_policy_optimization/__init__.py,sha256=NyiYDYU7DlpmOTM7xiBQET3r1WwX0ebrgMCBLSQrW3c,288
 evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
 evolutionary_policy_optimization/env_wrappers.py,sha256=bDL06o9_b1iW6k3fw2xifnOnYlzs643tdW6Yv2gsIdw,803
-evolutionary_policy_optimization/epo.py,sha256=9GfSvOz6SwjAuZyhyvsLHPY8b2svMQlM3BRjilwsQ-g,45717
+evolutionary_policy_optimization/epo.py,sha256=aOltJBkZVi2FxXao51zdfYaLynIi8T8v3qO1ex2HElg,46058
 evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
 evolutionary_policy_optimization/mock_env.py,sha256=TLyyRm6tOD0Kdn9QqJJQriaSnsR-YmNQHo4OohmZFG4,1410
-evolutionary_policy_optimization-0.1.9.dist-info/METADATA,sha256=y5w_NwtKNQ07HeYa5r6hcPn7RsqDpehMmt5vj6mTESQ,7316
-evolutionary_policy_optimization-0.1.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.1.9.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.1.9.dist-info/RECORD,,
+evolutionary_policy_optimization-0.1.12.dist-info/METADATA,sha256=B_eK4c8-dp4rr4K0HxRiNJqY6fK10XmwBbsPm-PE0_k,7625
+evolutionary_policy_optimization-0.1.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.1.12.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.1.12.dist-info/RECORD,,