torch-rb 0.1.2 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/LICENSE.txt +46 -22
  4. data/README.md +18 -6
  5. data/ext/torch/ext.cpp +148 -369
  6. data/ext/torch/extconf.rb +6 -0
  7. data/ext/torch/nn_functions.cpp +615 -0
  8. data/ext/torch/nn_functions.hpp +6 -0
  9. data/ext/torch/templates.cpp +55 -0
  10. data/ext/torch/templates.hpp +242 -0
  11. data/ext/torch/tensor_functions.cpp +1920 -0
  12. data/ext/torch/tensor_functions.hpp +6 -0
  13. data/ext/torch/torch_functions.cpp +2975 -0
  14. data/ext/torch/torch_functions.hpp +6 -0
  15. data/lib/torch.rb +240 -131
  16. data/lib/torch/ext.bundle +0 -0
  17. data/lib/torch/inspector.rb +27 -22
  18. data/lib/torch/native/dispatcher.rb +48 -0
  19. data/lib/torch/native/function.rb +109 -0
  20. data/lib/torch/native/generator.rb +168 -0
  21. data/lib/torch/native/native_functions.yaml +6837 -0
  22. data/lib/torch/native/parser.rb +134 -0
  23. data/lib/torch/nn/alpha_dropout.rb +9 -0
  24. data/lib/torch/nn/avg_pool1d.rb +18 -0
  25. data/lib/torch/nn/avg_pool2d.rb +19 -0
  26. data/lib/torch/nn/avg_pool3d.rb +19 -0
  27. data/lib/torch/nn/avg_poolnd.rb +9 -0
  28. data/lib/torch/nn/batch_norm.rb +75 -0
  29. data/lib/torch/nn/batch_norm1d.rb +11 -0
  30. data/lib/torch/nn/batch_norm2d.rb +11 -0
  31. data/lib/torch/nn/batch_norm3d.rb +11 -0
  32. data/lib/torch/nn/bce_loss.rb +13 -0
  33. data/lib/torch/nn/bce_with_logits_loss.rb +15 -0
  34. data/lib/torch/nn/bilinear.rb +38 -0
  35. data/lib/torch/nn/constant_pad1d.rb +10 -0
  36. data/lib/torch/nn/constant_pad2d.rb +10 -0
  37. data/lib/torch/nn/constant_pad3d.rb +10 -0
  38. data/lib/torch/nn/constant_padnd.rb +18 -0
  39. data/lib/torch/nn/conv1d.rb +22 -0
  40. data/lib/torch/nn/conv2d.rb +16 -38
  41. data/lib/torch/nn/conv3d.rb +22 -0
  42. data/lib/torch/nn/convnd.rb +41 -0
  43. data/lib/torch/nn/cosine_embedding_loss.rb +14 -0
  44. data/lib/torch/nn/cosine_similarity.rb +15 -0
  45. data/lib/torch/nn/cross_entropy_loss.rb +14 -0
  46. data/lib/torch/nn/ctc_loss.rb +15 -0
  47. data/lib/torch/nn/dropout.rb +9 -0
  48. data/lib/torch/nn/dropout2d.rb +9 -0
  49. data/lib/torch/nn/dropout3d.rb +9 -0
  50. data/lib/torch/nn/dropoutnd.rb +15 -0
  51. data/lib/torch/nn/embedding.rb +52 -0
  52. data/lib/torch/nn/embedding_bag.rb +34 -0
  53. data/lib/torch/nn/feature_alpha_dropout.rb +9 -0
  54. data/lib/torch/nn/fold.rb +20 -0
  55. data/lib/torch/nn/functional.rb +411 -22
  56. data/lib/torch/nn/group_norm.rb +36 -0
  57. data/lib/torch/nn/gru.rb +49 -0
  58. data/lib/torch/nn/hardshrink.rb +18 -0
  59. data/lib/torch/nn/hinge_embedding_loss.rb +14 -0
  60. data/lib/torch/nn/identity.rb +14 -0
  61. data/lib/torch/nn/init.rb +58 -1
  62. data/lib/torch/nn/instance_norm.rb +20 -0
  63. data/lib/torch/nn/instance_norm1d.rb +18 -0
  64. data/lib/torch/nn/instance_norm2d.rb +11 -0
  65. data/lib/torch/nn/instance_norm3d.rb +11 -0
  66. data/lib/torch/nn/kl_div_loss.rb +13 -0
  67. data/lib/torch/nn/l1_loss.rb +13 -0
  68. data/lib/torch/nn/layer_norm.rb +35 -0
  69. data/lib/torch/nn/leaky_relu.rb +20 -0
  70. data/lib/torch/nn/linear.rb +12 -11
  71. data/lib/torch/nn/local_response_norm.rb +21 -0
  72. data/lib/torch/nn/log_sigmoid.rb +9 -0
  73. data/lib/torch/nn/log_softmax.rb +14 -0
  74. data/lib/torch/nn/loss.rb +10 -0
  75. data/lib/torch/nn/lp_pool1d.rb +9 -0
  76. data/lib/torch/nn/lp_pool2d.rb +9 -0
  77. data/lib/torch/nn/lp_poolnd.rb +22 -0
  78. data/lib/torch/nn/lstm.rb +66 -0
  79. data/lib/torch/nn/margin_ranking_loss.rb +14 -0
  80. data/lib/torch/nn/max_pool1d.rb +9 -0
  81. data/lib/torch/nn/max_pool2d.rb +9 -0
  82. data/lib/torch/nn/max_pool3d.rb +9 -0
  83. data/lib/torch/nn/max_poolnd.rb +19 -0
  84. data/lib/torch/nn/max_unpool1d.rb +16 -0
  85. data/lib/torch/nn/max_unpool2d.rb +16 -0
  86. data/lib/torch/nn/max_unpool3d.rb +16 -0
  87. data/lib/torch/nn/max_unpoolnd.rb +9 -0
  88. data/lib/torch/nn/module.rb +201 -20
  89. data/lib/torch/nn/mse_loss.rb +2 -2
  90. data/lib/torch/nn/multi_label_margin_loss.rb +13 -0
  91. data/lib/torch/nn/multi_label_soft_margin_loss.rb +13 -0
  92. data/lib/torch/nn/multi_margin_loss.rb +17 -0
  93. data/lib/torch/nn/nll_loss.rb +14 -0
  94. data/lib/torch/nn/pairwise_distance.rb +16 -0
  95. data/lib/torch/nn/parameter.rb +2 -2
  96. data/lib/torch/nn/poisson_nll_loss.rb +16 -0
  97. data/lib/torch/nn/prelu.rb +19 -0
  98. data/lib/torch/nn/reflection_pad1d.rb +10 -0
  99. data/lib/torch/nn/reflection_pad2d.rb +10 -0
  100. data/lib/torch/nn/reflection_padnd.rb +13 -0
  101. data/lib/torch/nn/relu.rb +8 -3
  102. data/lib/torch/nn/replication_pad1d.rb +10 -0
  103. data/lib/torch/nn/replication_pad2d.rb +10 -0
  104. data/lib/torch/nn/replication_pad3d.rb +10 -0
  105. data/lib/torch/nn/replication_padnd.rb +13 -0
  106. data/lib/torch/nn/rnn.rb +22 -0
  107. data/lib/torch/nn/rnn_base.rb +198 -0
  108. data/lib/torch/nn/sequential.rb +1 -10
  109. data/lib/torch/nn/sigmoid.rb +9 -0
  110. data/lib/torch/nn/smooth_l1_loss.rb +13 -0
  111. data/lib/torch/nn/soft_margin_loss.rb +13 -0
  112. data/lib/torch/nn/softmax.rb +18 -0
  113. data/lib/torch/nn/softmax2d.rb +10 -0
  114. data/lib/torch/nn/softmin.rb +14 -0
  115. data/lib/torch/nn/softplus.rb +19 -0
  116. data/lib/torch/nn/softshrink.rb +18 -0
  117. data/lib/torch/nn/softsign.rb +9 -0
  118. data/lib/torch/nn/tanh.rb +9 -0
  119. data/lib/torch/nn/tanhshrink.rb +9 -0
  120. data/lib/torch/nn/triplet_margin_loss.rb +18 -0
  121. data/lib/torch/nn/unfold.rb +19 -0
  122. data/lib/torch/nn/utils.rb +25 -0
  123. data/lib/torch/nn/weighted_loss.rb +10 -0
  124. data/lib/torch/nn/zero_pad2d.rb +9 -0
  125. data/lib/torch/optim/adadelta.rb +57 -0
  126. data/lib/torch/optim/adagrad.rb +71 -0
  127. data/lib/torch/optim/adam.rb +81 -0
  128. data/lib/torch/optim/adamax.rb +68 -0
  129. data/lib/torch/optim/adamw.rb +82 -0
  130. data/lib/torch/optim/asgd.rb +65 -0
  131. data/lib/torch/optim/lr_scheduler/lr_scheduler.rb +33 -0
  132. data/lib/torch/optim/lr_scheduler/step_lr.rb +17 -0
  133. data/lib/torch/optim/optimizer.rb +56 -0
  134. data/lib/torch/optim/rmsprop.rb +76 -0
  135. data/lib/torch/optim/rprop.rb +68 -0
  136. data/lib/torch/optim/sgd.rb +48 -16
  137. data/lib/torch/random.rb +10 -0
  138. data/lib/torch/tensor.rb +71 -30
  139. data/lib/torch/utils/data/data_loader.rb +10 -4
  140. data/lib/torch/utils/data/tensor_dataset.rb +3 -0
  141. data/lib/torch/version.rb +1 -1
  142. metadata +123 -6
@@ -2,28 +2,19 @@ module Torch
2
2
  module NN
3
3
  class Sequential < Module
4
4
  def initialize(*args)
5
- @modules = {}
5
+ super()
6
6
  # TODO support hash arg (named modules)
7
7
  args.each_with_index do |mod, idx|
8
8
  add_module(idx.to_s, mod)
9
9
  end
10
10
  end
11
11
 
12
- def add_module(name, mod)
13
- # TODO add checks
14
- @modules[name] = mod
15
- end
16
-
17
12
  def forward(input)
18
13
  @modules.values.each do |mod|
19
14
  input = mod.call(input)
20
15
  end
21
16
  input
22
17
  end
23
-
24
- def parameters
25
- @modules.flat_map { |_, mod| mod.parameters }
26
- end
27
18
  end
28
19
  end
29
20
  end
@@ -0,0 +1,9 @@
1
+ module Torch
2
+ module NN
3
+ class Sigmoid < Module
4
+ def forward(input)
5
+ Torch.sigmoid(input)
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,13 @@
1
+ module Torch
2
+ module NN
3
+ class SmoothL1Loss < Loss
4
+ def initialize(reduction: "mean")
5
+ super(reduction)
6
+ end
7
+
8
+ def forward(input, target)
9
+ F.smooth_l1_loss(input, target, reduction: @reduction)
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,13 @@
1
+ module Torch
2
+ module NN
3
+ class SoftMarginLoss < Loss
4
+ def initialize(reduction: "mean")
5
+ super(reduction)
6
+ end
7
+
8
+ def forward(input, target)
9
+ F.soft_margin_loss(input, target, reduction: @reduction)
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,18 @@
1
+ module Torch
2
+ module NN
3
+ class Softmax < Module
4
+ def initialize(dim: nil)
5
+ super()
6
+ @dim = dim
7
+ end
8
+
9
+ def forward(input)
10
+ F.softmax(input, dim: @dim)
11
+ end
12
+
13
+ def extra_inspect
14
+ format("dim: %s", @dim)
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,10 @@
1
+ module Torch
2
+ module NN
3
+ class Softmax2d < Module
4
+ def forward(input)
5
+ raise ArgumentError, "Softmax2d requires a 4D tensor as input" unless input.dim == 4
6
+ F.softmax(input, dim: 1)
7
+ end
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,14 @@
1
+ module Torch
2
+ module NN
3
+ class Softmin < Module
4
+ def initialize(dim: nil)
5
+ super()
6
+ @dim = dim
7
+ end
8
+
9
+ def forward(input)
10
+ F.softmin(input, dim: @dim)
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,19 @@
1
+ module Torch
2
+ module NN
3
+ class Softplus < Module
4
+ def initialize(beta: 1, threshold: 20)
5
+ super()
6
+ @beta = beta
7
+ @threshold = threshold
8
+ end
9
+
10
+ def forward(input)
11
+ F.softplus(input, beta: @beta, threshold: @threshold)
12
+ end
13
+
14
+ def extra_inspect
15
+ format("beta: %s, threshold: %s", @beta, @threshold)
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,18 @@
1
+ module Torch
2
+ module NN
3
+ class Softshrink < Module
4
+ def initialize(lambd: 0.5)
5
+ super()
6
+ @lambd = lambd
7
+ end
8
+
9
+ def forward(input)
10
+ F.softshrink(input, @lambd)
11
+ end
12
+
13
+ def extra_inspect
14
+ @lambd.to_s
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,9 @@
1
+ module Torch
2
+ module NN
3
+ class Softsign < Module
4
+ def forward(input)
5
+ F.softsign(input)
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ module Torch
2
+ module NN
3
+ class Tanh < Module
4
+ def forward(input)
5
+ Torch.tanh(input)
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ module Torch
2
+ module NN
3
+ class Tanhshrink < Module
4
+ def forward(input)
5
+ F.tanhshrink(input)
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,18 @@
1
+ module Torch
2
+ module NN
3
+ class TripletMarginLoss < Loss
4
+ def initialize(margin: 1.0, p: 2.0, eps: 1e-6, swap: false, reduction: "mean")
5
+ super(reduction)
6
+ @margin = margin
7
+ @p = p
8
+ @eps = eps
9
+ @swap = swap
10
+ end
11
+
12
+ def forward(anchor, positive, negative)
13
+ F.triplet_margin_loss(anchor, positive, negative, margin: @margin, p: @p,
14
+ eps: @eps, swap: @swap, reduction: @reduction)
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,19 @@
1
+ module Torch
2
+ module NN
3
+ class Unfold < Module
4
+ def initialize(kernel_size, dilation: 1, padding: 0, stride: 1)
5
+ super()
6
+ @kernel_size = kernel_size
7
+ @dilation = dilation
8
+ @padding = padding
9
+ @stride = stride
10
+ end
11
+
12
+ def forward(input)
13
+ F.unfold(input, @kernel_size, dilation: @dilation, padding: @padding, stride: @stride)
14
+ end
15
+
16
+ # TODO add extra_inspect
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,25 @@
1
+ module Torch
2
+ module NN
3
+ module Utils
4
+ def _single(value)
5
+ _ntuple(1, value)
6
+ end
7
+
8
+ def _pair(value)
9
+ _ntuple(2, value)
10
+ end
11
+
12
+ def _triple(value)
13
+ _ntuple(3, value)
14
+ end
15
+
16
+ def _quadrupal(value)
17
+ _ntuple(4, value)
18
+ end
19
+
20
+ def _ntuple(n, value)
21
+ value.is_a?(Array) ? value : [value] * n
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,10 @@
1
+ module Torch
2
+ module NN
3
+ class WeightedLoss < Loss
4
+ def initialize(weight, reduction)
5
+ super(reduction)
6
+ register_buffer("weight", weight)
7
+ end
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,9 @@
1
+ module Torch
2
+ module NN
3
+ class ZeroPad2d < ConstantPad2d
4
+ def initialize(padding)
5
+ super(padding, 0.0)
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,57 @@
1
+ # ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adadelta.py
2
+ module Torch
3
+ module Optim
4
+ class Adadelta < Optimizer
5
+ def initialize(params, lr: 1.0, rho: 0.9, eps: 1e-6, weight_decay: 0)
6
+ raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
7
+ raise ArgumentError, "Invalid rho value: #{rho}" if rho < 0 || rho > 1
8
+ raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
9
+ raise ArgumentError, "Invalid weight_decay value: #{weight_decay}" if weight_decay < 0
10
+
11
+ defaults = {lr: lr, rho: rho, eps: eps, weight_decay: weight_decay}
12
+ super(params, defaults)
13
+ end
14
+
15
+ def step(closure = nil)
16
+ loss = nil
17
+ if closure
18
+ loss = closure.call
19
+ end
20
+
21
+ @param_groups.each do |group|
22
+ group[:params].each do |p|
23
+ next unless p.grad
24
+ grad = p.grad.data
25
+ if grad.sparse?
26
+ raise Error, "Adadelta does not support sparse gradients"
27
+ end
28
+ state = @state[p]
29
+
30
+ if state.size == 0
31
+ state[:step] = 0
32
+ state[:square_avg] = Torch.zeros_like(p.data)
33
+ state[:acc_delta] = Torch.zeros_like(p.data)
34
+ end
35
+
36
+ square_avg, acc_delta = state[:square_avg], state[:acc_delta]
37
+ rho, eps = group[:rho], group[:eps]
38
+
39
+ state[:step] += 1
40
+
41
+ if group[:weight_decay] != 0
42
+ grad = grad.add(group[:weight_decay], p.data)
43
+ end
44
+
45
+ square_avg.mul!(rho).addcmul!(1 - rho, grad, grad)
46
+ std = square_avg.add(eps).sqrt!
47
+ delta = acc_delta.add(eps).sqrt!.div!(std).mul!(grad)
48
+ p.data.add!(-group[:lr], delta)
49
+ acc_delta.mul!(rho).addcmul!(1 - rho, delta, delta)
50
+ end
51
+ end
52
+
53
+ loss
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,71 @@
1
+ # ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adagrad.py
2
+ module Torch
3
+ module Optim
4
+ class Adagrad < Optimizer
5
+ def initialize(params, lr: 1e-2, lr_decay: 0, weight_decay: 0, initial_accumulator_value: 0, eps: 1e-10)
6
+ raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
7
+ raise ArgumentError, "Invalid lr_decay value: #{lr_decay}" if lr_decay < 0
8
+ raise ArgumentError, "Invalid initial_accumulator_value value: #{initial_accumulator_value}" if initial_accumulator_value < 0
9
+ raise ArgumentError, "Invalid weight_decay value: #{weight_decay}" if weight_decay < 0
10
+ raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
11
+
12
+ defaults = {lr: lr, lr_decay: lr_decay, eps: eps, weight_decay: weight_decay, initial_accumulator_value: initial_accumulator_value}
13
+ super(params, defaults)
14
+
15
+ @param_groups.each do |group|
16
+ group[:params].each do |p|
17
+ state = @state[p]
18
+ state[:step] = 0
19
+ state[:sum] = Torch.full_like(p.data, initial_accumulator_value)
20
+ end
21
+ end
22
+ end
23
+
24
+ def share_memory
25
+ @param_groups.each do |group|
26
+ group[:params].each do |p|
27
+ state = @state[p]
28
+ state[:sum].share_memory!
29
+ end
30
+ end
31
+ end
32
+
33
+ def step(closure = nil)
34
+ loss = nil
35
+ if closure
36
+ loss = closure.call
37
+ end
38
+
39
+ @param_groups.each do |group|
40
+ group[:params].each do |p|
41
+ next unless p.grad
42
+
43
+ grad = p.grad.data
44
+ state = @state[p]
45
+
46
+ state[:step] += 1
47
+
48
+ if group[:weight_decay] != 0
49
+ if p.grad.data.sparse?
50
+ raise Error, "weight_decay option is not compatible with sparse gradients"
51
+ end
52
+ grad = grad.add(group[:weight_decay], p.data)
53
+ end
54
+
55
+ clr = group[:lr] / (1 + (state[:step] - 1) * group[:lr_decay])
56
+
57
+ if grad.sparse?
58
+ raise NotImplementedYet
59
+ else
60
+ state[:sum].addcmul!(1, grad, grad)
61
+ std = state[:sum].sqrt.add!(group[:eps])
62
+ p.data.addcdiv!(-clr, grad, std)
63
+ end
64
+ end
65
+ end
66
+
67
+ loss
68
+ end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,81 @@
1
+ # ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adam.py
2
+ module Torch
3
+ module Optim
4
+ class Adam < Optimizer
5
+ def initialize(params, lr: 1e-3, betas: [0.9, 0.999], eps: 1e-8, weight_decay: 0, amsgrad: false)
6
+ raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
7
+ raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
8
+ raise ArgumentError, "Invalid beta parameter at index 0: #{betas[0]}" if betas[0] < 0 || betas[0] >= 1
9
+ raise ArgumentError, "Invalid beta parameter at index 1: #{betas[1]}" if betas[1] < 0 || betas[1] >= 1
10
+
11
+ defaults = {lr: lr, betas: betas, eps: eps, weight_decay: weight_decay, amsgrad: amsgrad}
12
+ super(params, defaults)
13
+ end
14
+
15
+ def step(closure = nil)
16
+ loss = nil
17
+ if closure
18
+ loss = closure.call
19
+ end
20
+
21
+ @param_groups.each do |group|
22
+ group[:params].each do |p|
23
+ next unless p.grad
24
+ grad = p.grad.data
25
+ if grad.sparse?
26
+ raise Error, "Adam does not support sparse gradients, please consider SparseAdam instead"
27
+ end
28
+ amsgrad = group[:amsgrad]
29
+
30
+ state = @state[p]
31
+
32
+ # State initialization
33
+ if state.size == 0
34
+ state[:step] = 0
35
+ # Exponential moving average of gradient values
36
+ state[:exp_avg] = Torch.zeros_like(p.data)
37
+ # Exponential moving average of squared gradient values
38
+ state[:exp_avg_sq] = Torch.zeros_like(p.data)
39
+ if amsgrad
40
+ # Maintains max of all exp. moving avg. of sq. grad. values
41
+ state[:max_exp_avg_sq] = Torch.zeros_like(p.data)
42
+ end
43
+ end
44
+
45
+ exp_avg, exp_avg_sq = state[:exp_avg], state[:exp_avg_sq]
46
+ if amsgrad
47
+ max_exp_avg_sq = state[:max_exp_avg_sq]
48
+ end
49
+ beta1, beta2 = group[:betas]
50
+
51
+ state[:step] += 1
52
+ bias_correction1 = 1 - beta1 ** state[:step]
53
+ bias_correction2 = 1 - beta2 ** state[:step]
54
+
55
+ if group[:weight_decay] != 0
56
+ grad.add!(group[:weight_decay], p.data)
57
+ end
58
+
59
+ # Decay the first and second moment running average coefficient
60
+ exp_avg.mul!(beta1).add!(1 - beta1, grad)
61
+ exp_avg_sq.mul!(beta2).addcmul!(1 - beta2, grad, grad)
62
+ if amsgrad
63
+ # Maintains the maximum of all 2nd moment running avg. till now
64
+ Torch.max(max_exp_avg_sq, exp_avg_sq, out: max_exp_avg_sq)
65
+ # Use the max. for normalizing running avg. of gradient
66
+ denom = (max_exp_avg_sq.sqrt / Math.sqrt(bias_correction2)).add!(group[:eps])
67
+ else
68
+ denom = (exp_avg_sq.sqrt / Math.sqrt(bias_correction2)).add!(group[:eps])
69
+ end
70
+
71
+ step_size = group[:lr] / bias_correction1
72
+
73
+ p.data.addcdiv!(-step_size, exp_avg, denom)
74
+ end
75
+ end
76
+
77
+ loss
78
+ end
79
+ end
80
+ end
81
+ end