torch-rb 0.1.2 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/LICENSE.txt +46 -22
- data/README.md +18 -6
- data/ext/torch/ext.cpp +148 -369
- data/ext/torch/extconf.rb +6 -0
- data/ext/torch/nn_functions.cpp +615 -0
- data/ext/torch/nn_functions.hpp +6 -0
- data/ext/torch/templates.cpp +55 -0
- data/ext/torch/templates.hpp +242 -0
- data/ext/torch/tensor_functions.cpp +1920 -0
- data/ext/torch/tensor_functions.hpp +6 -0
- data/ext/torch/torch_functions.cpp +2975 -0
- data/ext/torch/torch_functions.hpp +6 -0
- data/lib/torch.rb +240 -131
- data/lib/torch/ext.bundle +0 -0
- data/lib/torch/inspector.rb +27 -22
- data/lib/torch/native/dispatcher.rb +48 -0
- data/lib/torch/native/function.rb +109 -0
- data/lib/torch/native/generator.rb +168 -0
- data/lib/torch/native/native_functions.yaml +6837 -0
- data/lib/torch/native/parser.rb +134 -0
- data/lib/torch/nn/alpha_dropout.rb +9 -0
- data/lib/torch/nn/avg_pool1d.rb +18 -0
- data/lib/torch/nn/avg_pool2d.rb +19 -0
- data/lib/torch/nn/avg_pool3d.rb +19 -0
- data/lib/torch/nn/avg_poolnd.rb +9 -0
- data/lib/torch/nn/batch_norm.rb +75 -0
- data/lib/torch/nn/batch_norm1d.rb +11 -0
- data/lib/torch/nn/batch_norm2d.rb +11 -0
- data/lib/torch/nn/batch_norm3d.rb +11 -0
- data/lib/torch/nn/bce_loss.rb +13 -0
- data/lib/torch/nn/bce_with_logits_loss.rb +15 -0
- data/lib/torch/nn/bilinear.rb +38 -0
- data/lib/torch/nn/constant_pad1d.rb +10 -0
- data/lib/torch/nn/constant_pad2d.rb +10 -0
- data/lib/torch/nn/constant_pad3d.rb +10 -0
- data/lib/torch/nn/constant_padnd.rb +18 -0
- data/lib/torch/nn/conv1d.rb +22 -0
- data/lib/torch/nn/conv2d.rb +16 -38
- data/lib/torch/nn/conv3d.rb +22 -0
- data/lib/torch/nn/convnd.rb +41 -0
- data/lib/torch/nn/cosine_embedding_loss.rb +14 -0
- data/lib/torch/nn/cosine_similarity.rb +15 -0
- data/lib/torch/nn/cross_entropy_loss.rb +14 -0
- data/lib/torch/nn/ctc_loss.rb +15 -0
- data/lib/torch/nn/dropout.rb +9 -0
- data/lib/torch/nn/dropout2d.rb +9 -0
- data/lib/torch/nn/dropout3d.rb +9 -0
- data/lib/torch/nn/dropoutnd.rb +15 -0
- data/lib/torch/nn/embedding.rb +52 -0
- data/lib/torch/nn/embedding_bag.rb +34 -0
- data/lib/torch/nn/feature_alpha_dropout.rb +9 -0
- data/lib/torch/nn/fold.rb +20 -0
- data/lib/torch/nn/functional.rb +411 -22
- data/lib/torch/nn/group_norm.rb +36 -0
- data/lib/torch/nn/gru.rb +49 -0
- data/lib/torch/nn/hardshrink.rb +18 -0
- data/lib/torch/nn/hinge_embedding_loss.rb +14 -0
- data/lib/torch/nn/identity.rb +14 -0
- data/lib/torch/nn/init.rb +58 -1
- data/lib/torch/nn/instance_norm.rb +20 -0
- data/lib/torch/nn/instance_norm1d.rb +18 -0
- data/lib/torch/nn/instance_norm2d.rb +11 -0
- data/lib/torch/nn/instance_norm3d.rb +11 -0
- data/lib/torch/nn/kl_div_loss.rb +13 -0
- data/lib/torch/nn/l1_loss.rb +13 -0
- data/lib/torch/nn/layer_norm.rb +35 -0
- data/lib/torch/nn/leaky_relu.rb +20 -0
- data/lib/torch/nn/linear.rb +12 -11
- data/lib/torch/nn/local_response_norm.rb +21 -0
- data/lib/torch/nn/log_sigmoid.rb +9 -0
- data/lib/torch/nn/log_softmax.rb +14 -0
- data/lib/torch/nn/loss.rb +10 -0
- data/lib/torch/nn/lp_pool1d.rb +9 -0
- data/lib/torch/nn/lp_pool2d.rb +9 -0
- data/lib/torch/nn/lp_poolnd.rb +22 -0
- data/lib/torch/nn/lstm.rb +66 -0
- data/lib/torch/nn/margin_ranking_loss.rb +14 -0
- data/lib/torch/nn/max_pool1d.rb +9 -0
- data/lib/torch/nn/max_pool2d.rb +9 -0
- data/lib/torch/nn/max_pool3d.rb +9 -0
- data/lib/torch/nn/max_poolnd.rb +19 -0
- data/lib/torch/nn/max_unpool1d.rb +16 -0
- data/lib/torch/nn/max_unpool2d.rb +16 -0
- data/lib/torch/nn/max_unpool3d.rb +16 -0
- data/lib/torch/nn/max_unpoolnd.rb +9 -0
- data/lib/torch/nn/module.rb +201 -20
- data/lib/torch/nn/mse_loss.rb +2 -2
- data/lib/torch/nn/multi_label_margin_loss.rb +13 -0
- data/lib/torch/nn/multi_label_soft_margin_loss.rb +13 -0
- data/lib/torch/nn/multi_margin_loss.rb +17 -0
- data/lib/torch/nn/nll_loss.rb +14 -0
- data/lib/torch/nn/pairwise_distance.rb +16 -0
- data/lib/torch/nn/parameter.rb +2 -2
- data/lib/torch/nn/poisson_nll_loss.rb +16 -0
- data/lib/torch/nn/prelu.rb +19 -0
- data/lib/torch/nn/reflection_pad1d.rb +10 -0
- data/lib/torch/nn/reflection_pad2d.rb +10 -0
- data/lib/torch/nn/reflection_padnd.rb +13 -0
- data/lib/torch/nn/relu.rb +8 -3
- data/lib/torch/nn/replication_pad1d.rb +10 -0
- data/lib/torch/nn/replication_pad2d.rb +10 -0
- data/lib/torch/nn/replication_pad3d.rb +10 -0
- data/lib/torch/nn/replication_padnd.rb +13 -0
- data/lib/torch/nn/rnn.rb +22 -0
- data/lib/torch/nn/rnn_base.rb +198 -0
- data/lib/torch/nn/sequential.rb +1 -10
- data/lib/torch/nn/sigmoid.rb +9 -0
- data/lib/torch/nn/smooth_l1_loss.rb +13 -0
- data/lib/torch/nn/soft_margin_loss.rb +13 -0
- data/lib/torch/nn/softmax.rb +18 -0
- data/lib/torch/nn/softmax2d.rb +10 -0
- data/lib/torch/nn/softmin.rb +14 -0
- data/lib/torch/nn/softplus.rb +19 -0
- data/lib/torch/nn/softshrink.rb +18 -0
- data/lib/torch/nn/softsign.rb +9 -0
- data/lib/torch/nn/tanh.rb +9 -0
- data/lib/torch/nn/tanhshrink.rb +9 -0
- data/lib/torch/nn/triplet_margin_loss.rb +18 -0
- data/lib/torch/nn/unfold.rb +19 -0
- data/lib/torch/nn/utils.rb +25 -0
- data/lib/torch/nn/weighted_loss.rb +10 -0
- data/lib/torch/nn/zero_pad2d.rb +9 -0
- data/lib/torch/optim/adadelta.rb +57 -0
- data/lib/torch/optim/adagrad.rb +71 -0
- data/lib/torch/optim/adam.rb +81 -0
- data/lib/torch/optim/adamax.rb +68 -0
- data/lib/torch/optim/adamw.rb +82 -0
- data/lib/torch/optim/asgd.rb +65 -0
- data/lib/torch/optim/lr_scheduler/lr_scheduler.rb +33 -0
- data/lib/torch/optim/lr_scheduler/step_lr.rb +17 -0
- data/lib/torch/optim/optimizer.rb +56 -0
- data/lib/torch/optim/rmsprop.rb +76 -0
- data/lib/torch/optim/rprop.rb +68 -0
- data/lib/torch/optim/sgd.rb +48 -16
- data/lib/torch/random.rb +10 -0
- data/lib/torch/tensor.rb +71 -30
- data/lib/torch/utils/data/data_loader.rb +10 -4
- data/lib/torch/utils/data/tensor_dataset.rb +3 -0
- data/lib/torch/version.rb +1 -1
- metadata +123 -6
data/lib/torch/nn/sequential.rb
CHANGED
@@ -2,28 +2,19 @@ module Torch
   module NN
     class Sequential < Module
       def initialize(*args)
-
+        super()
         # TODO support hash arg (named modules)
         args.each_with_index do |mod, idx|
           add_module(idx.to_s, mod)
         end
       end
 
-      def add_module(name, mod)
-        # TODO add checks
-        @modules[name] = mod
-      end
-
       def forward(input)
         @modules.values.each do |mod|
           input = mod.call(input)
         end
         input
       end
-
-      def parameters
-        @modules.flat_map { |_, mod| mod.parameters }
-      end
     end
   end
 end
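
The removed add_module and parameters methods are presumably superseded by the expanded base Torch::NN::Module (module.rb +201 -20 in the file list), so Sequential now only chains its children. A minimal usage sketch, not taken from the diff (layer sizes and input values are illustrative):

    net = Torch::NN::Sequential.new(
      Torch::NN::Linear.new(10, 5),
      Torch::NN::ReLU.new,
      Torch::NN::Linear.new(5, 1)
    )
    x = Torch.rand(2, 10)   # batch of 2 samples with 10 features
    output = net.call(x)    # forward applies each child module in order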
data/lib/torch/nn/softmax.rb
ADDED
@@ -0,0 +1,18 @@
+module Torch
+  module NN
+    class Softmax < Module
+      def initialize(dim: nil)
+        super()
+        @dim = dim
+      end
+
+      def forward(input)
+        F.softmax(input, dim: @dim)
+      end
+
+      def extra_inspect
+        format("dim: %s", @dim)
+      end
+    end
+  end
+end
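
A quick sketch of the intended usage (values illustrative, not from the diff): Softmax rescales the input along the given dimension so the entries are non-negative and sum to 1.

    x = Torch.tensor([[1.0, 2.0, 3.0]])
    probs = Torch::NN::Softmax.new(dim: 1).call(x)
    # => roughly [[0.09, 0.24, 0.67]], each row summing to 1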
data/lib/torch/nn/softplus.rb
ADDED
@@ -0,0 +1,19 @@
+module Torch
+  module NN
+    class Softplus < Module
+      def initialize(beta: 1, threshold: 20)
+        super()
+        @beta = beta
+        @threshold = threshold
+      end
+
+      def forward(input)
+        F.softplus(input, beta: @beta, threshold: @threshold)
+      end
+
+      def extra_inspect
+        format("beta: %s, threshold: %s", @beta, @threshold)
+      end
+    end
+  end
+end
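
Softplus is the smooth ReLU approximation softplus(x) = (1/beta) * log(1 + exp(beta * x)); following the PyTorch semantics this ports, the threshold option reverts to the identity when beta * x exceeds it, for numerical stability. A hedged sketch:

    act = Torch::NN::Softplus.new(beta: 1, threshold: 20)
    y = act.call(Torch.tensor([-1.0, 0.0, 1.0]))
    # => roughly [0.31, 0.69, 1.31]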
data/lib/torch/nn/triplet_margin_loss.rb
ADDED
@@ -0,0 +1,18 @@
+module Torch
+  module NN
+    class TripletMarginLoss < Loss
+      def initialize(margin: 1.0, p: 2.0, eps: 1e-6, swap: false, reduction: "mean")
+        super(reduction)
+        @margin = margin
+        @p = p
+        @eps = eps
+        @swap = swap
+      end
+
+      def forward(anchor, positive, negative)
+        F.triplet_margin_loss(anchor, positive, negative, margin: @margin, p: @p,
+          eps: @eps, swap: @swap, reduction: @reduction)
+      end
+    end
+  end
+end
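
This mirrors PyTorch's triplet margin loss, L(a, p, n) = max(d(a, p) - d(a, n) + margin, 0), where d is the pairwise p-norm distance. A usage sketch with random tensors (shapes are illustrative):

    loss_fn  = Torch::NN::TripletMarginLoss.new(margin: 1.0)
    anchor   = Torch.randn(8, 128)
    positive = Torch.randn(8, 128)
    negative = Torch.randn(8, 128)
    loss = loss_fn.call(anchor, positive, negative)   # scalar, "mean" reduction by default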
data/lib/torch/nn/unfold.rb
ADDED
@@ -0,0 +1,19 @@
+module Torch
+  module NN
+    class Unfold < Module
+      def initialize(kernel_size, dilation: 1, padding: 0, stride: 1)
+        super()
+        @kernel_size = kernel_size
+        @dilation = dilation
+        @padding = padding
+        @stride = stride
+      end
+
+      def forward(input)
+        F.unfold(input, @kernel_size, dilation: @dilation, padding: @padding, stride: @stride)
+      end
+
+      # TODO add extra_inspect
+    end
+  end
+end
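
Unfold extracts sliding local blocks: for an input of shape (N, C, H, W) and kernel_size (kH, kW), the output has shape (N, C * kH * kW, L), with L the number of block positions. A rough sketch (the exact shape is an assumption, not taken from the diff):

    unfold = Torch::NN::Unfold.new([2, 2])
    x = Torch.randn(1, 1, 3, 3)
    blocks = unfold.call(x)   # expected shape [1, 4, 4]: four 2x2 patches, each flattened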
data/lib/torch/nn/utils.rb
ADDED
@@ -0,0 +1,25 @@
+module Torch
+  module NN
+    module Utils
+      def _single(value)
+        _ntuple(1, value)
+      end
+
+      def _pair(value)
+        _ntuple(2, value)
+      end
+
+      def _triple(value)
+        _ntuple(3, value)
+      end
+
+      def _quadrupal(value)
+        _ntuple(4, value)
+      end
+
+      def _ntuple(n, value)
+        value.is_a?(Array) ? value : [value] * n
+      end
+    end
+  end
+end
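
These helpers mirror PyTorch's torch.nn.modules.utils._ntuple: a scalar is broadcast to an n-element array, while an array is passed through unchanged (note the original spelling _quadrupal). Behavior sketch, derived directly from the code above:

    include Torch::NN::Utils
    _pair(3)       # => [3, 3]
    _pair([1, 2])  # => [1, 2]
    _triple(5)     # => [5, 5, 5]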
data/lib/torch/optim/adadelta.rb
ADDED
@@ -0,0 +1,57 @@
+# ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adadelta.py
+module Torch
+  module Optim
+    class Adadelta < Optimizer
+      def initialize(params, lr: 1.0, rho: 0.9, eps: 1e-6, weight_decay: 0)
+        raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
+        raise ArgumentError, "Invalid rho value: #{rho}" if rho < 0 || rho > 1
+        raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
+        raise ArgumentError, "Invalid weight_decay value: #{weight_decay}" if weight_decay < 0
+
+        defaults = {lr: lr, rho: rho, eps: eps, weight_decay: weight_decay}
+        super(params, defaults)
+      end
+
+      def step(closure = nil)
+        loss = nil
+        if closure
+          loss = closure.call
+        end
+
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            next unless p.grad
+            grad = p.grad.data
+            if grad.sparse?
+              raise Error, "Adadelta does not support sparse gradients"
+            end
+            state = @state[p]
+
+            if state.size == 0
+              state[:step] = 0
+              state[:square_avg] = Torch.zeros_like(p.data)
+              state[:acc_delta] = Torch.zeros_like(p.data)
+            end
+
+            square_avg, acc_delta = state[:square_avg], state[:acc_delta]
+            rho, eps = group[:rho], group[:eps]
+
+            state[:step] += 1
+
+            if group[:weight_decay] != 0
+              grad = grad.add(group[:weight_decay], p.data)
+            end
+
+            square_avg.mul!(rho).addcmul!(1 - rho, grad, grad)
+            std = square_avg.add(eps).sqrt!
+            delta = acc_delta.add(eps).sqrt!.div!(std).mul!(grad)
+            p.data.add!(-group[:lr], delta)
+            acc_delta.mul!(rho).addcmul!(1 - rho, delta, delta)
+          end
+        end
+
+        loss
+      end
+    end
+  end
+end
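
For reference, the in-place tensor calls in step implement the standard Adadelta recurrences (names follow the code; g_t is the gradient of parameter x at step t):

$$
\begin{aligned}
\mathrm{square\_avg}_t &= \rho \cdot \mathrm{square\_avg}_{t-1} + (1-\rho)\, g_t^2 \\
\delta_t &= \frac{\sqrt{\mathrm{acc\_delta}_{t-1} + \epsilon}}{\sqrt{\mathrm{square\_avg}_t + \epsilon}}\; g_t \\
x_{t+1} &= x_t - \mathrm{lr} \cdot \delta_t \\
\mathrm{acc\_delta}_t &= \rho \cdot \mathrm{acc\_delta}_{t-1} + (1-\rho)\, \delta_t^2
\end{aligned}
$$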
data/lib/torch/optim/adagrad.rb
ADDED
@@ -0,0 +1,71 @@
+# ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adagrad.py
+module Torch
+  module Optim
+    class Adagrad < Optimizer
+      def initialize(params, lr: 1e-2, lr_decay: 0, weight_decay: 0, initial_accumulator_value: 0, eps: 1e-10)
+        raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
+        raise ArgumentError, "Invalid lr_decay value: #{lr_decay}" if lr_decay < 0
+        raise ArgumentError, "Invalid initial_accumulator_value value: #{initial_accumulator_value}" if initial_accumulator_value < 0
+        raise ArgumentError, "Invalid weight_decay value: #{weight_decay}" if weight_decay < 0
+        raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
+
+        defaults = {lr: lr, lr_decay: lr_decay, eps: eps, weight_decay: weight_decay, initial_accumulator_value: initial_accumulator_value}
+        super(params, defaults)
+
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            state = @state[p]
+            state[:step] = 0
+            state[:sum] = Torch.full_like(p.data, initial_accumulator_value)
+          end
+        end
+      end
+
+      def share_memory
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            state = @state[p]
+            state[:sum].share_memory!
+          end
+        end
+      end
+
+      def step(closure = nil)
+        loss = nil
+        if closure
+          loss = closure.call
+        end
+
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            next unless p.grad
+
+            grad = p.grad.data
+            state = @state[p]
+
+            state[:step] += 1
+
+            if group[:weight_decay] != 0
+              if p.grad.data.sparse?
+                raise Error, "weight_decay option is not compatible with sparse gradients"
+              end
+              grad = grad.add(group[:weight_decay], p.data)
+            end
+
+            clr = group[:lr] / (1 + (state[:step] - 1) * group[:lr_decay])
+
+            if grad.sparse?
+              raise NotImplementedYet
+            else
+              state[:sum].addcmul!(1, grad, grad)
+              std = state[:sum].sqrt.add!(group[:eps])
+              p.data.addcdiv!(-clr, grad, std)
+            end
+          end
+        end
+
+        loss
+      end
+    end
+  end
+end
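
The dense branch of step corresponds to the Adagrad update with the decayed learning rate computed above:

$$
\begin{aligned}
\mathrm{sum}_t &= \mathrm{sum}_{t-1} + g_t^2 \\
\mathrm{clr} &= \frac{\mathrm{lr}}{1 + (t-1)\cdot \mathrm{lr\_decay}} \\
x_{t+1} &= x_t - \mathrm{clr}\cdot \frac{g_t}{\sqrt{\mathrm{sum}_t} + \epsilon}
\end{aligned}
$$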
data/lib/torch/optim/adam.rb
ADDED
@@ -0,0 +1,81 @@
+# ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adam.py
+module Torch
+  module Optim
+    class Adam < Optimizer
+      def initialize(params, lr: 1e-3, betas: [0.9, 0.999], eps: 1e-8, weight_decay: 0, amsgrad: false)
+        raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
+        raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
+        raise ArgumentError, "Invalid beta parameter at index 0: #{betas[0]}" if betas[0] < 0 || betas[0] >= 1
+        raise ArgumentError, "Invalid beta parameter at index 1: #{betas[1]}" if betas[1] < 0 || betas[1] >= 1
+
+        defaults = {lr: lr, betas: betas, eps: eps, weight_decay: weight_decay, amsgrad: amsgrad}
+        super(params, defaults)
+      end
+
+      def step(closure = nil)
+        loss = nil
+        if closure
+          loss = closure.call
+        end
+
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            next unless p.grad
+            grad = p.grad.data
+            if grad.sparse?
+              raise Error, "Adam does not support sparse gradients, please consider SparseAdam instead"
+            end
+            amsgrad = group[:amsgrad]
+
+            state = @state[p]
+
+            # State initialization
+            if state.size == 0
+              state[:step] = 0
+              # Exponential moving average of gradient values
+              state[:exp_avg] = Torch.zeros_like(p.data)
+              # Exponential moving average of squared gradient values
+              state[:exp_avg_sq] = Torch.zeros_like(p.data)
+              if amsgrad
+                # Maintains max of all exp. moving avg. of sq. grad. values
+                state[:max_exp_avg_sq] = Torch.zeros_like(p.data)
+              end
+            end
+
+            exp_avg, exp_avg_sq = state[:exp_avg], state[:exp_avg_sq]
+            if amsgrad
+              max_exp_avg_sq = state[:max_exp_avg_sq]
+            end
+            beta1, beta2 = group[:betas]
+
+            state[:step] += 1
+            bias_correction1 = 1 - beta1 ** state[:step]
+            bias_correction2 = 1 - beta2 ** state[:step]
+
+            if group[:weight_decay] != 0
+              grad.add!(group[:weight_decay], p.data)
+            end
+
+            # Decay the first and second moment running average coefficient
+            exp_avg.mul!(beta1).add!(1 - beta1, grad)
+            exp_avg_sq.mul!(beta2).addcmul!(1 - beta2, grad, grad)
+            if amsgrad
+              # Maintains the maximum of all 2nd moment running avg. till now
+              Torch.max(max_exp_avg_sq, exp_avg_sq, out: max_exp_avg_sq)
+              # Use the max. for normalizing running avg. of gradient
+              denom = (max_exp_avg_sq.sqrt / Math.sqrt(bias_correction2)).add!(group[:eps])
+            else
+              denom = (exp_avg_sq.sqrt / Math.sqrt(bias_correction2)).add!(group[:eps])
+            end
+
+            step_size = group[:lr] / bias_correction1
+
+            p.data.addcdiv!(-step_size, exp_avg, denom)
+          end
+        end
+
+        loss
+      end
+    end
+  end
+end
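
Putting the new pieces together, a minimal training-loop sketch using the Adam optimizer (model, data, and hyperparameters are placeholders, not taken from the diff):

    net = Torch::NN::Sequential.new(
      Torch::NN::Linear.new(10, 32),
      Torch::NN::ReLU.new,
      Torch::NN::Linear.new(32, 1)
    )
    criterion = Torch::NN::MSELoss.new
    optimizer = Torch::Optim::Adam.new(net.parameters, lr: 1e-3)

    x = Torch.rand(64, 10)
    y = Torch.rand(64, 1)

    10.times do
      optimizer.zero_grad                    # clear gradients from the previous step
      loss = criterion.call(net.call(x), y)  # forward pass + MSE loss
      loss.backward                          # backpropagate
      optimizer.step                         # apply the Adam update shown above
    end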