torch-rb 0.1.0 → 0.1.5
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -0
- data/LICENSE.txt +46 -22
- data/README.md +85 -19
- data/ext/torch/ext.cpp +274 -256
- data/ext/torch/extconf.rb +9 -0
- data/ext/torch/nn_functions.cpp +595 -0
- data/ext/torch/nn_functions.hpp +6 -0
- data/ext/torch/templates.hpp +250 -0
- data/ext/torch/tensor_functions.cpp +1860 -0
- data/ext/torch/tensor_functions.hpp +6 -0
- data/ext/torch/torch_functions.cpp +2875 -0
- data/ext/torch/torch_functions.hpp +6 -0
- data/lib/torch.rb +199 -84
- data/lib/torch/ext.bundle +0 -0
- data/lib/torch/inspector.rb +52 -25
- data/lib/torch/native/dispatcher.rb +48 -0
- data/lib/torch/native/function.rb +78 -0
- data/lib/torch/native/generator.rb +149 -0
- data/lib/torch/native/native_functions.yaml +6837 -0
- data/lib/torch/native/parser.rb +97 -0
- data/lib/torch/nn/alpha_dropout.rb +9 -0
- data/lib/torch/nn/avg_pool2d.rb +14 -0
- data/lib/torch/nn/avg_poolnd.rb +9 -0
- data/lib/torch/nn/bce_loss.rb +13 -0
- data/lib/torch/nn/bce_with_logits_loss.rb +15 -0
- data/lib/torch/nn/bilinear.rb +38 -0
- data/lib/torch/nn/conv2d.rb +14 -29
- data/lib/torch/nn/convnd.rb +41 -0
- data/lib/torch/nn/cosine_embedding_loss.rb +14 -0
- data/lib/torch/nn/cosine_similarity.rb +15 -0
- data/lib/torch/nn/cross_entropy_loss.rb +14 -0
- data/lib/torch/nn/ctc_loss.rb +15 -0
- data/lib/torch/nn/dropout.rb +9 -0
- data/lib/torch/nn/dropout2d.rb +9 -0
- data/lib/torch/nn/dropout3d.rb +9 -0
- data/lib/torch/nn/dropoutnd.rb +15 -0
- data/lib/torch/nn/embedding.rb +52 -0
- data/lib/torch/nn/embedding_bag.rb +34 -0
- data/lib/torch/nn/feature_alpha_dropout.rb +9 -0
- data/lib/torch/nn/functional.rb +194 -11
- data/lib/torch/nn/hinge_embedding_loss.rb +14 -0
- data/lib/torch/nn/identity.rb +14 -0
- data/lib/torch/nn/init.rb +58 -1
- data/lib/torch/nn/kl_div_loss.rb +13 -0
- data/lib/torch/nn/l1_loss.rb +13 -0
- data/lib/torch/nn/leaky_relu.rb +20 -0
- data/lib/torch/nn/linear.rb +12 -11
- data/lib/torch/nn/log_softmax.rb +14 -0
- data/lib/torch/nn/loss.rb +10 -0
- data/lib/torch/nn/margin_ranking_loss.rb +14 -0
- data/lib/torch/nn/max_pool2d.rb +9 -0
- data/lib/torch/nn/max_poolnd.rb +19 -0
- data/lib/torch/nn/module.rb +184 -19
- data/lib/torch/nn/mse_loss.rb +2 -2
- data/lib/torch/nn/multi_label_margin_loss.rb +13 -0
- data/lib/torch/nn/multi_label_soft_margin_loss.rb +13 -0
- data/lib/torch/nn/multi_margin_loss.rb +17 -0
- data/lib/torch/nn/nll_loss.rb +14 -0
- data/lib/torch/nn/pairwise_distance.rb +16 -0
- data/lib/torch/nn/parameter.rb +4 -0
- data/lib/torch/nn/poisson_nll_loss.rb +16 -0
- data/lib/torch/nn/prelu.rb +19 -0
- data/lib/torch/nn/relu.rb +8 -3
- data/lib/torch/nn/rnn.rb +22 -0
- data/lib/torch/nn/rnn_base.rb +154 -0
- data/lib/torch/nn/sequential.rb +1 -10
- data/lib/torch/nn/sigmoid.rb +9 -0
- data/lib/torch/nn/smooth_l1_loss.rb +13 -0
- data/lib/torch/nn/soft_margin_loss.rb +13 -0
- data/lib/torch/nn/softmax.rb +18 -0
- data/lib/torch/nn/softmax2d.rb +10 -0
- data/lib/torch/nn/softmin.rb +14 -0
- data/lib/torch/nn/softplus.rb +19 -0
- data/lib/torch/nn/triplet_margin_loss.rb +18 -0
- data/lib/torch/nn/weighted_loss.rb +10 -0
- data/lib/torch/optim/adadelta.rb +57 -0
- data/lib/torch/optim/adagrad.rb +71 -0
- data/lib/torch/optim/adam.rb +81 -0
- data/lib/torch/optim/adamax.rb +68 -0
- data/lib/torch/optim/adamw.rb +82 -0
- data/lib/torch/optim/asgd.rb +65 -0
- data/lib/torch/optim/lr_scheduler/lr_scheduler.rb +33 -0
- data/lib/torch/optim/lr_scheduler/step_lr.rb +17 -0
- data/lib/torch/optim/optimizer.rb +62 -0
- data/lib/torch/optim/rmsprop.rb +76 -0
- data/lib/torch/optim/rprop.rb +68 -0
- data/lib/torch/optim/sgd.rb +60 -0
- data/lib/torch/random.rb +10 -0
- data/lib/torch/tensor.rb +92 -21
- data/lib/torch/utils/data/data_loader.rb +15 -0
- data/lib/torch/utils/data/tensor_dataset.rb +8 -1
- data/lib/torch/version.rb +1 -1
- metadata +74 -3
data/lib/torch/nn/relu.rb
CHANGED
@@ -1,12 +1,17 @@
 module Torch
   module NN
     class ReLU < Module
-      def initialize
-
+      def initialize(inplace: false)
+        super()
+        @inplace = inplace
       end
 
       def forward(input)
-        F.relu(input)
+        F.relu(input, inplace: @inplace)
+      end
+
+      def extra_inspect
+        @inplace ? "inplace: true" : ""
       end
     end
   end
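A minimal usage sketch of the updated module (hedged: Torch.randn and Module#call are assumed to behave as in the gem's README; they are not part of this diff):

require "torch"

relu = Torch::NN::ReLU.new(inplace: true)
x = Torch.randn([2, 3])
y = relu.call(x)   # with inplace: true the input tensor itself is overwritten
p relu             # extra_inspect adds "inplace: true" to the printed module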
data/lib/torch/nn/rnn.rb
ADDED
@@ -0,0 +1,22 @@
module Torch
  module NN
    class RNN < RNNBase
      def initialize(*args, **options)
        if options.key?(:nonlinearity)
          if options[:nonlinearity] == "tanh"
            mode = "RNN_TANH"
          elsif options[:nonlinearity] == "relu"
            mode = "RNN_RELU"
          else
            raise ArgumentError, "Unknown nonlinearity: #{options[:nonlinearity]}"
          end
          options.delete(:nonlinearity)
        else
          mode = "RNN_TANH"
        end

        super(mode, *args, **options)
      end
    end
  end
end
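A hedged construction sketch showing how the nonlinearity option maps to the internal mode (argument order follows the RNNBase signature below; note that RNNBase#forward is still marked NotImplementedYet in this release):

require "torch"

# input_size: 10, hidden_size: 20
rnn = Torch::NN::RNN.new(10, 20, num_layers: 2, nonlinearity: "relu")
# internally this calls RNNBase#initialize with mode "RNN_RELU"
puts rnn.inspect  # printed as something like RNN(10, 20, num_layers: 2)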
data/lib/torch/nn/rnn_base.rb
ADDED
@@ -0,0 +1,154 @@
module Torch
  module NN
    class RNNBase < Module
      def initialize(mode, input_size, hidden_size, num_layers: 1, bias: true,
        batch_first: false, dropout: 0.0, bidirectional: false)

        super()
        @mode = mode
        @input_size = input_size
        @hidden_size = hidden_size
        @num_layers = num_layers
        @bias = bias
        @batch_first = batch_first
        @dropout = dropout.to_f
        @bidirectional = bidirectional
        num_directions = bidirectional ? 2 : 1

        if !dropout.is_a?(Numeric) || !(dropout >= 0 && dropout <= 1)
          raise ArgumentError, "dropout should be a number in range [0, 1] " +
            "representing the probability of an element being " +
            "zeroed"
        end
        if dropout > 0 && num_layers == 1
          warn "dropout option adds dropout after all but last " +
            "recurrent layer, so non-zero dropout expects " +
            "num_layers greater than 1, but got dropout=#{dropout} and " +
            "num_layers=#{num_layers}"
        end

        gate_size =
          case mode
          when "LSTM"
            4 * hidden_size
          when "GRU"
            3 * hidden_size
          when "RNN_TANH"
            hidden_size
          when "RNN_RELU"
            hidden_size
          else
            raise ArgumentError, "Unrecognized RNN mode: #{mode}"
          end

        @all_weights = []
        num_layers.times do |layer|
          num_directions.times do |direction|
            layer_input_size = layer == 0 ? input_size : hidden_size * num_directions

            w_ih = Parameter.new(Torch::Tensor.new(gate_size, layer_input_size))
            w_hh = Parameter.new(Torch::Tensor.new(gate_size, hidden_size))
            b_ih = Parameter.new(Torch::Tensor.new(gate_size))
            # Second bias vector included for CuDNN compatibility. Only one
            # bias vector is needed in standard definition.
            b_hh = Parameter.new(Torch::Tensor.new(gate_size))
            layer_params = [w_ih, w_hh, b_ih, b_hh]

            suffix = direction == 1 ? "_reverse" : ""
            param_names = ["weight_ih_l%s%s", "weight_hh_l%s%s"]
            if bias
              param_names += ["bias_ih_l%s%s", "bias_hh_l%s%s"]
            end
            param_names.map! { |x| x % [layer, suffix] }

            param_names.zip(layer_params) do |name, param|
              instance_variable_set("@#{name}", param)
            end
            @all_weights << param_names
          end
        end

        flatten_parameters
        reset_parameters
      end

      def flatten_parameters
        # no-op unless module is on the GPU and cuDNN is enabled
      end

      def _apply(fn)
        ret = super
        flatten_parameters
        ret
      end

      def reset_parameters
        stdv = 1.0 / Math.sqrt(@hidden_size)
        parameters.each do |weight|
          Init.uniform!(weight, a: -stdv, b: stdv)
        end
      end

      def permute_hidden(hx, permutation)
        raise NotImplementedYet
      end

      def forward(input, hx: nil)
        raise NotImplementedYet

        is_packed = false # TODO isinstance(input, PackedSequence)
        if is_packed
          input, batch_sizes, sorted_indices, unsorted_indices = input
          max_batch_size = batch_sizes[0]
          max_batch_size = max_batch_size.to_i
        else
          batch_sizes = nil
          max_batch_size = @batch_first ? input.size(0) : input.size(1)
          sorted_indices = nil
          unsorted_indices = nil
        end

        if hx.nil?
          num_directions = @bidirectional ? 2 : 1
          hx = Torch.zeros(@num_layers * num_directions, max_batch_size,
            @hidden_size, dtype: input.dtype, device: input.device)
        else
          # Each batch of the hidden state should match the input sequence that
          # the user believes he/she is passing in.
          hx = permute_hidden(hx, sorted_indices)
        end

        check_forward_args(input, hx, batch_sizes)
        _rnn_impls = {
          "RNN_TANH" => Torch.method(:_rnn_tanh),
          "RNN_RELU" => Torch.method(:_rnn_relu)
        }
        _impl = _rnn_impls[@mode]
        if batch_sizes.nil?
          result = _impl.call(input, hx, _get_flat_weights, @bias, @num_layers,
            @dropout, @training, @bidirectional, @batch_first)
        else
          result = _impl.call(input, batch_sizes, hx, _get_flat_weights, @bias,
            @num_layers, @dropout, @training, @bidirectional)
        end
        output = result[0]
        hidden = result[1]

        if is_packed
          raise NotImplementedYet
          # output = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
        end

        [output, permute_hidden(hidden, unsorted_indices)]
      end

      # TODO add more parameters
      def extra_inspect
        s = String.new("%{input_size}, %{hidden_size}")
        if @num_layers != 1
          s += ", num_layers: %{num_layers}"
        end
        format(s, input_size: @input_size, hidden_size: @hidden_size, num_layers: @num_layers)
      end
    end
  end
end
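The per-layer parameter names above follow PyTorch's convention (weight_ih_l0, bias_hh_l1_reverse, ...). A standalone sketch of just that naming logic, runnable in plain Ruby without the gem:

# layer 1, reverse direction of a bidirectional RNN, bias enabled
layer = 1
suffix = "_reverse"
param_names = ["weight_ih_l%s%s", "weight_hh_l%s%s", "bias_ih_l%s%s", "bias_hh_l%s%s"]
param_names.map! { |x| x % [layer, suffix] }
p param_names
# => ["weight_ih_l1_reverse", "weight_hh_l1_reverse", "bias_ih_l1_reverse", "bias_hh_l1_reverse"]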
data/lib/torch/nn/sequential.rb
CHANGED
@@ -2,28 +2,19 @@ module Torch
   module NN
     class Sequential < Module
       def initialize(*args)
-
+        super()
         # TODO support hash arg (named modules)
         args.each_with_index do |mod, idx|
           add_module(idx.to_s, mod)
         end
       end
 
-      def add_module(name, mod)
-        # TODO add checks
-        @modules[name] = mod
-      end
-
       def forward(input)
         @modules.values.each do |mod|
           input = mod.call(input)
         end
         input
       end
-
-      def parameters
-        @modules.flat_map { |_, mod| mod.parameters }
-      end
     end
   end
 end
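Sequential now delegates add_module and parameters to Module instead of defining its own. A minimal usage sketch (hedged: Linear, ReLU, Torch.randn and Module#call/#parameters are assumed to work as in the gem's README):

require "torch"

model = Torch::NN::Sequential.new(
  Torch::NN::Linear.new(10, 5),
  Torch::NN::ReLU.new,
  Torch::NN::Linear.new(5, 1)
)
x = Torch.randn([3, 10])
out = model.call(x)          # runs each child module in order
p model.parameters.length    # parameters now comes from Module, not Sequential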
data/lib/torch/nn/softmax.rb
ADDED
@@ -0,0 +1,18 @@
module Torch
  module NN
    class Softmax < Module
      def initialize(dim: nil)
        super()
        @dim = dim
      end

      def forward(input)
        F.softmax(input, dim: @dim)
      end

      def extra_inspect
        format("dim: %s", @dim)
      end
    end
  end
end
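A brief sketch of the new Softmax module; dim: selects the dimension along which the outputs sum to 1 (Torch.randn assumed, as above):

require "torch"

softmax = Torch::NN::Softmax.new(dim: 1)
x = Torch.randn([2, 3])
probs = softmax.call(x)  # values along dim 1 now sum to 1 for each row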
data/lib/torch/nn/softplus.rb
ADDED
@@ -0,0 +1,19 @@
module Torch
  module NN
    class Softplus < Module
      def initialize(beta: 1, threshold: 20)
        super()
        @beta = beta
        @threshold = threshold
      end

      def forward(input)
        F.softplus(input, beta: @beta, threshold: @threshold)
      end

      def extra_inspect
        format("beta: %s, threshold: %s", @beta, @threshold)
      end
    end
  end
end
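Softplus computes a smooth approximation of ReLU, log(1 + exp(beta * x)) / beta, reverting to the identity once beta * x exceeds the threshold for numerical stability. A short sketch:

require "torch"

softplus = Torch::NN::Softplus.new(beta: 1, threshold: 20)
x = Torch.randn([4])
y = softplus.call(x)   # ~log(1 + exp(x)); linear above the threshold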
data/lib/torch/nn/triplet_margin_loss.rb
ADDED
@@ -0,0 +1,18 @@
module Torch
  module NN
    class TripletMarginLoss < Loss
      def initialize(margin: 1.0, p: 2.0, eps: 1e-6, swap: false, reduction: "mean")
        super(reduction)
        @margin = margin
        @p = p
        @eps = eps
        @swap = swap
      end

      def forward(anchor, positive, negative)
        F.triplet_margin_loss(anchor, positive, negative, margin: @margin, p: @p,
          eps: @eps, swap: @swap, reduction: @reduction)
      end
    end
  end
end
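Usage sketch for the new TripletMarginLoss, which pulls anchor/positive embeddings together and pushes anchor/negative apart (hedged: assumes Module#call forwards multiple arguments to forward, and Torch.randn as above):

require "torch"

loss_fn = Torch::NN::TripletMarginLoss.new(margin: 1.0, p: 2.0)
anchor   = Torch.randn([8, 128])
positive = Torch.randn([8, 128])
negative = Torch.randn([8, 128])
loss = loss_fn.call(anchor, positive, negative)  # scalar tensor with reduction: "mean"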
data/lib/torch/optim/adadelta.rb
ADDED
@@ -0,0 +1,57 @@
# ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adadelta.py
module Torch
  module Optim
    class Adadelta < Optimizer
      def initialize(params, lr: 1.0, rho: 0.9, eps: 1e-6, weight_decay: 0)
        raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
        raise ArgumentError, "Invalid rho value: #{rho}" if rho < 0 || rho > 1
        raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
        raise ArgumentError, "Invalid weight_decay value: #{weight_decay}" if weight_decay < 0

        defaults = {lr: lr, rho: rho, eps: eps, weight_decay: weight_decay}
        super(params, defaults)
      end

      def step(closure = nil)
        loss = nil
        if closure
          loss = closure.call
        end

        @param_groups.each do |group|
          group[:params].each do |p|
            next unless p.grad
            grad = p.grad.data
            if grad.sparse?
              raise Error, "Adadelta does not support sparse gradients"
            end
            state = @state[p]

            if state.size == 0
              state[:step] = 0
              state[:square_avg] = Torch.zeros_like(p.data)
              state[:acc_delta] = Torch.zeros_like(p.data)
            end

            square_avg, acc_delta = state[:square_avg], state[:acc_delta]
            rho, eps = group[:rho], group[:eps]

            state[:step] += 1

            if group[:weight_decay] != 0
              grad = grad.add(group[:weight_decay], p.data)
            end

            square_avg.mul!(rho).addcmul!(1 - rho, grad, grad)
            std = square_avg.add(eps).sqrt!
            delta = acc_delta.add(eps).sqrt!.div!(std).mul!(grad)
            p.data.add!(-group[:lr], delta)
            acc_delta.mul!(rho).addcmul!(1 - rho, delta, delta)
          end
        end

        loss
      end
    end
  end
end
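A minimal training-step sketch using the new optimizer (hedged: Linear, MSELoss, Torch.randn, and the zero_grad/backward flow come from the rest of this release and the README, not from this file):

require "torch"

model = Torch::NN::Linear.new(10, 1)
criterion = Torch::NN::MSELoss.new
optimizer = Torch::Optim::Adadelta.new(model.parameters, lr: 1.0, rho: 0.9)

x = Torch.randn([32, 10])
y = Torch.randn([32, 1])

optimizer.zero_grad                       # clear accumulated gradients
loss = criterion.call(model.call(x), y)
loss.backward                             # populate p.grad for each parameter
optimizer.step                            # apply the Adadelta update above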
data/lib/torch/optim/adagrad.rb
ADDED
@@ -0,0 +1,71 @@
# ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adagrad.py
module Torch
  module Optim
    class Adagrad < Optimizer
      def initialize(params, lr: 1e-2, lr_decay: 0, weight_decay: 0, initial_accumulator_value: 0, eps: 1e-10)
        raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
        raise ArgumentError, "Invalid lr_decay value: #{lr_decay}" if lr_decay < 0
        raise ArgumentError, "Invalid initial_accumulator_value value: #{initial_accumulator_value}" if initial_accumulator_value < 0
        raise ArgumentError, "Invalid weight_decay value: #{weight_decay}" if weight_decay < 0
        raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0

        defaults = {lr: lr, lr_decay: lr_decay, eps: eps, weight_decay: weight_decay, initial_accumulator_value: initial_accumulator_value}
        super(params, defaults)

        @param_groups.each do |group|
          group[:params].each do |p|
            state = @state[p]
            state[:step] = 0
            state[:sum] = Torch.full_like(p.data, initial_accumulator_value)
          end
        end
      end

      def share_memory
        @param_groups.each do |group|
          group[:params].each do |p|
            state = @state[p]
            state[:sum].share_memory!
          end
        end
      end

      def step(closure = nil)
        loss = nil
        if closure
          loss = closure.call
        end

        @param_groups.each do |group|
          group[:params].each do |p|
            next unless p.grad

            grad = p.grad.data
            state = @state[p]

            state[:step] += 1

            if group[:weight_decay] != 0
              if p.grad.data.sparse?
                raise Error, "weight_decay option is not compatible with sparse gradients"
              end
              grad = grad.add(group[:weight_decay], p.data)
            end

            clr = group[:lr] / (1 + (state[:step] - 1) * group[:lr_decay])

            if grad.sparse?
              raise NotImplementedYet
            else
              state[:sum].addcmul!(1, grad, grad)
              std = state[:sum].sqrt.add!(group[:eps])
              p.data.addcdiv!(-clr, grad, std)
            end
          end
        end

        loss
      end
    end
  end
end
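Adagrad's effective learning rate shrinks with the step count via clr = lr / (1 + (step - 1) * lr_decay), before the per-parameter scaling by the accumulated squared gradients. A standalone sketch of that schedule in plain Ruby (the lr and lr_decay values are illustrative, not defaults taken from anywhere else):

lr = 1e-2
lr_decay = 1e-4

1.step(5) do |step|
  clr = lr / (1 + (step - 1) * lr_decay)
  puts format("step %d: effective lr %.6f", step, clr)
end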