torch-rb 0.1.0 → 0.1.5

Files changed (94)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +40 -0
  3. data/LICENSE.txt +46 -22
  4. data/README.md +85 -19
  5. data/ext/torch/ext.cpp +274 -256
  6. data/ext/torch/extconf.rb +9 -0
  7. data/ext/torch/nn_functions.cpp +595 -0
  8. data/ext/torch/nn_functions.hpp +6 -0
  9. data/ext/torch/templates.hpp +250 -0
  10. data/ext/torch/tensor_functions.cpp +1860 -0
  11. data/ext/torch/tensor_functions.hpp +6 -0
  12. data/ext/torch/torch_functions.cpp +2875 -0
  13. data/ext/torch/torch_functions.hpp +6 -0
  14. data/lib/torch.rb +199 -84
  15. data/lib/torch/ext.bundle +0 -0
  16. data/lib/torch/inspector.rb +52 -25
  17. data/lib/torch/native/dispatcher.rb +48 -0
  18. data/lib/torch/native/function.rb +78 -0
  19. data/lib/torch/native/generator.rb +149 -0
  20. data/lib/torch/native/native_functions.yaml +6837 -0
  21. data/lib/torch/native/parser.rb +97 -0
  22. data/lib/torch/nn/alpha_dropout.rb +9 -0
  23. data/lib/torch/nn/avg_pool2d.rb +14 -0
  24. data/lib/torch/nn/avg_poolnd.rb +9 -0
  25. data/lib/torch/nn/bce_loss.rb +13 -0
  26. data/lib/torch/nn/bce_with_logits_loss.rb +15 -0
  27. data/lib/torch/nn/bilinear.rb +38 -0
  28. data/lib/torch/nn/conv2d.rb +14 -29
  29. data/lib/torch/nn/convnd.rb +41 -0
  30. data/lib/torch/nn/cosine_embedding_loss.rb +14 -0
  31. data/lib/torch/nn/cosine_similarity.rb +15 -0
  32. data/lib/torch/nn/cross_entropy_loss.rb +14 -0
  33. data/lib/torch/nn/ctc_loss.rb +15 -0
  34. data/lib/torch/nn/dropout.rb +9 -0
  35. data/lib/torch/nn/dropout2d.rb +9 -0
  36. data/lib/torch/nn/dropout3d.rb +9 -0
  37. data/lib/torch/nn/dropoutnd.rb +15 -0
  38. data/lib/torch/nn/embedding.rb +52 -0
  39. data/lib/torch/nn/embedding_bag.rb +34 -0
  40. data/lib/torch/nn/feature_alpha_dropout.rb +9 -0
  41. data/lib/torch/nn/functional.rb +194 -11
  42. data/lib/torch/nn/hinge_embedding_loss.rb +14 -0
  43. data/lib/torch/nn/identity.rb +14 -0
  44. data/lib/torch/nn/init.rb +58 -1
  45. data/lib/torch/nn/kl_div_loss.rb +13 -0
  46. data/lib/torch/nn/l1_loss.rb +13 -0
  47. data/lib/torch/nn/leaky_relu.rb +20 -0
  48. data/lib/torch/nn/linear.rb +12 -11
  49. data/lib/torch/nn/log_softmax.rb +14 -0
  50. data/lib/torch/nn/loss.rb +10 -0
  51. data/lib/torch/nn/margin_ranking_loss.rb +14 -0
  52. data/lib/torch/nn/max_pool2d.rb +9 -0
  53. data/lib/torch/nn/max_poolnd.rb +19 -0
  54. data/lib/torch/nn/module.rb +184 -19
  55. data/lib/torch/nn/mse_loss.rb +2 -2
  56. data/lib/torch/nn/multi_label_margin_loss.rb +13 -0
  57. data/lib/torch/nn/multi_label_soft_margin_loss.rb +13 -0
  58. data/lib/torch/nn/multi_margin_loss.rb +17 -0
  59. data/lib/torch/nn/nll_loss.rb +14 -0
  60. data/lib/torch/nn/pairwise_distance.rb +16 -0
  61. data/lib/torch/nn/parameter.rb +4 -0
  62. data/lib/torch/nn/poisson_nll_loss.rb +16 -0
  63. data/lib/torch/nn/prelu.rb +19 -0
  64. data/lib/torch/nn/relu.rb +8 -3
  65. data/lib/torch/nn/rnn.rb +22 -0
  66. data/lib/torch/nn/rnn_base.rb +154 -0
  67. data/lib/torch/nn/sequential.rb +1 -10
  68. data/lib/torch/nn/sigmoid.rb +9 -0
  69. data/lib/torch/nn/smooth_l1_loss.rb +13 -0
  70. data/lib/torch/nn/soft_margin_loss.rb +13 -0
  71. data/lib/torch/nn/softmax.rb +18 -0
  72. data/lib/torch/nn/softmax2d.rb +10 -0
  73. data/lib/torch/nn/softmin.rb +14 -0
  74. data/lib/torch/nn/softplus.rb +19 -0
  75. data/lib/torch/nn/triplet_margin_loss.rb +18 -0
  76. data/lib/torch/nn/weighted_loss.rb +10 -0
  77. data/lib/torch/optim/adadelta.rb +57 -0
  78. data/lib/torch/optim/adagrad.rb +71 -0
  79. data/lib/torch/optim/adam.rb +81 -0
  80. data/lib/torch/optim/adamax.rb +68 -0
  81. data/lib/torch/optim/adamw.rb +82 -0
  82. data/lib/torch/optim/asgd.rb +65 -0
  83. data/lib/torch/optim/lr_scheduler/lr_scheduler.rb +33 -0
  84. data/lib/torch/optim/lr_scheduler/step_lr.rb +17 -0
  85. data/lib/torch/optim/optimizer.rb +62 -0
  86. data/lib/torch/optim/rmsprop.rb +76 -0
  87. data/lib/torch/optim/rprop.rb +68 -0
  88. data/lib/torch/optim/sgd.rb +60 -0
  89. data/lib/torch/random.rb +10 -0
  90. data/lib/torch/tensor.rb +92 -21
  91. data/lib/torch/utils/data/data_loader.rb +15 -0
  92. data/lib/torch/utils/data/tensor_dataset.rb +8 -1
  93. data/lib/torch/version.rb +1 -1
  94. metadata +74 -3
data/lib/torch/nn/relu.rb CHANGED
@@ -1,12 +1,17 @@
 module Torch
   module NN
     class ReLU < Module
-      def initialize #(inplace: false)
-        # @inplace = inplace
+      def initialize(inplace: false)
+        super()
+        @inplace = inplace
       end

       def forward(input)
-        F.relu(input) #, inplace: @inplace)
+        F.relu(input, inplace: @inplace)
+      end
+
+      def extra_inspect
+        @inplace ? "inplace: true" : ""
       end
     end
   end
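
For reference, the new inplace: option mirrors PyTorch's ReLU. A minimal usage sketch (not part of the diff; tensor values are illustrative):

  require "torch"

  relu = Torch::NN::ReLU.new(inplace: true)
  x = Torch.tensor([-1.0, 0.0, 2.0])
  y = relu.call(x) # with inplace: true, F.relu is expected to modify x in place (PyTorch semantics)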
data/lib/torch/nn/rnn.rb ADDED
@@ -0,0 +1,22 @@
+module Torch
+  module NN
+    class RNN < RNNBase
+      def initialize(*args, **options)
+        if options.key?(:nonlinearity)
+          if options[:nonlinearity] == "tanh"
+            mode = "RNN_TANH"
+          elsif options[:nonlinearity] == "relu"
+            mode = "RNN_RELU"
+          else
+            raise ArgumentError, "Unknown nonlinearity: #{options[:nonlinearity]}"
+          end
+          options.delete(:nonlinearity)
+        else
+          mode = "RNN_TANH"
+        end
+
+        super(mode, *args, **options)
+      end
+    end
+  end
+end
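
A hedged construction example for the new RNN wrapper (sizes are illustrative). Note that RNNBase#forward below still raises NotImplementedYet in this release, so only construction and parameter setup are exercised:

  # nonlinearity: "tanh" / "relu" is translated to the internal RNN_TANH / RNN_RELU modes
  rnn = Torch::NN::RNN.new(10, 20, num_layers: 2, nonlinearity: "relu")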
data/lib/torch/nn/rnn_base.rb ADDED
@@ -0,0 +1,154 @@
+module Torch
+  module NN
+    class RNNBase < Module
+      def initialize(mode, input_size, hidden_size, num_layers: 1, bias: true,
+        batch_first: false, dropout: 0.0, bidirectional: false)
+
+        super()
+        @mode = mode
+        @input_size = input_size
+        @hidden_size = hidden_size
+        @num_layers = num_layers
+        @bias = bias
+        @batch_first = batch_first
+        @dropout = dropout.to_f
+        @bidirectional = bidirectional
+        num_directions = bidirectional ? 2 : 1
+
+        if !dropout.is_a?(Numeric) || !(dropout >= 0 && dropout <= 1)
+          raise ArgumentError, "dropout should be a number in range [0, 1] " +
+            "representing the probability of an element being " +
+            "zeroed"
+        end
+        if dropout > 0 && num_layers == 1
+          warn "dropout option adds dropout after all but last " +
+            "recurrent layer, so non-zero dropout expects " +
+            "num_layers greater than 1, but got dropout=#{dropout} and " +
+            "num_layers=#{num_layers}"
+        end
+
+        gate_size =
+          case mode
+          when "LSTM"
+            4 * hidden_size
+          when "GRU"
+            3 * hidden_size
+          when "RNN_TANH"
+            hidden_size
+          when "RNN_RELU"
+            hidden_size
+          else
+            raise ArgumentError, "Unrecognized RNN mode: #{mode}"
+          end
+
+        @all_weights = []
+        num_layers.times do |layer|
+          num_directions.times do |direction|
+            layer_input_size = layer == 0 ? input_size : hidden_size * num_directions
+
+            w_ih = Parameter.new(Torch::Tensor.new(gate_size, layer_input_size))
+            w_hh = Parameter.new(Torch::Tensor.new(gate_size, hidden_size))
+            b_ih = Parameter.new(Torch::Tensor.new(gate_size))
+            # Second bias vector included for CuDNN compatibility. Only one
+            # bias vector is needed in standard definition.
+            b_hh = Parameter.new(Torch::Tensor.new(gate_size))
+            layer_params = [w_ih, w_hh, b_ih, b_hh]
+
+            suffix = direction == 1 ? "_reverse" : ""
+            param_names = ["weight_ih_l%s%s", "weight_hh_l%s%s"]
+            if bias
+              param_names += ["bias_ih_l%s%s", "bias_hh_l%s%s"]
+            end
+            param_names.map! { |x| x % [layer, suffix] }
+
+            param_names.zip(layer_params) do |name, param|
+              instance_variable_set("@#{name}", param)
+            end
+            @all_weights << param_names
+          end
+        end
+
+        flatten_parameters
+        reset_parameters
+      end
+
+      def flatten_parameters
+        # no-op unless module is on the GPU and cuDNN is enabled
+      end
+
+      def _apply(fn)
+        ret = super
+        flatten_parameters
+        ret
+      end
+
+      def reset_parameters
+        stdv = 1.0 / Math.sqrt(@hidden_size)
+        parameters.each do |weight|
+          Init.uniform!(weight, a: -stdv, b: stdv)
+        end
+      end
+
+      def permute_hidden(hx, permutation)
+        raise NotImplementedYet
+      end
+
+      def forward(input, hx: nil)
+        raise NotImplementedYet
+
+        is_packed = false # TODO isinstance(input, PackedSequence)
+        if is_packed
+          input, batch_sizes, sorted_indices, unsorted_indices = input
+          max_batch_size = batch_sizes[0]
+          max_batch_size = max_batch_size.to_i
+        else
+          batch_sizes = nil
+          max_batch_size = @batch_first ? input.size(0) : input.size(1)
+          sorted_indices = nil
+          unsorted_indices = nil
+        end
+
+        if hx.nil?
+          num_directions = @bidirectional ? 2 : 1
+          hx = Torch.zeros(@num_layers * num_directions, max_batch_size,
+            @hidden_size, dtype: input.dtype, device: input.device)
+        else
+          # Each batch of the hidden state should match the input sequence that
+          # the user believes he/she is passing in.
+          hx = permute_hidden(hx, sorted_indices)
+        end
+
+        check_forward_args(input, hx, batch_sizes)
+        _rnn_impls = {
+          "RNN_TANH" => Torch.method(:_rnn_tanh),
+          "RNN_RELU" => Torch.method(:_rnn_relu)
+        }
+        _impl = _rnn_impls[@mode]
+        if batch_sizes.nil?
+          result = _impl.call(input, hx, _get_flat_weights, @bias, @num_layers,
+            @dropout, @training, @bidirectional, @batch_first)
+        else
+          result = _impl.call(input, batch_sizes, hx, _get_flat_weights, @bias,
+            @num_layers, @dropout, @training, @bidirectional)
+        end
+        output = result[0]
+        hidden = result[1]
+
+        if is_packed
+          raise NotImplementedYet
+          # output = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
+        end
+        [output, permute_hidden(hidden, unsorted_indices)]
+      end
+
+      # TODO add more parameters
+      def extra_inspect
+        s = String.new("%{input_size}, %{hidden_size}")
+        if @num_layers != 1
+          s += ", num_layers: %{num_layers}"
+        end
+        format(s, input_size: @input_size, hidden_size: @hidden_size, num_layers: @num_layers)
+      end
+    end
+  end
+end
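
The constructor above registers parameters under PyTorch-style names (weight_ih_l<layer>, weight_hh_l<layer>, bias_ih_l<layer>, bias_hh_l<layer>, with a _reverse suffix for the backward direction), and reset_parameters draws every weight uniformly from [-stdv, stdv] with stdv = 1 / sqrt(hidden_size). A small illustrative sketch; the instance_variable_get peek is only for inspection, not a public API:

  rnn = Torch::NN::RNN.new(10, 20)
  stdv = 1.0 / Math.sqrt(20)                          # ~0.2236, the uniform init bound used above
  w_ih = rnn.instance_variable_get(:@weight_ih_l0)    # gate_size x input_size parameter for layer 0
  w_hh = rnn.instance_variable_get(:@weight_hh_l0)    # gate_size x hidden_size parameter for layer 0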
data/lib/torch/nn/sequential.rb CHANGED
@@ -2,28 +2,19 @@ module Torch
   module NN
     class Sequential < Module
       def initialize(*args)
-        @modules = {}
+        super()
         # TODO support hash arg (named modules)
         args.each_with_index do |mod, idx|
          add_module(idx.to_s, mod)
         end
       end

-      def add_module(name, mod)
-        # TODO add checks
-        @modules[name] = mod
-      end
-
       def forward(input)
         @modules.values.each do |mod|
           input = mod.call(input)
         end
         input
       end
-
-      def parameters
-        @modules.flat_map { |_, mod| mod.parameters }
-      end
     end
   end
 end
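
Sequential now defers add_module and parameters to the shared Module base class instead of tracking @modules itself. A minimal usage sketch (layer sizes and batch size are illustrative):

  model = Torch::NN::Sequential.new(
    Torch::NN::Linear.new(784, 64),
    Torch::NN::ReLU.new,
    Torch::NN::Linear.new(64, 10)
  )
  x = Torch.randn(8, 784)
  output = model.call(x)   # each child module is applied in insertion order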
data/lib/torch/nn/sigmoid.rb ADDED
@@ -0,0 +1,9 @@
+module Torch
+  module NN
+    class Sigmoid < Module
+      def forward(input)
+        Torch.sigmoid(input)
+      end
+    end
+  end
+end
data/lib/torch/nn/smooth_l1_loss.rb ADDED
@@ -0,0 +1,13 @@
+module Torch
+  module NN
+    class SmoothL1Loss < Loss
+      def initialize(reduction: "mean")
+        super(reduction)
+      end
+
+      def forward(input, target)
+        F.smooth_l1_loss(input, target, reduction: @reduction)
+      end
+    end
+  end
+end
data/lib/torch/nn/soft_margin_loss.rb ADDED
@@ -0,0 +1,13 @@
+module Torch
+  module NN
+    class SoftMarginLoss < Loss
+      def initialize(reduction: "mean")
+        super(reduction)
+      end
+
+      def forward(input, target)
+        F.soft_margin_loss(input, target, reduction: @reduction)
+      end
+    end
+  end
+end
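
These loss classes are thin wrappers that forward to the corresponding F.* function with the stored reduction. A hedged usage sketch, assuming Module#call forwards both arguments to forward (tensors are illustrative):

  criterion = Torch::NN::SmoothL1Loss.new(reduction: "mean")
  input = Torch.randn(3, 5)
  target = Torch.randn(3, 5)
  loss = criterion.call(input, target)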
data/lib/torch/nn/softmax.rb ADDED
@@ -0,0 +1,18 @@
+module Torch
+  module NN
+    class Softmax < Module
+      def initialize(dim: nil)
+        super()
+        @dim = dim
+      end
+
+      def forward(input)
+        F.softmax(input, dim: @dim)
+      end
+
+      def extra_inspect
+        format("dim: %s", @dim)
+      end
+    end
+  end
+end
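
A quick sketch of the new Softmax module; dim: picks the dimension along which the outputs are normalized to sum to 1 (shape is illustrative):

  softmax = Torch::NN::Softmax.new(dim: 1)
  x = Torch.randn(2, 3)
  probs = softmax.call(x)   # each row of probs sums to approximately 1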
data/lib/torch/nn/softmax2d.rb ADDED
@@ -0,0 +1,10 @@
+module Torch
+  module NN
+    class Softmax2d < Module
+      def forward(input)
+        raise ArgumentError, "Softmax2d requires a 4D tensor as input" unless input.dim == 4
+        F.softmax(input, dim: 1)
+      end
+    end
+  end
+end
data/lib/torch/nn/softmin.rb ADDED
@@ -0,0 +1,14 @@
+module Torch
+  module NN
+    class Softmin < Module
+      def initialize(dim: nil)
+        super()
+        @dim = dim
+      end
+
+      def forward(input)
+        F.softmin(input, dim: @dim)
+      end
+    end
+  end
+end
data/lib/torch/nn/softplus.rb ADDED
@@ -0,0 +1,19 @@
+module Torch
+  module NN
+    class Softplus < Module
+      def initialize(beta: 1, threshold: 20)
+        super()
+        @beta = beta
+        @threshold = threshold
+      end
+
+      def forward(input)
+        F.softplus(input, beta: @beta, threshold: @threshold)
+      end
+
+      def extra_inspect
+        format("beta: %s, threshold: %s", @beta, @threshold)
+      end
+    end
+  end
+end
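
For orientation, F.softplus follows the usual PyTorch definition. A plain-Ruby reference for the elementwise formula (this is not the library implementation, just the math it is expected to compute):

  # softplus(x) = (1 / beta) * log(1 + exp(beta * x)),
  # falling back to the identity once beta * x exceeds threshold, for numerical stability
  def softplus_scalar(x, beta: 1, threshold: 20)
    bx = beta * x
    bx > threshold ? x : Math.log(1 + Math.exp(bx)) / beta
  end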
data/lib/torch/nn/triplet_margin_loss.rb ADDED
@@ -0,0 +1,18 @@
+module Torch
+  module NN
+    class TripletMarginLoss < Loss
+      def initialize(margin: 1.0, p: 2.0, eps: 1e-6, swap: false, reduction: "mean")
+        super(reduction)
+        @margin = margin
+        @p = p
+        @eps = eps
+        @swap = swap
+      end
+
+      def forward(anchor, positive, negative)
+        F.triplet_margin_loss(anchor, positive, negative, margin: @margin, p: @p,
+          eps: @eps, swap: @swap, reduction: @reduction)
+      end
+    end
+  end
+end
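
A hedged usage sketch for the triplet loss, assuming Module#call forwards all three tensors to forward (embedding sizes are illustrative):

  criterion = Torch::NN::TripletMarginLoss.new(margin: 1.0, p: 2.0)
  anchor   = Torch.randn(16, 128)
  positive = Torch.randn(16, 128)
  negative = Torch.randn(16, 128)
  loss = criterion.call(anchor, positive, negative)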
data/lib/torch/nn/weighted_loss.rb ADDED
@@ -0,0 +1,10 @@
+module Torch
+  module NN
+    class WeightedLoss < Loss
+      def initialize(weight, reduction)
+        super(reduction)
+        register_buffer("weight", weight)
+      end
+    end
+  end
+end
data/lib/torch/optim/adadelta.rb ADDED
@@ -0,0 +1,57 @@
+# ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adadelta.py
+module Torch
+  module Optim
+    class Adadelta < Optimizer
+      def initialize(params, lr: 1.0, rho: 0.9, eps: 1e-6, weight_decay: 0)
+        raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
+        raise ArgumentError, "Invalid rho value: #{rho}" if rho < 0 || rho > 1
+        raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
+        raise ArgumentError, "Invalid weight_decay value: #{weight_decay}" if weight_decay < 0
+
+        defaults = {lr: lr, rho: rho, eps: eps, weight_decay: weight_decay}
+        super(params, defaults)
+      end
+
+      def step(closure = nil)
+        loss = nil
+        if closure
+          loss = closure.call
+        end
+
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            next unless p.grad
+            grad = p.grad.data
+            if grad.sparse?
+              raise Error, "Adadelta does not support sparse gradients"
+            end
+            state = @state[p]
+
+            if state.size == 0
+              state[:step] = 0
+              state[:square_avg] = Torch.zeros_like(p.data)
+              state[:acc_delta] = Torch.zeros_like(p.data)
+            end
+
+            square_avg, acc_delta = state[:square_avg], state[:acc_delta]
+            rho, eps = group[:rho], group[:eps]
+
+            state[:step] += 1
+
+            if group[:weight_decay] != 0
+              grad = grad.add(group[:weight_decay], p.data)
+            end
+
+            square_avg.mul!(rho).addcmul!(1 - rho, grad, grad)
+            std = square_avg.add(eps).sqrt!
+            delta = acc_delta.add(eps).sqrt!.div!(std).mul!(grad)
+            p.data.add!(-group[:lr], delta)
+            acc_delta.mul!(rho).addcmul!(1 - rho, delta, delta)
+          end
+        end
+
+        loss
+      end
+    end
+  end
+end
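
A minimal training-step sketch with the new optimizer; model, criterion, input and target are placeholders for whatever the application defines:

  optimizer = Torch::Optim::Adadelta.new(model.parameters, lr: 1.0, rho: 0.9)

  optimizer.zero_grad
  output = model.call(input)
  loss = criterion.call(output, target)
  loss.backward
  optimizer.step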
data/lib/torch/optim/adagrad.rb ADDED
@@ -0,0 +1,71 @@
+# ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adagrad.py
+module Torch
+  module Optim
+    class Adagrad < Optimizer
+      def initialize(params, lr: 1e-2, lr_decay: 0, weight_decay: 0, initial_accumulator_value: 0, eps: 1e-10)
+        raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
+        raise ArgumentError, "Invalid lr_decay value: #{lr_decay}" if lr_decay < 0
+        raise ArgumentError, "Invalid initial_accumulator_value value: #{initial_accumulator_value}" if initial_accumulator_value < 0
+        raise ArgumentError, "Invalid weight_decay value: #{weight_decay}" if weight_decay < 0
+        raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
+
+        defaults = {lr: lr, lr_decay: lr_decay, eps: eps, weight_decay: weight_decay, initial_accumulator_value: initial_accumulator_value}
+        super(params, defaults)
+
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            state = @state[p]
+            state[:step] = 0
+            state[:sum] = Torch.full_like(p.data, initial_accumulator_value)
+          end
+        end
+      end
+
+      def share_memory
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            state = @state[p]
+            state[:sum].share_memory!
+          end
+        end
+      end
+
+      def step(closure = nil)
+        loss = nil
+        if closure
+          loss = closure.call
+        end
+
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            next unless p.grad
+
+            grad = p.grad.data
+            state = @state[p]
+
+            state[:step] += 1
+
+            if group[:weight_decay] != 0
+              if p.grad.data.sparse?
+                raise Error, "weight_decay option is not compatible with sparse gradients"
+              end
+              grad = grad.add(group[:weight_decay], p.data)
+            end
+
+            clr = group[:lr] / (1 + (state[:step] - 1) * group[:lr_decay])
+
+            if grad.sparse?
+              raise NotImplementedYet
+            else
+              state[:sum].addcmul!(1, grad, grad)
+              std = state[:sum].sqrt.add!(group[:eps])
+              p.data.addcdiv!(-clr, grad, std)
+            end
+          end
+        end
+
+        loss
+      end
+    end
+  end
+end
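
Usage mirrors the other optimizers; the interesting detail above is the decayed effective learning rate, clr = lr / (1 + (step - 1) * lr_decay), applied against the running sum of squared gradients. An illustrative sketch (model is a placeholder):

  optimizer = Torch::Optim::Adagrad.new(model.parameters, lr: 0.01, lr_decay: 1e-4)
  # after 1001 steps with these settings: clr = 0.01 / (1 + 1000 * 1e-4) = 0.01 / 1.1, roughly 0.0091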