torch-rb 0.1.0 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +40 -0
  3. data/LICENSE.txt +46 -22
  4. data/README.md +85 -19
  5. data/ext/torch/ext.cpp +274 -256
  6. data/ext/torch/extconf.rb +9 -0
  7. data/ext/torch/nn_functions.cpp +595 -0
  8. data/ext/torch/nn_functions.hpp +6 -0
  9. data/ext/torch/templates.hpp +250 -0
  10. data/ext/torch/tensor_functions.cpp +1860 -0
  11. data/ext/torch/tensor_functions.hpp +6 -0
  12. data/ext/torch/torch_functions.cpp +2875 -0
  13. data/ext/torch/torch_functions.hpp +6 -0
  14. data/lib/torch.rb +199 -84
  15. data/lib/torch/ext.bundle +0 -0
  16. data/lib/torch/inspector.rb +52 -25
  17. data/lib/torch/native/dispatcher.rb +48 -0
  18. data/lib/torch/native/function.rb +78 -0
  19. data/lib/torch/native/generator.rb +149 -0
  20. data/lib/torch/native/native_functions.yaml +6837 -0
  21. data/lib/torch/native/parser.rb +97 -0
  22. data/lib/torch/nn/alpha_dropout.rb +9 -0
  23. data/lib/torch/nn/avg_pool2d.rb +14 -0
  24. data/lib/torch/nn/avg_poolnd.rb +9 -0
  25. data/lib/torch/nn/bce_loss.rb +13 -0
  26. data/lib/torch/nn/bce_with_logits_loss.rb +15 -0
  27. data/lib/torch/nn/bilinear.rb +38 -0
  28. data/lib/torch/nn/conv2d.rb +14 -29
  29. data/lib/torch/nn/convnd.rb +41 -0
  30. data/lib/torch/nn/cosine_embedding_loss.rb +14 -0
  31. data/lib/torch/nn/cosine_similarity.rb +15 -0
  32. data/lib/torch/nn/cross_entropy_loss.rb +14 -0
  33. data/lib/torch/nn/ctc_loss.rb +15 -0
  34. data/lib/torch/nn/dropout.rb +9 -0
  35. data/lib/torch/nn/dropout2d.rb +9 -0
  36. data/lib/torch/nn/dropout3d.rb +9 -0
  37. data/lib/torch/nn/dropoutnd.rb +15 -0
  38. data/lib/torch/nn/embedding.rb +52 -0
  39. data/lib/torch/nn/embedding_bag.rb +34 -0
  40. data/lib/torch/nn/feature_alpha_dropout.rb +9 -0
  41. data/lib/torch/nn/functional.rb +194 -11
  42. data/lib/torch/nn/hinge_embedding_loss.rb +14 -0
  43. data/lib/torch/nn/identity.rb +14 -0
  44. data/lib/torch/nn/init.rb +58 -1
  45. data/lib/torch/nn/kl_div_loss.rb +13 -0
  46. data/lib/torch/nn/l1_loss.rb +13 -0
  47. data/lib/torch/nn/leaky_relu.rb +20 -0
  48. data/lib/torch/nn/linear.rb +12 -11
  49. data/lib/torch/nn/log_softmax.rb +14 -0
  50. data/lib/torch/nn/loss.rb +10 -0
  51. data/lib/torch/nn/margin_ranking_loss.rb +14 -0
  52. data/lib/torch/nn/max_pool2d.rb +9 -0
  53. data/lib/torch/nn/max_poolnd.rb +19 -0
  54. data/lib/torch/nn/module.rb +184 -19
  55. data/lib/torch/nn/mse_loss.rb +2 -2
  56. data/lib/torch/nn/multi_label_margin_loss.rb +13 -0
  57. data/lib/torch/nn/multi_label_soft_margin_loss.rb +13 -0
  58. data/lib/torch/nn/multi_margin_loss.rb +17 -0
  59. data/lib/torch/nn/nll_loss.rb +14 -0
  60. data/lib/torch/nn/pairwise_distance.rb +16 -0
  61. data/lib/torch/nn/parameter.rb +4 -0
  62. data/lib/torch/nn/poisson_nll_loss.rb +16 -0
  63. data/lib/torch/nn/prelu.rb +19 -0
  64. data/lib/torch/nn/relu.rb +8 -3
  65. data/lib/torch/nn/rnn.rb +22 -0
  66. data/lib/torch/nn/rnn_base.rb +154 -0
  67. data/lib/torch/nn/sequential.rb +1 -10
  68. data/lib/torch/nn/sigmoid.rb +9 -0
  69. data/lib/torch/nn/smooth_l1_loss.rb +13 -0
  70. data/lib/torch/nn/soft_margin_loss.rb +13 -0
  71. data/lib/torch/nn/softmax.rb +18 -0
  72. data/lib/torch/nn/softmax2d.rb +10 -0
  73. data/lib/torch/nn/softmin.rb +14 -0
  74. data/lib/torch/nn/softplus.rb +19 -0
  75. data/lib/torch/nn/triplet_margin_loss.rb +18 -0
  76. data/lib/torch/nn/weighted_loss.rb +10 -0
  77. data/lib/torch/optim/adadelta.rb +57 -0
  78. data/lib/torch/optim/adagrad.rb +71 -0
  79. data/lib/torch/optim/adam.rb +81 -0
  80. data/lib/torch/optim/adamax.rb +68 -0
  81. data/lib/torch/optim/adamw.rb +82 -0
  82. data/lib/torch/optim/asgd.rb +65 -0
  83. data/lib/torch/optim/lr_scheduler/lr_scheduler.rb +33 -0
  84. data/lib/torch/optim/lr_scheduler/step_lr.rb +17 -0
  85. data/lib/torch/optim/optimizer.rb +62 -0
  86. data/lib/torch/optim/rmsprop.rb +76 -0
  87. data/lib/torch/optim/rprop.rb +68 -0
  88. data/lib/torch/optim/sgd.rb +60 -0
  89. data/lib/torch/random.rb +10 -0
  90. data/lib/torch/tensor.rb +92 -21
  91. data/lib/torch/utils/data/data_loader.rb +15 -0
  92. data/lib/torch/utils/data/tensor_dataset.rb +8 -1
  93. data/lib/torch/version.rb +1 -1
  94. metadata +74 -3
data/lib/torch/nn/relu.rb CHANGED
@@ -1,12 +1,17 @@
 module Torch
   module NN
     class ReLU < Module
-      def initialize #(inplace: false)
-        # @inplace = inplace
+      def initialize(inplace: false)
+        super()
+        @inplace = inplace
       end
 
       def forward(input)
-        F.relu(input) #, inplace: @inplace)
+        F.relu(input, inplace: @inplace)
+      end
+
+      def extra_inspect
+        @inplace ? "inplace: true" : ""
       end
     end
   end
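The rewritten module now honors the inplace: option instead of leaving it commented out. A minimal usage sketch (tensor values are illustrative; assumes the usual torch-rb convention of invoking a module via call, and that F.relu with inplace: true mutates its input as in PyTorch):

    relu = Torch::NN::ReLU.new(inplace: true)
    x = Torch.tensor([[-1.0, 2.0], [3.0, -4.0]])
    relu.call(x)  # negative entries become 0.0; with inplace: true, x itself is modified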
data/lib/torch/nn/rnn.rb ADDED
@@ -0,0 +1,22 @@
+module Torch
+  module NN
+    class RNN < RNNBase
+      def initialize(*args, **options)
+        if options.key?(:nonlinearity)
+          if options[:nonlinearity] == "tanh"
+            mode = "RNN_TANH"
+          elsif options[:nonlinearity] == "relu"
+            mode = "RNN_RELU"
+          else
+            raise ArgumentError, "Unknown nonlinearity: #{options[:nonlinearity]}"
+          end
+          options.delete(:nonlinearity)
+        else
+          mode = "RNN_TANH"
+        end
+
+        super(mode, *args, **options)
+      end
+    end
+  end
+end
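The subclass only maps the nonlinearity: keyword onto an internal mode string before delegating to RNNBase. A construction-only sketch (the forward pass is not yet usable in this release, since RNNBase#forward raises NotImplementedYet, as shown in the next file):

    Torch::NN::RNN.new(10, 20, num_layers: 2, nonlinearity: "relu")
    # delegates to RNNBase#initialize("RNN_RELU", 10, 20, num_layers: 2)

    Torch::NN::RNN.new(10, 20, nonlinearity: "gelu")
    # => ArgumentError: Unknown nonlinearity: gelu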
data/lib/torch/nn/rnn_base.rb ADDED
@@ -0,0 +1,154 @@
+module Torch
+  module NN
+    class RNNBase < Module
+      def initialize(mode, input_size, hidden_size, num_layers: 1, bias: true,
+        batch_first: false, dropout: 0.0, bidirectional: false)
+
+        super()
+        @mode = mode
+        @input_size = input_size
+        @hidden_size = hidden_size
+        @num_layers = num_layers
+        @bias = bias
+        @batch_first = batch_first
+        @dropout = dropout.to_f
+        @bidirectional = bidirectional
+        num_directions = bidirectional ? 2 : 1
+
+        if !dropout.is_a?(Numeric) || !(dropout >= 0 && dropout <= 1)
+          raise ArgumentError, "dropout should be a number in range [0, 1] " +
+                               "representing the probability of an element being " +
+                               "zeroed"
+        end
+        if dropout > 0 && num_layers == 1
+          warn "dropout option adds dropout after all but last " +
+               "recurrent layer, so non-zero dropout expects " +
+               "num_layers greater than 1, but got dropout=#{dropout} and " +
+               "num_layers=#{num_layers}"
+        end
+
+        gate_size =
+          case mode
+          when "LSTM"
+            4 * hidden_size
+          when "GRU"
+            3 * hidden_size
+          when "RNN_TANH"
+            hidden_size
+          when "RNN_RELU"
+            hidden_size
+          else
+            raise ArgumentError, "Unrecognized RNN mode: #{mode}"
+          end
+
+        @all_weights = []
+        num_layers.times do |layer|
+          num_directions.times do |direction|
+            layer_input_size = layer == 0 ? input_size : hidden_size * num_directions
+
+            w_ih = Parameter.new(Torch::Tensor.new(gate_size, layer_input_size))
+            w_hh = Parameter.new(Torch::Tensor.new(gate_size, hidden_size))
+            b_ih = Parameter.new(Torch::Tensor.new(gate_size))
+            # Second bias vector included for CuDNN compatibility. Only one
+            # bias vector is needed in standard definition.
+            b_hh = Parameter.new(Torch::Tensor.new(gate_size))
+            layer_params = [w_ih, w_hh, b_ih, b_hh]
+
+            suffix = direction == 1 ? "_reverse" : ""
+            param_names = ["weight_ih_l%s%s", "weight_hh_l%s%s"]
+            if bias
+              param_names += ["bias_ih_l%s%s", "bias_hh_l%s%s"]
+            end
+            param_names.map! { |x| x % [layer, suffix] }
+
+            param_names.zip(layer_params) do |name, param|
+              instance_variable_set("@#{name}", param)
+            end
+            @all_weights << param_names
+          end
+        end
+
+        flatten_parameters
+        reset_parameters
+      end
+
+      def flatten_parameters
+        # no-op unless module is on the GPU and cuDNN is enabled
+      end
+
+      def _apply(fn)
+        ret = super
+        flatten_parameters
+        ret
+      end
+
+      def reset_parameters
+        stdv = 1.0 / Math.sqrt(@hidden_size)
+        parameters.each do |weight|
+          Init.uniform!(weight, a: -stdv, b: stdv)
+        end
+      end
+
+      def permute_hidden(hx, permutation)
+        raise NotImplementedYet
+      end
+
+      def forward(input, hx: nil)
+        raise NotImplementedYet
+
+        is_packed = false # TODO isinstance(input, PackedSequence)
+        if is_packed
+          input, batch_sizes, sorted_indices, unsorted_indices = input
+          max_batch_size = batch_sizes[0]
+          max_batch_size = max_batch_size.to_i
+        else
+          batch_sizes = nil
+          max_batch_size = @batch_first ? input.size(0) : input.size(1)
+          sorted_indices = nil
+          unsorted_indices = nil
+        end
+
+        if hx.nil?
+          num_directions = @bidirectional ? 2 : 1
+          hx = Torch.zeros(@num_layers * num_directions, max_batch_size,
+            @hidden_size, dtype: input.dtype, device: input.device)
+        else
+          # Each batch of the hidden state should match the input sequence that
+          # the user believes he/she is passing in.
+          hx = permute_hidden(hx, sorted_indices)
+        end
+
+        check_forward_args(input, hx, batch_sizes)
+        _rnn_impls = {
+          "RNN_TANH" => Torch.method(:_rnn_tanh),
+          "RNN_RELU" => Torch.method(:_rnn_relu)
+        }
+        _impl = _rnn_impls[@mode]
+        if batch_sizes.nil?
+          result = _impl.call(input, hx, _get_flat_weights, @bias, @num_layers,
+            @dropout, @training, @bidirectional, @batch_first)
+        else
+          result = _impl.call(input, batch_sizes, hx, _get_flat_weights, @bias,
+            @num_layers, @dropout, @training, @bidirectional)
+        end
+        output = result[0]
+        hidden = result[1]
+
+        if is_packed
+          raise NotImplementedYet
+          # output = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
+        end
+        [output, permute_hidden(hidden, unsorted_indices)]
+      end
+
+      # TODO add more parameters
+      def extra_inspect
+        s = String.new("%{input_size}, %{hidden_size}")
+        if @num_layers != 1
+          s += ", num_layers: %{num_layers}"
+        end
+        format(s, input_size: @input_size, hidden_size: @hidden_size, num_layers: @num_layers)
+      end
+    end
+  end
+end
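The "%s%s" format strings above yield PyTorch-style weight and bias names, registered as instance variables per layer and direction. For example, a 2-layer bidirectional RNN would register names like these (derived from the templates; listing abbreviated):

    # layer 0, forward:  @weight_ih_l0, @weight_hh_l0, @bias_ih_l0, @bias_hh_l0
    # layer 0, reverse:  @weight_ih_l0_reverse, @weight_hh_l0_reverse, ...
    # layer 1, forward:  @weight_ih_l1, @weight_hh_l1, ...
    # layer 1, reverse:  @weight_ih_l1_reverse, ...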
data/lib/torch/nn/sequential.rb CHANGED
@@ -2,28 +2,19 @@ module Torch
   module NN
     class Sequential < Module
       def initialize(*args)
-        @modules = {}
+        super()
         # TODO support hash arg (named modules)
         args.each_with_index do |mod, idx|
          add_module(idx.to_s, mod)
        end
      end
 
-      def add_module(name, mod)
-        # TODO add checks
-        @modules[name] = mod
-      end
-
      def forward(input)
        @modules.values.each do |mod|
          input = mod.call(input)
        end
        input
      end
-
-      def parameters
-        @modules.flat_map { |_, mod| mod.parameters }
-      end
    end
  end
end
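The container now delegates module registration and parameter collection to the shared Module base class instead of keeping its own hash; typical usage is unchanged. A minimal sketch:

    model = Torch::NN::Sequential.new(
      Torch::NN::Linear.new(10, 32),
      Torch::NN::ReLU.new,
      Torch::NN::Linear.new(32, 1)
    )
    output = model.call(Torch.randn(8, 10))  # runs each child module in order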
data/lib/torch/nn/sigmoid.rb ADDED
@@ -0,0 +1,9 @@
+module Torch
+  module NN
+    class Sigmoid < Module
+      def forward(input)
+        Torch.sigmoid(input)
+      end
+    end
+  end
+end
data/lib/torch/nn/smooth_l1_loss.rb ADDED
@@ -0,0 +1,13 @@
+module Torch
+  module NN
+    class SmoothL1Loss < Loss
+      def initialize(reduction: "mean")
+        super(reduction)
+      end
+
+      def forward(input, target)
+        F.smooth_l1_loss(input, target, reduction: @reduction)
+      end
+    end
+  end
+end
data/lib/torch/nn/soft_margin_loss.rb ADDED
@@ -0,0 +1,13 @@
+module Torch
+  module NN
+    class SoftMarginLoss < Loss
+      def initialize(reduction: "mean")
+        super(reduction)
+      end
+
+      def forward(input, target)
+        F.soft_margin_loss(input, target, reduction: @reduction)
+      end
+    end
+  end
+end
data/lib/torch/nn/softmax.rb ADDED
@@ -0,0 +1,18 @@
+module Torch
+  module NN
+    class Softmax < Module
+      def initialize(dim: nil)
+        super()
+        @dim = dim
+      end
+
+      def forward(input)
+        F.softmax(input, dim: @dim)
+      end
+
+      def extra_inspect
+        format("dim: %s", @dim)
+      end
+    end
+  end
+end
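A short sketch of the dim: option in use (values illustrative):

    softmax = Torch::NN::Softmax.new(dim: 1)
    probs = softmax.call(Torch.randn(2, 3))
    # each row of probs is non-negative and sums to 1 along dimension 1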
data/lib/torch/nn/softmax2d.rb ADDED
@@ -0,0 +1,10 @@
+module Torch
+  module NN
+    class Softmax2d < Module
+      def forward(input)
+        raise ArgumentError, "Softmax2d requires a 4D tensor as input" unless input.dim == 4
+        F.softmax(input, dim: 1)
+      end
+    end
+  end
+end
data/lib/torch/nn/softmin.rb ADDED
@@ -0,0 +1,14 @@
+module Torch
+  module NN
+    class Softmin < Module
+      def initialize(dim: nil)
+        super()
+        @dim = dim
+      end
+
+      def forward(input)
+        F.softmin(input, dim: @dim)
+      end
+    end
+  end
+end
data/lib/torch/nn/softplus.rb ADDED
@@ -0,0 +1,19 @@
+module Torch
+  module NN
+    class Softplus < Module
+      def initialize(beta: 1, threshold: 20)
+        super()
+        @beta = beta
+        @threshold = threshold
+      end
+
+      def forward(input)
+        F.softplus(input, beta: @beta, threshold: @threshold)
+      end
+
+      def extra_inspect
+        format("beta: %s, threshold: %s", @beta, @threshold)
+      end
+    end
+  end
+end
data/lib/torch/nn/triplet_margin_loss.rb ADDED
@@ -0,0 +1,18 @@
+module Torch
+  module NN
+    class TripletMarginLoss < Loss
+      def initialize(margin: 1.0, p: 2.0, eps: 1e-6, swap: false, reduction: "mean")
+        super(reduction)
+        @margin = margin
+        @p = p
+        @eps = eps
+        @swap = swap
+      end
+
+      def forward(anchor, positive, negative)
+        F.triplet_margin_loss(anchor, positive, negative, margin: @margin, p: @p,
+          eps: @eps, swap: @swap, reduction: @reduction)
+      end
+    end
+  end
+end
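A minimal sketch of the loss in use, assuming Module#call forwards all three tensors to forward (tensor shapes illustrative):

    loss_fn = Torch::NN::TripletMarginLoss.new(margin: 1.0, p: 2.0)
    anchor   = Torch.randn(16, 128)
    positive = Torch.randn(16, 128)
    negative = Torch.randn(16, 128)
    loss = loss_fn.call(anchor, positive, negative)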
data/lib/torch/nn/weighted_loss.rb ADDED
@@ -0,0 +1,10 @@
+module Torch
+  module NN
+    class WeightedLoss < Loss
+      def initialize(weight, reduction)
+        super(reduction)
+        register_buffer("weight", weight)
+      end
+    end
+  end
+end
data/lib/torch/optim/adadelta.rb ADDED
@@ -0,0 +1,57 @@
+# ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adadelta.py
+module Torch
+  module Optim
+    class Adadelta < Optimizer
+      def initialize(params, lr: 1.0, rho: 0.9, eps: 1e-6, weight_decay: 0)
+        raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
+        raise ArgumentError, "Invalid rho value: #{rho}" if rho < 0 || rho > 1
+        raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
+        raise ArgumentError, "Invalid weight_decay value: #{weight_decay}" if weight_decay < 0
+
+        defaults = {lr: lr, rho: rho, eps: eps, weight_decay: weight_decay}
+        super(params, defaults)
+      end
+
+      def step(closure = nil)
+        loss = nil
+        if closure
+          loss = closure.call
+        end
+
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            next unless p.grad
+            grad = p.grad.data
+            if grad.sparse?
+              raise Error, "Adadelta does not support sparse gradients"
+            end
+            state = @state[p]
+
+            if state.size == 0
+              state[:step] = 0
+              state[:square_avg] = Torch.zeros_like(p.data)
+              state[:acc_delta] = Torch.zeros_like(p.data)
+            end
+
+            square_avg, acc_delta = state[:square_avg], state[:acc_delta]
+            rho, eps = group[:rho], group[:eps]
+
+            state[:step] += 1
+
+            if group[:weight_decay] != 0
+              grad = grad.add(group[:weight_decay], p.data)
+            end
+
+            square_avg.mul!(rho).addcmul!(1 - rho, grad, grad)
+            std = square_avg.add(eps).sqrt!
+            delta = acc_delta.add(eps).sqrt!.div!(std).mul!(grad)
+            p.data.add!(-group[:lr], delta)
+            acc_delta.mul!(rho).addcmul!(1 - rho, delta, delta)
+          end
+        end
+
+        loss
+      end
+    end
+  end
+end
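A minimal training-step sketch with the new optimizer (model, loss, and data are placeholders):

    model = Torch::NN::Linear.new(4, 1)
    optimizer = Torch::Optim::Adadelta.new(model.parameters, lr: 1.0)
    loss_fn = Torch::NN::MSELoss.new

    x = Torch.randn(32, 4)
    y = Torch.randn(32, 1)

    optimizer.zero_grad
    loss = loss_fn.call(model.call(x), y)
    loss.backward
    optimizer.step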
data/lib/torch/optim/adagrad.rb ADDED
@@ -0,0 +1,71 @@
+# ported from https://github.com/pytorch/pytorch/blob/master/torch/optim/adagrad.py
+module Torch
+  module Optim
+    class Adagrad < Optimizer
+      def initialize(params, lr: 1e-2, lr_decay: 0, weight_decay: 0, initial_accumulator_value: 0, eps: 1e-10)
+        raise ArgumentError, "Invalid learning rate: #{lr}" if lr < 0
+        raise ArgumentError, "Invalid lr_decay value: #{lr_decay}" if lr_decay < 0
+        raise ArgumentError, "Invalid initial_accumulator_value value: #{initial_accumulator_value}" if initial_accumulator_value < 0
+        raise ArgumentError, "Invalid weight_decay value: #{weight_decay}" if weight_decay < 0
+        raise ArgumentError, "Invalid epsilon value: #{eps}" if eps < 0
+
+        defaults = {lr: lr, lr_decay: lr_decay, eps: eps, weight_decay: weight_decay, initial_accumulator_value: initial_accumulator_value}
+        super(params, defaults)
+
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            state = @state[p]
+            state[:step] = 0
+            state[:sum] = Torch.full_like(p.data, initial_accumulator_value)
+          end
+        end
+      end
+
+      def share_memory
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            state = @state[p]
+            state[:sum].share_memory!
+          end
+        end
+      end
+
+      def step(closure = nil)
+        loss = nil
+        if closure
+          loss = closure.call
+        end
+
+        @param_groups.each do |group|
+          group[:params].each do |p|
+            next unless p.grad
+
+            grad = p.grad.data
+            state = @state[p]
+
+            state[:step] += 1
+
+            if group[:weight_decay] != 0
+              if p.grad.data.sparse?
+                raise Error, "weight_decay option is not compatible with sparse gradients"
+              end
+              grad = grad.add(group[:weight_decay], p.data)
+            end
+
+            clr = group[:lr] / (1 + (state[:step] - 1) * group[:lr_decay])
+
+            if grad.sparse?
+              raise NotImplementedYet
+            else
+              state[:sum].addcmul!(1, grad, grad)
+              std = state[:sum].sqrt.add!(group[:eps])
+              p.data.addcdiv!(-clr, grad, std)
+            end
+          end
+        end
+
+        loss
+      end
+    end
+  end
+end
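Both optimizers also accept an optional closure, mirroring PyTorch: step re-evaluates the loss when one is given. Since closure is a positional parameter rather than a block, it is passed as a lambda; a sketch reusing the placeholder model, loss_fn, x, and y from the Adadelta example above:

    closure = lambda do
      optimizer.zero_grad
      loss = loss_fn.call(model.call(x), y)
      loss.backward
      loss
    end

    optimizer = Torch::Optim::Adagrad.new(model.parameters, lr: 0.01)
    loss = optimizer.step(closure)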