torchvision 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/LICENSE.txt +1 -1
- data/README.md +133 -5
- data/lib/torchvision.rb +40 -1
- data/lib/torchvision/datasets/cifar10.rb +117 -0
- data/lib/torchvision/datasets/cifar100.rb +41 -0
- data/lib/torchvision/datasets/dataset_folder.rb +91 -0
- data/lib/torchvision/datasets/fashion_mnist.rb +30 -0
- data/lib/torchvision/datasets/image_folder.rb +12 -0
- data/lib/torchvision/datasets/kmnist.rb +30 -0
- data/lib/torchvision/datasets/mnist.rb +47 -76
- data/lib/torchvision/datasets/vision_dataset.rb +67 -0
- data/lib/torchvision/models/alexnet.rb +42 -0
- data/lib/torchvision/models/basic_block.rb +46 -0
- data/lib/torchvision/models/bottleneck.rb +47 -0
- data/lib/torchvision/models/resnet.rb +129 -0
- data/lib/torchvision/models/resnet101.rb +9 -0
- data/lib/torchvision/models/resnet152.rb +9 -0
- data/lib/torchvision/models/resnet18.rb +9 -0
- data/lib/torchvision/models/resnet34.rb +9 -0
- data/lib/torchvision/models/resnet50.rb +9 -0
- data/lib/torchvision/models/resnext101_32x8d.rb +11 -0
- data/lib/torchvision/models/resnext50_32x4d.rb +11 -0
- data/lib/torchvision/models/vgg.rb +93 -0
- data/lib/torchvision/models/vgg11.rb +9 -0
- data/lib/torchvision/models/vgg11_bn.rb +9 -0
- data/lib/torchvision/models/vgg13.rb +9 -0
- data/lib/torchvision/models/vgg13_bn.rb +9 -0
- data/lib/torchvision/models/vgg16.rb +9 -0
- data/lib/torchvision/models/vgg16_bn.rb +9 -0
- data/lib/torchvision/models/vgg19.rb +9 -0
- data/lib/torchvision/models/vgg19_bn.rb +9 -0
- data/lib/torchvision/models/wide_resnet101_2.rb +10 -0
- data/lib/torchvision/models/wide_resnet50_2.rb +10 -0
- data/lib/torchvision/transforms/center_crop.rb +13 -0
- data/lib/torchvision/transforms/compose.rb +2 -2
- data/lib/torchvision/transforms/functional.rb +142 -7
- data/lib/torchvision/transforms/normalize.rb +2 -2
- data/lib/torchvision/transforms/random_horizontal_flip.rb +18 -0
- data/lib/torchvision/transforms/random_resized_crop.rb +70 -0
- data/lib/torchvision/transforms/random_vertical_flip.rb +18 -0
- data/lib/torchvision/transforms/resize.rb +13 -0
- data/lib/torchvision/transforms/to_tensor.rb +2 -2
- data/lib/torchvision/utils.rb +120 -0
- data/lib/torchvision/version.rb +1 -1
- metadata +50 -57
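
Taken together, the new files add datasets (CIFAR-10/100, Fashion-MNIST, KMNIST, ImageFolder), models (AlexNet, the ResNet/ResNeXt/Wide ResNet family, VGG), several new transforms, and grid/save utilities. As rough orientation, a 0.2.1-style pipeline might look like the sketch below; the constructor arguments are illustrative assumptions inferred from the class names above, not an excerpt from the release.

```ruby
require "torchvision"

# Sketch only: compose transforms added or updated in this diff.
# Mean/std values are the usual ImageNet constants, assumed here.
transform = TorchVision::Transforms::Compose.new([
  TorchVision::Transforms::RandomResizedCrop.new(224),
  TorchVision::Transforms::RandomHorizontalFlip.new,
  TorchVision::Transforms::ToTensor.new,
  TorchVision::Transforms::Normalize.new([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# One of the model classes added under data/lib/torchvision/models.
model = TorchVision::Models::ResNet18.new
```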
**data/lib/torchvision/transforms/functional.rb**

```diff
@@ -22,19 +22,154 @@ module TorchVision
           if std.to_a.any? { |v| v == 0 }
             raise ArgumentError, "std evaluated to zero after conversion to #{dtype}, leading to division by zero."
           end
-
-
-
-
-
-
+          if mean.ndim == 1
+            mean = mean[0...mean.size(0), nil, nil]
+          end
+          if std.ndim == 1
+            std = std[0...std.size(0), nil, nil]
+          end
           tensor.sub!(mean).div!(std)
           tensor
         end
 
+        def resize(img, size)
+          raise "img should be Vips::Image. Got #{img.class.name}" unless img.is_a?(Vips::Image)
+
+          if size.is_a?(Integer)
+            w, h = img.size
+            if (w <= h && w == size) || (h <= w && h == size)
+              return img
+            end
+            if w < h
+              ow = size
+              oh = (size * h / w).to_i
+              img.thumbnail_image(ow, height: oh)
+            else
+              oh = size
+              ow = (size * w / h).to_i
+              img.thumbnail_image(ow, height: oh)
+            end
+          else
+            img.thumbnail_image(size[0], height: size[1], size: :force)
+          end
+        end
+
         # TODO improve
         def to_tensor(pic)
-
+          if !pic.is_a?(Numo::NArray) && !pic.is_a?(Vips::Image)
+            raise ArgumentError, "pic should be Vips::Image or Numo::NArray. Got #{pic.class.name}"
+          end
+
+          if pic.is_a?(Numo::NArray) && ![2, 3].include?(pic.ndim)
+            raise ArgumentError, "pic should be 2/3 dimensional. Got #{pic.ndim} dimensions."
+          end
+
+          if pic.is_a?(Numo::NArray)
+            if pic.ndim == 2
+              pic = pic.reshape(*pic.shape, 1)
+            end
+
+            img = Torch.from_numo(pic.transpose(2, 0, 1))
+            if img.dtype == :uint8
+              return img.float.div(255)
+            else
+              return img
+            end
+          end
+
+          case pic.format
+          when :uchar
+            img = Torch::ByteTensor.new(Torch::ByteStorage.from_buffer(pic.write_to_memory))
+          else
+            raise Error, "Format not supported yet: #{pic.format}"
+          end
+
+          img = img.view(pic.height, pic.width, pic.bands)
+          # put it from HWC to CHW format
+          img = img.permute([2, 0, 1]).contiguous
+          img.float.div(255)
+        end
+
+        def hflip(img)
+          if img.is_a?(Torch::Tensor)
+            assert_image_tensor(img)
+            img.flip(-1)
+          else
+            img.flip(:horizontal)
+          end
+        end
+
+        def vflip(img)
+          if img.is_a?(Torch::Tensor)
+            assert_image_tensor(img)
+            img.flip(-2)
+          else
+            img.flip(:vertical)
+          end
+        end
+
+        def crop(img, top, left, height, width)
+          if img.is_a?(Torch::Tensor)
+            assert_image_tensor(img)
+            indexes = [true] * (img.dim - 2)
+            img[*indexes, top...(top + height), left...(left + width)]
+          else
+            img.crop(left, top, width, height)
+          end
+        end
+
+        def center_crop(img, output_size)
+          if output_size.is_a?(Integer)
+            output_size = [output_size.to_i, output_size.to_i]
+          elsif output_size.is_a?(Array) && output_size.length == 1
+            output_size = [output_size[0], output_size[0]]
+          end
+
+          image_width, image_height = image_size(img)
+          crop_height, crop_width = output_size
+
+          if crop_width > image_width || crop_height > image_height
+            padding_ltrb = [
+              crop_width > image_width ? (crop_width - image_width).div(2) : 0,
+              crop_height > image_height ? (crop_height - image_height).div(2) : 0,
+              crop_width > image_width ? (crop_width - image_width + 1).div(2) : 0,
+              crop_height > image_height ? (crop_height - image_height + 1).div(2) : 0
+            ]
+            # TODO
+            img = pad(img, padding_ltrb, fill: 0)
+            image_width, image_height = image_size(img)
+            if crop_width == image_width && crop_height == image_height
+              return img
+            end
+          end
+
+          crop_top = ((image_height - crop_height) / 2.0).round
+          crop_left = ((image_width - crop_width) / 2.0).round
+          crop(img, crop_top, crop_left, crop_height, crop_width)
+        end
+
+        # TODO interpolation
+        def resized_crop(img, top, left, height, width, size)
+          img = crop(img, top, left, height, width)
+          img = resize(img, size) #, interpolation)
+          img
+        end
+
+        private
+
+        def image_size(img)
+          if img.is_a?(Torch::Tensor)
+            assert_image_tensor(img)
+            [img.shape[-1], img.shape[-2]]
+          else
+            [img.width, img.height]
+          end
+        end
+
+        def assert_image_tensor(img)
+          if img.ndim < 2
+            raise TypeError, "Tensor is not a torch image."
+          end
         end
       end
     end
```
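
The hunk above adds the functional core: resize and to_tensor work on Vips::Image inputs, while hflip/vflip/crop/center_crop also accept Torch::Tensor images. A minimal sketch, assuming ruby-vips is installed and image.jpg is a hypothetical local file (note that center_crop's padding branch is still marked TODO above):

```ruby
require "torchvision"

func = TorchVision::Transforms::Functional

img = Vips::Image.new_from_file("image.jpg") # hypothetical input path
img = func.resize(img, 256)                  # shorter side -> 256, aspect ratio kept
img = func.center_crop(img, 224)             # safe here: crop is smaller than the image
tensor = func.to_tensor(img)                 # uchar HWC -> float CHW scaled to [0, 1]
```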
**data/lib/torchvision/transforms/normalize.rb**

```diff
@@ -1,13 +1,13 @@
 module TorchVision
   module Transforms
-    class Normalize
+    class Normalize < Torch::NN::Module
       def initialize(mean, std, inplace: false)
         @mean = mean
         @std = std
         @inplace = inplace
       end
 
-      def
+      def forward(tensor)
         F.normalize(tensor, @mean, @std, inplace: @inplace)
       end
     end
```
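
Normalize now inherits from Torch::NN::Module and names its entry point forward, matching the other transforms in this release. A minimal sketch, assuming torch.rb dispatches call to forward and that F.normalize accepts plain arrays for mean and std:

```ruby
norm = TorchVision::Transforms::Normalize.new([0.1307], [0.3081])

tensor = Torch.rand(1, 28, 28)
out = norm.call(tensor) # Torch::NN::Module routes call to forward
```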
**data/lib/torchvision/transforms/random_horizontal_flip.rb**

```diff
@@ -0,0 +1,18 @@
+module TorchVision
+  module Transforms
+    class RandomHorizontalFlip < Torch::NN::Module
+      def initialize(p: 0.5)
+        super()
+        @p = p
+      end
+
+      def forward(img)
+        if Torch.rand(1).item < @p
+          F.hflip(img)
+        else
+          img
+        end
+      end
+    end
+  end
+end
```
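
The flip transforms share one pattern: draw a uniform sample and delegate to the functional helper. A quick sketch with a tensor image (values are arbitrary):

```ruby
flip = TorchVision::Transforms::RandomHorizontalFlip.new(p: 0.5)

img = Torch.tensor([[[1, 2], [3, 4]]]) # a 1x2x2 "image"
out = flip.call(img) # img unchanged or mirrored along the last axis, 50/50
```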
**data/lib/torchvision/transforms/random_resized_crop.rb**

```diff
@@ -0,0 +1,70 @@
+module TorchVision
+  module Transforms
+    class RandomResizedCrop < Torch::NN::Module
+      def initialize(size, scale: [0.08, 1.0], ratio: [3.0 / 4.0, 4.0 / 3.0])
+        super()
+        @size = setup_size(size, "Please provide only two dimensions (h, w) for size.")
+        # @interpolation = interpolation
+        @scale = scale
+        @ratio = ratio
+      end
+
+      def params(img, scale, ratio)
+        width, height = F.send(:image_size, img)
+        area = height * width
+
+        log_ratio = Torch.log(Torch.tensor(ratio))
+        10.times do
+          target_area = area * Torch.empty(1).uniform!(scale[0], scale[1]).item
+          aspect_ratio = Torch.exp(
+            Torch.empty(1).uniform!(log_ratio[0], log_ratio[1])
+          ).item
+
+          w = Math.sqrt(target_area * aspect_ratio).round
+          h = Math.sqrt(target_area / aspect_ratio).round
+
+          if 0 < w && w <= width && 0 < h && h <= height
+            i = Torch.randint(0, height - h + 1, size: [1]).item
+            j = Torch.randint(0, width - w + 1, size: [1]).item
+            return i, j, h, w
+          end
+        end
+
+        # Fallback to central crop
+        in_ratio = width.to_f / height.to_f
+        if in_ratio < ratio.min
+          w = width
+          h = (w / ratio.min).round
+        elsif in_ratio > ratio.max
+          h = height
+          w = (h * ratio.max).round
+        else # whole image
+          w = width
+          h = height
+        end
+        i = (height - h).div(2)
+        j = (width - w).div(2)
+        [i, j, h, w]
+      end
+
+      def forward(img)
+        i, j, h, w = params(img, @scale, @ratio)
+        F.resized_crop(img, i, j, h, w, @size) #, @interpolation)
+      end
+
+      private
+
+      def setup_size(size, error_msg)
+        if size.is_a?(Integer)
+          return [size, size]
+        end
+
+        if size.length != 2
+          raise ArgumentError, error_msg
+        end
+
+        size
+      end
+    end
+  end
+end
```
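
params makes up to ten attempts to sample a crop whose area falls within scale and whose aspect ratio is log-uniform within ratio, then falls back to a central crop; forward hands the result to F.resized_crop, which resizes with size: :force to the exact target. A usage sketch (the input path is hypothetical):

```ruby
rrc = TorchVision::Transforms::RandomResizedCrop.new(224, scale: [0.5, 1.0])

img = Vips::Image.new_from_file("image.jpg") # hypothetical input path
out = rrc.call(img) # a random crop of img, resized to 224x224

# params is public, so the sampled crop box can be inspected directly
i, j, h, w = rrc.params(img, [0.5, 1.0], [3.0 / 4.0, 4.0 / 3.0])
```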
**data/lib/torchvision/transforms/random_vertical_flip.rb**

```diff
@@ -0,0 +1,18 @@
+module TorchVision
+  module Transforms
+    class RandomVerticalFlip < Torch::NN::Module
+      def initialize(p: 0.5)
+        super()
+        @p = p
+      end
+
+      def forward(img)
+        if Torch.rand(1).item < @p
+          F.vflip(img)
+        else
+          img
+        end
+      end
+    end
+  end
+end
```
**data/lib/torchvision/utils.rb**

```diff
@@ -0,0 +1,120 @@
+module TorchVision
+  module Utils
+    class << self
+      def make_grid(tensor, nrow: 8, padding: 2, normalize: false, range: nil, scale_each: false, pad_value: 0)
+        unless Torch.tensor?(tensor) || (tensor.is_a?(Array) && tensor.all? { |t| Torch.tensor?(t) })
+          raise ArgumentError, "tensor or list of tensors expected, got #{tensor.class.name}"
+        end
+
+        # if list of tensors, convert to a 4D mini-batch Tensor
+        if tensor.is_a?(Array)
+          tensor = Torch.stack(tensor, dim: 0)
+        end
+
+        if tensor.dim == 2 # single image H x W
+          tensor = tensor.unsqueeze(0)
+        end
+        if tensor.dim == 3 # single image
+          if tensor.size(0) == 1 # if single-channel, convert to 3-channel
+            tensor = Torch.cat([tensor, tensor, tensor], 0)
+          end
+          tensor = tensor.unsqueeze(0)
+        end
+
+        if tensor.dim == 4 && tensor.size(1) == 1 # single-channel images
+          tensor = Torch.cat([tensor, tensor, tensor], 1)
+        end
+
+        if normalize
+          tensor = tensor.clone # avoid modifying tensor in-place
+          if !range.nil? && !range.is_a?(Array)
+            raise "range has to be an array (min, max) if specified. min and max are numbers"
+          end
+
+          norm_ip = lambda do |img, min, max|
+            img.clamp!(min, max)
+            img.add!(-min).div!(max - min + 1e-5)
+          end
+
+          norm_range = lambda do |t, range|
+            if !range.nil?
+              norm_ip.call(t, range[0], range[1])
+            else
+              norm_ip.call(t, t.min.to_f, t.max.to_f)
+            end
+          end
+
+          if scale_each
+            tensor.each do |t| # loop over mini-batch dimension
+              norm_range.call(t, range)
+            end
+          else
+            norm_range.call(tensor, range)
+          end
+        end
+
+        if tensor.size(0) == 1
+          return tensor.squeeze(0)
+        end
+
+        # make the mini-batch of images into a grid
+        nmaps = tensor.size(0)
+        xmaps = [nrow, nmaps].min
+        ymaps = (nmaps.to_f / xmaps).ceil
+        height, width = (tensor.size(2) + padding), (tensor.size(3) + padding)
+        num_channels = tensor.size(1)
+        grid = tensor.new_full([num_channels, height * ymaps + padding, width * xmaps + padding], pad_value)
+        k = 0
+        ymaps.times do |y|
+          xmaps.times do |x|
+            break if k >= nmaps
+            grid.narrow(1, y * height + padding, height - padding).narrow(2, x * width + padding, width - padding).copy!(tensor[k])
+            k += 1
+          end
+        end
+        grid
+      end
+
+      def save_image(tensor, fp, nrow: 8, padding: 2, normalize: false, range: nil, scale_each: false, pad_value: 0)
+        grid = make_grid(tensor, nrow: nrow, padding: padding, pad_value: pad_value, normalize: normalize, range: range, scale_each: scale_each)
+        # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer
+        ndarr = grid.mul(255).add!(0.5).clamp!(0, 255).permute(1, 2, 0).to("cpu", dtype: :uint8)
+        im = image_from_array(ndarr)
+        im.write_to_file(fp)
+      end
+
+      # private
+      # Ruby-specific method
+      # TODO use Numo when bridge available
+      def image_from_array(array)
+        case array
+        when Torch::Tensor
+          # TODO support more dtypes
+          raise "Type not supported yet: #{array.dtype}" unless array.dtype == :uint8
+
+          array = array.contiguous unless array.contiguous?
+
+          width, height = array.shape
+          bands = array.shape[2] || 1
+          data = FFI::Pointer.new(:uint8, array._data_ptr)
+          data.define_singleton_method(:bytesize) do
+            array.numel * array.element_size
+          end
+
+          Vips::Image.new_from_memory(data, width, height, bands, :uchar)
+        when Numo::NArray
+          # TODO support more types
+          raise "Type not supported yet: #{array.class.name}" unless array.is_a?(Numo::UInt8)
+
+          width, height = array.shape
+          bands = array.shape[2] || 1
+          data = array.to_binary
+
+          Vips::Image.new_from_memory(data, width, height, bands, :uchar)
+        else
+          raise "Expected Torch::Tensor or Numo::NArray, not #{array.class.name}"
+        end
+      end
+    end
+  end
+end
```
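
make_grid tiles a batch (or array) of image tensors into one 3-channel tensor, and save_image runs the same grid through libvips to disk. A minimal sketch with random data; grid.png is an arbitrary output path:

```ruby
batch = Torch.rand(8, 3, 28, 28) # 8 random RGB "images"

grid = TorchVision::Utils.make_grid(batch, nrow: 4, padding: 2)
TorchVision::Utils.save_image(batch, "grid.png", nrow: 4, normalize: true)
```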