RubyGems - torchvision - Versions diffs - 0.1.0 → 0.2.1 - Mend

torchvision 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +35 -0
data/LICENSE.txt +1 -1
data/README.md +133 -5
data/lib/torchvision.rb +40 -1
data/lib/torchvision/datasets/cifar10.rb +117 -0
data/lib/torchvision/datasets/cifar100.rb +41 -0
data/lib/torchvision/datasets/dataset_folder.rb +91 -0
data/lib/torchvision/datasets/fashion_mnist.rb +30 -0
data/lib/torchvision/datasets/image_folder.rb +12 -0
data/lib/torchvision/datasets/kmnist.rb +30 -0
data/lib/torchvision/datasets/mnist.rb +47 -76
data/lib/torchvision/datasets/vision_dataset.rb +67 -0
data/lib/torchvision/models/alexnet.rb +42 -0
data/lib/torchvision/models/basic_block.rb +46 -0
data/lib/torchvision/models/bottleneck.rb +47 -0
data/lib/torchvision/models/resnet.rb +129 -0
data/lib/torchvision/models/resnet101.rb +9 -0
data/lib/torchvision/models/resnet152.rb +9 -0
data/lib/torchvision/models/resnet18.rb +9 -0
data/lib/torchvision/models/resnet34.rb +9 -0
data/lib/torchvision/models/resnet50.rb +9 -0
data/lib/torchvision/models/resnext101_32x8d.rb +11 -0
data/lib/torchvision/models/resnext50_32x4d.rb +11 -0
data/lib/torchvision/models/vgg.rb +93 -0
data/lib/torchvision/models/vgg11.rb +9 -0
data/lib/torchvision/models/vgg11_bn.rb +9 -0
data/lib/torchvision/models/vgg13.rb +9 -0
data/lib/torchvision/models/vgg13_bn.rb +9 -0
data/lib/torchvision/models/vgg16.rb +9 -0
data/lib/torchvision/models/vgg16_bn.rb +9 -0
data/lib/torchvision/models/vgg19.rb +9 -0
data/lib/torchvision/models/vgg19_bn.rb +9 -0
data/lib/torchvision/models/wide_resnet101_2.rb +10 -0
data/lib/torchvision/models/wide_resnet50_2.rb +10 -0
data/lib/torchvision/transforms/center_crop.rb +13 -0
data/lib/torchvision/transforms/compose.rb +2 -2
data/lib/torchvision/transforms/functional.rb +142 -7
data/lib/torchvision/transforms/normalize.rb +2 -2
data/lib/torchvision/transforms/random_horizontal_flip.rb +18 -0
data/lib/torchvision/transforms/random_resized_crop.rb +70 -0
data/lib/torchvision/transforms/random_vertical_flip.rb +18 -0
data/lib/torchvision/transforms/resize.rb +13 -0
data/lib/torchvision/transforms/to_tensor.rb +2 -2
data/lib/torchvision/utils.rb +120 -0
data/lib/torchvision/version.rb +1 -1
metadata +50 -57

data/lib/torchvision/models/basic_block.rb ADDED Viewed

@@ -0,0 +1,46 @@
+module TorchVision
+  module Models
+    class BasicBlock < Torch::NN::Module
+      def initialize(inplanes, planes, stride: 1, downsample: nil, groups: 1, base_width: 64, dilation: 1, norm_layer: nil)
+        super()
+        norm_layer ||= Torch::NN::BatchNorm2d
+        if groups != 1 || base_width != 64
+          raise ArgumentError, "BasicBlock only supports groups=1 and base_width=64"
+        end
+        if dilation > 1
+          raise NotImplementedError, "Dilation > 1 not supported in BasicBlock"
+        end
+        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
+        @conv1 = Torch::NN::Conv2d.new(inplanes, planes, 3, stride: stride, padding: 1, groups: 1, bias: false, dilation: 1)
+        @bn1 = norm_layer.new(planes)
+        @relu = Torch::NN::ReLU.new(inplace: true)
+        @conv2 = Torch::NN::Conv2d.new(planes, planes, 3, stride: 1, padding: 1, groups: 1, bias: false, dilation: 1)
+        @bn2 = norm_layer.new(planes)
+        @downsample = downsample
+        @stride = stride
+      end
+      def forward(x)
+        identity = x
+        out = @conv1.call(x)
+        out = @bn1.call(out)
+        out = @relu.call(out)
+        out = @conv2.call(out)
+        out = @bn2.call(out)
+        identity = @downsample.call(x) if @downsample
+        out += identity
+        out = @relu.call(out)
+        out
+      end
+      def self.expansion
+        1
+      end
+    end
+  end
+end

data/lib/torchvision/models/bottleneck.rb ADDED Viewed

@@ -0,0 +1,47 @@
+module TorchVision
+  module Models
+    class Bottleneck < Torch::NN::Module
+      def initialize(inplanes, planes, stride: 1, downsample: nil, groups: 1, base_width: 64, dilation: 1, norm_layer: nil)
+        super()
+        norm_layer ||= Torch::NN::BatchNorm2d
+        width = (planes * (base_width / 64.0)).to_i * groups
+        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
+        @conv1 = Torch::NN::Conv2d.new(inplanes, width, 1, stride: 1, bias: false)
+        @bn1 = norm_layer.new(width)
+        @conv2 = Torch::NN::Conv2d.new(width, width, 3, stride: stride, padding: dilation, groups: groups, bias: false, dilation: dilation)
+        @bn2 = norm_layer.new(width)
+        @conv3 = Torch::NN::Conv2d.new(width, planes * self.class.expansion, 1, stride: 1, bias: false)
+        @bn3 = norm_layer.new(planes * self.class.expansion)
+        @relu = Torch::NN::ReLU.new(inplace: true)
+        @downsample = downsample
+        @stride = stride
+      end
+      def forward(x)
+        identity = x
+        out = @conv1.call(x)
+        out = @bn1.call(out)
+        out = @relu.call(out)
+        out = @conv2.call(out)
+        out = @bn2.call(out)
+        out = @relu.call(out)
+        out = @conv3.call(out)
+        out = @bn3.call(out)
+        identity = @downsample.call(x) if @downsample
+        out += identity
+        out = @relu.call(out)
+        out
+      end
+      def self.expansion
+        4
+      end
+    end
+  end
+end

data/lib/torchvision/models/resnet.rb ADDED Viewed

@@ -0,0 +1,129 @@
+module TorchVision
+  module Models
+    class ResNet < Torch::NN::Module
+      MODEL_URLS = {
+        "resnet18" => "https://download.pytorch.org/models/resnet18-5c106cde.pth",
+        "resnet34" => "https://download.pytorch.org/models/resnet34-333f7ec4.pth",
+        "resnet50" => "https://download.pytorch.org/models/resnet50-19c8e357.pth",
+        "resnet101" => "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
+        "resnet152" => "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
+        "resnext50_32x4d" => "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
+        "resnext101_32x8d" => "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
+        "wide_resnet50_2" => "https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth",
+        "wide_resnet101_2" => "https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth"
+      }
+      def initialize(block, layers, num_classes=1000, zero_init_residual: false,
+        groups: 1, width_per_group: 64, replace_stride_with_dilation: nil, norm_layer: nil)
+        super()
+        norm_layer ||= Torch::NN::BatchNorm2d
+        @norm_layer = norm_layer
+        @inplanes = 64
+        @dilation = 1
+        if replace_stride_with_dilation.nil?
+          # each element in the tuple indicates if we should replace
+          # the 2x2 stride with a dilated convolution instead
+          replace_stride_with_dilation = [false, false, false]
+        end
+        if replace_stride_with_dilation.length != 3
+          raise ArgumentError, "replace_stride_with_dilation should be nil or a 3-element tuple, got #{replace_stride_with_dilation}"
+        end
+        @groups = groups
+        @base_width = width_per_group
+        @conv1 = Torch::NN::Conv2d.new(3, @inplanes, 7, stride: 2, padding: 3, bias: false)
+        @bn1 = norm_layer.new(@inplanes)
+        @relu = Torch::NN::ReLU.new(inplace: true)
+        @maxpool = Torch::NN::MaxPool2d.new(3, stride: 2, padding: 1)
+        @layer1 = _make_layer(block, 64, layers[0])
+        @layer2 = _make_layer(block, 128, layers[1], stride: 2, dilate: replace_stride_with_dilation[0])
+        @layer3 = _make_layer(block, 256, layers[2], stride: 2, dilate: replace_stride_with_dilation[1])
+        @layer4 = _make_layer(block, 512, layers[3], stride: 2, dilate: replace_stride_with_dilation[2])
+        @avgpool = Torch::NN::AdaptiveAvgPool2d.new([1, 1])
+        @fc = Torch::NN::Linear.new(512 * block.expansion, num_classes)
+        modules.each do |m|
+          case m
+          when Torch::NN::Conv2d
+            Torch::NN::Init.kaiming_normal!(m.weight, mode: "fan_out", nonlinearity: "relu")
+          when Torch::NN::BatchNorm2d, Torch::NN::GroupNorm
+            Torch::NN::Init.constant!(m.weight, 1)
+            Torch::NN::Init.constant!(m.bias, 0)
+          end
+        end
+        # Zero-initialize the last BN in each residual branch,
+        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+        if zero_init_residual
+          modules.each do |m|
+            case m
+            when Bottleneck
+              Torch::NN::Init.constant!(m.bn3.weight, 0)
+            when BasicBlock
+              Torch::NN::Init.constant!(m.bn2.weight, 0)
+            end
+          end
+        end
+      end
+      def _make_layer(block, planes, blocks, stride: 1, dilate: false)
+        norm_layer = @norm_layer
+        downsample = nil
+        previous_dilation = @dilation
+        if dilate
+          @dilation *= stride
+          stride = 1
+        end
+        if stride != 1 || @inplanes != planes * block.expansion
+          downsample = Torch::NN::Sequential.new(
+            Torch::NN::Conv2d.new(@inplanes, planes * block.expansion, 1, stride: stride, bias: false),
+            norm_layer.new(planes * block.expansion)
+          )
+        end
+        layers = []
+        layers << block.new(@inplanes, planes, stride: stride, downsample: downsample, groups: @groups, base_width: @base_width, dilation: previous_dilation, norm_layer: norm_layer)
+        @inplanes = planes * block.expansion
+        (blocks - 1).times do
+          layers << block.new(@inplanes, planes, groups: @groups, base_width: @base_width, dilation: @dilation, norm_layer: norm_layer)
+        end
+        Torch::NN::Sequential.new(*layers)
+      end
+      def _forward_impl(x)
+        x = @conv1.call(x)
+        x = @bn1.call(x)
+        x = @relu.call(x)
+        x = @maxpool.call(x)
+        x = @layer1.call(x)
+        x = @layer2.call(x)
+        x = @layer3.call(x)
+        x = @layer4.call(x)
+        x = @avgpool.call(x)
+        x = Torch.flatten(x, 1)
+        x = @fc.call(x)
+        x
+      end
+      def forward(x)
+        _forward_impl(x)
+      end
+      def self.make_model(arch, block, layers, pretrained: false, **kwargs)
+        model = ResNet.new(block, layers, **kwargs)
+        if pretrained
+          url = MODEL_URLS[arch]
+          state_dict = Torch::Hub.load_state_dict_from_url(url)
+          model.load_state_dict(state_dict)
+        end
+        model
+      end
+    end
+  end
+end

data/lib/torchvision/models/resnet101.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module TorchVision
+  module Models
+    module ResNet101
+      def self.new(**kwargs)
+        ResNet.make_model("resnet101", Bottleneck, [3, 4, 23, 3], **kwargs)
+      end
+    end
+  end
+end

data/lib/torchvision/models/resnet152.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module TorchVision
+  module Models
+    module ResNet152
+      def self.new(**kwargs)
+        ResNet.make_model("resnet152", Bottleneck, [3, 8, 36, 3], **kwargs)
+      end
+    end
+  end
+end

data/lib/torchvision/models/resnet18.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module TorchVision
+  module Models
+    module ResNet18
+      def self.new(**kwargs)
+        ResNet.make_model("resnet18", BasicBlock, [2, 2, 2, 2], **kwargs)
+      end
+    end
+  end
+end

data/lib/torchvision/models/resnet34.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module TorchVision
+  module Models
+    module ResNet34
+      def self.new(**kwargs)
+        ResNet.make_model("resnet34", BasicBlock, [3, 4, 6, 3], **kwargs)
+      end
+    end
+  end
+end

data/lib/torchvision/models/resnet50.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module TorchVision
+  module Models
+    module ResNet50
+      def self.new(**kwargs)
+        ResNet.make_model("resnet50", Bottleneck, [3, 4, 6, 3], **kwargs)
+      end
+    end
+  end
+end

data/lib/torchvision/models/resnext101_32x8d.rb ADDED Viewed

@@ -0,0 +1,11 @@
+module TorchVision
+  module Models
+    module ResNext101_32x8d
+      def self.new(**kwargs)
+        kwargs[:groups] = 32
+        kwargs[:width_per_group] = 8
+        ResNet.make_model("resnext101_32x8d", Bottleneck, [3, 4, 23, 3], **kwargs)
+      end
+    end
+  end
+end

data/lib/torchvision/models/resnext50_32x4d.rb ADDED Viewed

@@ -0,0 +1,11 @@
+module TorchVision
+  module Models
+    module ResNext50_32x4d
+      def self.new(**kwargs)
+        kwargs[:groups] = 32
+        kwargs[:width_per_group] = 4
+        ResNet.make_model("resnext50_32x4d", Bottleneck, [3, 4, 6, 3], **kwargs)
+      end
+    end
+  end
+end

data/lib/torchvision/models/vgg.rb ADDED Viewed

@@ -0,0 +1,93 @@
+module TorchVision
+  module Models
+    class VGG < Torch::NN::Module
+      MODEL_URLS = {
+        "vgg11" => "https://download.pytorch.org/models/vgg11-bbd30ac9.pth",
+        "vgg13" => "https://download.pytorch.org/models/vgg13-c768596a.pth",
+        "vgg16" => "https://download.pytorch.org/models/vgg16-397923af.pth",
+        "vgg19" => "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth",
+        "vgg11_bn" => "https://download.pytorch.org/models/vgg11_bn-6002323d.pth",
+        "vgg13_bn" => "https://download.pytorch.org/models/vgg13_bn-abd245e5.pth",
+        "vgg16_bn" => "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth",
+        "vgg19_bn" => "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth"
+      }
+      def initialize(features, num_classes: 1000, init_weights: true)
+        super()
+        @features = features
+        @avgpool = Torch::NN::AdaptiveAvgPool2d.new([7, 7])
+        @classifier = Torch::NN::Sequential.new(
+          Torch::NN::Linear.new(512 * 7 * 7, 4096),
+          Torch::NN::ReLU.new(inplace: true),
+          Torch::NN::Dropout.new,
+          Torch::NN::Linear.new(4096, 4096),
+          Torch::NN::ReLU.new(inplace: true),
+          Torch::NN::Dropout.new,
+          Torch::NN::Linear.new(4096, num_classes)
+        )
+        _initialize_weights if init_weights
+      end
+      def forward(x)
+        x = @features.call(x)
+        x = @avgpool.call(x)
+        x = Torch.flatten(x, 1)
+        x = @classifier.call(x)
+        x
+      end
+      def _initialize_weights
+        modules.each do |m|
+          case m
+          when Torch::NN::Conv2d
+            Torch::NN::Init.kaiming_normal!(m.weight, mode: "fan_out", nonlinearity: "relu")
+            Torch::NN::Init.constant!(m.bias, 0) if m.bias
+          when Torch::NN::BatchNorm2d
+            Torch::NN::Init.constant!(m.weight, 1)
+            Torch::NN::Init.constant!(m.bias, 0)
+          when Torch::NN::Linear
+            Torch::NN::Init.normal!(m.weight, mean: 0, std: 0.01)
+            Torch::NN::Init.constant!(m.bias, 0)
+          end
+        end
+      end
+      CFGS = {
+        "A" => [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
+        "B" => [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
+        "D" => [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"],
+        "E" => [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"],
+      }
+      def self.make_model(arch, cfg, batch_norm, pretrained: false, **kwargs)
+        kwargs[:init_weights] = false if pretrained
+        model = VGG.new(make_layers(CFGS[cfg], batch_norm), **kwargs)
+        if pretrained
+          url = MODEL_URLS[arch]
+          state_dict = Torch::Hub.load_state_dict_from_url(url)
+          model.load_state_dict(state_dict)
+        end
+        model
+      end
+      def self.make_layers(cfg, batch_norm)
+        layers = []
+        in_channels = 3
+        cfg.each do |v|
+          if v == "M"
+            layers += [Torch::NN::MaxPool2d.new(2, stride: 2)]
+          else
+            conv2d = Torch::NN::Conv2d.new(in_channels, v, 3, padding: 1)
+            if batch_norm
+              layers += [conv2d, Torch::NN::BatchNorm2d.new(v), Torch::NN::ReLU.new(inplace: true)]
+            else
+              layers += [conv2d, Torch::NN::ReLU.new(inplace: true)]
+            end
+            in_channels = v
+          end
+        end
+        Torch::NN::Sequential.new(*layers)
+      end
+    end
+  end
+end

data/lib/torchvision/models/vgg11.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module TorchVision
+  module Models
+    module VGG11
+      def self.new(**kwargs)
+        VGG.make_model("vgg11", "A", false, **kwargs)
+      end
+    end
+  end
+end

data/lib/torchvision/models/vgg11_bn.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module TorchVision
+  module Models
+    module VGG11BN
+      def self.new(**kwargs)
+        VGG.make_model("vgg11_bn", "A", true, **kwargs)
+      end
+    end
+  end
+end

data/lib/torchvision/models/vgg13.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module TorchVision
+  module Models
+    module VGG13
+      def self.new(**kwargs)
+        VGG.make_model("vgg13", "B", false, **kwargs)
+      end
+    end
+  end
+end

data/lib/torchvision/models/vgg13_bn.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module TorchVision
+  module Models
+    module VGG13BN
+      def self.new(**kwargs)
+        VGG.make_model("vgg13_bn", "B", true, **kwargs)
+      end
+    end
+  end
+end