doctra 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
Files changed (38)
  1. doctra/__init__.py +4 -0
  2. doctra/cli/main.py +168 -0
  3. doctra/engines/image_restoration/__init__.py +10 -0
  4. doctra/engines/image_restoration/docres_engine.py +566 -0
  5. doctra/engines/vlm/service.py +0 -12
  6. doctra/parsers/enhanced_pdf_parser.py +370 -0
  7. doctra/parsers/structured_pdf_parser.py +11 -60
  8. doctra/parsers/table_chart_extractor.py +8 -44
  9. doctra/third_party/docres/data/MBD/MBD.py +110 -0
  10. doctra/third_party/docres/data/MBD/MBD_utils.py +291 -0
  11. doctra/third_party/docres/data/MBD/infer.py +151 -0
  12. doctra/third_party/docres/data/MBD/model/deep_lab_model/aspp.py +95 -0
  13. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/__init__.py +13 -0
  14. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/drn.py +402 -0
  15. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/mobilenet.py +151 -0
  16. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/resnet.py +170 -0
  17. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/xception.py +288 -0
  18. doctra/third_party/docres/data/MBD/model/deep_lab_model/decoder.py +59 -0
  19. doctra/third_party/docres/data/MBD/model/deep_lab_model/deeplab.py +81 -0
  20. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/__init__.py +12 -0
  21. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/batchnorm.py +282 -0
  22. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/comm.py +129 -0
  23. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/replicate.py +88 -0
  24. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/unittest.py +29 -0
  25. doctra/third_party/docres/data/preprocess/crop_merge_image.py +142 -0
  26. doctra/third_party/docres/inference.py +370 -0
  27. doctra/third_party/docres/models/restormer_arch.py +308 -0
  28. doctra/third_party/docres/utils.py +464 -0
  29. doctra/ui/app.py +5 -32
  30. doctra/utils/progress.py +13 -98
  31. doctra/utils/structured_utils.py +45 -49
  32. doctra/version.py +1 -1
  33. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/METADATA +1 -1
  34. doctra-0.4.0.dist-info/RECORD +67 -0
  35. doctra-0.3.2.dist-info/RECORD +0 -44
  36. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/WHEEL +0 -0
  37. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/licenses/LICENSE +0 -0
  38. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/top_level.txt +0 -0
doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/mobilenet.py
@@ -0,0 +1,151 @@
+import torch
+import torch.nn.functional as F
+import torch.nn as nn
+import math
+from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
+import torch.utils.model_zoo as model_zoo
+
+def conv_bn(inp, oup, stride, BatchNorm):
+    return nn.Sequential(
+        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+        BatchNorm(oup),
+        nn.ReLU6(inplace=True)
+    )
+
+
+def fixed_padding(inputs, kernel_size, dilation):
+    kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
+    pad_total = kernel_size_effective - 1
+    pad_beg = pad_total // 2
+    pad_end = pad_total - pad_beg
+    padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))
+    return padded_inputs
+
+
+class InvertedResidual(nn.Module):
+    def __init__(self, inp, oup, stride, dilation, expand_ratio, BatchNorm):
+        super(InvertedResidual, self).__init__()
+        self.stride = stride
+        assert stride in [1, 2]
+
+        hidden_dim = round(inp * expand_ratio)
+        self.use_res_connect = self.stride == 1 and inp == oup
+        self.kernel_size = 3
+        self.dilation = dilation
+
+        if expand_ratio == 1:
+            self.conv = nn.Sequential(
+                # dw
+                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False),
+                BatchNorm(hidden_dim),
+                nn.ReLU6(inplace=True),
+                # pw-linear
+                nn.Conv2d(hidden_dim, oup, 1, 1, 0, 1, 1, bias=False),
+                BatchNorm(oup),
+            )
+        else:
+            self.conv = nn.Sequential(
+                # pw
+                nn.Conv2d(inp, hidden_dim, 1, 1, 0, 1, bias=False),
+                BatchNorm(hidden_dim),
+                nn.ReLU6(inplace=True),
+                # dw
+                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False),
+                BatchNorm(hidden_dim),
+                nn.ReLU6(inplace=True),
+                # pw-linear
+                nn.Conv2d(hidden_dim, oup, 1, 1, 0, 1, bias=False),
+                BatchNorm(oup),
+            )
+
+    def forward(self, x):
+        x_pad = fixed_padding(x, self.kernel_size, dilation=self.dilation)
+        if self.use_res_connect:
+            x = x + self.conv(x_pad)
+        else:
+            x = self.conv(x_pad)
+        return x
+
+
+class MobileNetV2(nn.Module):
+    def __init__(self, output_stride=8, BatchNorm=None, width_mult=1., pretrained=True):
+        super(MobileNetV2, self).__init__()
+        block = InvertedResidual
+        input_channel = 32
+        current_stride = 1
+        rate = 1
+        interverted_residual_setting = [
+            # t, c, n, s
+            [1, 16, 1, 1],
+            [6, 24, 2, 2],
+            [6, 32, 3, 2],
+            [6, 64, 4, 2],
+            [6, 96, 3, 1],
+            [6, 160, 3, 2],
+            [6, 320, 1, 1],
+        ]
+
+        # building first layer
+        input_channel = int(input_channel * width_mult)
+        self.features = [conv_bn(3, input_channel, 2, BatchNorm)]
+        current_stride *= 2
+        # building inverted residual blocks
+        for t, c, n, s in interverted_residual_setting:
+            if current_stride == output_stride:
+                stride = 1
+                dilation = rate
+                rate *= s
+            else:
+                stride = s
+                dilation = 1
+                current_stride *= s
+            output_channel = int(c * width_mult)
+            for i in range(n):
+                if i == 0:
+                    self.features.append(block(input_channel, output_channel, stride, dilation, t, BatchNorm))
+                else:
+                    self.features.append(block(input_channel, output_channel, 1, dilation, t, BatchNorm))
+                input_channel = output_channel
+        self.features = nn.Sequential(*self.features)
+        self._initialize_weights()
+
+        if pretrained:
+            self._load_pretrained_model()
+
+        self.low_level_features = self.features[0:4]
+        self.high_level_features = self.features[4:]
+
+    def forward(self, x):
+        low_level_feat = self.low_level_features(x)
+        x = self.high_level_features(low_level_feat)
+        return x, low_level_feat
+
+    def _load_pretrained_model(self):
+        pretrain_dict = model_zoo.load_url('http://jeff95.me/models/mobilenet_v2-6a65762b.pth')
+        model_dict = {}
+        state_dict = self.state_dict()
+        for k, v in pretrain_dict.items():
+            if k in state_dict:
+                model_dict[k] = v
+        state_dict.update(model_dict)
+        self.load_state_dict(state_dict)
+
+    def _initialize_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                # m.weight.data.normal_(0, math.sqrt(2. / n))
+                torch.nn.init.kaiming_normal_(m.weight)
+            elif isinstance(m, SynchronizedBatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+if __name__ == "__main__":
+    input = torch.rand(1, 3, 512, 512)
+    model = MobileNetV2(output_stride=16, BatchNorm=nn.BatchNorm2d)
+    output, low_level_feat = model(input)
+    print(output.size())
+    print(low_level_feat.size())
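For orientation, a minimal shape check of the MobileNetV2 backbone above. This is a sketch, not part of the package: it assumes the doctra/third_party/docres/data/MBD directory is on sys.path so the file's own model.deep_lab_model imports resolve, and it passes pretrained=False to skip the checkpoint download.

import torch
import torch.nn as nn

# Assumption: run from doctra/third_party/docres/data/MBD so that the
# `model.deep_lab_model...` import inside mobilenet.py resolves.
from model.deep_lab_model.backbone.mobilenet import MobileNetV2

# pretrained=False avoids the model_zoo download during a quick shape check.
model = MobileNetV2(output_stride=16, BatchNorm=nn.BatchNorm2d, pretrained=False)
model.eval()

with torch.no_grad():
    high, low = model(torch.rand(1, 3, 512, 512))

# features[0:4] end at stride 4 (24 channels); with output_stride=16 the
# later stages switch from striding to dilation, so the high-level map
# stays at 1/16 resolution.
print(high.shape)  # expected: torch.Size([1, 320, 32, 32])
print(low.shape)   # expected: torch.Size([1, 24, 128, 128])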
doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/resnet.py
@@ -0,0 +1,170 @@
+import math
+import torch.nn as nn
+import torch.utils.model_zoo as model_zoo
+from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
+
+class Bottleneck(nn.Module):
+    expansion = 4
+
+    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, BatchNorm=None):
+        super(Bottleneck, self).__init__()
+        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
+        self.bn1 = BatchNorm(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
+                               dilation=dilation, padding=dilation, bias=False)
+        self.bn2 = BatchNorm(planes)
+        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
+        self.bn3 = BatchNorm(planes * 4)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+        self.dilation = dilation
+
+    def forward(self, x):
+        residual = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+
+        out = self.conv3(out)
+        out = self.bn3(out)
+
+        if self.downsample is not None:
+            residual = self.downsample(x)
+
+        out += residual
+        out = self.relu(out)
+
+        return out
+
+class ResNet(nn.Module):
+
+    def __init__(self, block, layers, output_stride, BatchNorm, pretrained=True):
+        self.inplanes = 64
+        super(ResNet, self).__init__()
+        blocks = [1, 2, 4]
+        if output_stride == 16:
+            strides = [1, 2, 2, 1]
+            dilations = [1, 1, 1, 2]
+        elif output_stride == 8:
+            strides = [1, 2, 1, 1]
+            dilations = [1, 1, 2, 4]
+        else:
+            raise NotImplementedError
+
+        # Modules
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = BatchNorm(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+        self.layer1 = self._make_layer(block, 64, layers[0], stride=strides[0], dilation=dilations[0], BatchNorm=BatchNorm)
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=strides[1], dilation=dilations[1], BatchNorm=BatchNorm)
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=strides[2], dilation=dilations[2], BatchNorm=BatchNorm)
+        self.layer4 = self._make_MG_unit(block, 512, blocks=blocks, stride=strides[3], dilation=dilations[3], BatchNorm=BatchNorm)
+        # self.layer4 = self._make_layer(block, 512, layers[3], stride=strides[3], dilation=dilations[3], BatchNorm=BatchNorm)
+        self._init_weight()
+
+        # if pretrained:
+        #     self._load_pretrained_model()
+
+    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None):
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.inplanes, planes * block.expansion,
+                          kernel_size=1, stride=stride, bias=False),
+                BatchNorm(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, dilation, downsample, BatchNorm))
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(self.inplanes, planes, dilation=dilation, BatchNorm=BatchNorm))
+
+        return nn.Sequential(*layers)
+
+    def _make_MG_unit(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None):
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.inplanes, planes * block.expansion,
+                          kernel_size=1, stride=stride, bias=False),
+                BatchNorm(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, dilation=blocks[0]*dilation,
+                            downsample=downsample, BatchNorm=BatchNorm))
+        self.inplanes = planes * block.expansion
+        for i in range(1, len(blocks)):
+            layers.append(block(self.inplanes, planes, stride=1,
+                                dilation=blocks[i]*dilation, BatchNorm=BatchNorm))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, input):
+        x = self.conv1(input)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        low_level_feat = x
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x, low_level_feat
+
+    def _init_weight(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, SynchronizedBatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+    def _load_pretrained_model(self):
+
+        import urllib.request
+        import ssl
+        ssl._create_default_https_context = ssl._create_unverified_context
+        response = urllib.request.urlopen('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth')
+
+        pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth')
+        model_dict = {}
+        state_dict = self.state_dict()
+        for k, v in pretrain_dict.items():
+            if k in state_dict:
+                # if 'conv1' in k:
+                #     continue
+                model_dict[k] = v
+        state_dict.update(model_dict)
+        self.load_state_dict(state_dict)
+
+def ResNet101(output_stride, BatchNorm, pretrained=True):
+    """Constructs a ResNet-101 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = ResNet(Bottleneck, [3, 4, 23, 3], output_stride, BatchNorm, pretrained=pretrained)
+    return model
+
+if __name__ == "__main__":
+    import torch
+    model = ResNet101(BatchNorm=nn.BatchNorm2d, pretrained=True, output_stride=8)
+    input = torch.rand(1, 3, 512, 512)
+    output, low_level_feat = model(input)
+    print(output.size())
+    print(low_level_feat.size())
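Two things worth noting in resnet.py as shipped: the call to _load_pretrained_model() is commented out in ResNet.__init__, so the pretrained flag is currently a no-op, and the function itself opens an unverified-SSL urlopen whose response is never used. A shape-check sketch under the same sys.path assumption as the mobilenet example:

import torch
import torch.nn as nn

# Assumption: the MBD directory is on sys.path, as above.
from model.deep_lab_model.backbone.resnet import ResNet101

# pretrained=True downloads nothing here, since the loading call is
# commented out in ResNet.__init__.
model = ResNet101(output_stride=8, BatchNorm=nn.BatchNorm2d, pretrained=True)
model.eval()

with torch.no_grad():
    high, low = model(torch.rand(1, 3, 512, 512))

# With output_stride=8, layer3 and layer4 keep stride 1 and dilate instead;
# the MG unit gives layer4 dilations blocks*dilation = [4, 8, 16].
print(high.shape)  # expected: torch.Size([1, 2048, 64, 64])
print(low.shape)   # expected: torch.Size([1, 256, 128, 128])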
doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/xception.py
@@ -0,0 +1,288 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.model_zoo as model_zoo
+from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
+
+def fixed_padding(inputs, kernel_size, dilation):
+    kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
+    pad_total = kernel_size_effective - 1
+    pad_beg = pad_total // 2
+    pad_end = pad_total - pad_beg
+    padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))
+    return padded_inputs
+
+
+class SeparableConv2d(nn.Module):
+    def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, bias=False, BatchNorm=None):
+        super(SeparableConv2d, self).__init__()
+
+        self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, 0, dilation,
+                               groups=inplanes, bias=bias)
+        self.bn = BatchNorm(inplanes)
+        self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias)
+
+    def forward(self, x):
+        x = fixed_padding(x, self.conv1.kernel_size[0], dilation=self.conv1.dilation[0])
+        x = self.conv1(x)
+        x = self.bn(x)
+        x = self.pointwise(x)
+        return x
+
+
+class Block(nn.Module):
+    def __init__(self, inplanes, planes, reps, stride=1, dilation=1, BatchNorm=None,
+                 start_with_relu=True, grow_first=True, is_last=False):
+        super(Block, self).__init__()
+
+        if planes != inplanes or stride != 1:
+            self.skip = nn.Conv2d(inplanes, planes, 1, stride=stride, bias=False)
+            self.skipbn = BatchNorm(planes)
+        else:
+            self.skip = None
+
+        self.relu = nn.ReLU(inplace=True)
+        rep = []
+
+        filters = inplanes
+        if grow_first:
+            rep.append(self.relu)
+            rep.append(SeparableConv2d(inplanes, planes, 3, 1, dilation, BatchNorm=BatchNorm))
+            rep.append(BatchNorm(planes))
+            filters = planes
+
+        for i in range(reps - 1):
+            rep.append(self.relu)
+            rep.append(SeparableConv2d(filters, filters, 3, 1, dilation, BatchNorm=BatchNorm))
+            rep.append(BatchNorm(filters))
+
+        if not grow_first:
+            rep.append(self.relu)
+            rep.append(SeparableConv2d(inplanes, planes, 3, 1, dilation, BatchNorm=BatchNorm))
+            rep.append(BatchNorm(planes))
+
+        if stride != 1:
+            rep.append(self.relu)
+            rep.append(SeparableConv2d(planes, planes, 3, 2, BatchNorm=BatchNorm))
+            rep.append(BatchNorm(planes))
+
+        if stride == 1 and is_last:
+            rep.append(self.relu)
+            rep.append(SeparableConv2d(planes, planes, 3, 1, BatchNorm=BatchNorm))
+            rep.append(BatchNorm(planes))
+
+        if not start_with_relu:
+            rep = rep[1:]
+
+        self.rep = nn.Sequential(*rep)
+
+    def forward(self, inp):
+        x = self.rep(inp)
+
+        if self.skip is not None:
+            skip = self.skip(inp)
+            skip = self.skipbn(skip)
+        else:
+            skip = inp
+
+        x = x + skip
+
+        return x
+
+
+class AlignedXception(nn.Module):
+    """
+    Modified Alighed Xception
+    """
+    def __init__(self, output_stride, BatchNorm,
+                 pretrained=True):
+        super(AlignedXception, self).__init__()
+
+        if output_stride == 16:
+            entry_block3_stride = 2
+            middle_block_dilation = 1
+            exit_block_dilations = (1, 2)
+        elif output_stride == 8:
+            entry_block3_stride = 1
+            middle_block_dilation = 2
+            exit_block_dilations = (2, 4)
+        else:
+            raise NotImplementedError
+
+
+        # Entry flow
+        self.conv1 = nn.Conv2d(3, 32, 3, stride=2, padding=1, bias=False)
+        self.bn1 = BatchNorm(32)
+        self.relu = nn.ReLU(inplace=True)
+
+        self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1, bias=False)
+        self.bn2 = BatchNorm(64)
+
+        self.block1 = Block(64, 128, reps=2, stride=2, BatchNorm=BatchNorm, start_with_relu=False)
+        self.block2 = Block(128, 256, reps=2, stride=2, BatchNorm=BatchNorm, start_with_relu=False,
+                            grow_first=True)
+        self.block3 = Block(256, 728, reps=2, stride=entry_block3_stride, BatchNorm=BatchNorm,
+                            start_with_relu=True, grow_first=True, is_last=True)
+
+        # Middle flow
+        self.block4 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                            BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block5 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                            BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block6 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                            BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block7 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                            BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block8 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                            BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block9 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                            BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block10 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block11 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block12 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block13 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block14 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block15 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block16 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block17 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block18 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+        self.block19 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
+
+        # Exit flow
+        self.block20 = Block(728, 1024, reps=2, stride=1, dilation=exit_block_dilations[0],
+                             BatchNorm=BatchNorm, start_with_relu=True, grow_first=False, is_last=True)
+
+        self.conv3 = SeparableConv2d(1024, 1536, 3, stride=1, dilation=exit_block_dilations[1], BatchNorm=BatchNorm)
+        self.bn3 = BatchNorm(1536)
+
+        self.conv4 = SeparableConv2d(1536, 1536, 3, stride=1, dilation=exit_block_dilations[1], BatchNorm=BatchNorm)
+        self.bn4 = BatchNorm(1536)
+
+        self.conv5 = SeparableConv2d(1536, 2048, 3, stride=1, dilation=exit_block_dilations[1], BatchNorm=BatchNorm)
+        self.bn5 = BatchNorm(2048)
+
+        # Init weights
+        self._init_weight()
+
+        # Load pretrained model
+        if pretrained:
+            self._load_pretrained_model()
+
+    def forward(self, x):
+        # Entry flow
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+
+        x = self.conv2(x)
+        x = self.bn2(x)
+        x = self.relu(x)
+
+        x = self.block1(x)
+        # add relu here
+        x = self.relu(x)
+        low_level_feat = x
+        x = self.block2(x)
+        x = self.block3(x)
+
+        # Middle flow
+        x = self.block4(x)
+        x = self.block5(x)
+        x = self.block6(x)
+        x = self.block7(x)
+        x = self.block8(x)
+        x = self.block9(x)
+        x = self.block10(x)
+        x = self.block11(x)
+        x = self.block12(x)
+        x = self.block13(x)
+        x = self.block14(x)
+        x = self.block15(x)
+        x = self.block16(x)
+        x = self.block17(x)
+        x = self.block18(x)
+        x = self.block19(x)
+
+        # Exit flow
+        x = self.block20(x)
+        x = self.relu(x)
+        x = self.conv3(x)
+        x = self.bn3(x)
+        x = self.relu(x)
+
+        x = self.conv4(x)
+        x = self.bn4(x)
+        x = self.relu(x)
+
+        x = self.conv5(x)
+        x = self.bn5(x)
+        x = self.relu(x)
+
+        return x, low_level_feat
+
+    def _init_weight(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, SynchronizedBatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+
+    def _load_pretrained_model(self):
+        pretrain_dict = model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/xception-b5690688.pth')
+        model_dict = {}
+        state_dict = self.state_dict()
+
+        for k, v in pretrain_dict.items():
+            if k in state_dict:
+                if 'pointwise' in k:
+                    v = v.unsqueeze(-1).unsqueeze(-1)
+                if k.startswith('block11'):
+                    model_dict[k] = v
+                    model_dict[k.replace('block11', 'block12')] = v
+                    model_dict[k.replace('block11', 'block13')] = v
+                    model_dict[k.replace('block11', 'block14')] = v
+                    model_dict[k.replace('block11', 'block15')] = v
+                    model_dict[k.replace('block11', 'block16')] = v
+                    model_dict[k.replace('block11', 'block17')] = v
+                    model_dict[k.replace('block11', 'block18')] = v
+                    model_dict[k.replace('block11', 'block19')] = v
+                elif k.startswith('block12'):
+                    model_dict[k.replace('block12', 'block20')] = v
+                elif k.startswith('bn3'):
+                    model_dict[k] = v
+                    model_dict[k.replace('bn3', 'bn4')] = v
+                elif k.startswith('conv4'):
+                    model_dict[k.replace('conv4', 'conv5')] = v
+                elif k.startswith('bn4'):
+                    model_dict[k.replace('bn4', 'bn5')] = v
+                else:
+                    model_dict[k] = v
+        state_dict.update(model_dict)
+        self.load_state_dict(state_dict)
+
+
+
+if __name__ == "__main__":
+    import torch
+    model = AlignedXception(BatchNorm=nn.BatchNorm2d, pretrained=True, output_stride=16)
+    input = torch.rand(1, 3, 512, 512)
+    output, low_level_feat = model(input)
+    print(output.size())
+    print(low_level_feat.size())
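The notable part of xception.py is _load_pretrained_model: ImageNet Xception has 8 middle-flow blocks while this aligned variant has 16, so block11's pretrained weights seed block12 through block19 and block12's seed the exit-flow block20. The unsqueeze pair implies the legacy Cadene checkpoint stores pointwise (1x1) weights as 2-D (out, in) tensors, which nn.Conv2d needs as 4-D. A small standalone illustration of both remappings (the key name is hypothetical):

import torch

# nn.Conv2d keeps 1x1 weights as (out, in, 1, 1); the loader's two
# unsqueeze calls lift a 2-D checkpoint tensor into that layout.
v = torch.randn(128, 64)
v = v.unsqueeze(-1).unsqueeze(-1)
print(v.shape)  # torch.Size([128, 64, 1, 1])

# block11 weights fan out to the extra middle-flow blocks.
k = 'block11.rep.1.conv1.weight'  # hypothetical key, for illustration only
print([k.replace('block11', 'block%d' % i) for i in range(12, 20)])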
doctra/third_party/docres/data/MBD/model/deep_lab_model/decoder.py
@@ -0,0 +1,59 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
+
+class Decoder(nn.Module):
+    def __init__(self, num_classes, backbone, BatchNorm):
+        super(Decoder, self).__init__()
+        if backbone == 'resnet' or backbone == 'drn':
+            low_level_inplanes = 256
+        elif backbone == 'xception':
+            low_level_inplanes = 128
+        elif backbone == 'mobilenet':
+            low_level_inplanes = 24
+        else:
+            raise NotImplementedError
+
+        self.conv1 = nn.Conv2d(low_level_inplanes, 48, 1, bias=False)
+        self.bn1 = BatchNorm(48)
+        self.relu = nn.ReLU()
+        self.last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False),
+                                       BatchNorm(256),
+                                       nn.ReLU(),
+                                       nn.Dropout(0.5),
+                                       nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
+                                       BatchNorm(256),
+                                       nn.ReLU(),
+                                       nn.Dropout(0.1),
+                                       nn.Conv2d(256, num_classes, kernel_size=1, stride=1),
+                                       nn.Sigmoid()
+                                       )
+        self._init_weight()
+
+
+    def forward(self, x, low_level_feat):
+        low_level_feat = self.conv1(low_level_feat)
+        low_level_feat = self.bn1(low_level_feat)
+        low_level_feat = self.relu(low_level_feat)
+
+        x = F.interpolate(x, size=low_level_feat.size()[2:], mode='bilinear', align_corners=True)
+        x = torch.cat((x, low_level_feat), dim=1)
+        x = self.last_conv(x)
+
+        return x
+
+    def _init_weight(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                torch.nn.init.kaiming_normal_(m.weight)
+            elif isinstance(m, SynchronizedBatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+def build_decoder(num_classes, backbone, BatchNorm):
+    return Decoder(num_classes, backbone, BatchNorm)
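decoder.py pins down the channel contract of the whole model: last_conv expects 304 = 256 + 48 channels, i.e. a 256-channel ASPP output (aspp.py is also added in this diff) concatenated with the low-level features after conv1 reduces them to 48. Unlike the stock DeepLab decoder, this one ends in a Sigmoid, so the head emits [0, 1] maps rather than logits, presumably for MBD's mask prediction. A sketch with dummy tensors standing in for the real backbone and ASPP outputs, under the same sys.path assumption as the earlier examples:

import torch
import torch.nn as nn

# Assumption: the MBD directory is on sys.path.
from model.deep_lab_model.decoder import build_decoder

decoder = build_decoder(num_classes=1, backbone='mobilenet', BatchNorm=nn.BatchNorm2d)
decoder.eval()

with torch.no_grad():
    aspp_out = torch.rand(1, 256, 32, 32)    # stand-in for the ASPP output
    low_level = torch.rand(1, 24, 128, 128)  # mobilenet low-level features
    out = decoder(aspp_out, low_level)

# x is bilinearly upsampled to the low-level resolution before the concat;
# the Sigmoid bounds each of the num_classes maps to [0, 1].
print(out.shape)  # expected: torch.Size([1, 1, 128, 128])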