magic-pdf 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/data/batch_build_dataset.py +156 -0
- magic_pdf/data/dataset.py +56 -25
- magic_pdf/data/utils.py +108 -9
- magic_pdf/dict2md/ocr_mkcontent.py +4 -3
- magic_pdf/libs/pdf_image_tools.py +11 -6
- magic_pdf/libs/performance_stats.py +12 -1
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/batch_analyze.py +175 -201
- magic_pdf/model/doc_analyze_by_custom_model.py +142 -92
- magic_pdf/model/pdf_extract_kit.py +5 -38
- magic_pdf/model/sub_modules/language_detection/utils.py +2 -4
- magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py +24 -19
- magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +3 -1
- magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py +3 -1
- magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +31 -102
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py +13 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py +189 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py +8 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py +163 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py +2351 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py +9 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py +132 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py +132 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py +1084 -0
- magic_pdf/model/sub_modules/model_init.py +50 -37
- magic_pdf/model/sub_modules/model_utils.py +18 -12
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/{paddleocr → paddleocr2pytorch}/ocr_utils.py +102 -97
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py +193 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py +39 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py +8 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py +48 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py +418 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py +25 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py +105 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py +62 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py +269 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py +290 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py +516 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +136 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py +234 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py +638 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py +76 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py +43 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py +23 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py +109 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py +54 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py +58 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py +29 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py +456 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py +117 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py +228 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py +33 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py +20 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py +179 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py +690 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py +0 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml +383 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt +162 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +8421 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt +163 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt +167 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt +95 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt +4399 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt +153 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt +3688 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt +185 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +6623 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt +128 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt +151 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml +49 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py +106 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py +217 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py +440 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py +104 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py +227 -0
- magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +15 -19
- magic_pdf/pdf_parse_union_core_v2.py +112 -74
- magic_pdf/pre_proc/ocr_dict_merge.py +9 -1
- magic_pdf/pre_proc/ocr_span_list_modify.py +51 -0
- magic_pdf/resources/model_config/model_configs.yaml +1 -1
- magic_pdf/resources/slanet_plus/slanet-plus.onnx +0 -0
- magic_pdf/tools/cli.py +30 -12
- magic_pdf/tools/common.py +90 -12
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/METADATA +92 -59
- magic_pdf-1.3.1.dist-info/RECORD +203 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/WHEEL +1 -1
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py +0 -204
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py +0 -213
- magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py +0 -37
- magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py +0 -71
- magic_pdf/resources/model_config/UniMERNet/demo.yaml +0 -46
- magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml +0 -351
- magic_pdf-1.2.2.dist-info/RECORD +0 -147
- /magic_pdf/model/sub_modules/{ocr/paddleocr/__init__.py → mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py} +0 -0
- /magic_pdf/model/sub_modules/{table/structeqtable → ocr/paddleocr2pytorch/pytorchocr}/__init__.py +0 -0
- /magic_pdf/model/sub_modules/{table/tablemaster → ocr/paddleocr2pytorch/pytorchocr/modeling}/__init__.py +0 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/top_level.txt +0 -0
`magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py` (new file; this hunk matches the +228 entry in the file list):

```diff
@@ -0,0 +1,228 @@
+import torch
+from torch import nn
+
+from ..backbones.rec_svtrnet import Block, ConvBNLayer
+
+
+class Im2Seq(nn.Module):
+    def __init__(self, in_channels, **kwargs):
+        super().__init__()
+        self.out_channels = in_channels
+
+    def forward(self, x):
+        B, C, H, W = x.shape
+        # assert H == 1
+        x = x.squeeze(dim=2)
+        # x = x.transpose([0, 2, 1])  # paddle (NTC)(batch, width, channels)
+        x = x.permute(0, 2, 1)
+        return x
+
+
+class EncoderWithRNN_(nn.Module):
+    def __init__(self, in_channels, hidden_size):
+        super(EncoderWithRNN_, self).__init__()
+        self.out_channels = hidden_size * 2
+        self.rnn1 = nn.LSTM(
+            in_channels,
+            hidden_size,
+            bidirectional=False,
+            batch_first=True,
+            num_layers=2,
+        )
+        self.rnn2 = nn.LSTM(
+            in_channels,
+            hidden_size,
+            bidirectional=False,
+            batch_first=True,
+            num_layers=2,
+        )
+
+    def forward(self, x):
+        self.rnn1.flatten_parameters()
+        self.rnn2.flatten_parameters()
+        out1, h1 = self.rnn1(x)
+        out2, h2 = self.rnn2(torch.flip(x, [1]))
+        return torch.cat([out1, torch.flip(out2, [1])], 2)
+
+
+class EncoderWithRNN(nn.Module):
+    def __init__(self, in_channels, hidden_size):
+        super(EncoderWithRNN, self).__init__()
+        self.out_channels = hidden_size * 2
+        self.lstm = nn.LSTM(
+            in_channels, hidden_size, num_layers=2, batch_first=True, bidirectional=True
+        )  # batch_first:=True
+
+    def forward(self, x):
+        x, _ = self.lstm(x)
+        return x
+
+
+class EncoderWithFC(nn.Module):
+    def __init__(self, in_channels, hidden_size):
+        super(EncoderWithFC, self).__init__()
+        self.out_channels = hidden_size
+        self.fc = nn.Linear(
+            in_channels,
+            hidden_size,
+            bias=True,
+        )
+
+    def forward(self, x):
+        x = self.fc(x)
+        return x
+
+
+class EncoderWithSVTR(nn.Module):
+    def __init__(
+        self,
+        in_channels,
+        dims=64,  # XS
+        depth=2,
+        hidden_dims=120,
+        use_guide=False,
+        num_heads=8,
+        qkv_bias=True,
+        mlp_ratio=2.0,
+        drop_rate=0.1,
+        kernel_size=[3, 3],
+        attn_drop_rate=0.1,
+        drop_path=0.0,
+        qk_scale=None,
+    ):
+        super(EncoderWithSVTR, self).__init__()
+        self.depth = depth
+        self.use_guide = use_guide
+        self.conv1 = ConvBNLayer(
+            in_channels,
+            in_channels // 8,
+            kernel_size=kernel_size,
+            padding=[kernel_size[0] // 2, kernel_size[1] // 2],
+            act="swish",
+        )
+        self.conv2 = ConvBNLayer(
+            in_channels // 8, hidden_dims, kernel_size=1, act="swish"
+        )
+
+        self.svtr_block = nn.ModuleList(
+            [
+                Block(
+                    dim=hidden_dims,
+                    num_heads=num_heads,
+                    mixer="Global",
+                    HW=None,
+                    mlp_ratio=mlp_ratio,
+                    qkv_bias=qkv_bias,
+                    qk_scale=qk_scale,
+                    drop=drop_rate,
+                    act_layer="swish",
+                    attn_drop=attn_drop_rate,
+                    drop_path=drop_path,
+                    norm_layer="nn.LayerNorm",
+                    epsilon=1e-05,
+                    prenorm=False,
+                )
+                for i in range(depth)
+            ]
+        )
+        self.norm = nn.LayerNorm(hidden_dims, eps=1e-6)
+        self.conv3 = ConvBNLayer(hidden_dims, in_channels, kernel_size=1, act="swish")
+        # last conv-nxn, the input is concat of input tensor and conv3 output tensor
+        self.conv4 = ConvBNLayer(
+            2 * in_channels, in_channels // 8, padding=1, act="swish"
+        )
+
+        self.conv1x1 = ConvBNLayer(in_channels // 8, dims, kernel_size=1, act="swish")
+        self.out_channels = dims
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        # weight initialization
+        if isinstance(m, nn.Conv2d):
+            nn.init.kaiming_normal_(m.weight, mode="fan_out")
+            if m.bias is not None:
+                nn.init.zeros_(m.bias)
+        elif isinstance(m, nn.BatchNorm2d):
+            nn.init.ones_(m.weight)
+            nn.init.zeros_(m.bias)
+        elif isinstance(m, nn.Linear):
+            nn.init.normal_(m.weight, 0, 0.01)
+            if m.bias is not None:
+                nn.init.zeros_(m.bias)
+        elif isinstance(m, nn.ConvTranspose2d):
+            nn.init.kaiming_normal_(m.weight, mode="fan_out")
+            if m.bias is not None:
+                nn.init.zeros_(m.bias)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.ones_(m.weight)
+            nn.init.zeros_(m.bias)
+
+    def forward(self, x):
+        # for use guide
+        if self.use_guide:
+            z = x.clone()
+            z.stop_gradient = True
+        else:
+            z = x
+        # for short cut
+        h = z
+        # reduce dim
+        z = self.conv1(z)
+        z = self.conv2(z)
+        # SVTR global block
+        B, C, H, W = z.shape
+        z = z.flatten(2).permute(0, 2, 1)
+
+        for blk in self.svtr_block:
+            z = blk(z)
+
+        z = self.norm(z)
+        # last stage
+        z = z.reshape([-1, H, W, C]).permute(0, 3, 1, 2)
+        z = self.conv3(z)
+        z = torch.cat((h, z), dim=1)
+        z = self.conv1x1(self.conv4(z))
+
+        return z
+
+
+class SequenceEncoder(nn.Module):
+    def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs):
+        super(SequenceEncoder, self).__init__()
+        self.encoder_reshape = Im2Seq(in_channels)
+        self.out_channels = self.encoder_reshape.out_channels
+        self.encoder_type = encoder_type
+        if encoder_type == "reshape":
+            self.only_reshape = True
+        else:
+            support_encoder_dict = {
+                "reshape": Im2Seq,
+                "fc": EncoderWithFC,
+                "rnn": EncoderWithRNN,
+                "svtr": EncoderWithSVTR,
+            }
+            assert encoder_type in support_encoder_dict, "{} must in {}".format(
+                encoder_type, support_encoder_dict.keys()
+            )
+
+            if encoder_type == "svtr":
+                self.encoder = support_encoder_dict[encoder_type](
+                    self.encoder_reshape.out_channels, **kwargs
+                )
+            else:
+                self.encoder = support_encoder_dict[encoder_type](
+                    self.encoder_reshape.out_channels, hidden_size
+                )
+            self.out_channels = self.encoder.out_channels
+            self.only_reshape = False
+
+    def forward(self, x):
+        if self.encoder_type != "svtr":
+            x = self.encoder_reshape(x)
+            if not self.only_reshape:
+                x = self.encoder(x)
+            return x
+        else:
+            x = self.encoder(x)
+            x = self.encoder_reshape(x)
+            return x
```
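A minimal sketch of exercising this neck in isolation, assuming magic-pdf 1.3.1 is installed so the module path from the file list resolves. The shapes follow the `Im2Seq` contract (a height-1 feature map); the 480-channel width is an illustrative choice, not a value taken from this diff:

```python
import torch

from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorchocr.modeling.necks.rnn import (
    SequenceEncoder,
)

# (N, C, H=1, W) backbone output; 480 channels is an arbitrary example value.
feats = torch.randn(2, 480, 1, 80)
encoder = SequenceEncoder(in_channels=480, encoder_type="rnn", hidden_size=48)
seq = encoder(feats)  # Im2Seq reshape -> 2-layer bidirectional LSTM
print(seq.shape)      # torch.Size([2, 80, 96]) == (N, W, 2 * hidden_size)
```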
`magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py` (new file; this hunk matches the +33 entry in the file list):

```diff
@@ -0,0 +1,33 @@
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import copy
+
+__all__ = ['build_post_process']
+
+
+def build_post_process(config, global_config=None):
+    from .db_postprocess import DBPostProcess
+    from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, TableLabelDecode, \
+        NRTRLabelDecode, SARLabelDecode, ViTSTRLabelDecode, RFLLabelDecode
+    from .cls_postprocess import ClsPostProcess
+    from .rec_postprocess import CANLabelDecode
+
+    support_dict = [
+        'DBPostProcess', 'CTCLabelDecode',
+        'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode',
+        'TableLabelDecode', 'NRTRLabelDecode', 'SARLabelDecode',
+        'ViTSTRLabelDecode','CANLabelDecode', 'RFLLabelDecode'
+    ]
+
+    config = copy.deepcopy(config)
+    module_name = config.pop('name')
+    if global_config is not None:
+        config.update(global_config)
+    assert module_name in support_dict, Exception(
+        'post process only support {}, but got {}'.format(support_dict, module_name))
+    module_class = eval(module_name)(**config)
+    return module_class
```
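A hedged usage sketch for this factory: `name` selects the class and the remaining keys are forwarded as constructor kwargs. The `character_dict_path`/`use_space_char` keys are assumptions based on the upstream PaddleOCR `CTCLabelDecode` interface, which this port appears to follow but which is not shown in this hunk:

```python
from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorchocr.postprocess import (
    build_post_process,
)

# Keys other than 'name' are assumed from the upstream PaddleOCR
# CTCLabelDecode signature, not verified against this wheel.
post_op = build_post_process({
    "name": "CTCLabelDecode",
    "character_dict_path": None,   # upstream default: lowercase alphanumerics
    "use_space_char": True,
})
```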
`magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py` (new file; this hunk matches the +20 entry in the file list):

```diff
@@ -0,0 +1,20 @@
+import torch
+
+
+class ClsPostProcess(object):
+    """ Convert between text-label and text-index """
+
+    def __init__(self, label_list, **kwargs):
+        super(ClsPostProcess, self).__init__()
+        self.label_list = label_list
+
+    def __call__(self, preds, label=None, *args, **kwargs):
+        if isinstance(preds, torch.Tensor):
+            preds = preds.cpu().numpy()
+        pred_idxs = preds.argmax(axis=1)
+        decode_out = [(self.label_list[idx], preds[i, idx])
+                      for i, idx in enumerate(pred_idxs)]
+        if label is None:
+            return decode_out
+        label = [(self.label_list[idx], 1.0) for idx in label]
+        return decode_out, label
```
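Since `ClsPostProcess` is self-contained, a small sketch can run it directly on dummy orientation logits; the `["0", "180"]` label list matches the usual PP-OCR angle classifier but is an assumption here:

```python
import torch

from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorchocr.postprocess.cls_postprocess import (
    ClsPostProcess,
)

post = ClsPostProcess(label_list=["0", "180"])   # assumed PP-OCR angle labels
logits = torch.tensor([[0.9, 0.1], [0.2, 0.8]])  # fake classifier output, shape (N, num_classes)
print(post(logits))                              # [('0', 0.9), ('180', 0.8)]
```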
`magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py` (new file; this hunk matches the +179 entry in the file list):

```diff
@@ -0,0 +1,179 @@
+"""
+This code is refered from:
+https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import cv2
+import torch
+from shapely.geometry import Polygon
+import pyclipper
+
+
+class DBPostProcess(object):
+    """
+    The post process for Differentiable Binarization (DB).
+    """
+
+    def __init__(self,
+                 thresh=0.3,
+                 box_thresh=0.7,
+                 max_candidates=1000,
+                 unclip_ratio=2.0,
+                 use_dilation=False,
+                 score_mode="fast",
+                 **kwargs):
+        self.thresh = thresh
+        self.box_thresh = box_thresh
+        self.max_candidates = max_candidates
+        self.unclip_ratio = unclip_ratio
+        self.min_size = 3
+        self.score_mode = score_mode
+        assert score_mode in [
+            "slow", "fast"
+        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)
+
+        self.dilation_kernel = None if not use_dilation else np.array(
+            [[1, 1], [1, 1]])
+
+    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
+        '''
+        _bitmap: single map with shape (1, H, W),
+            whose values are binarized as {0, 1}
+        '''
+
+        bitmap = _bitmap
+        height, width = bitmap.shape
+
+        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
+                                cv2.CHAIN_APPROX_SIMPLE)
+        if len(outs) == 3:
+            img, contours, _ = outs[0], outs[1], outs[2]
+        elif len(outs) == 2:
+            contours, _ = outs[0], outs[1]
+
+        num_contours = min(len(contours), self.max_candidates)
+
+        boxes = []
+        scores = []
+        for index in range(num_contours):
+            contour = contours[index]
+            points, sside = self.get_mini_boxes(contour)
+            if sside < self.min_size:
+                continue
+            points = np.array(points)
+            if self.score_mode == "fast":
+                score = self.box_score_fast(pred, points.reshape(-1, 2))
+            else:
+                score = self.box_score_slow(pred, contour)
+            if self.box_thresh > score:
+                continue
+
+            box = self.unclip(points).reshape(-1, 1, 2)
+            box, sside = self.get_mini_boxes(box)
+            if sside < self.min_size + 2:
+                continue
+            box = np.array(box)
+
+            box[:, 0] = np.clip(
+                np.round(box[:, 0] / width * dest_width), 0, dest_width)
+            box[:, 1] = np.clip(
+                np.round(box[:, 1] / height * dest_height), 0, dest_height)
+            boxes.append(box.astype(np.int16))
+            scores.append(score)
+        return np.array(boxes, dtype=np.int16), scores
+
+    def unclip(self, box):
+        unclip_ratio = self.unclip_ratio
+        poly = Polygon(box)
+        distance = poly.area * unclip_ratio / poly.length
+        offset = pyclipper.PyclipperOffset()
+        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+        expanded = np.array(offset.Execute(distance))
+        return expanded
+
+    def get_mini_boxes(self, contour):
+        bounding_box = cv2.minAreaRect(contour)
+        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+
+        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
+        if points[1][1] > points[0][1]:
+            index_1 = 0
+            index_4 = 1
+        else:
+            index_1 = 1
+            index_4 = 0
+        if points[3][1] > points[2][1]:
+            index_2 = 2
+            index_3 = 3
+        else:
+            index_2 = 3
+            index_3 = 2
+
+        box = [
+            points[index_1], points[index_2], points[index_3], points[index_4]
+        ]
+        return box, min(bounding_box[1])
+
+    def box_score_fast(self, bitmap, _box):
+        '''
+        box_score_fast: use bbox mean score as the mean score
+        '''
+        h, w = bitmap.shape[:2]
+        box = _box.copy()
+        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int64), 0, w - 1)
+        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int64), 0, w - 1)
+        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int64), 0, h - 1)
+        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int64), 0, h - 1)
+
+        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+        box[:, 0] = box[:, 0] - xmin
+        box[:, 1] = box[:, 1] - ymin
+        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
+        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
+
+    def box_score_slow(self, bitmap, contour):
+        '''
+        box_score_slow: use polyon mean score as the mean score
+        '''
+        h, w = bitmap.shape[:2]
+        contour = contour.copy()
+        contour = np.reshape(contour, (-1, 2))
+
+        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
+        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
+        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
+        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)
+
+        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+
+        contour[:, 0] = contour[:, 0] - xmin
+        contour[:, 1] = contour[:, 1] - ymin
+
+        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
+        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
+
+    def __call__(self, outs_dict, shape_list):
+        pred = outs_dict['maps']
+        if isinstance(pred, torch.Tensor):
+            pred = pred.cpu().numpy()
+        pred = pred[:, 0, :, :]
+        segmentation = pred > self.thresh
+
+        boxes_batch = []
+        for batch_index in range(pred.shape[0]):
+            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
+            if self.dilation_kernel is not None:
+                mask = cv2.dilate(
+                    np.array(segmentation[batch_index]).astype(np.uint8),
+                    self.dilation_kernel)
+            else:
+                mask = segmentation[batch_index]
+            boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
+                                                   src_w, src_h)
+
+            boxes_batch.append({'points': boxes})
+        return boxes_batch
```
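A minimal sketch running `DBPostProcess` on a synthetic probability map. Each `shape_list` row is `(src_h, src_w, ratio_h, ratio_w)` per `__call__` above; the map values and thresholds here are fabricated for illustration:

```python
import numpy as np

from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorchocr.postprocess.db_postprocess import (
    DBPostProcess,
)

# Fake (N, 1, H, W) DB probability map with one confident text region.
prob_map = np.zeros((1, 1, 64, 64), dtype=np.float32)
prob_map[0, 0, 20:40, 10:50] = 0.9

post = DBPostProcess(thresh=0.3, box_thresh=0.6, unclip_ratio=1.5)
result = post({"maps": prob_map}, shape_list=[(64, 64, 1.0, 1.0)])
print(result[0]["points"])  # 4-point quadrilaterals in source-image coordinates
```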