magic-pdf 0.5.13__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. magic_pdf/cli/magicpdf.py +18 -7
  2. magic_pdf/libs/config_reader.py +10 -0
  3. magic_pdf/libs/version.py +1 -1
  4. magic_pdf/model/__init__.py +1 -0
  5. magic_pdf/model/doc_analyze_by_custom_model.py +38 -15
  6. magic_pdf/model/model_list.py +1 -0
  7. magic_pdf/model/pdf_extract_kit.py +196 -0
  8. magic_pdf/model/pek_sub_modules/__init__.py +0 -0
  9. magic_pdf/model/pek_sub_modules/layoutlmv3/__init__.py +0 -0
  10. magic_pdf/model/pek_sub_modules/layoutlmv3/backbone.py +179 -0
  11. magic_pdf/model/pek_sub_modules/layoutlmv3/beit.py +671 -0
  12. magic_pdf/model/pek_sub_modules/layoutlmv3/deit.py +476 -0
  13. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__init__.py +7 -0
  14. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/__init__.py +2 -0
  15. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/cord.py +171 -0
  16. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/data_collator.py +124 -0
  17. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/funsd.py +136 -0
  18. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/image_utils.py +284 -0
  19. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/xfund.py +213 -0
  20. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__init__.py +7 -0
  21. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py +24 -0
  22. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py +60 -0
  23. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py +1282 -0
  24. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py +32 -0
  25. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py +34 -0
  26. magic_pdf/model/pek_sub_modules/layoutlmv3/model_init.py +150 -0
  27. magic_pdf/model/pek_sub_modules/layoutlmv3/rcnn_vl.py +163 -0
  28. magic_pdf/model/pek_sub_modules/layoutlmv3/visualizer.py +1236 -0
  29. magic_pdf/model/pek_sub_modules/post_process.py +36 -0
  30. magic_pdf/model/pek_sub_modules/self_modify.py +260 -0
  31. magic_pdf/model/pp_structure_v2.py +7 -0
  32. magic_pdf/pipe/AbsPipe.py +8 -14
  33. magic_pdf/pipe/OCRPipe.py +12 -8
  34. magic_pdf/pipe/TXTPipe.py +12 -8
  35. magic_pdf/pipe/UNIPipe.py +9 -7
  36. magic_pdf/resources/model_config/UniMERNet/demo.yaml +46 -0
  37. magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml +351 -0
  38. magic_pdf/resources/model_config/model_configs.yaml +9 -0
  39. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/METADATA +18 -8
  40. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/RECORD +44 -18
  41. magic_pdf/model/360_layout_analysis.py +0 -8
  42. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/LICENSE.md +0 -0
  43. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/WHEEL +0 -0
  44. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/entry_points.txt +0 -0
  45. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/top_level.txt +0 -0
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py
@@ -0,0 +1,32 @@
+ # coding=utf-8
+ # Copyright 2018 The Open AI Team Authors and The HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Tokenization classes for LayoutLMv3, refer to RoBERTa."""
+
+ from transformers.models.roberta import RobertaTokenizer
+ from transformers.utils import logging
+
+
+ logger = logging.get_logger(__name__)
+
+ VOCAB_FILES_NAMES = {
+     "vocab_file": "vocab.json",
+     "merges_file": "merges.txt",
+ }
+
+ class LayoutLMv3Tokenizer(RobertaTokenizer):
+     vocab_files_names = VOCAB_FILES_NAMES
+     # pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
+     # max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
+     model_input_names = ["input_ids", "attention_mask"]
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py
@@ -0,0 +1,34 @@
+ # coding=utf-8
+ # Copyright 2018 The Open AI Team Authors and The HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Fast Tokenization classes for LayoutLMv3, refer to RoBERTa."""
+
+
+ from transformers.models.roberta.tokenization_roberta_fast import RobertaTokenizerFast
+ from transformers.utils import logging
+
+ from .tokenization_layoutlmv3 import LayoutLMv3Tokenizer
+
+
+ logger = logging.get_logger(__name__)
+
+ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "tokenizer_file": "tokenizer.json"}
+
+
+ class LayoutLMv3TokenizerFast(RobertaTokenizerFast):
+     vocab_files_names = VOCAB_FILES_NAMES
+     # pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
+     # max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
+     model_input_names = ["input_ids", "attention_mask"]
+     slow_tokenizer_class = LayoutLMv3Tokenizer
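
The two new tokenizer modules above are thin subclasses of the RoBERTa tokenizers: they only re-point the vocabulary file names and, for the fast variant, the slow-tokenizer class. A minimal usage sketch (not part of the diff), assuming a local checkpoint directory containing vocab.json, merges.txt and tokenizer.json; the directory path is a placeholder:

    from magic_pdf.model.pek_sub_modules.layoutlmv3.layoutlmft.models.layoutlmv3.tokenization_layoutlmv3_fast import (
        LayoutLMv3TokenizerFast,
    )

    # Hypothetical local directory holding vocab.json / merges.txt / tokenizer.json.
    tokenizer = LayoutLMv3TokenizerFast.from_pretrained("/path/to/layoutlmv3-base")
    encoded = tokenizer("A layout-aware sentence.", return_tensors="pt")
    print(encoded["input_ids"].shape)  # model_input_names: input_ids, attention_mask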
magic_pdf/model/pek_sub_modules/layoutlmv3/model_init.py
@@ -0,0 +1,150 @@
+ from .visualizer import Visualizer
+ from .rcnn_vl import *
+ from .backbone import *
+
+ from detectron2.config import get_cfg
+ from detectron2.config import CfgNode as CN
+ from detectron2.data import MetadataCatalog, DatasetCatalog
+ from detectron2.data.datasets import register_coco_instances
+ from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch, DefaultPredictor
+
+
+ def add_vit_config(cfg):
+     """
+     Add config for VIT.
+     """
+     _C = cfg
+
+     _C.MODEL.VIT = CN()
+
+     # CoaT model name.
+     _C.MODEL.VIT.NAME = ""
+
+     # Output features from CoaT backbone.
+     _C.MODEL.VIT.OUT_FEATURES = ["layer3", "layer5", "layer7", "layer11"]
+
+     _C.MODEL.VIT.IMG_SIZE = [224, 224]
+
+     _C.MODEL.VIT.POS_TYPE = "shared_rel"
+
+     _C.MODEL.VIT.DROP_PATH = 0.
+
+     _C.MODEL.VIT.MODEL_KWARGS = "{}"
+
+     _C.SOLVER.OPTIMIZER = "ADAMW"
+
+     _C.SOLVER.BACKBONE_MULTIPLIER = 1.0
+
+     _C.AUG = CN()
+
+     _C.AUG.DETR = False
+
+     _C.MODEL.IMAGE_ONLY = True
+     _C.PUBLAYNET_DATA_DIR_TRAIN = ""
+     _C.PUBLAYNET_DATA_DIR_TEST = ""
+     _C.FOOTNOTE_DATA_DIR_TRAIN = ""
+     _C.FOOTNOTE_DATA_DIR_VAL = ""
+     _C.SCIHUB_DATA_DIR_TRAIN = ""
+     _C.SCIHUB_DATA_DIR_TEST = ""
+     _C.JIAOCAI_DATA_DIR_TRAIN = ""
+     _C.JIAOCAI_DATA_DIR_TEST = ""
+     _C.ICDAR_DATA_DIR_TRAIN = ""
+     _C.ICDAR_DATA_DIR_TEST = ""
+     _C.M6DOC_DATA_DIR_TEST = ""
+     _C.DOCSTRUCTBENCH_DATA_DIR_TEST = ""
+     _C.DOCSTRUCTBENCHv2_DATA_DIR_TEST = ""
+     _C.CACHE_DIR = ""
+     _C.MODEL.CONFIG_PATH = ""
+
+     # effective update steps would be MAX_ITER/GRADIENT_ACCUMULATION_STEPS
+     # maybe need to set MAX_ITER *= GRADIENT_ACCUMULATION_STEPS
+     _C.SOLVER.GRADIENT_ACCUMULATION_STEPS = 1
+
+
+ def setup(args, device):
+     """
+     Create configs and perform basic setups.
+     """
+     cfg = get_cfg()
+
+     # add_coat_config(cfg)
+     add_vit_config(cfg)
+     cfg.merge_from_file(args.config_file)
+     cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2  # set threshold for this model
+     cfg.merge_from_list(args.opts)
+
+     # use the unified device configuration
+     cfg.MODEL.DEVICE = device
+
+     cfg.freeze()
+     default_setup(cfg, args)
+
+     register_coco_instances(
+         "scihub_train",
+         {},
+         cfg.SCIHUB_DATA_DIR_TRAIN + ".json",
+         cfg.SCIHUB_DATA_DIR_TRAIN
+     )
+
+     return cfg
+
+
+ class DotDict(dict):
+     def __init__(self, *args, **kwargs):
+         super(DotDict, self).__init__(*args, **kwargs)
+
+     def __getattr__(self, key):
+         if key not in self.keys():
+             return None
+         value = self[key]
+         if isinstance(value, dict):
+             value = DotDict(value)
+         return value
+
+     def __setattr__(self, key, value):
+         self[key] = value
+
+
+ class Layoutlmv3_Predictor(object):
+     def __init__(self, weights, config_file, device):
+         layout_args = {
+             "config_file": config_file,
+             "resume": False,
+             "eval_only": False,
+             "num_gpus": 1,
+             "num_machines": 1,
+             "machine_rank": 0,
+             "dist_url": "tcp://127.0.0.1:57823",
+             "opts": ["MODEL.WEIGHTS", weights],
+         }
+         layout_args = DotDict(layout_args)
+
+         cfg = setup(layout_args, device)
+         self.mapping = ["title", "plain text", "abandon", "figure", "figure_caption", "table", "table_caption",
+                         "table_footnote", "isolate_formula", "formula_caption"]
+         MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes = self.mapping
+         self.predictor = DefaultPredictor(cfg)
+
+     def __call__(self, image, ignore_catids=[]):
+         # page_layout_result = {
+         #     "layout_dets": []
+         # }
+         layout_dets = []
+         outputs = self.predictor(image)
+         boxes = outputs["instances"].to("cpu")._fields["pred_boxes"].tensor.tolist()
+         labels = outputs["instances"].to("cpu")._fields["pred_classes"].tolist()
+         scores = outputs["instances"].to("cpu")._fields["scores"].tolist()
+         for bbox_idx in range(len(boxes)):
+             if labels[bbox_idx] in ignore_catids:
+                 continue
+             layout_dets.append({
+                 "category_id": labels[bbox_idx],
+                 "poly": [
+                     boxes[bbox_idx][0], boxes[bbox_idx][1],
+                     boxes[bbox_idx][2], boxes[bbox_idx][1],
+                     boxes[bbox_idx][2], boxes[bbox_idx][3],
+                     boxes[bbox_idx][0], boxes[bbox_idx][3],
+                 ],
+                 "score": scores[bbox_idx]
+             })
+         return layout_dets
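
Layoutlmv3_Predictor wraps a detectron2 DefaultPredictor and, per detected region, returns a dict with a category_id (index into the mapping list above), a clockwise four-point poly and a score. A hedged usage sketch (not part of the diff), assuming a downloaded weights file, the bundled layoutlmv3_base_inference.yaml and a BGR numpy image; all paths below are placeholders:

    import cv2  # only used to load a test page; any numpy image array works
    from magic_pdf.model.pek_sub_modules.layoutlmv3.model_init import Layoutlmv3_Predictor

    # Placeholder paths: point these at the downloaded model weights and the
    # layoutlmv3_base_inference.yaml shipped under magic_pdf/resources/model_config/.
    predictor = Layoutlmv3_Predictor(
        weights="/models/layoutlmv3/model_final.pth",
        config_file="magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml",
        device="cuda",  # or "cpu"
    )

    image = cv2.imread("page_0.png")  # DefaultPredictor expects BGR input by default
    layout_dets = predictor(image, ignore_catids=[2])  # e.g. drop the "abandon" class (index 2)
    for det in layout_dets:
        print(det["category_id"], round(det["score"], 3), det["poly"])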
magic_pdf/model/pek_sub_modules/layoutlmv3/rcnn_vl.py
@@ -0,0 +1,163 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ import logging
+ import numpy as np
+ from typing import Dict, List, Optional, Tuple
+ import torch
+ from torch import nn
+
+ from detectron2.config import configurable
+ from detectron2.structures import ImageList, Instances
+ from detectron2.utils.events import get_event_storage
+
+ from detectron2.modeling.backbone import Backbone, build_backbone
+ from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY
+
+ from detectron2.modeling.meta_arch import GeneralizedRCNN
+
+ from detectron2.modeling.postprocessing import detector_postprocess
+ from detectron2.modeling.roi_heads.fast_rcnn import fast_rcnn_inference_single_image
+ from contextlib import contextmanager
+ from itertools import count
+
+ @META_ARCH_REGISTRY.register()
+ class VLGeneralizedRCNN(GeneralizedRCNN):
+     """
+     Generalized R-CNN. Any models that contains the following three components:
+     1. Per-image feature extraction (aka backbone)
+     2. Region proposal generation
+     3. Per-region feature extraction and prediction
+     """
+
+     def forward(self, batched_inputs: List[Dict[str, torch.Tensor]]):
+         """
+         Args:
+             batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
+                 Each item in the list contains the inputs for one image.
+                 For now, each item in the list is a dict that contains:
+
+                 * image: Tensor, image in (C, H, W) format.
+                 * instances (optional): groundtruth :class:`Instances`
+                 * proposals (optional): :class:`Instances`, precomputed proposals.
+
+                 Other information that's included in the original dicts, such as:
+
+                 * "height", "width" (int): the output resolution of the model, used in inference.
+                   See :meth:`postprocess` for details.
+
+         Returns:
+             list[dict]:
+                 Each dict is the output for one input image.
+                 The dict contains one key "instances" whose value is a :class:`Instances`.
+                 The :class:`Instances` object has the following keys:
+                 "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints"
+         """
+         if not self.training:
+             return self.inference(batched_inputs)
+
+         images = self.preprocess_image(batched_inputs)
+         if "instances" in batched_inputs[0]:
+             gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
+         else:
+             gt_instances = None
+
+         # features = self.backbone(images.tensor)
+         input = self.get_batch(batched_inputs, images)
+         features = self.backbone(input)
+
+         if self.proposal_generator is not None:
+             proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
+         else:
+             assert "proposals" in batched_inputs[0]
+             proposals = [x["proposals"].to(self.device) for x in batched_inputs]
+             proposal_losses = {}
+
+         _, detector_losses = self.roi_heads(images, features, proposals, gt_instances)
+         if self.vis_period > 0:
+             storage = get_event_storage()
+             if storage.iter % self.vis_period == 0:
+                 self.visualize_training(batched_inputs, proposals)
+
+         losses = {}
+         losses.update(detector_losses)
+         losses.update(proposal_losses)
+         return losses
+
+     def inference(
+         self,
+         batched_inputs: List[Dict[str, torch.Tensor]],
+         detected_instances: Optional[List[Instances]] = None,
+         do_postprocess: bool = True,
+     ):
+         """
+         Run inference on the given inputs.
+
+         Args:
+             batched_inputs (list[dict]): same as in :meth:`forward`
+             detected_instances (None or list[Instances]): if not None, it
+                 contains an `Instances` object per image. The `Instances`
+                 object contains "pred_boxes" and "pred_classes" which are
+                 known boxes in the image.
+                 The inference will then skip the detection of bounding boxes,
+                 and only predict other per-ROI outputs.
+             do_postprocess (bool): whether to apply post-processing on the outputs.
+
+         Returns:
+             When do_postprocess=True, same as in :meth:`forward`.
+             Otherwise, a list[Instances] containing raw network outputs.
+         """
+         assert not self.training
+
+         images = self.preprocess_image(batched_inputs)
+         # features = self.backbone(images.tensor)
+         input = self.get_batch(batched_inputs, images)
+         features = self.backbone(input)
+
+         if detected_instances is None:
+             if self.proposal_generator is not None:
+                 proposals, _ = self.proposal_generator(images, features, None)
+             else:
+                 assert "proposals" in batched_inputs[0]
+                 proposals = [x["proposals"].to(self.device) for x in batched_inputs]
+
+             results, _ = self.roi_heads(images, features, proposals, None)
+         else:
+             detected_instances = [x.to(self.device) for x in detected_instances]
+             results = self.roi_heads.forward_with_given_boxes(features, detected_instances)
+
+         if do_postprocess:
+             assert not torch.jit.is_scripting(), "Scripting is not supported for postprocess."
+             return GeneralizedRCNN._postprocess(results, batched_inputs, images.image_sizes)
+         else:
+             return results
+
+     def get_batch(self, examples, images):
+         if len(examples) >= 1 and "bbox" not in examples[0]:  # image_only
+             return {"images": images.tensor}
+
+         return input
+
+     def _batch_inference(self, batched_inputs, detected_instances=None):
+         """
+         Execute inference on a list of inputs,
+         using batch size = self.batch_size (e.g., 2), instead of the length of the list.
+
+         Inputs & outputs have the same format as :meth:`GeneralizedRCNN.inference`
+         """
+         if detected_instances is None:
+             detected_instances = [None] * len(batched_inputs)
+
+         outputs = []
+         inputs, instances = [], []
+         for idx, input, instance in zip(count(), batched_inputs, detected_instances):
+             inputs.append(input)
+             instances.append(instance)
+             if len(inputs) == 2 or idx == len(batched_inputs) - 1:
+                 outputs.extend(
+                     self.inference(
+                         inputs,
+                         instances if instances[0] is not None else None,
+                         do_postprocess=True,  # False
+                     )
+                 )
+                 inputs, instances = [], []
+         return outputs
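
VLGeneralizedRCNN is registered with detectron2's META_ARCH_REGISTRY, so it is never constructed directly; detectron2 instantiates whatever class the config names as the meta-architecture (which is how the Layoutlmv3_Predictor above ends up using it via DefaultPredictor). A minimal sketch of that mechanism, not part of the diff, assuming the bundled YAML names VLGeneralizedRCNN in MODEL.META_ARCHITECTURE; the config path is a placeholder:

    from detectron2.config import get_cfg
    from detectron2.modeling import build_model

    from magic_pdf.model.pek_sub_modules.layoutlmv3.model_init import add_vit_config
    import magic_pdf.model.pek_sub_modules.layoutlmv3.rcnn_vl  # noqa: F401  (importing runs the registry decorator)

    cfg = get_cfg()
    add_vit_config(cfg)  # add the custom MODEL.VIT / AUG keys before merging the YAML
    cfg.merge_from_file("/path/to/layoutlmv3_base_inference.yaml")  # placeholder path
    # build_model() looks up cfg.MODEL.META_ARCHITECTURE in META_ARCH_REGISTRY,
    # which is where the @META_ARCH_REGISTRY.register() decorator above comes in.
    model = build_model(cfg)
    model.eval()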