magic-pdf 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries, and is provided for informational purposes only. A short stdlib sketch for reproducing this kind of wheel-to-wheel diff locally follows the file list below.
- magic_pdf/data/batch_build_dataset.py +156 -0
- magic_pdf/data/dataset.py +44 -24
- magic_pdf/data/utils.py +108 -9
- magic_pdf/dict2md/ocr_mkcontent.py +4 -3
- magic_pdf/libs/pdf_image_tools.py +11 -6
- magic_pdf/libs/performance_stats.py +12 -1
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/batch_analyze.py +175 -201
- magic_pdf/model/doc_analyze_by_custom_model.py +137 -92
- magic_pdf/model/pdf_extract_kit.py +5 -38
- magic_pdf/model/sub_modules/language_detection/utils.py +2 -4
- magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py +24 -19
- magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +3 -1
- magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py +3 -1
- magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +31 -102
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py +13 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py +189 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py +8 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py +163 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py +2351 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py +9 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py +132 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py +132 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py +1084 -0
- magic_pdf/model/sub_modules/model_init.py +50 -37
- magic_pdf/model/sub_modules/model_utils.py +17 -11
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/{paddleocr → paddleocr2pytorch}/ocr_utils.py +102 -97
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py +193 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py +39 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py +8 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py +48 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py +418 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py +25 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py +105 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py +62 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py +269 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py +290 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py +516 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +136 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py +234 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py +638 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py +76 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py +43 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py +23 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py +109 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py +54 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py +58 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py +29 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py +456 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py +117 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py +228 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py +33 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py +20 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py +179 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py +690 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py +0 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml +383 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt +162 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +8421 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt +163 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt +167 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt +95 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt +4399 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt +153 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt +3688 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt +185 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +6623 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt +128 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt +151 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml +49 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py +106 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py +217 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py +440 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py +104 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py +227 -0
- magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +10 -18
- magic_pdf/pdf_parse_union_core_v2.py +112 -74
- magic_pdf/post_proc/para_split_v3.py +16 -13
- magic_pdf/pre_proc/ocr_dict_merge.py +9 -1
- magic_pdf/pre_proc/ocr_span_list_modify.py +51 -0
- magic_pdf/resources/model_config/model_configs.yaml +1 -1
- magic_pdf/tools/cli.py +30 -12
- magic_pdf/tools/common.py +90 -12
- {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/METADATA +51 -41
- magic_pdf-1.3.0.dist-info/RECORD +202 -0
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py +0 -204
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py +0 -213
- magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py +0 -37
- magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py +0 -71
- magic_pdf/resources/model_config/UniMERNet/demo.yaml +0 -46
- magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml +0 -351
- magic_pdf-1.2.1.dist-info/RECORD +0 -147
- /magic_pdf/model/sub_modules/{ocr/paddleocr/__init__.py → mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py} +0 -0
- /magic_pdf/model/sub_modules/{table/structeqtable → ocr/paddleocr2pytorch/pytorchocr}/__init__.py +0 -0
- /magic_pdf/model/sub_modules/{table/tablemaster → ocr/paddleocr2pytorch/pytorchocr/modeling}/__init__.py +0 -0
- {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/WHEEL +0 -0
- {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/top_level.txt +0 -0
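To verify or extend the listing above, here is a minimal, stdlib-only sketch of reproducing a wheel-to-wheel diff like the one below. It assumes both wheel files have already been downloaded into the working directory; everything else uses only `zipfile` and `difflib`.

```python
# Stdlib-only sketch for reproducing a wheel-to-wheel diff like the one below.
# Assumes both wheels were already downloaded into the working directory.
import difflib
import zipfile

OLD = "magic_pdf-1.2.1-py3-none-any.whl"
NEW = "magic_pdf-1.3.0-py3-none-any.whl"

with zipfile.ZipFile(OLD) as old_whl, zipfile.ZipFile(NEW) as new_whl:
    old_names, new_names = set(old_whl.namelist()), set(new_whl.namelist())
    print("added:  ", len(new_names - old_names), "files")
    print("removed:", len(old_names - new_names), "files")

    # Unified diff for one file that exists in both wheels.
    path = "magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py"
    a = old_whl.read(path).decode("utf-8").splitlines(keepends=True)
    b = new_whl.read(path).decode("utf-8").splitlines(keepends=True)
    print("".join(difflib.unified_diff(a, b, fromfile="1.2.1/" + path, tofile="1.3.0/" + path)))
```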
magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py
@@ -1,14 +1,6 @@
-import argparse
-import os
-import re
-
 import torch
-import unimernet.tasks as tasks
-from PIL import Image
 from torch.utils.data import DataLoader, Dataset
-from torchvision import transforms
-from unimernet.common.config import Config
-from unimernet.processors import load_processor
+from tqdm import tqdm
 
 
 class MathDataset(Dataset):
@@ -20,55 +12,24 @@ class MathDataset(Dataset):
         return len(self.image_paths)
 
     def __getitem__(self, idx):
-
-        if isinstance(self.image_paths[idx], str):
-            raw_image = Image.open(self.image_paths[idx])
-        else:
-            raw_image = self.image_paths[idx]
+        raw_image = self.image_paths[idx]
         if self.transform:
             image = self.transform(raw_image)
             return image
 
 
-def latex_rm_whitespace(s: str):
-    """Remove unnecessary whitespace from LaTeX code."""
-    text_reg = r"(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})"
-    letter = "[a-zA-Z]"
-    noletter = "[\W_^\d]"
-    names = [x[0].replace(" ", "") for x in re.findall(text_reg, s)]
-    s = re.sub(text_reg, lambda match: str(names.pop(0)), s)
-    news = s
-    while True:
-        s = news
-        news = re.sub(r"(?!\\ )(%s)\s+?(%s)" % (noletter, noletter), r"\1\2", s)
-        news = re.sub(r"(?!\\ )(%s)\s+?(%s)" % (noletter, letter), r"\1\2", news)
-        news = re.sub(r"(%s)\s+?(%s)" % (letter, noletter), r"\1\2", news)
-        if news == s:
-            break
-    return s
-
-
 class UnimernetModel(object):
     def __init__(self, weight_dir, cfg_path, _device_="cpu"):
-        args = argparse.Namespace(cfg_path=cfg_path, options=None)
-        cfg = Config(args)
-        cfg.config.model.pretrained = os.path.join(weight_dir, "pytorch_model.pth")
-        cfg.config.model.model_config.model_name = weight_dir
-        cfg.config.model.tokenizer_config.path = weight_dir
-        task = tasks.setup_task(cfg)
-        self.model = task.build_model(cfg)
+        from .unimernet_hf import UnimernetModel
+        if _device_.startswith("mps"):
+            self.model = UnimernetModel.from_pretrained(weight_dir, attn_implementation="eager")
+        else:
+            self.model = UnimernetModel.from_pretrained(weight_dir)
         self.device = _device_
         self.model.to(_device_)
+        if not _device_.startswith("cpu"):
+            self.model = self.model.to(dtype=torch.float16)
         self.model.eval()
-        vis_processor = load_processor(
-            "formula_image_eval",
-            cfg.config.datasets.formula_rec_eval.vis_processor.eval,
-        )
-        self.mfr_transform = transforms.Compose(
-            [
-                vis_processor,
-            ]
-        )
 
     def predict(self, mfd_res, image):
         formula_list = []
@@ -84,62 +45,22 @@ class UnimernetModel(object):
                 "latex": "",
             }
             formula_list.append(new_item)
-
-            bbox_img = pil_img.crop((xmin, ymin, xmax, ymax))
+            bbox_img = image[ymin:ymax, xmin:xmax]
             mf_image_list.append(bbox_img)
 
-        dataset = MathDataset(mf_image_list, transform=self.mfr_transform)
+        dataset = MathDataset(mf_image_list, transform=self.model.transform)
         dataloader = DataLoader(dataset, batch_size=32, num_workers=0)
         mfr_res = []
         for mf_img in dataloader:
+            mf_img = mf_img.to(dtype=self.model.dtype)
             mf_img = mf_img.to(self.device)
             with torch.no_grad():
                 output = self.model.generate({"image": mf_img})
-            mfr_res.extend(output["pred_str"])
+            mfr_res.extend(output["fixed_str"])
         for res, latex in zip(formula_list, mfr_res):
-            res["latex"] = latex_rm_whitespace(latex)
+            res["latex"] = latex
         return formula_list
 
-    # def batch_predict(
-    #     self, images_mfd_res: list, images: list, batch_size: int = 64
-    # ) -> list:
-    #     images_formula_list = []
-    #     mf_image_list = []
-    #     backfill_list = []
-    #     for image_index in range(len(images_mfd_res)):
-    #         mfd_res = images_mfd_res[image_index]
-    #         pil_img = Image.fromarray(images[image_index])
-    #         formula_list = []
-    #
-    #         for xyxy, conf, cla in zip(
-    #             mfd_res.boxes.xyxy, mfd_res.boxes.conf, mfd_res.boxes.cls
-    #         ):
-    #             xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
-    #             new_item = {
-    #                 "category_id": 13 + int(cla.item()),
-    #                 "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
-    #                 "score": round(float(conf.item()), 2),
-    #                 "latex": "",
-    #             }
-    #             formula_list.append(new_item)
-    #             bbox_img = pil_img.crop((xmin, ymin, xmax, ymax))
-    #             mf_image_list.append(bbox_img)
-    #
-    #         images_formula_list.append(formula_list)
-    #         backfill_list += formula_list
-    #
-    #         dataset = MathDataset(mf_image_list, transform=self.mfr_transform)
-    #         dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0)
-    #         mfr_res = []
-    #         for mf_img in dataloader:
-    #             mf_img = mf_img.to(self.device)
-    #             with torch.no_grad():
-    #                 output = self.model.generate({"image": mf_img})
-    #             mfr_res.extend(output["pred_str"])
-    #         for res, latex in zip(backfill_list, mfr_res):
-    #             res["latex"] = latex_rm_whitespace(latex)
-    #         return images_formula_list
-
     def batch_predict(self, images_mfd_res: list, images: list, batch_size: int = 64) -> list:
         images_formula_list = []
         mf_image_list = []
@@ -149,7 +70,7 @@ class UnimernetModel(object):
         # Collect images with their original indices
         for image_index in range(len(images_mfd_res)):
             mfd_res = images_mfd_res[image_index]
-            pil_img = Image.fromarray(images[image_index])
+            np_array_image = images[image_index]
             formula_list = []
 
             for idx, (xyxy, conf, cla) in enumerate(zip(
@@ -163,7 +84,7 @@ class UnimernetModel(object):
                 "latex": "",
             }
             formula_list.append(new_item)
-            bbox_img = pil_img.crop((xmin, ymin, xmax, ymax))
+            bbox_img = np_array_image[ymin:ymax, xmin:xmax]
             area = (xmax - xmin) * (ymax - ymin)
 
             curr_idx = len(mf_image_list)
@@ -182,22 +103,30 @@ class UnimernetModel(object):
         index_mapping = {new_idx: old_idx for new_idx, old_idx in enumerate(sorted_indices)}
 
         # Create dataset with sorted images
-        dataset = MathDataset(sorted_images, transform=self.mfr_transform)
+        dataset = MathDataset(sorted_images, transform=self.model.transform)
         dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0)
 
         # Process batches and store results
         mfr_res = []
-        for mf_img in dataloader:
-            mf_img = mf_img.to(self.device)
-            with torch.no_grad():
-                output = self.model.generate({"image": mf_img})
-            mfr_res.extend(output["pred_str"])
+        # for mf_img in dataloader:
+
+        with tqdm(total=len(sorted_images), desc="MFR Predict") as pbar:
+            for index, mf_img in enumerate(dataloader):
+                mf_img = mf_img.to(dtype=self.model.dtype)
+                mf_img = mf_img.to(self.device)
+                with torch.no_grad():
+                    output = self.model.generate({"image": mf_img})
+                mfr_res.extend(output["fixed_str"])
+
+                # Advance the progress bar by batch_size; the last batch may be smaller.
+                current_batch_size = min(batch_size, len(sorted_images) - index * batch_size)
+                pbar.update(current_batch_size)
 
         # Restore original order
         unsorted_results = [""] * len(mfr_res)
         for new_idx, latex in enumerate(mfr_res):
             original_idx = index_mapping[new_idx]
-            unsorted_results[original_idx] = latex_rm_whitespace(latex)
+            unsorted_results[original_idx] = latex
 
         # Fill results back
         for res, latex in zip(backfill_list, unsorted_results):
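A minimal usage sketch of the new loading path shown above. The weight directory is hypothetical, and the claim that `model.transform` maps one numpy crop to one image tensor is inferred from how `MathDataset` and the `DataLoader` use it in this diff, not from the processor's own source.

```python
# Sketch of the 1.3.0 HF-style path that UnimernetModel.__init__ now takes.
# "weights/unimernet" is a hypothetical local directory of UniMERNet weights.
import numpy as np
import torch
from magic_pdf.model.sub_modules.mfr.unimernet.unimernet_hf import UnimernetModel

device = "cuda" if torch.cuda.is_available() else "cpu"
model = UnimernetModel.from_pretrained("weights/unimernet")
model.to(device)
if device != "cpu":
    model = model.to(dtype=torch.float16)  # fp16 off-CPU, as in __init__ above
model.eval()

crop = np.zeros((64, 256, 3), dtype=np.uint8)  # stand-in for one detected formula crop
# Assumption: the image processor returns a single (C, H, W) tensor per crop,
# as its use through MathDataset/DataLoader implies.
batch = model.transform(crop).unsqueeze(0).to(device, dtype=model.dtype)
with torch.no_grad():
    out = model.generate({"image": batch})
print(out["fixed_str"][0])  # whitespace-normalized LaTeX
```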

magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py
ADDED
@@ -0,0 +1,13 @@
+from .unimer_swin import UnimerSwinConfig, UnimerSwinModel, UnimerSwinImageProcessor
+from .unimer_mbart import UnimerMBartConfig, UnimerMBartModel, UnimerMBartForCausalLM
+from .modeling_unimernet import UnimernetModel
+
+__all__ = [
+    "UnimerSwinConfig",
+    "UnimerSwinModel",
+    "UnimerSwinImageProcessor",
+    "UnimerMBartConfig",
+    "UnimerMBartModel",
+    "UnimerMBartForCausalLM",
+    "UnimernetModel",
+]

magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py
ADDED
@@ -0,0 +1,189 @@
+import os
+import re
+import warnings
+from typing import Optional
+
+import torch
+from ftfy import fix_text
+
+from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokenizer, PretrainedConfig, PreTrainedModel
+from transformers import VisionEncoderDecoderConfig, VisionEncoderDecoderModel
+from transformers.models.vision_encoder_decoder.modeling_vision_encoder_decoder import logger as base_model_logger
+
+from .unimer_swin import UnimerSwinConfig, UnimerSwinModel, UnimerSwinImageProcessor
+from .unimer_mbart import UnimerMBartConfig, UnimerMBartForCausalLM
+
+AutoConfig.register(UnimerSwinConfig.model_type, UnimerSwinConfig)
+AutoConfig.register(UnimerMBartConfig.model_type, UnimerMBartConfig)
+AutoModel.register(UnimerSwinConfig, UnimerSwinModel)
+AutoModelForCausalLM.register(UnimerMBartConfig, UnimerMBartForCausalLM)
+
+
+# TODO: rewrite tokenizer
+class TokenizerWrapper:
+    def __init__(self, tokenizer):
+        self.tokenizer = tokenizer
+        self.pad_token_id = self.tokenizer.pad_token_id
+        self.bos_token_id = self.tokenizer.bos_token_id
+        self.eos_token_id = self.tokenizer.eos_token_id
+
+    def __len__(self):
+        return len(self.tokenizer)
+
+    def tokenize(self, text, **kwargs):
+        return self.tokenizer(
+            text,
+            return_token_type_ids=False,
+            return_tensors="pt",
+            padding="longest",
+            truncation=True,
+            **kwargs,
+        )
+
+    def token2str(self, tokens) -> list:
+        generated_text = self.tokenizer.batch_decode(tokens, skip_special_tokens=True)
+        generated_text = [fix_text(text) for text in generated_text]
+        return generated_text
+
+    def detokenize(self, tokens):
+        toks = [self.tokenizer.convert_ids_to_tokens(tok) for tok in tokens]
+        for b in range(len(toks)):
+            for i in reversed(range(len(toks[b]))):
+                if toks[b][i] is None:
+                    toks[b][i] = ''
+                toks[b][i] = toks[b][i].replace('Ġ', ' ').strip()
+                if toks[b][i] in ([self.tokenizer.bos_token, self.tokenizer.eos_token, self.tokenizer.pad_token]):
+                    del toks[b][i]
+        return toks
+
+
+def latex_rm_whitespace(s: str):
+    """Remove unnecessary whitespace from LaTeX code.
+    """
+    text_reg = r'(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})'
+    letter = r'[a-zA-Z]'
+    noletter = r'[\W_^\d]'
+    names = [x[0].replace(' ', '') for x in re.findall(text_reg, s)]
+    s = re.sub(text_reg, lambda _: str(names.pop(0)), s)
+    news = s
+    while True:
+        s = news
+        news = re.sub(r'(?!\\ )(%s)\s+?(%s)' % (noletter, noletter), r'\1\2', s)
+        news = re.sub(r'(?!\\ )(%s)\s+?(%s)' % (noletter, letter), r'\1\2', news)
+        news = re.sub(r'(%s)\s+?(%s)' % (letter, noletter), r'\1\2', news)
+        if news == s:
+            break
+    return s
+
+
+class UnimernetModel(VisionEncoderDecoderModel):
+    def __init__(
+        self,
+        config: Optional[PretrainedConfig] = None,
+        encoder: Optional[PreTrainedModel] = None,
+        decoder: Optional[PreTrainedModel] = None,
+    ):
+        # VisionEncoderDecoderModel's checking log has bug, disable for temp.
+        base_model_logger.disabled = True
+        try:
+            super().__init__(config, encoder, decoder)
+        finally:
+            base_model_logger.disabled = False
+
+        if not config or not hasattr(config, "_name_or_path"):
+            raise RuntimeError("config._name_or_path is required by UnimernetModel.")
+
+        model_path = config._name_or_path
+        self.transform = UnimerSwinImageProcessor()
+        self.tokenizer = TokenizerWrapper(AutoTokenizer.from_pretrained(model_path))
+        self._post_check()
+
+    def _post_check(self):
+        tokenizer = self.tokenizer
+
+        if tokenizer.tokenizer.model_max_length != self.config.decoder.max_position_embeddings:
+            warnings.warn(
+                f"decoder.max_position_embeddings={self.config.decoder.max_position_embeddings}," +
+                f" but tokenizer.model_max_length={tokenizer.tokenizer.model_max_length}, will set" +
+                f" tokenizer.model_max_length to {self.config.decoder.max_position_embeddings}.")
+            tokenizer.tokenizer.model_max_length = self.config.decoder.max_position_embeddings
+
+        assert self.config.decoder.vocab_size == len(tokenizer)
+        assert self.config.decoder_start_token_id == tokenizer.bos_token_id
+        assert self.config.pad_token_id == tokenizer.pad_token_id
+
+    @classmethod
+    def from_checkpoint(cls, model_path: str, model_filename: str = "pytorch_model.pth", state_dict_strip_prefix="model.model."):
+        config = VisionEncoderDecoderConfig.from_pretrained(model_path)
+        config._name_or_path = model_path
+        config.encoder = UnimerSwinConfig(**vars(config.encoder))
+        config.decoder = UnimerMBartConfig(**vars(config.decoder))
+
+        encoder = UnimerSwinModel(config.encoder)
+        decoder = UnimerMBartForCausalLM(config.decoder)
+        model = cls(config, encoder, decoder)
+
+        # load model weights
+        model_file_path = os.path.join(model_path, model_filename)
+        checkpoint = torch.load(model_file_path, map_location="cpu", weights_only=True)
+        state_dict = checkpoint["model"] if "model" in checkpoint else checkpoint
+        if not state_dict:
+            raise RuntimeError("state_dict is empty.")
+        if state_dict_strip_prefix:
+            state_dict = {
+                k[len(state_dict_strip_prefix):] if k.startswith(state_dict_strip_prefix) else k: v
+                for k, v in state_dict.items()
+            }
+        missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
+        if len(unexpected_keys) > 0:
+            warnings.warn("Unexpected key(s) in state_dict: {}.".format(", ".join(f'"{k}"' for k in unexpected_keys)))
+        if len(missing_keys) > 0:
+            raise RuntimeError("Missing key(s) in state_dict: {}.".format(", ".join(f'"{k}"' for k in missing_keys)))
+        return model
+
+    def forward_bak(self, samples):
+        pixel_values, text = samples["image"], samples["text_input"]
+
+        text_inputs = self.tokenizer.tokenize(text).to(pixel_values.device)
+        decoder_input_ids, decoder_attention_mask = text_inputs["input_ids"], text_inputs["attention_mask"]
+
+        num_channels = pixel_values.shape[1]
+        if num_channels == 1:
+            pixel_values = pixel_values.repeat(1, 3, 1, 1)
+
+        labels = decoder_input_ids * 1
+        labels = labels.masked_fill(labels == self.tokenizer.pad_token_id, -100)
+
+        loss = self.model(
+            pixel_values=pixel_values,
+            decoder_input_ids=decoder_input_ids[:, :-1],
+            decoder_attention_mask=decoder_attention_mask[:, :-1],
+            labels=labels[:, 1:],
+        ).loss
+        return {"loss": loss}
+
+    def generate(self, samples, do_sample: bool = False, temperature: float = 0.2, top_p: float = 0.95):
+        pixel_values = samples["image"]
+        num_channels = pixel_values.shape[1]
+        if num_channels == 1:
+            pixel_values = pixel_values.repeat(1, 3, 1, 1)
+
+        kwargs = {}
+        if do_sample:
+            kwargs["temperature"] = temperature
+            kwargs["top_p"] = top_p
+
+        outputs = super().generate(
+            pixel_values=pixel_values,
+            max_new_tokens=self.tokenizer.tokenizer.model_max_length,  # required
+            decoder_start_token_id=self.tokenizer.tokenizer.bos_token_id,
+            do_sample=do_sample,
+            **kwargs,
+        )
+
+        outputs = outputs[:, 1:].cpu().numpy()
+        pred_tokens = self.tokenizer.detokenize(outputs)
+        pred_str = self.tokenizer.token2str(outputs)
+        fixed_str = [latex_rm_whitespace(s) for s in pred_str]
+        return {"pred_ids": outputs, "pred_tokens": pred_tokens, "pred_str": pred_str, "fixed_str": fixed_str}
+
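Since `Unimernet.py` now consumes `fixed_str`, which is `pred_str` passed through `latex_rm_whitespace`, a small worked example of that function may help; the input string is illustrative.

```python
# Worked example of latex_rm_whitespace() as defined above (input is illustrative).
from magic_pdf.model.sub_modules.mfr.unimernet.unimernet_hf.modeling_unimernet import (
    latex_rm_whitespace,
)

# "\mathrm { d }" is collapsed by the text_reg pass, then the remaining space
# between "}" and "x" is removed by the noletter/letter pass.
print(latex_rm_whitespace(r"\mathrm { d } x"))  # -> \mathrm{d}x
```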

magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py
ADDED
@@ -0,0 +1,163 @@
+# coding=utf-8
+# Copyright 2021, The Facebook AI Research Team and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""UnimerMBART model configuration"""
+
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)
+
+
+class UnimerMBartConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`MBartModel`]. It is used to instantiate an MBART
+    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
+    defaults will yield a similar configuration to that of the MBART
+    [facebook/mbart-large-cc25](https://huggingface.co/facebook/mbart-large-cc25) architecture.
+
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+
+
+    Args:
+        vocab_size (`int`, *optional*, defaults to 50265):
+            Vocabulary size of the MBART model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`MBartModel`] or [`TFMBartModel`].
+        d_model (`int`, *optional*, defaults to 1024):
+            Dimensionality of the layers and the pooler layer.
+        qk_squeeze (`int`, *optional*, defaults to 2):
+            Squeeze ratio for query/key's output dimension. See the [UniMERNet paper](https://arxiv.org/abs/2404.15254).
+            Squeeze Attention maps the query and key to a lower-dimensional space without excessive loss of information,
+            thereby accelerating the computation of attention.
+        encoder_layers (`int`, *optional*, defaults to 12):
+            Number of encoder layers.
+        decoder_layers (`int`, *optional*, defaults to 12):
+            Number of decoder layers.
+        encoder_attention_heads (`int`, *optional*, defaults to 16):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        decoder_attention_heads (`int`, *optional*, defaults to 16):
+            Number of attention heads for each attention layer in the Transformer decoder.
+        decoder_ffn_dim (`int`, *optional*, defaults to 4096):
+            Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
+        encoder_ffn_dim (`int`, *optional*, defaults to 4096):
+            Dimensionality of the "intermediate" (often named feed-forward) layer in encoder.
+        activation_function (`str` or `function`, *optional*, defaults to `"gelu"`):
+            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+            `"relu"`, `"silu"` and `"gelu_new"` are supported.
+        dropout (`float`, *optional*, defaults to 0.1):
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        attention_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for the attention probabilities.
+        activation_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for activations inside the fully connected layer.
+        classifier_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for classifier.
+        max_position_embeddings (`int`, *optional*, defaults to 1024):
+            The maximum sequence length that this model might ever be used with. Typically set this to something large
+            just in case (e.g., 512 or 1024 or 2048).
+        init_std (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        encoder_layerdrop (`float`, *optional*, defaults to 0.0):
+            The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
+            for more details.
+        decoder_layerdrop (`float`, *optional*, defaults to 0.0):
+            The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
+            for more details.
+        scale_embedding (`bool`, *optional*, defaults to `False`):
+            Scale embeddings by dividing by sqrt(d_model).
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether or not the model should return the last key/values attentions (not used by all models)
+        forced_eos_token_id (`int`, *optional*, defaults to 2):
+            The id of the token to force as the last generated token when `max_length` is reached. Usually set to
+            `eos_token_id`.
+
+    Example:
+
+    ```python
+    >>> from transformers import MBartConfig, MBartModel
+
+    >>> # Initializing a MBART facebook/mbart-large-cc25 style configuration
+    >>> configuration = MBartConfig()
+
+    >>> # Initializing a model (with random weights) from the facebook/mbart-large-cc25 style configuration
+    >>> model = MBartModel(configuration)
+
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+
+    model_type = "unimer-mbart"
+    keys_to_ignore_at_inference = ["past_key_values"]
+    attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
+
+    def __init__(
+        self,
+        vocab_size=50265,
+        max_position_embeddings=1024,
+        encoder_layers=12,
+        encoder_ffn_dim=4096,
+        encoder_attention_heads=16,
+        decoder_layers=12,
+        decoder_ffn_dim=4096,
+        decoder_attention_heads=16,
+        encoder_layerdrop=0.0,
+        decoder_layerdrop=0.0,
+        use_cache=True,
+        is_encoder_decoder=True,
+        activation_function="gelu",
+        d_model=1024,
+        qk_squeeze=2,
+        dropout=0.1,
+        attention_dropout=0.0,
+        activation_dropout=0.0,
+        init_std=0.02,
+        classifier_dropout=0.0,
+        scale_embedding=False,
+        pad_token_id=1,
+        bos_token_id=0,
+        eos_token_id=2,
+        forced_eos_token_id=2,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.d_model = d_model
+        self.qk_squeeze = qk_squeeze
+        self.encoder_ffn_dim = encoder_ffn_dim
+        self.encoder_layers = encoder_layers
+        self.encoder_attention_heads = encoder_attention_heads
+        self.decoder_ffn_dim = decoder_ffn_dim
+        self.decoder_layers = decoder_layers
+        self.decoder_attention_heads = decoder_attention_heads
+        self.dropout = dropout
+        self.attention_dropout = attention_dropout
+        self.activation_dropout = activation_dropout
+        self.activation_function = activation_function
+        self.init_std = init_std
+        self.encoder_layerdrop = encoder_layerdrop
+        self.decoder_layerdrop = decoder_layerdrop
+        self.classifier_dropout = classifier_dropout
+        self.use_cache = use_cache
+        self.num_hidden_layers = encoder_layers
+        self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            is_encoder_decoder=is_encoder_decoder,
+            forced_eos_token_id=forced_eos_token_id,
+            **kwargs,
+        )
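To close, a small sketch instantiating the configuration above with its defaults, showing the `qk_squeeze` field that distinguishes it from stock MBart and the `attribute_map` aliasing; it assumes magic-pdf 1.3.0 is installed.

```python
from magic_pdf.model.sub_modules.mfr.unimernet.unimernet_hf.unimer_mbart import (
    UnimerMBartConfig,
)

cfg = UnimerMBartConfig()
print(cfg.model_type)    # unimer-mbart
print(cfg.qk_squeeze)    # 2, the query/key squeeze ratio from the UniMERNet paper
print(cfg.hidden_size)   # 1024, aliased to d_model via attribute_map
```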