PyPI - magic-pdf - Versions diffs - 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

magic-pdf 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (102) hide show

magic_pdf/pre_proc/ocr_span_list_modify.py CHANGED Viewed

@@ -41,6 +41,57 @@ def check_chars_is_overlap_in_span(chars):
     return False
+def remove_x_overlapping_chars(span, median_width):
+    """
+    Remove characters from a span that overlap significantly on the x-axis.
+    Args:
+        median_width: Reference width; an x-axis overlap wider than 30% of it counts as significant
+        span (dict): A span containing a list of chars, each with bbox coordinates
+                    in the format [x0, y0, x1, y1]
+    Returns:
+        dict: The span with overlapping characters removed
+    """
+    if 'chars' not in span or len(span['chars']) < 2:
+        return span
+    overlap_threshold = median_width * 0.3
+    i = 0
+    while i < len(span['chars']) - 1:
+        char1 = span['chars'][i]
+        char2 = span['chars'][i + 1]
+        # Calculate overlap width
+        x_left = max(char1['bbox'][0], char2['bbox'][0])
+        x_right = min(char1['bbox'][2], char2['bbox'][2])
+        if x_right > x_left:  # There is overlap
+            overlap_width = x_right - x_left
+            if overlap_width > overlap_threshold:
+                if char1['c'] == char2['c'] or char1['c'] == ' ' or char2['c'] == ' ':
+                    # Determine which character to remove
+                    width1 = char1['bbox'][2] - char1['bbox'][0]
+                    width2 = char2['bbox'][2] - char2['bbox'][0]
+                    if width1 < width2:
+                        # Remove the narrower character
+                        span['chars'].pop(i)
+                    else:
+                        span['chars'].pop(i + 1)
+                else:
+                    i += 1
+                # After a removal, i is left unchanged so the new adjacent pair is checked next
+            else:
+                i += 1
+        else:
+            i += 1
+    return span
 def remove_overlaps_min_spans(spans):
     dropped_spans = []
     #  Remove the smaller ones among the overlapping spans

magic_pdf/resources/model_config/model_configs.yaml CHANGED Viewed

@@ -2,7 +2,7 @@ weights:
   layoutlmv3: Layout/LayoutLMv3/model_final.pth
   doclayout_yolo: Layout/YOLO/doclayout_yolo_docstructbench_imgsz1280_2501.pt
   yolo_v8_mfd: MFD/YOLO/yolo_v8_ft.pt
-  unimernet_small: MFR/unimernet_small_2501
+  unimernet_small: MFR/unimernet_hf_small_2503
   struct_eqtable: TabRec/StructEqTable
   tablemaster: TabRec/TableMaster
   rapid_table: TabRec/RapidTable

magic_pdf/tools/cli.py CHANGED Viewed

@@ -1,15 +1,18 @@
 import os
 import shutil
 import tempfile
+from pathlib import Path
 import click
 import fitz
 from loguru import logger
-from pathlib import Path
 import magic_pdf.model as model_config
+from magic_pdf.data.batch_build_dataset import batch_build_dataset
 from magic_pdf.data.data_reader_writer import FileBasedDataReader
+from magic_pdf.data.dataset import Dataset
 from magic_pdf.libs.version import __version__
-from magic_pdf.tools.common import do_parse, parse_pdf_methods
+from magic_pdf.tools.common import batch_do_parse, do_parse, parse_pdf_methods
 from magic_pdf.utils.office_to_pdf import convert_file_to_pdf
 pdf_suffixes = ['.pdf']
@@ -87,37 +90,38 @@ without method specified, auto will be used by default.""",
     default=None,
 )
 def cli(path, output_dir, method, lang, debug_able, start_page_id, end_page_id):
-    model_config.__use_inside_model__ = True
-    model_config.__model_mode__ = 'full'
     os.makedirs(output_dir, exist_ok=True)
     temp_dir = tempfile.mkdtemp()
     def read_fn(path: Path):
         if path.suffix in ms_office_suffixes:
             convert_file_to_pdf(str(path), temp_dir)
-            fn = os.path.join(temp_dir, f"{path.stem}.pdf")
+            fn = os.path.join(temp_dir, f'{path.stem}.pdf')
         elif path.suffix in image_suffixes:
             with open(str(path), 'rb') as f:
                 bits = f.read()
             pdf_bytes = fitz.open(stream=bits).convert_to_pdf()
-            fn = os.path.join(temp_dir, f"{path.stem}.pdf")
+            fn = os.path.join(temp_dir, f'{path.stem}.pdf')
             with open(fn, 'wb') as f:
                 f.write(pdf_bytes)
         elif path.suffix in pdf_suffixes:
             fn = str(path)
         else:
-            raise Exception(f"Unknown file suffix: {path.suffix}")
+            raise Exception(f'Unknown file suffix: {path.suffix}')
         disk_rw = FileBasedDataReader(os.path.dirname(fn))
         return disk_rw.read(os.path.basename(fn))
-    def parse_doc(doc_path: Path):
+    def parse_doc(doc_path: Path, dataset: Dataset | None = None):
         try:
             file_name = str(Path(doc_path).stem)
-            pdf_data = read_fn(doc_path)
+            if dataset is None:
+                pdf_data_or_dataset = read_fn(doc_path)
+            else:
+                pdf_data_or_dataset = dataset
             do_parse(
                 output_dir,
                 file_name,
-                pdf_data,
+                pdf_data_or_dataset,
                 [],
                 method,
                 debug_able,
@@ -130,9 +134,23 @@ def cli(path, output_dir, method, lang, debug_able, start_page_id, end_page_id):
             logger.exception(e)
     if os.path.isdir(path):
+        doc_paths = []
         for doc_path in Path(path).glob('*'):
             if doc_path.suffix in pdf_suffixes + image_suffixes + ms_office_suffixes:
-                parse_doc(doc_path)
+                if doc_path.suffix in ms_office_suffixes:
+                    convert_file_to_pdf(str(doc_path), temp_dir)
+                    doc_path = Path(os.path.join(temp_dir, f'{doc_path.stem}.pdf'))
+                elif doc_path.suffix in image_suffixes:
+                    with open(str(doc_path), 'rb') as f:
+                        bits = f.read()
+                        pdf_bytes = fitz.open(stream=bits).convert_to_pdf()
+                    fn = os.path.join(temp_dir, f'{doc_path.stem}.pdf')
+                    with open(fn, 'wb') as f:
+                        f.write(pdf_bytes)
+                    doc_path = Path(fn)
+                doc_paths.append(doc_path)
+        datasets = batch_build_dataset(doc_paths, 4, lang)
+        batch_do_parse(output_dir, [str(doc_path.stem) for doc_path in doc_paths], datasets, method, debug_able, lang=lang)
     else:
         parse_doc(Path(path))

magic_pdf/tools/common.py CHANGED Viewed

@@ -8,10 +8,10 @@ import magic_pdf.model as model_config
 from magic_pdf.config.enums import SupportedPdfParseMethod
 from magic_pdf.config.make_content_config import DropMode, MakeMode
 from magic_pdf.data.data_reader_writer import FileBasedDataWriter
-from magic_pdf.data.dataset import PymuDocDataset
+from magic_pdf.data.dataset import Dataset, PymuDocDataset
 from magic_pdf.libs.draw_bbox import draw_char_bbox
-from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
-from magic_pdf.operators.models import InferenceResult
+from magic_pdf.model.doc_analyze_by_custom_model import (batch_doc_analyze,
+                                                         doc_analyze)
 # from io import BytesIO
 # from pypdf import PdfReader, PdfWriter
@@ -67,13 +67,13 @@ def convert_pdf_bytes_to_bytes_by_pymupdf(pdf_bytes, start_page_id=0, end_page_i
     return output_bytes
-def do_parse(
+def _do_parse(
     output_dir,
     pdf_file_name,
-    pdf_bytes,
+    pdf_bytes_or_dataset,
     model_list,
     parse_method,
-    debug_able,
+    debug_able=False,
     f_draw_span_bbox=True,
     f_draw_layout_bbox=True,
     f_dump_md=True,
@@ -92,16 +92,21 @@ def do_parse(
     formula_enable=None,
     table_enable=None,
 ):
+    from magic_pdf.operators.models import InferenceResult
     if debug_able:
         logger.warning('debug mode is on')
         f_draw_model_bbox = True
         f_draw_line_sort_bbox = True
         # f_draw_char_bbox = True
-    pdf_bytes = convert_pdf_bytes_to_bytes_by_pymupdf(
-        pdf_bytes, start_page_id, end_page_id
-    )
+    if isinstance(pdf_bytes_or_dataset, bytes):
+        pdf_bytes = convert_pdf_bytes_to_bytes_by_pymupdf(
+            pdf_bytes_or_dataset, start_page_id, end_page_id
+        )
+        ds = PymuDocDataset(pdf_bytes, lang=lang)
+    else:
+        ds = pdf_bytes_or_dataset
+    pdf_bytes = ds._raw_data
     local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
     image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
@@ -109,8 +114,6 @@ def do_parse(
     )
     image_dir = str(os.path.basename(local_image_dir))
-    ds = PymuDocDataset(pdf_bytes, lang=lang)
     if len(model_list) == 0:
         if model_config.__use_inside_model__:
             if parse_method == 'auto':
@@ -241,5 +244,80 @@ def do_parse(
     logger.info(f'local output dir is {local_md_dir}')
+def do_parse(
+    output_dir,
+    pdf_file_name,
+    pdf_bytes_or_dataset,
+    model_list,
+    parse_method,
+    debug_able=False,
+    f_draw_span_bbox=True,
+    f_draw_layout_bbox=True,
+    f_dump_md=True,
+    f_dump_middle_json=True,
+    f_dump_model_json=True,
+    f_dump_orig_pdf=True,
+    f_dump_content_list=True,
+    f_make_md_mode=MakeMode.MM_MD,
+    f_draw_model_bbox=False,
+    f_draw_line_sort_bbox=False,
+    f_draw_char_bbox=False,
+    start_page_id=0,
+    end_page_id=None,
+    lang=None,
+    layout_model=None,
+    formula_enable=None,
+    table_enable=None,
+):
+    parallel_count = 1
+    if os.environ.get('MINERU_PARALLEL_INFERENCE_COUNT'):
+        parallel_count = int(os.environ['MINERU_PARALLEL_INFERENCE_COUNT'])
+    if parallel_count > 1:
+        if isinstance(pdf_bytes_or_dataset, bytes):
+            pdf_bytes = convert_pdf_bytes_to_bytes_by_pymupdf(
+                pdf_bytes_or_dataset, start_page_id, end_page_id
+            )
+            ds = PymuDocDataset(pdf_bytes, lang=lang)
+        else:
+            ds = pdf_bytes_or_dataset
+        batch_do_parse(output_dir, [pdf_file_name], [ds], parse_method, debug_able, f_draw_span_bbox=f_draw_span_bbox, f_draw_layout_bbox=f_draw_layout_bbox, f_dump_md=f_dump_md, f_dump_middle_json=f_dump_middle_json, f_dump_model_json=f_dump_model_json, f_dump_orig_pdf=f_dump_orig_pdf, f_dump_content_list=f_dump_content_list, f_make_md_mode=f_make_md_mode, f_draw_model_bbox=f_draw_model_bbox, f_draw_line_sort_bbox=f_draw_line_sort_bbox, f_draw_char_bbox=f_draw_char_bbox, lang=lang)
+    else:
+        _do_parse(output_dir, pdf_file_name, pdf_bytes_or_dataset, model_list, parse_method, debug_able, start_page_id=start_page_id, end_page_id=end_page_id, lang=lang, layout_model=layout_model, formula_enable=formula_enable, table_enable=table_enable,  f_draw_span_bbox=f_draw_span_bbox, f_draw_layout_bbox=f_draw_layout_bbox, f_dump_md=f_dump_md, f_dump_middle_json=f_dump_middle_json, f_dump_model_json=f_dump_model_json, f_dump_orig_pdf=f_dump_orig_pdf, f_dump_content_list=f_dump_content_list, f_make_md_mode=f_make_md_mode, f_draw_model_bbox=f_draw_model_bbox, f_draw_line_sort_bbox=f_draw_line_sort_bbox, f_draw_char_bbox=f_draw_char_bbox)
+def batch_do_parse(
+    output_dir,
+    pdf_file_names: list[str],
+    pdf_bytes_or_datasets: list[bytes | Dataset],
+    parse_method,
+    debug_able=False,
+    f_draw_span_bbox=True,
+    f_draw_layout_bbox=True,
+    f_dump_md=True,
+    f_dump_middle_json=True,
+    f_dump_model_json=True,
+    f_dump_orig_pdf=True,
+    f_dump_content_list=True,
+    f_make_md_mode=MakeMode.MM_MD,
+    f_draw_model_bbox=False,
+    f_draw_line_sort_bbox=False,
+    f_draw_char_bbox=False,
+    lang=None,
+    layout_model=None,
+    formula_enable=None,
+    table_enable=None,
+):
+    dss = []
+    for v in pdf_bytes_or_datasets:
+        if isinstance(v, bytes):
+            dss.append(PymuDocDataset(v, lang=lang))
+        else:
+            dss.append(v)
+    infer_results = batch_doc_analyze(dss, parse_method, lang=lang, layout_model=layout_model, formula_enable=formula_enable, table_enable=table_enable)
+    for idx, infer_result in enumerate(infer_results):
+        _do_parse(output_dir, pdf_file_names[idx], dss[idx], infer_result.get_infer_res(), parse_method, debug_able, f_draw_span_bbox=f_draw_span_bbox, f_draw_layout_bbox=f_draw_layout_bbox, f_dump_md=f_dump_md, f_dump_middle_json=f_dump_middle_json, f_dump_model_json=f_dump_model_json, f_dump_orig_pdf=f_dump_orig_pdf, f_dump_content_list=f_dump_content_list, f_make_md_mode=f_make_md_mode, f_draw_model_bbox=f_draw_model_bbox, f_draw_line_sort_bbox=f_draw_line_sort_bbox, f_draw_char_bbox=f_draw_char_bbox, lang=lang)
 parse_pdf_methods = click.Choice(['ocr', 'txt', 'auto'])

{magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: magic-pdf
-Version: 1.2.1
+Version: 1.3.0
 Summary: A practical tool for converting PDF to Markdown
 Home-page: https://github.com/opendatalab/MinerU
 Requires-Python: >=3.9
@@ -9,35 +9,30 @@ License-File: LICENSE.md
 Requires-Dist: boto3>=1.28.43
 Requires-Dist: Brotli>=1.1.0
 Requires-Dist: click>=8.1.7
-Requires-Dist: fast-langdetect>=0.2.3
+Requires-Dist: fast-langdetect<0.3.0,>=0.2.3
 Requires-Dist: loguru>=0.6.0
-Requires-Dist: numpy<2.0.0,>=1.21.6
-Requires-Dist: pydantic>=2.7.2
-Requires-Dist: PyMuPDF<=1.24.14,>=1.24.9
+Requires-Dist: numpy>=1.21.6
+Requires-Dist: pydantic<2.11,>=2.7.2
+Requires-Dist: PyMuPDF<1.25.0,>=1.24.9
 Requires-Dist: scikit-learn>=1.0.2
-Requires-Dist: torch>=2.2.2
-Requires-Dist: transformers
+Requires-Dist: torch!=2.5.0,!=2.5.1,<=2.6.0,>=2.2.2
+Requires-Dist: torchvision
+Requires-Dist: transformers<5.0.0,>=4.49.0
 Requires-Dist: pdfminer.six==20231228
+Requires-Dist: tqdm>=4.67.1
 Provides-Extra: full
-Requires-Dist: unimernet==0.2.3; extra == "full"
-Requires-Dist: torch<=2.3.1,>=2.2.2; extra == "full"
-Requires-Dist: torchvision<=0.18.1,>=0.17.2; extra == "full"
 Requires-Dist: ultralytics>=8.3.48; extra == "full"
-Requires-Dist: paddleocr==2.7.3; extra == "full"
-Requires-Dist: struct-eqtable==0.3.2; extra == "full"
-Requires-Dist: einops; extra == "full"
-Requires-Dist: accelerate; extra == "full"
 Requires-Dist: doclayout-yolo==0.0.2b1; extra == "full"
-Requires-Dist: rapidocr-paddle<2.0.0,>=1.4.5; extra == "full"
-Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.4; extra == "full"
+Requires-Dist: dill<1,>=0.3.9; extra == "full"
 Requires-Dist: rapid-table<2.0.0,>=1.0.3; extra == "full"
-Requires-Dist: PyYAML; extra == "full"
-Requires-Dist: openai; extra == "full"
-Requires-Dist: detectron2; extra == "full"
-Requires-Dist: matplotlib; (platform_system == "Linux" or platform_system == "Darwin") and extra == "full"
-Requires-Dist: paddlepaddle==3.0.0rc1; (platform_system == "Linux" or platform_system == "Darwin") and extra == "full"
+Requires-Dist: PyYAML<7,>=6.0.2; extra == "full"
+Requires-Dist: ftfy<7,>=6.3.1; extra == "full"
+Requires-Dist: openai<2,>=1.70.0; extra == "full"
+Requires-Dist: shapely<3,>=2.0.7; extra == "full"
+Requires-Dist: pyclipper<2,>=1.3.0; extra == "full"
+Requires-Dist: omegaconf<3,>=2.3.0; extra == "full"
+Requires-Dist: matplotlib>=3.10; (platform_system == "Linux" or platform_system == "Darwin") and extra == "full"
 Requires-Dist: matplotlib<=3.9.0; platform_system == "Windows" and extra == "full"
-Requires-Dist: paddlepaddle==2.6.1; platform_system == "Windows" and extra == "full"
 Provides-Extra: lite
 Requires-Dist: paddleocr==2.7.3; extra == "lite"
 Requires-Dist: paddlepaddle==3.0.0b1; platform_system == "Linux" and extra == "lite"
@@ -94,6 +89,23 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
 </div>
 # Changelog
+- 2025/04/03 1.3.0 released. This version includes many optimizations and improvements:
+  - Installation and compatibility optimization
+    - By removing the use of `layoutlmv3` in layout, resolved compatibility issues caused by `detectron2`.
+    - Torch version compatibility extended to 2.2~2.6 (excluding 2.5).
+    - CUDA compatibility supports 11.8/12.4/12.6 (CUDA version determined by torch), resolving compatibility issues for some users with 50-series and H-series GPUs.
+    - Supported Python versions expanded to 3.10~3.12, fixing the automatic downgrade to 0.6.1 that occurred when installing in non-3.10 environments.
+    - Offline deployment process optimized; after a successful deployment, no internet connection is needed to download any model files.
+  - Performance optimization
+    - By supporting batch processing of multiple PDF files ([script example](demo/batch_demo.py)), improved parsing speed for small files in batches (compared to version 1.0.1, formula parsing speed increased by over 1400%, overall parsing speed increased by over 500%).
+    - Optimized loading and usage of the mfr model, reducing GPU memory usage and improving parsing speed (requires re-execution of the [model download process](docs/how_to_download_models_en.md) to obtain incremental updates of model files).
+    - Optimized GPU memory usage, requiring only a minimum of 6GB to run this project.
+    - Improved running speed on MPS devices.
+  - Parsing effect optimization
+    - Updated the mfr model to `unimernet(2503)`, solving the issue of lost line breaks in multi-line formulas.
+  - Usability optimization
+    - By using `paddleocr2torch`, completely replaced the use of the `paddle` framework and `paddleocr` in the project, resolving conflicts between `paddle` and `torch`, as well as thread safety issues caused by the `paddle` framework.
+    - Added a real-time progress bar during the parsing process to accurately track progress, making the wait less painful.
 - 2025/03/03 1.2.1 released, fixed several bugs:
   - Fixed the impact on punctuation marks during full-width to half-width conversion of letters and numbers
   - Fixed caption matching inaccuracies in certain scenarios
@@ -262,7 +274,7 @@ There are three different ways to experience MinerU:
     </tr>
     <tr>
         <td colspan="3">Python Version</td>
-        <td colspan="3">3.10(Please make sure to create a Python 3.10 virtual environment using conda)</td>
+        <td colspan="3">3.10~3.12</td>
     </tr>
     <tr>
         <td colspan="3">Nvidia Driver Version</td>
@@ -272,8 +284,8 @@ There are three different ways to experience MinerU:
     </tr>
     <tr>
         <td colspan="3">CUDA Environment</td>
-        <td>Automatic installation [12.1 (pytorch) + 11.8 (paddle)]</td>
-        <td>11.8 (manual installation) + cuDNN v8.7.0 (manual installation)</td>
+        <td>11.8/12.4/12.6</td>
+        <td>11.8/12.4/12.6</td>
         <td>None</td>
     </tr>
     <tr>
@@ -283,11 +295,11 @@ There are three different ways to experience MinerU:
         <td>None</td>
     </tr>
     <tr>
-        <td rowspan="2">GPU Hardware Support List</td>
-        <td colspan="2">GPU VRAM 8GB or more</td>
-        <td colspan="2">2080~2080Ti / 3060Ti~3090Ti / 4060~4090<br>
-        8G VRAM can enable all acceleration features</td>
-        <td rowspan="2">None</td>
+        <td rowspan="2">GPU/MPS Hardware Support List</td>
+        <td colspan="2">GPU VRAM 6GB or more</td>
+        <td colspan="2">All GPUs with Tensor Cores produced from Volta (2017) onwards.<br>
+        6GB or more of VRAM</td>
+        <td rowspan="2">Apple silicon</td>
     </tr>
 </table>
@@ -304,9 +316,9 @@ Synced with dev branch updates:
 #### 1. Install magic-pdf
 ```bash
-conda create -n mineru python=3.10
+conda create -n mineru 'python<3.13' -y
 conda activate mineru
-pip install -U "magic-pdf[full]" --extra-index-url https://wheels.myhloli.com
+pip install -U "magic-pdf[full]"
 ```
 #### 2. Download model weight files
@@ -331,7 +343,7 @@ You can modify certain configurations in this file to enable or disable features
 {
     // other config
     "layout-config": {
-        "model": "doclayout_yolo" // Please change to "layoutlmv3" when using layoutlmv3.
+        "model": "doclayout_yolo"
     },
     "formula-config": {
         "mfd_model": "yolo_v8_mfd",
@@ -339,8 +351,8 @@ You can modify certain configurations in this file to enable or disable features
         "enable": true  // The formula recognition feature is enabled by default. If you need to disable it, please change the value here to "false".
     },
     "table-config": {
-        "model": "rapid_table",  // Default to using "rapid_table", can be switched to "tablemaster" or "struct_eqtable".
-        "sub_model": "slanet_plus",  // When the model is "rapid_table", you can choose a sub_model. The options are "slanet_plus" and "unitable"
+        "model": "rapid_table",
+        "sub_model": "slanet_plus",
         "enable": true, // The table recognition feature is enabled by default. If you need to disable it, please change the value here to "false".
         "max_time": 400
     }
@@ -355,7 +367,7 @@ If your device supports CUDA and meets the GPU requirements of the mainline envi
 - [Windows 10/11 + GPU](docs/README_Windows_CUDA_Acceleration_en_US.md)
 - Quick Deployment with Docker
 > [!IMPORTANT]
-> Docker requires a GPU with at least 8GB of VRAM, and all acceleration features are enabled by default.
+> Docker requires a GPU with at least 6GB of VRAM, and all acceleration features are enabled by default.
 >
 > Before running this Docker, you can use the following command to check if your device supports CUDA acceleration on Docker.
 >
@@ -377,7 +389,7 @@ If your device has NPU acceleration hardware, you can follow the tutorial below
 ### Using MPS
-If your device uses Apple silicon chips, you can enable MPS acceleration for certain supported tasks (such as layout detection and formula detection).
+If your device uses Apple silicon chips, you can enable MPS acceleration for your tasks.
 You can enable MPS acceleration by setting the `device-mode` parameter to `mps` in the `magic-pdf.json` configuration file.
@@ -388,10 +400,6 @@ You can enable MPS acceleration by setting the `device-mode` parameter to `mps`
 }
 ```
-> [!TIP]
-> Since the formula recognition task cannot utilize MPS acceleration, you can disable the formula recognition feature in tasks where it is not needed to achieve optimal performance.
->
-> You can disable the formula recognition feature by setting the `enable` parameter in the `formula-config` section to `false`.
 ## Usage
@@ -465,6 +473,8 @@ This project currently uses PyMuPDF to achieve advanced functionality. However,
 - [StructEqTable](https://github.com/UniModal4Reasoning/StructEqTable-Deploy)
 - [RapidTable](https://github.com/RapidAI/RapidTable)
 - [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)
+- [RapidOCR](https://github.com/RapidAI/RapidOCR)
+- [PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch)
 - [PyMuPDF](https://github.com/pymupdf/PyMuPDF)
 - [layoutreader](https://github.com/ppaanngggg/layoutreader)
 - [fast-langdetect](https://github.com/LlmKira/fast-langdetect)

magic-pdf 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

magic-pdf 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl