PyMuPDF 1.24.4__tar.gz → 1.24.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/PKG-INFO +1 -1
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/changes.txt +17 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/pipcl.py +32 -29
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/scripts/test.py +6 -6
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/setup.py +12 -16
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src/__init__.py +214 -5
- PyMuPDF-1.24.5/src/_apply_pages.py +253 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src/extra.i +0 -39
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src/fitz___init__.py +3 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src/utils.py +13 -12
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/conftest.py +6 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_pagelabels.py +19 -1
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_pylint.py +4 -1
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_textextract.py +47 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/COPYING +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/README.md +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/READMErb.md +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/pyproject.toml +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/pytest.ini +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/scripts/gh_release.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/scripts/sysinstall.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src/__main__.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src/fitz_table.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src/fitz_utils.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src/pymupdf.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src/table.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/__init__.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/__main__.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/_config.h +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/fitz_old.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-annot.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-convert.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-defines.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-devices.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-fields.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-fileobj.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-geo-c.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-geo-py.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-globals.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-other.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-pdfinfo.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-pixmap.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-portfolio.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-python.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-select.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-stext.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/helper-xobject.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/utils.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/src_classic/version.i +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/README.md +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/gentle_compare.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/001003ED.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/1.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/2.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/2201.00069.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/3.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/4.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/Bezier.epub +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/PragmaticaC.otf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/battery-file-22.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/bug1945.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/bug1971.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/chinese-tables.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/chinese-tables.pickle +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/circular-toc.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/cython.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/cython.pickle +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/full_toc.txt +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/github_sample.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/has-bad-fonts.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/image-file1.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/img-transparent.png +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/joined.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/metadata.txt +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/mupdf_explored.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/nur-ruhig.jpg +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/quad-calc-0.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/simple_toc.txt +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/small-table.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/strict-yes-no.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/symbol-list.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/symbols.txt +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test-2333.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test-2462.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test-2812.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test-3143.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test-3150.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test-3207.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test-707448.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test-707673.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test2093.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test2182.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test2238.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_1645_expected_1.22.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_1645_expected_1.24.2.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_1645_expected_1.24.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_1645_expected_1.25.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_1824.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2108.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2270.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2533.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2548.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2553-2.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2553.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2596.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2608_expected +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2634.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2635.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2645_1.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2645_2.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2645_3.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2710.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2730.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2742.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2788.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2791_content.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2791_coverpage.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2861.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2871.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2885.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2904.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2907.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2954.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2957_1.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2957_2.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2969.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_2979.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3058.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3062.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3070.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3072.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3087.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3179.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3186.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3197.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3357.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3362.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_3376.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_annot_file_info.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/test_delete_image.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/type3font.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/v110-changes.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/resources/widgettest.pdf +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/run_compound.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_2548.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_2634.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_2791.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_2904.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_2907.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_annots.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_badfonts.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_balance_count.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_cluster_drawings.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_crypting.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_docs_samples.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_drawings.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_embeddedfiles.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_extractimage.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_flake8.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_font.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_general.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_geometry.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_imagebbox.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_import.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_insertimage.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_insertpdf.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_linequad.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_metadata.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_mupdf_regressions.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_named_links.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_nonpdf.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_object_manipulation.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_objectstreams.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_optional_content.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_page_links.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_pagedelete.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_pixmap.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_remove-rotation.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_showpdfpage.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_story.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_tables.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_tesseract.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_textbox.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_textsearch.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_toc.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_widgets.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/tests/test_word_delimiters.py +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/valgrind.supp +0 -0
- {PyMuPDF-1.24.4 → PyMuPDF-1.24.5}/wdev.py +0 -0
|
@@ -2,9 +2,26 @@ Change Log
|
|
|
2
2
|
==========
|
|
3
3
|
|
|
4
4
|
|
|
5
|
+
**Changes in version 1.24.5 (2024-05-30)**
|
|
6
|
+
|
|
7
|
+
* Fixed issues:
|
|
8
|
+
|
|
9
|
+
* **Fixed** `3479 <https://github.com/pymupdf/PyMuPDF/issues/3479>`_: regression: fill_textbox: IndexError: pop from empty list
|
|
10
|
+
* **Fixed** `3488 <https://github.com/pymupdf/PyMuPDF/issues/3488>`_: set_toc method error
|
|
11
|
+
|
|
12
|
+
* Other:
|
|
13
|
+
|
|
14
|
+
* Some more fixes to use MuPDF floating formatting.
|
|
15
|
+
* Removed/disabled some unnecessary diagnostics.
|
|
16
|
+
* Fixed utils.do_links() crash.
|
|
17
|
+
* Experimental new functions `pymupdf.apply_pages()` and `pymupdf.get_text()`.
|
|
18
|
+
* Addresses wrong label generation for label styles "a" and "A".
|
|
19
|
+
|
|
20
|
+
|
|
5
21
|
**Changes in version 1.24.4 (2024-05-16)**
|
|
6
22
|
|
|
7
23
|
* **Fixed** `3418 <https://github.com/pymupdf/PyMuPDF/issues/3418>`_: Re-introduced bug, text align add_redact_annot
|
|
24
|
+
* **Fixed** `3472 <https://github.com/pymupdf/PyMuPDF/issues/3472>`_: insert_pdf gives SystemError
|
|
8
25
|
|
|
9
26
|
* Other:
|
|
10
27
|
|
|
@@ -237,21 +237,21 @@ class Package:
|
|
|
237
237
|
>>> assert len(so) == 1
|
|
238
238
|
>>> so = so[0]
|
|
239
239
|
>>> assert os.path.getmtime(so) > t0
|
|
240
|
-
|
|
240
|
+
|
|
241
241
|
Check `entry_points` causes creation of command `foo_cli` when we install
|
|
242
242
|
from our wheel using pip. [As of 2024-02-24 using pipcl's CLI interface
|
|
243
243
|
directly with `setup.py install` does not support entry points.]
|
|
244
|
-
|
|
244
|
+
|
|
245
245
|
>>> print('Creating venv.', file=sys.stderr)
|
|
246
246
|
>>> _ = subprocess.run(
|
|
247
247
|
... f'cd pipcl_test && {sys.executable} -m venv pylocal',
|
|
248
248
|
... shell=1, check=1)
|
|
249
|
-
|
|
249
|
+
|
|
250
250
|
>>> print('Installing from wheel into venv using pip.', file=sys.stderr)
|
|
251
251
|
>>> _ = subprocess.run(
|
|
252
252
|
... f'. pipcl_test/pylocal/bin/activate && pip install pipcl_test/dist/*.whl',
|
|
253
253
|
... shell=1, check=1)
|
|
254
|
-
|
|
254
|
+
|
|
255
255
|
>>> print('Running foo_cli.', file=sys.stderr)
|
|
256
256
|
>>> _ = subprocess.run(
|
|
257
257
|
... f'. pipcl_test/pylocal/bin/activate && foo_cli',
|
|
@@ -299,7 +299,7 @@ class Package:
|
|
|
299
299
|
requires_external = None,
|
|
300
300
|
project_url = None,
|
|
301
301
|
provides_extra = None,
|
|
302
|
-
|
|
302
|
+
|
|
303
303
|
entry_points = None,
|
|
304
304
|
|
|
305
305
|
root = None,
|
|
@@ -374,21 +374,21 @@ class Package:
|
|
|
374
374
|
entry_points:
|
|
375
375
|
String or dict specifying *.dist-info/entry_points.txt, for
|
|
376
376
|
example:
|
|
377
|
-
|
|
377
|
+
|
|
378
378
|
```
|
|
379
379
|
[console_scripts]
|
|
380
380
|
foo_cli = foo.__main__:main
|
|
381
381
|
```
|
|
382
|
-
|
|
382
|
+
|
|
383
383
|
or:
|
|
384
|
-
|
|
384
|
+
|
|
385
385
|
{ 'console_scripts': [
|
|
386
386
|
'foo_cli = foo.__main__:main',
|
|
387
387
|
],
|
|
388
388
|
}
|
|
389
|
-
|
|
389
|
+
|
|
390
390
|
See: https://packaging.python.org/en/latest/specifications/entry-points/
|
|
391
|
-
|
|
391
|
+
|
|
392
392
|
root:
|
|
393
393
|
Root of package, defaults to current directory.
|
|
394
394
|
|
|
@@ -684,7 +684,7 @@ class Package:
|
|
|
684
684
|
# Add <name>-<version>.dist-info/COPYING.
|
|
685
685
|
if self.license:
|
|
686
686
|
add_str(self.license, f'{dist_info_dir}/COPYING')
|
|
687
|
-
|
|
687
|
+
|
|
688
688
|
# Add <name>-<version>.dist-info/entry_points.txt.
|
|
689
689
|
entry_points_text = self._entry_points_text()
|
|
690
690
|
if entry_points_text:
|
|
@@ -735,15 +735,15 @@ class Package:
|
|
|
735
735
|
os.makedirs(sdist_directory, exist_ok=True)
|
|
736
736
|
tarpath = f'{sdist_directory}/{prefix}.tar.gz'
|
|
737
737
|
log2(f'Creating sdist: {tarpath}')
|
|
738
|
-
|
|
738
|
+
|
|
739
739
|
with tarfile.open(tarpath, 'w:gz') as tar:
|
|
740
|
-
|
|
740
|
+
|
|
741
741
|
names_in_tar = list()
|
|
742
742
|
def check_name(name):
|
|
743
743
|
if name in names_in_tar:
|
|
744
744
|
raise Exception(f'Name specified twice: {name}')
|
|
745
745
|
names_in_tar.append(name)
|
|
746
|
-
|
|
746
|
+
|
|
747
747
|
def add(from_, name):
|
|
748
748
|
check_name(name)
|
|
749
749
|
if isinstance(from_, str):
|
|
@@ -757,7 +757,7 @@ class Package:
|
|
|
757
757
|
tar.addfile(ti, io.BytesIO(from_))
|
|
758
758
|
else:
|
|
759
759
|
assert 0
|
|
760
|
-
|
|
760
|
+
|
|
761
761
|
def add_string(text, name):
|
|
762
762
|
textb = text.encode('utf8')
|
|
763
763
|
return add(textb, name)
|
|
@@ -776,7 +776,7 @@ class Package:
|
|
|
776
776
|
if to_rel == 'pyproject.toml':
|
|
777
777
|
found_pyproject_toml = True
|
|
778
778
|
add(from_, to_rel)
|
|
779
|
-
|
|
779
|
+
|
|
780
780
|
if not found_pyproject_toml:
|
|
781
781
|
log0(f'Warning: no pyproject.toml specified.')
|
|
782
782
|
|
|
@@ -884,10 +884,10 @@ class Package:
|
|
|
884
884
|
add_file( from_, to_abs2, to_rel)
|
|
885
885
|
|
|
886
886
|
add_str( self._metainfo(), f'{root2}/{dist_info_dir}/METADATA', f'{dist_info_dir}/METADATA')
|
|
887
|
-
|
|
887
|
+
|
|
888
888
|
if self.license:
|
|
889
889
|
add_str( self.license, f'{root2}/{dist_info_dir}/COPYING', f'{dist_info_dir}/COPYING')
|
|
890
|
-
|
|
890
|
+
|
|
891
891
|
entry_points_text = self._entry_points_text()
|
|
892
892
|
if entry_points_text:
|
|
893
893
|
add_str(
|
|
@@ -1284,7 +1284,7 @@ class Package:
|
|
|
1284
1284
|
if isinstance(p, str):
|
|
1285
1285
|
p = p, p
|
|
1286
1286
|
assert isinstance(p, tuple) and len(p) == 2
|
|
1287
|
-
|
|
1287
|
+
|
|
1288
1288
|
from_, to_ = p
|
|
1289
1289
|
assert isinstance(from_, (str, bytes))
|
|
1290
1290
|
assert isinstance(to_, str)
|
|
@@ -1798,7 +1798,7 @@ def git_items( directory, submodules=False):
|
|
|
1798
1798
|
return ret
|
|
1799
1799
|
|
|
1800
1800
|
|
|
1801
|
-
def run( command, capture=False, check=1):
|
|
1801
|
+
def run( command, capture=False, check=1, verbose=1):
|
|
1802
1802
|
'''
|
|
1803
1803
|
Runs a command using `subprocess.run()`.
|
|
1804
1804
|
|
|
@@ -1818,6 +1818,8 @@ def run( command, capture=False, check=1):
|
|
|
1818
1818
|
check:
|
|
1819
1819
|
If true we raise an exception on error; otherwise we include the
|
|
1820
1820
|
command's returncode in our return value.
|
|
1821
|
+
verbose:
|
|
1822
|
+
If true we show the command.
|
|
1821
1823
|
Returns:
|
|
1822
1824
|
check capture Return
|
|
1823
1825
|
--------------------------
|
|
@@ -1828,7 +1830,8 @@ def run( command, capture=False, check=1):
|
|
|
1828
1830
|
'''
|
|
1829
1831
|
lines = _command_lines( command)
|
|
1830
1832
|
nl = '\n'
|
|
1831
|
-
|
|
1833
|
+
if verbose:
|
|
1834
|
+
log1( f'Running: {nl.join(lines)}')
|
|
1832
1835
|
sep = ' ' if windows() else '\\\n'
|
|
1833
1836
|
command2 = sep.join( lines)
|
|
1834
1837
|
cp = subprocess.run(
|
|
@@ -1924,18 +1927,18 @@ class PythonFlags:
|
|
|
1924
1927
|
stderr=subprocess.DEVNULL,
|
|
1925
1928
|
check=0,
|
|
1926
1929
|
).returncode
|
|
1927
|
-
|
|
1930
|
+
log2(f'{e=} from {pc!r}.')
|
|
1928
1931
|
if e == 0:
|
|
1929
1932
|
python_config = pc
|
|
1930
1933
|
assert python_config, f'Cannot find python-config'
|
|
1931
1934
|
else:
|
|
1932
1935
|
python_config = f'{python_exe}-config'
|
|
1933
|
-
|
|
1936
|
+
log2(f'Using {python_config=}.')
|
|
1934
1937
|
try:
|
|
1935
|
-
self.includes = run( f'{python_config} --includes', capture=1).strip()
|
|
1938
|
+
self.includes = run( f'{python_config} --includes', capture=1, verbose=0).strip()
|
|
1936
1939
|
except Exception as e:
|
|
1937
1940
|
raise Exception('We require python development tools to be installed.') from e
|
|
1938
|
-
self.ldflags = run( f'{python_config} --ldflags', capture=1).strip()
|
|
1941
|
+
self.ldflags = run( f'{python_config} --ldflags', capture=1, verbose=0).strip()
|
|
1939
1942
|
if linux():
|
|
1940
1943
|
# It seems that with python-3.10 on Linux, we can get an
|
|
1941
1944
|
# incorrect -lcrypt flag that on some systems (e.g. WSL)
|
|
@@ -2100,7 +2103,7 @@ def run_if( command, out, *prerequisites):
|
|
|
2100
2103
|
if not doit:
|
|
2101
2104
|
out_mtime = _fs_mtime( out)
|
|
2102
2105
|
if out_mtime == 0:
|
|
2103
|
-
doit = 'File does not exist: {out!
|
|
2106
|
+
doit = f'File does not exist: {out!r}'
|
|
2104
2107
|
|
|
2105
2108
|
cmd_path = f'{out}.cmd'
|
|
2106
2109
|
if os.path.isfile( cmd_path):
|
|
@@ -2155,7 +2158,7 @@ def run_if( command, out, *prerequisites):
|
|
|
2155
2158
|
os.remove( cmd_path)
|
|
2156
2159
|
except Exception:
|
|
2157
2160
|
pass
|
|
2158
|
-
|
|
2161
|
+
log1( f'Running command because: {doit}')
|
|
2159
2162
|
|
|
2160
2163
|
run( command)
|
|
2161
2164
|
|
|
@@ -2164,7 +2167,7 @@ def run_if( command, out, *prerequisites):
|
|
|
2164
2167
|
f.write( command)
|
|
2165
2168
|
return True
|
|
2166
2169
|
else:
|
|
2167
|
-
|
|
2170
|
+
log1( f'Not running command because up to date: {out!r}')
|
|
2168
2171
|
|
|
2169
2172
|
if 0:
|
|
2170
2173
|
log2( f'out_mtime={time.ctime(out_mtime)} pre_mtime={time.ctime(pre_mtime)}.'
|
|
@@ -2361,7 +2364,7 @@ class _Record:
|
|
|
2361
2364
|
log2(f'Adding {to_}')
|
|
2362
2365
|
|
|
2363
2366
|
def add_file(self, from_, to_):
|
|
2364
|
-
|
|
2367
|
+
log1(f'Adding file: {os.path.relpath(from_)} => {to_}')
|
|
2365
2368
|
with open(from_, 'rb') as f:
|
|
2366
2369
|
content = f.read()
|
|
2367
2370
|
self.add_content(content, to_, verbose=False)
|
|
@@ -130,7 +130,7 @@ def main(argv):
|
|
|
130
130
|
build_type = None
|
|
131
131
|
build_mupdf = True
|
|
132
132
|
gdb = False
|
|
133
|
-
|
|
133
|
+
test_fitz = True
|
|
134
134
|
implementations = None
|
|
135
135
|
test_names = list()
|
|
136
136
|
venv = 2
|
|
@@ -157,7 +157,7 @@ def main(argv):
|
|
|
157
157
|
elif arg == '-d':
|
|
158
158
|
build_type = 'debug'
|
|
159
159
|
elif arg == '-f':
|
|
160
|
-
|
|
160
|
+
test_fitz = int(next(args))
|
|
161
161
|
elif arg in ('-h', '--help'):
|
|
162
162
|
show_help()
|
|
163
163
|
return
|
|
@@ -226,7 +226,7 @@ def main(argv):
|
|
|
226
226
|
pytest_options=pytest_options,
|
|
227
227
|
timeout=timeout,
|
|
228
228
|
gdb=gdb,
|
|
229
|
-
|
|
229
|
+
test_fitz=test_fitz,
|
|
230
230
|
)
|
|
231
231
|
|
|
232
232
|
for command in commands:
|
|
@@ -359,7 +359,7 @@ def test(
|
|
|
359
359
|
pytest_options=None,
|
|
360
360
|
timeout=None,
|
|
361
361
|
gdb=False,
|
|
362
|
-
|
|
362
|
+
test_fitz=True,
|
|
363
363
|
):
|
|
364
364
|
'''
|
|
365
365
|
Args:
|
|
@@ -375,7 +375,7 @@ def test(
|
|
|
375
375
|
See top-level option `-p`.
|
|
376
376
|
gdb:
|
|
377
377
|
See top-level option `--gdb`.
|
|
378
|
-
|
|
378
|
+
test_fitz:
|
|
379
379
|
See top-level option `-f`.
|
|
380
380
|
'''
|
|
381
381
|
pymupdf_dir_rel = gh_release.relpath(pymupdf_dir)
|
|
@@ -433,7 +433,7 @@ def test(
|
|
|
433
433
|
for p in glob.glob(f'{pymupdf_dir_rel}/tests/test_*_fitz.py'):
|
|
434
434
|
print(f'Removing {p=}')
|
|
435
435
|
os.remove(p)
|
|
436
|
-
if
|
|
436
|
+
if test_fitz:
|
|
437
437
|
# Create copies of each test file, modified to use `pymupdf`
|
|
438
438
|
# instead of `fitz`.
|
|
439
439
|
for p in glob.glob(f'{pymupdf_dir_rel}/tests/test_*.py'):
|
|
@@ -571,13 +571,6 @@ def build():
|
|
|
571
571
|
build_type,
|
|
572
572
|
)
|
|
573
573
|
|
|
574
|
-
for d in (
|
|
575
|
-
mupdf_build_dir,
|
|
576
|
-
f'{g_root}/src',
|
|
577
|
-
):
|
|
578
|
-
if d:
|
|
579
|
-
run(f'ls -l {os.path.relpath(d)}', check=0)
|
|
580
|
-
|
|
581
574
|
# Generate lists of (from, to) items to return to pipcl. We put MuPDF
|
|
582
575
|
# shared libraries in a separate list so that we can build specific wheels
|
|
583
576
|
# as determined by g_flavour.
|
|
@@ -613,21 +606,26 @@ def build():
|
|
|
613
606
|
|
|
614
607
|
if path_so_leaf_b:
|
|
615
608
|
# Add rebased implementation files.
|
|
616
|
-
add( ret_p, f'{g_root}/src/fitz___init__.py', 'fitz/__init__.py') # For `fitz` module alias.
|
|
617
|
-
add( ret_p, f'{g_root}/src/fitz_table.py', 'fitz/table.py') # For `fitz` module alias.
|
|
618
|
-
add( ret_p, f'{g_root}/src/fitz_utils.py', 'fitz/utils.py') # For `fitz` module alias.
|
|
619
609
|
to_dir = 'pymupdf/'
|
|
620
610
|
add( ret_p, f'{g_root}/src/__init__.py', to_dir)
|
|
621
611
|
add( ret_p, f'{g_root}/src/__main__.py', to_dir)
|
|
622
612
|
add( ret_p, f'{g_root}/src/pymupdf.py', to_dir)
|
|
623
613
|
add( ret_p, f'{g_root}/src/table.py', to_dir)
|
|
624
614
|
add( ret_p, f'{g_root}/src/utils.py', to_dir)
|
|
625
|
-
add( ret_p, f'{g_root}/src/
|
|
626
|
-
add( ret_p, f'{g_root}/src/
|
|
615
|
+
add( ret_p, f'{g_root}/src/_apply_pages.py', to_dir)
|
|
616
|
+
add( ret_p, f'{g_root}/src/build/extra.py', to_dir)
|
|
617
|
+
add( ret_p, f'{g_root}/src/build/{path_so_leaf_b}', to_dir)
|
|
618
|
+
|
|
619
|
+
# Add support for `fitz` backwards compatibility.
|
|
620
|
+
add( ret_p, f'{g_root}/src/fitz___init__.py', 'fitz/__init__.py')
|
|
621
|
+
add( ret_p, f'{g_root}/src/fitz_table.py', 'fitz/table.py')
|
|
622
|
+
add( ret_p, f'{g_root}/src/fitz_utils.py', 'fitz/utils.py')
|
|
627
623
|
|
|
628
624
|
if mupdf_local:
|
|
625
|
+
# Add MuPDF Python API.
|
|
629
626
|
add( ret_p, f'{mupdf_build_dir}/mupdf.py', to_dir)
|
|
630
627
|
|
|
628
|
+
# Add MuPDF shared libraries.
|
|
631
629
|
if windows:
|
|
632
630
|
wp = pipcl.wdev.WindowsPython()
|
|
633
631
|
add( ret_p, f'{mupdf_build_dir}/_mupdf.pyd', to_dir)
|
|
@@ -659,8 +657,6 @@ def build():
|
|
|
659
657
|
else:
|
|
660
658
|
add( ret, f'{g_root}/README.md', '$dist-info/README.md')
|
|
661
659
|
|
|
662
|
-
for f, t in ret:
|
|
663
|
-
log( f'build(): {f} => {t}')
|
|
664
660
|
return ret
|
|
665
661
|
|
|
666
662
|
|
|
@@ -963,7 +959,7 @@ def _build_extension_rebased( mupdf_local, mupdf_build_dir, build_type):
|
|
|
963
959
|
path_so_leaf_b = pipcl.build_extension(
|
|
964
960
|
name = 'extra',
|
|
965
961
|
path_i = f'{g_root}/src/extra.i',
|
|
966
|
-
outdir = f'{g_root}/src',
|
|
962
|
+
outdir = f'{g_root}/src/build',
|
|
967
963
|
includes = includes,
|
|
968
964
|
defines = defines,
|
|
969
965
|
libpaths = libpaths,
|
|
@@ -1117,7 +1113,7 @@ classifier = [
|
|
|
1117
1113
|
# We generate different wheels depending on g_flavour.
|
|
1118
1114
|
#
|
|
1119
1115
|
|
|
1120
|
-
version = '1.24.
|
|
1116
|
+
version = '1.24.5'
|
|
1121
1117
|
version_b = '1.24.3'
|
|
1122
1118
|
|
|
1123
1119
|
if os.path.exists(f'{g_root}/{g_pymupdfb_sdist_marker}'):
|
|
@@ -20,6 +20,7 @@ import re
|
|
|
20
20
|
import string
|
|
21
21
|
import sys
|
|
22
22
|
import tarfile
|
|
23
|
+
import time
|
|
23
24
|
import typing
|
|
24
25
|
import warnings
|
|
25
26
|
import weakref
|
|
@@ -61,6 +62,19 @@ def _set_stream(name, default):
|
|
|
61
62
|
_g_out_log = _set_stream('PYMUPDF_LOG', sys.stdout)
|
|
62
63
|
_g_out_message = _set_stream('PYMUPDF_MESSAGE', sys.stdout)
|
|
63
64
|
|
|
65
|
+
_g_log_items = list()
|
|
66
|
+
_g_log_items_active = False
|
|
67
|
+
|
|
68
|
+
def _log_items():
    '''
    Returns the module-level list `_g_log_items`.

    The list object itself is returned, not a copy.
    '''
    return _g_log_items
|
|
70
|
+
|
|
71
|
+
def _log_items_active(active):
    '''
    Sets the module-level flag `_g_log_items_active`.

    Args:
        active:
            New value for the flag; stored as given.
    '''
    global _g_log_items_active
    _g_log_items_active = active
|
|
74
|
+
|
|
75
|
+
def _log_items_clear():
    '''
    Empties the module-level list `_g_log_items` in place.

    The list object is kept, so existing references to it see the now-empty
    list.
    '''
    _g_log_items.clear()
|
|
77
|
+
|
|
64
78
|
|
|
65
79
|
def log( text='', caller=1):
|
|
66
80
|
'''
|
|
@@ -70,7 +84,10 @@ def log( text='', caller=1):
|
|
|
70
84
|
filename = os.path.relpath(frame_record.filename)
|
|
71
85
|
line = frame_record.lineno
|
|
72
86
|
function = frame_record.function
|
|
73
|
-
|
|
87
|
+
text = f'{filename}:{line}:{function}: {text}'
|
|
88
|
+
if _g_log_items_active:
|
|
89
|
+
_g_log_items.append(text)
|
|
90
|
+
print(text, file=_g_out_log)
|
|
74
91
|
_g_out_log.flush()
|
|
75
92
|
|
|
76
93
|
|
|
@@ -85,7 +102,7 @@ def message(text=''):
|
|
|
85
102
|
def exception_info():
|
|
86
103
|
import traceback
|
|
87
104
|
log(f'exception_info:')
|
|
88
|
-
traceback.
|
|
105
|
+
log(traceback.format_exc())
|
|
89
106
|
|
|
90
107
|
|
|
91
108
|
# PDF names must not contain these characters:
|
|
@@ -183,8 +200,8 @@ def _int_rc(text):
|
|
|
183
200
|
return int(text)
|
|
184
201
|
|
|
185
202
|
VersionFitz = mupdf.FZ_VERSION # MuPDF version.
|
|
186
|
-
VersionBind = "1.24.
|
|
187
|
-
VersionDate = "2024-05-
|
|
203
|
+
VersionBind = "1.24.5" # PyMuPDF version.
|
|
204
|
+
VersionDate = "2024-05-30 00:00:01"
|
|
188
205
|
VersionDate2 = VersionDate.replace('-', '').replace(' ', '').replace(':', '')
|
|
189
206
|
version = (VersionBind, VersionFitz, VersionDate2)
|
|
190
207
|
pymupdf_version_tuple = tuple( [_int_rc(i) for i in VersionBind.split('.')])
|
|
@@ -8994,7 +9011,8 @@ class Page:
|
|
|
8994
9011
|
|
|
8995
9012
|
# prefix with derotation matrix
|
|
8996
9013
|
mat = mat0 * self.derotation_matrix
|
|
8997
|
-
cmd =
|
|
9014
|
+
cmd = _format_g(tuple(mat)) + ' cm '
|
|
9015
|
+
cmd = cmd.encode('utf8')
|
|
8998
9016
|
_ = TOOLS._insert_contents(self, cmd, False) # prepend to page contents
|
|
8999
9017
|
|
|
9000
9018
|
# swap x- and y-coordinates
|
|
@@ -20936,6 +20954,197 @@ def vdist(dir, a, b):
|
|
|
20936
20954
|
return mupdf.fz_abs(dx * dir.y + dy * dir.x)
|
|
20937
20955
|
|
|
20938
20956
|
|
|
20957
|
+
def apply_pages(
        path,
        pagefn,
        *,
        pagefn_args=(),
        pagefn_kwargs=dict(),
        initfn=None,
        initfn_args=(),
        initfn_kwargs=dict(),
        pages=None,
        method='single',
        concurrency=None,
        _stats=False,
        ):
    '''
    Returns list of results from `pagefn()`, optionally using concurrency for
    speed.

    Args:
        path:
            Path of document.
        pagefn:
            Function to call for each page; is passed (page, *pagefn_args,
            **pagefn_kwargs). Return value is added to list that we return. If
            `method` is not 'single', must be a top-level function - nested
            functions don't work with concurrency.
        pagefn_args
        pagefn_kwargs:
            Additional args to pass to `pagefn`. Must be picklable.
        initfn:
            If true, called once in each worker process; is passed
            (*initfn_args, **initfn_kwargs). With method 'single' it is called
            once in the current process before any page is handled.
        initfn_args
        initfn_kwargs:
            Args to pass to initfn. Must be picklable.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork()`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        _stats:
            Internal, may change or be removed. If true, we output simple
            timing diagnostics.

    Raises:
        AssertionError:
            If `method` is not one of 'single', 'mp' or 'fork'.

    Note: We require a file path rather than a Document, because Document
    instances do not work properly after a fork - internal file descriptor
    offsets are shared between the parent and child processes.
    '''
    # The dict() defaults are only read in this function (`**` unpacking
    # copies them); the concurrent helpers are presumed to treat them as
    # read-only too.
    if _stats:
        t0 = time.time()

    if method == 'single':
        if initfn:
            initfn(*initfn_args, **initfn_kwargs)
        ret = list()
        document = Document(path)
        if pages is None:
            page_iter = iter(document)
        else:
            # Honour an explicit page selection, as the concurrent methods do.
            page_iter = (document[pno] for pno in pages)
        for page in page_iter:
            # Pass pagefn's own keyword args; initfn_kwargs belong to initfn.
            r = pagefn(page, *pagefn_args, **pagefn_kwargs)
            ret.append(r)

    else:
        # Use concurrency.
        #
        from . import _apply_pages

        if pages is None:
            if _stats:
                t = time.time()
            with Document(path) as document:
                num_pages = len(document)
                pages = list(range(num_pages))
            if _stats:
                t = time.time() - t
                log(f'{t:.2f}s: count pages.')

        if _stats:
            t = time.time()

        if method == 'mp':
            ret = _apply_pages._multiprocessing(
                    path,
                    pages,
                    pagefn,
                    pagefn_args,
                    pagefn_kwargs,
                    initfn,
                    initfn_args,
                    initfn_kwargs,
                    concurrency,
                    _stats,
                    )

        elif method == 'fork':
            ret = _apply_pages._fork(
                    path,
                    pages,
                    pagefn,
                    pagefn_args,
                    pagefn_kwargs,
                    initfn,
                    initfn_args,
                    initfn_kwargs,
                    concurrency,
                    _stats,
                    )

        else:
            # Explicit raise rather than `assert 0`, so the check survives
            # `python -O` instead of falling through to an unbound `ret`.
            raise AssertionError(f'Unrecognised {method=}.')

        if _stats:
            t = time.time() - t
            log(f'{t:.2f}s: work.')

    if _stats:
        t = time.time() - t0
        log(f'{t:.2f}s: total.')
    return ret
|
|
21082
|
+
|
|
21083
|
+
|
|
21084
|
+
def get_text(
        path,
        *,
        pages=None,
        method='single',
        concurrency=None,

        option='text',
        clip=None,
        flags=None,
        textpage=None,
        sort=False,
        delimiters=None,

        _stats=False,
        ):
    '''
    Runs `Page.get_text()` on pages of the document at `path` by delegating
    to `apply_pages()`, and returns whatever `apply_pages()` returns (a
    list holding one `Page.get_text()` result per processed page).

    Args:
        path:
            Path of document.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            Selects how the work is carried out:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing`
                module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork()`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently.
            If None, we use the number of available CPUs.
        option
        clip
        flags
        textpage
        sort
        delimiters:
            Forwarded unchanged, as keyword arguments, to each internal
            call of `Page.get_text()`.
        _stats:
            Forwarded unchanged to `apply_pages()`.
    '''
    # Keyword arguments handed to every Page.get_text() call. They travel
    # to apply_pages() as `pagefn_kwargs`, which its documentation says
    # must be picklable.
    page_get_text_kwargs = {
            'option': option,
            'clip': clip,
            'flags': flags,
            'textpage': textpage,
            'sort': sort,
            'delimiters': delimiters,
            }

    return apply_pages(
            path,
            Page.get_text,
            pagefn_kwargs=page_get_text_kwargs,
            pages=pages,
            method=method,
            concurrency=concurrency,
            _stats=_stats,
            )
|
|
21146
|
+
|
|
21147
|
+
|
|
20939
21148
|
class TOOLS:
|
|
20940
21149
|
'''
|
|
20941
21150
|
We use @staticmethod to avoid the need to create an instance of this class.
|