PyMuPDF 1.23.23__tar.gz → 1.23.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/PKG-INFO +1 -1
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/changes.txt +13 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/scripts/sysinstall.py +98 -49
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/scripts/test.py +3 -2
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/setup.py +1 -1
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src/__init__.py +3 -3
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src/table.py +87 -46
- PyMuPDF-1.23.24/src_classic/version.i +7 -0
- PyMuPDF-1.23.24/tests/resources/test_3179.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_optional_content.py +65 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_tables.py +37 -0
- PyMuPDF-1.23.23/src_classic/version.i +0 -7
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/COPYING +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/MANIFEST.in +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/README.md +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/READMErb.md +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/mupdf.tgz +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/pipcl.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/pyproject.toml +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/pytest.ini +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/scripts/gh_release.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src/__main__.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src/extra.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src/fitz.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src/utils.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/__init__.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/__main__.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/_config.h +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/fitz_old.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-annot.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-convert.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-defines.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-devices.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-fields.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-fileobj.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-geo-c.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-geo-py.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-globals.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-other.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-pdfinfo.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-pixmap.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-portfolio.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-python.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-select.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-stext.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/helper-xobject.i +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/src_classic/utils.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/README.md +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/001003ED.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/1.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/2.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/2201.00069.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/3.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/4.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/Bezier.epub +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/PragmaticaC.otf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/bug1945.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/bug1971.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/chinese-tables.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/chinese-tables.pickle +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/circular-toc.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/cython.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/cython.pickle +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/full_toc.txt +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/full_toc2.txt +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/github_sample.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/has-bad-fonts.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/image-file1.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/img-transparent.png +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/joined.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/metadata.txt +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/mupdf_explored.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/nur-ruhig.jpg +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/quad-calc-0.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/simple_toc.txt +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/small-table.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/strict-yes-no.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/symbol-list.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/symbols.txt +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test-2333.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test-2462.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test-3143.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test-3150.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test2093.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test2182.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test2238.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_1645_expected.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_1645_expected_1.22.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_1645_expected_1.24.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_1824.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2108.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2270.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2533.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2548.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2553-2.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2553.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2596.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2608_expected +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2634.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2635.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2645_1.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2645_2.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2645_3.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2710.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2730.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2742.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2788.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2791_content.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2791_coverpage.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2861.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2871.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2885.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2904.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2907.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2954.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2957_1.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2957_2.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2969.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_2979.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_3058.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_3062.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_3070.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_3072.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_3087.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_annot_file_info.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/test_delete_image.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/type3font.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/v110-changes.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/resources/widgettest.pdf +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/run_compound.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_2548.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_2634.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_2791.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_2904.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_2907.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_annots.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_badfonts.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_crypting.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_docs_samples.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_drawings.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_embeddedfiles.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_extractimage.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_flake8.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_font.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_general.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_geometry.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_imagebbox.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_insertimage.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_insertpdf.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_linequad.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_metadata.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_named_links.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_nonpdf.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_object_manipulation.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_page_links.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_pagedelete.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_pagelabels.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_pixmap.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_showpdfpage.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_story.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_tesseract.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_textbox.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_textextract.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_textsearch.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_toc.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_widgets.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/tests/test_word_delimiters.py +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/valgrind.supp +0 -0
- {PyMuPDF-1.23.23 → PyMuPDF-1.23.24}/wdev.py +0 -0
|
@@ -2,6 +2,19 @@ Change Log
|
|
|
2
2
|
==========
|
|
3
3
|
|
|
4
4
|
|
|
5
|
+
**Changes in version 1.23.24 (2024-02-19)**
|
|
6
|
+
|
|
7
|
+
* Fixed issues:
|
|
8
|
+
|
|
9
|
+
* **Fixed** `3148 <https://github.com/pymupdf/PyMuPDF/issues/3148>`_: Table extraction - vertical text not handled correctly
|
|
10
|
+
* **Fixed** `3179 <https://github.com/pymupdf/PyMuPDF/issues/3179>`_: Table Detection: Incorrect Separation of Vector Graphics Clusters
|
|
11
|
+
* **Fixed** `3180 <https://github.com/pymupdf/PyMuPDF/issues/3180>`_: Cannot show optional content group: AttributeError: module 'fitz.mupdf' has no attribute 'pdf_array_push_drop'
|
|
12
|
+
|
|
13
|
+
* Other:
|
|
14
|
+
|
|
15
|
+
* Be able to test system install using `sudo pip install` instead of a venv.
|
|
16
|
+
|
|
17
|
+
|
|
5
18
|
**Changes in version 1.23.23 (2024-02-18)**
|
|
6
19
|
|
|
7
20
|
* Fixed issues:
|
|
@@ -24,28 +24,39 @@ Args:
|
|
|
24
24
|
--mupdf-git <git_args>
|
|
25
25
|
Get or update `mupdf_dir` using git. If `mupdf_dir` already
|
|
26
26
|
exists we run `git pull` in it; otherwise we run `git
|
|
27
|
-
clone` with
|
|
27
|
+
clone` with `<git_args> <mupdf_dir>`. For example:
|
|
28
28
|
--mupdf-git "--branch master https://github.com/ArtifexSoftware/mupdf.git"
|
|
29
|
-
--
|
|
30
|
-
|
|
29
|
+
--mupdf-so-mode <mode>
|
|
30
|
+
Used with `install -m <mode> ...` when installing MuPDF. For example
|
|
31
|
+
`--mupdf-so-mode 744`.
|
|
32
|
+
--packages 0|1
|
|
33
|
+
If 1 (the default) we install required system packages such as
|
|
34
|
+
`libfreetype-dev`.
|
|
35
|
+
--pip 0|venv|sudo
|
|
36
|
+
Whether/how to install Python packages.
|
|
37
|
+
If '0' we assume required packages are already available.
|
|
38
|
+
If 'sudo' we install required Python packages using `sudo pip install
|
|
39
|
+
...`.
|
|
40
|
+
If 'venv' (the default) we install Python packages and run installer
|
|
41
|
+
and test commands inside venv's.
|
|
31
42
|
--prefix:
|
|
32
43
|
Directory within `root`; default is `/usr/local`. Must start with `/`.
|
|
44
|
+
--pymupdf-dir <pymupdf_dir>
|
|
45
|
+
Path of PyMuPDF checkout; default is 'PyMuPDF'.
|
|
33
46
|
--root <root>
|
|
34
47
|
Root of install directory; default is `/`.
|
|
35
48
|
--tesseract5 0|1
|
|
36
49
|
If 1 (the default), we force installation of libtesseract-dev version
|
|
37
50
|
5 (which is not available as a default package in Ubuntu-22.04) from
|
|
38
51
|
package repository ppa:alex-p/tesseract-ocr-devel.
|
|
39
|
-
--test-venv <
|
|
40
|
-
|
|
41
|
-
venv
|
|
42
|
-
|
|
43
|
-
instead attempt to install required packages with `pip` in the current
|
|
44
|
-
Python environment.
|
|
52
|
+
--test-venv <test_venv>
|
|
53
|
+
Set the name of the venv in which we run tests (only with `--pip
|
|
54
|
+
venv`); the default is a hard-coded venv name. The venv will be
|
|
55
|
+
created, and required packages installed using `pip`.
|
|
45
56
|
--use-installer 0|1
|
|
46
57
|
If 1 (the default), we use `python -m installer` to install PyMuPDF
|
|
47
|
-
from a generated wheel. Otherwise we use `pip install`, which refuses
|
|
48
|
-
to do a system install with `--root /`, referencing PEP-668.
|
|
58
|
+
from a generated wheel. [Otherwise we use `pip install`, which refuses
|
|
59
|
+
to do a system install with `--root /`, referencing PEP-668.]
|
|
49
60
|
-m 0|1
|
|
50
61
|
If 1 (the default) we build and install MuPDF, otherwise we just show
|
|
51
62
|
what command we would have run.
|
|
@@ -61,12 +72,14 @@ To only show what commands would be run, but not actually run them, specify `-m
|
|
|
61
72
|
'''
|
|
62
73
|
|
|
63
74
|
import glob
|
|
75
|
+
import multiprocessing
|
|
64
76
|
import os
|
|
65
77
|
import platform
|
|
66
78
|
import subprocess
|
|
67
79
|
import sys
|
|
68
80
|
import sysconfig
|
|
69
81
|
|
|
82
|
+
import test as test_py
|
|
70
83
|
|
|
71
84
|
# Requirements for a system build and install:
|
|
72
85
|
#
|
|
@@ -107,6 +120,8 @@ def main():
|
|
|
107
120
|
mupdf = True
|
|
108
121
|
mupdf_dir = 'mupdf'
|
|
109
122
|
mupdf_git = None
|
|
123
|
+
mupdf_so_mode = None
|
|
124
|
+
packages = True
|
|
110
125
|
prefix = '/usr/local'
|
|
111
126
|
pymupdf = True
|
|
112
127
|
pymupdf_dir = os.path.abspath( f'{__file__}/../..')
|
|
@@ -114,6 +129,7 @@ def main():
|
|
|
114
129
|
tesseract5 = True
|
|
115
130
|
test = True
|
|
116
131
|
test_venv = 'venv-pymupdf-sysinstall-test'
|
|
132
|
+
pip = 'venv'
|
|
117
133
|
|
|
118
134
|
# Parse command-line.
|
|
119
135
|
#
|
|
@@ -128,12 +144,15 @@ def main():
|
|
|
128
144
|
return
|
|
129
145
|
elif arg == '--mupdf-dir': mupdf_dir = next(args)
|
|
130
146
|
elif arg == '--mupdf-git': mupdf_git = next(args)
|
|
147
|
+
elif arg == '--mupdf-so-mode': mupdf_so_mode = next(args)
|
|
148
|
+
elif arg == '--packages': packages = int(next(args))
|
|
131
149
|
elif arg == '--prefix': prefix = next(args)
|
|
132
150
|
elif arg == '--pymupdf-dir': pymupdf_dir = next(args)
|
|
133
151
|
elif arg == '--root': root = next(args)
|
|
134
152
|
elif arg == '--tesseract5': tesseract5 = int(next(args))
|
|
135
153
|
elif arg == '--test-venv': test_venv = next(args)
|
|
136
154
|
elif arg == '--use-installer': use_installer = int(next(args))
|
|
155
|
+
elif arg == '--pip': pip = next(args)
|
|
137
156
|
elif arg == '-m': mupdf = int(next(args))
|
|
138
157
|
elif arg == '-p': pymupdf = int(next(args))
|
|
139
158
|
elif arg == '-t': test = int(next(args))
|
|
@@ -141,6 +160,8 @@ def main():
|
|
|
141
160
|
assert 0, f'Unrecognised arg: {arg!r}'
|
|
142
161
|
|
|
143
162
|
assert prefix.startswith('/')
|
|
163
|
+
pip_values = ('0', 'sudo', 'venv')
|
|
164
|
+
assert pip in pip_values, f'Unrecognised --pip value {pip!r} should be one of: {pip_values!r}'
|
|
144
165
|
root = os.path.abspath(root)
|
|
145
166
|
root_prefix = f'{root}{prefix}'.replace('//', '/')
|
|
146
167
|
|
|
@@ -161,44 +182,54 @@ def main():
|
|
|
161
182
|
print(f'## Clone MuPDF into {mupdf_dir}.')
|
|
162
183
|
run(f'git clone --recursive --depth 1 --shallow-submodules {mupdf_git} {mupdf_dir}')
|
|
163
184
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
print('## Install system packages required by MuPDF.')
|
|
167
|
-
run(f'sudo apt update')
|
|
168
|
-
run(f'sudo apt install {" ".join(g_sys_packages)}')
|
|
169
|
-
# Ubuntu-22.04 has freeglut3-dev, not libglut-dev.
|
|
170
|
-
run(f'sudo apt install libglut-dev | sudo apt install freeglut3-dev')
|
|
171
|
-
if tesseract5:
|
|
172
|
-
print(f'## Force installation of libtesseract-dev version 5.')
|
|
173
|
-
# https://stackoverflow.com/questions/76834972/how-can-i-run-pytesseract-python-library-in-ubuntu-22-04
|
|
185
|
+
if packages:
|
|
186
|
+
# Install required system packages. We assume a Debian package system.
|
|
174
187
|
#
|
|
175
|
-
|
|
176
|
-
run('sudo
|
|
177
|
-
run('sudo apt
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
188
|
+
print('## Install system packages required by MuPDF.')
|
|
189
|
+
run(f'sudo apt update')
|
|
190
|
+
run(f'sudo apt install {" ".join(g_sys_packages)}')
|
|
191
|
+
# Ubuntu-22.04 has freeglut3-dev, not libglut-dev.
|
|
192
|
+
run(f'sudo apt install libglut-dev | sudo apt install freeglut3-dev')
|
|
193
|
+
if tesseract5:
|
|
194
|
+
print(f'## Force installation of libtesseract-dev version 5.')
|
|
195
|
+
# https://stackoverflow.com/questions/76834972/how-can-i-run-pytesseract-python-library-in-ubuntu-22-04
|
|
196
|
+
#
|
|
197
|
+
run('sudo apt install -y software-properties-common')
|
|
198
|
+
run('sudo add-apt-repository ppa:alex-p/tesseract-ocr-devel')
|
|
199
|
+
run('sudo apt update')
|
|
200
|
+
run('sudo apt install -y libtesseract-dev')
|
|
201
|
+
else:
|
|
202
|
+
run('sudo apt install libtesseract-dev')
|
|
181
203
|
|
|
182
204
|
# Build+install MuPDF. We use mupdf:Makefile's install-shared-python target.
|
|
183
205
|
#
|
|
206
|
+
if pip == 'sudo':
|
|
207
|
+
print('## Installing Python packages required for building MuPDF and PyMuPDF.')
|
|
208
|
+
run(f'sudo pip install --upgrade pip')
|
|
209
|
+
names = (''
|
|
210
|
+
+ test_py.get_pyproject_required(os.path.abspath(f'{__file__}/../../pyproject.toml'))
|
|
211
|
+
+ ' '
|
|
212
|
+
+ test_py.get_pyproject_required(os.path.abspath(f'{mupdf_dir}/pyproject.toml'))
|
|
213
|
+
)
|
|
214
|
+
run(f'sudo pip install {names}')
|
|
215
|
+
|
|
184
216
|
print('## Build and install MuPDF.')
|
|
185
|
-
if 1:
|
|
186
|
-
# Current MuPDF creates softlinks with `ln -s` which breaks if there
|
|
187
|
-
# was a previous build; it should do `ln -sf`. We make things work by
|
|
188
|
-
# deleting any existing softlinks here.
|
|
189
|
-
run(f'rm {root_prefix}/lib/libmupdf.so || true')
|
|
190
|
-
run(f'rm {root_prefix}/lib/libmupdfcpp.so || true')
|
|
191
217
|
command = f'cd {mupdf_dir}'
|
|
192
218
|
command += f' && {sudo}make'
|
|
219
|
+
command += f' -j {multiprocessing.cpu_count()}'
|
|
193
220
|
#command += f' EXE_LDFLAGS=-Wl,--trace' # Makes linker generate diagnostics as it runs.
|
|
194
221
|
command += f' DESTDIR={root}'
|
|
195
222
|
command += f' HAVE_LEPTONICA=yes'
|
|
196
223
|
command += f' HAVE_TESSERACT=yes'
|
|
197
224
|
command += f' USE_SYSTEM_LIBS=yes'
|
|
225
|
+
command += f' VENV_FLAG={"--venv" if pip == "venv" else ""}'
|
|
226
|
+
if mupdf_so_mode:
|
|
227
|
+
command += f' SO_INSTALL_MODE={mupdf_so_mode}'
|
|
198
228
|
command += f' build_prefix=system-libs-'
|
|
199
229
|
command += f' prefix={prefix}'
|
|
200
230
|
command += f' verbose=yes'
|
|
201
231
|
command += f' install-shared-python'
|
|
232
|
+
command += f' INSTALL_MODE=755'
|
|
202
233
|
run( command)
|
|
203
234
|
|
|
204
235
|
# Build+install PyMuPDF.
|
|
@@ -217,23 +248,34 @@ def main():
|
|
|
217
248
|
env += f'PYMUPDF_SETUP_IMPLEMENTATIONS=b' # Only build the rebased implementation.
|
|
218
249
|
if use_installer:
|
|
219
250
|
print(f'## Building wheel.')
|
|
251
|
+
if pip == 'venv':
|
|
252
|
+
venv_name = 'venv-pymupdf-sysinstall'
|
|
220
253
|
run(f'pwd')
|
|
221
254
|
run(f'rm dist/* || true')
|
|
222
|
-
|
|
255
|
+
if pip == 'venv':
|
|
256
|
+
run(f'{sys.executable} -m venv {venv_name}')
|
|
257
|
+
run(f'. {venv_name}/bin/activate && pip install --upgrade pip')
|
|
258
|
+
run(f'. {venv_name}/bin/activate && pip install --upgrade installer')
|
|
259
|
+
run(f'{env} {venv_name}/bin/python -m pip wheel -vv -w dist {os.path.abspath(pymupdf_dir)}')
|
|
260
|
+
elif pip == 'sudo':
|
|
261
|
+
run(f'sudo pip install --upgrade pip')
|
|
262
|
+
run(f'sudo pip install installer')
|
|
263
|
+
run(f'{env} pip wheel -vv -w dist {os.path.abspath(pymupdf_dir)}')
|
|
264
|
+
else:
|
|
265
|
+
log(f'Not installing "installer" because {pip=}.')
|
|
223
266
|
wheel = glob.glob(f'dist/*')
|
|
224
267
|
assert len(wheel) == 1, f'{wheel=}'
|
|
225
268
|
wheel = wheel[0]
|
|
226
269
|
print(f'## Installing wheel using `installer`.')
|
|
227
|
-
venv = 'venv-pymupdf-sysinstall'
|
|
228
|
-
run(f'{sys.executable} -m venv {venv}')
|
|
229
|
-
run(f'. {venv}/bin/activate && pip install --upgrade pip')
|
|
230
|
-
run(f'. {venv}/bin/activate && pip install --upgrade installer')
|
|
231
270
|
pv = '.'.join(platform.python_version_tuple()[:2])
|
|
232
271
|
p = f'{root_prefix}/lib/python{pv}'
|
|
233
272
|
# `python -m installer` fails to overwrite existing files.
|
|
234
273
|
run(f'{sudo}rm -r {p}/site-packages/fitz || true')
|
|
235
274
|
run(f'{sudo}rm -r {p}/site-packages/PyMuPDF-*.dist-info || true')
|
|
236
|
-
|
|
275
|
+
if pip == 'venv':
|
|
276
|
+
run(f'{sudo}{venv_name}/bin/python -m installer --destdir {root} --prefix {prefix} {wheel}')
|
|
277
|
+
else:
|
|
278
|
+
run(f'{sudo}{sys.executable} -m installer --destdir {root} --prefix {prefix} {wheel}')
|
|
237
279
|
# It seems that MuPDF Python bindings are installed into
|
|
238
280
|
# `.../dist-packages` (from mupdf:Makefile's call of `$(shell python3
|
|
239
281
|
# -c "import sysconfig; print(sysconfig.get_path('platlib'))")` while
|
|
@@ -267,7 +309,7 @@ def main():
|
|
|
267
309
|
del sys.path[0]
|
|
268
310
|
pythonpath = pipcl.install_dir(root)
|
|
269
311
|
|
|
270
|
-
# Show contents of installation
|
|
312
|
+
# Show contents of installation directory. This is very slow on github,
|
|
271
313
|
# where /usr/local contains lots of things.
|
|
272
314
|
#run(f'find {root_prefix}|sort')
|
|
273
315
|
|
|
@@ -277,20 +319,27 @@ def main():
|
|
|
277
319
|
def run(command):
|
|
278
320
|
return run_command(command, doit=test)
|
|
279
321
|
import gh_release
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
322
|
+
if pip == 'venv':
|
|
323
|
+
# Create venv.
|
|
324
|
+
run(f'{sys.executable} -m venv {test_venv}')
|
|
325
|
+
# Install required packages.
|
|
326
|
+
command = f'. {test_venv}/bin/activate'
|
|
327
|
+
command += f' && pip install --upgrade pip'
|
|
328
|
+
command += f' && pip install --upgrade {gh_release.test_packages}'
|
|
329
|
+
run(command)
|
|
330
|
+
elif pip == 'sudo':
|
|
331
|
+
run(f'sudo pip install --upgrade {gh_release.test_packages}')
|
|
332
|
+
else:
|
|
333
|
+
log(f'Not installing packages for testing because {pip=}.')
|
|
287
334
|
# Run pytest.
|
|
288
335
|
#
|
|
289
336
|
# We need to set PYTHONPATH and LD_LIBRARY_PATH. In particular we
|
|
290
337
|
# use pipcl.install_dir() to find where pipcl will have installed
|
|
291
338
|
# PyMuPDF.
|
|
292
|
-
command =
|
|
293
|
-
|
|
339
|
+
command = ''
|
|
340
|
+
if pip == 'venv':
|
|
341
|
+
command += f'. {test_venv}/bin/activate &&'
|
|
342
|
+
command += f' LD_LIBRARY_PATH={root_prefix}/lib PYTHONPATH={pythonpath}'
|
|
294
343
|
command += f' pytest -k "not test_color_count and not test_3050" {pymupdf_dir}'
|
|
295
344
|
run(command)
|
|
296
345
|
|
|
@@ -410,12 +410,13 @@ def test(
|
|
|
410
410
|
log('\n' + venv_info(pytest_args=f'{pytest_options} {pytest_arg}'))
|
|
411
411
|
|
|
412
412
|
|
|
413
|
-
def get_pyproject_required():
|
|
413
|
+
def get_pyproject_required(ppt=None):
|
|
414
414
|
'''
|
|
415
415
|
Returns space-separated names of required packages in pyproject.toml. We
|
|
416
416
|
do not do a proper parse and rely on the packages being in a single line.
|
|
417
417
|
'''
|
|
418
|
-
ppt
|
|
418
|
+
if ppt is None:
|
|
419
|
+
ppt = os.path.abspath(f'{__file__}/../../pyproject.toml')
|
|
419
420
|
with open(ppt) as f:
|
|
420
421
|
for line in f:
|
|
421
422
|
m = re.match('^requires = \\[(.*)\\]$', line)
|
|
@@ -17437,7 +17437,7 @@ def JM_set_ocg_arrays_imp(arr, list_):
|
|
|
17437
17437
|
# Not found.
|
|
17438
17438
|
continue
|
|
17439
17439
|
obj = mupdf.pdf_new_indirect(pdf, xref, 0)
|
|
17440
|
-
mupdf.
|
|
17440
|
+
mupdf.pdf_array_push(arr, obj)
|
|
17441
17441
|
|
|
17442
17442
|
|
|
17443
17443
|
def JM_set_resource_property(ref, name, xref):
|
|
@@ -21729,8 +21729,8 @@ def int_rc(text):
|
|
|
21729
21729
|
return int(text)
|
|
21730
21730
|
|
|
21731
21731
|
VersionFitz = "1.23.10" # MuPDF version.
|
|
21732
|
-
VersionBind = "1.23.
|
|
21733
|
-
VersionDate = "2024-02-
|
|
21732
|
+
VersionBind = "1.23.24" # PyMuPDF version.
|
|
21733
|
+
VersionDate = "2024-02-19 00:00:01"
|
|
21734
21734
|
VersionDate2 = VersionDate.replace('-', '').replace(' ', '').replace(':', '')
|
|
21735
21735
|
version = (VersionBind, VersionFitz, VersionDate2)
|
|
21736
21736
|
pymupdf_version_tuple = tuple( [int_rc(i) for i in VersionBind.split('.')])
|
|
@@ -71,6 +71,7 @@ This is implemented as new class TableHeader with the properties:
|
|
|
71
71
|
* external: A bool indicating whether the header is outside the table cells.
|
|
72
72
|
|
|
73
73
|
"""
|
|
74
|
+
|
|
74
75
|
import inspect
|
|
75
76
|
import itertools
|
|
76
77
|
import string
|
|
@@ -361,7 +362,7 @@ class WordExtractor:
|
|
|
361
362
|
keep_blank_chars: bool = False,
|
|
362
363
|
use_text_flow=False,
|
|
363
364
|
horizontal_ltr=True, # Should words be read left-to-right?
|
|
364
|
-
vertical_ttb=
|
|
365
|
+
vertical_ttb=False, # Should vertical words be read top-to-bottom?
|
|
365
366
|
extra_attrs=None,
|
|
366
367
|
split_at_punctuation=False,
|
|
367
368
|
expand_ligatures=True,
|
|
@@ -387,9 +388,20 @@ class WordExtractor:
|
|
|
387
388
|
x0, top, x1, bottom = objects_to_bbox(ordered_chars)
|
|
388
389
|
doctop_adj = ordered_chars[0]["doctop"] - ordered_chars[0]["top"]
|
|
389
390
|
upright = ordered_chars[0]["upright"]
|
|
390
|
-
|
|
391
391
|
direction = 1 if (self.horizontal_ltr if upright else self.vertical_ttb) else -1
|
|
392
392
|
|
|
393
|
+
matrix = ordered_chars[0]["matrix"]
|
|
394
|
+
|
|
395
|
+
rotation = 0
|
|
396
|
+
if not upright and matrix[1] < 0:
|
|
397
|
+
ordered_chars = reversed(ordered_chars)
|
|
398
|
+
rotation = 270
|
|
399
|
+
|
|
400
|
+
if matrix[0] < 0 and matrix[3] < 0:
|
|
401
|
+
rotation = 180
|
|
402
|
+
elif matrix[1] > 0:
|
|
403
|
+
rotation = 90
|
|
404
|
+
|
|
393
405
|
word = {
|
|
394
406
|
"text": "".join(
|
|
395
407
|
self.expansions.get(c["text"], c["text"]) for c in ordered_chars
|
|
@@ -401,6 +413,7 @@ class WordExtractor:
|
|
|
401
413
|
"bottom": bottom,
|
|
402
414
|
"upright": upright,
|
|
403
415
|
"direction": direction,
|
|
416
|
+
"rotation": rotation,
|
|
404
417
|
}
|
|
405
418
|
|
|
406
419
|
for key in self.extra_attrs:
|
|
@@ -552,7 +565,8 @@ class WordExtractor:
|
|
|
552
565
|
return WordMap(list(self.iter_extract_tuples(chars)))
|
|
553
566
|
|
|
554
567
|
def extract_words(self, chars: list) -> list:
|
|
555
|
-
|
|
568
|
+
words = list(word for word, word_chars in self.iter_extract_tuples(chars))
|
|
569
|
+
return words
|
|
556
570
|
|
|
557
571
|
|
|
558
572
|
def extract_words(chars: list, **kwargs) -> list:
|
|
@@ -590,8 +604,21 @@ def extract_text(chars: list, **kwargs) -> str:
|
|
|
590
604
|
**{k: kwargs[k] for k in WORD_EXTRACTOR_KWARGS if k in kwargs}
|
|
591
605
|
)
|
|
592
606
|
words = extractor.extract_words(chars)
|
|
593
|
-
|
|
594
|
-
|
|
607
|
+
rotation = words[0]["rotation"] # rotation cannot change within a cell
|
|
608
|
+
|
|
609
|
+
if rotation == 90:
|
|
610
|
+
words.sort(key=lambda w: (w["x1"], -w["top"]))
|
|
611
|
+
lines = " ".join([w["text"] for w in words])
|
|
612
|
+
elif rotation == 270:
|
|
613
|
+
words.sort(key=lambda w: (-w["x1"], w["top"]))
|
|
614
|
+
lines = " ".join([w["text"] for w in words])
|
|
615
|
+
else:
|
|
616
|
+
lines = cluster_objects(words, itemgetter("doctop"), y_tolerance)
|
|
617
|
+
lines = "\n".join(" ".join(word["text"] for word in line) for line in lines)
|
|
618
|
+
if rotation == 180: # needs extra treatment
|
|
619
|
+
lines = "".join([(c if c != "\n" else " ") for c in reversed(lines)])
|
|
620
|
+
|
|
621
|
+
return lines
|
|
595
622
|
|
|
596
623
|
|
|
597
624
|
def collate_line(
|
|
@@ -1873,49 +1900,78 @@ def make_chars(page, clip=None):
|
|
|
1873
1900
|
# else to lines.
|
|
1874
1901
|
# -----------------------------------------------------------------------------
|
|
1875
1902
|
def make_edges(page, clip=None, tset=None, add_lines=None):
|
|
1903
|
+
global EDGES
|
|
1904
|
+
snap_x = tset.snap_x_tolerance
|
|
1905
|
+
snap_y = tset.snap_y_tolerance
|
|
1906
|
+
lines_strict = (
|
|
1907
|
+
tset.vertical_strategy == "lines_strict"
|
|
1908
|
+
or tset.horizontal_strategy == "lines_strict"
|
|
1909
|
+
)
|
|
1910
|
+
page_height = page.rect.height
|
|
1911
|
+
doctop_basis = page.number * page_height
|
|
1912
|
+
page_number = page.number + 1
|
|
1913
|
+
prect = page.rect
|
|
1914
|
+
if page.rotation in (90, 270):
|
|
1915
|
+
w, h = prect.br
|
|
1916
|
+
prect = Rect(0, 0, h, w)
|
|
1917
|
+
if clip is not None:
|
|
1918
|
+
clip = Rect(clip)
|
|
1919
|
+
else:
|
|
1920
|
+
clip = prect
|
|
1921
|
+
|
|
1922
|
+
def are_neighbors(r1, r2):
    """Detect whether r1, r2 are neighbors.

    Defined as:
    The minimum distance between points of r1 and points of r2 is not
    larger than some delta (snap_x horizontally, snap_y vertically).

    This check supports empty rect-likes and thus also lines.

    The result does not depend on the order of the two arguments.

    Args:
        r1: rect-like with attributes x0, y0, x1, y1.
        r2: rect-like with attributes x0, y0, x1, y1.
            NOTE(review): both are assumed to be normalized
            (x0 <= x1 and y0 <= y1) -- confirm against the callers.

    Returns:
        True if the rectangles are neighbors, else False.
    """
    # Two intervals are within tolerance of each other exactly when each one
    # starts no later than the other one ends (plus the tolerance).
    # Only testing whether an edge coordinate of r1 falls inside the widened
    # r2 is not sufficient: it misses the cases where r1 encloses r2 on an
    # axis, e.g. a big rectangle around a small one, or two crossing lines.
    x_close = r1.x0 <= r2.x1 + snap_x and r2.x0 <= r1.x1 + snap_x
    y_close = r1.y0 <= r2.y1 + snap_y and r2.y0 <= r1.y1 + snap_y
    return x_close and y_close
|
|
1940
|
+
|
|
1876
1941
|
def clean_graphics():
|
|
1877
1942
|
"""Detect and join rectangles of connected vector graphics."""
|
|
1878
|
-
lines_strict = (
|
|
1879
|
-
tset.vertical_strategy == "lines_strict"
|
|
1880
|
-
or tset.horizontal_strategy == "lines_strict"
|
|
1881
|
-
)
|
|
1882
1943
|
# exclude irrelevant graphics
|
|
1883
1944
|
paths = []
|
|
1884
1945
|
for p in page.get_drawings():
|
|
1885
|
-
if (
|
|
1946
|
+
if ( # ignore fill-only graphics if they are no lines
|
|
1886
1947
|
p["type"] == "f"
|
|
1887
1948
|
and lines_strict
|
|
1888
|
-
and p["rect"].width >
|
|
1889
|
-
and p["rect"].height >
|
|
1890
|
-
):
|
|
1949
|
+
and p["rect"].width > snap_x
|
|
1950
|
+
and p["rect"].height > snap_y
|
|
1951
|
+
):
|
|
1891
1952
|
continue
|
|
1892
1953
|
paths.append(p)
|
|
1893
1954
|
|
|
1894
1955
|
prects = sorted([p["rect"] for p in paths], key=lambda r: (r.y1, r.x0))
|
|
1895
1956
|
new_rects = [] # the final list of joined rectangles
|
|
1896
|
-
|
|
1897
1957
|
# -------------------------------------------------------------------------
|
|
1898
|
-
# Strategy:
|
|
1958
|
+
# Strategy: Join rectangles that "almost touch" each other,
|
|
1959
|
+
# Extend first rectangle with any remaining in the list that touches it.
|
|
1960
|
+
# Then move it to final list and continue with the rest.
|
|
1899
1961
|
# -------------------------------------------------------------------------
|
|
1900
1962
|
while prects: # the algorithm will empty this list
|
|
1901
1963
|
r = prects[0] # first rectangle
|
|
1902
1964
|
repeat = True
|
|
1903
|
-
while repeat:
|
|
1904
|
-
repeat = False
|
|
1965
|
+
while repeat: # this loop extends first rect in list
|
|
1966
|
+
repeat = False # will be set to true if any other rect touches
|
|
1905
1967
|
for i in range(len(prects) - 1, -1, -1): # run backwards
|
|
1906
1968
|
if i == 0: # don't touch first rectangle
|
|
1907
1969
|
continue
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
r
|
|
1911
|
-
or r.x0 <= ri.x1 <= r.x1
|
|
1912
|
-
or r.y0 <= ri.y0 <= r.y1
|
|
1913
|
-
or r.y0 <= ri.y1 <= r.y1
|
|
1914
|
-
):
|
|
1915
|
-
r |= ri # join in to first rect
|
|
1916
|
-
prects[0] = r # update first
|
|
1970
|
+
if are_neighbors(r, prects[i]): # touches rect 0!
|
|
1971
|
+
r |= prects[i] # extend first rect
|
|
1972
|
+
prects[0] = +r # update it in list
|
|
1917
1973
|
del prects[i] # delete this rect
|
|
1918
|
-
repeat = True
|
|
1974
|
+
repeat = True # check remaining
|
|
1919
1975
|
|
|
1920
1976
|
# move first item over to result list
|
|
1921
1977
|
new_rects.append(prects.pop(0))
|
|
@@ -1924,26 +1980,11 @@ def make_edges(page, clip=None, tset=None, add_lines=None):
|
|
|
1924
1980
|
new_rects = sorted(list(set(new_rects)), key=lambda r: (r.y1, r.x0))
|
|
1925
1981
|
return [r for r in new_rects if r.width > 5 and r.height > 5], paths
|
|
1926
1982
|
|
|
1927
|
-
global EDGES
|
|
1928
1983
|
bboxes, paths = clean_graphics()
|
|
1929
1984
|
|
|
1930
|
-
page_height = page.rect.height
|
|
1931
|
-
doctop_basis = page.number * page_height
|
|
1932
|
-
page_number = page.number + 1
|
|
1933
|
-
x_tolerance = tset.snap_x_tolerance
|
|
1934
|
-
y_tolerance = tset.snap_y_tolerance
|
|
1935
|
-
prect = page.rect
|
|
1936
|
-
if page.rotation in (90, 270):
|
|
1937
|
-
w, h = prect.br
|
|
1938
|
-
prect = Rect(0, 0, h, w)
|
|
1939
|
-
if clip is not None:
|
|
1940
|
-
clip = Rect(clip)
|
|
1941
|
-
else:
|
|
1942
|
-
clip = prect
|
|
1943
|
-
|
|
1944
1985
|
def is_parallel(p1, p2):
    """Tell whether the line from p1 to p2 is roughly axis-parallel.

    The line counts as axis-parallel when its end points differ by at most
    snap_x in x direction or by at most snap_y in y direction.
    """
    delta_x = abs(p1.x - p2.x)
    delta_y = abs(p1.y - p2.y)
    return delta_x <= snap_x or delta_y <= snap_y
|
|
1949
1990
|
|
|
@@ -2021,10 +2062,10 @@ def make_edges(page, clip=None, tset=None, add_lines=None):
|
|
|
2021
2062
|
elif i[0] == "re": # a rectangle: decompose into 4 lines
|
|
2022
2063
|
rect = i[1].normalize() # rectangle itself
|
|
2023
2064
|
# ignore minute rectangles
|
|
2024
|
-
if rect.height <=
|
|
2065
|
+
if rect.height <= snap_y and rect.width <= snap_x:
|
|
2025
2066
|
continue
|
|
2026
2067
|
|
|
2027
|
-
if rect.width <=
|
|
2068
|
+
if rect.width <= snap_x: # simulates a vertical line
|
|
2028
2069
|
x = abs(rect.x1 + rect.x0) / 2 # take middle value for x
|
|
2029
2070
|
p1 = Point(x, rect.y0)
|
|
2030
2071
|
p2 = Point(x, rect.y1)
|
|
@@ -2033,7 +2074,7 @@ def make_edges(page, clip=None, tset=None, add_lines=None):
|
|
|
2033
2074
|
EDGES.append(line_to_edge(line_dict))
|
|
2034
2075
|
continue
|
|
2035
2076
|
|
|
2036
|
-
if rect.height <=
|
|
2077
|
+
if rect.height <= snap_y: # simulates a horizontal line
|
|
2037
2078
|
y = abs(rect.y1 + rect.y0) / 2 # take middle value for y
|
|
2038
2079
|
p1 = Point(rect.x0, y)
|
|
2039
2080
|
p2 = Point(rect.x1, y)
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
%pythoncode %{
VersionFitz = "1.23.10" # MuPDF version.
VersionBind = "1.23.24" # PyMuPDF version.
VersionDate = "2024-02-19 00:00:01"
version = (VersionBind, VersionFitz, "20240219000001")
# Numeric form of the PyMuPDF (binding) version, usable in tuple comparisons.
# It must be built from VersionBind: VersionFitz is the MuPDF version, which
# is a different number in this release.
pymupdf_version_tuple = tuple(int(i) for i in VersionBind.split('.'))
%}
|
|
Binary file
|
|
@@ -72,3 +72,68 @@ def test_3143():
|
|
|
72
72
|
set1 = set([p["layer"] for p in page.get_drawings()])
|
|
73
73
|
set2 = set([b[2] for b in page.get_bboxlog(layers=True)])
|
|
74
74
|
assert set0 == set1 == set2
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_3180():
    """Build a PDF with a combo box widget and three optional content groups.

    The name suggests a regression test for issue 3180 (presumably the
    upstream issue number -- confirm).  There are no assertions: the test
    passes when none of the calls below raises.

    Side effect: writes ``test_3180.pdf`` into the ``tests`` folder below the
    parent of this file's directory (see the save call at the end).
    """
    doc = fitz.open()
    page = doc.new_page()

    # Define the items for the combo box
    combo_items = ['first', 'second', 'third']

    # Create a combo box field
    combo_box = fitz.Widget()  # create a new widget
    combo_box.field_type = fitz.PDF_WIDGET_TYPE_COMBOBOX
    combo_box.field_name = "myComboBox"
    combo_box.field_value = combo_items[0]
    combo_box.choice_values = combo_items
    combo_box.rect = fitz.Rect(50, 50, 200, 75)  # position of the combo box
    # JavaScript stored as the widget's "change" script.
    combo_box.script_change = """
var value = event.value;
app.alert('You selected: ' + value);

//var group_id = optional_content_group_ids[value];

"""

    # Insert the combo box into the page
    # https://pymupdf.readthedocs.io/en/latest/page.html#Page.add_widget
    page.add_widget(combo_box)

    # Create optional content groups
    # https://github.com/pymupdf/PyMuPDF-Utilities/blob/master/jupyter-notebooks/optional-content.ipynb

    # Create one OCG (initially off) per combo box item and remember its id.
    optional_content_group_ids = {}
    for i, item in enumerate(combo_items):
        optional_content_group_id = doc.add_ocg(item, on=False)
        optional_content_group_ids[item] = optional_content_group_id
        # NOTE(review): rect and image_file_name are only referenced by the
        # disabled insert_image call below, so they are currently unused.
        rect = fitz.Rect(50, 100, 250, 300)
        image_file_name = f'{item}.png'
        # xref = page.insert_image(
        #     rect,
        #     filename=image_file_name,
        #     oc=optional_content_group_id,
        # )

    first_id = optional_content_group_ids['first']
    # NOTE(review): second_id and third_id are not used further down.
    second_id = optional_content_group_ids['second']
    third_id = optional_content_group_ids['third']

    # https://pymupdf.readthedocs.io/en/latest/document.html#Document.set_layer

    doc.set_layer(-1, basestate="OFF")
    layers = doc.get_layer()  # result is not inspected; only the call matters here
    doc.set_layer(config=-1, on=[first_id])

    # https://pymupdf.readthedocs.io/en/latest/document.html#Document.set_layer_ui_config
    # configs = doc.layer_ui_configs()
    # doc.set_layer_ui_config(0, fitz.PDF_OC_ON)
    # doc.set_layer_ui_config('third', action=2)

    # Save the PDF
    doc.save(os.path.abspath(f'{__file__}/../../tests/test_3180.pdf'))
    doc.close()
|