PyMuPDF 1.23.18__tar.gz → 1.23.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/PKG-INFO +1 -1
  2. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/changes.txt +18 -0
  3. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/setup.py +1 -1
  4. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src/__init__.py +91 -41
  5. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src/extra.i +46 -0
  6. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src/table.py +45 -39
  7. PyMuPDF-1.23.19/src_classic/version.i +7 -0
  8. PyMuPDF-1.23.19/tests/resources/small-table.pdf +0 -0
  9. PyMuPDF-1.23.19/tests/resources/strict-yes-no.pdf +0 -0
  10. PyMuPDF-1.23.19/tests/resources/test_3087.pdf +0 -0
  11. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_font.py +7 -0
  12. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_insertimage.py +19 -0
  13. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_pagedelete.py +8 -0
  14. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_tables.py +32 -0
  15. PyMuPDF-1.23.18/src_classic/version.i +0 -7
  16. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/COPYING +0 -0
  17. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/MANIFEST.in +0 -0
  18. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/README.md +0 -0
  19. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/READMErb.md +0 -0
  20. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/mupdf.tgz +0 -0
  21. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/pipcl.py +0 -0
  22. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/pyproject.toml +0 -0
  23. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/pytest.ini +0 -0
  24. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/scripts/gh_release.py +0 -0
  25. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/scripts/sysinstall.py +0 -0
  26. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/scripts/test.py +0 -0
  27. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src/__main__.py +0 -0
  28. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src/fitz.py +0 -0
  29. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src/utils.py +0 -0
  30. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/__init__.py +0 -0
  31. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/__main__.py +0 -0
  32. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/_config.h +0 -0
  33. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/fitz_old.i +0 -0
  34. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-annot.i +0 -0
  35. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-convert.i +0 -0
  36. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-defines.i +0 -0
  37. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-devices.i +0 -0
  38. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-fields.i +0 -0
  39. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-fileobj.i +0 -0
  40. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-geo-c.i +0 -0
  41. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-geo-py.i +0 -0
  42. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-globals.i +0 -0
  43. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-other.i +0 -0
  44. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-pdfinfo.i +0 -0
  45. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-pixmap.i +0 -0
  46. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-portfolio.i +0 -0
  47. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-python.i +0 -0
  48. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-select.i +0 -0
  49. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-stext.i +0 -0
  50. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/helper-xobject.i +0 -0
  51. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/src_classic/utils.py +0 -0
  52. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/README.md +0 -0
  53. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/001003ED.pdf +0 -0
  54. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/1.pdf +0 -0
  55. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/2.pdf +0 -0
  56. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/2201.00069.pdf +0 -0
  57. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/3.pdf +0 -0
  58. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/4.pdf +0 -0
  59. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/Bezier.epub +0 -0
  60. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/PragmaticaC.otf +0 -0
  61. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/bug1945.pdf +0 -0
  62. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/bug1971.pdf +0 -0
  63. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/chinese-tables.pdf +0 -0
  64. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/chinese-tables.pickle +0 -0
  65. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/circular-toc.pdf +0 -0
  66. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/cython.pdf +0 -0
  67. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/cython.pickle +0 -0
  68. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/full_toc.txt +0 -0
  69. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/full_toc2.txt +0 -0
  70. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/github_sample.pdf +0 -0
  71. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/has-bad-fonts.pdf +0 -0
  72. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/image-file1.pdf +0 -0
  73. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/img-transparent.png +0 -0
  74. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/joined.pdf +0 -0
  75. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/metadata.txt +0 -0
  76. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/mupdf_explored.pdf +0 -0
  77. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/nur-ruhig.jpg +0 -0
  78. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/quad-calc-0.pdf +0 -0
  79. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/simple_toc.txt +0 -0
  80. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/symbol-list.pdf +0 -0
  81. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/symbols.txt +0 -0
  82. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test-2333.pdf +0 -0
  83. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test-2462.pdf +0 -0
  84. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test2093.pdf +0 -0
  85. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test2182.pdf +0 -0
  86. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test2238.pdf +0 -0
  87. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_1645_expected.pdf +0 -0
  88. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_1645_expected_1.22.pdf +0 -0
  89. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_1824.pdf +0 -0
  90. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2108.pdf +0 -0
  91. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2270.pdf +0 -0
  92. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2533.pdf +0 -0
  93. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2548.pdf +0 -0
  94. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2553-2.pdf +0 -0
  95. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2553.pdf +0 -0
  96. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2596.pdf +0 -0
  97. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2608_expected +0 -0
  98. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2634.pdf +0 -0
  99. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2635.pdf +0 -0
  100. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2645_1.pdf +0 -0
  101. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2645_2.pdf +0 -0
  102. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2645_3.pdf +0 -0
  103. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2710.pdf +0 -0
  104. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2730.pdf +0 -0
  105. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2788.pdf +0 -0
  106. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2791_content.pdf +0 -0
  107. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2791_coverpage.pdf +0 -0
  108. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2861.pdf +0 -0
  109. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2871.pdf +0 -0
  110. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2885.pdf +0 -0
  111. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2904.pdf +0 -0
  112. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2907.pdf +0 -0
  113. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2954.pdf +0 -0
  114. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2957_1.pdf +0 -0
  115. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2957_2.pdf +0 -0
  116. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2969.pdf +0 -0
  117. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_2979.pdf +0 -0
  118. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_3058.pdf +0 -0
  119. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_3062.pdf +0 -0
  120. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_3070.pdf +0 -0
  121. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_annot_file_info.pdf +0 -0
  122. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/test_delete_image.pdf +0 -0
  123. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/type3font.pdf +0 -0
  124. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/v110-changes.pdf +0 -0
  125. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/resources/widgettest.pdf +0 -0
  126. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/run_compound.py +0 -0
  127. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_2548.py +0 -0
  128. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_2634.py +0 -0
  129. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_2791.py +0 -0
  130. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_2904.py +0 -0
  131. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_2907.py +0 -0
  132. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_annots.py +0 -0
  133. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_badfonts.py +0 -0
  134. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_crypting.py +0 -0
  135. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_docs_samples.py +0 -0
  136. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_drawings.py +0 -0
  137. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_embeddedfiles.py +0 -0
  138. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_extractimage.py +0 -0
  139. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_flake8.py +0 -0
  140. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_general.py +0 -0
  141. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_geometry.py +0 -0
  142. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_imagebbox.py +0 -0
  143. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_insertpdf.py +0 -0
  144. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_linequad.py +0 -0
  145. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_metadata.py +0 -0
  146. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_named_links.py +0 -0
  147. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_nonpdf.py +0 -0
  148. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_object_manipulation.py +0 -0
  149. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_optional_content.py +0 -0
  150. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_page_links.py +0 -0
  151. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_pagelabels.py +0 -0
  152. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_pixmap.py +0 -0
  153. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_showpdfpage.py +0 -0
  154. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_story.py +0 -0
  155. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_tesseract.py +0 -0
  156. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_textbox.py +0 -0
  157. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_textextract.py +0 -0
  158. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_textsearch.py +0 -0
  159. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_toc.py +0 -0
  160. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_widgets.py +0 -0
  161. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/tests/test_word_delimiters.py +0 -0
  162. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/valgrind.supp +0 -0
  163. {PyMuPDF-1.23.18 → PyMuPDF-1.23.19}/wdev.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PyMuPDF
3
- Version: 1.23.18
3
+ Version: 1.23.19
4
4
  Summary: A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents.
5
5
  Description-Content-Type: text/markdown
6
6
  Author: Artifex
@@ -2,6 +2,24 @@ Change Log
2
2
  ==========
3
3
 
4
4
 
5
+ **Changes in version 1.23.19 (2024-01-25)**
6
+
7
+ * Bug fixes:
8
+
9
+ * **Fixed** `3087 <https://github.com/pymupdf/PyMuPDF/issues/3087>`_: Exception in insert_image with mask specified
10
+ * **Fixed** `3094 <https://github.com/pymupdf/PyMuPDF/issues/3094>`_: TypeError: '<' not supported between instances of 'FzLocation' and 'int' in doc.delete_pages
11
+
12
+ * Other:
13
+
14
+ * When finding tables:
15
+
16
+ * Allow addition of user-defined "virtual" vector graphics when finding tables.
17
+ * Confirm that the enveloping bboxes of vector graphics are inside the clip rectangle.
18
+ * Avoid slow finding of rectangle intersections.
19
+
20
+ * Added `Font.bbox` property.
21
+
22
+
5
23
  **Changes in version 1.23.18 (2024-01-23)**
6
24
 
7
25
  * Bug fixes:
@@ -1072,7 +1072,7 @@ with open( f'{g_root}/READMErb.md', encoding='utf-8') as f:
1072
1072
  # We generate different wheels depending on g_flavour.
1073
1073
  #
1074
1074
 
1075
- version = '1.23.18'
1075
+ version = '1.23.19'
1076
1076
  version_b = '1.23.9'
1077
1077
 
1078
1078
  tag_python = None
@@ -6038,6 +6038,10 @@ class Font:
6038
6038
  """Return the glyph ascender value."""
6039
6039
  return mupdf.fz_font_ascender(self.this)
6040
6040
 
6041
+ @property
6042
+ def bbox(self):
6043
+ return self.this.fz_font_bbox()
6044
+
6041
6045
  @property
6042
6046
  def buffer(self):
6043
6047
  buffer_ = mupdf.FzBuffer( mupdf.ll_fz_keep_buffer( self.this.m_internal.buffer))
@@ -7804,24 +7808,68 @@ class Page:
7804
7808
  #log( 'do_have_imask')
7805
7809
  # mupdf.FzCompressedBuffer is not copyable, so
7806
7810
  # mupdf.fz_compressed_image_buffer() does not work - it cannot
7807
- # return by value. So we need to construct locally from a raw
7808
- # fz_compressed_buffer.
7809
- #cbuf1 = mupdf.fz_compressed_image_buffer(image)
7810
- cbuf1 = mupdf.FzCompressedBuffer( mupdf.ll_fz_compressed_image_buffer( image.m_internal))
7811
- if not cbuf1.m_internal:
7811
+ # return by value. And sharing a fz_compressed_buffer between two
7812
+ # `fz_image`'s doesn't work, so we use a raw fz_compressed_buffer
7813
+ # here, not a mupdf.FzCompressedBuffer.
7814
+ #
7815
+ cbuf1 = mupdf.ll_fz_compressed_image_buffer( image.m_internal)
7816
+ if not cbuf1:
7812
7817
  raise ValueError( "uncompressed image cannot have mask")
7813
7818
  bpc = image.bpc()
7814
7819
  colorspace = image.colorspace()
7815
7820
  xres, yres = mupdf.fz_image_resolution(image)
7816
7821
  mask = mupdf.fz_new_image_from_buffer(maskbuf)
7817
- zimg = mupdf.fz_new_image_from_compressed_buffer(
7818
- w, h,
7819
- bpc, colorspace, xres, yres, 1, 0, None,
7820
- None, cbuf1, mask
7821
- )
7822
- image = zimg
7823
- #goto have_image()
7822
+ if mupdf_version_tuple >= (1, 24):
7823
+ zimg = mupdf.ll_fz_new_image_from_compressed_buffer2(
7824
+ w,
7825
+ h,
7826
+ bpc,
7827
+ colorspace.m_internal,
7828
+ xres,
7829
+ yres,
7830
+ 1, # interpolate
7831
+ 0, # imagemask,
7832
+ None, # decode
7833
+ None, # colorkey
7834
+ cbuf1,
7835
+ mask.m_internal,
7836
+ )
7837
+ else:
7838
+ # mupdf.ll_fz_new_image_from_compressed_buffer() is not usable.
7839
+ zimg = extra.fz_new_image_from_compressed_buffer(
7840
+ w,
7841
+ h,
7842
+ bpc,
7843
+ colorspace.m_internal,
7844
+ xres,
7845
+ yres,
7846
+ 1, # interpolate
7847
+ 0, # imagemask,
7848
+ cbuf1,
7849
+ mask.m_internal,
7850
+ )
7851
+
7852
+ zimg = mupdf.FzImage(zimg)
7824
7853
 
7854
+ # `image` and `zimg` both have pointers to the same
7855
+ # `fz_compressed_buffer`, which is not reference counted, and they
7856
+ # both think that they own it.
7857
+ #
7858
+ # So we do what the classic implementation does, and simply ensure
7859
+ # that `fz_drop_image(image)` is never called. This will leak
7860
+ # some of `image`'s allocations (for example the main `fz_image`
7861
+ # allocation), but it's not trivial to avoid this.
7862
+ #
7863
+ # Perhaps we could manually set `fz_image`'s
7864
+ # `fz_compressed_buffer*` to null? Trouble is we'd have to
7865
+ # cast the `fz_image*` to a `fz_compressed_image*` to see the
7866
+ # `fz_compressed_buffer*`, which is probably not possible from
7867
+ # Python?
7868
+ #
7869
+ image.m_internal = None
7870
+
7871
+ image = zimg
7872
+
7825
7873
  if do_have_image:
7826
7874
  #log( 'do_have_image')
7827
7875
  ref = mupdf.pdf_add_image(pdf, image)
@@ -14083,7 +14131,8 @@ def _remove_dest_range(pdf, numbers):
14083
14131
  target = mupdf.pdf_array_get( dest, 0)
14084
14132
  pno = mupdf.pdf_lookup_page_number( pdf, target)
14085
14133
  elif mupdf.pdf_is_string( dest):
14086
- pno, _, _ = mupdf.fz_resolve_link( pdf.super(), mupdf.pdf_to_text_string( dest))
14134
+ location, _, _ = mupdf.fz_resolve_link( pdf.super(), mupdf.pdf_to_text_string( dest))
14135
+ pno = location.page
14087
14136
  if pno < 0: # page number lookup did not work
14088
14137
  continue
14089
14138
  n1 = pno
@@ -14628,34 +14677,35 @@ def JM_choice_options(annot):
14628
14677
  '''
14629
14678
  annot_obj = mupdf.pdf_annot_obj( annot.this)
14630
14679
 
14631
- # pdf_choice_widget_options() is not usable from python, so we implement it
14632
- # ourselves here.
14633
- #
14634
- # fixme: put this in mupdf python bindings.
14635
- #
14636
- def pdf_choice_widget_options( annot, exportval):
14637
- #log( '{=type(annot)}')
14638
- optarr = mupdf.pdf_dict_get_inheritable( mupdf.pdf_annot_obj(annot.this), PDF_NAME('Opt'))
14639
- #log( '{optarr=}')
14640
- n = mupdf.pdf_array_len(optarr)
14641
- opts = []
14642
- if not n:
14680
+ if mupdf_version_tuple >= (1, 24):
14681
+ opts = mupdf.pdf_choice_widget_options2( annot, 0)
14682
+ else:
14683
+ # pdf_choice_widget_options() is not usable from python, so we
14684
+ # implement it ourselves here.
14685
+ #
14686
+ def pdf_choice_widget_options( annot, exportval):
14687
+ #log( '{=type(annot)}')
14688
+ optarr = mupdf.pdf_dict_get_inheritable( mupdf.pdf_annot_obj(annot.this), PDF_NAME('Opt'))
14689
+ #log( '{optarr=}')
14690
+ n = mupdf.pdf_array_len(optarr)
14691
+ opts = []
14692
+ if not n:
14693
+ return opts
14694
+ optarr = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Opt'))
14695
+ for i in range(n):
14696
+ m = mupdf.pdf_array_len(mupdf.pdf_array_get(optarr, i))
14697
+ if m == 2:
14698
+ val = (
14699
+ mupdf.pdf_to_text_string(mupdf.pdf_array_get(mupdf.pdf_array_get(optarr, i), 0)),
14700
+ mupdf.pdf_to_text_string(mupdf.pdf_array_get(mupdf.pdf_array_get(optarr, i), 1)),
14701
+ )
14702
+ opts.append(val)
14703
+ else:
14704
+ val = JM_UnicodeFromStr(mupdf.pdf_to_text_string(mupdf.pdf_array_get(optarr, i)))
14705
+ opts.append(val)
14643
14706
  return opts
14644
- optarr = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Opt'))
14645
- for i in range(n):
14646
- m = mupdf.pdf_array_len(mupdf.pdf_array_get(optarr, i))
14647
- if m == 2:
14648
- val = (
14649
- mupdf.pdf_to_text_string(mupdf.pdf_array_get(mupdf.pdf_array_get(optarr, i), 0)),
14650
- mupdf.pdf_to_text_string(mupdf.pdf_array_get(mupdf.pdf_array_get(optarr, i), 1)),
14651
- )
14652
- opts.append(val)
14653
- else:
14654
- val = JM_UnicodeFromStr(mupdf.pdf_to_text_string(mupdf.pdf_array_get(optarr, i)))
14655
- opts.append(val)
14656
- return opts
14657
14707
 
14658
- opts = pdf_choice_widget_options( annot, 0)
14708
+ opts = pdf_choice_widget_options( annot, 0)
14659
14709
  n = len( opts)
14660
14710
  if n == 0:
14661
14711
  return # wrong widget type
@@ -21748,8 +21798,8 @@ def int_rc(text):
21748
21798
  return int(text)
21749
21799
 
21750
21800
  VersionFitz = "1.23.9" # MuPDF version.
21751
- VersionBind = "1.23.18" # PyMuPDF version.
21752
- VersionDate = "2024-01-23 00:00:01"
21801
+ VersionBind = "1.23.19" # PyMuPDF version.
21802
+ VersionDate = "2024-01-25 00:00:01"
21753
21803
  VersionDate2 = VersionDate.replace('-', '').replace(' ', '').replace(':', '')
21754
21804
  version = (VersionBind, VersionFitz, VersionDate2)
21755
21805
  pymupdf_version_tuple = tuple( [int_rc(i) for i in VersionBind.split('.')])
@@ -4294,6 +4294,39 @@ no_more_matches:;
4294
4294
  return quads;
4295
4295
  }
4296
4296
 
4297
+ /* MuPDF-1.23.x has an incorrect and unusable
4298
+ fz_new_image_from_compressed_buffer() wrapper that thinks the `decode` and
4299
+ `colorkey` args are out-params. So we provide an alternative wrapper where
4300
+ we always set these args to null, which is sufficient for PyMuPDF caller
4301
+ `Document._insert_image()`. */
4302
+ fz_image* fz_new_image_from_compressed_buffer(
4303
+ int w,
4304
+ int h,
4305
+ int bpc,
4306
+ fz_colorspace *colorspace,
4307
+ int xres,
4308
+ int yres,
4309
+ int interpolate,
4310
+ int imagemask,
4311
+ fz_compressed_buffer *buffer,
4312
+ fz_image *mask
4313
+ )
4314
+ {
4315
+ return mupdf::ll_fz_new_image_from_compressed_buffer(
4316
+ w,
4317
+ h,
4318
+ bpc,
4319
+ colorspace,
4320
+ xres,
4321
+ yres,
4322
+ interpolate,
4323
+ imagemask,
4324
+ nullptr,
4325
+ nullptr,
4326
+ buffer,
4327
+ mask
4328
+ );
4329
+ }
4297
4330
 
4298
4331
  %}
4299
4332
 
@@ -4468,3 +4501,16 @@ int pixmap_n(mupdf::FzPixmap& pixmap);
4468
4501
  PyObject* JM_search_stext_page(fz_stext_page *page, const char *needle);
4469
4502
 
4470
4503
  PyObject *set_pixel(fz_pixmap* pm, int x, int y, PyObject *color);
4504
+
4505
+ fz_image* fz_new_image_from_compressed_buffer(
4506
+ int w,
4507
+ int h,
4508
+ int bpc,
4509
+ fz_colorspace *colorspace,
4510
+ int xres,
4511
+ int yres,
4512
+ int interpolate,
4513
+ int imagemask,
4514
+ fz_compressed_buffer *buffer,
4515
+ fz_image *mask
4516
+ );
@@ -1872,42 +1872,26 @@ def make_chars(page, clip=None):
1872
1872
  # We are ignoring Bézier curves completely and are converting everything
1873
1873
  # else to lines.
1874
1874
  # -----------------------------------------------------------------------------
1875
- def make_edges(page, clip=None, tset=None):
1876
- def has_text(bbox):
1877
- text = page.get_text(clip=bbox).replace("\n", "").strip()
1878
- if text:
1879
- return True
1880
- return False
1881
-
1875
+ def make_edges(page, clip=None, tset=None, add_lines=None):
1882
1876
  def clean_graphics():
1883
1877
  """Detect and join rectangles of connected vector graphics."""
1884
- # we need to exclude meaningless graphics that e.g. paint a white
1885
- # rectangle on the full page.
1886
-
1887
- parea = abs(page.rect) * 0.8 # area of the full page (80%)
1888
-
1878
+ lines_strict = (
1879
+ tset.vertical_strategy == "lines_strict"
1880
+ or tset.horizontal_strategy == "lines_strict"
1881
+ )
1889
1882
  # exclude irrelevant graphics
1890
1883
  paths = []
1891
1884
  for p in page.get_drawings():
1892
- if abs(p["rect"]) >= parea:
1893
- continue
1894
- if "s" in p["type"]:
1895
- paths.append(p)
1896
- continue
1897
1885
  if (
1898
- p["rect"].width > 3
1899
- and p["rect"].height > 3
1900
- and (
1901
- tset.vertical_strategy == "lines_strict"
1902
- or tset.horizontal_strategy == "lines_strict"
1903
- )
1904
- ):
1886
+ p["type"] == "f"
1887
+ and lines_strict
1888
+ and p["rect"].width > tset.snap_x_tolerance
1889
+ and p["rect"].height > tset.snap_y_tolerance
1890
+ ): # ignore fill-only graphics if they are no lines
1905
1891
  continue
1906
1892
  paths.append(p)
1907
1893
 
1908
- # make a list of vector graphics rectangles (IRects are sufficient)
1909
1894
  prects = sorted([p["rect"] for p in paths], key=lambda r: (r.y1, r.x0))
1910
-
1911
1895
  new_rects = [] # the final list of joined rectangles
1912
1896
 
1913
1897
  # -------------------------------------------------------------------------
@@ -1921,9 +1905,15 @@ def make_edges(page, clip=None, tset=None):
1921
1905
  for i in range(len(prects) - 1, -1, -1): # run backwards
1922
1906
  if i == 0: # don't touch first rectangle
1923
1907
  continue
1924
- if r.intersects(prects[i]):
1925
- r |= prects[i] # join in to first rect
1926
- prects[0] = +r # update first
1908
+ ri = prects[i]
1909
+ if (
1910
+ r.x0 <= ri.x0 <= r.x1
1911
+ or r.x0 <= ri.x1 <= r.x1
1912
+ or r.y0 <= ri.y0 <= r.y1
1913
+ or r.y0 <= ri.y1 <= r.y1
1914
+ ):
1915
+ r |= ri # join in to first rect
1916
+ prects[0] = r # update first
1927
1917
  del prects[i] # delete this rect
1928
1918
  repeat = True
1929
1919
 
@@ -1932,9 +1922,7 @@ def make_edges(page, clip=None, tset=None):
1932
1922
  prects = sorted(list(set(prects)), key=lambda r: (r.y1, r.x0))
1933
1923
 
1934
1924
  new_rects = sorted(list(set(new_rects)), key=lambda r: (r.y1, r.x0))
1935
- return [
1936
- r for r in new_rects if r.width > 5 and r.height > 5 and has_text(r)
1937
- ], paths
1925
+ return [r for r in new_rects if r.width > 5 and r.height > 5], paths
1938
1926
 
1939
1927
  global EDGES
1940
1928
  bboxes, paths = clean_graphics()
@@ -2091,18 +2079,33 @@ def make_edges(page, clip=None, tset=None):
2091
2079
  EDGES.append(line_to_edge(line_dict))
2092
2080
 
2093
2081
  path = {"color": (0, 0, 0), "fill": None, "width": 1}
2094
- for bbox in bboxes:
2082
+ for bbox in bboxes: # add the border lines for all enveloping bboxes
2095
2083
  line_dict = make_line(path, bbox.tl, bbox.tr, clip)
2096
- EDGES.append(line_to_edge(line_dict))
2084
+ if line_dict:
2085
+ EDGES.append(line_to_edge(line_dict))
2097
2086
 
2098
2087
  line_dict = make_line(path, bbox.bl, bbox.br, clip)
2099
- EDGES.append(line_to_edge(line_dict))
2088
+ if line_dict:
2089
+ EDGES.append(line_to_edge(line_dict))
2100
2090
 
2101
2091
  line_dict = make_line(path, bbox.tl, bbox.bl, clip)
2102
- EDGES.append(line_to_edge(line_dict))
2092
+ if line_dict:
2093
+ EDGES.append(line_to_edge(line_dict))
2103
2094
 
2104
2095
  line_dict = make_line(path, bbox.tr, bbox.br, clip)
2105
- EDGES.append(line_to_edge(line_dict))
2096
+ if line_dict:
2097
+ EDGES.append(line_to_edge(line_dict))
2098
+
2099
+ if add_lines is not None: # add user-specified lines
2100
+ assert isinstance(add_lines, (tuple, list))
2101
+ else:
2102
+ add_lines = []
2103
+ for p1, p2 in add_lines:
2104
+ p1 = Point(p1)
2105
+ p2 = Point(p2)
2106
+ line_dict = make_line(path, p1, p2, clip)
2107
+ if line_dict:
2108
+ EDGES.append(line_to_edge(line_dict))
2106
2109
 
2107
2110
 
2108
2111
  def page_rotation_set0(page):
@@ -2152,7 +2155,7 @@ def page_rotation_set0(page):
2152
2155
  def page_rotation_reset(page, xref, rot, mediabox):
2153
2156
  """Reset page rotation to original values.
2154
2157
 
2155
- To be used before we return tabes."""
2158
+ To be used before we return tables."""
2156
2159
  doc = page.parent # document of the page
2157
2160
  doc.update_stream(xref, b" ") # remove de-rotation matrix
2158
2161
  page.set_mediabox(mediabox) # set mediabox to old value
@@ -2185,6 +2188,7 @@ def find_tables(
2185
2188
  text_x_tolerance=3,
2186
2189
  text_y_tolerance=3,
2187
2190
  strategy=None, # offer abbreviation
2191
+ add_lines=None, # optional user-specified lines
2188
2192
  ):
2189
2193
  global CHARS, EDGES
2190
2194
  CHARS = []
@@ -2237,7 +2241,9 @@ def find_tables(
2237
2241
  page.table_settings = tset
2238
2242
 
2239
2243
  make_chars(page, clip=clip) # create character list of page
2240
- make_edges(page, clip=clip, tset=tset) # create lines and curves
2244
+ make_edges(
2245
+ page, clip=clip, tset=tset, add_lines=add_lines
2246
+ ) # create lines and curves
2241
2247
  tables = TableFinder(page, settings=tset)
2242
2248
 
2243
2249
  TOOLS.set_small_glyph_heights(old_small)
@@ -0,0 +1,7 @@
1
+ %pythoncode %{
2
+ VersionFitz = "1.23.9" # MuPDF version.
3
+ VersionBind = "1.23.19" # PyMuPDF version.
4
+ VersionDate = "2024-01-25 00:00:01"
5
+ version = (VersionBind, VersionFitz, "20240125000001")
6
+ pymupdf_version_tuple = tuple( [int(i) for i in VersionFitz.split('.')])
7
+ %}
@@ -16,6 +16,13 @@ def test_font1():
16
16
  assert cl[i] == font.glyph_advance(ord(text[i])) * 20
17
17
  font2 = fitz.Font(fontbuffer=font.buffer)
18
18
  assert font2.valid_codepoints() == font.valid_codepoints()
19
+
20
+ # Also check we can get font's bbox.
21
+ bbox1 = font.bbox
22
+ print(f'{bbox1=}')
23
+ if hasattr(fitz, 'mupdf'):
24
+ bbox2 = font.this.fz_font_bbox()
25
+ assert bbox2 == bbox1
19
26
 
20
27
 
21
28
  def test_font2():
@@ -45,3 +45,22 @@ def test_compress():
45
45
  deflate_fonts=True,
46
46
  pretty=True,
47
47
  )
48
+
49
+ def test_3087():
50
+ path = os.path.abspath(f'{__file__}/../../tests/resources/test_3087.pdf')
51
+
52
+ doc = fitz.open(path)
53
+ page = doc[0]
54
+ print(page.get_images())
55
+ base = doc.extract_image(5)["image"]
56
+ mask = doc.extract_image(5)["image"]
57
+ page = doc.new_page()
58
+ page.insert_image(page.rect, stream=base, mask=mask)
59
+
60
+ doc = fitz.open(path)
61
+ page = doc[0]
62
+ print(page.get_images())
63
+ base = doc.extract_image(5)["image"]
64
+ mask = doc.extract_image(6)["image"]
65
+ page = doc.new_page()
66
+ page.insert_image(page.rect, stream=base, mask=mask)
@@ -13,6 +13,8 @@ Then delete some pages and verify:
13
13
  - the remaining TOC items still point to the correct page
14
14
  - the document has no more links at all
15
15
  """
16
+ import os
17
+
16
18
  import fitz
17
19
 
18
20
  page_count = 100 # initial document length
@@ -67,3 +69,9 @@ def test_deletion():
67
69
  doc.copy_page(0)
68
70
  doc.move_page(0)
69
71
  doc.fullcopy_page(0)
72
+
73
+ def test_3094():
74
+ path = os.path.abspath(f'{__file__}/../../tests/resources/test_2871.pdf')
75
+ document = fitz.open(path)
76
+ pnos = [i for i in range(0, document.page_count, 2)]
77
+ document.delete_pages(pnos)
@@ -182,3 +182,35 @@ def test_3062():
182
182
  tab1 = page.find_tables()[0]
183
183
  cells1 = tab1.cells
184
184
  assert cells1 == cells0
185
+
186
+
187
+ def test_strict_lines():
188
+ """Confirm that ignoring borderless rectangles improves table detection."""
189
+ filename = os.path.join(scriptdir, "resources", "strict-yes-no.pdf")
190
+ doc = fitz.open(filename)
191
+ page = doc[0]
192
+
193
+ tab1 = page.find_tables()[0]
194
+ tab2 = page.find_tables(strategy="lines_strict")[0]
195
+ assert tab2.row_count < tab1.row_count
196
+ assert tab2.col_count < tab1.col_count
197
+
198
+
199
+ def test_add_lines():
200
+ """Test new parameter add_lines for table recognition."""
201
+ filename = os.path.join(scriptdir, "resources", "small-table.pdf")
202
+ doc = fitz.open(filename)
203
+ page = doc[0]
204
+ tab1 = page.find_tables()[0]
205
+ assert tab1.col_count == 1
206
+ assert tab1.row_count == 5
207
+ more_lines = [
208
+ ((238.9949951171875, 200.0), (238.9949951171875, 300.0)),
209
+ ((334.5559997558594, 200.0), (334.5559997558594, 300.0)),
210
+ ((433.1809997558594, 200.0), (433.1809997558594, 300.0)),
211
+ ]
212
+
213
+ # these 3 additional vertical lines should yield 3 additional columns
214
+ tab2 = page.find_tables(add_lines=more_lines)[0]
215
+ assert tab2.col_count == 4
216
+ assert tab2.row_count == 5
@@ -1,7 +0,0 @@
1
- %pythoncode %{
2
- VersionFitz = "1.23.9" # MuPDF version.
3
- VersionBind = "1.23.18" # PyMuPDF version.
4
- VersionDate = "2024-01-23 00:00:01"
5
- version = (VersionBind, VersionFitz, "20240123000001")
6
- pymupdf_version_tuple = tuple( [int(i) for i in VersionFitz.split('.')])
7
- %}
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes