PyMuPDF 1.23.12__tar.gz → 1.23.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/PKG-INFO +1 -1
  2. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/changes.txt +25 -0
  3. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/setup.py +1 -1
  4. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src/__init__.py +20 -15
  5. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src/table.py +274 -76
  6. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/fitz_old.i +2 -2
  7. PyMuPDF-1.23.14/src_classic/version.i +7 -0
  8. PyMuPDF-1.23.14/tests/resources/test_2979.pdf +0 -0
  9. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_general.py +7 -0
  10. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_tables.py +21 -0
  11. PyMuPDF-1.23.12/src_classic/version.i +0 -7
  12. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/COPYING +0 -0
  13. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/MANIFEST.in +0 -0
  14. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/README.md +0 -0
  15. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/READMErb.md +0 -0
  16. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/mupdf.tgz +0 -0
  17. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/pipcl.py +0 -0
  18. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/pyproject.toml +0 -0
  19. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/pytest.ini +0 -0
  20. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/scripts/gh_release.py +0 -0
  21. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/scripts/sysinstall.py +0 -0
  22. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/scripts/test.py +0 -0
  23. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src/__main__.py +0 -0
  24. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src/extra.i +0 -0
  25. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src/fitz.py +0 -0
  26. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src/utils.py +0 -0
  27. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/__init__.py +0 -0
  28. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/__main__.py +0 -0
  29. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/_config.h +0 -0
  30. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-annot.i +0 -0
  31. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-convert.i +0 -0
  32. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-defines.i +0 -0
  33. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-devices.i +0 -0
  34. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-fields.i +0 -0
  35. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-fileobj.i +0 -0
  36. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-geo-c.i +0 -0
  37. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-geo-py.i +0 -0
  38. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-globals.i +0 -0
  39. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-other.i +0 -0
  40. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-pdfinfo.i +0 -0
  41. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-pixmap.i +0 -0
  42. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-portfolio.i +0 -0
  43. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-python.i +0 -0
  44. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-select.i +0 -0
  45. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-stext.i +0 -0
  46. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/helper-xobject.i +0 -0
  47. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/src_classic/utils.py +0 -0
  48. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/README.md +0 -0
  49. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/001003ED.pdf +0 -0
  50. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/1.pdf +0 -0
  51. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/2.pdf +0 -0
  52. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/2201.00069.pdf +0 -0
  53. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/3.pdf +0 -0
  54. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/4.pdf +0 -0
  55. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/Bezier.epub +0 -0
  56. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/PragmaticaC.otf +0 -0
  57. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/bug1945.pdf +0 -0
  58. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/bug1971.pdf +0 -0
  59. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/chinese-tables.pdf +0 -0
  60. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/chinese-tables.pickle +0 -0
  61. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/circular-toc.pdf +0 -0
  62. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/cython.pdf +0 -0
  63. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/cython.pickle +0 -0
  64. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/full_toc.txt +0 -0
  65. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/full_toc2.txt +0 -0
  66. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/github_sample.pdf +0 -0
  67. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/has-bad-fonts.pdf +0 -0
  68. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/image-file1.pdf +0 -0
  69. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/img-transparent.png +0 -0
  70. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/joined.pdf +0 -0
  71. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/metadata.txt +0 -0
  72. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/mupdf_explored.pdf +0 -0
  73. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/nur-ruhig.jpg +0 -0
  74. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/quad-calc-0.pdf +0 -0
  75. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/simple_toc.txt +0 -0
  76. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/symbol-list.pdf +0 -0
  77. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/symbols.txt +0 -0
  78. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test-2333.pdf +0 -0
  79. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test-2462.pdf +0 -0
  80. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test2093.pdf +0 -0
  81. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test2182.pdf +0 -0
  82. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test2238.pdf +0 -0
  83. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_1645_expected.pdf +0 -0
  84. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_1645_expected_1.22.pdf +0 -0
  85. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_1824.pdf +0 -0
  86. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2108.pdf +0 -0
  87. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2270.pdf +0 -0
  88. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2533.pdf +0 -0
  89. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2548.pdf +0 -0
  90. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2553-2.pdf +0 -0
  91. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2553.pdf +0 -0
  92. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2596.pdf +0 -0
  93. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2608_expected +0 -0
  94. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2634.pdf +0 -0
  95. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2635.pdf +0 -0
  96. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2645_1.pdf +0 -0
  97. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2645_2.pdf +0 -0
  98. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2645_3.pdf +0 -0
  99. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2710.pdf +0 -0
  100. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2730.pdf +0 -0
  101. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2788.pdf +0 -0
  102. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2791_content.pdf +0 -0
  103. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2791_coverpage.pdf +0 -0
  104. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2861.pdf +0 -0
  105. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2871.pdf +0 -0
  106. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2904.pdf +0 -0
  107. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2907.pdf +0 -0
  108. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2954.pdf +0 -0
  109. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2957_1.pdf +0 -0
  110. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2957_2.pdf +0 -0
  111. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_2969.pdf +0 -0
  112. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/test_delete_image.pdf +0 -0
  113. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/type3font.pdf +0 -0
  114. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/v110-changes.pdf +0 -0
  115. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/resources/widgettest.pdf +0 -0
  116. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/run_compound.py +0 -0
  117. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_2548.py +0 -0
  118. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_2634.py +0 -0
  119. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_2791.py +0 -0
  120. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_2904.py +0 -0
  121. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_2907.py +0 -0
  122. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_annots.py +0 -0
  123. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_badfonts.py +0 -0
  124. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_crypting.py +0 -0
  125. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_docs_samples.py +0 -0
  126. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_drawings.py +0 -0
  127. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_embeddedfiles.py +0 -0
  128. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_extractimage.py +0 -0
  129. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_flake8.py +0 -0
  130. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_font.py +0 -0
  131. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_geometry.py +0 -0
  132. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_imagebbox.py +0 -0
  133. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_insertimage.py +0 -0
  134. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_insertpdf.py +0 -0
  135. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_linequad.py +0 -0
  136. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_metadata.py +0 -0
  137. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_named_links.py +0 -0
  138. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_nonpdf.py +0 -0
  139. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_object_manipulation.py +0 -0
  140. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_optional_content.py +0 -0
  141. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_pagedelete.py +0 -0
  142. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_pagelabels.py +0 -0
  143. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_pixmap.py +0 -0
  144. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_showpdfpage.py +0 -0
  145. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_story.py +0 -0
  146. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_tesseract.py +0 -0
  147. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_textbox.py +0 -0
  148. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_textextract.py +0 -0
  149. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_textsearch.py +0 -0
  150. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_toc.py +0 -0
  151. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_widgets.py +0 -0
  152. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/tests/test_word_delimiters.py +0 -0
  153. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/valgrind.supp +0 -0
  154. {PyMuPDF-1.23.12 → PyMuPDF-1.23.14}/wdev.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PyMuPDF
3
- Version: 1.23.12
3
+ Version: 1.23.14
4
4
  Summary: A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents.
5
5
  Description-Content-Type: text/markdown
6
6
  Author: Artifex
@@ -2,6 +2,31 @@ Change Log
2
2
  ==========
3
3
 
4
4
 
5
+ **Changes in version 1.23.14 (2024-01-15)**
6
+
7
+ * Bug fixes:
8
+
9
+ * **Fixed** `3038 <https://github.com/pymupdf/PyMuPDF/issues/3038>`_: JM_pixmap_from_display_list > Assertion Error : Checking for wrong type
10
+ * **Fixed** `3039 <https://github.com/pymupdf/PyMuPDF/issues/3039>`_: Issue with doc.close() not closing the document in PyMuPDF
11
+
12
+ * Other:
13
+
14
+ * Ensure valid "re" rectangles in `Page.get_drawings()` with derotated pages.
15
+
16
+
17
+ **Changes in version 1.23.13 (2024-01-15)**
18
+
19
+ * Bug fixes:
20
+
21
+ * **Fixed** `2979 <https://github.com/pymupdf/PyMuPDF/issues/2979>`_: list index out of range in to_pandas()
22
+ * **Fixed** `3001 <https://github.com/pymupdf/PyMuPDF/issues/3001>`_: Calling find_tables() on one document alters the bounding boxes of a subsequent document
23
+
24
+ * Other:
25
+
26
+ * Fixed `Rect.height` and `Rect.width` to never return negative values.
27
+ * Fixed `TextPage.extractIMGINFO()`'s returned `dictkey_yres` value.
28
+
29
+
5
30
  **Changes in version 1.23.12 (2024-01-12)**
6
31
 
7
32
  * * **Fixed** `3027 <https://github.com/pymupdf/PyMuPDF/issues/3027>`_: Page.get_text throws Attribute Error for 'parent'
@@ -1072,7 +1072,7 @@ with open( f'{g_root}/READMErb.md', encoding='utf-8') as f:
1072
1072
  # We generate different wheels depending on g_flavour.
1073
1073
  #
1074
1074
 
1075
- version = '1.23.12'
1075
+ version = '1.23.14'
1076
1076
  version_b = '1.23.9'
1077
1077
 
1078
1078
  tag_python = None
@@ -2460,7 +2460,9 @@ class DisplayList:
2460
2460
  assert 0, f'Unrecognised {args=}'
2461
2461
 
2462
2462
  def get_pixmap(self, matrix=None, colorspace=None, alpha=0, clip=None):
2463
- if not colorspace:
2463
+ if isinstance(colorspace, Colorspace):
2464
+ colorspace = colorspace.this
2465
+ else:
2464
2466
  colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
2465
2467
  val = JM_pixmap_from_display_list(self.this, matrix, colorspace, alpha, clip, None)
2466
2468
  val.thisown = True
@@ -3558,16 +3560,12 @@ class Document:
3558
3560
  #self._reset_page_refs()
3559
3561
  #self.metadata = None
3560
3562
  #self.stream = None
3561
- #self.is_closed = True
3563
+ self.is_closed = True
3562
3564
  #self.FontInfos = []
3563
3565
  #self.Graftmaps = {}
3564
3566
  #self.ShownPages = {}
3565
3567
  #self.InsertedImages = {}
3566
-
3567
3568
  #self.this = None
3568
- self.close_internal()
3569
-
3570
- def close_internal(self):
3571
3569
  self.this = None
3572
3570
 
3573
3571
  def convert_to_pdf(self, from_page=0, to_page=-1, rotate=0):
@@ -8673,7 +8671,7 @@ class Page:
8673
8671
  cmd = item[0]
8674
8672
  rest = item[1:]
8675
8673
  if cmd == "re":
8676
- item = ("re", Rect(rest[0]), rest[1])
8674
+ item = ("re", Rect(rest[0]).normalize(), rest[1])
8677
8675
  elif cmd == "qu":
8678
8676
  item = ("qu", Quad(rest[0]))
8679
8677
  else:
@@ -8807,7 +8805,7 @@ class Page:
8807
8805
  cmd = item[0]
8808
8806
  rest = item[1:]
8809
8807
  if cmd == "re":
8810
- item = ("re", Rect(rest[0]), rest[1])
8808
+ item = ("re", Rect(rest[0]).normalize(), rest[1])
8811
8809
  elif cmd == "qu":
8812
8810
  item = ("qu", Quad(rest[0]))
8813
8811
  else:
@@ -10778,6 +10776,10 @@ class Rect:
10778
10776
  """Check if containing point-like or rect-like x."""
10779
10777
  return self.__contains__(x)
10780
10778
 
10779
+ @property
10780
+ def height(self):
10781
+ return max(0, self.y1 - self.y0)
10782
+
10781
10783
  def include_point(self, p):
10782
10784
  """Extend to include point-like p."""
10783
10785
  if len(p) != 2:
@@ -10899,15 +10901,17 @@ class Rect:
10899
10901
  self.x0, self.y0, self.x1, self.y1 = util_transform_rect(self, m)
10900
10902
  return self
10901
10903
 
10904
+ @property
10905
+ def width(self):
10906
+ return max(0, self.x1 - self.x0)
10907
+
10902
10908
  __div__ = __truediv__
10903
10909
 
10904
10910
  bl = bottom_left
10905
10911
  br = bottom_right
10906
- height = property(lambda self: abs(self.y1 - self.y0))
10907
10912
  irect = property(round)
10908
10913
  tl = top_left
10909
10914
  tr = top_right
10910
- width = property(lambda self: abs(self.x1 - self.x0))
10911
10915
 
10912
10916
 
10913
10917
  class Shape:
@@ -12518,7 +12522,7 @@ class TextPage:
12518
12522
  block_dict[ dictkey_colorspace] = mupdf.fz_colorspace_n(cs)
12519
12523
  block_dict[ dictkey_cs_name] = mupdf.fz_colorspace_name(cs)
12520
12524
  block_dict[ dictkey_xres] = img.xres()
12521
- block_dict[ dictkey_yres] = img.xres() # fixme: shouldn't this be img.yres()?
12525
+ block_dict[ dictkey_yres] = img.yres()
12522
12526
  block_dict[ dictkey_bpc] = img.bpc()
12523
12527
  block_dict[ dictkey_size] = mupdf.fz_image_size(img)
12524
12528
  if hashes:
@@ -16786,8 +16790,9 @@ def JM_pixmap_from_display_list(
16786
16790
  rect = mupdf.fz_transform_rect(rect, matrix)
16787
16791
  irect = mupdf.fz_round_rect(rect)
16788
16792
 
16789
- assert isinstance( cs, Colorspace)
16790
- pix = mupdf.fz_new_pixmap_with_bbox(cs.this, irect, seps, alpha)
16793
+ assert isinstance( cs, mupdf.FzColorspace)
16794
+
16795
+ pix = mupdf.fz_new_pixmap_with_bbox(cs, irect, seps, alpha)
16791
16796
  if alpha:
16792
16797
  mupdf.fz_clear_pixmap(pix)
16793
16798
  else:
@@ -21734,8 +21739,8 @@ def int_rc(text):
21734
21739
  return int(text)
21735
21740
 
21736
21741
  VersionFitz = "1.23.9" # MuPDF version.
21737
- VersionBind = "1.23.12" # PyMuPDF version.
21738
- VersionDate = "2024-01-12 00:00:01"
21742
+ VersionBind = "1.23.14" # PyMuPDF version.
21743
+ VersionDate = "2024-01-15 00:00:01"
21739
21744
  VersionDate2 = VersionDate.replace('-', '').replace(' ', '').replace(':', '')
21740
21745
  version = (VersionBind, VersionFitz, VersionDate2)
21741
21746
  pymupdf_version_tuple = tuple( [int_rc(i) for i in VersionBind.split('.')])
@@ -71,8 +71,10 @@ This is implemented as new class TableHeader with the properties:
71
71
  * external: A bool indicating whether the header is outside the table cells.
72
72
 
73
73
  """
74
+ import inspect
74
75
  import itertools
75
76
  import string
77
+ from collections.abc import Sequence
76
78
  from dataclasses import dataclass
77
79
  from operator import itemgetter
78
80
 
@@ -91,7 +93,6 @@ from . import (
91
93
 
92
94
  EDGES = [] # vector graphics from PyMuPDF
93
95
  CHARS = [] # text characters from PyMuPDF
94
- TEXTPAGE = None # TextPage of the page for optimized extraction
95
96
  # -------------------------------------------------------------------
96
97
  # End of PyMuPDF interface code
97
98
  # -------------------------------------------------------------------
@@ -141,6 +142,18 @@ LIGATURES = {
141
142
  }
142
143
 
143
144
 
145
+ def to_list(collection) -> list:
146
+ if isinstance(collection, list):
147
+ return collection
148
+ elif isinstance(collection, Sequence):
149
+ return list(collection)
150
+ elif hasattr(collection, "to_dict"):
151
+ res = collection.to_dict("records") # pragma: nocover
152
+ return res
153
+ else:
154
+ return list(collection)
155
+
156
+
144
157
  class TextMap:
145
158
  """
146
159
  A TextMap maps each unicode character in the text to an individual `char`
@@ -546,6 +559,78 @@ def extract_words(chars: list, **kwargs) -> list:
546
559
  return WordExtractor(**kwargs).extract_words(chars)
547
560
 
548
561
 
562
+ TEXTMAP_KWARGS = inspect.signature(WordMap.to_textmap).parameters.keys()
563
+ WORD_EXTRACTOR_KWARGS = inspect.signature(WordExtractor).parameters.keys()
564
+
565
+
566
+ def chars_to_textmap(chars: list, **kwargs) -> TextMap:
567
+ kwargs.update({"presorted": True})
568
+
569
+ extractor = WordExtractor(
570
+ **{k: kwargs[k] for k in WORD_EXTRACTOR_KWARGS if k in kwargs}
571
+ )
572
+ wordmap = extractor.extract_wordmap(chars)
573
+ textmap = wordmap.to_textmap(
574
+ **{k: kwargs[k] for k in TEXTMAP_KWARGS if k in kwargs}
575
+ )
576
+
577
+ return textmap
578
+
579
+
580
+ def extract_text(chars: list, **kwargs) -> str:
581
+ chars = to_list(chars)
582
+ if len(chars) == 0:
583
+ return ""
584
+
585
+ if kwargs.get("layout"):
586
+ return chars_to_textmap(chars, **kwargs).as_string
587
+ else:
588
+ y_tolerance = kwargs.get("y_tolerance", DEFAULT_Y_TOLERANCE)
589
+ extractor = WordExtractor(
590
+ **{k: kwargs[k] for k in WORD_EXTRACTOR_KWARGS if k in kwargs}
591
+ )
592
+ words = extractor.extract_words(chars)
593
+ lines = cluster_objects(words, itemgetter("doctop"), y_tolerance)
594
+ return "\n".join(" ".join(word["text"] for word in line) for line in lines)
595
+
596
+
597
+ def collate_line(
598
+ line_chars: list,
599
+ tolerance=DEFAULT_X_TOLERANCE,
600
+ ) -> str:
601
+ coll = ""
602
+ last_x1 = None
603
+ for char in sorted(line_chars, key=itemgetter("x0")):
604
+ if (last_x1 is not None) and (char["x0"] > (last_x1 + tolerance)):
605
+ coll += " "
606
+ last_x1 = char["x1"]
607
+ coll += char["text"]
608
+ return coll
609
+
610
+
611
+ def dedupe_chars(chars: list, tolerance=1) -> list:
612
+ """
613
+ Removes duplicate chars — those sharing the same text, fontname, size,
614
+ and positioning (within `tolerance`) as other characters in the set.
615
+ """
616
+ key = itemgetter("fontname", "size", "upright", "text")
617
+ pos_key = itemgetter("doctop", "x0")
618
+
619
+ def yield_unique_chars(chars: list):
620
+ sorted_chars = sorted(chars, key=key)
621
+ for grp, grp_chars in itertools.groupby(sorted_chars, key=key):
622
+ for y_cluster in cluster_objects(
623
+ list(grp_chars), itemgetter("doctop"), tolerance
624
+ ):
625
+ for x_cluster in cluster_objects(
626
+ y_cluster, itemgetter("x0"), tolerance
627
+ ):
628
+ yield sorted(x_cluster, key=pos_key)[0]
629
+
630
+ deduped = yield_unique_chars(chars)
631
+ return sorted(deduped, key=chars.index)
632
+
633
+
549
634
  def line_to_edge(line):
550
635
  edge = dict(line)
551
636
  edge["orientation"] = "h" if (line["top"] == line["bottom"]) else "v"
@@ -1162,35 +1247,22 @@ class Table(object):
1162
1247
 
1163
1248
  @property
1164
1249
  def bbox(self):
1165
- """Original replaced by PyMuPDF"""
1166
- rect = EMPTY_RECT()
1167
- for c in self.cells:
1168
- rect |= c
1169
- return tuple(rect)
1250
+ c = self.cells
1251
+ return (
1252
+ min(map(itemgetter(0), c)),
1253
+ min(map(itemgetter(1), c)),
1254
+ max(map(itemgetter(2), c)),
1255
+ max(map(itemgetter(3), c)),
1256
+ )
1170
1257
 
1171
1258
  @property
1172
1259
  def rows(self) -> list:
1173
- """Assign table cells to row cells observing page rotation"""
1174
- rot = self.page.rotation
1175
- if rot == 0:
1176
- # sort by y, then by x
1177
- i1, i2, f1, f2 = 1, 0, 1, 1
1178
- elif rot == 90:
1179
- # sort by x, then by y (desc)
1180
- i1, i2, f1, f2 = 0, 1, -1, 1
1181
- elif rot == 270:
1182
- # sort by x (desc), then by y (asc)
1183
- i1, i2, f1, f2 = 0, 1, 1, -1
1184
- elif rot == 180:
1185
- # sort by y (desc), then by x (desc)
1186
- i1, i2, f1, f2 = 1, 0, -1, -1
1187
-
1188
- xs = sorted(list(set([c[i1] for c in self.cells])), key=lambda x: f2 * x)
1260
+ _sorted = sorted(self.cells, key=itemgetter(1, 0))
1261
+ xs = list(sorted(set(map(itemgetter(0), self.cells))))
1189
1262
  rows = []
1190
- for x in xs:
1191
- row = TableRow(
1192
- sorted([c for c in self.cells if c[i1] == x], key=lambda c: f1 * c[i2])
1193
- )
1263
+ for y, row_cells in itertools.groupby(_sorted, itemgetter(1)):
1264
+ xdict = {cell[0]: cell for cell in row_cells}
1265
+ row = TableRow([xdict.get(x) for x in xs])
1194
1266
  rows.append(row)
1195
1267
  return rows
1196
1268
 
@@ -1202,55 +1274,46 @@ class Table(object):
1202
1274
  def col_count(self) -> int: # PyMuPDF extension
1203
1275
  return max([len(r.cells) for r in self.rows])
1204
1276
 
1205
- def extract(self) -> list:
1206
- """Extract the cell text for the comple table.
1207
-
1208
- Complete replacement by PyMuPDF text extraction.
1209
- """
1210
- global TEXTPAGE
1211
-
1212
- def get_text(cell):
1213
- """Accept char bbox areas with a cell overlap of at least 50%."""
1214
- cell = Rect(cell) # we need a Rect object
1215
- text = "" # result text
1216
- for block in TEXTPAGE.extractRAWDICT()["blocks"]:
1217
- if Rect(block["bbox"]).intersect(cell).is_empty:
1218
- continue
1219
- for line in block["lines"]:
1220
- if Rect(line["bbox"]).intersect(cell).is_empty:
1221
- continue
1222
- for span in line["spans"]:
1223
- chars = span["chars"]
1224
- if text and chars:
1225
- text += "\n" # new span appended after linebreak
1226
- for char in chars:
1227
- bbox = Rect(char["bbox"])
1228
- if abs(bbox & cell) < 0.5 * abs(bbox):
1229
- continue
1230
- text += char["c"]
1231
-
1232
- # no final line break, no wrapping spaces
1233
- return text.rstrip("\n").strip()
1277
+ def extract(self, **kwargs) -> list:
1278
+ chars = CHARS
1279
+ table_arr = []
1234
1280
 
1235
- table_arr = [] # final result
1281
+ def char_in_bbox(char, bbox) -> bool:
1282
+ v_mid = (char["top"] + char["bottom"]) / 2
1283
+ h_mid = (char["x0"] + char["x1"]) / 2
1284
+ x0, top, x1, bottom = bbox
1285
+ return bool(
1286
+ (h_mid >= x0) and (h_mid < x1) and (v_mid >= top) and (v_mid < bottom)
1287
+ )
1236
1288
 
1237
1289
  for row in self.rows:
1238
- arr = [] # text in this row
1290
+ arr = []
1291
+ row_chars = [char for char in chars if char_in_bbox(char, row.bbox)]
1292
+
1239
1293
  for cell in row.cells:
1240
1294
  if cell is None:
1241
1295
  cell_text = None
1242
1296
  else:
1243
- cell_text = get_text(cell)
1297
+ cell_chars = [
1298
+ char for char in row_chars if char_in_bbox(char, cell)
1299
+ ]
1300
+
1301
+ if len(cell_chars):
1302
+ kwargs["x_shift"] = cell[0]
1303
+ kwargs["y_shift"] = cell[1]
1304
+ if "layout" in kwargs:
1305
+ kwargs["layout_width"] = cell[2] - cell[0]
1306
+ kwargs["layout_height"] = cell[3] - cell[1]
1307
+ cell_text = extract_text(cell_chars, **kwargs)
1308
+ else:
1309
+ cell_text = ""
1244
1310
  arr.append(cell_text)
1245
1311
  table_arr.append(arr)
1246
1312
 
1247
1313
  return table_arr
1248
1314
 
1249
- def to_pandas(self):
1250
- """Return a pandas DataFrame version of the table.
1251
-
1252
- This is original PyMuPDF code.
1253
- """
1315
+ def to_pandas(self, **kwargs):
1316
+ """Return a pandas DataFrame version of the table."""
1254
1317
  try:
1255
1318
  import pandas as pd
1256
1319
  except ModuleNotFoundError:
@@ -1362,9 +1425,6 @@ class Table(object):
1362
1425
  cells.append((x0, y0, x1, y1))
1363
1426
  return cells, bbox
1364
1427
 
1365
- # we depend on small glyph heights!
1366
- old_small = TOOLS.set_small_glyph_heights()
1367
- TOOLS.set_small_glyph_heights(True)
1368
1428
  try:
1369
1429
  row = self.rows[0]
1370
1430
  cells = row.cells
@@ -1509,7 +1569,6 @@ class Table(object):
1509
1569
  page.get_textbox(c).replace("\n", " ").replace("  ", " ").strip()
1510
1570
  for c in hdr_cells
1511
1571
  ]
1512
- TOOLS.set_small_glyph_heights(old_small)
1513
1572
  return TableHeader(tuple(hdr_bbox), hdr_cells, hdr_names, True)
1514
1573
 
1515
1574
 
@@ -1756,14 +1815,11 @@ page information themselves.
1756
1815
  # -----------------------------------------------------------------------------
1757
1816
  def make_chars(page, clip=None):
1758
1817
  """Extract text as "rawdict" to fill CHARS."""
1759
- global CHARS, TEXTPAGE
1760
- old_small = TOOLS.set_small_glyph_heights()
1761
- TOOLS.set_small_glyph_heights(True)
1818
+ global CHARS
1762
1819
  page_number = page.number + 1
1763
1820
  page_height = page.rect.height
1764
1821
  ctm = page.transformation_matrix
1765
- TEXTPAGE = page.get_textpage(clip, flags=TEXTFLAGS_TEXT)
1766
- blocks = TEXTPAGE.extractRAWDICT()["blocks"]
1822
+ blocks = page.get_text("rawdict", clip=clip, flags=TEXTFLAGS_TEXT)["blocks"]
1767
1823
  doctop_base = page_height * page.number
1768
1824
  for block in blocks:
1769
1825
  for line in block["lines"]:
@@ -1810,8 +1866,6 @@ def make_chars(page, clip=None):
1810
1866
  }
1811
1867
  CHARS.append(char_dict)
1812
1868
 
1813
- TOOLS.set_small_glyph_heights(old_small)
1814
-
1815
1869
 
1816
1870
  # -----------------------------------------------------------------------------
1817
1871
  # Extract all page vector graphics to fill the EDGES list.
@@ -1819,8 +1873,56 @@ def make_chars(page, clip=None):
1819
1873
  # else to lines.
1820
1874
  # -----------------------------------------------------------------------------
1821
1875
  def make_edges(page, clip=None, tset=None):
1876
+ def has_text(bbox):
1877
+ text = page.get_text(clip=bbox).replace("\n", "").strip()
1878
+ if text:
1879
+ return True
1880
+ return False
1881
+
1882
+ def clean_graphics():
1883
+ """Detect and join rectangles of connected vector graphics."""
1884
+ # we need to exclude meaningless graphics that e.g. paint a white
1885
+ # rectangle on the full page.
1886
+
1887
+ parea = abs(page.rect) * 0.8 # area of the full page (80%)
1888
+
1889
+ # exclude graphics that are too large
1890
+ paths = [p for p in page.get_drawings() if abs(p["rect"]) < parea]
1891
+
1892
+ # make a list of vector graphics rectangles (IRects are sufficient)
1893
+ prects = sorted([p["rect"] for p in paths], key=lambda r: (r.y1, r.x0))
1894
+
1895
+ new_rects = [] # the final list of joined rectangles
1896
+
1897
+ # -------------------------------------------------------------------------
1898
+ # Strategy: join rects that have at least one point in common.
1899
+ # -------------------------------------------------------------------------
1900
+ while prects: # the algorithm will empty this list
1901
+ r = prects[0] # first rectangle
1902
+ repeat = True
1903
+ while repeat:
1904
+ repeat = False
1905
+ for i in range(len(prects) - 1, -1, -1): # run backwards
1906
+ if i == 0: # don't touch first rectangle
1907
+ continue
1908
+ if r.intersects(prects[i]):
1909
+ r |= prects[i] # join in to first rect
1910
+ prects[0] = +r # update first
1911
+ del prects[i] # delete this rect
1912
+ repeat = True
1913
+
1914
+ # move first item over to result list
1915
+ new_rects.append(prects.pop(0))
1916
+ prects = sorted(list(set(prects)), key=lambda r: (r.y1, r.x0))
1917
+
1918
+ new_rects = sorted(list(set(new_rects)), key=lambda r: (r.y1, r.x0))
1919
+ return [
1920
+ r for r in new_rects if r.width > 5 and r.height > 5 and has_text(r)
1921
+ ], paths
1922
+
1822
1923
  global EDGES
1823
- paths = page.get_drawings()
1924
+ bboxes, paths = clean_graphics()
1925
+
1824
1926
  page_height = page.rect.height
1825
1927
  doctop_basis = page.number * page_height
1826
1928
  page_number = page.number + 1
@@ -1896,6 +1998,14 @@ def make_edges(page, clip=None, tset=None):
1896
1998
  return line_dict
1897
1999
 
1898
2000
  for p in paths:
2001
+ if p["type"] == "f" and p["fill"] == (1, 1, 1):
2002
+ continue
2003
+ if p["type"] == "f" and p["rect"].width > 3 and p["rect"].height > 3:
2004
+ if (
2005
+ tset.vertical_strategy == "lines_strict"
2006
+ or tset.horizontal_strategy == "lines_strict"
2007
+ ):
2008
+ continue
1899
2009
  items = p["items"] # items in this path
1900
2010
 
1901
2011
  # if 'closePath', add a line from last to first point
@@ -1913,7 +2023,7 @@ def make_edges(page, clip=None, tset=None):
1913
2023
  EDGES.append(line_to_edge(line_dict))
1914
2024
 
1915
2025
  elif i[0] == "re": # a rectangle: decompose into 4 lines
1916
- rect = i[1] # rectangle itself
2026
+ rect = i[1].normalize() # rectangle itself
1917
2027
  # ignore minute rectangles
1918
2028
  if rect.height <= y_tolerance and rect.width <= x_tolerance:
1919
2029
  continue
@@ -1972,6 +2082,77 @@ def make_edges(page, clip=None, tset=None):
1972
2082
  if line_dict:
1973
2083
  EDGES.append(line_to_edge(line_dict))
1974
2084
 
2085
+ path = {"color": (0, 0, 0), "fill": None, "width": 1}
2086
+ for bbox in bboxes:
2087
+ line_dict = make_line(path, bbox.tl, bbox.tr, clip)
2088
+ EDGES.append(line_to_edge(line_dict))
2089
+
2090
+ line_dict = make_line(path, bbox.bl, bbox.br, clip)
2091
+ EDGES.append(line_to_edge(line_dict))
2092
+
2093
+ line_dict = make_line(path, bbox.tl, bbox.bl, clip)
2094
+ EDGES.append(line_to_edge(line_dict))
2095
+
2096
+ line_dict = make_line(path, bbox.tr, bbox.br, clip)
2097
+ EDGES.append(line_to_edge(line_dict))
2098
+
2099
+
2100
+ def page_rotation_set0(page):
2101
+ """Nullify page rotation.
2102
+
2103
+ To correctly detect tables, page rotation must be zero.
2104
+ This function performs the necessary adjustments and returns information
2105
+ for reverting these changes.
2106
+ """
2107
+ mediabox = page.mediabox
2108
+ rot = page.rotation # contains normalized rotation value
2109
+ # need to derotate the page's content
2110
+ mb = page.mediabox # current mediabox
2111
+
2112
+ if rot == 90:
2113
+ # before derotation, shift content horizontally
2114
+ mat0 = Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0)
2115
+ elif rot == 270:
2116
+ # before derotation, shift content vertically
2117
+ mat0 = Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0)
2118
+ else:
2119
+ mat0 = Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0)
2120
+
2121
+ # prefix with derotation matrix
2122
+ mat = mat0 * page.derotation_matrix
2123
+ cmd = b"%g %g %g %g %g %g cm " % tuple(mat)
2124
+ xref = TOOLS._insert_contents(page, cmd, 0)
2125
+
2126
+ # swap x- and y-coordinates
2127
+ if rot in (90, 270):
2128
+ x0, y0, x1, y1 = mb
2129
+ mb.x0 = y0
2130
+ mb.y0 = x0
2131
+ mb.x1 = y1
2132
+ mb.y1 = x1
2133
+ page.set_mediabox(mb)
2134
+
2135
+ page.set_rotation(0)
2136
+
2137
+ # refresh the page to apply these changes
2138
+ doc = page.parent
2139
+ pno = page.number
2140
+ page = doc[pno]
2141
+ return page, xref, rot, mediabox
2142
+
2143
+
2144
+ def page_rotation_reset(page, xref, rot, mediabox):
2145
+ """Reset page rotation to original values.
2146
+
2147
+ To be used before we return tables."""
2148
+ doc = page.parent # document of the page
2149
+ doc.update_object(xref, "<<>>") # remove modifying matrix
2150
+ page.set_mediabox(mediabox) # set mediabox to old value
2151
+ page.set_rotation(rot) # set rotation to old value
2152
+ pno = page.number
2153
+ page = doc[pno] # update page info
2154
+ return page
2155
+
1975
2156
 
1976
2157
  def find_tables(
1977
2158
  page,
@@ -1995,10 +2176,18 @@ def find_tables(
1995
2176
  text_tolerance=3,
1996
2177
  text_x_tolerance=3,
1997
2178
  text_y_tolerance=3,
2179
+ strategy=None, # offer abbreviation
1998
2180
  ):
1999
2181
  global CHARS, EDGES
2000
2182
  CHARS = []
2001
2183
  EDGES = []
2184
+ old_small = bool(TOOLS.set_small_glyph_heights()) # save old value
2185
+ TOOLS.set_small_glyph_heights(True) # we need minimum bboxes
2186
+ if page.rotation != 0:
2187
+ page, old_xref, old_rot, old_mediabox = page_rotation_set0(page)
2188
+ else:
2189
+ old_xref, old_rot, old_mediabox = None, None, None
2190
+
2002
2191
  if snap_x_tolerance is None:
2003
2192
  snap_x_tolerance = UNSET
2004
2193
  if snap_y_tolerance is None:
@@ -2011,6 +2200,10 @@ def find_tables(
2011
2200
  intersection_x_tolerance = UNSET
2012
2201
  if intersection_y_tolerance is None:
2013
2202
  intersection_y_tolerance = UNSET
2203
+ if strategy is not None:
2204
+ vertical_strategy = strategy
2205
+ horizontal_strategy = strategy
2206
+
2014
2207
  settings = {
2015
2208
  "vertical_strategy": vertical_strategy,
2016
2209
  "horizontal_strategy": horizontal_strategy,
@@ -2034,7 +2227,12 @@ def find_tables(
2034
2227
  }
2035
2228
  tset = TableSettings.resolve(settings=settings)
2036
2229
  page.table_settings = tset
2230
+
2037
2231
  make_chars(page, clip=clip) # create character list of page
2038
2232
  make_edges(page, clip=clip, tset=tset) # create lines and curves
2039
2233
  tables = TableFinder(page, settings=tset)
2234
+
2235
+ TOOLS.set_small_glyph_heights(old_small)
2236
+ if old_xref is not None:
2237
+ page = page_rotation_reset(page, old_xref, old_rot, old_mediabox)
2040
2238
  return tables
@@ -6344,7 +6344,7 @@ def get_oc_items(self) -> list:
6344
6344
  cmd = item[0]
6345
6345
  rest = item[1:]
6346
6346
  if cmd == "re":
6347
- item = ("re", Rect(rest[0]), rest[1])
6347
+ item = ("re", Rect(rest[0]).normalize(), rest[1])
6348
6348
  elif cmd == "qu":
6349
6349
  item = ("qu", Quad(rest[0]))
6350
6350
  else:
@@ -6479,7 +6479,7 @@ def get_oc_items(self) -> list:
6479
6479
  cmd = item[0]
6480
6480
  rest = item[1:]
6481
6481
  if cmd == "re":
6482
- item = ("re", Rect(rest[0]), rest[1])
6482
+ item = ("re", Rect(rest[0]).normalize(), rest[1])
6483
6483
  elif cmd == "qu":
6484
6484
  item = ("qu", Quad(rest[0]))
6485
6485
  else:
@@ -0,0 +1,7 @@
1
+ %pythoncode %{
2
+ VersionFitz = "1.23.9" # MuPDF version.
3
+ VersionBind = "1.23.14" # PyMuPDF version.
4
+ VersionDate = "2024-01-15 00:00:01"
5
+ version = (VersionBind, VersionFitz, "20240115000001")
6
+ pymupdf_version_tuple = tuple( [int(i) for i in VersionFitz.split('.')])
7
+ %}