node-poppler 5.1.4 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. package/README.md +2 -0
  2. package/package.json +13 -13
  3. package/src/index.js +20 -8
  4. package/API.md +0 -465
  5. package/CHANGELOG.md +0 -976
  6. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Annot.h +0 -1783
  7. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/AnnotStampImageHelper.h +0 -68
  8. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Array.h +0 -92
  9. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/BBoxOutputDev.h +0 -54
  10. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/CMap.h +0 -128
  11. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/CachedFile.h +0 -149
  12. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/CairoFontEngine.h +0 -122
  13. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/CairoOutputDev.h +0 -468
  14. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/CairoRescaleBox.h +0 -60
  15. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Catalog.h +0 -318
  16. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/CertificateInfo.h +0 -129
  17. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/CharCodeToUnicode.h +0 -143
  18. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/CharTypes.h +0 -24
  19. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/CurlCachedFile.h +0 -37
  20. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/CurlPDFDocBuilder.h +0 -32
  21. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/DateInfo.h +0 -44
  22. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Decrypt.h +0 -147
  23. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Dict.h +0 -126
  24. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Error.h +0 -59
  25. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/ErrorCodes.h +0 -54
  26. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/FDPDFDocBuilder.h +0 -36
  27. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/FILECacheLoader.h +0 -34
  28. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/FileSpec.h +0 -85
  29. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/FontEncodingTables.h +0 -20
  30. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/FontInfo.h +0 -109
  31. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Form.h +0 -734
  32. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Function.h +0 -251
  33. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Gfx.h +0 -373
  34. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/GfxFont.h +0 -466
  35. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/GfxState.h +0 -1759
  36. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/GfxState_helpers.h +0 -90
  37. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/GlobalParams.h +0 -221
  38. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Hints.h +0 -91
  39. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/JArithmeticDecoder.h +0 -131
  40. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/JBIG2Stream.h +0 -130
  41. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/JPEG2000Stream.h +0 -54
  42. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/JSInfo.h +0 -65
  43. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Lexer.h +0 -113
  44. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Linearization.h +0 -43
  45. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Link.h +0 -566
  46. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/LocalPDFDocBuilder.h +0 -32
  47. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/MarkedContentOutputDev.h +0 -132
  48. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Movie.h +0 -123
  49. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/NameToCharCode.h +0 -52
  50. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/NameToUnicodeTable.h +0 -4300
  51. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Object.h +0 -678
  52. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/OptionalContent.h +0 -117
  53. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Outline.h +0 -122
  54. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/OutputDev.h +0 -379
  55. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/PDFDoc.h +0 -419
  56. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/PDFDocBuilder.h +0 -47
  57. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/PDFDocEncoding.h +0 -43
  58. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/PDFDocFactory.h +0 -58
  59. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/PSOutputDev.h +0 -549
  60. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/PSTokenizer.h +0 -47
  61. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Page.h +0 -273
  62. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/PageTransition.h +0 -105
  63. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Parser.h +0 -76
  64. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/PopplerCache.h +0 -65
  65. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/PreScanOutputDev.h +0 -148
  66. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/ProfileData.h +0 -35
  67. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Rendition.h +0 -164
  68. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/SecurityHandler.h +0 -131
  69. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Sound.h +0 -82
  70. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/SplashOutputDev.h +0 -413
  71. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Stream-CCITT.h +0 -334
  72. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/Stream.h +0 -1504
  73. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/StructElement.h +0 -406
  74. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/StructTreeRoot.h +0 -89
  75. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/TextOutputDev.h +0 -918
  76. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/UTF.h +0 -112
  77. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/UnicodeCClassTables.h +0 -983
  78. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/UnicodeCompTables.h +0 -328
  79. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/UnicodeDecompTables.h +0 -7181
  80. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/UnicodeMap.h +0 -128
  81. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/UnicodeMapFuncs.h +0 -35
  82. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/UnicodeMapTables.h +0 -84
  83. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/UnicodeTypeTable.h +0 -49
  84. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/ViewerPreferences.h +0 -87
  85. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/XRef.h +0 -325
  86. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-destination.h +0 -69
  87. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-document.h +0 -135
  88. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-embedded-file.h +0 -55
  89. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-font-private.h +0 -68
  90. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-font.h +0 -95
  91. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-global.h +0 -132
  92. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-image.h +0 -74
  93. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-page-renderer.h +0 -77
  94. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-page-transition.h +0 -85
  95. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-page.h +0 -225
  96. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-rectangle.h +0 -66
  97. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-toc.h +0 -72
  98. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler-version.h +0 -39
  99. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/cpp/poppler_cpp_export.h +0 -42
  100. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/fofi/FoFiBase.h +0 -66
  101. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/fofi/FoFiEncodings.h +0 -43
  102. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/fofi/FoFiIdentifier.h +0 -52
  103. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/fofi/FoFiTrueType.h +0 -190
  104. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/fofi/FoFiType1.h +0 -70
  105. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/fofi/FoFiType1C.h +0 -263
  106. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-action.h +0 -454
  107. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-annot.h +0 -333
  108. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-attachment.h +0 -123
  109. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-date.h +0 -32
  110. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-document.h +0 -541
  111. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-enums.h +0 -191
  112. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-features.h +0 -88
  113. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-form-field.h +0 -254
  114. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-layer.h +0 -50
  115. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-macros.h +0 -42
  116. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-media.h +0 -80
  117. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-movie.h +0 -81
  118. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-page.h +0 -424
  119. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler-structure-element.h +0 -425
  120. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/glib/poppler.h +0 -259
  121. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/GooCheckedOps.h +0 -116
  122. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/GooLikely.h +0 -22
  123. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/GooString.h +0 -262
  124. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/GooTimer.h +0 -59
  125. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/ImgWriter.h +0 -39
  126. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/JpegWriter.h +0 -69
  127. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/PNGWriter.h +0 -68
  128. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/TiffWriter.h +0 -70
  129. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/gdir.h +0 -96
  130. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/gfile.h +0 -156
  131. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/gmem.h +0 -201
  132. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/grandom.h +0 -21
  133. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/goo/gstrtod.h +0 -45
  134. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/poppler-config.h +0 -155
  135. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/poppler_private_export.h +0 -42
  136. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/Splash.h +0 -336
  137. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashBitmap.h +0 -127
  138. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashClip.h +0 -131
  139. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashErrorCodes.h +0 -50
  140. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashFTFont.h +0 -71
  141. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashFTFontEngine.h +0 -72
  142. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashFTFontFile.h +0 -64
  143. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashFont.h +0 -116
  144. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashFontEngine.h +0 -89
  145. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashFontFile.h +0 -98
  146. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashFontFileID.h +0 -40
  147. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashGlyphBitmap.h +0 -23
  148. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashMath.h +0 -215
  149. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashPath.h +0 -138
  150. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashPattern.h +0 -107
  151. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashScreen.h +0 -87
  152. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashState.h +0 -132
  153. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashTypes.h +0 -248
  154. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashXPath.h +0 -95
  155. package/src/lib/win32/poppler-22.04.0/Library/include/poppler/splash/SplashXPathScanner.h +0 -136
  156. package/src/lib/win32/poppler-22.04.0/Library/lib/pkgconfig/poppler-cpp.pc +0 -12
  157. package/src/lib/win32/poppler-22.04.0/Library/lib/pkgconfig/poppler-glib.pc +0 -12
  158. package/src/lib/win32/poppler-22.04.0/Library/lib/pkgconfig/poppler.pc +0 -10
  159. package/src/lib/win32/poppler-22.04.0/Library/lib/poppler-cpp.lib +0 -0
  160. package/src/lib/win32/poppler-22.04.0/Library/lib/poppler-glib.lib +0 -0
  161. package/src/lib/win32/poppler-22.04.0/Library/lib/poppler.lib +0 -0
  162. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdfattach.1 +0 -60
  163. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdfdetach.1 +0 -94
  164. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdffonts.1 +0 -128
  165. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdfimages.1 +0 -265
  166. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdfinfo.1 +0 -198
  167. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdfseparate.1 +0 -60
  168. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdftocairo.1 +0 -346
  169. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdftohtml.1 +0 -118
  170. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdftoppm.1 +0 -230
  171. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdftops.1 +0 -267
  172. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdftotext.1 +0 -157
  173. package/src/lib/win32/poppler-22.04.0/Library/share/man/man1/pdfunite.1 +0 -43
  174. package/src/lib/win32/poppler-22.04.0/share/poppler/CMakeLists.txt +0 -309
  175. package/src/lib/win32/poppler-22.04.0/share/poppler/Makefile +0 -333
  176. package/tsconfig.json +0 -9
package/src/lib/win32/poppler-22.04.0/Library/include/poppler/TextOutputDev.h +0 -918
@@ -1,918 +0,0 @@
1
- //========================================================================
2
- //
3
- // TextOutputDev.h
4
- //
5
- // Copyright 1997-2003 Glyph & Cog, LLC
6
- //
7
- //========================================================================
8
-
9
- //========================================================================
10
- //
11
- // Modified under the Poppler project - http://poppler.freedesktop.org
12
- //
13
- // All changes made under the Poppler project to this file are licensed
14
- // under GPL version 2 or later
15
- //
16
- // Copyright (C) 2005-2007 Kristian Høgsberg <krh@redhat.com>
17
- // Copyright (C) 2006 Ed Catmur <ed@catmur.co.uk>
18
- // Copyright (C) 2007, 2008, 2011, 2013 Carlos Garcia Campos <carlosgc@gnome.org>
19
- // Copyright (C) 2007, 2017 Adrian Johnson <ajohnson@redneon.com>
20
- // Copyright (C) 2008, 2010, 2015, 2016, 2018, 2019, 2021 Albert Astals Cid <aacid@kde.org>
21
- // Copyright (C) 2010 Brian Ewins <brian.ewins@gmail.com>
22
- // Copyright (C) 2012, 2013, 2015, 2016 Jason Crain <jason@aquaticape.us>
23
- // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
24
- // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
25
- // Copyright (C) 2018 Sanchit Anand <sanxchit@gmail.com>
26
- // Copyright (C) 2018, 2020, 2021 Nelson Benítez León <nbenitezl@gmail.com>
27
- // Copyright (C) 2019, 2022 Oliver Sander <oliver.sander@tu-dresden.de>
28
- // Copyright (C) 2019 Dan Shea <dan.shea@logical-innovations.com>
29
- // Copyright (C) 2020 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
30
- //
31
- // To see a description of the changes please see the Changelog file that
32
- // came with your tarball or type make ChangeLog if you are building from git
33
- //
34
- //========================================================================
35
-
36
- #ifndef TEXTOUTPUTDEV_H
37
- #define TEXTOUTPUTDEV_H
38
-
39
- #include "poppler-config.h"
40
- #include "poppler_private_export.h"
41
- #include <cstdio>
42
- #include "GfxFont.h"
43
- #include "GfxState.h"
44
- #include "OutputDev.h"
45
-
46
- class GooString;
47
- class Gfx;
48
- class GfxFont;
49
- class GfxState;
50
- class UnicodeMap;
51
- class AnnotLink;
52
-
53
- class TextWord;
54
- class TextPool;
55
- class TextLine;
56
- class TextLineFrag;
57
- class TextBlock;
58
- class TextFlow;
59
- class TextLink;
60
- class TextUnderline;
61
- class TextWordList;
62
- class TextPage;
63
- class TextSelectionVisitor;
64
-
65
- //------------------------------------------------------------------------
66
-
67
- typedef void (*TextOutputFunc)(void *stream, const char *text, int len);
68
-
69
- enum SelectionStyle
70
- {
71
- selectionStyleGlyph,
72
- selectionStyleWord,
73
- selectionStyleLine
74
- };
75
-
76
- enum EndOfLineKind
77
- {
78
- eolUnix, // LF
79
- eolDOS, // CR+LF
80
- eolMac // CR
81
- };
82
-
83
- //------------------------------------------------------------------------
84
- // TextFontInfo
85
- //------------------------------------------------------------------------
86
-
87
- class POPPLER_PRIVATE_EXPORT TextFontInfo
88
- {
89
- public:
90
- explicit TextFontInfo(const GfxState *state);
91
- ~TextFontInfo();
92
-
93
- TextFontInfo(const TextFontInfo &) = delete;
94
- TextFontInfo &operator=(const TextFontInfo &) = delete;
95
-
96
- bool matches(const GfxState *state) const;
97
- bool matches(const TextFontInfo *fontInfo) const;
98
- bool matches(const Ref *ref) const;
99
-
100
- // Get the font ascent, or a default value if the font is not set
101
- double getAscent() const;
102
-
103
- // Get the font descent, or a default value if the font is not set
104
- double getDescent() const;
105
-
106
- // Get the writing mode (0 or 1), or 0 if the font is not set
107
- int getWMode() const;
108
-
109
- #ifdef TEXTOUT_WORD_LIST
110
- // Get the font name (which may be NULL).
111
- const GooString *getFontName() const { return fontName; }
112
-
113
- // Get font descriptor flags.
114
- bool isFixedWidth() const { return flags & fontFixedWidth; }
115
- bool isSerif() const { return flags & fontSerif; }
116
- bool isSymbolic() const { return flags & fontSymbolic; }
117
- bool isItalic() const { return flags & fontItalic; }
118
- bool isBold() const { return flags & fontBold; }
119
- #endif
120
-
121
- private:
122
- std::shared_ptr<GfxFont> gfxFont;
123
- #ifdef TEXTOUT_WORD_LIST
124
- GooString *fontName;
125
- int flags;
126
- #endif
127
-
128
- friend class TextWord;
129
- friend class TextPage;
130
- friend class TextSelectionPainter;
131
- };
132
-
133
- //------------------------------------------------------------------------
134
- // TextWord
135
- //------------------------------------------------------------------------
136
-
137
- class POPPLER_PRIVATE_EXPORT TextWord
138
- {
139
- public:
140
- // Constructor.
141
- TextWord(const GfxState *state, int rotA, double fontSize);
142
-
143
- // Destructor.
144
- ~TextWord();
145
-
146
- TextWord(const TextWord &) = delete;
147
- TextWord &operator=(const TextWord &) = delete;
148
-
149
- // Add a character to the word.
150
- void addChar(const GfxState *state, TextFontInfo *fontA, double x, double y, double dx, double dy, int charPosA, int charLen, CharCode c, Unicode u, const Matrix &textMatA);
151
-
152
- // Attempt to add a character to the word as a combining character.
153
- // Either character u or the last character in the word must be an
154
- // acute, dieresis, or other combining character. Returns true if
155
- // the character was added.
156
- bool addCombining(const GfxState *state, TextFontInfo *fontA, double fontSizeA, double x, double y, double dx, double dy, int charPosA, int charLen, CharCode c, Unicode u, const Matrix &textMatA);
157
-
158
- // Merge <word> onto the end of <this>.
159
- void merge(TextWord *word);
160
-
161
- // Compares <this> to <word>, returning -1 (<), 0 (=), or +1 (>),
162
- // based on a primary-axis comparison, e.g., x ordering if rot=0.
163
- int primaryCmp(const TextWord *word) const;
164
-
165
- // Return the distance along the primary axis between <this> and
166
- // <word>.
167
- double primaryDelta(const TextWord *word) const;
168
-
169
- static int cmpYX(const void *p1, const void *p2);
170
-
171
- void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style);
172
-
173
- // Get the TextFontInfo object associated with a character.
174
- const TextFontInfo *getFontInfo(int idx) const { return font[idx]; }
175
-
176
- // Get the next TextWord on the linked list.
177
- const TextWord *getNext() const { return next; }
178
-
179
- #ifdef TEXTOUT_WORD_LIST
180
- int getLength() const { return len; }
181
- const Unicode *getChar(int idx) const { return &text[idx]; }
182
- GooString *getText() const;
183
- const GooString *getFontName(int idx) const { return font[idx]->fontName; }
184
- void getColor(double *r, double *g, double *b) const
185
- {
186
- *r = colorR;
187
- *g = colorG;
188
- *b = colorB;
189
- }
190
- void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const
191
- {
192
- *xMinA = xMin;
193
- *yMinA = yMin;
194
- *xMaxA = xMax;
195
- *yMaxA = yMax;
196
- }
197
- void getCharBBox(int charIdx, double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const;
198
- double getFontSize() const { return fontSize; }
199
- int getRotation() const { return rot; }
200
- int getCharPos() const { return charPos[0]; }
201
- int getCharLen() const { return charPos[len] - charPos[0]; }
202
- bool getSpaceAfter() const { return spaceAfter; }
203
- #endif
204
- bool isUnderlined() const { return underlined; }
205
- const AnnotLink *getLink() const { return link; }
206
- double getEdge(int i) const { return edge[i]; }
207
- double getBaseline() const { return base; }
208
- bool hasSpaceAfter() const { return spaceAfter; }
209
- const TextWord *nextWord() const { return next; };
210
-
211
- private:
212
- void ensureCapacity(int capacity);
213
- void setInitialBounds(TextFontInfo *fontA, double x, double y);
214
-
215
- int rot; // rotation, multiple of 90 degrees
216
- // (0, 1, 2, or 3)
217
- int wMode; // horizontal (0) or vertical (1) writing mode
218
- double xMin, xMax; // bounding box x coordinates
219
- double yMin, yMax; // bounding box y coordinates
220
- double base; // baseline x or y coordinate
221
- Unicode *text; // the text
222
- CharCode *charcode; // glyph indices
223
- double *edge; // "near" edge x or y coord of each char
224
- // (plus one extra entry for the last char)
225
- int *charPos; // character position (within content stream)
226
- // of each char (plus one extra entry for
227
- // the last char)
228
- int len; // length of text/edge/charPos/font arrays
229
- int size; // size of text/edge/charPos/font arrays
230
- TextFontInfo **font; // font information for each char
231
- Matrix *textMat; // transformation matrix for each char
232
- double fontSize; // font size
233
- bool spaceAfter; // set if there is a space between this
234
- // word and the next word on the line
235
- bool underlined;
236
- bool invisible; // whether we are invisible (glyphless)
237
- TextWord *next; // next word in line
238
-
239
- #ifdef TEXTOUT_WORD_LIST
240
- double colorR, // word color
241
- colorG, colorB;
242
- #endif
243
-
244
- AnnotLink *link;
245
-
246
- friend class TextPool;
247
- friend class TextLine;
248
- friend class TextBlock;
249
- friend class TextFlow;
250
- friend class TextWordList;
251
- friend class TextPage;
252
-
253
- friend class TextSelectionPainter;
254
- friend class TextSelectionDumper;
255
- };
256
-
257
- //------------------------------------------------------------------------
258
- // TextPool
259
- //------------------------------------------------------------------------
260
-
261
- class TextPool
262
- {
263
- public:
264
- TextPool();
265
- ~TextPool();
266
-
267
- TextPool(const TextPool &) = delete;
268
- TextPool &operator=(const TextPool &) = delete;
269
-
270
- TextWord *getPool(int baseIdx) { return pool[baseIdx - minBaseIdx]; }
271
- void setPool(int baseIdx, TextWord *p) { pool[baseIdx - minBaseIdx] = p; }
272
-
273
- int getBaseIdx(double base) const;
274
-
275
- void addWord(TextWord *word);
276
-
277
- private:
278
- int minBaseIdx; // min baseline bucket index
279
- int maxBaseIdx; // max baseline bucket index
280
- TextWord **pool; // array of linked lists, one for each
281
- // baseline value (multiple of 4 pts)
282
- TextWord *cursor; // pointer to last-accessed word
283
- int cursorBaseIdx; // baseline bucket index of last-accessed word
284
-
285
- friend class TextBlock;
286
- friend class TextPage;
287
- };
288
-
289
- struct TextFlowData;
290
-
291
- //------------------------------------------------------------------------
292
- // TextLine
293
- //------------------------------------------------------------------------
294
-
295
- class TextLine
296
- {
297
- public:
298
- TextLine(TextBlock *blkA, int rotA, double baseA);
299
- ~TextLine();
300
-
301
- TextLine(const TextLine &) = delete;
302
- TextLine &operator=(const TextLine &) = delete;
303
-
304
- void addWord(TextWord *word);
305
-
306
- // Return the distance along the primary axis between <this> and
307
- // <line>.
308
- double primaryDelta(const TextLine *line) const;
309
-
310
- // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
311
- // based on a primary-axis comparison, e.g., x ordering if rot=0.
312
- int primaryCmp(const TextLine *line) const;
313
-
314
- // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
315
- // based on a secondary-axis comparison of the baselines, e.g., y
316
- // ordering if rot=0.
317
- int secondaryCmp(const TextLine *line) const;
318
-
319
- int cmpYX(const TextLine *line) const;
320
-
321
- static int cmpXY(const void *p1, const void *p2);
322
-
323
- void coalesce(const UnicodeMap *uMap);
324
-
325
- void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style);
326
-
327
- // Get the head of the linked list of TextWords.
328
- const TextWord *getWords() const { return words; }
329
-
330
- // Get the next TextLine on the linked list.
331
- const TextLine *getNext() const { return next; }
332
-
333
- // Returns true if the last char of the line is a hyphen.
334
- bool isHyphenated() const { return hyphenated; }
335
-
336
- private:
337
- TextBlock *blk; // parent block
338
- int rot; // text rotation
339
- double xMin, xMax; // bounding box x coordinates
340
- double yMin, yMax; // bounding box y coordinates
341
- double base; // baseline x or y coordinate
342
- TextWord *words; // words in this line
343
- TextWord *lastWord; // last word in this line
344
- Unicode *text; // Unicode text of the line, including
345
- // spaces between words
346
- double *edge; // "near" edge x or y coord of each char
347
- // (plus one extra entry for the last char)
348
- int *col; // starting column number of each Unicode char
349
- int len; // number of Unicode chars
350
- int convertedLen; // total number of converted characters
351
- bool hyphenated; // set if last char is a hyphen
352
- TextLine *next; // next line in block
353
- Unicode *normalized; // normalized form of Unicode text
354
- int normalized_len; // number of normalized Unicode chars
355
- int *normalized_idx; // indices of normalized chars into Unicode text
356
- Unicode *ascii_translation; // ascii translation from the normalized text
357
- int ascii_len; // length of ascii translation text
358
- int *ascii_idx; // indices of ascii chars into Unicode text of line
359
-
360
- friend class TextLineFrag;
361
- friend class TextBlock;
362
- friend class TextFlow;
363
- friend class TextWordList;
364
- friend class TextPage;
365
-
366
- friend class TextSelectionPainter;
367
- friend class TextSelectionSizer;
368
- friend class TextSelectionDumper;
369
- };
370
-
371
- //------------------------------------------------------------------------
372
- // TextBlock
373
- //------------------------------------------------------------------------
374
-
375
- class TextBlock
376
- {
377
- public:
378
- TextBlock(TextPage *pageA, int rotA);
379
- ~TextBlock();
380
-
381
- TextBlock(const TextBlock &) = delete;
382
- TextBlock &operator=(const TextBlock &) = delete;
383
-
384
- void addWord(TextWord *word);
385
-
386
- void coalesce(const UnicodeMap *uMap, double fixedPitch);
387
-
388
- // Update this block's priMin and priMax values, looking at <blk>.
389
- void updatePriMinMax(const TextBlock *blk);
390
-
391
- static int cmpXYPrimaryRot(const void *p1, const void *p2);
392
-
393
- static int cmpYXPrimaryRot(const void *p1, const void *p2);
394
-
395
- int primaryCmp(const TextBlock *blk) const;
396
-
397
- double secondaryDelta(const TextBlock *blk) const;
398
-
399
- // Returns true if <this> is below <blk>, relative to the page's
400
- // primary rotation.
401
- bool isBelow(const TextBlock *blk) const;
402
-
403
- void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style);
404
-
405
- // Get the head of the linked list of TextLines.
406
- const TextLine *getLines() const { return lines; }
407
-
408
- // Get the next TextBlock on the linked list.
409
- const TextBlock *getNext() const { return next; }
410
-
411
- void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const
412
- {
413
- *xMinA = xMin;
414
- *yMinA = yMin;
415
- *xMaxA = xMax;
416
- *yMaxA = yMax;
417
- }
418
-
419
- int getLineCount() const { return nLines; }
420
-
421
- private:
422
- bool isBeforeByRule1(const TextBlock *blk1);
423
- bool isBeforeByRepeatedRule1(const TextBlock *blkList, const TextBlock *blk1);
424
- bool isBeforeByRule2(const TextBlock *blk1);
425
-
426
- int visitDepthFirst(TextBlock *blkList, int pos1, TextBlock **sorted, int sortPos, bool *visited);
427
- int visitDepthFirst(TextBlock *blkList, int pos1, TextBlock **sorted, int sortPos, bool *visited, TextBlock **cache, int cacheSize);
428
-
429
- TextPage *page; // the parent page
430
- int rot; // text rotation
431
- double xMin, xMax; // bounding box x coordinates
432
- double yMin, yMax; // bounding box y coordinates
433
- double priMin, priMax; // whitespace bounding box along primary axis
434
- double ExMin, ExMax; // extended bounding box x coordinates
435
- double EyMin, EyMax; // extended bounding box y coordinates
436
- int tableId; // id of table to which this block belongs
437
- bool tableEnd; // is this block at end of line of actual table
438
-
439
- TextPool *pool; // pool of words (used only until lines
440
- // are built)
441
- TextLine *lines; // linked list of lines
442
- TextLine *curLine; // most recently added line
443
- int nLines; // number of lines
444
- int charCount; // number of characters in the block
445
- int col; // starting column
446
- int nColumns; // number of columns in the block
447
-
448
- TextBlock *next;
449
- TextBlock *stackNext;
450
-
451
- friend class TextLine;
452
- friend class TextLineFrag;
453
- friend class TextFlow;
454
- friend class TextWordList;
455
- friend class TextPage;
456
- friend class TextSelectionPainter;
457
- friend class TextSelectionDumper;
458
- };
459
-
460
- //------------------------------------------------------------------------
461
- // TextFlow
462
- //------------------------------------------------------------------------
463
-
464
- class TextFlow
465
- {
466
- public:
467
- TextFlow(TextPage *pageA, TextBlock *blk);
468
- ~TextFlow();
469
-
470
- TextFlow(const TextFlow &) = delete;
471
- TextFlow &operator=(const TextFlow &) = delete;
472
-
473
- // Add a block to the end of this flow.
474
- void addBlock(TextBlock *blk);
475
-
476
- // Returns true if <blk> fits below <prevBlk> in the flow, i.e., (1)
477
- // it uses a font no larger than the last block added to the flow,
478
- // and (2) it fits within the flow's [priMin, priMax] along the
479
- // primary axis.
480
- bool blockFits(const TextBlock *blk, const TextBlock *prevBlk) const;
481
-
482
- // Get the head of the linked list of TextBlocks.
483
- const TextBlock *getBlocks() const { return blocks; }
484
-
485
- // Get the next TextFlow on the linked list.
486
- const TextFlow *getNext() const { return next; }
487
-
488
- private:
489
- TextPage *page; // the parent page
490
- double xMin, xMax; // bounding box x coordinates
491
- double yMin, yMax; // bounding box y coordinates
492
- double priMin, priMax; // whitespace bounding box along primary axis
493
- TextBlock *blocks; // blocks in flow
494
- TextBlock *lastBlk; // last block in this flow
495
- TextFlow *next;
496
-
497
- friend class TextWordList;
498
- friend class TextPage;
499
- };
500
-
501
- #ifdef TEXTOUT_WORD_LIST
502
-
503
- //------------------------------------------------------------------------
504
- // TextWordList
505
- //------------------------------------------------------------------------
506
-
507
- class POPPLER_PRIVATE_EXPORT TextWordList
508
- {
509
- public:
510
- // Build a flat word list, in content stream order (if
511
- // text->rawOrder is true), physical layout order (if <physLayout>
512
- // is true and text->rawOrder is false), or reading order (if both
513
- // flags are false).
514
- TextWordList(const TextPage *text, bool physLayout);
515
-
516
- ~TextWordList();
517
-
518
- TextWordList(const TextWordList &) = delete;
519
- TextWordList &operator=(const TextWordList &) = delete;
520
-
521
- // Return the number of words on the list.
522
- int getLength() const;
523
-
524
- // Return the <idx>th word from the list.
525
- TextWord *get(int idx);
526
-
527
- private:
528
- std::vector<TextWord *> words;
529
- };
530
-
531
- #endif // TEXTOUT_WORD_LIST
532
-
533
- class TextWordSelection
534
- {
535
- public:
536
- TextWordSelection(const TextWord *wordA, int beginA, int endA) : word(wordA), begin(beginA), end(endA) { }
537
-
538
- const TextWord *getWord() const { return word; }
539
- int getBegin() const { return begin; }
540
- int getEnd() const { return end; }
541
-
542
- private:
543
- const TextWord *word;
544
- int begin;
545
- int end;
546
-
547
- friend class TextSelectionPainter;
548
- friend class TextSelectionDumper;
549
- };
550
-
551
- //------------------------------------------------------------------------
552
- // TextPage
553
- //------------------------------------------------------------------------
554
-
555
- class POPPLER_PRIVATE_EXPORT TextPage
556
- {
557
- public:
558
- // Constructor.
559
- explicit TextPage(bool rawOrderA, bool discardDiagA = false);
560
-
561
- TextPage(const TextPage &) = delete;
562
- TextPage &operator=(const TextPage &) = delete;
563
-
564
- void incRefCnt();
565
- void decRefCnt();
566
-
567
- // Start a new page.
568
- void startPage(const GfxState *state);
569
-
570
- // End the current page.
571
- void endPage();
572
-
573
- // Update the current font.
574
- void updateFont(const GfxState *state);
575
-
576
- // Begin a new word.
577
- void beginWord(const GfxState *state);
578
-
579
- // Add a character to the current word.
580
- void addChar(const GfxState *state, double x, double y, double dx, double dy, CharCode c, int nBytes, const Unicode *u, int uLen);
581
-
582
- // Add <nChars> invisible characters.
583
- void incCharCount(int nChars);
584
-
585
- // End the current word, sorting it into the list of words.
586
- void endWord();
587
-
588
- // Add a word, sorting it into the list of words.
589
- void addWord(TextWord *word);
590
-
591
- // Add a (potential) underline.
592
- void addUnderline(double x0, double y0, double x1, double y1);
593
-
594
- // Add a hyperlink.
595
- void addLink(int xMin, int yMin, int xMax, int yMax, AnnotLink *link);
596
-
597
- // Coalesce strings that look like parts of the same line.
598
- void coalesce(bool physLayout, double fixedPitch, bool doHTML);
599
- void coalesce(bool physLayout, double fixedPitch, bool doHTML, double minColSpacing1);
600
-
601
- // Find a string. If <startAtTop> is true, starts looking at the
602
- // top of the page; else if <startAtLast> is true, starts looking
603
- // immediately after the last find result; else starts looking at
604
- // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the
605
- // bottom of the page; else if <stopAtLast> is true, stops looking
606
- // just before the last find result; else stops looking at
607
- // <xMax>,<yMax>.
608
- bool findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax, double *yMax);
609
-
610
- // Adds new parameter ignoreDiacritics, which will do diacritics
611
- // insensitive search, i.e. ignore accents, umlauts, diaeresis,etc.
612
- // while matching. This option will be ignored if <s> contains characters
613
- // which are not pure ascii.
614
- bool findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool ignoreDiacritics, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax,
615
- double *yMax);
616
-
617
- // Adds new parameter <matchAcrossLines>, which allows <s> to match on text
618
- // spanning from end of a line to the next line. In that case, the rect for
619
- // the part of match that falls on the next line will be stored in
620
- // <continueMatch>, and if hyphenation (i.e. ignoring hyphen at end of line)
621
- // was used while matching at the end of the line prior to <continueMatch>,
622
- // then <ignoredHyphen> will be true, otherwise will be false.
623
- // Only finding across two lines is supported, i.e. it won't match where <s>
624
- // spans more than two lines.
625
- //
626
- // <matchAcrossLines> will be ignored if <backward> is true (as that
627
- // combination has not been implemented yet).
628
- bool findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool ignoreDiacritics, bool matchAcrossLines, bool backward, bool wholeWord, double *xMin, double *yMin,
629
- double *xMax, double *yMax, PDFRectangle *continueMatch, bool *ignoredHyphen);
630
-
631
- // Get the text which is inside the specified rectangle.
632
- GooString *getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const;
633
-
634
- void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style);
635
-
636
- void drawSelection(OutputDev *out, double scale, int rotation, const PDFRectangle *selection, SelectionStyle style, const GfxColor *glyph_color, const GfxColor *box_color);
637
-
638
- std::vector<PDFRectangle *> *getSelectionRegion(const PDFRectangle *selection, SelectionStyle style, double scale);
639
-
640
- GooString *getSelectionText(const PDFRectangle *selection, SelectionStyle style);
641
-
642
- std::vector<TextWordSelection *> **getSelectionWords(const PDFRectangle *selection, SelectionStyle style, int *nLines);
643
-
644
- // Find a string by character position and length. If found, sets
645
- // the text bounding rectangle and returns true; otherwise returns
646
- // false.
647
- bool findCharRange(int pos, int length, double *xMin, double *yMin, double *xMax, double *yMax) const;
648
-
649
- // Dump contents of page to a file.
650
- void dump(void *outputStream, TextOutputFunc outputFunc, bool physLayout, EndOfLineKind textEOL, bool pageBreaks);
651
-
652
- // Get the head of the linked list of TextFlows.
653
- const TextFlow *getFlows() const { return flows; }
654
-
655
- // If true, will combine characters when a base and combining
656
- // character are drawn on eachother.
657
- void setMergeCombining(bool merge);
658
-
659
- #ifdef TEXTOUT_WORD_LIST
660
- // Build a flat word list, in content stream order (if
661
- // this->rawOrder is true), physical layout order (if <physLayout>
662
- // is true and this->rawOrder is false), or reading order (if both
663
- // flags are false).
664
- std::unique_ptr<TextWordList> makeWordList(bool physLayout);
665
- #endif
666
-
667
- private:
668
- // Destructor.
669
- ~TextPage();
670
-
671
- void clear();
672
- void assignColumns(TextLineFrag *frags, int nFrags, bool rot) const;
673
- int dumpFragment(const Unicode *text, int len, const UnicodeMap *uMap, GooString *s) const;
674
- void adjustRotation(TextLine *line, int start, int end, double *xMin, double *xMax, double *yMin, double *yMax);
675
-
676
- bool rawOrder; // keep text in content stream order
677
- bool discardDiag; // discard diagonal text
678
- bool mergeCombining; // merge when combining and base characters
679
- // are drawn on top of each other
680
-
681
- double pageWidth, pageHeight; // width and height of current page
682
- TextWord *curWord; // currently active string
683
- int charPos; // next character position (within content
684
- // stream)
685
- TextFontInfo *curFont; // current font
686
- double curFontSize; // current font size
687
- int nest; // current nesting level (for Type 3 fonts)
688
- int nTinyChars; // number of "tiny" chars seen so far
689
- bool lastCharOverlap; // set if the last added char overlapped the
690
- // previous char
691
- bool diagonal; // whether the current text is diagonal
692
-
693
- std::unique_ptr<TextPool> pools[4]; // a "pool" of TextWords for each rotation
694
- TextFlow *flows; // linked list of flows
695
- TextBlock **blocks; // array of blocks, in yx order
696
- int nBlocks; // number of blocks
697
- int primaryRot; // primary rotation
698
- bool primaryLR; // primary direction (true means L-to-R,
699
- // false means R-to-L)
700
- TextWord *rawWords; // list of words, in raw order (only if
701
- // rawOrder is set)
702
- TextWord *rawLastWord; // last word on rawWords list
703
-
704
- std::vector<std::unique_ptr<TextFontInfo>> fonts; // all font info objects used on this page
705
-
706
- double lastFindXMin, // coordinates of the last "find" result
707
- lastFindYMin;
708
- bool haveLastFind;
709
-
710
- std::vector<std::unique_ptr<TextUnderline>> underlines;
711
- std::vector<std::unique_ptr<TextLink>> links;
712
-
713
- int refCnt;
714
-
715
- friend class TextLine;
716
- friend class TextLineFrag;
717
- friend class TextBlock;
718
- friend class TextFlow;
719
- friend class TextWordList;
720
- friend class TextSelectionPainter;
721
- friend class TextSelectionDumper;
722
- };
723
-
724
- //------------------------------------------------------------------------
725
- // ActualText
726
- //------------------------------------------------------------------------
727
-
728
- class POPPLER_PRIVATE_EXPORT ActualText
729
- {
730
- public:
731
- // Create an ActualText
732
- explicit ActualText(TextPage *out);
733
- ~ActualText();
734
-
735
- ActualText(const ActualText &) = delete;
736
- ActualText &operator=(const ActualText &) = delete;
737
-
738
- void addChar(const GfxState *state, double x, double y, double dx, double dy, CharCode c, int nBytes, const Unicode *u, int uLen);
739
- void begin(const GfxState *state, const GooString *text);
740
- void end(const GfxState *state);
741
-
742
- private:
743
- TextPage *text;
744
-
745
- GooString *actualText; // replacement text for the span
746
- double actualTextX0;
747
- double actualTextY0;
748
- double actualTextX1;
749
- double actualTextY1;
750
- int actualTextNBytes;
751
- };
752
-
753
- //------------------------------------------------------------------------
754
- // TextOutputDev
755
- //------------------------------------------------------------------------
756
-
757
- class POPPLER_PRIVATE_EXPORT TextOutputDev : public OutputDev
758
- {
759
- public:
760
- static double minColSpacing1_default;
761
-
762
- // Open a text output file. If <fileName> is NULL, no file is
763
- // written (this is useful, e.g., for searching text). If
764
- // <physLayoutA> is true, the original physical layout of the text
765
- // is maintained. If <rawOrder> is true, the text is kept in
766
- // content stream order. If <discardDiag> is true, diagonal text
767
- // is removed from output.
768
- TextOutputDev(const char *fileName, bool physLayoutA, double fixedPitchA, bool rawOrderA, bool append, bool discardDiagA = false);
769
-
770
- // Create a TextOutputDev which will write to a generic stream. If
771
- // <physLayoutA> is true, the original physical layout of the text
772
- // is maintained. If <rawOrder> is true, the text is kept in
773
- // content stream order. If <discardDiag> is true, diagonal text
774
- // is removed from output.
775
- TextOutputDev(TextOutputFunc func, void *stream, bool physLayoutA, double fixedPitchA, bool rawOrderA, bool discardDiagA = false);
776
-
777
- // Destructor.
778
- ~TextOutputDev() override;
779
-
780
- // Check if file was successfully created.
781
- virtual bool isOk() { return ok; }
782
-
783
- //---- get info about output device
784
-
785
- // Does this device use upside-down coordinates?
786
- // (Upside-down means (0,0) is the top left corner of the page.)
787
- bool upsideDown() override { return true; }
788
-
789
- // Does this device use drawChar() or drawString()?
790
- bool useDrawChar() override { return true; }
791
-
792
- // Does this device use beginType3Char/endType3Char? Otherwise,
793
- // text in Type 3 fonts will be drawn with drawChar/drawString.
794
- bool interpretType3Chars() override { return false; }
795
-
796
- // Does this device need non-text content?
797
- bool needNonText() override { return false; }
798
-
799
- // Does this device require incCharCount to be called for text on
800
- // non-shown layers?
801
- bool needCharCount() override { return true; }
802
-
803
- //----- initialization and control
804
-
805
- // Start a page.
806
- void startPage(int pageNum, GfxState *state, XRef *xref) override;
807
-
808
- // End a page.
809
- void endPage() override;
810
-
811
- //----- save/restore graphics state
812
- void restoreState(GfxState *state) override;
813
-
814
- //----- update text state
815
- void updateFont(GfxState *state) override;
816
-
817
- //----- text drawing
818
- void beginString(GfxState *state, const GooString *s) override;
819
- void endString(GfxState *state) override;
820
- void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode c, int nBytes, const Unicode *u, int uLen) override;
821
- void incCharCount(int nChars) override;
822
- void beginActualText(GfxState *state, const GooString *text) override;
823
- void endActualText(GfxState *state) override;
824
-
825
- //----- path painting
826
- void stroke(GfxState *state) override;
827
- void fill(GfxState *state) override;
828
- void eoFill(GfxState *state) override;
829
-
830
- //----- link borders
831
- void processLink(AnnotLink *link) override;
832
-
833
- //----- special access
834
-
835
- // Find a string. If <startAtTop> is true, starts looking at the
836
- // top of the page; else if <startAtLast> is true, starts looking
837
- // immediately after the last find result; else starts looking at
838
- // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the
839
- // bottom of the page; else if <stopAtLast> is true, stops looking
840
- // just before the last find result; else stops looking at
841
- // <xMax>,<yMax>.
842
- bool findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax, double *yMax) const;
843
-
844
- // Get the text which is inside the specified rectangle.
845
- GooString *getText(double xMin, double yMin, double xMax, double yMax) const;
846
-
847
- // Find a string by character position and length. If found, sets
848
- // the text bounding rectangle and returns true; otherwise returns
849
- // false.
850
- bool findCharRange(int pos, int length, double *xMin, double *yMin, double *xMax, double *yMax) const;
851
-
852
- void drawSelection(OutputDev *out, double scale, int rotation, const PDFRectangle *selection, SelectionStyle style, const GfxColor *glyph_color, const GfxColor *box_color);
853
-
854
- std::vector<PDFRectangle *> *getSelectionRegion(const PDFRectangle *selection, SelectionStyle style, double scale);
855
-
856
- GooString *getSelectionText(const PDFRectangle *selection, SelectionStyle style);
857
-
858
- // If true, will combine characters when a base and combining
859
- // character are drawn on eachother.
860
- void setMergeCombining(bool merge);
861
-
862
- #ifdef TEXTOUT_WORD_LIST
863
- // Build a flat word list, in content stream order (if
864
- // this->rawOrder is true), physical layout order (if
865
- // this->physLayout is true and this->rawOrder is false), or reading
866
- // order (if both flags are false).
867
- std::unique_ptr<TextWordList> makeWordList();
868
- #endif
869
-
870
- // Returns the TextPage object for the last rasterized page,
871
- // transferring ownership to the caller.
872
- TextPage *takeText();
873
-
874
- // Turn extra processing for HTML conversion on or off.
875
- void enableHTMLExtras(bool doHTMLA) { doHTML = doHTMLA; }
876
-
877
- // Get the head of the linked list of TextFlows for the
878
- // last rasterized page.
879
- const TextFlow *getFlows() const;
880
-
881
- static constexpr EndOfLineKind defaultEndOfLine()
882
- {
883
- #if defined(_WIN32)
884
- return eolDOS;
885
- #else
886
- return eolUnix;
887
- #endif
888
- }
889
- void setTextEOL(EndOfLineKind textEOLA) { textEOL = textEOLA; }
890
- void setTextPageBreaks(bool textPageBreaksA) { textPageBreaks = textPageBreaksA; }
891
- double getMinColSpacing1() const { return minColSpacing1; }
892
- void setMinColSpacing1(double val) { minColSpacing1 = val; }
893
-
894
- private:
895
- TextOutputFunc outputFunc; // output function
896
- void *outputStream; // output stream
897
- bool needClose; // need to close the output file?
898
- // (only if outputStream is a FILE*)
899
- TextPage *text; // text for the current page
900
- bool physLayout; // maintain original physical layout when
901
- // dumping text
902
- double fixedPitch; // if physLayout is true and this is non-zero,
903
- // assume fixed-pitch characters with this
904
- // width
905
- double minColSpacing1; // see default value defined with same name at TextOutputDev.cc
906
- bool rawOrder; // keep text in content stream order
907
- bool discardDiag; // Diagonal text, i.e., text that is not close to one of the
908
- // 0, 90, 180, or 270 degree axes, is discarded. This is useful
909
- // to skip watermarks drawn on top of body text, etc.
910
- bool doHTML; // extra processing for HTML conversion
911
- bool ok; // set up ok?
912
- bool textPageBreaks; // insert end-of-page markers?
913
- EndOfLineKind textEOL; // type of EOL marker to use
914
-
915
- ActualText *actualText;
916
- };
917
-
918
- #endif