pdf2json 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (473) hide show
  1. data/README.markdown +9 -0
  2. data/bin/.gitkeep +0 -0
  3. data/ext/extconf.rb +30 -0
  4. data/lib/pdf2json.rb +8 -0
  5. data/pdf2json-0.52-source/AUTHORS +24 -0
  6. data/pdf2json-0.52-source/CHANGES +11 -0
  7. data/pdf2json-0.52-source/Makefile +84 -0
  8. data/pdf2json-0.52-source/Makefile.in +84 -0
  9. data/pdf2json-0.52-source/aclocal.m4 +274 -0
  10. data/pdf2json-0.52-source/aconf-win32.h +86 -0
  11. data/pdf2json-0.52-source/aconf.h +42 -0
  12. data/pdf2json-0.52-source/aconf.h.in +41 -0
  13. data/pdf2json-0.52-source/autom4te.cache/output.0 +6908 -0
  14. data/pdf2json-0.52-source/autom4te.cache/requests +76 -0
  15. data/pdf2json-0.52-source/autom4te.cache/traces.0 +466 -0
  16. data/pdf2json-0.52-source/config.log +1259 -0
  17. data/pdf2json-0.52-source/config.status +1050 -0
  18. data/pdf2json-0.52-source/configure +6908 -0
  19. data/pdf2json-0.52-source/configure.ac +93 -0
  20. data/pdf2json-0.52-source/doc/pdffonts.1 +130 -0
  21. data/pdf2json-0.52-source/doc/pdffonts.cat +107 -0
  22. data/pdf2json-0.52-source/doc/pdffonts.hlp +117 -0
  23. data/pdf2json-0.52-source/doc/pdfimages.1 +102 -0
  24. data/pdf2json-0.52-source/doc/pdfimages.cat +92 -0
  25. data/pdf2json-0.52-source/doc/pdfimages.hlp +101 -0
  26. data/pdf2json-0.52-source/doc/pdfinfo.1 +158 -0
  27. data/pdf2json-0.52-source/doc/pdfinfo.cat +119 -0
  28. data/pdf2json-0.52-source/doc/pdfinfo.hlp +129 -0
  29. data/pdf2json-0.52-source/doc/pdftoppm.1 +115 -0
  30. data/pdf2json-0.52-source/doc/pdftoppm.cat +105 -0
  31. data/pdf2json-0.52-source/doc/pdftoppm.hlp +114 -0
  32. data/pdf2json-0.52-source/doc/pdftops.1 +229 -0
  33. data/pdf2json-0.52-source/doc/pdftops.cat +221 -0
  34. data/pdf2json-0.52-source/doc/pdftops.hlp +231 -0
  35. data/pdf2json-0.52-source/doc/pdftotext.1 +137 -0
  36. data/pdf2json-0.52-source/doc/pdftotext.cat +120 -0
  37. data/pdf2json-0.52-source/doc/pdftotext.hlp +133 -0
  38. data/pdf2json-0.52-source/doc/sample-xpdfrc +91 -0
  39. data/pdf2json-0.52-source/doc/xpdf.1 +513 -0
  40. data/pdf2json-0.52-source/doc/xpdf.cat +476 -0
  41. data/pdf2json-0.52-source/doc/xpdf.hlp +489 -0
  42. data/pdf2json-0.52-source/doc/xpdfrc.5 +480 -0
  43. data/pdf2json-0.52-source/doc/xpdfrc.cat +474 -0
  44. data/pdf2json-0.52-source/doc/xpdfrc.hlp +479 -0
  45. data/pdf2json-0.52-source/fofi/.DS_Store +0 -0
  46. data/pdf2json-0.52-source/fofi/FoFiBase.cc +156 -0
  47. data/pdf2json-0.52-source/fofi/FoFiBase.h +57 -0
  48. data/pdf2json-0.52-source/fofi/FoFiBase.o +0 -0
  49. data/pdf2json-0.52-source/fofi/FoFiEncodings.cc +994 -0
  50. data/pdf2json-0.52-source/fofi/FoFiEncodings.h +36 -0
  51. data/pdf2json-0.52-source/fofi/FoFiEncodings.o +0 -0
  52. data/pdf2json-0.52-source/fofi/FoFiTrueType.cc +2027 -0
  53. data/pdf2json-0.52-source/fofi/FoFiTrueType.h +174 -0
  54. data/pdf2json-0.52-source/fofi/FoFiTrueType.o +0 -0
  55. data/pdf2json-0.52-source/fofi/FoFiType1.cc +252 -0
  56. data/pdf2json-0.52-source/fofi/FoFiType1.h +59 -0
  57. data/pdf2json-0.52-source/fofi/FoFiType1.o +0 -0
  58. data/pdf2json-0.52-source/fofi/FoFiType1C.cc +2603 -0
  59. data/pdf2json-0.52-source/fofi/FoFiType1C.h +233 -0
  60. data/pdf2json-0.52-source/fofi/FoFiType1C.o +0 -0
  61. data/pdf2json-0.52-source/fofi/Makefile +70 -0
  62. data/pdf2json-0.52-source/fofi/Makefile.dep +0 -0
  63. data/pdf2json-0.52-source/fofi/Makefile.in +70 -0
  64. data/pdf2json-0.52-source/fofi/libfofi.a +0 -0
  65. data/pdf2json-0.52-source/fofi/vms_make.com +0 -0
  66. data/pdf2json-0.52-source/freetype.win32/.DS_Store +0 -0
  67. data/pdf2json-0.52-source/freetype.win32/include/.DS_Store +0 -0
  68. data/pdf2json-0.52-source/freetype.win32/include/freetype/config/ftconfig.h +528 -0
  69. data/pdf2json-0.52-source/freetype.win32/include/freetype/config/ftheader.h +780 -0
  70. data/pdf2json-0.52-source/freetype.win32/include/freetype/config/ftmodule.h +32 -0
  71. data/pdf2json-0.52-source/freetype.win32/include/freetype/config/ftoption.h +733 -0
  72. data/pdf2json-0.52-source/freetype.win32/include/freetype/config/ftstdlib.h +173 -0
  73. data/pdf2json-0.52-source/freetype.win32/include/freetype/freetype.h +3919 -0
  74. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftadvanc.h +179 -0
  75. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftbbox.h +94 -0
  76. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftbdf.h +209 -0
  77. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftbitmap.h +227 -0
  78. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftcache.h +1128 -0
  79. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftchapters.h +103 -0
  80. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftcid.h +166 -0
  81. data/pdf2json-0.52-source/freetype.win32/include/freetype/fterrdef.h +244 -0
  82. data/pdf2json-0.52-source/freetype.win32/include/freetype/fterrors.h +206 -0
  83. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftgasp.h +120 -0
  84. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftglyph.h +613 -0
  85. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftgxval.h +358 -0
  86. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftgzip.h +102 -0
  87. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftimage.h +1313 -0
  88. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftincrem.h +353 -0
  89. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftlcdfil.h +213 -0
  90. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftlist.h +277 -0
  91. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftlzw.h +99 -0
  92. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftmac.h +274 -0
  93. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftmm.h +378 -0
  94. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftmodapi.h +483 -0
  95. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftmoderr.h +155 -0
  96. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftotval.h +203 -0
  97. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftoutln.h +537 -0
  98. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftpfr.h +172 -0
  99. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftrender.h +230 -0
  100. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftsizes.h +159 -0
  101. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftsnames.h +200 -0
  102. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftstroke.h +716 -0
  103. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftsynth.h +80 -0
  104. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftsystem.h +347 -0
  105. data/pdf2json-0.52-source/freetype.win32/include/freetype/fttrigon.h +350 -0
  106. data/pdf2json-0.52-source/freetype.win32/include/freetype/fttypes.h +588 -0
  107. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftwinfnt.h +274 -0
  108. data/pdf2json-0.52-source/freetype.win32/include/freetype/ftxf86.h +83 -0
  109. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/autohint.h +231 -0
  110. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftcalc.h +179 -0
  111. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftdebug.h +250 -0
  112. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftdriver.h +422 -0
  113. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftgloadr.h +168 -0
  114. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftmemory.h +380 -0
  115. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftobjs.h +1428 -0
  116. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftpic.h +67 -0
  117. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftrfork.h +196 -0
  118. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftserv.h +620 -0
  119. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftstream.h +539 -0
  120. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/fttrace.h +139 -0
  121. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftvalid.h +150 -0
  122. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/internal.h +51 -0
  123. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/pcftypes.h +56 -0
  124. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/psaux.h +873 -0
  125. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/pshints.h +712 -0
  126. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svbdf.h +77 -0
  127. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svcid.h +83 -0
  128. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svgldict.h +82 -0
  129. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svgxval.h +72 -0
  130. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svkern.h +51 -0
  131. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svmm.h +104 -0
  132. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svotval.h +55 -0
  133. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svpfr.h +66 -0
  134. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svpostnm.h +79 -0
  135. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svpscmap.h +164 -0
  136. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svpsinfo.h +92 -0
  137. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svsfnt.h +102 -0
  138. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svttcmap.h +106 -0
  139. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svtteng.h +53 -0
  140. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svttglyf.h +67 -0
  141. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svwinfnt.h +50 -0
  142. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svxf86nm.h +55 -0
  143. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/sfnt.h +897 -0
  144. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/t1types.h +270 -0
  145. data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/tttypes.h +1543 -0
  146. data/pdf2json-0.52-source/freetype.win32/include/freetype/t1tables.h +504 -0
  147. data/pdf2json-0.52-source/freetype.win32/include/freetype/ttnameid.h +1247 -0
  148. data/pdf2json-0.52-source/freetype.win32/include/freetype/tttables.h +759 -0
  149. data/pdf2json-0.52-source/freetype.win32/include/freetype/tttags.h +107 -0
  150. data/pdf2json-0.52-source/freetype.win32/include/freetype/ttunpat.h +59 -0
  151. data/pdf2json-0.52-source/freetype.win32/include/ft2build.h +39 -0
  152. data/pdf2json-0.52-source/freetype.win32/lib/freetype_a.lib +0 -0
  153. data/pdf2json-0.52-source/goo/.DS_Store +0 -0
  154. data/pdf2json-0.52-source/goo/FixedPoint.cc +118 -0
  155. data/pdf2json-0.52-source/goo/FixedPoint.h +155 -0
  156. data/pdf2json-0.52-source/goo/FixedPoint.o +0 -0
  157. data/pdf2json-0.52-source/goo/GHash.cc +380 -0
  158. data/pdf2json-0.52-source/goo/GHash.h +78 -0
  159. data/pdf2json-0.52-source/goo/GHash.o +0 -0
  160. data/pdf2json-0.52-source/goo/GList.cc +97 -0
  161. data/pdf2json-0.52-source/goo/GList.h +96 -0
  162. data/pdf2json-0.52-source/goo/GList.o +0 -0
  163. data/pdf2json-0.52-source/goo/GMutex.h +49 -0
  164. data/pdf2json-0.52-source/goo/GString.cc +724 -0
  165. data/pdf2json-0.52-source/goo/GString.cc.fixed +718 -0
  166. data/pdf2json-0.52-source/goo/GString.h +136 -0
  167. data/pdf2json-0.52-source/goo/GString.o +0 -0
  168. data/pdf2json-0.52-source/goo/ImgWriter.o +0 -0
  169. data/pdf2json-0.52-source/goo/JpegWriter.o +0 -0
  170. data/pdf2json-0.52-source/goo/Makefile +72 -0
  171. data/pdf2json-0.52-source/goo/Makefile.dep +0 -0
  172. data/pdf2json-0.52-source/goo/Makefile.in +72 -0
  173. data/pdf2json-0.52-source/goo/PNGWriter.o +0 -0
  174. data/pdf2json-0.52-source/goo/gfile.cc +731 -0
  175. data/pdf2json-0.52-source/goo/gfile.h +138 -0
  176. data/pdf2json-0.52-source/goo/gfile.o +0 -0
  177. data/pdf2json-0.52-source/goo/gmem.cc +264 -0
  178. data/pdf2json-0.52-source/goo/gmem.h +79 -0
  179. data/pdf2json-0.52-source/goo/gmem.o +0 -0
  180. data/pdf2json-0.52-source/goo/gmempp.cc +32 -0
  181. data/pdf2json-0.52-source/goo/gmempp.o +0 -0
  182. data/pdf2json-0.52-source/goo/gtypes.h +29 -0
  183. data/pdf2json-0.52-source/goo/libGoo.a +0 -0
  184. data/pdf2json-0.52-source/goo/parseargs.c +190 -0
  185. data/pdf2json-0.52-source/goo/parseargs.h +71 -0
  186. data/pdf2json-0.52-source/goo/parseargs.o +0 -0
  187. data/pdf2json-0.52-source/goo/vms_directory.c +214 -0
  188. data/pdf2json-0.52-source/goo/vms_dirent.h +67 -0
  189. data/pdf2json-0.52-source/goo/vms_make.com +82 -0
  190. data/pdf2json-0.52-source/goo/vms_sys_dirent.h +54 -0
  191. data/pdf2json-0.52-source/goo/vms_unix_time.h +102 -0
  192. data/pdf2json-0.52-source/goo/vms_unix_times.c +42 -0
  193. data/pdf2json-0.52-source/goo/vms_unlink.c +22 -0
  194. data/pdf2json-0.52-source/ms_make.bat +199 -0
  195. data/pdf2json-0.52-source/splash/.DS_Store +0 -0
  196. data/pdf2json-0.52-source/splash/Makefile +103 -0
  197. data/pdf2json-0.52-source/splash/Makefile.dep +0 -0
  198. data/pdf2json-0.52-source/splash/Makefile.in +103 -0
  199. data/pdf2json-0.52-source/splash/Splash.cc +3310 -0
  200. data/pdf2json-0.52-source/splash/Splash.h +293 -0
  201. data/pdf2json-0.52-source/splash/Splash.o +0 -0
  202. data/pdf2json-0.52-source/splash/SplashBitmap.cc +188 -0
  203. data/pdf2json-0.52-source/splash/SplashBitmap.h +64 -0
  204. data/pdf2json-0.52-source/splash/SplashBitmap.o +0 -0
  205. data/pdf2json-0.52-source/splash/SplashClip.cc +382 -0
  206. data/pdf2json-0.52-source/splash/SplashClip.h +107 -0
  207. data/pdf2json-0.52-source/splash/SplashClip.o +0 -0
  208. data/pdf2json-0.52-source/splash/SplashErrorCodes.h +32 -0
  209. data/pdf2json-0.52-source/splash/SplashFTFont.cc +357 -0
  210. data/pdf2json-0.52-source/splash/SplashFTFont.h +58 -0
  211. data/pdf2json-0.52-source/splash/SplashFTFont.o +0 -0
  212. data/pdf2json-0.52-source/splash/SplashFTFontEngine.cc +179 -0
  213. data/pdf2json-0.52-source/splash/SplashFTFontEngine.h +65 -0
  214. data/pdf2json-0.52-source/splash/SplashFTFontEngine.o +0 -0
  215. data/pdf2json-0.52-source/splash/SplashFTFontFile.cc +114 -0
  216. data/pdf2json-0.52-source/splash/SplashFTFontFile.h +73 -0
  217. data/pdf2json-0.52-source/splash/SplashFTFontFile.o +0 -0
  218. data/pdf2json-0.52-source/splash/SplashFont.cc +176 -0
  219. data/pdf2json-0.52-source/splash/SplashFont.h +104 -0
  220. data/pdf2json-0.52-source/splash/SplashFont.o +0 -0
  221. data/pdf2json-0.52-source/splash/SplashFontEngine.cc +317 -0
  222. data/pdf2json-0.52-source/splash/SplashFontEngine.h +91 -0
  223. data/pdf2json-0.52-source/splash/SplashFontEngine.o +0 -0
  224. data/pdf2json-0.52-source/splash/SplashFontFile.cc +55 -0
  225. data/pdf2json-0.52-source/splash/SplashFontFile.h +60 -0
  226. data/pdf2json-0.52-source/splash/SplashFontFile.o +0 -0
  227. data/pdf2json-0.52-source/splash/SplashFontFileID.cc +23 -0
  228. data/pdf2json-0.52-source/splash/SplashFontFileID.h +30 -0
  229. data/pdf2json-0.52-source/splash/SplashFontFileID.o +0 -0
  230. data/pdf2json-0.52-source/splash/SplashGlyphBitmap.h +26 -0
  231. data/pdf2json-0.52-source/splash/SplashMath.h +89 -0
  232. data/pdf2json-0.52-source/splash/SplashPath.cc +184 -0
  233. data/pdf2json-0.52-source/splash/SplashPath.h +121 -0
  234. data/pdf2json-0.52-source/splash/SplashPath.o +0 -0
  235. data/pdf2json-0.52-source/splash/SplashPattern.cc +40 -0
  236. data/pdf2json-0.52-source/splash/SplashPattern.h +65 -0
  237. data/pdf2json-0.52-source/splash/SplashPattern.o +0 -0
  238. data/pdf2json-0.52-source/splash/SplashScreen.cc +383 -0
  239. data/pdf2json-0.52-source/splash/SplashScreen.h +56 -0
  240. data/pdf2json-0.52-source/splash/SplashScreen.o +0 -0
  241. data/pdf2json-0.52-source/splash/SplashState.cc +165 -0
  242. data/pdf2json-0.52-source/splash/SplashState.h +103 -0
  243. data/pdf2json-0.52-source/splash/SplashState.o +0 -0
  244. data/pdf2json-0.52-source/splash/SplashT1Font.cc +287 -0
  245. data/pdf2json-0.52-source/splash/SplashT1Font.h +57 -0
  246. data/pdf2json-0.52-source/splash/SplashT1Font.o +0 -0
  247. data/pdf2json-0.52-source/splash/SplashT1FontEngine.cc +124 -0
  248. data/pdf2json-0.52-source/splash/SplashT1FontEngine.h +53 -0
  249. data/pdf2json-0.52-source/splash/SplashT1FontEngine.o +0 -0
  250. data/pdf2json-0.52-source/splash/SplashT1FontFile.cc +97 -0
  251. data/pdf2json-0.52-source/splash/SplashT1FontFile.h +58 -0
  252. data/pdf2json-0.52-source/splash/SplashT1FontFile.o +0 -0
  253. data/pdf2json-0.52-source/splash/SplashTypes.h +132 -0
  254. data/pdf2json-0.52-source/splash/SplashXPath.cc +438 -0
  255. data/pdf2json-0.52-source/splash/SplashXPath.h +100 -0
  256. data/pdf2json-0.52-source/splash/SplashXPath.o +0 -0
  257. data/pdf2json-0.52-source/splash/SplashXPathScanner.cc +428 -0
  258. data/pdf2json-0.52-source/splash/SplashXPathScanner.h +87 -0
  259. data/pdf2json-0.52-source/splash/SplashXPathScanner.o +0 -0
  260. data/pdf2json-0.52-source/splash/libsplash.a +0 -0
  261. data/pdf2json-0.52-source/splash/vms_make.com +0 -0
  262. data/pdf2json-0.52-source/src/.DS_Store +0 -0
  263. data/pdf2json-0.52-source/src/GVector.h +101 -0
  264. data/pdf2json-0.52-source/src/ImgOutputDev.cc +1243 -0
  265. data/pdf2json-0.52-source/src/ImgOutputDev.h +307 -0
  266. data/pdf2json-0.52-source/src/ImgOutputDev.o +0 -0
  267. data/pdf2json-0.52-source/src/Makefile +68 -0
  268. data/pdf2json-0.52-source/src/Makefile.in +68 -0
  269. data/pdf2json-0.52-source/src/XmlFonts.cc +367 -0
  270. data/pdf2json-0.52-source/src/XmlFonts.h +91 -0
  271. data/pdf2json-0.52-source/src/XmlFonts.o +0 -0
  272. data/pdf2json-0.52-source/src/XmlLinks.cc +101 -0
  273. data/pdf2json-0.52-source/src/XmlLinks.h +54 -0
  274. data/pdf2json-0.52-source/src/XmlLinks.o +0 -0
  275. data/pdf2json-0.52-source/src/pdf2json +0 -0
  276. data/pdf2json-0.52-source/src/pdf2json.cc +343 -0
  277. data/pdf2json-0.52-source/src/pdf2json.o +0 -0
  278. data/pdf2json-0.52-source/src/pdf2xml.dtd +22 -0
  279. data/pdf2json-0.52-source/src/pdf2xmljson.dtd +9 -0
  280. data/pdf2json-0.52-source/xpdf/.DS_Store +0 -0
  281. data/pdf2json-0.52-source/xpdf/Annot.cc +1556 -0
  282. data/pdf2json-0.52-source/xpdf/Annot.h +142 -0
  283. data/pdf2json-0.52-source/xpdf/Annot.o +0 -0
  284. data/pdf2json-0.52-source/xpdf/Array.cc +73 -0
  285. data/pdf2json-0.52-source/xpdf/Array.h +58 -0
  286. data/pdf2json-0.52-source/xpdf/Array.o +0 -0
  287. data/pdf2json-0.52-source/xpdf/BuiltinFont.cc +65 -0
  288. data/pdf2json-0.52-source/xpdf/BuiltinFont.h +57 -0
  289. data/pdf2json-0.52-source/xpdf/BuiltinFont.o +0 -0
  290. data/pdf2json-0.52-source/xpdf/BuiltinFontTables.cc +4284 -0
  291. data/pdf2json-0.52-source/xpdf/BuiltinFontTables.h +23 -0
  292. data/pdf2json-0.52-source/xpdf/BuiltinFontTables.o +0 -0
  293. data/pdf2json-0.52-source/xpdf/CMap.cc +408 -0
  294. data/pdf2json-0.52-source/xpdf/CMap.h +102 -0
  295. data/pdf2json-0.52-source/xpdf/CMap.o +0 -0
  296. data/pdf2json-0.52-source/xpdf/Catalog.cc +374 -0
  297. data/pdf2json-0.52-source/xpdf/Catalog.h +97 -0
  298. data/pdf2json-0.52-source/xpdf/Catalog.o +0 -0
  299. data/pdf2json-0.52-source/xpdf/CharCodeToUnicode.cc +540 -0
  300. data/pdf2json-0.52-source/xpdf/CharCodeToUnicode.h +117 -0
  301. data/pdf2json-0.52-source/xpdf/CharCodeToUnicode.o +0 -0
  302. data/pdf2json-0.52-source/xpdf/CharTypes.h +24 -0
  303. data/pdf2json-0.52-source/xpdf/CompactFontTables.h +464 -0
  304. data/pdf2json-0.52-source/xpdf/CoreOutputDev.cc +61 -0
  305. data/pdf2json-0.52-source/xpdf/CoreOutputDev.h +61 -0
  306. data/pdf2json-0.52-source/xpdf/Decrypt.cc +776 -0
  307. data/pdf2json-0.52-source/xpdf/Decrypt.h +95 -0
  308. data/pdf2json-0.52-source/xpdf/Decrypt.o +0 -0
  309. data/pdf2json-0.52-source/xpdf/Dict.cc +95 -0
  310. data/pdf2json-0.52-source/xpdf/Dict.h +77 -0
  311. data/pdf2json-0.52-source/xpdf/Dict.o +0 -0
  312. data/pdf2json-0.52-source/xpdf/Error.cc +38 -0
  313. data/pdf2json-0.52-source/xpdf/Error.h +23 -0
  314. data/pdf2json-0.52-source/xpdf/Error.o +0 -0
  315. data/pdf2json-0.52-source/xpdf/ErrorCodes.h +36 -0
  316. data/pdf2json-0.52-source/xpdf/FontEncodingTables.cc +1824 -0
  317. data/pdf2json-0.52-source/xpdf/FontEncodingTables.h +20 -0
  318. data/pdf2json-0.52-source/xpdf/FontEncodingTables.o +0 -0
  319. data/pdf2json-0.52-source/xpdf/Function.cc +1573 -0
  320. data/pdf2json-0.52-source/xpdf/Function.h +229 -0
  321. data/pdf2json-0.52-source/xpdf/Function.o +0 -0
  322. data/pdf2json-0.52-source/xpdf/Gfx.cc +4187 -0
  323. data/pdf2json-0.52-source/xpdf/Gfx.h +312 -0
  324. data/pdf2json-0.52-source/xpdf/Gfx.o +0 -0
  325. data/pdf2json-0.52-source/xpdf/GfxFont.cc +1568 -0
  326. data/pdf2json-0.52-source/xpdf/GfxFont.h +320 -0
  327. data/pdf2json-0.52-source/xpdf/GfxFont.o +0 -0
  328. data/pdf2json-0.52-source/xpdf/GfxState.cc +4137 -0
  329. data/pdf2json-0.52-source/xpdf/GfxState.h +1244 -0
  330. data/pdf2json-0.52-source/xpdf/GfxState.o +0 -0
  331. data/pdf2json-0.52-source/xpdf/GlobalParams.cc +2924 -0
  332. data/pdf2json-0.52-source/xpdf/GlobalParams.cc.old +2908 -0
  333. data/pdf2json-0.52-source/xpdf/GlobalParams.h +466 -0
  334. data/pdf2json-0.52-source/xpdf/GlobalParams.h.old +463 -0
  335. data/pdf2json-0.52-source/xpdf/GlobalParams.o +0 -0
  336. data/pdf2json-0.52-source/xpdf/ImageOutputDev.cc +195 -0
  337. data/pdf2json-0.52-source/xpdf/ImageOutputDev.h +76 -0
  338. data/pdf2json-0.52-source/xpdf/ImageOutputDev.o +0 -0
  339. data/pdf2json-0.52-source/xpdf/JArithmeticDecoder.cc +322 -0
  340. data/pdf2json-0.52-source/xpdf/JArithmeticDecoder.h +109 -0
  341. data/pdf2json-0.52-source/xpdf/JArithmeticDecoder.o +0 -0
  342. data/pdf2json-0.52-source/xpdf/JBIG2Stream.cc +3413 -0
  343. data/pdf2json-0.52-source/xpdf/JBIG2Stream.h +145 -0
  344. data/pdf2json-0.52-source/xpdf/JBIG2Stream.o +0 -0
  345. data/pdf2json-0.52-source/xpdf/JPXStream.cc +3144 -0
  346. data/pdf2json-0.52-source/xpdf/JPXStream.h +351 -0
  347. data/pdf2json-0.52-source/xpdf/JPXStream.o +0 -0
  348. data/pdf2json-0.52-source/xpdf/Lexer.cc +485 -0
  349. data/pdf2json-0.52-source/xpdf/Lexer.h +80 -0
  350. data/pdf2json-0.52-source/xpdf/Lexer.o +0 -0
  351. data/pdf2json-0.52-source/xpdf/Link.cc +806 -0
  352. data/pdf2json-0.52-source/xpdf/Link.cc.old +784 -0
  353. data/pdf2json-0.52-source/xpdf/Link.h +415 -0
  354. data/pdf2json-0.52-source/xpdf/Link.h.old +369 -0
  355. data/pdf2json-0.52-source/xpdf/Link.o +0 -0
  356. data/pdf2json-0.52-source/xpdf/Makefile +232 -0
  357. data/pdf2json-0.52-source/xpdf/Makefile.dep +0 -0
  358. data/pdf2json-0.52-source/xpdf/Makefile.in +232 -0
  359. data/pdf2json-0.52-source/xpdf/NameToCharCode.cc +116 -0
  360. data/pdf2json-0.52-source/xpdf/NameToCharCode.h +42 -0
  361. data/pdf2json-0.52-source/xpdf/NameToCharCode.o +0 -0
  362. data/pdf2json-0.52-source/xpdf/NameToUnicodeTable.h +1097 -0
  363. data/pdf2json-0.52-source/xpdf/Object.cc +231 -0
  364. data/pdf2json-0.52-source/xpdf/Object.h +303 -0
  365. data/pdf2json-0.52-source/xpdf/Object.o +0 -0
  366. data/pdf2json-0.52-source/xpdf/Outline.cc +151 -0
  367. data/pdf2json-0.52-source/xpdf/Outline.h +76 -0
  368. data/pdf2json-0.52-source/xpdf/Outline.o +0 -0
  369. data/pdf2json-0.52-source/xpdf/OutputDev.cc +131 -0
  370. data/pdf2json-0.52-source/xpdf/OutputDev.h +253 -0
  371. data/pdf2json-0.52-source/xpdf/OutputDev.o +0 -0
  372. data/pdf2json-0.52-source/xpdf/PDFCore.cc +2044 -0
  373. data/pdf2json-0.52-source/xpdf/PDFCore.h +321 -0
  374. data/pdf2json-0.52-source/xpdf/PDFDoc.cc +404 -0
  375. data/pdf2json-0.52-source/xpdf/PDFDoc.h +183 -0
  376. data/pdf2json-0.52-source/xpdf/PDFDoc.o +0 -0
  377. data/pdf2json-0.52-source/xpdf/PDFDocEncoding.cc +44 -0
  378. data/pdf2json-0.52-source/xpdf/PDFDocEncoding.h +16 -0
  379. data/pdf2json-0.52-source/xpdf/PDFDocEncoding.o +0 -0
  380. data/pdf2json-0.52-source/xpdf/PSOutputDev.cc +6224 -0
  381. data/pdf2json-0.52-source/xpdf/PSOutputDev.h +395 -0
  382. data/pdf2json-0.52-source/xpdf/PSOutputDev.o +0 -0
  383. data/pdf2json-0.52-source/xpdf/PSTokenizer.cc +135 -0
  384. data/pdf2json-0.52-source/xpdf/PSTokenizer.h +41 -0
  385. data/pdf2json-0.52-source/xpdf/PSTokenizer.o +0 -0
  386. data/pdf2json-0.52-source/xpdf/Page.cc +454 -0
  387. data/pdf2json-0.52-source/xpdf/Page.h +187 -0
  388. data/pdf2json-0.52-source/xpdf/Page.o +0 -0
  389. data/pdf2json-0.52-source/xpdf/Parser.cc +227 -0
  390. data/pdf2json-0.52-source/xpdf/Parser.h +59 -0
  391. data/pdf2json-0.52-source/xpdf/Parser.o +0 -0
  392. data/pdf2json-0.52-source/xpdf/PreScanOutputDev.cc +257 -0
  393. data/pdf2json-0.52-source/xpdf/PreScanOutputDev.h +130 -0
  394. data/pdf2json-0.52-source/xpdf/PreScanOutputDev.o +0 -0
  395. data/pdf2json-0.52-source/xpdf/SecurityHandler.cc +390 -0
  396. data/pdf2json-0.52-source/xpdf/SecurityHandler.h +160 -0
  397. data/pdf2json-0.52-source/xpdf/SecurityHandler.o +0 -0
  398. data/pdf2json-0.52-source/xpdf/SplashOutputDev.cc +2845 -0
  399. data/pdf2json-0.52-source/xpdf/SplashOutputDev.h +247 -0
  400. data/pdf2json-0.52-source/xpdf/SplashOutputDev.o +0 -0
  401. data/pdf2json-0.52-source/xpdf/Stream-CCITT.h +459 -0
  402. data/pdf2json-0.52-source/xpdf/Stream.cc +4627 -0
  403. data/pdf2json-0.52-source/xpdf/Stream.h +858 -0
  404. data/pdf2json-0.52-source/xpdf/Stream.o +0 -0
  405. data/pdf2json-0.52-source/xpdf/TextOutputDev.cc +4090 -0
  406. data/pdf2json-0.52-source/xpdf/TextOutputDev.h +661 -0
  407. data/pdf2json-0.52-source/xpdf/TextOutputDev.o +0 -0
  408. data/pdf2json-0.52-source/xpdf/UTF8.h +56 -0
  409. data/pdf2json-0.52-source/xpdf/UnicodeMap.cc +302 -0
  410. data/pdf2json-0.52-source/xpdf/UnicodeMap.cc.old +293 -0
  411. data/pdf2json-0.52-source/xpdf/UnicodeMap.h +135 -0
  412. data/pdf2json-0.52-source/xpdf/UnicodeMap.h.old +123 -0
  413. data/pdf2json-0.52-source/xpdf/UnicodeMap.o +0 -0
  414. data/pdf2json-0.52-source/xpdf/UnicodeMapTables.h +361 -0
  415. data/pdf2json-0.52-source/xpdf/UnicodeTypeTable.cc +949 -0
  416. data/pdf2json-0.52-source/xpdf/UnicodeTypeTable.h +20 -0
  417. data/pdf2json-0.52-source/xpdf/UnicodeTypeTable.o +0 -0
  418. data/pdf2json-0.52-source/xpdf/XPDFApp.cc +447 -0
  419. data/pdf2json-0.52-source/xpdf/XPDFApp.h +114 -0
  420. data/pdf2json-0.52-source/xpdf/XPDFCore.cc +1655 -0
  421. data/pdf2json-0.52-source/xpdf/XPDFCore.h +251 -0
  422. data/pdf2json-0.52-source/xpdf/XPDFTree.cc +931 -0
  423. data/pdf2json-0.52-source/xpdf/XPDFTree.h +45 -0
  424. data/pdf2json-0.52-source/xpdf/XPDFTreeP.h +87 -0
  425. data/pdf2json-0.52-source/xpdf/XPDFViewer.cc +3488 -0
  426. data/pdf2json-0.52-source/xpdf/XPDFViewer.h +352 -0
  427. data/pdf2json-0.52-source/xpdf/XRef.cc +896 -0
  428. data/pdf2json-0.52-source/xpdf/XRef.h +133 -0
  429. data/pdf2json-0.52-source/xpdf/XRef.o +0 -0
  430. data/pdf2json-0.52-source/xpdf/XpdfPluginAPI.cc +262 -0
  431. data/pdf2json-0.52-source/xpdf/XpdfPluginAPI.h +341 -0
  432. data/pdf2json-0.52-source/xpdf/XpdfPluginAPI.o +0 -0
  433. data/pdf2json-0.52-source/xpdf/about-text.h +48 -0
  434. data/pdf2json-0.52-source/xpdf/about.xbm +6 -0
  435. data/pdf2json-0.52-source/xpdf/backArrow.xbm +6 -0
  436. data/pdf2json-0.52-source/xpdf/backArrowDis.xbm +6 -0
  437. data/pdf2json-0.52-source/xpdf/config.h +112 -0
  438. data/pdf2json-0.52-source/xpdf/dblLeftArrow.xbm +6 -0
  439. data/pdf2json-0.52-source/xpdf/dblLeftArrowDis.xbm +6 -0
  440. data/pdf2json-0.52-source/xpdf/dblRightArrow.xbm +6 -0
  441. data/pdf2json-0.52-source/xpdf/dblRightArrowDis.xbm +6 -0
  442. data/pdf2json-0.52-source/xpdf/find.xbm +6 -0
  443. data/pdf2json-0.52-source/xpdf/findDis.xbm +6 -0
  444. data/pdf2json-0.52-source/xpdf/forwardArrow.xbm +6 -0
  445. data/pdf2json-0.52-source/xpdf/forwardArrowDis.xbm +6 -0
  446. data/pdf2json-0.52-source/xpdf/leftArrow.xbm +5 -0
  447. data/pdf2json-0.52-source/xpdf/leftArrowDis.xbm +5 -0
  448. data/pdf2json-0.52-source/xpdf/libXpdf.a +0 -0
  449. data/pdf2json-0.52-source/xpdf/pdffonts +0 -0
  450. data/pdf2json-0.52-source/xpdf/pdffonts.cc +298 -0
  451. data/pdf2json-0.52-source/xpdf/pdffonts.o +0 -0
  452. data/pdf2json-0.52-source/xpdf/pdfimages +0 -0
  453. data/pdf2json-0.52-source/xpdf/pdfimages.cc +155 -0
  454. data/pdf2json-0.52-source/xpdf/pdfimages.o +0 -0
  455. data/pdf2json-0.52-source/xpdf/pdfinfo +0 -0
  456. data/pdf2json-0.52-source/xpdf/pdfinfo.cc +387 -0
  457. data/pdf2json-0.52-source/xpdf/pdfinfo.o +0 -0
  458. data/pdf2json-0.52-source/xpdf/pdftoppm.cc +203 -0
  459. data/pdf2json-0.52-source/xpdf/pdftops +0 -0
  460. data/pdf2json-0.52-source/xpdf/pdftops.cc +344 -0
  461. data/pdf2json-0.52-source/xpdf/pdftops.o +0 -0
  462. data/pdf2json-0.52-source/xpdf/pdftotext +0 -0
  463. data/pdf2json-0.52-source/xpdf/pdftotext.cc +333 -0
  464. data/pdf2json-0.52-source/xpdf/pdftotext.o +0 -0
  465. data/pdf2json-0.52-source/xpdf/print.xbm +6 -0
  466. data/pdf2json-0.52-source/xpdf/printDis.xbm +6 -0
  467. data/pdf2json-0.52-source/xpdf/rightArrow.xbm +5 -0
  468. data/pdf2json-0.52-source/xpdf/rightArrowDis.xbm +5 -0
  469. data/pdf2json-0.52-source/xpdf/vms_make.com +129 -0
  470. data/pdf2json-0.52-source/xpdf/xpdf.cc +344 -0
  471. data/pdf2json-0.52-source/xpdf/xpdfIcon.xpm +62 -0
  472. data/pdf2json.gemspec +29 -0
  473. metadata +518 -0
@@ -0,0 +1,661 @@
1
+ //========================================================================
2
+ //
3
+ // TextOutputDev.h
4
+ //
5
+ // Copyright 1997-2003 Glyph & Cog, LLC
6
+ //
7
+ //========================================================================
8
+
9
+ #ifndef TEXTOUTPUTDEV_H
10
+ #define TEXTOUTPUTDEV_H
11
+
12
+ #include <aconf.h>
13
+
14
+ #ifdef USE_GCC_PRAGMAS
15
+ #pragma interface
16
+ #endif
17
+
18
+ #include <stdio.h>
19
+ #include "gtypes.h"
20
+ #include "GfxFont.h"
21
+ #include "OutputDev.h"
22
+
23
+ class GString;
24
+ class GList;
25
+ class GfxFont;
26
+ class GfxState;
27
+ class UnicodeMap;
28
+ class Link;
29
+
30
+ class TextWord;
31
+ class TextPool;
32
+ class TextLine;
33
+ class TextLineFrag;
34
+ class TextBlock;
35
+ class TextFlow;
36
+ class TextWordList;
37
+ class TextPage;
38
+
39
+ //------------------------------------------------------------------------
40
+
41
+ typedef void (*TextOutputFunc)(void *stream, char *text, int len); // output callback: receives an opaque stream pointer, a text buffer, and a length (presumably the length of <text> -- confirm)
42
+
43
+ //------------------------------------------------------------------------
44
+ // TextFontInfo
45
+ //------------------------------------------------------------------------
46
+
47
+ class TextFontInfo { // per-font record: holds a GfxFont pointer, plus a name and flags when TEXTOUT_WORD_LIST is enabled
48
+ public:
49
+
50
+ TextFontInfo(GfxState *state); // constructed from a graphics state; defined out of line
51
+ ~TextFontInfo();
52
+
53
+ GBool matches(GfxState *state); // NOTE(review): presumably tests whether <state>'s font is the one recorded here -- confirm in the implementation
54
+
55
+ #if TEXTOUT_WORD_LIST
56
+ // Get the font name (which may be NULL).
57
+ GString *getFontName() { return fontName; }
58
+
59
+ // Get font descriptor flags. Each accessor returns <flags> masked with one font* bit, so test the result for nonzero rather than comparing to 1.
60
+ GBool isFixedWidth() { return flags & fontFixedWidth; }
61
+ GBool isSerif() { return flags & fontSerif; }
62
+ GBool isSymbolic() { return flags & fontSymbolic; }
63
+ GBool isItalic() { return flags & fontItalic; }
64
+ GBool isBold() { return flags & fontBold; }
65
+ #endif
66
+
67
+ private:
68
+
69
+ GfxFont *gfxFont; // the underlying font object
70
+ #if TEXTOUT_WORD_LIST
71
+ GString *fontName; // font name, may be NULL (see getFontName)
72
+ int flags; // bit set tested against the font* masks above
73
+ #endif
74
+
75
+ friend class TextWord;
76
+ friend class TextPage;
77
+ };
78
+
79
+ //------------------------------------------------------------------------
80
+ // TextWord
81
+ //------------------------------------------------------------------------
82
+
83
+ class TextWord { // one word of text: Unicode chars, per-char edges, bounding box, font, and a link to the next word
84
+ public:
85
+
86
+ // Constructor.
87
+ TextWord(GfxState *state, int rotA, double x0, double y0,
88
+ int charPosA, TextFontInfo *fontA, double fontSize);
89
+
90
+ // Destructor.
91
+ ~TextWord();
92
+
93
+ // Add a character to the word.
94
+ void addChar(GfxState *state, double x, double y,
95
+ double dx, double dy, Unicode u);
96
+
97
+ // Merge <word> onto the end of <this>.
98
+ void merge(TextWord *word);
99
+
100
+ // Compares <this> to <word>, returning -1 (<), 0 (=), or +1 (>),
101
+ // based on a primary-axis comparison, e.g., x ordering if rot=0.
102
+ int primaryCmp(TextWord *word);
103
+
104
+ // Return the distance along the primary axis between <this> and
105
+ // <word>.
106
+ double primaryDelta(TextWord *word);
107
+
108
+ static int cmpYX(const void *p1, const void *p2); // qsort-style comparator signature; NOTE(review): presumably orders by y, then x -- confirm
109
+
110
+ // Get the TextFontInfo object associated with this word.
111
+ TextFontInfo *getFontInfo() { return font; }
112
+
113
+ // Get the next TextWord on the linked list.
114
+ TextWord *getNext() { return next; }
115
+
116
+ #if TEXTOUT_WORD_LIST
117
+ int getLength() { return len; } // number of entries in use in <text>
118
+ Unicode getChar(int idx) { return text[idx]; } // no bounds check: caller must keep <idx> within the text array
119
+ GString *getText();
120
+ GString *getFontName() { return font->fontName; } // dereferences <font> without a NULL check
121
+ void getColor(double *r, double *g, double *b) // copies the word color into the three out-parameters
122
+ { *r = colorR; *g = colorG; *b = colorB; }
123
+ void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) // copies the word bounding box into the out-parameters
124
+ { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; }
125
+ void getCharBBox(int charIdx, double *xMinA, double *yMinA,
126
+ double *xMaxA, double *yMaxA);
127
+ double getFontSize() { return fontSize; }
128
+ int getRotation() { return rot; }
129
+ int getCharPos() { return charPos; }
130
+ int getCharLen() { return charLen; }
131
+ GBool getSpaceAfter() { return spaceAfter; }
132
+ #endif
133
+
134
+ GBool isUnderlined() { return underlined; }
135
+ Link *getLink() { return link; }
136
+
137
+ private:
138
+
139
+ int rot; // rotation, multiple of 90 degrees
140
+ // (0, 1, 2, or 3)
141
+ double xMin, xMax; // bounding box x coordinates
142
+ double yMin, yMax; // bounding box y coordinates
143
+ double base; // baseline x or y coordinate
144
+ Unicode *text; // the text
145
+ double *edge; // "near" edge x or y coord of each char
146
+ // (plus one extra entry for the last char)
147
+ int len; // length of text and edge arrays
148
+ int size; // size of text and edge arrays (NOTE(review): presumably allocated capacity, as opposed to <len> in use -- confirm)
149
+ int charPos; // character position (within content stream)
150
+ int charLen; // number of content stream characters in
151
+ // this word
152
+ TextFontInfo *font; // font information
153
+ double fontSize; // font size
154
+ GBool spaceAfter; // set if there is a space between this
155
+ // word and the next word on the line
156
+ TextWord *next; // next word in line
157
+
158
+ #if TEXTOUT_WORD_LIST
159
+ double colorR, // word color
160
+ colorG,
161
+ colorB;
162
+ #endif
163
+
164
+ GBool underlined; // value reported by isUnderlined()
165
+ Link *link; // value reported by getLink()
166
+
167
+ friend class TextPool;
168
+ friend class TextLine;
169
+ friend class TextBlock;
170
+ friend class TextFlow;
171
+ friend class TextWordList;
172
+ friend class TextPage;
173
+ };
174
+
175
+ //------------------------------------------------------------------------
176
+ // TextPool
177
+ //------------------------------------------------------------------------
178
+
179
+ class TextPool { // words bucketed by baseline: an array of TextWord linked lists indexed by baseline bucket
180
+ public:
181
+
182
+ TextPool();
183
+ ~TextPool();
184
+
185
+ TextWord *getPool(int baseIdx) { return pool[baseIdx - minBaseIdx]; } // head of the list for bucket <baseIdx>; the index is offset by minBaseIdx and not range-checked
186
+ void setPool(int baseIdx, TextWord *p) { pool[baseIdx - minBaseIdx] = p; } // replaces the list head for bucket <baseIdx>; same unchecked offset as getPool
187
+
188
+ int getBaseIdx(double base); // NOTE(review): presumably maps a baseline coordinate to its bucket index -- confirm in the implementation
189
+
190
+ void addWord(TextWord *word);
191
+
192
+ private:
193
+
194
+ int minBaseIdx; // min baseline bucket index
195
+ int maxBaseIdx; // max baseline bucket index
196
+ TextWord **pool; // array of linked lists, one for each
197
+ // baseline value (multiple of 4 pts)
198
+ TextWord *cursor; // pointer to last-accessed word
199
+ int cursorBaseIdx; // baseline bucket index of last-accessed word
200
+
201
+ friend class TextBlock;
202
+ friend class TextPage;
203
+ };
204
+
205
+ //------------------------------------------------------------------------
206
+ // TextLine
207
+ //------------------------------------------------------------------------
208
+
209
+ class TextLine { // one line of text inside a TextBlock: a linked list of words plus the line's Unicode text and per-char edges
210
+ public:
211
+
212
+ TextLine(TextBlock *blkA, int rotA, double baseA);
213
+ ~TextLine();
214
+
215
+ void addWord(TextWord *word);
216
+
217
+ // Return the distance along the primary axis between <this> and
218
+ // <line>.
219
+ double primaryDelta(TextLine *line);
220
+
221
+ // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
222
+ // based on a primary-axis comparison, e.g., x ordering if rot=0.
223
+ int primaryCmp(TextLine *line);
224
+
225
+ // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
226
+ // based on a secondary-axis comparison of the baselines, e.g., y
227
+ // ordering if rot=0.
228
+ int secondaryCmp(TextLine *line);
229
+
230
+ int cmpYX(TextLine *line); // NOTE(review): presumably a y-then-x ordering against <line> -- confirm
231
+
232
+ static int cmpXY(const void *p1, const void *p2); // qsort-style comparator signature; NOTE(review): presumably x-then-y ordering -- confirm
233
+
234
+ void coalesce(UnicodeMap *uMap);
235
+
236
+ // Get the head of the linked list of TextWords.
237
+ TextWord *getWords() { return words; }
238
+
239
+ // Get the next TextLine on the linked list.
240
+ TextLine *getNext() { return next; }
241
+
242
+ // Returns true if the last char of the line is a hyphen.
243
+ GBool isHyphenated() { return hyphenated; }
244
+
245
+ private:
246
+
247
+ TextBlock *blk; // parent block
248
+ int rot; // text rotation
249
+ double xMin, xMax; // bounding box x coordinates
250
+ double yMin, yMax; // bounding box y coordinates
251
+ double base; // baseline x or y coordinate
252
+ TextWord *words; // words in this line
253
+ TextWord *lastWord; // last word in this line
254
+ Unicode *text; // Unicode text of the line, including
255
+ // spaces between words
256
+ double *edge; // "near" edge x or y coord of each char
257
+ // (plus one extra entry for the last char)
258
+ int *col; // starting column number of each Unicode char
259
+ int len; // number of Unicode chars
260
+ int convertedLen; // total number of converted characters
261
+ GBool hyphenated; // set if last char is a hyphen
262
+ TextLine *next; // next line in block
263
+
264
+ friend class TextLineFrag;
265
+ friend class TextBlock;
266
+ friend class TextFlow;
267
+ friend class TextWordList;
268
+ friend class TextPage;
269
+ };
270
+
271
+ //------------------------------------------------------------------------
272
+ // TextBlock
273
+ //------------------------------------------------------------------------
274
+
275
+ class TextBlock { // a block of text on a page: a pool of words that is later turned into a linked list of lines
276
+ public:
277
+
278
+ TextBlock(TextPage *pageA, int rotA);
279
+ ~TextBlock();
280
+
281
+ void addWord(TextWord *word);
282
+
283
+ void coalesce(UnicodeMap *uMap);
284
+
285
+ // Update this block's priMin and priMax values, looking at <blk>.
286
+ void updatePriMinMax(TextBlock *blk);
287
+
288
+ static int cmpXYPrimaryRot(const void *p1, const void *p2); // qsort-style comparator signature; ordering details live in the implementation
289
+
290
+ static int cmpYXPrimaryRot(const void *p1, const void *p2); // qsort-style comparator signature; ordering details live in the implementation
291
+
292
+ int primaryCmp(TextBlock *blk);
293
+
294
+ double secondaryDelta(TextBlock *blk);
295
+
296
+ // Returns true if <this> is below <blk>, relative to the page's
297
+ // primary rotation.
298
+ GBool isBelow(TextBlock *blk);
299
+
300
+ // Get the head of the linked list of TextLines.
301
+ TextLine *getLines() { return lines; }
302
+
303
+ // Get the next TextBlock on the linked list.
304
+ TextBlock *getNext() { return next; }
305
+
306
+ private:
307
+
308
+ TextPage *page; // the parent page
309
+ int rot; // text rotation
310
+ double xMin, xMax; // bounding box x coordinates
311
+ double yMin, yMax; // bounding box y coordinates
312
+ double priMin, priMax; // whitespace bounding box along primary axis
313
+
314
+ TextPool *pool; // pool of words (used only until lines
315
+ // are built)
316
+ TextLine *lines; // linked list of lines
317
+ TextLine *curLine; // most recently added line
318
+ int nLines; // number of lines
319
+ int charCount; // number of characters in the block
320
+ int col; // starting column
321
+ int nColumns; // number of columns in the block
322
+
323
+ TextBlock *next; // next block on the linked list (returned by getNext)
324
+ TextBlock *stackNext; // NOTE(review): second link field; its use is not visible in this header
325
+
326
+ friend class TextLine;
327
+ friend class TextLineFrag;
328
+ friend class TextFlow;
329
+ friend class TextWordList;
330
+ friend class TextPage;
331
+ };
332
+
333
+ //------------------------------------------------------------------------
334
+ // TextFlow
335
+ //------------------------------------------------------------------------
336
+
337
+ class TextFlow { // a flow: a linked list of TextBlocks with an overall bounding box, itself chained to the next flow
338
+ public:
339
+
340
+ TextFlow(TextPage *pageA, TextBlock *blk);
341
+ ~TextFlow();
342
+
343
+ // Add a block to the end of this flow.
344
+ void addBlock(TextBlock *blk);
345
+
346
+ // Returns true if <blk> fits below <prevBlk> in the flow, i.e., (1)
347
+ // it uses a font no larger than the last block added to the flow,
348
+ // and (2) it fits within the flow's [priMin, priMax] along the
349
+ // primary axis.
350
+ GBool blockFits(TextBlock *blk, TextBlock *prevBlk);
351
+
352
+ // Get the head of the linked list of TextBlocks.
353
+ TextBlock *getBlocks() { return blocks; }
354
+
355
+ // Get the next TextFlow on the linked list.
356
+ TextFlow *getNext() { return next; }
357
+
358
+ private:
359
+
360
+ TextPage *page; // the parent page
361
+ double xMin, xMax; // bounding box x coordinates
362
+ double yMin, yMax; // bounding box y coordinates
363
+ double priMin, priMax; // whitespace bounding box along primary axis
364
+ TextBlock *blocks; // blocks in flow
365
+ TextBlock *lastBlk; // last block in this flow
366
+ TextFlow *next; // next flow on the linked list (returned by getNext)
367
+
368
+ friend class TextWordList;
369
+ friend class TextPage;
370
+ };
371
+
372
+ #if TEXTOUT_WORD_LIST
373
+
374
+ //------------------------------------------------------------------------
375
+ // TextWordList
376
+ //------------------------------------------------------------------------
377
+
378
+ class TextWordList { // flat, indexable list of the TextWords on a page; only declared when TEXTOUT_WORD_LIST is set
379
+ public:
380
+
381
+ // Build a flat word list, in content stream order (if
382
+ // text->rawOrder is true), physical layout order (if <physLayout>
383
+ // is true and text->rawOrder is false), or reading order (if both
384
+ // flags are false).
385
+ TextWordList(TextPage *text, GBool physLayout);
386
+
387
+ ~TextWordList();
388
+
389
+ // Return the number of words on the list.
390
+ int getLength();
391
+
392
+ // Return the <idx>th word from the list.
393
+ TextWord *get(int idx);
394
+
395
+ private:
396
+
397
+ GList *words; // [TextWord]
398
+ };
399
+
400
+ #endif // TEXTOUT_WORD_LIST
401
+
402
+ //------------------------------------------------------------------------
403
+ // TextPage
404
+ //------------------------------------------------------------------------
405
+
406
+ class TextPage { // text content of one page: collects words as they are drawn, then groups them into blocks and flows
407
+ public:
408
+
409
+ // Constructor. <rawOrderA> selects content stream order (see the rawOrder member).
410
+ TextPage(GBool rawOrderA);
411
+
412
+ // Destructor.
413
+ ~TextPage();
414
+
415
+ // Start a new page.
416
+ void startPage(GfxState *state);
417
+
418
+ // End the current page.
419
+ void endPage();
420
+
421
+ // Update the current font.
422
+ void updateFont(GfxState *state);
423
+
424
+ // Begin a new word.
425
+ void beginWord(GfxState *state, double x0, double y0);
426
+
427
+ // Add a character to the current word.
428
+ void addChar(GfxState *state, double x, double y,
429
+ double dx, double dy,
430
+ CharCode c, int nBytes, Unicode *u, int uLen);
431
+
432
+ // End the current word, sorting it into the list of words.
433
+ void endWord();
434
+
435
+ // Add a word, sorting it into the list of words.
436
+ void addWord(TextWord *word);
437
+
438
+ // Add a (potential) underline.
439
+ void addUnderline(double x0, double y0, double x1, double y1);
440
+
441
+ // Add a hyperlink. NOTE(review): the bounds are int here, unlike the double coordinates used by the other methods of this class.
442
+ void addLink(int xMin, int yMin, int xMax, int yMax, Link *link);
443
+
444
+ // Coalesce strings that look like parts of the same line.
445
+ void coalesce(GBool physLayout, GBool doHTML);
446
+
447
+ // Find a string. If <startAtTop> is true, starts looking at the
448
+ // top of the page; else if <startAtLast> is true, starts looking
449
+ // immediately after the last find result; else starts looking at
450
+ // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the
451
+ // bottom of the page; else if <stopAtLast> is true, stops looking
452
+ // just before the last find result; else stops looking at
453
+ // <xMax>,<yMax>.
454
+ GBool findText(Unicode *s, int len,
455
+ GBool startAtTop, GBool stopAtBottom,
456
+ GBool startAtLast, GBool stopAtLast,
457
+ GBool caseSensitive, GBool backward,
458
+ double *xMin, double *yMin,
459
+ double *xMax, double *yMax);
460
+
461
+ // Get the text which is inside the specified rectangle.
462
+ GString *getText(double xMin, double yMin,
463
+ double xMax, double yMax);
464
+
465
+ // Find a string by character position and length. If found, sets
466
+ // the text bounding rectangle and returns true; otherwise returns
467
+ // false.
468
+ GBool findCharRange(int pos, int length,
469
+ double *xMin, double *yMin,
470
+ double *xMax, double *yMax);
471
+
472
+ // Dump contents of page using <outputFunc> (presumably invoked with <outputStream>, which need not be a file -- confirm).
473
+ void dump(void *outputStream, TextOutputFunc outputFunc,
474
+ GBool physLayout);
475
+
476
+ // Get the head of the linked list of TextFlows.
477
+ TextFlow *getFlows() { return flows; }
478
+
479
+ #if TEXTOUT_WORD_LIST
480
+ // Build a flat word list, in content stream order (if
481
+ // this->rawOrder is true), physical layout order (if <physLayout>
482
+ // is true and this->rawOrder is false), or reading order (if both
483
+ // flags are false).
484
+ TextWordList *makeWordList(GBool physLayout);
485
+ #endif
486
+
487
+ private:
488
+
489
+ void clear();
490
+ void assignColumns(TextLineFrag *frags, int nFrags, int rot);
491
+ int dumpFragment(Unicode *text, int len, UnicodeMap *uMap, GString *s);
492
+
493
+ GBool rawOrder; // keep text in content stream order
494
+
495
+ double pageWidth, pageHeight; // width and height of current page
496
+ TextWord *curWord; // currently active string
497
+ int charPos; // next character position (within content
498
+ // stream)
499
+ TextFontInfo *curFont; // current font
500
+ double curFontSize; // current font size
501
+ int nest; // current nesting level (for Type 3 fonts)
502
+ int nTinyChars; // number of "tiny" chars seen so far
503
+ GBool lastCharOverlap; // set if the last added char overlapped the
504
+ // previous char
505
+
506
+ TextPool *pools[4]; // a "pool" of TextWords for each rotation
507
+ TextFlow *flows; // linked list of flows
508
+ TextBlock **blocks; // array of blocks, in yx order
509
+ int nBlocks; // number of blocks
510
+ int primaryRot; // primary rotation
511
+ GBool primaryLR; // primary direction (true means L-to-R,
512
+ // false means R-to-L)
513
+ TextWord *rawWords; // list of words, in raw order (only if
514
+ // rawOrder is set)
515
+ TextWord *rawLastWord; // last word on rawWords list
516
+
517
+ GList *fonts; // all font info objects used on this
518
+ // page [TextFontInfo]
519
+
520
+ double lastFindXMin, // coordinates of the last "find" result
521
+ lastFindYMin;
522
+ GBool haveLastFind; // NOTE(review): presumably set once lastFindXMin/lastFindYMin hold a valid result -- confirm
523
+
524
+ GList *underlines; // [TextUnderline]
525
+ GList *links; // [TextLink]
526
+
527
+ friend class TextLine;
528
+ friend class TextLineFrag;
529
+ friend class TextBlock;
530
+ friend class TextFlow;
531
+ friend class TextWordList;
532
+ };
533
+
534
+ //------------------------------------------------------------------------
535
+ // TextOutputDev
536
+ //------------------------------------------------------------------------
537
+
538
+ class TextOutputDev: public OutputDev { // OutputDev subclass that gathers page text into a TextPage and writes it to a file or a caller-supplied callback
539
+ public:
540
+
541
+ // Open a text output file. If <fileName> is NULL, no file is
542
+ // written (this is useful, e.g., for searching text). If
543
+ // <physLayoutA> is true, the original physical layout of the text
544
+ // is maintained. If <rawOrderA> is true, the text is kept in
545
+ // content stream order.
546
+ TextOutputDev(char *fileName, GBool physLayoutA,
547
+ GBool rawOrderA, GBool append);
548
+
549
+ // Create a TextOutputDev which will write to a generic stream. If
550
+ // <physLayoutA> is true, the original physical layout of the text
551
+ // is maintained. If <rawOrderA> is true, the text is kept in
552
+ // content stream order.
553
+ TextOutputDev(TextOutputFunc func, void *stream,
554
+ GBool physLayoutA, GBool rawOrderA);
555
+
556
+ // Destructor.
557
+ virtual ~TextOutputDev();
558
+
559
+ // Check if file was successfully created (returns the <ok> member).
560
+ virtual GBool isOk() { return ok; }
561
+
562
+ //---- get info about output device
563
+
564
+ // Does this device use upside-down coordinates?
565
+ // (Upside-down means (0,0) is the top left corner of the page.)
566
+ virtual GBool upsideDown() { return gTrue; }
567
+
568
+ // Does this device use drawChar() or drawString()?
569
+ virtual GBool useDrawChar() { return gTrue; }
570
+
571
+ // Does this device use beginType3Char/endType3Char? Otherwise,
572
+ // text in Type 3 fonts will be drawn with drawChar/drawString.
573
+ virtual GBool interpretType3Chars() { return gFalse; }
574
+
575
+ // Does this device need non-text content?
576
+ virtual GBool needNonText() { return gFalse; }
577
+
578
+ //----- initialization and control
579
+
580
+ // Start a page.
581
+ virtual void startPage(int pageNum, GfxState *state);
582
+
583
+ // End a page.
584
+ virtual void endPage();
585
+
586
+ //----- update text state
587
+ virtual void updateFont(GfxState *state);
588
+
589
+ //----- text drawing
590
+ virtual void beginString(GfxState *state, GString *s);
591
+ virtual void endString(GfxState *state);
592
+ virtual void drawChar(GfxState *state, double x, double y,
593
+ double dx, double dy,
594
+ double originX, double originY,
595
+ CharCode c, int nBytes, Unicode *u, int uLen);
596
+
597
+ //----- path painting
598
+ virtual void stroke(GfxState *state);
599
+ virtual void fill(GfxState *state);
600
+ virtual void eoFill(GfxState *state);
601
+
602
+ //----- link borders
603
+ virtual void processLink(Link *link, Catalog *catalog);
604
+
605
+ //----- special access
606
+
607
+ // Find a string. If <startAtTop> is true, starts looking at the
608
+ // top of the page; else if <startAtLast> is true, starts looking
609
+ // immediately after the last find result; else starts looking at
610
+ // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the
611
+ // bottom of the page; else if <stopAtLast> is true, stops looking
612
+ // just before the last find result; else stops looking at
613
+ // <xMax>,<yMax>.
614
+ GBool findText(Unicode *s, int len,
615
+ GBool startAtTop, GBool stopAtBottom,
616
+ GBool startAtLast, GBool stopAtLast,
617
+ GBool caseSensitive, GBool backward,
618
+ double *xMin, double *yMin,
619
+ double *xMax, double *yMax);
620
+
621
+ // Get the text which is inside the specified rectangle.
622
+ GString *getText(double xMin, double yMin,
623
+ double xMax, double yMax);
624
+
625
+ // Find a string by character position and length. If found, sets
626
+ // the text bounding rectangle and returns true; otherwise returns
627
+ // false.
628
+ GBool findCharRange(int pos, int length,
629
+ double *xMin, double *yMin,
630
+ double *xMax, double *yMax);
631
+
632
+ #if TEXTOUT_WORD_LIST
633
+ // Build a flat word list, in content stream order (if
634
+ // this->rawOrder is true), physical layout order (if
635
+ // this->physLayout is true and this->rawOrder is false), or reading
636
+ // order (if both flags are false).
637
+ TextWordList *makeWordList();
638
+ #endif
639
+
640
+ // Returns the TextPage object for the last rasterized page,
641
+ // transferring ownership to the caller.
642
+ TextPage *takeText();
643
+
644
+ // Turn extra processing for HTML conversion on or off (stores <doHTMLA> in the doHTML member).
645
+ void enableHTMLExtras(GBool doHTMLA) { doHTML = doHTMLA; }
646
+
647
+ private:
648
+
649
+ TextOutputFunc outputFunc; // output function
650
+ void *outputStream; // output stream
651
+ GBool needClose; // need to close the output file?
652
+ // (only if outputStream is a FILE*)
653
+ TextPage *text; // text for the current page
654
+ GBool physLayout; // maintain original physical layout when
655
+ // dumping text
656
+ GBool rawOrder; // keep text in content stream order
657
+ GBool doHTML; // extra processing for HTML conversion
658
+ GBool ok; // set up ok?
659
+ };
660
+
661
+ #endif