pdf2json 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +9 -0
- data/bin/.gitkeep +0 -0
- data/ext/extconf.rb +30 -0
- data/lib/pdf2json.rb +8 -0
- data/pdf2json-0.52-source/AUTHORS +24 -0
- data/pdf2json-0.52-source/CHANGES +11 -0
- data/pdf2json-0.52-source/Makefile +84 -0
- data/pdf2json-0.52-source/Makefile.in +84 -0
- data/pdf2json-0.52-source/aclocal.m4 +274 -0
- data/pdf2json-0.52-source/aconf-win32.h +86 -0
- data/pdf2json-0.52-source/aconf.h +42 -0
- data/pdf2json-0.52-source/aconf.h.in +41 -0
- data/pdf2json-0.52-source/autom4te.cache/output.0 +6908 -0
- data/pdf2json-0.52-source/autom4te.cache/requests +76 -0
- data/pdf2json-0.52-source/autom4te.cache/traces.0 +466 -0
- data/pdf2json-0.52-source/config.log +1259 -0
- data/pdf2json-0.52-source/config.status +1050 -0
- data/pdf2json-0.52-source/configure +6908 -0
- data/pdf2json-0.52-source/configure.ac +93 -0
- data/pdf2json-0.52-source/doc/pdffonts.1 +130 -0
- data/pdf2json-0.52-source/doc/pdffonts.cat +107 -0
- data/pdf2json-0.52-source/doc/pdffonts.hlp +117 -0
- data/pdf2json-0.52-source/doc/pdfimages.1 +102 -0
- data/pdf2json-0.52-source/doc/pdfimages.cat +92 -0
- data/pdf2json-0.52-source/doc/pdfimages.hlp +101 -0
- data/pdf2json-0.52-source/doc/pdfinfo.1 +158 -0
- data/pdf2json-0.52-source/doc/pdfinfo.cat +119 -0
- data/pdf2json-0.52-source/doc/pdfinfo.hlp +129 -0
- data/pdf2json-0.52-source/doc/pdftoppm.1 +115 -0
- data/pdf2json-0.52-source/doc/pdftoppm.cat +105 -0
- data/pdf2json-0.52-source/doc/pdftoppm.hlp +114 -0
- data/pdf2json-0.52-source/doc/pdftops.1 +229 -0
- data/pdf2json-0.52-source/doc/pdftops.cat +221 -0
- data/pdf2json-0.52-source/doc/pdftops.hlp +231 -0
- data/pdf2json-0.52-source/doc/pdftotext.1 +137 -0
- data/pdf2json-0.52-source/doc/pdftotext.cat +120 -0
- data/pdf2json-0.52-source/doc/pdftotext.hlp +133 -0
- data/pdf2json-0.52-source/doc/sample-xpdfrc +91 -0
- data/pdf2json-0.52-source/doc/xpdf.1 +513 -0
- data/pdf2json-0.52-source/doc/xpdf.cat +476 -0
- data/pdf2json-0.52-source/doc/xpdf.hlp +489 -0
- data/pdf2json-0.52-source/doc/xpdfrc.5 +480 -0
- data/pdf2json-0.52-source/doc/xpdfrc.cat +474 -0
- data/pdf2json-0.52-source/doc/xpdfrc.hlp +479 -0
- data/pdf2json-0.52-source/fofi/.DS_Store +0 -0
- data/pdf2json-0.52-source/fofi/FoFiBase.cc +156 -0
- data/pdf2json-0.52-source/fofi/FoFiBase.h +57 -0
- data/pdf2json-0.52-source/fofi/FoFiBase.o +0 -0
- data/pdf2json-0.52-source/fofi/FoFiEncodings.cc +994 -0
- data/pdf2json-0.52-source/fofi/FoFiEncodings.h +36 -0
- data/pdf2json-0.52-source/fofi/FoFiEncodings.o +0 -0
- data/pdf2json-0.52-source/fofi/FoFiTrueType.cc +2027 -0
- data/pdf2json-0.52-source/fofi/FoFiTrueType.h +174 -0
- data/pdf2json-0.52-source/fofi/FoFiTrueType.o +0 -0
- data/pdf2json-0.52-source/fofi/FoFiType1.cc +252 -0
- data/pdf2json-0.52-source/fofi/FoFiType1.h +59 -0
- data/pdf2json-0.52-source/fofi/FoFiType1.o +0 -0
- data/pdf2json-0.52-source/fofi/FoFiType1C.cc +2603 -0
- data/pdf2json-0.52-source/fofi/FoFiType1C.h +233 -0
- data/pdf2json-0.52-source/fofi/FoFiType1C.o +0 -0
- data/pdf2json-0.52-source/fofi/Makefile +70 -0
- data/pdf2json-0.52-source/fofi/Makefile.dep +0 -0
- data/pdf2json-0.52-source/fofi/Makefile.in +70 -0
- data/pdf2json-0.52-source/fofi/libfofi.a +0 -0
- data/pdf2json-0.52-source/fofi/vms_make.com +0 -0
- data/pdf2json-0.52-source/freetype.win32/.DS_Store +0 -0
- data/pdf2json-0.52-source/freetype.win32/include/.DS_Store +0 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/config/ftconfig.h +528 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/config/ftheader.h +780 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/config/ftmodule.h +32 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/config/ftoption.h +733 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/config/ftstdlib.h +173 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/freetype.h +3919 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftadvanc.h +179 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftbbox.h +94 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftbdf.h +209 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftbitmap.h +227 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftcache.h +1128 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftchapters.h +103 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftcid.h +166 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/fterrdef.h +244 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/fterrors.h +206 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftgasp.h +120 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftglyph.h +613 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftgxval.h +358 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftgzip.h +102 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftimage.h +1313 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftincrem.h +353 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftlcdfil.h +213 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftlist.h +277 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftlzw.h +99 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftmac.h +274 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftmm.h +378 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftmodapi.h +483 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftmoderr.h +155 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftotval.h +203 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftoutln.h +537 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftpfr.h +172 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftrender.h +230 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftsizes.h +159 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftsnames.h +200 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftstroke.h +716 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftsynth.h +80 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftsystem.h +347 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/fttrigon.h +350 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/fttypes.h +588 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftwinfnt.h +274 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ftxf86.h +83 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/autohint.h +231 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftcalc.h +179 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftdebug.h +250 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftdriver.h +422 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftgloadr.h +168 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftmemory.h +380 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftobjs.h +1428 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftpic.h +67 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftrfork.h +196 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftserv.h +620 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftstream.h +539 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/fttrace.h +139 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/ftvalid.h +150 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/internal.h +51 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/pcftypes.h +56 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/psaux.h +873 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/pshints.h +712 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svbdf.h +77 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svcid.h +83 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svgldict.h +82 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svgxval.h +72 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svkern.h +51 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svmm.h +104 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svotval.h +55 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svpfr.h +66 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svpostnm.h +79 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svpscmap.h +164 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svpsinfo.h +92 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svsfnt.h +102 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svttcmap.h +106 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svtteng.h +53 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svttglyf.h +67 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svwinfnt.h +50 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/services/svxf86nm.h +55 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/sfnt.h +897 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/t1types.h +270 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/internal/tttypes.h +1543 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/t1tables.h +504 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ttnameid.h +1247 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/tttables.h +759 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/tttags.h +107 -0
- data/pdf2json-0.52-source/freetype.win32/include/freetype/ttunpat.h +59 -0
- data/pdf2json-0.52-source/freetype.win32/include/ft2build.h +39 -0
- data/pdf2json-0.52-source/freetype.win32/lib/freetype_a.lib +0 -0
- data/pdf2json-0.52-source/goo/.DS_Store +0 -0
- data/pdf2json-0.52-source/goo/FixedPoint.cc +118 -0
- data/pdf2json-0.52-source/goo/FixedPoint.h +155 -0
- data/pdf2json-0.52-source/goo/FixedPoint.o +0 -0
- data/pdf2json-0.52-source/goo/GHash.cc +380 -0
- data/pdf2json-0.52-source/goo/GHash.h +78 -0
- data/pdf2json-0.52-source/goo/GHash.o +0 -0
- data/pdf2json-0.52-source/goo/GList.cc +97 -0
- data/pdf2json-0.52-source/goo/GList.h +96 -0
- data/pdf2json-0.52-source/goo/GList.o +0 -0
- data/pdf2json-0.52-source/goo/GMutex.h +49 -0
- data/pdf2json-0.52-source/goo/GString.cc +724 -0
- data/pdf2json-0.52-source/goo/GString.cc.fixed +718 -0
- data/pdf2json-0.52-source/goo/GString.h +136 -0
- data/pdf2json-0.52-source/goo/GString.o +0 -0
- data/pdf2json-0.52-source/goo/ImgWriter.o +0 -0
- data/pdf2json-0.52-source/goo/JpegWriter.o +0 -0
- data/pdf2json-0.52-source/goo/Makefile +72 -0
- data/pdf2json-0.52-source/goo/Makefile.dep +0 -0
- data/pdf2json-0.52-source/goo/Makefile.in +72 -0
- data/pdf2json-0.52-source/goo/PNGWriter.o +0 -0
- data/pdf2json-0.52-source/goo/gfile.cc +731 -0
- data/pdf2json-0.52-source/goo/gfile.h +138 -0
- data/pdf2json-0.52-source/goo/gfile.o +0 -0
- data/pdf2json-0.52-source/goo/gmem.cc +264 -0
- data/pdf2json-0.52-source/goo/gmem.h +79 -0
- data/pdf2json-0.52-source/goo/gmem.o +0 -0
- data/pdf2json-0.52-source/goo/gmempp.cc +32 -0
- data/pdf2json-0.52-source/goo/gmempp.o +0 -0
- data/pdf2json-0.52-source/goo/gtypes.h +29 -0
- data/pdf2json-0.52-source/goo/libGoo.a +0 -0
- data/pdf2json-0.52-source/goo/parseargs.c +190 -0
- data/pdf2json-0.52-source/goo/parseargs.h +71 -0
- data/pdf2json-0.52-source/goo/parseargs.o +0 -0
- data/pdf2json-0.52-source/goo/vms_directory.c +214 -0
- data/pdf2json-0.52-source/goo/vms_dirent.h +67 -0
- data/pdf2json-0.52-source/goo/vms_make.com +82 -0
- data/pdf2json-0.52-source/goo/vms_sys_dirent.h +54 -0
- data/pdf2json-0.52-source/goo/vms_unix_time.h +102 -0
- data/pdf2json-0.52-source/goo/vms_unix_times.c +42 -0
- data/pdf2json-0.52-source/goo/vms_unlink.c +22 -0
- data/pdf2json-0.52-source/ms_make.bat +199 -0
- data/pdf2json-0.52-source/splash/.DS_Store +0 -0
- data/pdf2json-0.52-source/splash/Makefile +103 -0
- data/pdf2json-0.52-source/splash/Makefile.dep +0 -0
- data/pdf2json-0.52-source/splash/Makefile.in +103 -0
- data/pdf2json-0.52-source/splash/Splash.cc +3310 -0
- data/pdf2json-0.52-source/splash/Splash.h +293 -0
- data/pdf2json-0.52-source/splash/Splash.o +0 -0
- data/pdf2json-0.52-source/splash/SplashBitmap.cc +188 -0
- data/pdf2json-0.52-source/splash/SplashBitmap.h +64 -0
- data/pdf2json-0.52-source/splash/SplashBitmap.o +0 -0
- data/pdf2json-0.52-source/splash/SplashClip.cc +382 -0
- data/pdf2json-0.52-source/splash/SplashClip.h +107 -0
- data/pdf2json-0.52-source/splash/SplashClip.o +0 -0
- data/pdf2json-0.52-source/splash/SplashErrorCodes.h +32 -0
- data/pdf2json-0.52-source/splash/SplashFTFont.cc +357 -0
- data/pdf2json-0.52-source/splash/SplashFTFont.h +58 -0
- data/pdf2json-0.52-source/splash/SplashFTFont.o +0 -0
- data/pdf2json-0.52-source/splash/SplashFTFontEngine.cc +179 -0
- data/pdf2json-0.52-source/splash/SplashFTFontEngine.h +65 -0
- data/pdf2json-0.52-source/splash/SplashFTFontEngine.o +0 -0
- data/pdf2json-0.52-source/splash/SplashFTFontFile.cc +114 -0
- data/pdf2json-0.52-source/splash/SplashFTFontFile.h +73 -0
- data/pdf2json-0.52-source/splash/SplashFTFontFile.o +0 -0
- data/pdf2json-0.52-source/splash/SplashFont.cc +176 -0
- data/pdf2json-0.52-source/splash/SplashFont.h +104 -0
- data/pdf2json-0.52-source/splash/SplashFont.o +0 -0
- data/pdf2json-0.52-source/splash/SplashFontEngine.cc +317 -0
- data/pdf2json-0.52-source/splash/SplashFontEngine.h +91 -0
- data/pdf2json-0.52-source/splash/SplashFontEngine.o +0 -0
- data/pdf2json-0.52-source/splash/SplashFontFile.cc +55 -0
- data/pdf2json-0.52-source/splash/SplashFontFile.h +60 -0
- data/pdf2json-0.52-source/splash/SplashFontFile.o +0 -0
- data/pdf2json-0.52-source/splash/SplashFontFileID.cc +23 -0
- data/pdf2json-0.52-source/splash/SplashFontFileID.h +30 -0
- data/pdf2json-0.52-source/splash/SplashFontFileID.o +0 -0
- data/pdf2json-0.52-source/splash/SplashGlyphBitmap.h +26 -0
- data/pdf2json-0.52-source/splash/SplashMath.h +89 -0
- data/pdf2json-0.52-source/splash/SplashPath.cc +184 -0
- data/pdf2json-0.52-source/splash/SplashPath.h +121 -0
- data/pdf2json-0.52-source/splash/SplashPath.o +0 -0
- data/pdf2json-0.52-source/splash/SplashPattern.cc +40 -0
- data/pdf2json-0.52-source/splash/SplashPattern.h +65 -0
- data/pdf2json-0.52-source/splash/SplashPattern.o +0 -0
- data/pdf2json-0.52-source/splash/SplashScreen.cc +383 -0
- data/pdf2json-0.52-source/splash/SplashScreen.h +56 -0
- data/pdf2json-0.52-source/splash/SplashScreen.o +0 -0
- data/pdf2json-0.52-source/splash/SplashState.cc +165 -0
- data/pdf2json-0.52-source/splash/SplashState.h +103 -0
- data/pdf2json-0.52-source/splash/SplashState.o +0 -0
- data/pdf2json-0.52-source/splash/SplashT1Font.cc +287 -0
- data/pdf2json-0.52-source/splash/SplashT1Font.h +57 -0
- data/pdf2json-0.52-source/splash/SplashT1Font.o +0 -0
- data/pdf2json-0.52-source/splash/SplashT1FontEngine.cc +124 -0
- data/pdf2json-0.52-source/splash/SplashT1FontEngine.h +53 -0
- data/pdf2json-0.52-source/splash/SplashT1FontEngine.o +0 -0
- data/pdf2json-0.52-source/splash/SplashT1FontFile.cc +97 -0
- data/pdf2json-0.52-source/splash/SplashT1FontFile.h +58 -0
- data/pdf2json-0.52-source/splash/SplashT1FontFile.o +0 -0
- data/pdf2json-0.52-source/splash/SplashTypes.h +132 -0
- data/pdf2json-0.52-source/splash/SplashXPath.cc +438 -0
- data/pdf2json-0.52-source/splash/SplashXPath.h +100 -0
- data/pdf2json-0.52-source/splash/SplashXPath.o +0 -0
- data/pdf2json-0.52-source/splash/SplashXPathScanner.cc +428 -0
- data/pdf2json-0.52-source/splash/SplashXPathScanner.h +87 -0
- data/pdf2json-0.52-source/splash/SplashXPathScanner.o +0 -0
- data/pdf2json-0.52-source/splash/libsplash.a +0 -0
- data/pdf2json-0.52-source/splash/vms_make.com +0 -0
- data/pdf2json-0.52-source/src/.DS_Store +0 -0
- data/pdf2json-0.52-source/src/GVector.h +101 -0
- data/pdf2json-0.52-source/src/ImgOutputDev.cc +1243 -0
- data/pdf2json-0.52-source/src/ImgOutputDev.h +307 -0
- data/pdf2json-0.52-source/src/ImgOutputDev.o +0 -0
- data/pdf2json-0.52-source/src/Makefile +68 -0
- data/pdf2json-0.52-source/src/Makefile.in +68 -0
- data/pdf2json-0.52-source/src/XmlFonts.cc +367 -0
- data/pdf2json-0.52-source/src/XmlFonts.h +91 -0
- data/pdf2json-0.52-source/src/XmlFonts.o +0 -0
- data/pdf2json-0.52-source/src/XmlLinks.cc +101 -0
- data/pdf2json-0.52-source/src/XmlLinks.h +54 -0
- data/pdf2json-0.52-source/src/XmlLinks.o +0 -0
- data/pdf2json-0.52-source/src/pdf2json +0 -0
- data/pdf2json-0.52-source/src/pdf2json.cc +343 -0
- data/pdf2json-0.52-source/src/pdf2json.o +0 -0
- data/pdf2json-0.52-source/src/pdf2xml.dtd +22 -0
- data/pdf2json-0.52-source/src/pdf2xmljson.dtd +9 -0
- data/pdf2json-0.52-source/xpdf/.DS_Store +0 -0
- data/pdf2json-0.52-source/xpdf/Annot.cc +1556 -0
- data/pdf2json-0.52-source/xpdf/Annot.h +142 -0
- data/pdf2json-0.52-source/xpdf/Annot.o +0 -0
- data/pdf2json-0.52-source/xpdf/Array.cc +73 -0
- data/pdf2json-0.52-source/xpdf/Array.h +58 -0
- data/pdf2json-0.52-source/xpdf/Array.o +0 -0
- data/pdf2json-0.52-source/xpdf/BuiltinFont.cc +65 -0
- data/pdf2json-0.52-source/xpdf/BuiltinFont.h +57 -0
- data/pdf2json-0.52-source/xpdf/BuiltinFont.o +0 -0
- data/pdf2json-0.52-source/xpdf/BuiltinFontTables.cc +4284 -0
- data/pdf2json-0.52-source/xpdf/BuiltinFontTables.h +23 -0
- data/pdf2json-0.52-source/xpdf/BuiltinFontTables.o +0 -0
- data/pdf2json-0.52-source/xpdf/CMap.cc +408 -0
- data/pdf2json-0.52-source/xpdf/CMap.h +102 -0
- data/pdf2json-0.52-source/xpdf/CMap.o +0 -0
- data/pdf2json-0.52-source/xpdf/Catalog.cc +374 -0
- data/pdf2json-0.52-source/xpdf/Catalog.h +97 -0
- data/pdf2json-0.52-source/xpdf/Catalog.o +0 -0
- data/pdf2json-0.52-source/xpdf/CharCodeToUnicode.cc +540 -0
- data/pdf2json-0.52-source/xpdf/CharCodeToUnicode.h +117 -0
- data/pdf2json-0.52-source/xpdf/CharCodeToUnicode.o +0 -0
- data/pdf2json-0.52-source/xpdf/CharTypes.h +24 -0
- data/pdf2json-0.52-source/xpdf/CompactFontTables.h +464 -0
- data/pdf2json-0.52-source/xpdf/CoreOutputDev.cc +61 -0
- data/pdf2json-0.52-source/xpdf/CoreOutputDev.h +61 -0
- data/pdf2json-0.52-source/xpdf/Decrypt.cc +776 -0
- data/pdf2json-0.52-source/xpdf/Decrypt.h +95 -0
- data/pdf2json-0.52-source/xpdf/Decrypt.o +0 -0
- data/pdf2json-0.52-source/xpdf/Dict.cc +95 -0
- data/pdf2json-0.52-source/xpdf/Dict.h +77 -0
- data/pdf2json-0.52-source/xpdf/Dict.o +0 -0
- data/pdf2json-0.52-source/xpdf/Error.cc +38 -0
- data/pdf2json-0.52-source/xpdf/Error.h +23 -0
- data/pdf2json-0.52-source/xpdf/Error.o +0 -0
- data/pdf2json-0.52-source/xpdf/ErrorCodes.h +36 -0
- data/pdf2json-0.52-source/xpdf/FontEncodingTables.cc +1824 -0
- data/pdf2json-0.52-source/xpdf/FontEncodingTables.h +20 -0
- data/pdf2json-0.52-source/xpdf/FontEncodingTables.o +0 -0
- data/pdf2json-0.52-source/xpdf/Function.cc +1573 -0
- data/pdf2json-0.52-source/xpdf/Function.h +229 -0
- data/pdf2json-0.52-source/xpdf/Function.o +0 -0
- data/pdf2json-0.52-source/xpdf/Gfx.cc +4187 -0
- data/pdf2json-0.52-source/xpdf/Gfx.h +312 -0
- data/pdf2json-0.52-source/xpdf/Gfx.o +0 -0
- data/pdf2json-0.52-source/xpdf/GfxFont.cc +1568 -0
- data/pdf2json-0.52-source/xpdf/GfxFont.h +320 -0
- data/pdf2json-0.52-source/xpdf/GfxFont.o +0 -0
- data/pdf2json-0.52-source/xpdf/GfxState.cc +4137 -0
- data/pdf2json-0.52-source/xpdf/GfxState.h +1244 -0
- data/pdf2json-0.52-source/xpdf/GfxState.o +0 -0
- data/pdf2json-0.52-source/xpdf/GlobalParams.cc +2924 -0
- data/pdf2json-0.52-source/xpdf/GlobalParams.cc.old +2908 -0
- data/pdf2json-0.52-source/xpdf/GlobalParams.h +466 -0
- data/pdf2json-0.52-source/xpdf/GlobalParams.h.old +463 -0
- data/pdf2json-0.52-source/xpdf/GlobalParams.o +0 -0
- data/pdf2json-0.52-source/xpdf/ImageOutputDev.cc +195 -0
- data/pdf2json-0.52-source/xpdf/ImageOutputDev.h +76 -0
- data/pdf2json-0.52-source/xpdf/ImageOutputDev.o +0 -0
- data/pdf2json-0.52-source/xpdf/JArithmeticDecoder.cc +322 -0
- data/pdf2json-0.52-source/xpdf/JArithmeticDecoder.h +109 -0
- data/pdf2json-0.52-source/xpdf/JArithmeticDecoder.o +0 -0
- data/pdf2json-0.52-source/xpdf/JBIG2Stream.cc +3413 -0
- data/pdf2json-0.52-source/xpdf/JBIG2Stream.h +145 -0
- data/pdf2json-0.52-source/xpdf/JBIG2Stream.o +0 -0
- data/pdf2json-0.52-source/xpdf/JPXStream.cc +3144 -0
- data/pdf2json-0.52-source/xpdf/JPXStream.h +351 -0
- data/pdf2json-0.52-source/xpdf/JPXStream.o +0 -0
- data/pdf2json-0.52-source/xpdf/Lexer.cc +485 -0
- data/pdf2json-0.52-source/xpdf/Lexer.h +80 -0
- data/pdf2json-0.52-source/xpdf/Lexer.o +0 -0
- data/pdf2json-0.52-source/xpdf/Link.cc +806 -0
- data/pdf2json-0.52-source/xpdf/Link.cc.old +784 -0
- data/pdf2json-0.52-source/xpdf/Link.h +415 -0
- data/pdf2json-0.52-source/xpdf/Link.h.old +369 -0
- data/pdf2json-0.52-source/xpdf/Link.o +0 -0
- data/pdf2json-0.52-source/xpdf/Makefile +232 -0
- data/pdf2json-0.52-source/xpdf/Makefile.dep +0 -0
- data/pdf2json-0.52-source/xpdf/Makefile.in +232 -0
- data/pdf2json-0.52-source/xpdf/NameToCharCode.cc +116 -0
- data/pdf2json-0.52-source/xpdf/NameToCharCode.h +42 -0
- data/pdf2json-0.52-source/xpdf/NameToCharCode.o +0 -0
- data/pdf2json-0.52-source/xpdf/NameToUnicodeTable.h +1097 -0
- data/pdf2json-0.52-source/xpdf/Object.cc +231 -0
- data/pdf2json-0.52-source/xpdf/Object.h +303 -0
- data/pdf2json-0.52-source/xpdf/Object.o +0 -0
- data/pdf2json-0.52-source/xpdf/Outline.cc +151 -0
- data/pdf2json-0.52-source/xpdf/Outline.h +76 -0
- data/pdf2json-0.52-source/xpdf/Outline.o +0 -0
- data/pdf2json-0.52-source/xpdf/OutputDev.cc +131 -0
- data/pdf2json-0.52-source/xpdf/OutputDev.h +253 -0
- data/pdf2json-0.52-source/xpdf/OutputDev.o +0 -0
- data/pdf2json-0.52-source/xpdf/PDFCore.cc +2044 -0
- data/pdf2json-0.52-source/xpdf/PDFCore.h +321 -0
- data/pdf2json-0.52-source/xpdf/PDFDoc.cc +404 -0
- data/pdf2json-0.52-source/xpdf/PDFDoc.h +183 -0
- data/pdf2json-0.52-source/xpdf/PDFDoc.o +0 -0
- data/pdf2json-0.52-source/xpdf/PDFDocEncoding.cc +44 -0
- data/pdf2json-0.52-source/xpdf/PDFDocEncoding.h +16 -0
- data/pdf2json-0.52-source/xpdf/PDFDocEncoding.o +0 -0
- data/pdf2json-0.52-source/xpdf/PSOutputDev.cc +6224 -0
- data/pdf2json-0.52-source/xpdf/PSOutputDev.h +395 -0
- data/pdf2json-0.52-source/xpdf/PSOutputDev.o +0 -0
- data/pdf2json-0.52-source/xpdf/PSTokenizer.cc +135 -0
- data/pdf2json-0.52-source/xpdf/PSTokenizer.h +41 -0
- data/pdf2json-0.52-source/xpdf/PSTokenizer.o +0 -0
- data/pdf2json-0.52-source/xpdf/Page.cc +454 -0
- data/pdf2json-0.52-source/xpdf/Page.h +187 -0
- data/pdf2json-0.52-source/xpdf/Page.o +0 -0
- data/pdf2json-0.52-source/xpdf/Parser.cc +227 -0
- data/pdf2json-0.52-source/xpdf/Parser.h +59 -0
- data/pdf2json-0.52-source/xpdf/Parser.o +0 -0
- data/pdf2json-0.52-source/xpdf/PreScanOutputDev.cc +257 -0
- data/pdf2json-0.52-source/xpdf/PreScanOutputDev.h +130 -0
- data/pdf2json-0.52-source/xpdf/PreScanOutputDev.o +0 -0
- data/pdf2json-0.52-source/xpdf/SecurityHandler.cc +390 -0
- data/pdf2json-0.52-source/xpdf/SecurityHandler.h +160 -0
- data/pdf2json-0.52-source/xpdf/SecurityHandler.o +0 -0
- data/pdf2json-0.52-source/xpdf/SplashOutputDev.cc +2845 -0
- data/pdf2json-0.52-source/xpdf/SplashOutputDev.h +247 -0
- data/pdf2json-0.52-source/xpdf/SplashOutputDev.o +0 -0
- data/pdf2json-0.52-source/xpdf/Stream-CCITT.h +459 -0
- data/pdf2json-0.52-source/xpdf/Stream.cc +4627 -0
- data/pdf2json-0.52-source/xpdf/Stream.h +858 -0
- data/pdf2json-0.52-source/xpdf/Stream.o +0 -0
- data/pdf2json-0.52-source/xpdf/TextOutputDev.cc +4090 -0
- data/pdf2json-0.52-source/xpdf/TextOutputDev.h +661 -0
- data/pdf2json-0.52-source/xpdf/TextOutputDev.o +0 -0
- data/pdf2json-0.52-source/xpdf/UTF8.h +56 -0
- data/pdf2json-0.52-source/xpdf/UnicodeMap.cc +302 -0
- data/pdf2json-0.52-source/xpdf/UnicodeMap.cc.old +293 -0
- data/pdf2json-0.52-source/xpdf/UnicodeMap.h +135 -0
- data/pdf2json-0.52-source/xpdf/UnicodeMap.h.old +123 -0
- data/pdf2json-0.52-source/xpdf/UnicodeMap.o +0 -0
- data/pdf2json-0.52-source/xpdf/UnicodeMapTables.h +361 -0
- data/pdf2json-0.52-source/xpdf/UnicodeTypeTable.cc +949 -0
- data/pdf2json-0.52-source/xpdf/UnicodeTypeTable.h +20 -0
- data/pdf2json-0.52-source/xpdf/UnicodeTypeTable.o +0 -0
- data/pdf2json-0.52-source/xpdf/XPDFApp.cc +447 -0
- data/pdf2json-0.52-source/xpdf/XPDFApp.h +114 -0
- data/pdf2json-0.52-source/xpdf/XPDFCore.cc +1655 -0
- data/pdf2json-0.52-source/xpdf/XPDFCore.h +251 -0
- data/pdf2json-0.52-source/xpdf/XPDFTree.cc +931 -0
- data/pdf2json-0.52-source/xpdf/XPDFTree.h +45 -0
- data/pdf2json-0.52-source/xpdf/XPDFTreeP.h +87 -0
- data/pdf2json-0.52-source/xpdf/XPDFViewer.cc +3488 -0
- data/pdf2json-0.52-source/xpdf/XPDFViewer.h +352 -0
- data/pdf2json-0.52-source/xpdf/XRef.cc +896 -0
- data/pdf2json-0.52-source/xpdf/XRef.h +133 -0
- data/pdf2json-0.52-source/xpdf/XRef.o +0 -0
- data/pdf2json-0.52-source/xpdf/XpdfPluginAPI.cc +262 -0
- data/pdf2json-0.52-source/xpdf/XpdfPluginAPI.h +341 -0
- data/pdf2json-0.52-source/xpdf/XpdfPluginAPI.o +0 -0
- data/pdf2json-0.52-source/xpdf/about-text.h +48 -0
- data/pdf2json-0.52-source/xpdf/about.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/backArrow.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/backArrowDis.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/config.h +112 -0
- data/pdf2json-0.52-source/xpdf/dblLeftArrow.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/dblLeftArrowDis.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/dblRightArrow.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/dblRightArrowDis.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/find.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/findDis.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/forwardArrow.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/forwardArrowDis.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/leftArrow.xbm +5 -0
- data/pdf2json-0.52-source/xpdf/leftArrowDis.xbm +5 -0
- data/pdf2json-0.52-source/xpdf/libXpdf.a +0 -0
- data/pdf2json-0.52-source/xpdf/pdffonts +0 -0
- data/pdf2json-0.52-source/xpdf/pdffonts.cc +298 -0
- data/pdf2json-0.52-source/xpdf/pdffonts.o +0 -0
- data/pdf2json-0.52-source/xpdf/pdfimages +0 -0
- data/pdf2json-0.52-source/xpdf/pdfimages.cc +155 -0
- data/pdf2json-0.52-source/xpdf/pdfimages.o +0 -0
- data/pdf2json-0.52-source/xpdf/pdfinfo +0 -0
- data/pdf2json-0.52-source/xpdf/pdfinfo.cc +387 -0
- data/pdf2json-0.52-source/xpdf/pdfinfo.o +0 -0
- data/pdf2json-0.52-source/xpdf/pdftoppm.cc +203 -0
- data/pdf2json-0.52-source/xpdf/pdftops +0 -0
- data/pdf2json-0.52-source/xpdf/pdftops.cc +344 -0
- data/pdf2json-0.52-source/xpdf/pdftops.o +0 -0
- data/pdf2json-0.52-source/xpdf/pdftotext +0 -0
- data/pdf2json-0.52-source/xpdf/pdftotext.cc +333 -0
- data/pdf2json-0.52-source/xpdf/pdftotext.o +0 -0
- data/pdf2json-0.52-source/xpdf/print.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/printDis.xbm +6 -0
- data/pdf2json-0.52-source/xpdf/rightArrow.xbm +5 -0
- data/pdf2json-0.52-source/xpdf/rightArrowDis.xbm +5 -0
- data/pdf2json-0.52-source/xpdf/vms_make.com +129 -0
- data/pdf2json-0.52-source/xpdf/xpdf.cc +344 -0
- data/pdf2json-0.52-source/xpdf/xpdfIcon.xpm +62 -0
- data/pdf2json.gemspec +29 -0
- metadata +518 -0
@@ -0,0 +1,661 @@
|
|
1
|
+
//========================================================================
|
2
|
+
//
|
3
|
+
// TextOutputDev.h
|
4
|
+
//
|
5
|
+
// Copyright 1997-2003 Glyph & Cog, LLC
|
6
|
+
//
|
7
|
+
//========================================================================
|
8
|
+
|
9
|
+
#ifndef TEXTOUTPUTDEV_H
|
10
|
+
#define TEXTOUTPUTDEV_H
|
11
|
+
|
12
|
+
#include <aconf.h>
|
13
|
+
|
14
|
+
#ifdef USE_GCC_PRAGMAS
|
15
|
+
#pragma interface
|
16
|
+
#endif
|
17
|
+
|
18
|
+
#include <stdio.h>
|
19
|
+
#include "gtypes.h"
|
20
|
+
#include "GfxFont.h"
|
21
|
+
#include "OutputDev.h"
|
22
|
+
|
23
|
+
class GString;
|
24
|
+
class GList;
|
25
|
+
class GfxFont;
|
26
|
+
class GfxState;
|
27
|
+
class UnicodeMap;
|
28
|
+
class Link;
|
29
|
+
|
30
|
+
class TextWord;
|
31
|
+
class TextPool;
|
32
|
+
class TextLine;
|
33
|
+
class TextLineFrag;
|
34
|
+
class TextBlock;
|
35
|
+
class TextFlow;
|
36
|
+
class TextWordList;
|
37
|
+
class TextPage;
|
38
|
+
|
39
|
+
//------------------------------------------------------------------------
|
40
|
+
|
41
|
+
typedef void (*TextOutputFunc)(void *stream, char *text, int len); // output callback type: receives an opaque <stream> pointer, a <text> buffer, and <len>; returns nothing. NOTE(review): <len> is presumably the number of chars in <text> -- confirm against callers
|
42
|
+
|
43
|
+
//------------------------------------------------------------------------
|
44
|
+
// TextFontInfo
|
45
|
+
//------------------------------------------------------------------------
|
46
|
+
|
47
|
+
class TextFontInfo { // Per-font record: holds a GfxFont pointer and, when TEXTOUT_WORD_LIST is enabled, a font name and descriptor flags.
|
48
|
+
public:
|
49
|
+
|
50
|
+
TextFontInfo(GfxState *state); // NOTE(review): presumably records the font currently selected in <state> -- confirm in TextOutputDev.cc
|
51
|
+
~TextFontInfo();
|
52
|
+
|
53
|
+
GBool matches(GfxState *state); // NOTE(review): presumably true when <state>'s current font is the one recorded here -- confirm in TextOutputDev.cc
|
54
|
+
|
55
|
+
#if TEXTOUT_WORD_LIST
|
56
|
+
// Get the font name (which may be NULL).  Returns the stored pointer itself, not a copy.
|
57
|
+
GString *getFontName() { return fontName; }
|
58
|
+
|
59
|
+
// Get font descriptor flags.  Each test returns the masked bits of <flags> (nonzero when the bit is set), not a normalized 0/1 value.
|
60
|
+
GBool isFixedWidth() { return flags & fontFixedWidth; }
|
61
|
+
GBool isSerif() { return flags & fontSerif; }
|
62
|
+
GBool isSymbolic() { return flags & fontSymbolic; }
|
63
|
+
GBool isItalic() { return flags & fontItalic; }
|
64
|
+
GBool isBold() { return flags & fontBold; }
|
65
|
+
#endif
|
66
|
+
|
67
|
+
private:
|
68
|
+
|
69
|
+
GfxFont *gfxFont; // font object this record refers to (ownership is not visible in this header)
|
70
|
+
#if TEXTOUT_WORD_LIST
|
71
|
+
GString *fontName; // font name returned by getFontName(); may be NULL
|
72
|
+
int flags; // font descriptor flag bits tested by the is*() accessors
|
73
|
+
#endif
|
74
|
+
|
75
|
+
friend class TextWord;
|
76
|
+
friend class TextPage;
|
77
|
+
};
|
78
|
+
|
79
|
+
//------------------------------------------------------------------------
|
80
|
+
// TextWord
|
81
|
+
//------------------------------------------------------------------------
|
82
|
+
|
83
|
+
class TextWord { // One word of extracted text: its characters, per-character edges, bounding box, font, and a pointer to the next word.
|
84
|
+
public:
|
85
|
+
|
86
|
+
// Constructor.
|
87
|
+
TextWord(GfxState *state, int rotA, double x0, double y0,
|
88
|
+
int charPosA, TextFontInfo *fontA, double fontSize);
|
89
|
+
|
90
|
+
// Destructor.
|
91
|
+
~TextWord();
|
92
|
+
|
93
|
+
// Add a character to the word.
|
94
|
+
void addChar(GfxState *state, double x, double y,
|
95
|
+
double dx, double dy, Unicode u);
|
96
|
+
|
97
|
+
// Merge <word> onto the end of <this>.
|
98
|
+
void merge(TextWord *word);
|
99
|
+
|
100
|
+
// Compares <this> to <word>, returning -1 (<), 0 (=), or +1 (>),
|
101
|
+
// based on a primary-axis comparison, e.g., x ordering if rot=0.
|
102
|
+
int primaryCmp(TextWord *word);
|
103
|
+
|
104
|
+
// Return the distance along the primary axis between <this> and
|
105
|
+
// <word>.
|
106
|
+
double primaryDelta(TextWord *word);
|
107
|
+
|
108
|
+
static int cmpYX(const void *p1, const void *p2); // has the comparator signature expected by qsort(). NOTE(review): presumably orders words by y, then x -- confirm in TextOutputDev.cc
|
109
|
+
|
110
|
+
// Get the TextFontInfo object associated with this word.
|
111
|
+
TextFontInfo *getFontInfo() { return font; }
|
112
|
+
|
113
|
+
// Get the next TextWord on the linked list.
|
114
|
+
TextWord *getNext() { return next; }
|
115
|
+
|
116
|
+
#if TEXTOUT_WORD_LIST
|
117
|
+
int getLength() { return len; } // returns <len>, the length of the text and edge arrays
|
118
|
+
Unicode getChar(int idx) { return text[idx]; } // no bounds check on <idx>
|
119
|
+
GString *getText(); // NOTE(review): presumably builds a new string that the caller must free -- confirm in TextOutputDev.cc
|
120
|
+
GString *getFontName() { return font->fontName; } // dereferences <font> without a NULL check; the returned name may itself be NULL
|
121
|
+
void getColor(double *r, double *g, double *b)
|
122
|
+
{ *r = colorR; *g = colorG; *b = colorB; } // copies the word color into the three out-parameters
|
123
|
+
void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA)
|
124
|
+
{ *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; } // copies the bounding box into the four out-parameters
|
125
|
+
void getCharBBox(int charIdx, double *xMinA, double *yMinA, // NOTE(review): presumably the bounding box of the single character at <charIdx> -- confirm
|
126
|
+
double *xMaxA, double *yMaxA);
|
127
|
+
double getFontSize() { return fontSize; }
|
128
|
+
int getRotation() { return rot; } // 0, 1, 2, or 3 (multiples of 90 degrees)
|
129
|
+
int getCharPos() { return charPos; } // character position within the content stream
|
130
|
+
int getCharLen() { return charLen; } // number of content stream characters in this word
|
131
|
+
GBool getSpaceAfter() { return spaceAfter; } // set if a space follows this word on the line
|
132
|
+
#endif
|
133
|
+
|
134
|
+
GBool isUnderlined() { return underlined; }
|
135
|
+
Link *getLink() { return link; } // returns the stored pointer. NOTE(review): presumably NULL when the word is not part of a link -- confirm
|
136
|
+
|
137
|
+
private:
|
138
|
+
|
139
|
+
int rot; // rotation, multiple of 90 degrees
|
140
|
+
// (0, 1, 2, or 3)
|
141
|
+
double xMin, xMax; // bounding box x coordinates
|
142
|
+
double yMin, yMax; // bounding box y coordinates
|
143
|
+
double base; // baseline x or y coordinate
|
144
|
+
Unicode *text; // the text
|
145
|
+
double *edge; // "near" edge x or y coord of each char
|
146
|
+
// (plus one extra entry for the last char)
|
147
|
+
int len; // length of text and edge arrays
|
148
|
+
int size; // size of text and edge arrays
|
149
|
+
int charPos; // character position (within content stream)
|
150
|
+
int charLen; // number of content stream characters in
|
151
|
+
// this word
|
152
|
+
TextFontInfo *font; // font information
|
153
|
+
double fontSize; // font size
|
154
|
+
GBool spaceAfter; // set if there is a space between this
|
155
|
+
// word and the next word on the line
|
156
|
+
TextWord *next; // next word in line
|
157
|
+
|
158
|
+
#if TEXTOUT_WORD_LIST
|
159
|
+
double colorR, // word color
|
160
|
+
colorG,
|
161
|
+
colorB;
|
162
|
+
#endif
|
163
|
+
|
164
|
+
GBool underlined; // underline flag returned by isUnderlined()
|
165
|
+
Link *link; // link returned by getLink() (ownership is not visible in this header)
|
166
|
+
|
167
|
+
friend class TextPool;
|
168
|
+
friend class TextLine;
|
169
|
+
friend class TextBlock;
|
170
|
+
friend class TextFlow;
|
171
|
+
friend class TextWordList;
|
172
|
+
friend class TextPage;
|
173
|
+
};
|
174
|
+
|
175
|
+
//------------------------------------------------------------------------
|
176
|
+
// TextPool
|
177
|
+
//------------------------------------------------------------------------
|
178
|
+
|
179
|
+
class TextPool {
|
180
|
+
public:
|
181
|
+
|
182
|
+
TextPool();
|
183
|
+
~TextPool();
|
184
|
+
|
185
|
+
TextWord *getPool(int baseIdx) { return pool[baseIdx - minBaseIdx]; }
|
186
|
+
void setPool(int baseIdx, TextWord *p) { pool[baseIdx - minBaseIdx] = p; }
|
187
|
+
|
188
|
+
int getBaseIdx(double base);
|
189
|
+
|
190
|
+
void addWord(TextWord *word);
|
191
|
+
|
192
|
+
private:
|
193
|
+
|
194
|
+
int minBaseIdx; // min baseline bucket index
|
195
|
+
int maxBaseIdx; // max baseline bucket index
|
196
|
+
TextWord **pool; // array of linked lists, one for each
|
197
|
+
// baseline value (multiple of 4 pts)
|
198
|
+
TextWord *cursor; // pointer to last-accessed word
|
199
|
+
int cursorBaseIdx; // baseline bucket index of last-accessed word
|
200
|
+
|
201
|
+
friend class TextBlock;
|
202
|
+
friend class TextPage;
|
203
|
+
};
|
204
|
+
|
205
|
+
//------------------------------------------------------------------------
|
206
|
+
// TextLine
|
207
|
+
//------------------------------------------------------------------------
|
208
|
+
|
209
|
+
class TextLine {
|
210
|
+
public:
|
211
|
+
|
212
|
+
TextLine(TextBlock *blkA, int rotA, double baseA);
|
213
|
+
~TextLine();
|
214
|
+
|
215
|
+
void addWord(TextWord *word);
|
216
|
+
|
217
|
+
// Return the distance along the primary axis between <this> and
|
218
|
+
// <line>.
|
219
|
+
double primaryDelta(TextLine *line);
|
220
|
+
|
221
|
+
// Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
|
222
|
+
// based on a primary-axis comparison, e.g., x ordering if rot=0.
|
223
|
+
int primaryCmp(TextLine *line);
|
224
|
+
|
225
|
+
// Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
|
226
|
+
// based on a secondary-axis comparison of the baselines, e.g., y
|
227
|
+
// ordering if rot=0.
|
228
|
+
int secondaryCmp(TextLine *line);
|
229
|
+
|
230
|
+
int cmpYX(TextLine *line);
|
231
|
+
|
232
|
+
static int cmpXY(const void *p1, const void *p2);
|
233
|
+
|
234
|
+
void coalesce(UnicodeMap *uMap);
|
235
|
+
|
236
|
+
// Get the head of the linked list of TextWords.
|
237
|
+
TextWord *getWords() { return words; }
|
238
|
+
|
239
|
+
// Get the next TextLine on the linked list.
|
240
|
+
TextLine *getNext() { return next; }
|
241
|
+
|
242
|
+
// Returns true if the last char of the line is a hyphen.
|
243
|
+
GBool isHyphenated() { return hyphenated; }
|
244
|
+
|
245
|
+
private:
|
246
|
+
|
247
|
+
TextBlock *blk; // parent block
|
248
|
+
int rot; // text rotation
|
249
|
+
double xMin, xMax; // bounding box x coordinates
|
250
|
+
double yMin, yMax; // bounding box y coordinates
|
251
|
+
double base; // baseline x or y coordinate
|
252
|
+
TextWord *words; // words in this line
|
253
|
+
TextWord *lastWord; // last word in this line
|
254
|
+
Unicode *text; // Unicode text of the line, including
|
255
|
+
// spaces between words
|
256
|
+
double *edge; // "near" edge x or y coord of each char
|
257
|
+
// (plus one extra entry for the last char)
|
258
|
+
int *col; // starting column number of each Unicode char
|
259
|
+
int len; // number of Unicode chars
|
260
|
+
int convertedLen; // total number of converted characters
|
261
|
+
GBool hyphenated; // set if last char is a hyphen
|
262
|
+
TextLine *next; // next line in block
|
263
|
+
|
264
|
+
friend class TextLineFrag;
|
265
|
+
friend class TextBlock;
|
266
|
+
friend class TextFlow;
|
267
|
+
friend class TextWordList;
|
268
|
+
friend class TextPage;
|
269
|
+
};
|
270
|
+
|
271
|
+
//------------------------------------------------------------------------
|
272
|
+
// TextBlock
|
273
|
+
//------------------------------------------------------------------------
|
274
|
+
|
275
|
+
class TextBlock {
|
276
|
+
public:
|
277
|
+
|
278
|
+
TextBlock(TextPage *pageA, int rotA);
|
279
|
+
~TextBlock();
|
280
|
+
|
281
|
+
void addWord(TextWord *word);
|
282
|
+
|
283
|
+
void coalesce(UnicodeMap *uMap);
|
284
|
+
|
285
|
+
// Update this block's priMin and priMax values, looking at <blk>.
|
286
|
+
void updatePriMinMax(TextBlock *blk);
|
287
|
+
|
288
|
+
static int cmpXYPrimaryRot(const void *p1, const void *p2);
|
289
|
+
|
290
|
+
static int cmpYXPrimaryRot(const void *p1, const void *p2);
|
291
|
+
|
292
|
+
int primaryCmp(TextBlock *blk);
|
293
|
+
|
294
|
+
double secondaryDelta(TextBlock *blk);
|
295
|
+
|
296
|
+
// Returns true if <this> is below <blk>, relative to the page's
|
297
|
+
// primary rotation.
|
298
|
+
GBool isBelow(TextBlock *blk);
|
299
|
+
|
300
|
+
// Get the head of the linked list of TextLines.
|
301
|
+
TextLine *getLines() { return lines; }
|
302
|
+
|
303
|
+
// Get the next TextBlock on the linked list.
|
304
|
+
TextBlock *getNext() { return next; }
|
305
|
+
|
306
|
+
private:
|
307
|
+
|
308
|
+
TextPage *page; // the parent page
|
309
|
+
int rot; // text rotation
|
310
|
+
double xMin, xMax; // bounding box x coordinates
|
311
|
+
double yMin, yMax; // bounding box y coordinates
|
312
|
+
double priMin, priMax; // whitespace bounding box along primary axis
|
313
|
+
|
314
|
+
TextPool *pool; // pool of words (used only until lines
|
315
|
+
// are built)
|
316
|
+
TextLine *lines; // linked list of lines
|
317
|
+
TextLine *curLine; // most recently added line
|
318
|
+
int nLines; // number of lines
|
319
|
+
int charCount; // number of characters in the block
|
320
|
+
int col; // starting column
|
321
|
+
int nColumns; // number of columns in the block
|
322
|
+
|
323
|
+
TextBlock *next;
|
324
|
+
TextBlock *stackNext;
|
325
|
+
|
326
|
+
friend class TextLine;
|
327
|
+
friend class TextLineFrag;
|
328
|
+
friend class TextFlow;
|
329
|
+
friend class TextWordList;
|
330
|
+
friend class TextPage;
|
331
|
+
};
|
332
|
+
|
333
|
+
//------------------------------------------------------------------------
|
334
|
+
// TextFlow
|
335
|
+
//------------------------------------------------------------------------
|
336
|
+
|
337
|
+
class TextFlow {
|
338
|
+
public:
|
339
|
+
|
340
|
+
TextFlow(TextPage *pageA, TextBlock *blk);
|
341
|
+
~TextFlow();
|
342
|
+
|
343
|
+
// Add a block to the end of this flow.
|
344
|
+
void addBlock(TextBlock *blk);
|
345
|
+
|
346
|
+
// Returns true if <blk> fits below <prevBlk> in the flow, i.e., (1)
|
347
|
+
// it uses a font no larger than the last block added to the flow,
|
348
|
+
// and (2) it fits within the flow's [priMin, priMax] along the
|
349
|
+
// primary axis.
|
350
|
+
GBool blockFits(TextBlock *blk, TextBlock *prevBlk);
|
351
|
+
|
352
|
+
// Get the head of the linked list of TextBlocks.
|
353
|
+
TextBlock *getBlocks() { return blocks; }
|
354
|
+
|
355
|
+
// Get the next TextFlow on the linked list.
|
356
|
+
TextFlow *getNext() { return next; }
|
357
|
+
|
358
|
+
private:
|
359
|
+
|
360
|
+
TextPage *page; // the parent page
|
361
|
+
double xMin, xMax; // bounding box x coordinates
|
362
|
+
double yMin, yMax; // bounding box y coordinates
|
363
|
+
double priMin, priMax; // whitespace bounding box along primary axis
|
364
|
+
TextBlock *blocks; // blocks in flow
|
365
|
+
TextBlock *lastBlk; // last block in this flow
|
366
|
+
TextFlow *next;
|
367
|
+
|
368
|
+
friend class TextWordList;
|
369
|
+
friend class TextPage;
|
370
|
+
};
|
371
|
+
|
372
|
+
#if TEXTOUT_WORD_LIST
|
373
|
+
|
374
|
+
//------------------------------------------------------------------------
|
375
|
+
// TextWordList
|
376
|
+
//------------------------------------------------------------------------
|
377
|
+
|
378
|
+
class TextWordList {
|
379
|
+
public:
|
380
|
+
|
381
|
+
// Build a flat word list, in content stream order (if
|
382
|
+
// text->rawOrder is true), physical layout order (if <physLayout>
|
383
|
+
// is true and text->rawOrder is false), or reading order (if both
|
384
|
+
// flags are false).
|
385
|
+
TextWordList(TextPage *text, GBool physLayout);
|
386
|
+
|
387
|
+
~TextWordList();
|
388
|
+
|
389
|
+
// Return the number of words on the list.
|
390
|
+
int getLength();
|
391
|
+
|
392
|
+
// Return the <idx>th word from the list.
|
393
|
+
TextWord *get(int idx);
|
394
|
+
|
395
|
+
private:
|
396
|
+
|
397
|
+
GList *words; // [TextWord]
|
398
|
+
};
|
399
|
+
|
400
|
+
#endif // TEXTOUT_WORD_LIST
|
401
|
+
|
402
|
+
//------------------------------------------------------------------------
|
403
|
+
// TextPage
|
404
|
+
//------------------------------------------------------------------------
|
405
|
+
|
406
|
+
class TextPage {
|
407
|
+
public:
|
408
|
+
|
409
|
+
// Constructor.
|
410
|
+
TextPage(GBool rawOrderA);
|
411
|
+
|
412
|
+
// Destructor.
|
413
|
+
~TextPage();
|
414
|
+
|
415
|
+
// Start a new page.
|
416
|
+
void startPage(GfxState *state);
|
417
|
+
|
418
|
+
// End the current page.
|
419
|
+
void endPage();
|
420
|
+
|
421
|
+
// Update the current font.
|
422
|
+
void updateFont(GfxState *state);
|
423
|
+
|
424
|
+
// Begin a new word.
|
425
|
+
void beginWord(GfxState *state, double x0, double y0);
|
426
|
+
|
427
|
+
// Add a character to the current word.
|
428
|
+
void addChar(GfxState *state, double x, double y,
|
429
|
+
double dx, double dy,
|
430
|
+
CharCode c, int nBytes, Unicode *u, int uLen);
|
431
|
+
|
432
|
+
// End the current word, sorting it into the list of words.
|
433
|
+
void endWord();
|
434
|
+
|
435
|
+
// Add a word, sorting it into the list of words.
|
436
|
+
void addWord(TextWord *word);
|
437
|
+
|
438
|
+
// Add a (potential) underline.
|
439
|
+
void addUnderline(double x0, double y0, double x1, double y1);
|
440
|
+
|
441
|
+
// Add a hyperlink.
|
442
|
+
void addLink(int xMin, int yMin, int xMax, int yMax, Link *link);
|
443
|
+
|
444
|
+
// Coalesce strings that look like parts of the same line.
|
445
|
+
void coalesce(GBool physLayout, GBool doHTML);
|
446
|
+
|
447
|
+
// Find a string. If <startAtTop> is true, starts looking at the
|
448
|
+
// top of the page; else if <startAtLast> is true, starts looking
|
449
|
+
// immediately after the last find result; else starts looking at
|
450
|
+
// <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the
|
451
|
+
// bottom of the page; else if <stopAtLast> is true, stops looking
|
452
|
+
// just before the last find result; else stops looking at
|
453
|
+
// <xMax>,<yMax>.
|
454
|
+
GBool findText(Unicode *s, int len,
|
455
|
+
GBool startAtTop, GBool stopAtBottom,
|
456
|
+
GBool startAtLast, GBool stopAtLast,
|
457
|
+
GBool caseSensitive, GBool backward,
|
458
|
+
double *xMin, double *yMin,
|
459
|
+
double *xMax, double *yMax);
|
460
|
+
|
461
|
+
// Get the text which is inside the specified rectangle.
|
462
|
+
GString *getText(double xMin, double yMin,
|
463
|
+
double xMax, double yMax);
|
464
|
+
|
465
|
+
// Find a string by character position and length. If found, sets
|
466
|
+
// the text bounding rectangle and returns true; otherwise returns
|
467
|
+
// false.
|
468
|
+
GBool findCharRange(int pos, int length,
|
469
|
+
double *xMin, double *yMin,
|
470
|
+
double *xMax, double *yMax);
|
471
|
+
|
472
|
+
// Dump contents of page to a file.
|
473
|
+
void dump(void *outputStream, TextOutputFunc outputFunc,
|
474
|
+
GBool physLayout);
|
475
|
+
|
476
|
+
// Get the head of the linked list of TextFlows.
|
477
|
+
TextFlow *getFlows() { return flows; }
|
478
|
+
|
479
|
+
#if TEXTOUT_WORD_LIST
|
480
|
+
// Build a flat word list, in content stream order (if
|
481
|
+
// this->rawOrder is true), physical layout order (if <physLayout>
|
482
|
+
// is true and this->rawOrder is false), or reading order (if both
|
483
|
+
// flags are false).
|
484
|
+
TextWordList *makeWordList(GBool physLayout);
|
485
|
+
#endif
|
486
|
+
|
487
|
+
private:
|
488
|
+
|
489
|
+
void clear();
|
490
|
+
void assignColumns(TextLineFrag *frags, int nFrags, int rot);
|
491
|
+
int dumpFragment(Unicode *text, int len, UnicodeMap *uMap, GString *s);
|
492
|
+
|
493
|
+
GBool rawOrder; // keep text in content stream order
|
494
|
+
|
495
|
+
double pageWidth, pageHeight; // width and height of current page
|
496
|
+
TextWord *curWord; // currently active string
|
497
|
+
int charPos; // next character position (within content
|
498
|
+
// stream)
|
499
|
+
TextFontInfo *curFont; // current font
|
500
|
+
double curFontSize; // current font size
|
501
|
+
int nest; // current nesting level (for Type 3 fonts)
|
502
|
+
int nTinyChars; // number of "tiny" chars seen so far
|
503
|
+
GBool lastCharOverlap; // set if the last added char overlapped the
|
504
|
+
// previous char
|
505
|
+
|
506
|
+
TextPool *pools[4]; // a "pool" of TextWords for each rotation
|
507
|
+
TextFlow *flows; // linked list of flows
|
508
|
+
TextBlock **blocks; // array of blocks, in yx order
|
509
|
+
int nBlocks; // number of blocks
|
510
|
+
int primaryRot; // primary rotation
|
511
|
+
GBool primaryLR; // primary direction (true means L-to-R,
|
512
|
+
// false means R-to-L)
|
513
|
+
TextWord *rawWords; // list of words, in raw order (only if
|
514
|
+
// rawOrder is set)
|
515
|
+
TextWord *rawLastWord; // last word on rawWords list
|
516
|
+
|
517
|
+
GList *fonts; // all font info objects used on this
|
518
|
+
// page [TextFontInfo]
|
519
|
+
|
520
|
+
double lastFindXMin, // coordinates of the last "find" result
|
521
|
+
lastFindYMin;
|
522
|
+
GBool haveLastFind;
|
523
|
+
|
524
|
+
GList *underlines; // [TextUnderline]
|
525
|
+
GList *links; // [TextLink]
|
526
|
+
|
527
|
+
friend class TextLine;
|
528
|
+
friend class TextLineFrag;
|
529
|
+
friend class TextBlock;
|
530
|
+
friend class TextFlow;
|
531
|
+
friend class TextWordList;
|
532
|
+
};
|
533
|
+
|
534
|
+
//------------------------------------------------------------------------
|
535
|
+
// TextOutputDev
|
536
|
+
//------------------------------------------------------------------------
|
537
|
+
|
538
|
+
class TextOutputDev: public OutputDev {
|
539
|
+
public:
|
540
|
+
|
541
|
+
// Open a text output file. If <fileName> is NULL, no file is
|
542
|
+
// written (this is useful, e.g., for searching text). If
|
543
|
+
// <physLayoutA> is true, the original physical layout of the text
|
544
|
+
// is maintained. If <rawOrder> is true, the text is kept in
|
545
|
+
// content stream order.
|
546
|
+
TextOutputDev(char *fileName, GBool physLayoutA,
|
547
|
+
GBool rawOrderA, GBool append);
|
548
|
+
|
549
|
+
// Create a TextOutputDev which will write to a generic stream. If
|
550
|
+
// <physLayoutA> is true, the original physical layout of the text
|
551
|
+
// is maintained. If <rawOrder> is true, the text is kept in
|
552
|
+
// content stream order.
|
553
|
+
TextOutputDev(TextOutputFunc func, void *stream,
|
554
|
+
GBool physLayoutA, GBool rawOrderA);
|
555
|
+
|
556
|
+
// Destructor.
|
557
|
+
virtual ~TextOutputDev();
|
558
|
+
|
559
|
+
// Check if file was successfully created.
|
560
|
+
virtual GBool isOk() { return ok; }
|
561
|
+
|
562
|
+
//---- get info about output device
|
563
|
+
|
564
|
+
// Does this device use upside-down coordinates?
|
565
|
+
// (Upside-down means (0,0) is the top left corner of the page.)
|
566
|
+
virtual GBool upsideDown() { return gTrue; }
|
567
|
+
|
568
|
+
// Does this device use drawChar() or drawString()?
|
569
|
+
virtual GBool useDrawChar() { return gTrue; }
|
570
|
+
|
571
|
+
// Does this device use beginType3Char/endType3Char? Otherwise,
|
572
|
+
// text in Type 3 fonts will be drawn with drawChar/drawString.
|
573
|
+
virtual GBool interpretType3Chars() { return gFalse; }
|
574
|
+
|
575
|
+
// Does this device need non-text content?
|
576
|
+
virtual GBool needNonText() { return gFalse; }
|
577
|
+
|
578
|
+
//----- initialization and control
|
579
|
+
|
580
|
+
// Start a page.
|
581
|
+
virtual void startPage(int pageNum, GfxState *state);
|
582
|
+
|
583
|
+
// End a page.
|
584
|
+
virtual void endPage();
|
585
|
+
|
586
|
+
//----- update text state
|
587
|
+
virtual void updateFont(GfxState *state);
|
588
|
+
|
589
|
+
//----- text drawing
|
590
|
+
virtual void beginString(GfxState *state, GString *s);
|
591
|
+
virtual void endString(GfxState *state);
|
592
|
+
virtual void drawChar(GfxState *state, double x, double y,
|
593
|
+
double dx, double dy,
|
594
|
+
double originX, double originY,
|
595
|
+
CharCode c, int nBytes, Unicode *u, int uLen);
|
596
|
+
|
597
|
+
//----- path painting
|
598
|
+
virtual void stroke(GfxState *state);
|
599
|
+
virtual void fill(GfxState *state);
|
600
|
+
virtual void eoFill(GfxState *state);
|
601
|
+
|
602
|
+
//----- link borders
|
603
|
+
virtual void processLink(Link *link, Catalog *catalog);
|
604
|
+
|
605
|
+
//----- special access
|
606
|
+
|
607
|
+
// Find a string. If <startAtTop> is true, starts looking at the
|
608
|
+
// top of the page; else if <startAtLast> is true, starts looking
|
609
|
+
// immediately after the last find result; else starts looking at
|
610
|
+
// <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the
|
611
|
+
// bottom of the page; else if <stopAtLast> is true, stops looking
|
612
|
+
// just before the last find result; else stops looking at
|
613
|
+
// <xMax>,<yMax>.
|
614
|
+
GBool findText(Unicode *s, int len,
|
615
|
+
GBool startAtTop, GBool stopAtBottom,
|
616
|
+
GBool startAtLast, GBool stopAtLast,
|
617
|
+
GBool caseSensitive, GBool backward,
|
618
|
+
double *xMin, double *yMin,
|
619
|
+
double *xMax, double *yMax);
|
620
|
+
|
621
|
+
// Get the text which is inside the specified rectangle.
|
622
|
+
GString *getText(double xMin, double yMin,
|
623
|
+
double xMax, double yMax);
|
624
|
+
|
625
|
+
// Find a string by character position and length. If found, sets
|
626
|
+
// the text bounding rectangle and returns true; otherwise returns
|
627
|
+
// false.
|
628
|
+
GBool findCharRange(int pos, int length,
|
629
|
+
double *xMin, double *yMin,
|
630
|
+
double *xMax, double *yMax);
|
631
|
+
|
632
|
+
#if TEXTOUT_WORD_LIST
|
633
|
+
// Build a flat word list, in content stream order (if
|
634
|
+
// this->rawOrder is true), physical layout order (if
|
635
|
+
// this->physLayout is true and this->rawOrder is false), or reading
|
636
|
+
// order (if both flags are false).
|
637
|
+
TextWordList *makeWordList();
|
638
|
+
#endif
|
639
|
+
|
640
|
+
// Returns the TextPage object for the last rasterized page,
|
641
|
+
// transferring ownership to the caller.
|
642
|
+
TextPage *takeText();
|
643
|
+
|
644
|
+
// Turn extra processing for HTML conversion on or off.
|
645
|
+
void enableHTMLExtras(GBool doHTMLA) { doHTML = doHTMLA; }
|
646
|
+
|
647
|
+
private:
|
648
|
+
|
649
|
+
TextOutputFunc outputFunc; // output function
|
650
|
+
void *outputStream; // output stream
|
651
|
+
GBool needClose; // need to close the output file?
|
652
|
+
// (only if outputStream is a FILE*)
|
653
|
+
TextPage *text; // text for the current page
|
654
|
+
GBool physLayout; // maintain original physical layout when
|
655
|
+
// dumping text
|
656
|
+
GBool rawOrder; // keep text in content stream order
|
657
|
+
GBool doHTML; // extra processing for HTML conversion
|
658
|
+
GBool ok; // set up ok?
|
659
|
+
};
|
660
|
+
|
661
|
+
#endif
|