tesseract_bin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
///////////////////////////////////////////////////////////////////////
|
|
2
|
+
// File: leptonica_pageseg.cpp
|
|
3
|
+
// Description: Leptonica-based page segmenter.
|
|
4
|
+
// Author: Dan Bloomberg
|
|
5
|
+
// Created: Tue Aug 28 08:56:43 PDT 2007
|
|
6
|
+
//
|
|
7
|
+
// (C) Copyright 2007, Google Inc.
|
|
8
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
// you may not use this file except in compliance with the License.
|
|
10
|
+
// You may obtain a copy of the License at
|
|
11
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
// See the License for the specific language governing permissions and
|
|
16
|
+
// limitations under the License.
|
|
17
|
+
//
|
|
18
|
+
///////////////////////////////////////////////////////////////////////
|
|
19
|
+
|
|
20
|
+
// Include automatically generated configuration file if running autoconf.
|
|
21
|
+
#ifdef HAVE_CONFIG_H
|
|
22
|
+
#include "config_auto.h"
|
|
23
|
+
#endif
|
|
24
|
+
|
|
25
|
+
#include "leptonica_pageseg.h"
|
|
26
|
+
|
|
27
|
+
#ifdef HAVE_LIBLEPT
|
|
28
|
+
// Include leptonica library only if autoconf (or makefile etc) tell us to.
|
|
29
|
+
#include "allheaders.h"
|
|
30
|
+
#endif
|
|
31
|
+
|
|
32
|
+
#ifdef HAVE_LIBLEPT
|
|
33
|
+
// ONLY available if you have Leptonica installed.
|
|
34
|
+
|
|
35
|
+
// class LeptonicaPageSeg
|
|
36
|
+
//
|
|
37
|
+
// Region segmentation
|
|
38
|
+
// bool GetHalftoneMask()
|
|
39
|
+
// bool GetTextlineMask()
|
|
40
|
+
// bool GetTextblockMask()
|
|
41
|
+
//
|
|
42
|
+
// Top-level (for testing/debugging)
|
|
43
|
+
// bool GetAllRegions()
|
|
44
|
+
//
|
|
45
|
+
//
|
|
46
|
+
|
|
47
|
+
//------------------------------------------------------------------
|
|
48
|
+
// Region segmentation
|
|
49
|
+
//------------------------------------------------------------------
|
|
50
|
+
// GetHalftoneMask()
|
|
51
|
+
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
|
52
|
+
// &pixht (returns halftone mask; can be NULL)
|
|
53
|
+
// &baht (returns boxa of halftone mask component b.b.s; can be NULL)
|
|
54
|
+
// &paht (returns pixa of halftone mask components; can be NULL)
|
|
55
|
+
// debugflag (set true to write out intermediate images)
|
|
56
|
+
// Return: true if ok, false on error
|
|
57
|
+
// Note: If there are no halftone regions, all requested data structures
|
|
58
|
+
// are returned as NULL. This is not an error.
|
|
59
|
+
bool LeptonicaPageSeg::GetHalftoneMask(Pix *pixs,
|
|
60
|
+
Pix **ppixht,
|
|
61
|
+
Boxa **pbaht,
|
|
62
|
+
Pixa **ppaht,
|
|
63
|
+
bool debugflag) {
|
|
64
|
+
if (!pixs) {
|
|
65
|
+
fprintf(stderr, "pixs not defined\n");
|
|
66
|
+
return false;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
int32 debug = debugflag ? 1 : 0;
|
|
70
|
+
|
|
71
|
+
// 2x reduce, to 150 - 200 ppi
|
|
72
|
+
Pix *pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
|
|
73
|
+
pixDisplayWrite(pixr, debug);
|
|
74
|
+
|
|
75
|
+
// Get the halftone mask
|
|
76
|
+
Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, NULL, debug);
|
|
77
|
+
pixDestroy(&pixr);
|
|
78
|
+
if (!pixht2) {
|
|
79
|
+
if (debug)
|
|
80
|
+
printf("No halftone image parts found\n");
|
|
81
|
+
if (ppixht) *ppixht = NULL;
|
|
82
|
+
if (pbaht) *pbaht = NULL;
|
|
83
|
+
if (ppaht) *ppaht = NULL;
|
|
84
|
+
return true;
|
|
85
|
+
} else {
|
|
86
|
+
if (debug)
|
|
87
|
+
printf("Halftone image parts found\n");
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
Pix *pixht = pixExpandReplicate(pixht2, 2);
|
|
91
|
+
pixDisplayWrite(pixht, debug);
|
|
92
|
+
pixDestroy(&pixht2);
|
|
93
|
+
|
|
94
|
+
// Fill to capture pixels near the mask edges that were missed
|
|
95
|
+
Pix *pixt = pixSeedfillBinary(NULL, pixht, pixs, 8);
|
|
96
|
+
pixOr(pixht, pixht, pixt);
|
|
97
|
+
pixDestroy(&pixt);
|
|
98
|
+
|
|
99
|
+
if (ppaht) {
|
|
100
|
+
Boxa *boxa = pixConnComp(pixht, ppaht, 4);
|
|
101
|
+
if (pbaht) {
|
|
102
|
+
*pbaht = boxa;
|
|
103
|
+
} else {
|
|
104
|
+
boxaDestroy(&boxa);
|
|
105
|
+
}
|
|
106
|
+
} else if (pbaht) {
|
|
107
|
+
*pbaht = pixConnComp(pixht, NULL, 4);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
if (ppixht) {
|
|
111
|
+
*ppixht =pixht;
|
|
112
|
+
} else {
|
|
113
|
+
pixDestroy(&pixht);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return true;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
// GetTextlineMask()
|
|
121
|
+
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
|
122
|
+
// &pixtm (returns textline mask; can be NULL)
|
|
123
|
+
// &pixvws (returns vertical whitespace mask; can be NULL)
|
|
124
|
+
// &batm (returns boxa of textline mask component b.b.s; can be NULL)
|
|
125
|
+
// &patm (returns pixa of textline mask components; can be NULL)
|
|
126
|
+
// debugflag (set true to write out intermediate images)
|
|
127
|
+
// Return: true if ok, false on error
|
|
128
|
+
bool LeptonicaPageSeg::GetTextlineMask(Pix *pixs,
|
|
129
|
+
Pix **ppixtm,
|
|
130
|
+
Pix **ppixvws,
|
|
131
|
+
Boxa **pbatm,
|
|
132
|
+
Pixa **ppatm,
|
|
133
|
+
bool debugflag) {
|
|
134
|
+
if (!pixs) {
|
|
135
|
+
fprintf(stderr, "pixs not defined\n");
|
|
136
|
+
return false;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
int32 debug = debugflag ? 1 : 0;
|
|
140
|
+
|
|
141
|
+
// 2x reduce, to 150 - 200 ppi
|
|
142
|
+
Pix *pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
|
|
143
|
+
pixDisplayWrite(pixr, debug);
|
|
144
|
+
|
|
145
|
+
// Remove the halftone pixels from the image
|
|
146
|
+
Pix *pixtext;
|
|
147
|
+
Pix *pixht2 = pixGenHalftoneMask(pixr, &pixtext, NULL, debug);
|
|
148
|
+
pixDestroy(&pixr);
|
|
149
|
+
pixDestroy(&pixht2);
|
|
150
|
+
|
|
151
|
+
// Get the textline mask at full res
|
|
152
|
+
Pix *pixvws;
|
|
153
|
+
Pix *pixtm2 = pixGenTextlineMask(pixtext, &pixvws, NULL, debug);
|
|
154
|
+
Pix *pixt = pixExpandReplicate(pixtm2, 2);
|
|
155
|
+
pixDestroy(&pixtext);
|
|
156
|
+
pixDestroy(&pixtm2);
|
|
157
|
+
|
|
158
|
+
// Small dilation to capture pixels near the mask edges that were missed
|
|
159
|
+
// Do not use filling here, because the result is then used to find
|
|
160
|
+
// textblocks, and a mistake here gets propagated.
|
|
161
|
+
Pix *pixtm = pixDilateBrick(NULL, pixt, 3, 3);
|
|
162
|
+
pixDestroy(&pixt);
|
|
163
|
+
pixDisplayWrite(pixtm, debug);
|
|
164
|
+
|
|
165
|
+
if (ppatm) {
|
|
166
|
+
Boxa *boxa = pixConnComp(pixtm, ppatm, 4);
|
|
167
|
+
if (pbatm) {
|
|
168
|
+
*pbatm = boxa;
|
|
169
|
+
} else {
|
|
170
|
+
boxaDestroy(&boxa);
|
|
171
|
+
}
|
|
172
|
+
} else if (pbatm) {
|
|
173
|
+
*pbatm = pixConnComp(pixtm, NULL, 4);
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
if (ppixtm) {
|
|
177
|
+
*ppixtm =pixtm;
|
|
178
|
+
} else {
|
|
179
|
+
pixDestroy(&pixtm);
|
|
180
|
+
}
|
|
181
|
+
if (ppixvws) {
|
|
182
|
+
*ppixvws =pixvws;
|
|
183
|
+
} else {
|
|
184
|
+
pixDestroy(&pixvws);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
return true;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
// GetTextblockMask()
|
|
192
|
+
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
|
193
|
+
// &pixtb (returns textblock mask; can be NULL)
|
|
194
|
+
// &batb (returns boxa of textblock mask component b.b; can be NULL)
|
|
195
|
+
// &patb (returns pixa of textblock mask components; can be NULL)
|
|
196
|
+
// debugflag (set true to write out intermediate images)
|
|
197
|
+
// Return: true if ok, false on error
|
|
198
|
+
// Notes:
|
|
199
|
+
// To obtain a set of polylines of the outer borders of each of the
|
|
200
|
+
// textblock regions, use pixGetOuterBordersPtaa().
|
|
201
|
+
bool LeptonicaPageSeg::GetTextblockMask(Pix *pixs,
|
|
202
|
+
Pix **ppixtb,
|
|
203
|
+
Boxa **pbatb,
|
|
204
|
+
Pixa **ppatb,
|
|
205
|
+
bool debugflag) {
|
|
206
|
+
if (!pixs) {
|
|
207
|
+
fprintf(stderr, "pixs not defined\n");
|
|
208
|
+
return false;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
int32 debug = debugflag ? 1 : 0;
|
|
212
|
+
|
|
213
|
+
// Get the textline mask at 2x reduction
|
|
214
|
+
Pix *pixtm, *pixvws;
|
|
215
|
+
GetTextlineMask(pixs, &pixtm, &pixvws, NULL, NULL, debugflag);
|
|
216
|
+
Pix *pixtm2 = pixReduceRankBinaryCascade(pixtm, 1, 0, 0, 0);
|
|
217
|
+
pixDestroy(&pixtm);
|
|
218
|
+
|
|
219
|
+
// Get the textblock mask
|
|
220
|
+
Pix *pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug);
|
|
221
|
+
Pix *pixt = pixExpandReplicate(pixtb2, 2);
|
|
222
|
+
pixDestroy(&pixtm2);
|
|
223
|
+
pixDestroy(&pixtb2);
|
|
224
|
+
pixDestroy(&pixvws);
|
|
225
|
+
|
|
226
|
+
// Dilate to capture pixels near the mask edges that were missed
|
|
227
|
+
Pix *pixtb = pixDilateBrick(NULL, pixt, 3, 3);
|
|
228
|
+
pixDestroy(&pixt);
|
|
229
|
+
pixDisplayWrite(pixtb, debug);
|
|
230
|
+
|
|
231
|
+
if (ppatb) {
|
|
232
|
+
Boxa *boxa = pixConnComp(pixtb, ppatb, 4);
|
|
233
|
+
if (pbatb) {
|
|
234
|
+
*pbatb = boxa;
|
|
235
|
+
} else {
|
|
236
|
+
boxaDestroy(&boxa);
|
|
237
|
+
}
|
|
238
|
+
} else if (pbatb) {
|
|
239
|
+
*pbatb = pixConnComp(pixtb, NULL, 4);
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
if (ppixtb) {
|
|
243
|
+
*ppixtb = pixtb;
|
|
244
|
+
} else {
|
|
245
|
+
pixDestroy(&pixtb);
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
return true;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
//------------------------------------------------------------------
|
|
253
|
+
// Top-level (for testing/debugging)
|
|
254
|
+
//------------------------------------------------------------------
|
|
255
|
+
// GetAllRegions()
|
|
256
|
+
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
|
257
|
+
// &pixhm (returns halftone mask; can be NULL)
|
|
258
|
+
// &pixtm (returns textline mask; can be NULL)
|
|
259
|
+
// &pixtb (returns textblock mask; can be NULL)
|
|
260
|
+
// debugflag (set true to write out intermediate images and data)
|
|
261
|
+
// Return: true if ok, false on error
|
|
262
|
+
// Note: use NULL for input on each mask you don't want.
|
|
263
|
+
bool LeptonicaPageSeg::GetAllRegions(Pix *pixs,
|
|
264
|
+
Pix **ppixhm,
|
|
265
|
+
Pix **ppixtm,
|
|
266
|
+
Pix **ppixtb,
|
|
267
|
+
bool debugflag) {
|
|
268
|
+
if (!pixs || (pixGetDepth(pixs) != 1)) {
|
|
269
|
+
fprintf(stderr, "pixs not read or not 1 bpp\n");
|
|
270
|
+
return 1;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
int32 w, h;
|
|
274
|
+
pixGetDimensions(pixs, &w, &h, NULL);
|
|
275
|
+
int32 debug = debugflag ? 1 : 0;
|
|
276
|
+
|
|
277
|
+
// Segment the page
|
|
278
|
+
Boxa *batm = NULL;
|
|
279
|
+
Boxa *batb = NULL;
|
|
280
|
+
Pixa *patm = NULL;
|
|
281
|
+
Pixa *patb = NULL;
|
|
282
|
+
Pix *pixhm = NULL;
|
|
283
|
+
Pix *pixtm = NULL;
|
|
284
|
+
Pix *pixtb = NULL;
|
|
285
|
+
|
|
286
|
+
startTimer();
|
|
287
|
+
LeptonicaPageSeg::GetHalftoneMask(pixs, &pixhm, NULL, NULL, false);
|
|
288
|
+
if (debug)
|
|
289
|
+
printf("Halftone segmentation time: %f sec\n", stopTimer());
|
|
290
|
+
|
|
291
|
+
startTimer();
|
|
292
|
+
LeptonicaPageSeg::GetTextlineMask(pixs, &pixtm, NULL, &batm, &patm, false);
|
|
293
|
+
if (debug)
|
|
294
|
+
printf("Textline segmentation time: %f sec\n", stopTimer());
|
|
295
|
+
|
|
296
|
+
startTimer();
|
|
297
|
+
LeptonicaPageSeg::GetTextblockMask(pixs, &pixtb, &batb, &patb, debugflag);
|
|
298
|
+
if (debug)
|
|
299
|
+
printf("Textblock segmentation time: %f sec\n", stopTimer());
|
|
300
|
+
|
|
301
|
+
// Display the textlines
|
|
302
|
+
if (debug) {
|
|
303
|
+
Pix *pixt = pixaDisplayRandomCmap(patm, w, h);
|
|
304
|
+
pixcmapResetColor(pixGetColormap(pixt), 0, 255, 255, 255); // white bg
|
|
305
|
+
pixDisplay(pixt, 100, 100);
|
|
306
|
+
pixDisplayWrite(pixt, 1);
|
|
307
|
+
pixDestroy(&pixt);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// Display the textblocks
|
|
311
|
+
if (debug) {
|
|
312
|
+
Pix *pixt = pixaDisplayRandomCmap(patb, w, h);
|
|
313
|
+
pixcmapResetColor(pixGetColormap(pixt), 0, 255, 255, 255);
|
|
314
|
+
pixDisplay(pixt, 100, 100);
|
|
315
|
+
pixDisplayWrite(pixt, 1);
|
|
316
|
+
pixDestroy(&pixt);
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
// Identify the outlines of each textblock
|
|
320
|
+
if (debug) {
|
|
321
|
+
Ptaa *ptaa = pixGetOuterBordersPtaa(pixtb);
|
|
322
|
+
Pix *pixt = pixRenderRandomCmapPtaa(pixtb, ptaa, 8, 1);
|
|
323
|
+
PixColormap *cmap = pixGetColormap(pixt);
|
|
324
|
+
pixcmapResetColor(cmap, 0, 130, 130, 130);
|
|
325
|
+
pixDisplayWrite(pixt, debug);
|
|
326
|
+
pixDestroy(&pixt);
|
|
327
|
+
ptaaWrite("junk_ptaa_outlines.ptaa", ptaa, 1);
|
|
328
|
+
ptaaDestroy(&ptaa);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
// Save b.b. for textblocks
|
|
332
|
+
if (debug) {
|
|
333
|
+
Boxa *ba1 = boxaSelectBySize(batb, 3, 3, L_SELECT_IF_BOTH,
|
|
334
|
+
L_SELECT_IF_GTE, NULL);
|
|
335
|
+
boxaWrite("junk_textblock.boxa", ba1);
|
|
336
|
+
boxaDestroy(&ba1);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
if (ppixhm) {
|
|
340
|
+
*ppixhm = pixhm;
|
|
341
|
+
} else {
|
|
342
|
+
pixDestroy(&pixhm);
|
|
343
|
+
}
|
|
344
|
+
if (ppixtm) {
|
|
345
|
+
*ppixtm = pixtm;
|
|
346
|
+
} else {
|
|
347
|
+
pixDestroy(&pixtm);
|
|
348
|
+
}
|
|
349
|
+
if (ppixtb) {
|
|
350
|
+
*ppixtb = pixtb;
|
|
351
|
+
} else {
|
|
352
|
+
pixDestroy(&pixtb);
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
boxaDestroy(&batm);
|
|
356
|
+
boxaDestroy(&batb);
|
|
357
|
+
pixaDestroy(&patm);
|
|
358
|
+
pixaDestroy(&patb);
|
|
359
|
+
pixDestroy(&pixs);
|
|
360
|
+
return true;
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
#endif // HAVE_LIBLEPT
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
///////////////////////////////////////////////////////////////////////
|
|
2
|
+
// File: leptonica_pageseg.h
|
|
3
|
+
// Description: Leptonica-based page segmenter.
|
|
4
|
+
// Author: Dan Bloomberg
|
|
5
|
+
// Created: Tue Aug 28 08:56:44 PDT 2007
|
|
6
|
+
//
|
|
7
|
+
// (C) Copyright 2007, Google Inc.
|
|
8
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
// you may not use this file except in compliance with the License.
|
|
10
|
+
// You may obtain a copy of the License at
|
|
11
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
// See the License for the specific language governing permissions and
|
|
16
|
+
// limitations under the License.
|
|
17
|
+
//
|
|
18
|
+
///////////////////////////////////////////////////////////////////////
|
|
19
|
+
|
|
20
|
+
#ifndef LEPTONICA_PAGESEG_H
|
|
21
|
+
#define LEPTONICA_PAGESEG_H
|
|
22
|
+
|
|
23
|
+
class Boxa;
|
|
24
|
+
class Pix;
|
|
25
|
+
class Pixa;
|
|
26
|
+
|
|
27
|
+
class LeptonicaPageSeg {
|
|
28
|
+
public:
|
|
29
|
+
// GetHalftoneMask()
|
|
30
|
+
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
|
31
|
+
// &pixht (returns halftone mask; can be NULL)
|
|
32
|
+
// &baht (returns boxa of halftone mask component b.b.s; can be NULL)
|
|
33
|
+
// &paht (returns pixa of halftone mask components; can be NULL)
|
|
34
|
+
// debugflag (set true to write out intermediate images)
|
|
35
|
+
// Return: true if ok, false on error
|
|
36
|
+
// Note: If there are no halftone regions, all requested data structures
|
|
37
|
+
// are returned as NULL. This is not an error.
|
|
38
|
+
static bool GetHalftoneMask(Pix *pixs,
|
|
39
|
+
Pix **ppixht,
|
|
40
|
+
Boxa **pbaht,
|
|
41
|
+
Pixa **ppaht,
|
|
42
|
+
bool debugflag);
|
|
43
|
+
|
|
44
|
+
// GetTextlineMask()
|
|
45
|
+
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
|
46
|
+
// &pixtm (returns textline mask; can be NULL)
|
|
47
|
+
// &pixvws (returns vertical whitespace mask; can be NULL)
|
|
48
|
+
// &batm (returns boxa of textline mask component b.b.s; can be NULL)
|
|
49
|
+
// &patm (returns pixa of textline mask components; can be NULL)
|
|
50
|
+
// debugflag (set true to write out intermediate images)
|
|
51
|
+
// Return: true if ok, false on error
|
|
52
|
+
static bool GetTextlineMask(Pix *pixs,
|
|
53
|
+
Pix **ppixtm,
|
|
54
|
+
Pix **ppixvws,
|
|
55
|
+
Boxa **pbatm,
|
|
56
|
+
Pixa **ppatm,
|
|
57
|
+
bool debugflag);
|
|
58
|
+
|
|
59
|
+
// GetTextblockMask()
|
|
60
|
+
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
|
61
|
+
// &pixtb (returns textblock mask; can be NULL)
|
|
62
|
+
// &batb (returns boxa of textblock mask component b.b; can be NULL)
|
|
63
|
+
// &patb (returns pixa of textblock mask components; can be NULL)
|
|
64
|
+
// debugflag (set true to write out intermediate images)
|
|
65
|
+
// Return: true if ok, false on error
|
|
66
|
+
// Notes:
|
|
67
|
+
// To obtain a set of polylines of the outer borders of each of the
|
|
68
|
+
// textblock regions, use pixGetOuterBordersPtaa().
|
|
69
|
+
static bool GetTextblockMask(Pix *pixs,
|
|
70
|
+
Pix **ppixtb,
|
|
71
|
+
Boxa **pbatb,
|
|
72
|
+
Pixa **ppatb,
|
|
73
|
+
bool debugflag);
|
|
74
|
+
|
|
75
|
+
// GetAllRegions()
|
|
76
|
+
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
|
77
|
+
// &pixhm (returns halftone mask; can be NULL)
|
|
78
|
+
// &pixtm (returns textline mask; can be NULL)
|
|
79
|
+
// &pixtb (returns textblock mask; can be NULL)
|
|
80
|
+
// debugflag (set true to write out intermediate images and data)
|
|
81
|
+
// Return: true if ok, false on error
|
|
82
|
+
// Note: use NULL for input on each mask you don't want.
|
|
83
|
+
static bool GetAllRegions(Pix *pixs,
|
|
84
|
+
Pix **ppixhm,
|
|
85
|
+
Pix **ppixtm,
|
|
86
|
+
Pix **ppixtb,
|
|
87
|
+
bool debugflag);
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
#endif // LEPTONICA_PAGESEG_H
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
///////////////////////////////////////////////////////////////////////
|
|
2
|
+
// File: leptonica_pageseg_interface.cpp
|
|
3
|
+
// Description: Leptonica-based page segmenter interface.
|
|
4
|
+
// Author: Thomas Kielbus
|
|
5
|
+
// Created: Mon Aug 27 10:05:01 PDT 2007
|
|
6
|
+
//
|
|
7
|
+
// (C) Copyright 2007, Google Inc.
|
|
8
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
// you may not use this file except in compliance with the License.
|
|
10
|
+
// You may obtain a copy of the License at
|
|
11
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
// See the License for the specific language governing permissions and
|
|
16
|
+
// limitations under the License.
|
|
17
|
+
//
|
|
18
|
+
///////////////////////////////////////////////////////////////////////
|
|
19
|
+
|
|
20
|
+
// Include automatically generated configuration file if running autoconf.
|
|
21
|
+
#ifdef HAVE_CONFIG_H
|
|
22
|
+
#include "config_auto.h"
|
|
23
|
+
#endif
|
|
24
|
+
|
|
25
|
+
#include "leptonica_pageseg_interface.h"
|
|
26
|
+
|
|
27
|
+
#include "leptonica_pageseg.h"
|
|
28
|
+
#include "imgs.h"
|
|
29
|
+
|
|
30
|
+
#ifdef HAVE_LIBLEPT
|
|
31
|
+
// Include leptonica library only if autoconf (or makefile etc) tell us to.
|
|
32
|
+
#include "allheaders.h"
|
|
33
|
+
#endif
|
|
34
|
+
|
|
35
|
+
#ifdef HAVE_LIBLEPT
|
|
36
|
+
// ONLY available if you have Leptonica installed.
|
|
37
|
+
|
|
38
|
+
// Use the LeptonicaPageSeg class to perform text block detection. Propagates
|
|
39
|
+
// the error if any. LeptonicaPageSeg can also return invalid masks; in this
|
|
40
|
+
// case, return an empty text block mask.
|
|
41
|
+
bool leptonica_pageseg_get_textblock_mask(IMAGE* page_image,
|
|
42
|
+
IMAGE* textblock_mask_image) {
|
|
43
|
+
bool success = true;
|
|
44
|
+
|
|
45
|
+
// Convert the page IMAGE to a PIX
|
|
46
|
+
PIX* page_pix = page_image->ToPix();
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
// Compute the textblock mask PIX
|
|
50
|
+
PIX* textblock_mask_pix = NULL;
|
|
51
|
+
if (LeptonicaPageSeg::GetTextblockMask(page_pix, &textblock_mask_pix,
|
|
52
|
+
NULL, NULL, false)) {
|
|
53
|
+
if (pixGetWidth(textblock_mask_pix) != page_image->get_xsize() ||
|
|
54
|
+
pixGetWidth(textblock_mask_pix) != page_image->get_xsize())
|
|
55
|
+
fprintf(stderr, "WARNING: Leptonica's text block mask (%dx%d)"
|
|
56
|
+
" and the original image (%dx%d) differ in size !\n",
|
|
57
|
+
pixGetWidth(textblock_mask_pix), pixGetHeight(textblock_mask_pix),
|
|
58
|
+
page_image->get_xsize(), page_image->get_ysize());
|
|
59
|
+
|
|
60
|
+
// Create the resulting mask image
|
|
61
|
+
textblock_mask_image->destroy();
|
|
62
|
+
if (pixGetWidth(textblock_mask_pix) <= 0 ||
|
|
63
|
+
pixGetHeight(textblock_mask_pix) <= 0) {
|
|
64
|
+
|
|
65
|
+
// Leptonica failed. Create an empty mask.
|
|
66
|
+
fprintf(stderr, "WARNING: Leptonica's text block mask is invalid.\n");
|
|
67
|
+
textblock_mask_image->create(page_image->get_xsize(),
|
|
68
|
+
page_image->get_ysize(), 1);
|
|
69
|
+
|
|
70
|
+
} else {
|
|
71
|
+
// Leptonica succeeded. Convert textblock_mask PIX to an IMAGE
|
|
72
|
+
textblock_mask_image->FromPix(textblock_mask_pix);
|
|
73
|
+
}
|
|
74
|
+
} else {
|
|
75
|
+
success = false;
|
|
76
|
+
}
|
|
77
|
+
pixDestroy(&page_pix);
|
|
78
|
+
pixDestroy(&textblock_mask_pix);
|
|
79
|
+
return success;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
#endif // HAVE_LIBLEPT
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
///////////////////////////////////////////////////////////////////////
|
|
2
|
+
// File: leptonica_pageseg_interface.h
|
|
3
|
+
// Description: Leptonica-based page segmenter interface.
|
|
4
|
+
// Author: Thomas Kielbus
|
|
5
|
+
// Created: Mon Aug 27 10:05:01 PDT 2007
|
|
6
|
+
//
|
|
7
|
+
// (C) Copyright 2007, Google Inc.
|
|
8
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
// you may not use this file except in compliance with the License.
|
|
10
|
+
// You may obtain a copy of the License at
|
|
11
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
// See the License for the specific language governing permissions and
|
|
16
|
+
// limitations under the License.
|
|
17
|
+
//
|
|
18
|
+
///////////////////////////////////////////////////////////////////////
|
|
19
|
+
|
|
20
|
+
#ifndef LEPTONICA_PAGESEG_INTERFACE_H
|
|
21
|
+
#define LEPTONICA_PAGESEG_INTERFACE_H
|
|
22
|
+
|
|
23
|
+
class IMAGE;
|
|
24
|
+
|
|
25
|
+
// Compute the text block mask of the page_image and put the result into the
|
|
26
|
+
// textblock_mask_image. Return true if no error has occurred.
|
|
27
|
+
bool leptonica_pageseg_get_textblock_mask(IMAGE* page_image,
|
|
28
|
+
IMAGE* textblock_mask_image);
|
|
29
|
+
|
|
30
|
+
#endif // LEPTONICA_PAGESEG_INTERFACE_H
|