tesseract_bin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
|
@@ -0,0 +1,2628 @@
|
|
|
1
|
+
/**********************************************************************
|
|
2
|
+
* File: makerow.cpp (Formerly makerows.c)
|
|
3
|
+
* Description: Code to arrange blobs into rows of text.
|
|
4
|
+
* Author: Ray Smith
|
|
5
|
+
* Created: Mon Sep 21 14:34:48 BST 1992
|
|
6
|
+
*
|
|
7
|
+
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
** you may not use this file except in compliance with the License.
|
|
10
|
+
** You may obtain a copy of the License at
|
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
** See the License for the specific language governing permissions and
|
|
16
|
+
** limitations under the License.
|
|
17
|
+
*
|
|
18
|
+
**********************************************************************/
|
|
19
|
+
|
|
20
|
+
#include "mfcpch.h"
|
|
21
|
+
#ifdef __UNIX__
|
|
22
|
+
#include <assert.h>
|
|
23
|
+
#endif
|
|
24
|
+
#include "stderr.h"
|
|
25
|
+
#include "blobbox.h"
|
|
26
|
+
#include "lmedsq.h"
|
|
27
|
+
#include "statistc.h"
|
|
28
|
+
#include "drawtord.h"
|
|
29
|
+
#include "blkocc.h"
|
|
30
|
+
#include "sortflts.h"
|
|
31
|
+
#include "oldbasel.h"
|
|
32
|
+
#include "tordmain.h"
|
|
33
|
+
#include "underlin.h"
|
|
34
|
+
#include "makerow.h"
|
|
35
|
+
#include "tprintf.h"
|
|
36
|
+
|
|
37
|
+
#define EXTERN
|
|
38
|
+
|
|
39
|
+
// Tunable parameters of the row finder. Each entry gives the variable
// name, its default value and the help text registered with it.

// Noise removal and debug display switches.
EXTERN BOOL_VAR (textord_heavy_nr, FALSE, "Vigorously remove noise");
EXTERN BOOL_VAR (textord_show_initial_rows, FALSE, "Display row accumulation");
EXTERN BOOL_VAR (textord_show_parallel_rows, FALSE, "Display page correlated rows");
EXTERN BOOL_VAR (textord_show_expanded_rows, FALSE, "Display rows after expanding");
EXTERN BOOL_VAR (textord_show_final_rows, FALSE, "Display rows after final fittin");
EXTERN BOOL_VAR (textord_show_final_blobs, FALSE, "Display blob bounds after pre-ass");
EXTERN BOOL_VAR (textord_test_landscape, FALSE, "Tests refer to land/port");

// Baseline and xheight policy switches.
EXTERN BOOL_VAR (textord_parallel_baselines, TRUE, "Force parallel baselines");
EXTERN BOOL_VAR (textord_straight_baselines, FALSE, "Force straight baselines");
EXTERN BOOL_VAR (textord_quadratic_baselines, FALSE, "Use quadratic splines");
EXTERN BOOL_VAR (textord_old_baselines, TRUE, "Use old baseline algorithm");
EXTERN BOOL_VAR (textord_old_xheight, TRUE, "Use old xheight algorithm");
EXTERN BOOL_VAR (textord_fix_xheight_bug, TRUE, "Use spline baseline");
EXTERN BOOL_VAR (textord_fix_makerow_bug, TRUE, "Prevent multiple baselines");
EXTERN BOOL_VAR (textord_row_xheights, FALSE, "Use row height policy");
EXTERN BOOL_VAR (textord_block_xheights, TRUE, "Use block height policy");
EXTERN BOOL_VAR (textord_xheight_tweak, FALSE, "New min condition on height");
EXTERN BOOL_VAR (textord_cblob_blockocc, TRUE, "Use new projection for underlines");
EXTERN BOOL_VAR (textord_debug_xheights, FALSE, "Test xheight algorithms");

// Skew estimation switches.
EXTERN BOOL_VAR (textord_biased_skewcalc, TRUE, "Bias skew estimates with line length");
EXTERN BOOL_VAR (textord_interpolating_skew, TRUE, "Interpolate across gaps");

// Integer parameters.
EXTERN INT_VAR (textord_skewsmooth_offset, 2, "For smooth factor");
EXTERN INT_VAR (textord_skewsmooth_offset2, 1, "For smooth factor");
EXTERN INT_VAR (textord_test_x, -1, "coord of test pt");
EXTERN INT_VAR (textord_test_y, -1, "coord of test pt");
EXTERN INT_VAR (textord_min_blobs_in_row, 4, "Min blobs before gradient counted");
EXTERN INT_VAR (textord_spline_minblobs, 8, "Min blobs in each spline segment");
EXTERN INT_VAR (textord_spline_medianwin, 6, "Size of window for spline segmentation");
EXTERN INT_VAR (textord_min_xheight, 10, "Min credible pixel xheight");

// Floating point parameters.
EXTERN double_VAR (textord_spline_shift_fraction, 0.02, "Fraction of line spacing for quad");
EXTERN double_VAR (textord_spline_outlier_fraction, 0.1, "Fraction of line spacing for outlier");
EXTERN double_VAR (textord_skew_ile, 0.5, "Ile of gradients for page skew");
EXTERN double_VAR (textord_skew_lag, 0.01, "Lag for skew on row accumulation");
EXTERN double_VAR (textord_linespace_iqrlimit, 0.2, "Max iqr/median for linespace");
EXTERN double_VAR (textord_width_limit, 8, "Max width of blobs to make rows");
EXTERN double_VAR (textord_chop_width, 1.5, "Max width before chopping");
EXTERN double_VAR (textord_expansion_factor, 1.0, "Factor to expand rows by in expand_rows");
EXTERN double_VAR (textord_overlap_x, 0.5, "Fraction of linespace for good overlap");
EXTERN double_VAR (textord_merge_desc, 0.25, "Fraction of linespace for desc drop");
EXTERN double_VAR (textord_merge_x, 0.5, "Fraction of linespace for x height");
EXTERN double_VAR (textord_merge_asc, 0.25, "Fraction of linespace for asc height");
EXTERN double_VAR (textord_minxh, 0.25, "fraction of linesize for min xheight");
EXTERN double_VAR (textord_min_linesize, 1.25, "* blob height for initial linesize");
EXTERN double_VAR (textord_excess_blobsize, 1.3, "New row made if blob makes row this big");
EXTERN double_VAR (textord_occupancy_threshold, 0.4, "Fraction of neighbourhood");
EXTERN double_VAR (textord_underline_width, 2.0, "Multiple of line_size for underline");
EXTERN double_VAR (textord_xheight_mode_fraction, 0.4, "Min pile height to make xheight");
EXTERN double_VAR (textord_ascheight_mode_fraction, 0.15, "Min pile height to make ascheight");
EXTERN double_VAR (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
EXTERN double_VAR (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
EXTERN double_VAR (textord_descx_ratio_min, 0.15, "Min desc/xheight");
EXTERN double_VAR (textord_descx_ratio_max, 0.6, "Max desc/xheight");
EXTERN double_VAR (textord_xheight_error_margin, 0.1, "Accepted variation");
|
|
120
|
+
|
|
121
|
+
#define MAX_HEIGHT_MODES 12
|
|
122
|
+
|
|
123
|
+
/**********************************************************************
|
|
124
|
+
* make_rows
|
|
125
|
+
*
|
|
126
|
+
* Arrange the blobs into rows.
|
|
127
|
+
**********************************************************************/
|
|
128
|
+
|
|
129
|
+
float make_rows( //make rows
|
|
130
|
+
ICOORD page_tr, //top right
|
|
131
|
+
BLOCK_LIST *blocks, //block list
|
|
132
|
+
TO_BLOCK_LIST *land_blocks, //rotated for landscape
|
|
133
|
+
TO_BLOCK_LIST *port_blocks //output list
|
|
134
|
+
) {
|
|
135
|
+
float port_m; //global skew
|
|
136
|
+
float port_err; //global noise
|
|
137
|
+
// float land_m; //global skew
|
|
138
|
+
// float land_err; //global noise
|
|
139
|
+
TO_BLOCK_IT block_it; //iterator
|
|
140
|
+
|
|
141
|
+
//don't do landscape for now
|
|
142
|
+
// block_it.set_to_list(land_blocks);
|
|
143
|
+
// for (block_it.mark_cycle_pt();!block_it.cycled_list();block_it.forward())
|
|
144
|
+
// make_initial_textrows(page_tr,block_it.data(),FCOORD(0,-1),
|
|
145
|
+
// (BOOL8)textord_test_landscape);
|
|
146
|
+
block_it.set_to_list (port_blocks);
|
|
147
|
+
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
|
148
|
+
block_it.forward ())
|
|
149
|
+
make_initial_textrows (page_tr, block_it.data (), FCOORD (1.0f, 0.0f),
|
|
150
|
+
!(BOOL8) textord_test_landscape);
|
|
151
|
+
//compute globally
|
|
152
|
+
compute_page_skew(port_blocks, port_m, port_err);
|
|
153
|
+
// compute_page_skew(land_blocks,land_m,land_err); //compute globally
|
|
154
|
+
// tprintf("Portrait skew gradient=%g, error=%g.\n",
|
|
155
|
+
// port_m,port_err);
|
|
156
|
+
// tprintf("Landscape skew gradient=%g, error=%g.\n",
|
|
157
|
+
// land_m,land_err);
|
|
158
|
+
block_it.set_to_list (port_blocks);
|
|
159
|
+
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
|
160
|
+
block_it.forward ()) {
|
|
161
|
+
cleanup_rows (page_tr, block_it.data (), port_m, FCOORD (1.0f, 0.0f),
|
|
162
|
+
block_it.data ()->block->bounding_box ().left (),
|
|
163
|
+
!(BOOL8) textord_test_landscape);
|
|
164
|
+
}
|
|
165
|
+
block_it.set_to_list (land_blocks);
|
|
166
|
+
// for (block_it.mark_cycle_pt();!block_it.cycled_list();block_it.forward())
|
|
167
|
+
// {
|
|
168
|
+
// cleanup_rows(page_tr,block_it.data(),land_m,FCOORD(0,-1),
|
|
169
|
+
// -block_it.data()->block->bounding_box().top(),
|
|
170
|
+
// (BOOL8)textord_test_landscape);
|
|
171
|
+
// }
|
|
172
|
+
return port_m; //global skew
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
/**********************************************************************
|
|
177
|
+
* make_initial_textrows
|
|
178
|
+
*
|
|
179
|
+
* Arrange the good blobs into rows of text.
|
|
180
|
+
**********************************************************************/
|
|
181
|
+
|
|
182
|
+
void make_initial_textrows( //find lines
|
|
183
|
+
ICOORD page_tr,
|
|
184
|
+
TO_BLOCK *block, //block to do
|
|
185
|
+
FCOORD rotation, //for drawing
|
|
186
|
+
BOOL8 testing_on //correct orientation
|
|
187
|
+
) {
|
|
188
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
189
|
+
|
|
190
|
+
#ifndef GRAPHICS_DISABLED
|
|
191
|
+
ScrollView::Color colour; //of row
|
|
192
|
+
|
|
193
|
+
if (textord_show_initial_rows && testing_on) {
|
|
194
|
+
if (to_win == NULL)
|
|
195
|
+
create_to_win(page_tr);
|
|
196
|
+
}
|
|
197
|
+
#endif
|
|
198
|
+
//guess skew
|
|
199
|
+
assign_blobs_to_rows (block, NULL, 0, TRUE, TRUE, textord_show_initial_rows && testing_on);
|
|
200
|
+
row_it.move_to_first ();
|
|
201
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
|
|
202
|
+
fit_lms_line (row_it.data ());
|
|
203
|
+
#ifndef GRAPHICS_DISABLED
|
|
204
|
+
if (textord_show_initial_rows && testing_on) {
|
|
205
|
+
colour = ScrollView::RED;
|
|
206
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
207
|
+
plot_to_row (row_it.data (), colour, rotation);
|
|
208
|
+
colour = (ScrollView::Color) (colour + 1);
|
|
209
|
+
if (colour > ScrollView::MAGENTA)
|
|
210
|
+
colour = ScrollView::RED;
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
#endif
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
/**********************************************************************
|
|
218
|
+
* fit_lms_line
|
|
219
|
+
*
|
|
220
|
+
* Fit an LMS line to a row.
|
|
221
|
+
**********************************************************************/
|
|
222
|
+
|
|
223
|
+
void fit_lms_line( //sort function
|
|
224
|
+
TO_ROW *row //row to fit
|
|
225
|
+
) {
|
|
226
|
+
float m, c; //fitted line
|
|
227
|
+
TBOX box; //blob box
|
|
228
|
+
LMS lms (row->blob_list ()->length ());
|
|
229
|
+
//blobs
|
|
230
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
|
231
|
+
|
|
232
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
|
|
233
|
+
box = blob_it.data ()->bounding_box ();
|
|
234
|
+
lms.add (FCOORD ((box.left () + box.right ()) / 2.0, box.bottom ()));
|
|
235
|
+
}
|
|
236
|
+
lms.fit (m, c);
|
|
237
|
+
row->set_line (m, c, lms.error ());
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
/**********************************************************************
|
|
242
|
+
* compute_page_skew
|
|
243
|
+
*
|
|
244
|
+
* Compute the skew over a full page by averaging the gradients over
|
|
245
|
+
* all the lines. Get the error of the same row.
|
|
246
|
+
**********************************************************************/
|
|
247
|
+
|
|
248
|
+
void compute_page_skew( //get average gradient
|
|
249
|
+
TO_BLOCK_LIST *blocks, //list of blocks
|
|
250
|
+
float &page_m, //average gradient
|
|
251
|
+
float &page_err //average error
|
|
252
|
+
) {
|
|
253
|
+
inT32 row_count; //total rows
|
|
254
|
+
inT32 blob_count; //total_blobs
|
|
255
|
+
inT32 row_err; //integer error
|
|
256
|
+
float *gradients; //of rows
|
|
257
|
+
float *errors; //of rows
|
|
258
|
+
inT32 row_index; //of total
|
|
259
|
+
TO_ROW *row; //current row
|
|
260
|
+
TO_BLOCK_IT block_it = blocks; //iterator
|
|
261
|
+
TO_ROW_IT row_it;
|
|
262
|
+
|
|
263
|
+
row_count = 0;
|
|
264
|
+
blob_count = 0;
|
|
265
|
+
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
|
266
|
+
block_it.forward ()) {
|
|
267
|
+
row_count += block_it.data ()->get_rows ()->length ();
|
|
268
|
+
//count up rows
|
|
269
|
+
row_it.set_to_list (block_it.data ()->get_rows ());
|
|
270
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
|
|
271
|
+
blob_count += row_it.data ()->blob_list ()->length ();
|
|
272
|
+
}
|
|
273
|
+
if (row_count == 0) {
|
|
274
|
+
page_m = 0.0f;
|
|
275
|
+
page_err = 0.0f;
|
|
276
|
+
return;
|
|
277
|
+
}
|
|
278
|
+
gradients = (float *) alloc_mem (blob_count * sizeof (float));
|
|
279
|
+
//get mem
|
|
280
|
+
errors = (float *) alloc_mem (blob_count * sizeof (float));
|
|
281
|
+
if (gradients == NULL || errors == NULL)
|
|
282
|
+
MEMORY_OUT.error ("compute_page_skew", ABORT, NULL);
|
|
283
|
+
|
|
284
|
+
row_index = 0;
|
|
285
|
+
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
|
286
|
+
block_it.forward ()) {
|
|
287
|
+
row_it.set_to_list (block_it.data ()->get_rows ());
|
|
288
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
289
|
+
row = row_it.data ();
|
|
290
|
+
blob_count = row->blob_list ()->length ();
|
|
291
|
+
row_err = (inT32) ceil (row->line_error ());
|
|
292
|
+
if (row_err <= 0)
|
|
293
|
+
row_err = 1;
|
|
294
|
+
if (textord_biased_skewcalc) {
|
|
295
|
+
blob_count /= row_err;
|
|
296
|
+
for (blob_count /= row_err; blob_count > 0; blob_count--) {
|
|
297
|
+
gradients[row_index] = row->line_m ();
|
|
298
|
+
errors[row_index] = row->line_error ();
|
|
299
|
+
row_index++;
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
else if (blob_count >= textord_min_blobs_in_row) {
|
|
303
|
+
//get gradient
|
|
304
|
+
gradients[row_index] = row->line_m ();
|
|
305
|
+
errors[row_index] = row->line_error ();
|
|
306
|
+
row_index++;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
if (row_index == 0) {
|
|
311
|
+
//desperate
|
|
312
|
+
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
|
313
|
+
block_it.forward ()) {
|
|
314
|
+
row_it.set_to_list (block_it.data ()->get_rows ());
|
|
315
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
|
|
316
|
+
row_it.forward ()) {
|
|
317
|
+
row = row_it.data ();
|
|
318
|
+
gradients[row_index] = row->line_m ();
|
|
319
|
+
errors[row_index] = row->line_error ();
|
|
320
|
+
row_index++;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
row_count = row_index;
|
|
325
|
+
row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
|
|
326
|
+
gradients, row_count);
|
|
327
|
+
page_m = gradients[row_index];
|
|
328
|
+
row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
|
|
329
|
+
errors, row_count);
|
|
330
|
+
page_err = errors[row_index];
|
|
331
|
+
free_mem(gradients);
|
|
332
|
+
free_mem(errors);
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
// Blobs shorter than this fraction of the row xheight are noise candidates.
static const double kNoiseSize = 0.5;
// Minimum height in pixels for a blob to count towards the xheight estimate.
static const int kMinSize = 8;
|
|
337
|
+
|
|
338
|
+
// Return true if the dot looks like it is part of the i.
|
|
339
|
+
// Doesn't work for any other diacritical.
|
|
340
|
+
static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) {
|
|
341
|
+
const TBOX& ibox = i->bounding_box();
|
|
342
|
+
const TBOX& dotbox = dot->bounding_box();
|
|
343
|
+
|
|
344
|
+
// Must overlap horizontally by enough and be high enough.
|
|
345
|
+
int overlap = MIN(dotbox.right(), ibox.right()) -
|
|
346
|
+
MAX(dotbox.left(), ibox.left());
|
|
347
|
+
if (ibox.height() <= 2 * dotbox.height() ||
|
|
348
|
+
(overlap * 2 < ibox.width() && overlap < dotbox.width()))
|
|
349
|
+
return false;
|
|
350
|
+
|
|
351
|
+
// If the i is tall and thin then it is good.
|
|
352
|
+
if (ibox.height() > ibox.width() * 2)
|
|
353
|
+
return true; // The i or ! must be tall and thin.
|
|
354
|
+
|
|
355
|
+
// It might still be tall and thin, but it might be joined to something.
|
|
356
|
+
// So search the outline for a piece of large height close to the edges
|
|
357
|
+
// of the dot.
|
|
358
|
+
const double kHeightFraction = 0.6;
|
|
359
|
+
double target_height = MIN(dotbox.bottom(), ibox.top());
|
|
360
|
+
target_height -= row->line_m()*dotbox.left() + row->line_c();
|
|
361
|
+
target_height *= kHeightFraction;
|
|
362
|
+
int left_min = dotbox.left() - dotbox.width();
|
|
363
|
+
int middle = (dotbox.left() + dotbox.right())/2;
|
|
364
|
+
int right_max = dotbox.right() + dotbox.width();
|
|
365
|
+
int left_miny = 0;
|
|
366
|
+
int left_maxy = 0;
|
|
367
|
+
int right_miny = 0;
|
|
368
|
+
int right_maxy = 0;
|
|
369
|
+
bool found_left = false;
|
|
370
|
+
bool found_right = false;
|
|
371
|
+
bool in_left = false;
|
|
372
|
+
bool in_right = false;
|
|
373
|
+
C_BLOB* blob = i->cblob();
|
|
374
|
+
C_OUTLINE_IT o_it = blob->out_list();
|
|
375
|
+
for (o_it.mark_cycle_pt(); !o_it.cycled_list(); o_it.forward()) {
|
|
376
|
+
C_OUTLINE* outline = o_it.data();
|
|
377
|
+
int length = outline->pathlength();
|
|
378
|
+
ICOORD pos = outline->start_pos();
|
|
379
|
+
for (int step = 0; step < length; pos += outline->step(step++)) {
|
|
380
|
+
int x = pos.x();
|
|
381
|
+
int y = pos.y();
|
|
382
|
+
if (x >= left_min && x < middle && !found_left) {
|
|
383
|
+
// We are in the left part so find min and max y.
|
|
384
|
+
if (in_left) {
|
|
385
|
+
if (y > left_maxy) left_maxy = y;
|
|
386
|
+
if (y < left_miny) left_miny = y;
|
|
387
|
+
} else {
|
|
388
|
+
left_maxy = left_miny = y;
|
|
389
|
+
in_left = true;
|
|
390
|
+
}
|
|
391
|
+
} else if (in_left) {
|
|
392
|
+
// We just left the left so look for size.
|
|
393
|
+
if (left_maxy - left_miny > target_height) {
|
|
394
|
+
if (found_right)
|
|
395
|
+
return true;
|
|
396
|
+
found_left = true;
|
|
397
|
+
}
|
|
398
|
+
in_left = false;
|
|
399
|
+
}
|
|
400
|
+
if (x <= right_max && x > middle && !found_right) {
|
|
401
|
+
// We are in the right part so find min and max y.
|
|
402
|
+
if (in_right) {
|
|
403
|
+
if (y > right_maxy) right_maxy = y;
|
|
404
|
+
if (y < right_miny) right_miny = y;
|
|
405
|
+
} else {
|
|
406
|
+
right_maxy = right_miny = y;
|
|
407
|
+
in_right = true;
|
|
408
|
+
}
|
|
409
|
+
} else if (in_right) {
|
|
410
|
+
// We just left the right so look for size.
|
|
411
|
+
if (right_maxy - right_miny > target_height) {
|
|
412
|
+
if (found_left)
|
|
413
|
+
return true;
|
|
414
|
+
found_right = true;
|
|
415
|
+
}
|
|
416
|
+
in_right = false;
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
return false;
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
// Deletes small blobs from every row of the block.
// For each row the xheight is estimated as the median height of the blobs
// that are at least kMinSize high. Any blob lower than kNoiseSize times
// that estimate is deleted, unless dot_of_i accepts it as the dot of the
// last kept blob before it or of the blob that follows it.
static void vigorous_noise_removal(TO_BLOCK* block) {
  TO_ROW_IT row_it = block->get_rows ();
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    TO_ROW* row = row_it.data();
    BLOBNBOX_IT b_it = row->blob_list();

    // Find the tallest blob to size the height histogram.
    int tallest = 0;
    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
      int height = b_it.data()->bounding_box().height();
      if (height > tallest)
        tallest = height;
    }

    // Estimate the xheight on the row.
    STATS hstats(0, tallest + 1);
    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
      int height = b_it.data()->bounding_box().height();
      if (height >= kMinSize)
        hstats.add(height, 1);
    }
    float xheight = hstats.median();

    // Delete small objects.
    BLOBNBOX* last_kept = NULL;
    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
      BLOBNBOX* blob = b_it.data();
      bool is_small = blob->bounding_box().height() < kNoiseSize * xheight;
      if (!is_small) {
        last_kept = blob;
        continue;
      }
      // Small so delete unless it looks like an i dot.
      if (last_kept != NULL && dot_of_i(blob, last_kept, row))
        continue;  // Looks OK.
      if (!b_it.at_last() && dot_of_i(blob, b_it.data_relative(1), row))
        continue;  // Looks OK.
      // It might be noise so get rid of it (deleting NULL is harmless).
      delete blob->blob();
      delete blob->cblob();
      delete b_it.extract();
    }
  }
}
|
|
471
|
+
|
|
472
|
+
/**********************************************************************
|
|
473
|
+
* cleanup_rows
|
|
474
|
+
*
|
|
475
|
+
* Remove overlapping rows and fit all the blobs to what's left.
|
|
476
|
+
**********************************************************************/
|
|
477
|
+
|
|
478
|
+
void cleanup_rows( //find lines
|
|
479
|
+
ICOORD page_tr, //top right
|
|
480
|
+
TO_BLOCK *block, //block to do
|
|
481
|
+
float gradient, //gradient to fit
|
|
482
|
+
FCOORD rotation, //for drawing
|
|
483
|
+
inT32 block_edge, //edge of block
|
|
484
|
+
BOOL8 testing_on //correct orientation
|
|
485
|
+
) {
|
|
486
|
+
//iterators
|
|
487
|
+
BLOBNBOX_IT blob_it = &block->blobs;
|
|
488
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
489
|
+
|
|
490
|
+
#ifndef GRAPHICS_DISABLED
|
|
491
|
+
if (textord_show_parallel_rows && testing_on) {
|
|
492
|
+
if (to_win == NULL)
|
|
493
|
+
create_to_win(page_tr);
|
|
494
|
+
}
|
|
495
|
+
#endif
|
|
496
|
+
//get row coords
|
|
497
|
+
fit_parallel_rows(block,
|
|
498
|
+
gradient,
|
|
499
|
+
rotation,
|
|
500
|
+
block_edge,
|
|
501
|
+
textord_show_parallel_rows &&testing_on);
|
|
502
|
+
delete_non_dropout_rows(block,
|
|
503
|
+
gradient,
|
|
504
|
+
rotation,
|
|
505
|
+
block_edge,
|
|
506
|
+
textord_show_parallel_rows &&testing_on);
|
|
507
|
+
expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
|
|
508
|
+
blob_it.set_to_list (&block->blobs);
|
|
509
|
+
row_it.set_to_list (block->get_rows ());
|
|
510
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
|
|
511
|
+
blob_it.add_list_after (row_it.data ()->blob_list ());
|
|
512
|
+
//give blobs back
|
|
513
|
+
assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE);
|
|
514
|
+
//now new rows must be genuine
|
|
515
|
+
blob_it.set_to_list (&block->blobs);
|
|
516
|
+
blob_it.add_list_after (&block->large_blobs);
|
|
517
|
+
assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE);
|
|
518
|
+
//safe to use big ones now
|
|
519
|
+
blob_it.set_to_list (&block->blobs);
|
|
520
|
+
//throw all blobs in
|
|
521
|
+
blob_it.add_list_after (&block->noise_blobs);
|
|
522
|
+
blob_it.add_list_after (&block->small_blobs);
|
|
523
|
+
assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE);
|
|
524
|
+
//no rows for noise
|
|
525
|
+
row_it.set_to_list (block->get_rows ());
|
|
526
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
|
|
527
|
+
row_it.data ()->blob_list ()->sort (blob_x_order);
|
|
528
|
+
fit_parallel_rows(block, gradient, rotation, block_edge, FALSE);
|
|
529
|
+
if (textord_heavy_nr) {
|
|
530
|
+
vigorous_noise_removal(block);
|
|
531
|
+
}
|
|
532
|
+
separate_underlines(block, gradient, rotation, testing_on);
|
|
533
|
+
pre_associate_blobs(page_tr, block, rotation, testing_on);
|
|
534
|
+
|
|
535
|
+
#ifndef GRAPHICS_DISABLED
|
|
536
|
+
if (textord_show_final_rows && testing_on) {
|
|
537
|
+
if (to_win == NULL)
|
|
538
|
+
create_to_win(page_tr);
|
|
539
|
+
}
|
|
540
|
+
#endif
|
|
541
|
+
|
|
542
|
+
fit_parallel_rows(block, gradient, rotation, block_edge, FALSE);
|
|
543
|
+
// textord_show_final_rows && testing_on);
|
|
544
|
+
make_spline_rows(block,
|
|
545
|
+
gradient,
|
|
546
|
+
rotation,
|
|
547
|
+
block_edge,
|
|
548
|
+
textord_show_final_rows &&testing_on);
|
|
549
|
+
if (!textord_old_xheight || !textord_old_baselines)
|
|
550
|
+
compute_block_xheight(block, gradient);
|
|
551
|
+
if (textord_restore_underlines)
|
|
552
|
+
//fix underlines
|
|
553
|
+
restore_underlined_blobs(block);
|
|
554
|
+
#ifndef GRAPHICS_DISABLED
|
|
555
|
+
if (textord_show_final_rows && testing_on) {
|
|
556
|
+
plot_blob_list (to_win, &block->blobs,
|
|
557
|
+
ScrollView::MAGENTA, ScrollView::WHITE);
|
|
558
|
+
//show discarded blobs
|
|
559
|
+
plot_blob_list (to_win, &block->underlines,
|
|
560
|
+
ScrollView::YELLOW, ScrollView::CORAL);
|
|
561
|
+
}
|
|
562
|
+
if (textord_show_final_rows && testing_on && block->blobs.length () > 0)
|
|
563
|
+
tprintf ("%d blobs discarded as noise\n", block->blobs.length ());
|
|
564
|
+
if (textord_show_final_rows && testing_on) {
|
|
565
|
+
draw_meanlines(block, gradient, block_edge, ScrollView::WHITE, rotation);
|
|
566
|
+
}
|
|
567
|
+
#endif
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
/**********************************************************************
|
|
572
|
+
* delete_non_dropout_rows
|
|
573
|
+
*
|
|
574
|
+
* Compute the linespacing and offset.
|
|
575
|
+
**********************************************************************/
|
|
576
|
+
|
|
577
|
+
void delete_non_dropout_rows( //find lines
|
|
578
|
+
TO_BLOCK *block, //block to do
|
|
579
|
+
float gradient, //global skew
|
|
580
|
+
FCOORD rotation, //deskew vector
|
|
581
|
+
inT32 block_edge, //left edge
|
|
582
|
+
BOOL8 testing_on //correct orientation
|
|
583
|
+
) {
|
|
584
|
+
TBOX block_box; //deskewed block
|
|
585
|
+
inT32 *deltas; //change in occupation
|
|
586
|
+
inT32 *occupation; //of pixel coords
|
|
587
|
+
inT32 max_y; //in block
|
|
588
|
+
inT32 min_y;
|
|
589
|
+
inT32 line_index; //of scan line
|
|
590
|
+
inT32 line_count; //no of scan lines
|
|
591
|
+
inT32 distance; //to drop-out
|
|
592
|
+
inT32 xleft; //of block
|
|
593
|
+
inT32 ybottom; //of block
|
|
594
|
+
TO_ROW *row; //current row
|
|
595
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
596
|
+
BLOBNBOX_IT blob_it = &block->blobs;
|
|
597
|
+
|
|
598
|
+
if (row_it.length () == 0)
|
|
599
|
+
return; //empty block
|
|
600
|
+
block_box = deskew_block_coords (block, gradient);
|
|
601
|
+
xleft = block->block->bounding_box ().left ();
|
|
602
|
+
ybottom = block->block->bounding_box ().bottom ();
|
|
603
|
+
min_y = block_box.bottom () - 1;
|
|
604
|
+
max_y = block_box.top () + 1;
|
|
605
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
606
|
+
line_index = (inT32) floor (row_it.data ()->intercept ());
|
|
607
|
+
if (line_index <= min_y)
|
|
608
|
+
min_y = line_index - 1;
|
|
609
|
+
if (line_index >= max_y)
|
|
610
|
+
max_y = line_index + 1;
|
|
611
|
+
}
|
|
612
|
+
line_count = max_y - min_y + 1;
|
|
613
|
+
if (line_count <= 0)
|
|
614
|
+
return; //empty block
|
|
615
|
+
deltas = (inT32 *) alloc_mem (line_count * sizeof (inT32));
|
|
616
|
+
occupation = (inT32 *) alloc_mem (line_count * sizeof (inT32));
|
|
617
|
+
if (deltas == NULL || occupation == NULL)
|
|
618
|
+
MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL);
|
|
619
|
+
|
|
620
|
+
compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas);
|
|
621
|
+
compute_occupation_threshold ((inT32)
|
|
622
|
+
ceil (block->line_spacing *
|
|
623
|
+
(textord_merge_desc +
|
|
624
|
+
textord_merge_asc)),
|
|
625
|
+
(inT32) ceil (block->line_spacing *
|
|
626
|
+
(textord_merge_x +
|
|
627
|
+
textord_merge_asc)),
|
|
628
|
+
max_y - min_y + 1, occupation, deltas);
|
|
629
|
+
#ifndef GRAPHICS_DISABLED
|
|
630
|
+
if (testing_on) {
|
|
631
|
+
draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas);
|
|
632
|
+
}
|
|
633
|
+
#endif
|
|
634
|
+
compute_dropout_distances(occupation, deltas, line_count);
|
|
635
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
636
|
+
row = row_it.data ();
|
|
637
|
+
line_index = (inT32) floor (row->intercept ());
|
|
638
|
+
distance = deltas[line_index - min_y];
|
|
639
|
+
if (find_best_dropout_row (row, distance, block->line_spacing / 2,
|
|
640
|
+
line_index, &row_it, testing_on)) {
|
|
641
|
+
#ifndef GRAPHICS_DISABLED
|
|
642
|
+
if (testing_on)
|
|
643
|
+
plot_parallel_row(row, gradient, block_edge,
|
|
644
|
+
ScrollView::WHITE, rotation);
|
|
645
|
+
#endif
|
|
646
|
+
blob_it.add_list_after (row_it.data ()->blob_list ());
|
|
647
|
+
delete row_it.extract (); //too far away
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
651
|
+
blob_it.add_list_after (row_it.data ()->blob_list ());
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
free_mem(deltas);
|
|
655
|
+
free_mem(occupation);
|
|
656
|
+
}
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
/**********************************************************************
|
|
660
|
+
* find_best_dropout_row
|
|
661
|
+
*
|
|
662
|
+
* Delete this row if it has a neighbour with better dropout characteristics.
|
|
663
|
+
* TRUE is returned if the row should be deleted.
|
|
664
|
+
**********************************************************************/
|
|
665
|
+
|
|
666
|
+
BOOL8 find_best_dropout_row( //find neighbours
|
|
667
|
+
TO_ROW *row, //row to test
|
|
668
|
+
inT32 distance, //dropout dist
|
|
669
|
+
float dist_limit, //threshold distance
|
|
670
|
+
inT32 line_index, //index of row
|
|
671
|
+
TO_ROW_IT *row_it, //current position
|
|
672
|
+
BOOL8 testing_on //correct orientation
|
|
673
|
+
) {
|
|
674
|
+
inT32 next_index; //of neigbouring row
|
|
675
|
+
inT32 row_offset; //from current row
|
|
676
|
+
inT32 abs_dist; //absolute distance
|
|
677
|
+
inT8 row_inc; //increment to row_index
|
|
678
|
+
TO_ROW *next_row; //nextious row
|
|
679
|
+
|
|
680
|
+
if (testing_on)
|
|
681
|
+
tprintf ("Row at %g(%g), dropout dist=%d,",
|
|
682
|
+
row->intercept (), row->parallel_c (), distance);
|
|
683
|
+
if (distance < 0) {
|
|
684
|
+
row_inc = 1;
|
|
685
|
+
abs_dist = -distance;
|
|
686
|
+
}
|
|
687
|
+
else {
|
|
688
|
+
row_inc = -1;
|
|
689
|
+
abs_dist = distance;
|
|
690
|
+
}
|
|
691
|
+
if (abs_dist > dist_limit) {
|
|
692
|
+
if (testing_on) {
|
|
693
|
+
tprintf (" too far - deleting\n");
|
|
694
|
+
}
|
|
695
|
+
return TRUE;
|
|
696
|
+
}
|
|
697
|
+
if ((distance < 0 && !row_it->at_last ())
|
|
698
|
+
|| (distance >= 0 && !row_it->at_first ())) {
|
|
699
|
+
row_offset = row_inc;
|
|
700
|
+
do {
|
|
701
|
+
next_row = row_it->data_relative (row_offset);
|
|
702
|
+
next_index = (inT32) floor (next_row->intercept ());
|
|
703
|
+
if ((distance < 0
|
|
704
|
+
&& next_index < line_index
|
|
705
|
+
&& next_index > line_index + distance + distance)
|
|
706
|
+
|| (distance >= 0
|
|
707
|
+
&& next_index > line_index
|
|
708
|
+
&& next_index < line_index + distance + distance)) {
|
|
709
|
+
if (testing_on) {
|
|
710
|
+
tprintf (" nearer neighbour (%d) at %g\n",
|
|
711
|
+
line_index + distance - next_index,
|
|
712
|
+
next_row->intercept ());
|
|
713
|
+
}
|
|
714
|
+
return TRUE; //other is nearer
|
|
715
|
+
}
|
|
716
|
+
else if (next_index == line_index
|
|
717
|
+
|| next_index == line_index + distance + distance) {
|
|
718
|
+
if (row->believability () <= next_row->believability ()) {
|
|
719
|
+
if (testing_on) {
|
|
720
|
+
tprintf (" equal but more believable at %g (%g/%g)\n",
|
|
721
|
+
next_row->intercept (),
|
|
722
|
+
row->believability (),
|
|
723
|
+
next_row->believability ());
|
|
724
|
+
}
|
|
725
|
+
return TRUE; //other is more believable
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
row_offset += row_inc;
|
|
729
|
+
}
|
|
730
|
+
while ((next_index == line_index
|
|
731
|
+
|| next_index == line_index + distance + distance)
|
|
732
|
+
&& row_offset < row_it->length ());
|
|
733
|
+
if (testing_on)
|
|
734
|
+
tprintf (" keeping\n");
|
|
735
|
+
}
|
|
736
|
+
return FALSE;
|
|
737
|
+
}
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
/**********************************************************************
|
|
741
|
+
* deskew_block_coords
|
|
742
|
+
*
|
|
743
|
+
* Compute the bounding box of all the blobs in the block
|
|
744
|
+
* if they were deskewed without actually doing it.
|
|
745
|
+
**********************************************************************/
|
|
746
|
+
|
|
747
|
+
TBOX deskew_block_coords( //block box
|
|
748
|
+
TO_BLOCK *block, //block to do
|
|
749
|
+
float gradient //global skew
|
|
750
|
+
) {
|
|
751
|
+
TBOX result; //block bounds
|
|
752
|
+
TBOX blob_box; //of block
|
|
753
|
+
FCOORD rotation; //deskew vector
|
|
754
|
+
float length; //of gradient vector
|
|
755
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
756
|
+
TO_ROW *row; //current row
|
|
757
|
+
BLOBNBOX *blob; //current blob
|
|
758
|
+
BLOBNBOX_IT blob_it; //iterator
|
|
759
|
+
|
|
760
|
+
length = sqrt (gradient * gradient + 1);
|
|
761
|
+
rotation = FCOORD (1 / length, -gradient / length);
|
|
762
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
763
|
+
row = row_it.data ();
|
|
764
|
+
blob_it.set_to_list (row->blob_list ());
|
|
765
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
|
|
766
|
+
blob_it.forward ()) {
|
|
767
|
+
blob = blob_it.data ();
|
|
768
|
+
blob_box = blob->bounding_box ();
|
|
769
|
+
blob_box.rotate (rotation);//de-skew it
|
|
770
|
+
result += blob_box;
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
return result;
|
|
774
|
+
}
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
/**********************************************************************
|
|
778
|
+
* compute_line_occupation
|
|
779
|
+
*
|
|
780
|
+
* Compute the pixel projection back on the y axis given the global
|
|
781
|
+
* skew. Also compute the 1st derivative.
|
|
782
|
+
**********************************************************************/
|
|
783
|
+
|
|
784
|
+
void compute_line_occupation( //project blobs
|
|
785
|
+
TO_BLOCK *block, //block to do
|
|
786
|
+
float gradient, //global skew
|
|
787
|
+
inT32 min_y, //min coord in block
|
|
788
|
+
inT32 max_y, //in block
|
|
789
|
+
inT32 *occupation, //output projection
|
|
790
|
+
inT32 *deltas //derivative
|
|
791
|
+
) {
|
|
792
|
+
inT32 line_count; //maxy-miny+1
|
|
793
|
+
inT32 line_index; //of scan line
|
|
794
|
+
int index; //array index for daft compilers
|
|
795
|
+
float top, bottom; //coords of blob
|
|
796
|
+
inT32 width; //of blob
|
|
797
|
+
TO_ROW *row; //current row
|
|
798
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
799
|
+
BLOBNBOX *blob; //current blob
|
|
800
|
+
BLOBNBOX_IT blob_it; //iterator
|
|
801
|
+
float length; //of skew vector
|
|
802
|
+
TBOX blob_box; //bounding box
|
|
803
|
+
FCOORD rotation; //inverse of skew
|
|
804
|
+
|
|
805
|
+
line_count = max_y - min_y + 1;
|
|
806
|
+
length = sqrt (gradient * gradient + 1);
|
|
807
|
+
rotation = FCOORD (1 / length, -gradient / length);
|
|
808
|
+
for (line_index = 0; line_index < line_count; line_index++)
|
|
809
|
+
deltas[line_index] = 0;
|
|
810
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
811
|
+
row = row_it.data ();
|
|
812
|
+
blob_it.set_to_list (row->blob_list ());
|
|
813
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
|
|
814
|
+
blob_it.forward ()) {
|
|
815
|
+
blob = blob_it.data ();
|
|
816
|
+
blob_box = blob->bounding_box ();
|
|
817
|
+
blob_box.rotate (rotation);//de-skew it
|
|
818
|
+
top = blob_box.top ();
|
|
819
|
+
bottom = blob_box.bottom ();
|
|
820
|
+
width =
|
|
821
|
+
(inT32) floor ((FLOAT32) (blob_box.right () - blob_box.left ()));
|
|
822
|
+
if ((inT32) floor (bottom) < min_y
|
|
823
|
+
|| (inT32) floor (bottom) - min_y >= line_count)
|
|
824
|
+
fprintf (stderr,
|
|
825
|
+
"Bad y coord of bottom, " INT32FORMAT "(" INT32FORMAT ","
|
|
826
|
+
INT32FORMAT ")\n", (inT32) floor (bottom), min_y, max_y);
|
|
827
|
+
//count transitions
|
|
828
|
+
index = (inT32) floor (bottom) - min_y;
|
|
829
|
+
deltas[index] += width;
|
|
830
|
+
if ((inT32) floor (top) < min_y
|
|
831
|
+
|| (inT32) floor (top) - min_y >= line_count)
|
|
832
|
+
fprintf (stderr,
|
|
833
|
+
"Bad y coord of top, " INT32FORMAT "(" INT32FORMAT ","
|
|
834
|
+
INT32FORMAT ")\n", (inT32) floor (top), min_y, max_y);
|
|
835
|
+
index = (inT32) floor (top) - min_y;
|
|
836
|
+
deltas[index] -= width;
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
occupation[0] = deltas[0];
|
|
840
|
+
for (line_index = 1; line_index < line_count; line_index++)
|
|
841
|
+
occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
|
|
842
|
+
}
|
|
843
|
+
|
|
844
|
+
|
|
845
|
+
/**********************************************************************
|
|
846
|
+
* compute_occupation_threshold
|
|
847
|
+
*
|
|
848
|
+
* Compute thresholds for textline or not for the occupation array.
|
|
849
|
+
**********************************************************************/
|
|
850
|
+
|
|
851
|
+
void compute_occupation_threshold( //project blobs
|
|
852
|
+
inT32 low_window, //below result point
|
|
853
|
+
inT32 high_window, //above result point
|
|
854
|
+
inT32 line_count, //array sizes
|
|
855
|
+
inT32 *occupation, //input projection
|
|
856
|
+
inT32 *thresholds //output thresholds
|
|
857
|
+
) {
|
|
858
|
+
inT32 line_index; //of thresholds line
|
|
859
|
+
inT32 low_index; //in occupation
|
|
860
|
+
inT32 high_index; //in occupation
|
|
861
|
+
inT32 sum; //current average
|
|
862
|
+
inT32 divisor; //to get thresholds
|
|
863
|
+
inT32 min_index; //of min occ
|
|
864
|
+
inT32 min_occ; //min in locality
|
|
865
|
+
inT32 test_index; //for finding min
|
|
866
|
+
|
|
867
|
+
divisor =
|
|
868
|
+
(inT32) ceil ((low_window + high_window) / textord_occupancy_threshold);
|
|
869
|
+
if (low_window + high_window < line_count) {
|
|
870
|
+
for (sum = 0, high_index = 0; high_index < low_window; high_index++)
|
|
871
|
+
sum += occupation[high_index];
|
|
872
|
+
for (low_index = 0; low_index < high_window; low_index++, high_index++)
|
|
873
|
+
sum += occupation[high_index];
|
|
874
|
+
min_occ = occupation[0];
|
|
875
|
+
min_index = 0;
|
|
876
|
+
for (test_index = 1; test_index < high_index; test_index++) {
|
|
877
|
+
if (occupation[test_index] <= min_occ) {
|
|
878
|
+
min_occ = occupation[test_index];
|
|
879
|
+
min_index = test_index; //find min in region
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
for (line_index = 0; line_index < low_window; line_index++)
|
|
883
|
+
thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
|
|
884
|
+
//same out to end
|
|
885
|
+
for (low_index = 0; high_index < line_count; low_index++, high_index++) {
|
|
886
|
+
sum -= occupation[low_index];
|
|
887
|
+
sum += occupation[high_index];
|
|
888
|
+
if (occupation[high_index] <= min_occ) {
|
|
889
|
+
//find min in region
|
|
890
|
+
min_occ = occupation[high_index];
|
|
891
|
+
min_index = high_index;
|
|
892
|
+
}
|
|
893
|
+
//lost min from region
|
|
894
|
+
if (min_index <= low_index) {
|
|
895
|
+
min_occ = occupation[low_index + 1];
|
|
896
|
+
min_index = low_index + 1;
|
|
897
|
+
for (test_index = low_index + 2; test_index <= high_index;
|
|
898
|
+
test_index++) {
|
|
899
|
+
if (occupation[test_index] <= min_occ) {
|
|
900
|
+
min_occ = occupation[test_index];
|
|
901
|
+
//find min in region
|
|
902
|
+
min_index = test_index;
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
}
|
|
906
|
+
thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
|
|
907
|
+
}
|
|
908
|
+
}
|
|
909
|
+
else {
|
|
910
|
+
min_occ = occupation[0];
|
|
911
|
+
min_index = 0;
|
|
912
|
+
for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
|
|
913
|
+
if (occupation[low_index] < min_occ) {
|
|
914
|
+
min_occ = occupation[low_index];
|
|
915
|
+
min_index = low_index;
|
|
916
|
+
}
|
|
917
|
+
sum += occupation[low_index];
|
|
918
|
+
}
|
|
919
|
+
line_index = 0;
|
|
920
|
+
}
|
|
921
|
+
for (; line_index < line_count; line_index++)
|
|
922
|
+
thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
|
|
923
|
+
//same out to end
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
/**********************************************************************
|
|
928
|
+
* compute_dropout_distances
|
|
929
|
+
*
|
|
930
|
+
* Compute the distance from each coordinate to the nearest dropout.
|
|
931
|
+
**********************************************************************/
|
|
932
|
+
|
|
933
|
+
void compute_dropout_distances( //project blobs
|
|
934
|
+
inT32 *occupation, //input projection
|
|
935
|
+
inT32 *thresholds, //output thresholds
|
|
936
|
+
inT32 line_count //array sizes
|
|
937
|
+
) {
|
|
938
|
+
inT32 line_index; //of thresholds line
|
|
939
|
+
inT32 distance; //from prev dropout
|
|
940
|
+
inT32 next_dist; //to next dropout
|
|
941
|
+
inT32 back_index; //for back filling
|
|
942
|
+
inT32 prev_threshold; //before overwrite
|
|
943
|
+
|
|
944
|
+
distance = -line_count;
|
|
945
|
+
line_index = 0;
|
|
946
|
+
do {
|
|
947
|
+
do {
|
|
948
|
+
distance--;
|
|
949
|
+
prev_threshold = thresholds[line_index];
|
|
950
|
+
//distance from prev
|
|
951
|
+
thresholds[line_index] = distance;
|
|
952
|
+
line_index++;
|
|
953
|
+
}
|
|
954
|
+
while (line_index < line_count
|
|
955
|
+
&& (occupation[line_index] < thresholds[line_index]
|
|
956
|
+
|| occupation[line_index - 1] >= prev_threshold));
|
|
957
|
+
if (line_index < line_count) {
|
|
958
|
+
back_index = line_index - 1;
|
|
959
|
+
next_dist = 1;
|
|
960
|
+
while (next_dist < -distance && back_index >= 0) {
|
|
961
|
+
thresholds[back_index] = next_dist;
|
|
962
|
+
back_index--;
|
|
963
|
+
next_dist++;
|
|
964
|
+
distance++;
|
|
965
|
+
}
|
|
966
|
+
distance = 1;
|
|
967
|
+
}
|
|
968
|
+
}
|
|
969
|
+
while (line_index < line_count);
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
|
|
973
|
+
/**********************************************************************
|
|
974
|
+
* expand_rows
|
|
975
|
+
*
|
|
976
|
+
* Expand each row to the least of its allowed size and touching its
|
|
977
|
+
* neighbours. If the expansion would entirely swallow a neighbouring row
|
|
978
|
+
* then do so.
|
|
979
|
+
**********************************************************************/
|
|
980
|
+
|
|
981
|
+
void expand_rows( //find lines
|
|
982
|
+
ICOORD page_tr, //top right
|
|
983
|
+
TO_BLOCK *block, //block to do
|
|
984
|
+
float gradient, //gradient to fit
|
|
985
|
+
FCOORD rotation, //for drawing
|
|
986
|
+
inT32 block_edge, //edge of block
|
|
987
|
+
BOOL8 testing_on //correct orientation
|
|
988
|
+
) {
|
|
989
|
+
BOOL8 swallowed_row; //eaten a neighbour
|
|
990
|
+
float y_max, y_min; //new row limits
|
|
991
|
+
float y_bottom, y_top; //allowed limits
|
|
992
|
+
TO_ROW *test_row; //next row
|
|
993
|
+
TO_ROW *row; //current row
|
|
994
|
+
//iterators
|
|
995
|
+
BLOBNBOX_IT blob_it = &block->blobs;
|
|
996
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
997
|
+
|
|
998
|
+
#ifndef GRAPHICS_DISABLED
|
|
999
|
+
if (textord_show_expanded_rows && testing_on) {
|
|
1000
|
+
if (to_win == NULL)
|
|
1001
|
+
create_to_win(page_tr);
|
|
1002
|
+
}
|
|
1003
|
+
#endif
|
|
1004
|
+
|
|
1005
|
+
adjust_row_limits(block); //shift min,max.
|
|
1006
|
+
if (textord_new_initial_xheight) {
|
|
1007
|
+
if (block->get_rows ()->length () == 0)
|
|
1008
|
+
return;
|
|
1009
|
+
compute_row_stats(block, textord_show_expanded_rows &&testing_on);
|
|
1010
|
+
}
|
|
1011
|
+
assign_blobs_to_rows (block, &gradient, 4, TRUE, FALSE, FALSE);
|
|
1012
|
+
//get real membership
|
|
1013
|
+
if (block->get_rows ()->length () == 0)
|
|
1014
|
+
return;
|
|
1015
|
+
fit_parallel_rows(block,
|
|
1016
|
+
gradient,
|
|
1017
|
+
rotation,
|
|
1018
|
+
block_edge,
|
|
1019
|
+
textord_show_expanded_rows &&testing_on);
|
|
1020
|
+
if (!textord_new_initial_xheight)
|
|
1021
|
+
compute_row_stats(block, textord_show_expanded_rows &&testing_on);
|
|
1022
|
+
row_it.move_to_last ();
|
|
1023
|
+
do {
|
|
1024
|
+
row = row_it.data ();
|
|
1025
|
+
y_max = row->max_y (); //get current limits
|
|
1026
|
+
y_min = row->min_y ();
|
|
1027
|
+
y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
|
|
1028
|
+
textord_merge_desc;
|
|
1029
|
+
y_top = row->intercept () + block->line_size * textord_expansion_factor *
|
|
1030
|
+
(textord_merge_x + textord_merge_asc);
|
|
1031
|
+
if (y_min > y_bottom) { //expansion allowed
|
|
1032
|
+
if (textord_show_expanded_rows && testing_on)
|
|
1033
|
+
tprintf("Expanding bottom of row at %f from %f to %f\n",
|
|
1034
|
+
row->intercept(), y_min, y_bottom);
|
|
1035
|
+
//expandable
|
|
1036
|
+
swallowed_row = TRUE;
|
|
1037
|
+
while (swallowed_row && !row_it.at_last ()) {
|
|
1038
|
+
swallowed_row = FALSE;
|
|
1039
|
+
//get next one
|
|
1040
|
+
test_row = row_it.data_relative (1);
|
|
1041
|
+
//overlaps space
|
|
1042
|
+
if (test_row->max_y () > y_bottom) {
|
|
1043
|
+
if (test_row->min_y () > y_bottom) {
|
|
1044
|
+
if (textord_show_expanded_rows && testing_on)
|
|
1045
|
+
tprintf("Eating row below at %f\n", test_row->intercept());
|
|
1046
|
+
row_it.forward ();
|
|
1047
|
+
#ifndef GRAPHICS_DISABLED
|
|
1048
|
+
if (textord_show_expanded_rows && testing_on)
|
|
1049
|
+
plot_parallel_row(test_row,
|
|
1050
|
+
gradient,
|
|
1051
|
+
block_edge,
|
|
1052
|
+
ScrollView::WHITE,
|
|
1053
|
+
rotation);
|
|
1054
|
+
#endif
|
|
1055
|
+
blob_it.set_to_list (row->blob_list ());
|
|
1056
|
+
blob_it.add_list_after (test_row->blob_list ());
|
|
1057
|
+
//swallow complete row
|
|
1058
|
+
delete row_it.extract ();
|
|
1059
|
+
row_it.backward ();
|
|
1060
|
+
swallowed_row = TRUE;
|
|
1061
|
+
}
|
|
1062
|
+
else if (test_row->max_y () < y_min) {
|
|
1063
|
+
//shorter limit
|
|
1064
|
+
y_bottom = test_row->max_y ();
|
|
1065
|
+
if (textord_show_expanded_rows && testing_on)
|
|
1066
|
+
tprintf("Truncating limit to %f due to touching row at %f\n",
|
|
1067
|
+
y_bottom, test_row->intercept());
|
|
1068
|
+
}
|
|
1069
|
+
else {
|
|
1070
|
+
y_bottom = y_min; //can't expand it
|
|
1071
|
+
if (textord_show_expanded_rows && testing_on)
|
|
1072
|
+
tprintf("Not expanding limit beyond %f due to touching row at %f\n",
|
|
1073
|
+
y_bottom, test_row->intercept());
|
|
1074
|
+
}
|
|
1075
|
+
}
|
|
1076
|
+
}
|
|
1077
|
+
y_min = y_bottom; //expand it
|
|
1078
|
+
}
|
|
1079
|
+
if (y_max < y_top) { //expansion allowed
|
|
1080
|
+
if (textord_show_expanded_rows && testing_on)
|
|
1081
|
+
tprintf("Expanding top of row at %f from %f to %f\n",
|
|
1082
|
+
row->intercept(), y_max, y_top);
|
|
1083
|
+
swallowed_row = TRUE;
|
|
1084
|
+
while (swallowed_row && !row_it.at_first ()) {
|
|
1085
|
+
swallowed_row = FALSE;
|
|
1086
|
+
//get one above
|
|
1087
|
+
test_row = row_it.data_relative (-1);
|
|
1088
|
+
if (test_row->min_y () < y_top) {
|
|
1089
|
+
if (test_row->max_y () < y_top) {
|
|
1090
|
+
if (textord_show_expanded_rows && testing_on)
|
|
1091
|
+
tprintf("Eating row above at %f\n", test_row->intercept());
|
|
1092
|
+
row_it.backward ();
|
|
1093
|
+
blob_it.set_to_list (row->blob_list ());
|
|
1094
|
+
#ifndef GRAPHICS_DISABLED
|
|
1095
|
+
if (textord_show_expanded_rows && testing_on)
|
|
1096
|
+
plot_parallel_row(test_row,
|
|
1097
|
+
gradient,
|
|
1098
|
+
block_edge,
|
|
1099
|
+
ScrollView::WHITE,
|
|
1100
|
+
rotation);
|
|
1101
|
+
#endif
|
|
1102
|
+
blob_it.add_list_after (test_row->blob_list ());
|
|
1103
|
+
//swallow complete row
|
|
1104
|
+
delete row_it.extract ();
|
|
1105
|
+
row_it.forward ();
|
|
1106
|
+
swallowed_row = TRUE;
|
|
1107
|
+
}
|
|
1108
|
+
else if (test_row->min_y () < y_max) {
|
|
1109
|
+
//shorter limit
|
|
1110
|
+
y_top = test_row->min_y ();
|
|
1111
|
+
if (textord_show_expanded_rows && testing_on)
|
|
1112
|
+
tprintf("Truncating limit to %f due to touching row at %f\n",
|
|
1113
|
+
y_top, test_row->intercept());
|
|
1114
|
+
}
|
|
1115
|
+
else {
|
|
1116
|
+
y_top = y_max; //can't expand it
|
|
1117
|
+
if (textord_show_expanded_rows && testing_on)
|
|
1118
|
+
tprintf("Not expanding limit beyond %f due to touching row at %f\n",
|
|
1119
|
+
y_top, test_row->intercept());
|
|
1120
|
+
}
|
|
1121
|
+
}
|
|
1122
|
+
}
|
|
1123
|
+
y_max = y_top;
|
|
1124
|
+
}
|
|
1125
|
+
//new limits
|
|
1126
|
+
row->set_limits (y_min, y_max);
|
|
1127
|
+
row_it.backward ();
|
|
1128
|
+
}
|
|
1129
|
+
while (!row_it.at_last ());
|
|
1130
|
+
}
|
|
1131
|
+
|
|
1132
|
+
|
|
1133
|
+
/**********************************************************************
|
|
1134
|
+
* adjust_row_limits
|
|
1135
|
+
*
|
|
1136
|
+
* Change the limits of rows to suit the default fractions.
|
|
1137
|
+
**********************************************************************/
|
|
1138
|
+
|
|
1139
|
+
void adjust_row_limits( //tidy limits
|
|
1140
|
+
TO_BLOCK *block //block to do
|
|
1141
|
+
) {
|
|
1142
|
+
TO_ROW *row; //current row
|
|
1143
|
+
float size; //size of row
|
|
1144
|
+
float ymax; //top of row
|
|
1145
|
+
float ymin; //bottom of row
|
|
1146
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
1147
|
+
|
|
1148
|
+
if (textord_show_expanded_rows)
|
|
1149
|
+
tprintf("Adjusting row limits for block(%d,%d)\n",
|
|
1150
|
+
block->block->bounding_box().left(),
|
|
1151
|
+
block->block->bounding_box().top());
|
|
1152
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
1153
|
+
row = row_it.data ();
|
|
1154
|
+
size = row->max_y () - row->min_y ();
|
|
1155
|
+
if (textord_show_expanded_rows)
|
|
1156
|
+
tprintf("Row at %f has min %f, max %f, size %f\n",
|
|
1157
|
+
row->intercept(), row->min_y(), row->max_y(), size);
|
|
1158
|
+
size /= textord_merge_x + textord_merge_asc + textord_merge_desc;
|
|
1159
|
+
ymax = size * (textord_merge_x + textord_merge_asc);
|
|
1160
|
+
ymin = -size * textord_merge_desc;
|
|
1161
|
+
row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
|
|
1162
|
+
row->merged = FALSE;
|
|
1163
|
+
}
|
|
1164
|
+
}
|
|
1165
|
+
|
|
1166
|
+
|
|
1167
|
+
/**********************************************************************
|
|
1168
|
+
* compute_row_stats
|
|
1169
|
+
*
|
|
1170
|
+
* Compute the linespacing and offset.
|
|
1171
|
+
**********************************************************************/
|
|
1172
|
+
|
|
1173
|
+
void compute_row_stats( //find lines
|
|
1174
|
+
TO_BLOCK *block, //block to do
|
|
1175
|
+
BOOL8 testing_on //correct orientation
|
|
1176
|
+
) {
|
|
1177
|
+
inT32 row_index; //of median
|
|
1178
|
+
TO_ROW *row; //current row
|
|
1179
|
+
TO_ROW *prev_row; //previous row
|
|
1180
|
+
float iqr; //inter quartile range
|
|
1181
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
1182
|
+
//number of rows
|
|
1183
|
+
inT16 rowcount = row_it.length ();
|
|
1184
|
+
TO_ROW **rows; //for choose nth
|
|
1185
|
+
|
|
1186
|
+
rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
|
|
1187
|
+
if (rows == NULL)
|
|
1188
|
+
MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);
|
|
1189
|
+
rowcount = 0;
|
|
1190
|
+
prev_row = NULL;
|
|
1191
|
+
row_it.move_to_last (); //start at bottom
|
|
1192
|
+
do {
|
|
1193
|
+
row = row_it.data ();
|
|
1194
|
+
if (prev_row != NULL) {
|
|
1195
|
+
rows[rowcount++] = prev_row;
|
|
1196
|
+
prev_row->spacing = row->intercept () - prev_row->intercept ();
|
|
1197
|
+
if (testing_on)
|
|
1198
|
+
tprintf ("Row at %g yields spacing of %g\n",
|
|
1199
|
+
row->intercept (), prev_row->spacing);
|
|
1200
|
+
}
|
|
1201
|
+
prev_row = row;
|
|
1202
|
+
row_it.backward ();
|
|
1203
|
+
}
|
|
1204
|
+
while (!row_it.at_last ());
|
|
1205
|
+
block->key_row = prev_row;
|
|
1206
|
+
block->baseline_offset =
|
|
1207
|
+
fmod (prev_row->parallel_c (), block->line_spacing);
|
|
1208
|
+
if (testing_on)
|
|
1209
|
+
tprintf ("Blob based spacing=(%g,%g), offset=%g",
|
|
1210
|
+
block->line_size, block->line_spacing, block->baseline_offset);
|
|
1211
|
+
if (rowcount > 0) {
|
|
1212
|
+
row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount,
|
|
1213
|
+
sizeof (TO_ROW *), row_spacing_order);
|
|
1214
|
+
iqr = rows[row_index]->spacing;
|
|
1215
|
+
row_index = choose_nth_item (rowcount / 4, rows, rowcount,
|
|
1216
|
+
sizeof (TO_ROW *), row_spacing_order);
|
|
1217
|
+
iqr -= rows[row_index]->spacing;
|
|
1218
|
+
row_index = choose_nth_item (rowcount / 2, rows, rowcount,
|
|
1219
|
+
sizeof (TO_ROW *), row_spacing_order);
|
|
1220
|
+
block->key_row = rows[row_index];
|
|
1221
|
+
if (testing_on)
|
|
1222
|
+
tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
|
|
1223
|
+
if (rowcount > 2
|
|
1224
|
+
&& iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
|
|
1225
|
+
if (!textord_new_initial_xheight) {
|
|
1226
|
+
if (rows[row_index]->spacing < block->line_spacing
|
|
1227
|
+
&& rows[row_index]->spacing > block->line_size)
|
|
1228
|
+
//within range
|
|
1229
|
+
block->line_size = rows[row_index]->spacing;
|
|
1230
|
+
//spacing=size
|
|
1231
|
+
else if (rows[row_index]->spacing > block->line_spacing)
|
|
1232
|
+
block->line_size = block->line_spacing;
|
|
1233
|
+
//too big so use max
|
|
1234
|
+
}
|
|
1235
|
+
else {
|
|
1236
|
+
if (rows[row_index]->spacing < block->line_spacing)
|
|
1237
|
+
block->line_size = rows[row_index]->spacing;
|
|
1238
|
+
else
|
|
1239
|
+
block->line_size = block->line_spacing;
|
|
1240
|
+
//too big so use max
|
|
1241
|
+
}
|
|
1242
|
+
if (block->line_size < textord_min_xheight)
|
|
1243
|
+
block->line_size = (float) textord_min_xheight;
|
|
1244
|
+
block->line_spacing = rows[row_index]->spacing;
|
|
1245
|
+
block->max_blob_size =
|
|
1246
|
+
block->line_spacing * textord_excess_blobsize;
|
|
1247
|
+
}
|
|
1248
|
+
block->baseline_offset = fmod (rows[row_index]->intercept (),
|
|
1249
|
+
block->line_spacing);
|
|
1250
|
+
}
|
|
1251
|
+
if (testing_on)
|
|
1252
|
+
tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
|
|
1253
|
+
block->line_size, block->line_spacing, block->baseline_offset);
|
|
1254
|
+
free_mem(rows);
|
|
1255
|
+
}
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
/**********************************************************************
|
|
1259
|
+
* compute_block_xheight
|
|
1260
|
+
*
|
|
1261
|
+
* Compute the xheight of the individual rows, then correlate them
|
|
1262
|
+
* and interpret ascenderless lines, correcting xheights.
|
|
1263
|
+
**********************************************************************/
|
|
1264
|
+
|
|
1265
|
+
void compute_block_xheight( //find lines
|
|
1266
|
+
TO_BLOCK *block, //block to do
|
|
1267
|
+
float gradient //global skew
|
|
1268
|
+
) {
|
|
1269
|
+
TO_ROW *row; //current row
|
|
1270
|
+
int xh_count, desc_count; //no of samples
|
|
1271
|
+
float block_median; //median blob size
|
|
1272
|
+
int asc_count, cap_count;
|
|
1273
|
+
inT32 min_size, max_size; //limits on xheight
|
|
1274
|
+
inT32 evidence; //no of samples on row
|
|
1275
|
+
float xh_sum, desc_sum; //for averages
|
|
1276
|
+
float asc_sum, cap_sum;
|
|
1277
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
1278
|
+
STATS row_heights; //block evidence
|
|
1279
|
+
|
|
1280
|
+
if (row_it.empty ())
|
|
1281
|
+
return; //no rows
|
|
1282
|
+
block_median = median_block_xheight (block, gradient);
|
|
1283
|
+
block_median *= 2;
|
|
1284
|
+
if (block_median < block->line_size)
|
|
1285
|
+
block_median = block->line_size;
|
|
1286
|
+
// tprintf("Block median=%g, linesize=%g\n",
|
|
1287
|
+
// block_median,block->line_size);
|
|
1288
|
+
max_size = (inT32) ceil (block_median);
|
|
1289
|
+
min_size = (inT32) floor (block_median * textord_minxh);
|
|
1290
|
+
row_heights.set_range (min_size, max_size + 1);
|
|
1291
|
+
xh_count = desc_count = asc_count = cap_count = 0;
|
|
1292
|
+
xh_sum = desc_sum = asc_sum = cap_sum = 0.0f;
|
|
1293
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
1294
|
+
row = row_it.data ();
|
|
1295
|
+
evidence = compute_row_xheight (row, min_size, max_size, gradient);
|
|
1296
|
+
if (row->xheight > 0 && row->ascrise > 0) {
|
|
1297
|
+
row_heights.add ((inT32) row->xheight, evidence);
|
|
1298
|
+
xh_count += evidence;
|
|
1299
|
+
asc_sum += row->ascrise;
|
|
1300
|
+
asc_count++;
|
|
1301
|
+
}
|
|
1302
|
+
else if (row->xheight > 0) {
|
|
1303
|
+
cap_sum += row->xheight; //assume just caps
|
|
1304
|
+
cap_count++;
|
|
1305
|
+
}
|
|
1306
|
+
if (row->descdrop != 0) {
|
|
1307
|
+
desc_sum += row->descdrop;
|
|
1308
|
+
desc_count++;
|
|
1309
|
+
}
|
|
1310
|
+
}
|
|
1311
|
+
if (xh_count > 0) {
|
|
1312
|
+
//median
|
|
1313
|
+
xh_sum = row_heights.ile (0.5);
|
|
1314
|
+
asc_sum /= asc_count;
|
|
1315
|
+
}
|
|
1316
|
+
else if (cap_count > 0) {
|
|
1317
|
+
cap_sum /= cap_count; //must assume caps
|
|
1318
|
+
xh_sum =
|
|
1319
|
+
cap_sum * textord_merge_x / (textord_merge_x + textord_merge_asc);
|
|
1320
|
+
asc_sum =
|
|
1321
|
+
cap_sum * textord_merge_asc / (textord_merge_x + textord_merge_asc);
|
|
1322
|
+
}
|
|
1323
|
+
else {
|
|
1324
|
+
//default sizes
|
|
1325
|
+
xh_sum = block_median * textord_merge_x;
|
|
1326
|
+
asc_sum = block_median * textord_merge_asc;
|
|
1327
|
+
}
|
|
1328
|
+
if (desc_count > 0) {
|
|
1329
|
+
desc_sum /= desc_count;
|
|
1330
|
+
}
|
|
1331
|
+
else {
|
|
1332
|
+
desc_sum = xh_sum * textord_merge_desc / textord_merge_x;
|
|
1333
|
+
}
|
|
1334
|
+
// tprintf("Block average x height=%g, count=%d, asc=%g/%d, desc=%g/%d,cap=%g/%d\n",
|
|
1335
|
+
// xh_sum,xh_count,asc_sum,asc_count,desc_sum,desc_count,
|
|
1336
|
+
// cap_sum,cap_count);
|
|
1337
|
+
if (xh_sum < textord_min_xheight)
|
|
1338
|
+
xh_sum = (float) textord_min_xheight;
|
|
1339
|
+
block->xheight = xh_sum;
|
|
1340
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
1341
|
+
correct_row_xheight (row_it.data (), xh_sum, asc_sum, desc_sum);
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1344
|
+
|
|
1345
|
+
|
|
1346
|
+
/**********************************************************************
|
|
1347
|
+
* median_block_xheight
|
|
1348
|
+
*
|
|
1349
|
+
* Compute the linespacing and offset.
|
|
1350
|
+
**********************************************************************/
|
|
1351
|
+
|
|
1352
|
+
float median_block_xheight( //find lines
|
|
1353
|
+
TO_BLOCK *block, //block to do
|
|
1354
|
+
float gradient //global skew
|
|
1355
|
+
) {
|
|
1356
|
+
TO_ROW *row; //current row
|
|
1357
|
+
float result; //output size
|
|
1358
|
+
float xcentre; //centre of blob
|
|
1359
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
1360
|
+
BLOBNBOX_IT blob_it;
|
|
1361
|
+
BLOBNBOX *blob; //current blob
|
|
1362
|
+
float *heights; //for choose nth
|
|
1363
|
+
inT32 blob_count; //blobs in block
|
|
1364
|
+
inT32 blob_index; //current blob
|
|
1365
|
+
|
|
1366
|
+
blob_count = 0;
|
|
1367
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
|
|
1368
|
+
blob_count += row_it.data ()->blob_list ()->length ();
|
|
1369
|
+
heights = (float *) alloc_mem (blob_count * sizeof (float));
|
|
1370
|
+
if (heights == NULL)
|
|
1371
|
+
MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);
|
|
1372
|
+
|
|
1373
|
+
blob_index = 0;
|
|
1374
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
1375
|
+
row = row_it.data ();
|
|
1376
|
+
blob_it.set_to_list (row->blob_list ());
|
|
1377
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
|
|
1378
|
+
blob_it.forward ()) {
|
|
1379
|
+
blob = blob_it.data ();
|
|
1380
|
+
if (!blob->joined_to_prev ()) {
|
|
1381
|
+
xcentre =
|
|
1382
|
+
(blob->bounding_box ().left () +
|
|
1383
|
+
blob->bounding_box ().right ()) / 2.0f;
|
|
1384
|
+
heights[blob_index] =
|
|
1385
|
+
blob->bounding_box ().top () - gradient * xcentre -
|
|
1386
|
+
row->parallel_c ();
|
|
1387
|
+
if (heights[blob_index] > 0)
|
|
1388
|
+
blob_index++;
|
|
1389
|
+
}
|
|
1390
|
+
}
|
|
1391
|
+
}
|
|
1392
|
+
ASSERT_HOST (blob_index > 0); //dont expect 0
|
|
1393
|
+
blob_count = blob_index;
|
|
1394
|
+
blob_index = choose_nth_item (blob_count / 2, heights, blob_count);
|
|
1395
|
+
result = heights[blob_index];
|
|
1396
|
+
free_mem(heights);
|
|
1397
|
+
return result;
|
|
1398
|
+
}
|
|
1399
|
+
|
|
1400
|
+
|
|
1401
|
+
/**********************************************************************
|
|
1402
|
+
* compute_row_xheight
|
|
1403
|
+
*
|
|
1404
|
+
* Estimate the xheight of this row.
|
|
1405
|
+
* Compute the ascender rise and descender drop at the same time.
|
|
1406
|
+
**********************************************************************/
|
|
1407
|
+
|
|
1408
|
+
inT32 compute_row_xheight( //find lines
|
|
1409
|
+
TO_ROW *row, //row to do
|
|
1410
|
+
inT32 min_height, //min xheight
|
|
1411
|
+
inT32 max_height, //max xheight
|
|
1412
|
+
float gradient //global skew
|
|
1413
|
+
) {
|
|
1414
|
+
BOOL8 in_best_pile; //control of mode size
|
|
1415
|
+
inT32 prev_size; //previous size
|
|
1416
|
+
float xcentre; //centre of blob
|
|
1417
|
+
float height; //height of blob
|
|
1418
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
|
1419
|
+
BLOBNBOX *blob; //current blob
|
|
1420
|
+
inT32 blob_count; //blobs in block
|
|
1421
|
+
inT32 x; //xheight index
|
|
1422
|
+
inT32 asc; //ascender index
|
|
1423
|
+
inT32 blob_index; //current blob
|
|
1424
|
+
inT32 mode_count; //no of modes
|
|
1425
|
+
inT32 best_count; //count of best x so far
|
|
1426
|
+
float ratio; //size ratio
|
|
1427
|
+
inT32 modes[MAX_HEIGHT_MODES]; //biggest piles
|
|
1428
|
+
STATS heights (min_height, max_height + 1);
|
|
1429
|
+
|
|
1430
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
|
|
1431
|
+
blob = blob_it.data ();
|
|
1432
|
+
if (!blob->joined_to_prev ()) {
|
|
1433
|
+
xcentre =
|
|
1434
|
+
(blob->bounding_box ().left () +
|
|
1435
|
+
blob->bounding_box ().right ()) / 2.0f;
|
|
1436
|
+
height = blob->bounding_box ().top ();
|
|
1437
|
+
if (textord_fix_xheight_bug)
|
|
1438
|
+
height -= row->baseline.y (xcentre);
|
|
1439
|
+
else
|
|
1440
|
+
height -= gradient * xcentre + row->parallel_c ();
|
|
1441
|
+
if (height >= min_height && height <= max_height
|
|
1442
|
+
&& (!textord_xheight_tweak || height > textord_min_xheight))
|
|
1443
|
+
heights.add ((inT32) floor (height + 0.5), 1);
|
|
1444
|
+
}
|
|
1445
|
+
}
|
|
1446
|
+
blob_index = heights.mode (); //find mode
|
|
1447
|
+
//get count of mode
|
|
1448
|
+
blob_count = heights.pile_count (blob_index);
|
|
1449
|
+
if (textord_debug_xheights)
|
|
1450
|
+
tprintf ("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d,%d\n",
|
|
1451
|
+
min_height, max_height, blob_index, blob_count,
|
|
1452
|
+
heights.get_total (), row->blob_list ()->length ());
|
|
1453
|
+
row->ascrise = 0.0f;
|
|
1454
|
+
row->xheight = 0.0f;
|
|
1455
|
+
row->descdrop = 0.0f; //undefined;
|
|
1456
|
+
in_best_pile = FALSE;
|
|
1457
|
+
prev_size = -MAX_INT32;
|
|
1458
|
+
best_count = 0;
|
|
1459
|
+
if (blob_count > 0) {
|
|
1460
|
+
//get biggest ones
|
|
1461
|
+
mode_count = compute_height_modes (&heights, min_height, max_height, modes, MAX_HEIGHT_MODES);
|
|
1462
|
+
for (x = 0; x < mode_count - 1; x++) {
|
|
1463
|
+
if (modes[x] != prev_size + 1)
|
|
1464
|
+
in_best_pile = FALSE; //had empty height
|
|
1465
|
+
if (heights.pile_count (modes[x])
|
|
1466
|
+
>= blob_count * textord_xheight_mode_fraction
|
|
1467
|
+
&& (in_best_pile || heights.pile_count (modes[x]) > best_count)) {
|
|
1468
|
+
for (asc = x + 1; asc < mode_count; asc++) {
|
|
1469
|
+
ratio = (float) modes[asc] / modes[x];
|
|
1470
|
+
if (textord_ascx_ratio_min < ratio
|
|
1471
|
+
&& ratio < textord_ascx_ratio_max
|
|
1472
|
+
&& heights.pile_count (modes[asc])
|
|
1473
|
+
>= blob_count * textord_ascheight_mode_fraction) {
|
|
1474
|
+
if (heights.pile_count (modes[x]) > best_count) {
|
|
1475
|
+
in_best_pile = TRUE;
|
|
1476
|
+
best_count = heights.pile_count (modes[x]);
|
|
1477
|
+
}
|
|
1478
|
+
// tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
|
|
1479
|
+
// modes[x],modes[asc]-modes[x],
|
|
1480
|
+
// heights.pile_count(modes[x]),
|
|
1481
|
+
// ratio);
|
|
1482
|
+
prev_size = modes[x];
|
|
1483
|
+
row->xheight = (float) modes[x];
|
|
1484
|
+
row->ascrise = (float) (modes[asc] - modes[x]);
|
|
1485
|
+
}
|
|
1486
|
+
}
|
|
1487
|
+
}
|
|
1488
|
+
}
|
|
1489
|
+
if (row->xheight == 0) {
|
|
1490
|
+
//single mode
|
|
1491
|
+
row->xheight = (float) blob_index;
|
|
1492
|
+
row->ascrise = 0.0f;
|
|
1493
|
+
if (textord_debug_xheights)
|
|
1494
|
+
tprintf ("Single mode xheight set to %g\n", row->xheight);
|
|
1495
|
+
}
|
|
1496
|
+
else if (textord_debug_xheights)
|
|
1497
|
+
tprintf ("Multi-mode xheight set to %g, asc=%g\n",
|
|
1498
|
+
row->xheight, row->ascrise);
|
|
1499
|
+
row->descdrop = (float) compute_row_descdrop (row, gradient);
|
|
1500
|
+
//find descenders
|
|
1501
|
+
}
|
|
1502
|
+
return best_count;
|
|
1503
|
+
}
|
|
1504
|
+
|
|
1505
|
+
|
|
1506
|
+
/**********************************************************************
|
|
1507
|
+
* compute_row_descdrop
|
|
1508
|
+
*
|
|
1509
|
+
* Estimate the descdrop of this row.
|
|
1510
|
+
**********************************************************************/
|
|
1511
|
+
|
|
1512
|
+
inT32 compute_row_descdrop( //find lines
|
|
1513
|
+
TO_ROW *row, //row to do
|
|
1514
|
+
float gradient //global skew
|
|
1515
|
+
) {
|
|
1516
|
+
inT32 min_height = (inT32) floor (row->xheight * textord_descx_ratio_min);
|
|
1517
|
+
inT32 max_height = (inT32) floor (row->xheight * textord_descx_ratio_max);
|
|
1518
|
+
float xcentre; //centre of blob
|
|
1519
|
+
float height; //height of blob
|
|
1520
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
|
1521
|
+
BLOBNBOX *blob; //current blob
|
|
1522
|
+
inT32 blob_count; //blobs in block
|
|
1523
|
+
inT32 blob_index; //current blob
|
|
1524
|
+
STATS heights (min_height, max_height + 1);
|
|
1525
|
+
|
|
1526
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
|
|
1527
|
+
blob = blob_it.data ();
|
|
1528
|
+
if (!blob->joined_to_prev ()) {
|
|
1529
|
+
xcentre =
|
|
1530
|
+
(blob->bounding_box ().left () +
|
|
1531
|
+
blob->bounding_box ().right ()) / 2.0f;
|
|
1532
|
+
height =
|
|
1533
|
+
gradient * xcentre + row->parallel_c () -
|
|
1534
|
+
blob->bounding_box ().bottom ();
|
|
1535
|
+
if (height >= min_height && height <= max_height)
|
|
1536
|
+
heights.add ((inT32) floor (height + 0.5), 1);
|
|
1537
|
+
}
|
|
1538
|
+
}
|
|
1539
|
+
blob_index = heights.mode (); //find mode
|
|
1540
|
+
//get count of mode
|
|
1541
|
+
blob_count = heights.pile_count (blob_index);
|
|
1542
|
+
return blob_count > 0 ? -blob_index : 0;
|
|
1543
|
+
}
|
|
1544
|
+
|
|
1545
|
+
|
|
1546
|
+
/**********************************************************************
|
|
1547
|
+
* compute_height_modes
|
|
1548
|
+
*
|
|
1549
|
+
* Find the top maxmodes values in the input array and put their
|
|
1550
|
+
* indices in the output in the order in which they occurred.
|
|
1551
|
+
**********************************************************************/
|
|
1552
|
+
|
|
1553
|
+
inT32 compute_height_modes( //find lines
|
|
1554
|
+
STATS *heights, //stats to search
|
|
1555
|
+
inT32 min_height, //bottom of range
|
|
1556
|
+
inT32 max_height, //top of range
|
|
1557
|
+
inT32 *modes, //output array
|
|
1558
|
+
inT32 maxmodes //size of modes
|
|
1559
|
+
) {
|
|
1560
|
+
inT32 pile_count; //no in source pile
|
|
1561
|
+
inT32 src_count; //no of source entries
|
|
1562
|
+
inT32 src_index; //current entry
|
|
1563
|
+
inT32 least_count; //height of smalllest
|
|
1564
|
+
inT32 least_index; //index of least
|
|
1565
|
+
inT32 dest_count; //index in modes
|
|
1566
|
+
|
|
1567
|
+
src_count = max_height + 1 - min_height;
|
|
1568
|
+
dest_count = 0;
|
|
1569
|
+
least_count = MAX_INT32;
|
|
1570
|
+
least_index = -1;
|
|
1571
|
+
for (src_index = 0; src_index < src_count; src_index++) {
|
|
1572
|
+
pile_count = heights->pile_count (min_height + src_index);
|
|
1573
|
+
if (pile_count > 0) {
|
|
1574
|
+
if (dest_count < maxmodes) {
|
|
1575
|
+
if (pile_count < least_count) {
|
|
1576
|
+
//find smallest in array
|
|
1577
|
+
least_count = pile_count;
|
|
1578
|
+
least_index = dest_count;
|
|
1579
|
+
}
|
|
1580
|
+
modes[dest_count++] = min_height + src_index;
|
|
1581
|
+
}
|
|
1582
|
+
else if (pile_count >= least_count) {
|
|
1583
|
+
while (least_index < maxmodes - 1) {
|
|
1584
|
+
modes[least_index] = modes[least_index + 1];
|
|
1585
|
+
//shuffle up
|
|
1586
|
+
least_index++;
|
|
1587
|
+
}
|
|
1588
|
+
//new one on end
|
|
1589
|
+
modes[maxmodes - 1] = min_height + src_index;
|
|
1590
|
+
if (pile_count == least_count) {
|
|
1591
|
+
//new smallest
|
|
1592
|
+
least_index = maxmodes - 1;
|
|
1593
|
+
}
|
|
1594
|
+
else {
|
|
1595
|
+
least_count = heights->pile_count (modes[0]);
|
|
1596
|
+
least_index = 0;
|
|
1597
|
+
for (dest_count = 1; dest_count < maxmodes; dest_count++) {
|
|
1598
|
+
pile_count = heights->pile_count (modes[dest_count]);
|
|
1599
|
+
if (pile_count < least_count) {
|
|
1600
|
+
//find smallest
|
|
1601
|
+
least_count = pile_count;
|
|
1602
|
+
least_index = dest_count;
|
|
1603
|
+
}
|
|
1604
|
+
}
|
|
1605
|
+
}
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
}
|
|
1609
|
+
return dest_count;
|
|
1610
|
+
}
|
|
1611
|
+
|
|
1612
|
+
|
|
1613
|
+
/**********************************************************************
|
|
1614
|
+
* correct_row_xheight
|
|
1615
|
+
*
|
|
1616
|
+
* Adjust the xheight etc of this row if not within reasonable limits
|
|
1617
|
+
* of the average for the block.
|
|
1618
|
+
**********************************************************************/
|
|
1619
|
+
|
|
1620
|
+
void correct_row_xheight( //fix bad values
|
|
1621
|
+
TO_ROW *row, //row to fix
|
|
1622
|
+
float xheight, //average values
|
|
1623
|
+
float ascrise,
|
|
1624
|
+
float descdrop) {
|
|
1625
|
+
if (textord_row_xheights) {
|
|
1626
|
+
if (row->xheight <= 0)
|
|
1627
|
+
row->xheight = xheight;
|
|
1628
|
+
if (row->ascrise < row->xheight * (textord_ascx_ratio_min - 1)) {
|
|
1629
|
+
if (row->xheight >= xheight * (1 - textord_xheight_error_margin)
|
|
1630
|
+
&& row->xheight <= xheight * (1 + textord_xheight_error_margin)) {
|
|
1631
|
+
row->all_caps = FALSE;
|
|
1632
|
+
row->ascrise = ascrise;
|
|
1633
|
+
}
|
|
1634
|
+
else if (row->xheight >=
|
|
1635
|
+
(xheight + ascrise) * (1 - textord_xheight_error_margin)
|
|
1636
|
+
&& row->xheight <=
|
|
1637
|
+
(xheight + ascrise) * (1 + textord_xheight_error_margin)) {
|
|
1638
|
+
row->all_caps = TRUE;
|
|
1639
|
+
//it was caps
|
|
1640
|
+
row->ascrise = row->xheight - xheight;
|
|
1641
|
+
row->xheight = xheight;
|
|
1642
|
+
}
|
|
1643
|
+
else {
|
|
1644
|
+
row->all_caps = TRUE;
|
|
1645
|
+
row->ascrise = row->xheight * ascrise / (xheight + ascrise);
|
|
1646
|
+
row->xheight -= row->ascrise;
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
else
|
|
1650
|
+
row->all_caps = FALSE;
|
|
1651
|
+
row->ascrise = ascrise;
|
|
1652
|
+
if (row->descdrop >= -row->xheight * (textord_ascx_ratio_min - 1))
|
|
1653
|
+
row->descdrop = descdrop;
|
|
1654
|
+
}
|
|
1655
|
+
else {
|
|
1656
|
+
if (row->xheight < xheight * (1 - textord_xheight_error_margin)
|
|
1657
|
+
|| row->xheight > xheight * (1 + textord_xheight_error_margin))
|
|
1658
|
+
row->xheight = xheight; //set to average
|
|
1659
|
+
row->all_caps = row->ascrise <= 0;
|
|
1660
|
+
if (row->ascrise < ascrise * (1 - textord_xheight_error_margin)
|
|
1661
|
+
|| row->ascrise > ascrise * (1 + textord_xheight_error_margin))
|
|
1662
|
+
row->ascrise = ascrise; //set to average
|
|
1663
|
+
if (row->descdrop < descdrop * (1 - textord_xheight_error_margin)
|
|
1664
|
+
|| row->descdrop > descdrop * (1 + textord_xheight_error_margin))
|
|
1665
|
+
row->descdrop = descdrop; //set to average
|
|
1666
|
+
}
|
|
1667
|
+
}
|
|
1668
|
+
|
|
1669
|
+
|
|
1670
|
+
/**********************************************************************
|
|
1671
|
+
* separate_underlines
|
|
1672
|
+
*
|
|
1673
|
+
* Test wide objects for being potential underlines. If they are then
|
|
1674
|
+
* put them in a separate list in the block.
|
|
1675
|
+
**********************************************************************/
|
|
1676
|
+
|
|
1677
|
+
void separate_underlines( //make rough chars
|
|
1678
|
+
TO_BLOCK *block, //block to do
|
|
1679
|
+
float gradient, //skew angle
|
|
1680
|
+
FCOORD rotation, //inverse landscape
|
|
1681
|
+
BOOL8 testing_on //correct orientation
|
|
1682
|
+
) {
|
|
1683
|
+
BLOBNBOX *blob; //current blob
|
|
1684
|
+
PBLOB *poly_blob; //rotated blob
|
|
1685
|
+
C_BLOB *rotated_blob; //rotated blob
|
|
1686
|
+
TO_ROW *row; //current row
|
|
1687
|
+
float length; //of g_vec
|
|
1688
|
+
TBOX blob_box;
|
|
1689
|
+
FCOORD blob_rotation; //inverse of rotation
|
|
1690
|
+
FCOORD g_vec; //skew rotation
|
|
1691
|
+
BLOBNBOX_IT blob_it; //iterator
|
|
1692
|
+
//iterator
|
|
1693
|
+
BLOBNBOX_IT under_it = &block->underlines;
|
|
1694
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
1695
|
+
|
|
1696
|
+
//length of vector
|
|
1697
|
+
length = sqrt (1 + gradient * gradient);
|
|
1698
|
+
g_vec = FCOORD (1 / length, -gradient / length);
|
|
1699
|
+
blob_rotation = FCOORD (rotation.x (), -rotation.y ());
|
|
1700
|
+
blob_rotation.rotate (g_vec); //unoding everything
|
|
1701
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
1702
|
+
row = row_it.data ();
|
|
1703
|
+
//get blobs
|
|
1704
|
+
blob_it.set_to_list (row->blob_list ());
|
|
1705
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
|
|
1706
|
+
blob_it.forward ()) {
|
|
1707
|
+
blob = blob_it.data ();
|
|
1708
|
+
blob_box = blob->bounding_box ();
|
|
1709
|
+
if (blob_box.width () > block->line_size * textord_underline_width) {
|
|
1710
|
+
if (textord_cblob_blockocc && blob->cblob () != NULL) {
|
|
1711
|
+
rotated_blob = crotate_cblob (blob->cblob (),
|
|
1712
|
+
blob_rotation);
|
|
1713
|
+
if (test_underline (testing_on && textord_show_final_rows,
|
|
1714
|
+
rotated_blob, (inT16) row->intercept (),
|
|
1715
|
+
(inT16) (block->line_size *
|
|
1716
|
+
(textord_merge_x +
|
|
1717
|
+
textord_merge_asc / 2.0f)))) {
|
|
1718
|
+
under_it.add_after_then_move (blob_it.extract ());
|
|
1719
|
+
if (testing_on && textord_show_final_rows) {
|
|
1720
|
+
tprintf ("Underlined blob at (%d,%d)->(%d,%d) ",
|
|
1721
|
+
rotated_blob->bounding_box ().left (),
|
|
1722
|
+
rotated_blob->bounding_box ().bottom (),
|
|
1723
|
+
rotated_blob->bounding_box ().right (),
|
|
1724
|
+
rotated_blob->bounding_box ().top ());
|
|
1725
|
+
tprintf ("(Was (%d,%d)->(%d,%d))\n",
|
|
1726
|
+
blob_box.left (), blob_box.bottom (),
|
|
1727
|
+
blob_box.right (), blob_box.top ());
|
|
1728
|
+
}
|
|
1729
|
+
}
|
|
1730
|
+
delete rotated_blob;
|
|
1731
|
+
}
|
|
1732
|
+
else {
|
|
1733
|
+
if (blob->blob () != NULL) {
|
|
1734
|
+
// if (testing_on && textord_show_final_rows)
|
|
1735
|
+
// tprintf("Rotating by (%g,%g)\n",
|
|
1736
|
+
// blob_rotation.x(),blob_rotation.y());
|
|
1737
|
+
poly_blob = rotate_blob (blob->blob (), blob_rotation);
|
|
1738
|
+
}
|
|
1739
|
+
else
|
|
1740
|
+
poly_blob = rotate_cblob (blob->cblob (),
|
|
1741
|
+
block->line_size,
|
|
1742
|
+
blob_rotation);
|
|
1743
|
+
if (test_underline
|
|
1744
|
+
(testing_on
|
|
1745
|
+
&& textord_show_final_rows, poly_blob,
|
|
1746
|
+
row->intercept (),
|
|
1747
|
+
block->line_size * (textord_merge_x +
|
|
1748
|
+
textord_merge_asc / 2))) {
|
|
1749
|
+
if (testing_on && textord_show_final_rows) {
|
|
1750
|
+
tprintf ("Underlined blob at (%d,%d)->(%d,%d) ",
|
|
1751
|
+
poly_blob->bounding_box ().left (),
|
|
1752
|
+
poly_blob->bounding_box ().bottom (),
|
|
1753
|
+
poly_blob->bounding_box ().right (),
|
|
1754
|
+
poly_blob->bounding_box ().top ());
|
|
1755
|
+
tprintf ("(Was (%d,%d)->(%d,%d))\n",
|
|
1756
|
+
blob_box.left (), blob_box.bottom (),
|
|
1757
|
+
blob_box.right (), blob_box.top ());
|
|
1758
|
+
}
|
|
1759
|
+
under_it.add_after_then_move (blob_it.extract ());
|
|
1760
|
+
}
|
|
1761
|
+
delete poly_blob;
|
|
1762
|
+
}
|
|
1763
|
+
}
|
|
1764
|
+
}
|
|
1765
|
+
}
|
|
1766
|
+
}
|
|
1767
|
+
|
|
1768
|
+
|
|
1769
|
+
/**********************************************************************
|
|
1770
|
+
* pre_associate_blobs
|
|
1771
|
+
*
|
|
1772
|
+
* Associate overlapping blobs and fake chop wide blobs.
|
|
1773
|
+
**********************************************************************/
|
|
1774
|
+
|
|
1775
|
+
void pre_associate_blobs( //make rough chars
|
|
1776
|
+
ICOORD page_tr, //top right
|
|
1777
|
+
TO_BLOCK *block, //block to do
|
|
1778
|
+
FCOORD rotation, //inverse landscape
|
|
1779
|
+
BOOL8 testing_on //correct orientation
|
|
1780
|
+
) {
|
|
1781
|
+
#ifndef GRAPHICS_DISABLED
|
|
1782
|
+
ScrollView::Color colour; //of boxes
|
|
1783
|
+
#endif
|
|
1784
|
+
inT16 overlap; //of adjacent boxes
|
|
1785
|
+
BLOBNBOX *blob; //current blob
|
|
1786
|
+
BLOBNBOX *nextblob; //next in list
|
|
1787
|
+
TBOX blob_box;
|
|
1788
|
+
TBOX next_box; //next blob
|
|
1789
|
+
FCOORD blob_rotation; //inverse of rotation
|
|
1790
|
+
BLOBNBOX_IT blob_it; //iterator
|
|
1791
|
+
BLOBNBOX_IT start_it; //iterator
|
|
1792
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
1793
|
+
|
|
1794
|
+
#ifndef GRAPHICS_DISABLED
|
|
1795
|
+
colour = ScrollView::RED;
|
|
1796
|
+
#endif
|
|
1797
|
+
|
|
1798
|
+
blob_rotation = FCOORD (rotation.x (), -rotation.y ());
|
|
1799
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
1800
|
+
//get blobs
|
|
1801
|
+
blob_it.set_to_list (row_it.data ()->blob_list ());
|
|
1802
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
|
|
1803
|
+
blob_it.forward ()) {
|
|
1804
|
+
blob = blob_it.data ();
|
|
1805
|
+
blob_box = blob->bounding_box ();
|
|
1806
|
+
start_it = blob_it; //save start point
|
|
1807
|
+
// if (testing_on && textord_show_final_blobs)
|
|
1808
|
+
// {
|
|
1809
|
+
// tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
|
|
1810
|
+
// blob_box.left(),blob_box.bottom(),
|
|
1811
|
+
// blob_box.right(),blob_box.top(),
|
|
1812
|
+
// (void*)blob,blob_it.length());
|
|
1813
|
+
// }
|
|
1814
|
+
do {
|
|
1815
|
+
if (!blob_it.at_last ()) {
|
|
1816
|
+
nextblob = blob_it.data_relative (1);
|
|
1817
|
+
next_box = nextblob->bounding_box ();
|
|
1818
|
+
overlap = next_box.width ();
|
|
1819
|
+
if (blob_box.left () > next_box.left ())
|
|
1820
|
+
overlap -= blob_box.left () - next_box.left ();
|
|
1821
|
+
if (blob_box.right () < next_box.right ())
|
|
1822
|
+
overlap -= next_box.right () - blob_box.right ();
|
|
1823
|
+
if (overlap >= next_box.width () / 2
|
|
1824
|
+
|| overlap >= blob_box.width () / 2) {
|
|
1825
|
+
//merge new blob
|
|
1826
|
+
blob->merge (nextblob);
|
|
1827
|
+
//get bigger box
|
|
1828
|
+
blob_box = blob->bounding_box ();
|
|
1829
|
+
blob_it.forward ();
|
|
1830
|
+
}
|
|
1831
|
+
else
|
|
1832
|
+
overlap = -1; //no overlap
|
|
1833
|
+
}
|
|
1834
|
+
else
|
|
1835
|
+
overlap = -1; //no overlap
|
|
1836
|
+
}
|
|
1837
|
+
while (overlap >= 0);
|
|
1838
|
+
blob->chop (&start_it, &blob_it,
|
|
1839
|
+
blob_rotation,
|
|
1840
|
+
block->line_size * textord_merge_x *
|
|
1841
|
+
textord_chop_width);
|
|
1842
|
+
//attempt chop
|
|
1843
|
+
}
|
|
1844
|
+
#ifndef GRAPHICS_DISABLED
|
|
1845
|
+
if (testing_on && textord_show_final_blobs) {
|
|
1846
|
+
if (to_win == NULL)
|
|
1847
|
+
create_to_win(page_tr);
|
|
1848
|
+
to_win->Pen(colour);
|
|
1849
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
|
|
1850
|
+
blob_it.forward ()) {
|
|
1851
|
+
blob = blob_it.data ();
|
|
1852
|
+
blob_box = blob->bounding_box ();
|
|
1853
|
+
blob_box.rotate (rotation);
|
|
1854
|
+
if (!blob->joined_to_prev ()) {
|
|
1855
|
+
to_win->Rectangle (blob_box.left (), blob_box.bottom (),
|
|
1856
|
+
blob_box.right (), blob_box.top ());
|
|
1857
|
+
}
|
|
1858
|
+
}
|
|
1859
|
+
colour = (ScrollView::Color) (colour + 1);
|
|
1860
|
+
if (colour > ScrollView::MAGENTA)
|
|
1861
|
+
colour = ScrollView::RED;
|
|
1862
|
+
}
|
|
1863
|
+
#endif
|
|
1864
|
+
}
|
|
1865
|
+
}
|
|
1866
|
+
|
|
1867
|
+
|
|
1868
|
+
/**********************************************************************
|
|
1869
|
+
* fit_parallel_rows
|
|
1870
|
+
*
|
|
1871
|
+
* Re-fit the rows in the block to the given gradient.
|
|
1872
|
+
**********************************************************************/
|
|
1873
|
+
|
|
1874
|
+
void fit_parallel_rows( //find lines
|
|
1875
|
+
TO_BLOCK *block, //block to do
|
|
1876
|
+
float gradient, //gradient to fit
|
|
1877
|
+
FCOORD rotation, //for drawing
|
|
1878
|
+
inT32 block_edge, //edge of block
|
|
1879
|
+
BOOL8 testing_on //correct orientation
|
|
1880
|
+
) {
|
|
1881
|
+
#ifndef GRAPHICS_DISABLED
|
|
1882
|
+
ScrollView::Color colour; //of row
|
|
1883
|
+
#endif
|
|
1884
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
1885
|
+
|
|
1886
|
+
row_it.move_to_first ();
|
|
1887
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
1888
|
+
if (row_it.data ()->blob_list ()->empty ())
|
|
1889
|
+
delete row_it.extract (); //nothing in it
|
|
1890
|
+
else
|
|
1891
|
+
fit_parallel_lms (gradient, row_it.data ());
|
|
1892
|
+
}
|
|
1893
|
+
#ifndef GRAPHICS_DISABLED
|
|
1894
|
+
if (testing_on) {
|
|
1895
|
+
colour = ScrollView::RED;
|
|
1896
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
1897
|
+
plot_parallel_row (row_it.data (), gradient,
|
|
1898
|
+
block_edge, colour, rotation);
|
|
1899
|
+
colour = (ScrollView::Color) (colour + 1);
|
|
1900
|
+
if (colour > ScrollView::MAGENTA)
|
|
1901
|
+
colour = ScrollView::RED;
|
|
1902
|
+
}
|
|
1903
|
+
}
|
|
1904
|
+
#endif
|
|
1905
|
+
row_it.sort (row_y_order); //may have gone out of order
|
|
1906
|
+
}
|
|
1907
|
+
|
|
1908
|
+
|
|
1909
|
+
/**********************************************************************
|
|
1910
|
+
* fit_parallel_lms
|
|
1911
|
+
*
|
|
1912
|
+
* Fit an LMS line to a row.
|
|
1913
|
+
* Make the fit parallel to the given gradient and set the
|
|
1914
|
+
* row accordingly.
|
|
1915
|
+
**********************************************************************/
|
|
1916
|
+
|
|
1917
|
+
void fit_parallel_lms( //sort function
|
|
1918
|
+
float gradient, //forced gradient
|
|
1919
|
+
TO_ROW *row //row to fit
|
|
1920
|
+
) {
|
|
1921
|
+
float c; //fitted line
|
|
1922
|
+
int blobcount; //no of blobs
|
|
1923
|
+
TBOX box; //blob box
|
|
1924
|
+
LMS lms (row->blob_list ()->length ());
|
|
1925
|
+
//blobs
|
|
1926
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
|
1927
|
+
|
|
1928
|
+
blobcount = 0;
|
|
1929
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
|
|
1930
|
+
if (!blob_it.data ()->joined_to_prev ()) {
|
|
1931
|
+
box = blob_it.data ()->bounding_box ();
|
|
1932
|
+
lms.
|
|
1933
|
+
add (FCOORD ((box.left () + box.right ()) / 2.0, box.bottom ()));
|
|
1934
|
+
blobcount++;
|
|
1935
|
+
}
|
|
1936
|
+
}
|
|
1937
|
+
lms.constrained_fit (gradient, c);
|
|
1938
|
+
row->set_parallel_line (gradient, c, lms.error ());
|
|
1939
|
+
if (textord_straight_baselines && blobcount > lms_line_trials) {
|
|
1940
|
+
lms.fit (gradient, c);
|
|
1941
|
+
}
|
|
1942
|
+
//set the other too
|
|
1943
|
+
row->set_line (gradient, c, lms.error ());
|
|
1944
|
+
}
|
|
1945
|
+
|
|
1946
|
+
|
|
1947
|
+
/**********************************************************************
|
|
1948
|
+
* make_spline_rows
|
|
1949
|
+
*
|
|
1950
|
+
* Re-fit the rows in the block to the given gradient.
|
|
1951
|
+
**********************************************************************/
|
|
1952
|
+
|
|
1953
|
+
void make_spline_rows( //find lines
|
|
1954
|
+
TO_BLOCK *block, //block to do
|
|
1955
|
+
float gradient, //gradient to fit
|
|
1956
|
+
FCOORD rotation, //for drawing
|
|
1957
|
+
inT32 block_edge, //edge of block
|
|
1958
|
+
BOOL8 testing_on //correct orientation
|
|
1959
|
+
) {
|
|
1960
|
+
#ifndef GRAPHICS_DISABLED
|
|
1961
|
+
ScrollView::Color colour; //of row
|
|
1962
|
+
#endif
|
|
1963
|
+
TO_ROW_IT row_it = block->get_rows ();
|
|
1964
|
+
|
|
1965
|
+
row_it.move_to_first ();
|
|
1966
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
1967
|
+
if (row_it.data ()->blob_list ()->empty ())
|
|
1968
|
+
delete row_it.extract (); //nothing in it
|
|
1969
|
+
else
|
|
1970
|
+
make_baseline_spline (row_it.data (), block);
|
|
1971
|
+
}
|
|
1972
|
+
if (textord_old_baselines) {
|
|
1973
|
+
#ifndef GRAPHICS_DISABLED
|
|
1974
|
+
if (testing_on) {
|
|
1975
|
+
colour = ScrollView::RED;
|
|
1976
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
|
|
1977
|
+
row_it.forward ()) {
|
|
1978
|
+
row_it.data ()->baseline.plot (to_win, colour);
|
|
1979
|
+
colour = (ScrollView::Color) (colour + 1);
|
|
1980
|
+
if (colour > ScrollView::MAGENTA)
|
|
1981
|
+
colour = ScrollView::RED;
|
|
1982
|
+
}
|
|
1983
|
+
}
|
|
1984
|
+
#endif
|
|
1985
|
+
make_old_baselines(block, testing_on);
|
|
1986
|
+
}
|
|
1987
|
+
#ifndef GRAPHICS_DISABLED
|
|
1988
|
+
if (testing_on) {
|
|
1989
|
+
colour = ScrollView::RED;
|
|
1990
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
|
1991
|
+
row_it.data ()->baseline.plot (to_win, colour);
|
|
1992
|
+
colour = (ScrollView::Color) (colour + 1);
|
|
1993
|
+
if (colour > ScrollView::MAGENTA)
|
|
1994
|
+
colour = ScrollView::RED;
|
|
1995
|
+
}
|
|
1996
|
+
}
|
|
1997
|
+
#endif
|
|
1998
|
+
}
|
|
1999
|
+
|
|
2000
|
+
|
|
2001
|
+
/**********************************************************************
|
|
2002
|
+
* make_baseline_spline
|
|
2003
|
+
*
|
|
2004
|
+
* Fit an LMS line to a row.
|
|
2005
|
+
* Make the fit parallel to the given gradient and set the
|
|
2006
|
+
* row accordingly.
|
|
2007
|
+
**********************************************************************/
|
|
2008
|
+
|
|
2009
|
+
void make_baseline_spline( //sort function
|
|
2010
|
+
TO_ROW *row, //row to fit
|
|
2011
|
+
TO_BLOCK *block //block it came from
|
|
2012
|
+
) {
|
|
2013
|
+
float b, c; //fitted curve
|
|
2014
|
+
float middle; //x middle of blob
|
|
2015
|
+
TBOX box; //blob box
|
|
2016
|
+
LMS lms (row->blob_list ()->length ());
|
|
2017
|
+
//blobs
|
|
2018
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
|
2019
|
+
inT32 *xstarts; //spline boundaries
|
|
2020
|
+
double *coeffs; //quadratic coeffs
|
|
2021
|
+
inT32 segments; //no of segments
|
|
2022
|
+
inT32 segment; //current segment
|
|
2023
|
+
|
|
2024
|
+
xstarts =
|
|
2025
|
+
(inT32 *) alloc_mem ((row->blob_list ()->length () + 1) * sizeof (inT32));
|
|
2026
|
+
if (segment_baseline (row, block, segments, xstarts)
|
|
2027
|
+
&& !textord_straight_baselines && !textord_parallel_baselines) {
|
|
2028
|
+
if (textord_quadratic_baselines) {
|
|
2029
|
+
coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
|
|
2030
|
+
for (segment = 0; segment < segments; segment++) {
|
|
2031
|
+
lms.clear ();
|
|
2032
|
+
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
|
|
2033
|
+
blob_it.forward ()) {
|
|
2034
|
+
if (!blob_it.data ()->joined_to_prev ()) {
|
|
2035
|
+
box = blob_it.data ()->bounding_box ();
|
|
2036
|
+
middle = (box.left () + box.right ()) / 2.0;
|
|
2037
|
+
if (middle >= xstarts[segment]
|
|
2038
|
+
&& middle < xstarts[segment + 1]) {
|
|
2039
|
+
lms.add (FCOORD (middle, box.bottom ()));
|
|
2040
|
+
}
|
|
2041
|
+
}
|
|
2042
|
+
}
|
|
2043
|
+
if (textord_quadratic_baselines)
|
|
2044
|
+
lms.fit_quadratic (block->line_size *
|
|
2045
|
+
textord_spline_outlier_fraction,
|
|
2046
|
+
coeffs[segment * 3], b, c);
|
|
2047
|
+
else {
|
|
2048
|
+
lms.fit (b, c);
|
|
2049
|
+
coeffs[segment * 3] = 0;
|
|
2050
|
+
}
|
|
2051
|
+
coeffs[segment * 3 + 1] = b;
|
|
2052
|
+
coeffs[segment * 3 + 2] = c;
|
|
2053
|
+
}
|
|
2054
|
+
}
|
|
2055
|
+
else
|
|
2056
|
+
coeffs = linear_spline_baseline (row, block, segments, xstarts);
|
|
2057
|
+
}
|
|
2058
|
+
else {
|
|
2059
|
+
xstarts[1] = xstarts[segments];
|
|
2060
|
+
segments = 1;
|
|
2061
|
+
coeffs = (double *) alloc_mem (3 * sizeof (double));
|
|
2062
|
+
coeffs[0] = 0;
|
|
2063
|
+
coeffs[1] = row->line_m ();
|
|
2064
|
+
coeffs[2] = row->line_c ();
|
|
2065
|
+
}
|
|
2066
|
+
row->baseline = QSPLINE (segments, xstarts, coeffs);
|
|
2067
|
+
free_mem(coeffs);
|
|
2068
|
+
free_mem(xstarts);
|
|
2069
|
+
}
|
|
2070
|
+
|
|
2071
|
+
|
|
2072
|
+
/**********************************************************************
|
|
2073
|
+
* segment_baseline
|
|
2074
|
+
*
|
|
2075
|
+
* Divide the baseline up into segments which require a different
|
|
2076
|
+
* quadratic fitted to them.
|
|
2077
|
+
* Return TRUE if enough blobs were far enough away to need a quadratic.
|
|
2078
|
+
**********************************************************************/
|
|
2079
|
+
|
|
2080
|
+
BOOL8
|
|
2081
|
+
segment_baseline ( //split baseline
|
|
2082
|
+
TO_ROW * row, //row to fit
|
|
2083
|
+
TO_BLOCK * block, //block it came from
|
|
2084
|
+
inT32 & segments, //no fo segments
|
|
2085
|
+
inT32 xstarts[] //coords of segments
|
|
2086
|
+
) {
|
|
2087
|
+
BOOL8 needs_curve; //needs curved line
|
|
2088
|
+
int blobcount; //no of blobs
|
|
2089
|
+
int blobindex; //current blob
|
|
2090
|
+
int last_state; //above, on , below
|
|
2091
|
+
int state; //of current blob
|
|
2092
|
+
float yshift; //from baseline
|
|
2093
|
+
TBOX box; //blob box
|
|
2094
|
+
TBOX new_box; //new_it box
|
|
2095
|
+
float middle; //xcentre of blob
|
|
2096
|
+
//blobs
|
|
2097
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
|
2098
|
+
BLOBNBOX_IT new_it = blob_it; //front end
|
|
2099
|
+
SORTED_FLOATS yshifts; //shifts from baseline
|
|
2100
|
+
|
|
2101
|
+
needs_curve = FALSE;
|
|
2102
|
+
box = box_next_pre_chopped (&blob_it);
|
|
2103
|
+
xstarts[0] = box.left ();
|
|
2104
|
+
segments = 1;
|
|
2105
|
+
blobcount = row->blob_list ()->length ();
|
|
2106
|
+
if (textord_oldbl_debug)
|
|
2107
|
+
tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
|
|
2108
|
+
blobcount, box.left (), box.bottom ());
|
|
2109
|
+
if (blobcount <= textord_spline_medianwin
|
|
2110
|
+
|| blobcount < textord_spline_minblobs) {
|
|
2111
|
+
blob_it.move_to_last ();
|
|
2112
|
+
box = blob_it.data ()->bounding_box ();
|
|
2113
|
+
xstarts[1] = box.right ();
|
|
2114
|
+
return FALSE;
|
|
2115
|
+
}
|
|
2116
|
+
last_state = 0;
|
|
2117
|
+
new_it.mark_cycle_pt ();
|
|
2118
|
+
for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
|
|
2119
|
+
new_box = box_next_pre_chopped (&new_it);
|
|
2120
|
+
middle = (new_box.left () + new_box.right ()) / 2.0;
|
|
2121
|
+
yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
|
|
2122
|
+
//record shift
|
|
2123
|
+
yshifts.add (yshift, blobindex);
|
|
2124
|
+
if (new_it.cycled_list ()) {
|
|
2125
|
+
xstarts[1] = new_box.right ();
|
|
2126
|
+
return FALSE;
|
|
2127
|
+
}
|
|
2128
|
+
}
|
|
2129
|
+
for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
|
|
2130
|
+
box = box_next_pre_chopped (&blob_it);
|
|
2131
|
+
do {
|
|
2132
|
+
new_box = box_next_pre_chopped (&new_it);
|
|
2133
|
+
//get middle one
|
|
2134
|
+
yshift = yshifts[textord_spline_medianwin / 2];
|
|
2135
|
+
if (yshift > textord_spline_shift_fraction * block->line_size)
|
|
2136
|
+
state = 1;
|
|
2137
|
+
else if (-yshift > textord_spline_shift_fraction * block->line_size)
|
|
2138
|
+
state = -1;
|
|
2139
|
+
else
|
|
2140
|
+
state = 0;
|
|
2141
|
+
if (state != 0)
|
|
2142
|
+
needs_curve = TRUE;
|
|
2143
|
+
// tprintf("State=%d, prev=%d, shift=%g\n",
|
|
2144
|
+
// state,last_state,yshift);
|
|
2145
|
+
if (state != last_state && blobcount > textord_spline_minblobs) {
|
|
2146
|
+
xstarts[segments++] = box.left ();
|
|
2147
|
+
blobcount = 0;
|
|
2148
|
+
}
|
|
2149
|
+
last_state = state;
|
|
2150
|
+
yshifts.remove (blobindex - textord_spline_medianwin);
|
|
2151
|
+
box = box_next_pre_chopped (&blob_it);
|
|
2152
|
+
middle = (new_box.left () + new_box.right ()) / 2.0;
|
|
2153
|
+
yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
|
|
2154
|
+
yshifts.add (yshift, blobindex);
|
|
2155
|
+
blobindex++;
|
|
2156
|
+
blobcount++;
|
|
2157
|
+
}
|
|
2158
|
+
while (!new_it.cycled_list ());
|
|
2159
|
+
if (blobcount > textord_spline_minblobs || segments == 1) {
|
|
2160
|
+
xstarts[segments] = new_box.right ();
|
|
2161
|
+
}
|
|
2162
|
+
else {
|
|
2163
|
+
xstarts[--segments] = new_box.right ();
|
|
2164
|
+
}
|
|
2165
|
+
if (textord_oldbl_debug)
|
|
2166
|
+
tprintf ("Made %d segments on row at (%d,%d)\n",
|
|
2167
|
+
segments, box.right (), box.bottom ());
|
|
2168
|
+
return needs_curve;
|
|
2169
|
+
}
|
|
2170
|
+
|
|
2171
|
+
|
|
2172
|
+
/**********************************************************************
|
|
2173
|
+
* linear_spline_baseline
|
|
2174
|
+
*
|
|
2175
|
+
* Divide the baseline up into segments which require a different
|
|
2176
|
+
* quadratic fitted to them.
|
|
2177
|
+
* Return TRUE if enough blobs were far enough away to need a quadratic.
|
|
2178
|
+
**********************************************************************/
|
|
2179
|
+
|
|
2180
|
+
double *
|
|
2181
|
+
linear_spline_baseline ( //split baseline
|
|
2182
|
+
TO_ROW * row, //row to fit
|
|
2183
|
+
TO_BLOCK * block, //block it came from
|
|
2184
|
+
inT32 & segments, //no fo segments
|
|
2185
|
+
inT32 xstarts[] //coords of segments
|
|
2186
|
+
) {
|
|
2187
|
+
int blobcount; //no of blobs
|
|
2188
|
+
int blobindex; //current blob
|
|
2189
|
+
int index1, index2; //blob numbers
|
|
2190
|
+
int blobs_per_segment; //blobs in each
|
|
2191
|
+
TBOX box; //blob box
|
|
2192
|
+
TBOX new_box; //new_it box
|
|
2193
|
+
float middle; //xcentre of blob
|
|
2194
|
+
//blobs
|
|
2195
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
|
2196
|
+
BLOBNBOX_IT new_it = blob_it; //front end
|
|
2197
|
+
float b, c; //fitted curve
|
|
2198
|
+
LMS lms (row->blob_list ()->length ());
|
|
2199
|
+
double *coeffs; //quadratic coeffs
|
|
2200
|
+
inT32 segment; //current segment
|
|
2201
|
+
|
|
2202
|
+
box = box_next_pre_chopped (&blob_it);
|
|
2203
|
+
xstarts[0] = box.left ();
|
|
2204
|
+
blobcount = 1;
|
|
2205
|
+
while (!blob_it.at_first ()) {
|
|
2206
|
+
blobcount++;
|
|
2207
|
+
box = box_next_pre_chopped (&blob_it);
|
|
2208
|
+
}
|
|
2209
|
+
segments = blobcount / textord_spline_medianwin;
|
|
2210
|
+
if (segments < 1)
|
|
2211
|
+
segments = 1;
|
|
2212
|
+
blobs_per_segment = blobcount / segments;
|
|
2213
|
+
coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
|
|
2214
|
+
if (textord_oldbl_debug)
|
|
2215
|
+
tprintf
|
|
2216
|
+
("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
|
|
2217
|
+
blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
|
|
2218
|
+
segment = 1;
|
|
2219
|
+
for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
|
|
2220
|
+
box_next_pre_chopped(&new_it);
|
|
2221
|
+
index1 = 0;
|
|
2222
|
+
blobindex = index2;
|
|
2223
|
+
do {
|
|
2224
|
+
blobindex += blobs_per_segment;
|
|
2225
|
+
lms.clear ();
|
|
2226
|
+
while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
|
|
2227
|
+
box = box_next_pre_chopped (&blob_it);
|
|
2228
|
+
middle = (box.left () + box.right ()) / 2.0;
|
|
2229
|
+
lms.add (FCOORD (middle, box.bottom ()));
|
|
2230
|
+
index1++;
|
|
2231
|
+
if (index1 == blobindex - blobs_per_segment / 2
|
|
2232
|
+
|| index1 == blobcount - 1) {
|
|
2233
|
+
xstarts[segment] = box.left ();
|
|
2234
|
+
}
|
|
2235
|
+
}
|
|
2236
|
+
lms.fit (b, c);
|
|
2237
|
+
coeffs[segment * 3 - 3] = 0;
|
|
2238
|
+
coeffs[segment * 3 - 2] = b;
|
|
2239
|
+
coeffs[segment * 3 - 1] = c;
|
|
2240
|
+
segment++;
|
|
2241
|
+
if (segment > segments)
|
|
2242
|
+
break;
|
|
2243
|
+
|
|
2244
|
+
blobindex += blobs_per_segment;
|
|
2245
|
+
lms.clear ();
|
|
2246
|
+
while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
|
|
2247
|
+
new_box = box_next_pre_chopped (&new_it);
|
|
2248
|
+
middle = (new_box.left () + new_box.right ()) / 2.0;
|
|
2249
|
+
lms.add (FCOORD (middle, new_box.bottom ()));
|
|
2250
|
+
index2++;
|
|
2251
|
+
if (index2 == blobindex - blobs_per_segment / 2
|
|
2252
|
+
|| index2 == blobcount - 1) {
|
|
2253
|
+
xstarts[segment] = new_box.left ();
|
|
2254
|
+
}
|
|
2255
|
+
}
|
|
2256
|
+
lms.fit (b, c);
|
|
2257
|
+
coeffs[segment * 3 - 3] = 0;
|
|
2258
|
+
coeffs[segment * 3 - 2] = b;
|
|
2259
|
+
coeffs[segment * 3 - 1] = c;
|
|
2260
|
+
segment++;
|
|
2261
|
+
}
|
|
2262
|
+
while (segment <= segments);
|
|
2263
|
+
return coeffs;
|
|
2264
|
+
}
|
|
2265
|
+
|
|
2266
|
+
|
|
2267
|
+
/**********************************************************************
|
|
2268
|
+
* assign_blobs_to_rows
|
|
2269
|
+
*
|
|
2270
|
+
* Make enough rows to allocate all the given blobs to one.
|
|
2271
|
+
* If a block skew is given, use that, else attempt to track it.
|
|
2272
|
+
**********************************************************************/
|
|
2273
|
+
|
|
2274
|
+
void assign_blobs_to_rows(                      //place blobs on rows
                          TO_BLOCK *block,      //block whose blobs are placed
                          float *gradient,      //fixed skew gradient, or NULL to track it
                          int pass,             //only printed in the test-blob message
                          BOOL8 reject_misses,  //keep a NEW_ROW verdict from the overlap test
                          BOOL8 make_new_rows,  //allow creation of rows for unmatched blobs
                          BOOL8 drawing_skew    //trace the skew estimate in to_win
                         ) {
  // Walks the block's blobs in left-to-right order. Each blob is shifted by
  // the current skew estimate and then either added to an existing row,
  // used to start a new row, or left in block->blobs (REJECT). Rows that
  // end up holding no blobs are deleted before returning.
  BLOBNBOX_IT blob_it = &block->blobs;   //blobs awaiting a row
  TO_ROW_IT row_it = block->get_rows();  //rows of the block
  OVERLAP_STATE overlap_result;          //verdict for the current blob
  TO_ROW *row;                           //row being examined
  TO_ROW *dest_row;                      //row that receives the blob
  float g_length = 1.0f;                 //length of the vector (1, gradient)
  float smooth_factor = 1.0;             //weight of the newest skew sample
  float block_skew = 0.0f;               //y shift applied to the blob

  // Vertical centre of the block; only the drawing code reads it.
  const float ycoord =
    (block->block->bounding_box().bottom() +
     block->block->bounding_box().top()) / 2.0f;

  if (gradient != NULL)
    g_length = sqrt(1 + *gradient * *gradient);
#ifndef GRAPHICS_DISABLED
  if (drawing_skew)
    to_win->SetCursor(block->block->bounding_box().left(), ycoord);
#endif

  const ICOORD testpt = ICOORD(textord_test_x, textord_test_y);
  blob_it.sort(blob_x_order);
  inT16 row_count = row_it.length();     //the block may already own rows

  // Left edge of the first blob after sorting, or of the block itself when
  // there are no blobs.
  inT16 left_x;
  if (blob_it.empty())
    left_x = block->block->bounding_box().left();
  else
    left_x = blob_it.data()->bounding_box().left();
  inT16 last_x = left_x;                 //left edge of the previous blob

  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX *blob = blob_it.data();
    const int blob_left = blob->bounding_box().left();
    const int blob_bottom = blob->bounding_box().bottom();
    const BOOL8 is_test_blob = blob->bounding_box().contains(testpt);

    if (gradient != NULL) {
      // A gradient was supplied: the shift is a function of position only.
      block_skew = (1 - 1 / g_length) * blob_bottom
        + *gradient / g_length * blob_left;
    }
    else if (blob_left - last_x > block->line_size / 2
             && last_x - left_x > block->line_size * 2
             && textord_interpolating_skew) {
      // Horizontal jump since the previous blob: scale the tracked skew by
      // the ratio of the distances from the left edge.
      block_skew *= (float) (blob_left - left_x)
        / (last_x - left_x);
    }
    last_x = blob_left;

    const float top = blob->bounding_box().top() - block_skew;
    const float bottom = blob_bottom - block_skew;
#ifndef GRAPHICS_DISABLED
    if (drawing_skew)
      to_win->DrawTo(blob_left, ycoord + block_skew);
#endif

    // A new row may only be started when the caller permits it and the
    // blob is not taller than the block's maximum blob size.
    const bool may_start_row =
      make_new_rows && top - bottom < block->max_blob_size;

    if (row_it.empty()) {
      if (may_start_row) {
        // First row of the block.
        overlap_result = NEW_ROW;
        dest_row =
          new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
        row_count++;
        row_it.add_after_then_move(dest_row);
        smooth_factor = 1.0 / (row_count * textord_skew_lag +
                               textord_skewsmooth_offset2);
      }
      else {
        overlap_result = REJECT;
      }
    }
    else {
      // Stop on the first row whose min_y is not above the blob's top, or
      // on the last row if there is none.
      row_it.move_to_first();
      while (!row_it.at_last() && row_it.data()->min_y() > top)
        row_it.forward();
      row = row_it.data();

      if (row->min_y() <= top && row->max_y() >= bottom) {
        // The blob overlaps this row; let the overlap test pick the best.
        dest_row = row;
        overlap_result = most_overlapping_row(&row_it, dest_row,
                                              top, bottom,
                                              block->line_size,
                                              is_test_blob);
        if (overlap_result == NEW_ROW && !reject_misses)
          overlap_result = ASSIGN;
      }
      else {
        overlap_result = NEW_ROW;
        if (!make_new_rows) {
          // No new rows allowed: try to attach the blob to a nearby row.
          const float near_dist = row_it.data_relative(-1)->min_y() - top;
          if (bottom < row->min_y()) {
            // Blob lies below the row.
            if (row->min_y() - bottom <=
                (block->line_spacing -
                 block->line_size) * textord_merge_desc) {
              overlap_result = ASSIGN;
              dest_row = row;
            }
          }
          else if (near_dist > 0
                   && near_dist < bottom - row->max_y()) {
            // The preceding row is nearer than this one.
            row_it.backward();
            dest_row = row_it.data();
            if (dest_row->min_y() - bottom <=
                (block->line_spacing -
                 block->line_size) * textord_merge_desc) {
              overlap_result = ASSIGN;
            }
          }
          else if (top - row->max_y() <=
                   (block->line_spacing -
                    block->line_size) * (textord_overlap_x +
                                         textord_merge_asc)) {
            overlap_result = ASSIGN;
            dest_row = row;
          }
        }
      }

      if (overlap_result == ASSIGN) {
        dest_row->add_blob(blob_it.extract(), top, bottom,
                           block->line_size);
      }
      else if (overlap_result == NEW_ROW) {
        if (may_start_row) {
          dest_row =
            new TO_ROW(blob_it.extract(), top, bottom,
                       block->line_size);
          row_count++;
          // Insert next to the row the iterator stopped on.
          if (bottom > row_it.data()->min_y())
            row_it.add_before_then_move(dest_row);
          else
            row_it.add_after_then_move(dest_row);
          smooth_factor =
            1.0 / (row_count * textord_skew_lag +
                   textord_skewsmooth_offset);
        }
        else {
          overlap_result = REJECT;
        }
      }
    }

    if (is_test_blob) {
      if (overlap_result != REJECT) {
        tprintf("Test blob assigned to row at (%g,%g) on pass %d\n",
                dest_row->min_y(), dest_row->max_y(), pass);
      }
      else {
        tprintf("Test blob assigned to no row on pass %d\n", pass);
      }
    }

    if (overlap_result != REJECT) {
      // Restore descending min_y order: first move the current row towards
      // the front, then towards the back.
      while (!row_it.at_first()
             && row_it.data()->min_y() >
             row_it.data_relative(-1)->min_y()) {
        row = row_it.extract();
        row_it.backward();
        row_it.add_before_then_move(row);
      }
      while (!row_it.at_last()
             && row_it.data()->min_y() <
             row_it.data_relative(1)->min_y()) {
        row = row_it.extract();
        row_it.forward();
        row_it.add_after_then_move(row);
      }
      // Blend the blob's offset from its row into the running skew.
      block_skew = (1 - smooth_factor) * block_skew
        + smooth_factor * (blob_bottom -
                           dest_row->initial_min_y());
    }
  }

  // Remove rows that hold no blobs.
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    if (row_it.data()->blob_list()->empty())
      delete row_it.extract();
  }
}
|
|
2463
|
+
|
|
2464
|
+
|
|
2465
|
+
/**********************************************************************
|
|
2466
|
+
* most_overlapping_row
|
|
2467
|
+
*
|
|
2468
|
+
* Return the row which most overlaps the blob.
|
|
2469
|
+
**********************************************************************/
|
|
2470
|
+
|
|
2471
|
+
OVERLAP_STATE most_overlapping_row(                    //pick best row
                                   TO_ROW_IT *row_it,  //starts on first candidate, ends on chosen row
                                   TO_ROW *&best_row,  //receives the chosen row
                                   float top,          //top of blob
                                   float bottom,       //bottom of blob
                                   float rowsize,      //tallest row allowed after a merge
                                   BOOL8 testing_blob  //print diagnostics for this blob
                                  ) {
  // Starting from the row under the iterator, scans the following rows
  // while they still overlap the blob vertically. Adjacent overlapping
  // rows are merged when the merged height stays within rowsize. Returns
  // ASSIGN, REJECT (two rows each overlap by at least rowsize - 1), or
  // NEW_ROW (best overlap too small).
  OVERLAP_STATE result = ASSIGN;         //verdict so far
  ICOORD testpt;                         //never read in this function
  BLOBNBOX_IT blob_it;                   //used to merge blob lists
  TO_ROW *row = row_it->data();          //best row so far
  TO_ROW *test_row = row;                //row being compared
  float overlap;                         //overlap of blob with test_row
  float merge_top;                       //top of merged row
  float merge_bottom;                    //bottom of merged row

  // Overlap with the starting row: blob height minus whatever sticks out
  // above and below the row.
  float bestover = top - bottom;
  if (top > row->max_y())
    bestover -= top - row->max_y();
  if (bottom < row->min_y())
    bestover -= row->min_y() - bottom;
  if (testing_blob) {
    tprintf("Test blob y=(%g,%g), row=(%f,%f), overlap=%f\n",
            bottom, top, row->min_y(), row->max_y(), bestover);
  }

  do {
    if (row_it->at_last())
      continue;                          //drops to the loop test, which ends the scan
    row_it->forward();
    test_row = row_it->data();
    if (!(test_row->min_y() <= top && test_row->max_y() >= bottom))
      continue;                          //no overlap; the loop test ends the scan

    merge_top = row->max_y();
    if (test_row->max_y() > row->max_y())
      merge_top = test_row->max_y();
    merge_bottom = row->min_y();
    if (test_row->min_y() < row->min_y())
      merge_bottom = test_row->min_y();

    if (merge_top - merge_bottom <= rowsize) {
      if (testing_blob) {
        tprintf("Merging rows at (%g,%g), (%g,%g)\n",
                row->min_y(), row->max_y(),
                test_row->min_y(), test_row->max_y());
      }
      // Move row's blobs into test_row, then delete the row that sits
      // just before test_row in the list.
      test_row->set_limits(merge_bottom, merge_top);
      blob_it.set_to_list(test_row->blob_list());
      blob_it.add_list_after(row->blob_list());
      blob_it.sort(blob_x_order);
      row_it->backward();
      delete row_it->extract();
      row_it->forward();
      bestover = -1.0f;                  //so that test_row replaces row below
    }

    overlap = top - bottom;
    if (top > test_row->max_y())
      overlap -= top - test_row->max_y();
    if (bottom < test_row->min_y())
      overlap -= test_row->min_y() - bottom;

    if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
      result = REJECT;
    }
    if (overlap > bestover) {
      bestover = overlap;                //remember the larger overlap
      row = test_row;
    }
    if (testing_blob) {
      tprintf
        ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f->%f\n",
         bottom, top, test_row->min_y(), test_row->max_y(),
         overlap, bestover);
    }
  }
  while (!row_it->at_last()
         && test_row->min_y() <= top && test_row->max_y() >= bottom);

  // Step the iterator back until it rests on the chosen row.
  while (row_it->data() != row)
    row_it->backward();

  // Too much of the blob lies outside the best row: ask for a new row.
  if (top - bottom - bestover > rowsize * textord_overlap_x &&
      (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
      && result == ASSIGN) {
    result = NEW_ROW;
  }
  best_row = row;
  return result;
}
|
|
2560
|
+
|
|
2561
|
+
|
|
2562
|
+
/**********************************************************************
|
|
2563
|
+
* blob_x_order
|
|
2564
|
+
*
|
|
2565
|
+
* Sort function to sort blobs in x from page left.
|
|
2566
|
+
**********************************************************************/
|
|
2567
|
+
|
|
2568
|
+
int blob_x_order( //sort function
|
|
2569
|
+
const void *item1, //items to compare
|
|
2570
|
+
const void *item2) {
|
|
2571
|
+
//converted ptr
|
|
2572
|
+
BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
|
|
2573
|
+
//converted ptr
|
|
2574
|
+
BLOBNBOX *blob2 = *(BLOBNBOX **) item2;
|
|
2575
|
+
|
|
2576
|
+
if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
|
|
2577
|
+
return -1;
|
|
2578
|
+
else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ())
|
|
2579
|
+
return 1;
|
|
2580
|
+
else
|
|
2581
|
+
return 0;
|
|
2582
|
+
}
|
|
2583
|
+
|
|
2584
|
+
|
|
2585
|
+
/**********************************************************************
|
|
2586
|
+
* row_y_order
|
|
2587
|
+
*
|
|
2588
|
+
* Sort function to sort rows in y from page top.
|
|
2589
|
+
**********************************************************************/
|
|
2590
|
+
|
|
2591
|
+
int row_y_order( //sort function
|
|
2592
|
+
const void *item1, //items to compare
|
|
2593
|
+
const void *item2) {
|
|
2594
|
+
//converted ptr
|
|
2595
|
+
TO_ROW *row1 = *(TO_ROW **) item1;
|
|
2596
|
+
//converted ptr
|
|
2597
|
+
TO_ROW *row2 = *(TO_ROW **) item2;
|
|
2598
|
+
|
|
2599
|
+
if (row1->parallel_c () > row2->parallel_c ())
|
|
2600
|
+
return -1;
|
|
2601
|
+
else if (row1->parallel_c () < row2->parallel_c ())
|
|
2602
|
+
return 1;
|
|
2603
|
+
else
|
|
2604
|
+
return 0;
|
|
2605
|
+
}
|
|
2606
|
+
|
|
2607
|
+
|
|
2608
|
+
/**********************************************************************
|
|
2609
|
+
* row_spacing_order
|
|
2610
|
+
*
|
|
2611
|
+
* Qsort style function to compare 2 TO_ROWS based on their spacing value.
|
|
2612
|
+
**********************************************************************/
|
|
2613
|
+
|
|
2614
|
+
int row_spacing_order( //sort function
|
|
2615
|
+
const void *item1, //items to compare
|
|
2616
|
+
const void *item2) {
|
|
2617
|
+
//converted ptr
|
|
2618
|
+
TO_ROW *row1 = *(TO_ROW **) item1;
|
|
2619
|
+
//converted ptr
|
|
2620
|
+
TO_ROW *row2 = *(TO_ROW **) item2;
|
|
2621
|
+
|
|
2622
|
+
if (row1->spacing < row2->spacing)
|
|
2623
|
+
return -1;
|
|
2624
|
+
else if (row1->spacing > row2->spacing)
|
|
2625
|
+
return 1;
|
|
2626
|
+
else
|
|
2627
|
+
return 0;
|
|
2628
|
+
}
|