tesseract_bin 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
@@ -0,0 +1,1761 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
* File: oldbasel.cpp (Formerly oldbl.c)
|
3
|
+
* Description: A re-implementation of the old baseline algorithm.
|
4
|
+
* Author: Ray Smith
|
5
|
+
* Created: Wed Oct 6 09:41:48 BST 1993
|
6
|
+
*
|
7
|
+
* (C) Copyright 1993, Hewlett-Packard Ltd.
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
9
|
+
** you may not use this file except in compliance with the License.
|
10
|
+
** You may obtain a copy of the License at
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
** See the License for the specific language governing permissions and
|
16
|
+
** limitations under the License.
|
17
|
+
*
|
18
|
+
**********************************************************************/
|
19
|
+
|
20
|
+
#include "mfcpch.h"
|
21
|
+
#include "statistc.h"
|
22
|
+
#include "quadlsq.h"
|
23
|
+
#include "lmedsq.h"
|
24
|
+
#include "makerow.h"
|
25
|
+
#include "drawtord.h"
|
26
|
+
#include "oldbasel.h"
|
27
|
+
#include "tprintf.h"
|
28
|
+
|
29
|
+
#define EXTERN
|
30
|
+
|
31
|
+
EXTERN BOOL_VAR (textord_really_old_xheight, FALSE,
|
32
|
+
"Use original wiseowl xheight");
|
33
|
+
EXTERN BOOL_VAR (textord_oldbl_debug, FALSE, "Debug old baseline generation");
|
34
|
+
EXTERN BOOL_VAR (textord_debug_baselines, FALSE, "Debug baseline generation");
|
35
|
+
EXTERN BOOL_VAR (textord_oldbl_paradef, TRUE, "Use para default mechanism");
|
36
|
+
EXTERN BOOL_VAR (textord_oldbl_split_splines, TRUE, "Split stepped splines");
|
37
|
+
EXTERN BOOL_VAR (textord_oldbl_merge_parts, TRUE, "Merge suspect partitions");
|
38
|
+
EXTERN BOOL_VAR (oldbl_corrfix, TRUE, "Improve correlation of heights");
|
39
|
+
EXTERN BOOL_VAR (oldbl_xhfix, FALSE,
|
40
|
+
"Fix bug in modes threshold for xheights");
|
41
|
+
EXTERN BOOL_VAR(textord_ocropus_mode, FALSE, "Make baselines for ocropus");
|
42
|
+
EXTERN double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc");
|
43
|
+
EXTERN INT_VAR (oldbl_holed_losscount, 10,
|
44
|
+
"Max lost before fallback line used");
|
45
|
+
EXTERN double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot");
|
46
|
+
EXTERN double_VAR (textord_oldbl_jumplimit, 0.15,
|
47
|
+
"X fraction for new partition");
|
48
|
+
|
49
|
+
#define TURNLIMIT 1 /*min size for turning point */
|
50
|
+
#define X_HEIGHT_FRACTION 0.7 /*x-height/caps height */
|
51
|
+
#define DESCENDER_FRACTION 0.5 /*descender/x-height */
|
52
|
+
#define MIN_ASC_FRACTION 0.20 /*min size of ascenders */
|
53
|
+
#define MIN_DESC_FRACTION 0.25 /*min size of descenders */
|
54
|
+
#define MINASCRISE 2.0 /*min ascender/desc step */
|
55
|
+
#define MAXHEIGHTVARIANCE 0.15 /*accepted variation in x-height */
|
56
|
+
#define MAXHEIGHT 300 /*max blob height */
|
57
|
+
#define MAXOVERLAP 0.1 /*max 10% missed overlap */
|
58
|
+
#define MAXBADRUN 2 /*max non best for failed */
|
59
|
+
#define HEIGHTBUCKETS 200 /* Num of buckets */
|
60
|
+
#define DELTAHEIGHT 5.0 /* Small amount of diff */
|
61
|
+
#define GOODHEIGHT 5
|
62
|
+
#define MAXLOOPS 10
|
63
|
+
#define MODENUM 10
|
64
|
+
#define MAXPARTS 6
|
65
|
+
#define SPLINESIZE 23
|
66
|
+
|
67
|
+
#define ABS(x) ((x)<0 ? (-(x)) : (x))
|
68
|
+
|
69
|
+
/**********************************************************************
|
70
|
+
* make_old_baselines
|
71
|
+
*
|
72
|
+
* Top level function to make baselines the old way.
|
73
|
+
**********************************************************************/
|
74
|
+
|
75
|
+
// Runs find_textlines on every row of the block and then hands the block
// to correlate_lines.
void make_old_baselines(                   //make splines
                        TO_BLOCK *block,   //block to do
                        BOOL8 testing_on   //correct orientation (not read here)
                       ) {
  QSPLINE *last_good = NULL;     //baseline of previous row, if it fitted
  BLOBNBOX_IT blob_it;           //only used for the failure message
  TO_ROW_IT row_it = block->get_rows ();

  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    TO_ROW *row = row_it.data ();

    // First attempt has no starting spline; if that leaves a non-positive
    // xheight, retry seeded with the previous row's baseline when there is one.
    find_textlines (block, row, 2, NULL);
    if (last_good != NULL && row->xheight <= 0) {
      find_textlines (block, row, 2, last_good);
    }

    if (row->xheight > 0) {
      last_good = &row->baseline;  //was a good one
      continue;
    }

    // Still no usable xheight: do not seed the next row from this one.
    last_good = NULL;
    blob_it.set_to_list (row->blob_list ());
    if (textord_debug_baselines) {
      tprintf ("Row baseline generation failed on row at (%d,%d)\n",
               blob_it.data ()->bounding_box ().left (),
               blob_it.data ()->bounding_box ().bottom ());
    }
  }
  correlate_lines (block);
}
|
104
|
+
|
105
|
+
|
106
|
+
/**********************************************************************
|
107
|
+
* correlate_lines
|
108
|
+
*
|
109
|
+
* Correlate the x-heights and ascender heights of a block to fill-in
|
110
|
+
* the ascender height and descender height for rows without one.
|
111
|
+
* Also fix baselines of rows without a decent fit.
|
112
|
+
**********************************************************************/
|
113
|
+
|
114
|
+
// Collects the block's rows into a temporary array, repairs failed rows
// via correlate_neighbours, and sets block->xheight from
// correlate_with_stats with two fallbacks.
void correlate_lines(                  //cleanup lines
                     TO_BLOCK *block   //block to do
                    ) {
  TO_ROW_IT row_it = block->get_rows ();
  const int rowcount = row_it.length ();

  if (rowcount == 0) {
    block->xheight = block->line_size;  //default value, nothing to do
    return;
  }

  // Flatten the row list so rows can be addressed by index.
  TO_ROW **rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
  int filled = 0;
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    rows[filled] = row_it.data ();
    filled++;
  }

  correlate_neighbours (block, rows, rowcount);  /*try to fix bad lines */

  /*use stats */
  block->xheight = (float) correlate_with_stats (rows, rowcount);
  if (block->xheight <= 0) {
    //desperate: derive it from the line size
    block->xheight = block->line_size * textord_merge_x;
  }
  if (block->xheight < textord_min_xheight) {
    block->xheight = (float) textord_min_xheight;
  }

  free_mem (rows);
}
|
148
|
+
|
149
|
+
|
150
|
+
/**********************************************************************
|
151
|
+
* correlate_neighbours
|
152
|
+
*
|
153
|
+
* Try to fix rows that had a bad spline fit by using neighbours.
|
154
|
+
**********************************************************************/
|
155
|
+
|
156
|
+
// For every row with a negative xheight, re-runs find_textlines seeded
// with the baseline of a nearby row that has a non-negative xheight and
// whose baseline overlaps this row's (within MAXOVERLAP): first a degree 2
// fit from the row above, then from the row below, then one degree 1 fit.
// Any xheight that is still negative afterwards is negated.
void correlate_neighbours(                  //fix bad rows
                          TO_BLOCK *block,  /*block rows are in */
                          TO_ROW **rows,    /*rows of block */
                          int rowcount      /*no of rows to do */
                         ) {
  int rowindex;                  /*no of row */

  for (rowindex = 0; rowindex < rowcount; rowindex++) {
    TO_ROW *row = rows[rowindex];
    if (!(row->xheight < 0))
      continue;                  /*fit was fine */

    /*quadratic failed: look for a decent row above */
    // NOTE(review): this search starts at rowindex - 2 whereas the search
    // below starts at rowindex + 1. Kept exactly as found; confirm whether
    // skipping the adjacent upper row is intentional.
    int upperrow = rowindex - 2;
    while (upperrow >= 0
           && (rows[upperrow]->xheight < 0.0
               || !row->baseline.overlap (&rows[upperrow]->baseline,
                                          MAXOVERLAP))) {
      upperrow--;
    }

    /*and a decent row below */
    int lowerrow = rowindex + 1;
    while (lowerrow < rowcount
           && (rows[lowerrow]->xheight < 0.0
               || !row->baseline.overlap (&rows[lowerrow]->baseline,
                                          MAXOVERLAP))) {
      lowerrow++;
    }

    if (upperrow >= 0)
      find_textlines (block, row, 2, &rows[upperrow]->baseline);
    if (row->xheight < 0 && lowerrow < rowcount)
      find_textlines (block, row, 2, &rows[lowerrow]->baseline);
    if (row->xheight < 0) {
      // Both degree 2 attempts failed: make one degree 1 attempt.
      if (upperrow >= 0)
        find_textlines (block, row, 1, &rows[upperrow]->baseline);
      else if (lowerrow < rowcount)
        find_textlines (block, row, 1, &rows[lowerrow]->baseline);
    }
  }

  for (rowindex = 0; rowindex < rowcount; rowindex++) {
    TO_ROW *row = rows[rowindex];
    if (row->xheight < 0)        /*linear failed */
      row->xheight = -row->xheight;  /*make do */
  }
}
|
205
|
+
|
206
|
+
|
207
|
+
/**********************************************************************
|
208
|
+
* correlate_with_stats
|
209
|
+
*
|
210
|
+
* correlate the x-heights and ascender heights of a block to fill-in
|
211
|
+
* the ascender height and descender height for rows without one.
|
212
|
+
**********************************************************************/
|
213
|
+
|
214
|
+
// Averages x-height, ascender rise and descender drop over the rows, then
// rewrites ascrise, descdrop, xheight and all_caps of rows that lack a
// usable ascender or descender measurement.
// Returns the block x-height estimate truncated to int, or 0 when
// rowcount <= 0 (the averages would otherwise be 0/0).
int correlate_with_stats(                //fix xheights
                         TO_ROW **rows,  /*rows of block */
                         int rowcount    /*no of rows to do */
                        ) {
  TO_ROW *row;                   /*current row */
  int rowindex;                  /*no of row */
  float lineheight = 0.0f;       /*mean x-height */
  float ascheight = 0.0f;        /*summed ascender rise */
  float fullheight = 0.0f;       /*mean top height */
  float descheight = 0.0f;       /*mean descender drop */
  int xcount = 0;                /*rows that have ascenders */
  int fullcount = 0;             /*rows without ascenders */
  int desccount = 0;             /*rows that have descenders */

  // No rows means no samples; bail out before dividing by a zero count.
  if (rowcount <= 0)
    return 0;

  for (rowindex = 0; rowindex < rowcount; rowindex++) {
    row = rows[rowindex];
    if (row->ascrise > 0.0) {    /*got ascenders? */
      lineheight += row->xheight;
      ascheight += row->ascrise;
      xcount++;
    }
    else {
      fullheight += row->xheight;  /*assume full height */
      fullcount++;
    }
    if (row->descdrop < 0.0) {   /*got descenders? */
      descheight += row->descdrop;
      desccount++;
    }
  }

  if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
    lineheight /= xcount;        /*average x-height */
    /*average caps height */
    fullheight = lineheight + ascheight / xcount;
    /*must be decent size */
    if (fullheight < lineheight * (1 + MIN_ASC_FRACTION))
      fullheight = lineheight * (1 + MIN_ASC_FRACTION);
  }
  else {
    // Reached with fullcount > 0: either no row had ascenders (so every
    // row was counted here) or xcount < fullcount.
    fullheight /= fullcount;     /*average max height */
    /*guess x-height */
    lineheight = fullheight * X_HEIGHT_FRACTION;
  }

  if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))
    descheight /= desccount;     /*average descenders */
  else
    descheight = -lineheight * DESCENDER_FRACTION;  /*guess descenders */

  const float minascheight = lineheight * MIN_ASC_FRACTION;
  const float mindescheight = -lineheight * MIN_DESC_FRACTION;

  for (rowindex = 0; rowindex < rowcount; rowindex++) {
    row = rows[rowindex];        /*do each row */
    row->all_caps = FALSE;
    // NOTE(review): nothing visible here rules out row->xheight == 0, in
    // which case this ratio is inf/NaN; confirm that callers exclude it.
    if (row->ascrise / row->xheight < MIN_ASC_FRACTION) {
      /*no ascenders */
      if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
          && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) {
        // Height matches the average x-height: snap to the averages.
        row->ascrise = fullheight - lineheight;
        row->xheight = lineheight;
      }
      else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE)
               && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) {
        // Height matches the average full height: treat as all caps.
        row->ascrise = row->xheight - lineheight;
        row->xheight = lineheight;
        row->all_caps = TRUE;
      }
      else {
        // Matches neither: split the measured height in the average ratio.
        row->ascrise = (fullheight - lineheight) * row->xheight
          / fullheight;
        row->xheight -= row->ascrise;
        row->all_caps = TRUE;
      }
      if (row->ascrise < minascheight)
        row->ascrise =
          row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION);
    }
    if (row->descdrop > mindescheight) {
      if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
          && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE))
        row->descdrop = descheight;  /*set to average */
      else
        row->descdrop = -row->xheight * DESCENDER_FRACTION;
    }
  }
  return (int) lineheight;       //block xheight
}
|
318
|
+
|
319
|
+
|
320
|
+
/**********************************************************************
|
321
|
+
* find_textlines
|
322
|
+
*
|
323
|
+
* Compute the baseline for the given row.
|
324
|
+
**********************************************************************/
|
325
|
+
|
326
|
+
// Computes row->baseline for one row.
// Steps: collect blob boxes (get_blob_coords), build a first baseline,
// partition the blobs about it, refit a spline of the requested degree to
// the biggest partition, extrapolate it across the block and finally call
// the x-height routine. With fewer than two blobs the row is marked as
// failed (xheight = -1, descdrop = ascrise = 0) before the x-height call.
void find_textlines(                  //get baseline
                    TO_BLOCK *block,  //block row is in
                    TO_ROW *row,      //row to do
                    int degree,       //required approximation
                    QSPLINE *spline   //starting spline
                   ) {
  int partcount;                 /*no of partitions of */
  // Initialised because get_blob_coords may return early on an empty row
  // without writing its reference outputs.
  BOOL8 holed_line = FALSE;      //lost too many blobs
  int bestpart;                  /*biggest partition */
  int partsizes[MAXPARTS];       /*no in each partition */
  int pointcount;                /*no of coords */
  int xstarts[SPLINESIZE + 1];   //segment boundaries
  int segments;                  //no of segments

  // Size the scratch arrays for every blob in the row; get_blob_coords
  // then overwrites blobcount with the number of boxes it actually kept.
  int blobcount = row->blob_list ()->length ();
  char *partids = (char *) alloc_mem (blobcount * sizeof (char));
  int *xcoords = (int *) alloc_mem (blobcount * sizeof (int));
  int *ycoords = (int *) alloc_mem (blobcount * sizeof (int));
  TBOX *blobcoords = (TBOX *) alloc_mem (blobcount * sizeof (TBOX));
  float *ydiffs = (float *) alloc_mem (blobcount * sizeof (float));

  const int lineheight = get_blob_coords (row, (int) block->line_size,
                                          blobcoords, holed_line, blobcount);
  /*limit for line change */
  float jumplimit = lineheight * textord_oldbl_jumplimit;
  if (jumplimit < MINASCRISE)
    jumplimit = MINASCRISE;

  if (textord_oldbl_debug) {
    tprintf
      ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
       block->line_size, lineheight, jumplimit);
  }

  // Both first-baseline helpers index blobcoords[0] and
  // blobcoords[blobcount - 1], so they are skipped for a row without blobs.
  if (blobcount > 0) {
    if (holed_line)
      make_holed_baseline (blobcoords, blobcount, spline, &row->baseline,
                           row->line_m ());
    else
      make_first_baseline (blobcoords, blobcount,
                           xcoords, ycoords, spline, &row->baseline,
                           jumplimit);
  }
#ifndef GRAPHICS_DISABLED
  if (textord_show_final_rows)
    row->baseline.plot (to_win, ScrollView::GOLDENROD);
#endif

  if (blobcount > 1) {
    bestpart = partition_line (blobcoords, blobcount,
                               &partcount, partids, partsizes,
                               &row->baseline, jumplimit, ydiffs);
    pointcount = partition_coords (blobcoords, blobcount,
                                   partids, bestpart, xcoords, ycoords);
    segments = segment_spline (blobcoords, blobcount,
                               xcoords, ycoords,
                               degree, pointcount, xstarts);
    if (!holed_line) {
      // Refit, repeating for as long as split_stepped_spline returns true.
      do {
        row->baseline = QSPLINE (xstarts, segments,
                                 xcoords, ycoords, pointcount, degree);
      }
      while (textord_oldbl_split_splines
             && split_stepped_spline (&row->baseline, jumplimit / 2,
                                      xcoords, xstarts, segments));
    }
    find_lesser_parts (row, blobcoords, blobcount,
                       partids, partsizes, partcount, bestpart);
  }
  else {
    row->xheight = -1.0f;        /*failed */
    row->descdrop = 0.0f;
    row->ascrise = 0.0f;
  }

  row->baseline.extrapolate (row->line_m (),
                             block->block->bounding_box ().left (),
                             block->block->bounding_box ().right ());
  if (textord_really_old_xheight)
    old_first_xheight (row, blobcoords, lineheight,
                       blobcount, &row->baseline, jumplimit);
  else
    make_first_xheight (row, blobcoords, lineheight, (int) block->line_size,
                        blobcount, &row->baseline, jumplimit);

  free_mem (partids);
  free_mem (xcoords);
  free_mem (ycoords);
  free_mem (blobcoords);
  free_mem (ydiffs);
}
|
425
|
+
|
426
|
+
|
427
|
+
/**********************************************************************
|
428
|
+
* get_blob_coords
|
429
|
+
*
|
430
|
+
* Fill the blobcoords array with the coordinates of the blobs
|
431
|
+
* in the row. The return value is the first guess at the line height.
|
432
|
+
**********************************************************************/
|
433
|
+
|
434
|
+
// Fills blobcoords with the boxes returned by box_next_pre_chopped and
// keeps those that are the first box, taller than a quarter of lineheight,
// the last box, or roughly dot shaped. The rest count as "lost".
//
// Outputs (always written, including for an empty row):
//   holed_line - TRUE when the longest run of consecutive lost blobs
//                exceeds oldbl_holed_losscount
//   outcount   - number of boxes kept in blobcoords
// Returns 0 for an empty row; otherwise heightstat.ile (0.25) over the
// heights of the taller blobs when there is more than one such sample,
// else the height of the first box.
int get_blob_coords(                    //get boxes
                    TO_ROW *row,        //row to use
                    inT32 lineheight,   //block level
                    TBOX *blobcoords,   //output boxes
                    BOOL8 &holed_line,  //lost a lot of blobs
                    int &outcount       //no of real blobs
                   ) {
  BLOBNBOX_IT blob_it = row->blob_list ();  //blobs
  STATS heightstat (0, MAXHEIGHT);  /*height stat collection */
  int blobindex = 0;             /*no along text line */
  int losscount = 0;             //current run of lost blobs
  int maxlosscount = 0;          //longest run of lost blobs

  if (blob_it.empty ()) {
    // Give the reference outputs defined values so that callers never
    // read indeterminate data for an empty row.
    holed_line = FALSE;
    outcount = 0;
    return 0;                    //none
  }

  blob_it.mark_cycle_pt ();
  do {
    TBOX &box = blobcoords[blobindex];
    box = box_next_pre_chopped (&blob_it);

    const BOOL8 tall = box.height () > lineheight * 0.25;
    if (tall)
      heightstat.add (box.height (), 1);

    if (blobindex == 0 || tall || blob_it.cycled_list ()
        || (box.height () < box.width () * oldbl_dot_error_size
            && box.width () < box.height () * oldbl_dot_error_size)) {
      // Kept: first box, tall enough, last box, or counts as a dot.
      blobindex++;               /*no of merged blobs */
      losscount = 0;
    }
    else {
      // Lost: this slot is overwritten by the next box.
      losscount++;
      if (losscount > maxlosscount)
        maxlosscount = losscount;  //remember max
    }
  }
  while (!blob_it.cycled_list ());

  holed_line = maxlosscount > oldbl_holed_losscount;
  outcount = blobindex;          /*total blobs */

  if (heightstat.get_total () > 1)
    return (int) heightstat.ile (0.25);  /*guess x-height */
  return blobcoords[0].height ();
}
|
493
|
+
|
494
|
+
|
495
|
+
/**********************************************************************
|
496
|
+
* make_first_baseline
|
497
|
+
*
|
498
|
+
* Make the first estimate at a baseline, either by shifting
|
499
|
+
* a supplied previous spline, or by doing a piecewise linear
|
500
|
+
* approximation using all the blobs.
|
501
|
+
**********************************************************************/
|
502
|
+
|
503
|
+
void
|
504
|
+
make_first_baseline ( //initial approximation
|
505
|
+
TBOX blobcoords[], /*blob bounding boxes */
|
506
|
+
int blobcount, /*no of blobcoords */
|
507
|
+
int xcoords[], /*coords for spline */
|
508
|
+
int ycoords[], /*approximator */
|
509
|
+
QSPLINE * spline, /*initial spline */
|
510
|
+
QSPLINE * baseline, /*output spline */
|
511
|
+
float jumplimit /*guess half descenders */
|
512
|
+
) {
|
513
|
+
int leftedge; /*left edge of line */
|
514
|
+
int rightedge; /*right edge of line */
|
515
|
+
int blobindex; /*current blob */
|
516
|
+
int segment; /*current segment */
|
517
|
+
float prevy, thisy, nexty; /*3 y coords */
|
518
|
+
float y1, y2, y3; /*3 smooth blobs */
|
519
|
+
float maxmax, minmin; /*absolute limits */
|
520
|
+
int x2 = 0; /*right edge of old y3 */
|
521
|
+
int ycount; /*no of ycoords in use */
|
522
|
+
float yturns[SPLINESIZE]; /*y coords of turn pts */
|
523
|
+
int xturns[SPLINESIZE]; /*xcoords of turn pts */
|
524
|
+
int xstarts[SPLINESIZE + 1];
|
525
|
+
int segments; //no of segments
|
526
|
+
ICOORD shift; //shift of spline
|
527
|
+
|
528
|
+
prevy = 0;
|
529
|
+
/*left edge of row */
|
530
|
+
leftedge = blobcoords[0].left ();
|
531
|
+
/*right edge of line */
|
532
|
+
rightedge = blobcoords[blobcount - 1].right ();
|
533
|
+
if (spline == NULL /*no given spline */
|
534
|
+
|| spline->segments < 3 /*or trivial */
|
535
|
+
/*or too non-overlap */
|
536
|
+
|| spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
|
537
|
+
|| spline->xcoords[spline->segments - 1] < rightedge
|
538
|
+
- MAXOVERLAP * (rightedge - leftedge)) {
|
539
|
+
if (textord_oldbl_paradef)
|
540
|
+
return; //use default
|
541
|
+
xstarts[0] = blobcoords[0].left () - 1;
|
542
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
543
|
+
xcoords[blobindex] = (blobcoords[blobindex].left ()
|
544
|
+
+ blobcoords[blobindex].right ()) / 2;
|
545
|
+
ycoords[blobindex] = blobcoords[blobindex].bottom ();
|
546
|
+
}
|
547
|
+
xstarts[1] = blobcoords[blobcount - 1].right () + 1;
|
548
|
+
segments = 1; /*no of segments */
|
549
|
+
|
550
|
+
/*linear */
|
551
|
+
*baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
|
552
|
+
|
553
|
+
if (blobcount >= 3) {
|
554
|
+
y1 = y2 = y3 = 0.0f;
|
555
|
+
ycount = 0;
|
556
|
+
segment = 0; /*no of segments */
|
557
|
+
maxmax = minmin = 0.0f;
|
558
|
+
thisy = ycoords[0] - baseline->y (xcoords[0]);
|
559
|
+
nexty = ycoords[1] - baseline->y (xcoords[1]);
|
560
|
+
for (blobindex = 2; blobindex < blobcount; blobindex++) {
|
561
|
+
prevy = thisy; /*shift ycoords */
|
562
|
+
thisy = nexty;
|
563
|
+
nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
|
564
|
+
/*middle of smooth y */
|
565
|
+
if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
|
566
|
+
y1 = y2; /*shift window */
|
567
|
+
y2 = y3;
|
568
|
+
y3 = thisy; /*middle point */
|
569
|
+
ycount++;
|
570
|
+
/*local max */
|
571
|
+
if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
|
572
|
+
/*local min */
|
573
|
+
|| (y1 > y2 && y2 <= y3))) {
|
574
|
+
if (segment < SPLINESIZE - 2) {
|
575
|
+
/*turning pt */
|
576
|
+
xturns[segment] = x2;
|
577
|
+
yturns[segment] = y2;
|
578
|
+
segment++; /*no of spline segs */
|
579
|
+
}
|
580
|
+
}
|
581
|
+
if (ycount == 1) {
|
582
|
+
maxmax = minmin = y3;/*initialise limits */
|
583
|
+
}
|
584
|
+
else {
|
585
|
+
if (y3 > maxmax)
|
586
|
+
maxmax = y3; /*biggest max */
|
587
|
+
if (y3 < minmin)
|
588
|
+
minmin = y3; /*smallest min */
|
589
|
+
}
|
590
|
+
/*possible turning pt */
|
591
|
+
x2 = blobcoords[blobindex - 1].right ();
|
592
|
+
}
|
593
|
+
}
|
594
|
+
|
595
|
+
jumplimit *= 1.2;
|
596
|
+
/*must be wavy */
|
597
|
+
if (maxmax - minmin > jumplimit) {
|
598
|
+
ycount = segment; /*no of segments */
|
599
|
+
for (blobindex = 0, segment = 1; blobindex < ycount;
|
600
|
+
blobindex++) {
|
601
|
+
if (yturns[blobindex] > minmin + jumplimit
|
602
|
+
|| yturns[blobindex] < maxmax - jumplimit) {
|
603
|
+
/*significant peak */
|
604
|
+
if (segment == 1
|
605
|
+
|| yturns[blobindex] > prevy + jumplimit
|
606
|
+
|| yturns[blobindex] < prevy - jumplimit) {
|
607
|
+
/*different to previous */
|
608
|
+
xstarts[segment] = xturns[blobindex];
|
609
|
+
segment++;
|
610
|
+
prevy = yturns[blobindex];
|
611
|
+
}
|
612
|
+
/*bigger max */
|
613
|
+
else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
|
614
|
+
/*smaller min */
|
615
|
+
|| (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
|
616
|
+
xstarts[segment - 1] = xturns[blobindex];
|
617
|
+
/*improved previous */
|
618
|
+
prevy = yturns[blobindex];
|
619
|
+
}
|
620
|
+
}
|
621
|
+
}
|
622
|
+
xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
|
623
|
+
segments = segment; /*no of segments */
|
624
|
+
/*linear */
|
625
|
+
*baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
|
626
|
+
}
|
627
|
+
}
|
628
|
+
}
|
629
|
+
else {
|
630
|
+
*baseline = *spline; /*copy it */
|
631
|
+
shift = ICOORD (0, (inT16) (blobcoords[0].bottom ()
|
632
|
+
- spline->y (blobcoords[0].right ())));
|
633
|
+
baseline->move (shift);
|
634
|
+
}
|
635
|
+
}
|
636
|
+
|
637
|
+
|
638
|
+
/**********************************************************************
|
639
|
+
* make_holed_baseline
|
640
|
+
*
|
641
|
+
* Make the first estimate at a baseline for a row with holes: fit a
|
641
|
+
* straight line of the given gradient through the blob bottoms, then use
|
642
|
+
* a shifted copy of the supplied spline instead if it overlaps the row.
|
644
|
+
**********************************************************************/
|
645
|
+
|
646
|
+
void
|
647
|
+
make_holed_baseline ( //initial approximation
|
648
|
+
TBOX blobcoords[], /*blob bounding boxes */
|
649
|
+
int blobcount, /*no of blobcoords */
|
650
|
+
QSPLINE * spline, /*initial spline */
|
651
|
+
QSPLINE * baseline, /*output spline */
|
652
|
+
float gradient //of line
|
653
|
+
) {
|
654
|
+
int leftedge; /*left edge of line */
|
655
|
+
int rightedge; /*right edge of line */
|
656
|
+
int blobindex; /*current blob */
|
657
|
+
float x; //centre of row
|
658
|
+
ICOORD shift; //shift of spline
|
659
|
+
|
660
|
+
LMS lms(blobcount); //straight baseline
|
661
|
+
inT32 xstarts[2]; //straight line
|
662
|
+
double coeffs[3];
|
663
|
+
float c; //line parameter
|
664
|
+
|
665
|
+
/*left edge of row */
|
666
|
+
leftedge = blobcoords[0].left ();
|
667
|
+
/*right edge of line */
|
668
|
+
rightedge = blobcoords[blobcount - 1].right ();
|
669
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
670
|
+
lms.add (FCOORD ((blobcoords[blobindex].left () +
|
671
|
+
blobcoords[blobindex].right ()) / 2.0,
|
672
|
+
blobcoords[blobindex].bottom ()));
|
673
|
+
}
|
674
|
+
lms.constrained_fit (gradient, c);
|
675
|
+
xstarts[0] = leftedge;
|
676
|
+
xstarts[1] = rightedge;
|
677
|
+
coeffs[0] = 0;
|
678
|
+
coeffs[1] = gradient;
|
679
|
+
coeffs[2] = c;
|
680
|
+
*baseline = QSPLINE (1, xstarts, coeffs);
|
681
|
+
if (spline != NULL /*no given spline */
|
682
|
+
&& spline->segments >= 3 /*or trivial */
|
683
|
+
/*or too non-overlap */
|
684
|
+
&& spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge)
|
685
|
+
&& spline->xcoords[spline->segments - 1] >= rightedge
|
686
|
+
- MAXOVERLAP * (rightedge - leftedge)) {
|
687
|
+
*baseline = *spline; /*copy it */
|
688
|
+
x = (leftedge + rightedge) / 2.0;
|
689
|
+
shift = ICOORD (0, (inT16) (gradient * x + c - spline->y (x)));
|
690
|
+
baseline->move (shift);
|
691
|
+
}
|
692
|
+
}
|
693
|
+
|
694
|
+
|
695
|
+
/**********************************************************************
|
696
|
+
* partition_line
|
697
|
+
*
|
698
|
+
* Partition a row of blobs into different groups of continuous
|
699
|
+
* y position. jumplimit specifies the max allowable limit on a jump
|
700
|
+
* before a new partition is started.
|
701
|
+
* The return value is the biggest partition
|
702
|
+
**********************************************************************/
|
703
|
+
|
704
|
+
int
|
705
|
+
partition_line ( //partition blobs
|
706
|
+
TBOX blobcoords[], //bounding boxes
|
707
|
+
int blobcount, /*no of blobs on row */
|
708
|
+
int *numparts, /*number of partitions */
|
709
|
+
char partids[], /*partition no of each blob */
|
710
|
+
int partsizes[], /*no in each partition */
|
711
|
+
QSPLINE * spline, /*curve to fit to */
|
712
|
+
float jumplimit, /*allowed delta change */
|
713
|
+
float ydiffs[] /*diff from spline */
|
714
|
+
) {
|
715
|
+
register int blobindex; /*no along text line */
|
716
|
+
int bestpart; /*best new partition */
|
717
|
+
int biggestpart; /*part with most members */
|
718
|
+
float diff; /*difference from line */
|
719
|
+
int startx; /*index of start blob */
|
720
|
+
float partdiffs[MAXPARTS]; /*step between parts */
|
721
|
+
|
722
|
+
for (bestpart = 0; bestpart < MAXPARTS; bestpart++)
|
723
|
+
partsizes[bestpart] = 0; /*zero them all */
|
724
|
+
|
725
|
+
startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
|
726
|
+
*numparts = 1; /*1 partition */
|
727
|
+
bestpart = -1; /*first point */
|
728
|
+
for (blobindex = startx; blobindex < blobcount; blobindex++) {
|
729
|
+
/*do each blob in row */
|
730
|
+
diff = ydiffs[blobindex]; /*diff from line */
|
731
|
+
if (textord_oldbl_debug) {
|
732
|
+
tprintf ("%d(%d,%d), ", blobindex,
|
733
|
+
blobcoords[blobindex].left (),
|
734
|
+
blobcoords[blobindex].bottom ());
|
735
|
+
}
|
736
|
+
bestpart =
|
737
|
+
choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
|
738
|
+
/*record partition */
|
739
|
+
partids[blobindex] = bestpart;
|
740
|
+
partsizes[bestpart]++; /*another in it */
|
741
|
+
}
|
742
|
+
|
743
|
+
bestpart = -1; /*first point */
|
744
|
+
partsizes[0]--; /*doing 1st pt again */
|
745
|
+
/*do each blob in row */
|
746
|
+
for (blobindex = startx; blobindex >= 0; blobindex--) {
|
747
|
+
diff = ydiffs[blobindex]; /*diff from line */
|
748
|
+
if (textord_oldbl_debug) {
|
749
|
+
tprintf ("%d(%d,%d), ", blobindex,
|
750
|
+
blobcoords[blobindex].left (),
|
751
|
+
blobcoords[blobindex].bottom ());
|
752
|
+
}
|
753
|
+
bestpart =
|
754
|
+
choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
|
755
|
+
/*record partition */
|
756
|
+
partids[blobindex] = bestpart;
|
757
|
+
partsizes[bestpart]++; /*another in it */
|
758
|
+
}
|
759
|
+
|
760
|
+
for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
|
761
|
+
if (partsizes[bestpart] >= partsizes[biggestpart])
|
762
|
+
biggestpart = bestpart; /*new biggest */
|
763
|
+
if (textord_oldbl_merge_parts)
|
764
|
+
merge_oldbl_parts(blobcoords,
|
765
|
+
blobcount,
|
766
|
+
partids,
|
767
|
+
partsizes,
|
768
|
+
biggestpart,
|
769
|
+
jumplimit);
|
770
|
+
return biggestpart; /*biggest partition */
|
771
|
+
}
|
772
|
+
|
773
|
+
|
774
|
+
/**********************************************************************
|
775
|
+
* merge_oldbl_parts
|
776
|
+
*
|
777
|
+
* For any adjacent group of blobs in a different part, put them in the
|
778
|
+
* main part if they fit closely to neighbours in the main part.
|
779
|
+
**********************************************************************/
|
780
|
+
|
781
|
+
void
|
782
|
+
merge_oldbl_parts ( //partition blobs
|
783
|
+
TBOX blobcoords[], //bounding boxes
|
784
|
+
int blobcount, /*no of blobs on row */
|
785
|
+
char partids[], /*partition no of each blob */
|
786
|
+
int partsizes[], /*no in each partition */
|
787
|
+
int biggestpart, //major partition
|
788
|
+
float jumplimit /*allowed delta change */
|
789
|
+
) {
|
790
|
+
BOOL8 found_one; //found a bestpart blob
|
791
|
+
BOOL8 close_one; //found was close enough
|
792
|
+
register int blobindex; /*no along text line */
|
793
|
+
int prevpart; //previous iteration
|
794
|
+
int runlength; //no in this part
|
795
|
+
float diff; /*difference from line */
|
796
|
+
int startx; /*index of start blob */
|
797
|
+
int test_blob; //another index
|
798
|
+
FCOORD coord; //blob coordinate
|
799
|
+
float m, c; //fitted line
|
800
|
+
QLSQ stats; //line stuff
|
801
|
+
|
802
|
+
prevpart = biggestpart;
|
803
|
+
runlength = 0;
|
804
|
+
startx = 0;
|
805
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
806
|
+
if (partids[blobindex] != prevpart) {
|
807
|
+
// tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
|
808
|
+
// blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
|
809
|
+
// prevpart,partids[blobindex],runlength);
|
810
|
+
if (prevpart != biggestpart && runlength > MAXBADRUN) {
|
811
|
+
stats.clear ();
|
812
|
+
for (test_blob = startx; test_blob < blobindex; test_blob++) {
|
813
|
+
coord = FCOORD ((blobcoords[test_blob].left ()
|
814
|
+
+ blobcoords[test_blob].right ()) / 2.0,
|
815
|
+
blobcoords[test_blob].bottom ());
|
816
|
+
stats.add (coord.x (), coord.y ());
|
817
|
+
}
|
818
|
+
stats.fit (1);
|
819
|
+
m = stats.get_b ();
|
820
|
+
c = stats.get_c ();
|
821
|
+
if (textord_oldbl_debug)
|
822
|
+
tprintf ("Fitted line y=%g x + %g\n", m, c);
|
823
|
+
found_one = FALSE;
|
824
|
+
close_one = FALSE;
|
825
|
+
for (test_blob = 1; !found_one
|
826
|
+
&& (startx - test_blob >= 0
|
827
|
+
|| blobindex + test_blob <= blobcount); test_blob++) {
|
828
|
+
if (startx - test_blob >= 0
|
829
|
+
&& partids[startx - test_blob] == biggestpart) {
|
830
|
+
found_one = TRUE;
|
831
|
+
coord = FCOORD ((blobcoords[startx - test_blob].left ()
|
832
|
+
+ blobcoords[startx -
|
833
|
+
test_blob].right ()) /
|
834
|
+
2.0,
|
835
|
+
blobcoords[startx -
|
836
|
+
test_blob].bottom ());
|
837
|
+
diff = m * coord.x () + c - coord.y ();
|
838
|
+
if (textord_oldbl_debug)
|
839
|
+
tprintf
|
840
|
+
("Diff of common blob to suspect part=%g at (%g,%g)\n",
|
841
|
+
diff, coord.x (), coord.y ());
|
842
|
+
if (diff < jumplimit && -diff < jumplimit)
|
843
|
+
close_one = TRUE;
|
844
|
+
}
|
845
|
+
if (blobindex + test_blob <= blobcount
|
846
|
+
&& partids[blobindex + test_blob - 1] == biggestpart) {
|
847
|
+
found_one = TRUE;
|
848
|
+
coord =
|
849
|
+
FCOORD ((blobcoords[blobindex + test_blob - 1].
|
850
|
+
left () + blobcoords[blobindex + test_blob -
|
851
|
+
1].right ()) / 2.0,
|
852
|
+
blobcoords[blobindex + test_blob -
|
853
|
+
1].bottom ());
|
854
|
+
diff = m * coord.x () + c - coord.y ();
|
855
|
+
if (textord_oldbl_debug)
|
856
|
+
tprintf
|
857
|
+
("Diff of common blob to suspect part=%g at (%g,%g)\n",
|
858
|
+
diff, coord.x (), coord.y ());
|
859
|
+
if (diff < jumplimit && -diff < jumplimit)
|
860
|
+
close_one = TRUE;
|
861
|
+
}
|
862
|
+
}
|
863
|
+
if (close_one) {
|
864
|
+
if (textord_oldbl_debug)
|
865
|
+
tprintf
|
866
|
+
("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
|
867
|
+
runlength, biggestpart, prevpart,
|
868
|
+
blobcoords[startx].left (),
|
869
|
+
blobcoords[startx].bottom ());
|
870
|
+
//switch sides
|
871
|
+
partsizes[prevpart] -= runlength;
|
872
|
+
for (test_blob = startx; test_blob < blobindex; test_blob++)
|
873
|
+
partids[test_blob] = biggestpart;
|
874
|
+
}
|
875
|
+
}
|
876
|
+
prevpart = partids[blobindex];
|
877
|
+
runlength = 1;
|
878
|
+
startx = blobindex;
|
879
|
+
}
|
880
|
+
else
|
881
|
+
runlength++;
|
882
|
+
}
|
883
|
+
}
|
884
|
+
|
885
|
+
|
886
|
+
/**********************************************************************
|
887
|
+
* get_ydiffs
|
888
|
+
*
|
889
|
+
* Get the differences between the blobs and the spline,
|
890
|
+
* putting them in ydiffs. The return value is the index
|
891
|
+
* of the blob in the middle of the "best behaved" region
|
892
|
+
**********************************************************************/
|
893
|
+
|
894
|
+
int
|
895
|
+
get_ydiffs ( //evaluate differences
|
896
|
+
TBOX blobcoords[], //bounding boxes
|
897
|
+
int blobcount, /*no of blobs */
|
898
|
+
QSPLINE * spline, /*approximating spline */
|
899
|
+
float ydiffs[] /*output */
|
900
|
+
) {
|
901
|
+
register int blobindex; /*current blob */
|
902
|
+
int xcentre; /*xcoord */
|
903
|
+
int lastx; /*last xcentre */
|
904
|
+
float diffsum; /*sum of diffs */
|
905
|
+
float diff; /*current difference */
|
906
|
+
float drift; /*sum of spline steps */
|
907
|
+
float bestsum; /*smallest diffsum */
|
908
|
+
int bestindex; /*index of bestsum */
|
909
|
+
|
910
|
+
diffsum = 0.0f;
|
911
|
+
bestindex = 0;
|
912
|
+
bestsum = (float) MAX_INT32;
|
913
|
+
drift = 0.0f;
|
914
|
+
lastx = blobcoords[0].left ();
|
915
|
+
/*do each blob in row */
|
916
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
917
|
+
/*centre of blob */
|
918
|
+
xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
|
919
|
+
//step functions in spline
|
920
|
+
drift += spline->step (lastx, xcentre);
|
921
|
+
lastx = xcentre;
|
922
|
+
diff = blobcoords[blobindex].bottom ();
|
923
|
+
diff -= spline->y (xcentre);
|
924
|
+
diff += drift;
|
925
|
+
ydiffs[blobindex] = diff; /*store difference */
|
926
|
+
if (blobindex > 2)
|
927
|
+
/*remove old one */
|
928
|
+
diffsum -= ABS (ydiffs[blobindex - 3]);
|
929
|
+
diffsum += ABS (diff); /*add new one */
|
930
|
+
if (blobindex >= 2 && diffsum < bestsum) {
|
931
|
+
bestsum = diffsum; /*find min sum */
|
932
|
+
bestindex = blobindex - 1; /*middle of set */
|
933
|
+
}
|
934
|
+
}
|
935
|
+
return bestindex;
|
936
|
+
}
|
937
|
+
|
938
|
+
|
939
|
+
/**********************************************************************
|
940
|
+
* choose_partition
|
941
|
+
*
|
942
|
+
* Choose a partition for the point and return the index.
|
943
|
+
**********************************************************************/
|
944
|
+
|
945
|
+
int
|
946
|
+
choose_partition ( //select partition
|
947
|
+
register float diff, /*diff from spline */
|
948
|
+
float partdiffs[], /*diff on all parts */
|
949
|
+
int lastpart, /*last assigned partition */
|
950
|
+
float jumplimit, /*new part threshold */
|
951
|
+
int *partcount /*no of partitions */
|
952
|
+
) {
|
953
|
+
register int partition; /*partition no */
|
954
|
+
int bestpart; /*best new partition */
|
955
|
+
float bestdelta; /*best gap from a part */
|
956
|
+
static float drift; /*drift from spline */
|
957
|
+
float delta; /*diff from part */
|
958
|
+
static float lastdelta; /*previous delta */
|
959
|
+
|
960
|
+
if (lastpart < 0) {
|
961
|
+
partdiffs[0] = diff;
|
962
|
+
lastpart = 0; /*first point */
|
963
|
+
drift = 0.0f;
|
964
|
+
lastdelta = 0.0f;
|
965
|
+
}
|
966
|
+
/*adjusted diff from part */
|
967
|
+
delta = diff - partdiffs[lastpart] - drift;
|
968
|
+
if (textord_oldbl_debug) {
|
969
|
+
tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, drift);
|
970
|
+
}
|
971
|
+
if (ABS (delta) > jumplimit / 2) {
|
972
|
+
/*delta on part 0 */
|
973
|
+
bestdelta = diff - partdiffs[0] - drift;
|
974
|
+
bestpart = 0; /*0 best so far */
|
975
|
+
for (partition = 1; partition < *partcount; partition++) {
|
976
|
+
delta = diff - partdiffs[partition] - drift;
|
977
|
+
if (ABS (delta) < ABS (bestdelta)) {
|
978
|
+
bestdelta = delta;
|
979
|
+
bestpart = partition; /*part with nearest jump */
|
980
|
+
}
|
981
|
+
}
|
982
|
+
delta = bestdelta;
|
983
|
+
/*too far away */
|
984
|
+
if (ABS (bestdelta) > jumplimit
|
985
|
+
&& *partcount < MAXPARTS) { /*and spare part left */
|
986
|
+
bestpart = (*partcount)++; /*best was new one */
|
987
|
+
/*start new one */
|
988
|
+
partdiffs[bestpart] = diff - drift;
|
989
|
+
delta = 0.0f;
|
990
|
+
}
|
991
|
+
}
|
992
|
+
else {
|
993
|
+
bestpart = lastpart; /*best was last one */
|
994
|
+
}
|
995
|
+
|
996
|
+
if (bestpart == lastpart
|
997
|
+
&& (ABS (delta - lastdelta) < jumplimit / 2
|
998
|
+
|| ABS (delta) < jumplimit / 2))
|
999
|
+
/*smooth the drift */
|
1000
|
+
drift = (3 * drift + delta) / 3;
|
1001
|
+
lastdelta = delta;
|
1002
|
+
|
1003
|
+
if (textord_oldbl_debug) {
|
1004
|
+
tprintf ("P=%d\n", bestpart);
|
1005
|
+
}
|
1006
|
+
|
1007
|
+
return bestpart;
|
1008
|
+
}
|
1009
|
+
|
1010
|
+
|
1011
|
+
///*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking
|
1012
|
+
//partitions and gives all the rest partid 0*/
|
1013
|
+
//
|
1014
|
+
//merge_partitions(partids,partcount,blobcount,bestpart)
|
1015
|
+
//register char *partids; /*partition numbers*/
|
1016
|
+
//int partcount; /*no of partitions*/
|
1017
|
+
//int blobcount; /*no of blobs*/
|
1018
|
+
//int bestpart; /*best partition*/
|
1019
|
+
//{
|
1020
|
+
// register int blobindex; /*no along text line*/
|
1021
|
+
// int runlength; /*run of same partition*/
|
1022
|
+
// int bestrun; /*biggest runlength*/
|
1023
|
+
//
|
1024
|
+
// bestrun=0; /*no runs yet*/
|
1025
|
+
// runlength=1;
|
1026
|
+
// for (blobindex=1;blobindex<blobcount;blobindex++)
|
1027
|
+
// { if (partids[blobindex]!=partids[blobindex-1])
|
1028
|
+
// { if (runlength>bestrun)
|
1029
|
+
// bestrun=runlength; /*find biggest run*/
|
1030
|
+
// runlength=1; /*new run*/
|
1031
|
+
// }
|
1032
|
+
// else
|
1033
|
+
// { runlength++;
|
1034
|
+
// }
|
1035
|
+
// }
|
1036
|
+
// if (runlength>bestrun)
|
1037
|
+
// bestrun=runlength;
|
1038
|
+
//
|
1039
|
+
// for (blobindex=0;blobindex<blobcount;blobindex++)
|
1040
|
+
// { if (blobindex<1
|
1041
|
+
// || partids[blobindex]!=partids[blobindex-1])
|
1042
|
+
// { if ((blobindex+1>=blobcount
|
1043
|
+
// || partids[blobindex]!=partids[blobindex+1])
|
1044
|
+
// /*loner*/
|
1045
|
+
// && (bestrun>2 || partids[blobindex]!=bestpart))
|
1046
|
+
// { partids[blobindex]=partcount; /*discard loner*/
|
1047
|
+
// }
|
1048
|
+
// else if (blobindex+1<blobcount
|
1049
|
+
// && partids[blobindex]==partids[blobindex+1]
|
1050
|
+
// /*pair*/
|
1051
|
+
// && (blobindex+2>=blobcount
|
1052
|
+
// || partids[blobindex]!=partids[blobindex+2])
|
1053
|
+
// && (bestrun>3 || partids[blobindex]!=bestpart))
|
1054
|
+
// { partids[blobindex]=partcount; /*discard both*/
|
1055
|
+
// partids[blobindex+1]=partcount;
|
1056
|
+
// }
|
1057
|
+
// }
|
1058
|
+
// }
|
1059
|
+
// for (blobindex=0;blobindex<blobcount;blobindex++)
|
1060
|
+
// { if (partids[blobindex]<partcount)
|
1061
|
+
// partids[blobindex]=0; /*all others together*/
|
1062
|
+
// }
|
1063
|
+
//}
|
1064
|
+
|
1065
|
+
/**********************************************************************
|
1066
|
+
* partition_coords
|
1067
|
+
*
|
1068
|
+
* Get the x,y coordinates of all points in the bestpart and put them
|
1069
|
+
* in xcoords,ycoords. Return the number of points found.
|
1070
|
+
**********************************************************************/
|
1071
|
+
|
1072
|
+
int
|
1073
|
+
partition_coords ( //find relevant coords
|
1074
|
+
TBOX blobcoords[], //bounding boxes
|
1075
|
+
int blobcount, /*no of blobs in row */
|
1076
|
+
char partids[], /*partition no of each blob */
|
1077
|
+
int bestpart, /*best new partition */
|
1078
|
+
int xcoords[], /*points to work on */
|
1079
|
+
int ycoords[] /*points to work on */
|
1080
|
+
) {
|
1081
|
+
register int blobindex; /*no along text line */
|
1082
|
+
int pointcount; /*no of points */
|
1083
|
+
|
1084
|
+
pointcount = 0;
|
1085
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
1086
|
+
if (partids[blobindex] == bestpart) {
|
1087
|
+
/*centre of blob */
|
1088
|
+
xcoords[pointcount] = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
|
1089
|
+
ycoords[pointcount++] = blobcoords[blobindex].bottom ();
|
1090
|
+
}
|
1091
|
+
}
|
1092
|
+
return pointcount; /*no of points found */
|
1093
|
+
}
|
1094
|
+
|
1095
|
+
|
1096
|
+
/**********************************************************************
|
1097
|
+
* segment_spline
|
1098
|
+
*
|
1099
|
+
* Segment the row at midpoints between maxima and minima of the x,y pairs.
|
1100
|
+
* The xstarts of the segments are returned and the number found.
|
1101
|
+
**********************************************************************/
|
1102
|
+
|
1103
|
+
int
|
1104
|
+
segment_spline ( //make xstarts
|
1105
|
+
TBOX blobcoords[], //boundign boxes
|
1106
|
+
int blobcount, /*no of blobs in row */
|
1107
|
+
int xcoords[], /*points to work on */
|
1108
|
+
int ycoords[], /*points to work on */
|
1109
|
+
int degree, int pointcount, /*no of points */
|
1110
|
+
int xstarts[] //result
|
1111
|
+
) {
|
1112
|
+
register int ptindex; /*no along text line */
|
1113
|
+
register int segment; /*partition no */
|
1114
|
+
int lastmin, lastmax; /*possible turn points */
|
1115
|
+
int turnpoints[SPLINESIZE]; /*good turning points */
|
1116
|
+
int turncount; /*no of turning points */
|
1117
|
+
int max_x; //max specified coord
|
1118
|
+
|
1119
|
+
xstarts[0] = xcoords[0] - 1; //leftmost defined pt
|
1120
|
+
max_x = xcoords[pointcount - 1] + 1;
|
1121
|
+
if (degree < 2)
|
1122
|
+
pointcount = 0;
|
1123
|
+
turncount = 0; /*no turning points yet */
|
1124
|
+
if (pointcount > 3) {
|
1125
|
+
ptindex = 1;
|
1126
|
+
lastmax = lastmin = 0; /*start with first one */
|
1127
|
+
while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
|
1128
|
+
/*minimum */
|
1129
|
+
if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
|
1130
|
+
if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
|
1131
|
+
if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
|
1132
|
+
/*new max point */
|
1133
|
+
turnpoints[turncount++] = lastmax;
|
1134
|
+
lastmin = ptindex; /*latest minimum */
|
1135
|
+
}
|
1136
|
+
else if (ycoords[ptindex] < ycoords[lastmin]) {
|
1137
|
+
lastmin = ptindex; /*lower minimum */
|
1138
|
+
}
|
1139
|
+
}
|
1140
|
+
|
1141
|
+
/*maximum */
|
1142
|
+
if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
|
1143
|
+
if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
|
1144
|
+
if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
|
1145
|
+
/*new min point */
|
1146
|
+
turnpoints[turncount++] = lastmin;
|
1147
|
+
lastmax = ptindex; /*latest maximum */
|
1148
|
+
}
|
1149
|
+
else if (ycoords[ptindex] > ycoords[lastmax]) {
|
1150
|
+
lastmax = ptindex; /*higher maximum */
|
1151
|
+
}
|
1152
|
+
}
|
1153
|
+
ptindex++;
|
1154
|
+
}
|
1155
|
+
/*possible global min */
|
1156
|
+
if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
|
1157
|
+
&& (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
|
1158
|
+
if (turncount < SPLINESIZE - 1)
|
1159
|
+
/*2 more turns */
|
1160
|
+
turnpoints[turncount++] = lastmax;
|
1161
|
+
if (turncount < SPLINESIZE - 1)
|
1162
|
+
turnpoints[turncount++] = ptindex;
|
1163
|
+
}
|
1164
|
+
else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
|
1165
|
+
/*possible global max */
|
1166
|
+
&& (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
|
1167
|
+
if (turncount < SPLINESIZE - 1)
|
1168
|
+
/*2 more turns */
|
1169
|
+
turnpoints[turncount++] = lastmin;
|
1170
|
+
if (turncount < SPLINESIZE - 1)
|
1171
|
+
turnpoints[turncount++] = ptindex;
|
1172
|
+
}
|
1173
|
+
else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
|
1174
|
+
&& turncount < SPLINESIZE - 1) {
|
1175
|
+
if (ycoords[ptindex] > ycoords[lastmax])
|
1176
|
+
turnpoints[turncount++] = ptindex;
|
1177
|
+
else
|
1178
|
+
turnpoints[turncount++] = lastmax;
|
1179
|
+
}
|
1180
|
+
else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
|
1181
|
+
&& turncount < SPLINESIZE - 1) {
|
1182
|
+
if (ycoords[ptindex] < ycoords[lastmin])
|
1183
|
+
turnpoints[turncount++] = ptindex;
|
1184
|
+
else
|
1185
|
+
turnpoints[turncount++] = lastmin;
|
1186
|
+
}
|
1187
|
+
}
|
1188
|
+
|
1189
|
+
if (textord_oldbl_debug && turncount > 0)
|
1190
|
+
tprintf ("First turn is %d at (%d,%d)\n",
|
1191
|
+
turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
|
1192
|
+
for (segment = 1; segment < turncount; segment++) {
|
1193
|
+
/*centre y coord */
|
1194
|
+
lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
|
1195
|
+
|
1196
|
+
/* fix alg so that it works with both rising and falling sections */
|
1197
|
+
if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
|
1198
|
+
/*find rising y centre */
|
1199
|
+
for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
|
1200
|
+
else
|
1201
|
+
/*find falling y centre */
|
1202
|
+
for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);
|
1203
|
+
|
1204
|
+
/*centre x */
|
1205
|
+
xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
|
1206
|
+
+ xcoords[turnpoints[segment - 1]]
|
1207
|
+
+ xcoords[turnpoints[segment]] + 2) / 4;
|
1208
|
+
/*halfway between turns */
|
1209
|
+
if (textord_oldbl_debug)
|
1210
|
+
tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
|
1211
|
+
segment, turnpoints[segment],
|
1212
|
+
xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
|
1213
|
+
ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
|
1214
|
+
}
|
1215
|
+
|
1216
|
+
xstarts[segment] = max_x;
|
1217
|
+
return segment; /*no of splines */
|
1218
|
+
}
|
1219
|
+
|
1220
|
+
|
1221
|
+
/**********************************************************************
|
1222
|
+
* split_stepped_spline
|
1223
|
+
*
|
1224
|
+
* Re-segment the spline in cases where there is a big step function.
|
1225
|
+
* Return TRUE if any were done.
|
1226
|
+
**********************************************************************/
|
1227
|
+
|
1228
|
+
BOOL8
|
1229
|
+
split_stepped_spline ( //make xstarts
|
1230
|
+
QSPLINE * baseline, //current shot
|
1231
|
+
float jumplimit, //max step fuction
|
1232
|
+
int xcoords[], /*points to work on */
|
1233
|
+
int xstarts[], //result
|
1234
|
+
int &segments //no of segments
|
1235
|
+
) {
|
1236
|
+
BOOL8 doneany; //return value
|
1237
|
+
register int segment; /*partition no */
|
1238
|
+
int startindex, centreindex, endindex;
|
1239
|
+
float leftcoord, rightcoord;
|
1240
|
+
int leftindex, rightindex;
|
1241
|
+
float step; //spline step
|
1242
|
+
|
1243
|
+
doneany = FALSE;
|
1244
|
+
startindex = 0;
|
1245
|
+
for (segment = 1; segment < segments - 1; segment++) {
|
1246
|
+
step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
|
1247
|
+
(xstarts[segment] + xstarts[segment + 1]) / 2.0);
|
1248
|
+
if (step < 0)
|
1249
|
+
step = -step;
|
1250
|
+
if (step > jumplimit) {
|
1251
|
+
while (xcoords[startindex] < xstarts[segment - 1])
|
1252
|
+
startindex++;
|
1253
|
+
centreindex = startindex;
|
1254
|
+
while (xcoords[centreindex] < xstarts[segment])
|
1255
|
+
centreindex++;
|
1256
|
+
endindex = centreindex;
|
1257
|
+
while (xcoords[endindex] < xstarts[segment + 1])
|
1258
|
+
endindex++;
|
1259
|
+
if (segments >= SPLINESIZE) {
|
1260
|
+
if (textord_debug_baselines)
|
1261
|
+
tprintf ("Too many segments to resegment spline!!\n");
|
1262
|
+
}
|
1263
|
+
else if (endindex - startindex >= textord_spline_medianwin * 3) {
|
1264
|
+
while (centreindex - startindex <
|
1265
|
+
textord_spline_medianwin * 3 / 2)
|
1266
|
+
centreindex++;
|
1267
|
+
while (endindex - centreindex <
|
1268
|
+
textord_spline_medianwin * 3 / 2)
|
1269
|
+
centreindex--;
|
1270
|
+
leftindex = (startindex + startindex + centreindex) / 3;
|
1271
|
+
rightindex = (centreindex + endindex + endindex) / 3;
|
1272
|
+
leftcoord =
|
1273
|
+
(xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
|
1274
|
+
rightcoord =
|
1275
|
+
(xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
|
1276
|
+
while (xcoords[leftindex] > leftcoord
|
1277
|
+
&& leftindex - startindex > textord_spline_medianwin)
|
1278
|
+
leftindex--;
|
1279
|
+
while (xcoords[leftindex] < leftcoord
|
1280
|
+
&& centreindex - leftindex >
|
1281
|
+
textord_spline_medianwin / 2)
|
1282
|
+
leftindex++;
|
1283
|
+
if (xcoords[leftindex] - leftcoord >
|
1284
|
+
leftcoord - xcoords[leftindex - 1])
|
1285
|
+
leftindex--;
|
1286
|
+
while (xcoords[rightindex] > rightcoord
|
1287
|
+
&& rightindex - centreindex >
|
1288
|
+
textord_spline_medianwin / 2)
|
1289
|
+
rightindex--;
|
1290
|
+
while (xcoords[rightindex] < rightcoord
|
1291
|
+
&& endindex - rightindex > textord_spline_medianwin)
|
1292
|
+
rightindex++;
|
1293
|
+
if (xcoords[rightindex] - rightcoord >
|
1294
|
+
rightcoord - xcoords[rightindex - 1])
|
1295
|
+
rightindex--;
|
1296
|
+
if (textord_debug_baselines)
|
1297
|
+
tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
|
1298
|
+
xstarts[segment],
|
1299
|
+
baseline->
|
1300
|
+
step ((xstarts[segment - 1] +
|
1301
|
+
xstarts[segment]) / 2.0,
|
1302
|
+
(xstarts[segment] +
|
1303
|
+
xstarts[segment + 1]) / 2.0),
|
1304
|
+
(xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
|
1305
|
+
(xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
|
1306
|
+
insert_spline_point (xstarts, segment,
|
1307
|
+
(xcoords[leftindex - 1] +
|
1308
|
+
xcoords[leftindex]) / 2,
|
1309
|
+
(xcoords[rightindex - 1] +
|
1310
|
+
xcoords[rightindex]) / 2, segments);
|
1311
|
+
doneany = TRUE;
|
1312
|
+
}
|
1313
|
+
else if (textord_debug_baselines) {
|
1314
|
+
tprintf
|
1315
|
+
("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
|
1316
|
+
startindex, centreindex, endindex,
|
1317
|
+
(inT32) textord_spline_medianwin);
|
1318
|
+
}
|
1319
|
+
}
|
1320
|
+
// else tprintf("Spline step at %d is %g\n",
|
1321
|
+
// xstarts[segment],
|
1322
|
+
// baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
|
1323
|
+
// (xstarts[segment]+xstarts[segment+1])/2.0));
|
1324
|
+
}
|
1325
|
+
return doneany;
|
1326
|
+
}
|
1327
|
+
|
1328
|
+
|
1329
|
+
/**********************************************************************
 * insert_spline_point
 *
 * Replace the spline start at index segment with two starts, coord1
 * and coord2. Entries segment+1..segments are shuffled up one place to
 * make room, and segments is incremented. xstarts must have room for
 * index segments+1 (the value of segments on entry).
 **********************************************************************/

void
insert_spline_point (            //insert a start
int xstarts[],                   //starts to shuffle
int segment,                     //insertion pt
int coord1,                      //coords to add
int coord2, int &segments        //total segments
) {
  //shift the tail up by one slot, working from the top down so that
  //nothing is overwritten before it has been copied
  for (int dest = segments + 1; dest > segment + 1; dest--) {
    xstarts[dest] = xstarts[dest - 1];
  }
  segments++;

  //the old boundary is overwritten by the pair of new ones
  xstarts[segment] = coord1;
  xstarts[segment + 1] = coord2;
}
|
1350
|
+
|
1351
|
+
|
1352
|
+
/**********************************************************************
|
1353
|
+
* find_lesser_parts
|
1354
|
+
*
|
1355
|
+
* Average the step from the spline for the other partitions
|
1356
|
+
* and find the commonest partition which has a descender.
|
1357
|
+
**********************************************************************/
|
1358
|
+
|
1359
|
+
void
|
1360
|
+
find_lesser_parts ( //get descenders
|
1361
|
+
TO_ROW * row, //row to process
|
1362
|
+
TBOX blobcoords[], //bounding boxes
|
1363
|
+
int blobcount, /*no of blobs */
|
1364
|
+
char partids[], /*partition of each blob */
|
1365
|
+
int partsizes[], /*size of each part */
|
1366
|
+
int partcount, /*no of partitions */
|
1367
|
+
int bestpart /*biggest partition */
|
1368
|
+
) {
|
1369
|
+
register int blobindex; /*index of blob */
|
1370
|
+
register int partition; /*current partition */
|
1371
|
+
int xcentre; /*centre of blob */
|
1372
|
+
int poscount; /*count of best up step */
|
1373
|
+
int negcount; /*count of best down step */
|
1374
|
+
float partsteps[MAXPARTS]; /*average step to part */
|
1375
|
+
float bestpos; /*best up step */
|
1376
|
+
float bestneg; /*best down step */
|
1377
|
+
int runlength; /*length of bad run */
|
1378
|
+
int biggestrun; /*biggest bad run */
|
1379
|
+
|
1380
|
+
biggestrun = 0;
|
1381
|
+
for (partition = 0; partition < partcount; partition++)
|
1382
|
+
partsteps[partition] = 0.0; /*zero accumulators */
|
1383
|
+
for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
|
1384
|
+
xcentre = (blobcoords[blobindex].left ()
|
1385
|
+
+ blobcoords[blobindex].right ()) >> 1;
|
1386
|
+
/*in other parts */
|
1387
|
+
if (partids[blobindex] != bestpart) {
|
1388
|
+
runlength++; /*run of non bests */
|
1389
|
+
if (runlength > biggestrun)
|
1390
|
+
biggestrun = runlength;
|
1391
|
+
partsteps[partids[blobindex]] += blobcoords[blobindex].bottom ()
|
1392
|
+
- row->baseline.y (xcentre);
|
1393
|
+
}
|
1394
|
+
else
|
1395
|
+
runlength = 0;
|
1396
|
+
}
|
1397
|
+
if (biggestrun > MAXBADRUN)
|
1398
|
+
row->xheight = -1.0f; /*failed */
|
1399
|
+
else
|
1400
|
+
row->xheight = 1.0f; /*success */
|
1401
|
+
poscount = negcount = 0;
|
1402
|
+
bestpos = bestneg = 0.0; /*no step yet */
|
1403
|
+
for (partition = 0; partition < partcount; partition++) {
|
1404
|
+
if (partition != bestpart) {
|
1405
|
+
|
1406
|
+
//by jetsoft divide by zero possible
|
1407
|
+
if (partsizes[partition]==0)
|
1408
|
+
partsteps[partition]=0;
|
1409
|
+
else
|
1410
|
+
partsteps[partition] /= partsizes[partition];
|
1411
|
+
//
|
1412
|
+
|
1413
|
+
|
1414
|
+
if (partsteps[partition] >= MINASCRISE
|
1415
|
+
&& partsizes[partition] > poscount) {
|
1416
|
+
/*ascender rise */
|
1417
|
+
bestpos = partsteps[partition];
|
1418
|
+
/*2nd most popular */
|
1419
|
+
poscount = partsizes[partition];
|
1420
|
+
}
|
1421
|
+
if (partsteps[partition] <= -MINASCRISE
|
1422
|
+
&& partsizes[partition] > negcount) {
|
1423
|
+
/*ascender rise */
|
1424
|
+
bestneg = partsteps[partition];
|
1425
|
+
/*2nd most popular */
|
1426
|
+
negcount = partsizes[partition];
|
1427
|
+
}
|
1428
|
+
}
|
1429
|
+
}
|
1430
|
+
/*average x-height */
|
1431
|
+
partsteps[bestpart] /= blobcount;
|
1432
|
+
row->descdrop = bestneg;
|
1433
|
+
}
|
1434
|
+
|
1435
|
+
|
1436
|
+
/**********************************************************************
|
1437
|
+
* old_first_xheight
|
1438
|
+
*
|
1439
|
+
* Makes an x-height spline by copying the baseline and shifting it.
|
1440
|
+
* It estimates the x-height across the line to use as the shift.
|
1441
|
+
* It also finds the ascender height if it can.
|
1442
|
+
**********************************************************************/
|
1443
|
+
|
1444
|
+
void
|
1445
|
+
old_first_xheight ( //the wiseowl way
|
1446
|
+
TO_ROW * row, /*current row */
|
1447
|
+
TBOX blobcoords[], /*blob bounding boxes */
|
1448
|
+
int initialheight, //initial guess
|
1449
|
+
int blobcount, /*blobs in blobcoords */
|
1450
|
+
QSPLINE * baseline, /*established */
|
1451
|
+
float jumplimit /*min ascender height */
|
1452
|
+
) {
|
1453
|
+
register int blobindex; /*current blob */
|
1454
|
+
/*height statistics */
|
1455
|
+
STATS heightstat (0, MAXHEIGHT);
|
1456
|
+
int height; /*height of blob */
|
1457
|
+
int xcentre; /*centre of blob */
|
1458
|
+
int lineheight; /*approx xheight */
|
1459
|
+
float ascenders; /*ascender sum */
|
1460
|
+
int asccount; /*no of ascenders */
|
1461
|
+
float xsum; /*xheight sum */
|
1462
|
+
int xcount; /*xheight count */
|
1463
|
+
register float diff; /*height difference */
|
1464
|
+
|
1465
|
+
if (blobcount > 1) {
|
1466
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
1467
|
+
xcentre = (blobcoords[blobindex].left ()
|
1468
|
+
+ blobcoords[blobindex].right ()) / 2;
|
1469
|
+
/*height of blob */
|
1470
|
+
height = (int) (blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5);
|
1471
|
+
if (height > initialheight * oldbl_xhfract
|
1472
|
+
&& height > textord_min_xheight)
|
1473
|
+
heightstat.add (height, 1);
|
1474
|
+
}
|
1475
|
+
if (heightstat.get_total () > 3) {
|
1476
|
+
lineheight = (int) heightstat.ile (0.25);
|
1477
|
+
if (lineheight <= 0)
|
1478
|
+
lineheight = (int) heightstat.ile (0.5);
|
1479
|
+
}
|
1480
|
+
else
|
1481
|
+
lineheight = initialheight;
|
1482
|
+
}
|
1483
|
+
else {
|
1484
|
+
lineheight = (int) (blobcoords[0].top ()
|
1485
|
+
- baseline->y ((blobcoords[0].left ()
|
1486
|
+
+ blobcoords[0].right ()) / 2) +
|
1487
|
+
0.5);
|
1488
|
+
}
|
1489
|
+
|
1490
|
+
xsum = 0.0f;
|
1491
|
+
xcount = 0;
|
1492
|
+
for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;
|
1493
|
+
blobindex++) {
|
1494
|
+
xcentre = (blobcoords[blobindex].left ()
|
1495
|
+
+ blobcoords[blobindex].right ()) / 2;
|
1496
|
+
diff = blobcoords[blobindex].top () - baseline->y (xcentre);
|
1497
|
+
/*is it ascender */
|
1498
|
+
if (diff > lineheight + jumplimit) {
|
1499
|
+
ascenders += diff;
|
1500
|
+
asccount++; /*count ascenders */
|
1501
|
+
}
|
1502
|
+
else if (diff > lineheight - jumplimit) {
|
1503
|
+
xsum += diff; /*mean xheight */
|
1504
|
+
xcount++;
|
1505
|
+
}
|
1506
|
+
}
|
1507
|
+
if (xcount > 0)
|
1508
|
+
xsum /= xcount; /*average xheight */
|
1509
|
+
else
|
1510
|
+
xsum = (float) lineheight; /*guess it */
|
1511
|
+
row->xheight *= xsum;
|
1512
|
+
if (asccount > 0)
|
1513
|
+
row->ascrise = ascenders / asccount - xsum;
|
1514
|
+
else
|
1515
|
+
row->ascrise = 0.0f; /*had none */
|
1516
|
+
if (row->xheight == 0)
|
1517
|
+
row->xheight = -1.0f;
|
1518
|
+
}
|
1519
|
+
|
1520
|
+
|
1521
|
+
/**********************************************************************
|
1522
|
+
* make_first_xheight
|
1523
|
+
*
|
1524
|
+
* Makes an x-height spline by copying the baseline and shifting it.
|
1525
|
+
* It estimates the x-height across the line to use as the shift.
|
1526
|
+
* It also finds the ascender height if it can.
|
1527
|
+
**********************************************************************/
|
1528
|
+
|
1529
|
+
void
|
1530
|
+
make_first_xheight ( //find xheight
|
1531
|
+
TO_ROW * row, /*current row */
|
1532
|
+
TBOX blobcoords[], /*blob bounding boxes */
|
1533
|
+
int lineheight, //initial guess
|
1534
|
+
int init_lineheight, //block level guess
|
1535
|
+
int blobcount, /*blobs in blobcoords */
|
1536
|
+
QSPLINE * baseline, /*established */
|
1537
|
+
float jumplimit /*min ascender height */
|
1538
|
+
) {
|
1539
|
+
STATS heightstat (0, HEIGHTBUCKETS);
|
1540
|
+
int lefts[HEIGHTBUCKETS];
|
1541
|
+
int rights[HEIGHTBUCKETS];
|
1542
|
+
int modelist[MODENUM];
|
1543
|
+
int blobindex;
|
1544
|
+
int mode_count; //blobs to count in thr
|
1545
|
+
int sign_bit;
|
1546
|
+
int mode_threshold;
|
1547
|
+
const int kBaselineTouch = 2; // This really should change with resolution.
|
1548
|
+
const int kGoodStrength = 8; // Strength of baseline-touching heights.
|
1549
|
+
const float kMinHeight = 0.25; // Min fraction of lineheight to use.
|
1550
|
+
|
1551
|
+
sign_bit = row->xheight > 0 ? 1 : -1;
|
1552
|
+
|
1553
|
+
memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0]));
|
1554
|
+
memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0]));
|
1555
|
+
mode_count = 0;
|
1556
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
1557
|
+
int xcenter = (blobcoords[blobindex].left () +
|
1558
|
+
blobcoords[blobindex].right ()) / 2;
|
1559
|
+
float base = baseline->y(xcenter);
|
1560
|
+
float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
|
1561
|
+
int strength = textord_ocropus_mode &&
|
1562
|
+
bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
|
1563
|
+
int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5);
|
1564
|
+
if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) {
|
1565
|
+
if (height > lineheight * oldbl_xhfract
|
1566
|
+
&& height > textord_min_xheight) {
|
1567
|
+
heightstat.add (height, strength);
|
1568
|
+
if (height < HEIGHTBUCKETS) {
|
1569
|
+
if (xcenter > rights[height])
|
1570
|
+
rights[height] = xcenter;
|
1571
|
+
if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))
|
1572
|
+
lefts[height] = xcenter;
|
1573
|
+
}
|
1574
|
+
}
|
1575
|
+
mode_count += strength;
|
1576
|
+
}
|
1577
|
+
}
|
1578
|
+
|
1579
|
+
mode_threshold = (int) (blobcount * 0.1);
|
1580
|
+
if (oldbl_dot_error_size > 1 || oldbl_xhfix)
|
1581
|
+
mode_threshold = (int) (mode_count * 0.1);
|
1582
|
+
|
1583
|
+
if (textord_oldbl_debug) {
|
1584
|
+
tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
|
1585
|
+
blobcount, mode_count, mode_threshold);
|
1586
|
+
}
|
1587
|
+
find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
|
1588
|
+
if (textord_oldbl_debug) {
|
1589
|
+
for (blobindex = 0; blobindex < MODENUM; blobindex++)
|
1590
|
+
tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]);
|
1591
|
+
tprintf ("\n");
|
1592
|
+
}
|
1593
|
+
pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
|
1594
|
+
|
1595
|
+
if (textord_oldbl_debug)
|
1596
|
+
tprintf ("Output xheight=%g\n", row->xheight);
|
1597
|
+
if (row->xheight < 0 && textord_oldbl_debug)
|
1598
|
+
tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);
|
1599
|
+
|
1600
|
+
if (sign_bit < 0)
|
1601
|
+
row->xheight = -row->xheight;
|
1602
|
+
}
|
1603
|
+
|
1604
|
+
/**********************************************************************
|
1605
|
+
* find_top_modes
|
1606
|
+
*
|
1607
|
+
* Fill the input array with the indices of the top ten modes of the
|
1608
|
+
* input distribution.
|
1609
|
+
**********************************************************************/
|
1610
|
+
|
1611
|
+
const int kMinModeFactorOcropus = 32;
|
1612
|
+
const int kMinModeFactor = 12;
|
1613
|
+
|
1614
|
+
void
|
1615
|
+
find_top_modes ( //get modes
|
1616
|
+
STATS * stats, //stats to hack
|
1617
|
+
int statnum, //no of piles
|
1618
|
+
int modelist[], int modenum //no of modes to get
|
1619
|
+
) {
|
1620
|
+
int mode_count;
|
1621
|
+
int last_i = 0;
|
1622
|
+
int last_max = MAX_INT32;
|
1623
|
+
int i;
|
1624
|
+
int mode;
|
1625
|
+
int total_max = 0;
|
1626
|
+
int mode_factor = textord_ocropus_mode ?
|
1627
|
+
kMinModeFactorOcropus : kMinModeFactor;
|
1628
|
+
|
1629
|
+
for (mode_count = 0; mode_count < modenum; mode_count++) {
|
1630
|
+
mode = 0;
|
1631
|
+
for (i = 0; i < statnum; i++) {
|
1632
|
+
if (stats->pile_count (i) > stats->pile_count (mode)) {
|
1633
|
+
if ((stats->pile_count (i) < last_max) ||
|
1634
|
+
((stats->pile_count (i) == last_max) && (i > last_i))) {
|
1635
|
+
mode = i;
|
1636
|
+
}
|
1637
|
+
}
|
1638
|
+
}
|
1639
|
+
last_i = mode;
|
1640
|
+
last_max = stats->pile_count (last_i);
|
1641
|
+
total_max += last_max;
|
1642
|
+
if (last_max <= total_max / mode_factor)
|
1643
|
+
mode = 0;
|
1644
|
+
modelist[mode_count] = mode;
|
1645
|
+
}
|
1646
|
+
}
|
1647
|
+
|
1648
|
+
|
1649
|
+
/**********************************************************************
|
1650
|
+
* pick_x_height
|
1651
|
+
*
|
1652
|
+
* Choose based on the height modes the best x height value.
|
1653
|
+
**********************************************************************/
|
1654
|
+
|
1655
|
+
void pick_x_height(TO_ROW * row, //row to do
|
1656
|
+
int modelist[],
|
1657
|
+
int lefts[], int rights[],
|
1658
|
+
STATS * heightstat,
|
1659
|
+
int mode_threshold) {
|
1660
|
+
int x;
|
1661
|
+
int y;
|
1662
|
+
int z;
|
1663
|
+
float ratio;
|
1664
|
+
int found_one_bigger = FALSE;
|
1665
|
+
int best_x_height = 0;
|
1666
|
+
int best_asc = 0;
|
1667
|
+
int num_in_best;
|
1668
|
+
|
1669
|
+
for (x = 0; x < MODENUM; x++) {
|
1670
|
+
for (y = 0; y < MODENUM; y++) {
|
1671
|
+
/* Check for two modes */
|
1672
|
+
if (modelist[x] && modelist[y] &&
|
1673
|
+
heightstat->pile_count (modelist[x]) > mode_threshold &&
|
1674
|
+
(!textord_ocropus_mode ||
|
1675
|
+
MIN(rights[modelist[x]], rights[modelist[y]]) >
|
1676
|
+
MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
|
1677
|
+
ratio = (float) modelist[y] / (float) modelist[x];
|
1678
|
+
if (1.2 < ratio && ratio < 1.8) {
|
1679
|
+
/* Two modes found */
|
1680
|
+
best_x_height = modelist[x];
|
1681
|
+
num_in_best = heightstat->pile_count (modelist[x]);
|
1682
|
+
|
1683
|
+
/* Try to get one higher */
|
1684
|
+
do {
|
1685
|
+
found_one_bigger = FALSE;
|
1686
|
+
for (z = 0; z < MODENUM; z++) {
|
1687
|
+
if (modelist[z] == best_x_height + 1 &&
|
1688
|
+
(!textord_ocropus_mode ||
|
1689
|
+
MIN(rights[modelist[x]], rights[modelist[y]]) >
|
1690
|
+
MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
|
1691
|
+
ratio = (float) modelist[y] / (float) modelist[z];
|
1692
|
+
if ((1.2 < ratio && ratio < 1.8) &&
|
1693
|
+
/* Should be half of best */
|
1694
|
+
heightstat->pile_count (modelist[z]) >
|
1695
|
+
num_in_best * 0.5) {
|
1696
|
+
best_x_height++;
|
1697
|
+
found_one_bigger = TRUE;
|
1698
|
+
break;
|
1699
|
+
}
|
1700
|
+
}
|
1701
|
+
}
|
1702
|
+
}
|
1703
|
+
while (found_one_bigger);
|
1704
|
+
|
1705
|
+
/* try to get a higher ascender */
|
1706
|
+
|
1707
|
+
best_asc = modelist[y];
|
1708
|
+
num_in_best = heightstat->pile_count (modelist[y]);
|
1709
|
+
|
1710
|
+
/* Try to get one higher */
|
1711
|
+
do {
|
1712
|
+
found_one_bigger = FALSE;
|
1713
|
+
for (z = 0; z < MODENUM; z++) {
|
1714
|
+
if (modelist[z] > best_asc &&
|
1715
|
+
(!textord_ocropus_mode ||
|
1716
|
+
MIN(rights[modelist[x]], rights[modelist[y]]) >
|
1717
|
+
MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
|
1718
|
+
ratio = (float) modelist[z] / (float) best_x_height;
|
1719
|
+
if ((1.2 < ratio && ratio < 1.8) &&
|
1720
|
+
/* Should be half of best */
|
1721
|
+
heightstat->pile_count (modelist[z]) >
|
1722
|
+
num_in_best * 0.5) {
|
1723
|
+
best_asc = modelist[z];
|
1724
|
+
found_one_bigger = TRUE;
|
1725
|
+
break;
|
1726
|
+
}
|
1727
|
+
}
|
1728
|
+
}
|
1729
|
+
}
|
1730
|
+
while (found_one_bigger);
|
1731
|
+
|
1732
|
+
row->xheight = (float) best_x_height;
|
1733
|
+
row->ascrise = (float) best_asc - best_x_height;
|
1734
|
+
return;
|
1735
|
+
}
|
1736
|
+
}
|
1737
|
+
}
|
1738
|
+
}
|
1739
|
+
|
1740
|
+
best_x_height = modelist[0]; /* Single Mode found */
|
1741
|
+
num_in_best = heightstat->pile_count (best_x_height);
|
1742
|
+
do {
|
1743
|
+
/* Try to get one higher */
|
1744
|
+
found_one_bigger = FALSE;
|
1745
|
+
for (z = 1; z < MODENUM; z++) {
|
1746
|
+
/* Should be half of best */
|
1747
|
+
if ((modelist[z] == best_x_height + 1) &&
|
1748
|
+
(heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
|
1749
|
+
best_x_height++;
|
1750
|
+
found_one_bigger = TRUE;
|
1751
|
+
break;
|
1752
|
+
}
|
1753
|
+
}
|
1754
|
+
}
|
1755
|
+
while (found_one_bigger);
|
1756
|
+
|
1757
|
+
row->ascrise = 0.0f;
|
1758
|
+
row->xheight = (float) best_x_height;
|
1759
|
+
if (row->xheight == 0)
|
1760
|
+
row->xheight = -1.0f;
|
1761
|
+
}
|