tesseract_bin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
|
@@ -0,0 +1,1761 @@
|
|
|
1
|
+
/**********************************************************************
|
|
2
|
+
* File: oldbasel.cpp (Formerly oldbl.c)
|
|
3
|
+
* Description: A re-implementation of the old baseline algorithm.
|
|
4
|
+
* Author: Ray Smith
|
|
5
|
+
* Created: Wed Oct 6 09:41:48 BST 1993
|
|
6
|
+
*
|
|
7
|
+
* (C) Copyright 1993, Hewlett-Packard Ltd.
|
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
** you may not use this file except in compliance with the License.
|
|
10
|
+
** You may obtain a copy of the License at
|
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
** See the License for the specific language governing permissions and
|
|
16
|
+
** limitations under the License.
|
|
17
|
+
*
|
|
18
|
+
**********************************************************************/
|
|
19
|
+
|
|
20
|
+
#include "mfcpch.h"
|
|
21
|
+
#include "statistc.h"
|
|
22
|
+
#include "quadlsq.h"
|
|
23
|
+
#include "lmedsq.h"
|
|
24
|
+
#include "makerow.h"
|
|
25
|
+
#include "drawtord.h"
|
|
26
|
+
#include "oldbasel.h"
|
|
27
|
+
#include "tprintf.h"
|
|
28
|
+
|
|
29
|
+
#define EXTERN
|
|
30
|
+
|
|
31
|
+
// Tunable parameters for the old baseline algorithm.
// Each entry is (name, default value, description string).
EXTERN BOOL_VAR(textord_really_old_xheight, FALSE, "Use original wiseowl xheight");
EXTERN BOOL_VAR(textord_oldbl_debug, FALSE, "Debug old baseline generation");
EXTERN BOOL_VAR(textord_debug_baselines, FALSE, "Debug baseline generation");
EXTERN BOOL_VAR(textord_oldbl_paradef, TRUE, "Use para default mechanism");
EXTERN BOOL_VAR(textord_oldbl_split_splines, TRUE, "Split stepped splines");
EXTERN BOOL_VAR(textord_oldbl_merge_parts, TRUE, "Merge suspect partitions");
EXTERN BOOL_VAR(oldbl_corrfix, TRUE, "Improve correlation of heights");
EXTERN BOOL_VAR(oldbl_xhfix, FALSE, "Fix bug in modes threshold for xheights");
EXTERN BOOL_VAR(textord_ocropus_mode, FALSE, "Make baselines for ocropus");
EXTERN double_VAR(oldbl_xhfract, 0.4, "Fraction of est allowed in calc");
EXTERN INT_VAR(oldbl_holed_losscount, 10, "Max lost before fallback line used");
EXTERN double_VAR(oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot");
EXTERN double_VAR(textord_oldbl_jumplimit, 0.15, "X fraction for new partition");
|
|
48
|
+
|
|
49
|
+
// Fixed constants of the old baseline algorithm.
#define TURNLIMIT          1     // min size for turning point
#define X_HEIGHT_FRACTION  0.7   // x-height / caps height
#define DESCENDER_FRACTION 0.5   // descender / x-height
#define MIN_ASC_FRACTION   0.20  // min size of ascenders
#define MIN_DESC_FRACTION  0.25  // min size of descenders
#define MINASCRISE         2.0   // min ascender/desc step
#define MAXHEIGHTVARIANCE  0.15  // accepted variation in x-height
#define MAXHEIGHT          300   // max blob height
#define MAXOVERLAP         0.1   // max 10% missed overlap
#define MAXBADRUN          2     // max non best for failed
#define HEIGHTBUCKETS      200   // num of buckets
#define DELTAHEIGHT        5.0   // small amount of diff
#define GOODHEIGHT         5
#define MAXLOOPS           10
#define MODENUM            10
#define MAXPARTS           6     // size of the per-partition arrays (partsizes, partdiffs)
#define SPLINESIZE         23    // size of the turning-point / segment-boundary arrays

// Absolute value. The argument is evaluated twice, so it must be free of
// side effects.
#define ABS(x) ((x)<0 ? (-(x)) : (x))
|
|
68
|
+
|
|
69
|
+
/**********************************************************************
|
|
70
|
+
* make_old_baselines
|
|
71
|
+
*
|
|
72
|
+
* Top level function to make baselines the old way.
|
|
73
|
+
**********************************************************************/
|
|
74
|
+
|
|
75
|
+
/**
 * Fit a baseline to every row of the block the old way, then correlate the
 * row heights via correlate_lines().
 *
 * Each row is first fitted with no starting spline. If that leaves a
 * non-positive xheight and the previous row was fitted successfully, the fit
 * is retried starting from the previous row's baseline.
 *
 * @param block       block whose rows are processed
 * @param testing_on  "correct orientation" flag; not read by this function
 */
void make_old_baselines(TO_BLOCK *block, BOOL8 testing_on) {
  QSPLINE *last_good = NULL;   // baseline of the previous row, if it succeeded
  TO_ROW_IT row_it = block->get_rows ();
  BLOBNBOX_IT blob_it;

  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    TO_ROW *row = row_it.data ();

    find_textlines (block, row, 2, NULL);
    if (last_good != NULL && row->xheight <= 0) {
      // Second chance: seed the fit with the previous good baseline.
      find_textlines (block, row, 2, last_good);
    }

    if (row->xheight > 0) {
      last_good = &row->baseline;   // was a good one
      continue;
    }

    // Fit failed: do not hand this row's baseline to the next row.
    last_good = NULL;
    blob_it.set_to_list (row->blob_list ());
    if (textord_debug_baselines) {
      tprintf ("Row baseline generation failed on row at (%d,%d)\n",
               blob_it.data ()->bounding_box ().left (),
               blob_it.data ()->bounding_box ().bottom ());
    }
  }
  correlate_lines(block);
}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
/**********************************************************************
|
|
107
|
+
* correlate_lines
|
|
108
|
+
*
|
|
109
|
+
* Correlate the x-heights and ascender heights of a block to fill-in
|
|
110
|
+
* the ascender height and descender height for rows without one.
|
|
111
|
+
* Also fix baselines of rows without a decent fit.
|
|
112
|
+
**********************************************************************/
|
|
113
|
+
|
|
114
|
+
/**
 * Clean up the rows of a block: retry badly fitted rows using their
 * neighbours, fill in ascender/descender heights from block statistics, and
 * set block->xheight.
 *
 * With no rows, block->xheight is set to block->line_size. Otherwise it is
 * the value returned by correlate_with_stats(); if that is <= 0 it falls back
 * to line_size * textord_merge_x, and it is never left below
 * textord_min_xheight.
 *
 * @param block  block to process
 */
void correlate_lines(TO_BLOCK *block) {
  TO_ROW_IT row_it = block->get_rows ();
  int rowcount = row_it.length ();

  if (rowcount == 0) {
    block->xheight = block->line_size;   // default value, nothing to do
    return;
  }

  // Flatten the row list into an array so rows can be addressed by index.
  TO_ROW **rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
  int filled = 0;
  row_it.mark_cycle_pt ();
  while (!row_it.cycled_list ()) {
    rows[filled] = row_it.data ();
    filled++;
    row_it.forward ();
  }

  correlate_neighbours(block, rows, rowcount);   // try to fix bad lines

  block->xheight = (float) correlate_with_stats (rows, rowcount);
  if (block->xheight <= 0) {
    // Statistics gave nothing usable: derive from the line size.
    block->xheight = block->line_size * textord_merge_x;
  }
  if (block->xheight < textord_min_xheight) {
    block->xheight = (float) textord_min_xheight;
  }

  free_mem(rows);
}
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
/**********************************************************************
|
|
151
|
+
* correlate_neighbours
|
|
152
|
+
*
|
|
153
|
+
* Try to fix rows that had a bad spline fit by using neighbours.
|
|
154
|
+
**********************************************************************/
|
|
155
|
+
|
|
156
|
+
/**
 * Try to fix rows that had a bad spline fit (negative xheight) by refitting
 * them from a neighbouring row's baseline.
 *
 * For each failed row, a usable row above and a usable row below are located
 * first: one with non-negative xheight whose baseline overlaps this row's
 * baseline (MAXOVERLAP). The search above starts two rows back. The row is
 * then refitted with degree 2 from the upper neighbour, then from the lower
 * neighbour if still failed, and finally with degree 1 from whichever
 * neighbour exists (upper preferred).
 *
 * Afterwards every xheight that is still negative is negated, so callers
 * see only non-negative values.
 *
 * @param block     block the rows are in
 * @param rows      rows of the block
 * @param rowcount  number of entries in rows
 */
void correlate_neighbours(TO_BLOCK *block, TO_ROW **rows, int rowcount) {
  for (int rowindex = 0; rowindex < rowcount; rowindex++) {
    TO_ROW *row = rows[rowindex];
    if (row->xheight >= 0)
      continue;                  // fit succeeded, nothing to repair

    // Both neighbours are chosen before any refit, because refitting
    // rewrites row->baseline, which the overlap test reads.
    int upperrow = rowindex - 2;
    while (upperrow >= 0
           && (rows[upperrow]->xheight < 0.0
               || !row->baseline.overlap (&rows[upperrow]->baseline,
                                          MAXOVERLAP))) {
      upperrow--;
    }

    int lowerrow = rowindex + 1;
    while (lowerrow < rowcount
           && (rows[lowerrow]->xheight < 0.0
               || !row->baseline.overlap (&rows[lowerrow]->baseline,
                                          MAXOVERLAP))) {
      lowerrow++;
    }

    const bool have_upper = upperrow >= 0;
    const bool have_lower = lowerrow < rowcount;

    // Quadratic attempts.
    if (have_upper)
      find_textlines (block, row, 2, &rows[upperrow]->baseline);
    if (row->xheight < 0 && have_lower)
      find_textlines (block, row, 2, &rows[lowerrow]->baseline);

    // Linear fallback.
    if (row->xheight < 0) {
      if (have_upper)
        find_textlines (block, row, 1, &rows[upperrow]->baseline);
      else if (have_lower)
        find_textlines (block, row, 1, &rows[lowerrow]->baseline);
    }
  }

  // Linear failed too: make do with the magnitude of the estimate.
  for (int rowindex = 0; rowindex < rowcount; rowindex++) {
    TO_ROW *row = rows[rowindex];
    if (row->xheight < 0)
      row->xheight = -row->xheight;
  }
}
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
/**********************************************************************
|
|
208
|
+
* correlate_with_stats
|
|
209
|
+
*
|
|
210
|
+
* correlate the x-heights and ascender heights of a block to fill-in
|
|
211
|
+
* the ascender height and descender height for rows without one.
|
|
212
|
+
**********************************************************************/
|
|
213
|
+
|
|
214
|
+
/**
 * Correlate the x-heights and ascender heights of a block's rows, filling in
 * the ascender rise and descender drop for rows that lack one.
 *
 * Pass 1 averages xheight/ascrise over rows with ascrise > 0, xheight over
 * the remaining rows (assumed full height), and descdrop over rows with
 * descdrop < 0. Pass 2 rewrites xheight, ascrise, descdrop and all_caps of
 * each row from those averages.
 *
 * @param rows      rows of the block
 * @param rowcount  number of entries in rows
 * @return the block x-height estimate truncated to int; 0 when rowcount <= 0
 *         (previously this divided 0.0 by 0 and cast the NaN to int)
 */
int correlate_with_stats(TO_ROW **rows, int rowcount) {
  TO_ROW *row;                   /*current row */
  int rowindex;                  /*no of row */
  float lineheight;              /*mean x-height */
  float ascheight;               /*average ascenders */
  float minascheight;            /*min allowed ascheight */
  int xcount;                    /*no of samples for xheight */
  float fullheight;              /*mean top height */
  int fullcount;                 /*no of samples */
  float descheight;              /*mean descender drop */
  float mindescheight;           /*min allowed descheight */
  int desccount;                 /*no of samples */

  if (rowcount <= 0)
    return 0;                    /*no rows: nothing to average */

  xcount = fullcount = desccount = 0;
  lineheight = ascheight = fullheight = descheight = 0.0;
  for (rowindex = 0; rowindex < rowcount; rowindex++) {
    row = rows[rowindex];
    if (row->ascrise > 0.0) {    /*got ascenders? */
      lineheight += row->xheight;
      ascheight += row->ascrise;
      xcount++;
    }
    else {
      fullheight += row->xheight;/*assume full height */
      fullcount++;
    }
    if (row->descdrop < 0.0) {   /*got descenders? */
      descheight += row->descdrop;
      desccount++;
    }
  }

  if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
    lineheight /= xcount;        /*average x-height */
    /*average caps height */
    fullheight = lineheight + ascheight / xcount;
    /*must be decent size */
    if (fullheight < lineheight * (1 + MIN_ASC_FRACTION))
      fullheight = lineheight * (1 + MIN_ASC_FRACTION);
  }
  else {
    /* Reaching here implies fullcount > 0, since xcount + fullcount ==
       rowcount > 0 and either xcount == 0 or xcount < fullcount. */
    fullheight /= fullcount;     /*average max height */
    /*guess x-height */
    lineheight = fullheight * X_HEIGHT_FRACTION;
  }
  if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))
    descheight /= desccount;     /*average descenders */
  else
    /*guess descenders */
    descheight = -lineheight * DESCENDER_FRACTION;

  minascheight = lineheight * MIN_ASC_FRACTION;
  mindescheight = -lineheight * MIN_DESC_FRACTION;
  for (rowindex = 0; rowindex < rowcount; rowindex++) {
    row = rows[rowindex];
    row->all_caps = FALSE;
    if (row->ascrise / row->xheight < MIN_ASC_FRACTION) {
      /*no ascenders */
      if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
          && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) {
        /*height matches the mean x-height: set to average */
        row->ascrise = fullheight - lineheight;
        row->xheight = lineheight;
      }
      else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE)
               && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) {
        /*height matches the full height: treat as all caps */
        row->ascrise = row->xheight - lineheight;
        row->xheight = lineheight;
        row->all_caps = TRUE;
      }
      else {
        /*neither: scale the average proportions to this row */
        row->ascrise = (fullheight - lineheight) * row->xheight
          / fullheight;
        row->xheight -= row->ascrise;
        row->all_caps = TRUE;
      }
      if (row->ascrise < minascheight)
        row->ascrise =
          row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION);
    }
    if (row->descdrop > mindescheight) {
      /*descender too small: replace it. This test reads the xheight as
        possibly rewritten just above. */
      if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
          && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE))
        row->descdrop = descheight;  /*set to average */
      else
        row->descdrop = -row->xheight * DESCENDER_FRACTION;
    }
  }
  return (int) lineheight;       //block xheight
}
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
/**********************************************************************
|
|
321
|
+
* find_textlines
|
|
322
|
+
*
|
|
323
|
+
* Compute the baseline for the given row.
|
|
324
|
+
**********************************************************************/
|
|
325
|
+
|
|
326
|
+
/**
 * Compute the baseline for the given row and then its x-height.
 *
 * Steps: collect blob boxes (get_blob_coords), derive jumplimit from the
 * estimated line height (never below MINASCRISE), make a first baseline
 * (make_holed_baseline when too many blobs were lost, otherwise
 * make_first_baseline). With more than one blob the blobs are partitioned
 * and a spline of the requested degree is fitted to the best partition.
 * With at most one blob the row is marked failed (xheight = -1, descdrop
 * and ascrise = 0). The baseline is then extrapolated across the block's
 * bounding box and the x-height is computed.
 *
 * @param block   block the row is in
 * @param row     row to process; baseline, xheight, ascrise, descdrop are
 *                written
 * @param degree  required approximation degree for the fitted spline
 * @param spline  starting spline, or NULL for none
 */
void find_textlines(TO_BLOCK *block, TO_ROW *row, int degree, QSPLINE *spline) {
  int partcount;                 /*no of partitions of */
  // Must start FALSE: get_blob_coords() returns early on an empty row, and
  // this flag is read unconditionally below.
  BOOL8 holed_line = FALSE;      //lost too many blobs
  int bestpart;                  /*biggest partition */
  char *partids;                 /*partition no of each blob */
  int partsizes[MAXPARTS];       /*no in each partition */
  int lineheight;                /*guessed x-height */
  float jumplimit;               /*allowed delta change */
  int *xcoords;                  /*useful sample points */
  int *ycoords;                  /*useful sample points */
  TBOX *blobcoords;              /*edges of blob rectangles */
  int blobcount;                 /*no of blobs on line */
  float *ydiffs;                 /*diffs from 1st approx */
  int pointcount;                /*no of coords */
  int xstarts[SPLINESIZE + 1];   //segment boundaries
  int segments;                  //no of segments

  // Work arrays are sized by the raw blob count; get_blob_coords() may
  // reduce blobcount through its reference parameter.
  blobcount = row->blob_list ()->length ();
  partids = (char *) alloc_mem (blobcount * sizeof (char));
  xcoords = (int *) alloc_mem (blobcount * sizeof (int));
  ycoords = (int *) alloc_mem (blobcount * sizeof (int));
  blobcoords = (TBOX *) alloc_mem (blobcount * sizeof (TBOX));
  ydiffs = (float *) alloc_mem (blobcount * sizeof (float));

  lineheight = get_blob_coords (row, (int) block->line_size, blobcoords,
                                holed_line, blobcount);
  /*limit for line change */
  jumplimit = lineheight * textord_oldbl_jumplimit;
  if (jumplimit < MINASCRISE)
    jumplimit = MINASCRISE;

  if (textord_oldbl_debug) {
    tprintf
      ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
       block->line_size, lineheight, jumplimit);
  }

  if (holed_line)
    make_holed_baseline (blobcoords, blobcount, spline, &row->baseline,
                         row->line_m ());
  else
    make_first_baseline (blobcoords, blobcount,
                         xcoords, ycoords, spline, &row->baseline, jumplimit);
#ifndef GRAPHICS_DISABLED
  if (textord_show_final_rows)
    row->baseline.plot (to_win, ScrollView::GOLDENROD);
#endif

  if (blobcount > 1) {
    bestpart = partition_line (blobcoords, blobcount,
                               &partcount, partids, partsizes,
                               &row->baseline, jumplimit, ydiffs);
    pointcount = partition_coords (blobcoords, blobcount,
                                   partids, bestpart, xcoords, ycoords);
    segments = segment_spline (blobcoords, blobcount,
                               xcoords, ycoords,
                               degree, pointcount, xstarts);
    if (!holed_line) {
      // Refit for as long as split_stepped_spline() reports a split.
      do {
        row->baseline = QSPLINE (xstarts, segments,
                                 xcoords, ycoords, pointcount, degree);
      }
      while (textord_oldbl_split_splines
             && split_stepped_spline (&row->baseline, jumplimit / 2,
                                      xcoords, xstarts, segments));
    }
    find_lesser_parts(row,
                      blobcoords,
                      blobcount,
                      partids,
                      partsizes,
                      partcount,
                      bestpart);
  }
  else {
    row->xheight = -1.0f;        /*failed */
    row->descdrop = 0.0f;
    row->ascrise = 0.0f;
  }

  row->baseline.extrapolate (row->line_m (),
                             block->block->bounding_box ().left (),
                             block->block->bounding_box ().right ());
  if (textord_really_old_xheight)
    old_first_xheight (row, blobcoords, lineheight,
                       blobcount, &row->baseline, jumplimit);
  else
    make_first_xheight (row, blobcoords, lineheight, (int) block->line_size,
                        blobcount, &row->baseline, jumplimit);

  free_mem(partids);
  free_mem(xcoords);
  free_mem(ycoords);
  free_mem(blobcoords);
  free_mem(ydiffs);
}
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
/**********************************************************************
|
|
428
|
+
* get_blob_coords
|
|
429
|
+
*
|
|
430
|
+
* Fill the blobcoords array with the coordinates of the blobs
|
|
431
|
+
* in the row. The return value is the first guess at the line height.
|
|
432
|
+
**********************************************************************/
|
|
433
|
+
|
|
434
|
+
/**
 * Fill blobcoords with the boxes of the row's blobs and return a first guess
 * at the line height.
 *
 * Each box comes from box_next_pre_chopped(). A box is kept (the output
 * index advances) if it is the first one, is taller than a quarter of
 * lineheight, is the last one of the list, or is dot-like (height and width
 * within a factor of oldbl_dot_error_size of each other). Any other box is
 * counted as lost and its slot is overwritten by the next box.
 *
 * @param row         row to use
 * @param lineheight  block-level line size
 * @param blobcoords  output boxes; the caller must provide room for one
 *                    entry per blob in the row
 * @param holed_line  output: TRUE if the longest run of consecutive lost
 *                    blobs exceeds oldbl_holed_losscount
 * @param outcount    output: number of boxes kept
 * @return heightstat.ile(0.25) over the tall boxes when more than one was
 *         recorded, otherwise the height of the first box; 0 for an empty
 *         row (holed_line is then FALSE and outcount 0)
 */
int get_blob_coords(TO_ROW *row, inT32 lineheight, TBOX *blobcoords,
                    BOOL8 &holed_line, int &outcount) {
  BLOBNBOX_IT blob_it = row->blob_list ();
  int blobindex;                 /*no along text line */
  int losscount;                 //current run of lost blobs
  int maxlosscount;              //longest run of lost blobs
  STATS heightstat (0, MAXHEIGHT);  /*height stat collection */

  if (blob_it.empty ()) {
    // Define both outputs so callers never read indeterminate values.
    holed_line = FALSE;
    outcount = 0;
    return 0;                    //none
  }

  maxlosscount = 0;
  losscount = 0;
  blobindex = 0;
  blob_it.mark_cycle_pt ();
  do {
    blobcoords[blobindex] = box_next_pre_chopped (&blob_it);
    const bool tall = blobcoords[blobindex].height () > lineheight * 0.25;
    if (tall)
      heightstat.add (blobcoords[blobindex].height (), 1);

    if (blobindex == 0 || tall || blob_it.cycled_list ()) {
      blobindex++;               /*keep it */
      losscount = 0;
    }
    else if (blobcoords[blobindex].height ()
             < blobcoords[blobindex].width () * oldbl_dot_error_size
             && blobcoords[blobindex].width ()
             < blobcoords[blobindex].height () * oldbl_dot_error_size) {
      blobindex++;               //counts as dot
      losscount = 0;
    }
    else {
      losscount++;               //lost it
      if (losscount > maxlosscount)
        maxlosscount = losscount;  //remember max
    }
  }
  while (!blob_it.cycled_list ());

  holed_line = maxlosscount > oldbl_holed_losscount;
  outcount = blobindex;          /*total blobs */

  if (heightstat.get_total () > 1)
    return (int) heightstat.ile (0.25);  /*guess x-height */
  return blobcoords[0].height ();
}
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
/**********************************************************************
|
|
496
|
+
* make_first_baseline
|
|
497
|
+
*
|
|
498
|
+
* Make the first estimate at a baseline, either by shifting
|
|
499
|
+
* a supplied previous spline, or by doing a piecewise linear
|
|
500
|
+
* approximation using all the blobs.
|
|
501
|
+
**********************************************************************/
|
|
502
|
+
|
|
503
|
+
void
|
|
504
|
+
make_first_baseline ( //initial approximation
|
|
505
|
+
TBOX blobcoords[], /*blob bounding boxes */
|
|
506
|
+
int blobcount, /*no of blobcoords */
|
|
507
|
+
int xcoords[], /*coords for spline */
|
|
508
|
+
int ycoords[], /*approximator */
|
|
509
|
+
QSPLINE * spline, /*initial spline */
|
|
510
|
+
QSPLINE * baseline, /*output spline */
|
|
511
|
+
float jumplimit /*guess half descenders */
|
|
512
|
+
) {
|
|
513
|
+
int leftedge; /*left edge of line */
|
|
514
|
+
int rightedge; /*right edge of line */
|
|
515
|
+
int blobindex; /*current blob */
|
|
516
|
+
int segment; /*current segment */
|
|
517
|
+
float prevy, thisy, nexty; /*3 y coords */
|
|
518
|
+
float y1, y2, y3; /*3 smooth blobs */
|
|
519
|
+
float maxmax, minmin; /*absolute limits */
|
|
520
|
+
int x2 = 0; /*right edge of old y3 */
|
|
521
|
+
int ycount; /*no of ycoords in use */
|
|
522
|
+
float yturns[SPLINESIZE]; /*y coords of turn pts */
|
|
523
|
+
int xturns[SPLINESIZE]; /*xcoords of turn pts */
|
|
524
|
+
int xstarts[SPLINESIZE + 1];
|
|
525
|
+
int segments; //no of segments
|
|
526
|
+
ICOORD shift; //shift of spline
|
|
527
|
+
|
|
528
|
+
prevy = 0;
|
|
529
|
+
/*left edge of row */
|
|
530
|
+
leftedge = blobcoords[0].left ();
|
|
531
|
+
/*right edge of line */
|
|
532
|
+
rightedge = blobcoords[blobcount - 1].right ();
|
|
533
|
+
if (spline == NULL /*no given spline */
|
|
534
|
+
|| spline->segments < 3 /*or trivial */
|
|
535
|
+
/*or too non-overlap */
|
|
536
|
+
|| spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
|
|
537
|
+
|| spline->xcoords[spline->segments - 1] < rightedge
|
|
538
|
+
- MAXOVERLAP * (rightedge - leftedge)) {
|
|
539
|
+
if (textord_oldbl_paradef)
|
|
540
|
+
return; //use default
|
|
541
|
+
xstarts[0] = blobcoords[0].left () - 1;
|
|
542
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
|
543
|
+
xcoords[blobindex] = (blobcoords[blobindex].left ()
|
|
544
|
+
+ blobcoords[blobindex].right ()) / 2;
|
|
545
|
+
ycoords[blobindex] = blobcoords[blobindex].bottom ();
|
|
546
|
+
}
|
|
547
|
+
xstarts[1] = blobcoords[blobcount - 1].right () + 1;
|
|
548
|
+
segments = 1; /*no of segments */
|
|
549
|
+
|
|
550
|
+
/*linear */
|
|
551
|
+
*baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
|
|
552
|
+
|
|
553
|
+
if (blobcount >= 3) {
|
|
554
|
+
y1 = y2 = y3 = 0.0f;
|
|
555
|
+
ycount = 0;
|
|
556
|
+
segment = 0; /*no of segments */
|
|
557
|
+
maxmax = minmin = 0.0f;
|
|
558
|
+
thisy = ycoords[0] - baseline->y (xcoords[0]);
|
|
559
|
+
nexty = ycoords[1] - baseline->y (xcoords[1]);
|
|
560
|
+
for (blobindex = 2; blobindex < blobcount; blobindex++) {
|
|
561
|
+
prevy = thisy; /*shift ycoords */
|
|
562
|
+
thisy = nexty;
|
|
563
|
+
nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
|
|
564
|
+
/*middle of smooth y */
|
|
565
|
+
if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
|
|
566
|
+
y1 = y2; /*shift window */
|
|
567
|
+
y2 = y3;
|
|
568
|
+
y3 = thisy; /*middle point */
|
|
569
|
+
ycount++;
|
|
570
|
+
/*local max */
|
|
571
|
+
if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
|
|
572
|
+
/*local min */
|
|
573
|
+
|| (y1 > y2 && y2 <= y3))) {
|
|
574
|
+
if (segment < SPLINESIZE - 2) {
|
|
575
|
+
/*turning pt */
|
|
576
|
+
xturns[segment] = x2;
|
|
577
|
+
yturns[segment] = y2;
|
|
578
|
+
segment++; /*no of spline segs */
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
if (ycount == 1) {
|
|
582
|
+
maxmax = minmin = y3;/*initialise limits */
|
|
583
|
+
}
|
|
584
|
+
else {
|
|
585
|
+
if (y3 > maxmax)
|
|
586
|
+
maxmax = y3; /*biggest max */
|
|
587
|
+
if (y3 < minmin)
|
|
588
|
+
minmin = y3; /*smallest min */
|
|
589
|
+
}
|
|
590
|
+
/*possible turning pt */
|
|
591
|
+
x2 = blobcoords[blobindex - 1].right ();
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
jumplimit *= 1.2;
|
|
596
|
+
/*must be wavy */
|
|
597
|
+
if (maxmax - minmin > jumplimit) {
|
|
598
|
+
ycount = segment; /*no of segments */
|
|
599
|
+
for (blobindex = 0, segment = 1; blobindex < ycount;
|
|
600
|
+
blobindex++) {
|
|
601
|
+
if (yturns[blobindex] > minmin + jumplimit
|
|
602
|
+
|| yturns[blobindex] < maxmax - jumplimit) {
|
|
603
|
+
/*significant peak */
|
|
604
|
+
if (segment == 1
|
|
605
|
+
|| yturns[blobindex] > prevy + jumplimit
|
|
606
|
+
|| yturns[blobindex] < prevy - jumplimit) {
|
|
607
|
+
/*different to previous */
|
|
608
|
+
xstarts[segment] = xturns[blobindex];
|
|
609
|
+
segment++;
|
|
610
|
+
prevy = yturns[blobindex];
|
|
611
|
+
}
|
|
612
|
+
/*bigger max */
|
|
613
|
+
else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
|
|
614
|
+
/*smaller min */
|
|
615
|
+
|| (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
|
|
616
|
+
xstarts[segment - 1] = xturns[blobindex];
|
|
617
|
+
/*improved previous */
|
|
618
|
+
prevy = yturns[blobindex];
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
|
|
623
|
+
segments = segment; /*no of segments */
|
|
624
|
+
/*linear */
|
|
625
|
+
*baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
else {
|
|
630
|
+
*baseline = *spline; /*copy it */
|
|
631
|
+
shift = ICOORD (0, (inT16) (blobcoords[0].bottom ()
|
|
632
|
+
- spline->y (blobcoords[0].right ())));
|
|
633
|
+
baseline->move (shift);
|
|
634
|
+
}
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
/**********************************************************************
|
|
639
|
+
* make_holed_baseline
|
|
640
|
+
*
|
|
641
|
+
* Make the first estimate at a baseline for a row that lost many blobs,
|
|
642
|
+
* either by shifting a supplied previous spline, or by fitting a straight
|
|
643
|
+
* line of the given gradient to the blob bottoms.
|
|
644
|
+
**********************************************************************/
|
|
645
|
+
|
|
646
|
+
void
|
|
647
|
+
make_holed_baseline ( //initial approximation
|
|
648
|
+
TBOX blobcoords[], /*blob bounding boxes */
|
|
649
|
+
int blobcount, /*no of blobcoords */
|
|
650
|
+
QSPLINE * spline, /*initial spline */
|
|
651
|
+
QSPLINE * baseline, /*output spline */
|
|
652
|
+
float gradient //of line
|
|
653
|
+
) {
|
|
654
|
+
int leftedge; /*left edge of line */
|
|
655
|
+
int rightedge; /*right edge of line */
|
|
656
|
+
int blobindex; /*current blob */
|
|
657
|
+
float x; //centre of row
|
|
658
|
+
ICOORD shift; //shift of spline
|
|
659
|
+
|
|
660
|
+
LMS lms(blobcount); //straight baseline
|
|
661
|
+
inT32 xstarts[2]; //straight line
|
|
662
|
+
double coeffs[3];
|
|
663
|
+
float c; //line parameter
|
|
664
|
+
|
|
665
|
+
/*left edge of row */
|
|
666
|
+
leftedge = blobcoords[0].left ();
|
|
667
|
+
/*right edge of line */
|
|
668
|
+
rightedge = blobcoords[blobcount - 1].right ();
|
|
669
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
|
670
|
+
lms.add (FCOORD ((blobcoords[blobindex].left () +
|
|
671
|
+
blobcoords[blobindex].right ()) / 2.0,
|
|
672
|
+
blobcoords[blobindex].bottom ()));
|
|
673
|
+
}
|
|
674
|
+
lms.constrained_fit (gradient, c);
|
|
675
|
+
xstarts[0] = leftedge;
|
|
676
|
+
xstarts[1] = rightedge;
|
|
677
|
+
coeffs[0] = 0;
|
|
678
|
+
coeffs[1] = gradient;
|
|
679
|
+
coeffs[2] = c;
|
|
680
|
+
*baseline = QSPLINE (1, xstarts, coeffs);
|
|
681
|
+
if (spline != NULL /*no given spline */
|
|
682
|
+
&& spline->segments >= 3 /*or trivial */
|
|
683
|
+
/*or too non-overlap */
|
|
684
|
+
&& spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge)
|
|
685
|
+
&& spline->xcoords[spline->segments - 1] >= rightedge
|
|
686
|
+
- MAXOVERLAP * (rightedge - leftedge)) {
|
|
687
|
+
*baseline = *spline; /*copy it */
|
|
688
|
+
x = (leftedge + rightedge) / 2.0;
|
|
689
|
+
shift = ICOORD (0, (inT16) (gradient * x + c - spline->y (x)));
|
|
690
|
+
baseline->move (shift);
|
|
691
|
+
}
|
|
692
|
+
}
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
/**********************************************************************
|
|
696
|
+
* partition_line
|
|
697
|
+
*
|
|
698
|
+
* Partition a row of blobs into different groups of continuous
|
|
699
|
+
* y position. jumplimit specifies the max allowable limit on a jump
|
|
700
|
+
* before a new partition is started.
|
|
701
|
+
* The return value is the biggest partition
|
|
702
|
+
**********************************************************************/
|
|
703
|
+
|
|
704
|
+
int
|
|
705
|
+
partition_line ( //partition blobs
|
|
706
|
+
TBOX blobcoords[], //bounding boxes
|
|
707
|
+
int blobcount, /*no of blobs on row */
|
|
708
|
+
int *numparts, /*number of partitions */
|
|
709
|
+
char partids[], /*partition no of each blob */
|
|
710
|
+
int partsizes[], /*no in each partition */
|
|
711
|
+
QSPLINE * spline, /*curve to fit to */
|
|
712
|
+
float jumplimit, /*allowed delta change */
|
|
713
|
+
float ydiffs[] /*diff from spline */
|
|
714
|
+
) {
|
|
715
|
+
register int blobindex; /*no along text line */
|
|
716
|
+
int bestpart; /*best new partition */
|
|
717
|
+
int biggestpart; /*part with most members */
|
|
718
|
+
float diff; /*difference from line */
|
|
719
|
+
int startx; /*index of start blob */
|
|
720
|
+
float partdiffs[MAXPARTS]; /*step between parts */
|
|
721
|
+
|
|
722
|
+
for (bestpart = 0; bestpart < MAXPARTS; bestpart++)
|
|
723
|
+
partsizes[bestpart] = 0; /*zero them all */
|
|
724
|
+
|
|
725
|
+
startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
|
|
726
|
+
*numparts = 1; /*1 partition */
|
|
727
|
+
bestpart = -1; /*first point */
|
|
728
|
+
for (blobindex = startx; blobindex < blobcount; blobindex++) {
|
|
729
|
+
/*do each blob in row */
|
|
730
|
+
diff = ydiffs[blobindex]; /*diff from line */
|
|
731
|
+
if (textord_oldbl_debug) {
|
|
732
|
+
tprintf ("%d(%d,%d), ", blobindex,
|
|
733
|
+
blobcoords[blobindex].left (),
|
|
734
|
+
blobcoords[blobindex].bottom ());
|
|
735
|
+
}
|
|
736
|
+
bestpart =
|
|
737
|
+
choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
|
|
738
|
+
/*record partition */
|
|
739
|
+
partids[blobindex] = bestpart;
|
|
740
|
+
partsizes[bestpart]++; /*another in it */
|
|
741
|
+
}
|
|
742
|
+
|
|
743
|
+
bestpart = -1; /*first point */
|
|
744
|
+
partsizes[0]--; /*doing 1st pt again */
|
|
745
|
+
/*do each blob in row */
|
|
746
|
+
for (blobindex = startx; blobindex >= 0; blobindex--) {
|
|
747
|
+
diff = ydiffs[blobindex]; /*diff from line */
|
|
748
|
+
if (textord_oldbl_debug) {
|
|
749
|
+
tprintf ("%d(%d,%d), ", blobindex,
|
|
750
|
+
blobcoords[blobindex].left (),
|
|
751
|
+
blobcoords[blobindex].bottom ());
|
|
752
|
+
}
|
|
753
|
+
bestpart =
|
|
754
|
+
choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
|
|
755
|
+
/*record partition */
|
|
756
|
+
partids[blobindex] = bestpart;
|
|
757
|
+
partsizes[bestpart]++; /*another in it */
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
|
|
761
|
+
if (partsizes[bestpart] >= partsizes[biggestpart])
|
|
762
|
+
biggestpart = bestpart; /*new biggest */
|
|
763
|
+
if (textord_oldbl_merge_parts)
|
|
764
|
+
merge_oldbl_parts(blobcoords,
|
|
765
|
+
blobcount,
|
|
766
|
+
partids,
|
|
767
|
+
partsizes,
|
|
768
|
+
biggestpart,
|
|
769
|
+
jumplimit);
|
|
770
|
+
return biggestpart; /*biggest partition */
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
/**********************************************************************
|
|
775
|
+
* merge_oldbl_parts
|
|
776
|
+
*
|
|
777
|
+
* For any adjacent group of blobs in a different part, put them in the
|
|
778
|
+
* main part if they fit closely to neighbours in the main part.
|
|
779
|
+
**********************************************************************/
|
|
780
|
+
|
|
781
|
+
void
|
|
782
|
+
merge_oldbl_parts ( //partition blobs
|
|
783
|
+
TBOX blobcoords[], //bounding boxes
|
|
784
|
+
int blobcount, /*no of blobs on row */
|
|
785
|
+
char partids[], /*partition no of each blob */
|
|
786
|
+
int partsizes[], /*no in each partition */
|
|
787
|
+
int biggestpart, //major partition
|
|
788
|
+
float jumplimit /*allowed delta change */
|
|
789
|
+
) {
|
|
790
|
+
BOOL8 found_one; //found a bestpart blob
|
|
791
|
+
BOOL8 close_one; //found was close enough
|
|
792
|
+
register int blobindex; /*no along text line */
|
|
793
|
+
int prevpart; //previous iteration
|
|
794
|
+
int runlength; //no in this part
|
|
795
|
+
float diff; /*difference from line */
|
|
796
|
+
int startx; /*index of start blob */
|
|
797
|
+
int test_blob; //another index
|
|
798
|
+
FCOORD coord; //blob coordinate
|
|
799
|
+
float m, c; //fitted line
|
|
800
|
+
QLSQ stats; //line stuff
|
|
801
|
+
|
|
802
|
+
prevpart = biggestpart;
|
|
803
|
+
runlength = 0;
|
|
804
|
+
startx = 0;
|
|
805
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
|
806
|
+
if (partids[blobindex] != prevpart) {
|
|
807
|
+
// tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
|
|
808
|
+
// blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
|
|
809
|
+
// prevpart,partids[blobindex],runlength);
|
|
810
|
+
if (prevpart != biggestpart && runlength > MAXBADRUN) {
|
|
811
|
+
stats.clear ();
|
|
812
|
+
for (test_blob = startx; test_blob < blobindex; test_blob++) {
|
|
813
|
+
coord = FCOORD ((blobcoords[test_blob].left ()
|
|
814
|
+
+ blobcoords[test_blob].right ()) / 2.0,
|
|
815
|
+
blobcoords[test_blob].bottom ());
|
|
816
|
+
stats.add (coord.x (), coord.y ());
|
|
817
|
+
}
|
|
818
|
+
stats.fit (1);
|
|
819
|
+
m = stats.get_b ();
|
|
820
|
+
c = stats.get_c ();
|
|
821
|
+
if (textord_oldbl_debug)
|
|
822
|
+
tprintf ("Fitted line y=%g x + %g\n", m, c);
|
|
823
|
+
found_one = FALSE;
|
|
824
|
+
close_one = FALSE;
|
|
825
|
+
for (test_blob = 1; !found_one
|
|
826
|
+
&& (startx - test_blob >= 0
|
|
827
|
+
|| blobindex + test_blob <= blobcount); test_blob++) {
|
|
828
|
+
if (startx - test_blob >= 0
|
|
829
|
+
&& partids[startx - test_blob] == biggestpart) {
|
|
830
|
+
found_one = TRUE;
|
|
831
|
+
coord = FCOORD ((blobcoords[startx - test_blob].left ()
|
|
832
|
+
+ blobcoords[startx -
|
|
833
|
+
test_blob].right ()) /
|
|
834
|
+
2.0,
|
|
835
|
+
blobcoords[startx -
|
|
836
|
+
test_blob].bottom ());
|
|
837
|
+
diff = m * coord.x () + c - coord.y ();
|
|
838
|
+
if (textord_oldbl_debug)
|
|
839
|
+
tprintf
|
|
840
|
+
("Diff of common blob to suspect part=%g at (%g,%g)\n",
|
|
841
|
+
diff, coord.x (), coord.y ());
|
|
842
|
+
if (diff < jumplimit && -diff < jumplimit)
|
|
843
|
+
close_one = TRUE;
|
|
844
|
+
}
|
|
845
|
+
if (blobindex + test_blob <= blobcount
|
|
846
|
+
&& partids[blobindex + test_blob - 1] == biggestpart) {
|
|
847
|
+
found_one = TRUE;
|
|
848
|
+
coord =
|
|
849
|
+
FCOORD ((blobcoords[blobindex + test_blob - 1].
|
|
850
|
+
left () + blobcoords[blobindex + test_blob -
|
|
851
|
+
1].right ()) / 2.0,
|
|
852
|
+
blobcoords[blobindex + test_blob -
|
|
853
|
+
1].bottom ());
|
|
854
|
+
diff = m * coord.x () + c - coord.y ();
|
|
855
|
+
if (textord_oldbl_debug)
|
|
856
|
+
tprintf
|
|
857
|
+
("Diff of common blob to suspect part=%g at (%g,%g)\n",
|
|
858
|
+
diff, coord.x (), coord.y ());
|
|
859
|
+
if (diff < jumplimit && -diff < jumplimit)
|
|
860
|
+
close_one = TRUE;
|
|
861
|
+
}
|
|
862
|
+
}
|
|
863
|
+
if (close_one) {
|
|
864
|
+
if (textord_oldbl_debug)
|
|
865
|
+
tprintf
|
|
866
|
+
("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
|
|
867
|
+
runlength, biggestpart, prevpart,
|
|
868
|
+
blobcoords[startx].left (),
|
|
869
|
+
blobcoords[startx].bottom ());
|
|
870
|
+
//switch sides
|
|
871
|
+
partsizes[prevpart] -= runlength;
|
|
872
|
+
for (test_blob = startx; test_blob < blobindex; test_blob++)
|
|
873
|
+
partids[test_blob] = biggestpart;
|
|
874
|
+
}
|
|
875
|
+
}
|
|
876
|
+
prevpart = partids[blobindex];
|
|
877
|
+
runlength = 1;
|
|
878
|
+
startx = blobindex;
|
|
879
|
+
}
|
|
880
|
+
else
|
|
881
|
+
runlength++;
|
|
882
|
+
}
|
|
883
|
+
}
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
/**********************************************************************
|
|
887
|
+
* get_ydiffs
|
|
888
|
+
*
|
|
889
|
+
* Get the differences between the blobs and the spline,
|
|
890
|
+
* putting them in ydiffs. The return value is the index
|
|
891
|
+
* of the blob in the middle of the "best behaved" region
|
|
892
|
+
**********************************************************************/
|
|
893
|
+
|
|
894
|
+
int
|
|
895
|
+
get_ydiffs ( //evaluate differences
|
|
896
|
+
TBOX blobcoords[], //bounding boxes
|
|
897
|
+
int blobcount, /*no of blobs */
|
|
898
|
+
QSPLINE * spline, /*approximating spline */
|
|
899
|
+
float ydiffs[] /*output */
|
|
900
|
+
) {
|
|
901
|
+
register int blobindex; /*current blob */
|
|
902
|
+
int xcentre; /*xcoord */
|
|
903
|
+
int lastx; /*last xcentre */
|
|
904
|
+
float diffsum; /*sum of diffs */
|
|
905
|
+
float diff; /*current difference */
|
|
906
|
+
float drift; /*sum of spline steps */
|
|
907
|
+
float bestsum; /*smallest diffsum */
|
|
908
|
+
int bestindex; /*index of bestsum */
|
|
909
|
+
|
|
910
|
+
diffsum = 0.0f;
|
|
911
|
+
bestindex = 0;
|
|
912
|
+
bestsum = (float) MAX_INT32;
|
|
913
|
+
drift = 0.0f;
|
|
914
|
+
lastx = blobcoords[0].left ();
|
|
915
|
+
/*do each blob in row */
|
|
916
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
|
917
|
+
/*centre of blob */
|
|
918
|
+
xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
|
|
919
|
+
//step functions in spline
|
|
920
|
+
drift += spline->step (lastx, xcentre);
|
|
921
|
+
lastx = xcentre;
|
|
922
|
+
diff = blobcoords[blobindex].bottom ();
|
|
923
|
+
diff -= spline->y (xcentre);
|
|
924
|
+
diff += drift;
|
|
925
|
+
ydiffs[blobindex] = diff; /*store difference */
|
|
926
|
+
if (blobindex > 2)
|
|
927
|
+
/*remove old one */
|
|
928
|
+
diffsum -= ABS (ydiffs[blobindex - 3]);
|
|
929
|
+
diffsum += ABS (diff); /*add new one */
|
|
930
|
+
if (blobindex >= 2 && diffsum < bestsum) {
|
|
931
|
+
bestsum = diffsum; /*find min sum */
|
|
932
|
+
bestindex = blobindex - 1; /*middle of set */
|
|
933
|
+
}
|
|
934
|
+
}
|
|
935
|
+
return bestindex;
|
|
936
|
+
}
|
|
937
|
+
|
|
938
|
+
|
|
939
|
+
/**********************************************************************
|
|
940
|
+
* choose_partition
|
|
941
|
+
*
|
|
942
|
+
* Choose a partition for the point and return the index.
|
|
943
|
+
**********************************************************************/
|
|
944
|
+
|
|
945
|
+
int
|
|
946
|
+
choose_partition ( //select partition
|
|
947
|
+
register float diff, /*diff from spline */
|
|
948
|
+
float partdiffs[], /*diff on all parts */
|
|
949
|
+
int lastpart, /*last assigned partition */
|
|
950
|
+
float jumplimit, /*new part threshold */
|
|
951
|
+
int *partcount /*no of partitions */
|
|
952
|
+
) {
|
|
953
|
+
register int partition; /*partition no */
|
|
954
|
+
int bestpart; /*best new partition */
|
|
955
|
+
float bestdelta; /*best gap from a part */
|
|
956
|
+
static float drift; /*drift from spline */
|
|
957
|
+
float delta; /*diff from part */
|
|
958
|
+
static float lastdelta; /*previous delta */
|
|
959
|
+
|
|
960
|
+
if (lastpart < 0) {
|
|
961
|
+
partdiffs[0] = diff;
|
|
962
|
+
lastpart = 0; /*first point */
|
|
963
|
+
drift = 0.0f;
|
|
964
|
+
lastdelta = 0.0f;
|
|
965
|
+
}
|
|
966
|
+
/*adjusted diff from part */
|
|
967
|
+
delta = diff - partdiffs[lastpart] - drift;
|
|
968
|
+
if (textord_oldbl_debug) {
|
|
969
|
+
tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, drift);
|
|
970
|
+
}
|
|
971
|
+
if (ABS (delta) > jumplimit / 2) {
|
|
972
|
+
/*delta on part 0 */
|
|
973
|
+
bestdelta = diff - partdiffs[0] - drift;
|
|
974
|
+
bestpart = 0; /*0 best so far */
|
|
975
|
+
for (partition = 1; partition < *partcount; partition++) {
|
|
976
|
+
delta = diff - partdiffs[partition] - drift;
|
|
977
|
+
if (ABS (delta) < ABS (bestdelta)) {
|
|
978
|
+
bestdelta = delta;
|
|
979
|
+
bestpart = partition; /*part with nearest jump */
|
|
980
|
+
}
|
|
981
|
+
}
|
|
982
|
+
delta = bestdelta;
|
|
983
|
+
/*too far away */
|
|
984
|
+
if (ABS (bestdelta) > jumplimit
|
|
985
|
+
&& *partcount < MAXPARTS) { /*and spare part left */
|
|
986
|
+
bestpart = (*partcount)++; /*best was new one */
|
|
987
|
+
/*start new one */
|
|
988
|
+
partdiffs[bestpart] = diff - drift;
|
|
989
|
+
delta = 0.0f;
|
|
990
|
+
}
|
|
991
|
+
}
|
|
992
|
+
else {
|
|
993
|
+
bestpart = lastpart; /*best was last one */
|
|
994
|
+
}
|
|
995
|
+
|
|
996
|
+
if (bestpart == lastpart
|
|
997
|
+
&& (ABS (delta - lastdelta) < jumplimit / 2
|
|
998
|
+
|| ABS (delta) < jumplimit / 2))
|
|
999
|
+
/*smooth the drift */
|
|
1000
|
+
drift = (3 * drift + delta) / 3;
|
|
1001
|
+
lastdelta = delta;
|
|
1002
|
+
|
|
1003
|
+
if (textord_oldbl_debug) {
|
|
1004
|
+
tprintf ("P=%d\n", bestpart);
|
|
1005
|
+
}
|
|
1006
|
+
|
|
1007
|
+
return bestpart;
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
///*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking
|
|
1012
|
+
//partitions and gives all the rest partid 0*/
|
|
1013
|
+
//
|
|
1014
|
+
//merge_partitions(partids,partcount,blobcount,bestpart)
|
|
1015
|
+
//register char *partids; /*partition numbers*/
|
|
1016
|
+
//int partcount; /*no of partitions*/
|
|
1017
|
+
//int blobcount; /*no of blobs*/
|
|
1018
|
+
//int bestpart; /*best partition*/
|
|
1019
|
+
//{
|
|
1020
|
+
// register int blobindex; /*no along text line*/
|
|
1021
|
+
// int runlength; /*run of same partition*/
|
|
1022
|
+
// int bestrun; /*biggest runlength*/
|
|
1023
|
+
//
|
|
1024
|
+
// bestrun=0; /*no runs yet*/
|
|
1025
|
+
// runlength=1;
|
|
1026
|
+
// for (blobindex=1;blobindex<blobcount;blobindex++)
|
|
1027
|
+
// { if (partids[blobindex]!=partids[blobindex-1])
|
|
1028
|
+
// { if (runlength>bestrun)
|
|
1029
|
+
// bestrun=runlength; /*find biggest run*/
|
|
1030
|
+
// runlength=1; /*new run*/
|
|
1031
|
+
// }
|
|
1032
|
+
// else
|
|
1033
|
+
// { runlength++;
|
|
1034
|
+
// }
|
|
1035
|
+
// }
|
|
1036
|
+
// if (runlength>bestrun)
|
|
1037
|
+
// bestrun=runlength;
|
|
1038
|
+
//
|
|
1039
|
+
// for (blobindex=0;blobindex<blobcount;blobindex++)
|
|
1040
|
+
// { if (blobindex<1
|
|
1041
|
+
// || partids[blobindex]!=partids[blobindex-1])
|
|
1042
|
+
// { if ((blobindex+1>=blobcount
|
|
1043
|
+
// || partids[blobindex]!=partids[blobindex+1])
|
|
1044
|
+
// /*loner*/
|
|
1045
|
+
// && (bestrun>2 || partids[blobindex]!=bestpart))
|
|
1046
|
+
// { partids[blobindex]=partcount; /*discard loner*/
|
|
1047
|
+
// }
|
|
1048
|
+
// else if (blobindex+1<blobcount
|
|
1049
|
+
// && partids[blobindex]==partids[blobindex+1]
|
|
1050
|
+
// /*pair*/
|
|
1051
|
+
// && (blobindex+2>=blobcount
|
|
1052
|
+
// || partids[blobindex]!=partids[blobindex+2])
|
|
1053
|
+
// && (bestrun>3 || partids[blobindex]!=bestpart))
|
|
1054
|
+
// { partids[blobindex]=partcount; /*discard both*/
|
|
1055
|
+
// partids[blobindex+1]=partcount;
|
|
1056
|
+
// }
|
|
1057
|
+
// }
|
|
1058
|
+
// }
|
|
1059
|
+
// for (blobindex=0;blobindex<blobcount;blobindex++)
|
|
1060
|
+
// { if (partids[blobindex]<partcount)
|
|
1061
|
+
// partids[blobindex]=0; /*all others together*/
|
|
1062
|
+
// }
|
|
1063
|
+
//}
|
|
1064
|
+
|
|
1065
|
+
/**********************************************************************
|
|
1066
|
+
* partition_coords
|
|
1067
|
+
*
|
|
1068
|
+
* Get the x,y coordinates of all points in the bestpart and put them
|
|
1069
|
+
* in xcoords,ycoords. Return the number of points found.
|
|
1070
|
+
**********************************************************************/
|
|
1071
|
+
|
|
1072
|
+
int
|
|
1073
|
+
partition_coords ( //find relevant coords
|
|
1074
|
+
TBOX blobcoords[], //bounding boxes
|
|
1075
|
+
int blobcount, /*no of blobs in row */
|
|
1076
|
+
char partids[], /*partition no of each blob */
|
|
1077
|
+
int bestpart, /*best new partition */
|
|
1078
|
+
int xcoords[], /*points to work on */
|
|
1079
|
+
int ycoords[] /*points to work on */
|
|
1080
|
+
) {
|
|
1081
|
+
register int blobindex; /*no along text line */
|
|
1082
|
+
int pointcount; /*no of points */
|
|
1083
|
+
|
|
1084
|
+
pointcount = 0;
|
|
1085
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
|
1086
|
+
if (partids[blobindex] == bestpart) {
|
|
1087
|
+
/*centre of blob */
|
|
1088
|
+
xcoords[pointcount] = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
|
|
1089
|
+
ycoords[pointcount++] = blobcoords[blobindex].bottom ();
|
|
1090
|
+
}
|
|
1091
|
+
}
|
|
1092
|
+
return pointcount; /*no of points found */
|
|
1093
|
+
}
|
|
1094
|
+
|
|
1095
|
+
|
|
1096
|
+
/**********************************************************************
|
|
1097
|
+
* segment_spline
|
|
1098
|
+
*
|
|
1099
|
+
* Segment the row at midpoints between maxima and minima of the x,y pairs.
|
|
1100
|
+
* The xstarts of the segments are returned and the number found.
|
|
1101
|
+
**********************************************************************/
|
|
1102
|
+
|
|
1103
|
+
int
|
|
1104
|
+
segment_spline ( //make xstarts
|
|
1105
|
+
TBOX blobcoords[], //boundign boxes
|
|
1106
|
+
int blobcount, /*no of blobs in row */
|
|
1107
|
+
int xcoords[], /*points to work on */
|
|
1108
|
+
int ycoords[], /*points to work on */
|
|
1109
|
+
int degree, int pointcount, /*no of points */
|
|
1110
|
+
int xstarts[] //result
|
|
1111
|
+
) {
|
|
1112
|
+
register int ptindex; /*no along text line */
|
|
1113
|
+
register int segment; /*partition no */
|
|
1114
|
+
int lastmin, lastmax; /*possible turn points */
|
|
1115
|
+
int turnpoints[SPLINESIZE]; /*good turning points */
|
|
1116
|
+
int turncount; /*no of turning points */
|
|
1117
|
+
int max_x; //max specified coord
|
|
1118
|
+
|
|
1119
|
+
xstarts[0] = xcoords[0] - 1; //leftmost defined pt
|
|
1120
|
+
max_x = xcoords[pointcount - 1] + 1;
|
|
1121
|
+
if (degree < 2)
|
|
1122
|
+
pointcount = 0;
|
|
1123
|
+
turncount = 0; /*no turning points yet */
|
|
1124
|
+
if (pointcount > 3) {
|
|
1125
|
+
ptindex = 1;
|
|
1126
|
+
lastmax = lastmin = 0; /*start with first one */
|
|
1127
|
+
while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
|
|
1128
|
+
/*minimum */
|
|
1129
|
+
if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
|
|
1130
|
+
if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
|
|
1131
|
+
if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
|
|
1132
|
+
/*new max point */
|
|
1133
|
+
turnpoints[turncount++] = lastmax;
|
|
1134
|
+
lastmin = ptindex; /*latest minimum */
|
|
1135
|
+
}
|
|
1136
|
+
else if (ycoords[ptindex] < ycoords[lastmin]) {
|
|
1137
|
+
lastmin = ptindex; /*lower minimum */
|
|
1138
|
+
}
|
|
1139
|
+
}
|
|
1140
|
+
|
|
1141
|
+
/*maximum */
|
|
1142
|
+
if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
|
|
1143
|
+
if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
|
|
1144
|
+
if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
|
|
1145
|
+
/*new min point */
|
|
1146
|
+
turnpoints[turncount++] = lastmin;
|
|
1147
|
+
lastmax = ptindex; /*latest maximum */
|
|
1148
|
+
}
|
|
1149
|
+
else if (ycoords[ptindex] > ycoords[lastmax]) {
|
|
1150
|
+
lastmax = ptindex; /*higher maximum */
|
|
1151
|
+
}
|
|
1152
|
+
}
|
|
1153
|
+
ptindex++;
|
|
1154
|
+
}
|
|
1155
|
+
/*possible global min */
|
|
1156
|
+
if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
|
|
1157
|
+
&& (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
|
|
1158
|
+
if (turncount < SPLINESIZE - 1)
|
|
1159
|
+
/*2 more turns */
|
|
1160
|
+
turnpoints[turncount++] = lastmax;
|
|
1161
|
+
if (turncount < SPLINESIZE - 1)
|
|
1162
|
+
turnpoints[turncount++] = ptindex;
|
|
1163
|
+
}
|
|
1164
|
+
else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
|
|
1165
|
+
/*possible global max */
|
|
1166
|
+
&& (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
|
|
1167
|
+
if (turncount < SPLINESIZE - 1)
|
|
1168
|
+
/*2 more turns */
|
|
1169
|
+
turnpoints[turncount++] = lastmin;
|
|
1170
|
+
if (turncount < SPLINESIZE - 1)
|
|
1171
|
+
turnpoints[turncount++] = ptindex;
|
|
1172
|
+
}
|
|
1173
|
+
else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
|
|
1174
|
+
&& turncount < SPLINESIZE - 1) {
|
|
1175
|
+
if (ycoords[ptindex] > ycoords[lastmax])
|
|
1176
|
+
turnpoints[turncount++] = ptindex;
|
|
1177
|
+
else
|
|
1178
|
+
turnpoints[turncount++] = lastmax;
|
|
1179
|
+
}
|
|
1180
|
+
else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
|
|
1181
|
+
&& turncount < SPLINESIZE - 1) {
|
|
1182
|
+
if (ycoords[ptindex] < ycoords[lastmin])
|
|
1183
|
+
turnpoints[turncount++] = ptindex;
|
|
1184
|
+
else
|
|
1185
|
+
turnpoints[turncount++] = lastmin;
|
|
1186
|
+
}
|
|
1187
|
+
}
|
|
1188
|
+
|
|
1189
|
+
if (textord_oldbl_debug && turncount > 0)
|
|
1190
|
+
tprintf ("First turn is %d at (%d,%d)\n",
|
|
1191
|
+
turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
|
|
1192
|
+
for (segment = 1; segment < turncount; segment++) {
|
|
1193
|
+
/*centre y coord */
|
|
1194
|
+
lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
|
|
1195
|
+
|
|
1196
|
+
/* fix alg so that it works with both rising and falling sections */
|
|
1197
|
+
if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
|
|
1198
|
+
/*find rising y centre */
|
|
1199
|
+
for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
|
|
1200
|
+
else
|
|
1201
|
+
/*find falling y centre */
|
|
1202
|
+
for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);
|
|
1203
|
+
|
|
1204
|
+
/*centre x */
|
|
1205
|
+
xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
|
|
1206
|
+
+ xcoords[turnpoints[segment - 1]]
|
|
1207
|
+
+ xcoords[turnpoints[segment]] + 2) / 4;
|
|
1208
|
+
/*halfway between turns */
|
|
1209
|
+
if (textord_oldbl_debug)
|
|
1210
|
+
tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
|
|
1211
|
+
segment, turnpoints[segment],
|
|
1212
|
+
xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
|
|
1213
|
+
ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
|
|
1214
|
+
}
|
|
1215
|
+
|
|
1216
|
+
xstarts[segment] = max_x;
|
|
1217
|
+
return segment; /*no of splines */
|
|
1218
|
+
}
|
|
1219
|
+
|
|
1220
|
+
|
|
1221
|
+
/**********************************************************************
|
|
1222
|
+
* split_stepped_spline
|
|
1223
|
+
*
|
|
1224
|
+
* Re-segment the spline in cases where there is a big step function.
|
|
1225
|
+
* Return TRUE if any were done.
|
|
1226
|
+
**********************************************************************/
|
|
1227
|
+
|
|
1228
|
+
BOOL8
|
|
1229
|
+
split_stepped_spline ( //make xstarts
|
|
1230
|
+
QSPLINE * baseline, //current shot
|
|
1231
|
+
float jumplimit, //max step fuction
|
|
1232
|
+
int xcoords[], /*points to work on */
|
|
1233
|
+
int xstarts[], //result
|
|
1234
|
+
int &segments //no of segments
|
|
1235
|
+
) {
|
|
1236
|
+
BOOL8 doneany; //return value
|
|
1237
|
+
register int segment; /*partition no */
|
|
1238
|
+
int startindex, centreindex, endindex;
|
|
1239
|
+
float leftcoord, rightcoord;
|
|
1240
|
+
int leftindex, rightindex;
|
|
1241
|
+
float step; //spline step
|
|
1242
|
+
|
|
1243
|
+
doneany = FALSE;
|
|
1244
|
+
startindex = 0;
|
|
1245
|
+
for (segment = 1; segment < segments - 1; segment++) {
|
|
1246
|
+
step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
|
|
1247
|
+
(xstarts[segment] + xstarts[segment + 1]) / 2.0);
|
|
1248
|
+
if (step < 0)
|
|
1249
|
+
step = -step;
|
|
1250
|
+
if (step > jumplimit) {
|
|
1251
|
+
while (xcoords[startindex] < xstarts[segment - 1])
|
|
1252
|
+
startindex++;
|
|
1253
|
+
centreindex = startindex;
|
|
1254
|
+
while (xcoords[centreindex] < xstarts[segment])
|
|
1255
|
+
centreindex++;
|
|
1256
|
+
endindex = centreindex;
|
|
1257
|
+
while (xcoords[endindex] < xstarts[segment + 1])
|
|
1258
|
+
endindex++;
|
|
1259
|
+
if (segments >= SPLINESIZE) {
|
|
1260
|
+
if (textord_debug_baselines)
|
|
1261
|
+
tprintf ("Too many segments to resegment spline!!\n");
|
|
1262
|
+
}
|
|
1263
|
+
else if (endindex - startindex >= textord_spline_medianwin * 3) {
|
|
1264
|
+
while (centreindex - startindex <
|
|
1265
|
+
textord_spline_medianwin * 3 / 2)
|
|
1266
|
+
centreindex++;
|
|
1267
|
+
while (endindex - centreindex <
|
|
1268
|
+
textord_spline_medianwin * 3 / 2)
|
|
1269
|
+
centreindex--;
|
|
1270
|
+
leftindex = (startindex + startindex + centreindex) / 3;
|
|
1271
|
+
rightindex = (centreindex + endindex + endindex) / 3;
|
|
1272
|
+
leftcoord =
|
|
1273
|
+
(xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
|
|
1274
|
+
rightcoord =
|
|
1275
|
+
(xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
|
|
1276
|
+
while (xcoords[leftindex] > leftcoord
|
|
1277
|
+
&& leftindex - startindex > textord_spline_medianwin)
|
|
1278
|
+
leftindex--;
|
|
1279
|
+
while (xcoords[leftindex] < leftcoord
|
|
1280
|
+
&& centreindex - leftindex >
|
|
1281
|
+
textord_spline_medianwin / 2)
|
|
1282
|
+
leftindex++;
|
|
1283
|
+
if (xcoords[leftindex] - leftcoord >
|
|
1284
|
+
leftcoord - xcoords[leftindex - 1])
|
|
1285
|
+
leftindex--;
|
|
1286
|
+
while (xcoords[rightindex] > rightcoord
|
|
1287
|
+
&& rightindex - centreindex >
|
|
1288
|
+
textord_spline_medianwin / 2)
|
|
1289
|
+
rightindex--;
|
|
1290
|
+
while (xcoords[rightindex] < rightcoord
|
|
1291
|
+
&& endindex - rightindex > textord_spline_medianwin)
|
|
1292
|
+
rightindex++;
|
|
1293
|
+
if (xcoords[rightindex] - rightcoord >
|
|
1294
|
+
rightcoord - xcoords[rightindex - 1])
|
|
1295
|
+
rightindex--;
|
|
1296
|
+
if (textord_debug_baselines)
|
|
1297
|
+
tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
|
|
1298
|
+
xstarts[segment],
|
|
1299
|
+
baseline->
|
|
1300
|
+
step ((xstarts[segment - 1] +
|
|
1301
|
+
xstarts[segment]) / 2.0,
|
|
1302
|
+
(xstarts[segment] +
|
|
1303
|
+
xstarts[segment + 1]) / 2.0),
|
|
1304
|
+
(xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
|
|
1305
|
+
(xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
|
|
1306
|
+
insert_spline_point (xstarts, segment,
|
|
1307
|
+
(xcoords[leftindex - 1] +
|
|
1308
|
+
xcoords[leftindex]) / 2,
|
|
1309
|
+
(xcoords[rightindex - 1] +
|
|
1310
|
+
xcoords[rightindex]) / 2, segments);
|
|
1311
|
+
doneany = TRUE;
|
|
1312
|
+
}
|
|
1313
|
+
else if (textord_debug_baselines) {
|
|
1314
|
+
tprintf
|
|
1315
|
+
("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
|
|
1316
|
+
startindex, centreindex, endindex,
|
|
1317
|
+
(inT32) textord_spline_medianwin);
|
|
1318
|
+
}
|
|
1319
|
+
}
|
|
1320
|
+
// else tprintf("Spline step at %d is %g\n",
|
|
1321
|
+
// xstarts[segment],
|
|
1322
|
+
// baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
|
|
1323
|
+
// (xstarts[segment]+xstarts[segment+1])/2.0));
|
|
1324
|
+
}
|
|
1325
|
+
return doneany;
|
|
1326
|
+
}
|
|
1327
|
+
|
|
1328
|
+
|
|
1329
|
+
/**********************************************************************
 * insert_spline_point
 *
 * Replace the boundary xstarts[segment] by the pair coord1, coord2.
 * Entries xstarts[segment + 1 .. segments] are shuffled up by one to
 * make room, and segments is incremented.  The caller must provide
 * space for the extra entry at xstarts[segments + 1].
 **********************************************************************/

void insert_spline_point(int xstarts[],  //starts to shuffle
                         int segment,    //insertion pt
                         int coord1,     //coords to add
                         int coord2,
                         int &segments   //total segments
                        ) {
  //shuffle the tail up, working down from the top so nothing is lost
  int dest = segments + 1;
  while (dest > segment + 1) {
    xstarts[dest] = xstarts[dest - 1];
    dest--;
  }
  ++segments;

  xstarts[segment] = coord1;
  xstarts[segment + 1] = coord2;
}
|
|
1350
|
+
|
|
1351
|
+
|
|
1352
|
+
/**********************************************************************
|
|
1353
|
+
* find_lesser_parts
|
|
1354
|
+
*
|
|
1355
|
+
* Average the step from the spline for the other partitions
|
|
1356
|
+
* and find the commonest partition which has a descender.
|
|
1357
|
+
**********************************************************************/
|
|
1358
|
+
|
|
1359
|
+
void
|
|
1360
|
+
find_lesser_parts ( //get descenders
|
|
1361
|
+
TO_ROW * row, //row to process
|
|
1362
|
+
TBOX blobcoords[], //bounding boxes
|
|
1363
|
+
int blobcount, /*no of blobs */
|
|
1364
|
+
char partids[], /*partition of each blob */
|
|
1365
|
+
int partsizes[], /*size of each part */
|
|
1366
|
+
int partcount, /*no of partitions */
|
|
1367
|
+
int bestpart /*biggest partition */
|
|
1368
|
+
) {
|
|
1369
|
+
register int blobindex; /*index of blob */
|
|
1370
|
+
register int partition; /*current partition */
|
|
1371
|
+
int xcentre; /*centre of blob */
|
|
1372
|
+
int poscount; /*count of best up step */
|
|
1373
|
+
int negcount; /*count of best down step */
|
|
1374
|
+
float partsteps[MAXPARTS]; /*average step to part */
|
|
1375
|
+
float bestpos; /*best up step */
|
|
1376
|
+
float bestneg; /*best down step */
|
|
1377
|
+
int runlength; /*length of bad run */
|
|
1378
|
+
int biggestrun; /*biggest bad run */
|
|
1379
|
+
|
|
1380
|
+
biggestrun = 0;
|
|
1381
|
+
for (partition = 0; partition < partcount; partition++)
|
|
1382
|
+
partsteps[partition] = 0.0; /*zero accumulators */
|
|
1383
|
+
for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
|
|
1384
|
+
xcentre = (blobcoords[blobindex].left ()
|
|
1385
|
+
+ blobcoords[blobindex].right ()) >> 1;
|
|
1386
|
+
/*in other parts */
|
|
1387
|
+
if (partids[blobindex] != bestpart) {
|
|
1388
|
+
runlength++; /*run of non bests */
|
|
1389
|
+
if (runlength > biggestrun)
|
|
1390
|
+
biggestrun = runlength;
|
|
1391
|
+
partsteps[partids[blobindex]] += blobcoords[blobindex].bottom ()
|
|
1392
|
+
- row->baseline.y (xcentre);
|
|
1393
|
+
}
|
|
1394
|
+
else
|
|
1395
|
+
runlength = 0;
|
|
1396
|
+
}
|
|
1397
|
+
if (biggestrun > MAXBADRUN)
|
|
1398
|
+
row->xheight = -1.0f; /*failed */
|
|
1399
|
+
else
|
|
1400
|
+
row->xheight = 1.0f; /*success */
|
|
1401
|
+
poscount = negcount = 0;
|
|
1402
|
+
bestpos = bestneg = 0.0; /*no step yet */
|
|
1403
|
+
for (partition = 0; partition < partcount; partition++) {
|
|
1404
|
+
if (partition != bestpart) {
|
|
1405
|
+
|
|
1406
|
+
//by jetsoft divide by zero possible
|
|
1407
|
+
if (partsizes[partition]==0)
|
|
1408
|
+
partsteps[partition]=0;
|
|
1409
|
+
else
|
|
1410
|
+
partsteps[partition] /= partsizes[partition];
|
|
1411
|
+
//
|
|
1412
|
+
|
|
1413
|
+
|
|
1414
|
+
if (partsteps[partition] >= MINASCRISE
|
|
1415
|
+
&& partsizes[partition] > poscount) {
|
|
1416
|
+
/*ascender rise */
|
|
1417
|
+
bestpos = partsteps[partition];
|
|
1418
|
+
/*2nd most popular */
|
|
1419
|
+
poscount = partsizes[partition];
|
|
1420
|
+
}
|
|
1421
|
+
if (partsteps[partition] <= -MINASCRISE
|
|
1422
|
+
&& partsizes[partition] > negcount) {
|
|
1423
|
+
/*ascender rise */
|
|
1424
|
+
bestneg = partsteps[partition];
|
|
1425
|
+
/*2nd most popular */
|
|
1426
|
+
negcount = partsizes[partition];
|
|
1427
|
+
}
|
|
1428
|
+
}
|
|
1429
|
+
}
|
|
1430
|
+
/*average x-height */
|
|
1431
|
+
partsteps[bestpart] /= blobcount;
|
|
1432
|
+
row->descdrop = bestneg;
|
|
1433
|
+
}
|
|
1434
|
+
|
|
1435
|
+
|
|
1436
|
+
/**********************************************************************
|
|
1437
|
+
* old_first_xheight
|
|
1438
|
+
*
|
|
1439
|
+
* Makes an x-height spline by copying the baseline and shifting it.
|
|
1440
|
+
* It estimates the x-height across the line to use as the shift.
|
|
1441
|
+
* It also finds the ascender height if it can.
|
|
1442
|
+
**********************************************************************/
|
|
1443
|
+
|
|
1444
|
+
void
|
|
1445
|
+
old_first_xheight ( //the wiseowl way
|
|
1446
|
+
TO_ROW * row, /*current row */
|
|
1447
|
+
TBOX blobcoords[], /*blob bounding boxes */
|
|
1448
|
+
int initialheight, //initial guess
|
|
1449
|
+
int blobcount, /*blobs in blobcoords */
|
|
1450
|
+
QSPLINE * baseline, /*established */
|
|
1451
|
+
float jumplimit /*min ascender height */
|
|
1452
|
+
) {
|
|
1453
|
+
register int blobindex; /*current blob */
|
|
1454
|
+
/*height statistics */
|
|
1455
|
+
STATS heightstat (0, MAXHEIGHT);
|
|
1456
|
+
int height; /*height of blob */
|
|
1457
|
+
int xcentre; /*centre of blob */
|
|
1458
|
+
int lineheight; /*approx xheight */
|
|
1459
|
+
float ascenders; /*ascender sum */
|
|
1460
|
+
int asccount; /*no of ascenders */
|
|
1461
|
+
float xsum; /*xheight sum */
|
|
1462
|
+
int xcount; /*xheight count */
|
|
1463
|
+
register float diff; /*height difference */
|
|
1464
|
+
|
|
1465
|
+
if (blobcount > 1) {
|
|
1466
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
|
1467
|
+
xcentre = (blobcoords[blobindex].left ()
|
|
1468
|
+
+ blobcoords[blobindex].right ()) / 2;
|
|
1469
|
+
/*height of blob */
|
|
1470
|
+
height = (int) (blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5);
|
|
1471
|
+
if (height > initialheight * oldbl_xhfract
|
|
1472
|
+
&& height > textord_min_xheight)
|
|
1473
|
+
heightstat.add (height, 1);
|
|
1474
|
+
}
|
|
1475
|
+
if (heightstat.get_total () > 3) {
|
|
1476
|
+
lineheight = (int) heightstat.ile (0.25);
|
|
1477
|
+
if (lineheight <= 0)
|
|
1478
|
+
lineheight = (int) heightstat.ile (0.5);
|
|
1479
|
+
}
|
|
1480
|
+
else
|
|
1481
|
+
lineheight = initialheight;
|
|
1482
|
+
}
|
|
1483
|
+
else {
|
|
1484
|
+
lineheight = (int) (blobcoords[0].top ()
|
|
1485
|
+
- baseline->y ((blobcoords[0].left ()
|
|
1486
|
+
+ blobcoords[0].right ()) / 2) +
|
|
1487
|
+
0.5);
|
|
1488
|
+
}
|
|
1489
|
+
|
|
1490
|
+
xsum = 0.0f;
|
|
1491
|
+
xcount = 0;
|
|
1492
|
+
for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;
|
|
1493
|
+
blobindex++) {
|
|
1494
|
+
xcentre = (blobcoords[blobindex].left ()
|
|
1495
|
+
+ blobcoords[blobindex].right ()) / 2;
|
|
1496
|
+
diff = blobcoords[blobindex].top () - baseline->y (xcentre);
|
|
1497
|
+
/*is it ascender */
|
|
1498
|
+
if (diff > lineheight + jumplimit) {
|
|
1499
|
+
ascenders += diff;
|
|
1500
|
+
asccount++; /*count ascenders */
|
|
1501
|
+
}
|
|
1502
|
+
else if (diff > lineheight - jumplimit) {
|
|
1503
|
+
xsum += diff; /*mean xheight */
|
|
1504
|
+
xcount++;
|
|
1505
|
+
}
|
|
1506
|
+
}
|
|
1507
|
+
if (xcount > 0)
|
|
1508
|
+
xsum /= xcount; /*average xheight */
|
|
1509
|
+
else
|
|
1510
|
+
xsum = (float) lineheight; /*guess it */
|
|
1511
|
+
row->xheight *= xsum;
|
|
1512
|
+
if (asccount > 0)
|
|
1513
|
+
row->ascrise = ascenders / asccount - xsum;
|
|
1514
|
+
else
|
|
1515
|
+
row->ascrise = 0.0f; /*had none */
|
|
1516
|
+
if (row->xheight == 0)
|
|
1517
|
+
row->xheight = -1.0f;
|
|
1518
|
+
}
|
|
1519
|
+
|
|
1520
|
+
|
|
1521
|
+
/**********************************************************************
|
|
1522
|
+
* make_first_xheight
|
|
1523
|
+
*
|
|
1524
|
+
* Makes an x-height spline by copying the baseline and shifting it.
|
|
1525
|
+
* It estimates the x-height across the line to use as the shift.
|
|
1526
|
+
* It also finds the ascender height if it can.
|
|
1527
|
+
**********************************************************************/
|
|
1528
|
+
|
|
1529
|
+
void
|
|
1530
|
+
make_first_xheight ( //find xheight
|
|
1531
|
+
TO_ROW * row, /*current row */
|
|
1532
|
+
TBOX blobcoords[], /*blob bounding boxes */
|
|
1533
|
+
int lineheight, //initial guess
|
|
1534
|
+
int init_lineheight, //block level guess
|
|
1535
|
+
int blobcount, /*blobs in blobcoords */
|
|
1536
|
+
QSPLINE * baseline, /*established */
|
|
1537
|
+
float jumplimit /*min ascender height */
|
|
1538
|
+
) {
|
|
1539
|
+
STATS heightstat (0, HEIGHTBUCKETS);
|
|
1540
|
+
int lefts[HEIGHTBUCKETS];
|
|
1541
|
+
int rights[HEIGHTBUCKETS];
|
|
1542
|
+
int modelist[MODENUM];
|
|
1543
|
+
int blobindex;
|
|
1544
|
+
int mode_count; //blobs to count in thr
|
|
1545
|
+
int sign_bit;
|
|
1546
|
+
int mode_threshold;
|
|
1547
|
+
const int kBaselineTouch = 2; // This really should change with resolution.
|
|
1548
|
+
const int kGoodStrength = 8; // Strength of baseline-touching heights.
|
|
1549
|
+
const float kMinHeight = 0.25; // Min fraction of lineheight to use.
|
|
1550
|
+
|
|
1551
|
+
sign_bit = row->xheight > 0 ? 1 : -1;
|
|
1552
|
+
|
|
1553
|
+
memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0]));
|
|
1554
|
+
memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0]));
|
|
1555
|
+
mode_count = 0;
|
|
1556
|
+
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
|
1557
|
+
int xcenter = (blobcoords[blobindex].left () +
|
|
1558
|
+
blobcoords[blobindex].right ()) / 2;
|
|
1559
|
+
float base = baseline->y(xcenter);
|
|
1560
|
+
float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
|
|
1561
|
+
int strength = textord_ocropus_mode &&
|
|
1562
|
+
bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
|
|
1563
|
+
int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5);
|
|
1564
|
+
if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) {
|
|
1565
|
+
if (height > lineheight * oldbl_xhfract
|
|
1566
|
+
&& height > textord_min_xheight) {
|
|
1567
|
+
heightstat.add (height, strength);
|
|
1568
|
+
if (height < HEIGHTBUCKETS) {
|
|
1569
|
+
if (xcenter > rights[height])
|
|
1570
|
+
rights[height] = xcenter;
|
|
1571
|
+
if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))
|
|
1572
|
+
lefts[height] = xcenter;
|
|
1573
|
+
}
|
|
1574
|
+
}
|
|
1575
|
+
mode_count += strength;
|
|
1576
|
+
}
|
|
1577
|
+
}
|
|
1578
|
+
|
|
1579
|
+
mode_threshold = (int) (blobcount * 0.1);
|
|
1580
|
+
if (oldbl_dot_error_size > 1 || oldbl_xhfix)
|
|
1581
|
+
mode_threshold = (int) (mode_count * 0.1);
|
|
1582
|
+
|
|
1583
|
+
if (textord_oldbl_debug) {
|
|
1584
|
+
tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
|
|
1585
|
+
blobcount, mode_count, mode_threshold);
|
|
1586
|
+
}
|
|
1587
|
+
find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
|
|
1588
|
+
if (textord_oldbl_debug) {
|
|
1589
|
+
for (blobindex = 0; blobindex < MODENUM; blobindex++)
|
|
1590
|
+
tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]);
|
|
1591
|
+
tprintf ("\n");
|
|
1592
|
+
}
|
|
1593
|
+
pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
|
|
1594
|
+
|
|
1595
|
+
if (textord_oldbl_debug)
|
|
1596
|
+
tprintf ("Output xheight=%g\n", row->xheight);
|
|
1597
|
+
if (row->xheight < 0 && textord_oldbl_debug)
|
|
1598
|
+
tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);
|
|
1599
|
+
|
|
1600
|
+
if (sign_bit < 0)
|
|
1601
|
+
row->xheight = -row->xheight;
|
|
1602
|
+
}
|
|
1603
|
+
|
|
1604
|
+
/**********************************************************************
|
|
1605
|
+
* find_top_modes
|
|
1606
|
+
*
|
|
1607
|
+
* Fill the input array with the indices of the top ten modes of the
|
|
1608
|
+
* input distribution.
|
|
1609
|
+
**********************************************************************/
|
|
1610
|
+
|
|
1611
|
+
const int kMinModeFactorOcropus = 32;
|
|
1612
|
+
const int kMinModeFactor = 12;
|
|
1613
|
+
|
|
1614
|
+
void
|
|
1615
|
+
find_top_modes ( //get modes
|
|
1616
|
+
STATS * stats, //stats to hack
|
|
1617
|
+
int statnum, //no of piles
|
|
1618
|
+
int modelist[], int modenum //no of modes to get
|
|
1619
|
+
) {
|
|
1620
|
+
int mode_count;
|
|
1621
|
+
int last_i = 0;
|
|
1622
|
+
int last_max = MAX_INT32;
|
|
1623
|
+
int i;
|
|
1624
|
+
int mode;
|
|
1625
|
+
int total_max = 0;
|
|
1626
|
+
int mode_factor = textord_ocropus_mode ?
|
|
1627
|
+
kMinModeFactorOcropus : kMinModeFactor;
|
|
1628
|
+
|
|
1629
|
+
for (mode_count = 0; mode_count < modenum; mode_count++) {
|
|
1630
|
+
mode = 0;
|
|
1631
|
+
for (i = 0; i < statnum; i++) {
|
|
1632
|
+
if (stats->pile_count (i) > stats->pile_count (mode)) {
|
|
1633
|
+
if ((stats->pile_count (i) < last_max) ||
|
|
1634
|
+
((stats->pile_count (i) == last_max) && (i > last_i))) {
|
|
1635
|
+
mode = i;
|
|
1636
|
+
}
|
|
1637
|
+
}
|
|
1638
|
+
}
|
|
1639
|
+
last_i = mode;
|
|
1640
|
+
last_max = stats->pile_count (last_i);
|
|
1641
|
+
total_max += last_max;
|
|
1642
|
+
if (last_max <= total_max / mode_factor)
|
|
1643
|
+
mode = 0;
|
|
1644
|
+
modelist[mode_count] = mode;
|
|
1645
|
+
}
|
|
1646
|
+
}
|
|
1647
|
+
|
|
1648
|
+
|
|
1649
|
+
/**********************************************************************
|
|
1650
|
+
* pick_x_height
|
|
1651
|
+
*
|
|
1652
|
+
* Choose based on the height modes the best x height value.
|
|
1653
|
+
**********************************************************************/
|
|
1654
|
+
|
|
1655
|
+
void pick_x_height(TO_ROW * row, //row to do
|
|
1656
|
+
int modelist[],
|
|
1657
|
+
int lefts[], int rights[],
|
|
1658
|
+
STATS * heightstat,
|
|
1659
|
+
int mode_threshold) {
|
|
1660
|
+
int x;
|
|
1661
|
+
int y;
|
|
1662
|
+
int z;
|
|
1663
|
+
float ratio;
|
|
1664
|
+
int found_one_bigger = FALSE;
|
|
1665
|
+
int best_x_height = 0;
|
|
1666
|
+
int best_asc = 0;
|
|
1667
|
+
int num_in_best;
|
|
1668
|
+
|
|
1669
|
+
for (x = 0; x < MODENUM; x++) {
|
|
1670
|
+
for (y = 0; y < MODENUM; y++) {
|
|
1671
|
+
/* Check for two modes */
|
|
1672
|
+
if (modelist[x] && modelist[y] &&
|
|
1673
|
+
heightstat->pile_count (modelist[x]) > mode_threshold &&
|
|
1674
|
+
(!textord_ocropus_mode ||
|
|
1675
|
+
MIN(rights[modelist[x]], rights[modelist[y]]) >
|
|
1676
|
+
MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
|
|
1677
|
+
ratio = (float) modelist[y] / (float) modelist[x];
|
|
1678
|
+
if (1.2 < ratio && ratio < 1.8) {
|
|
1679
|
+
/* Two modes found */
|
|
1680
|
+
best_x_height = modelist[x];
|
|
1681
|
+
num_in_best = heightstat->pile_count (modelist[x]);
|
|
1682
|
+
|
|
1683
|
+
/* Try to get one higher */
|
|
1684
|
+
do {
|
|
1685
|
+
found_one_bigger = FALSE;
|
|
1686
|
+
for (z = 0; z < MODENUM; z++) {
|
|
1687
|
+
if (modelist[z] == best_x_height + 1 &&
|
|
1688
|
+
(!textord_ocropus_mode ||
|
|
1689
|
+
MIN(rights[modelist[x]], rights[modelist[y]]) >
|
|
1690
|
+
MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
|
|
1691
|
+
ratio = (float) modelist[y] / (float) modelist[z];
|
|
1692
|
+
if ((1.2 < ratio && ratio < 1.8) &&
|
|
1693
|
+
/* Should be half of best */
|
|
1694
|
+
heightstat->pile_count (modelist[z]) >
|
|
1695
|
+
num_in_best * 0.5) {
|
|
1696
|
+
best_x_height++;
|
|
1697
|
+
found_one_bigger = TRUE;
|
|
1698
|
+
break;
|
|
1699
|
+
}
|
|
1700
|
+
}
|
|
1701
|
+
}
|
|
1702
|
+
}
|
|
1703
|
+
while (found_one_bigger);
|
|
1704
|
+
|
|
1705
|
+
/* try to get a higher ascender */
|
|
1706
|
+
|
|
1707
|
+
best_asc = modelist[y];
|
|
1708
|
+
num_in_best = heightstat->pile_count (modelist[y]);
|
|
1709
|
+
|
|
1710
|
+
/* Try to get one higher */
|
|
1711
|
+
do {
|
|
1712
|
+
found_one_bigger = FALSE;
|
|
1713
|
+
for (z = 0; z < MODENUM; z++) {
|
|
1714
|
+
if (modelist[z] > best_asc &&
|
|
1715
|
+
(!textord_ocropus_mode ||
|
|
1716
|
+
MIN(rights[modelist[x]], rights[modelist[y]]) >
|
|
1717
|
+
MAX(lefts[modelist[x]], lefts[modelist[y]]))) {
|
|
1718
|
+
ratio = (float) modelist[z] / (float) best_x_height;
|
|
1719
|
+
if ((1.2 < ratio && ratio < 1.8) &&
|
|
1720
|
+
/* Should be half of best */
|
|
1721
|
+
heightstat->pile_count (modelist[z]) >
|
|
1722
|
+
num_in_best * 0.5) {
|
|
1723
|
+
best_asc = modelist[z];
|
|
1724
|
+
found_one_bigger = TRUE;
|
|
1725
|
+
break;
|
|
1726
|
+
}
|
|
1727
|
+
}
|
|
1728
|
+
}
|
|
1729
|
+
}
|
|
1730
|
+
while (found_one_bigger);
|
|
1731
|
+
|
|
1732
|
+
row->xheight = (float) best_x_height;
|
|
1733
|
+
row->ascrise = (float) best_asc - best_x_height;
|
|
1734
|
+
return;
|
|
1735
|
+
}
|
|
1736
|
+
}
|
|
1737
|
+
}
|
|
1738
|
+
}
|
|
1739
|
+
|
|
1740
|
+
best_x_height = modelist[0]; /* Single Mode found */
|
|
1741
|
+
num_in_best = heightstat->pile_count (best_x_height);
|
|
1742
|
+
do {
|
|
1743
|
+
/* Try to get one higher */
|
|
1744
|
+
found_one_bigger = FALSE;
|
|
1745
|
+
for (z = 1; z < MODENUM; z++) {
|
|
1746
|
+
/* Should be half of best */
|
|
1747
|
+
if ((modelist[z] == best_x_height + 1) &&
|
|
1748
|
+
(heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
|
|
1749
|
+
best_x_height++;
|
|
1750
|
+
found_one_bigger = TRUE;
|
|
1751
|
+
break;
|
|
1752
|
+
}
|
|
1753
|
+
}
|
|
1754
|
+
}
|
|
1755
|
+
while (found_one_bigger);
|
|
1756
|
+
|
|
1757
|
+
row->ascrise = 0.0f;
|
|
1758
|
+
row->xheight = (float) best_x_height;
|
|
1759
|
+
if (row->xheight == 0)
|
|
1760
|
+
row->xheight = -1.0f;
|
|
1761
|
+
}
|