tesseract_bin 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
@@ -0,0 +1,2019 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
* File: topitch.cpp (Formerly to_pitch.c)
|
3
|
+
* Description: Code to determine fixed pitchness and the pitch if fixed.
|
4
|
+
* Author: Ray Smith
|
5
|
+
* Created: Tue Aug 24 16:57:29 BST 1993
|
6
|
+
*
|
7
|
+
* (C) Copyright 1993, Hewlett-Packard Ltd.
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
9
|
+
** you may not use this file except in compliance with the License.
|
10
|
+
** You may obtain a copy of the License at
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
** See the License for the specific language governing permissions and
|
16
|
+
** limitations under the License.
|
17
|
+
*
|
18
|
+
**********************************************************************/
|
19
|
+
|
20
|
+
#include "mfcpch.h"
|
21
|
+
#ifdef __UNIX__
|
22
|
+
#include <assert.h>
|
23
|
+
#endif
|
24
|
+
#include "stderr.h"
|
25
|
+
#include "blobbox.h"
|
26
|
+
#include "lmedsq.h"
|
27
|
+
#include "statistc.h"
|
28
|
+
#include "drawtord.h"
|
29
|
+
#include "makerow.h"
|
30
|
+
#include "pitsync1.h"
|
31
|
+
#include "pithsync.h"
|
32
|
+
#include "blobcmpl.h"
|
33
|
+
#include "tovars.h"
|
34
|
+
#include "wordseg.h"
|
35
|
+
#include "topitch.h"
|
36
|
+
#include "secname.h"
|
37
|
+
|
38
|
+
#define EXTERN
|
39
|
+
|
40
|
+
EXTERN BOOL_VAR (textord_all_prop, FALSE, "All doc is proportial text");
|
41
|
+
EXTERN BOOL_VAR (textord_debug_pitch_test, FALSE,
|
42
|
+
"Debug on fixed pitch test");
|
43
|
+
EXTERN BOOL_VAR (textord_disable_pitch_test, FALSE,
|
44
|
+
"Turn off dp fixed pitch algorithm");
|
45
|
+
EXTERN BOOL_VAR (textord_fast_pitch_test, FALSE,
|
46
|
+
"Do even faster pitch algorithm");
|
47
|
+
EXTERN BOOL_VAR (textord_debug_pitch_metric, FALSE,
|
48
|
+
"Write full metric stuff");
|
49
|
+
EXTERN BOOL_VAR (textord_show_row_cuts, FALSE, "Draw row-level cuts");
|
50
|
+
EXTERN BOOL_VAR (textord_show_page_cuts, FALSE, "Draw page-level cuts");
|
51
|
+
EXTERN BOOL_VAR (textord_pitch_cheat, FALSE,
|
52
|
+
"Use correct answer for fixed/prop");
|
53
|
+
EXTERN BOOL_VAR (textord_blockndoc_fixed, FALSE,
|
54
|
+
"Attempt whole doc/block fixed pitch");
|
55
|
+
EXTERN double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts");
|
56
|
+
EXTERN double_VAR (textord_balance_factor, 1.0,
|
57
|
+
"Ding rate for unbalanced char cells");
|
58
|
+
EXTERN double_VAR (textord_repch_width_variance, 0.2,
|
59
|
+
"Max width change of gap/blob");
|
60
|
+
|
61
|
+
#define FIXED_WIDTH_MULTIPLE 5
|
62
|
+
#define BLOCK_STATS_CLUSTERS 10
|
63
|
+
#define MAX_ALLOWED_PITCH 100 //max pixel pitch.
|
64
|
+
|
65
|
+
/**********************************************************************
|
66
|
+
* compute_fixed_pitch
|
67
|
+
*
|
68
|
+
* Decide whether each row is fixed pitch individually.
|
69
|
+
* Correlate definite and uncertain results to obtain an individual
|
70
|
+
* result for each row in the TO_ROW class.
|
71
|
+
**********************************************************************/
|
72
|
+
|
73
|
+
// Top-level driver of the pitch analysis for one page.
//   page_tr     - top right of the page; used to create the debug window and
//                 forwarded to try_doc_fixed
//   port_blocks - list of blocks to process
//   gradient    - page skew, forwarded to try_doc_fixed
//   rotation    - forwarded to compute_block_pitch
//   testing_on  - enables the debug drawing/printing paths
// Three passes are made over port_blocks, numbering blocks (and rows) from 1:
//   1. compute_block_pitch on every block;
//   2. unless try_doc_fixed returns TRUE, try_block_fixed on each block and,
//      where that returns FALSE, try_rows_fixed;
//   3. fix_row_pitch on every row, followed by an optional "Corr:" summary
//      of the block.
void compute_fixed_pitch(                             //determine pitch
                         ICOORD page_tr,              //top right
                         TO_BLOCK_LIST *port_blocks,  //input list
                         float gradient,              //page skew
                         FCOORD rotation,             //for drawing
                         BOOL8 testing_on             //correct orientation
                        ) {
  TO_BLOCK_IT block_it = port_blocks;  //iterator over all blocks
  int block_index;                     //1-based block number

#ifndef GRAPHICS_DISABLED
  //make sure the debug window exists before anything is drawn
  if (textord_show_initial_words && testing_on && to_win == NULL)
    create_to_win(page_tr);
#endif

  //pass 1: initial per-block (and per-row) pitch estimates
  block_index = 1;
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
       block_it.forward (), block_index++) {
    compute_block_pitch (block_it.data (), rotation, block_index, testing_on);
  }

  //pass 2: widest scope first - document, then block, then single rows
  if (!try_doc_fixed (page_tr, port_blocks, gradient)) {
    block_index = 1;
    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
         block_it.forward (), block_index++) {
      TO_BLOCK *candidate = block_it.data ();
      if (try_block_fixed (candidate, block_index))
        continue;
      try_rows_fixed(candidate, block_index, testing_on);
    }
  }

  //pass 3: give every row a final decision
  block_index = 1;
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
       block_it.forward (), block_index++) {
    TO_BLOCK *block = block_it.data ();
    TO_ROW_IT row_it = block->get_rows ();
    int row_index = 1;                 //1-based row number

    for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
         row_it.forward (), row_index++) {
      fix_row_pitch (row_it.data (), block, port_blocks, row_index,
                     block_index);
    }

    if (testing_on
        && ((textord_debug_pitch_test
             && block->block->text_region () != NULL)
            || textord_blocksall_fixed || textord_blocksall_prop)) {
      tprintf ("Corr:");
      print_block_counts(block, block_index);
    }
  }

#ifndef GRAPHICS_DISABLED
  if (textord_show_initial_words && testing_on) {
    //overlap_picture_ops(TRUE);
    ScrollView::Update();
  }
#endif
}
|
140
|
+
|
141
|
+
|
142
|
+
/**********************************************************************
|
143
|
+
* fix_row_pitch
|
144
|
+
*
|
145
|
+
* Get a pitch_decision for this row by voting among similar rows in the
|
146
|
+
* block, then similar rows over all the page, or any other rows at all.
|
147
|
+
**********************************************************************/
|
148
|
+
|
149
|
+
void fix_row_pitch( //get some value
|
150
|
+
TO_ROW *bad_row, //row to fix
|
151
|
+
TO_BLOCK *bad_block, //block of bad_row
|
152
|
+
TO_BLOCK_LIST *blocks, //blocks to scan
|
153
|
+
inT32 row_target, //number of row
|
154
|
+
inT32 block_target //number of block
|
155
|
+
) {
|
156
|
+
const char *res_string; //decision on line
|
157
|
+
inT16 mid_cuts;
|
158
|
+
int block_votes; //votes in block
|
159
|
+
int like_votes; //votes over page
|
160
|
+
int other_votes; //votes of unlike blocks
|
161
|
+
int block_index; //number of block
|
162
|
+
int row_index; //number of row
|
163
|
+
int maxwidth; //max pitch
|
164
|
+
TO_BLOCK_IT block_it = blocks; //block iterator
|
165
|
+
TO_ROW_IT row_it;
|
166
|
+
TO_BLOCK *block; //current block
|
167
|
+
TO_ROW *row; //current row
|
168
|
+
float sp_sd; //space deviation
|
169
|
+
STATS block_stats; //pitches in block
|
170
|
+
STATS like_stats; //pitches in page
|
171
|
+
|
172
|
+
block_votes = like_votes = other_votes = 0;
|
173
|
+
maxwidth = (inT32) ceil (bad_row->xheight * textord_words_maxspace);
|
174
|
+
if (bad_row->pitch_decision != PITCH_DEF_FIXED
|
175
|
+
&& bad_row->pitch_decision != PITCH_DEF_PROP) {
|
176
|
+
block_stats.set_range (0, maxwidth);
|
177
|
+
like_stats.set_range (0, maxwidth);
|
178
|
+
block_index = 1;
|
179
|
+
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
180
|
+
block_it.forward ()) {
|
181
|
+
block = block_it.data ();
|
182
|
+
row_index = 1;
|
183
|
+
row_it.set_to_list (block->get_rows ());
|
184
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
|
185
|
+
row_it.forward ()) {
|
186
|
+
row = row_it.data ();
|
187
|
+
if ((bad_row->all_caps
|
188
|
+
&& row->xheight + row->ascrise
|
189
|
+
<
|
190
|
+
(bad_row->xheight + bad_row->ascrise) * (1 +
|
191
|
+
textord_pitch_rowsimilarity)
|
192
|
+
&& row->xheight + row->ascrise >
|
193
|
+
(bad_row->xheight + bad_row->ascrise) * (1 -
|
194
|
+
textord_pitch_rowsimilarity))
|
195
|
+
|| (!bad_row->all_caps
|
196
|
+
&& row->xheight <
|
197
|
+
bad_row->xheight * (1 + textord_pitch_rowsimilarity)
|
198
|
+
&& row->xheight >
|
199
|
+
bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
|
200
|
+
if (block_index == block_target) {
|
201
|
+
if (row->pitch_decision == PITCH_DEF_FIXED) {
|
202
|
+
block_votes += textord_words_veto_power;
|
203
|
+
block_stats.add ((inT32) row->fixed_pitch,
|
204
|
+
textord_words_veto_power);
|
205
|
+
}
|
206
|
+
else if (row->pitch_decision == PITCH_MAYBE_FIXED
|
207
|
+
|| row->pitch_decision == PITCH_CORR_FIXED) {
|
208
|
+
block_votes++;
|
209
|
+
block_stats.add ((inT32) row->fixed_pitch, 1);
|
210
|
+
}
|
211
|
+
else if (row->pitch_decision == PITCH_DEF_PROP)
|
212
|
+
block_votes -= textord_words_veto_power;
|
213
|
+
else if (row->pitch_decision == PITCH_MAYBE_PROP
|
214
|
+
|| row->pitch_decision == PITCH_CORR_PROP)
|
215
|
+
block_votes--;
|
216
|
+
}
|
217
|
+
else {
|
218
|
+
if (row->pitch_decision == PITCH_DEF_FIXED) {
|
219
|
+
like_votes += textord_words_veto_power;
|
220
|
+
like_stats.add ((inT32) row->fixed_pitch,
|
221
|
+
textord_words_veto_power);
|
222
|
+
}
|
223
|
+
else if (row->pitch_decision == PITCH_MAYBE_FIXED
|
224
|
+
|| row->pitch_decision == PITCH_CORR_FIXED) {
|
225
|
+
like_votes++;
|
226
|
+
like_stats.add ((inT32) row->fixed_pitch, 1);
|
227
|
+
}
|
228
|
+
else if (row->pitch_decision == PITCH_DEF_PROP)
|
229
|
+
like_votes -= textord_words_veto_power;
|
230
|
+
else if (row->pitch_decision == PITCH_MAYBE_PROP
|
231
|
+
|| row->pitch_decision == PITCH_CORR_PROP)
|
232
|
+
like_votes--;
|
233
|
+
}
|
234
|
+
}
|
235
|
+
else {
|
236
|
+
if (row->pitch_decision == PITCH_DEF_FIXED)
|
237
|
+
other_votes += textord_words_veto_power;
|
238
|
+
else if (row->pitch_decision == PITCH_MAYBE_FIXED
|
239
|
+
|| row->pitch_decision == PITCH_CORR_FIXED)
|
240
|
+
other_votes++;
|
241
|
+
else if (row->pitch_decision == PITCH_DEF_PROP)
|
242
|
+
other_votes -= textord_words_veto_power;
|
243
|
+
else if (row->pitch_decision == PITCH_MAYBE_PROP
|
244
|
+
|| row->pitch_decision == PITCH_CORR_PROP)
|
245
|
+
other_votes--;
|
246
|
+
}
|
247
|
+
row_index++;
|
248
|
+
}
|
249
|
+
block_index++;
|
250
|
+
}
|
251
|
+
if (block_votes > textord_words_veto_power) {
|
252
|
+
bad_row->fixed_pitch = block_stats.ile (0.5);
|
253
|
+
bad_row->pitch_decision = PITCH_CORR_FIXED;
|
254
|
+
}
|
255
|
+
else if (block_votes <= textord_words_veto_power && like_votes > 0) {
|
256
|
+
bad_row->fixed_pitch = like_stats.ile (0.5);
|
257
|
+
bad_row->pitch_decision = PITCH_CORR_FIXED;
|
258
|
+
}
|
259
|
+
else {
|
260
|
+
bad_row->pitch_decision = PITCH_CORR_PROP;
|
261
|
+
#ifndef SECURE_NAMES
|
262
|
+
if (block_votes == 0 && like_votes == 0 && other_votes > 0
|
263
|
+
&& (textord_debug_pitch_test || textord_debug_pitch_metric))
|
264
|
+
tprintf
|
265
|
+
("Warning:row %d of block %d set prop with no like rows against trend\n",
|
266
|
+
row_target, block_target);
|
267
|
+
#endif
|
268
|
+
}
|
269
|
+
}
|
270
|
+
if (textord_debug_pitch_metric) {
|
271
|
+
tprintf (":b_votes=%d:l_votes=%d:o_votes=%d",
|
272
|
+
block_votes, like_votes, other_votes);
|
273
|
+
if (bad_row->pitch_decision == PITCH_CORR_PROP
|
274
|
+
|| bad_row->pitch_decision == PITCH_DEF_PROP) {
|
275
|
+
res_string = bad_block->block->text_region () != NULL ?
|
276
|
+
(bad_block->block->text_region ()->
|
277
|
+
is_prop ()? "CP" : "WP") : "XP";
|
278
|
+
}
|
279
|
+
else {
|
280
|
+
res_string = bad_block->block->text_region () != NULL ?
|
281
|
+
(bad_block->block->text_region ()->
|
282
|
+
is_prop ()? "WF" : "CF") : "XF";
|
283
|
+
}
|
284
|
+
tprintf (":Blk=%d:Row=%d:%c:",
|
285
|
+
block_target, row_target,
|
286
|
+
bad_block->block->text_region () != NULL ?
|
287
|
+
(bad_block->block->text_region ()->
|
288
|
+
is_prop ()? 'P' : 'F') : 'X');
|
289
|
+
tprintf ("x=%g:asc=%g:corr_res=%s\n", bad_row->xheight,
|
290
|
+
bad_row->ascrise, res_string);
|
291
|
+
}
|
292
|
+
if (textord_pitch_cheat && bad_block->block->text_region () != NULL)
|
293
|
+
bad_row->pitch_decision =
|
294
|
+
bad_block->block->text_region ()->
|
295
|
+
is_prop ()? PITCH_CORR_PROP : PITCH_CORR_FIXED;
|
296
|
+
if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
|
297
|
+
if (bad_row->fixed_pitch < textord_min_xheight) {
|
298
|
+
if (block_votes > 0)
|
299
|
+
bad_row->fixed_pitch = block_stats.ile (0.5);
|
300
|
+
else if (block_votes == 0 && like_votes > 0)
|
301
|
+
bad_row->fixed_pitch = like_stats.ile (0.5);
|
302
|
+
else {
|
303
|
+
tprintf
|
304
|
+
("Warning:guessing pitch as xheight on row %d, block %d\n",
|
305
|
+
row_target, block_target);
|
306
|
+
bad_row->fixed_pitch = bad_row->xheight;
|
307
|
+
}
|
308
|
+
}
|
309
|
+
if (bad_row->fixed_pitch < textord_min_xheight)
|
310
|
+
bad_row->fixed_pitch = (float) textord_min_xheight;
|
311
|
+
bad_row->kern_size = bad_row->fixed_pitch / 4;
|
312
|
+
bad_row->min_space = (inT32) (bad_row->fixed_pitch * 0.6);
|
313
|
+
bad_row->max_nonspace = (inT32) (bad_row->fixed_pitch * 0.4);
|
314
|
+
bad_row->space_threshold =
|
315
|
+
(bad_row->min_space + bad_row->max_nonspace) / 2;
|
316
|
+
bad_row->space_size = bad_row->fixed_pitch;
|
317
|
+
if (bad_row->char_cells.empty ())
|
318
|
+
tune_row_pitch (bad_row, &bad_row->projection,
|
319
|
+
bad_row->projection_left, bad_row->projection_right,
|
320
|
+
(bad_row->fixed_pitch +
|
321
|
+
bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
|
322
|
+
sp_sd, mid_cuts, &bad_row->char_cells, FALSE);
|
323
|
+
}
|
324
|
+
else if (bad_row->pitch_decision == PITCH_CORR_PROP
|
325
|
+
|| bad_row->pitch_decision == PITCH_DEF_PROP) {
|
326
|
+
bad_row->fixed_pitch = 0.0f;
|
327
|
+
bad_row->char_cells.clear ();
|
328
|
+
}
|
329
|
+
}
|
330
|
+
|
331
|
+
|
332
|
+
/**********************************************************************
|
333
|
+
* compute_block_pitch
|
334
|
+
*
|
335
|
+
* Decide whether each block is fixed pitch individually.
|
336
|
+
**********************************************************************/
|
337
|
+
|
338
|
+
// Resets the spacing fields of one block from its xheight and then runs
// compute_rows_pitch on it.
//   block       - block to process; its min_space, max_nonspace, fixed_pitch,
//                 space_size, kern_size, pr_nonsp and pr_space are overwritten
//   rotation    - not used in this function
//   block_index - block number, used in debug output and passed on
//   testing_on  - enables debug output/drawing in combination with the
//                 textord_debug_pitch_test / textord_show_initial_words flags
// Blocks without rows only get their fields reset.
void compute_block_pitch(                     //process each block
                         TO_BLOCK *block,     //input list
                         FCOORD rotation,     //for drawing
                         inT32 block_index,   //block number
                         BOOL8 testing_on     //correct orientation
                        ) {
  TBOX block_box = block->block->bounding_box ();  //bounding box

  if (testing_on && textord_debug_pitch_test) {
    tprintf ("Block %d at (%d,%d)->(%d,%d)\n",
             block_index,
             block_box.left (), block_box.bottom (),
             block_box.right (), block_box.top ());
  }

  //defaults: the block starts out without a fixed pitch
  block->min_space =
    (inT32) floor (block->xheight * textord_words_default_minspace);
  block->max_nonspace =
    (inT32) ceil (block->xheight * textord_words_default_nonspace);
  block->fixed_pitch = 0.0f;
  block->space_size = (float) block->min_space;
  block->kern_size = (float) block->max_nonspace;
  block->pr_nonsp = block->xheight * words_default_prop_nonspace;
  block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;

  if (block->get_rows ()->empty ())
    return;                      //nothing to analyse

  ASSERT_HOST (block->xheight > 0);
  if (textord_repeat_extraction)
    find_repeated_chars(block, textord_show_initial_words &&testing_on);
#ifndef GRAPHICS_DISABLED
  if (textord_show_initial_words && testing_on) {
    //overlap_picture_ops(TRUE);
    ScrollView::Update();
  }
#endif
  compute_rows_pitch(block, block_index,
                     textord_debug_pitch_test &&testing_on);
}
|
376
|
+
|
377
|
+
|
378
|
+
/**********************************************************************
|
379
|
+
* compute_rows_pitch
|
380
|
+
*
|
381
|
+
* Decide whether each row is fixed pitch individually.
|
382
|
+
**********************************************************************/
|
383
|
+
|
384
|
+
// Makes an initial pitch estimate for every row of a block.
//   block       - block whose rows are processed
//   block_index - block number, passed on to find_row_pitch
//   testing_on  - debug flag, passed on to the helpers
// For each row the vertical projection is computed, then row_pitch_stats and
// find_row_pitch are run. If either returns FALSE the row is marked
// PITCH_DUNNO with fixed_pitch 0. If both succeed but the row ends up with
// fixed_pitch 0, its space_size/kern_size are taken from pr_space/pr_nonsp.
// Always returns FALSE.
BOOL8 compute_rows_pitch(                    //find line stats
                         TO_BLOCK *block,    //block to do
                         inT32 block_index,  //block number
                         BOOL8 testing_on    //correct orientation
                        ) {
  TO_ROW_IT row_it = block->get_rows ();
  inT32 row_index = 1;           //1-based row number

  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
       row_it.forward (), row_index++) {
    TO_ROW *row = row_it.data ();

    ASSERT_HOST (row->xheight > 0);
    row->compute_vertical_projection ();
    //widest gap that is still taken into account
    const inT32 maxwidth =
      (inT32) ceil (row->xheight * textord_words_maxspace);

    const BOOL8 pitch_found =
      row_pitch_stats (row, maxwidth, testing_on)
      && find_row_pitch (row, maxwidth,
                         textord_dotmatrix_gap + 1, block, block_index,
                         row_index, testing_on);
    if (!pitch_found) {
      row->fixed_pitch = 0.0f;   //insufficient data
      row->pitch_decision = PITCH_DUNNO;
      continue;
    }

    if (row->fixed_pitch == 0) {
      //no fixed pitch on this row: use the proportional estimates
      const float nonspace_gap = row->pr_nonsp;
      const float space_gap = row->pr_space;
      row->space_size = space_gap;
      row->kern_size = nonspace_gap;
    }
  }
  return FALSE;
}
|
420
|
+
|
421
|
+
|
422
|
+
/**********************************************************************
|
423
|
+
* try_doc_fixed
|
424
|
+
*
|
425
|
+
* Attempt to call the entire document fixed pitch.
|
426
|
+
**********************************************************************/
|
427
|
+
|
428
|
+
// Attempts a single fixed-pitch fit over the whole document.
//   page_tr     - top right of page; not used in this function
//   port_blocks - blocks of the page
//   gradient    - page skew, used to shift each row's projection sideways
//                 according to its baseline offset from the reference row
// Does nothing unless textord_blockndoc_fixed is set and the first block has
// at least one row. The first row of the first block is the reference row:
// the projections of all rows are accumulated into one page-wide projection
// relative to it, the median of the rows' positive fixed_pitch values is
// used as the pitch, and tune_row_pitch is run on the combined projection.
// The resulting cells are stored temporarily in the reference row's
// char_cells, optionally plotted, and then cleared again.
// The result is only reported through debug output; the function always
// returns FALSE.
BOOL8 try_doc_fixed(                             //determine pitch
                    ICOORD page_tr,              //top right
                    TO_BLOCK_LIST *port_blocks,  //input list
                    float gradient               //page skew
                   ) {
  inT16 master_x;                //uniform shifts
  inT16 pitch;                   //median pitch.
  int x;                         //profile coord
  int prop_blocks;               //correct counts
  int fixed_blocks;
  int total_row_count;           //total in page
                                 //iterator
  TO_BLOCK_IT block_it = port_blocks;
  TO_BLOCK *block;               //current block;
  TO_ROW_IT row_it;              //row iterator
  TO_ROW *row;                   //current row
  TO_ROW *master_row;            //reference row, holds the page cells
  inT16 projection_left;         //edges
  inT16 projection_right;
  inT16 row_left;                //edges of row
  inT16 row_right;
  ICOORDELT_LIST *master_cells;  //cells for page
  float master_y;                //uniform shifts
  float shift_factor;            //page skew correction
  float row_shift;               //shift for row
  float final_pitch;             //output pitch
  float row_y;                   //baseline
  STATS projection;              //entire page
  STATS pitches (0, MAX_ALLOWED_PITCH);
                                 //for median
  float sp_sd;                   //space sd
  inT16 mid_cuts;                //no of cheap cuts
  float pitch_sd;                //sync rating

  if (block_it.empty ()
      //      || block_it.data()==block_it.data_relative(1)
      || !textord_blockndoc_fixed)
    return FALSE;
  shift_factor = gradient / (gradient * gradient + 1);
  row_it.set_to_list (block_it.data ()->get_rows ());
  //a block may have no rows, in which case there is no reference row
  if (row_it.empty ())
    return FALSE;
  master_row = row_it.data ();
  master_x = master_row->projection_left;
  master_y = master_row->baseline.y (master_x);
  projection_left = MAX_INT16;
  projection_right = -MAX_INT16;
  prop_blocks = 0;
  fixed_blocks = 0;
  total_row_count = 0;

  //first pass: collect pitches and the horizontal extent of the page
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
       block_it.forward ()) {
    block = block_it.data ();
    if (block->block->text_region () != NULL) {
      if (block->block->text_region ()->is_prop ())
        prop_blocks++;
      else
        fixed_blocks++;
    }
    row_it.set_to_list (block->get_rows ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row = row_it.data ();
      total_row_count++;
      if (row->fixed_pitch > 0)
        pitches.add ((inT32) (row->fixed_pitch), 1);
      //find median
      row_y = row->baseline.y (master_x);
      row_left =
        (inT16) (row->projection_left -
                 shift_factor * (master_y - row_y));
      row_right =
        (inT16) (row->projection_right -
                 shift_factor * (master_y - row_y));
      if (row_left < projection_left)
        projection_left = row_left;
      if (row_right > projection_right)
        projection_right = row_right;
    }
  }
  if (pitches.get_total () == 0)
    return FALSE;                //no row has a pitch to work from
  projection.set_range (projection_left, projection_right);

  //second pass: add every row's projection, shifted for skew
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
       block_it.forward ()) {
    block = block_it.data ();
    row_it.set_to_list (block->get_rows ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row = row_it.data ();
      row_y = row->baseline.y (master_x);
      row_left =
        (inT16) (row->projection_left -
                 shift_factor * (master_y - row_y));
      for (x = row->projection_left; x < row->projection_right;
           x++, row_left++) {
        projection.add (row_left, row->projection.pile_count (x));
      }
    }
  }

#ifndef GRAPHICS_DISABLED
  if (textord_show_page_cuts && to_win != NULL)
    projection.plot (to_win, projection_left,
                     master_row->intercept (), 1.0f, -1.0f,
                     ScrollView::CORAL);
#endif
  final_pitch = pitches.ile (0.5);
  pitch = (inT16) final_pitch;
  pitch_sd =
    tune_row_pitch (master_row, &projection, projection_left,
                    projection_right, pitch * 0.75, final_pitch, sp_sd,
                    mid_cuts, &master_row->char_cells, FALSE);

  if (textord_debug_pitch_metric)
    tprintf
      ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
       prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
       pitch_sd / total_row_count, pitch_sd / pitch,
       pitch_sd / total_row_count / pitch);

#ifndef GRAPHICS_DISABLED
  if (textord_show_page_cuts && to_win != NULL) {
    master_cells = &master_row->char_cells;
    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
         block_it.forward ()) {
      block = block_it.data ();
      row_it.set_to_list (block->get_rows ());
      for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
           row_it.forward ()) {
        row = row_it.data ();
        row_y = row->baseline.y (master_x);
        row_shift = shift_factor * (master_y - row_y);
        plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
      }
    }
  }
#endif
  //Drop the temporary page-level cells again. This must be the reference
  //row: "row" may have been advanced to another row by the plotting loop.
  master_row->char_cells.clear ();
  return FALSE;
}
|
566
|
+
|
567
|
+
|
568
|
+
/**********************************************************************
|
569
|
+
* try_block_fixed
|
570
|
+
*
|
571
|
+
* Try to call the entire block fixed.
|
572
|
+
**********************************************************************/
|
573
|
+
|
574
|
+
// Placeholder for a block-level fixed pitch test.
//   block       - block to do (ignored)
//   block_index - block number (ignored)
// No analysis is performed: the answer is always FALSE.
BOOL8 try_block_fixed(                   //find line stats
                      TO_BLOCK *block,   //block to do
                      inT32 block_index  //block number
                     ) {
  return FALSE;                  //never claims the block as fixed
}
|
580
|
+
|
581
|
+
|
582
|
+
/**********************************************************************
|
583
|
+
* try_rows_fixed
|
584
|
+
*
|
585
|
+
* Decide whether each row is fixed pitch individually.
|
586
|
+
**********************************************************************/
|
587
|
+
|
588
|
+
// Runs the per-row fixed pitch test and derives a decision for the block.
//   block       - block to do; block->pitch_decision is set
//   block_index - block number, passed to fixed_pitch_row and debug output
//   testing_on  - enables the "Initially:" debug summary in combination with
//                 textord_debug_pitch_test / textord_blocksall_* flags
// Every row that currently has a positive fixed_pitch is handed to
// fixed_pitch_row; if that returns TRUE and leaves the row with fixed_pitch
// 0, the row's space_size/kern_size are taken from pr_space/pr_nonsp.
// The rows' decisions are then counted with count_block_votes:
//   - definite votes decide DEF_FIXED / DEF_PROP when one side outnumbers
//     the other by more than a factor textord_words_veto_power;
//   - any other mix containing definite votes gives PITCH_DUNNO;
//   - without definite votes the maybe votes decide in the same way.
// Always returns FALSE.
BOOL8 try_rows_fixed(                    //find line stats
                     TO_BLOCK *block,    //block to do
                     inT32 block_index,  //block number
                     BOOL8 testing_on    //correct orientation
                    ) {
  inT32 def_fixed = 0;           //counters
  inT32 def_prop = 0;
  inT32 maybe_fixed = 0;
  inT32 maybe_prop = 0;
  inT32 dunno = 0;
  inT32 corr_fixed = 0;
  inT32 corr_prop = 0;
  TO_ROW_IT row_it = block->get_rows ();

  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    TO_ROW *row = row_it.data ();  //current row

    ASSERT_HOST (row->xheight > 0);
    //fixed_pitch is tested twice on purpose: once before the call and once
    //on the value the row has after fixed_pitch_row returned
    if (row->fixed_pitch > 0 && fixed_pitch_row (row, block_index)
        && row->fixed_pitch == 0) {
      const float nonspace_gap = row->pr_nonsp;
      const float space_gap = row->pr_space;
      row->space_size = space_gap;
      row->kern_size = nonspace_gap;
    }
  }

  count_block_votes(block,
                    def_fixed, def_prop,
                    maybe_fixed, maybe_prop,
                    corr_fixed, corr_prop,
                    dunno);
  if (testing_on
      && (textord_debug_pitch_test
          || textord_blocksall_prop || textord_blocksall_fixed)) {
    tprintf ("Initially:");
    print_block_counts(block, block_index);
  }

  if (def_fixed > def_prop * textord_words_veto_power)
    block->pitch_decision = PITCH_DEF_FIXED;
  else if (def_prop > def_fixed * textord_words_veto_power)
    block->pitch_decision = PITCH_DEF_PROP;
  else if (def_fixed > 0 || def_prop > 0)
    block->pitch_decision = PITCH_DUNNO;   //definite votes disagree
  else if (maybe_fixed > maybe_prop * textord_words_veto_power)
    block->pitch_decision = PITCH_MAYBE_FIXED;
  else if (maybe_prop > maybe_fixed * textord_words_veto_power)
    block->pitch_decision = PITCH_MAYBE_PROP;
  else
    block->pitch_decision = PITCH_DUNNO;
  return FALSE;
}
|
649
|
+
|
650
|
+
|
651
|
+
/**********************************************************************
|
652
|
+
* print_block_counts
|
653
|
+
*
|
654
|
+
* Count up how many rows have what decision and print the results.
|
655
|
+
**********************************************************************/
|
656
|
+
|
657
|
+
// Prints a one-line summary of the pitch decisions of a block's rows.
//   block       - block to do
//   block_index - block number, printed only
// Output: the (definite, maybe, corrected) counts of fixed rows, then the
// same triple for proportional rows, then the number of undecided rows.
// " (Wrongly)" is added after a triple when it contains a non-zero count
// although the block is expected to be of the other kind, either because
// textord_blocksall_prop / textord_blocksall_fixed is set or because the
// block's text_region() says so.
void print_block_counts(                   //find line stats
                        TO_BLOCK *block,   //block to do
                        inT32 block_index  //block number
                       ) {
  inT32 def_fixed = 0;           //counters
  inT32 def_prop = 0;
  inT32 maybe_fixed = 0;
  inT32 maybe_prop = 0;
  inT32 dunno = 0;
  inT32 corr_fixed = 0;
  inT32 corr_prop = 0;

  count_block_votes(block,
                    def_fixed, def_prop,
                    maybe_fixed, maybe_prop,
                    corr_fixed, corr_prop,
                    dunno);

  tprintf ("Block %d has (%d,%d,%d)",
           block_index, def_fixed, maybe_fixed, corr_fixed);
  const BOOL8 expect_prop =
    textord_blocksall_prop
    || (block->block->text_region () != NULL
        && block->block->text_region ()->is_prop ());
  if (expect_prop && (def_fixed || maybe_fixed || corr_fixed))
    tprintf (" (Wrongly)");

  tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
  const BOOL8 expect_fixed =
    textord_blocksall_fixed
    || (block->block->text_region () != NULL
        && !block->block->text_region ()->is_prop ());
  if (expect_fixed && (def_prop || maybe_prop || corr_prop))
    tprintf (" (Wrongly)");

  tprintf (" prop, %d dunno\n", dunno);
}
|
694
|
+
|
695
|
+
|
696
|
+
/**********************************************************************
|
697
|
+
* count_block_votes
|
698
|
+
*
|
699
|
+
* Count the number of rows in the block with each kind of pitch_decision.
|
700
|
+
**********************************************************************/
|
701
|
+
|
702
|
+
// Counts the rows of a block by pitch_decision.
//   block - block to do
//   def_fixed, def_prop, maybe_fixed, maybe_prop, corr_fixed, corr_prop,
//   dunno - counters; the one matching a row's decision is incremented.
//           They are added to, not reset, so the caller must initialise them.
// Rows with any other decision value are not counted.
void count_block_votes(                    //find line stats
                       TO_BLOCK *block,    //block to do
                       inT32 &def_fixed,   //add to counts
                       inT32 &def_prop,
                       inT32 &maybe_fixed,
                       inT32 &maybe_prop,
                       inT32 &corr_fixed,
                       inT32 &corr_prop,
                       inT32 &dunno) {
  TO_ROW_IT row_it = block->get_rows ();

  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    inT32 *counter = NULL;       //counter selected by this row

    switch (row_it.data ()->pitch_decision) {
      case PITCH_DUNNO:
        counter = &dunno;
        break;
      case PITCH_DEF_PROP:
        counter = &def_prop;
        break;
      case PITCH_MAYBE_PROP:
        counter = &maybe_prop;
        break;
      case PITCH_DEF_FIXED:
        counter = &def_fixed;
        break;
      case PITCH_MAYBE_FIXED:
        counter = &maybe_fixed;
        break;
      case PITCH_CORR_PROP:
        counter = &corr_prop;
        break;
      case PITCH_CORR_FIXED:
        counter = &corr_fixed;
        break;
      default:
        break;
    }
    if (counter != NULL)
      ++*counter;
  }
}
|
741
|
+
|
742
|
+
|
743
|
+
/**********************************************************************
|
744
|
+
* row_pitch_stats
|
745
|
+
*
|
746
|
+
* Decide whether each row is fixed pitch individually.
|
747
|
+
**********************************************************************/
|
748
|
+
|
749
|
+
BOOL8 row_pitch_stats( //find line stats
|
750
|
+
TO_ROW *row, //current row
|
751
|
+
inT32 maxwidth, //of spaces
|
752
|
+
BOOL8 testing_on //correct orientation
|
753
|
+
) {
|
754
|
+
BLOBNBOX *blob; //current blob
|
755
|
+
int gap_index; //current gap
|
756
|
+
inT32 prev_x; //end of prev blob
|
757
|
+
inT32 cluster_count; //no of clusters
|
758
|
+
inT32 prev_count; //of clusters
|
759
|
+
inT32 smooth_factor; //for smoothing stats
|
760
|
+
TBOX blob_box; //bounding box
|
761
|
+
float lower, upper; //cluster thresholds
|
762
|
+
//gap sizes
|
763
|
+
float gaps[BLOCK_STATS_CLUSTERS];
|
764
|
+
//blobs
|
765
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
766
|
+
STATS gap_stats (0, maxwidth);
|
767
|
+
STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
|
768
|
+
//clusters
|
769
|
+
|
770
|
+
smooth_factor =
|
771
|
+
(inT32) (row->xheight * textord_wordstats_smooth_factor + 1.5);
|
772
|
+
if (!blob_it.empty ()) {
|
773
|
+
prev_x = blob_it.data ()->bounding_box ().right ();
|
774
|
+
blob_it.forward ();
|
775
|
+
while (!blob_it.at_first ()) {
|
776
|
+
blob = blob_it.data ();
|
777
|
+
if (!blob->joined_to_prev ()) {
|
778
|
+
blob_box = blob->bounding_box ();
|
779
|
+
if (blob_box.left () - prev_x < maxwidth)
|
780
|
+
gap_stats.add (blob_box.left () - prev_x, 1);
|
781
|
+
prev_x = blob_box.right ();
|
782
|
+
}
|
783
|
+
blob_it.forward ();
|
784
|
+
}
|
785
|
+
}
|
786
|
+
if (gap_stats.get_total () == 0) {
|
787
|
+
return FALSE;
|
788
|
+
}
|
789
|
+
cluster_count = 0;
|
790
|
+
lower = row->xheight * words_initial_lower;
|
791
|
+
upper = row->xheight * words_initial_upper;
|
792
|
+
gap_stats.smooth (smooth_factor);
|
793
|
+
do {
|
794
|
+
prev_count = cluster_count;
|
795
|
+
cluster_count = gap_stats.cluster (lower, upper,
|
796
|
+
textord_spacesize_ratioprop,
|
797
|
+
BLOCK_STATS_CLUSTERS, cluster_stats);
|
798
|
+
}
|
799
|
+
while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
|
800
|
+
if (cluster_count < 1) {
|
801
|
+
return FALSE;
|
802
|
+
}
|
803
|
+
for (gap_index = 0; gap_index < cluster_count; gap_index++)
|
804
|
+
gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
|
805
|
+
//get medians
|
806
|
+
if (testing_on) {
|
807
|
+
tprintf ("cluster_count=%d:", cluster_count);
|
808
|
+
for (gap_index = 0; gap_index < cluster_count; gap_index++)
|
809
|
+
tprintf (" %g(%d)", gaps[gap_index],
|
810
|
+
cluster_stats[gap_index + 1].get_total ());
|
811
|
+
tprintf ("\n");
|
812
|
+
}
|
813
|
+
qsort (gaps, cluster_count, sizeof (float), sort_floats2);
|
814
|
+
|
815
|
+
//Try to find proportional non-space and space for row.
|
816
|
+
lower = row->xheight * words_default_prop_nonspace;
|
817
|
+
upper = row->xheight * textord_words_min_minspace;
|
818
|
+
for (gap_index = 0; gap_index < cluster_count
|
819
|
+
&& gaps[gap_index] < lower; gap_index++);
|
820
|
+
if (gap_index == 0) {
|
821
|
+
if (testing_on)
|
822
|
+
tprintf ("No clusters below nonspace threshold!!\n");
|
823
|
+
if (cluster_count > 1) {
|
824
|
+
row->pr_nonsp = gaps[0];
|
825
|
+
row->pr_space = gaps[1];
|
826
|
+
}
|
827
|
+
else {
|
828
|
+
row->pr_nonsp = lower;
|
829
|
+
row->pr_space = gaps[0];
|
830
|
+
}
|
831
|
+
}
|
832
|
+
else {
|
833
|
+
row->pr_nonsp = gaps[gap_index - 1];
|
834
|
+
while (gap_index < cluster_count && gaps[gap_index] < upper)
|
835
|
+
gap_index++;
|
836
|
+
if (gap_index == cluster_count) {
|
837
|
+
if (testing_on)
|
838
|
+
tprintf ("No clusters above nonspace threshold!!\n");
|
839
|
+
row->pr_space = lower * textord_spacesize_ratioprop;
|
840
|
+
}
|
841
|
+
else
|
842
|
+
row->pr_space = gaps[gap_index];
|
843
|
+
}
|
844
|
+
|
845
|
+
//Now try to find the fixed pitch space and non-space.
|
846
|
+
upper = row->xheight * words_default_fixed_space;
|
847
|
+
for (gap_index = 0; gap_index < cluster_count
|
848
|
+
&& gaps[gap_index] < upper; gap_index++);
|
849
|
+
if (gap_index == 0) {
|
850
|
+
if (testing_on)
|
851
|
+
tprintf ("No clusters below space threshold!!\n");
|
852
|
+
row->fp_nonsp = upper;
|
853
|
+
row->fp_space = gaps[0];
|
854
|
+
}
|
855
|
+
else {
|
856
|
+
row->fp_nonsp = gaps[gap_index - 1];
|
857
|
+
if (gap_index == cluster_count) {
|
858
|
+
if (testing_on)
|
859
|
+
tprintf ("No clusters above space threshold!!\n");
|
860
|
+
row->fp_space = row->xheight;
|
861
|
+
}
|
862
|
+
else
|
863
|
+
row->fp_space = gaps[gap_index];
|
864
|
+
}
|
865
|
+
if (testing_on) {
|
866
|
+
tprintf
|
867
|
+
("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
|
868
|
+
row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
|
869
|
+
}
|
870
|
+
return TRUE; //computed some stats
|
871
|
+
}
|
872
|
+
|
873
|
+
|
874
|
+
/**********************************************************************
|
875
|
+
* find_row_pitch
|
876
|
+
*
|
877
|
+
* Check to see if this row could be fixed pitch using the given spacings.
|
878
|
+
* Blobs with gaps smaller than the lower threshold are assumed to be one.
|
879
|
+
* The larger threshold is the word gap threshold.
|
880
|
+
**********************************************************************/
|
881
|
+
|
882
|
+
BOOL8 find_row_pitch( //find lines
|
883
|
+
TO_ROW *row, //row to do
|
884
|
+
inT32 maxwidth, //max permitted space
|
885
|
+
inT32 dm_gap, //ignorable gaps
|
886
|
+
TO_BLOCK *block, //block of row
|
887
|
+
inT32 block_index, //block_number
|
888
|
+
inT32 row_index, //number of row
|
889
|
+
BOOL8 testing_on //correct orientation
|
890
|
+
) {
|
891
|
+
BOOL8 used_dm_model; //looks lik dot matrix
|
892
|
+
float min_space; //estimate threshold
|
893
|
+
float non_space; //gap size
|
894
|
+
float gap_iqr; //interquartile range
|
895
|
+
float pitch_iqr;
|
896
|
+
float dm_gap_iqr; //interquartile range
|
897
|
+
float dm_pitch_iqr;
|
898
|
+
float dm_pitch; //pitch with dm on
|
899
|
+
float pitch; //revised estimate
|
900
|
+
float initial_pitch; //guess at pitch
|
901
|
+
STATS gap_stats (0, maxwidth);
|
902
|
+
//centre-centre
|
903
|
+
STATS pitch_stats (0, maxwidth);
|
904
|
+
|
905
|
+
row->fixed_pitch = 0.0f;
|
906
|
+
initial_pitch = row->fp_space;
|
907
|
+
if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
|
908
|
+
initial_pitch = row->xheight;//keep pitch decent
|
909
|
+
non_space = row->fp_nonsp;
|
910
|
+
if (non_space > initial_pitch)
|
911
|
+
non_space = initial_pitch;
|
912
|
+
min_space = (initial_pitch + non_space) / 2;
|
913
|
+
|
914
|
+
if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
|
915
|
+
initial_pitch, min_space, TRUE, FALSE, dm_gap)) {
|
916
|
+
dm_gap_iqr = 0.0001;
|
917
|
+
dm_pitch_iqr = maxwidth * 2.0f;
|
918
|
+
dm_pitch = initial_pitch;
|
919
|
+
}
|
920
|
+
else {
|
921
|
+
dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
|
922
|
+
dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
|
923
|
+
dm_pitch = pitch_stats.ile (0.5);
|
924
|
+
}
|
925
|
+
gap_stats.clear ();
|
926
|
+
pitch_stats.clear ();
|
927
|
+
if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
|
928
|
+
initial_pitch, min_space, TRUE, FALSE, 0)) {
|
929
|
+
gap_iqr = 0.0001;
|
930
|
+
pitch_iqr = maxwidth * 3.0f;
|
931
|
+
}
|
932
|
+
else {
|
933
|
+
gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
|
934
|
+
pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
|
935
|
+
if (testing_on)
|
936
|
+
tprintf
|
937
|
+
("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
|
938
|
+
initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
|
939
|
+
initial_pitch = pitch_stats.ile (0.5);
|
940
|
+
if (min_space > initial_pitch
|
941
|
+
&& count_pitch_stats (row, &gap_stats, &pitch_stats,
|
942
|
+
initial_pitch, initial_pitch, TRUE, FALSE, 0)) {
|
943
|
+
min_space = initial_pitch;
|
944
|
+
gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
|
945
|
+
pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
|
946
|
+
if (testing_on)
|
947
|
+
tprintf
|
948
|
+
("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
|
949
|
+
initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
|
950
|
+
initial_pitch = pitch_stats.ile (0.5);
|
951
|
+
}
|
952
|
+
}
|
953
|
+
if (textord_debug_pitch_metric)
|
954
|
+
tprintf ("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
|
955
|
+
block_index, row_index,
|
956
|
+
block->block->text_region () != NULL ?
|
957
|
+
(block->block->text_region ()->is_prop ()? 'P' : 'F') : 'X',
|
958
|
+
pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
|
959
|
+
pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D'
|
960
|
+
: (pitch_iqr * dm_gap_iqr <=
|
961
|
+
dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
|
962
|
+
if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
|
963
|
+
row->pitch_decision = PITCH_DUNNO;
|
964
|
+
if (textord_debug_pitch_metric)
|
965
|
+
tprintf ("\n");
|
966
|
+
return FALSE; //insufficient data
|
967
|
+
}
|
968
|
+
if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
|
969
|
+
if (testing_on)
|
970
|
+
tprintf
|
971
|
+
("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
|
972
|
+
pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
|
973
|
+
gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
|
974
|
+
pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
|
975
|
+
pitch = pitch_stats.ile (0.5);
|
976
|
+
used_dm_model = FALSE;
|
977
|
+
}
|
978
|
+
else {
|
979
|
+
if (testing_on)
|
980
|
+
tprintf
|
981
|
+
("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
|
982
|
+
pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
|
983
|
+
gap_iqr = dm_gap_iqr;
|
984
|
+
pitch_iqr = dm_pitch_iqr;
|
985
|
+
pitch = dm_pitch;
|
986
|
+
used_dm_model = TRUE;
|
987
|
+
}
|
988
|
+
if (textord_debug_pitch_metric) {
|
989
|
+
tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
|
990
|
+
pitch_iqr, gap_iqr, pitch);
|
991
|
+
tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
|
992
|
+
pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
|
993
|
+
pitch_iqr < gap_iqr * textord_fpiqr_ratio
|
994
|
+
&& pitch_iqr < block->xheight * textord_max_pitch_iqr
|
995
|
+
&& pitch < block->xheight * textord_words_default_maxspace
|
996
|
+
? 'F' : 'P');
|
997
|
+
}
|
998
|
+
if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
|
999
|
+
&& pitch_iqr < block->xheight * textord_max_pitch_iqr
|
1000
|
+
&& pitch < block->xheight * textord_words_default_maxspace)
|
1001
|
+
row->pitch_decision = PITCH_MAYBE_FIXED;
|
1002
|
+
else
|
1003
|
+
row->pitch_decision = PITCH_MAYBE_PROP;
|
1004
|
+
row->fixed_pitch = pitch;
|
1005
|
+
row->kern_size = gap_stats.ile (0.5);
|
1006
|
+
row->min_space = (inT32) (row->fixed_pitch + non_space) / 2;
|
1007
|
+
if (row->min_space > row->fixed_pitch)
|
1008
|
+
row->min_space = (inT32) row->fixed_pitch;
|
1009
|
+
row->max_nonspace = row->min_space;
|
1010
|
+
row->space_size = row->fixed_pitch;
|
1011
|
+
row->space_threshold = (row->max_nonspace + row->min_space) / 2;
|
1012
|
+
row->used_dm_model = used_dm_model;
|
1013
|
+
return TRUE;
|
1014
|
+
}
|
1015
|
+
|
1016
|
+
|
1017
|
+
/**********************************************************************
|
1018
|
+
* fixed_pitch_row
|
1019
|
+
*
|
1020
|
+
* Check to see if this row could be fixed pitch using the given spacings.
|
1021
|
+
* Blobs with gaps smaller than the lower threshold are assumed to be one.
|
1022
|
+
* The larger threshold is the word gap threshold.
|
1023
|
+
**********************************************************************/
|
1024
|
+
|
1025
|
+
BOOL8 fixed_pitch_row( //find lines
|
1026
|
+
TO_ROW *row, //row to do
|
1027
|
+
inT32 block_index //block_number
|
1028
|
+
) {
|
1029
|
+
const char *res_string; //pitch result
|
1030
|
+
inT16 mid_cuts; //no of cheap cuts
|
1031
|
+
float non_space; //gap size
|
1032
|
+
float pitch_sd; //error on pitch
|
1033
|
+
float sp_sd; //space sd
|
1034
|
+
|
1035
|
+
non_space = row->fp_nonsp;
|
1036
|
+
if (non_space > row->fixed_pitch)
|
1037
|
+
non_space = row->fixed_pitch;
|
1038
|
+
if (textord_all_prop) {
|
1039
|
+
// Set the decision to definitely proportional.
|
1040
|
+
pitch_sd = textord_words_def_prop * row->fixed_pitch;
|
1041
|
+
row->pitch_decision = PITCH_DEF_PROP;
|
1042
|
+
} else {
|
1043
|
+
pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
|
1044
|
+
row->projection_right,
|
1045
|
+
(row->fixed_pitch + non_space * 3) / 4,
|
1046
|
+
row->fixed_pitch, sp_sd, mid_cuts,
|
1047
|
+
&row->char_cells,
|
1048
|
+
block_index == textord_debug_block);
|
1049
|
+
if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
|
1050
|
+
&& ((pitsync_linear_version & 3) < 3
|
1051
|
+
|| ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
|
1052
|
+
|| sp_sd > 20
|
1053
|
+
|| (pitch_sd == 0 && sp_sd > 10))))) {
|
1054
|
+
if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
|
1055
|
+
&& !row->all_caps
|
1056
|
+
&& ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
|
1057
|
+
row->pitch_decision = PITCH_DEF_FIXED;
|
1058
|
+
else
|
1059
|
+
row->pitch_decision = PITCH_MAYBE_FIXED;
|
1060
|
+
}
|
1061
|
+
else if ((pitsync_linear_version & 3) < 3
|
1062
|
+
|| sp_sd > 20
|
1063
|
+
|| mid_cuts > 0
|
1064
|
+
|| pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
|
1065
|
+
if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
|
1066
|
+
row->pitch_decision = PITCH_MAYBE_PROP;
|
1067
|
+
else
|
1068
|
+
row->pitch_decision = PITCH_DEF_PROP;
|
1069
|
+
}
|
1070
|
+
else
|
1071
|
+
row->pitch_decision = PITCH_DUNNO;
|
1072
|
+
}
|
1073
|
+
|
1074
|
+
if (textord_debug_pitch_metric) {
|
1075
|
+
res_string = "??";
|
1076
|
+
switch (row->pitch_decision) {
|
1077
|
+
case PITCH_DEF_PROP:
|
1078
|
+
res_string = "DP";
|
1079
|
+
break;
|
1080
|
+
case PITCH_MAYBE_PROP:
|
1081
|
+
res_string = "MP";
|
1082
|
+
break;
|
1083
|
+
case PITCH_DEF_FIXED:
|
1084
|
+
res_string = "DF";
|
1085
|
+
break;
|
1086
|
+
case PITCH_MAYBE_FIXED:
|
1087
|
+
res_string = "MF";
|
1088
|
+
default:
|
1089
|
+
res_string = "??";
|
1090
|
+
}
|
1091
|
+
tprintf (":sd/p=%g:occ=%g:init_res=%s\n",
|
1092
|
+
pitch_sd / row->fixed_pitch, sp_sd, res_string);
|
1093
|
+
}
|
1094
|
+
return TRUE;
|
1095
|
+
}
|
1096
|
+
|
1097
|
+
|
1098
|
+
/**********************************************************************
|
1099
|
+
* count_pitch_stats
|
1100
|
+
*
|
1101
|
+
* Count up the gap and pitch stats on the block to see if it is fixed pitch.
|
1102
|
+
* Blobs with gaps smaller than the lower threshold are assumed to be one.
|
1103
|
+
* The larger threshold is the word gap threshold.
|
1104
|
+
* The return value indicates whether there were any decent values to use.
|
1105
|
+
**********************************************************************/
|
1106
|
+
|
1107
|
+
BOOL8 count_pitch_stats( //find lines
|
1108
|
+
TO_ROW *row, //row to do
|
1109
|
+
STATS *gap_stats, //blob gaps
|
1110
|
+
STATS *pitch_stats, //centre-centre stats
|
1111
|
+
float initial_pitch, //guess at pitch
|
1112
|
+
float min_space, //estimate space size
|
1113
|
+
BOOL8 ignore_outsize, //discard big objects
|
1114
|
+
BOOL8 split_outsize, //split big objects
|
1115
|
+
inT32 dm_gap //ignorable gaps
|
1116
|
+
) {
|
1117
|
+
BOOL8 prev_valid; //not word broken
|
1118
|
+
BLOBNBOX *blob; //current blob
|
1119
|
+
//blobs
|
1120
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
1121
|
+
inT32 prev_right; //end of prev blob
|
1122
|
+
inT32 prev_centre; //centre of previous blob
|
1123
|
+
inT32 x_centre; //centre of this blob
|
1124
|
+
inT32 blob_width; //width of blob
|
1125
|
+
inT32 width_units; //no of widths in blob
|
1126
|
+
float width; //blob width
|
1127
|
+
TBOX blob_box; //bounding box
|
1128
|
+
TBOX joined_box; //of super blob
|
1129
|
+
|
1130
|
+
gap_stats->clear ();
|
1131
|
+
pitch_stats->clear ();
|
1132
|
+
if (blob_it.empty ())
|
1133
|
+
return FALSE;
|
1134
|
+
prev_valid = FALSE;
|
1135
|
+
prev_centre = 0;
|
1136
|
+
prev_right = 0; //stop complier warning
|
1137
|
+
joined_box = blob_it.data ()->bounding_box ();
|
1138
|
+
do {
|
1139
|
+
blob_it.forward ();
|
1140
|
+
blob = blob_it.data ();
|
1141
|
+
if (!blob->joined_to_prev ()) {
|
1142
|
+
blob_box = blob->bounding_box ();
|
1143
|
+
if ((blob_box.left () - joined_box.right () < dm_gap
|
1144
|
+
&& !blob_it.at_first ())
|
1145
|
+
|| (blob->cblob () == NULL && blob->blob () == NULL))
|
1146
|
+
joined_box += blob_box; //merge blobs
|
1147
|
+
else {
|
1148
|
+
blob_width = joined_box.width ();
|
1149
|
+
if (split_outsize) {
|
1150
|
+
width_units =
|
1151
|
+
(inT32) floor ((float) blob_width / initial_pitch + 0.5);
|
1152
|
+
if (width_units < 1)
|
1153
|
+
width_units = 1;
|
1154
|
+
width_units--;
|
1155
|
+
}
|
1156
|
+
else if (ignore_outsize) {
|
1157
|
+
width = (float) blob_width / initial_pitch;
|
1158
|
+
width_units = width < 1 + words_default_fixed_limit
|
1159
|
+
&& width > 1 - words_default_fixed_limit ? 0 : -1;
|
1160
|
+
}
|
1161
|
+
else
|
1162
|
+
width_units = 0; //everything in
|
1163
|
+
x_centre = (inT32) (joined_box.left ()
|
1164
|
+
+ (blob_width -
|
1165
|
+
width_units * initial_pitch) / 2);
|
1166
|
+
if (prev_valid && width_units >= 0) {
|
1167
|
+
// if (width_units>0)
|
1168
|
+
// {
|
1169
|
+
// tprintf("wu=%d, width=%d, xc=%d, adding %d\n",
|
1170
|
+
// width_units,blob_width,x_centre,x_centre-prev_centre);
|
1171
|
+
// }
|
1172
|
+
gap_stats->add (joined_box.left () - prev_right, 1);
|
1173
|
+
pitch_stats->add (x_centre - prev_centre, 1);
|
1174
|
+
}
|
1175
|
+
prev_centre = (inT32) (x_centre + width_units * initial_pitch);
|
1176
|
+
prev_right = joined_box.right ();
|
1177
|
+
prev_valid = blob_box.left () - joined_box.right () < min_space;
|
1178
|
+
prev_valid = prev_valid && width_units >= 0;
|
1179
|
+
joined_box = blob_box;
|
1180
|
+
}
|
1181
|
+
}
|
1182
|
+
}
|
1183
|
+
while (!blob_it.at_first ());
|
1184
|
+
return gap_stats->get_total () >= 3;
|
1185
|
+
}
|
1186
|
+
|
1187
|
+
|
1188
|
+
/**********************************************************************
|
1189
|
+
* tune_row_pitch
|
1190
|
+
*
|
1191
|
+
* Use a dp algorithm to fit the character cells and return the sd of
|
1192
|
+
* the cell size over the row.
|
1193
|
+
**********************************************************************/
|
1194
|
+
|
1195
|
+
float tune_row_pitch( //find fp cells
|
1196
|
+
TO_ROW *row, //row to do
|
1197
|
+
STATS *projection, //vertical projection
|
1198
|
+
inT16 projection_left, //edge of projection
|
1199
|
+
inT16 projection_right, //edge of projection
|
1200
|
+
float space_size, //size of blank
|
1201
|
+
float &initial_pitch, //guess at pitch
|
1202
|
+
float &best_sp_sd, //space sd
|
1203
|
+
inT16 &best_mid_cuts, //no of cheap cuts
|
1204
|
+
ICOORDELT_LIST *best_cells, //row cells
|
1205
|
+
BOOL8 testing_on //inidividual words
|
1206
|
+
) {
|
1207
|
+
int pitch_delta; //offset pitch
|
1208
|
+
inT16 mid_cuts; //cheap cuts
|
1209
|
+
float pitch_sd; //current sd
|
1210
|
+
float best_sd; //best result
|
1211
|
+
float best_pitch; //pitch for best result
|
1212
|
+
float initial_sd; //starting error
|
1213
|
+
float sp_sd; //space sd
|
1214
|
+
ICOORDELT_LIST test_cells; //row cells
|
1215
|
+
ICOORDELT_IT best_it; //start of best list
|
1216
|
+
|
1217
|
+
if (textord_fast_pitch_test)
|
1218
|
+
return tune_row_pitch2 (row, projection, projection_left,
|
1219
|
+
projection_right, space_size, initial_pitch,
|
1220
|
+
best_sp_sd,
|
1221
|
+
//space sd
|
1222
|
+
best_mid_cuts, best_cells, testing_on);
|
1223
|
+
if (textord_disable_pitch_test) {
|
1224
|
+
best_sp_sd = initial_pitch;
|
1225
|
+
return initial_pitch;
|
1226
|
+
}
|
1227
|
+
initial_sd =
|
1228
|
+
compute_pitch_sd(row,
|
1229
|
+
projection,
|
1230
|
+
projection_left,
|
1231
|
+
projection_right,
|
1232
|
+
space_size,
|
1233
|
+
initial_pitch,
|
1234
|
+
best_sp_sd,
|
1235
|
+
best_mid_cuts,
|
1236
|
+
best_cells,
|
1237
|
+
testing_on);
|
1238
|
+
best_sd = initial_sd;
|
1239
|
+
best_pitch = initial_pitch;
|
1240
|
+
if (testing_on)
|
1241
|
+
tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
|
1242
|
+
for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
|
1243
|
+
pitch_sd =
|
1244
|
+
compute_pitch_sd (row, projection, projection_left, projection_right,
|
1245
|
+
space_size, initial_pitch + pitch_delta, sp_sd,
|
1246
|
+
mid_cuts, &test_cells, testing_on);
|
1247
|
+
if (testing_on)
|
1248
|
+
tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
|
1249
|
+
pitch_sd);
|
1250
|
+
if (pitch_sd < best_sd) {
|
1251
|
+
best_sd = pitch_sd;
|
1252
|
+
best_mid_cuts = mid_cuts;
|
1253
|
+
best_sp_sd = sp_sd;
|
1254
|
+
best_pitch = initial_pitch + pitch_delta;
|
1255
|
+
best_cells->clear ();
|
1256
|
+
best_it.set_to_list (best_cells);
|
1257
|
+
best_it.add_list_after (&test_cells);
|
1258
|
+
}
|
1259
|
+
else
|
1260
|
+
test_cells.clear ();
|
1261
|
+
if (pitch_sd > initial_sd)
|
1262
|
+
break; //getting worse
|
1263
|
+
}
|
1264
|
+
for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
|
1265
|
+
pitch_sd =
|
1266
|
+
compute_pitch_sd (row, projection, projection_left, projection_right,
|
1267
|
+
space_size, initial_pitch - pitch_delta, sp_sd,
|
1268
|
+
mid_cuts, &test_cells, testing_on);
|
1269
|
+
if (testing_on)
|
1270
|
+
tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
|
1271
|
+
pitch_sd);
|
1272
|
+
if (pitch_sd < best_sd) {
|
1273
|
+
best_sd = pitch_sd;
|
1274
|
+
best_mid_cuts = mid_cuts;
|
1275
|
+
best_sp_sd = sp_sd;
|
1276
|
+
best_pitch = initial_pitch - pitch_delta;
|
1277
|
+
best_cells->clear ();
|
1278
|
+
best_it.set_to_list (best_cells);
|
1279
|
+
best_it.add_list_after (&test_cells);
|
1280
|
+
}
|
1281
|
+
else
|
1282
|
+
test_cells.clear ();
|
1283
|
+
if (pitch_sd > initial_sd)
|
1284
|
+
break;
|
1285
|
+
}
|
1286
|
+
initial_pitch = best_pitch;
|
1287
|
+
|
1288
|
+
if (textord_debug_pitch_metric)
|
1289
|
+
print_pitch_sd(row,
|
1290
|
+
projection,
|
1291
|
+
projection_left,
|
1292
|
+
projection_right,
|
1293
|
+
space_size,
|
1294
|
+
best_pitch);
|
1295
|
+
|
1296
|
+
return best_sd;
|
1297
|
+
}
|
1298
|
+
|
1299
|
+
|
1300
|
+
/**********************************************************************
|
1301
|
+
* tune_row_pitch
|
1302
|
+
*
|
1303
|
+
* Use a dp algorithm to fit the character cells and return the sd of
|
1304
|
+
* the cell size over the row.
|
1305
|
+
**********************************************************************/
|
1306
|
+
|
1307
|
+
float tune_row_pitch2( //find fp cells
|
1308
|
+
TO_ROW *row, //row to do
|
1309
|
+
STATS *projection, //vertical projection
|
1310
|
+
inT16 projection_left, //edge of projection
|
1311
|
+
inT16 projection_right, //edge of projection
|
1312
|
+
float space_size, //size of blank
|
1313
|
+
float &initial_pitch, //guess at pitch
|
1314
|
+
float &best_sp_sd, //space sd
|
1315
|
+
inT16 &best_mid_cuts, //no of cheap cuts
|
1316
|
+
ICOORDELT_LIST *best_cells, //row cells
|
1317
|
+
BOOL8 testing_on //inidividual words
|
1318
|
+
) {
|
1319
|
+
int pitch_delta; //offset pitch
|
1320
|
+
inT16 pixel; //pixel coord
|
1321
|
+
inT16 best_pixel; //pixel coord
|
1322
|
+
inT16 best_delta; //best pitch
|
1323
|
+
inT16 best_pitch; //best pitch
|
1324
|
+
inT16 start; //of good range
|
1325
|
+
inT16 end; //of good range
|
1326
|
+
inT32 best_count; //lowest sum
|
1327
|
+
float best_sd; //best result
|
1328
|
+
STATS *sum_proj; //summed projection
|
1329
|
+
|
1330
|
+
best_sp_sd = initial_pitch;
|
1331
|
+
|
1332
|
+
if (textord_disable_pitch_test) {
|
1333
|
+
return initial_pitch;
|
1334
|
+
}
|
1335
|
+
sum_proj = new STATS[textord_pitch_range * 2 + 1];
|
1336
|
+
if (sum_proj == NULL)
|
1337
|
+
return initial_pitch;
|
1338
|
+
best_pitch = (inT32) initial_pitch;
|
1339
|
+
|
1340
|
+
for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
|
1341
|
+
pitch_delta++)
|
1342
|
+
sum_proj[textord_pitch_range + pitch_delta].set_range (0,
|
1343
|
+
best_pitch +
|
1344
|
+
pitch_delta + 1);
|
1345
|
+
for (pixel = projection_left; pixel <= projection_right; pixel++) {
|
1346
|
+
for (pitch_delta = -textord_pitch_range;
|
1347
|
+
pitch_delta <= textord_pitch_range; pitch_delta++)
|
1348
|
+
sum_proj[textord_pitch_range +
|
1349
|
+
pitch_delta].add ((pixel - projection_left) % (best_pitch +
|
1350
|
+
pitch_delta),
|
1351
|
+
projection->pile_count (pixel));
|
1352
|
+
}
|
1353
|
+
best_count = sum_proj[textord_pitch_range].pile_count (0);
|
1354
|
+
best_delta = 0;
|
1355
|
+
best_pixel = 0;
|
1356
|
+
for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
|
1357
|
+
pitch_delta++) {
|
1358
|
+
for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
|
1359
|
+
if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
|
1360
|
+
< best_count) {
|
1361
|
+
best_count =
|
1362
|
+
sum_proj[textord_pitch_range +
|
1363
|
+
pitch_delta].pile_count (pixel);
|
1364
|
+
best_delta = pitch_delta;
|
1365
|
+
best_pixel = pixel;
|
1366
|
+
}
|
1367
|
+
}
|
1368
|
+
}
|
1369
|
+
if (testing_on)
|
1370
|
+
tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
|
1371
|
+
initial_pitch, best_delta, best_count);
|
1372
|
+
best_pitch += best_delta;
|
1373
|
+
initial_pitch = best_pitch;
|
1374
|
+
best_count++;
|
1375
|
+
best_count += best_count;
|
1376
|
+
for (start = best_pixel - 2; start > best_pixel - best_pitch
|
1377
|
+
&& sum_proj[textord_pitch_range +
|
1378
|
+
best_delta].pile_count (start % best_pitch) <= best_count;
|
1379
|
+
start--);
|
1380
|
+
for (end = best_pixel + 2;
|
1381
|
+
end < best_pixel + best_pitch
|
1382
|
+
&& sum_proj[textord_pitch_range +
|
1383
|
+
best_delta].pile_count (end % best_pitch) <= best_count;
|
1384
|
+
end++);
|
1385
|
+
|
1386
|
+
best_sd =
|
1387
|
+
compute_pitch_sd(row,
|
1388
|
+
projection,
|
1389
|
+
projection_left,
|
1390
|
+
projection_right,
|
1391
|
+
space_size,
|
1392
|
+
initial_pitch,
|
1393
|
+
best_sp_sd,
|
1394
|
+
best_mid_cuts,
|
1395
|
+
best_cells,
|
1396
|
+
testing_on,
|
1397
|
+
start,
|
1398
|
+
end);
|
1399
|
+
if (testing_on)
|
1400
|
+
tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
|
1401
|
+
best_sd);
|
1402
|
+
|
1403
|
+
if (textord_debug_pitch_metric)
|
1404
|
+
print_pitch_sd(row,
|
1405
|
+
projection,
|
1406
|
+
projection_left,
|
1407
|
+
projection_right,
|
1408
|
+
space_size,
|
1409
|
+
initial_pitch);
|
1410
|
+
|
1411
|
+
delete[]sum_proj;
|
1412
|
+
|
1413
|
+
return best_sd;
|
1414
|
+
}
|
1415
|
+
|
1416
|
+
|
1417
|
+
/**********************************************************************
|
1418
|
+
* compute_pitch_sd
|
1419
|
+
*
|
1420
|
+
* Use a dp algorithm to fit the character cells and return the sd of
|
1421
|
+
* the cell size over the row.
|
1422
|
+
**********************************************************************/
|
1423
|
+
|
1424
|
+
float compute_pitch_sd( //find fp cells
|
1425
|
+
TO_ROW *row, //row to do
|
1426
|
+
STATS *projection, //vertical projection
|
1427
|
+
inT16 projection_left, //edge
|
1428
|
+
inT16 projection_right, //edge
|
1429
|
+
float space_size, //size of blank
|
1430
|
+
float initial_pitch, //guess at pitch
|
1431
|
+
float &sp_sd, //space sd
|
1432
|
+
inT16 &mid_cuts, //no of free cuts
|
1433
|
+
ICOORDELT_LIST *row_cells, //list of chop pts
|
1434
|
+
BOOL8 testing_on, //inidividual words
|
1435
|
+
inT16 start, //start of good range
|
1436
|
+
inT16 end //end of good range
|
1437
|
+
) {
|
1438
|
+
inT16 occupation; //no of cells in word.
|
1439
|
+
//blobs
|
1440
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
1441
|
+
BLOBNBOX_IT start_it; //start of word
|
1442
|
+
BLOBNBOX_IT plot_it; //for plotting
|
1443
|
+
inT16 blob_count; //no of blobs
|
1444
|
+
TBOX blob_box; //bounding box
|
1445
|
+
TBOX prev_box; //of super blob
|
1446
|
+
inT32 prev_right; //of word sync
|
1447
|
+
int scale_factor; //on scores for big words
|
1448
|
+
inT32 sp_count; //spaces
|
1449
|
+
FPSEGPT_LIST seg_list; //char cells
|
1450
|
+
FPSEGPT_IT seg_it; //iterator
|
1451
|
+
inT16 segpos; //position of segment
|
1452
|
+
inT16 cellpos; //previous cell boundary
|
1453
|
+
//iterator
|
1454
|
+
ICOORDELT_IT cell_it = row_cells;
|
1455
|
+
ICOORDELT *cell; //new cell
|
1456
|
+
double sqsum; //sum of squares
|
1457
|
+
double spsum; //of spaces
|
1458
|
+
double sp_var; //space error
|
1459
|
+
double word_sync; //result for word
|
1460
|
+
inT32 total_count; //total blobs
|
1461
|
+
|
1462
|
+
if ((pitsync_linear_version & 3) > 1) {
|
1463
|
+
word_sync = compute_pitch_sd2 (row, projection, projection_left,
|
1464
|
+
projection_right, initial_pitch,
|
1465
|
+
occupation, mid_cuts, row_cells,
|
1466
|
+
testing_on, start, end);
|
1467
|
+
sp_sd = occupation;
|
1468
|
+
return word_sync;
|
1469
|
+
}
|
1470
|
+
mid_cuts = 0;
|
1471
|
+
cellpos = 0;
|
1472
|
+
total_count = 0;
|
1473
|
+
sqsum = 0;
|
1474
|
+
sp_count = 0;
|
1475
|
+
spsum = 0;
|
1476
|
+
prev_right = -1;
|
1477
|
+
if (blob_it.empty ())
|
1478
|
+
return space_size * 10;
|
1479
|
+
#ifndef GRAPHICS_DISABLED
|
1480
|
+
if (testing_on && to_win > 0) {
|
1481
|
+
blob_box = blob_it.data ()->bounding_box ();
|
1482
|
+
projection->plot (to_win, projection_left,
|
1483
|
+
row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
|
1484
|
+
}
|
1485
|
+
#endif
|
1486
|
+
start_it = blob_it;
|
1487
|
+
blob_count = 0;
|
1488
|
+
blob_box = box_next (&blob_it);//first blob
|
1489
|
+
blob_it.mark_cycle_pt ();
|
1490
|
+
do {
|
1491
|
+
for (; blob_count > 0; blob_count--)
|
1492
|
+
box_next(&start_it);
|
1493
|
+
do {
|
1494
|
+
prev_box = blob_box;
|
1495
|
+
blob_count++;
|
1496
|
+
blob_box = box_next (&blob_it);
|
1497
|
+
}
|
1498
|
+
while (!blob_it.cycled_list ()
|
1499
|
+
&& blob_box.left () - prev_box.right () < space_size);
|
1500
|
+
plot_it = start_it;
|
1501
|
+
if (pitsync_linear_version & 3)
|
1502
|
+
word_sync =
|
1503
|
+
check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
|
1504
|
+
projection, projection_left, projection_right,
|
1505
|
+
row->xheight * textord_projection_scale,
|
1506
|
+
occupation, &seg_list, start, end);
|
1507
|
+
else
|
1508
|
+
word_sync =
|
1509
|
+
check_pitch_sync (&start_it, blob_count, (inT16) initial_pitch, 2,
|
1510
|
+
projection, &seg_list);
|
1511
|
+
if (testing_on) {
|
1512
|
+
tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
|
1513
|
+
prev_box.right (), prev_box.top (),
|
1514
|
+
seg_list.length () - 1, word_sync);
|
1515
|
+
seg_it.set_to_list (&seg_list);
|
1516
|
+
for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
|
1517
|
+
seg_it.forward ()) {
|
1518
|
+
if (seg_it.data ()->faked)
|
1519
|
+
tprintf ("(F)");
|
1520
|
+
tprintf ("%d, ", seg_it.data ()->position ());
|
1521
|
+
// tprintf("C=%g, s=%g, sq=%g\n",
|
1522
|
+
// seg_it.data()->cost_function(),
|
1523
|
+
// seg_it.data()->sum(),
|
1524
|
+
// seg_it.data()->squares());
|
1525
|
+
}
|
1526
|
+
tprintf ("\n");
|
1527
|
+
}
|
1528
|
+
#ifndef GRAPHICS_DISABLED
|
1529
|
+
if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0)
|
1530
|
+
plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
|
1531
|
+
#endif
|
1532
|
+
seg_it.set_to_list (&seg_list);
|
1533
|
+
if (prev_right >= 0) {
|
1534
|
+
sp_var = seg_it.data ()->position () - prev_right;
|
1535
|
+
sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
|
1536
|
+
sp_var *= sp_var;
|
1537
|
+
spsum += sp_var;
|
1538
|
+
sp_count++;
|
1539
|
+
}
|
1540
|
+
for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
|
1541
|
+
segpos = seg_it.data ()->position ();
|
1542
|
+
if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
|
1543
|
+
//big gap
|
1544
|
+
while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
|
1545
|
+
cell = new ICOORDELT (cellpos + (inT16) initial_pitch, 0);
|
1546
|
+
cell_it.add_after_then_move (cell);
|
1547
|
+
cellpos += (inT16) initial_pitch;
|
1548
|
+
}
|
1549
|
+
//make new one
|
1550
|
+
cell = new ICOORDELT (segpos, 0);
|
1551
|
+
cell_it.add_after_then_move (cell);
|
1552
|
+
cellpos = segpos;
|
1553
|
+
}
|
1554
|
+
else if (segpos > cellpos - initial_pitch / 2) {
|
1555
|
+
cell = cell_it.data ();
|
1556
|
+
//average positions
|
1557
|
+
cell->set_x ((cellpos + segpos) / 2);
|
1558
|
+
cellpos = cell->x ();
|
1559
|
+
}
|
1560
|
+
}
|
1561
|
+
seg_it.move_to_last ();
|
1562
|
+
prev_right = seg_it.data ()->position ();
|
1563
|
+
if (textord_pitch_scalebigwords) {
|
1564
|
+
scale_factor = (seg_list.length () - 2) / 2;
|
1565
|
+
if (scale_factor < 1)
|
1566
|
+
scale_factor = 1;
|
1567
|
+
}
|
1568
|
+
else
|
1569
|
+
scale_factor = 1;
|
1570
|
+
sqsum += word_sync * scale_factor;
|
1571
|
+
total_count += (seg_list.length () - 1) * scale_factor;
|
1572
|
+
seg_list.clear ();
|
1573
|
+
}
|
1574
|
+
while (!blob_it.cycled_list ());
|
1575
|
+
sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
|
1576
|
+
return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
|
1577
|
+
}
|
1578
|
+
|
1579
|
+
|
1580
|
+
/**********************************************************************
|
1581
|
+
* compute_pitch_sd2
|
1582
|
+
*
|
1583
|
+
* Use a dp algorithm to fit the character cells and return the sd of
|
1584
|
+
* the cell size over the row.
|
1585
|
+
**********************************************************************/
|
1586
|
+
|
1587
|
+
float compute_pitch_sd2( //find fp cells
|
1588
|
+
TO_ROW *row, //row to do
|
1589
|
+
STATS *projection, //vertical projection
|
1590
|
+
inT16 projection_left, //edge
|
1591
|
+
inT16 projection_right, //edge
|
1592
|
+
float initial_pitch, //guess at pitch
|
1593
|
+
inT16 &occupation, //no of occupied cells
|
1594
|
+
inT16 &mid_cuts, //no of free cuts
|
1595
|
+
ICOORDELT_LIST *row_cells, //list of chop pts
|
1596
|
+
BOOL8 testing_on, //inidividual words
|
1597
|
+
inT16 start, //start of good range
|
1598
|
+
inT16 end //end of good range
|
1599
|
+
) {
|
1600
|
+
//blobs
|
1601
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
1602
|
+
BLOBNBOX_IT plot_it;
|
1603
|
+
inT16 blob_count; //no of blobs
|
1604
|
+
TBOX blob_box; //bounding box
|
1605
|
+
FPSEGPT_LIST seg_list; //char cells
|
1606
|
+
FPSEGPT_IT seg_it; //iterator
|
1607
|
+
inT16 segpos; //position of segment
|
1608
|
+
//iterator
|
1609
|
+
ICOORDELT_IT cell_it = row_cells;
|
1610
|
+
ICOORDELT *cell; //new cell
|
1611
|
+
double word_sync; //result for word
|
1612
|
+
|
1613
|
+
mid_cuts = 0;
|
1614
|
+
if (blob_it.empty ()) {
|
1615
|
+
occupation = 0;
|
1616
|
+
return initial_pitch * 10;
|
1617
|
+
}
|
1618
|
+
#ifndef GRAPHICS_DISABLED
|
1619
|
+
if (testing_on && to_win > 0) {
|
1620
|
+
projection->plot (to_win, projection_left,
|
1621
|
+
row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
|
1622
|
+
}
|
1623
|
+
#endif
|
1624
|
+
blob_count = 0;
|
1625
|
+
blob_it.mark_cycle_pt ();
|
1626
|
+
do {
|
1627
|
+
//first blob
|
1628
|
+
blob_box = box_next (&blob_it);
|
1629
|
+
blob_count++;
|
1630
|
+
}
|
1631
|
+
while (!blob_it.cycled_list ());
|
1632
|
+
plot_it = blob_it;
|
1633
|
+
word_sync = check_pitch_sync2 (&blob_it, blob_count, (inT16) initial_pitch,
|
1634
|
+
2, projection, projection_left,
|
1635
|
+
projection_right,
|
1636
|
+
row->xheight * textord_projection_scale,
|
1637
|
+
occupation, &seg_list, start, end);
|
1638
|
+
if (testing_on) {
|
1639
|
+
tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
|
1640
|
+
blob_box.right (), blob_box.top (),
|
1641
|
+
seg_list.length () - 1, word_sync);
|
1642
|
+
seg_it.set_to_list (&seg_list);
|
1643
|
+
for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
|
1644
|
+
if (seg_it.data ()->faked)
|
1645
|
+
tprintf ("(F)");
|
1646
|
+
tprintf ("%d, ", seg_it.data ()->position ());
|
1647
|
+
// tprintf("C=%g, s=%g, sq=%g\n",
|
1648
|
+
// seg_it.data()->cost_function(),
|
1649
|
+
// seg_it.data()->sum(),
|
1650
|
+
// seg_it.data()->squares());
|
1651
|
+
}
|
1652
|
+
tprintf ("\n");
|
1653
|
+
}
|
1654
|
+
#ifndef GRAPHICS_DISABLED
|
1655
|
+
if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0)
|
1656
|
+
plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
|
1657
|
+
#endif
|
1658
|
+
seg_it.set_to_list (&seg_list);
|
1659
|
+
for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
|
1660
|
+
segpos = seg_it.data ()->position ();
|
1661
|
+
//make new one
|
1662
|
+
cell = new ICOORDELT (segpos, 0);
|
1663
|
+
cell_it.add_after_then_move (cell);
|
1664
|
+
if (seg_it.at_last ())
|
1665
|
+
mid_cuts = seg_it.data ()->cheap_cuts ();
|
1666
|
+
}
|
1667
|
+
seg_list.clear ();
|
1668
|
+
return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
|
1669
|
+
}
|
1670
|
+
|
1671
|
+
|
1672
|
+
/**********************************************************************
|
1673
|
+
* print_pitch_sd
|
1674
|
+
*
|
1675
|
+
* Use a dp algorithm to fit the character cells and return the sd of
|
1676
|
+
* the cell size over the row.
|
1677
|
+
**********************************************************************/
|
1678
|
+
|
1679
|
+
void print_pitch_sd( //find fp cells
|
1680
|
+
TO_ROW *row, //row to do
|
1681
|
+
STATS *projection, //vertical projection
|
1682
|
+
inT16 projection_left, //edges //size of blank
|
1683
|
+
inT16 projection_right,
|
1684
|
+
float space_size,
|
1685
|
+
float initial_pitch //guess at pitch
|
1686
|
+
) {
|
1687
|
+
const char *res2; //pitch result
|
1688
|
+
inT16 occupation; //used cells
|
1689
|
+
float sp_sd; //space sd
|
1690
|
+
//blobs
|
1691
|
+
BLOBNBOX_IT blob_it = row->blob_list ();
|
1692
|
+
BLOBNBOX_IT start_it; //start of word
|
1693
|
+
BLOBNBOX_IT row_start; //start of row
|
1694
|
+
inT16 blob_count; //no of blobs
|
1695
|
+
inT16 total_blob_count; //total blobs in line
|
1696
|
+
TBOX blob_box; //bounding box
|
1697
|
+
TBOX prev_box; //of super blob
|
1698
|
+
inT32 prev_right; //of word sync
|
1699
|
+
int scale_factor; //on scores for big words
|
1700
|
+
inT32 sp_count; //spaces
|
1701
|
+
FPSEGPT_LIST seg_list; //char cells
|
1702
|
+
FPSEGPT_IT seg_it; //iterator
|
1703
|
+
double sqsum; //sum of squares
|
1704
|
+
double spsum; //of spaces
|
1705
|
+
double sp_var; //space error
|
1706
|
+
double word_sync; //result for word
|
1707
|
+
double total_count; //total cuts
|
1708
|
+
|
1709
|
+
if (blob_it.empty ())
|
1710
|
+
return;
|
1711
|
+
row_start = blob_it;
|
1712
|
+
total_blob_count = 0;
|
1713
|
+
|
1714
|
+
total_count = 0;
|
1715
|
+
sqsum = 0;
|
1716
|
+
sp_count = 0;
|
1717
|
+
spsum = 0;
|
1718
|
+
prev_right = -1;
|
1719
|
+
blob_it = row_start;
|
1720
|
+
start_it = blob_it;
|
1721
|
+
blob_count = 0;
|
1722
|
+
blob_box = box_next (&blob_it);//first blob
|
1723
|
+
blob_it.mark_cycle_pt ();
|
1724
|
+
do {
|
1725
|
+
for (; blob_count > 0; blob_count--)
|
1726
|
+
box_next(&start_it);
|
1727
|
+
do {
|
1728
|
+
prev_box = blob_box;
|
1729
|
+
blob_count++;
|
1730
|
+
blob_box = box_next (&blob_it);
|
1731
|
+
}
|
1732
|
+
while (!blob_it.cycled_list ()
|
1733
|
+
&& blob_box.left () - prev_box.right () < space_size);
|
1734
|
+
word_sync =
|
1735
|
+
check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
|
1736
|
+
projection, projection_left, projection_right,
|
1737
|
+
row->xheight * textord_projection_scale,
|
1738
|
+
occupation, &seg_list, 0, 0);
|
1739
|
+
total_blob_count += blob_count;
|
1740
|
+
seg_it.set_to_list (&seg_list);
|
1741
|
+
if (prev_right >= 0) {
|
1742
|
+
sp_var = seg_it.data ()->position () - prev_right;
|
1743
|
+
sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
|
1744
|
+
sp_var *= sp_var;
|
1745
|
+
spsum += sp_var;
|
1746
|
+
sp_count++;
|
1747
|
+
}
|
1748
|
+
seg_it.move_to_last ();
|
1749
|
+
prev_right = seg_it.data ()->position ();
|
1750
|
+
if (textord_pitch_scalebigwords) {
|
1751
|
+
scale_factor = (seg_list.length () - 2) / 2;
|
1752
|
+
if (scale_factor < 1)
|
1753
|
+
scale_factor = 1;
|
1754
|
+
}
|
1755
|
+
else
|
1756
|
+
scale_factor = 1;
|
1757
|
+
sqsum += word_sync * scale_factor;
|
1758
|
+
total_count += (seg_list.length () - 1) * scale_factor;
|
1759
|
+
seg_list.clear ();
|
1760
|
+
}
|
1761
|
+
while (!blob_it.cycled_list ());
|
1762
|
+
sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
|
1763
|
+
word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
|
1764
|
+
tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
|
1765
|
+
word_sync, word_sync / initial_pitch, sp_sd,
|
1766
|
+
word_sync < textord_words_pitchsd_threshold * initial_pitch
|
1767
|
+
? 'F' : 'P');
|
1768
|
+
|
1769
|
+
start_it = row_start;
|
1770
|
+
blob_it = row_start;
|
1771
|
+
word_sync =
|
1772
|
+
check_pitch_sync2 (&blob_it, total_blob_count, (inT16) initial_pitch, 2,
|
1773
|
+
projection, projection_left, projection_right,
|
1774
|
+
row->xheight * textord_projection_scale, occupation,
|
1775
|
+
&seg_list, 0, 0);
|
1776
|
+
if (occupation > 1)
|
1777
|
+
word_sync /= occupation;
|
1778
|
+
word_sync = sqrt (word_sync);
|
1779
|
+
|
1780
|
+
#ifndef GRAPHICS_DISABLED
|
1781
|
+
if (textord_show_row_cuts && to_win != NULL)
|
1782
|
+
plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
|
1783
|
+
#endif
|
1784
|
+
seg_list.clear ();
|
1785
|
+
if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
|
1786
|
+
if (word_sync < textord_words_def_fixed * initial_pitch
|
1787
|
+
&& !row->all_caps)
|
1788
|
+
res2 = "DF";
|
1789
|
+
else
|
1790
|
+
res2 = "MF";
|
1791
|
+
}
|
1792
|
+
else
|
1793
|
+
res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
|
1794
|
+
tprintf
|
1795
|
+
("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
|
1796
|
+
word_sync, word_sync / initial_pitch,
|
1797
|
+
word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
|
1798
|
+
occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
|
1799
|
+
}
|
1800
|
+
|
1801
|
+
|
1802
|
+
/**********************************************************************
 * sort_floats2
 *
 * qsort comparison function for two floats.
 *
 * arg1 and arg2 point at the floats to compare.  Returns 1 if *arg1 is
 * greater than *arg2, -1 if it is smaller and 0 otherwise (equal values,
 * or an unordered comparison because a NaN is involved), which sorts in
 * ascending order.
 **********************************************************************/

int sort_floats2(                   //qsort function
                 const void *arg1,  //ptrs to floats
                 const void *arg2) {
  //read through const pointers: qsort hands us const data
  const float first = *(const float *) arg1;
  const float second = *(const float *) arg2;

  if (first > second)
    return 1;
  if (first < second)
    return -1;
  return 0;
}
|
1821
|
+
|
1822
|
+
|
1823
|
+
/**********************************************************************
|
1824
|
+
* find_repeated_chars
|
1825
|
+
*
|
1826
|
+
* Find 4 or more adjacent chars which are the same and put them
|
1827
|
+
* into words in advance of fixed pitch checking and word generation.
|
1828
|
+
**********************************************************************/
|
1829
|
+
|
1830
|
+
void find_repeated_chars( //search for equal chars
|
1831
|
+
TO_BLOCK *block, //block to search
|
1832
|
+
BOOL8 testing_on //dbug mode
|
1833
|
+
) {
|
1834
|
+
BOOL8 bol; //start of line
|
1835
|
+
TO_ROW *row; //current row
|
1836
|
+
TO_ROW_IT row_it = block->get_rows ();
|
1837
|
+
ROW *real_row; //output row
|
1838
|
+
WERD_IT word_it; //new words
|
1839
|
+
WERD *word; //new word
|
1840
|
+
BLOBNBOX *bblob; //current blob
|
1841
|
+
BLOBNBOX *nextblob; //neighbour to compare
|
1842
|
+
BLOBNBOX_IT box_it; //iterator
|
1843
|
+
BLOBNBOX_IT search_it; //forward search
|
1844
|
+
inT32 blobcount; //no of neighbours
|
1845
|
+
inT32 matched_blobcount; //no of matches
|
1846
|
+
inT32 blobindex; //in row
|
1847
|
+
inT32 row_length; //blobs in row
|
1848
|
+
inT32 width_change; //max width change
|
1849
|
+
inT32 blob_width; //required blob width
|
1850
|
+
inT32 space_width; //required gap width
|
1851
|
+
inT32 prev_right; //right edge of last blob
|
1852
|
+
float rating; //match rating
|
1853
|
+
PBLOB *pblob1; //polygonal blob
|
1854
|
+
PBLOB *pblob2; //second blob
|
1855
|
+
TBOX word_box; //for plotting
|
1856
|
+
|
1857
|
+
if (row_it.empty ())
|
1858
|
+
return; //empty block
|
1859
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
1860
|
+
row = row_it.data ();
|
1861
|
+
box_it.set_to_list (row->blob_list ());
|
1862
|
+
row_length = row->blob_list ()->length ();
|
1863
|
+
blobindex = 0;
|
1864
|
+
word_it.set_to_list (&row->rep_words);
|
1865
|
+
bol = TRUE;
|
1866
|
+
if (!box_it.empty ()) {
|
1867
|
+
real_row = new ROW (row,
|
1868
|
+
(inT16) block->kern_size,
|
1869
|
+
(inT16) block->space_size);
|
1870
|
+
do {
|
1871
|
+
bblob = box_it.data ();
|
1872
|
+
blobcount = 1;
|
1873
|
+
search_it = box_it;
|
1874
|
+
search_it.forward ();
|
1875
|
+
matched_blobcount = 1;
|
1876
|
+
width_change = MAX_INT16;
|
1877
|
+
blob_width = 0;
|
1878
|
+
space_width = 0;
|
1879
|
+
prev_right = bblob->bounding_box ().right ();
|
1880
|
+
if (bblob->bounding_box ().height () * 2 < row->xheight
|
1881
|
+
&& !bblob->joined_to_prev ()
|
1882
|
+
&& (bblob->blob () != NULL || bblob->cblob () != NULL)) {
|
1883
|
+
if (bblob->cblob () != NULL)
|
1884
|
+
pblob1 = new PBLOB (bblob->cblob (), row->xheight);
|
1885
|
+
else
|
1886
|
+
pblob1 = bblob->blob ();
|
1887
|
+
|
1888
|
+
rating = 0.0f;
|
1889
|
+
while (rating < textord_repeat_rating
|
1890
|
+
&& blobindex + blobcount < row_length
|
1891
|
+
&& ((nextblob = search_it.data ())->blob () != NULL
|
1892
|
+
|| nextblob->cblob () != NULL)
|
1893
|
+
&& nextblob->bounding_box ().height () * 2 <
|
1894
|
+
row->xheight) {
|
1895
|
+
if (blobcount == 1) {
|
1896
|
+
space_width = nextblob->bounding_box ().left ()
|
1897
|
+
- bblob->bounding_box ().right ();
|
1898
|
+
blob_width = bblob->bounding_box ().width ();
|
1899
|
+
width_change =
|
1900
|
+
blob_width >
|
1901
|
+
space_width ? blob_width : space_width;
|
1902
|
+
width_change =
|
1903
|
+
(inT32) (width_change *
|
1904
|
+
textord_repch_width_variance);
|
1905
|
+
if (width_change < 3)
|
1906
|
+
width_change = 3;
|
1907
|
+
}
|
1908
|
+
if (nextblob->bounding_box ().width () >
|
1909
|
+
blob_width + width_change
|
1910
|
+
|| nextblob->bounding_box ().width () <
|
1911
|
+
blob_width - width_change
|
1912
|
+
|| nextblob->bounding_box ().left () - prev_right >
|
1913
|
+
space_width + width_change
|
1914
|
+
|| nextblob->bounding_box ().left () - prev_right <
|
1915
|
+
space_width - width_change) {
|
1916
|
+
if (testing_on)
|
1917
|
+
tprintf
|
1918
|
+
("Repch terminated:bw=%d, sw=%d, wc=%d, pr=%d, nb=(%d,%d)\n",
|
1919
|
+
blob_width, space_width, width_change,
|
1920
|
+
prev_right, nextblob->bounding_box ().left (),
|
1921
|
+
nextblob->bounding_box ().right ());
|
1922
|
+
break; //not good enough
|
1923
|
+
}
|
1924
|
+
if (nextblob->blob () != NULL)
|
1925
|
+
rating = compare_blobs (pblob1, real_row,
|
1926
|
+
nextblob->blob (), real_row);
|
1927
|
+
else {
|
1928
|
+
pblob2 =
|
1929
|
+
new PBLOB (nextblob->cblob (), row->xheight);
|
1930
|
+
rating =
|
1931
|
+
compare_blobs(pblob1, real_row, pblob2, real_row);
|
1932
|
+
delete pblob2;
|
1933
|
+
}
|
1934
|
+
if (rating < textord_repeat_rating) {
|
1935
|
+
// if (testing_on)
|
1936
|
+
// tprintf("Blob at (%d,%d)->(%d,%d) had rating %g\n",
|
1937
|
+
// nextblob->bounding_box().left(),
|
1938
|
+
// nextblob->bounding_box().bottom(),
|
1939
|
+
// nextblob->bounding_box().right(),
|
1940
|
+
// nextblob->bounding_box().top(),
|
1941
|
+
// rating);
|
1942
|
+
blobcount++;
|
1943
|
+
search_it.forward ();
|
1944
|
+
matched_blobcount++;
|
1945
|
+
while (blobindex + blobcount < row_length
|
1946
|
+
&& (search_it.data ()->joined_to_prev () ||
|
1947
|
+
(search_it.data()->blob() == NULL &&
|
1948
|
+
search_it.data()->cblob() == NULL))) {
|
1949
|
+
search_it.forward ();
|
1950
|
+
blobcount++; //suck in joined bits
|
1951
|
+
}
|
1952
|
+
}
|
1953
|
+
prev_right = nextblob->bounding_box ().right ();
|
1954
|
+
}
|
1955
|
+
if (bblob->cblob () != NULL)
|
1956
|
+
delete pblob1;
|
1957
|
+
|
1958
|
+
if (matched_blobcount >= textord_repeat_threshold) {
|
1959
|
+
word =
|
1960
|
+
make_real_word (&box_it, blobcount, bol, FALSE, FALSE,
|
1961
|
+
1);
|
1962
|
+
#ifndef GRAPHICS_DISABLED
|
1963
|
+
if (testing_on) {
|
1964
|
+
word_box = word->bounding_box ();
|
1965
|
+
tprintf
|
1966
|
+
("Found repeated word of %d blobs (%d matched) from (%d,%d)->(%d,%d)\n",
|
1967
|
+
blobcount, matched_blobcount, word_box.left (),
|
1968
|
+
word_box.bottom (), word_box.right (),
|
1969
|
+
word_box.top ());
|
1970
|
+
//perimeter_color_index(to_win, RED);
|
1971
|
+
to_win->Pen(255,0,0);
|
1972
|
+
//interior_style(to_win, INT_HOLLOW, TRUE);
|
1973
|
+
to_win->Rectangle(word_box.left (),
|
1974
|
+
word_box.bottom (), word_box.right (),
|
1975
|
+
word_box.top ());
|
1976
|
+
}
|
1977
|
+
#endif
|
1978
|
+
word->set_flag (W_REP_CHAR, TRUE);
|
1979
|
+
word->set_flag (W_DONT_CHOP, TRUE);
|
1980
|
+
word_it.add_after_then_move (word);
|
1981
|
+
blobindex += blobcount;
|
1982
|
+
}
|
1983
|
+
}
|
1984
|
+
bol = FALSE;
|
1985
|
+
box_it.forward (); //next one
|
1986
|
+
blobindex++;
|
1987
|
+
}
|
1988
|
+
//until all done
|
1989
|
+
while (!box_it.at_first ());
|
1990
|
+
delete real_row;
|
1991
|
+
}
|
1992
|
+
}
|
1993
|
+
}
|
1994
|
+
|
1995
|
+
|
1996
|
+
/**********************************************************************
|
1997
|
+
* plot_fp_word
|
1998
|
+
*
|
1999
|
+
* Plot a block of words as if fixed pitch.
|
2000
|
+
**********************************************************************/
|
2001
|
+
|
2002
|
+
#ifndef GRAPHICS_DISABLED
|
2003
|
+
void plot_fp_word( //draw block of words
|
2004
|
+
TO_BLOCK *block, //block to draw
|
2005
|
+
float pitch, //pitch to draw with
|
2006
|
+
float nonspace //for space threshold
|
2007
|
+
) {
|
2008
|
+
TO_ROW *row; //current row
|
2009
|
+
TO_ROW_IT row_it = block->get_rows ();
|
2010
|
+
|
2011
|
+
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
2012
|
+
row = row_it.data ();
|
2013
|
+
row->min_space = (inT32) ((pitch + nonspace) / 2);
|
2014
|
+
row->max_nonspace = row->min_space;
|
2015
|
+
row->space_threshold = row->min_space;
|
2016
|
+
plot_word_decisions (to_win, (inT16) pitch, row);
|
2017
|
+
}
|
2018
|
+
}
|
2019
|
+
#endif
|