tesseract_bin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**********************************************************************
|
|
2
|
+
* to_spacing
|
|
3
|
+
*
|
|
4
|
+
* Compute fuzzy word spacing thresholds for each row.
|
|
5
|
+
**********************************************************************/
|
|
6
|
+
|
|
7
|
+
#ifndef TOSPACE_H
|
|
8
|
+
#define TOSPACE_H
|
|
9
|
+
|
|
10
|
+
#include "blobbox.h"
|
|
11
|
+
#include "gap_map.h"
|
|
12
|
+
#include "statistc.h"
|
|
13
|
+
#include "notdll.h"
|
|
14
|
+
extern BOOL_VAR_H (tosp_old_to_method, FALSE, "Space stats use prechopping?");
|
|
15
|
+
extern BOOL_VAR_H (tosp_only_use_prop_rows, TRUE,
|
|
16
|
+
"Block stats to use fixed pitch rows?");
|
|
17
|
+
extern BOOL_VAR_H (tosp_use_pre_chopping, FALSE,
|
|
18
|
+
"Space stats use prechopping?");
|
|
19
|
+
extern BOOL_VAR_H (tosp_old_to_bug_fix, FALSE,
|
|
20
|
+
"Fix suspected bug in old code");
|
|
21
|
+
extern BOOL_VAR_H (tosp_block_use_cert_spaces, TRUE,
|
|
22
|
+
"Only stat OBVIOUS spaces");
|
|
23
|
+
extern BOOL_VAR_H (tosp_row_use_cert_spaces, TRUE,
|
|
24
|
+
"Only stat OBVIOUS spaces");
|
|
25
|
+
extern BOOL_VAR_H (tosp_narrow_blobs_not_cert, TRUE,
|
|
26
|
+
"Only stat OBVIOUS spaces");
|
|
27
|
+
extern BOOL_VAR_H (tosp_row_use_cert_spaces1, TRUE,
|
|
28
|
+
"Only stat OBVIOUS spaces");
|
|
29
|
+
extern BOOL_VAR_H (tosp_recovery_isolated_row_stats, TRUE,
|
|
30
|
+
"Use row alone when inadequate cert spaces");
|
|
31
|
+
extern BOOL_VAR_H (tosp_only_small_gaps_for_kern, FALSE, "Better guess");
|
|
32
|
+
extern BOOL_VAR_H (tosp_all_flips_fuzzy, FALSE, "Pass ANY flip to context?");
|
|
33
|
+
extern BOOL_VAR_H (tosp_fuzzy_limit_all, TRUE,
|
|
34
|
+
"Dont restrict kn->sp fuzzy limit to tables");
|
|
35
|
+
extern BOOL_VAR_H (tosp_stats_use_xht_gaps, TRUE,
|
|
36
|
+
"Use within xht gap for wd breaks");
|
|
37
|
+
extern BOOL_VAR_H (tosp_use_xht_gaps, TRUE,
|
|
38
|
+
"Use within xht gap for wd breaks");
|
|
39
|
+
extern BOOL_VAR_H (tosp_only_use_xht_gaps, FALSE,
|
|
40
|
+
"Only use within xht gap for wd breaks");
|
|
41
|
+
extern BOOL_VAR_H (tosp_rule_9_test_punct, FALSE,
|
|
42
|
+
"Dont chng kn to space next to punct");
|
|
43
|
+
extern BOOL_VAR_H (tosp_flip_fuzz_kn_to_sp, TRUE, "Default flip");
|
|
44
|
+
extern BOOL_VAR_H (tosp_flip_fuzz_sp_to_kn, TRUE, "Default flip");
|
|
45
|
+
extern BOOL_VAR_H (tosp_improve_thresh, FALSE,
|
|
46
|
+
"Enable improvement heuristic");
|
|
47
|
+
extern INT_VAR_H (tosp_debug_level, 0, "Debug data");
|
|
48
|
+
extern INT_VAR_H (tosp_enough_space_samples_for_median, 3,
|
|
49
|
+
"or should we use mean");
|
|
50
|
+
extern INT_VAR_H (tosp_redo_kern_limit, 10,
|
|
51
|
+
"No.samples reqd to reestimate for row");
|
|
52
|
+
extern INT_VAR_H (tosp_few_samples, 40,
|
|
53
|
+
"No.gaps reqd with 1 large gap to treat as a table");
|
|
54
|
+
extern INT_VAR_H (tosp_short_row, 20,
|
|
55
|
+
"No.gaps reqd with few cert spaces to use certs");
|
|
56
|
+
extern INT_VAR_H (tosp_sanity_method, 1, "How to avoid being silly");
|
|
57
|
+
extern double_VAR_H (tosp_threshold_bias1, 0,
|
|
58
|
+
"how far between kern and space?");
|
|
59
|
+
extern double_VAR_H (tosp_threshold_bias2, 0,
|
|
60
|
+
"how far between kern and space?");
|
|
61
|
+
extern double_VAR_H (tosp_narrow_fraction, 0.3,
|
|
62
|
+
"Fract of xheight for narrow");
|
|
63
|
+
extern double_VAR_H (tosp_narrow_aspect_ratio, 0.48,
|
|
64
|
+
"narrow if w/h less than this");
|
|
65
|
+
extern double_VAR_H (tosp_wide_fraction, 0.52, "Fract of xheight for wide");
|
|
66
|
+
extern double_VAR_H (tosp_wide_aspect_ratio, 0.0,
|
|
67
|
+
"wide if w/h less than this");
|
|
68
|
+
extern double_VAR_H (tosp_fuzzy_space_factor, 0.6,
|
|
69
|
+
"Fract of xheight for fuzz sp");
|
|
70
|
+
extern double_VAR_H (tosp_fuzzy_space_factor1, 0.5,
|
|
71
|
+
"Fract of xheight for fuzz sp");
|
|
72
|
+
extern double_VAR_H (tosp_fuzzy_space_factor2, 0.72,
|
|
73
|
+
"Fract of xheight for fuzz sp");
|
|
74
|
+
extern double_VAR_H (tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
|
|
75
|
+
extern double_VAR_H (tosp_kern_gap_factor1, 2.0,
|
|
76
|
+
"gap ratio to flip kern->sp");
|
|
77
|
+
extern double_VAR_H (tosp_kern_gap_factor2, 1.3,
|
|
78
|
+
"gap ratio to flip kern->sp");
|
|
79
|
+
extern double_VAR_H (tosp_kern_gap_factor3, 2.5,
|
|
80
|
+
"gap ratio to flip kern->sp");
|
|
81
|
+
extern double_VAR_H (tosp_ignore_big_gaps, -1, "xht multiplier");
|
|
82
|
+
extern double_VAR_H (tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
|
|
83
|
+
extern double_VAR_H (tosp_rep_space, 1.6, "rep gap multiplier for space");
|
|
84
|
+
extern double_VAR_H (tosp_enough_small_gaps, 0.65,
|
|
85
|
+
"Fract of kerns reqd for isolated row stats");
|
|
86
|
+
extern double_VAR_H (tosp_table_kn_sp_ratio, 2.25,
|
|
87
|
+
"Min difference of kn & sp in table");
|
|
88
|
+
extern double_VAR_H (tosp_table_xht_sp_ratio, 0.33,
|
|
89
|
+
"Expect spaces bigger than this");
|
|
90
|
+
extern double_VAR_H (tosp_table_fuzzy_kn_sp_ratio, 3.0,
|
|
91
|
+
"Fuzzy if less than this");
|
|
92
|
+
extern double_VAR_H (tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
|
|
93
|
+
extern double_VAR_H (tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
|
|
94
|
+
extern double_VAR_H (tosp_min_sane_kn_sp, 1.5,
|
|
95
|
+
"Dont trust spaces less than this time kn");
|
|
96
|
+
extern double_VAR_H (tosp_init_guess_kn_mult, 2.2,
|
|
97
|
+
"Thresh guess - mult kn by this");
|
|
98
|
+
extern double_VAR_H (tosp_init_guess_xht_mult, 0.28,
|
|
99
|
+
"Thresh guess - mult xht by this");
|
|
100
|
+
extern double_VAR_H (tosp_max_sane_kn_thresh, 5.0,
|
|
101
|
+
"Multiplier on kn to limit thresh");
|
|
102
|
+
extern double_VAR_H (tosp_flip_caution, 0.0,
|
|
103
|
+
"Dont autoflip kn to sp when large separation");
|
|
104
|
+
extern double_VAR_H (tosp_large_kerning, 0.19,
|
|
105
|
+
"Limit use of xht gap with large kns");
|
|
106
|
+
extern double_VAR_H (tosp_dont_fool_with_small_kerns, -1,
|
|
107
|
+
"Limit use of xht gap with odd small kns");
|
|
108
|
+
extern double_VAR_H (tosp_near_lh_edge, 0,
|
|
109
|
+
"Dont reduce box if the top left is non blank");
|
|
110
|
+
extern double_VAR_H (tosp_silly_kn_sp_gap, 0.2,
|
|
111
|
+
"Dont let sp minus kn get too small");
|
|
112
|
+
extern double_VAR_H (tosp_pass_wide_fuzz_sp_to_context, 0.75,
|
|
113
|
+
"How wide fuzzies need context");
|
|
114
|
+
void to_spacing( //set spacing
|
|
115
|
+
ICOORD page_tr, //topright of page
|
|
116
|
+
TO_BLOCK_LIST *blocks //blocks on page
|
|
117
|
+
);
|
|
118
|
+
//DEBUG USE ONLY
|
|
119
|
+
void block_spacing_stats(TO_BLOCK *block,
|
|
120
|
+
GAPMAP *gapmap,
|
|
121
|
+
BOOL8 &old_text_ord_proportional,
|
|
122
|
+
inT16 &block_space_gap_width, //resulting estimate
|
|
123
|
+
inT16 &block_non_space_gap_width //resulting estimate
|
|
124
|
+
);
|
|
125
|
+
//estimate for block
|
|
126
|
+
void row_spacing_stats(TO_ROW *row,
|
|
127
|
+
GAPMAP *gapmap,
|
|
128
|
+
inT16 block_idx,
|
|
129
|
+
inT16 row_idx,
|
|
130
|
+
inT16 block_space_gap_width,
|
|
131
|
+
inT16 block_non_space_gap_width //estimate for block
|
|
132
|
+
);
|
|
133
|
+
//estimate for block
|
|
134
|
+
void old_to_method(TO_ROW *row,
|
|
135
|
+
STATS *all_gap_stats,
|
|
136
|
+
STATS *space_gap_stats,
|
|
137
|
+
STATS *small_gap_stats,
|
|
138
|
+
inT16 block_space_gap_width,
|
|
139
|
+
inT16 block_non_space_gap_width //estimate for block
|
|
140
|
+
);
|
|
141
|
+
BOOL8 isolated_row_stats(TO_ROW *row,
|
|
142
|
+
GAPMAP *gapmap,
|
|
143
|
+
STATS *all_gap_stats,
|
|
144
|
+
BOOL8 suspected_table,
|
|
145
|
+
inT16 block_idx,
|
|
146
|
+
inT16 row_idx);
|
|
147
|
+
inT16 stats_count_under(STATS *stats, inT16 threshold);
|
|
148
|
+
void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats);
|
|
149
|
+
ROW *make_prop_words( //find lines
|
|
150
|
+
TO_ROW *row, //row to make
|
|
151
|
+
FCOORD rotation //for drawing
|
|
152
|
+
);
|
|
153
|
+
BOOL8 make_a_word_break( //decide on word break
|
|
154
|
+
TO_ROW *row, //row being made
|
|
155
|
+
TBOX blob_box, //for next_blob //how many blanks?
|
|
156
|
+
inT16 prev_gap,
|
|
157
|
+
TBOX prev_blob_box,
|
|
158
|
+
inT16 real_current_gap,
|
|
159
|
+
inT16 within_xht_current_gap,
|
|
160
|
+
TBOX next_blob_box,
|
|
161
|
+
inT16 next_gap,
|
|
162
|
+
uinT8 &blanks,
|
|
163
|
+
BOOL8 &fuzzy_sp,
|
|
164
|
+
BOOL8 &fuzzy_non);
|
|
165
|
+
BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box);
|
|
166
|
+
BOOL8 wide_blob(TO_ROW *row, TBOX blob_box);
|
|
167
|
+
BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box);
|
|
168
|
+
//A COPY FOR PEEKING
|
|
169
|
+
void peek_at_next_gap(TO_ROW *row,
|
|
170
|
+
BLOBNBOX_IT box_it,
|
|
171
|
+
TBOX &next_blob_box,
|
|
172
|
+
inT16 &next_gap,
|
|
173
|
+
inT16 &next_within_xht_gap);
|
|
174
|
+
void mark_gap( //Debug stuff
|
|
175
|
+
TBOX blob, //blob following gap
|
|
176
|
+
inT16 rule, // heuristic id
|
|
177
|
+
inT16 prev_gap,
|
|
178
|
+
inT16 prev_blob_width,
|
|
179
|
+
inT16 current_gap,
|
|
180
|
+
inT16 next_blob_width,
|
|
181
|
+
inT16 next_gap);
|
|
182
|
+
float find_mean_blob_spacing(WERD *word);
|
|
183
|
+
BOOL8 ignore_big_gap(TO_ROW *row,
|
|
184
|
+
inT32 row_length,
|
|
185
|
+
GAPMAP *gapmap,
|
|
186
|
+
inT16 left,
|
|
187
|
+
inT16 right);
|
|
188
|
+
TBOX reduced_box_next( //get bounding box
|
|
189
|
+
TO_ROW *row, //current row
|
|
190
|
+
BLOBNBOX_IT *it //iterator to blobds
|
|
191
|
+
);
|
|
192
|
+
TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, inT16 *left_above_xht);
|
|
193
|
+
#endif
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**********************************************************************
|
|
2
|
+
* File: tovars.cpp (Formerly to_vars.c)
|
|
3
|
+
* Description: Variables used by textord.
|
|
4
|
+
* Author: Ray Smith
|
|
5
|
+
* Created: Tue Aug 24 16:55:02 BST 1993
|
|
6
|
+
*
|
|
7
|
+
* (C) Copyright 1993, Hewlett-Packard Ltd.
|
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
** you may not use this file except in compliance with the License.
|
|
10
|
+
** You may obtain a copy of the License at
|
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
** See the License for the specific language governing permissions and
|
|
16
|
+
** limitations under the License.
|
|
17
|
+
*
|
|
18
|
+
**********************************************************************/
|
|
19
|
+
|
|
20
|
+
#include "mfcpch.h"
|
|
21
|
+
#include "tovars.h"
|
|
22
|
+
|
|
23
|
+
#define EXTERN
|
|
24
|
+
|
|
25
|
+
// Definitions of the textord variables declared in tovars.h.
// The definition order is kept exactly as before.

// ---- Display / diagnostic switches -----------------------------------------
EXTERN BOOL_VAR(textord_show_initial_words, FALSE, "Display separate words");
EXTERN BOOL_VAR(textord_show_new_words, FALSE, "Display separate words");
EXTERN BOOL_VAR(textord_show_fixed_words, FALSE,
                "Display forced fixed pitch words");
EXTERN BOOL_VAR(textord_blocksall_fixed, FALSE, "Moan about prop blocks");
EXTERN BOOL_VAR(textord_blocksall_prop, FALSE,
                "Moan about fixed pitch blocks");
EXTERN BOOL_VAR(textord_blocksall_testing, FALSE, "Dump stats when moaning");
EXTERN BOOL_VAR(textord_test_mode, FALSE, "Do current test");
EXTERN BOOL_VAR(textord_repeat_extraction, TRUE, "Extract repeated chars");

// ---- Integer settings ------------------------------------------------------
EXTERN INT_VAR(textord_dotmatrix_gap, 3,
               "Max pixel gap for broken pixed pitch");
EXTERN INT_VAR(textord_repeat_threshold, 4,
               "Min multiple for repeated char");
EXTERN INT_VAR(textord_debug_block, 0, "Block to do debug on");
EXTERN INT_VAR(textord_pitch_range, 2, "Max range test on pitch");

// ---- Word / pitch statistics -----------------------------------------------
EXTERN double_VAR(textord_repeat_rating, 6, "Min rating for equal blobs");
EXTERN double_VAR(textord_wordstats_smooth_factor, 0.05,
                  "Smoothing gap stats");
EXTERN double_VAR(textord_width_smooth_factor, 0.10,
                  "Smoothing width stats");
EXTERN double_VAR(textord_words_width_ile, 0.4,
                  "Ile of blob widths for space est");
EXTERN double_VAR(textord_words_maxspace, 4.0, "Multiple of xheight");
EXTERN double_VAR(textord_words_default_maxspace, 3.5,
                  "Max believable third space");
EXTERN double_VAR(textord_words_default_minspace, 0.6,
                  "Fraction of xheight");
EXTERN double_VAR(textord_words_min_minspace, 0.3, "Fraction of xheight");
EXTERN double_VAR(textord_words_default_nonspace, 0.2,
                  "Fraction of xheight");
EXTERN double_VAR(textord_words_initial_lower, 0.25,
                  "Max inital cluster size");
EXTERN double_VAR(textord_words_initial_upper, 0.15,
                  "Min initial cluster spacing");
EXTERN double_VAR(textord_words_minlarge, 0.75,
                  "Fraction of valid gaps needed");
EXTERN double_VAR(textord_words_pitchsd_threshold, 0.040,
                  "Pitch sync threshold");
EXTERN double_VAR(textord_words_def_fixed, 0.016,
                  "Threshold for definite fixed");
EXTERN double_VAR(textord_words_def_prop, 0.090,
                  "Threshold for definite prop");
EXTERN INT_VAR(textord_words_veto_power, 5,
               "Rows required to outvote a veto");
EXTERN double_VAR(textord_pitch_rowsimilarity, 0.08,
                  "Fraction of xheight for sameness");
EXTERN BOOL_VAR(textord_pitch_scalebigwords, FALSE,
                "Scale scores on big words");

// ---- words_* settings ------------------------------------------------------
EXTERN double_VAR(words_initial_lower, 0.5, "Max inital cluster size");
EXTERN double_VAR(words_initial_upper, 0.15, "Min initial cluster spacing");
EXTERN double_VAR(words_default_prop_nonspace, 0.25, "Fraction of xheight");
EXTERN double_VAR(words_default_fixed_space, 0.75, "Fraction of xheight");
EXTERN double_VAR(words_default_fixed_limit, 0.6, "Allowed size variance");

// ---- Spacing ratios and fixed-pitch thresholds -----------------------------
EXTERN double_VAR(textord_words_definite_spread, 0.30,
                  "Non-fuzzy spacing region");
EXTERN double_VAR(textord_spacesize_ratiofp, 2.8,
                  "Min ratio space/nonspace");
EXTERN double_VAR(textord_spacesize_ratioprop, 2.0,
                  "Min ratio space/nonspace");
EXTERN double_VAR(textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold");
EXTERN double_VAR(textord_max_pitch_iqr, 0.20, "Xh fraction noise in pitch");
EXTERN double_VAR(textord_fp_min_width, 0.5, "Min width of decent blobs");
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**********************************************************************
|
|
2
|
+
* File: tovars.h (Formerly to_vars.h)
|
|
3
|
+
* Description: Variables used by textord.
|
|
4
|
+
* Author: Ray Smith
|
|
5
|
+
* Created: Tue Aug 24 16:55:02 BST 1993
|
|
6
|
+
*
|
|
7
|
+
* (C) Copyright 1993, Hewlett-Packard Ltd.
|
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
** you may not use this file except in compliance with the License.
|
|
10
|
+
** You may obtain a copy of the License at
|
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
** See the License for the specific language governing permissions and
|
|
16
|
+
** limitations under the License.
|
|
17
|
+
*
|
|
18
|
+
**********************************************************************/
|
|
19
|
+
|
|
20
|
+
#ifndef TOVARS_H
|
|
21
|
+
#define TOVARS_H
|
|
22
|
+
|
|
23
|
+
#include "varable.h"
|
|
24
|
+
#include "notdll.h"
|
|
25
|
+
|
|
26
|
+
extern BOOL_VAR_H (textord_show_initial_words, FALSE,
|
|
27
|
+
"Display separate words");
|
|
28
|
+
extern BOOL_VAR_H (textord_show_new_words, FALSE, "Display separate words");
|
|
29
|
+
extern BOOL_VAR_H (textord_show_fixed_words, FALSE,
|
|
30
|
+
"Display forced fixed pitch words");
|
|
31
|
+
extern BOOL_VAR_H (textord_blocksall_fixed, FALSE, "Moan about prop blocks");
|
|
32
|
+
extern BOOL_VAR_H (textord_blocksall_prop, FALSE,
|
|
33
|
+
"Moan about fixed pitch blocks");
|
|
34
|
+
extern BOOL_VAR_H (textord_blocksall_testing, FALSE,
|
|
35
|
+
"Dump stats when moaning");
|
|
36
|
+
extern BOOL_VAR_H (textord_test_mode, FALSE, "Do current test");
|
|
37
|
+
extern BOOL_VAR_H (textord_repeat_extraction, TRUE, "Extract repeated chars");
|
|
38
|
+
extern INT_VAR_H (textord_dotmatrix_gap, 3,
|
|
39
|
+
"Max pixel gap for broken pixed pitch");
|
|
40
|
+
extern INT_VAR_H (textord_repeat_threshold, 4,
|
|
41
|
+
"Min multiple for repeated char");
|
|
42
|
+
extern INT_VAR_H (textord_debug_block, 0, "Block to do debug on");
|
|
43
|
+
extern INT_VAR_H (textord_pitch_range, 2, "Max range test on pitch");
|
|
44
|
+
extern double_VAR_H (textord_repeat_rating, 6, "Min rating for equal blobs");
|
|
45
|
+
extern double_VAR_H (textord_wordstats_smooth_factor, 0.05,
|
|
46
|
+
"Smoothing gap stats");
|
|
47
|
+
extern double_VAR_H (textord_width_smooth_factor, 0.10,
|
|
48
|
+
"Smoothing width stats");
|
|
49
|
+
extern double_VAR_H (textord_words_width_ile, 0.4,
|
|
50
|
+
"Ile of blob widths for space est");
|
|
51
|
+
extern double_VAR_H (textord_words_maxspace, 4.0, "Multiple of xheight");
|
|
52
|
+
extern double_VAR_H (textord_words_default_maxspace, 3.5,
|
|
53
|
+
"Max believable third space");
|
|
54
|
+
extern double_VAR_H (textord_words_default_minspace, 0.6,
|
|
55
|
+
"Fraction of xheight");
|
|
56
|
+
extern double_VAR_H (textord_words_min_minspace, 0.3, "Fraction of xheight");
|
|
57
|
+
extern double_VAR_H (textord_words_default_nonspace, 0.2,
|
|
58
|
+
"Fraction of xheight");
|
|
59
|
+
extern double_VAR_H (textord_words_initial_lower, 0.25,
|
|
60
|
+
"Max inital cluster size");
|
|
61
|
+
extern double_VAR_H (textord_words_initial_upper, 0.15,
|
|
62
|
+
"Min initial cluster spacing");
|
|
63
|
+
extern double_VAR_H (textord_words_minlarge, 0.75,
|
|
64
|
+
"Fraction of valid gaps needed");
|
|
65
|
+
extern double_VAR_H (textord_words_pitchsd_threshold, 0.025,
|
|
66
|
+
"Pitch sync threshold");
|
|
67
|
+
extern double_VAR_H (textord_words_def_fixed, 0.01,
|
|
68
|
+
"Threshold for definite fixed");
|
|
69
|
+
extern double_VAR_H (textord_words_def_prop, 0.06,
|
|
70
|
+
"Threshold for definite prop");
|
|
71
|
+
extern INT_VAR_H (textord_words_veto_power, 5,
|
|
72
|
+
"Rows required to outvote a veto");
|
|
73
|
+
extern double_VAR_H (textord_pitch_rowsimilarity, 0.08,
|
|
74
|
+
"Fraction of xheight for sameness");
|
|
75
|
+
extern BOOL_VAR_H (textord_pitch_scalebigwords, FALSE,
|
|
76
|
+
"Scale scores on big words");
|
|
77
|
+
extern double_VAR_H (words_initial_lower, 0.5, "Max inital cluster size");
|
|
78
|
+
extern double_VAR_H (words_initial_upper, 0.15,
|
|
79
|
+
"Min initial cluster spacing");
|
|
80
|
+
extern double_VAR_H (words_default_prop_nonspace, 0.25,
|
|
81
|
+
"Fraction of xheight");
|
|
82
|
+
extern double_VAR_H (words_default_fixed_space, 0.75, "Fraction of xheight");
|
|
83
|
+
extern double_VAR_H (words_default_fixed_limit, 0.6, "Allowed size variance");
|
|
84
|
+
extern double_VAR_H (textord_words_definite_spread, 0.30,
|
|
85
|
+
"Non-fuzzy spacing region");
|
|
86
|
+
extern double_VAR_H (textord_spacesize_ratiofp, 2.8,
|
|
87
|
+
"Min ratio space/nonspace");
|
|
88
|
+
extern double_VAR_H (textord_spacesize_ratioprop, 2.0,
|
|
89
|
+
"Min ratio space/nonspace");
|
|
90
|
+
extern double_VAR_H (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold");
|
|
91
|
+
extern double_VAR_H (textord_max_pitch_iqr, 0.20,
|
|
92
|
+
"Xh fraction noise in pitch");
|
|
93
|
+
extern double_VAR_H (textord_fp_min_width, 0.5, "Min width of decent blobs");
|
|
94
|
+
#endif
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
/**********************************************************************
|
|
2
|
+
* File: underlin.cpp (Formerly undrline.c)
|
|
3
|
+
* Description: Code to chop blobs apart from underlines.
|
|
4
|
+
* Author: Ray Smith
|
|
5
|
+
* Created: Mon Aug 8 11:14:00 BST 1994
|
|
6
|
+
*
|
|
7
|
+
* (C) Copyright 1994, Hewlett-Packard Ltd.
|
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
** you may not use this file except in compliance with the License.
|
|
10
|
+
** You may obtain a copy of the License at
|
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
** See the License for the specific language governing permissions and
|
|
16
|
+
** limitations under the License.
|
|
17
|
+
*
|
|
18
|
+
**********************************************************************/
|
|
19
|
+
|
|
20
|
+
#include "mfcpch.h"
|
|
21
|
+
#ifdef __UNIX__
|
|
22
|
+
#include <assert.h>
|
|
23
|
+
#endif
|
|
24
|
+
#include "underlin.h"
|
|
25
|
+
|
|
26
|
+
#define PROJECTION_MARGIN 10 //arbitrary
|
|
27
|
+
#define EXTERN
|
|
28
|
+
|
|
29
|
+
EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore");
|
|
30
|
+
EXTERN BOOL_VAR (textord_restore_underlines, TRUE,
|
|
31
|
+
"Chop underlines & put back");
|
|
32
|
+
|
|
33
|
+
/**********************************************************************
|
|
34
|
+
* restore_underlined_blobs
|
|
35
|
+
*
|
|
36
|
+
* Find underlined blobs and put them back in the row.
|
|
37
|
+
**********************************************************************/
|
|
38
|
+
|
|
39
|
+
void restore_underlined_blobs( //get chop points
|
|
40
|
+
TO_BLOCK *block //block to do
|
|
41
|
+
) {
|
|
42
|
+
inT16 chop_coord; //chop boundary
|
|
43
|
+
TBOX blob_box; //of underline
|
|
44
|
+
BLOBNBOX *u_line; //underline bit
|
|
45
|
+
TO_ROW *row; //best row for blob
|
|
46
|
+
ICOORDELT_LIST chop_cells; //blobs to cut out
|
|
47
|
+
//real underlines
|
|
48
|
+
BLOBNBOX_LIST residual_underlines;
|
|
49
|
+
OUTLINE_LIST left_outlines; //in current blob
|
|
50
|
+
OUTLINE_LIST right_outlines; //for next blob
|
|
51
|
+
C_OUTLINE_LIST left_coutlines;
|
|
52
|
+
C_OUTLINE_LIST right_coutlines;
|
|
53
|
+
ICOORDELT_IT cell_it = &chop_cells;
|
|
54
|
+
//under lines
|
|
55
|
+
BLOBNBOX_IT under_it = &block->underlines;
|
|
56
|
+
BLOBNBOX_IT ru_it = &residual_underlines;
|
|
57
|
+
|
|
58
|
+
if (block->get_rows()->empty())
|
|
59
|
+
return; // Don't crash if there are no rows.
|
|
60
|
+
for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
|
|
61
|
+
under_it.forward ()) {
|
|
62
|
+
u_line = under_it.extract ();
|
|
63
|
+
blob_box = u_line->bounding_box ();
|
|
64
|
+
row = most_overlapping_row (block->get_rows (), u_line);
|
|
65
|
+
find_underlined_blobs (u_line, &row->baseline, row->xheight,
|
|
66
|
+
row->xheight * textord_underline_offset,
|
|
67
|
+
&chop_cells);
|
|
68
|
+
cell_it.set_to_list (&chop_cells);
|
|
69
|
+
for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
|
|
70
|
+
cell_it.forward ()) {
|
|
71
|
+
chop_coord = cell_it.data ()->x ();
|
|
72
|
+
if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
|
|
73
|
+
split_to_blob (u_line, chop_coord,
|
|
74
|
+
textord_fp_chop_error + 0.5,
|
|
75
|
+
&left_outlines, &left_coutlines,
|
|
76
|
+
&right_outlines, &right_coutlines);
|
|
77
|
+
if (!left_outlines.empty ())
|
|
78
|
+
ru_it.
|
|
79
|
+
add_after_then_move (new
|
|
80
|
+
BLOBNBOX (new PBLOB (&left_outlines)));
|
|
81
|
+
else if (!left_coutlines.empty ())
|
|
82
|
+
ru_it.
|
|
83
|
+
add_after_then_move (new
|
|
84
|
+
BLOBNBOX (new
|
|
85
|
+
C_BLOB (&left_coutlines)));
|
|
86
|
+
//right edge of lbob
|
|
87
|
+
chop_coord = cell_it.data ()->y ();
|
|
88
|
+
split_to_blob (NULL, chop_coord,
|
|
89
|
+
textord_fp_chop_error + 0.5,
|
|
90
|
+
&left_outlines, &left_coutlines,
|
|
91
|
+
&right_outlines, &right_coutlines);
|
|
92
|
+
if (!left_outlines.empty ())
|
|
93
|
+
row->insert_blob (new BLOBNBOX (new PBLOB (&left_outlines)));
|
|
94
|
+
else if (!left_coutlines.empty ())
|
|
95
|
+
row->
|
|
96
|
+
insert_blob (new BLOBNBOX (new C_BLOB (&left_coutlines)));
|
|
97
|
+
else {
|
|
98
|
+
ASSERT_HOST(FALSE);
|
|
99
|
+
fprintf (stderr,
|
|
100
|
+
"Error:no outlines after chopping from %d to %d from (%d,%d)->(%d,%d)\n",
|
|
101
|
+
cell_it.data ()->x (), cell_it.data ()->y (),
|
|
102
|
+
blob_box.left (), blob_box.bottom (),
|
|
103
|
+
blob_box.right (), blob_box.top ());
|
|
104
|
+
}
|
|
105
|
+
u_line = NULL; //no more blobs to add
|
|
106
|
+
}
|
|
107
|
+
delete cell_it.extract ();
|
|
108
|
+
}
|
|
109
|
+
if (!right_outlines.empty () || !right_coutlines.empty ()) {
|
|
110
|
+
split_to_blob (NULL, blob_box.right (),
|
|
111
|
+
textord_fp_chop_error + 0.5,
|
|
112
|
+
&left_outlines, &left_coutlines,
|
|
113
|
+
&right_outlines, &right_coutlines);
|
|
114
|
+
if (!left_outlines.empty ())
|
|
115
|
+
ru_it.
|
|
116
|
+
add_after_then_move (new BLOBNBOX (new PBLOB (&left_outlines)));
|
|
117
|
+
else if (!left_coutlines.empty ())
|
|
118
|
+
ru_it.
|
|
119
|
+
add_after_then_move (new
|
|
120
|
+
BLOBNBOX (new C_BLOB (&left_coutlines)));
|
|
121
|
+
}
|
|
122
|
+
if (u_line != NULL) {
|
|
123
|
+
if (u_line->blob() != NULL)
|
|
124
|
+
delete u_line->blob();
|
|
125
|
+
if (u_line->cblob() != NULL)
|
|
126
|
+
delete u_line->cblob();
|
|
127
|
+
delete u_line;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
if (!ru_it.empty ()) {
|
|
131
|
+
ru_it.move_to_first ();
|
|
132
|
+
for (ru_it.mark_cycle_pt (); !ru_it.cycled_list (); ru_it.forward ()) {
|
|
133
|
+
under_it.add_after_then_move (ru_it.extract ());
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
/**********************************************************************
|
|
140
|
+
* most_overlapping_row
|
|
141
|
+
*
|
|
142
|
+
* Return the row which most overlaps the blob.
|
|
143
|
+
**********************************************************************/
|
|
144
|
+
|
|
145
|
+
TO_ROW *most_overlapping_row( //find best row
|
|
146
|
+
TO_ROW_LIST *rows, //list of rows
|
|
147
|
+
BLOBNBOX *blob //blob to place
|
|
148
|
+
) {
|
|
149
|
+
inT16 x = (blob->bounding_box ().left ()
|
|
150
|
+
+ blob->bounding_box ().right ()) / 2;
|
|
151
|
+
TO_ROW_IT row_it = rows; //row iterator
|
|
152
|
+
TO_ROW *row; //current row
|
|
153
|
+
TO_ROW *best_row; //output row
|
|
154
|
+
float overlap; //of blob & row
|
|
155
|
+
float bestover; //best overlap
|
|
156
|
+
|
|
157
|
+
best_row = NULL;
|
|
158
|
+
bestover = (float) -MAX_INT32;
|
|
159
|
+
if (row_it.empty ())
|
|
160
|
+
return NULL;
|
|
161
|
+
row = row_it.data ();
|
|
162
|
+
row_it.mark_cycle_pt ();
|
|
163
|
+
while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top ()
|
|
164
|
+
&& !row_it.cycled_list ()) {
|
|
165
|
+
best_row = row;
|
|
166
|
+
bestover =
|
|
167
|
+
blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop;
|
|
168
|
+
row_it.forward ();
|
|
169
|
+
row = row_it.data ();
|
|
170
|
+
}
|
|
171
|
+
while (row->baseline.y (x) + row->xheight + row->ascrise
|
|
172
|
+
>= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
|
|
173
|
+
overlap = row->baseline.y (x) + row->xheight + row->ascrise;
|
|
174
|
+
if (blob->bounding_box ().top () < overlap)
|
|
175
|
+
overlap = blob->bounding_box ().top ();
|
|
176
|
+
if (blob->bounding_box ().bottom () >
|
|
177
|
+
row->baseline.y (x) + row->descdrop)
|
|
178
|
+
overlap -= blob->bounding_box ().bottom ();
|
|
179
|
+
else
|
|
180
|
+
overlap -= row->baseline.y (x) + row->descdrop;
|
|
181
|
+
if (overlap > bestover) {
|
|
182
|
+
bestover = overlap;
|
|
183
|
+
best_row = row;
|
|
184
|
+
}
|
|
185
|
+
row_it.forward ();
|
|
186
|
+
row = row_it.data ();
|
|
187
|
+
}
|
|
188
|
+
if (bestover < 0
|
|
189
|
+
&& row->baseline.y (x) + row->xheight + row->ascrise
|
|
190
|
+
- blob->bounding_box ().bottom () > bestover)
|
|
191
|
+
best_row = row;
|
|
192
|
+
return best_row;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
/**********************************************************************
|
|
197
|
+
* find_underlined_blobs
|
|
198
|
+
*
|
|
199
|
+
* Find the start and end coords of blobs in the underline.
|
|
200
|
+
**********************************************************************/
|
|
201
|
+
|
|
202
|
+
void find_underlined_blobs( //get chop points
|
|
203
|
+
BLOBNBOX *u_line, //underlined unit
|
|
204
|
+
QSPLINE *baseline, //actual baseline
|
|
205
|
+
float xheight, //height of line
|
|
206
|
+
float baseline_offset, //amount to shrinke it
|
|
207
|
+
ICOORDELT_LIST *chop_cells //places to chop
|
|
208
|
+
) {
|
|
209
|
+
inT16 x, y; //sides of blob
|
|
210
|
+
ICOORD blob_chop; //sides of blob
|
|
211
|
+
TBOX blob_box = u_line->bounding_box ();
|
|
212
|
+
//cell iterator
|
|
213
|
+
ICOORDELT_IT cell_it = chop_cells;
|
|
214
|
+
STATS upper_proj (blob_box.left (), blob_box.right () + 1);
|
|
215
|
+
STATS middle_proj (blob_box.left (), blob_box.right () + 1);
|
|
216
|
+
STATS lower_proj (blob_box.left (), blob_box.right () + 1);
|
|
217
|
+
C_OUTLINE_IT out_it; //outlines of blob
|
|
218
|
+
|
|
219
|
+
ASSERT_HOST (u_line->cblob () != NULL);
|
|
220
|
+
|
|
221
|
+
out_it.set_to_list (u_line->cblob ()->out_list ());
|
|
222
|
+
for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
|
|
223
|
+
vertical_cunderline_projection (out_it.data (),
|
|
224
|
+
baseline, xheight, baseline_offset,
|
|
225
|
+
&lower_proj, &middle_proj, &upper_proj);
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
for (x = blob_box.left (); x < blob_box.right (); x++) {
|
|
229
|
+
if (middle_proj.pile_count (x) > 0) {
|
|
230
|
+
for (y = x + 1;
|
|
231
|
+
y < blob_box.right () && middle_proj.pile_count (y) > 0; y++);
|
|
232
|
+
blob_chop = ICOORD (x, y);
|
|
233
|
+
cell_it.add_after_then_move (new ICOORDELT (blob_chop));
|
|
234
|
+
x = y;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
/**********************************************************************
|
|
241
|
+
* vertical_cunderline_projection
|
|
242
|
+
*
|
|
243
|
+
* Compute the vertical projection of a outline from its outlines
|
|
244
|
+
* and add to the given STATS.
|
|
245
|
+
**********************************************************************/
|
|
246
|
+
|
|
247
|
+
// Walk every step of the outline and, for each horizontal step, add a signed
// count to the three projections at the x column that the step covers. The
// outline's children are then projected recursively into the same STATS.
//
//   outline          outline to project (children handled by recursion)
//   baseline         spline evaluated per column to get the baseline y
//   xheight          height of the middle band above the shifted baseline
//   baseline_offset  shift added to the baseline before splitting into bands
//   lower_proj       gets the part of the edge height up to the shifted baseline
//   middle_proj      gets the part between the shifted baseline and +xheight
//   upper_proj       gets the part above shifted baseline + xheight
void vertical_cunderline_projection(                        //project outlines
                                    C_OUTLINE *outline,     //outline to project
                                    QSPLINE *baseline,      //actual baseline
                                    float xheight,          //height of line
                                    float baseline_offset,  //amount to shrink it
                                    STATS *lower_proj,      //below baseline
                                    STATS *middle_proj,     //centre region
                                    STATS *upper_proj       //top region
                                   ) {
  ICOORD pos;                    //current point
  ICOORD step;                   //edge step
  inT16 lower_y, upper_y;        //region limits
  inT32 length;                  //of outline
  // Same width as length: a 16-bit counter would wrap before reaching a
  // path length above 32767 and the loop would never terminate normally.
  // NOTE(review): confirm C_OUTLINE::step accepts a 32-bit index.
  inT32 stepindex;               //current step
  inT32 column;                  //x column covered by the step
  inT32 sign;                    //-1 for rightward steps, +1 for leftward
  C_OUTLINE_IT out_it = outline->child ();

  pos = outline->start_pos ();
  length = outline->pathlength ();
  for (stepindex = 0; stepindex < length; stepindex++) {
    step = outline->step (stepindex);
    // Vertical steps (step.x () == 0) contribute nothing to the projection.
    if (step.x () != 0) {
      if (step.x () > 0) {
        // Moving right: the step covers the column starting at pos.x ()
        // and its counts are entered negated.
        column = pos.x ();
        sign = -1;
      }
      else {
        // Moving left: the step covers the column ending at pos.x ()
        // and its counts are entered as-is.
        column = pos.x () - 1;
        sign = 1;
      }
      // Band limits at this column, rounded to the nearest integer.
      lower_y =
        (inT16) floor (baseline->y (column) + baseline_offset + 0.5);
      upper_y =
        (inT16) floor (baseline->y (column) + baseline_offset +
        xheight + 0.5);
      if (pos.y () >= lower_y) {
        // Edge is at or above the lower limit: lower band gets its full share.
        lower_proj->add (column, sign * lower_y);
        if (pos.y () >= upper_y) {
          // Edge reaches the upper limit: middle band is full, the
          // remainder goes to the upper band.
          middle_proj->add (column, sign * (upper_y - lower_y));
          upper_proj->add (column, sign * (pos.y () - upper_y));
        }
        else {
          middle_proj->add (column, sign * (pos.y () - lower_y));
        }
      }
      else {
        // Edge lies below the lower limit: everything goes to the lower band.
        lower_proj->add (column, sign * pos.y ());
      }
    }
    pos += step;
  }

  // Project the child outlines into the same accumulators.
  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
    vertical_cunderline_projection (out_it.data (),
      baseline, xheight, baseline_offset,
      lower_proj, middle_proj, upper_proj);
  }
}
|