tesseract_bin 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +23 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/ext/tesseract_bin/extconf.rb +17 -0
- data/lib/tesseract_bin.rb +12 -0
- data/tesseract_bin.gemspec +660 -0
- data/test/helper.rb +18 -0
- data/test/test_tesseract_bin.rb +7 -0
- data/vendor/tesseract-2.04/AUTHORS +8 -0
- data/vendor/tesseract-2.04/COPYING +23 -0
- data/vendor/tesseract-2.04/ChangeLog +71 -0
- data/vendor/tesseract-2.04/INSTALL +229 -0
- data/vendor/tesseract-2.04/Makefile.am +20 -0
- data/vendor/tesseract-2.04/Makefile.in +641 -0
- data/vendor/tesseract-2.04/NEWS +1 -0
- data/vendor/tesseract-2.04/README +138 -0
- data/vendor/tesseract-2.04/ReleaseNotes +213 -0
- data/vendor/tesseract-2.04/StdAfx.cpp +8 -0
- data/vendor/tesseract-2.04/StdAfx.h +24 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.am +63 -0
- data/vendor/tesseract-2.04/ccmain/Makefile.in +735 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.cpp +1082 -0
- data/vendor/tesseract-2.04/ccmain/adaptions.h +109 -0
- data/vendor/tesseract-2.04/ccmain/applybox.cpp +891 -0
- data/vendor/tesseract-2.04/ccmain/applybox.h +73 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.cpp +1105 -0
- data/vendor/tesseract-2.04/ccmain/baseapi.h +256 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.cpp +76 -0
- data/vendor/tesseract-2.04/ccmain/blobcmp.h +29 -0
- data/vendor/tesseract-2.04/ccmain/callnet.cpp +93 -0
- data/vendor/tesseract-2.04/ccmain/callnet.h +32 -0
- data/vendor/tesseract-2.04/ccmain/charcut.cpp +704 -0
- data/vendor/tesseract-2.04/ccmain/charcut.h +120 -0
- data/vendor/tesseract-2.04/ccmain/charsample.cpp +699 -0
- data/vendor/tesseract-2.04/ccmain/control.cpp +1842 -0
- data/vendor/tesseract-2.04/ccmain/control.h +198 -0
- data/vendor/tesseract-2.04/ccmain/docqual.cpp +1481 -0
- data/vendor/tesseract-2.04/ccmain/docqual.h +155 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.cpp +82 -0
- data/vendor/tesseract-2.04/ccmain/expandblob.h +13 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.cpp +989 -0
- data/vendor/tesseract-2.04/ccmain/fixspace.h +72 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.cpp +825 -0
- data/vendor/tesseract-2.04/ccmain/fixxht.h +93 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.cpp +154 -0
- data/vendor/tesseract-2.04/ccmain/imgscale.h +32 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.cpp +391 -0
- data/vendor/tesseract-2.04/ccmain/matmatch.h +48 -0
- data/vendor/tesseract-2.04/ccmain/output.cpp +1273 -0
- data/vendor/tesseract-2.04/ccmain/output.h +116 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.cpp +666 -0
- data/vendor/tesseract-2.04/ccmain/pagewalk.h +155 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.cpp +107 -0
- data/vendor/tesseract-2.04/ccmain/paircmp.h +43 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.cpp +1867 -0
- data/vendor/tesseract-2.04/ccmain/pgedit.h +181 -0
- data/vendor/tesseract-2.04/ccmain/reject.cpp +1775 -0
- data/vendor/tesseract-2.04/ccmain/reject.h +181 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.cpp +366 -0
- data/vendor/tesseract-2.04/ccmain/scaleimg.h +35 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.cpp +375 -0
- data/vendor/tesseract-2.04/ccmain/tessbox.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.cpp +278 -0
- data/vendor/tesseract-2.04/ccmain/tessedit.h +49 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.cpp +110 -0
- data/vendor/tesseract-2.04/ccmain/tessembedded.h +38 -0
- data/vendor/tesseract-2.04/ccmain/tesseractfull.cc +37 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.cpp +387 -0
- data/vendor/tesseract-2.04/ccmain/tesseractmain.h +58 -0
- data/vendor/tesseract-2.04/ccmain/tessio.h +110 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.cpp +38 -0
- data/vendor/tesseract-2.04/ccmain/tessvars.h +48 -0
- data/vendor/tesseract-2.04/ccmain/tfacep.h +62 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.cpp +443 -0
- data/vendor/tesseract-2.04/ccmain/tfacepp.h +85 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.cpp +549 -0
- data/vendor/tesseract-2.04/ccmain/tstruct.h +108 -0
- data/vendor/tesseract-2.04/ccmain/varabled.cpp +346 -0
- data/vendor/tesseract-2.04/ccmain/varabled.h +125 -0
- data/vendor/tesseract-2.04/ccmain/werdit.cpp +193 -0
- data/vendor/tesseract-2.04/ccmain/werdit.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.am +25 -0
- data/vendor/tesseract-2.04/ccstruct/Makefile.in +650 -0
- data/vendor/tesseract-2.04/ccstruct/blckerr.h +29 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.cpp +778 -0
- data/vendor/tesseract-2.04/ccstruct/blobbox.h +381 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.cpp +247 -0
- data/vendor/tesseract-2.04/ccstruct/blobs.h +119 -0
- data/vendor/tesseract-2.04/ccstruct/blread.cpp +537 -0
- data/vendor/tesseract-2.04/ccstruct/blread.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/callcpp.cpp +252 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.cpp +650 -0
- data/vendor/tesseract-2.04/ccstruct/coutln.h +186 -0
- data/vendor/tesseract-2.04/ccstruct/crakedge.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.cpp +133 -0
- data/vendor/tesseract-2.04/ccstruct/genblob.h +52 -0
- data/vendor/tesseract-2.04/ccstruct/hpddef.h +39 -0
- data/vendor/tesseract-2.04/ccstruct/hpdsizes.h +8 -0
- data/vendor/tesseract-2.04/ccstruct/ipoints.h +479 -0
- data/vendor/tesseract-2.04/ccstruct/labls.cpp +188 -0
- data/vendor/tesseract-2.04/ccstruct/labls.h +38 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.cpp +249 -0
- data/vendor/tesseract-2.04/ccstruct/linlsq.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.cpp +453 -0
- data/vendor/tesseract-2.04/ccstruct/lmedsq.h +84 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.cpp +100 -0
- data/vendor/tesseract-2.04/ccstruct/mod128.h +85 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.cpp +176 -0
- data/vendor/tesseract-2.04/ccstruct/normalis.h +108 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.cpp +369 -0
- data/vendor/tesseract-2.04/ccstruct/ocrblock.h +235 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.cpp +216 -0
- data/vendor/tesseract-2.04/ccstruct/ocrrow.h +133 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.cpp +879 -0
- data/vendor/tesseract-2.04/ccstruct/pageblk.h +318 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.cpp +330 -0
- data/vendor/tesseract-2.04/ccstruct/pageres.h +313 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.cpp +361 -0
- data/vendor/tesseract-2.04/ccstruct/pdblock.h +181 -0
- data/vendor/tesseract-2.04/ccstruct/pdclass.h +54 -0
- data/vendor/tesseract-2.04/ccstruct/points.cpp +102 -0
- data/vendor/tesseract-2.04/ccstruct/points.h +299 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.cpp +588 -0
- data/vendor/tesseract-2.04/ccstruct/polyaprx.h +51 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.cpp +398 -0
- data/vendor/tesseract-2.04/ccstruct/polyblk.h +122 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.cpp +357 -0
- data/vendor/tesseract-2.04/ccstruct/polyblob.h +102 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.cpp +23 -0
- data/vendor/tesseract-2.04/ccstruct/polyvert.h +58 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.cpp +441 -0
- data/vendor/tesseract-2.04/ccstruct/poutline.h +125 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.cpp +147 -0
- data/vendor/tesseract-2.04/ccstruct/quadlsq.h +67 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.cpp +21 -0
- data/vendor/tesseract-2.04/ccstruct/quadratc.h +63 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.cpp +382 -0
- data/vendor/tesseract-2.04/ccstruct/quspline.h +113 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.cpp +372 -0
- data/vendor/tesseract-2.04/ccstruct/ratngs.h +198 -0
- data/vendor/tesseract-2.04/ccstruct/rect.cpp +229 -0
- data/vendor/tesseract-2.04/ccstruct/rect.h +320 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.cpp +545 -0
- data/vendor/tesseract-2.04/ccstruct/rejctmap.h +284 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.cpp +89 -0
- data/vendor/tesseract-2.04/ccstruct/rwpoly.h +45 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.cpp +905 -0
- data/vendor/tesseract-2.04/ccstruct/statistc.h +135 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.cpp +296 -0
- data/vendor/tesseract-2.04/ccstruct/stepblob.h +88 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.cpp +230 -0
- data/vendor/tesseract-2.04/ccstruct/txtregn.h +155 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.cpp +63 -0
- data/vendor/tesseract-2.04/ccstruct/vecfuncs.h +91 -0
- data/vendor/tesseract-2.04/ccstruct/werd.cpp +967 -0
- data/vendor/tesseract-2.04/ccstruct/werd.h +277 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.am +19 -0
- data/vendor/tesseract-2.04/ccutil/Makefile.in +626 -0
- data/vendor/tesseract-2.04/ccutil/basedir.cpp +118 -0
- data/vendor/tesseract-2.04/ccutil/basedir.h +32 -0
- data/vendor/tesseract-2.04/ccutil/bits16.cpp +30 -0
- data/vendor/tesseract-2.04/ccutil/bits16.h +61 -0
- data/vendor/tesseract-2.04/ccutil/boxread.cpp +105 -0
- data/vendor/tesseract-2.04/ccutil/boxread.h +44 -0
- data/vendor/tesseract-2.04/ccutil/clst.cpp +626 -0
- data/vendor/tesseract-2.04/ccutil/clst.h +1085 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.cpp +500 -0
- data/vendor/tesseract-2.04/ccutil/debugwin.h +103 -0
- data/vendor/tesseract-2.04/ccutil/elst.cpp +593 -0
- data/vendor/tesseract-2.04/ccutil/elst.h +1125 -0
- data/vendor/tesseract-2.04/ccutil/elst2.cpp +606 -0
- data/vendor/tesseract-2.04/ccutil/elst2.h +1121 -0
- data/vendor/tesseract-2.04/ccutil/errcode.cpp +104 -0
- data/vendor/tesseract-2.04/ccutil/errcode.h +104 -0
- data/vendor/tesseract-2.04/ccutil/fileerr.h +34 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.cpp +115 -0
- data/vendor/tesseract-2.04/ccutil/globaloc.h +40 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.cpp +57 -0
- data/vendor/tesseract-2.04/ccutil/hashfn.h +30 -0
- data/vendor/tesseract-2.04/ccutil/host.h +180 -0
- data/vendor/tesseract-2.04/ccutil/hosthplb.h +1 -0
- data/vendor/tesseract-2.04/ccutil/lsterr.h +43 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.cpp +126 -0
- data/vendor/tesseract-2.04/ccutil/mainblk.h +39 -0
- data/vendor/tesseract-2.04/ccutil/memblk.cpp +1106 -0
- data/vendor/tesseract-2.04/ccutil/memblk.h +189 -0
- data/vendor/tesseract-2.04/ccutil/memry.cpp +532 -0
- data/vendor/tesseract-2.04/ccutil/memry.h +192 -0
- data/vendor/tesseract-2.04/ccutil/memryerr.h +38 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.cpp +5 -0
- data/vendor/tesseract-2.04/ccutil/mfcpch.h +37 -0
- data/vendor/tesseract-2.04/ccutil/ndminx.h +31 -0
- data/vendor/tesseract-2.04/ccutil/notdll.h +28 -0
- data/vendor/tesseract-2.04/ccutil/nwmain.h +176 -0
- data/vendor/tesseract-2.04/ccutil/ocrclass.h +345 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.cpp +772 -0
- data/vendor/tesseract-2.04/ccutil/ocrshell.h +191 -0
- data/vendor/tesseract-2.04/ccutil/platform.h +18 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.cpp +543 -0
- data/vendor/tesseract-2.04/ccutil/scanutils.h +55 -0
- data/vendor/tesseract-2.04/ccutil/secname.h +9 -0
- data/vendor/tesseract-2.04/ccutil/serialis.cpp +117 -0
- data/vendor/tesseract-2.04/ccutil/serialis.h +93 -0
- data/vendor/tesseract-2.04/ccutil/stderr.h +26 -0
- data/vendor/tesseract-2.04/ccutil/strngs.cpp +495 -0
- data/vendor/tesseract-2.04/ccutil/strngs.h +138 -0
- data/vendor/tesseract-2.04/ccutil/tessclas.h +135 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.cpp +61 -0
- data/vendor/tesseract-2.04/ccutil/tessopt.h +30 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.cpp +122 -0
- data/vendor/tesseract-2.04/ccutil/tprintf.h +35 -0
- data/vendor/tesseract-2.04/ccutil/unichar.cpp +144 -0
- data/vendor/tesseract-2.04/ccutil/unichar.h +84 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.cpp +172 -0
- data/vendor/tesseract-2.04/ccutil/unicharmap.h +82 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.cpp +307 -0
- data/vendor/tesseract-2.04/ccutil/unicharset.h +267 -0
- data/vendor/tesseract-2.04/ccutil/varable.cpp +672 -0
- data/vendor/tesseract-2.04/ccutil/varable.h +419 -0
- data/vendor/tesseract-2.04/classify/Makefile.am +24 -0
- data/vendor/tesseract-2.04/classify/Makefile.in +647 -0
- data/vendor/tesseract-2.04/classify/adaptive.cpp +535 -0
- data/vendor/tesseract-2.04/classify/adaptive.h +199 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.cpp +2958 -0
- data/vendor/tesseract-2.04/classify/adaptmatch.h +86 -0
- data/vendor/tesseract-2.04/classify/baseline.cpp +58 -0
- data/vendor/tesseract-2.04/classify/baseline.h +91 -0
- data/vendor/tesseract-2.04/classify/blobclass.cpp +123 -0
- data/vendor/tesseract-2.04/classify/blobclass.h +49 -0
- data/vendor/tesseract-2.04/classify/chartoname.cpp +74 -0
- data/vendor/tesseract-2.04/classify/chartoname.h +21 -0
- data/vendor/tesseract-2.04/classify/cluster.cpp +2834 -0
- data/vendor/tesseract-2.04/classify/cluster.h +158 -0
- data/vendor/tesseract-2.04/classify/clusttool.cpp +507 -0
- data/vendor/tesseract-2.04/classify/clusttool.h +70 -0
- data/vendor/tesseract-2.04/classify/cutoffs.cpp +73 -0
- data/vendor/tesseract-2.04/classify/cutoffs.h +49 -0
- data/vendor/tesseract-2.04/classify/extern.h +32 -0
- data/vendor/tesseract-2.04/classify/extract.cpp +100 -0
- data/vendor/tesseract-2.04/classify/extract.h +36 -0
- data/vendor/tesseract-2.04/classify/featdefs.cpp +244 -0
- data/vendor/tesseract-2.04/classify/featdefs.h +71 -0
- data/vendor/tesseract-2.04/classify/flexfx.cpp +87 -0
- data/vendor/tesseract-2.04/classify/flexfx.h +34 -0
- data/vendor/tesseract-2.04/classify/float2int.cpp +126 -0
- data/vendor/tesseract-2.04/classify/float2int.h +65 -0
- data/vendor/tesseract-2.04/classify/fpoint.cpp +73 -0
- data/vendor/tesseract-2.04/classify/fpoint.h +63 -0
- data/vendor/tesseract-2.04/classify/fxdefs.cpp +74 -0
- data/vendor/tesseract-2.04/classify/fxdefs.h +93 -0
- data/vendor/tesseract-2.04/classify/fxid.h +69 -0
- data/vendor/tesseract-2.04/classify/hideedge.cpp +35 -0
- data/vendor/tesseract-2.04/classify/hideedge.h +76 -0
- data/vendor/tesseract-2.04/classify/intfx.cpp +608 -0
- data/vendor/tesseract-2.04/classify/intfx.h +63 -0
- data/vendor/tesseract-2.04/classify/intmatcher.cpp +1524 -0
- data/vendor/tesseract-2.04/classify/intmatcher.h +199 -0
- data/vendor/tesseract-2.04/classify/intproto.cpp +1823 -0
- data/vendor/tesseract-2.04/classify/intproto.h +320 -0
- data/vendor/tesseract-2.04/classify/kdtree.cpp +884 -0
- data/vendor/tesseract-2.04/classify/kdtree.h +118 -0
- data/vendor/tesseract-2.04/classify/mf.cpp +106 -0
- data/vendor/tesseract-2.04/classify/mf.h +43 -0
- data/vendor/tesseract-2.04/classify/mfdefs.cpp +58 -0
- data/vendor/tesseract-2.04/classify/mfdefs.h +60 -0
- data/vendor/tesseract-2.04/classify/mfoutline.cpp +1087 -0
- data/vendor/tesseract-2.04/classify/mfoutline.h +277 -0
- data/vendor/tesseract-2.04/classify/mfx.cpp +436 -0
- data/vendor/tesseract-2.04/classify/mfx.h +52 -0
- data/vendor/tesseract-2.04/classify/normfeat.cpp +132 -0
- data/vendor/tesseract-2.04/classify/normfeat.h +63 -0
- data/vendor/tesseract-2.04/classify/normmatch.cpp +305 -0
- data/vendor/tesseract-2.04/classify/normmatch.h +38 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.cpp +310 -0
- data/vendor/tesseract-2.04/classify/ocrfeatures.h +148 -0
- data/vendor/tesseract-2.04/classify/outfeat.cpp +262 -0
- data/vendor/tesseract-2.04/classify/outfeat.h +76 -0
- data/vendor/tesseract-2.04/classify/picofeat.cpp +297 -0
- data/vendor/tesseract-2.04/classify/picofeat.h +65 -0
- data/vendor/tesseract-2.04/classify/protos.cpp +472 -0
- data/vendor/tesseract-2.04/classify/protos.h +258 -0
- data/vendor/tesseract-2.04/classify/sigmenu.cpp +225 -0
- data/vendor/tesseract-2.04/classify/sigmenu.h +39 -0
- data/vendor/tesseract-2.04/classify/speckle.cpp +127 -0
- data/vendor/tesseract-2.04/classify/speckle.h +69 -0
- data/vendor/tesseract-2.04/classify/xform2d.cpp +120 -0
- data/vendor/tesseract-2.04/classify/xform2d.h +60 -0
- data/vendor/tesseract-2.04/config/config.guess +1466 -0
- data/vendor/tesseract-2.04/config/config.h.in +188 -0
- data/vendor/tesseract-2.04/config/config.sub +1579 -0
- data/vendor/tesseract-2.04/config/depcomp +530 -0
- data/vendor/tesseract-2.04/config/install-sh +269 -0
- data/vendor/tesseract-2.04/config/missing +198 -0
- data/vendor/tesseract-2.04/config/mkinstalldirs +40 -0
- data/vendor/tesseract-2.04/config/stamp-h.in +0 -0
- data/vendor/tesseract-2.04/configure +10424 -0
- data/vendor/tesseract-2.04/cutil/Makefile.am +14 -0
- data/vendor/tesseract-2.04/cutil/Makefile.in +612 -0
- data/vendor/tesseract-2.04/cutil/bitvec.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/bitvec.h +100 -0
- data/vendor/tesseract-2.04/cutil/callcpp.h +190 -0
- data/vendor/tesseract-2.04/cutil/const.h +108 -0
- data/vendor/tesseract-2.04/cutil/cutil.cpp +92 -0
- data/vendor/tesseract-2.04/cutil/cutil.h +159 -0
- data/vendor/tesseract-2.04/cutil/danerror.cpp +144 -0
- data/vendor/tesseract-2.04/cutil/danerror.h +41 -0
- data/vendor/tesseract-2.04/cutil/debug.cpp +97 -0
- data/vendor/tesseract-2.04/cutil/debug.h +348 -0
- data/vendor/tesseract-2.04/cutil/efio.cpp +62 -0
- data/vendor/tesseract-2.04/cutil/efio.h +32 -0
- data/vendor/tesseract-2.04/cutil/emalloc.cpp +91 -0
- data/vendor/tesseract-2.04/cutil/emalloc.h +44 -0
- data/vendor/tesseract-2.04/cutil/freelist.cpp +75 -0
- data/vendor/tesseract-2.04/cutil/freelist.h +45 -0
- data/vendor/tesseract-2.04/cutil/funcdefs.h +35 -0
- data/vendor/tesseract-2.04/cutil/general.h +33 -0
- data/vendor/tesseract-2.04/cutil/globals.cpp +69 -0
- data/vendor/tesseract-2.04/cutil/globals.h +70 -0
- data/vendor/tesseract-2.04/cutil/listio.cpp +68 -0
- data/vendor/tesseract-2.04/cutil/listio.h +43 -0
- data/vendor/tesseract-2.04/cutil/minmax.h +40 -0
- data/vendor/tesseract-2.04/cutil/oldheap.cpp +337 -0
- data/vendor/tesseract-2.04/cutil/oldheap.h +126 -0
- data/vendor/tesseract-2.04/cutil/oldlist.cpp +393 -0
- data/vendor/tesseract-2.04/cutil/oldlist.h +350 -0
- data/vendor/tesseract-2.04/cutil/structures.cpp +66 -0
- data/vendor/tesseract-2.04/cutil/structures.h +112 -0
- data/vendor/tesseract-2.04/cutil/tessarray.cpp +115 -0
- data/vendor/tesseract-2.04/cutil/tessarray.h +166 -0
- data/vendor/tesseract-2.04/cutil/tordvars.cpp +95 -0
- data/vendor/tesseract-2.04/cutil/tordvars.h +61 -0
- data/vendor/tesseract-2.04/cutil/variables.cpp +317 -0
- data/vendor/tesseract-2.04/cutil/variables.h +170 -0
- data/vendor/tesseract-2.04/dict/Makefile.am +13 -0
- data/vendor/tesseract-2.04/dict/Makefile.in +609 -0
- data/vendor/tesseract-2.04/dict/choicearr.h +96 -0
- data/vendor/tesseract-2.04/dict/choices.cpp +210 -0
- data/vendor/tesseract-2.04/dict/choices.h +241 -0
- data/vendor/tesseract-2.04/dict/context.cpp +270 -0
- data/vendor/tesseract-2.04/dict/context.h +82 -0
- data/vendor/tesseract-2.04/dict/dawg.cpp +363 -0
- data/vendor/tesseract-2.04/dict/dawg.h +394 -0
- data/vendor/tesseract-2.04/dict/hyphen.cpp +84 -0
- data/vendor/tesseract-2.04/dict/hyphen.h +125 -0
- data/vendor/tesseract-2.04/dict/lookdawg.cpp +228 -0
- data/vendor/tesseract-2.04/dict/lookdawg.h +76 -0
- data/vendor/tesseract-2.04/dict/makedawg.cpp +449 -0
- data/vendor/tesseract-2.04/dict/makedawg.h +83 -0
- data/vendor/tesseract-2.04/dict/matchdefs.h +145 -0
- data/vendor/tesseract-2.04/dict/permdawg.cpp +415 -0
- data/vendor/tesseract-2.04/dict/permdawg.h +98 -0
- data/vendor/tesseract-2.04/dict/permngram.cpp +358 -0
- data/vendor/tesseract-2.04/dict/permngram.h +33 -0
- data/vendor/tesseract-2.04/dict/permnum.cpp +522 -0
- data/vendor/tesseract-2.04/dict/permnum.h +83 -0
- data/vendor/tesseract-2.04/dict/permute.cpp +1704 -0
- data/vendor/tesseract-2.04/dict/permute.h +93 -0
- data/vendor/tesseract-2.04/dict/reduce.cpp +424 -0
- data/vendor/tesseract-2.04/dict/reduce.h +112 -0
- data/vendor/tesseract-2.04/dict/states.cpp +382 -0
- data/vendor/tesseract-2.04/dict/states.h +111 -0
- data/vendor/tesseract-2.04/dict/stopper.cpp +1458 -0
- data/vendor/tesseract-2.04/dict/stopper.h +103 -0
- data/vendor/tesseract-2.04/dict/trie.cpp +683 -0
- data/vendor/tesseract-2.04/dict/trie.h +190 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.am +2 -0
- data/vendor/tesseract-2.04/dlltest/Makefile.in +388 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.cpp +163 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.dsp +186 -0
- data/vendor/tesseract-2.04/dlltest/dlltest.vcproj +637 -0
- data/vendor/tesseract-2.04/eurotext.tif +0 -0
- data/vendor/tesseract-2.04/image/Makefile.am +10 -0
- data/vendor/tesseract-2.04/image/Makefile.in +596 -0
- data/vendor/tesseract-2.04/image/bitstrm.cpp +157 -0
- data/vendor/tesseract-2.04/image/bitstrm.h +73 -0
- data/vendor/tesseract-2.04/image/img.h +336 -0
- data/vendor/tesseract-2.04/image/imgbmp.cpp +223 -0
- data/vendor/tesseract-2.04/image/imgbmp.h +50 -0
- data/vendor/tesseract-2.04/image/imgerrs.h +35 -0
- data/vendor/tesseract-2.04/image/imgio.cpp +321 -0
- data/vendor/tesseract-2.04/image/imgio.h +22 -0
- data/vendor/tesseract-2.04/image/imgs.cpp +1764 -0
- data/vendor/tesseract-2.04/image/imgs.h +102 -0
- data/vendor/tesseract-2.04/image/imgtiff.cpp +723 -0
- data/vendor/tesseract-2.04/image/imgtiff.h +89 -0
- data/vendor/tesseract-2.04/image/imgunpk.h +1377 -0
- data/vendor/tesseract-2.04/image/svshowim.cpp +40 -0
- data/vendor/tesseract-2.04/image/svshowim.h +25 -0
- data/vendor/tesseract-2.04/java/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.am +1 -0
- data/vendor/tesseract-2.04/java/com/google/Makefile.in +470 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.am +4 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/Makefile.in +473 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ScrollView.java +421 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.am +5 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/Makefile.in +474 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEvent.java +87 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventHandler.java +296 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/events/SVEventType.java +31 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.am +7 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/Makefile.in +476 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVAbstractMenuItem.java +58 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVCheckboxMenuItem.java +60 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVEmptyMenuItem.java +48 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVImageHandler.java +228 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuBar.java +130 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVMenuItem.java +61 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVPopupMenu.java +142 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVSubMenuItem.java +41 -0
- data/vendor/tesseract-2.04/java/com/google/scrollview/ui/SVWindow.java +643 -0
- data/vendor/tesseract-2.04/java/makefile +55 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.am +13 -0
- data/vendor/tesseract-2.04/pageseg/Makefile.in +596 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.cpp +363 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg.h +90 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.cpp +82 -0
- data/vendor/tesseract-2.04/pageseg/leptonica_pageseg_interface.h +30 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.cpp +170 -0
- data/vendor/tesseract-2.04/pageseg/pageseg.h +29 -0
- data/vendor/tesseract-2.04/phototest.tif +0 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.am +31 -0
- data/vendor/tesseract-2.04/tessdata/Makefile.in +529 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/configs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/configs/api_config +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train +19 -0
- data/vendor/tesseract-2.04/tessdata/configs/box.train.stderr +18 -0
- data/vendor/tesseract-2.04/tessdata/configs/inter +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/kannada +4 -0
- data/vendor/tesseract-2.04/tessdata/configs/makebox +1 -0
- data/vendor/tesseract-2.04/tessdata/configs/unlv +3 -0
- data/vendor/tesseract-2.04/tessdata/confsets +3 -0
- data/vendor/tesseract-2.04/tessdata/eng.DangAmbigs +39 -0
- data/vendor/tesseract-2.04/tessdata/eng.freq-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.inttemp +0 -0
- data/vendor/tesseract-2.04/tessdata/eng.normproto +1247 -0
- data/vendor/tesseract-2.04/tessdata/eng.pffmtable +111 -0
- data/vendor/tesseract-2.04/tessdata/eng.unicharset +113 -0
- data/vendor/tesseract-2.04/tessdata/eng.user-words +921 -0
- data/vendor/tesseract-2.04/tessdata/eng.word-dawg +0 -0
- data/vendor/tesseract-2.04/tessdata/makedummies +8 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.am +3 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/Makefile.in +344 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/batch.nochop +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/matdemo +7 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/msdemo +13 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/nobatch +2 -0
- data/vendor/tesseract-2.04/tessdata/tessconfigs/segdemo +9 -0
- data/vendor/tesseract-2.04/tessdll.cpp +351 -0
- data/vendor/tesseract-2.04/tessdll.dsp +2050 -0
- data/vendor/tesseract-2.04/tessdll.h +143 -0
- data/vendor/tesseract-2.04/tessdll.vcproj +5495 -0
- data/vendor/tesseract-2.04/tesseract.dsp +2124 -0
- data/vendor/tesseract-2.04/tesseract.dsw +116 -0
- data/vendor/tesseract-2.04/tesseract.sln +59 -0
- data/vendor/tesseract-2.04/tesseract.spec +188 -0
- data/vendor/tesseract-2.04/tesseract.vcproj +5859 -0
- data/vendor/tesseract-2.04/testing/Makefile.am +2 -0
- data/vendor/tesseract-2.04/testing/Makefile.in +312 -0
- data/vendor/tesseract-2.04/testing/README +43 -0
- data/vendor/tesseract-2.04/testing/counttestset.sh +61 -0
- data/vendor/tesseract-2.04/testing/reorgdata.sh +44 -0
- data/vendor/tesseract-2.04/testing/reports/1995.bus.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.doe3.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.mag.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/1995.news.3B.sum +1 -0
- data/vendor/tesseract-2.04/testing/reports/2.03.summary +9 -0
- data/vendor/tesseract-2.04/testing/reports/2.04.summary +9 -0
- data/vendor/tesseract-2.04/testing/runalltests.sh +110 -0
- data/vendor/tesseract-2.04/testing/runtestset.sh +61 -0
- data/vendor/tesseract-2.04/textord/Makefile.am +20 -0
- data/vendor/tesseract-2.04/textord/Makefile.in +624 -0
- data/vendor/tesseract-2.04/textord/blkocc.cpp +809 -0
- data/vendor/tesseract-2.04/textord/blkocc.h +327 -0
- data/vendor/tesseract-2.04/textord/blobcmpl.h +31 -0
- data/vendor/tesseract-2.04/textord/drawedg.cpp +77 -0
- data/vendor/tesseract-2.04/textord/drawedg.h +34 -0
- data/vendor/tesseract-2.04/textord/drawtord.cpp +469 -0
- data/vendor/tesseract-2.04/textord/drawtord.h +107 -0
- data/vendor/tesseract-2.04/textord/edgblob.cpp +412 -0
- data/vendor/tesseract-2.04/textord/edgblob.h +100 -0
- data/vendor/tesseract-2.04/textord/edgloop.cpp +211 -0
- data/vendor/tesseract-2.04/textord/edgloop.h +66 -0
- data/vendor/tesseract-2.04/textord/fpchop.cpp +1641 -0
- data/vendor/tesseract-2.04/textord/fpchop.h +238 -0
- data/vendor/tesseract-2.04/textord/gap_map.cpp +166 -0
- data/vendor/tesseract-2.04/textord/gap_map.h +40 -0
- data/vendor/tesseract-2.04/textord/makerow.cpp +2628 -0
- data/vendor/tesseract-2.04/textord/makerow.h +295 -0
- data/vendor/tesseract-2.04/textord/oldbasel.cpp +1761 -0
- data/vendor/tesseract-2.04/textord/oldbasel.h +195 -0
- data/vendor/tesseract-2.04/textord/pithsync.cpp +696 -0
- data/vendor/tesseract-2.04/textord/pithsync.h +134 -0
- data/vendor/tesseract-2.04/textord/pitsync1.cpp +425 -0
- data/vendor/tesseract-2.04/textord/pitsync1.h +135 -0
- data/vendor/tesseract-2.04/textord/scanedg.cpp +452 -0
- data/vendor/tesseract-2.04/textord/scanedg.h +74 -0
- data/vendor/tesseract-2.04/textord/sortflts.cpp +80 -0
- data/vendor/tesseract-2.04/textord/sortflts.h +64 -0
- data/vendor/tesseract-2.04/textord/tessout.h +76 -0
- data/vendor/tesseract-2.04/textord/topitch.cpp +2019 -0
- data/vendor/tesseract-2.04/textord/topitch.h +195 -0
- data/vendor/tesseract-2.04/textord/tordmain.cpp +907 -0
- data/vendor/tesseract-2.04/textord/tordmain.h +132 -0
- data/vendor/tesseract-2.04/textord/tospace.cpp +1939 -0
- data/vendor/tesseract-2.04/textord/tospace.h +193 -0
- data/vendor/tesseract-2.04/textord/tovars.cpp +87 -0
- data/vendor/tesseract-2.04/textord/tovars.h +94 -0
- data/vendor/tesseract-2.04/textord/underlin.cpp +312 -0
- data/vendor/tesseract-2.04/textord/underlin.h +53 -0
- data/vendor/tesseract-2.04/textord/wordseg.cpp +620 -0
- data/vendor/tesseract-2.04/textord/wordseg.h +70 -0
- data/vendor/tesseract-2.04/training/Makefile.am +54 -0
- data/vendor/tesseract-2.04/training/Makefile.in +720 -0
- data/vendor/tesseract-2.04/training/cnTraining.cpp +855 -0
- data/vendor/tesseract-2.04/training/cntraining.dsp +243 -0
- data/vendor/tesseract-2.04/training/cntraining.vcproj +950 -0
- data/vendor/tesseract-2.04/training/mergenf.cpp +451 -0
- data/vendor/tesseract-2.04/training/mergenf.h +106 -0
- data/vendor/tesseract-2.04/training/mfTraining.cpp +1341 -0
- data/vendor/tesseract-2.04/training/mftraining.dsp +285 -0
- data/vendor/tesseract-2.04/training/mftraining.vcproj +1055 -0
- data/vendor/tesseract-2.04/training/name2char.cpp +166 -0
- data/vendor/tesseract-2.04/training/name2char.h +38 -0
- data/vendor/tesseract-2.04/training/training.cpp +190 -0
- data/vendor/tesseract-2.04/training/training.h +130 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.cpp +140 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.dsp +335 -0
- data/vendor/tesseract-2.04/training/unicharset_extractor.vcproj +769 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.cpp +69 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.dsp +319 -0
- data/vendor/tesseract-2.04/training/wordlist2dawg.vcproj +1113 -0
- data/vendor/tesseract-2.04/viewer/Makefile.am +9 -0
- data/vendor/tesseract-2.04/viewer/Makefile.in +591 -0
- data/vendor/tesseract-2.04/viewer/scrollview.cpp +825 -0
- data/vendor/tesseract-2.04/viewer/scrollview.h +414 -0
- data/vendor/tesseract-2.04/viewer/svmnode.cpp +140 -0
- data/vendor/tesseract-2.04/viewer/svmnode.h +94 -0
- data/vendor/tesseract-2.04/viewer/svpaint.cpp +220 -0
- data/vendor/tesseract-2.04/viewer/svutil.cpp +347 -0
- data/vendor/tesseract-2.04/viewer/svutil.h +138 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.am +23 -0
- data/vendor/tesseract-2.04/wordrec/Makefile.in +641 -0
- data/vendor/tesseract-2.04/wordrec/associate.cpp +62 -0
- data/vendor/tesseract-2.04/wordrec/associate.h +93 -0
- data/vendor/tesseract-2.04/wordrec/badwords.cpp +106 -0
- data/vendor/tesseract-2.04/wordrec/badwords.h +51 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.cpp +526 -0
- data/vendor/tesseract-2.04/wordrec/bestfirst.h +203 -0
- data/vendor/tesseract-2.04/wordrec/charsample.h +208 -0
- data/vendor/tesseract-2.04/wordrec/chop.cpp +458 -0
- data/vendor/tesseract-2.04/wordrec/chop.h +153 -0
- data/vendor/tesseract-2.04/wordrec/chopper.cpp +750 -0
- data/vendor/tesseract-2.04/wordrec/chopper.h +104 -0
- data/vendor/tesseract-2.04/wordrec/closed.cpp +136 -0
- data/vendor/tesseract-2.04/wordrec/closed.h +65 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/djmenus.h +33 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.cpp +92 -0
- data/vendor/tesseract-2.04/wordrec/drawfx.h +33 -0
- data/vendor/tesseract-2.04/wordrec/findseam.cpp +566 -0
- data/vendor/tesseract-2.04/wordrec/findseam.h +69 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.cpp +226 -0
- data/vendor/tesseract-2.04/wordrec/gradechop.h +91 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.cpp +194 -0
- data/vendor/tesseract-2.04/wordrec/heuristic.h +120 -0
- data/vendor/tesseract-2.04/wordrec/makechop.cpp +281 -0
- data/vendor/tesseract-2.04/wordrec/makechop.h +69 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.cpp +191 -0
- data/vendor/tesseract-2.04/wordrec/matchtab.h +45 -0
- data/vendor/tesseract-2.04/wordrec/matrix.cpp +118 -0
- data/vendor/tesseract-2.04/wordrec/matrix.h +104 -0
- data/vendor/tesseract-2.04/wordrec/measure.h +135 -0
- data/vendor/tesseract-2.04/wordrec/metrics.cpp +363 -0
- data/vendor/tesseract-2.04/wordrec/metrics.h +130 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.cpp +51 -0
- data/vendor/tesseract-2.04/wordrec/mfvars.h +27 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.cpp +110 -0
- data/vendor/tesseract-2.04/wordrec/msmenus.h +45 -0
- data/vendor/tesseract-2.04/wordrec/olutil.cpp +153 -0
- data/vendor/tesseract-2.04/wordrec/olutil.h +128 -0
- data/vendor/tesseract-2.04/wordrec/outlines.cpp +172 -0
- data/vendor/tesseract-2.04/wordrec/outlines.h +148 -0
- data/vendor/tesseract-2.04/wordrec/pieces.cpp +410 -0
- data/vendor/tesseract-2.04/wordrec/pieces.h +154 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.cpp +134 -0
- data/vendor/tesseract-2.04/wordrec/plotedges.h +71 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.cpp +116 -0
- data/vendor/tesseract-2.04/wordrec/plotseg.h +73 -0
- data/vendor/tesseract-2.04/wordrec/render.cpp +152 -0
- data/vendor/tesseract-2.04/wordrec/render.h +58 -0
- data/vendor/tesseract-2.04/wordrec/seam.cpp +482 -0
- data/vendor/tesseract-2.04/wordrec/seam.h +136 -0
- data/vendor/tesseract-2.04/wordrec/split.cpp +182 -0
- data/vendor/tesseract-2.04/wordrec/split.h +115 -0
- data/vendor/tesseract-2.04/wordrec/tally.cpp +68 -0
- data/vendor/tesseract-2.04/wordrec/tally.h +94 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.cpp +108 -0
- data/vendor/tesseract-2.04/wordrec/tessinit.h +46 -0
- data/vendor/tesseract-2.04/wordrec/tface.cpp +272 -0
- data/vendor/tesseract-2.04/wordrec/tface.h +35 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.cpp +284 -0
- data/vendor/tesseract-2.04/wordrec/wordclass.h +64 -0
- metadata +708 -0
@@ -0,0 +1,2958 @@
|
|
1
|
+
/******************************************************************************
|
2
|
+
** Filename: adaptmatch.c
|
3
|
+
** Purpose: High level adaptive matcher.
|
4
|
+
** Author: Dan Johnson
|
5
|
+
** History: Mon Mar 11 10:00:10 1991, DSJ, Created.
|
6
|
+
**
|
7
|
+
** (c) Copyright Hewlett-Packard Company, 1988.
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
9
|
+
** you may not use this file except in compliance with the License.
|
10
|
+
** You may obtain a copy of the License at
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
** See the License for the specific language governing permissions and
|
16
|
+
** limitations under the License.
|
17
|
+
******************************************************************************/
|
18
|
+
|
19
|
+
/**----------------------------------------------------------------------------
|
20
|
+
Include Files and Type Defines
|
21
|
+
----------------------------------------------------------------------------**/
|
22
|
+
#include <ctype.h>
|
23
|
+
#include "adaptmatch.h"
|
24
|
+
#include "normfeat.h"
|
25
|
+
#include "mfoutline.h"
|
26
|
+
#include "picofeat.h"
|
27
|
+
#include "float2int.h"
|
28
|
+
#include "outfeat.h"
|
29
|
+
#include "emalloc.h"
|
30
|
+
#include "intfx.h"
|
31
|
+
#include "permnum.h"
|
32
|
+
#include "speckle.h"
|
33
|
+
#include "efio.h"
|
34
|
+
#include "normmatch.h"
|
35
|
+
#include "stopper.h"
|
36
|
+
#include "permute.h"
|
37
|
+
#include "context.h"
|
38
|
+
#include "ndminx.h"
|
39
|
+
#include "intproto.h"
|
40
|
+
#include "const.h"
|
41
|
+
#include "globals.h"
|
42
|
+
#include "werd.h"
|
43
|
+
#include "callcpp.h"
|
44
|
+
#include "tordvars.h"
|
45
|
+
|
46
|
+
#include <stdio.h>
|
47
|
+
#include <string.h>
|
48
|
+
#include <ctype.h>
|
49
|
+
#include <stdlib.h>
|
50
|
+
#include <math.h>
|
51
|
+
#ifdef __UNIX__
|
52
|
+
#include <assert.h>
|
53
|
+
#endif
|
54
|
+
|
55
|
+
/* Suffix appended to the image file name to form the name of the
   adapted-templates file. */
#define ADAPT_TEMPLATE_SUFFIX   ".a"

/* Default names of the built-in training data files. */
#define BUILT_IN_TEMPLATES_FILE "inttemp"
#define BUILT_IN_CUTOFFS_FILE   "pffmtable"

#define MAX_MATCHES             10
#define UNLIKELY_NUM_FEAT       200
#define NO_DEBUG                0

/* Capacity of the per-blob threshold array used when adapting to a word;
   words whose choice has more characters than this are not adapted to. */
#define MAX_ADAPTABLE_WERD_SIZE 40

#define ADAPTABLE_WERD          (GOOD_NUMBER + 0.05)

#define Y_DIM_OFFSET            (Y_SHIFT - BASELINE_Y_SHIFT)

/* Value that the best rating of a fresh set of results starts from. */
#define WORST_POSSIBLE_RATING   (1.0)
|
69
|
+
|
70
|
+
typedef struct
|
71
|
+
{
|
72
|
+
inT32 BlobLength;
|
73
|
+
int NumMatches;
|
74
|
+
CLASS_ID Classes[MAX_NUM_CLASSES];
|
75
|
+
FLOAT32 Ratings[MAX_CLASS_ID + 1];
|
76
|
+
uinT8 Configs[MAX_CLASS_ID + 1];
|
77
|
+
FLOAT32 BestRating;
|
78
|
+
CLASS_ID BestClass;
|
79
|
+
uinT8 BestConfig;
|
80
|
+
CLASS_PRUNER_RESULTS CPResults;
|
81
|
+
}
|
82
|
+
|
83
|
+
|
84
|
+
ADAPT_RESULTS;
|
85
|
+
|
86
|
+
typedef struct
|
87
|
+
{
|
88
|
+
ADAPT_TEMPLATES Templates;
|
89
|
+
CLASS_ID ClassId;
|
90
|
+
int ConfigId;
|
91
|
+
}
|
92
|
+
|
93
|
+
|
94
|
+
PROTO_KEY;
|
95
|
+
|
96
|
+
/**----------------------------------------------------------------------------
|
97
|
+
Private Macros
|
98
|
+
----------------------------------------------------------------------------**/
|
99
|
+
/* True when Rating is strictly above the GreatAdaptiveMatch threshold. */
#define MarginalMatch(Rating) ((Rating) > GreatAdaptiveMatch)

/* True once Config has been seen at least ReliableConfigThreshold times. */
#define TempConfigReliable(Config) \
  ((Config)->NumTimesSeen >= ReliableConfigThreshold)

/* Clears the flag recording that features were already extracted. */
#define InitIntFX() (FeaturesHaveBeenExtracted = FALSE)
|
106
|
+
|
107
|
+
/**----------------------------------------------------------------------------
|
108
|
+
Private Function Prototypes
|
109
|
+
----------------------------------------------------------------------------**/
|
110
|
+
void AdaptToChar(TBLOB *Blob,
|
111
|
+
LINE_STATS *LineStats,
|
112
|
+
CLASS_ID ClassId,
|
113
|
+
FLOAT32 Threshold);
|
114
|
+
|
115
|
+
void AdaptToPunc(TBLOB *Blob,
|
116
|
+
LINE_STATS *LineStats,
|
117
|
+
CLASS_ID ClassId,
|
118
|
+
FLOAT32 Threshold);
|
119
|
+
|
120
|
+
void AddNewResult(ADAPT_RESULTS *Results,
|
121
|
+
CLASS_ID ClassId,
|
122
|
+
FLOAT32 Rating,
|
123
|
+
int ConfigId);
|
124
|
+
|
125
|
+
void AmbigClassifier(TBLOB *Blob,
|
126
|
+
LINE_STATS *LineStats,
|
127
|
+
INT_TEMPLATES Templates,
|
128
|
+
UNICHAR_ID *Ambiguities,
|
129
|
+
ADAPT_RESULTS *Results);
|
130
|
+
|
131
|
+
UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
|
132
|
+
LINE_STATS *LineStats,
|
133
|
+
ADAPT_TEMPLATES Templates,
|
134
|
+
ADAPT_RESULTS *Results);
|
135
|
+
|
136
|
+
void make_config_pruner(INT_TEMPLATES templates, CONFIG_PRUNER *config_pruner);
|
137
|
+
|
138
|
+
void CharNormClassifier(TBLOB *Blob,
|
139
|
+
LINE_STATS *LineStats,
|
140
|
+
INT_TEMPLATES Templates,
|
141
|
+
ADAPT_RESULTS *Results);
|
142
|
+
|
143
|
+
void ClassifyAsNoise(TBLOB *Blob,
|
144
|
+
LINE_STATS *LineStats,
|
145
|
+
ADAPT_RESULTS *Results);
|
146
|
+
|
147
|
+
int CompareCurrentRatings(const void *arg1,
|
148
|
+
const void *arg2);
|
149
|
+
|
150
|
+
LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results);
|
151
|
+
|
152
|
+
void DebugAdaptiveClassifier(TBLOB *Blob,
|
153
|
+
LINE_STATS *LineStats,
|
154
|
+
ADAPT_RESULTS *Results);
|
155
|
+
|
156
|
+
void DoAdaptiveMatch(TBLOB *Blob,
|
157
|
+
LINE_STATS *LineStats,
|
158
|
+
ADAPT_RESULTS *Results);
|
159
|
+
|
160
|
+
void GetAdaptThresholds(TWERD * Word,
|
161
|
+
LINE_STATS * LineStats,
|
162
|
+
const WERD_CHOICE& BestChoice,
|
163
|
+
const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]);
|
164
|
+
|
165
|
+
UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
|
166
|
+
LINE_STATS *LineStats,
|
167
|
+
CLASS_ID CorrectClass);
|
168
|
+
|
169
|
+
int GetBaselineFeatures(TBLOB *Blob,
|
170
|
+
LINE_STATS *LineStats,
|
171
|
+
INT_TEMPLATES Templates,
|
172
|
+
INT_FEATURE_ARRAY IntFeatures,
|
173
|
+
CLASS_NORMALIZATION_ARRAY CharNormArray,
|
174
|
+
inT32 *BlobLength);
|
175
|
+
|
176
|
+
FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId);
|
177
|
+
|
178
|
+
int GetCharNormFeatures(TBLOB *Blob,
|
179
|
+
LINE_STATS *LineStats,
|
180
|
+
INT_TEMPLATES Templates,
|
181
|
+
INT_FEATURE_ARRAY IntFeatures,
|
182
|
+
CLASS_NORMALIZATION_ARRAY CharNormArray,
|
183
|
+
inT32 *BlobLength);
|
184
|
+
|
185
|
+
int GetIntBaselineFeatures(TBLOB *Blob,
|
186
|
+
LINE_STATS *LineStats,
|
187
|
+
INT_TEMPLATES Templates,
|
188
|
+
INT_FEATURE_ARRAY IntFeatures,
|
189
|
+
CLASS_NORMALIZATION_ARRAY CharNormArray,
|
190
|
+
inT32 *BlobLength);
|
191
|
+
|
192
|
+
int GetIntCharNormFeatures(TBLOB *Blob,
|
193
|
+
LINE_STATS *LineStats,
|
194
|
+
INT_TEMPLATES Templates,
|
195
|
+
INT_FEATURE_ARRAY IntFeatures,
|
196
|
+
CLASS_NORMALIZATION_ARRAY CharNormArray,
|
197
|
+
inT32 *BlobLength);
|
198
|
+
|
199
|
+
void InitMatcherRatings(register FLOAT32 *Rating);
|
200
|
+
|
201
|
+
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
|
202
|
+
CLASS_ID ClassId,
|
203
|
+
int NumFeatures,
|
204
|
+
INT_FEATURE_ARRAY Features,
|
205
|
+
FEATURE_SET FloatFeatures);
|
206
|
+
|
207
|
+
PROTO_ID MakeNewTempProtos(FEATURE_SET Features,
|
208
|
+
int NumBadFeat,
|
209
|
+
FEATURE_ID BadFeat[],
|
210
|
+
INT_CLASS IClass,
|
211
|
+
ADAPT_CLASS Class, BIT_VECTOR TempProtoMask);
|
212
|
+
|
213
|
+
void MakePermanent(ADAPT_TEMPLATES Templates,
|
214
|
+
CLASS_ID ClassId,
|
215
|
+
int ConfigId,
|
216
|
+
TBLOB *Blob,
|
217
|
+
LINE_STATS *LineStats);
|
218
|
+
|
219
|
+
int MakeTempProtoPerm(void *item1, void *item2);
|
220
|
+
|
221
|
+
int NumBlobsIn(TWERD *Word);
|
222
|
+
|
223
|
+
int NumOutlinesInBlob(TBLOB *Blob);
|
224
|
+
|
225
|
+
void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results);
|
226
|
+
|
227
|
+
void RemoveBadMatches(ADAPT_RESULTS *Results);
|
228
|
+
|
229
|
+
void RemoveExtraPuncs(ADAPT_RESULTS *Results);
|
230
|
+
|
231
|
+
void SetAdaptiveThreshold(FLOAT32 Threshold);
|
232
|
+
void ShowBestMatchFor(TBLOB *Blob,
|
233
|
+
LINE_STATS *LineStats,
|
234
|
+
CLASS_ID ClassId,
|
235
|
+
BOOL8 AdaptiveOn,
|
236
|
+
BOOL8 PreTrainedOn);
|
237
|
+
|
238
|
+
|
239
|
+
/**----------------------------------------------------------------------------
|
240
|
+
Global Data Definitions and Declarations
|
241
|
+
----------------------------------------------------------------------------**/
|
242
|
+
/* name of current image file being processed */
|
243
|
+
extern char imagefile[];
|
244
|
+
INT_VAR(tessedit_single_match, FALSE, "Top choice only from CP");
|
245
|
+
|
246
|
+
/* variables used to hold performance statistics */
|
247
|
+
static int AdaptiveMatcherCalls = 0;
|
248
|
+
static int BaselineClassifierCalls = 0;
|
249
|
+
static int CharNormClassifierCalls = 0;
|
250
|
+
static int AmbigClassifierCalls = 0;
|
251
|
+
static int NumWordsAdaptedTo = 0;
|
252
|
+
static int NumCharsAdaptedTo = 0;
|
253
|
+
static int NumBaselineClassesTried = 0;
|
254
|
+
static int NumCharNormClassesTried = 0;
|
255
|
+
static int NumAmbigClassesTried = 0;
|
256
|
+
static int NumClassesOutput = 0;
|
257
|
+
static int NumAdaptationsFailed = 0;
|
258
|
+
|
259
|
+
/* define globals used to hold onto extracted features. This is used
|
260
|
+
to map from the old scheme in which baseline features and char norm
|
261
|
+
features are extracted separately, to the new scheme in which they
|
262
|
+
are extracted at the same time. */
|
263
|
+
static BOOL8 FeaturesHaveBeenExtracted = FALSE;
|
264
|
+
static BOOL8 FeaturesOK = TRUE;
|
265
|
+
static INT_FEATURE_ARRAY BaselineFeatures;
|
266
|
+
static INT_FEATURE_ARRAY CharNormFeatures;
|
267
|
+
static INT_FX_RESULT_STRUCT FXInfo;
|
268
|
+
|
269
|
+
/* use a global variable to hold onto the current ratings so that the
|
270
|
+
comparison function passes to qsort can get at them */
|
271
|
+
static FLOAT32 *CurrentRatings;
|
272
|
+
|
273
|
+
/* define globals to hold filenames of training data */
|
274
|
+
static const char *BuiltInTemplatesFile = BUILT_IN_TEMPLATES_FILE;
|
275
|
+
static const char *BuiltInCutoffsFile = BUILT_IN_CUTOFFS_FILE;
|
276
|
+
static CLASS_CUTOFF_ARRAY CharNormCutoffs;
|
277
|
+
static CLASS_CUTOFF_ARRAY BaselineCutoffs;
|
278
|
+
|
279
|
+
/* use global variables to hold onto built-in templates and adapted
|
280
|
+
templates */
|
281
|
+
static INT_TEMPLATES PreTrainedTemplates;
|
282
|
+
static ADAPT_TEMPLATES AdaptedTemplates;
|
283
|
+
|
284
|
+
/* create dummy proto and config masks for use with the built-in templates */
|
285
|
+
static BIT_VECTOR AllProtosOn;
|
286
|
+
static BIT_VECTOR PrunedProtos;
|
287
|
+
static BIT_VECTOR AllConfigsOn;
|
288
|
+
static BIT_VECTOR AllProtosOff;
|
289
|
+
static BIT_VECTOR AllConfigsOff;
|
290
|
+
static BIT_VECTOR TempProtoMask;
|
291
|
+
|
292
|
+
/* define control knobs for adaptive matcher */
|
293
|
+
make_toggle_const(EnableAdaptiveMatcher, 1, MakeEnableAdaptiveMatcher);
|
294
|
+
/* PREV DEFAULT 0 */
|
295
|
+
|
296
|
+
make_toggle_const(UsePreAdaptedTemplates, 0, MakeUsePreAdaptedTemplates);
|
297
|
+
make_toggle_const(SaveAdaptedTemplates, 0, MakeSaveAdaptedTemplates);
|
298
|
+
|
299
|
+
make_toggle_var(EnableAdaptiveDebugger, 0, MakeEnableAdaptiveDebugger,
|
300
|
+
18, 1, SetEnableAdaptiveDebugger, "Enable match debugger");
|
301
|
+
|
302
|
+
make_int_var(MatcherDebugLevel, 0, MakeMatcherDebugLevel,
|
303
|
+
18, 2, SetMatcherDebugLevel, "Matcher Debug Level: ");
|
304
|
+
|
305
|
+
make_int_var(MatchDebugFlags, 0, MakeMatchDebugFlags,
|
306
|
+
18, 3, SetMatchDebugFlags, "Matcher Debug Flags: ");
|
307
|
+
|
308
|
+
make_toggle_var(EnableLearning, 1, MakeEnableLearning,
|
309
|
+
18, 4, SetEnableLearning, "Enable learning");
|
310
|
+
/* PREV DEFAULT 0 */
|
311
|
+
/*record it for multiple pages */
|
312
|
+
static int old_enable_learning = 1;
|
313
|
+
|
314
|
+
make_int_var(LearningDebugLevel, 0, MakeLearningDebugLevel,
|
315
|
+
18, 5, SetLearningDebugLevel, "Learning Debug Level: ");
|
316
|
+
|
317
|
+
make_float_var(GoodAdaptiveMatch, 0.125, MakeGoodAdaptiveMatch,
|
318
|
+
18, 6, SetGoodAdaptiveMatch, "Good Match (0-1): ");
|
319
|
+
|
320
|
+
make_float_var(GreatAdaptiveMatch, 0.0, MakeGreatAdaptiveMatch,
|
321
|
+
18, 7, SetGreatAdaptiveMatch, "Great Match (0-1): ");
|
322
|
+
/* PREV DEFAULT 0.10 */
|
323
|
+
|
324
|
+
make_float_var(PerfectRating, 0.02, MakePerfectRating,
|
325
|
+
18, 8, SetPerfectRating, "Perfect Match (0-1): ");
|
326
|
+
|
327
|
+
make_float_var(BadMatchPad, 0.15, MakeBadMatchPad,
|
328
|
+
18, 9, SetBadMatchPad, "Bad Match Pad (0-1): ");
|
329
|
+
|
330
|
+
make_float_var(RatingMargin, 0.1, MakeRatingMargin,
|
331
|
+
18, 10, SetRatingMargin, "New template margin (0-1): ");
|
332
|
+
|
333
|
+
make_float_var(NoiseBlobLength, 12.0, MakeNoiseBlobLength,
|
334
|
+
18, 11, SetNoiseBlobLength, "Avg. noise blob length: ");
|
335
|
+
|
336
|
+
make_int_var(MinNumPermClasses, 1, MakeMinNumPermClasses,
|
337
|
+
18, 12, SetMinNumPermClasses, "Min # of permanent classes: ");
|
338
|
+
/* PREV DEFAULT 200 */
|
339
|
+
|
340
|
+
make_int_var(ReliableConfigThreshold, 2, MakeReliableConfigThreshold,
|
341
|
+
18, 13, SetReliableConfigThreshold,
|
342
|
+
"Reliable Config Threshold: ");
|
343
|
+
|
344
|
+
make_float_var(MaxAngleDelta, 0.015, MakeMaxAngleDelta,
|
345
|
+
18, 14, SetMaxAngleDelta,
|
346
|
+
"Maximum angle delta for proto clustering: ");
|
347
|
+
|
348
|
+
make_toggle_var(EnableIntFX, 1, MakeEnableIntFX,
|
349
|
+
18, 15, SetEnableIntFX, "Enable integer fx");
|
350
|
+
/* PREV DEFAULT 0 */
|
351
|
+
|
352
|
+
make_toggle_var(EnableNewAdaptRules, 1, MakeEnableNewAdaptRules,
|
353
|
+
18, 16, SetEnableNewAdaptRules,
|
354
|
+
"Enable new adaptation rules");
|
355
|
+
/* PREV DEFAULT 0 */
|
356
|
+
|
357
|
+
make_float_var(RatingScale, 1.5, MakeRatingScale,
|
358
|
+
18, 17, SetRatingScale, "Rating scale: ");
|
359
|
+
|
360
|
+
make_float_var(CertaintyScale, 20.0, MakeCertaintyScale,
|
361
|
+
18, 18, SetCertaintyScale, "CertaintyScale: ");
|
362
|
+
|
363
|
+
make_int_var(FailedAdaptionsBeforeReset, 150, MakeFailedAdaptionsBeforeReset,
|
364
|
+
18, 19, SetFailedAdaptionsBeforeReset,
|
365
|
+
"Number of failed adaptions before adapted templates reset: ");
|
366
|
+
double_VAR(tessedit_class_miss_scale, 0.00390625,
|
367
|
+
"Scale factor for features not used");
|
368
|
+
|
369
|
+
int tess_cn_matching = 0;
|
370
|
+
int tess_bn_matching = 0;
|
371
|
+
|
372
|
+
/**----------------------------------------------------------------------------
|
373
|
+
Public Code
|
374
|
+
----------------------------------------------------------------------------**/
|
375
|
+
/*---------------------------------------------------------------------------*/
|
376
|
+
LIST AdaptiveClassifier(TBLOB *Blob, TBLOB *DotBlob, TEXTROW *Row) {
/*
 ** Parameters:
 **   Blob      blob to be classified
 **   DotBlob   (obsolete)
 **   Row       row of text that word appears in
 ** Globals:
 **   CurrentRatings        used by compare function for qsort
 **   AdaptedTemplates      created here if currently NULL
 **   NumAdaptationsFailed  cleared when the adapted templates are reset
 **   NumClassesOutput      incremented by the number of choices produced
 ** Operation: Resets the adaptive classifier once
 **   FailedAdaptionsBeforeReset (when non-negative) failed adaptations
 **   have accumulated, then runs the adaptive matcher on Blob.  Bad
 **   matches are removed, the survivors are sorted with
 **   CompareCurrentRatings(), extra punctuation is removed and the
 **   matches are converted to choices.  Simple speckle filtering is
 **   also applied.
 ** Return: List of choices found by the adaptive matcher.  When no
 **   choice was found, a list holding a single empty choice is returned
 **   instead.
 ** Exceptions: none
 ** History: Mon Mar 11 10:00:58 1991, DSJ, Created.
 */
  LIST Choices;
  ADAPT_RESULTS *Results = new ADAPT_RESULTS;
  LINE_STATS LineStats;

  if (FailedAdaptionsBeforeReset >= 0 &&
      NumAdaptationsFailed >= FailedAdaptionsBeforeReset) {
    NumAdaptationsFailed = 0;
    ResetAdaptiveClassifier();
  }
  if (AdaptedTemplates == NULL)
    AdaptedTemplates = NewAdaptedTemplates ();
  EnterClassifyMode;

  Results->BlobLength = MAX_INT32;
  Results->NumMatches = 0;
  Results->BestRating = WORST_POSSIBLE_RATING;
  Results->BestClass = NO_CLASS;
  Results->BestConfig = 0;
  GetLineStatsFromRow(Row, &LineStats);
  InitMatcherRatings (Results->Ratings);

  DoAdaptiveMatch(Blob, &LineStats, Results);
  RemoveBadMatches(Results);

  /* save ratings in a global so that CompareCurrentRatings() can see them */
  CurrentRatings = Results->Ratings;
  qsort((void*) (Results->Classes), Results->NumMatches,
        sizeof (CLASS_ID), CompareCurrentRatings);
  RemoveExtraPuncs(Results);
  Choices = ConvertMatchesToChoices(Results);

  if (MatcherDebugLevel >= 1) {
    cprintf ("AD Matches = ");
    PrintAdaptiveMatchResults(stdout, Results);
  }

  if (LargeSpeckle (Blob, Row))
    Choices = AddLargeSpeckleTo (Choices);

#ifndef GRAPHICS_DISABLED
  if (EnableAdaptiveDebugger)
    DebugAdaptiveClassifier(Blob, &LineStats, Results);
#endif

  NumClassesOutput += count (Choices);

  /* Results is not read past this point.  Release it before EITHER return
     below; previously the Nil-classification path returned without
     deleting it and leaked one ADAPT_RESULTS per call. */
  delete Results;

  if (Choices == NIL) {
    char empty_lengths[] = {0};
    if (!bln_numericmode)
      tprintf ("Nil classification!\n");  // Should never normally happen.
    return (append_choice (NIL, "", empty_lengths, 50.0f, -20.0f, -1));
  }

  return Choices;
}                                /* AdaptiveClassifier */
|
453
|
+
|
454
|
+
|
455
|
+
/*---------------------------------------------------------------------------*/
|
456
|
+
void AdaptToWord(TWERD *Word,
                 TEXTROW *Row,
                 const WERD_CHOICE& BestChoice,
                 const WERD_CHOICE& BestRawChoice,
                 const char *rejmap) {
/*
 ** Parameters:
 **   Word           word to be adapted to
 **   Row            row of text that word is found in
 **   BestChoice     best choice for word found by system
 **   BestRawChoice  best choice for word found by classifier only
 **   rejmap         optional string with one '1' or '0' per blob; only
 **                  blobs flagged '1' are considered.  When NULL every
 **                  blob is considered.
 ** Globals:
 **   EnableLearning     nothing is adapted unless this is set
 **   NumWordsAdaptedTo  incremented for every word processed
 ** Operation: Walks the blobs of Word in step with the characters of
 **   BestChoice and hands each accepted blob to AdaptToChar() with the
 **   threshold computed for it by GetAdaptThresholds().  A single-byte
 **   'i' (or, under il1_adaption_test, an 'I' that ends the word or is
 **   followed by a lower-case character) is rejected when it starts the
 **   word, follows a character that is neither alpha nor digit, or (with
 **   il1_adaption_test off) does not consist of exactly 2 outlines.
 **   The alpha/digit filter and the AdaptToPunc() branch are disabled,
 **   so every other accepted character is adapted via AdaptToChar().
 **   Words whose choice is longer than MAX_ADAPTABLE_WERD_SIZE
 **   characters are ignored.
 ** Return: none
 ** Exceptions: none
 ** History: Thu Mar 14 07:40:36 1991, DSJ, Created.
 */
  TBLOB *CurBlob;
  LINE_STATS RowStats;
  FLOAT32 AdaptThresholds[MAX_ADAPTABLE_WERD_SIZE];
  FLOAT32 *CurThreshold;
  const char *NextFlag = rejmap;
  char AcceptFlag = '1';
  const char *CurChar = BestChoice.string().string();
  const char *CurLen = BestChoice.lengths().string();

  if (strlen(CurLen) > MAX_ADAPTABLE_WERD_SIZE)
    return;

  if (!EnableLearning)
    return;

  NumWordsAdaptedTo++;

#ifndef SECURE_NAMES
  if (LearningDebugLevel >= 1)
    cprintf ("\n\nAdapting to word = %s\n", BestChoice.string().string());
#endif
  GetLineStatsFromRow(Row, &RowStats);

  GetAdaptThresholds(Word, &RowStats, BestChoice, BestRawChoice,
                     AdaptThresholds);

  for (CurBlob = Word->blobs, CurThreshold = AdaptThresholds;
       CurBlob != NULL;
       CurBlob = CurBlob->next, CurChar += *(CurLen++), CurThreshold++) {
    InitIntFX();

    if (rejmap != NULL)
      AcceptFlag = *NextFlag++;

    assert (AcceptFlag == '1' || AcceptFlag == '0');

    if (AcceptFlag != '1')
      continue;

    /* SPECIAL RULE: don't adapt to an 'i' which is the first char in a
       word because it is too ambiguous with 'I'.  The new adaptation
       rules should cover this already, since they exclude ambiguous
       words, but the rule is kept for safety.  Also don't adapt to i's
       that do not have 2 outlines, because that creates too much
       ambiguity for broken characters.
       The tests below run in the same order, and stop at the same point,
       as one short-circuit expression would: neighbouring characters are
       only looked at once it is known that they exist. */
    bool Reject = false;
    if (*CurLen == 1) {
      bool AmbiguousI = (*CurChar == 'i');
      if (!AmbiguousI && il1_adaption_test && *CurChar == 'I')
        AmbiguousI = (CurBlob->next == NULL ||
                      unicharset.get_islower (CurChar + *CurLen,
                                              *(CurLen + 1)));
      if (AmbiguousI) {
        Reject = (CurBlob == Word->blobs);
        if (!Reject)
          Reject = !(unicharset.get_isalpha (CurChar - *(CurLen - 1),
                                             *(CurLen - 1)) ||
                     unicharset.get_isdigit (CurChar - *(CurLen - 1),
                                             *(CurLen - 1)));
        if (!Reject)
          Reject = (!il1_adaption_test && NumOutlinesInBlob(CurBlob) != 2);
      }
    }

    if (Reject) {
      if (LearningDebugLevel >= 1)
        cprintf ("Rejecting char = %s\n",
                 unicharset.id_to_unichar(
                   unicharset.unichar_to_id(CurChar, *CurLen)));
    }
    else {
#ifndef SECURE_NAMES
      if (LearningDebugLevel >= 1)
        cprintf ("Adapting to char = %s, thr= %g\n",
                 unicharset.id_to_unichar(
                   unicharset.unichar_to_id(CurChar, *CurLen)),
                 *CurThreshold);
#endif
      AdaptToChar(CurBlob, &RowStats,
                  unicharset.unichar_to_id(CurChar, *CurLen),
                  *CurThreshold);
    }
  }

  if (LearningDebugLevel >= 1)
    cprintf ("\n");
}                                /* AdaptToWord */
|
579
|
+
|
580
|
+
|
581
|
+
/*---------------------------------------------------------------------------*/
|
582
|
+
void EndAdaptiveClassifier() {
|
583
|
+
/*
|
584
|
+
** Parameters: none
|
585
|
+
** Globals:
|
586
|
+
** AdaptedTemplates
|
587
|
+
current set of adapted templates
|
588
|
+
** SaveAdaptedTemplates
|
589
|
+
TRUE if templates should be saved
|
590
|
+
** EnableAdaptiveMatcher
|
591
|
+
TRUE if adaptive matcher is enabled
|
592
|
+
** Operation: This routine performs cleanup operations on the
|
593
|
+
** adaptive classifier. It should be called before the
|
594
|
+
** program is terminated. Its main function is to save
|
595
|
+
** the adapted templates to a file.
|
596
|
+
** Return: none
|
597
|
+
** Exceptions: none
|
598
|
+
** History: Tue Mar 19 14:37:06 1991, DSJ, Created.
|
599
|
+
*/
|
600
|
+
char Filename[256];
|
601
|
+
FILE *File;
|
602
|
+
|
603
|
+
#ifndef SECURE_NAMES
|
604
|
+
if (EnableAdaptiveMatcher && SaveAdaptedTemplates) {
|
605
|
+
strcpy(Filename, imagefile);
|
606
|
+
strcat(Filename, ADAPT_TEMPLATE_SUFFIX);
|
607
|
+
File = fopen (Filename, "wb");
|
608
|
+
if (File == NULL)
|
609
|
+
cprintf ("Unable to save adapted templates to %s!\n", Filename);
|
610
|
+
else {
|
611
|
+
cprintf ("\nSaving adapted templates to %s ...", Filename);
|
612
|
+
fflush(stdout);
|
613
|
+
WriteAdaptedTemplates(File, AdaptedTemplates);
|
614
|
+
cprintf ("\n");
|
615
|
+
fclose(File);
|
616
|
+
}
|
617
|
+
}
|
618
|
+
#endif
|
619
|
+
if (PreTrainedTemplates == NULL)
|
620
|
+
return; // This function isn't safe to run twice.
|
621
|
+
EndDangerousAmbigs();
|
622
|
+
FreeNormProtos();
|
623
|
+
free_int_templates(PreTrainedTemplates);
|
624
|
+
PreTrainedTemplates = NULL;
|
625
|
+
FreeBitVector(AllProtosOn);
|
626
|
+
FreeBitVector(PrunedProtos);
|
627
|
+
FreeBitVector(AllConfigsOn);
|
628
|
+
FreeBitVector(AllProtosOff);
|
629
|
+
FreeBitVector(AllConfigsOff);
|
630
|
+
FreeBitVector(TempProtoMask);
|
631
|
+
AllProtosOn = NULL;
|
632
|
+
PrunedProtos = NULL;
|
633
|
+
AllConfigsOn = NULL;
|
634
|
+
AllProtosOff = NULL;
|
635
|
+
AllConfigsOff = NULL;
|
636
|
+
TempProtoMask = NULL;
|
637
|
+
} /* EndAdaptiveClassifier */
|
638
|
+
|
639
|
+
|
640
|
+
/*---------------------------------------------------------------------------*/
|
641
|
+
void InitAdaptiveClassifier() {
|
642
|
+
/*
|
643
|
+
** Parameters: none
|
644
|
+
** Globals:
|
645
|
+
** BuiltInTemplatesFile
|
646
|
+
file to get built-in temps from
|
647
|
+
** BuiltInCutoffsFile
|
648
|
+
file to get avg. feat per class from
|
649
|
+
** PreTrainedTemplates
|
650
|
+
pre-trained configs and protos
|
651
|
+
** AdaptedTemplates
|
652
|
+
templates adapted to current page
|
653
|
+
** CharNormCutoffs
|
654
|
+
avg # of features per class
|
655
|
+
** AllProtosOn
|
656
|
+
dummy proto mask with all bits 1
|
657
|
+
** AllConfigsOn
|
658
|
+
dummy config mask with all bits 1
|
659
|
+
** UsePreAdaptedTemplates
|
660
|
+
enables use of pre-adapted templates
|
661
|
+
** Operation: This routine reads in the training information needed
|
662
|
+
** by the adaptive classifier and saves it into global
|
663
|
+
** variables.
|
664
|
+
** Return: none
|
665
|
+
** Exceptions: none
|
666
|
+
** History: Mon Mar 11 12:49:34 1991, DSJ, Created.
|
667
|
+
*/
|
668
|
+
int i;
|
669
|
+
FILE *File;
|
670
|
+
STRING Filename;
|
671
|
+
|
672
|
+
if (!EnableAdaptiveMatcher)
|
673
|
+
return;
|
674
|
+
if (PreTrainedTemplates != NULL)
|
675
|
+
EndAdaptiveClassifier(); // Don't leak with multiple inits.
|
676
|
+
|
677
|
+
Filename = language_data_path_prefix;
|
678
|
+
Filename += BuiltInTemplatesFile;
|
679
|
+
#ifndef SECURE_NAMES
|
680
|
+
// cprintf( "\nReading built-in templates from %s ...",
|
681
|
+
// Filename);
|
682
|
+
fflush(stdout);
|
683
|
+
#endif
|
684
|
+
|
685
|
+
#ifdef __UNIX__
|
686
|
+
File = Efopen (Filename.string(), "r");
|
687
|
+
#else
|
688
|
+
File = Efopen (Filename.string(), "rb");
|
689
|
+
#endif
|
690
|
+
PreTrainedTemplates = ReadIntTemplates (File, TRUE);
|
691
|
+
fclose(File);
|
692
|
+
|
693
|
+
Filename = language_data_path_prefix;
|
694
|
+
Filename += BuiltInCutoffsFile;
|
695
|
+
#ifndef SECURE_NAMES
|
696
|
+
// cprintf( "\nReading built-in pico-feature cutoffs from %s ...",
|
697
|
+
// Filename);
|
698
|
+
fflush(stdout);
|
699
|
+
#endif
|
700
|
+
ReadNewCutoffs (Filename.string(), PreTrainedTemplates->IndexFor,
|
701
|
+
CharNormCutoffs);
|
702
|
+
|
703
|
+
GetNormProtos();
|
704
|
+
|
705
|
+
InitIntegerMatcher();
|
706
|
+
InitIntegerFX();
|
707
|
+
|
708
|
+
AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
|
709
|
+
PrunedProtos = NewBitVector(MAX_NUM_PROTOS);
|
710
|
+
AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
|
711
|
+
AllProtosOff = NewBitVector(MAX_NUM_PROTOS);
|
712
|
+
AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
|
713
|
+
TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
|
714
|
+
set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
|
715
|
+
set_all_bits(PrunedProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
|
716
|
+
set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
|
717
|
+
zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS));
|
718
|
+
zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
|
719
|
+
|
720
|
+
if (UsePreAdaptedTemplates) {
|
721
|
+
Filename = imagefile;
|
722
|
+
Filename += ADAPT_TEMPLATE_SUFFIX;
|
723
|
+
File = fopen (Filename.string(), "rb");
|
724
|
+
if (File == NULL)
|
725
|
+
AdaptedTemplates = NewAdaptedTemplates ();
|
726
|
+
else {
|
727
|
+
#ifndef SECURE_NAMES
|
728
|
+
cprintf ("\nReading pre-adapted templates from %s ...", Filename.string());
|
729
|
+
fflush(stdout);
|
730
|
+
#endif
|
731
|
+
AdaptedTemplates = ReadAdaptedTemplates (File);
|
732
|
+
cprintf ("\n");
|
733
|
+
fclose(File);
|
734
|
+
PrintAdaptedTemplates(stdout, AdaptedTemplates);
|
735
|
+
|
736
|
+
for (i = 0; i < (AdaptedTemplates->Templates)->NumClasses; i++) {
|
737
|
+
BaselineCutoffs[i] =
|
738
|
+
CharNormCutoffs[PreTrainedTemplates->IndexFor[
|
739
|
+
AdaptedTemplates->Templates->ClassIdFor[i]]];
|
740
|
+
}
|
741
|
+
}
|
742
|
+
} else {
|
743
|
+
if (AdaptedTemplates != NULL)
|
744
|
+
free_adapted_templates(AdaptedTemplates);
|
745
|
+
AdaptedTemplates = NewAdaptedTemplates ();
|
746
|
+
}
|
747
|
+
old_enable_learning = EnableLearning;
|
748
|
+
|
749
|
+
} /* InitAdaptiveClassifier */
|
750
|
+
|
751
|
+
/* Discards the current adapted templates and leaves AdaptedTemplates
   NULL.  Does nothing when there are no adapted templates, so it is safe
   to call repeatedly or before the classifier has been initialised; the
   NULL test mirrors the one InitAdaptiveClassifier() performs before
   calling free_adapted_templates(). */
void ResetAdaptiveClassifier() {
  if (AdaptedTemplates != NULL) {
    free_adapted_templates(AdaptedTemplates);
    AdaptedTemplates = NULL;
  }
}
|
755
|
+
|
756
|
+
|
757
|
+
/*---------------------------------------------------------------------------*/
|
758
|
+
void InitAdaptiveClassifierVars() {
/*
 ** Parameters: none
 ** Globals: none
 ** Operation: Installs the control knobs used by the adaptive matcher:
 **   the two training-data file name variables, every Make...() knob
 **   declared for this module, and the variables of the pico and
 **   outline feature extractors.
 ** Return: none
 ** Exceptions: none
 ** History: Mon Mar 11 12:49:34 1991, DSJ, Created.
 */
  VALUE dummy;

  /* names of the training data files */
  string_variable (BuiltInTemplatesFile, "BuiltInTemplatesFile",
                   BUILT_IN_TEMPLATES_FILE);
  string_variable (BuiltInCutoffsFile, "BuiltInCutoffsFile",
                   BUILT_IN_CUTOFFS_FILE);

  /* constant toggles */
  MakeEnableAdaptiveMatcher();
  MakeUsePreAdaptedTemplates();
  MakeSaveAdaptedTemplates();

  /* adjustable knobs */
  MakeEnableLearning();
  MakeEnableAdaptiveDebugger();
  MakeBadMatchPad();
  MakeGoodAdaptiveMatch();
  MakeGreatAdaptiveMatch();
  MakeNoiseBlobLength();
  MakeMinNumPermClasses();
  MakeReliableConfigThreshold();
  MakeMaxAngleDelta();
  MakeLearningDebugLevel();
  MakeMatcherDebugLevel();
  MakeMatchDebugFlags();
  MakeRatingMargin();
  MakePerfectRating();
  MakeEnableIntFX();
  MakeEnableNewAdaptRules();
  MakeRatingScale();
  MakeCertaintyScale();
  MakeFailedAdaptionsBeforeReset();

  /* feature extractor variables */
  InitPicoFXVars();
  InitOutlineFXVars();  //?

}                                /* InitAdaptiveClassifierVars */
|
803
|
+
|
804
|
+
|
805
|
+
/*---------------------------------------------------------------------------*/
|
806
|
+
void PrintAdaptiveStatistics(FILE *File) {
|
807
|
+
/*
|
808
|
+
** Parameters:
|
809
|
+
** File
|
810
|
+
open text file to print adaptive statistics to
|
811
|
+
** Globals: none
|
812
|
+
** Operation: Print to File the statistics which have been gathered
|
813
|
+
** for the adaptive matcher.
|
814
|
+
** Return: none
|
815
|
+
** Exceptions: none
|
816
|
+
** History: Thu Apr 18 14:37:37 1991, DSJ, Created.
|
817
|
+
*/
|
818
|
+
#ifndef SECURE_NAMES
|
819
|
+
|
820
|
+
fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
|
821
|
+
fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
|
822
|
+
fprintf (File, "\tNum classes output = %d (Avg = %4.2f)\n",
|
823
|
+
NumClassesOutput,
|
824
|
+
((AdaptiveMatcherCalls == 0) ? (0.0) :
|
825
|
+
((float) NumClassesOutput / AdaptiveMatcherCalls)));
|
826
|
+
fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
|
827
|
+
BaselineClassifierCalls,
|
828
|
+
((BaselineClassifierCalls == 0) ? (0.0) :
|
829
|
+
((float) NumBaselineClassesTried / BaselineClassifierCalls)));
|
830
|
+
fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
|
831
|
+
CharNormClassifierCalls,
|
832
|
+
((CharNormClassifierCalls == 0) ? (0.0) :
|
833
|
+
((float) NumCharNormClassesTried / CharNormClassifierCalls)));
|
834
|
+
fprintf (File, "\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n",
|
835
|
+
AmbigClassifierCalls,
|
836
|
+
((AmbigClassifierCalls == 0) ? (0.0) :
|
837
|
+
((float) NumAmbigClassesTried / AmbigClassifierCalls)));
|
838
|
+
|
839
|
+
fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
|
840
|
+
fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
|
841
|
+
fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
|
842
|
+
|
843
|
+
if (UsePreAdaptedTemplates)
|
844
|
+
PrintAdaptedTemplates(File, AdaptedTemplates);
|
845
|
+
#endif
|
846
|
+
} /* PrintAdaptiveStatistics */
|
847
|
+
|
848
|
+
|
849
|
+
/*---------------------------------------------------------------------------*/
|
850
|
+
void SettupPass1() {
/*
 **  Parameters: none
 **  Globals:
 **    EnableLearning        overwritten with old_enable_learning
 **    old_enable_learning   read only
 **  Operation: Prepares the adaptive matcher for the first pass.
 **    EnableLearning is reset from old_enable_learning, so learning
 **    is on during pass 1 only if that saved setting says so; this
 **    routine never forces it on.  The stopper is then prepared
 **    for pass 1 as well.
 **  Return: none
 */
  EnableLearning = old_enable_learning;

  SettupStopperPass1();
}                                /* SettupPass1 */
|
873
|
+
|
874
|
+
|
875
|
+
/*---------------------------------------------------------------------------*/
|
876
|
+
void SettupPass2() {
/*
 **  Parameters: none
 **  Globals:
 **    EnableLearning    cleared (set to FALSE) by this routine
 **  Operation: Prepares the adaptive matcher for the second pass:
 **    learning is switched off and the stopper is prepared for
 **    pass 2.
 **  Return: none
 */
  EnableLearning = FALSE;

  SettupStopperPass2();
}                                /* SettupPass2 */
|
892
|
+
|
893
|
+
|
894
|
+
/*---------------------------------------------------------------------------*/
|
895
|
+
void MakeNewAdaptedClass(TBLOB *Blob,
                         LINE_STATS *LineStats,
                         CLASS_ID ClassId,
                         ADAPT_TEMPLATES Templates) {
/*
 **  Parameters:
 **    Blob        blob used as the model for the new class
 **    LineStats   statistics of the text row the blob is in
 **    ClassId     id of the class to create
 **    Templates   adapted templates that receive the new class
 **  Globals:
 **    NormMethod            set to baseline
 **    AdaptedTemplates      compared against Templates
 **    BaselineCutoffs       entry for the new class is written when
 **                          Templates is AdaptedTemplates
 **    CharNormCutoffs,
 **    PreTrainedTemplates   source of that cutoff value
 **    AllProtosOn           proto mask handed to ConvertConfig
 **    LearningDebugLevel    >= 1 prints a summary line
 **  Operation: Extracts outline features from Blob, creates a new
 **    adapted class for ClassId and turns every extracted feature
 **    into one temporary proto of config 0 of that class.  If the
 **    blob yields more than UNLIKELY_NUM_FEAT features nothing is
 **    created.
 **  Return: none
 */
  NormMethod = baseline;
  FEATURE_SET Features = ExtractOutlineFeatures (Blob, LineStats);
  const int NumFeatures = Features->NumFeatures;
  if (NumFeatures > UNLIKELY_NUM_FEAT) {
    /* implausibly many features - refuse to learn from this blob */
    FreeFeatureSet(Features);
    return;
  }

  ADAPT_CLASS Class = NewAdaptedClass ();
  const CLASS_INDEX ClassIndex = AddAdaptedClass (Templates, Class, ClassId);
  TEMP_CONFIG Config = NewTempConfig (NumFeatures - 1);
  TempConfigFor (Class, 0) = Config;

  /* kludge: borrow the cutoff of the pre-trained class for the new
     adapted class */
  if (Templates == AdaptedTemplates) {
    BaselineCutoffs[ClassIndex] =
      CharNormCutoffs[PreTrainedTemplates->IndexFor[ClassId]];
  }

  INT_CLASS IClass = ClassForClassId (Templates->Templates, ClassId);

  for (int Fid = 0; Fid < NumFeatures; Fid++) {
    const int Pid = AddIntProto (IClass);
    assert (Pid != NO_PROTO);

    FEATURE Feature = Features->Features[Fid];
    TEMP_PROTO TempProto = NewTempProto ();
    PROTO Proto = &(TempProto->Proto);

    /* Y_DIM_OFFSET is subtracted because ConvertProto expects Y in
       -0.5 .. 0.5 whereas baseline normalization yields -0.25 .. 0.75 */
    Proto->Angle = Feature->Params[OutlineFeatDir];
    Proto->X = Feature->Params[OutlineFeatX];
    Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
    Proto->Length = Feature->Params[OutlineFeatLength];
    FillABC(Proto);

    TempProto->ProtoId = Pid;
    SET_BIT (Config->Protos, Pid);

    ConvertProto(Proto, Pid, IClass);
    AddProtoToProtoPruner(Proto, Pid, IClass);

    Class->TempProtos = push (Class->TempProtos, TempProto);
  }
  FreeFeatureSet(Features);

  AddIntConfig(IClass);
  ConvertConfig (AllProtosOn, 0, IClass);

  if (LearningDebugLevel >= 1) {
    cprintf ("Added new class '%s' with index %d and %d protos.\n",
             unicharset.id_to_unichar(ClassId), ClassIndex, NumFeatures);
  }
}                                /* MakeNewAdaptedClass */
|
988
|
+
|
989
|
+
|
990
|
+
/*---------------------------------------------------------------------------*/
|
991
|
+
int GetAdaptiveFeatures(TBLOB *Blob,
                        LINE_STATS *LineStats,
                        INT_FEATURE_ARRAY IntFeatures,
                        FEATURE_SET *FloatFeatures) {
/*
 **  Parameters:
 **    Blob           blob to extract features from
 **    LineStats      statistics of the text row the blob is in
 **    IntFeatures    array filled with the integer features
 **    FloatFeatures  receives the floating-pt feature set
 **  Globals:
 **    NormMethod     set to baseline
 **  Operation: Extracts baseline normalized pico-features from Blob,
 **    converts them to integer form in IntFeatures and hands the
 **    original floating-pt set back through FloatFeatures.
 **    Ownership: whenever *FloatFeatures is non-NULL on return the
 **    caller must release it with FreeFeatureSet() - this includes
 **    the case where the blob produced zero features and 0 is
 **    returned.  When the blob produces more than
 **    UNLIKELY_NUM_FEAT features the set is freed here and
 **    *FloatFeatures is set to NULL, so the caller never sees an
 **    indeterminate pointer.
 **  Return: Number of pico-features (0 if the blob was rejected or
 **    had no features)
 */
  NormMethod = baseline;
  FEATURE_SET Features = ExtractPicoFeatures (Blob, LineStats);

  const int NumFeatures = Features->NumFeatures;
  if (NumFeatures > UNLIKELY_NUM_FEAT) {
    FreeFeatureSet(Features);
    *FloatFeatures = NULL;       /* nothing for the caller to free */
    return (0);
  }

  ComputeIntFeatures(Features, IntFeatures);
  *FloatFeatures = Features;

  return (NumFeatures);
}                                /* GetAdaptiveFeatures */
|
1033
|
+
|
1034
|
+
|
1035
|
+
/**----------------------------------------------------------------------------
|
1036
|
+
Private Code
|
1037
|
+
----------------------------------------------------------------------------**/
|
1038
|
+
/*---------------------------------------------------------------------------*/
|
1039
|
+
int AdaptableWord(TWERD *Word,
                  const char *BestChoice,
                  const char *BestChoice_lengths,
                  const char *BestRawChoice,
                  const char *BestRawChoice_lengths) {
/*
 **  Parameters:
 **    Word                   current word
 **    BestChoice             best overall choice for the word
 **    BestChoice_lengths     NUL-terminated per-character length
 **                           string of BestChoice; its strlen is
 **                           taken as the number of characters
 **    BestRawChoice          best choice without context (only
 **                           checked for NULL here)
 **    BestRawChoice_lengths  length string of BestRawChoice (only
 **                           consulted by the old rules)
 **  Globals:
 **    EnableNewAdaptRules    selects the new or the old rule set
 **  Operation: Decides whether the word may be used for adaptation.
 **    Common requirements: all of Word, BestChoice, BestRawChoice
 **    and BestChoice_lengths are non-NULL, the choice is not empty,
 **    has exactly one character per blob of Word and is no longer
 **    than MAX_ADAPTABLE_WERD_SIZE.  After that either the new
 **    rules (adjust factor / alternatives / current best choice)
 **    or the old rules (same length as raw choice, dictionary word
 **    with ok case or pure number, acceptable punctuation) apply.
 **    Checks are evaluated in the same order as before, so the
 **    cheaper ones still short-circuit the expensive ones.
 **  Return: non-zero if the word is acceptable, 0 otherwise
 */
  /* rules that apply in general - simplest to compute first */
  if (BestChoice == NULL || BestRawChoice == NULL || Word == NULL ||
      BestChoice_lengths == NULL)
    return 0;

  const int BestChoiceLength = (int) strlen (BestChoice_lengths);
  if (BestChoiceLength <= 0 ||
      BestChoiceLength != NumBlobsIn (Word) ||
      BestChoiceLength > MAX_ADAPTABLE_WERD_SIZE)
    return 0;

  if (EnableNewAdaptRules) {
    /* new rules */
    return CurrentBestChoiceAdjustFactor () <= ADAPTABLE_WERD &&
           AlternativeChoicesWorseThan (ADAPTABLE_WERD) &&
           CurrentBestChoiceIs (BestChoice, BestChoice_lengths);
  }

  /* old rules */
  if (BestRawChoice_lengths == NULL ||
      (size_t) BestChoiceLength != strlen (BestRawChoice_lengths))
    return 0;

  const bool IsWord = valid_word (BestChoice) &&
                      case_ok (BestChoice, BestChoice_lengths);
  if (!IsWord &&
      !(valid_number (BestChoice, BestChoice_lengths) &&
        pure_number (BestChoice, BestChoice_lengths)))
    return 0;

  /* evaluate the punctuation check once instead of twice */
  const int PuncStatus = punctuation_ok (BestChoice, BestChoice_lengths);
  return PuncStatus != -1 && PuncStatus <= 1;
}                                /* AdaptableWord */
|
1092
|
+
|
1093
|
+
|
1094
|
+
/*---------------------------------------------------------------------------*/
|
1095
|
+
void AdaptToChar(TBLOB *Blob,
                 LINE_STATS *LineStats,
                 CLASS_ID ClassId,
                 FLOAT32 Threshold) {
/*
 **  Parameters:
 **    Blob        blob to add to the templates for ClassId
 **    LineStats   statistics of the text line the blob is in
 **    ClassId     class to add the blob to
 **    Threshold   a match whose rating is <= Threshold counts as a
 **                good match to an existing config
 **  Globals:
 **    AdaptedTemplates   current set of adapted templates
 **    AllProtosOn        mask to match against all protos
 **    AllConfigsOn       mask to match against all configs
 **    NumCharsAdaptedTo  incremented on every call
 **    LearningDebugLevel controls debug output
 **  Operation: If ClassId has no adapted class yet, one is created
 **    from Blob.  Otherwise Blob is matched against the adapted
 **    class:
 **      - good match to a permanent config: nothing to learn;
 **      - good match to a temporary config: its confidence is
 **        increased and it is made permanent once reliable;
 **      - poor match: a new temporary config is built from the
 **        blob (and made permanent if already reliable).
 **    The floating-pt feature set obtained from
 **    GetAdaptiveFeatures() is released on every exit path,
 **    including the one where no usable features were returned.
 **  Return: none
 */
  int NumFeatures;
  INT_FEATURE_ARRAY IntFeatures;
  INT_RESULT_STRUCT IntResult;
  CLASS_INDEX ClassIndex;
  INT_CLASS IClass;
  ADAPT_CLASS Class;
  TEMP_CONFIG TempConfig;
  /* NULL so that we can tell whether GetAdaptiveFeatures handed us a
     feature set that has to be freed */
  FEATURE_SET FloatFeatures = NULL;
  int NewTempConfigId;

  NumCharsAdaptedTo++;
  if (!LegalClassId (ClassId))
    return;

  if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) {
    MakeNewAdaptedClass(Blob, LineStats, ClassId, AdaptedTemplates);
    return;
  }

  IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId);
  ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];
  Class = AdaptedTemplates->Class[ClassIndex];

  NumFeatures = GetAdaptiveFeatures (Blob, LineStats,
                                     IntFeatures, &FloatFeatures);
  if (NumFeatures <= 0) {
    /* a zero-feature set is still owned by us - do not leak it */
    if (FloatFeatures != NULL)
      FreeFeatureSet(FloatFeatures);
    return;
  }

  SetBaseLineMatch();
  IntegerMatcher (IClass, AllProtosOn, AllConfigsOn,
                  NumFeatures, NumFeatures, IntFeatures, 0,
                  &IntResult, NO_DEBUG);

  SetAdaptiveThreshold(Threshold);

  if (IntResult.Rating <= Threshold) {
    if (ConfigIsPermanent (Class, IntResult.Config)) {
      if (LearningDebugLevel >= 1)
        cprintf ("Found good match to perm config %d = %4.1f%%.\n",
                 IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
      FreeFeatureSet(FloatFeatures);
      return;
    }

    TempConfig = TempConfigFor (Class, IntResult.Config);
    IncreaseConfidence(TempConfig);
    if (LearningDebugLevel >= 1)
      cprintf ("Increasing reliability of temp config %d to %d.\n",
               IntResult.Config, TempConfig->NumTimesSeen);

    if (TempConfigReliable (TempConfig))
      MakePermanent (AdaptedTemplates, ClassId, IntResult.Config,
                     Blob, LineStats);
  }
  else {
    if (LearningDebugLevel >= 1)
      cprintf ("Found poor match to temp config %d = %4.1f%%.\n",
               IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
    NewTempConfigId = MakeNewTemporaryConfig(AdaptedTemplates,
                                             ClassId,
                                             NumFeatures,
                                             IntFeatures,
                                             FloatFeatures);

    if (NewTempConfigId >= 0 &&
        TempConfigReliable (TempConfigFor (Class, NewTempConfigId)))
      MakePermanent (AdaptedTemplates, ClassId, NewTempConfigId,
                     Blob, LineStats);

#ifndef GRAPHICS_DISABLED
    if (LearningDebugLevel >= 1) {
      IntegerMatcher (IClass, AllProtosOn, AllConfigsOn,
                      NumFeatures, NumFeatures, IntFeatures, 0,
                      &IntResult, NO_DEBUG);
      cprintf ("Best match to temp config %d = %4.1f%%.\n",
               IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
      if (LearningDebugLevel >= 2) {
        /* shift an unsigned 1: a signed "1 << 31" would overflow */
        uinT32 ConfigMask = (uinT32) 1 << IntResult.Config;
        ShowMatchDisplay();
        IntegerMatcher (IClass, AllProtosOn, (BIT_VECTOR)&ConfigMask,
                        NumFeatures, NumFeatures, IntFeatures, 0,
                        &IntResult, 6 | 0x19);
        UpdateMatchDisplay();
        GetClassToDebug ("Adapting");
      }
    }
#endif  // GRAPHICS_DISABLED
  }
  FreeFeatureSet(FloatFeatures);
}                                /* AdaptToChar */
|
1212
|
+
|
1213
|
+
|
1214
|
+
/*---------------------------------------------------------------------------*/
|
1215
|
+
void AdaptToPunc(TBLOB *Blob,
                 LINE_STATS *LineStats,
                 CLASS_ID ClassId,
                 FLOAT32 Threshold) {
/*
 **  Parameters:
 **    Blob        blob to add to the templates for ClassId
 **    LineStats   statistics of the text line the blob is in
 **    ClassId     class to add the blob to
 **    Threshold   passed through unchanged to AdaptToChar
 **  Globals:
 **    PreTrainedTemplates  built-in templates the blob is first
 **                         classified against
 **    LearningDebugLevel   >= 1 enables debug output
 **  Operation: Classifies Blob with CharNormClassifier() against the
 **    built-in templates and strips the bad matches.  Only when
 **    exactly one match survives is the blob handed to
 **    AdaptToChar(); otherwise it is rejected (optionally listing
 **    the surviving alternatives).
 **  Return: none
 */
  ADAPT_RESULTS Results;

  /* start from an empty result set */
  Results.BlobLength = MAX_INT32;
  Results.NumMatches = 0;
  Results.BestRating = WORST_POSSIBLE_RATING;
  Results.BestClass = NO_CLASS;
  Results.BestConfig = 0;
  InitMatcherRatings (Results.Ratings);

  CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results);
  RemoveBadMatches(&Results);

  if (Results.NumMatches == 1) {
    /* unambiguous - safe to adapt */
#ifndef SECURE_NAMES
    if (LearningDebugLevel >= 1)
      cprintf ("Adapting to punc = %s, thr= %g\n",
               unicharset.id_to_unichar(ClassId), Threshold);
#endif
    AdaptToChar(Blob, LineStats, ClassId, Threshold);
    return;
  }

  if (LearningDebugLevel >= 1) {
    cprintf ("Rejecting punc = %s (Alternatives = ",
             unicharset.id_to_unichar(ClassId));

    for (int Match = 0; Match < Results.NumMatches; Match++)
      cprintf ("%s", unicharset.id_to_unichar(Results.Classes[Match]));
    cprintf (")\n");
  }
}                                /* AdaptToPunc */
|
1269
|
+
|
1270
|
+
|
1271
|
+
/*---------------------------------------------------------------------------*/
|
1272
|
+
void AddNewResult(ADAPT_RESULTS *Results,
                  CLASS_ID ClassId,
                  FLOAT32 Rating,
                  int ConfigId) {
/*
 **  Parameters:
 **    Results   results to add the new result to
 **    ClassId   class of the new result
 **    Rating    rating of the new result (smaller is better)
 **    ConfigId  config id of the new result
 **  Globals:
 **    BadMatchPad          how much worse than the best rating a
 **                         result may be and still be recorded
 **    PreTrainedTemplates  used to look up the class of ClassId
 **  Operation: The result is recorded only if its rating is within
 **    BadMatchPad of the best rating so far and strictly better
 **    than the rating already stored for ClassId.  When recorded:
 **      - the stored rating of the class is replaced;
 **      - the stored config becomes ConfigId if the pre-trained
 **        class has exactly 32 configs, otherwise ~0;
 **      - best rating/class/config are updated if this result
 **        beats the best so far;
 **      - the class is appended to the list of matched classes if
 **        its previous rating was WORST_POSSIBLE_RATING.
 **  Return: none
 */
  const FLOAT32 OldRating = Results->Ratings[ClassId];

  /* too far behind the leader, or no improvement for this class */
  if (!(Rating <= Results->BestRating + BadMatchPad && Rating < OldRating))
    return;

  Results->Ratings[ClassId] = Rating;

  INT_CLASS_STRUCT* CharClass = NULL;
  if (ClassId != NO_CLASS)
    CharClass = ClassForClassId(PreTrainedTemplates, ClassId);
  const bool KeepConfigId = CharClass != NULL && CharClass->NumConfigs == 32;
  Results->Configs[ClassId] = KeepConfigId ? ConfigId : ~0;

  if (Rating < Results->BestRating) {
    Results->BestRating = Rating;
    Results->BestClass = ClassId;
    Results->BestConfig = ConfigId;
  }

  /* first rating seen for this class: remember the class as matched */
  if (OldRating == WORST_POSSIBLE_RATING)
    Results->Classes[Results->NumMatches++] = ClassId;
}                                /* AddNewResult */
|
1326
|
+
|
1327
|
+
|
1328
|
+
/*---------------------------------------------------------------------------*/
|
1329
|
+
void AmbigClassifier(TBLOB *Blob,
                     LINE_STATS *LineStats,
                     INT_TEMPLATES Templates,
                     UNICHAR_ID *Ambiguities,
                     ADAPT_RESULTS *Results) {
/*
 **  Parameters:
 **    Blob         blob to be classified
 **    LineStats    statistics of the text line Blob is in
 **    Templates    templates to classify against
 **    Ambiguities  array of class ids to match against; the first
 **                 negative entry ends the list
 **    Results      place to put the match results
 **  Globals:
 **    AllProtosOn           mask that enables all protos
 **    AllConfigsOn          mask that enables all configs
 **    AmbigClassifierCalls  incremented once per call
 **    NumAmbigClassesTried  incremented once per class matched
 **    MatcherDebugLevel     >= 2 prints every match
 **  Operation: Extracts character normalized features from Blob and,
 **    without any class pruning, runs the integer matcher against
 **    every class listed in Ambiguities, adding each outcome to
 **    Results.  Nothing is matched if no features are extracted.
 **  Return: none
 */
  INT_FEATURE_ARRAY IntFeatures;
  CLASS_NORMALIZATION_ARRAY CharNormArray;
  INT_RESULT_STRUCT IntResult;

  AmbigClassifierCalls++;

  const int NumFeatures = GetCharNormFeatures (Blob, LineStats,
                                               Templates,
                                               IntFeatures, CharNormArray,
                                               &(Results->BlobLength));
  if (NumFeatures <= 0)
    return;

  if (MatcherDebugLevel >= 2)
    cprintf ("AM Matches = ");

  for (; *Ambiguities >= 0; Ambiguities++) {
    const CLASS_ID ClassId = *Ambiguities;
    const CLASS_INDEX ClassIndex = Templates->IndexFor[ClassId];

    SetCharNormMatch();
    IntegerMatcher (ClassForClassId (Templates, ClassId),
                    AllProtosOn, AllConfigsOn,
                    Results->BlobLength, NumFeatures, IntFeatures,
                    CharNormArray[ClassIndex], &IntResult, NO_DEBUG);

    if (MatcherDebugLevel >= 2)
      cprintf ("%s-%-2d %2.0f ", unicharset.id_to_unichar(ClassId),
               IntResult.Config,
               IntResult.Rating * 100.0);

    AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config);

    NumAmbigClassesTried++;
  }

  if (MatcherDebugLevel >= 2)
    cprintf ("\n");
}                                /* AmbigClassifier */
|
1403
|
+
|
1404
|
+
/*---------------------------------------------------------------------------*/
|
1405
|
+
// Factored-out calls to IntegerMatcher based on class pruner results.
|
1406
|
+
// Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.
|
1407
|
+
void MasterMatcher(INT_TEMPLATES templates,
|
1408
|
+
inT16 num_features,
|
1409
|
+
INT_FEATURE_ARRAY features,
|
1410
|
+
CLASS_NORMALIZATION_ARRAY norm_factors,
|
1411
|
+
ADAPT_CLASS* classes,
|
1412
|
+
int debug,
|
1413
|
+
int num_classes,
|
1414
|
+
CLASS_PRUNER_RESULTS results,
|
1415
|
+
ADAPT_RESULTS* final_results) {
|
1416
|
+
for (int c = 0; c < num_classes; c++) {
|
1417
|
+
CLASS_ID class_id = results[c].Class;
|
1418
|
+
INT_RESULT_STRUCT& int_result = results[c].IMResult;
|
1419
|
+
CLASS_INDEX class_index = templates->IndexFor[class_id];
|
1420
|
+
BIT_VECTOR protos = classes != NULL ? classes[class_index]->PermProtos
|
1421
|
+
: AllProtosOn;
|
1422
|
+
BIT_VECTOR configs = classes != NULL ? classes[class_index]->PermConfigs
|
1423
|
+
: AllConfigsOn;
|
1424
|
+
|
1425
|
+
IntegerMatcher(ClassForClassId(templates, class_id),
|
1426
|
+
protos, configs, final_results->BlobLength,
|
1427
|
+
num_features, features, norm_factors[class_index],
|
1428
|
+
&int_result, NO_DEBUG);
|
1429
|
+
// Compute class feature corrections.
|
1430
|
+
double miss_penalty = tessedit_class_miss_scale *
|
1431
|
+
int_result.FeatureMisses;
|
1432
|
+
if (MatcherDebugLevel >= 2 || display_ratings > 1) {
|
1433
|
+
cprintf("%s-%-2d %2.1f(CP%2.1f, IM%2.1f + MP%2.1f) ",
|
1434
|
+
unicharset.id_to_unichar(class_id), int_result.Config,
|
1435
|
+
(int_result.Rating + miss_penalty) * 100.0,
|
1436
|
+
results[c].Rating * 100.0,
|
1437
|
+
int_result.Rating * 100.0, miss_penalty * 100.0);
|
1438
|
+
if (c % 4 == 3)
|
1439
|
+
cprintf ("\n");
|
1440
|
+
}
|
1441
|
+
int_result.Rating += miss_penalty;
|
1442
|
+
if (int_result.Rating > WORST_POSSIBLE_RATING)
|
1443
|
+
int_result.Rating = WORST_POSSIBLE_RATING;
|
1444
|
+
AddNewResult(final_results, class_id, int_result.Rating, int_result.Config);
|
1445
|
+
}
|
1446
|
+
if (MatcherDebugLevel >= 2 || display_ratings > 1)
|
1447
|
+
cprintf("\n");
|
1448
|
+
}
|
1449
|
+
|
1450
|
+
/*---------------------------------------------------------------------------*/
|
1451
|
+
UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
                               LINE_STATS *LineStats,
                               ADAPT_TEMPLATES Templates,
                               ADAPT_RESULTS *Results) {
/*
 **  Parameters:
 **    Blob        blob to be classified
 **    LineStats   statistics of the text line Blob is in
 **    Templates   current set of adapted templates
 **    Results     place to put the match results
 **  Globals:
 **    BaselineCutoffs          cutoffs handed to the class pruner
 **    BaselineClassifierCalls  incremented once per call
 **    NumBaselineClassesTried  increased by the number of classes
 **                             the pruner returned
 **    MatchDebugFlags, MatcherDebugLevel, display_ratings
 **                             debug controls
 **  Operation: Extracts baseline normalized features from Blob,
 **    prunes the classes of Templates and matches the survivors
 **    with MasterMatcher(), which adds them to Results.
 **  Return: The Perm member of the best config of the best class
 **    (the ambiguous chars that should be checked), or NULL if no
 **    features were extracted or Results holds no best class.
 **    NOTE(review): the original author flagged the NULL return
 **    for "no best class" as a possible bug ("maybe should
 **    return \"\"").
 */
  INT_FEATURE_ARRAY IntFeatures;
  CLASS_NORMALIZATION_ARRAY CharNormArray;

  BaselineClassifierCalls++;

  const int NumFeatures = GetBaselineFeatures (Blob, LineStats,
                                               Templates->Templates,
                                               IntFeatures, CharNormArray,
                                               &(Results->BlobLength));
  if (NumFeatures <= 0)
    return NULL;

  const int NumClasses = ClassPruner (Templates->Templates, NumFeatures,
                                      IntFeatures, CharNormArray,
                                      BaselineCutoffs, Results->CPResults,
                                      MatchDebugFlags);

  NumBaselineClassesTried += NumClasses;

  if (MatcherDebugLevel >= 2 || display_ratings > 1)
    cprintf ("BL Matches = ");

  SetBaseLineMatch();
  MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray,
                Templates->Class, MatchDebugFlags, NumClasses,
                Results->CPResults, Results);

  const CLASS_ID BestClassId = Results->BestClass;
  if (BestClassId == NO_CLASS)
    return (NULL);

  const CLASS_INDEX BestClassIndex = Templates->Templates->IndexFor[BestClassId];
  return (Templates->Class[BestClassIndex]->
          Config[Results->BestConfig].Perm);
}                                /* BaselineClassifier */
|
1516
|
+
|
1517
|
+
|
1518
|
+
/*---------------------------------------------------------------------------*/
|
1519
|
+
void CharNormClassifier(TBLOB *Blob,
                        LINE_STATS *LineStats,
                        INT_TEMPLATES Templates,
                        ADAPT_RESULTS *Results) {
/*
 **  Parameters:
 **    Blob        blob to be classified
 **    LineStats   statistics of the text line Blob is in
 **    Templates   templates to classify the unknown against
 **    Results     place to put the match results
 **  Globals:
 **    CharNormCutoffs          cutoffs handed to the class pruner
 **    CharNormClassifierCalls  incremented once per call
 **    NumCharNormClassesTried  increased by the number of classes
 **                             actually matched
 **    tessedit_single_match    when set, at most one pruned class is
 **                             matched
 **    MatchDebugFlags, MatcherDebugLevel, display_ratings
 **                             debug controls
 **  Operation: Extracts character normalized features from Blob,
 **    prunes the classes of Templates and matches the survivors
 **    with MasterMatcher() using all protos and configs (no
 **    adapted classes are passed).  Matches are added to Results.
 **    Nothing is matched if no features are extracted.
 **  Return: none
 */
  INT_FEATURE_ARRAY IntFeatures;
  CLASS_NORMALIZATION_ARRAY CharNormArray;

  CharNormClassifierCalls++;

  const int NumFeatures = GetCharNormFeatures(Blob, LineStats,
                                              Templates,
                                              IntFeatures, CharNormArray,
                                              &(Results->BlobLength));
  if (NumFeatures <= 0)
    return;

  int NumClasses = ClassPruner(Templates, NumFeatures,
                               IntFeatures, CharNormArray,
                               CharNormCutoffs, Results->CPResults,
                               MatchDebugFlags);

  /* single-match mode keeps only the first pruner result */
  if (tessedit_single_match && NumClasses > 1)
    NumClasses = 1;
  NumCharNormClassesTried += NumClasses;

  if (MatcherDebugLevel >= 2 || display_ratings > 1)
    cprintf("CN Matches = ");

  SetCharNormMatch();
  MasterMatcher(Templates, NumFeatures, IntFeatures, CharNormArray,
                NULL, MatchDebugFlags, NumClasses,
                Results->CPResults, Results);
}                                /* CharNormClassifier */
|
1579
|
+
|
1580
|
+
|
1581
|
+
/*---------------------------------------------------------------------------*/
|
1582
|
+
void ClassifyAsNoise(TBLOB *Blob,
                     LINE_STATS *LineStats,
                     ADAPT_RESULTS *Results) {
/*
 **  Parameters:
 **    Blob        not used by this routine
 **    LineStats   not used by this routine
 **    Results     results to add the noise classification to;
 **                Results->BlobLength must already be filled in
 **  Globals:
 **    NoiseBlobLength  blob length the rating is scaled by
 **  Operation: Adds a NO_CLASS result (config 0) to Results whose
 **    rating is r*r / (1 + r*r) with
 **    r = Results->BlobLength / NoiseBlobLength.
 **  Return: none
 */
  /* the obsolete "register" specifier was dropped: it is deprecated
     in C++11 and no longer valid in C++17 */
  FLOAT32 Rating = Results->BlobLength / NoiseBlobLength;
  Rating *= Rating;
  Rating /= 1.0 + Rating;

  AddNewResult (Results, NO_CLASS, Rating, 0);
}                                /* ClassifyAsNoise */
|
1612
|
+
|
1613
|
+
|
1614
|
+
/*---------------------------------------------------------------------------*/
|
1615
|
+
int CompareCurrentRatings( //CLASS_ID *Class1,
|
1616
|
+
const void *arg1,
|
1617
|
+
const void *arg2) { //CLASS_ID *Class2)
|
1618
|
+
/*
|
1619
|
+
** Parameters:
|
1620
|
+
** Class1, Class2
|
1621
|
+
classes whose ratings are to be compared
|
1622
|
+
** Globals:
|
1623
|
+
** CurrentRatings
|
1624
|
+
contains actual ratings for each class
|
1625
|
+
** Operation: This routine gets the ratings for the 2 specified classes
|
1626
|
+
** from a global variable (CurrentRatings) and returns:
|
1627
|
+
** -1 if Rating1 < Rating2
|
1628
|
+
** 0 if Rating1 = Rating2
|
1629
|
+
** 1 if Rating1 > Rating2
|
1630
|
+
** Return: Order of classes based on their ratings (see above).
|
1631
|
+
** Exceptions: none
|
1632
|
+
** History: Tue Mar 12 14:18:31 1991, DSJ, Created.
|
1633
|
+
*/
|
1634
|
+
FLOAT32 Rating1, Rating2;
|
1635
|
+
CLASS_ID *Class1 = (CLASS_ID *) arg1;
|
1636
|
+
CLASS_ID *Class2 = (CLASS_ID *) arg2;
|
1637
|
+
|
1638
|
+
Rating1 = CurrentRatings[*Class1];
|
1639
|
+
Rating2 = CurrentRatings[*Class2];
|
1640
|
+
|
1641
|
+
if (Rating1 < Rating2)
|
1642
|
+
return (-1);
|
1643
|
+
else if (Rating1 > Rating2)
|
1644
|
+
return (1);
|
1645
|
+
else
|
1646
|
+
return (0);
|
1647
|
+
|
1648
|
+
} /* CompareCurrentRatings */
|
1649
|
+
|
1650
|
+
|
1651
|
+
/*---------------------------------------------------------------------------*/
|
1652
|
+
LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results) {
/*
 **  Parameters:
 **    Results  adaptive matcher results to convert to choices;
 **             Results->NumMatches is clamped to MAX_MATCHES
 **  Globals:
 **    RatingScale     scales a match rating into a choice rating
 **    CertaintyScale  scales a match rating into a certainty
 **  Operation: Builds one choice per matched class, in the order the
 **    classes appear in Results->Classes.  For a match rating m
 **    the choice gets
 **      rating    = m * (RatingScale * Results->BlobLength)
 **      certainty = m * -CertaintyScale
 **    NO_CLASS is represented by the empty string.
 **  Return: List of choices.
 */
  char choice_lengths[2] = {0, 0};
  LIST Choices = NIL;

  if (Results->NumMatches > MAX_MATCHES)
    Results->NumMatches = MAX_MATCHES;

  for (int i = 0; i < Results->NumMatches; i++) {
    const CLASS_ID NextMatch = Results->Classes[i];

    FLOAT32 Certainty = Results->Ratings[NextMatch];
    FLOAT32 Rating = Certainty;
    Rating *= RatingScale * Results->BlobLength;
    Certainty *= -CertaintyScale;

    const char *NextMatch_unichar = "";
    if (NextMatch != NO_CLASS)
      NextMatch_unichar = unicharset.id_to_unichar(NextMatch);

    /* a single unichar: the lengths string holds just its byte count */
    choice_lengths[0] = strlen(NextMatch_unichar);
    Choices = append_choice (Choices,
                             NextMatch_unichar,
                             choice_lengths,
                             Rating, Certainty,
                             Results->Configs[NextMatch],
                             unicharset.get_script(NextMatch));
  }
  return (Choices);
}                                /* ConvertMatchesToChoices */
|
1698
|
+
|
1699
|
+
|
1700
|
+
/*---------------------------------------------------------------------------*/
|
1701
|
+
#ifndef GRAPHICS_DISABLED
|
1702
|
+
void DebugAdaptiveClassifier(TBLOB *Blob,
|
1703
|
+
LINE_STATS *LineStats,
|
1704
|
+
ADAPT_RESULTS *Results) {
|
1705
|
+
/*
|
1706
|
+
** Parameters:
|
1707
|
+
** Blob
|
1708
|
+
blob whose classification is being debugged
|
1709
|
+
** LineStats
|
1710
|
+
statistics for text line blob is in
|
1711
|
+
** Results
|
1712
|
+
results of match being debugged
|
1713
|
+
** Globals: none
|
1714
|
+
** Operation:
|
1715
|
+
** Return: none
|
1716
|
+
** Exceptions: none
|
1717
|
+
** History: Wed Mar 13 16:44:41 1991, DSJ, Created.
|
1718
|
+
*/
|
1719
|
+
const char *Prompt =
|
1720
|
+
"Left-click in IntegerMatch Window to continue or right click to debug...";
|
1721
|
+
const char *DebugMode = "All Templates";
|
1722
|
+
CLASS_ID LastClass = Results->BestClass;
|
1723
|
+
CLASS_ID ClassId;
|
1724
|
+
BOOL8 AdaptiveOn = TRUE;
|
1725
|
+
BOOL8 PreTrainedOn = TRUE;
|
1726
|
+
|
1727
|
+
ShowMatchDisplay();
|
1728
|
+
cprintf ("\nDebugging class = %s (%s) ...\n",
|
1729
|
+
unicharset.id_to_unichar(LastClass), DebugMode);
|
1730
|
+
ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn);
|
1731
|
+
UpdateMatchDisplay();
|
1732
|
+
|
1733
|
+
while ((ClassId = GetClassToDebug (Prompt)) != 0) {
|
1734
|
+
#if 0
|
1735
|
+
switch (ClassId) {
|
1736
|
+
case 'b':
|
1737
|
+
AdaptiveOn = TRUE;
|
1738
|
+
PreTrainedOn = FALSE;
|
1739
|
+
DebugMode = "Adaptive Templates Only";
|
1740
|
+
break;
|
1741
|
+
|
1742
|
+
case 'c':
|
1743
|
+
AdaptiveOn = FALSE;
|
1744
|
+
PreTrainedOn = TRUE;
|
1745
|
+
DebugMode = "PreTrained Templates Only";
|
1746
|
+
break;
|
1747
|
+
|
1748
|
+
case 'a':
|
1749
|
+
AdaptiveOn = TRUE;
|
1750
|
+
PreTrainedOn = TRUE;
|
1751
|
+
DebugMode = "All Templates";
|
1752
|
+
break;
|
1753
|
+
|
1754
|
+
default:
|
1755
|
+
LastClass = ClassId;
|
1756
|
+
break;
|
1757
|
+
}
|
1758
|
+
#endif
|
1759
|
+
LastClass = ClassId;
|
1760
|
+
|
1761
|
+
ShowMatchDisplay();
|
1762
|
+
cprintf ("\nDebugging class = %d = %s (%s) ...\n",
|
1763
|
+
LastClass, unicharset.id_to_unichar(LastClass), DebugMode);
|
1764
|
+
ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn);
|
1765
|
+
UpdateMatchDisplay();
|
1766
|
+
}
|
1767
|
+
} /* DebugAdaptiveClassifier */
|
1768
|
+
#endif
|
1769
|
+
|
1770
|
+
/*---------------------------------------------------------------------------*/
|
1771
|
+
void DoAdaptiveMatch(TBLOB *Blob,
                     LINE_STATS *LineStats,
                     ADAPT_RESULTS *Results) {
/*
 ** Parameters:
 **   Blob         blob to be classified
 **   LineStats    statistics for the text line Blob is in
 **   Results      place to put match results
 ** Globals:
 **   PreTrainedTemplates    built-in training templates
 **   AdaptedTemplates       templates adapted for this page
 **   MinNumPermClasses      minimum adapted classes before the
 **                          adapted templates are consulted
 **   tess_cn_matching       forces char-norm matching only
 **   tess_bn_matching       suppresses the char-norm fallback for
 **                          marginal baseline matches
 **   AdaptiveMatcherCalls   call counter, incremented here
 ** Operation: If too few classes have been adapted (or char-norm
 **   matching is forced), the blob is classified against the
 **   pre-trained templates only.  Otherwise it is matched
 **   against the adapted templates first.  When that produces
 **   no match, or only a marginal one (and tess_bn_matching is
 **   off), the pre-trained templates are tried as well.
 **   When the adapted match is accepted and it reports
 **   ambiguities, those ambiguous classes are matched against
 **   the pre-trained templates.  If nothing matched at all,
 **   the blob is classified as noise.
 ** Return: none (results are merged into Results)
 ** Exceptions: none
 */
  AdaptiveMatcherCalls++;
  InitIntFX();

  if (AdaptedTemplates->NumPermClasses < MinNumPermClasses
      || tess_cn_matching) {
    CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
  }
  else {
    UNICHAR_ID *Ambiguities = BaselineClassifier (Blob, LineStats,
                                                  AdaptedTemplates, Results);

    bool NothingMatched = (Results->NumMatches == 0);
    bool OnlyMarginalMatch = (Results->NumMatches > 0
                              && MarginalMatch (Results->BestRating)
                              && !tess_bn_matching);

    if (OnlyMarginalMatch || NothingMatched) {
      CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
    }
    else if (Ambiguities && *Ambiguities >= 0) {
      /* list is non-empty: first entry is a real class id, not the
         negative terminator */
      AmbigClassifier(Blob, LineStats, PreTrainedTemplates,
                      Ambiguities, Results);
    }
  }

  if (Results->NumMatches == 0)
    ClassifyAsNoise(Blob, LineStats, Results);
}                                /* DoAdaptiveMatch */
|
1831
|
+
|
1832
|
+
/*---------------------------------------------------------------------------*/
|
1833
|
+
void
|
1834
|
+
GetAdaptThresholds (TWERD * Word,
|
1835
|
+
LINE_STATS * LineStats,
|
1836
|
+
const WERD_CHOICE& BestChoice,
|
1837
|
+
const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]) {
|
1838
|
+
/*
|
1839
|
+
** Parameters:
|
1840
|
+
** Word
|
1841
|
+
current word
|
1842
|
+
** LineStats
|
1843
|
+
line stats for row word is in
|
1844
|
+
** BestChoice
|
1845
|
+
best choice for current word with context
|
1846
|
+
** BestRawChoice
|
1847
|
+
best choice for current word without context
|
1848
|
+
** Thresholds
|
1849
|
+
array of thresholds to be filled in
|
1850
|
+
** Globals:
|
1851
|
+
** EnableNewAdaptRules
|
1852
|
+
** GoodAdaptiveMatch
|
1853
|
+
** PerfectRating
|
1854
|
+
** RatingMargin
|
1855
|
+
** Operation: This routine tries to estimate how tight the adaptation
|
1856
|
+
** threshold should be set for each character in the current
|
1857
|
+
** word. In general, the routine tries to set tighter
|
1858
|
+
** thresholds for a character when the current set of templates
|
1859
|
+
** would have made an error on that character. It tries
|
1860
|
+
** to set a threshold tight enough to eliminate the error.
|
1861
|
+
** Two different sets of rules can be used to determine the
|
1862
|
+
** desired thresholds.
|
1863
|
+
** Return: none (results are returned in Thresholds)
|
1864
|
+
** Exceptions: none
|
1865
|
+
** History: Fri May 31 09:22:08 1991, DSJ, Created.
|
1866
|
+
*/
|
1867
|
+
TBLOB *Blob;
|
1868
|
+
const char* BestChoice_string = BestChoice.string().string();
|
1869
|
+
const char* BestChoice_lengths = BestChoice.lengths().string();
|
1870
|
+
const char* BestRawChoice_string = BestRawChoice.string().string();
|
1871
|
+
const char* BestRawChoice_lengths = BestRawChoice.lengths().string();
|
1872
|
+
|
1873
|
+
if (EnableNewAdaptRules && /* new rules */
|
1874
|
+
CurrentBestChoiceIs (BestChoice_string, BestChoice_lengths)) {
|
1875
|
+
FindClassifierErrors(PerfectRating,
|
1876
|
+
GoodAdaptiveMatch,
|
1877
|
+
RatingMargin,
|
1878
|
+
Thresholds);
|
1879
|
+
}
|
1880
|
+
else { /* old rules */
|
1881
|
+
for (Blob = Word->blobs;
|
1882
|
+
Blob != NULL;
|
1883
|
+
Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++),
|
1884
|
+
BestRawChoice_string += *(BestRawChoice_lengths++), Thresholds++)
|
1885
|
+
if (*(BestChoice_lengths) == *(BestRawChoice_lengths) &&
|
1886
|
+
strncmp(BestChoice_string, BestRawChoice_string,
|
1887
|
+
*(BestChoice_lengths)) == 0)
|
1888
|
+
*Thresholds = GoodAdaptiveMatch;
|
1889
|
+
else {
|
1890
|
+
/* the blob was incorrectly classified - find the rating threshold
|
1891
|
+
needed to create a template which will correct the error with
|
1892
|
+
some margin. However, don't waste time trying to make
|
1893
|
+
templates which are too tight. */
|
1894
|
+
*Thresholds = GetBestRatingFor (Blob, LineStats,
|
1895
|
+
unicharset.unichar_to_id(
|
1896
|
+
BestChoice_string,
|
1897
|
+
*BestChoice_lengths));
|
1898
|
+
*Thresholds *= (1.0 - RatingMargin);
|
1899
|
+
if (*Thresholds > GoodAdaptiveMatch)
|
1900
|
+
*Thresholds = GoodAdaptiveMatch;
|
1901
|
+
if (*Thresholds < PerfectRating)
|
1902
|
+
*Thresholds = PerfectRating;
|
1903
|
+
}
|
1904
|
+
}
|
1905
|
+
} /* GetAdaptThresholds */
|
1906
|
+
|
1907
|
+
/*---------------------------------------------------------------------------*/
|
1908
|
+
UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
                           LINE_STATS *LineStats,
                           CLASS_ID CorrectClass) {
/*
 ** Parameters:
 **   Blob            blob to get classification ambiguities for
 **   LineStats       statistics for the text line blob is in
 **   CorrectClass    correct class for Blob
 ** Globals:
 **   CurrentRatings         set here; read by CompareCurrentRatings()
 **   PreTrainedTemplates    built-in templates
 ** Operation: Matches the blob against the built-in templates,
 **   discards bad matches, and sorts the surviving classes by
 **   rating.  If more than one class survives, or the single
 **   survivor is not CorrectClass, all surviving classes are
 **   reported as ambiguities; otherwise the list is empty.
 ** Return: Array of class ids terminated by -1, allocated with
 **   Emalloc().  It is not freed here.
 ** Exceptions: none
 */
  ADAPT_RESULTS Results;

  EnterClassifyMode;

  Results.NumMatches = 0;
  Results.BestRating = WORST_POSSIBLE_RATING;
  Results.BestClass = NO_CLASS;
  Results.BestConfig = 0;
  InitMatcherRatings (Results.Ratings);

  CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results);
  RemoveBadMatches(&Results);

  /* save ratings in a global so that CompareCurrentRatings() can see them */
  CurrentRatings = Results.Ratings;
  qsort ((void *) (Results.Classes), Results.NumMatches,
         sizeof (CLASS_ID), CompareCurrentRatings);

  /* room for every match plus the terminator */
  UNICHAR_ID *Ambiguities =
    (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) *
                            (Results.NumMatches + 1));

  /* nothing is copied when the correct class is the only class matched */
  int NumToCopy = 0;
  if (Results.NumMatches > 1 ||
      (Results.NumMatches == 1 && Results.Classes[0] != CorrectClass))
    NumToCopy = Results.NumMatches;

  for (int Next = 0; Next < NumToCopy; Next++)
    Ambiguities[Next] = Results.Classes[Next];
  Ambiguities[NumToCopy] = -1;

  return (Ambiguities);

}                                /* GetAmbiguities */
|
1967
|
+
|
1968
|
+
/*---------------------------------------------------------------------------*/
|
1969
|
+
int GetBaselineFeatures(TBLOB *Blob,
                        LINE_STATS *LineStats,
                        INT_TEMPLATES Templates,
                        INT_FEATURE_ARRAY IntFeatures,
                        CLASS_NORMALIZATION_ARRAY CharNormArray,
                        inT32 *BlobLength) {
/*
 ** Parameters:
 **   Blob             blob to extract features from
 **   LineStats        statistics about text row blob is in
 **   Templates        passed to ClearCharNormArray()
 **   IntFeatures      array to fill with integer features
 **   CharNormArray    array cleared via ClearCharNormArray()
 **   BlobLength       receives the number of pico-features
 ** Globals:
 **   EnableIntFX    when set, GetIntBaselineFeatures() does the work
 **   NormMethod     set to baseline before extraction
 ** Operation: When EnableIntFX is set the call is forwarded to
 **   GetIntBaselineFeatures().  Otherwise baseline-normalized
 **   pico-features are extracted, converted to integer form in
 **   IntFeatures, and CharNormArray is cleared.  *BlobLength is
 **   set to the pico-feature count even when that count is
 **   rejected as unlikely.
 ** Return: Number of pico-features, or 0 if there were more than
 **   UNLIKELY_NUM_FEAT of them.
 ** Exceptions: none
 */
  if (EnableIntFX)
    return (GetIntBaselineFeatures (Blob, LineStats, Templates,
                                    IntFeatures, CharNormArray, BlobLength));

  NormMethod = baseline;
  FEATURE_SET Features = ExtractPicoFeatures (Blob, LineStats);

  int NumFeatures = Features->NumFeatures;
  *BlobLength = NumFeatures;

  int NumReturned;
  if (NumFeatures > UNLIKELY_NUM_FEAT) {
    /* implausibly many features: report failure */
    NumReturned = 0;
  }
  else {
    ComputeIntFeatures(Features, IntFeatures);
    ClearCharNormArray(Templates, CharNormArray);
    NumReturned = NumFeatures;
  }

  FreeFeatureSet(Features);
  return (NumReturned);

}                                /* GetBaselineFeatures */
|
2026
|
+
|
2027
|
+
/*---------------------------------------------------------------------------*/
|
2028
|
+
FLOAT32 GetBestRatingFor(TBLOB *Blob,
                         LINE_STATS *LineStats,
                         CLASS_ID ClassId) {
/*
 ** Parameters:
 **   Blob         blob to get best rating for
 **   LineStats    statistics about text line blob is in
 **   ClassId      class blob is to be compared to
 ** Globals:
 **   PreTrainedTemplates    built-in templates
 **   AdaptedTemplates       current set of adapted templates
 **   AllProtosOn            mask enabling all protos
 **   AllConfigsOn           mask enabling all configs
 ** Operation: Matches Blob against ClassId in the pre-trained
 **   templates (char-norm features) and in the adapted templates
 **   (baseline features, permanent protos/configs only).  A
 **   template set is skipped when the class is unused in it or
 **   when no features could be extracted; its rating then stays
 **   at 1.0.
 ** Return: The smaller of the two ratings; 1.0 if ClassId is not
 **   a legal class id.
 ** Exceptions: none
 */
  INT_RESULT_STRUCT CNResult, BLResult;
  inT32 BlobLength;

  if (!LegalClassId (ClassId))
    return (1.0);

  /* default rating for a template set that cannot be matched */
  BLResult.Rating = 1.0;
  CNResult.Rating = BLResult.Rating;

  /* pre-trained templates, character-normalized features */
  if (!UnusedClassIdIn (PreTrainedTemplates, ClassId)) {
    INT_FEATURE_ARRAY CNFeatures;
    CLASS_NORMALIZATION_ARRAY CNAdjust;
    int NumCNFeatures = GetCharNormFeatures (Blob, LineStats,
                                             PreTrainedTemplates,
                                             CNFeatures, CNAdjust,
                                             &BlobLength);
    if (NumCNFeatures > 0) {
      CLASS_INDEX CNIndex = PreTrainedTemplates->IndexFor[ClassId];

      SetCharNormMatch();
      IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
                      AllProtosOn, AllConfigsOn,
                      BlobLength, NumCNFeatures, CNFeatures,
                      CNAdjust[CNIndex], &CNResult, NO_DEBUG);
    }
  }

  /* adapted templates, baseline-normalized features */
  if (!UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) {
    INT_FEATURE_ARRAY BLFeatures;
    CLASS_NORMALIZATION_ARRAY BLAdjust;
    int NumBLFeatures = GetBaselineFeatures (Blob, LineStats,
                                             AdaptedTemplates->Templates,
                                             BLFeatures, BLAdjust,
                                             &BlobLength);
    if (NumBLFeatures > 0) {
      CLASS_INDEX BLIndex = AdaptedTemplates->Templates->IndexFor[ClassId];

      SetBaseLineMatch();
      IntegerMatcher (ClassForClassId
                      (AdaptedTemplates->Templates, ClassId),
                      AdaptedTemplates->Class[BLIndex]->PermProtos,
                      AdaptedTemplates->Class[BLIndex]->PermConfigs,
                      BlobLength, NumBLFeatures, BLFeatures,
                      BLAdjust[BLIndex], &BLResult, NO_DEBUG);
    }
  }

  return (MIN (BLResult.Rating, CNResult.Rating));

}                                /* GetBestRatingFor */
|
2102
|
+
|
2103
|
+
/*---------------------------------------------------------------------------*/
|
2104
|
+
int GetCharNormFeatures(TBLOB *Blob,
                        LINE_STATS *LineStats,
                        INT_TEMPLATES Templates,
                        INT_FEATURE_ARRAY IntFeatures,
                        CLASS_NORMALIZATION_ARRAY CharNormArray,
                        inT32 *BlobLength) {
/*
 ** Parameters:
 **   Blob             blob to extract features from
 **   LineStats        statistics about text row blob is in
 **   Templates        used to compute char norm adjustments
 **   IntFeatures      array to fill with integer features
 **   CharNormArray    array to fill with char norm adjustments
 **   BlobLength       receives the blob length
 ** Globals: none directly
 ** Operation: Forwards every argument unchanged to
 **   GetIntCharNormFeatures(), which does all of the work.
 ** Return: Whatever GetIntCharNormFeatures() returns.
 ** Exceptions: none
 */
  int NumFeatures = GetIntCharNormFeatures (Blob, LineStats, Templates,
                                            IntFeatures, CharNormArray,
                                            BlobLength);
  return (NumFeatures);
}                                /* GetCharNormFeatures */
|
2140
|
+
|
2141
|
+
/*---------------------------------------------------------------------------*/
|
2142
|
+
int GetIntBaselineFeatures(TBLOB *Blob,
                           LINE_STATS *LineStats,
                           INT_TEMPLATES Templates,
                           INT_FEATURE_ARRAY IntFeatures,
                           CLASS_NORMALIZATION_ARRAY CharNormArray,
                           inT32 *BlobLength) {
/*
 ** Parameters:
 **   Blob             blob to extract features from
 **   LineStats        not used by this routine
 **   Templates        passed to ClearCharNormArray()
 **   IntFeatures      array to fill with integer features
 **   CharNormArray    array cleared via ClearCharNormArray()
 **   BlobLength       receives FXInfo.NumBL
 ** Globals:
 **   FeaturesHaveBeenExtracted    TRUE once the extractor has run
 **   FeaturesOK                   result of that extraction
 **   BaselineFeatures             extracted baseline features
 **   CharNormFeatures             extracted char norm features
 **   FXInfo                       misc. extractor output
 ** Operation: Runs ExtractIntFeat() unless it has already been run
 **   (as recorded in FeaturesHaveBeenExtracted), caching the
 **   results in globals.  On success the cached baseline features
 **   are copied into IntFeatures and CharNormArray is cleared.
 ** Return: FXInfo.NumBL, or 0 if feature extraction failed.
 ** Exceptions: none
 */
  if (!FeaturesHaveBeenExtracted) {
    FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures,
                                 CharNormFeatures, &FXInfo);
    FeaturesHaveBeenExtracted = TRUE;
  }

  /* the length is reported on both the success and the failure path */
  *BlobLength = FXInfo.NumBL;
  if (!FeaturesOK)
    return (0);

  INT_FEATURE From = BaselineFeatures;
  INT_FEATURE Stop = From + FXInfo.NumBL;
  INT_FEATURE To = IntFeatures;
  while (From < Stop)
    *To++ = *From++;

  ClearCharNormArray(Templates, CharNormArray);
  return (FXInfo.NumBL);

}                                /* GetIntBaselineFeatures */
|
2203
|
+
|
2204
|
+
/*---------------------------------------------------------------------------*/
|
2205
|
+
int GetIntCharNormFeatures(TBLOB *Blob,
                           LINE_STATS *LineStats,
                           INT_TEMPLATES Templates,
                           INT_FEATURE_ARRAY IntFeatures,
                           CLASS_NORMALIZATION_ARRAY CharNormArray,
                           inT32 *BlobLength) {
/*
 ** Parameters:
 **   Blob             blob to extract features from
 **   LineStats        statistics about text row blob is in
 **   Templates        used to compute char norm adjustments
 **   IntFeatures      array to fill with integer features
 **   CharNormArray    array to fill with char norm adjustments
 **   BlobLength       receives FXInfo.NumBL
 ** Globals:
 **   FeaturesHaveBeenExtracted    TRUE once the extractor has run
 **   FeaturesOK                   result of that extraction
 **   BaselineFeatures             extracted baseline features
 **   CharNormFeatures             extracted char norm features
 **   FXInfo                       misc. extractor output
 **   CharNormDesc                 descriptor for the norm feature
 ** Operation: Runs ExtractIntFeat() unless it has already been run
 **   (as recorded in FeaturesHaveBeenExtracted), caching the
 **   results in globals.  On success the cached char norm
 **   features are copied into IntFeatures.  A temporary
 **   character normalization feature is then built from FXInfo
 **   (Y mean relative to the baseline, outline length, Rx, Ry,
 **   all scaled by the line's scale factor) and handed to
 **   ComputeIntCharNormArray() to fill CharNormArray.
 ** Return: FXInfo.NumCN, or 0 if feature extraction failed.
 ** Exceptions: none
 */
  if (!FeaturesHaveBeenExtracted) {
    FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures,
                                 CharNormFeatures, &FXInfo);
    FeaturesHaveBeenExtracted = TRUE;
  }

  /* the length is reported on both the success and the failure path */
  *BlobLength = FXInfo.NumBL;
  if (!FeaturesOK)
    return (0);

  INT_FEATURE From = CharNormFeatures;
  INT_FEATURE Stop = From + FXInfo.NumCN;
  INT_FEATURE To = IntFeatures;
  while (From < Stop)
    *To++ = *From++;

  FEATURE NormFeature = NewFeature (&CharNormDesc);
  FLOAT32 Baseline = BaselineAt (LineStats, FXInfo.Xmean);
  FLOAT32 Scale = ComputeScaleFactor (LineStats);
  NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale;
  NormFeature->Params[CharNormLength] =
    FXInfo.Length * Scale / LENGTH_COMPRESSION;
  NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale;
  NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale;
  ComputeIntCharNormArray(NormFeature, Templates, CharNormArray);
  FreeFeature(NormFeature);

  return (FXInfo.NumCN);

}                                /* GetIntCharNormFeatures */
|
2278
|
+
|
2279
|
+
/*---------------------------------------------------------------------------*/
|
2280
|
+
void InitMatcherRatings(register FLOAT32 *Rating) {
|
2281
|
+
/*
|
2282
|
+
** Parameters:
|
2283
|
+
** Rating
|
2284
|
+
ptr to array of ratings to be initialized
|
2285
|
+
** Globals: none
|
2286
|
+
** Operation: This routine initializes the best rating for each class
|
2287
|
+
** to be the worst possible rating (1.0).
|
2288
|
+
** Return: none
|
2289
|
+
** Exceptions: none
|
2290
|
+
** History: Tue Mar 12 13:43:28 1991, DSJ, Created.
|
2291
|
+
*/
|
2292
|
+
register FLOAT32 *LastRating;
|
2293
|
+
register FLOAT32 WorstRating = WORST_POSSIBLE_RATING;
|
2294
|
+
|
2295
|
+
for (LastRating = Rating + MAX_CLASS_ID;
|
2296
|
+
Rating <= LastRating; *Rating++ = WorstRating);
|
2297
|
+
|
2298
|
+
} /* InitMatcherRatings */
|
2299
|
+
|
2300
|
+
/*---------------------------------------------------------------------------*/
|
2301
|
+
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
                           CLASS_ID ClassId,
                           int NumFeatures,
                           INT_FEATURE_ARRAY Features,
                           FEATURE_SET FloatFeatures) {
/*
 ** Parameters:
 **   Templates        adapted templates to add new config to
 **   ClassId          class id to associate with new config
 **   NumFeatures      number of features in Features
 **   Features         integer features describing the new config
 **   FloatFeatures    floating-pt representation of features
 ** Globals:
 **   AllProtosOn             mask to enable all protos
 **   AllConfigsOff           mask to disable all configs
 **   AllConfigsOn            mask to enable all configs
 **   TempProtoMask           scratch mask: protos used by new config
 **   LearningDebugLevel      controls diagnostic output
 **   NumAdaptationsFailed    incremented on each failure
 ** Operation: Finds the existing protos of the class that match the
 **   features well and marks them in TempProtoMask.  The
 **   features not explained by those protos are turned into
 **   new temporary protos (also added to the mask).  A new
 **   config is then added to the class and a temporary config
 **   recording the mask is attached to it.
 ** Return: The id of the new config, or -1 if the class already
 **   has MAX_NUM_CONFIGS configs or no more protos could be
 **   added.
 ** Exceptions: none
 */
  PROTO_ID OldProtos[MAX_NUM_PROTOS];
  FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
  int BlobLength = 0;            /* the matchers are always given 0 here */

  int debug_level = (LearningDebugLevel >= 3)
    ? (PRINT_MATCH_SUMMARY | PRINT_FEATURE_MATCHES | PRINT_PROTO_MATCHES)
    : NO_DEBUG;

  CLASS_INDEX ClassIndex = Templates->Templates->IndexFor[ClassId];
  INT_CLASS IClass = ClassForClassId (Templates->Templates, ClassId);
  ADAPT_CLASS Class = Templates->Class[ClassIndex];

  if (IClass->NumConfigs >= MAX_NUM_CONFIGS) {
    ++NumAdaptationsFailed;
    if (LearningDebugLevel >= 1)
      cprintf ("Cannot make new temporary config: maximum number exceeded.\n");
    return -1;
  }

  /* remembered so the number of newly created protos can be reported */
  int OldMaxProtoId = IClass->NumProtos - 1;

  /* step 1: existing protos that already fit the features */
  int NumOldProtos = FindGoodProtos (IClass, AllProtosOn, AllConfigsOff,
                                     BlobLength, NumFeatures, Features,
                                     OldProtos, debug_level);

  int MaskSize = WordsInVectorOfSize (MAX_NUM_PROTOS);
  zero_all_bits(TempProtoMask, MaskSize);
  for (int Next = 0; Next < NumOldProtos; Next++)
    SET_BIT (TempProtoMask, OldProtos[Next]);

  /* step 2: features those protos do not account for */
  int NumBadFeatures = FindBadFeatures (IClass, TempProtoMask, AllConfigsOn,
                                        BlobLength, NumFeatures, Features,
                                        BadFeatures, debug_level);

  /* step 3: new temporary protos covering the leftover features */
  int MaxProtoId = MakeNewTempProtos (FloatFeatures, NumBadFeatures,
                                      BadFeatures, IClass, Class,
                                      TempProtoMask);
  if (MaxProtoId == NO_PROTO) {
    ++NumAdaptationsFailed;
    if (LearningDebugLevel >= 1)
      cprintf ("Cannot make new temp protos: maximum number exceeded.\n");
    return -1;
  }

  /* step 4: create the config and attach its temporary record */
  int ConfigId = AddIntConfig (IClass);
  ConvertConfig(TempProtoMask, ConfigId, IClass);
  TEMP_CONFIG Config = NewTempConfig (MaxProtoId);
  TempConfigFor (Class, ConfigId) = Config;
  copy_all_bits (TempProtoMask, Config->Protos, Config->ProtoVectorSize);

  if (LearningDebugLevel >= 1)
    cprintf ("Making new temp config %d using %d old and %d new protos.\n",
             ConfigId, NumOldProtos, MaxProtoId - OldMaxProtoId);

  return ConfigId;
}                                /* MakeNewTemporaryConfig */
|
2399
|
+
|
2400
|
+
/*---------------------------------------------------------------------------*/
|
2401
|
+
PROTO_ID
MakeNewTempProtos (FEATURE_SET Features,
                   int NumBadFeat,
                   FEATURE_ID BadFeat[],
                   INT_CLASS IClass,
                   ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) {
/*
 ** Parameters:
 **   Features         floating-pt features describing new character
 **   NumBadFeat       number of bad features to turn into protos
 **   BadFeat          feature id's of bad features
 **   IClass           integer class templates to add new protos to
 **   Class            adapted class templates to add new protos to
 **   TempProtoMask    proto mask to add new protos to
 ** Globals:
 **   MaxAngleDelta    largest direction difference allowed
 **                    between features merged into one proto
 ** Operation: Walks BadFeat and groups consecutive entries into
 **   runs.  A run is extended while the next feature's
 **   direction stays within MaxAngleDelta of the run's first
 **   feature and its X and Y stay within the accumulated
 **   segment length of that first feature.  Each run becomes
 **   one temporary proto whose position is the midpoint of the
 **   run's first and last features, whose angle is that of the
 **   first feature and whose length is the accumulated segment
 **   length.  The proto is added to IClass and its proto
 **   pruner, pushed onto Class->TempProtos, and set in
 **   TempProtoMask.
 ** Return: Max proto id in IClass after all protos have been added,
 **   or NO_PROTO as soon as AddIntProto() fails (protos
 **   created before the failure are kept).
 ** Exceptions: none
 */
  FEATURE_ID *LastBad = BadFeat + NumBadFeat;
  FEATURE_ID *RunStart = BadFeat;

  while (RunStart < LastBad) {
    FEATURE First = Features->Features[*RunStart];
    FLOAT32 X1 = First->Params[PicoFeatX];
    FLOAT32 Y1 = First->Params[PicoFeatY];
    FLOAT32 A1 = First->Params[PicoFeatDir];

    /* grow the run one feature at a time */
    FEATURE_ID *RunEnd = RunStart + 1;
    FLOAT32 SegmentLength = GetPicoFeatureLength ();
    while (RunEnd < LastBad) {
      FEATURE Candidate = Features->Features[*RunEnd];
      FLOAT32 CandX = Candidate->Params[PicoFeatX];
      FLOAT32 CandY = Candidate->Params[PicoFeatY];
      FLOAT32 CandDir = Candidate->Params[PicoFeatDir];

      /* directions wrap around at 1.0, so take the shorter way round */
      FLOAT32 AngleDelta = fabs (A1 - CandDir);
      if (AngleDelta > 0.5)
        AngleDelta = 1.0 - AngleDelta;

      if (AngleDelta > MaxAngleDelta ||
          fabs (X1 - CandX) > SegmentLength ||
          fabs (Y1 - CandY) > SegmentLength)
        break;

      RunEnd++;
      SegmentLength += GetPicoFeatureLength ();
    }

    /* last feature that was accepted into the run */
    FEATURE Last = Features->Features[*(RunEnd - 1)];
    FLOAT32 X2 = Last->Params[PicoFeatX];
    FLOAT32 Y2 = Last->Params[PicoFeatY];

    PROTO_ID Pid = AddIntProto (IClass);
    if (Pid == NO_PROTO)
      return (NO_PROTO);

    TEMP_PROTO TempProto = NewTempProto ();
    PROTO Proto = &(TempProto->Proto);

    /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
       ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
       instead of the -0.25 to 0.75 used in baseline normalization */
    Proto->Length = SegmentLength;
    Proto->Angle = A1;
    Proto->X = (X1 + X2) / 2.0;
    Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
    FillABC(Proto);

    TempProto->ProtoId = Pid;
    SET_BIT(TempProtoMask, Pid);

    ConvertProto(Proto, Pid, IClass);
    AddProtoToProtoPruner(Proto, Pid, IClass);

    Class->TempProtos = push (Class->TempProtos, TempProto);

    /* the next run starts where this one stopped */
    RunStart = RunEnd;
  }
  return (IClass->NumProtos - 1);
}                                /* MakeNewTempProtos */
|
2499
|
+
|
2500
|
+
/*---------------------------------------------------------------------------*/
|
2501
|
+
void MakePermanent(ADAPT_TEMPLATES Templates,
                   CLASS_ID ClassId,
                   int ConfigId,
                   TBLOB *Blob,
                   LINE_STATS *LineStats) {
/*
 **	Parameters:
 **		Templates	current set of adaptive templates
 **		ClassId		class containing config to be made permanent
 **		ConfigId	config to be made permanent
 **		Blob		current blob being adapted to
 **		LineStats	statistics about text line Blob is in
 **	Globals:
 **		LearningDebugLevel	enables the debug listing when >= 1
 **	Operation: Flags config ConfigId of class ClassId as permanent and
 **		updates the permanent class/config counters.  The temporary
 **		protos of the class are then filtered through
 **		MakeTempProtoPerm, the temporary config is released, and the
 **		ambiguity list returned by GetAmbiguities is stored in the
 **		permanent config slot.
 **	Return: none
 **	Exceptions: none
 **	History: Thu Mar 14 15:54:08 1991, DSJ, Created.
 */
  UNICHAR_ID *Ambigs;
  TEMP_CONFIG Config;
  CLASS_INDEX ClassIndex;
  ADAPT_CLASS Class;
  PROTO_KEY ProtoKey;

  ClassIndex = Templates->Templates->IndexFor[ClassId];
  Class = Templates->Class[ClassIndex];
  /* fetch the temp config before it is flagged permanent; it is only freed
     after delete_d because MakeTempProtoPerm looks it up again through
     TempConfigFor */
  Config = TempConfigFor (Class, ConfigId);

  MakeConfigPermanent(Class, ConfigId);
  /* first permanent config of this class => one more permanent class */
  if (Class->NumPermConfigs == 0)
    Templates->NumPermClasses++;
  Class->NumPermConfigs++;

  /* NOTE(review): presumably delete_d removes every list element for which
     the callback returns TRUE - confirm against the list implementation */
  ProtoKey.Templates = Templates;
  ProtoKey.ClassId = ClassId;
  ProtoKey.ConfigId = ConfigId;
  Class->TempProtos = delete_d (Class->TempProtos, &ProtoKey,
                                MakeTempProtoPerm);
  FreeTempConfig(Config);

  Ambigs = GetAmbiguities (Blob, LineStats, ClassId);
  PermConfigFor (Class, ConfigId) = Ambigs;

  if (LearningDebugLevel >= 1) {
    /* the format consumes a single %d, so only ConfigId is passed (the
       former extra Ambigs argument was never printed) */
    cprintf ("Making config %d permanent with ambiguities '",
             ConfigId);
    /* the ambiguity list is walked until the first negative id */
    for (UNICHAR_ID *AmbigsPointer = Ambigs;
         *AmbigsPointer >= 0; ++AmbigsPointer)
      cprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
    cprintf("'.\n");
  }

}                                /* MakePermanent */
|
2559
|
+
|
2560
|
+
/*---------------------------------------------------------------------------*/
|
2561
|
+
int MakeTempProtoPerm(void *item1, //TEMP_PROTO TempProto,
|
2562
|
+
void *item2) { //PROTO_KEY *ProtoKey)
|
2563
|
+
/*
|
2564
|
+
** Parameters:
|
2565
|
+
** TempProto
|
2566
|
+
temporary proto to compare to key
|
2567
|
+
** ProtoKey
|
2568
|
+
defines which protos to make permanent
|
2569
|
+
** Globals: none
|
2570
|
+
** Operation: This routine converts TempProto to be permanent if
|
2571
|
+
** its proto id is used by the configuration specified in
|
2572
|
+
** ProtoKey.
|
2573
|
+
** Return: TRUE if TempProto is converted, FALSE otherwise
|
2574
|
+
** Exceptions: none
|
2575
|
+
** History: Thu Mar 14 18:49:54 1991, DSJ, Created.
|
2576
|
+
*/
|
2577
|
+
CLASS_INDEX ClassIndex;
|
2578
|
+
ADAPT_CLASS Class;
|
2579
|
+
TEMP_CONFIG Config;
|
2580
|
+
TEMP_PROTO TempProto;
|
2581
|
+
PROTO_KEY *ProtoKey;
|
2582
|
+
|
2583
|
+
TempProto = (TEMP_PROTO) item1;
|
2584
|
+
ProtoKey = (PROTO_KEY *) item2;
|
2585
|
+
|
2586
|
+
ClassIndex = ProtoKey->Templates->Templates->IndexFor[ProtoKey->ClassId];
|
2587
|
+
Class = ProtoKey->Templates->Class[ClassIndex];
|
2588
|
+
Config = TempConfigFor (Class, ProtoKey->ConfigId);
|
2589
|
+
|
2590
|
+
if (TempProto->ProtoId > Config->MaxProtoId ||
|
2591
|
+
!test_bit (Config->Protos, TempProto->ProtoId))
|
2592
|
+
return (FALSE);
|
2593
|
+
|
2594
|
+
MakeProtoPermanent (Class, TempProto->ProtoId);
|
2595
|
+
AddProtoToClassPruner (&(TempProto->Proto), ProtoKey->ClassId,
|
2596
|
+
ProtoKey->Templates->Templates);
|
2597
|
+
FreeTempProto(TempProto);
|
2598
|
+
|
2599
|
+
return (TRUE);
|
2600
|
+
|
2601
|
+
} /* MakeTempProtoPerm */
|
2602
|
+
|
2603
|
+
/*---------------------------------------------------------------------------*/
|
2604
|
+
int NumBlobsIn(TWERD *Word) {
/*
 **	Parameters:
 **		Word	word to count blobs in (may be NULL)
 **	Globals: none
 **	Operation: Walks the blob chain starting at Word->blobs, following
 **		the next links, and counts its elements.
 **	Return: Number of blobs in Word; 0 when Word is NULL.
 **	Exceptions: none
 **	History: Thu Mar 14 08:30:27 1991, DSJ, Created.
 */
  int blob_count = 0;

  if (Word != NULL) {
    TBLOB *blob = Word->blobs;
    while (blob != NULL) {
      ++blob_count;
      blob = blob->next;
    }
  }

  return (blob_count);

}                                /* NumBlobsIn */
|
2627
|
+
|
2628
|
+
/*---------------------------------------------------------------------------*/
|
2629
|
+
int NumOutlinesInBlob(TBLOB *Blob) {
/*
 **	Parameters:
 **		Blob	blob to count outlines in (may be NULL)
 **	Globals: none
 **	Operation: Walks the outline chain starting at Blob->outlines,
 **		following the next links, and counts its elements (the
 **		outer outlines of the blob).
 **	Return: Number of outer outlines in Blob; 0 when Blob is NULL.
 **	Exceptions: none
 **	History: Mon Jun 10 15:46:20 1991, DSJ, Created.
 */
  int outline_count = 0;

  if (Blob != NULL) {
    TESSLINE *outline = Blob->outlines;
    while (outline != NULL) {
      ++outline_count;
      outline = outline->next;
    }
  }

  return (outline_count);

}                                /* NumOutlinesInBlob */
|
2653
|
+
|
2654
|
+
/*---------------------------------------------------------------------------*/
|
2655
|
+
void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) {
|
2656
|
+
/*
|
2657
|
+
** Parameters:
|
2658
|
+
** File
|
2659
|
+
open text file to write Results to
|
2660
|
+
** Results
|
2661
|
+
match results to write to File
|
2662
|
+
** Globals: none
|
2663
|
+
** Operation: This routine writes the matches in Results to File.
|
2664
|
+
** Return: none
|
2665
|
+
** Exceptions: none
|
2666
|
+
** History: Mon Mar 18 09:24:53 1991, DSJ, Created.
|
2667
|
+
*/
|
2668
|
+
for (int i = 0; i < Results->NumMatches; ++i) {
|
2669
|
+
cprintf("%s(%d) %.2f ",
|
2670
|
+
unicharset.debug_str(Results->Classes[i]).string(),
|
2671
|
+
Results->Classes[i],
|
2672
|
+
Results->Ratings[Results->Classes[i]] * 100.0);
|
2673
|
+
}
|
2674
|
+
} /* PrintAdaptiveMatchResults */
|
2675
|
+
|
2676
|
+
/*---------------------------------------------------------------------------*/
|
2677
|
+
void RemoveBadMatches(ADAPT_RESULTS *Results) {
/*
 **	Parameters:
 **		Results	contains matches to be filtered
 **	Globals:
 **		BadMatchPad	added to the best rating to get the cut-off
 **		bln_numericmode	selects the numeric-mode filtering rules
 **	Operation: Compacts Results->Classes in place so that only classes
 **		whose rating is <= BestRating + BadMatchPad remain at the
 **		front, then shrinks NumMatches accordingly.  In numeric
 **		mode a surviving class is kept only if it is non-alphabetic
 **		or its text occurs in the roman numeral string; an "l" or
 **		"O" is replaced by "1" or "0" (which inherits its rating)
 **		when that digit itself is rated at or above the cut-off;
 **		any other alphabetic class is dropped.
 **	Return: none
 **	Exceptions: none
 **	History: Tue Mar 12 13:51:03 1991, DSJ, Created.
 */
  static const char* romans = "i v x I V X";
  FLOAT32 *ratings = Results->Ratings;
  CLASS_ID *matches = Results->Classes;
  const FLOAT32 cutoff = Results->BestRating + BadMatchPad;
  int scan;
  int kept = 0;

  if (!bln_numericmode) {
    /* plain mode: keep every class rated at or below the cut-off */
    for (scan = 0; scan < Results->NumMatches; scan++) {
      if (ratings[matches[scan]] <= cutoff)
        matches[kept++] = matches[scan];
    }
  }
  else {
    /* -1 marks a digit that is absent from the character set */
    UNICHAR_ID unichar_id_one = -1;
    if (unicharset.contains_unichar("1"))
      unichar_id_one = unicharset.unichar_to_id("1");
    UNICHAR_ID unichar_id_zero = -1;
    if (unicharset.contains_unichar("0"))
      unichar_id_zero = unicharset.unichar_to_id("0");

    for (scan = 0; scan < Results->NumMatches; scan++) {
      if (!(ratings[matches[scan]] <= cutoff))
        continue;                /* rated worse than the cut-off */

      if (!unicharset.get_isalpha(matches[scan]) ||
          strstr(romans,
                 unicharset.id_to_unichar(matches[scan])) != NULL) {
        matches[kept++] = matches[scan];
      }
      else if (unichar_id_one >= 0 &&
               unicharset.eq(matches[scan], "l") &&
               ratings[unichar_id_one] >= cutoff) {
        /* substitute "1" for "l" and let it take over the rating */
        matches[kept++] = unichar_id_one;
        ratings[unichar_id_one] = ratings[unicharset.unichar_to_id("l")];
      }
      else if (unichar_id_zero >= 0 &&
               unicharset.eq(matches[scan], "O") &&
               ratings[unichar_id_zero] >= cutoff) {
        /* substitute "0" for "O" and let it take over the rating */
        matches[kept++] = unichar_id_zero;
        ratings[unichar_id_zero] = ratings[unicharset.unichar_to_id("O")];
      }
    }
  }

  Results->NumMatches = kept;

}                                /* RemoveBadMatches */
|
2733
|
+
|
2734
|
+
/*----------------------------------------------------------------------------------*/
|
2735
|
+
void RemoveExtraPuncs(ADAPT_RESULTS *Results) {
/*
 **	Parameters:
 **		Results	contains matches to be filtered
 **	Globals: none
 **	Operation: Compacts Results->Classes in place, keeping at most the
 **		first two classes whose text occurs in the punctuation
 **		string and at most the first class whose text occurs in the
 **		digit string.  Every other class is kept.  NumMatches is
 **		reduced to the number of classes kept.
 **	Return: none
 **	Exceptions: none
 **	History: Tue Mar 12 13:51:03 1991, DSJ, Created.
 */
  /*garbage characters */
  static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
  static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
  CLASS_ID *matches = Results->Classes;
  int puncs_seen = 0;            /*no of garbage characters */
  int digits_seen = 0;
  int kept = 0;

  for (int scan = 0; scan < Results->NumMatches; scan++) {
    const char *text = unicharset.id_to_unichar(matches[scan]);

    if (strstr (punc_chars, text) != NULL) {
      /* punctuation: only the first two survive */
      if (puncs_seen < 2)
        matches[kept++] = matches[scan];
      puncs_seen++;              /*count them */
    }
    else if (strstr (digit_chars, text) != NULL) {
      /* digit: only the first one survives */
      if (digits_seen < 1)
        matches[kept++] = matches[scan];
      digits_seen++;
    }
    else {
      matches[kept++] = matches[scan];
    }
  }
  Results->NumMatches = kept;
}                                /* RemoveExtraPuncs */
|
2783
|
+
|
2784
|
+
/*---------------------------------------------------------------------------*/
|
2785
|
+
void SetAdaptiveThreshold(FLOAT32 Threshold) {
/*
 **	Parameters:
 **		Threshold	threshold for creating new templates
 **	Globals:
 **		GoodAdaptiveMatch	default good match rating
 **	Operation: Passes one value to both SetProtoThresh and
 **		SetFeatureThresh: 0.9 when Threshold equals
 **		GoodAdaptiveMatch, otherwise 1.0 - Threshold.
 **	Return: none
 **	Exceptions: none
 **	History: Tue Apr  9 08:33:13 1991, DSJ, Created.
 */
  double matcher_thresh;

  if (Threshold != GoodAdaptiveMatch) {
    /* the blob was probably incorrectly classified */
    matcher_thresh = 1.0 - Threshold;
  }
  else {
    /* the blob was probably classified correctly - use the default rating
       threshold */
    matcher_thresh = 0.9;
  }

  SetProtoThresh (matcher_thresh);
  SetFeatureThresh (matcher_thresh);
}                                /* SetAdaptiveThreshold */
|
2812
|
+
|
2813
|
+
/*---------------------------------------------------------------------------*/
|
2814
|
+
void ShowBestMatchFor(TBLOB *Blob,
                      LINE_STATS *LineStats,
                      CLASS_ID ClassId,
                      BOOL8 AdaptiveOn,
                      BOOL8 PreTrainedOn) {
/*
 **	Parameters:
 **		Blob		blob to show best matching config for
 **		LineStats	statistics for text line Blob is in
 **		ClassId		class whose configs are to be searched
 **		AdaptiveOn	TRUE if adaptive configs are enabled
 **		PreTrainedOn	TRUE if pretrained configs are enabled
 **	Globals:
 **		PreTrainedTemplates	built-in training
 **		AdaptedTemplates	adaptive templates
 **		AllProtosOn		dummy proto mask
 **		AllConfigsOn		dummy config mask
 **		MatchDebugFlags		debug flags for the final matcher run
 **		NormMethod		set to the normalization of the winner
 **	Operation: Matches Blob against the enabled template sets for
 **		ClassId, prints the best config of each, and then re-runs
 **		the integer matcher with debug flags on a single config of
 **		the better-rated set.  For the adaptive set, successive
 **		calls with PreTrainedOn FALSE step through configs 0, 1, ...
 **		after first showing the best one.  If no matcher could be
 **		run (unused class or illegal blob) nothing is re-run.
 **	Return: none
 **	Exceptions: none
 **	History: Fri Mar 22 08:43:52 1991, DSJ, Created.
 */
  int NumCNFeatures = 0, NumBLFeatures = 0;
  INT_FEATURE_ARRAY CNFeatures, BLFeatures;
  INT_RESULT_STRUCT CNResult, BLResult;
  CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
  CLASS_INDEX ClassIndex;
  inT32 BlobLength = 0;
  uinT32 ConfigMask;
  /* config to show on the next adaptive-only call; -1 = show the best one */
  static int next_config = -1;

  if (PreTrainedOn) next_config = -1;

  /* sentinel rating: stays at 2.0 for any set that is not matched below */
  CNResult.Rating = BLResult.Rating = 2.0;

  if (!LegalClassId (ClassId)) {
    cprintf ("%d is not a legal class id!!\n", ClassId);
    return;
  }

  if (PreTrainedOn) {
    if (UnusedClassIdIn (PreTrainedTemplates, ClassId))
      cprintf ("No built-in templates for class %d = %s\n",
               ClassId, unicharset.id_to_unichar(ClassId));
    else {
      NumCNFeatures = GetCharNormFeatures (Blob, LineStats,
                                           PreTrainedTemplates,
                                           CNFeatures, CNAdjust,
                                           &BlobLength);
      if (NumCNFeatures <= 0)
        cprintf ("Illegal blob (char norm features)!\n");
      else {
        ClassIndex = PreTrainedTemplates->IndexFor[ClassId];

        SetCharNormMatch();
        IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
                        AllProtosOn, AllConfigsOn,
                        BlobLength, NumCNFeatures, CNFeatures,
                        CNAdjust[ClassIndex], &CNResult, NO_DEBUG);

        cprintf ("Best built-in template match is config %2d (%4.1f) (cn=%d)\n",
                 CNResult.Config, CNResult.Rating * 100.0, CNAdjust[ClassIndex]);
      }
    }
  }

  if (AdaptiveOn) {
    if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId))
      cprintf ("No AD templates for class %d = %s\n",
               ClassId, unicharset.id_to_unichar(ClassId));
    else {
      NumBLFeatures = GetBaselineFeatures (Blob, LineStats,
                                           AdaptedTemplates->Templates,
                                           BLFeatures, BLAdjust,
                                           &BlobLength);
      if (NumBLFeatures <= 0)
        cprintf ("Illegal blob (baseline features)!\n");
      else {
        ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];

        SetBaseLineMatch();
        IntegerMatcher (ClassForClassId
                        (AdaptedTemplates->Templates, ClassId),
                        AllProtosOn, AllConfigsOn,
                        BlobLength, NumBLFeatures, BLFeatures,
                        BLAdjust[ClassIndex], &BLResult, NO_DEBUG);

#ifndef SECURE_NAMES
        /* ClassIndex was just set for the adapted templates above, so no
           second (shadowing) lookup is needed here */
        ADAPT_CLASS Class = AdaptedTemplates->Class[ClassIndex];
        cprintf ("Best adaptive template match is config %2d (%4.1f) %s\n",
                 BLResult.Config, BLResult.Rating * 100.0,
                 ConfigIsPermanent(Class, BLResult.Config) ? "Perm" : "Temp");
#endif
      }
    }
  }

  cprintf ("\n");
  /* A set is only eligible for the debug re-run if its matcher actually ran
     (feature count > 0); otherwise its result, features and adjust array
     were never written and must not be read. */
  if (NumBLFeatures > 0 && BLResult.Rating < CNResult.Rating) {
    ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId];
    /* NOTE(review): next_config grows by one per call while PreTrainedOn
       stays FALSE; a shift count >= 32 would be undefined - confirm that
       callers bound the number of such calls */
    if (next_config < 0) {
      ConfigMask = (uinT32) 1 << BLResult.Config;
      next_config = 0;
    } else {
      ConfigMask = (uinT32) 1 << next_config;
      ++next_config;
    }
    NormMethod = baseline;

    SetBaseLineMatch();
    /* the single-word mask is handed to the matcher as its config vector */
    IntegerMatcher (ClassForClassId (AdaptedTemplates->Templates, ClassId),
                    AllProtosOn,
                    (BIT_VECTOR) & ConfigMask,
                    BlobLength, NumBLFeatures, BLFeatures,
                    BLAdjust[ClassIndex], &BLResult, MatchDebugFlags);
    cprintf ("Adaptive template match for config %2d is %4.1f\n",
             BLResult.Config, BLResult.Rating * 100.0);
  }
  else if (NumCNFeatures > 0) {
    ClassIndex = PreTrainedTemplates->IndexFor[ClassId];
    ConfigMask = (uinT32) 1 << CNResult.Config;
    NormMethod = character;

    SetCharNormMatch();
    IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
                    AllProtosOn, (BIT_VECTOR) & ConfigMask,
                    BlobLength, NumCNFeatures, CNFeatures,
                    CNAdjust[ClassIndex], &CNResult, MatchDebugFlags);
  }
}                                /* ShowBestMatchFor */
|